def main():
    """Run one simulated session per fake user.

    Parses the command-line arguments, points ``Config`` at the
    ``config.json`` under ``CURRENT_PATH`` and then, ``args.fake_users``
    times, picks a random channel name plus a random video name for that
    channel and passes both to ``run`` with ``delay=1``.
    """
    args = _parse_args()
    config_file = path.join(CURRENT_PATH, 'config.json')
    Config.set_dir(config_file)
    # Only the repetition count matters; the loop index is never used.
    for _ in range(args.fake_users):
        picked_channel = get_random_channel_name()
        picked_video = get_random_video_name(picked_channel)
        run(channel_name=picked_channel, video_name=picked_video, delay=1)
def test_get_login_access_token():
    """Check the token and the URL exposed by a ``Login`` object.

    Builds a ``Login`` against the configured ``PORTAL_SERVER``, validates
    whatever ``login.login`` evaluates to with ``valid_access_token`` and
    asserts that ``login.url`` is the host joined with ``Users/login``.
    """
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    portal_host = Config.instance().get('PORTAL_SERVER')
    login = Login(host=portal_host, cache_path='/cache/path')
    # NOTE(review): ``login.login`` is read as an attribute, not called --
    # presumably a property; confirm against the Login class.
    valid_access_token(login.login)
    assert login.url == path.join(login.host, 'Users/login')
Beispiel #3
0
def _kill_comment_publishers():
    """Run the two ``pkill -15`` commands aimed at the comment publisher scripts."""
    # NOTE(review): pkill normally takes a single pattern, while these commands
    # pass two words ('python' and the script path) -- confirm they actually
    # match the intended processes.
    os.system('pkill -15 python ./publish_comment_md.py')
    os.system('pkill -15 python ./publish_comment_redis.py')


def main():
    """Consume the ``online_update_comment`` queue until interrupted or failing.

    Loads the configuration, opens a durable ``RabbitMqTasks`` connection to
    ``localhost`` and keeps calling ``consume(callback)``. On a keyboard
    interrupt or any other exception the publisher scripts are signalled,
    the event is logged and the loop ends. The connection is closed in a
    ``finally`` clause so it is released however the loop is left.
    """
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    rabbitmq = RabbitMqTasks('localhost',
                             exchange='',
                             queue_name='online_update_comment',
                             durable=True)
    try:
        while True:
            try:
                rabbitmq.consume(callback)
            except KeyboardInterrupt:
                _kill_comment_publishers()
                logger.warning(
                    'keyboard interrupt, then kill publish_comment_md.py & publish_comment_redis.py'
                )
                break
            except Exception as e:
                _kill_comment_publishers()
                # This branch previously logged "keyboard interrupt" as well,
                # which misreported the reason for the shutdown.
                logger.warning(
                    'unexpected exception, then kill publish_comment_md.py & publish_comment_redis.py'
                )
                logger.error('main exception: {}'.format(e))
                break
    finally:
        rabbitmq.close()
Beispiel #4
0
def test_get_video_comment():
    """Check the shape of the comments returned for one fixed video id.

    Expects ``get_video_comment`` to return a dict whose entry for the
    requested video id is a list, and hands that list to ``_valid_comment``.
    """
    video_id = 'RnAXPLG_di8'
    Config.set_dir(path.join(os.getcwd(), 'src/config.json'))
    yt_comments = YoutubeComments(Config.instance().get('YOUTUBE_API_KEYS')[0])
    video_comments_detail = yt_comments.get_video_comment(video_id)
    # isinstance() instead of an exact type comparison, so dict/list
    # subclasses are accepted as well.
    assert isinstance(video_comments_detail, dict)
    comments = video_comments_detail.get(video_id)
    assert isinstance(comments, list)
    _valid_comment(comments)
def main():
    """Fetch the comments of one video and save them as unlabeled data.

    Parses the command-line arguments and loads the configuration. When no
    video id was supplied the error is logged and the function returns
    without querying. Otherwise the comments for ``args.video_id`` are
    fetched with a ``YoutubeComments`` client built from
    ``args.youtube_api_key`` and passed to ``CommentsUnlabelData().save``
    together with ``TRAIN_DIR``.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    if not args.video_id:
        logger.error('main fail with: video id ')
        # Stop here: continuing would query the API with an empty video id
        # and save whatever came back.
        return
    yt_comments = YoutubeComments(args.youtube_api_key)
    video_comment_detail = yt_comments.get_video_comment(args.video_id)
    CommentsUnlabelData().save(TRAIN_DIR, video_comment_detail)
def _get_channels_id_code(channels_id):
    """Yield ``(channelId, code)`` pairs for the channels of ``YoutubeChannel``.

    Args:
        channels_id: Collection of channel ids to keep. When it is empty or
            ``None`` no filtering is applied and every channel is yielded.

    Yields:
        tuple: ``(channel.get('channelId'), channel.get('code'))`` for each
        channel that passes the filter.
    """
    yt_channels = YoutubeChannel(host=Config.instance().get('PORTAL_SERVER'),
                                 cache_path=Config.instance().get('CACHE_DIR'))
    for channel in yt_channels:
        channel_id = channel.get('channelId')
        # Truthiness check instead of ``!= []`` so that None (e.g. an unset
        # command-line option) means "no filter" rather than a TypeError.
        if channels_id and channel_id not in channels_id:
            continue

        yield channel_id, channel.get('code')
def get_random_channel_name():
    """Return the ``channelName`` of one channel picked by ``get_random_index``.

    The channels are loaded through ``YoutubeChannel`` with a filter that
    requests only the ``channelName`` field.
    """
    name_only_filter = {"fields": {"channelName": True}}
    portal_host = Config.instance().get('PORTAL_SERVER')
    cache_dir = Config.instance().get('CACHE_DIR')
    channels = YoutubeChannel(host=portal_host,
                              cache_path=cache_dir,
                              filter_params=name_only_filter)
    picked = channels[get_random_index(len(channels))]
    return picked.get('channelName')
def get_random_video_name(channel_name):
    """Return the ``videoName`` of one video picked by ``get_random_index``.

    Args:
        channel_name: Value used in the ``where`` filter on ``channelName``
            when loading the videos through ``YoutubeVideo``.
    """
    by_channel_filter = {"where": {"channelName": channel_name}}
    portal_host = Config.instance().get('PORTAL_SERVER')
    cache_dir = Config.instance().get('CACHE_DIR')
    candidates = YoutubeVideo(host=portal_host,
                              cache_path=cache_dir,
                              filter_params=by_channel_filter)
    picked = candidates[get_random_index(len(candidates))]
    return picked.get('videoName')
Beispiel #9
0
 def _modify_video_update_times(self, video_id):
     """Increment ``updateTimes`` on every stored video matching ``video_id``.

     Args:
         video_id: Video id used both in the ``where`` filter and in the
             per-record check before patching.
     """
     portal_host = Config.instance().get('PORTAL_SERVER')
     cache_dir = Config.instance().get('CACHE_DIR')
     yt_video = YoutubeVideo(host=portal_host,
                             cache_path=cache_dir,
                             filter_params={"where": {"videoId": video_id}})
     for video in yt_video:
         # Skip any record whose id differs from the requested one.
         if video.get('videoId', '') != video_id:
             continue
         record_id = video['id']
         # A missing counter is treated as 0 before incrementing.
         bumped = video.get('updateTimes', 0) + 1
         yt_video.patch(id=record_id, json_data={'updateTimes': bumped})
 def _get_videos_detail(self, channel_id):
     """Build a ``YoutubeVideo`` filtered to a channel's videos with ``updateTimes`` 0.

     Args:
         channel_id: Value for the ``channelId`` condition of the filter.

     Returns:
         The ``YoutubeVideo`` object constructed with that filter.
     """
     where_clause = {"channelId": channel_id, "updateTimes": 0}
     portal_host = Config.instance().get('PORTAL_SERVER')
     cache_dir = Config.instance().get('CACHE_DIR')
     return YoutubeVideo(host=portal_host,
                         cache_path=cache_dir,
                         filter_params={"where": where_clause})
Beispiel #11
0
def test_api_get_access_token():
    """Check that ``Api.update_params_token`` keeps the filter and adds a token.

    The returned params must be a dict, still carry the original ``where``
    clause and contain an ``access_token`` accepted by ``valid_access_token``.
    """
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    portal_host = Config.instance().get('PORTAL_SERVER')
    cache_dir = Config.instance().get('CACHE_DIR')
    api = Api(host=portal_host,
              target_path='Youtube_channels',
              cache_path=cache_dir)
    request_filter = {'where': {'channelId': 'UC6FcYHEm7SO1jpu5TKjNXEA'}}
    params = api.update_params_token(params=request_filter)
    assert isinstance(params, dict)
    # Compare against a fresh literal, not the dict that was handed to the API.
    expected_where = {'channelId': 'UC6FcYHEm7SO1jpu5TKjNXEA'}
    assert params.get('where') == expected_where
    valid_access_token(params.get('access_token'))
def main():
    """Update the commenter statistics collection of every selected channel.

    For each ``(channel_id, code)`` pair from ``_get_channels_id_code`` the
    rows produced by ``_get_commenter_statistic(code)`` are collected and
    passed to ``MdStatisticCommenter.update_data`` for the collection named
    by ``_get_statistic_collection(channel_id)``.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    # All pairs are collected up front, before any statistics are processed.
    channel_pairs = list(_get_channels_id_code(args.channels_id))
    for channel_id, code in channel_pairs:
        rows = list(_get_commenter_statistic(code))
        MdStatisticCommenter(
            cluster=args.cluster,
            db=args.db,
            collection=_get_statistic_collection(channel_id),
        ).update_data(rows)
Beispiel #13
0
def test_get_md_data():
    """Check that one known comment id resolves to a single, complete document.

    Queries ``Mongodb.get`` by ``commentId`` and asserts that exactly one
    dict comes back containing the ``videoId``, ``author`` and ``text`` keys.
    """
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    md = Mongodb(cluster_name='raw-comment-chinese',
                 db_name='comment-chinese',
                 collection_name='comment-UC6FcYHEm7SO1jpu5TKjNXEA')
    matches = list(md.get({'commentId': 'Ugw-4khRtnDqAAmdp1Z4AaABAg'}))
    assert len(matches) == 1
    record = matches[0]
    assert isinstance(record, dict)
    for required_key in ('videoId', 'author', 'text'):
        assert required_key in record
Beispiel #14
0
 def gen(self):
     """Build and save one comments DataFrame per channel.

     Iterates the channels returned by ``YoutubeChannel`` (only the
     ``channelId`` field is requested). For each channel the parent
     initializer is run again with that channel's collection name, the
     rows from ``gen_comment_dataset`` are collected into a DataFrame with
     ``self.columns`` and the frame is handed to ``_save``.
     """
     portal_host = Config.instance().get('PORTAL_SERVER')
     cache_dir = Config.instance().get('CACHE_DIR')
     channels = YoutubeChannel(host=portal_host,
                               cache_path=cache_dir,
                               filter_params={"fields": {"channelId": True}})
     for channel in channels:
         channel_id = channel['channelId']
         target_collection = self.gen_collection(channel_id)
         # Re-running the parent __init__ presumably re-points this loader at
         # the current channel's collection -- confirm in the base class.
         super(MdCommentLoader, self).__init__(
             cluster_name=self.cluster,
             db_name=self.database,
             collection_name=target_collection)
         logger.info('gen channel id: {} comments data'.format(channel_id))
         frame = pd.DataFrame(list(self.gen_comment_dataset()),
                              columns=self.columns)
         self._save(frame, channel_id)
def main():
    """Run the sentiment updater over the requested channels.

    When no channel ids were given on the command line, the ids of all
    channels returned by ``YoutubeChannel`` are used instead. The resulting
    list and ``args.update_all`` are passed to
    ``MdCommentSentimentUpdater.update_channels``.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    if not args.channels_id:
        # Fall back to every channel; only the channelId field is requested.
        all_channels = YoutubeChannel(
            host=Config.instance().get('PORTAL_SERVER'),
            cache_path=Config.instance().get('CACHE_DIR'),
            filter_params={"fields": {"channelId": True}})
        args.channels_id = [entry.get('channelId') for entry in all_channels]
    updater = MdCommentSentimentUpdater(cluster=args.cluster, db=args.db)
    updater.update_channels(args.channels_id, args.update_all)
def main():
    """Consume the fanout queue and pass each message to the Redis handler.

    Keeps calling ``mq_fanout.consume(redis_handler.callback)`` until a
    keyboard interrupt or any other exception ends the loop. The fanout
    connection is closed exactly once, after the loop has been left.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    mq_fanout = RabbitMqFanout(args.rabbitmq_host, args.rabbitmq_queue)
    redis_handler = RedisHandler(host=args.host, port=args.port, db=args.db)
    # The close() used to sit in a ``finally`` inside the loop, so a normal
    # return from consume() closed the connection and the next iteration
    # consumed on it. Closing now happens once, outside the loop.
    try:
        while True:
            try:
                mq_fanout.consume(redis_handler.callback)
            except KeyboardInterrupt:
                logger.warning('publish_comment_redis keyboard interrupt\n')
                break
            except Exception as e:
                logger.error('publish_comment_redis main exception: {}'.format(e))
                break
    finally:
        mq_fanout.close()
def main():
    """Consume the fanout queue and pass each message to the MongoDB handler.

    Keeps calling ``mq_fanout.consume(md_handler.callback)`` until a
    keyboard interrupt or any other exception ends the loop. The fanout
    connection is closed exactly once, after the loop has been left.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    mq_fanout = RabbitMqFanout(args.rabbitmq_host, args.rabbitmq_queue)
    md_handler = MdHandler(
        cluster=args.cluster,
        database=args.db,
        collection=args.collection)
    # The close() used to sit in a ``finally`` inside the loop, so a normal
    # return from consume() closed the connection and the next iteration
    # consumed on it. Closing now happens once, outside the loop.
    try:
        while True:
            try:
                mq_fanout.consume(md_handler.callback)
            except KeyboardInterrupt:
                logger.warning('keyboard interrupt\n')
                break
            except Exception as e:
                logger.error('main exception: {}'.format(e))
                break
    finally:
        mq_fanout.close()
Beispiel #18
0
def test_api_get():
    """Check that ``Api.get`` returns the expected record for one channel id.

    The response must be a list whose first element equals the full
    expected channel record defined below.
    """
    expected_channel = {
        "id": 1,
        "channelName": "這群人TGOP",
        "channelId": "UC6FcYHEm7SO1jpu5TKjNXEA",
        "location": "TW",
        "category": "Entertainment",
        "language": "Chinese_Traditional",
        "contact": "*****@*****.**",
        "createdAt": "2008-06-07T00:00:00.000Z",
        "updateLock": None,
        "subscriber": None
    }
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    portal_host = Config.instance().get('PORTAL_SERVER')
    cache_dir = Config.instance().get('CACHE_DIR')
    api = Api(host=portal_host,
              target_path='Youtube_channels',
              cache_path=cache_dir)
    response = api.get(
        params={'where': {'channelId': 'UC6FcYHEm7SO1jpu5TKjNXEA'}})
    assert isinstance(response, list)
    assert response[0] == expected_channel
Beispiel #19
0
def main():
    """Push channel videos whose ids are not yet stored.

    Loads the ids of all stored videos, then for every channel asks
    ``YoutubeApi.gen_channel_video`` for up to 50 videos and pushes each
    one for which ``video_id_exist`` is false. With ``args.dry_run`` set
    the function returns right after the first channel's videos were
    generated, before anything is pushed.
    """
    args = _parse_args()
    Config.set_dir(path.join(CURRENT_PATH, 'config.json'))
    youtube_api = YoutubeApi(args.youtube_api_key)
    channels_detail = YoutubeChannel(
        host=Config.instance().get('PORTAL_SERVER'),
        cache_path=Config.instance().get('CACHE_DIR'))
    videos = YoutubeVideo(
        host=Config.instance().get('PORTAL_SERVER'),
        cache_path=Config.instance().get('CACHE_DIR'),
        filter_params={"fields": {"videoId": True}})
    known_video_ids = [stored['videoId'] for stored in videos]
    logger.info('In main loading number of video id: {}'.format(
        len(known_video_ids)))

    for channel in channels_detail:
        channel_id = channel['channelId']
        logger.info('gen videos by channel id {}'.format(channel_id))
        channel_videos = youtube_api.gen_channel_video(channel_id,
                                                       max_result=50)
        if args.dry_run:
            return
        for video_id, detail in channel_videos.items():
            # Guard clause: already stored ids are only reported at debug level.
            if video_id_exist(video_id, known_video_ids):
                logger.debug("Skip due to videoId '{}' exit".format(video_id))
                continue
            try:
                videos.push(detail)
                logger.info("push data: {}".format(detail))
            except Exception as e:
                logger.error(
                    'error at pushing: {}, error message: {}'.format(
                        detail, e))
Beispiel #20
0
def test_get_comments_with_sentiment_score():
    """Check that sentiment scoring keeps the text and adds ``sentimentScore``.

    Feeds one hand-built comment for a fixed video id into
    ``_get_comments_with_sentiment_score`` and asserts that the result has
    an entry for that video whose first comment carries non-None ``text``
    and ``sentimentScore`` values.
    """
    video_id = 'RnAXPLG_di8'
    sample_comment = {
        'commentId': 'comment_id',
        'videoId': video_id,
        'authorChannelId': 'author_channel_id',
        'author': 'author display name',
        'text': 'text_display',
        'likeCount': 0,
        'publishedAt': '2019-06-27T13:49:30Z',
        'updatedAt': '2019-06-27T13:49:30Z',
        'replyCount': 1
    }
    test_data = defaultdict(list)
    test_data[video_id].append(sample_comment)

    Config.set_dir(path.join(os.getcwd(), 'src/config.json'))
    api_key = Config.instance().get('YOUTUBE_API_KEYS')[0]
    yt_comments = YoutubeComments(api_key)
    result = dict(yt_comments._get_comments_with_sentiment_score(test_data))

    assert result.get(video_id) is not None
    first_comment = result.get(video_id)[0]
    assert first_comment.get('text') is not None
    assert first_comment.get('sentimentScore') is not None
Beispiel #21
0
def run_update_comment(channel_id, video_id):
    """Start the configured comment-update command for one video.

    The command line is the ``UPDATE_VIDEO_COMMENT_CMD`` config value
    followed by ``--channel-id`` and ``--video-id``. It is logged and then
    launched through the shell with ``subprocess.Popen``; the process is
    not waited for.

    Args:
        channel_id: Value for the ``--channel-id`` option.
        video_id: Value for the ``--video-id`` option.
    """
    import shlex  # local import: only this function needs it

    # The command runs with shell=True, so both ids are shell-quoted to keep
    # any metacharacters in them from being interpreted by the shell. Ids made
    # only of letters, digits, '_' and '-' are left unchanged by quote().
    cmd = Config.instance().get('UPDATE_VIDEO_COMMENT_CMD') + \
        ' --channel-id {} --video-id {}'.format(
            shlex.quote(str(channel_id)), shlex.quote(str(video_id)))
    logger.info('online_updater run : {}'.format(cmd))
    subprocess.Popen(cmd, shell=True)
def _get_commenter_statistic(code):
    """Yield the tab-split output rows of a per-author comment count query.

    The query groups the ``comment_<code>`` table by ``author`` and is run
    through the command template stored under ``HIVE_CMD`` in the config.

    Args:
        code: Suffix of the ``comment_`` table to query.

    Yields:
        list: The fields of one output line, split on tab characters.
    """
    # NOTE(review): ``code`` is interpolated directly into the SQL text and
    # the command line -- confirm it only ever comes from trusted records.
    hive_sql = '"select author, count(author) from  comment_{} group by author"'.format(
        code)
    # Read the whole output inside the with-block so the pipe is closed
    # before the first row is yielded (it was never closed before).
    # NOTE(review): assumes ``popen`` is ``os.popen`` -- confirm the import.
    with popen(Config.instance().get('HIVE_CMD').format(hive_sql)) as pipe:
        result = pipe.read()
    for line in result.splitlines():
        yield line.split('\t')
Beispiel #23
0
def main():
    """Load the configuration and run ``MdCommentLoader.gen``.

    The loader is created for the ``raw-comment-chinese`` cluster and the
    ``comment-chinese`` database.
    """
    config_file = path.join(CURRENT_DIR, '../config.json')
    Config.set_dir(config_file)
    loader = MdCommentLoader(cluster='raw-comment-chinese',
                             db='comment-chinese')
    loader.gen()
Beispiel #24
0
 def __init__(self):
     """Store the sentiment API URL from the config and start without a session."""
     # No session object exists yet; ``_sess`` starts out as None.
     self._sess = None
     sentiment_api_url = Config.instance().get('SENTIMENT_API_URL')
     self.URL = sentiment_api_url