Exemple #1
0
    def get_ids(self):
        """Build a lookup from YouTube IDs to database primary keys.

        Reads ``channel_id``/``yt_id`` pairs from the ``channel`` table and
        ``video_id``/``yt_id`` pairs from the ``video`` table. As a side
        effect, ``self.channel_ids`` is set to the list of channel YouTube
        IDs.

        Any exception raised while querying is logged through
        ``self.logger`` and swallowed, so the returned mapping may be empty
        or only partially filled.

        Returns:
            dict: ``yt_id`` mapped to the row's ``channel_id`` or
            ``video_id``.
        """
        database = Database()
        id_by_yt_id = {}
        try:
            channel_rows = database.select('channel', 'channel_id', 'yt_id')
            self.channel_ids = [row['yt_id'] for row in channel_rows]
            # Entries are inserted one at a time so that rows processed
            # before a failure are still returned.
            for row in channel_rows:
                id_by_yt_id[row['yt_id']] = row['channel_id']

            video_rows = database.select('video', 'video_id', 'yt_id')
            for row in video_rows:
                id_by_yt_id[row['yt_id']] = row['video_id']
        except Exception as err:
            message = 'Failed to retrieve database IDs. {}'.format(repr(err))
            self.logger.error(message)
        return id_by_yt_id
Exemple #2
0
 def get_views(self, videos):
     """Attach the highest recorded view count to each video, in place.

     The current collect ID is read from the first line of the ``.COLLECT``
     file (path relative to the working directory). ``MAX(view_count)`` per
     ``video_id`` is then selected from ``collect_video`` for rows whose
     ``COLLECT_id`` is greater than ``collect_id - self.days - 10``.

     Args:
         videos: Iterable of dict-like items carrying a ``'video_id'`` key.
             Each item gets its ``'view_count'`` key set; items without a
             matching row are reported on stdout and get ``0``.

     Returns:
         None. ``videos`` is mutated.
     """
     database = Database()
     with open('.COLLECT') as marker:
         collect_id = int(marker.readline().strip())

     oldest_collect = collect_id - self.days - 10
     where_clause = 'COLLECT_id > {} GROUP BY video_id'.format(oldest_collect)
     max_views = database.select(
         'collect_video',
         'video_id',
         'MAX(view_count) as view_count',
         where=[where_clause])

     for video in videos:
         try:
             # First row whose video_id matches; StopIteration means none.
             match = next(
                 row for row in max_views
                 if row['video_id'] == video['video_id'])
         except StopIteration as missing:
             print('Error with video {}:\n{}\n\n'.format(video, repr(missing)))
             video['view_count'] = 0
         else:
             video['view_count'] = match['view_count']
Exemple #3
0
def start_temp_daemon(poll_interval=1):
    """Run a ``YoutubeTemp`` check for every queued target, forever.

    Each cycle starts one ``YoutubeTemp`` worker per entry returned by
    ``Database('Queues').select()``, waits until none of the workers is
    alive, logs that the check finished, and then sleeps for ``sec``
    seconds (``sec`` is a name defined outside this function) before the
    next cycle. The function never returns.

    Args:
        poll_interval: Seconds to pause between liveness polls while
            waiting for the current cycle's workers to finish.
    """
    db = Database('Queues')
    # The logger does not change between cycles, so fetch it once.
    logger = get_logger('YoutubeTemp')
    while True:
        workers = []
        for target_url in db.select():
            worker = YoutubeTemp(target_url)
            workers.append(worker)
            worker.start()

        # Pause between polls instead of spinning: a tight is_alive() loop
        # would keep one CPU core fully busy for the whole check.
        while any(worker.is_alive() for worker in workers):
            sleep(poll_interval)

        logger.info('A check has finished.')
        sleep(sec)
Exemple #4
0
    def collect_info(self, key):
        """Worker loop: collect every channel taken from the API queue.

        For each channel ID obtained from ``self.api_queue`` the method:

        1. resolves the channel's database ID (from ``self.db_ids``, the
           ``channel`` table, or by inserting a new row built from the API
           ``snippet``),
        2. calls ``self.collect_channel`` (up to 3 attempts),
        3. pages through the channel's uploads playlist, inserting videos
           that are not yet in the ``video`` table and handing each page of
           video IDs to ``self.collect_videos`` (up to 3 attempts per page).

        Paging for a channel stops at the first video for which
        ``self.now - published_at`` exceeds ``self.limit``, when there is
        no further page, or when fetching the next page fails 3 times.

        The loop ends once ``queue_attempts`` reaches zero: it is
        decremented on every ``Empty`` timeout and reset to 3 only after a
        channel has been processed to the end.

        Args:
            key: API key passed to ``APIRequest(api_key=key)``.

        Returns:
            None.
        """
        api = APIRequest(api_key=key)
        db = Database()

        queue_attempts = 3

        # Gets channel from queue
        while queue_attempts:
            # acquire() returns False on timeout; remember the outcome so the
            # semaphore is only released when this thread really holds it.
            acquired = self.api_semaphore.acquire(blocking=True, timeout=120)
            try:
                channel_id = self.api_queue.get(timeout=120)
                print('{} - Thread {}'.format(self.api_queue.qsize(),
                                              current_thread().name))
            except Empty:
                queue_attempts -= 1
                self.logger.warning(
                    'API queue empty. Attempting {} more time(s).'.format(
                        queue_attempts))
                continue
            finally:
                if acquired:
                    self.api_semaphore.release()

            # Checks if channel is already in database, otherwise saves it
            channel_dbid = self.db_ids.get(channel_id, 0)
            if not channel_dbid:
                try:
                    # NOTE(review): the ID is interpolated directly into the
                    # SQL text and matched with LIKE, where '_' and '%' act
                    # as wildcards. Confirm whether Database.select offers
                    # bound parameters / an exact '=' comparison.
                    db_id_query = db.select(
                        'channel',
                        'channel_id',
                        where=['yt_id LIKE "{}"'.format(channel_id)])

                    if not db_id_query:
                        _, channel_response = api.list(
                            'channels', **{
                                'part': 'snippet',
                                'id': channel_id
                            })
                        try:
                            snippet = channel_response['items'][0]['snippet']
                            channel_query = {
                                'yt_id':
                                channel_id,
                                'title':
                                snippet['title'],
                                'description':
                                snippet['description'],
                                'published_at':
                                self.parse_date(snippet['publishedAt'],
                                                return_datetime=True).strftime(
                                                    '%Y-%m-%d %H:%M:%S')
                            }
                            channel_dbid = db.insert('channel', channel_query)
                            self.db_ids[channel_id] = channel_dbid
                        except KeyError as err:
                            self.logger.error(
                                'KeyError while getting channel info: {}'.
                                format(repr(err)))
                            continue
                        except Exception as err:
                            self.logger.error(
                                'Error while getting channel info: {}'.format(
                                    repr(err)))
                            continue
                    else:
                        channel_dbid = db_id_query[0]['channel_id']
                        self.db_ids[channel_id] = channel_dbid
                except Exception as err:
                    self.logger.error(repr(err))
                    continue

            # Collects channel, attempting 3 times
            collect_attempts = 3
            while collect_attempts:
                try:
                    self.collect_channel(api, channel_id)
                    break
                except Exception as err:
                    collect_attempts -= 1
                    self.logger.error(
                        'Failed to collect channel {}: {} Attempting {} more times.'
                        .format(channel_id, repr(err), collect_attempts))
            if not collect_attempts:
                self.logger.info(
                    'Could not collect channel: {}'.format(channel_id))
                continue

            # Retrieves recent videos
            playlist_id = 'UU' + channel_id[
                2:]  # Playlist ID of channel c's uploads, can be derived from channel ID
            fetch_attempts = 3
            request, response = None, None
            while fetch_attempts:
                try:
                    request, response = api.list(
                        'playlistItems', **{
                            'part': 'contentDetails',
                            'playlistId': playlist_id,
                            'maxResults': 50
                        })
                    break
                except Exception as err:
                    fetch_attempts -= 1
                    self.logger.error(
                        'Error while getting videos for channel {}: {} Attempting {} more times.'
                        .format(channel_id, repr(err), fetch_attempts))
            if not fetch_attempts:
                self.logger.error(
                    'Failed to get videos for {}'.format(channel_id))
                continue
            video_list = []
            limit_reached = False
            # `request`/`response` always refer to the current playlistItems
            # page; per-video lookups below use their own names so that
            # list_next() keeps paging the playlist.
            while request and not limit_reached:
                for v in response['items']:
                    video_id = v['contentDetails']['videoId']
                    published_at = self.parse_date(
                        v['contentDetails']['videoPublishedAt'])
                    if self.now - published_at > self.limit:
                        limit_reached = True
                        break
                    try:
                        # NOTE(review): same string-built LIKE query as for
                        # channels above; see the note there.
                        db_id_query = db.select(
                            'video',
                            'video_id',
                            where=['yt_id LIKE "{}"'.format(video_id)])
                        if not db_id_query:
                            _, video_response = api.list(
                                'videos', **{
                                    'part': 'snippet,contentDetails',
                                    'id': video_id
                                })
                            try:
                                video_item = video_response['items'][0]
                                snippet = video_item['snippet']
                                content_details = video_item['contentDetails']
                                video_query = {
                                    'yt_id':
                                    video_id,
                                    'title':
                                    snippet['title'],
                                    'description':
                                    snippet['description'],
                                    'channel_id':
                                    channel_dbid,
                                    'length_seconds':
                                    int(
                                        parse_duration(
                                            content_details['duration']).
                                        total_seconds()),
                                    'published_at':
                                    self.parse_date(
                                        snippet['publishedAt'],
                                        return_datetime=True).strftime(
                                            '%Y-%m-%d %H:%M:%S')
                                }
                                video_dbid = db.insert('video', video_query)
                                self.db_ids[video_id] = video_dbid
                            except KeyError as err:
                                self.logger.error(
                                    'KeyError while getting video info: {}'.
                                    format(repr(err)))
                                continue
                        else:
                            video_dbid = db_id_query[0]['video_id']
                            self.db_ids[video_id] = video_dbid
                    except Exception as err:
                        self.logger.error(
                            'Error while getting video data: {}'.format(
                                repr(err)))
                        continue
                    video_list.append(video_id)

                # Collects this page's videos, attempting 3 times
                collect_attempts = 3
                while collect_attempts:
                    try:
                        self.collect_videos(api, video_list)
                        break
                    except Exception as err:
                        collect_attempts -= 1
                        self.logger.error(
                            'Failed to collect videos from channel {}: '
                            '{} Attempting {} more times.'.format(
                                channel_id, repr(err), collect_attempts))
                if not collect_attempts:
                    self.logger.info(
                        'Could not collect videos from channel: {}'.format(
                            channel_id))
                # Reset the batch only after all attempts are done, so that a
                # retry receives the same IDs as the failed attempt.
                video_list.clear()

                if not limit_reached:
                    execute_attempts = 3
                    while execute_attempts:
                        try:
                            request, response = api.list_next(
                                'playlistItems', request, response)
                            break
                        except Exception as err:
                            execute_attempts -= 1
                            # repr() is used because err.args may hold
                            # non-string values, which str.join would reject.
                            self.logger.error(
                                'Video fetch response error: {} Attempting {} more time(s)'
                                .format(repr(err), execute_attempts))
                    if not execute_attempts:
                        break
            queue_attempts = 3
        self.logger.info('Finished execution for Thread {}'.format(
            get_ident()))