def get_ids(self):
    """Return a mapping of YouTube IDs to database primary keys.

    Covers both channels and videos; also caches the channel YouTube IDs
    on ``self.channel_ids`` as a side effect. On any database error the
    problem is logged and whatever was collected so far is returned.
    """
    db = Database()
    mapping = {}
    try:
        channel_rows = db.select('channel', 'channel_id', 'yt_id')
        # Side effect kept from the original: remember raw channel YT IDs.
        self.channel_ids = [row['yt_id'] for row in channel_rows]
        mapping.update({row['yt_id']: row['channel_id'] for row in channel_rows})
        video_rows = db.select('video', 'video_id', 'yt_id')
        mapping.update({row['yt_id']: row['video_id'] for row in video_rows})
    except Exception as err:
        self.logger.error('Failed to retrieve database IDs. {}'.format(
            repr(err)))
    return mapping
def get_views(self, videos):
    """Attach the most recent known view count to every video dict.

    Reads the last collect-run ID from the ``.COLLECT`` file, queries the
    maximum ``view_count`` per video over recent collect rounds, and writes
    it into each dict in *videos* under ``'view_count'`` (0 when no data
    exists for that video).

    Fix: the original scanned the whole ``views`` result with a generator
    expression for every video (O(n^2)) and used StopIteration for control
    flow; a dict lookup built once makes this O(n).
    """
    d = Database()
    with open('.COLLECT') as f:
        collect_id = int(f.readline().strip())
    views = d.select(
        'collect_video',
        *['video_id', 'MAX(view_count) as view_count'],
        where=['COLLECT_id > {} GROUP BY video_id'.format(
            collect_id - self.days - 10)])
    # One pass to index the query result by video_id.
    view_by_id = {row['video_id']: row['view_count'] for row in views}
    for v in videos:
        count = view_by_id.get(v['video_id'])
        if count is None:
            # Preserve the original best-effort behavior: report the
            # missing video and fall back to 0 instead of raising.
            print('Error with video {}:\n{}\n\n'.format(
                v, 'no view data found'))
            v['view_count'] = 0
        else:
            v['view_count'] = count
def start_temp_daemon():
    """Loop forever: spawn one YoutubeTemp worker per queued URL, wait for
    all of them to finish, log completion, then sleep before the next cycle.

    Fix: the original busy-waited in a tight loop polling ``p.is_alive()``
    on every worker, burning CPU; ``join()`` blocks until each worker exits
    and is the idiomatic equivalent. The logger is also created once instead
    of on every cycle.
    """
    db = Database('Queues')
    logger = get_logger('YoutubeTemp')
    while True:
        workers = []
        for target_url in db.select():
            p = YoutubeTemp(target_url)
            workers.append(p)
            p.start()
        # Wait for every worker; join() returns once the process has exited.
        for p in workers:
            p.join()
        logger.info('A check has finished.')
        # NOTE(review): `sec` is not defined in this function — it must be a
        # module-level constant; confirm it exists at import time.
        sleep(sec)
def collect_info(self, key):
    """Worker loop: pull channel IDs from the shared API queue and collect
    channel metadata, recent videos, and their statistics into the database.

    Args:
        key: YouTube Data API key used to build the APIRequest client.

    Flow per channel (each stage retried up to 3 times):
      1. Pop a channel ID from ``self.api_queue`` (guarded by a semaphore).
      2. Ensure the channel row exists in the DB (insert via API if new).
      3. Run ``self.collect_channel`` for the channel itself.
      4. Page through the channel's uploads playlist, ensuring each video
         row exists, until a video older than ``self.limit`` is seen.
      5. Run ``self.collect_videos`` on each page of video IDs.

    Exits only after the queue has been empty for 3 consecutive get()
    timeouts. All errors are logged and the loop moves on (best-effort).
    """
    api = APIRequest(api_key=key)
    db = Database()
    queue_attempts = 3
    # Gets channel from queue
    while queue_attempts:
        try:
            # Semaphore throttles concurrent API consumers; 120 s timeouts
            # on both the semaphore and the queue get.
            self.api_semaphore.acquire(blocking=True, timeout=120)
            channel_id = self.api_queue.get(timeout=120)
            print('{} - Thread {}'.format(self.api_queue.qsize(),
                                          current_thread().name))
        except Empty:
            # Queue drained: count down before giving up entirely.
            queue_attempts -= 1
            self.logger.warning(
                'API queue empty. Attempting {} more time(s).'.format(
                    queue_attempts))
            continue
        finally:
            self.api_semaphore.release()
        # Checks if channel is already in database, otherwise saves it
        channel_dbid = self.db_ids.get(channel_id, 0)
        if not channel_dbid:
            try:
                db_id_query = db.select(
                    'channel', *['channel_id'],
                    where=['yt_id LIKE "{}"'.format(channel_id)])
                if not db_id_query:
                    # Not in DB yet: fetch the snippet from the API and insert.
                    request, response = api.list(
                        'channels', **{
                            'part': 'snippet',
                            'id': channel_id
                        })
                    try:
                        snippet = response['items'][0]['snippet']
                        channel_query = {
                            'yt_id': channel_id,
                            'title': snippet['title'],
                            'description': snippet['description'],
                            'published_at':
                            self.parse_date(snippet['publishedAt'],
                                            return_datetime=True).strftime(
                                                '%Y-%m-%d %H:%M:%S')
                        }
                        channel_dbid = db.insert('channel', channel_query)
                        self.db_ids[channel_id] = channel_dbid
                    except KeyError as err:
                        # Malformed/empty API response: skip this channel.
                        self.logger.error(
                            'KeyError while getting channel info: {}'.
                            format(repr(err)))
                        continue
                    except Exception as err:
                        self.logger.error(
                            'Error while getting channel info: {}'.format(
                                repr(err)))
                        continue
                else:
                    # Row already exists: cache its primary key.
                    channel_dbid = db_id_query[0]['channel_id']
                    self.db_ids[channel_id] = channel_dbid
            except Exception as err:
                self.logger.error(repr(err))
                continue
        # Collects channel, attempting 3 times
        collect_attempts = 3
        while collect_attempts:
            try:
                self.collect_channel(api, channel_id)
                break
            except Exception as err:
                collect_attempts -= 1
                self.logger.error(
                    'Failed to collect channel {}: {} Attempting {} more times.'
                    .format(channel_id, repr(err), collect_attempts))
        if not collect_attempts:
            # All retries exhausted: move on to the next queued channel.
            self.logger.info(
                'Could not collect channel: {}'.format(channel_id))
            continue
        # Retrieves recent videos
        playlist_id = 'UU' + channel_id[
            2:]  # Playlist ID of channel c's uploads, can be derived from channel ID
        fetch_attempts = 3
        request, response = None, None
        while fetch_attempts:
            try:
                request, response = api.list(
                    'playlistItems', **{
                        'part': 'contentDetails',
                        'playlistId': playlist_id,
                        'maxResults': 50
                    })
                break
            except Exception as err:
                fetch_attempts -= 1
                self.logger.error(
                    'Error while getting videos for channel {}: {} Attempting {} more times.'
                    .format(channel_id, repr(err), fetch_attempts))
        if not fetch_attempts:
            self.logger.error(
                'Failed to get videos for {}'.format(channel_id))
            continue
        video_list = []
        limit_reached = False
        # Page through the uploads playlist until a video older than
        # self.limit is seen or there are no more pages.
        while request and not limit_reached:
            for v in response['items']:
                video_id = v['contentDetails']['videoId']
                published_at = self.parse_date(
                    v['contentDetails']['videoPublishedAt'])
                if self.now - published_at > self.limit:
                    # Playlist items arrive newest-first, so everything
                    # after this one is also too old.
                    limit_reached = True
                    break
                try:
                    db_id_query = \
                        db.select('video', 'video_id',
                                  where=['yt_id LIKE "{}"'.format(video_id)])
                    if not db_id_query:
                        # New video: fetch full details and insert a row.
                        request, response = api.list(
                            'videos', **{
                                'part': 'snippet,contentDetails',
                                'id': video_id
                            })
                        try:
                            snippet = response['items'][0]['snippet']
                            content_details = response['items'][0][
                                'contentDetails']
                            video_query = {
                                'yt_id': video_id,
                                'title': snippet['title'],
                                'description': snippet['description'],
                                'channel_id': channel_dbid,
                                'length_seconds':
                                int(
                                    parse_duration(
                                        content_details['duration']).
                                    total_seconds()),
                                'published_at':
                                self.parse_date(
                                    snippet['publishedAt'],
                                    return_datetime=True).strftime(
                                        '%Y-%m-%d %H:%M:%S')
                            }
                            video_dbid = db.insert('video', video_query)
                            self.db_ids[video_id] = video_dbid
                        except KeyError as err:
                            self.logger.error(
                                'KeyError while getting video info: {}'.
                                format(repr(err)))
                            continue
                    else:
                        video_dbid = db_id_query[0]['video_id']
                        self.db_ids[video_id] = video_dbid
                except Exception as err:
                    self.logger.error(
                        'Error while getting video data: {}'.format(
                            repr(err)))
                    continue
                video_list.append(video_id)
            collect_attempts = 3
            while collect_attempts:
                try:
                    self.collect_videos(api, video_list)
                    break
                except Exception as err:
                    collect_attempts -= 1
                    self.logger.error(
                        'Failed to collect videos from channel {}: '
                        '{} Attempting {} more times.'.format(
                            channel_id, repr(err), collect_attempts))
                finally:
                    # Cleared on success AND failure: each page of video
                    # IDs is processed (or abandoned) exactly once.
                    video_list.clear()
            if not collect_attempts:
                self.logger.info(
                    'Could not collect videos from channel: {}'.format(
                        channel_id))
            if not limit_reached:
                # Advance to the next playlist page.
                execute_attempts = 3
                while execute_attempts:
                    try:
                        request, response = api.list_next(
                            'playlistItems', request, response)
                        break
                    except Exception as err:
                        execute_attempts -= 1
                        self.logger.error(
                            'Video fetch response error: {} Attempting {} more time(s)'
                            .format(' '.join(err.args), execute_attempts))
                        continue
                if not execute_attempts:
                    # Pagination failed repeatedly: stop paging this channel.
                    break
        # A channel was processed successfully: reset the empty-queue budget.
        queue_attempts = 3
    self.logger.info('Finished execution for Thread {}'.format(
        get_ident()))