def update_channels(reporter: ProgressReporter, link: str = None):
    """Update all information for each channel.  (No downloads performed)"""
    with get_db_context() as (engine, session):
        if session.query(Channel).count() == 0:
            raise UnknownChannel('No channels exist yet')

        if link:
            # Update only the requested channel.
            try:
                channel = session.query(Channel).filter_by(link=link).one()
            except NoResultFound:
                raise UnknownChannel(f'No channel with link: {link}')
            channels = [channel, ]
        else:
            # Update any channel with a URL whose next_download is unset or due.
            channels = session.query(Channel).filter(
                Channel.url != None,  # noqa
                Channel.url != '',
                or_(
                    Channel.next_download == None,  # noqa
                    Channel.next_download <= today(),
                )
            ).all()

        if len(channels) == 0:
            logger.warning('All channels are up to date')

        reporter.set_progress_total(0, len(channels))
        reporter.send_progress(0, 0, f'{len(channels)} channels scheduled for update')

        # Randomize downloading of channels.
        shuffle(channels)

        logger.debug(f'Getting info for {len(channels)} channels')

        for idx, channel in enumerate(channels):
            reporter.send_progress(0, idx, f'Getting video list for {channel.name}')
            try:
                update_channel(channel)
            except Exception:
                # One bad channel should not stop the rest of the updates.
                logger.critical('Unable to fetch channel videos', exc_info=True)
                continue

        if channels:
            reporter.send_progress(0, len(channels), 'Done downloading video lists')
        else:
            reporter.finish(0, 'Done downloading video lists')
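
# A minimal sketch of driving update_channels, e.g. from a test.  `StubReporter`
# and the 'wrolpi' link value are hypothetical; the real ProgressReporter
# presumably streams progress to the API's feed (not shown here).
#
#   class StubReporter:
#       def set_progress_total(self, idx, total): pass
#       def send_progress(self, idx, progress, message=None): print(message)
#       def finish(self, idx, message): print(message)
#
#   update_channels(StubReporter())                 # refresh every stale channel
#   update_channels(StubReporter(), link='wrolpi')  # refresh a single channel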
def process_captions(video: Video):
    """Parse and insert captions for a video record."""
    caption_path = get_absolute_video_caption(video)
    if not caption_path.exists():
        raise UnknownCaptionFile()
    try:
        lines = get_unique_caption_lines(str(caption_path))
        block = '\n'.join(lines)
        video.caption = block
    except UnicodeDecodeError:
        # Failed to decode the caption file
        # TODO handle this error
        logger.debug(f'Failed to decode caption file {caption_path}')
    except webvtt.errors.MalformedFileError:
        # File format is broken
        logger.debug(f'Failed to parse caption file {caption_path}')
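
# Usage sketch, assuming an open session.  That get_unique_caption_lines()
# de-duplicates the WebVTT cue text is an assumption based on its name:
#
#   video = session.query(Video).filter_by(id=1).one()
#   process_captions(video)  # raises UnknownCaptionFile if no caption file exists
#   session.commit()         # persist the Video.caption text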
async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='count')
    parser.add_argument('--verify-config', action='store_true', default=False,
                        help='Verify the local.yaml, then exit.')
    sub_commands = parser.add_subparsers(title='sub-commands', dest='sub_commands')

    # API parser is always present
    api_parser = sub_commands.add_parser('api')
    api.init_parser(api_parser)

    # DB Parser for running Alembic migrations
    db_parser = sub_commands.add_parser('db')
    db_parser.add_argument('command')

    # This dict keeps track of which main will be called for each sub-command
    choices_to_mains = {
        'api': api.main,
    }

    # Setup the modules' sub-commands
    for module_name, module in MODULES.items():
        module.init_parser(sub_commands)
        update_choices_to_mains(sub_commands, choices_to_mains, module.main.main)

    args = parser.parse_args()

    if args.verify_config:
        verify_config()
        print('Config verified')
        return 0

    if args.verbose == 1:
        logger.info('Setting verbosity to INFO')
        logger.setLevel(logging.INFO)
    elif args.verbose and args.verbose >= 2:
        logger.debug('Setting verbosity to DEBUG')
        logger.setLevel(logging.DEBUG)
    # Always warn about the log level so we know what will be logged
    logger.warning(f'Logging level: {logger.getEffectiveLevel()}')

    # Run DB migrations before anything else, if requested.
    if args.sub_commands == 'db':
        return db_main(args)

    import_settings_configs(MODULES)

    if args.sub_commands:
        module_main = choices_to_mains[args.sub_commands]
        return_code = module_main(args)
    else:
        parser.print_help()
        return_code = 1

    return return_code
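
# Entrypoint sketch.  main() is a coroutine, so it must be run on an event loop;
# the real project may hand it to its own loop instead (an assumption here):
#
#   if __name__ == '__main__':
#       import asyncio, sys
#       sys.exit(asyncio.run(main()))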
def upsert_video(session: Session, video_path: pathlib.Path, channel: Channel, idempotency: str = None,
                 skip_captions=False, id_: str = None) -> Video:
    """
    Insert a video into the DB.  Also, find any meta-files near the video file and store them on the video row.

    If id_ is provided, update that entry.
    """
    channel_dir = get_absolute_media_path(channel.directory)
    poster_path, description_path, caption_path, info_json_path = find_meta_files(video_path, relative_to=channel_dir)

    # Video paths should be relative to the channel's directory
    if video_path.is_absolute():
        video_path = video_path.relative_to(channel_dir)

    name_match = NAME_PARSER.match(video_path.name)
    _ = upload_date = source_id = title = ext = None
    if name_match:
        _, upload_date, source_id, title, ext = name_match.groups()

    # Make sure the date is a valid date format, if not, leave it blank.  Youtube-DL sometimes puts an NA in the
    # date.  We may even get videos that weren't downloaded by WROLPi.
    if not upload_date or not upload_date.isdigit() or len(upload_date) != 8:
        logger.debug(f'Could not parse date from filename: {video_path}')
        upload_date = None

    # Read the video's duration from its info JSON, if available.
    duration = None
    if info_json_path:
        path = (channel_dir / info_json_path).absolute()
        try:
            with open(path) as fh:
                json_contents = json.load(fh)
                duration = json_contents['duration']
        except json.decoder.JSONDecodeError:
            logger.warning(f'Failed to load JSON file to get duration: {path}')

    video_dict = dict(
        channel_id=channel.id,
        description_path=str(description_path) if description_path else None,
        ext=ext,
        poster_path=str(poster_path) if poster_path else None,
        source_id=source_id,
        title=title,
        upload_date=upload_date,
        video_path=str(video_path),
        caption_path=str(caption_path) if caption_path else None,
        idempotency=idempotency,
        info_json_path=str(info_json_path) if info_json_path else None,
        duration=duration,
    )

    if id_:
        # Query.filter() takes SQL expressions; keyword lookups need filter_by().
        video = session.query(Video).filter_by(id=id_).one()
        video.update(video_dict)
    else:
        video = Video(**video_dict)
        session.add(video)
    session.flush()

    if skip_captions is False and caption_path:
        # Process captions only when requested
        process_captions(video)

    return video
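
# Example call, assuming an open session and an existing Channel row.  The link
# value and the filename are purely illustrative (the exact NAME_PARSER pattern
# is not shown here):
#
#   with get_db_context(commit=True) as (engine, session):
#       channel = session.query(Channel).filter_by(link='wrolpi').one()
#       video = upsert_video(session, pathlib.Path('20200101_abc123_Example.mp4'), channel)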
async def insert_bulk_captions(video_ids: List[int]):
    with get_db_context(commit=True) as (engine, session):
        for video_id in video_ids:
            video = session.query(Video).filter_by(id=video_id).one()
            process_captions(video)
    logger.debug(f'Inserted {len(video_ids)} captions')
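
# insert_bulk_captions is a coroutine, so callers must await it or schedule it
# on a running event loop, e.g. (sketch):
#
#   await insert_bulk_captions([1, 2, 3])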