Beispiel #1
0
def update_channels(reporter: ProgressReporter, link: str = None):
    """
    Update all information for each channel.  (No downloads performed)

    If `link` is provided, only the channel with that link is updated; otherwise
    every channel that has a URL and whose `next_download` is due (or unset) is
    updated.  Progress is reported through `reporter` as channels are processed.

    :param reporter: progress sink used to report scheduling/update status.
    :param link: optional channel link; restricts the update to a single channel.
    :raises UnknownChannel: if no channels exist, or `link` matches no channel.
    """
    with get_db_context() as (engine, session):
        if session.query(Channel).count() == 0:
            raise UnknownChannel('No channels exist yet')

        if link:
            try:
                channel = session.query(Channel).filter_by(link=link).one()
            except NoResultFound:
                raise UnknownChannel(f'No channel with link: {link}')
            channels = [channel, ]
        else:
            # Only channels that have a URL and are due for a download.
            channels = session.query(Channel).filter(
                Channel.url != None,  # noqa
                Channel.url != '',
                or_(
                    Channel.next_download == None,  # noqa
                    Channel.next_download <= today(),
                )
            ).all()

        # Idiom fix: truthiness check; also dropped the pointless f-prefix on a
        # literal with no placeholders.
        if not channels:
            logger.warning('All channels are up to date')

    reporter.set_progress_total(0, len(channels))
    reporter.send_progress(0, 0, f'{len(channels)} channels scheduled for update')

    # Randomize downloading of channels.
    shuffle(channels)

    logger.debug(f'Getting info for {len(channels)} channels')
    for idx, channel in enumerate(channels):
        reporter.send_progress(0, idx, f'Getting video list for {channel.name}')
        try:
            update_channel(channel)
        except Exception:
            # One failing channel must not abort the rest of the batch.
            logger.critical('Unable to fetch channel videos', exc_info=True)
            continue

    if channels:
        reporter.send_progress(0, len(channels), 'Done downloading video lists')
    else:
        reporter.finish(0, 'Done downloading video lists')
Beispiel #2
0
def process_captions(video: Video):
    """
    Parse the caption file belonging to a video and store its unique lines
    on the video record.
    """
    caption_path = get_absolute_video_caption(video)
    if not caption_path.exists():
        raise UnknownCaptionFile()

    try:
        unique_lines = get_unique_caption_lines(str(caption_path))
    except UnicodeDecodeError:
        # Failed to decode the caption file
        # TODO handle this error
        logger.debug(f'Failed to decode caption file {caption_path}')
    except webvtt.errors.MalformedFileError:
        # File format is broken
        logger.debug(f'Failed to parse caption file {caption_path}')
    else:
        # Joining cannot raise the handled exceptions, so it lives in `else`.
        video.caption = '\n'.join(unique_lines)
Beispiel #3
0
async def main():
    """
    Build the command-line interface, parse arguments, and dispatch to the
    selected sub-command's `main` callable.

    Returns the sub-command's return code, 0 after a successful config
    verification, or 1 when no sub-command was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='count')
    parser.add_argument('--verify-config',
                        action='store_true',
                        default=False,
                        help='Verify the local.yaml, then exit.')

    sub_commands = parser.add_subparsers(title='sub-commands',
                                         dest='sub_commands')

    # API parser is always present
    api.init_parser(sub_commands.add_parser('api'))

    # DB Parser for running Alembic migrations
    db_parser = sub_commands.add_parser('db')
    db_parser.add_argument('command')

    # Maps each sub-command name to the "main" callable that handles it.
    mains = {'api': api.main}

    # Let every module register its own sub-commands and mains.
    for module_name, module in MODULES.items():
        module.init_parser(sub_commands)
        update_choices_to_mains(sub_commands, mains, module.main.main)

    args = parser.parse_args()

    if args.verify_config:
        verify_config()
        print('Config verified')
        return 0

    verbosity = args.verbose
    if verbosity == 1:
        logger.info('Setting verbosity to INFO')
        logger.setLevel(logging.INFO)
    elif verbosity and verbosity >= 2:
        logger.debug('Setting verbosity to DEBUG')
        logger.setLevel(logging.DEBUG)

    # Always warn about the log level so we know what will be logged
    logger.warning(f'Logging level: {logger.getEffectiveLevel()}')

    # Run DB migrations before anything else, if requested.
    if args.sub_commands == 'db':
        return db_main(args)

    import_settings_configs(MODULES)

    # Guard clause: no sub-command means show help and report failure.
    if not args.sub_commands:
        parser.print_help()
        return 1

    return mains[args.sub_commands](args)
Beispiel #4
0
def upsert_video(session: Session, video_path: pathlib.Path, channel: Channel, idempotency: str = None,
                 skip_captions=False,
                 id_: str = None) -> Video:
    """
    Insert a video into the DB.  Also, find any meta-files near the video file and store them on the video row.

    If id_ is provided, update that entry.

    :param session: open DB session used for the insert/update.
    :param video_path: path to the video file; stored relative to the channel directory.
    :param channel: the channel that owns this video.
    :param idempotency: token stored on the row (presumably to prune stale rows later — confirm with caller).
    :param skip_captions: when True, skip parsing/storing the video's captions.
    :param id_: if given, update the existing Video row with this id instead of inserting.
    """
    channel_dir = get_absolute_media_path(channel.directory)
    poster_path, description_path, caption_path, info_json_path = find_meta_files(video_path, relative_to=channel_dir)

    # Video paths should be relative to the channel's directory
    if video_path.is_absolute():
        video_path = video_path.relative_to(channel_dir)

    name_match = NAME_PARSER.match(video_path.name)
    _ = upload_date = source_id = title = ext = None
    if name_match:
        _, upload_date, source_id, title, ext = name_match.groups()

    # Make sure the date is a valid date format, if not, leave it blank.  Youtube-DL sometimes puts an NA in the date.
    # We may even get videos that weren't downloaded by WROLPi.
    if not upload_date or not upload_date.isdigit() or len(upload_date) != 8:
        logger.debug(f'Could not parse date from filename: {video_path}')
        upload_date = None

    duration = None
    if info_json_path:
        path = (channel_dir / info_json_path).absolute()
        try:
            with open(path) as fh:
                json_contents = json.load(fh)
                # Robustness: 'duration' may be absent from the info JSON; .get
                # keeps duration as None instead of raising an unhandled KeyError.
                duration = json_contents.get('duration')
        except json.decoder.JSONDecodeError:
            logger.warning(f'Failed to load JSON file to get duration: {path}')

    video_dict = dict(
        channel_id=channel.id,
        description_path=str(description_path) if description_path else None,
        ext=ext,
        poster_path=str(poster_path) if poster_path else None,
        source_id=source_id,
        title=title,
        upload_date=upload_date,
        video_path=str(video_path),
        caption_path=str(caption_path) if caption_path else None,
        idempotency=idempotency,
        info_json_path=str(info_json_path) if info_json_path else None,
        duration=duration,
    )

    if id_:
        # BUG FIX: Query.filter() takes SQL expressions, not keyword arguments;
        # filter(id=id_) raises TypeError.  filter_by() is the kwargs form.
        video = session.query(Video).filter_by(id=id_).one()
        video.update(video_dict)
    else:
        video = Video(**video_dict)

    session.add(video)
    session.flush()

    if skip_captions is False and caption_path:
        # Process captions only when requested
        process_captions(video)

    return video
Beispiel #5
0
async def insert_bulk_captions(video_ids: List[int]):
    """
    Parse and store captions for each of the given video ids.

    Commits once after all videos have been processed.
    """
    with get_db_context(commit=True) as (engine, session):
        # Idiom fix: the enumerate index was never used — iterate directly.
        for video_id in video_ids:
            video = session.query(Video).filter_by(id=video_id).one()
            process_captions(video)
    logger.debug(f'Inserted {len(video_ids)} captions')