Beispiel #1
0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project's url and write one JSON file per video.

    Videos whose ``source_url`` already appears in one of the files
    returned by ``load_json_files(cfg)`` are skipped unless
    ``parsed.force`` is set.

    :arg cfg: config object; ``projectpath`` and ``url`` are read from
        its ``[project]`` section
    :arg parser: object whose ``print_byline()`` is called unless
        ``parsed.quiet`` is set
    :arg parsed: parsed command line options; ``quiet`` and ``force``
        are read
    :arg args: not used by this command

    :returns: 1 if no url is configured, 0 otherwise
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn)
        for fn, item in load_json_files(cfg)
    )

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    out('Scraping {0}...'.format(url))
    videos = scrape_videos(url)

    # Single-argument parenthesised print produces the same output whether
    # it is parsed as a print statement or as a call to the print function.
    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        print('Working on {0}... ({1})'.format(
            stringify(video['title']), filename))

        # Write into the directory created above rather than a 'json'
        # directory relative to the current working directory, and let the
        # context manager close the file even if serialization fails.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
Beispiel #2
0
def fetch(cfg, ctx, quiet, force):
    """Scrape the project url and write a JSON file for each new video.

    A video whose ``source_url`` matches one of the files returned by
    ``load_json_files(cfg)`` is skipped unless ``force`` is true.

    Raises ``click.ClickException`` when the ``[project]`` section has no
    non-empty ``url`` option.
    """
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # Map every known source_url to the JSON file it came from.
    known_sources = {
        data['source_url']: json_fn
        for json_fn, data in load_json_files(cfg)
    }

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except NoOptionError:
        url = ''

    if not url:
        config_name = get_project_config_file_name()
        message = (
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'
        ).format(config_name)
        raise click.ClickException(message)

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    videos = scrape_videos(url)

    click.echo(u'Found {0} videos...'.format(len(videos)))
    for position, video in enumerate(videos):
        already_fetched = video['source_url'] in known_sources
        title = video['title']

        if already_fetched and not force:
            skip_note = u'Skipping {0}... already exists.'.format(
                stringify(title))
            click.echo(skip_note)
            continue

        # The position prefix keeps names unique; the generated name is
        # capped at 40 characters.
        basename = generate_filename(title)[:40]
        filename = '{index:04d}_{basename}.json'.format(
            index=position, basename=basename)

        click.echo(u'Created {0}... ({1})'.format(stringify(title), filename))

        target = os.path.join(jsonpath, filename)
        with open(target, 'w') as fp:
            fp.write(convert_to_json(video))
Beispiel #3
0
def fetch(cfg, ctx, quiet, force):
    """Fetch videos from the configured url and generate JSON files.

    Each scraped video is written to ``jsonpath`` as its own JSON file.
    Videos whose ``source_url`` is already present among the files from
    ``load_json_files(cfg)`` are skipped unless ``force`` is true.

    Raises ``click.ClickException`` if the ``[project]`` section does not
    provide a non-empty ``url``.
    """
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # source_url -> filename for everything already on disk
    existing = {}
    for json_name, contents in load_json_files(cfg):
        existing[contents['source_url']] = json_name

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        template = (
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'
        )
        raise click.ClickException(
            template.format(get_project_config_file_name()))

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    videos = scrape_videos(url)

    click.echo(u'Found {0} videos...'.format(len(videos)))
    for idx, video in enumerate(videos):
        is_known = video['source_url'] in existing
        video_title = video['title']

        if is_known and not force:
            click.echo(
                u'Skipping {0}... already exists.'.format(
                    stringify(video_title)))
            continue

        # Prefix with the zero-padded index and cap the generated name at
        # 40 characters.
        short_name = generate_filename(video_title)[:40]
        filename = '{index:04d}_{basename}.json'.format(
            index=idx, basename=short_name)

        created_note = u'Created {0}... ({1})'.format(
            stringify(video_title), filename)
        click.echo(created_note)

        destination = os.path.join(jsonpath, filename)
        with open(destination, 'w') as fp:
            fp.write(convert_to_json(video))