Esempio n. 1
0
def scrapevideo(ctx, quiet, save, video_url):
    """Fetches metadata for a video from a site.

    The first item returned by ``scrape_video(video_url)`` is converted to
    JSON. Without ``save`` the JSON is echoed; with ``save`` it is written
    to a file named after the video title inside the ``jsonpath`` directory
    from the ``[project]`` section of the project config.

    :arg ctx: not used by this function
    :arg quiet: when false, ``VERSION`` is echoed first
    :arg save: when true, write the JSON to a file instead of echoing it
    :arg video_url: url passed to ``scrape_video``

    :raises click.ClickException: if the file to be written already exists
    """
    if not quiet:
        click.echo(VERSION)

    data = scrape_video(video_url)[0]
    if not save:
        click.echo(convert_to_json(data))
        return

    cfg = get_project_config()
    jsonpath = cfg.get('project', 'jsonpath')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    fn = generate_filename(data['title']) + '.json'
    full_path = os.path.join(jsonpath, fn)

    # Check the path that is actually written. Testing the bare filename
    # looks in the current directory and would let an existing file inside
    # jsonpath be overwritten silently.
    if os.path.exists(full_path):
        raise click.ClickException(u'File "%s" already exists!' % fn)

    with open(full_path, 'w') as fp:
        fp.write(convert_to_json(data))
    click.echo(u'Saved as {0}'.format(fn))
Esempio n. 2
0
def scrapevideo_cmd(parser, parsed, args):
    """Scrape metadata for one video and print it or save it as JSON.

    Args:
        parser: Object providing ``print_byline()``, which is called unless
            ``parsed.quiet`` is set.
        parsed: Parsed options. Reads ``quiet``, ``video`` (its first
            element is used as the video url), ``richard`` and ``save``.
        args: Not used by this function.

    Returns:
        0 on success, 1 if the JSON file to be saved already exists.
    """
    if not parsed.quiet:
        parser.print_byline()

    video_url = parsed.video[0]
    data = scrapevideo(video_url, parsed.richard, 'object')

    # Single-argument parenthesised print behaves the same as the print
    # statement and also parses as a function call.
    if not parsed.save:
        print(convert_to_json(data))
        return 0

    cfg = get_project_config()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    # Build the target inside jsonpath (the directory created above) rather
    # than a 'json/' path relative to the current working directory, so the
    # existence check and the write hit the same location as makedirs.
    fn = os.path.join(jsonpath, generate_filename(data['title']) + '.json')

    if os.path.exists(fn):
        err('It already exists!')
        return 1

    with open(fn, 'w') as fp:
        fp.write(convert_to_json(data))
    print('Saved as {0}'.format(fn))
    return 0
Esempio n. 3
0
def scrapevideo_cmd(parser, parsed, args):
    """Scrape metadata for one video and print it or save it as JSON.

    Args:
        parser: Object providing ``print_byline()``, which is called unless
            ``parsed.quiet`` is set.
        parsed: Parsed options. Reads ``quiet``, ``video`` (its first
            element is used as the video url), ``richard`` and ``save``.
        args: Not used by this function.

    Returns:
        0 on success, 1 if the JSON file to be saved already exists.
    """
    if not parsed.quiet:
        parser.print_byline()

    video_url = parsed.video[0]
    data = scrapevideo(video_url, parsed.richard, 'object')

    if parsed.save:
        cfg = get_project_config()

        projectpath = cfg.get('project', 'projectpath')
        jsonpath = os.path.join(projectpath, 'json')

        if not os.path.exists(jsonpath):
            os.makedirs(jsonpath)

        # The file must live in the directory that was just ensured to
        # exist; a cwd-relative 'json/...' path only matches it when the
        # command happens to run from projectpath.
        basename = generate_filename(data['title']) + '.json'
        fn = os.path.join(jsonpath, basename)

        if os.path.exists(fn):
            err('It already exists!')
            return 1

        with open(fn, 'w') as fp:
            fp.write(convert_to_json(data))
        # Single-argument parenthesised print is equivalent to the print
        # statement.
        print('Saved as {0}'.format(fn))
    else:
        print(convert_to_json(data))
    return 0
Esempio n. 4
0
def scrapevideo(ctx, quiet, save, video_url):
    """Fetches metadata for a video from a site.

    Converts the first item returned by ``scrape_video(video_url)`` to JSON
    and either echoes it or, when ``save`` is true, writes it to
    ``<jsonpath>/<generated filename>.json`` where ``jsonpath`` comes from
    the ``[project]`` section of the project config.

    :arg ctx: not used by this function
    :arg quiet: when false, ``VERSION`` is echoed first
    :arg save: when true, write the JSON to a file instead of echoing it
    :arg video_url: url passed to ``scrape_video``

    :raises click.ClickException: if the file to be written already exists
    """
    if not quiet:
        click.echo(VERSION)

    data = scrape_video(video_url)[0]
    if save:
        cfg = get_project_config()

        jsonpath = cfg.get('project', 'jsonpath')

        if not os.path.exists(jsonpath):
            os.makedirs(jsonpath)

        fn = generate_filename(data['title']) + '.json'
        full_path = os.path.join(jsonpath, fn)

        # Guard the real destination: checking only ``fn`` would test the
        # current directory and never notice an existing file in jsonpath.
        if os.path.exists(full_path):
            raise click.ClickException(u'File "%s" already exists!' % fn)

        with open(full_path, 'w') as fp:
            fp.write(convert_to_json(data))
        click.echo(u'Saved as {0}'.format(fn))

    else:
        click.echo(convert_to_json(data))
Esempio n. 5
0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project url and write one JSON file per video found.

    Videos whose ``source_url`` already appears in the project's loaded
    JSON files are skipped unless ``parsed.force`` is set.

    Args:
        cfg: Project config; reads ``projectpath``, ``url`` and (for urls
            containing 'youtube') ``youtube_embed`` from ``[project]``.
        parser: Object providing ``print_byline()``, which is called unless
            ``parsed.quiet`` is set.
        parsed: Parsed options. Reads ``quiet`` and ``force``.
        args: Not used by this function.

    Returns:
        0 on success; 1 if no url is configured or ``youtube_embed`` is not
        a key of ``YOUTUBE_EMBED``.
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn)
        for fn, item in load_json_files(cfg)
    )

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    videos = fetch_videos_from_url(url, youtube_embed)

    # Single-argument parenthesised print is equivalent to the print
    # statement used elsewhere.
    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        # The index is the video's position in the scraped list; the
        # generated basename is capped at 40 characters.
        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        print('Working on {0}... ({1})'.format(
            stringify(video['title']), filename))

        # Write into jsonpath (the directory ensured above) instead of a
        # cwd-relative 'json' directory, and let ``with`` close the file
        # even if the write raises.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
Esempio n. 6
0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project url and write one JSON file per video found.

    Videos whose ``source_url`` already appears in the project's loaded
    JSON files are skipped unless ``parsed.force`` is set.

    Args:
        cfg: Project config; reads ``projectpath``, ``url`` and (for urls
            containing 'youtube') ``youtube_embed`` from ``[project]``.
        parser: Object providing ``print_byline()``, which is called unless
            ``parsed.quiet`` is set.
        parsed: Parsed options. Reads ``quiet`` and ``force``.
        args: Not used by this function.

    Returns:
        0 on success; 1 if no url is configured or ``youtube_embed`` is not
        a key of ``YOUTUBE_EMBED``.
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn) for fn, item in load_json_files(cfg))

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    videos = fetch_videos_from_url(url, youtube_embed)

    # Single-argument parenthesised print is equivalent to the print
    # statement.
    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        # Prefix with the position in the scraped list and cap the
        # generated basename at 40 characters.
        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(index=i,
                                                        basename=filename[:40])

        print('Working on {0}... ({1})'.format(stringify(video['title']),
                                               filename))

        # Target the jsonpath directory created above rather than 'json'
        # relative to the cwd; ``with`` guarantees the handle is closed if
        # the write fails.
        target = os.path.join(jsonpath, filename)
        with open(target, 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
Esempio n. 7
0
def fetch_cmd(cfg, parser, parsed, args):
    """Load a vidscraper feed for the project url and write JSON per video.

    Args:
        cfg: Project config; reads ``projectpath``, ``url`` and (for urls
            containing 'youtube') ``youtube_embed`` from ``[project]``.
        parser: Object providing ``print_byline()``, which is called unless
            ``parsed.quiet`` is set.
        parsed: Parsed options. Reads ``quiet``.
        args: Not used by this function.

    Returns:
        0 on success; 1 if no url is configured or ``youtube_embed`` is not
        a key of ``YOUTUBE_EMBED``.
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    video_feed = vidscraper.auto_feed(url)
    video_feed.load()

    # Single-argument parenthesised print is equivalent to the print
    # statement.
    print('Found {0} videos...'.format(video_feed.video_count))
    for i, video in enumerate(video_feed):
        filename = generate_filename(video.title or '')
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        # A video without a title must not crash the progress message:
        # normalize() rejects None, so fall back to an empty unicode string
        # just as the filename generation above falls back to ''.
        display_title = unicodedata.normalize(
            'NFKD', video.title or u'').encode('ascii', 'ignore')
        print('Working on {0}... ({1})'.format(display_title, filename))
        item = vidscraper_to_dict(video, youtube_embed=youtube_embed)

        # Write into jsonpath (the directory ensured above) instead of a
        # cwd-relative 'json' directory; ``with`` closes the file even if
        # the write raises.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(item))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
Esempio n. 8
0
def fetch(cfg, ctx, quiet, force):
    """Fetches videos and generates JSON files."""
    if not quiet:
        click.echo(VERSION)

    output_dir = cfg.get('project', 'jsonpath')

    # Map each already-known source_url to the file it was loaded from.
    known_sources = {
        entry['source_url']: name
        for name, entry in load_json_files(cfg)
    }

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # A missing "url" option is treated the same as an empty one.
    try:
        url = cfg.get('project', 'url')
    except NoOptionError:
        url = ''

    if not url:
        config_name = get_project_config_file_name()
        message = (
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'
        ).format(config_name)
        raise click.ClickException(message)

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    videos = scrape_videos(url)

    click.echo(u'Found {0} videos...'.format(len(videos)))
    for position, video in enumerate(videos):
        if video['source_url'] not in known_sources or force:
            # File name: zero-padded position in the scraped list plus at
            # most 40 characters of the generated name.
            stem = generate_filename(video['title'])[:40]
            target_name = '{index:04d}_{basename}.json'.format(
                index=position, basename=stem)

            title_text = stringify(video['title'])
            click.echo(u'Created {0}... ({1})'.format(title_text, target_name))

            target_path = os.path.join(output_dir, target_name)
            with open(target_path, 'w') as fp:
                fp.write(convert_to_json(video))
        else:
            title_text = stringify(video['title'])
            click.echo(
                u'Skipping {0}... already exists.'.format(title_text))
Esempio n. 9
0
def fetch(cfg, ctx, quiet, force):
    """Fetches videos and generates JSON files."""
    if not quiet:
        click.echo(VERSION)

    json_dir = cfg.get('project', 'jsonpath')

    # Lookup of source_url -> filename for the JSON files already loaded.
    seen = {}
    for existing_fn, existing_item in load_json_files(cfg):
        seen[existing_item['source_url']] = existing_fn

    if not os.path.exists(json_dir):
        os.makedirs(json_dir)

    # An absent "url" option counts as no url at all.
    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        template = (u'url not specified in {0} project config file.\n\n'
                    u'Add "url = ..." to [project] section of {0} file.')
        raise click.ClickException(
            template.format(get_project_config_file_name()))

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    videos = scrape_videos(url)

    click.echo(u'Found {0} videos...'.format(len(videos)))
    for idx, vid in enumerate(videos):
        already_known = vid['source_url'] in seen
        if already_known and not force:
            skip_msg = u'Skipping {0}... already exists.'.format(
                stringify(vid['title']))
            click.echo(skip_msg)
            continue

        # Name is the zero-padded list position followed by up to 40
        # characters of the generated filename.
        generated = generate_filename(vid['title'])
        out_name = '{index:04d}_{basename}.json'.format(
            index=idx, basename=generated[:40])

        created_msg = u'Created {0}... ({1})'.format(
            stringify(vid['title']), out_name)
        click.echo(created_msg)

        with open(os.path.join(json_dir, out_name), 'w') as fp:
            fp.write(convert_to_json(vid))