def scrapevideo(ctx, quiet, save, video_url):
    """Fetches metadata for a video from a site."""
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably surfaced as the command's help text by click -- confirm.
    #
    # Behavior:
    #   * unless ``quiet`` is set, echo VERSION first;
    #   * scrape ``video_url`` and use the first item of the result;
    #   * without ``save``, echo the JSON form of that item;
    #   * with ``save``, write it to ``<jsonpath>/<generated name>.json``,
    #     creating the directory if needed, and raise click.ClickException
    #     if that file already exists.
    if not quiet:
        click.echo(VERSION)

    data = scrape_video(video_url)[0]

    if not save:
        click.echo(convert_to_json(data))
        return

    cfg = get_project_config()
    jsonpath = cfg.get('project', 'jsonpath')
    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    fn = generate_filename(data['title']) + '.json'
    full_path = os.path.join(jsonpath, fn)

    # Test the path that is actually written below. Checking the bare
    # filename would look in the current working directory and let an
    # existing file inside jsonpath be overwritten silently.
    if os.path.exists(full_path):
        raise click.ClickException(u'File "%s" already exists!' % fn)

    with open(full_path, 'w') as fp:
        fp.write(convert_to_json(data))
    click.echo(u'Saved as {0}'.format(fn))
def scrapevideo_cmd(parser, parsed, args):
    """Scrape metadata for a single video and print or save it as JSON.

    :arg parser: object providing ``print_byline()``; called unless
        ``parsed.quiet`` is set
    :arg parsed: parsed arguments; reads ``quiet``, ``video`` (first
        element is the video url), ``richard`` and ``save``
    :arg args: unused here

    :returns: 0 on success, 1 if the target JSON file already exists
    """
    if not parsed.quiet:
        parser.print_byline()

    video_url = parsed.video[0]
    data = scrapevideo(video_url, parsed.richard, 'object')

    if not parsed.save:
        print(convert_to_json(data))
        return 0

    cfg = get_project_config()
    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')
    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    basename = generate_filename(data['title']) + '.json'
    # Name shown to the user; kept in the same 'json/...' form as before.
    fn = 'json/' + basename
    # Check and write inside the project's json directory (the one created
    # above) rather than a 'json/' directory relative to wherever the
    # command happens to be run from.
    full_path = os.path.join(jsonpath, basename)

    if os.path.exists(full_path):
        err('It already exists!')
        return 1

    with open(full_path, 'w') as fp:
        fp.write(convert_to_json(data))
    print('Saved as {0}'.format(fn))
    return 0
def fetch_cmd(cfg, parser, parsed, args):
    """Fetch the videos at the project url and write one JSON file each.

    :arg cfg: project config; reads ``projectpath``, ``url`` and (for
        urls containing "youtube") ``youtube_embed`` from ``[project]``
    :arg parser: object providing ``print_byline()``; called unless
        ``parsed.quiet`` is set
    :arg parsed: parsed arguments; reads ``quiet`` and ``force``
    :arg args: unused here

    :returns: 0 on success, 1 if the url is missing or ``youtube_embed``
        is not a key of ``YOUTUBE_EMBED``
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn) for fn, item in load_json_files(cfg))

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    videos = fetch_videos_from_url(url, youtube_embed)

    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        # Videos whose source_url is already in an existing JSON file are
        # skipped unless --force was given.
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        print('Working on {0}... ({1})'.format(
            stringify(video['title']), filename))

        # Write into the project's json directory (created above), not a
        # 'json' directory relative to the current working directory, and
        # let the context manager close the file even if the write fails.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the configured project url and save each video as JSON.

    Reads ``projectpath`` and ``url`` from the ``[project]`` section of
    ``cfg``; when the url contains "youtube", ``youtube_embed`` is read as
    well and looked up in ``YOUTUBE_EMBED``. Videos whose ``source_url``
    already appears in a loaded JSON file are skipped unless
    ``parsed.force`` is set.

    :arg cfg: project config object
    :arg parser: object providing ``print_byline()``
    :arg parsed: parsed arguments; reads ``quiet`` and ``force``
    :arg args: unused here

    :returns: 0 on success, 1 on a missing url or an unknown
        ``youtube_embed`` value
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn) for fn, item in load_json_files(cfg))

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    youtube_embed = None
    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1

    out('Scraping {0}...'.format(url))
    videos = fetch_videos_from_url(url, youtube_embed)

    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        print('Working on {0}... ({1})'.format(
            stringify(video['title']), filename))

        # The output directory is <projectpath>/json (ensured above), so
        # write there instead of a cwd-relative 'json' directory. The
        # ``with`` block closes the handle even when the write raises.
        target = os.path.join(jsonpath, filename)
        with open(target, 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
def fetch_cmd(cfg, parser, parsed, args):
    """Load the project url as a vidscraper feed and write JSON per video.

    :arg cfg: project config; reads ``projectpath``, ``url`` and (for
        urls containing "youtube") ``youtube_embed`` from ``[project]``
    :arg parser: object providing ``print_byline()``; called unless
        ``parsed.quiet`` is set
    :arg parsed: parsed arguments; reads ``quiet``
    :arg args: unused here

    :returns: 0 on success, 1 if the url is missing or ``youtube_embed``
        is not a key of ``YOUTUBE_EMBED``
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except KeyError:
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    video_feed = vidscraper.auto_feed(url)
    video_feed.load()

    print('Found {0} videos...'.format(video_feed.video_count))
    for i, video in enumerate(video_feed):
        filename = generate_filename(video.title or '')
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        # The filename above already allows for a missing title; do the
        # same for the progress message so a falsy title does not make
        # unicodedata.normalize() raise.
        display_title = unicodedata.normalize(
            'NFKD', video.title or u'').encode('ascii', 'ignore')
        print('Working on {0}... ({1})'.format(display_title, filename))

        item = vidscraper_to_dict(video, youtube_embed=youtube_embed)

        # Write into the project's json directory (created above), not a
        # 'json' directory relative to the current working directory, and
        # let the context manager close the file even if the write fails.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(item))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
def fetch(cfg, ctx, quiet, force):
    """Fetches videos and generates JSON files."""
    # NOTE(review): docstring kept verbatim; it is presumably shown as the
    # command's help text by click -- confirm.
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # Map of source_url -> filename for every JSON file already loaded;
    # used below to decide which scraped videos to skip.
    known_sources = {}
    for json_fn, contents in load_json_files(cfg):
        known_sources[contents['source_url']] = json_fn

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    # A missing "url" option is treated the same as an empty one.
    try:
        url = cfg.get('project', 'url')
    except NoOptionError:
        url = ''

    if not url:
        message = (
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'
        ).format(get_project_config_file_name())
        raise click.ClickException(message)

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')

    videos = scrape_videos(url)

    click.echo(u'Found {0} videos...'.format(len(videos)))

    for position, video in enumerate(videos):
        already_scraped = video['source_url'] in known_sources
        title = video['title']

        # Existing entries are only rewritten when forced.
        if already_scraped and not force:
            skip_msg = u'Skipping {0}... already exists.'.format(
                stringify(title))
            click.echo(skip_msg)
            continue

        # Zero-padded position plus at most 40 characters of the
        # generated name.
        stem = generate_filename(title)[:40]
        filename = '{index:04d}_{basename}.json'.format(
            index=position, basename=stem)

        click.echo(u'Created {0}... ({1})'.format(stringify(title), filename))

        destination = os.path.join(jsonpath, filename)
        with open(destination, 'w') as fp:
            fp.write(convert_to_json(video))
def fetch(cfg, ctx, quiet, force):
    """Fetches videos and generates JSON files."""
    # NOTE(review): docstring kept verbatim; it is presumably shown as the
    # command's help text by click -- confirm.
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # source_url -> filename, built from the JSON files that already exist.
    existing = {}
    for existing_fn, existing_item in load_json_files(cfg):
        existing[existing_item['source_url']] = existing_fn

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    # An absent "url" option is handled like an empty value.
    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        config_name = get_project_config_file_name()
        raise click.ClickException(
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'.format(
                config_name)
        )

    click.echo(u'Scraping {0}...'.format(url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')

    videos = scrape_videos(url)
    video_count = len(videos)
    click.echo(u'Found {0} videos...'.format(video_count))

    for idx, video in enumerate(videos):
        seen_before = video['source_url'] in existing

        if seen_before and not force:
            # Leave the existing file alone unless a rewrite was forced.
            click.echo(u'Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        # Output name: zero-padded index followed by the first 40
        # characters of the generated filename.
        generated = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=idx, basename=generated[:40])

        created_msg = u'Created {0}... ({1})'.format(
            stringify(video['title']), filename)
        click.echo(created_msg)

        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(video))