# NOTE(review): truncated, syntactically incomplete fragment — it ends mid-`elif`
# with no body. It appears to be a flattened leftover of an older revision of the
# main() download loop below (same logic, SCREAMING_CASE names, free-function
# `pathexists`/`mkdir`/`getitems` instead of the `os.path`/`reddit` forms).
# TODO(review): confirm this is dead merge residue and remove it.
TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0 FINISHED = False # Create the specified directory if it doesn't already exist. if not pathexists(ARGS.dir): mkdir(ARGS.dir) # If a regex has been specified, compile the rule (once) RE_RULE = None if ARGS.regex: RE_RULE = re.compile(ARGS.regex) LAST = ARGS.last while not FINISHED: ITEMS = getitems(ARGS.reddit, LAST) if not ITEMS: # No more items to process break for ITEM in ITEMS: TOTAL += 1 if ITEM['score'] < ARGS.score: if ARGS.verbose: print ' SCORE: %s has score of %s which is lower than required score of %s.' % ( ITEM['id'], ITEM['score'], ARGS.score) SKIPPED += 1 continue elif ARGS.sfw and ITEM['over_18']:
# NOTE(review): second truncated fragment of the same SCREAMING_CASE download
# loop as the line above (differs only in how the SCORE message is wrapped);
# it ends at a bare `if ARGS.verbose:` and is not syntactically complete.
# TODO(review): confirm this is duplicate merge residue and remove it.
TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0 FINISHED = False # Create the specified directory if it doesn't already exist. if not pathexists(ARGS.dir): mkdir(ARGS.dir) # If a regex has been specified, compile the rule (once) RE_RULE = None if ARGS.regex: RE_RULE = re.compile(ARGS.regex) LAST = ARGS.last while not FINISHED: ITEMS = getitems(ARGS.reddit, LAST) if not ITEMS: # No more items to process break for ITEM in ITEMS: TOTAL += 1 if ITEM['score'] < ARGS.score: if ARGS.verbose: print ' SCORE: %s has score of %s which is lower than required score of %s.' % (ITEM['id'], ITEM['score'], ARGS.score) SKIPPED += 1 continue elif ARGS.sfw and ITEM['over_18']: if ARGS.verbose:
def main(): p = argparse.ArgumentParser( description=('Downloads files with specified extension from the ' 'specified subreddit.')) p.add_argument('reddit', metavar='<subreddit>', help='Subreddit name.') p.add_argument('dir', metavar='<destdir>', help='Dir to put downloaded files in.') p.add_argument('--last', metavar='ID', default='', required=False, help='ID of the last downloaded file.') p.add_argument('--score', metavar='score', default=0, type=int, required=False, help='Minimum score of images to download.') p.add_argument('--num', metavar='count', default=0, type=int, required=False, help='Number of images to download.') p.add_argument('--update', default=False, action='store_true', required=False, help='Run until you encounter a file already downloaded.') p.add_argument('--sfw', default=False, action='store_true', required=False, help='Download safe for work images only.') p.add_argument('--nsfw', default=False, action='store_true', required=False, help='Download NSFW images only.') p.add_argument('--regex', default=None, action='store', required=False, help='Use Python regex to filter based on title.') p.add_argument('--verbose', default=False, action='store_true', required=False, help='Enable verbose output.') args = p.parse_args() print 'Downloading images from "%s" subreddit' % (args.reddit) nTotal = nDownloaded = nErrors = nSkipped = nFailed = 0 bFinished = False # Create the specified directory if it doesn't already exist. if not os.path.exists(args.dir): os.mkdir(args.dir) # If a regex has been specified, compile the rule (once) reRule = None if args.regex: reRule = re.compile(args.regex) lastId = args.last while not bFinished: postings = reddit.getitems(args.reddit, lastId) if not postings: # No more items to process break for post in postings: nTotal += 1 if post['score'] < args.score: if args.verbose: print (' SCORE: %s has score of %s which is lower' ' than required score of %s.' 
% (post['id'], post['score'], args.score)) nSkipped += 1 continue elif args.sfw and post['over_18']: if args.verbose: print ' NSFW: %s is marked as NSFW.' % (post['id']) nSkipped += 1 continue elif args.nsfw and not post['over_18']: if args.verbose: print ' Not NSFW, skipping %s' % (post['id']) nSkipped += 1 continue elif args.regex and not re.match(reRule, post['title']): if args.verbose: print ' Regex match failed' nSkipped += 1 continue for url in _extractUrls(post['url']): try: _downloadFromUrl(url, args.dir) # Image downloaded successfully! print ' Downloaded URL [%s].' % (url) nDownloaded += 1 if args.num > 0 and nDownloaded >= args.num: bFinished = True break except WrongFileTypeException as error: print ' %s' % (error) nSkipped += 1 except FileExistsException as error: print ' %s' % (error) nErrors += 1 if args.update: print ' Update complete, exiting.' bFinished = True break except urllib2.HTTPError as error: print (' HTTP error: Code %s for %s.' % (error.code, url)) nFailed += 1 except urllib2.URLError as error: print ' URL error: %s!' % (url) nFailed += 1 except httplib.InvalidURL as error: print ' Invalid URL: %s!' % (url) nFailed += 1 if bFinished: break lastId = post['id'] print ('Downloaded %d files (Processed %d, Skipped %d, Exists %d)' % (nDownloaded, nTotal, nSkipped, nErrors))
# NOTE(review): third truncated fragment of the SCREAMING_CASE download loop;
# this revision passes an extra `ARGS.multireddit` argument to getitems(),
# which no other copy in this file does — presumably from a later feature
# branch. It ends at a bare `if ARGS.verbose:` and is not syntactically
# complete. TODO(review): confirm this is merge residue and remove it.
TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0 FINISHED = False # Create the specified directory if it doesn't already exist. if not pathexists(ARGS.dir): mkdir(ARGS.dir) # If a regex has been specified, compile the rule (once) RE_RULE = None if ARGS.regex: RE_RULE = re.compile(ARGS.regex) LAST = ARGS.last while not FINISHED: ITEMS = getitems(ARGS.reddit, ARGS.multireddit, LAST) if not ITEMS: # No more items to process break for ITEM in ITEMS: TOTAL += 1 if ITEM['score'] < ARGS.score: if ARGS.verbose: print ' SCORE: %s has score of %s which is lower than required score of %s.' % (ITEM['id'], ITEM['score'], ARGS.score) SKIPPED += 1 continue elif ARGS.sfw and ITEM['over_18']: if ARGS.verbose:
def main(): p = argparse.ArgumentParser( description=('Downloads files with specified extension from the ' 'specified subreddit.')) p.add_argument('reddit', metavar='<subreddit>', help='Subreddit name.') p.add_argument('dir', metavar='<destdir>', help='Dir to put downloaded files in.') p.add_argument('--last', metavar='ID', default='', required=False, help='ID of the last downloaded file.') p.add_argument('--score', metavar='score', default=0, type=int, required=False, help='Minimum score of images to download.') p.add_argument('--num', metavar='count', default=0, type=int, required=False, help='Number of images to download.') p.add_argument('--update', default=False, action='store_true', required=False, help='Run until you encounter a file already downloaded.') p.add_argument('--sfw', default=False, action='store_true', required=False, help='Download safe for work images only.') p.add_argument('--nsfw', default=False, action='store_true', required=False, help='Download NSFW images only.') p.add_argument('--regex', default=None, action='store', required=False, help='Use Python regex to filter based on title.') p.add_argument('--verbose', default=False, action='store_true', required=False, help='Enable verbose output.') args = p.parse_args() print 'Downloading images from "%s" subreddit' % (args.reddit) nTotal = nDownloaded = nErrors = nSkipped = nFailed = 0 bFinished = False # Create the specified directory if it doesn't already exist. if not os.path.exists(args.dir): os.mkdir(args.dir) # If a regex has been specified, compile the rule (once) reRule = None if args.regex: reRule = re.compile(args.regex) lastId = args.last while not bFinished: postings = reddit.getitems(args.reddit, lastId) if not postings: # No more items to process break for post in postings: nTotal += 1 if post['score'] < args.score: if args.verbose: print( ' SCORE: %s has score of %s which is lower' ' than required score of %s.' 
% (post['id'], post['score'], args.score)) nSkipped += 1 continue elif args.sfw and post['over_18']: if args.verbose: print ' NSFW: %s is marked as NSFW.' % (post['id']) nSkipped += 1 continue elif args.nsfw and not post['over_18']: if args.verbose: print ' Not NSFW, skipping %s' % (post['id']) nSkipped += 1 continue elif args.regex and not re.match(reRule, post['title']): if args.verbose: print ' Regex match failed' nSkipped += 1 continue for url in _extractUrls(post['url']): try: _downloadFromUrl(url, args.dir) # Image downloaded successfully! print ' Downloaded URL [%s].' % (url) nDownloaded += 1 if args.num > 0 and nDownloaded >= args.num: bFinished = True break except WrongFileTypeException as error: print ' %s' % (error) nSkipped += 1 except FileExistsException as error: print ' %s' % (error) nErrors += 1 if args.update: print ' Update complete, exiting.' bFinished = True break except urllib2.HTTPError as error: print(' HTTP error: Code %s for %s.' % (error.code, url)) nFailed += 1 except urllib2.URLError as error: print ' URL error: %s!' % (url) nFailed += 1 except httplib.InvalidURL as error: print ' Invalid URL: %s!' % (url) nFailed += 1 if bFinished: break lastId = post['id'] print('Downloaded %d files (Processed %d, Skipped %d, Exists %d)' % (nDownloaded, nTotal, nSkipped, nErrors))