Example no. 1
0
    # NOTE(review): truncated scrape fragment — the enclosing function header
    # and the remainder of the loop body are missing from this excerpt.
    # Counters: items seen, successful downloads, already-existing files,
    # filtered-out items, and failed downloads.
    TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0
    FINISHED = False

    # Create the specified directory if it doesn't already exist.
    if not pathexists(ARGS.dir):
        mkdir(ARGS.dir)

    # If a regex has been specified, compile the rule (once)
    RE_RULE = None
    if ARGS.regex:
        RE_RULE = re.compile(ARGS.regex)

    # Resume pagination from the last-seen item id (empty string = start).
    LAST = ARGS.last

    while not FINISHED:
        ITEMS = getitems(ARGS.reddit, LAST)
        if not ITEMS:
            # No more items to process
            break

        for ITEM in ITEMS:
            TOTAL += 1

            # Filter: skip posts scoring below the configured threshold.
            if ITEM['score'] < ARGS.score:
                if ARGS.verbose:
                    print '    SCORE: %s has score of %s which is lower than required score of %s.' % (
                        ITEM['id'], ITEM['score'], ARGS.score)

                SKIPPED += 1
                continue
            # Filter: --sfw skips posts flagged over-18. (Fragment ends here.)
            elif ARGS.sfw and ITEM['over_18']:
    # NOTE(review): truncated scrape fragment — a near-duplicate of the
    # fragment above; the function header and the rest of the body are missing.
    # Counters: items seen, successful downloads, already-existing files,
    # filtered-out items, and failed downloads.
    TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0
    FINISHED = False

    # Create the specified directory if it doesn't already exist.
    if not pathexists(ARGS.dir):
        mkdir(ARGS.dir)

    # If a regex has been specified, compile the rule (once)
    RE_RULE = None
    if ARGS.regex:
        RE_RULE = re.compile(ARGS.regex)

    # Resume pagination from the last-seen item id (empty string = start).
    LAST = ARGS.last

    while not FINISHED:
        ITEMS = getitems(ARGS.reddit, LAST)
        if not ITEMS:
            # No more items to process
            break

        for ITEM in ITEMS:
            TOTAL += 1

            # Filter: skip posts scoring below the configured threshold.
            if ITEM['score'] < ARGS.score:
                if ARGS.verbose:
                    print '    SCORE: %s has score of %s which is lower than required score of %s.' % (ITEM['id'], ITEM['score'], ARGS.score)

                SKIPPED += 1
                continue
            # Filter: --sfw skips posts flagged over-18. (Fragment ends here.)
            elif ARGS.sfw and ITEM['over_18']:
                if ARGS.verbose:
Example no. 3
0
def main():
    p = argparse.ArgumentParser(
        description=('Downloads files with specified extension from the '
                     'specified subreddit.'))

    p.add_argument('reddit', metavar='<subreddit>', 
        help='Subreddit name.')
    p.add_argument('dir', metavar='<destdir>', 
        help='Dir to put downloaded files in.')
    p.add_argument('--last',
        metavar='ID', default='', required=False, 
        help='ID of the last downloaded file.')
    p.add_argument('--score',
        metavar='score', default=0, type=int, required=False, 
        help='Minimum score of images to download.')
    p.add_argument('--num',
        metavar='count', default=0, type=int, required=False, 
        help='Number of images to download.')
    p.add_argument('--update',
        default=False, action='store_true',
        required=False, 
        help='Run until you encounter a file already downloaded.')
    p.add_argument('--sfw',
        default=False, action='store_true', required=False, 
        help='Download safe for work images only.')
    p.add_argument('--nsfw',
        default=False, action='store_true', required=False, 
        help='Download NSFW images only.')
    p.add_argument('--regex',
        default=None, action='store', required=False, 
        help='Use Python regex to filter based on title.')
    p.add_argument('--verbose',
        default=False, action='store_true', required=False, 
        help='Enable verbose output.')

    args = p.parse_args()

    print 'Downloading images from "%s" subreddit' % (args.reddit)

    nTotal = nDownloaded = nErrors = nSkipped = nFailed = 0
    bFinished = False

    # Create the specified directory if it doesn't already exist.
    if not os.path.exists(args.dir):
        os.mkdir(args.dir)

    # If a regex has been specified, compile the rule (once)
    reRule = None
    if args.regex:
        reRule = re.compile(args.regex)

    lastId = args.last

    while not bFinished:
        postings = reddit.getitems(args.reddit, lastId)
        if not postings:
            # No more items to process
            break

        for post in postings:
            nTotal += 1

            if post['score'] < args.score:
                if args.verbose:
                    print ('    SCORE: %s has score of %s which is lower'
                           ' than required score of %s.' 
                           % (post['id'], post['score'], args.score))

                nSkipped += 1
                continue
            elif args.sfw and post['over_18']:
                if args.verbose:
                    print '    NSFW: %s is marked as NSFW.' % (post['id'])

                nSkipped += 1
                continue
            elif args.nsfw and not post['over_18']:
                if args.verbose:
                    print '    Not NSFW, skipping %s' % (post['id'])

                nSkipped += 1
                continue
            elif args.regex and not re.match(reRule, post['title']):
                if args.verbose:
                    print '    Regex match failed'

                nSkipped += 1
                continue

            for url in _extractUrls(post['url']):
                try:
                    _downloadFromUrl(url, args.dir)

                    # Image downloaded successfully!
                    print '    Downloaded URL [%s].' % (url)
                    nDownloaded += 1

                    if args.num > 0 and nDownloaded >= args.num:
                        bFinished = True
                        break
                except WrongFileTypeException as error:
                    print '    %s' % (error)
                    nSkipped += 1
                except FileExistsException as error:
                    print '    %s' % (error)
                    nErrors += 1
                    if args.update:
                        print '    Update complete, exiting.'
                        bFinished = True
                        break
                except urllib2.HTTPError as error:
                    print ('    HTTP error: Code %s for %s.' % 
                           (error.code, url))
                    nFailed += 1
                except urllib2.URLError as error:
                    print '    URL error: %s!' % (url)
                    nFailed += 1
                except httplib.InvalidURL as error:
                    print '    Invalid URL: %s!' % (url)
                    nFailed += 1

            if bFinished:
                break

        lastId = post['id']

    print ('Downloaded %d files (Processed %d, Skipped %d, Exists %d)' % 
           (nDownloaded, nTotal, nSkipped, nErrors))
    # NOTE(review): truncated scrape fragment — the function header and the
    # rest of the body are missing; this variant also passes a multireddit
    # argument to getitems().
    # Counters: items seen, successful downloads, already-existing files,
    # filtered-out items, and failed downloads.
    TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0
    FINISHED = False

    # Create the specified directory if it doesn't already exist.
    if not pathexists(ARGS.dir):
        mkdir(ARGS.dir)

    # If a regex has been specified, compile the rule (once)
    RE_RULE = None
    if ARGS.regex:
        RE_RULE = re.compile(ARGS.regex)

    # Resume pagination from the last-seen item id (empty string = start).
    LAST = ARGS.last

    while not FINISHED:
        ITEMS = getitems(ARGS.reddit, ARGS.multireddit, LAST)
        if not ITEMS:
            # No more items to process
            break

        for ITEM in ITEMS:
            TOTAL += 1

            # Filter: skip posts scoring below the configured threshold.
            if ITEM['score'] < ARGS.score:
                if ARGS.verbose:
                    print '    SCORE: %s has score of %s which is lower than required score of %s.' % (ITEM['id'], ITEM['score'], ARGS.score)

                SKIPPED += 1
                continue
            # Filter: --sfw skips posts flagged over-18. (Fragment ends here.)
            elif ARGS.sfw and ITEM['over_18']:
                if ARGS.verbose:
Example no. 5
0
def main():
    p = argparse.ArgumentParser(
        description=('Downloads files with specified extension from the '
                     'specified subreddit.'))

    p.add_argument('reddit', metavar='<subreddit>', help='Subreddit name.')
    p.add_argument('dir',
                   metavar='<destdir>',
                   help='Dir to put downloaded files in.')
    p.add_argument('--last',
                   metavar='ID',
                   default='',
                   required=False,
                   help='ID of the last downloaded file.')
    p.add_argument('--score',
                   metavar='score',
                   default=0,
                   type=int,
                   required=False,
                   help='Minimum score of images to download.')
    p.add_argument('--num',
                   metavar='count',
                   default=0,
                   type=int,
                   required=False,
                   help='Number of images to download.')
    p.add_argument('--update',
                   default=False,
                   action='store_true',
                   required=False,
                   help='Run until you encounter a file already downloaded.')
    p.add_argument('--sfw',
                   default=False,
                   action='store_true',
                   required=False,
                   help='Download safe for work images only.')
    p.add_argument('--nsfw',
                   default=False,
                   action='store_true',
                   required=False,
                   help='Download NSFW images only.')
    p.add_argument('--regex',
                   default=None,
                   action='store',
                   required=False,
                   help='Use Python regex to filter based on title.')
    p.add_argument('--verbose',
                   default=False,
                   action='store_true',
                   required=False,
                   help='Enable verbose output.')

    args = p.parse_args()

    print 'Downloading images from "%s" subreddit' % (args.reddit)

    nTotal = nDownloaded = nErrors = nSkipped = nFailed = 0
    bFinished = False

    # Create the specified directory if it doesn't already exist.
    if not os.path.exists(args.dir):
        os.mkdir(args.dir)

    # If a regex has been specified, compile the rule (once)
    reRule = None
    if args.regex:
        reRule = re.compile(args.regex)

    lastId = args.last

    while not bFinished:
        postings = reddit.getitems(args.reddit, lastId)
        if not postings:
            # No more items to process
            break

        for post in postings:
            nTotal += 1

            if post['score'] < args.score:
                if args.verbose:
                    print(
                        '    SCORE: %s has score of %s which is lower'
                        ' than required score of %s.' %
                        (post['id'], post['score'], args.score))

                nSkipped += 1
                continue
            elif args.sfw and post['over_18']:
                if args.verbose:
                    print '    NSFW: %s is marked as NSFW.' % (post['id'])

                nSkipped += 1
                continue
            elif args.nsfw and not post['over_18']:
                if args.verbose:
                    print '    Not NSFW, skipping %s' % (post['id'])

                nSkipped += 1
                continue
            elif args.regex and not re.match(reRule, post['title']):
                if args.verbose:
                    print '    Regex match failed'

                nSkipped += 1
                continue

            for url in _extractUrls(post['url']):
                try:
                    _downloadFromUrl(url, args.dir)

                    # Image downloaded successfully!
                    print '    Downloaded URL [%s].' % (url)
                    nDownloaded += 1

                    if args.num > 0 and nDownloaded >= args.num:
                        bFinished = True
                        break
                except WrongFileTypeException as error:
                    print '    %s' % (error)
                    nSkipped += 1
                except FileExistsException as error:
                    print '    %s' % (error)
                    nErrors += 1
                    if args.update:
                        print '    Update complete, exiting.'
                        bFinished = True
                        break
                except urllib2.HTTPError as error:
                    print('    HTTP error: Code %s for %s.' %
                          (error.code, url))
                    nFailed += 1
                except urllib2.URLError as error:
                    print '    URL error: %s!' % (url)
                    nFailed += 1
                except httplib.InvalidURL as error:
                    print '    Invalid URL: %s!' % (url)
                    nFailed += 1

            if bFinished:
                break

        lastId = post['id']

    print('Downloaded %d files (Processed %d, Skipped %d, Exists %d)' %
          (nDownloaded, nTotal, nSkipped, nErrors))