コード例 #1
0
ファイル: update_apple.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the Apple update script.

    Registers the db / noop / verbose / music-only / apps-only / QPS options,
    parses ``sys.argv`` and, when ``--db`` was supplied, forwards its value to
    ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` exactly as produced by
        ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] query",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run in noop mode without modifying anything",
    )
    cli.add_option(
        "-v", "--verbose",
        action="store_true", default=False,
        help="enable verbose logging",
    )
    cli.add_option(
        "-m", "--musiconly",
        action="store_true", default=False,
        help="only parse music feeds",
    )
    cli.add_option(
        "-a", "--appsonly",
        action="store_true", default=False,
        help="only parse app feeds",
    )
    cli.add_option(
        "-q", "--qps",
        dest="max_qps", action="store", type="int", default=2,
        help="max QPS sent to iTunes",
    )

    opts, positional = cli.parse_args()

    # Only touch the db configuration when a db was explicitly requested.
    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #2
0
ファイル: replace.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the entity replace script.

    At least two positional arguments are required (per the usage string: one
    or more entity ids to delete followed by the entity id to keep); otherwise
    the help text is printed and the process exits with status 1.  When
    ``--db`` is supplied its value is forwarded to ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] one_or_more_entity_ids_to_delete entity_id_to_keep",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run the dedupper in noop mode without modifying anything",
    )
    cli.add_option(
        "-v", "--verbose",
        action="store_true", default=False,
        help="enable verbose logging",
    )
    cli.add_option(
        "-f", "--force",
        action="store_true", default=False,
        help="force overriding of keys during deduping",
    )

    opts, positional = cli.parse_args()

    # Fewer than two positionals cannot name both a source and a target id.
    if len(positional) < 2:
        cli.print_help()
        sys.exit(1)

    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #3
0
ファイル: apple.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the Apple EPF crawler and size its workload.

    After parsing, the options object is published as ``Globals.options`` and
    decorated with:

    * ``verbose`` (forced to ``False``) and ``mount`` (forced to ``True``);
    * ``album_popularity_per_genre`` / ``song_popularity_per_genre``: the two
      popularity relational-DB objects, each started and then joined;
    * ``count0`` / ``count1`` / ``count``: row counts of the two tables and
      their sum;
    * ``offset`` / ``limit``: derived from ``--ratio`` ("N/D") when given,
      otherwise ``limit`` defaults to ``count`` if it was not specified.

    Returns:
        The populated options object (positional arguments are discarded).
    """
    cli = OptionParser(
        usage="Usage: %prog [options] [sources]",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run the dedupper in noop mode without modifying anything",
    )
    cli.add_option(
        "-r", "--ratio",
        dest="ratio", action="store", type="string", default=None,
        help="where this crawler fits in to a distributed stack",
    )
    cli.add_option(
        "-o", "--offset",
        dest="offset", type="int", default=0,
        help="start index of entities to import",
    )
    cli.add_option(
        "-l", "--limit",
        type="int", default=None,
        help="limits the number of entities to import",
    )

    options, _positional = cli.parse_args()
    Globals.options = options

    options.verbose = False
    options.mount = True

    if options.db:
        utils.init_db_config(options.db)

    albums = AppleEPFAlbumPopularityPerGenreRelationalDB()
    options.album_popularity_per_genre = albums
    songs = AppleEPFSongPopularityPerGenreRelationalDB()
    options.song_popularity_per_genre = songs

    # Start both before joining either, so they can run side by side.
    albums.start()
    songs.start()
    albums.join()
    songs.join()

    album_count_sql = 'SELECT COUNT(*) FROM "%s"' % albums.table
    options.count0 = albums.execute(album_count_sql).fetchone()[0]
    song_count_sql = 'SELECT COUNT(*) FROM "%s"' % songs.table
    options.count1 = songs.execute(song_count_sql).fetchone()[0]
    options.count = options.count0 + options.count1

    if options.ratio:
        # "N/D": both parts must be integers; floats keep the division exact.
        num_text, den_text = options.ratio.split('/')
        num_int, den_int = int(num_text), int(den_text)
        num, den = float(num_int), float(den_int)

        options.offset = int(math.floor((options.count * (num - 1)) / den))
        options.limit = int(math.ceil(options.count / den) + 1)

        utils.log("ratio %s) offset=%d, limit=%d" % (options.ratio, options.offset, options.limit))
    elif options.limit is None:
        options.limit = options.count

    return options
コード例 #4
0
ファイル: checkdb.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the db checker.

    Besides the parsed options, a derived ``sampleSetRatio`` attribute is set
    on the options object: ``1.0`` when ``--sampleSetSize`` is omitted,
    otherwise the given percentage divided by 100.  When ``--db`` is supplied
    its value is forwarded to ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] query",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="noop mode (run read-only)",
    )
    cli.add_option(
        "-c", "--check",
        action="store", default=None,
        help="optionally filter checks based off of their name",
    )
    cli.add_option(
        "-s", "--sampleSetSize",
        action="store", type="int", default=None,
        help="sample size as a percentage (e.g., 5 for 5%)",
    )

    opts, positional = cli.parse_args()

    # Convert the percentage into a 0..1 style ratio; no option means "everything".
    percent = opts.sampleSetSize
    opts.sampleSetRatio = 1.0 if percent is None else percent / 100.0

    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #5
0
ファイル: EntitySearch.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the entity search tool.

    Requires at least one positional argument (the query); otherwise the help
    text is printed and the process exits with status 1.

    Post-processing applied to the options object:

    * ``--db``: forwarded to ``utils.init_db_config`` when given.
    * ``-a/--a``: a ``"lat,lng"`` string is converted into a
      ``(float, float)`` tuple stored in ``options.location``; a malformed
      value prints an error plus the help text and exits with status 1.
    * ``-v/--verbose``: when given, copied into the module-level ``_verbose``.
    * ``options.kinds`` / ``options.types``: set from
      ``_convertCategorySubcategory(options.category, options.subcategory)``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    global _verbose

    usage   = "Usage: %prog [options] query"
    version = "%prog " + __version__
    parser  = OptionParser(usage=usage, version=version)

    parser.add_option("-d", "--db", default=None, type="string", action="store",
        help="db to connect to (e.g., peach.db0; defaults to localhost)")

    parser.add_option("-l", "--limit", default=None, type="int",
        help="limits the number of results to return")

    parser.add_option("-o", "--offset", default=0, type="int",
        help="optional offset into results to support paging")

    parser.add_option("-L", "--Local", default=False, action="store_true",
        help="enable local nearby search")

    parser.add_option("-a", "--a", default=None, type="string",
        action="store", dest="location", help="location (lat/lng, e.g. '40.736,-73.989')")

    parser.add_option("-q", "--quick", default=False, action="store_true",
        help="disable third party API queries")

    # Default is None (not False) so "flag absent" can be told apart below.
    parser.add_option("-v", "--verbose", default=None, action="store_true",
        help="turn verbosity on")

    parser.add_option("-c", "--category", default=None, type="string",
        action="store", dest="category",
        help="filters results by a given category")

    parser.add_option("-s", "--subcategory", default=None, type="string",
        action="store", dest="subcategory",
        help="filters results by a given subcategory")

    (options, args) = parser.parse_args()

    if not args:
        parser.print_help()
        sys.exit(1)

    if options.db:
        utils.init_db_config(options.db)

    if options.location:
        try:
            lat, lng = options.location.split(',')
            options.location = (float(lat), float(lng))
        except ValueError:
            # Both a wrong number of comma-separated parts and a non-numeric
            # part surface as ValueError; nothing else can be raised here.
            # Parenthesized form prints identically on Python 2 and 3.
            print("invalid location given '%s'" % options.location)
            parser.print_help()
            sys.exit(1)

    # Leave the module-level verbosity untouched unless -v was passed.
    if options.verbose is not None:
        _verbose = options.verbose

    options.kinds, options.types = _convertCategorySubcategory(options.category, options.subcategory)

    return (options, args)
コード例 #6
0
ファイル: update_images.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the image update script.

    The only option is ``-d/--db``; when supplied, its value is forwarded to
    ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] query",
        version="%prog " + __version__,
    )
    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to",
    )

    opts, positional = cli.parse_args()

    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #7
0
ファイル: trie2.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the autocomplete trie builder.

    After parsing, the options object is published as ``Globals.options`` and
    decorated with:

    * ``count``: the value ``utils.getNumLines`` returns for
      ``autocomplete.txt`` (opened relative to the current directory);
    * ``offset`` / ``limit``: derived from ``--ratio`` ("N/D") when given as
      ``floor(count * (N - 1) / D)`` and ``ceil(count / D) + 1``; otherwise
      ``limit`` defaults to ``count`` if it was not specified;
    * ``verbose``: forced to ``False``.

    Returns:
        The populated options object (positional arguments are discarded).
    """
    usage   = "Usage: %prog [options] [sources]"
    version = "%prog " + __version__
    parser  = OptionParser(usage=usage, version=version)

    parser.add_option("-d", "--db", default=None, type="string",
        action="store", dest="db",
        help="db to connect to for output")

    parser.add_option("-n", "--noop", default=False, action="store_true",
        help="run in noop mode without modifying anything")

    parser.add_option("-r", "--ratio", default=None, type="string",
        action="store", dest="ratio",
        help="where this crawler fits in to a distributed stack")

    parser.add_option("-o", "--offset", default=0,
        type="int", dest="offset",
        help="start index of entities to import")

    parser.add_option("-l", "--limit", default=None, type="int",
        help="limits the number of entities to import")

    (options, args) = parser.parse_args()
    Globals.options = options

    if options.db:
        utils.init_db_config(options.db)

    # open() + with: the handle is released even if getNumLines raises, and
    # the code no longer relies on the Python-2-only file() builtin.
    with open('autocomplete.txt', 'r') as infile:
        options.count = utils.getNumLines(infile)

    if options.ratio:
        # "N/D": both parts must be integers; floats keep the division exact.
        num, den = options.ratio.split('/')
        num, den = int(num), int(den)
        num, den = float(num), float(den)

        options.offset = int(math.floor((options.count * (num - 1)) / den))
        options.limit  = int(math.ceil(options.count / den) + 1)

        utils.log("ratio %s) offset=%d, limit=%d" % (options.ratio, options.offset, options.limit))
    elif options.limit is None:
        options.limit = options.count

    options.verbose = False
    return options
コード例 #8
0
ファイル: trie.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the trie script.

    The parsed options are published as ``Globals.options``; ``--db`` (when
    given) is forwarded to ``utils.init_db_config`` and ``verbose`` is forced
    to ``False`` on the options object.

    Returns:
        The options object (positional arguments are discarded).
    """
    cli = OptionParser(
        usage="Usage: %prog [options] [sources]",
        version="%prog " + __version__,
    )
    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )

    opts, _positional = cli.parse_args()
    Globals.options = opts

    if opts.db:
        utils.init_db_config(opts.db)

    opts.verbose = False
    return opts
コード例 #9
0
ファイル: apple2.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the second Apple crawler script.

    The parsed options are published as ``Globals.options``; ``--db`` (when
    given) is forwarded to ``utils.init_db_config``.

    Returns:
        The options object (positional arguments are discarded).
    """
    cli = OptionParser(
        usage="Usage: %prog [options] [sources]",
        version="%prog " + __version__,
    )
    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run the dedupper in noop mode without modifying anything",
    )

    opts, _positional = cli.parse_args()
    Globals.options = opts

    if opts.db:
        utils.init_db_config(opts.db)

    return opts
コード例 #10
0
ファイル: alerts.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the alerts script.

    ``--db`` (when given) is forwarded to ``utils.init_db_config``.

    Returns:
        The options object only; positional arguments are discarded.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] command [args]",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-l", "--limit",
        dest="limit", type="int", default=0,
        help="Limit number of records processed",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="don't make any actual changes or notifications",
    )
    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )

    opts, _positional = cli.parse_args()

    if opts.db:
        utils.init_db_config(opts.db)

    return opts
コード例 #11
0
ファイル: users.py プロジェクト: Stamped/Stamped
def export():
    """Command-line entry point: parse arguments and run ``export_config``.

    Parses ``--drop``, ``--db``, ``--output_namespace`` and
    ``--state_namespace`` from ``sys.argv``, optionally selects the db config
    via ``utils.init_db_config``, then resolves the state collection on the
    connection borrowed from a lite-mode ``MongoStampedAPI`` and passes it to
    ``export_config`` together with the output namespace and drop flag.
    """
    import argparse

    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        "-D", "--drop",
        default=False,
        action="store_true",
        help="drop existing collections before performing any insertions",
    )
    arg_parser.add_argument(
        "-d", "--db",
        type=str,
        default=None,
        help="db to connect to",
    )
    arg_parser.add_argument(
        "-o", "--output_namespace",
        default="stamped.users",
        type=str,
        help="mongo db and collection namespace to store output to in dot-notation (e.g., defaults to stamped.users)",
    )
    arg_parser.add_argument(
        "-s", "--state_namespace",
        default="local.elasticmongo",
        type=str,
        help="mongo db and collection namespace to store elasticmongo mapping and index metadata",
    )
    arg_parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s " + __version__,
    )

    cli_args = arg_parser.parse_args()

    if cli_args.db:
        utils.init_db_config(cli_args.db)

    # Reuse the connection already held by the API's entity collection.
    stamped_api = MongoStampedAPI(lite_mode=True)
    connection = stamped_api._entityDB._collection._connection
    state_collection = __get_collection(connection, cli_args.state_namespace)

    export_config(state_collection, cli_args.output_namespace, cli_args.drop)
コード例 #12
0
ファイル: dedupe.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the dedupe script.

    No positional arguments are accepted: if any are present the help text is
    printed and the process exits with status 1.  When neither ``--place`` nor
    ``--nonplace`` is given, both are switched on.  ``--db`` (when given) is
    forwarded to ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options]",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to for output",
    )
    cli.add_option(
        "-s", "--seed",
        dest="seed", action="store", type="string", default=None,
        help="seed id to start with",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run the dedupper in noop mode without modifying anything",
    )
    cli.add_option(
        "-p", "--place",
        action="store_true", default=False,
        help="dedupe only place entities",
    )
    cli.add_option(
        "-P", "--nonplace",
        action="store_true", default=False,
        help="dedupe only non-place entities",
    )
    cli.add_option(
        "-v", "--verbose",
        action="store_true", default=False,
        help="enable verbose logging",
    )

    opts, positional = cli.parse_args()

    if positional:
        cli.print_help()
        sys.exit(1)

    # With no explicit restriction, dedupe both kinds of entities.
    if not opts.place and not opts.nonplace:
        opts.place = True
        opts.nonplace = True

    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #13
0
ファイル: match_apple_rdio.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the Apple/Rdio matching script.

    ``--db`` (when given) is forwarded to ``utils.init_db_config``.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.
    """
    cli = OptionParser(
        usage="Usage: %prog [options] query",
        version="%prog " + __version__,
    )

    cli.add_option(
        "-d", "--db",
        dest="db", action="store", type="string", default=None,
        help="db to connect to",
    )
    cli.add_option(
        "-n", "--noop",
        action="store_true", default=False,
        help="run in noop mode without modifying anything",
    )
    cli.add_option(
        "-v", "--verbose",
        action="store_true", default=False,
        help="enable verbose logging",
    )
    cli.add_option(
        "-l", "--limit",
        action="store", type="int", default=None,
        help="limit number to convert",
    )

    opts, positional = cli.parse_args()

    if opts.db:
        utils.init_db_config(opts.db)

    return (opts, positional)
コード例 #14
0
ファイル: crawler_main.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the crawler's command line and resolve its sources and sink.

    After parsing, the options object is published as ``Globals.options`` and
    decorated as follows:

    * ``offset``: the ``--offset`` value, or 0 when none was given (it is
      recomputed when ``--ratio`` is used).
    * ``all``: forced to ``True`` when no positional source names are given.
    * ``sources``: every available source (``EntitySources.instantiateAll``)
      or one instance per positional name; an unrecognized name prints an
      error plus the help text and exits with status 1.  Each source gets the
      module-level ``_globals`` assigned to its ``_globals`` attribute.
    * ``--count``: prints the summed ``getMaxNumEntities()`` of the sources
      and exits with status 0.
    * ``--ratio`` ("N/D"): stores the summed count in ``options.count`` and
      sets ``offset = floor(count * (N - 1) / D)`` and
      ``limit = ceil(count / D) + 1``.
    * ``sink``: replaced by a ``TestEntitySink`` ("test"), a
      ``MergeEntitySink`` ("merge"), or otherwise a ``MongoStampedAPI``
      constructed with ``options.db``.

    Returns:
        The populated options object.
    """
    usage   = "Usage: %prog [options] [sources]"
    version = "%prog " + __version__
    parser  = OptionParser(usage=usage, version=version)

    parser.add_option("-a", "--all", action="store_true", dest="all",
        default=False, help="crawl all available sources (defaults to true if no sources are specified)")

    parser.add_option("-o", "--offset", default=None,
        type="int", dest="offset",
        help="start index of entities to import")

    parser.add_option("-l", "--limit", default=None, type="int",
        help="limits the number of entities to import")

    parser.add_option("-r", "--ratio", default=None, type="string",
        action="store", dest="ratio",
        help="where this crawler fits in to a distributed stack")

    parser.add_option("-s", "--sink", default=None, type="string",
        action="store", dest="sink",
        help="where to output to (test or mongodb)")

    parser.add_option("-t", "--test", default=False,
        action="store_true", dest="test",
        help="run the crawler with limited input for testing purposes")

    parser.add_option("-c", "--count", default=False,
        action="store_true", dest="count",
        help="print overall entity count from all sources specified and return")

    parser.add_option("-u", "--update", default=False,
        action="store_true", dest="update",
        help="update the existing collection as opposed to dropping it and " +
        "overwriting any previous contents (the default)")

    parser.add_option("-g", "--geocode", default=False,
        action="store_true", dest="geocode",
        help="Geocode places to ensure all places have a valid lat/lng associated with them.")

    parser.add_option("-m", "--mount", default=False,
        action="store_true", dest="mount",
        help="mount crawler data directory if necessary")

    parser.add_option("-d", "--db", default=None, type="string",
        action="store", dest="db",
        help="db to connect to for output")

    #parser.add_option("-d", "--distribute", type="string",
    #    action="callback", callback=parseDistributedHosts,
    #    help="run the crawler distributed across the given set of hosts")

    (options, args) = parser.parse_args()
    #if hasattr(Globals.options, 'distributed'):
    #    options.distributed = Globals.options.distributed
    #    options.hosts = Globals.options.hosts
    #else:
    #    options.distributed = False
    #    options.hosts = []

    # Honor an explicit --offset; previously it was overwritten with 0
    # unconditionally, which silently discarded the user's value.
    if options.offset is None:
        options.offset = 0
    Globals.options = options

    if len(args) == 0:
        options.all = True

    if options.all:
        options.sources = EntitySources.instantiateAll()
    else:
        options.sources = [ ]
        for arg in args:
            source = EntitySources.instantiateSource(arg)

            if source is None:
                # Parenthesized form prints identically on Python 2 and 3.
                print("Error: unrecognized source '%s'" % arg)
                parser.print_help()
                sys.exit(1)
            else:
                options.sources.append(source)

    for source in options.sources:
        source._globals = _globals

    if options.count or options.ratio:
        count = 0

        for source in options.sources:
            count += source.getMaxNumEntities()

        if options.count:
            print(count)
            sys.exit(0)
        else:
            # Only reachable with --ratio set; note options.count is reused
            # here to hold the numeric total instead of the boolean flag.
            options.count = count
            num, den = options.ratio.split('/')
            num, den = int(num), int(den)
            num, den = float(num), float(den)
            options.offset = int(math.floor((count * (num - 1)) / den))
            options.limit  = int(math.ceil(count / den) + 1)

    if options.db:
        utils.init_db_config(options.db)

    if options.sink == "test":
        options.sink = TestEntitySink()
    elif options.sink == "merge":
        options.sink = MergeEntitySink()
    else:
        # Imported lazily, only when the Mongo-backed sink is actually needed.
        from api.MongoStampedAPI import MongoStampedAPI
        options.sink = MongoStampedAPI(options.db)

    return options
コード例 #15
0
ファイル: search.py プロジェクト: Stamped/Stamped
def parseCommandLine():
    """Parse the command line for the search tool.

    Usage: %prog [options] query

    Options:
      --version             show program's version number and exit
      -h, --help            show this help message and exit
      -d DB, --db=DB        db to connect to for output
      -l LIMIT, --limit=LIMIT
                            limits the number of entities to import
      -L, --Local           local nearby search
      -a LOCATION, --a=LOCATION
                            location (lat/lng)
      -f, --full            use full search
      -p, --prefix          use faster prefix-based search
      -v, --verbose         turn verbosity on
      -S, --Stats           view ranking stats
      -c CATEGORY, --category=CATEGORY
                            filters results by a given category
      -s SUBCATEGORY, --subcategory=SUBCATEGORY
                            filters results by a given subcategory

    At least one positional argument (the query) is required; otherwise the
    help text is printed and the process exits with status 1.  A location is
    converted from ``"lat,lng"`` into a ``(float, float)`` tuple; a malformed
    location prints an error plus the help text and exits with status 1.

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args``.

    Raises:
        Exception: if ``--category`` is given but is not in the module-level
            ``categories`` collection.
    """
    usage   = "Usage: %prog [options] query"
    version = "%prog " + __version__
    parser  = OptionParser(usage=usage, version=version)

    parser.add_option("-d", "--db", default=None, type="string",
        action="store", dest="db",
        help="db to connect to for output")

    parser.add_option("-l", "--limit", default=None, type="int",
        help="limits the number of entities to import")

    parser.add_option("-L", "--Local", default=False, action="store_true",
        help="local nearby search")

    parser.add_option("-a", "--a", default=None, type="string",
        action="store", dest="location", help="location (lat/lng, e.g. '40.7360067,-73.98884296')")

    parser.add_option("-f", "--full", default=False, action="store_true",
        help="use full search")

    parser.add_option("-p", "--prefix", default=False, action="store_true",
        help="use faster prefix-based search")

    parser.add_option("-v", "--verbose", default=False, action="store_true",
        help="turn verbosity on")

    parser.add_option("-S", "--Stats", default=False, action="store_true",
        help="view ranking stats")

    parser.add_option("-c", "--category", default=None, type="string",
        action="store", dest="category",
        help="filters results by a given category")

    parser.add_option("-s", "--subcategory", default=None, type="string",
        action="store", dest="subcategory",
        help="filters results by a given subcategory")

    (options, args) = parser.parse_args()

    if not args:
        parser.print_help()
        sys.exit(1)

    if options.db:
        utils.init_db_config(options.db)

    if options.location:
        # Explicit validation instead of `assert` (which is stripped under
        # -O): a missing comma, extra commas and non-numeric parts all raise
        # ValueError here and are reported as a usage error.
        try:
            lat, lng = options.location.split(',')
            options.location = (float(lat), float(lng))
        except ValueError:
            print("invalid location given '%s'" % options.location)
            parser.print_help()
            sys.exit(1)

    if options.category and options.category not in categories:
        raise Exception("Unrecognized category: '%s'; must be one of: %s" % (options.category, ", ".join(categories)))

    return (options, args)
コード例 #16
0
ファイル: MongoStampedAPI.py プロジェクト: Stamped/Stamped
    def __init__(self, db=None, **kwargs):
        """Initialize the API object and optionally select a db config.

        Args:
            db: optional value forwarded to ``utils.init_db_config``; nothing
                is configured when it is falsy.
            **kwargs: passed through unchanged to ``StampedAPI.__init__``.
        """
        # Reset the stats sink before delegating to the base-class initializer.
        self.__statsSink = None
        StampedAPI.__init__(self, "MongoStampedAPI", **kwargs)

        if not db:
            return
        utils.init_db_config(db)
コード例 #17
0
ファイル: handle_tvdb_images.py プロジェクト: Stamped/Stamped
from gevent.pool        import Pool
from api.MongoStampedAPI    import MongoStampedAPI

# Script entry point: parse the command line, then either add the image URLs
# given as positional arguments or fall through to the bulk-conversion path.
if __name__ == '__main__':
    import argparse
    
    parser = argparse.ArgumentParser()
    # NOTE(review): nargs='*' combined with action="append" presumably yields a
    # list wrapping the list of positional values, which is why the first
    # element is unwrapped below -- confirm against argparse behavior,
    # especially when no URLs are passed.
    parser.add_argument('image_urls', nargs='*', action="append")
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
    parser.add_argument('-d', '--db', action='store')
    
    args = parser.parse_args()
    db   = S3ImageDB()
    
    # Select the db configuration only when --db was supplied.
    if args.db is not None:
        utils.init_db_config(args.db)
    
    # Unwrap the outer list produced by the append action (see note above).
    if args.image_urls is not None:
        args.image_urls = args.image_urls[0]
    
    if args.image_urls is not None and len(args.image_urls) > 0:
        # example url:  http://thetvdb.com/banners/_cache/posters/211751-2.jpg
        db.addEntityImages(args.image_urls)
    else:
        # perform a bulk conversion of all thetvdb.com entity images, moving each 
        # image over to our own CDN (via S3 / Cloudfront) and updating the entity 
        # reference accordingly.
        
        # Pool(32) is a gevent pool; how api and pool are used is not visible
        # in this excerpt.
        api  = MongoStampedAPI()
        pool = Pool(32)