Ejemplo n.º 1
0
def describe_venue(venues, city, depth=2, limit=None):
    """Summarise the venues of `city` per category, cut at `depth`.

    The `venues` collection is aggregated by its `cat` field (number of
    venues and sum of `likes`), then every category is mapped to its
    ancestor at level `depth` of the path returned by
    `fsc.search_categories` (or to the last element of that path when it is
    shorter). Groups whose category is None are ignored.

    Return an OrderedDict {category: (percentage of venues, venue count,
    total likes)} sorted by decreasing percentage.
    """
    pipeline = cm.build_query(city, False, ['cat', 'likes'], limit)
    pipeline.extend([
        {'$group': {'_id': '$cat',
                    'count': {'$sum': 1},
                    'like': {'$sum': '$likes'}}},
        {'$sort': {'count': -1}},
    ])
    rows = venues.aggregate(pipeline)['result']

    def parenting_cat(place, depth):
        """Return the category of `place`, without going beyond `depth`"""
        _, path = fsc.search_categories(place['_id'])
        level = depth if len(path) > depth else -1
        # slice-style lookup kept as is (presumably a bidict inverse
        # lookup -- confirm against fsc.CAT_TO_ID)
        return fsc.CAT_TO_ID[:path[level]]

    totals = defaultdict(lambda: (0, 0))
    nb_venues = 0
    for row in rows:
        if row['_id'] is None:
            continue
        cat = parenting_cat(row, depth)
        count, like = row['count'], row['like']
        nb_venues += count
        seen, liked = totals[cat]
        totals[cat] = (seen + count, liked + like)

    ranked = [(cat, (100.0*count/nb_venues, count, like))
              for cat, (count, like) in totals.iteritems()]
    ranked.sort(key=lambda item: item[1][0], reverse=True)
    return OrderedDict(ranked)
Ejemplo n.º 2
0
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Dump the check-ins of `city` to `<city>_fs.js` as a JS array.

    Every entry of the array is `[longitude, latitude, hour]`, where both
    coordinates are shifted by `noise()`.
    """
    db = cm.connect_to_db('foursquare', host, port)[0]
    pipeline = cm.build_query(city, venue=False, fields=['loc', 'time'])
    docs = db['checkin'].aggregate(pipeline)['result']

    def format_checkin(checkin):
        """Build the [lng, lat, hour] triple of one check-in (with jitter)"""
        coordinates = checkin['loc']['coordinates']
        lng, lat = coordinates
        hour = checkin['time'].hour
        return [lng + noise(), lat + noise(), hour]

    rows = []
    for doc in docs:
        rows.append(str(format_checkin(doc)))
    # NOTE(review): the JS variable is always named `helsinki_fs`, whatever
    # `city` is -- confirm that consumers of the file expect that.
    content = 'var helsinki_fs = [\n' + ',\n'.join(rows) + '];'
    with open(city + '_fs.js', 'w') as output:
        output.write(content)
Ejemplo n.º 3
0
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Export all check-ins made in `city` as a JavaScript array.

    The file `<city>_fs.js` receives one `[lng, lat, hour]` line per
    check-in; `noise()` is added to each coordinate.
    """
    def format_checkin(checkin):
        """Return [lng + noise, lat + noise, hour] for `checkin`"""
        position = checkin["loc"]["coordinates"]
        lng, lat = position
        hour = checkin["time"].hour
        return [lng + noise(), lat + noise(), hour]

    connection = cm.connect_to_db("foursquare", host, port)
    collection = connection[0]["checkin"]
    stages = cm.build_query(city, venue=False, fields=["loc", "time"])
    found = collection.aggregate(stages)["result"]

    lines = list(map(str, map(format_checkin, found)))
    # NOTE(review): variable name is hard-coded to `helsinki_fs` for every
    # city -- check whether the page loading this file depends on it.
    with open(city + "_fs.js", "w") as output:
        output.write("var helsinki_fs = [\n")
        output.write(",\n".join(lines))
        output.write("];")
Ejemplo n.º 4
0
def all_places_from_venue(checkins, city, converse=False):
    """Group the check-ins of `city` by venue (`lid`) and collect, for each
    venue, the list of `place` values seen with it. With `converse` set, the
    roles are swapped: keys are `place` and values are lists of `lid`.

    Groups whose key is falsy (e.g. None) are dropped from the result."""
    pipeline = cm.build_query(city, fields=['lid', 'place'])
    if converse:
        key, pushed = '$place', '$lid'
    else:
        key, pushed = '$lid', '$place'
    pipeline.append({"$group": {'_id': key, 'others': {'$push': pushed}}})

    mapping = {}
    for group in checkins.aggregate(pipeline)['result']:
        if group['_id']:
            mapping[group['_id']] = group['others']
    return mapping
Ejemplo n.º 5
0
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Write a JS array of all checkins in `city` with their hour."""
    print 'utils.py/output_checkins'
    checkins = cm.connect_to_db('foursquare', host, port)[0]['checkin']
    query = cm.build_query(city, venue=False, fields=['loc', 'time'])
    res = checkins.aggregate(query)['result']

    def format_checkin(checkin):
        """Extract location (plus jitter) and hour from checkin"""
        print 'utils.py/format_checkin'
        lng, lat = checkin['loc']['coordinates']
        hour = checkin['time'].hour
        return [lng + noise(), lat + noise(), hour]

    formated = [str(format_checkin(c)) for c in res]
    with open(city + '_fs.js', 'w') as output:
        output.write('var helsinki_fs = [\n')
        output.write(',\n'.join(formated))
        output.write('];')
Ejemplo n.º 6
0
def all_places_from_venue(checkins, city, converse=False):
    """Return {lid: [place, ...]} built from the check-ins of `city`, or
    {place: [lid, ...]} when `converse` is true.

    Entries with a falsy key are left out."""
    fields = ['$lid', '$place']
    if converse:
        fields.reverse()
    group_key, collected = fields

    stages = cm.build_query(city, fields=['lid', 'place'])
    stages.append(
        {"$group": {'_id': group_key, 'others': {'$push': collected}}})
    groups = checkins.aggregate(stages)['result']
    return dict((g['_id'], g['others']) for g in groups if g['_id'])
Ejemplo n.º 7
0
def get_users(args):
    """Return the users (`tuid`) who checked in `args.city`.

    The list is first looked up in the `<city>_users.my` file through
    `p.load_var`; when that raises IOError, the `checkin` collection of the
    database at `args.host`/`args.port` is aggregated by `tuid` and the ids
    are returned by decreasing number of check-ins.
    """
    import CommonMongo as cm
    city = args.city
    try:
        cached = p.load_var(city+'_users.my')
    except IOError:
        pass
    else:
        return cached

    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    # One group per user, most active users first
    pipeline = cm.build_query(city, venue=True, fields=['tuid'])
    pipeline.append({'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}})
    pipeline.append({'$sort': {'checkins': -1}})
    ranked = db.checkin.aggregate(pipeline)['result']
    # See how many they are and their check-ins count distribution
    # import utils as u
    # import pandas as pd
    # print(len(users))
    # infos = u.xzip(users, '_id checkins'.split())
    # df_users = pd.DataFrame(index=map(int, infos[0]),
    #                         data=dict(count=infos[1]))
    # ppl.hist(df_users.values, bins=25)
    counts = OrderedDict((row['_id'], row['checkins']) for row in ranked)
    return counts.keys()
Ejemplo n.º 8
0
def get_users(args):
    """List the ids of the users having checked in `args.city`.

    A cached answer stored in `<city>_users.my` is returned when
    `p.load_var` can read it. Otherwise check-ins are grouped by `tuid` in
    the database and the ids are returned, most check-ins first.
    """
    import CommonMongo as cm
    city = args.city
    cache_name = city + '_users.my'
    try:
        return p.load_var(cache_name)
    except IOError:
        # nothing cached, fall through to the database
        pass

    connection = cm.connect_to_db('foursquare', args.host, args.port)
    db = connection[0]
    count_by_user = {'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}}
    most_active_first = {'$sort': {'checkins': -1}}
    user_query = cm.build_query(city, venue=True, fields=['tuid'])
    user_query += [count_by_user, most_active_first]
    answer = db.checkin.aggregate(user_query)['result']
    # See how many they are and their check-ins count distribution
    # import utils as u
    # import pandas as pd
    # print(len(users))
    # infos = u.xzip(users, '_id checkins'.split())
    # df_users = pd.DataFrame(index=map(int, infos[0]),
    #                         data=dict(count=infos[1]))
    # ppl.hist(df_users.values, bins=25)
    ordered = OrderedDict()
    for user in answer:
        ordered[user['_id']] = user['checkins']
    return ordered.keys()
Ejemplo n.º 9
0
def load_existing_ids(cmd_args):
    """Return the ids of the check-ins already known for `cmd_args.city`.

    Nothing is looked up for the special city 'whole' (an empty list is
    returned). Otherwise the ids come from `<city>_checkins_ids.my` when it
    can be loaded, else from the `checkin` collection, in which case they
    are also saved to that file. Ids of type `long` are skipped and the
    remaining ones are converted to `str`.
    """
    city = cmd_args.city
    if city == 'whole':
        return []
    import persistent as p
    cache_name = city+'_checkins_ids.my'
    try:
        cached = p.load_var(cache_name)
    except IOError:
        pass
    else:
        return cached
    import CommonMongo as cm
    db = cm.connect_to_db('foursquare', cmd_args.host, cmd_args.port)[0]
    ids = set()
    for doc in db.checkin.find({'city': city}, {'_id': 1}):
        if isinstance(doc['_id'], long):
            continue
        ids.add(str(doc['_id']))
    p.save_var(cache_name, ids)
    return ids
Ejemplo n.º 10
0
def venues_activity(checkins, city, limit=None):
    """Compute the visit patterns of the venues of `city`.

    Check-ins are grouped by venue (`lid`) and the `time` of every visit is
    handed to `aggregate_visits`, which yields an (hourly, daily) pair per
    venue. When `limit` is a positive int, groups are also sorted by
    decreasing number of check-ins.

    Return two parallel lists: the hourly patterns and the daily ones.
    """
    pipeline = cm.build_query(city, True, ['lid', 'time'], limit)
    per_venue = {'$group': {'_id': '$lid',
                            'count': {'$sum': 1},
                            'visits': {'$push': '$time'}}}
    pipeline.insert(2, per_venue)
    if isinstance(limit, int) and limit > 0:
        # goes right before the last stage (presumably the `$limit` added
        # by build_query -- confirm)
        pipeline.insert(-1, {'$sort': {'count': -1}})
    groups = checkins.aggregate(pipeline)['result']

    hourly, weekly = [], []
    for group in groups:
        by_hour, by_day = aggregate_visits(group['visits'])
        hourly += [by_hour]
        weekly += [by_day]
    return hourly, weekly
Ejemplo n.º 11
0
def load_existing_ids(cmd_args):
    """Fetch the check-in ids of `cmd_args.city` from disk or, failing
    that, from the database (saving them to disk afterwards).

    The city 'whole' always yields an empty list. Database ids that are
    `long` instances are ignored; the others are returned as a set of
    `str`."""
    city = cmd_args.city
    if city == 'whole':
        return []
    import persistent as p
    filename = city + '_checkins_ids.my'
    try:
        return p.load_var(filename)
    except IOError:
        # no cache yet: query the database below
        pass
    import CommonMongo as cm
    connection = cm.connect_to_db('foursquare', cmd_args.host, cmd_args.port)
    cursor = connection[0].checkin.find({'city': city}, {'_id': 1})
    ids = set(str(doc['_id']) for doc in cursor
              if not isinstance(doc['_id'], long))
    p.save_var(filename, ids)
    return ids
Ejemplo n.º 12
0
            return res, int(t)
        if need_answer:
            max_tries -= 1
            logging.info('insisting on page {}'.format(page))
            sleep(5)
        else:
            return None, 0

    logging.warn('Error getting page {}: too much tries'.format(page))
    return None, 0


# Script entry point: download photos for the city given on the command line
# and log how many photos were saved and how many requests were made.
if __name__ == '__main__':
    # module-level timestamp taken before the first request
    START_OF_REQUESTS = time()
    logging.info('initial request')

    args = arguments.city_parser().parse_args()
    photos = cm.connect_to_db('world', args.host, args.port)[0]['photos']
    # compound index on location, tags and user id
    photos.ensure_index([('loc', cm.pymongo.GEOSPHERE),
                         ('tags', cm.pymongo.ASCENDING),
                         ('uid', cm.pymongo.ASCENDING)])
    city = args.city
    # entry of the concatenated US + EU city lists selected by name
    CITY = (cities.US + cities.EU)[cities.INDEX[city]]
    HINT = city
    # split the entry into its first two values and the remaining ones
    # (presumably the two corners of a bounding box -- confirm)
    bbox = (CITY[:2], CITY[2:])
    start_time = datetime.datetime(2014, 7, 19)
    total = higher_request(start_time, bbox, photos)

    logging.info('Saved a total of {} photos.'.format(total))
    logging.info('made {} requests.'.format(TOTAL_REQ))
Ejemplo n.º 13
0
#! /usr/bin/python2
# vim: set fileencoding=utf-8
import scipy.io as sio
import VenueFeature as vf
import CommonMongo as cm
import persistent as p
# Share a single Foursquare connection with the VenueFeature module
DB, CLIENT = cm.connect_to_db('foursquare')
vf.DB, vf.CLIENT = DB, CLIENT
brands = ["mcdonald's", 'starbucks']
import cities as C
# Venues of each brand, in every city, with only their `city` field
starbucks = list(vf.DB.venue.find(
    {'name': {'$in': ['Starbucks Coffee', 'Starbucks']}}, {'city': 1}))
macdo = list(vf.DB.venue.find({'name': "McDonald's"}, {'city': 1}))
for city in C.SHORT_KEY:
    # ids stored under key 'i' of the `<city>_fv` MATLAB file
    vindex = set(list(sio.loadmat(city+'_fv')['i']))
    # For each brand, keep the venues of `city` that are also in `vindex`
    # and save them in `<city>_<brand>.my`
    for brand, venues in zip(brands, (macdo, starbucks)):
        fromdb = set([_['_id'] for _ in venues if _['city'] == city])
        res = list(fromdb.intersection(vindex))
        p.save_var('{}_{}.my'.format(city, brand), res)
        print('saved {} {} in {}'.format(len(res), brand, city))
Ejemplo n.º 14
0
from time import sleep
import TwitterAPI as twitter
from api_keys import TWITTER_CONSUMER_KEY as consumer_key
from api_keys import TWITTER_CONSUMER_SECRET as consumer_secret
from api_keys import TWITTER_ACCESS_TOKEN as access_token
from api_keys import TWITTER_ACCESS_SECRET as access_secret
import twitter_helper as th
import arguments

# Command line arguments are parsed at import time
ARGS = arguments.tweets_parser().parse_args()
DB = None
SAVE = None
# Depending on the `mongodb` argument, either connect to the database or load
# a json module through twitter_helper
if ARGS.mongodb:
    import CommonMongo as cm

    DB = cm.connect_to_db("foursquare", ARGS.host, ARGS.port)[0]
else:
    json = th.import_json()
import CheckinAPICrawler as cac

# single crawler instance shared by the module
CRAWLER = cac.CheckinAPICrawler()
from Queue import Queue
from threading import Thread

# the size of mongo bulk insert, in multiple of pool size
INSERT_SIZE = 7
# bounded queue: its capacity is (INSERT_SIZE + 3) times cac.BITLY_SIZE
CHECKINS_QUEUE = Queue((INSERT_SIZE + 3) * cac.BITLY_SIZE)
# There is probably a cleaner version, but it is global because if
# read_twitter_stream fails, we still want to keep an accurate count of tweets
# seen so far
NB_TWEETS = 0
Ejemplo n.º 15
0
    """Find similars venues for 100 location in city, save the result in DB and
    return matching venues that were already in DB."""
    venues = answer_to_dict(venues_db.find({'city': city}, {'loc': 1}))
    chosen = sample(venues.items(), 500)
    distances = []
    all_match = []
    for vid, loc in chosen:
        similars = af.similar_venues(vid, client=client)
        if similars is None:
            continue
        else:
            print(vid, similars)
        venues_db.update({'_id': vid}, {'$set': {'similars': similars}})
        matching = answer_to_dict(venues_db.find({'_id': {'$in': similars}},
                                                 {'loc': 1}))
        all_match.append(matching)
        distances.append([geodesic_distance(loc, sloc)
                          for sloc in matching.itervalues()])
    return chosen, distances, all_match


# Script entry point: run the module doctests, then connect to the database
# designated by the command line arguments.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
    # pylint: disable=C0103
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    # connect_to_db returns a pair, unpacked here as (database, client)
    db, client = cm.connect_to_db('foursquare', args.host, args.port)
    checkins = db['checkin']
Ejemplo n.º 16
0
def mongo_insertion():
    """Flush the global TO_BE_INSERTED buffer into TABLE.

    Nothing happens when the buffer is empty. Otherwise its content is
    inserted in one call (with `continue_on_error=True`), duplicate key
    errors are ignored, other operation failures are printed, and the
    buffer is emptied in place whatever the outcome of these cases.
    """
    pending = TO_BE_INSERTED
    if len(pending) > 0:
        try:
            TABLE.insert(pending, continue_on_error=True)
        except cm.pymongo.errors.DuplicateKeyError:
            # already stored documents are not an issue
            pass
        except cm.pymongo.errors.OperationFailure as e:
            print(e, e.code)
        # empty the shared list itself so other references see the change
        del pending[:]

# Script entry point: set up the database table and start the two worker
# threads.
if __name__ == '__main__':
    # replace the name stored in REQ by the attribute of CLIENT it designates
    REQ = getattr(CLIENT, REQ)
    args = arguments.city_parser().parse_args()
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    checkins = db['checkin']
    # collection named after the kind of entity being retrieved
    TABLE = db[ENTITY_KIND]
    if ENTITY_KIND == 'venue':
        # compound index on location, city and category
        TABLE.ensure_index([('loc', cm.pymongo.GEOSPHERE),
                            ('city', cm.pymongo.ASCENDING),
                            ('cat', cm.pymongo.ASCENDING)])
    # Both workers are daemon threads, so they do not keep the interpreter
    # alive once the main thread exits. `t` is reused for the second one.
    t = Thread(target=entities_getter, name='Query4SQ')
    t.daemon = True
    t.start()
    t = Thread(target=entities_putter, name='InsertDB')
    t.daemon = True
    t.start()
    total_entities = 0
    city = args.city
    chunker = Chunker.Chunker(foursquare.MAX_MULTI_REQUESTS)
    similars = tree.xpath(XPATH_QUERY)
    if len(similars) == 0:
        return []
    return [c.attrib["data-venueid"] for c in similars[0].iterchildren()]


# Script entry point: build a Foursquare client from the credentials stored
# in `api_keys.cfg` and print the result of gather_all_entities_id for
# Helsinki.
if __name__ == "__main__":
    config = ConfigParser.ConfigParser()
    config.read("api_keys.cfg")
    # credentials come from the [foursquare] section of the config file
    CLIENT_ID = config.get("foursquare", "FOURSQUARE_ID2")
    CLIENT_SECRET = config.get("foursquare", "FOURSQUARE_SECRET2")
    client = foursquare.Foursquare(CLIENT_ID, CLIENT_SECRET)
    import arguments

    args = arguments.city_parser().parse_args()
    db = cm.connect_to_db("foursquare", args.host, args.port)[0]
    checkins = db["checkin"]
    # print(venue_profile(client, ''))
    # up = user_profile(client, 2355635)
    # vids = ['4a2705e6f964a52048891fe3', '4b4ad9dff964a5200b8f26e3',
    #         '40a55d80f964a52020f31ee3', '4b4ad9dff964c5200']
    # [client.venues(vid, multi=True) for vid in vids]
    # answers = list(client.multi())
    # NOTE(review): the city is hard-coded to "helsinki" here, args.city is
    # not used in the visible part of this block.
    r = gather_all_entities_id(checkins, city="helsinki", limit=50)
    for b in r:
        print (b)
    # svids = ['4c4787646c379521a121cfb5', '43222200f964a5209a271fe3',
    #          '4b218c2ef964a520a83d24e3']
    # gold = [[], ['4bbd0fbb8ec3d13acea01b28', '451d2412f964a5208a3a1fe3'],
    #         ['4d72a2a9ec07548190588cbf', '4a736a23f964a52062dc1fe3',
    #          '4f2d3e99e4b056f83aecdc88', '4aa7b5e4f964a520064d20e3',
Ejemplo n.º 18
0
    max_values = []
    for idx, val in zip(cells, count):
        max_values = sps.add_maybe([val, [idx, idx], 0, val], max_values, 500)
    return sorted(max_values, key=lambda x: x[0], reverse=True)


# Script entry point: scan the city given on the command line, first with
# and then without photos as background.
if __name__ == '__main__':
    #pylint: disable=C0103
    import CommonMongo as cm
    import arguments
    import cities
    import sys
    # NOTE(review): this unconditional exit makes every statement below
    # unreachable -- remove it to actually run the scan.
    sys.exit()
    args = arguments.city_parser().parse_args()
    city = args.city
    _, client = cm.connect_to_db('foursquare', args.host, args.port)
    # client = None
    # NOTE(review): not read in the visible part of this block, which uses
    # options['photos_background'] instead.
    photos_in_background = True
    # `k` is used as grid size and passed to k_split_bbox and do_scan
    k = 100
    sps.GRID_SIZE = k
    sps.MAX_SUPPORT = 200
    bbox = (cities.US+cities.EU)[cities.INDEX[city]]
    sps.BBOX = bbox
    # only the third value returned by k_split_bbox is kept
    _, _, sps.index_to_rect = sps.k_split_bbox(bbox, k)
    options = {'city': city, 'photos_background': True,
               'bbox': cities.bbox_to_polygon(bbox), 'only': False}
    # first pass: photos_background is True
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)
    # second pass: same scan with photos_background set to False
    options['photos_background'] = False
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
Ejemplo n.º 19
0
    tags = sorted([k for k, v in entropies.items() if 2.5 <= v <= 3.01])
    save_var('mat_tag', tags)
    u = load_var('user_status')
    user_index = {k: i for i, k in enumerate(u)}

    def format_photo(p):
        """Turn photo `p` into a flat list: index of its user, coordinates,
        weekday, hour and UTC timestamp of `taken`, then one 0/1 flag per
        tag of `tags` telling whether it appears in `p['ntags']`."""
        when = p['taken']
        timestamp = calendar.timegm(when.utctimetuple())
        flags = [int(tag in p['ntags']) for tag in tags]
        return ([user_index[p['uid']]] + p['loc']['coordinates'] +
                [when.weekday(), when.hour, timestamp] + flags)

    photos_feature = np.mat(tag_time(DB, tags, format_photo))
    sio.savemat('deep', {'A': scipy.sparse.csr_matrix(photos_feature)})

# Script entry point: compute the supported tags of the city given on the
# command line, save them and print how long it took.
if __name__ == '__main__':
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, client = cm.connect_to_db('world', args.host, args.port)
    # `s` and `t` are the clock() readings before and after the computation
    s = clock()
    tags = supported_tags(DB, city, photos_threshold=30, users_threshold=5,
                          timespan=60)
    # stored under the name `<city>_tag_support`
    save_var(city+'_tag_support', tags)
    # entropies = {t[0]: period_entropy(DB, t[0]) for t in tags}
    # save_var('Hsupported', entropies)
    # get_data(DB)
    t = clock()
    # elapsed time, in the unit returned by clock()
    print(t-s)
Ejemplo n.º 20
0
#! /usr/bin/python2
# vim: set fileencoding=utf-8
"""Compare tags of photos and venues."""

# Script entry point: find the Paris venues with tags that are also the _id
# of a photo attached to a venue.
if __name__ == '__main__':
    # pylint: disable=C0103
    # `db` is looked up when the lambda is called, so defining it before
    # `db` exists is fine
    getvenue = lambda i: db.venue.find_one({'_id': i})
    import CommonMongo as cm
    db, cl = cm.connect_to_db('foursquare')
    # NOTE(review): the client `cl` is given as second positional argument;
    # confirm that connect_to_db accepts a client there.
    flickr = cm.connect_to_db('world', cl)[0]
    # venues of Paris whose `tags` list is not empty
    res = db.venue.find({'city': 'paris', 'tags': {'$ne': []}}, {'tags': 1})
    # {venue id: number of tags}
    venues_tags = {v['_id']: len(v['tags']) for v in res}
    # photos whose `venue` field is not None
    fl_venue = flickr.photos.find({'venue': {'$ne': None}}, {'tags': 1})
    fl_ids = set([v['_id'] for v in fl_venue])
    # ids present both among the photos `_id` and the venues `_id`
    matching_venue = fl_ids.intersection(venues_tags.keys())
Ejemplo n.º 21
0
#! /usr/bin/python2
# vim: set fileencoding=utf-8
"""Retrieve checkins tweets"""
from timeit import default_timer as clock
from time import sleep
import TwitterAPI as twitter
import twitter_helper as th
import arguments
import ConfigParser
# Command line arguments are parsed as soon as the module is imported
ARGS = arguments.tweets_parser().parse_args()
DB = None
SAVE = None
# Either connect to MongoDB or load a json module through twitter_helper,
# depending on the `mongodb` argument
if ARGS.mongodb:
    import CommonMongo as cm
    DB = cm.connect_to_db('foursquare', ARGS.host, ARGS.port)[0]
else:
    json = th.import_json()
import CheckinAPICrawler as cac
# crawler instance shared by the whole module
CRAWLER = cac.CheckinAPICrawler()
from Queue import Queue
from threading import Thread
# the size of mongo bulk insert, in multiple of pool size
INSERT_SIZE = 7
# bounded queue holding at most (INSERT_SIZE + 3) * cac.BITLY_SIZE items
CHECKINS_QUEUE = Queue((INSERT_SIZE + 3) * cac.BITLY_SIZE)
# There is probably a cleaner version, but it is global because if
# read_twitter_stream fails, we still want to keep an accurate count of tweets
# seen so far
NB_TWEETS = 0
# second module-level counter, also starting at zero
NUM_VALID = 0

Ejemplo n.º 22
0
#! /usr/bin/python2
# vim: set fileencoding=utf-8
"""Retrieve checkins tweets"""
from timeit import default_timer as clock
from time import sleep
import TwitterAPI as twitter
import twitter_helper as th
import arguments
import ConfigParser
# Parse the command line at import time
ARGS = arguments.tweets_parser().parse_args()
DB = None
SAVE = None
# With the `mongodb` argument set, open the database; otherwise get a json
# module from twitter_helper
if ARGS.mongodb:
    import CommonMongo as cm
    DB = cm.connect_to_db('foursquare', ARGS.host, ARGS.port)[0]
else:
    json = th.import_json()
import CheckinAPICrawler as cac
# one crawler for the whole module
CRAWLER = cac.CheckinAPICrawler()
from Queue import Queue
from threading import Thread
# the size of mongo bulk insert, in multiple of pool size
INSERT_SIZE = 7
# queue capacity: (INSERT_SIZE + 3) times cac.BITLY_SIZE
CHECKINS_QUEUE = Queue((INSERT_SIZE+3)*cac.BITLY_SIZE)
# There is probably a cleaner version, but it is global because if
# read_twitter_stream fails, we still want to keep an accurate count of tweets
# seen so far
NB_TWEETS = 0
# another global counter initialised to zero
NUM_VALID = 0

Ejemplo n.º 23
0
        plt.xticks(range(24/chunk), named_ticks('day', offset, chunk))
    else:
        plt.xticks(range(7*3), named_ticks('mix'))


def get_distorsion(ak, kl, sval):
    """Return the total euclidean distance between the rows of `sval` and
    the rows of `ak` they are assigned to.

    `kl[i]` is the index in `ak` of the centroid of `sval[i]`."""
    assigned_centroids = ak[kl, :]
    offsets = assigned_centroids - sval
    # one distance per row
    distances = np.linalg.norm(offsets, axis=1)
    return np.sum(distances)

# Script entry point: connect to the database of the city given on the
# command line and define helpers to cluster venues and inspect the classes.
if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, CLIENT = cm.connect_to_db('foursquare', args.host, args.port)

    # pylint: disable=E1101
    # shortcut for kmeans2 with a fixed third argument of 20 (presumably the
    # number of iterations -- confirm) and minit='points'
    do_cluster = lambda val, k: cluster.kmeans2(val, k, 20, minit='points')

    def getclass(c, kl, visits):
        """Return {id: time pattern} of the venues in class `c` of
        `kl`."""
        # `kl` is read in parallel with the items of `visits`, so both must
        # be in the same order
        return {v[0]: v[1] for v, k in zip(visits.iteritems(), kl) if k == c}

    def peek_at_class(c, kl, visits, k=15):
        """Return a table of `k` randomly chosen venues in class `c` of
        `kl`."""
        sample = r.sample([get_venue(i)
                           for i in getclass(c, kl, visits).keys()], k)
        return pd.DataFrame({'cat': [_[0] for _ in sample],
Ejemplo n.º 24
0
Archivo: LDA.py Proyecto: morgz/illalla
        for photo in self.db.find(self.query, {"tags": 1, "_id": 0}):
            yield self.dictionary.doc2bow(photo["tags"])

    def save(self):
        """Serialize this corpus to `<city>_corpus.mm` with
        `corpora.MmCorpus.serialize`."""
        # NOTE(review): `city` is neither a parameter nor an attribute of
        # self here; it resolves to a module-level name (one is assigned in
        # the __main__ block) -- confirm it is defined for other callers.
        corpora.MmCorpus.serialize(city + "_corpus.mm", self)


def find_topics(city, client, num_topics, begin):
    """Fit a LDA model with `num_topics` topics on the photos of `city`.

    A dictionary and a bag-of-words corpus are built from `city`, `client`
    and `begin`; the corpus is saved, the model is trained on it, its
    first 10 topics are printed and it is saved as `<city>.lda`.

    Return the trained model.
    """
    dictionary = build_dico(city, client, begin)
    corpus = PhotoCorpus(city, client, dictionary, begin)
    corpus.save()

    model = models.LdaModel(
        corpus,
        id2word=dictionary,
        num_topics=num_topics,
        passes=5,
        iterations=500,
        eval_every=5,
    )
    # NOTE(review): the transformed corpus is computed but never used.
    transformed = model[corpus]
    print(model.show_topics(10))
    model.save(city + ".lda")
    return model


# Script entry point: log to `<city>_lda.log` and find 80 topics in the
# photos of the city given on the command line.
if __name__ == "__main__":
    # pylint: disable=C0103
    import arguments

    args = arguments.city_parser().parse_args()
    city = args.city
    # INFO level and above goes to a per-city log file
    logging.basicConfig(
        filename=city + "_lda.log", format="%(asctime)s [%(levelname)s]: %(message)s", level=logging.INFO
    )
    db, client = cm.connect_to_db("foursquare", args.host, args.port)
    # only photos from 2008-01-01 on are considered (assuming `begin` is a
    # lower bound on their date -- confirm in build_dico / PhotoCorpus)
    lda = find_topics(city, client, num_topics=80, begin=dt(2008, 1, 1))
Ejemplo n.º 25
0
    except pycurl.error as e:
        print(str(e))
        return None
    page = buf.getvalue()
    tree = etree.fromstring(page, PARSER)
    similars = tree.xpath(XPATH_QUERY)
    if len(similars) == 0:
        return []
    return [c.attrib['data-venueid'] for c in similars[0].iterchildren()]


# Script entry point: create a Foursquare client and print the result of
# gather_all_entities_id for Helsinki.
if __name__ == '__main__':
    client = foursquare.Foursquare(CLIENT_ID, CLIENT_SECRET)
    import arguments
    args = arguments.city_parser().parse_args()
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    checkins = db['checkin']
    # print(venue_profile(client, ''))
    # up = user_profile(client, 2355635)
    # vids = ['4a2705e6f964a52048891fe3', '4b4ad9dff964a5200b8f26e3',
    #         '40a55d80f964a52020f31ee3', '4b4ad9dff964c5200']
    # [client.venues(vid, multi=True) for vid in vids]
    # answers = list(client.multi())
    # NOTE(review): the city is hard-coded to 'helsinki'; args.city is not
    # used in the visible part of this block.
    r = gather_all_entities_id(checkins, city='helsinki', limit=50)
    for b in r:
        print(b)
    # svids = ['4c4787646c379521a121cfb5', '43222200f964a5209a271fe3',
    #          '4b218c2ef964a520a83d24e3']
    # gold = [[], ['4bbd0fbb8ec3d13acea01b28', '451d2412f964a5208a3a1fe3'],
    #         ['4d72a2a9ec07548190588cbf', '4a736a23f964a52062dc1fe3',
    #          '4f2d3e99e4b056f83aecdc88', '4aa7b5e4f964a520064d20e3',
Ejemplo n.º 26
0
            return res, int(t)
        if need_answer:
            max_tries -= 1
            logging.info('insisting on page {}'.format(page))
            sleep(5)
        else:
            return None, 0

    logging.warn('Error getting page {}: too much tries'.format(page))
    return None, 0


# Script entry point: fetch the photos of the city given on the command
# line, then log the number of photos saved and of requests made.
if __name__ == '__main__':
    # global timestamp recorded before any request is sent
    START_OF_REQUESTS = time()
    logging.info('initial request')

    args = arguments.city_parser().parse_args()
    photos = cm.connect_to_db('world', args.host, args.port)[0]['photos']
    # index covering location, tags and user id
    photos.ensure_index([('loc', cm.pymongo.GEOSPHERE),
                         ('tags', cm.pymongo.ASCENDING),
                         ('uid', cm.pymongo.ASCENDING)])
    city = args.city
    # look the city up in the concatenation of the US and EU lists
    CITY = (cities.US + cities.EU)[cities.INDEX[city]]
    HINT = city
    # first two values and remaining values of the city entry (presumably
    # the two corners of its bounding box -- confirm)
    bbox = (CITY[:2], CITY[2:])
    start_time = datetime.datetime(2014, 7, 19)
    total = higher_request(start_time, bbox, photos)

    logging.info('Saved a total of {} photos.'.format(total))
    logging.info('made {} requests.'.format(TOTAL_REQ))