def main(): # # # Prep # # Params events_from = datetime(2012, 6, 1) events_to = datetime(2015, 6, 1) # Load api = crawl_tools.get_meetup_api() alt_api = crawl_tools.get_alt_meetup_api() countries2citygroups = load_countries() mdb = mongo_connect() print "from", events_from print "to", events_to # # # Crawl -- expand groups, obtain members # print "\nSTAGE 1: Expand groups" for country, city2groups in countries2citygroups.iteritems(): print country for city_ident, groups in city2groups.iteritems(): #if 'Swansea' not in city_ident: # continue print country, "\t", city_ident # full supplementary crawl of each group for group in groups: gid = group['id'] if has_group(mdb, gid): # do not re-crawl print "\t", group['name'], "<SKIPPING>" #~ continue print "\t", group['name'], "<CRAWLING>" #~ expand_meetup_group(alt_api, mdb, group, events_from, events_to) add_group(mdb, group) print "\nSTAGE 2: Crawl attendance for each event" crawl_event_attendance(alt_api, mdb)
def main(): # # # Prep # # Params cat_id = 34 radius = 25.0 # Load api = crawl_tools.get_meetup_api() alt_api = crawl_tools.get_alt_meetup_api() countries2cities = load_extracted_geonames_top_cities() del countries2cities['ie'] del countries2cities['gb'] # # # Crawl # for country, top_cities in countries2cities.iteritems(): print "crawling:", country out = [] for geonames_city in top_cities: lon = float(geonames_city['longitude']) lat = float(geonames_city['latitude']) results = retrieve_groups_near(alt_api, lon=lon, lat=lat) print "\t%-20s %d" % (geonames_city['city'], len(results)), len(frozenset(map(str, results))) d = {'geonames_city' : geonames_city, 'results': results} out.append(d) # Save this city fpath_out = './dat/groups_crawl/%s.json' % (country) with open(fpath_out, 'w') as f: json.dump(out, f)