Esempio n. 1
0
def main():
    #
    #
    # Prep
    #

    # Params
    events_from = datetime(2012, 6, 1)
    events_to = datetime(2015, 6, 1)

    # Load
    api = crawl_tools.get_meetup_api()
    alt_api = crawl_tools.get_alt_meetup_api()

    countries2citygroups = load_countries()

    mdb = mongo_connect()

    print "from", events_from
    print "to", events_to

    #
    #
    # Crawl -- expand groups, obtain members
    #
    print "\nSTAGE 1: Expand groups"
    for country, city2groups in countries2citygroups.iteritems():
        print country
        for city_ident, groups in city2groups.iteritems():
            #if 'Swansea' not in city_ident:
            #    continue

            print country, "\t", city_ident

            # full supplementary crawl of each group
            for group in groups:
                gid = group['id']

                if has_group(mdb, gid):
                    # do not re-crawl
                    print "\t", group['name'], "<SKIPPING>" #~
                    continue

                print "\t", group['name'], "<CRAWLING>" #~

                expand_meetup_group(alt_api, mdb, group, events_from, events_to)
                add_group(mdb, group)

    print "\nSTAGE 2: Crawl attendance for each event"
    crawl_event_attendance(alt_api, mdb)
Esempio n. 2
0
def main():
    #
    #
    # Prep
    #

    # Params
    cat_id = 34
    radius = 25.0

    # Load
    api = crawl_tools.get_meetup_api()
    alt_api = crawl_tools.get_alt_meetup_api()

    countries2cities = load_extracted_geonames_top_cities()

    del countries2cities['ie']
    del countries2cities['gb']

    #
    #
    # Crawl
    #
    for country, top_cities in countries2cities.iteritems():
        print "crawling:", country

        out = []

        for geonames_city in top_cities:
            lon = float(geonames_city['longitude'])
            lat = float(geonames_city['latitude'])

            results = retrieve_groups_near(alt_api, lon=lon, lat=lat)

            print "\t%-20s  %d" % (geonames_city['city'], len(results)), len(frozenset(map(str, results)))

            d = {'geonames_city' : geonames_city, 'results': results}
            out.append(d)

        # Save this city
        fpath_out = './dat/groups_crawl/%s.json' % (country)
        with open(fpath_out, 'w') as f:
            json.dump(out, f)