def run_gpe_parmap_noncum(db, trait_type, traits, init_year, end_year, init_month=1, end_month=1, mark=False):
    """
    Run the GPE over time for the given traits, one time step at a time.

    One TemporalGPE_NonCum computer is created per trait through parmap.
    For every (time_0, time_1) pair produced by step_thru_qtrs, the ancestor
    and descendant populations are loaded once and each computer is updated
    against them through another parmap call. Elapsed wall-clock time per
    step is logged.

    Args:
        db: database handle handed to the population loaders.
        trait_type: forwarded as the first argument of TemporalGPE_NonCum.
        traits: iterable of traits; one computer is built per element.
        init_year, end_year, init_month, end_month: forwarded unchanged to
            step_thru_qtrs to produce the time steps.
        mark: when truthy, populations come from get_anc_dec_mark;
            otherwise from get_anc_noncum.

    Returns:
        dict mapping each computer's ``trait`` to its ``gpes`` attribute.
    """
    gpes_list = parmap(lambda trait: TemporalGPE_NonCum(trait_type, trait), traits)

    last_tick = datetime.now()
    for time_0, time_1 in step_thru_qtrs(init_year, end_year, init_month, end_month):
        logging.info("Updating GPE at time {}".format(time_0))
        logging.info("loading pops...")

        # Both loaders take the same arguments; only the source differs.
        load_pops = get_anc_dec_mark if mark else get_anc_noncum
        anc_pop, desc_pop = load_pops(db, time_0, time_1, limit=None)
        logging.info("anc pop size: {}, desc pop size: {}".format(len(anc_pop), len(desc_pop)))

        def refresh(computer):
            # Closes over this step's populations; returns the computer so
            # parmap hands back the updated objects.
            logging.info("Updating trait {}...".format(computer.trait))
            computer.update(anc_pop, desc_pop)
            return computer

        gpes_list = parmap(refresh, gpes_list)

        this_tick = datetime.now()
        logging.info("elapsed: {}".format(this_tick - last_tick))
        last_tick = this_tick

    return {computer.trait: computer.gpes for computer in gpes_list}
def main(): db = MongoClient().patents start_date = list(db.traits.find({'isd': {'$exists': True}}).sort('isd', 1).limit(1))[0]['isd'] end_date = list(db.traits.find({'isd': {'$exists': True}}).sort('isd', -1).limit(1))[0]['isd'] print "earliest patent: {}\n latest patent: {}".format(start_date, end_date) start_year, start_month = start_date.year, start_date.month end_year, end_month = end_date.year, end_date.month time_pairs = step_thru_qtrs(start_year, end_year, start_month, end_month) times_noncum, sizes_noncum = dump_pops_over_time(db, time_pairs, _qtr_outdir_noncum, limit=5000000, mark=False) times_mark, sizes_mark = dump_pops_over_time(db, time_pairs, _qtr_outdir_mark, limit=5000000, mark=True) """
def test(lim=1000):
    # Small-scale run of the dump pipeline: same date range as the full run,
    # but every dump is capped at `lim` (default 1000).
    #
    # NOTE(review): the code preceding this function ends with an unmatched
    # triple-quote delimiter, so this function may sit inside a disabled
    # block. Comments are used instead of a docstring so that no nested
    # triple-quote can terminate that block early.
    db = MongoClient().patents
    outdir = _test_outdir

    # Earliest / latest 'isd' via ascending / descending sort with limit(1).
    # Raises IndexError if no document carries an 'isd' field.
    has_isd = {'isd': {'$exists': True}}
    start_date = list(db.traits.find(has_isd).sort('isd', 1).limit(1))[0]['isd']
    end_date = list(db.traits.find(has_isd).sort('isd', -1).limit(1))[0]['isd']
    # Single-argument parenthesized form prints identically under Python 2.
    print("earliest patent: {}\n latest patent: {}".format(start_date, end_date))

    start_year, start_month = start_date.year, start_date.month
    end_year, end_month = end_date.year, end_date.month

    # Materialize the time steps: three consumers iterate them below, and a
    # one-shot iterator would be exhausted after the first.
    # NOTE(review): step_thru_qtrs is not visible here; if it already returns
    # a list this is just a cheap copy.
    time_pairs = list(step_thru_qtrs(start_year, end_year, start_month, end_month))

    # NOTE(review): the population dumps write to the regular _qtr_outdir_*
    # directories, not to `outdir`; only the descendants dump uses the test
    # directory. Confirm this is intended.
    dump_pops_over_time(db, time_pairs, _qtr_outdir_noncum, limit=lim, mark=False)
    dump_pops_over_time(db, time_pairs, _qtr_outdir_mark, limit=lim, mark=True)
    dump_descendants_over_time(db, time_pairs, outdir, limit=lim, debug=False)