예제 #1
0
def run_gpe_parmap_noncum(db, trait_type, traits, init_year, end_year, init_month=1, end_month=1, mark=False):
    """
    Compute non-cumulative temporal GPE values for a collection of traits.

    One TemporalGPE_NonCum computer is created per trait (through parmap).
    Then, for every (time_0, time_1) pair produced by step_thru_qtrs, the
    ancestor and descendant populations are fetched from `db` and every
    computer is updated with them through another parmap call.

    Args:
        db: database handle forwarded to the population loaders.
        trait_type: forwarded to TemporalGPE_NonCum for every trait.
        traits: collection of traits handed to parmap, one computer each.
        init_year, end_year, init_month, end_month: range arguments
            forwarded unchanged to step_thru_qtrs.
        mark: when true, populations are loaded with get_anc_dec_mark;
            otherwise with get_anc_noncum.

    Returns:
        dict mapping each computer's `trait` to its `gpes` attribute.
    """
    def build_computer(trait):
        return TemporalGPE_NonCum(trait_type, trait)

    computers = parmap(build_computer, traits)
    last_tick = datetime.now()
    for time_0, time_1 in step_thru_qtrs(init_year, end_year, init_month, end_month):
        # Each timestep: load the two populations once, then fan the
        # per-trait updates out through parmap.
        logging.info("Updating GPE at time {}".format(time_0))
        logging.info("loading pops...")
        load_pops = get_anc_dec_mark if mark else get_anc_noncum
        ancestors, descendants = load_pops(db, time_0, time_1, limit=None)
        logging.info("anc pop size: {}, desc pop size: {}".format(len(ancestors), len(descendants)))

        def refresh(computer):
            logging.info("Updating trait {}...".format(computer.trait))
            computer.update(ancestors, descendants)
            # Return the computer so parmap hands back the updated objects.
            return computer

        computers = parmap(refresh, computers)
        tick = datetime.now()
        logging.info("elapsed: {}".format(tick - last_tick))
        last_tick = tick
    return dict((computer.trait, computer.gpes) for computer in computers)
예제 #2
0
def main():
    """
    Dump the non-cumulative and 'mark' populations over the full time span
    covered by the patents database.

    The smallest and largest `isd` values found in `db.traits` give the
    (year, month) bounds handed to step_thru_qtrs. The resulting time pairs
    are passed to dump_pops_over_time twice, once per output directory.

    Raises:
        IndexError: if the `isd` query returns no documents.
    """
    db = MongoClient().patents
    has_isd = {'isd': {'$exists': True}}
    # Sort on isd ascending / descending and keep one document each to get
    # the two extremes.
    start_date = list(db.traits.find(has_isd).sort('isd', 1).limit(1))[0]['isd']
    end_date = list(db.traits.find(has_isd).sort('isd', -1).limit(1))[0]['isd']
    # Parenthesised single-argument form: same output under Python 2, and
    # valid syntax under Python 3 (the bare print statement is not).
    print("earliest patent: {}\n latest patent: {}".format(start_date, end_date))
    start_year, start_month = start_date.year, start_date.month
    end_year, end_month = end_date.year, end_date.month
    # The pairs are consumed by two separate dumps below. Materialise them so
    # the second dump still sees every pair even if step_thru_qtrs returns a
    # one-shot iterator.
    # NOTE(review): assumes step_thru_qtrs yields a finite sequence - confirm.
    time_pairs = list(step_thru_qtrs(start_year, end_year, start_month, end_month))
    times_noncum, sizes_noncum = dump_pops_over_time(db, time_pairs, _qtr_outdir_noncum, limit=5000000, mark=False)
    times_mark, sizes_mark = dump_pops_over_time(db, time_pairs, _qtr_outdir_mark, limit=5000000, mark=True)
    """
예제 #3
0
def test(lim=1000):
    """
    Run the population and descendant dumps with a small per-step limit.

    The smallest and largest `isd` values found in `db.traits` give the
    (year, month) bounds handed to step_thru_qtrs. The resulting time pairs
    are then used for the non-cumulative dump, the 'mark' dump and the
    descendants dump.

    Args:
        lim: value passed as `limit` to every dump call (default 1000).

    Raises:
        IndexError: if the `isd` query returns no documents.
    """
    db = MongoClient().patents
    outdir = _test_outdir
    has_isd = {'isd': {'$exists': True}}
    # Sort on isd ascending / descending and keep one document each to get
    # the two extremes.
    start_date = list(db.traits.find(has_isd).sort('isd', 1).limit(1))[0]['isd']
    end_date = list(db.traits.find(has_isd).sort('isd', -1).limit(1))[0]['isd']
    # Parenthesised single-argument form: same output under Python 2, and
    # valid syntax under Python 3 (the bare print statement is not).
    print("earliest patent: {}\n latest patent: {}".format(start_date, end_date))
    start_year, start_month = start_date.year, start_date.month
    end_year, end_month = end_date.year, end_date.month
    # The pairs are consumed by three separate dumps below. Materialise them
    # so the later dumps still see every pair even if step_thru_qtrs returns
    # a one-shot iterator.
    # NOTE(review): assumes step_thru_qtrs yields a finite sequence - confirm.
    time_pairs = list(step_thru_qtrs(start_year, end_year, start_month, end_month))
    dump_pops_over_time(db, time_pairs, _qtr_outdir_noncum, limit=lim, mark=False)
    dump_pops_over_time(db, time_pairs, _qtr_outdir_mark, limit=lim, mark=True)
    dump_descendants_over_time(db, time_pairs, outdir, limit=lim, debug=False)