def dump_populations(db, start, end, outdir, delta=timedelta(days=7), lim=100, debug=True): """ Step through time, maintaining the known ancestral population at each time step. Save each set of populations as a pickled dictionary.""" for (tm1, t, tp1) in step_through_time(start, end): new_ancestors, descendants = get_populations(db, tm1, t, tp1, lim) precompute_doc = { '_id': tm1, 'new_ancestors': list(new_ancestors), 'descendants': list(descendants) } if debug: precompute_doc['new_ancestors'] = len( precompute_doc['new_ancestors']) precompute_doc['descendants'] = len(precompute_doc['descendants']) pprint(precompute_doc) else: popfn = '/'.join([outdir, dt_as_str(tm1) + '.p']) print "pickling population for time {} as {}".format(tm1, popfn) print "#new ancestors: {}, #descendants:{}".format( len(precompute_doc['new_ancestors']), len(precompute_doc['descendants'])) pickle_obj(popfn, precompute_doc)
def load_pops(start_time, limit=None):
    """ Load the pickled population dict for start_time from _pop_dir and
    return (new_ancestors, descendants). When `limit` is given, each list is
    truncated to at most `limit` entries.

    Bug fix: the original condition was inverted — `if limit is not None`
    returned the *untruncated* lists, so the limit was never applied (and the
    other branch sliced with [:None], which also returns everything).

    NOTE: this function is defined twice in this file; the later definition
    shadows this one. """
    date_str = dt_as_str(start_time)
    popfn = '/'.join([_pop_dir, date_str + '.p'])
    doc = load_obj(popfn)
    if limit is None:
        return doc['new_ancestors'], doc['descendants']
    return doc['new_ancestors'][:limit], doc['descendants'][:limit]
def load_pops(start_time, limit=None):
    """ Load the pickled population dict for start_time from _pop_dir and
    return (new_ancestors, descendants). When `limit` is given, each list is
    truncated to at most `limit` entries.

    Bug fix: the original condition was inverted — `if limit is not None`
    returned the *untruncated* lists, so the limit was never applied (and the
    other branch sliced with [:None], which also returns everything).

    NOTE: this is the second of two identical definitions of load_pops in
    this file; this one shadows the earlier copy. """
    date_str = dt_as_str(start_time)
    popfn = '/'.join([_pop_dir, date_str + '.p'])
    doc = load_obj(popfn)
    if limit is None:
        return doc['new_ancestors'], doc['descendants']
    return doc['new_ancestors'][:limit], doc['descendants'][:limit]
def dump_populations(db, start, end, outdir, delta=timedelta(days=7),lim=100, debug=True): """ Step through time, maintaining the known ancestral population at each time step. Save each set of populations as a pickled dictionary.""" for (tm1, t, tp1) in step_through_time(start, end): new_ancestors, descendants = get_populations(db, tm1, t, tp1, lim) precompute_doc = {'_id': tm1, 'new_ancestors': list(new_ancestors), 'descendants': list(descendants)} if debug: precompute_doc['new_ancestors'] = len(precompute_doc['new_ancestors']) precompute_doc['descendants'] = len(precompute_doc['descendants']) pprint(precompute_doc) else: popfn = '/'.join([outdir, dt_as_str(tm1)+'.p']) print "pickling population for time {} as {}".format(tm1, popfn) print "#new ancestors: {}, #descendants:{}".format(len(precompute_doc['new_ancestors']), len(precompute_doc['descendants'])) pickle_obj(popfn, precompute_doc)
def dump_pops_over_time(db, time_pairs, outdir, limit=None, mark = False): times = [] popsizes = [] for (time_0, time_1) in time_pairs: if mark: ancestors, new_descendants = map(list, get_anc_dec_mark(db, time_0, time_1, limit)) else: ancestors, new_descendants = map(list, get_anc_dec_noncum(db, time_0, time_1, limit)) precompute_doc = {'start': time_0, 'ancestors': ancestors, 'descendants': new_descendants} times.append(time_0) popsizes.append((len(ancestors), len(new_descendants))) popfn = '/'.join([outdir, dt_as_str(time_0)+'.p']) print "pickling population for time {} as {}".format(time_0, popfn) pickle_obj(popfn, precompute_doc) return times, popsizes
def dump_descendants_over_time(db, time_pairs, outdir, limit = None, debug = True): # also returns a histogram of pop sizes. times = [] popsizes = [] for (time_0, time_1) in time_pairs: new_descendants = list(get_new_descendants(db, time_0, time_1, limit)) precompute_doc = {'start': time_0, 'descendants': new_descendants} print "number of descendants at time {}: {}".format(time_0, len(new_descendants)) times.append(time_0) popsizes.append(len(new_descendants)) if debug: precompute_doc['descendants'] = len(new_descendants) pprint(precompute_doc) else: popfn = '/'.join([outdir, dt_as_str(time_0)+'.p']) print "pickling population for time {} as {}".format(time_0, popfn) pickle_obj(popfn, precompute_doc) return times, popsizes
def load_anc_dec(start_date, indir):
    """ Load two lists of patents; the ancestral and descendant population
    respectively, pickled under indir for the given start_date.

    Raises ValueError if the stored 'start' date does not match start_date.
    (Previously an `assert`, which is silently stripped under `python -O`,
    so a mismatched file could be loaded without warning.)"""
    filename = '/'.join([indir, dt_as_str(start_date) + '.p'])
    pop_dict = load_obj(filename)
    # Make sure the date we think we're loading matches the stored date.
    if start_date != pop_dict['start']:
        raise ValueError(
            "population file {} has start date {}, expected {}".format(
                filename, pop_dict['start'], start_date))
    return pop_dict['ancestors'], pop_dict['descendants']
def load_pop(start_date):
    """ Load a list of patents (dictionaries) occurring in the month
    following the given start_date, pickled under _pop_dir.

    Raises ValueError if the stored 'start' date does not match start_date.
    (Previously an `assert`, which is silently stripped under `python -O`,
    so a mismatched file could be loaded without warning.)"""
    filename = '/'.join([_pop_dir, dt_as_str(start_date) + '.p'])
    pop_dict = load_obj(filename)
    # Make sure the date we think we're loading matches the stored date.
    if start_date != pop_dict['start']:
        raise ValueError(
            "population file {} has start date {}, expected {}".format(
                filename, pop_dict['start'], start_date))
    return pop_dict['descendants']