def explore(fpath):
    """Open an interactive console on a data file or on a simulation output.

    Files whose extension is '.h5' or '.hdf5' are treated as raw data files;
    anything else is loaded as a simulation definition through
    ``Simulation.from_yaml`` and the output of that simulation is used as
    the input of the console.

    Parameters
    ----------
    fpath : str
        Path of the data file or of the simulation file to explore.
    """
    _, ext = splitext(fpath)
    ftype = 'data' if ext in ('.h5', '.hdf5') else 'simulation'
    print("Using {} file: '{}'".format(ftype, fpath))
    if ftype == 'data':
        globals_def, entities = entities_from_h5(fpath)
        # list(...) so that a real sequence is passed on Python 3 too, where
        # dict.values() is only a view
        simulation = Simulation(globals_def, None, None, None, None,
                                list(entities.values()), 'h5', fpath, None)
        period, entity_name = None, None
    else:
        simulation = Simulation.from_yaml(fpath)
        # use output as input
        simulation.data_source = H5Source(simulation.data_sink.output_path)
        period = simulation.start_period + simulation.periods - 1
        entity_name = simulation.default_entity

    dataset = simulation.load()
    data_source = simulation.data_source
    data_source.as_fake_output(dataset, simulation.entities_map)
    data_sink = simulation.data_sink
    entities = simulation.entities_map

    # when there is a single entity, there is no ambiguity about which one
    # the console should start in
    if entity_name is None and len(entities) == 1:
        # keys() is not subscriptable on Python 3, hence the list()
        entity_name = list(entities.keys())[0]
    # default to the most recent period present in the entity output index
    if period is None and entity_name is not None:
        entity = entities[entity_name]
        period = max(entity.output_index.keys())

    eval_ctx = EvaluationContext(simulation, entities, dataset['globals'],
                                 period, entity_name)
    try:
        c = Console(eval_ctx)
        c.run()
    finally:
        # nested so that the sink is closed even if closing the source fails
        try:
            data_source.close()
        finally:
            if data_sink is not None:
                data_sink.close()
def setUp(self):
    """Prepare a small 'person' fixture: four individuals with age and dead.

    Sets ``self.eval_ctx`` (evaluation context over the raw columns) and
    ``self.parse_ctx`` (one ``Variable`` per column, 'person' being the
    current entity).
    """
    person_columns = {
        'age': array([20, 10, 35, 55]),
        'dead': array([False, True, False, True]),
    }
    self.eval_ctx = EvaluationContext(
        entity_name='person',
        entities_data={'person': person_columns})

    # one symbol per column, created in the same order as the columns above
    person_symbols = {name: Variable(name) for name in ('age', 'dead')}
    self.parse_ctx = {'person': person_symbols, '__entity__': 'person'}
def setUp(self):
    """Build a person/household fixture linked together, for period 2002.

    Sets:

    * ``self.entities``: dict with the 'person' and 'household' entities,
    * ``self.parse_ctx``: parsing context holding the symbols of each entity
      ('person' being the current entity),
    * ``self.eval_ctx``: evaluation context on 'person' for period 2002.
    """
    hh_link = links.Many2One('household', 'hh_id', 'household')
    mother_link = links.Many2One('mother', 'mother_id', 'person')
    child_link = links.One2Many('children', 'mother_id', 'person')
    persons_link = links.One2Many('persons', 'hh_id', 'person')

    # TODO: I can't use an EntityContext with an array containing several
    # periods of data, so the rows for periods 2000 and 2001 are disabled
    # and only period 2002 is provided.
    person_dtype = np.dtype([('period', int), ('id', int), ('age', int),
                             ('dead', bool), ('mother_id', int),
                             ('hh_id', int)])
    # columns: period, id, age, dead, mother_id, hh_id
    persons = array([(2002, 0, 55, True, -1, 0),
                     (2002, 1, 25, False, 0, 1),
                     (2002, 2, 22, False, 0, 2),
                     (2002, 3, 45, False, -1, 0),
                     (2002, 4, 1, False, 2, 2)],
                    dtype=person_dtype)
    person = Entity('person',
                    links={'household': hh_link,
                           'mother': mother_link,
                           'children': child_link},
                    array=persons)

    household_dtype = np.dtype([('period', int), ('id', int)])
    households = array([(2002, 0),
                        (2002, 1),
                        (2002, 2)],
                       dtype=household_dtype)
    household = Entity('household',
                       links={'persons': persons_link},
                       array=households)

    entities = {}
    entities['person'] = person
    entities['household'] = household
    self.entities = entities

    parse_ctx = {'__globals__': {}, '__entities__': entities,
                 '__entity__': 'person'}
    # values() instead of the Python 2 only itervalues(). The generator is
    # consumed lazily by update(), so each entity sees the symbols of the
    # entities registered before it, exactly as before.
    parse_ctx.update((entity.name, entity.all_symbols(parse_ctx))
                     for entity in entities.values())
    self.parse_ctx = parse_ctx
    self.eval_ctx = EvaluationContext(entities=entities, period=2002,
                                      entity_name='person')
def run(self, run_console=False):
    """Run the simulation: the init period first, then each simulated period.

    The list of periods is derived from ``self.init_period``,
    ``self.periods``, ``self.time_scale`` and ``self.retro``. After each
    period, the 'sali' and 'workstate' columns of the 'person' entity are
    appended to ``self.longitudinal``. A summary of the timings is printed
    at the end.

    The files returned by ``self.data_source.run`` and the optional
    autodump/autodiff file are closed when the run ends, whether it
    succeeded or not.

    Parameters
    ----------
    run_console : bool, optional
        When true, start an interactive console once the simulation is done.

    Raises
    ------
    ValueError
        If, outside the init period, a longitudinal table already contains a
        column for the period which has just been simulated.
    """
    start_time = time.time()
    h5in, h5out, globals_data = timed(self.data_source.run,
                                      self.globals_def,
                                      entity_registry,
                                      self.init_period)

    if config.autodump or config.autodiff:
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)
    period_objects = {}
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def update_longitudinal(period, entities, init):
        """Add the 'sali' and 'workstate' columns of `period` to
        self.longitudinal (one table per variable, keyed on 'id')."""
        # TODO: check whether there is a get_entity (or anything nicer)
        person = [x for x in entities if x.name == 'person'][0]
        ids = person.array.columns['id']

        # The input file is only needed for the init period. It is opened
        # once for all variables and always closed afterwards.
        input_file = None
        if init:
            input_file = HDFStore(self.data_source.input_path, mode="r")
        try:
            for varname in ['sali', 'workstate']:
                var = person.array.columns[varname]
                if init:
                    root = input_file.root
                    if ('longitudinal' in root and
                            varname in root.longitudinal):
                        self.longitudinal[varname] = \
                            input_file['/longitudinal/' + varname]
                        if period not in self.longitudinal[varname].columns:
                            table = DataFrame({'id': ids, period: var})
                            self.longitudinal[varname] = \
                                self.longitudinal[varname].merge(
                                    table, on='id', how='outer')
                    else:
                        # either there is no longitudinal data in the input
                        # file, or this variable is not part of it
                        self.longitudinal[varname] = \
                            DataFrame({'id': ids, period: var})
                else:
                    # This case used to drop into pdb; fail explicitly
                    # instead of blocking the run on a debugger prompt.
                    if period in self.longitudinal[varname]:
                        raise ValueError(
                            "longitudinal table %r already has a column "
                            "for period %r" % (varname, period))
                    table = DataFrame({'id': ids, period: var})
                    self.longitudinal[varname] = \
                        self.longitudinal[varname].merge(table, on='id',
                                                         how='outer')
        finally:
            if input_file is not None:
                input_file.close()

    def simulate_period(period_idx, period, periods, processes, entities,
                        init=False):
        """Load, process and store the data of all entities for `period`."""
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("procedures", "processes"):
            print()
        print("period", period,
              end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("procedures", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("procedures", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            # build context for this period:
            const_dict = {'period_idx': period_idx + 1,
                          'periods': periods,
                          'periodicity': time_period[self.time_scale] *
                                         (1 - 2 * (self.retro)),
                          'longitudinal': self.longitudinal,
                          'format_date': self.time_scale,
                          'pension': None,
                          '__simulation__': self,
                          'period': period,
                          'nan': float('nan'),
                          '__globals__': globals_data}
            assert(periods[period_idx + 1] == period)

            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                # this `start` is local to simulate_period
                process, periodicity, start = process_def
                if config.log_level in ("procedures", "processes"):
                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                # TODO: change that
                if isinstance(periodicity, int):
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        # only in verbose mode, like the other branch
                        if config.log_level in ("procedures", "processes"):
                            print("skipped (periodicity)")
                else:
                    assert periodicity in time_period
                    periodicity_process = time_period[periodicity]
                    periodicity_simul = time_period[self.time_scale]
                    month_idx = period % 100
                    # first condition, to run a process with start == 12
                    # each year even if year are yyyy01
                    # modify start if periodicity_simul is not month
                    start = int(start / periodicity_simul - 0.01) * \
                        periodicity_simul + 1

                    if (periodicity_process <= periodicity_simul and
                            self.time_scale != 'year0') or (
                            month_idx % periodicity_process ==
                            start % periodicity_process):
                        # NOTE(review): this value stays in const_dict for
                        # the processes run afterwards in the same period
                        # -- confirm this is intended
                        const_dict['periodicity'] = \
                            periodicity_process * (1 - 2 * (self.retro))
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        if config.log_level in ("procedures", "processes"):
                            print("skipped (periodicity)")

                process_time[process.name] += elapsed
                if config.log_level in ("procedures", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                self.start_console(eval_ctx)

        update_longitudinal(period, entities, init)

        if config.log_level in ("procedures", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("procedures", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # main_start_time is assigned in run() after the init
                # period, i.e. before any non-init call gets here
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print(""" ===================== starting simulation =====================""")
    try:
        assert(self.time_scale in time_period)
        month_periodicity = time_period[self.time_scale]
        time_direction = 1 - 2 * (self.retro)
        time_step = month_periodicity * time_direction

        periods = [
            self.init_period + int(t / 12) * 100 + t % 12
            for t in range(0, (self.periods + 1) * time_step, time_step)
        ]
        if self.time_scale == 'year0':
            periods = [self.init_period + t
                       for t in range(0, (self.periods + 1))]
        print("simulated period are going to be: ", periods)

        init_start_time = time.time()
        simulate_period(0, self.init_period, [None, periods[0]],
                        self.init_processes, self.entities, init=True)
        time_init = time.time() - init_start_time

        main_start_time = time.time()
        for period_idx, period in enumerate(periods[1:]):
            period_start_time = time.time()
            simulate_period(period_idx, period, periods,
                            self.processes, self.entities)
            # NOTE: the per-period legislation hooks (liam2of, of_on_liam,
            # merge_leg) which used to run here are disabled.
            time_elapsed = time.time() - period_start_time
            print("period %d done" % period, end=' ')
            if config.show_timings:
                print("(%s elapsed)." % time2str(time_elapsed))
            else:
                print()

        total_objects = sum(period_objects[period] for period in periods)

        # NOTE: the "ex post" legislation hooks are disabled too. The
        # original TODO said this should become a separate program: it does
        # not work for the whole simulation so far, and it is not easy to
        # adapt because, to save a table, one cannot simply append (2010 is
        # written after 2011 at some point in the computation).

        if self.final_stat:
            # `start` is the module-level name here, not the per-process
            # value unpacked inside simulate_period -- presumably the
            # statistics entry point; verify against the file imports.
            # periods[-1] is the last simulated period, and stays defined
            # even when no period was simulated.
            elapsed, _ = gettime(start, periods[-1])
            process_time['Stat'] += elapsed

        total_time = time.time() - main_start_time
        time_year = 0
        if len(periods) > 1:
            nb_year_approx = periods[-1] / 100 - periods[1] / 100
            if nb_year_approx > 0:
                time_year = total_time / nb_year_approx

        try:
            ind_per_sec = str(int(total_objects / total_time))
        except ZeroDivisionError:
            ind_per_sec = 'inf'

        # averages are meaningless (and would divide by zero) when no
        # period is simulated
        if self.periods:
            avg_objects = str(total_objects // self.periods)
            avg_period_time = time2str(total_time / self.periods)
        else:
            avg_objects = 'N/A'
            avg_period_time = 'N/A'

        print(""" ========================================== simulation done ========================================== * %s elapsed * %s individuals on average * %s individuals/s/period on average * %s second for init_process * %s time/period in average * %s time/year in average ========================================== """ % (
            time2str(time.time() - start_time),
            avg_objects,
            ind_per_sec,
            time2str(time_init),
            avg_period_time,
            time2str(time_year))
        )

        show_top_processes(process_time, 10)

        if run_console:
            console_ctx = eval_ctx.clone(entity_name=self.default_entity)
            c = console.Console(console_ctx)
            c.run()

    finally:
        # nested so that a failing close() does not leave the other files
        # open
        try:
            if h5in is not None:
                h5in.close()
        finally:
            try:
                h5out.close()
            finally:
                if h5_autodump is not None:
                    h5_autodump.close()
def run_single(self, run_console=False, run_num=None):
    """Run one simulation: load the input, simulate each period, report.

    The input dataset is loaded through ``self.data_source``, the output is
    prepared through ``self.data_sink``, then the init processes are run on
    period ``self.start_period - 1`` and the normal processes on each of the
    ``self.periods`` periods starting at ``self.start_period``.

    When the run ends (successfully or not), the simulation and the optional
    autodump/autodiff file are closed and, if ``self.minimal_output`` is
    set, the output file and its directory are deleted.

    Parameters
    ----------
    run_console : bool, optional
        When true, start an interactive console once the simulation is done.
    run_num : optional
        Not used by this method.
    """
    start_time = time.time()

    input_dataset = timed(self.data_source.load,
                          self.globals_def,
                          self.entities_map)

    globals_data = input_dataset.get('globals')
    timed(self.data_sink.prepare, self.globals_def, self.entities_map,
          input_dataset, self.start_period - 1)

    print(" * building arrays for first simulated period")
    # items() instead of the Python 2 only iteritems()
    for ent_name, entity in self.entities_map.items():
        print(" -", ent_name, "...", end=' ')
        # TODO: this whole process of merging all periods is very
        # opinionated and does not allow individuals to die/disappear
        # before the simulation starts. We couldn't for example,
        # take the output of one of our simulation and
        # re-simulate only some years in the middle, because the dead
        # would be brought back to life. In conclusion, it should be
        # optional.
        timed(entity.build_period_array, self.start_period - 1)
    print("done.")

    if config.autodump or config.autodiff:
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)
    period_objects = {}
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities,
                        init=False):
        """Load, process and store the data of all entities for `period`."""
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("functions", "processes"):
            print()
        print("period", period,
              end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("functions", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("functions", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def

                # set current entity
                eval_ctx.entity_name = process.entity.name

                if config.log_level in ("functions", "processes"):
                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                # a process only runs on periods whose index is a multiple
                # of its periodicity
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("functions", "processes"):
                        print("skipped (periodicity)")

                process_time[process.name] += elapsed
                if config.log_level in ("functions", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                self.start_console(eval_ctx)

        if config.log_level in ("functions", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("functions", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # main_start_time is assigned in run_single() after the
                # init period, i.e. before any non-init call gets here
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print(""" ===================== starting simulation =====================""")
    try:
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)
        main_start_time = time.time()
        periods = range(self.start_period,
                        self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period,
                            self.processes, self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        avg_objects = str(total_objects // self.periods) \
            if self.periods else 'N/A'
        main_elapsed_time = time.time() - main_start_time
        ind_per_sec = str(int(total_objects / main_elapsed_time)) \
            if main_elapsed_time else 'inf'

        print(""" ========================================== simulation done ========================================== * %s elapsed * %s individuals on average * %s individuals/s/period on average ========================================== """ % (time2str(time.time() - start_time),
       avg_objects, ind_per_sec))

        show_top_processes(process_time, 10)

        if run_console:
            ent_name = self.default_entity
            if ent_name is None and len(eval_ctx.entities) == 1:
                # keys() is not subscriptable on Python 3, hence the list()
                ent_name = list(eval_ctx.entities.keys())[0]
            # FIXME: fresh_data prevents the old (cloned) EvaluationContext
            # to be referenced from each EntityContext, which lead to period
            # being fixed to the last period of the simulation. This should
            # be fixed in EvaluationContext.copy but the proper fix breaks
            # stuff (see the comments there)
            console_ctx = eval_ctx.clone(fresh_data=True,
                                         entity_name=ent_name)
            c = console.Console(console_ctx)
            c.run()

    finally:
        # nested so that the autodump file is closed even if self.close()
        # fails
        try:
            self.close()
        finally:
            if h5_autodump is not None:
                h5_autodump.close()
        if self.minimal_output:
            output_path = self.data_sink.output_path
            dirname = os.path.dirname(output_path)
            try:
                os.remove(output_path)
                os.rmdir(dirname)
            except OSError:
                print("WARNING: could not delete temporary directory: %r"
                      % dirname)
def run(self, run_console=False):
    """Run the simulation: the init period first, then each simulated period.

    The init processes are run on period ``self.start_period - 1`` and the
    normal processes on each of the ``self.periods`` periods starting at
    ``self.start_period``. A summary of the timings is printed at the end.

    The files returned by ``self.data_source.run`` and the optional
    autodump/autodiff file are closed when the run ends, whether it
    succeeded or not.

    Parameters
    ----------
    run_console : bool, optional
        When true, start an interactive console once the simulation is done.
    """
    start_time = time.time()
    h5in, h5out, globals_data = timed(self.data_source.run,
                                      self.globals_def,
                                      self.entities_map,
                                      self.start_period - 1)

    if config.autodump or config.autodiff:
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)
    period_objects = {}
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities,
                        init=False):
        """Load, process and store the data of all entities for `period`."""
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("procedures", "processes"):
            print()
        print("period", period,
              end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("procedures", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("procedures", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def

                # set current entity
                eval_ctx.entity_name = process.entity.name

                if config.log_level in ("procedures", "processes"):
                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                # a process only runs on periods whose index is a multiple
                # of its periodicity
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("procedures", "processes"):
                        print("skipped (periodicity)")

                process_time[process.name] += elapsed
                if config.log_level in ("procedures", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                self.start_console(eval_ctx)

        if config.log_level in ("procedures", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("procedures", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # main_start_time is assigned in run() after the init
                # period, i.e. before any non-init call gets here
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print(""" ===================== starting simulation =====================""")
    try:
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)
        main_start_time = time.time()
        periods = range(self.start_period,
                        self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period,
                            self.processes, self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        total_time = time.time() - main_start_time
        try:
            ind_per_sec = str(int(total_objects / total_time))
        except ZeroDivisionError:
            ind_per_sec = 'inf'
        # same guard as run_single: the average is meaningless (and would
        # divide by zero) when no period is simulated
        avg_objects = str(total_objects // self.periods) \
            if self.periods else 'N/A'

        print(""" ========================================== simulation done ========================================== * %s elapsed * %s individuals on average * %s individuals/s/period on average ========================================== """ % (time2str(time.time() - start_time),
       avg_objects,
       ind_per_sec))

        show_top_processes(process_time, 10)

        if run_console:
            console_ctx = eval_ctx.clone(entity_name=self.default_entity)
            c = console.Console(console_ctx)
            c.run()

    finally:
        # nested so that a failing close() does not leave the other files
        # open
        try:
            if h5in is not None:
                h5in.close()
        finally:
            try:
                h5out.close()
            finally:
                if h5_autodump is not None:
                    h5_autodump.close()