def explore(fpath):
    """Open an interactive console on a data file or on a simulation output.

    A path ending in '.h5' or '.hdf5' is treated as a data file and wrapped
    in a Simulation built from the entities found in it. Any other path is
    loaded with Simulation.from_yaml, and the output file of that simulation
    is then used as its data source.

    fpath: path to the file to explore.
    """
    _, ext = splitext(fpath)
    ftype = 'data' if ext in ('.h5', '.hdf5') else 'simulation'
    print("Using {} file: '{}'".format(ftype, fpath))
    if ftype == 'data':
        globals_def, entities = entities_from_h5(fpath)
        simulation = Simulation(globals_def, None, None, None, None,
                                entities.values(), 'h5', fpath, None)
        # no default period/entity is known for a bare data file; both are
        # guessed further down
        period, entity_name = None, None
    else:
        simulation = Simulation.from_yaml(fpath)
        # use output as input
        simulation.data_source = H5Source(simulation.data_sink.output_path)
        # last simulated period
        period = simulation.start_period + simulation.periods - 1
        entity_name = simulation.default_entity

    dataset = simulation.load()
    data_source = simulation.data_source
    data_source.as_fake_output(dataset, simulation.entities_map)
    data_sink = simulation.data_sink
    entities = simulation.entities_map

    # with a single entity, there is no ambiguity about the default one
    if entity_name is None and len(entities) == 1:
        entity_name = entities.keys()[0]
    # default to the most recent period present in the entity output index
    if period is None and entity_name is not None:
        entity = entities[entity_name]
        period = max(entity.output_index.keys())

    eval_ctx = EvaluationContext(simulation, entities, dataset['globals'],
                                 period, entity_name)
    try:
        c = Console(eval_ctx)
        c.run()
    finally:
        # nested so that an error while closing the source cannot leave the
        # sink open
        try:
            data_source.close()
        finally:
            if data_sink is not None:
                data_sink.close()
def setUp(self):
    """Build a four-row 'person' dataset plus its parse and eval contexts."""
    ages = array([20, 10, 35, 55])
    dead_flags = array([False, True, False, True])
    person_columns = {'age': ages, 'dead': dead_flags}
    self.eval_ctx = EvaluationContext(
        entity_name='person',
        entities_data={'person': person_columns})

    # one Variable per column, keyed by the column name
    person_symbols = {name: Variable(name) for name in ('age', 'dead')}
    self.parse_ctx = {'person': person_symbols, '__entity__': 'person'}
def setUp(self):
    """Create linked 'person' and 'household' entities for period 2002.

    The entities are stored in ``self.entities``; ``self.parse_ctx`` and
    ``self.eval_ctx`` (entity 'person', period 2002) are built from them.
    """
    household_link = links.Many2One('household', 'hh_id', 'household')
    mother_link = links.Many2One('mother', 'mother_id', 'person')
    children_link = links.One2Many('children', 'mother_id', 'person')
    members_link = links.One2Many('persons', 'hh_id', 'person')

    person_fields = [('period', int), ('id', int), ('age', int),
                     ('dead', bool), ('mother_id', int), ('hh_id', int)]
    person_dtype = np.dtype(person_fields)
    # TODO: I can't use an EntityContext with an array containing several
    # periods of data
    # persons = array([(2000, 0, 53, False, -1, 0),
    #                  (2000, 1, 23, False, 0, 1),
    #                  (2000, 2, 20, False, 0, 2),
    #                  (2000, 3, 43, False, -1, 3),
    #                  (2001, 0, 54, True, -1, 0),
    #                  (2001, 1, 24, False, 0, 1),
    #                  (2001, 2, 21, False, 0, 2),
    #                  (2001, 3, 44, False, -1, 0),  # they got married
    #                  (2001, 4, 0, False, 2, 2),
    person_rows = [
        (2002, 0, 55, True, -1, 0),
        (2002, 1, 25, False, 0, 1),
        (2002, 2, 22, False, 0, 2),
        (2002, 3, 45, False, -1, 0),
        (2002, 4, 1, False, 2, 2),
    ]
    persons = array(person_rows, dtype=person_dtype)
    person_links = {'household': household_link,
                    'mother': mother_link,
                    'children': children_link}
    person = Entity('person', links=person_links, array=persons)

    household_dtype = np.dtype([('period', int), ('id', int)])
    # households = array([(2000, 0),
    #                     (2000, 1),
    #                     (2000, 2),
    #                     (2000, 3),
    #
    #                     (2001, 0),
    #                     (2001, 1),
    #                     (2001, 2),
    household_rows = [(2002, 0), (2002, 1), (2002, 2)]
    households = array(household_rows, dtype=household_dtype)
    household = Entity('household', links={'persons': members_link},
                       array=households)

    entities = {'person': person, 'household': household}
    self.entities = entities

    parse_ctx = {'__globals__': {},
                 '__entities__': entities,
                 '__entity__': 'person'}
    # filled in one entity at a time: each all_symbols() call receives the
    # context as populated so far
    for entity in entities.itervalues():
        symbols_key = entity.name
        parse_ctx[symbols_key] = entity.all_symbols(parse_ctx)
    self.parse_ctx = parse_ctx

    self.eval_ctx = EvaluationContext(entities=entities, period=2002,
                                      entity_name='person')
def run_single(self, run_console=False, run_num=None):
    """Run the simulation once: the init period, then each simulated period.

    Loads the input dataset, prepares the data sink, builds the entity
    arrays for the period preceding ``start_period``, runs the init
    processes on that period and then the regular processes on
    ``self.periods`` consecutive periods, printing progress and a final
    summary. Resources are released even when the simulation fails.

    run_console: when true, start an interactive console once the
        simulation is done.
    run_num: not used by this method.
    """
    start_time = time.time()

    input_dataset = timed(self.data_source.load,
                          self.globals_def,
                          self.entities_map)

    globals_data = input_dataset.get('globals')
    timed(self.data_sink.prepare, self.globals_def, self.entities_map,
          input_dataset, self.start_period - 1)

    print(" * building arrays for first simulated period")
    for ent_name, entity in self.entities_map.iteritems():
        print(" -", ent_name, "...", end=' ')
        # TODO: this whole process of merging all periods is very
        # opinionated and does not allow individuals to die/disappear
        # before the simulation starts. We couldn't for example,
        # take the output of one of our simulation and
        # re-simulate only some years in the middle, because the dead
        # would be brought back to life. In conclusion, it should be
        # optional.
        timed(entity.build_period_array, self.start_period - 1)
    print("done.")

    if config.autodump or config.autodiff:
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    # cumulated run time per process name, over all periods
    process_time = defaultdict(float)
    # total number of individuals (all entities) per period
    period_objects = {}

    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities,
                        init=False):
        """Load, process and store the data of one period.

        With init=True, no period data is loaded and no remaining-time
        estimate is printed.
        """
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("functions", "processes"):
            print()
        print("period", period,
              end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("functions", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("functions", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def

                # set current entity
                eval_ctx.entity_name = process.entity.name

                if config.log_level in ("functions", "processes"):
                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                # a process only runs on periods whose index is a multiple
                # of its periodicity
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("functions", "processes"):
                        print("skipped (periodicity)")

                process_time[process.name] += elapsed
                if config.log_level in ("functions", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                self.start_console(eval_ctx)

        if config.log_level in ("functions", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)
        # print " - compressing period data"
        # for entity in entities:
        #     print " *", entity.name, "...",
        #     for level in range(1, 10, 2):
        #         print " %d:" % level,
        #         timed(entity.compress_period_data, level)
        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("functions", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # main_start_time is set by the enclosing function after
                # the init period, hence it is only read when not init
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    # NOTE(review): the banner texts below are kept exactly as found; they
    # look like they were meant to span several lines -- confirm.
    print(""" ===================== starting simulation =====================""")
    try:
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)
        main_start_time = time.time()
        periods = range(self.start_period,
                        self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period,
                            self.processes, self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        avg_objects = str(total_objects // self.periods) \
            if self.periods else 'N/A'
        main_elapsed_time = time.time() - main_start_time
        ind_per_sec = str(int(total_objects / main_elapsed_time)) \
            if main_elapsed_time else 'inf'

        print(""" ========================================== simulation done ========================================== * %s elapsed * %s individuals on average * %s individuals/s/period on average ========================================== """ % (time2str(time.time() - start_time), avg_objects, ind_per_sec))

        show_top_processes(process_time, 10)
        # if config.debug:
        #     show_top_expr()

        if run_console:
            ent_name = self.default_entity
            if ent_name is None and len(eval_ctx.entities) == 1:
                ent_name = eval_ctx.entities.keys()[0]
            # FIXME: fresh_data prevents the old (cloned) EvaluationContext
            # to be referenced from each EntityContext, which lead to period
            # being fixed to the last period of the simulation. This should
            # be fixed in EvaluationContext.copy but the proper fix breaks
            # stuff (see the comments there)
            console_ctx = eval_ctx.clone(fresh_data=True,
                                         entity_name=ent_name)
            c = console.Console(console_ctx)
            c.run()
    finally:
        # each cleanup step is nested in its own try/finally so that a
        # failure in one of them does not prevent the following ones
        try:
            self.close()
        finally:
            try:
                if h5_autodump is not None:
                    h5_autodump.close()
            finally:
                if self.minimal_output:
                    output_path = self.data_sink.output_path
                    dirname = os.path.dirname(output_path)
                    try:
                        os.remove(output_path)
                        os.rmdir(dirname)
                    except OSError:
                        print("WARNING: could not delete temporary "
                              "directory: %r" % dirname)