def run_single(self, run_console=False, run_num=None):
    """Run one complete simulation.

    Loads the input dataset, prepares the output sink, builds the entity
    arrays for the period just before ``start_period``, then simulates
    ``self.periods`` consecutive periods and prints timing statistics.

    Parameters
    ----------
    run_console : bool
        If True, start an interactive console on the final evaluation
        context once the simulation is done.
    run_num : int or None
        Not used in this method -- presumably a run identifier passed by a
        multi-run driver; TODO confirm against callers.
    """
    start_time = time.time()

    # Load everything up front and prepare the output file for the period
    # preceding the first simulated one (the "init" period).
    input_dataset = timed(self.data_source.load,
                          self.globals_def,
                          self.entities_map)
    globals_data = input_dataset.get('globals')
    timed(self.data_sink.prepare, self.globals_def, self.entities_map,
          input_dataset, self.start_period - 1)

    print(" * building arrays for first simulated period")
    # NOTE(review): iteritems() is Python 2 only -- this file will not run
    # on Python 3 as-is.
    for ent_name, entity in self.entities_map.iteritems():
        print(" -", ent_name, "...", end=' ')
        # TODO: this whole process of merging all periods is very
        # opinionated and does not allow individuals to die/disappear
        # before the simulation starts. We couldn't for example,
        # take the output of one of our simulation and
        # re-simulate only some years in the middle, because the dead
        # would be brought back to life. In conclusion, it should be
        # optional.
        timed(entity.build_period_array, self.start_period - 1)
    print("done.")

    if config.autodump or config.autodiff:
        # autodump and autodiff share a single debug HDF5 file: autodump
        # writes it ('w'), autodiff reads it back for comparison ('r').
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)  # cumulative seconds per process name
    period_objects = {}                # period -> total row count over entities
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities, init=False):
        """Simulate a single period.

        Loads per-period data (skipped output when init=True, where the
        arrays built above are used directly), runs every process whose
        periodicity matches ``period_idx``, then stores the period data
        and records object counts / timings.
        """
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("functions", "processes"):
            # NOTE(review): the "periods" branch of this ternary is dead --
            # we only get here when log_level is functions/processes.
            print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")

        if init and config.log_level in ("functions", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("functions", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        # Stamp every entity array with the current period.
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def
                # set current entity
                eval_ctx.entity_name = process.entity.name
                if config.log_level in ("functions", "processes"):
                    print("- %d/%d" % (p_num, num_processes),
                          process.name, end=' ')
                    print("...", end=' ')
                # Only run the process on periods matching its periodicity.
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("functions", "processes"):
                        print("skipped (periodicity)")
                process_time[process.name] += elapsed
                if config.log_level in ("functions", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                # Hook for an interactive debugging console between
                # processes (a no-op unless enabled -- see start_console).
                self.start_console(eval_ctx)

        if config.log_level in ("functions", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)
        # print " - compressing period data"
        # for entity in entities:
        #     print " *", entity.name, "...",
        #     for level in range(1, 10, 2):
        #         print " %d:" % level,
        #         timed(entity.compress_period_data, level)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("functions", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # Estimate remaining time from the average period cost.
                # NOTE(review): main_start_time is a closure over a name
                # assigned in the try block below -- fine because the
                # init call (init=True) never reaches this branch.
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print("""
=====================
 starting simulation
=====================""")
    try:
        # Simulate the init period (start_period - 1) with the init
        # processes, then the real periods with the regular processes.
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)
        main_start_time = time.time()
        periods = range(self.start_period,
                        self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period,
                            self.processes, self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        # Guard against self.periods == 0 / instantaneous runs.
        avg_objects = str(total_objects // self.periods) \
            if self.periods else 'N/A'
        main_elapsed_time = time.time() - main_start_time
        ind_per_sec = str(int(total_objects / main_elapsed_time)) \
            if main_elapsed_time else 'inf'
        print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %s individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time), avg_objects, ind_per_sec))

        show_top_processes(process_time, 10)
        # if config.debug:
        #     show_top_expr()

        if run_console:
            ent_name = self.default_entity
            if ent_name is None and len(eval_ctx.entities) == 1:
                # NOTE(review): .keys()[0] is Python 2 only.
                ent_name = eval_ctx.entities.keys()[0]
            # FIXME: fresh_data prevents the old (cloned) EvaluationContext
            # to be referenced from each EntityContext, which lead to period
            # being fixed to the last period of the simulation. This should
            # be fixed in EvaluationContext.copy but the proper fix breaks
            # stuff (see the comments there)
            console_ctx = eval_ctx.clone(fresh_data=True,
                                         entity_name=ent_name)
            c = console.Console(console_ctx)
            c.run()
    finally:
        # Always release data source/sink and the autodump file, and
        # remove temporary output when only minimal output was requested.
        self.close()
        if h5_autodump is not None:
            h5_autodump.close()
        if self.minimal_output:
            output_path = self.data_sink.output_path
            dirname = os.path.dirname(output_path)
            try:
                os.remove(output_path)
                os.rmdir(dirname)
            except OSError:
                print("WARNING: could not delete temporary directory: %r"
                      % dirname)
def run(self, run_console=False):
    """Run the full simulation (Til variant).

    Differs from the plain LIAM2 ``run`` below: the data source is driven
    with the global ``entity_registry`` and ``self.init_period``; periods
    advance by a configurable monthly step (``self.time_scale``, reversed
    when ``self.retro`` is true); process definitions are
    (process, periodicity, start) triples where periodicity may be an int
    or a ``time_period`` key; and the 'sali'/'workstate' columns of the
    'person' entity are accumulated across periods into
    ``self.longitudinal`` DataFrames.

    NOTE(review): this file contains two methods named ``run`` -- if they
    live in the same class, this one is shadowed by the later definition.
    Presumably fork/merge residue; confirm and remove one.

    Parameters
    ----------
    run_console : bool
        If True, start an interactive console on the final evaluation
        context after the simulation.
    """
    start_time = time.time()

    h5in, h5out, globals_data = timed(self.data_source.run,
                                      self.globals_def,
                                      entity_registry,
                                      self.init_period)

    if config.autodump or config.autodiff:
        # autodump writes the debug HDF5 file, autodiff reads it back.
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # input_dataset = self.data_source.run(self.globals_def,
    #                                      entity_registry)
    # output_dataset = self.data_sink.prepare(self.globals_def,
    #                                         entity_registry)
    # output_dataset.copy(input_dataset, self.init_period - 1)
    # for entity in input_dataset:
    #     indexed_array = buildArrayForPeriod(entity)

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)  # cumulative seconds per process name
    period_objects = {}                # period -> total row count over entities
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, periods, processes, entities,
                        init=False):
        """Simulate one period of the Til variant.

        ``periods`` is the full list of simulated periods (used for the
        sanity assert and exposed to processes via ``const_dict``).
        """
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("procedures", "processes"):
            # NOTE(review): the "periods" branch of the ternary is dead --
            # we only get here when log_level is procedures/processes.
            print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("procedures", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("procedures", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        # Stamp every entity array with the current period.
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            # build context for this period:
            # periodicity is expressed in months and negated (via the
            # 1 - 2*retro factor) when simulating backwards in time.
            const_dict = {'period_idx': period_idx + 1,
                          'periods': periods,
                          'periodicity':
                              time_period[self.time_scale]
                              * (1 - 2 * (self.retro)),
                          'longitudinal': self.longitudinal,
                          'format_date': self.time_scale,
                          'pension': None,
                          '__simulation__': self,
                          'period': period,
                          'nan': float('nan'),
                          '__globals__': globals_data}
            # Sanity check: the caller passes period_idx relative to
            # periods[1:], so periods[period_idx + 1] is this period.
            assert(periods[period_idx + 1] == period)

            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity, start = process_def
                if config.log_level in ("procedures", "processes"):
                    print("- %d/%d" % (p_num, num_processes),
                          process.name, end=' ')
                    print("...", end=' ')
                # TDOD: change that
                if isinstance(periodicity, int):
                    # Integer periodicity: run every Nth period.
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        # NOTE(review): unlike the branch below, this
                        # "skipped" print is not guarded by log_level --
                        # probably an oversight; confirm.
                        print("skipped (periodicity)")
                else:
                    # Named periodicity ('month', 'year', ...): compare the
                    # process period length against the simulation step.
                    assert periodicity in time_period
                    periodicity_process = time_period[periodicity]
                    periodicity_simul = time_period[self.time_scale]
                    month_idx = period % 100
                    # first condition, to run a process with start == 12
                    # each year even if year are yyyy01
                    # modify start if periodicity_simul is not month
                    # NOTE(review): relies on Python 2 integer division
                    # semantics before the -0.01 nudge -- verify if ever
                    # ported to Python 3.
                    start = (int(start / periodicity_simul - 0.01)
                             * periodicity_simul + 1)
                    if (periodicity_process <= periodicity_simul
                            and self.time_scale != 'year0') or (
                            month_idx % periodicity_process
                            == start % periodicity_process):
                        const_dict['periodicity'] = \
                            periodicity_process * (1 - 2 * (self.retro))
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        if config.log_level in ("procedures", "processes"):
                            print("skipped (periodicity)")
                process_time[process.name] += elapsed
                if config.log_level in ("procedures", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                # Hook for an interactive console between processes
                # (a no-op unless enabled -- see start_console).
                self.start_console(eval_ctx)

        # update longitudinal
        # NOTE(review): raises IndexError if there is no 'person' entity.
        person = [x for x in entities if x.name == 'person'][0]
        # maybe we have a get_entity or anything more nice than that
        # TODO: check
        # NOTE(review): `id` shadows the builtin of the same name.
        id = person.array.columns['id']
        for varname in ['sali', 'workstate']:
            var = person.array.columns[varname]
            if init:
                # On the init period, seed self.longitudinal from the
                # /longitudinal group of the input file when present.
                # NOTE(review): input_file is opened but never closed here;
                # presumably a leak -- confirm.
                fpath = self.data_source.input_path
                input_file = HDFStore(fpath, mode="r")
                if 'longitudinal' in input_file.root:
                    input_longitudinal = input_file.root.longitudinal
                    if varname in input_longitudinal:
                        self.longitudinal[varname] = \
                            input_file['/longitudinal/' + varname]
                        if period not in self.longitudinal[varname].columns:
                            table = DataFrame({'id': id, period: var})
                            self.longitudinal[varname] = \
                                self.longitudinal[varname].merge(
                                    table, on='id', how='outer')
                    else:
                        # when one variable is not in the input_file
                        self.longitudinal[varname] = \
                            DataFrame({'id': id, period: var})
                else:
                    # when there is no longitudinal in the dataset
                    self.longitudinal[varname] = \
                        DataFrame({'id': id, period: var})
            else:
                # Merge this period's column into the accumulated frame,
                # outer-joined on id so new/departed individuals keep NaN.
                table = DataFrame({'id': id, period: var})
                if period in self.longitudinal[varname]:
                    # NOTE(review): leftover debugger breakpoint -- this
                    # will hang any non-interactive run that hits a
                    # duplicate period. Should be removed or replaced by
                    # a proper error.
                    import pdb
                    pdb.set_trace()
                self.longitudinal[varname] = \
                    self.longitudinal[varname].merge(table, on='id',
                                                     how='outer')

        if config.log_level in ("procedures", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)
        # print " - compressing period data"
        # for entity in entities:
        #     print " *", entity.name, "...",
        #     for level in range(1, 10, 2):
        #         print " %d:" % level,
        #         timed(entity.compress_period_data, level)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("procedures", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # Estimate remaining time from the average period cost.
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print("""
=====================
 starting simulation
=====================""")
    try:
        assert(self.time_scale in time_period)
        # Build the list of simulated periods as yyyymm integers, stepping
        # by time_scale months, backwards when self.retro is true.
        month_periodicity = time_period[self.time_scale]
        time_direction = 1 - 2 * (self.retro)
        time_step = month_periodicity * time_direction
        periods = [
            self.init_period + int(t / 12) * 100 + t % 12
            for t in range(0, (self.periods + 1) * time_step, time_step)
        ]
        if self.time_scale == 'year0':
            # 'year0' scale: plain yearly increments instead.
            periods = [self.init_period + t
                       for t in range(0, (self.periods + 1))]
        print("simulated period are going to be: ", periods)
        init_start_time = time.time()
        # periods[0] == init_period, so [None, periods[0]] satisfies the
        # periods[period_idx + 1] == period assert for the init call.
        simulate_period(0, self.init_period, [None, periods[0]],
                        self.init_processes, self.entities, init=True)
        time_init = time.time() - init_start_time
        main_start_time = time.time()

        for period_idx, period in enumerate(periods[1:]):
            period_start_time = time.time()
            simulate_period(period_idx, period, periods,
                            self.processes, self.entities)

            # if self.legislation:
            #     if not self.legislation['ex_post']:
            #
            #         elapsed, _ = gettime(liam2of.main, period)
            #         process_time['liam2of'] += elapsed
            #         elapsed, _ = gettime(of_on_liam.main,
            #                              self.legislation['annee'], [period])
            #         process_time['legislation'] += elapsed
            #         elapsed, _ = gettime(merge_leg.merge_h5,
            #                              self.data_source.output_path,
            #                              "C:/Til/output/" + "simul_leg.h5",
            #                              period)
            #         process_time['merge_leg'] += elapsed

            time_elapsed = time.time() - period_start_time
            print("period %d done" % period, end=' ')
            if config.show_timings:
                print("(%s elapsed)." % time2str(time_elapsed))
            else:
                print()

        total_objects = sum(period_objects[period] for period in periods)
        total_time = time.time() - main_start_time

        # if self.legislation:
        #     if self.legislation['ex_post']:
        #
        #         elapsed, _ = gettime(liam2of.main)
        #         process_time['liam2of'] += elapsed
        #         elapsed, _ = gettime(of_on_liam.main,
        #                              self.legislation['annee'])
        #         process_time['legislation'] += elapsed
        #         # TODO: make this a separate program; so far it does not
        #         # work for the whole set. Adapting is not that easy:
        #         # since we want to save one table, we cannot append
        #         # directly because 2010 ends up after 2011 at some point
        #         # in the computation.
        #         elapsed, _ = gettime(merge_leg.merge_h5,
        #                              self.data_source.output_path,
        #                              "C:/Til/output/" + "simul_leg.h5",
        #                              None)
        #         process_time['merge_leg'] += elapsed

        if self.final_stat:
            # NOTE(review): `period` here is the loop variable leaking out
            # of the for-loop above (last simulated period), and `start`
            # is presumably a module-level stats entry point -- confirm.
            elapsed, _ = gettime(start, period)
            process_time['Stat'] += elapsed

        total_time = time.time() - main_start_time
        time_year = 0
        if len(periods) > 1:
            # Approximate number of simulated years from yyyymm bounds.
            nb_year_approx = periods[-1] / 100 - periods[1] / 100
            if nb_year_approx > 0:
                time_year = total_time / nb_year_approx
        try:
            ind_per_sec = str(int(total_objects / total_time))
        except ZeroDivisionError:
            ind_per_sec = 'inf'
        print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %d individuals on average
 * %s individuals/s/period on average
 * %s second for init_process
 * %s time/period in average
 * %s time/year in average
==========================================
""" % (
            time2str(time.time() - start_time),
            total_objects / self.periods,
            ind_per_sec,
            time2str(time_init),
            time2str(total_time / self.periods),
            time2str(time_year))
        )
        show_top_processes(process_time, 10)
        # if config.debug:
        #     show_top_expr()
        if run_console:
            console_ctx = eval_ctx.clone(entity_name=self.default_entity)
            c = console.Console(console_ctx)
            c.run()
    finally:
        # Always close the HDF5 handles, even on error.
        if h5in is not None:
            h5in.close()
        h5out.close()
        if h5_autodump is not None:
            h5_autodump.close()
def run(self, run_console=False):
    """Run the full simulation (plain LIAM2 variant).

    Drives the data source directly (returning the input/output HDF5
    handles), simulates the init period with the init processes, then
    each of ``self.periods`` periods with the regular processes, and
    prints timing statistics.

    NOTE(review): this is the second ``run`` defined in this file -- if
    both live in the same class, this definition shadows the earlier one.
    Presumably fork/merge residue; confirm and remove one.

    Parameters
    ----------
    run_console : bool
        If True, start an interactive console on the final evaluation
        context after the simulation.
    """
    start_time = time.time()

    h5in, h5out, globals_data = timed(self.data_source.run,
                                      self.globals_def,
                                      self.entities_map,
                                      self.start_period - 1)

    if config.autodump or config.autodiff:
        # autodump writes the debug HDF5 file, autodiff reads it back.
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # input_dataset = self.data_source.run(self.globals_def,
    #                                      entity_registry)
    # output_dataset = self.data_sink.prepare(self.globals_def,
    #                                         entity_registry)
    # output_dataset.copy(input_dataset, self.start_period - 1)
    # for entity in input_dataset:
    #     indexed_array = build_period_array(entity)

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)  # cumulative seconds per process name
    period_objects = {}                # period -> total row count over entities
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities, init=False):
        """Simulate one period: load data, run processes, store results."""
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("procedures", "processes"):
            # NOTE(review): the "periods" branch of the ternary is dead --
            # we only get here when log_level is procedures/processes.
            print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")

        if init and config.log_level in ("procedures", "processes"):
            for entity in entities:
                print(" * %s: %d individuals" % (entity.name,
                                                 len(entity.array)))
        else:
            if config.log_level in ("procedures", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        # Stamp every entity array with the current period.
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def
                # set current entity
                eval_ctx.entity_name = process.entity.name
                if config.log_level in ("procedures", "processes"):
                    print("- %d/%d" % (p_num, num_processes),
                          process.name, end=' ')
                    print("...", end=' ')
                # Only run the process on periods matching its periodicity.
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("procedures", "processes"):
                        print("skipped (periodicity)")
                process_time[process.name] += elapsed
                if config.log_level in ("procedures", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                # Hook for an interactive console between processes
                # (a no-op unless enabled -- see start_console).
                self.start_console(eval_ctx)

        if config.log_level in ("procedures", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)
        # print " - compressing period data"
        # for entity in entities:
        #     print " *", entity.name, "...",
        #     for level in range(1, 10, 2):
        #         print " %d:" % level,
        #         timed(entity.compress_period_data, level)

        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("procedures", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                # Estimate remaining time from the average period cost
                # (main_start_time is assigned in the try block below,
                # before any non-init period is simulated).
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print("""
=====================
 starting simulation
=====================""")
    try:
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)
        main_start_time = time.time()
        periods = range(self.start_period,
                        self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period,
                            self.processes, self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        total_time = time.time() - main_start_time
        try:
            ind_per_sec = str(int(total_objects / total_time))
        except ZeroDivisionError:
            ind_per_sec = 'inf'
        # NOTE(review): unlike run_single, total_objects / self.periods is
        # not guarded -- raises ZeroDivisionError when self.periods == 0.
        print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %d individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time),
       total_objects / self.periods,
       ind_per_sec))

        show_top_processes(process_time, 10)
        # if config.debug:
        #     show_top_expr()

        if run_console:
            console_ctx = eval_ctx.clone(entity_name=self.default_entity)
            c = console.Console(console_ctx)
            c.run()
    finally:
        # Always close the HDF5 handles, even on error.
        if h5in is not None:
            h5in.close()
        h5out.close()
        if h5_autodump is not None:
            h5_autodump.close()