def run_guarded(self, context):
    period = context.period

    if config.log_level == "processes":
        print()
    try:
        for k, v in self.subprocesses:
            if config.log_level == "processes":
                print(" *", end=' ')
                if k is not None:
                    print(k, end=' ')
                utils.timed(v.run_guarded, context)
            else:
                v.run_guarded(context)
            # print "done."
            context.simulation.start_console(context)
    finally:
        if config.autodump is not None:
            self._autodump(context)

        if config.autodiff is not None:
            self._autodiff(period)

        if self.purge:
            self.entity.purge_locals()
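# Usage sketch (not from the original module): run_guarded expects an
# evaluation context carrying the current period, as built by run_single
# below; the variable names and the period value here are illustrative only.
#
#     ctx = EvaluationContext(simulation, simulation.entities_map,
#                             globals_data)
#     ctx.period = 2002
#     process_group.run_guarded(ctx)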
def run_single(self, run_console=False, run_num=None):
    start_time = time.time()

    input_dataset = timed(self.data_source.load, self.globals_def,
                          self.entities_map)
    globals_data = input_dataset.get('globals')
    timed(self.data_sink.prepare, self.globals_def, self.entities_map,
          input_dataset, self.start_period - 1)

    print(" * building arrays for first simulated period")
    for ent_name, entity in self.entities_map.items():
        print(" -", ent_name, "...", end=' ')
        # TODO: this whole process of merging all periods is very
        # opinionated and does not allow individuals to die/disappear
        # before the simulation starts. We could not, for example, take
        # the output of one of our simulations and re-simulate only some
        # years in the middle, because the dead would be brought back to
        # life. In conclusion, it should be optional.
        timed(entity.build_period_array, self.start_period - 1)
    print("done.")

    if config.autodump or config.autodiff:
        if config.autodump:
            fname, _ = config.autodump
            mode = 'w'
        else:  # config.autodiff
            fname, _ = config.autodiff
            mode = 'r'
        fpath = os.path.join(config.output_directory, fname)
        h5_autodump = tables.open_file(fpath, mode=mode)
        config.autodump_file = h5_autodump
    else:
        h5_autodump = None

    # tell numpy we do not want warnings for x/0 and 0/0
    np.seterr(divide='ignore', invalid='ignore')

    process_time = defaultdict(float)
    period_objects = {}
    eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

    def simulate_period(period_idx, period, processes, entities,
                        init=False):
        period_start_time = time.time()

        # set current period
        eval_ctx.period = period

        if config.log_level in ("functions", "processes"):
            print()
        print("period", period,
              end=" " if config.log_level == "periods" else "\n")
        if init and config.log_level in ("functions", "processes"):
            for entity in entities:
                print(" * %s: %d individuals"
                      % (entity.name, len(entity.array)))
        else:
            if config.log_level in ("functions", "processes"):
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.load_period_data(period)
        for entity in entities:
            entity.array_period = period
            entity.array['period'] = period

        if processes:
            num_processes = len(processes)
            for p_num, process_def in enumerate(processes, start=1):
                process, periodicity = process_def
                # set current entity
                eval_ctx.entity_name = process.entity.name
                if config.log_level in ("functions", "processes"):
                    print("- %d/%d" % (p_num, num_processes),
                          process.name, end=' ')
                    print("...", end=' ')
                if period_idx % periodicity == 0:
                    elapsed, _ = gettime(process.run_guarded, eval_ctx)
                else:
                    elapsed = 0
                    if config.log_level in ("functions", "processes"):
                        print("skipped (periodicity)")

                process_time[process.name] += elapsed
                if config.log_level in ("functions", "processes"):
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                self.start_console(eval_ctx)

        if config.log_level in ("functions", "processes"):
            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
        else:
            for entity in entities:
                entity.store_period_data(period)
        # print " - compressing period data"
        # for entity in entities:
        #     print " *", entity.name, "...",
        #     for level in range(1, 10, 2):
        #         print " %d:" % level,
        #         timed(entity.compress_period_data, level)
        period_objects[period] = sum(len(entity.array)
                                     for entity in entities)
        period_elapsed_time = time.time() - period_start_time
        if config.log_level in ("functions", "processes"):
            print("period %d" % period, end=' ')
        print("done", end=' ')
        if config.show_timings:
            print("(%s elapsed)" % time2str(period_elapsed_time), end="")
            if init:
                print(".")
            else:
                main_elapsed_time = time.time() - main_start_time
                periods_done = period_idx + 1
                remaining_periods = self.periods - periods_done
                avg_time = main_elapsed_time / periods_done
                # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                remaining_time = avg_time * remaining_periods
                print(" - estimated remaining time: %s."
                      % time2str(remaining_time))
        else:
            print()

    print("""
=====================
 starting simulation
=====================""")
    try:
        simulate_period(0, self.start_period - 1, self.init_processes,
                        self.entities, init=True)

        main_start_time = time.time()
        periods = range(self.start_period, self.start_period + self.periods)
        for period_idx, period in enumerate(periods):
            simulate_period(period_idx, period, self.processes,
                            self.entities)

        total_objects = sum(period_objects[period] for period in periods)
        avg_objects = str(total_objects // self.periods) \
            if self.periods else 'N/A'
        main_elapsed_time = time.time() - main_start_time
        ind_per_sec = str(int(total_objects / main_elapsed_time)) \
            if main_elapsed_time else 'inf'
        print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %s individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time), avg_objects, ind_per_sec))

        show_top_processes(process_time, 10)
        # if config.debug:
        #     show_top_expr()

        if run_console:
            ent_name = self.default_entity
            if ent_name is None and len(eval_ctx.entities) == 1:
                ent_name = list(eval_ctx.entities.keys())[0]
            # FIXME: fresh_data prevents the old (cloned) EvaluationContext
            # from being referenced from each EntityContext, which led to
            # period being fixed to the last period of the simulation. This
            # should be fixed in EvaluationContext.copy but the proper fix
            # breaks stuff (see the comments there).
            console_ctx = eval_ctx.clone(fresh_data=True,
                                         entity_name=ent_name)
            c = console.Console(console_ctx)
            c.run()
    finally:
        self.close()
        if h5_autodump is not None:
            h5_autodump.close()
        if self.minimal_output:
            output_path = self.data_sink.output_path
            dirname = os.path.dirname(output_path)
            try:
                os.remove(output_path)
                os.rmdir(dirname)
            except OSError:
                print("WARNING: could not delete temporary directory: %r"
                      % dirname)
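# Usage sketch: driving a whole run through run_single. Simulation.from_yaml
# is used the same way in the id-changer script further down; the model path
# 'model.yml' is a hypothetical example.
#
#     sim = Simulation.from_yaml('model.yml')
#     sim.run_single(run_console=True)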
def load(self):
    return timed(self.data_source.load, self.globals_def,
                 self.entities_map)
    # copy globals
    if copy_globals:
        # noinspection PyProtectedMember
        input_file.root.globals._f_copy(output_file.root, recursive=True)

    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iter_nodes(input_file.root.entities):
        # noinspection PyProtectedMember
        print(table._v_name, "...")
        copy_table(table, output_entities, condition=condition)

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 filter %s using Python %s (%s)\n"
          % (__version__, platform.python_version(),
             platform.architecture()[0]))

    args = dict(enumerate(sys.argv))
    if len(args) < 4:
        print("""Usage: {} inputpath outputpath condition [copy_globals]
where condition is an expression
      copy_globals is True (default)|False""".format(args[0]))
        sys.exit()

    timed(filter_h5, args[1], args[2], args[3], eval(args.get(4, 'True')))
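# Example invocations (sketch): the script name and file paths are
# hypothetical, and the condition is any row-filtering expression accepted
# by copy_table, e.g. a test on the 'period' column.
#
#     python filter_h5.py input.h5 filtered.h5 "period >= 2002"
#     python filter_h5.py input.h5 filtered.h5 "period >= 2002" False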
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.open_file(input1_path)
    input2_file = tables.open_file(input2_path)
    output_file = tables.open_file(output_path, mode="w")

    input1root = input1_file.root
    input2root = input2_file.root

    merge_group(input1root, input2root, 'globals', output_file, 'PERIOD')
    merge_group(input1root, input2root, 'entities', output_file, 'period')

    input1_file.close()
    input2_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 merge %s using Python %s (%s)\n"
          % (__version__, platform.python_version(),
             platform.architecture()[0]))

    args = sys.argv
    if len(args) < 4:
        print("Usage: %s inputpath1 inputpath2 outputpath" % args[0])
        sys.exit()

    timed(merge_h5, args[1], args[2], args[3])
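# Example invocation (sketch; file names are hypothetical). Globals are
# merged on the 'PERIOD' column and entity tables on 'period', as hard-coded
# above.
#
#     python merge_h5.py run_a.h5 run_b.h5 merged.h5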
def index_tables(globals_def, entities, fpath):
    print("reading data from %s ..." % fpath)

    input_file = tables.open_file(fpath)
    try:
        input_root = input_file.root

        def must_load_from_input_file(gdef):
            return isinstance(gdef, dict) and 'path' not in gdef

        any_global_from_input_file = any(must_load_from_input_file(gdef)
                                         for gdef in globals_def.values())
        if any_global_from_input_file and 'globals' not in input_root:
            raise Exception('could not find any globals in the input data '
                            'file (but some are declared in the simulation '
                            'file)')

        globals_data = load_path_globals(globals_def)
        constant_globals_data = handle_constant_globals(globals_def)
        globals_data.update(constant_globals_data)

        globals_node = getattr(input_root, 'globals', None)
        for name, global_def in globals_def.items():
            # already loaded from another source (path)
            if name in globals_data:
                continue

            if name not in globals_node:
                raise Exception("could not find 'globals/%s' in the input "
                                "data file" % name)

            global_data = getattr(globals_node, name)
            global_type = global_def.get('type', global_def.get('fields'))
            # TODO: move the checking (assert_valid_type) to a separate
            # function
            assert_valid_type(global_data, global_type, context=name)
            array = global_data.read()
            if isinstance(global_type, list):
                # make sure we do not keep in memory columns which are
                # present in the input file but were not asked for by the
                # modeller. They are not accessible anyway.
                array = add_and_drop_fields(array, global_type)
            attrs = global_data.attrs
            dim_names = getattr(attrs, 'dimensions', None)
            if dim_names is not None:
                # we serialise dim_names as a numpy array so that it is
                # stored as a native hdf type and not a pickle, but we
                # prefer to work with simple lists. Also, files serialized
                # using Python 2 contain "bytes", not "str".
                dim_names = [str(dim_name) for dim_name in dim_names]
                pvalues = [getattr(attrs, 'dim%d_pvalues' % i)
                           for i in range(len(dim_names))]
                axes = [la.Axis(labels, axis_name)
                        for axis_name, labels in zip(dim_names, pvalues)]
                array = la.LArray(array, axes)
            globals_data[name] = array

        input_entities = input_root.entities
        entities_tables = {}
        print(" * indexing tables")
        for ent_name, entity in entities.items():
            print(" -", ent_name, "...", end=' ')

            table = getattr(input_entities, ent_name)
            assert_valid_type(table, list(entity.fields.in_input.name_types))
            rows_per_period, id_to_rownum_per_period = \
                timed(index_table, table)
            indexed_table = IndexedTable(table, rows_per_period,
                                         id_to_rownum_per_period)
            entities_tables[ent_name] = indexed_table
    except:
        input_file.close()
        raise

    return input_file, {'globals': globals_data,
                        'entities': entities_tables}
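# Usage sketch (hypothetical entity name 'person'): index_tables returns the
# still-open input file together with the loaded globals and per-entity
# IndexedTable objects, so the caller is responsible for closing the file.
#
#     input_file, data = index_tables(globals_def, entities, 'input.h5')
#     try:
#         person = data['entities']['person']
#     finally:
#         input_file.close()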
    if action != 'sort':
        if len(args) < 5:
            print("link_fields argument must be provided if using an .h5 "
                  "input file")
            sys.exit()
        entities = [entity.split(':') for entity in args[4].split(';')]
        to_change = {ent_name: fields.split(',')
                     for ent_name, fields in entities}
        # convert {ent_name: [target_ent1.fname1, target_ent2.fname2]}
        # to {ent_name: [(target_ent1, fname1), (target_ent2, fname2)]}
        for ent_name, fields in to_change.items():
            for i, fname in enumerate(fields):
                fields[i] = \
                    fname.split('.') if '.' in fname else (ent_name, fname)
    else:
        simulation = Simulation.from_yaml(inputpath)
        inputpath = simulation.data_source.input_path
        to_change = {entity.name: fields_from_entity(entity)
                     for entity in simulation.entities}

    assert action in {'shrink', 'shuffle', 'sort'}
    if action == 'shrink':
        timed(change_ids, inputpath, outputpath, to_change)
    elif action == 'shuffle':
        timed(change_ids, inputpath, '_shuffled_temp.h5', to_change,
              shuffle=True)
        timed(h5_sort, '_shuffled_temp.h5', outputpath,
              list(to_change.keys()))
    else:
        ent_names = args[4].split(',') if len(args) >= 5 else None
        timed(h5_sort, inputpath, outputpath, ent_names)
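# Example invocations (sketch; the script name, paths, entity and field
# names are all hypothetical). link_fields uses the 'entity:field1,field2'
# syntax parsed above, with ';' separating entities; 'sort' takes its input
# path from the simulation file instead.
#
#     python idchanger.py shrink input.h5 shrunk.h5 "person:mother_id,partner_id"
#     python idchanger.py shuffle input.h5 shuffled.h5 "person:mother_id"
#     python idchanger.py sort model.yml sorted.h5 person,household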
    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iter_nodes(input_root.entities):
        table_fields = get_fields(table)
        output_dtype = np.dtype([(fname, ftype)
                                 for fname, ftype in table_fields
                                 if fname not in todrop])
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        # noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, output_dtype)
        print("done.")

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 drop fields %s using Python %s (%s)\n"
          % (__version__, platform.python_version(),
             platform.architecture()[0]))

    args = sys.argv
    if len(args) < 4:
        print("Usage: %s inputpath outputpath field1 [field2 ...]" % args[0])
        sys.exit()

    timed(dropfields, args[1], args[2], args[3:])
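# Example invocation (sketch; paths and field names are hypothetical): drop
# the listed columns from every entity table.
#
#     python dropfields.py input.h5 output.h5 temp1 temp2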