def __init__(self, eval_ctx):
    """Keep the evaluation context and derive the parsing context from it.

    The parsing context maps '__globals__' to the symbols of the
    simulation globals, every entity name to the symbols of that entity
    and '__entity__' to ``eval_ctx.entity_name``.
    """
    self.eval_ctx = eval_ctx

    global_syms = global_symbols(eval_ctx.simulation.globals_def)
    base_ctx = {'__globals__': global_syms}

    # each entity computes its symbols against the globals-only context,
    # not against the context being filled
    symbols = dict(base_ctx)
    for ent in eval_ctx.entities.itervalues():
        symbols[ent.name] = ent.all_symbols(base_ctx)
    symbols['__entity__'] = eval_ctx.entity_name

    self.parse_ctx = symbols
def from_yaml(cls, fpath, input_dir=None, input_file=None, output_dir=None,
              output_file=None):
    """Create a Simulation from the YAML model file *fpath*.

    The file is loaded, its imports are expanded relative to the directory
    containing it and the result is validated against ``cls.yaml_layout``.

    Side effects: several ``config`` attributes are set (skip_shows,
    assertions, log_level, show_timings, autodump, autodiff,
    input_directory, output_directory), the ``random`` and ``numpy.random``
    generators are seeded when 'random_seed' is present, and the output
    directory is created if it does not exist.

    Parameters
    ----------
    fpath
        Path of the YAML simulation file.
    input_dir, output_dir
        When not None, used instead of simulation.input.path /
        simulation.output.path. Relative directories are resolved against
        the directory containing *fpath*.
    input_file, output_file
        When not None, used instead of simulation.input.file /
        simulation.output.file.

    Raises
    ------
    Exception
        If neither or both of 'start_period' and 'init_period' are given,
        or if an 'init' entry names an entity which was not declared.
    ValueError
        If simulation.input.method is neither 'h5' nor 'void', or if a
        process definition is not a name, a pair or a triple.
    """
    simulation_path = os.path.abspath(fpath)
    simulation_dir = os.path.dirname(simulation_path)
    # NOTE(review): yaml.load without a safe loader can build arbitrary
    # objects; this must only be fed trusted model files.
    with open(fpath) as f:
        content = yaml.load(f)

    expand_periodic_fields(content)
    content = handle_imports(content, simulation_dir)
    validate_dict(content, cls.yaml_layout)

    # the goal is to get something like:
    # globals_def = {'periodic': [('a': int), ...],
    #                'MIG': int}
    globals_def = content.get('globals', {})
    for k, v in globals_def.iteritems():
        # the definitions are converted in place
        if "type" in v:
            v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
        else:
            # TODO: fields should be optional (would use all the fields
            # provided in the file)
            v["fields"] = fields_yaml_to_type(v["fields"])

    simulation_def = content['simulation']
    seed = simulation_def.get('random_seed')
    if seed is not None:
        seed = int(seed)
        print("using fixed random seed: %d" % seed)
        random.seed(seed)
        np.random.seed(seed)

    periods = simulation_def['periods']
    time_scale = simulation_def.get('time_scale', 'year')
    retro = simulation_def.get('retro', False)

    # exactly one of start_period / init_period must be provided
    start_period = simulation_def.get('start_period', None)
    init_period = simulation_def.get('init_period', None)
    if start_period is None and init_period is None:
        raise Exception("Either start_period either init_period should be "
                        "given.")
    if start_period is not None:
        if init_period is not None:
            raise Exception("Start_period can't be given if init_period "
                            "is.")
        # the sign of the step is flipped when 'retro' is true
        step = time_period[time_scale] * (1 - 2 * (retro))
        init_period = addmonth(start_period, step)

    config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
    # TODO: check that the value is one of "raise", "skip", "warn"
    config.assertions = simulation_def.get('assertions', config.assertions)

    logging_def = simulation_def.get('logging', {})
    config.log_level = logging_def.get('level', config.log_level)
    if 'timings' in simulation_def:
        warnings.warn("simulation.timings is deprecated, please use "
                      "simulation.logging.timings instead",
                      DeprecationWarning)
        config.show_timings = simulation_def['timings']
    config.show_timings = logging_def.get('timings', config.show_timings)

    autodump = simulation_def.get('autodump', None)
    if autodump is True:
        autodump = 'autodump.h5'
    if isinstance(autodump, basestring):
        # by default autodump will dump all rows
        autodump = (autodump, None)
    config.autodump = autodump

    autodiff = simulation_def.get('autodiff', None)
    if autodiff is True:
        autodiff = 'autodump.h5'
    if isinstance(autodiff, basestring):
        # by default autodiff will compare all rows
        autodiff = (autodiff, None)
    config.autodiff = autodiff

    legislation = simulation_def.get('legislation', None)
    final_stat = simulation_def.get('final_stat', None)

    input_def = simulation_def['input']
    if input_dir is not None:
        input_directory = input_dir
    else:
        input_directory = input_def.get('path', '')
    if not os.path.isabs(input_directory):
        input_directory = os.path.join(simulation_dir, input_directory)
    config.input_directory = input_directory

    output_def = simulation_def['output']
    if output_dir is not None:
        output_directory = output_dir
    else:
        output_directory = output_def.get('path', '')
    # relative output paths are resolved against the simulation directory
    # (an assert used to reject them before this resolution could run)
    if not os.path.isabs(output_directory):
        output_directory = os.path.join(simulation_dir, output_directory)
    if not os.path.exists(output_directory):
        print("creating directory: '%s'" % output_directory)
        os.makedirs(output_directory)
    config.output_directory = output_directory

    if output_file is None:
        output_file = output_def['file']
    output_path = os.path.join(output_directory, output_file)

    # need to be before processes because in case of legislation, we need
    # input_table for now.
    method = input_def.get('method', 'h5')
    if method == 'h5':
        if input_file is None:
            input_file = input_def['file']
        input_path = os.path.join(input_directory, input_file)
        data_source = H5Data(input_path, output_path)
    elif method == 'void':
        data_source = Void(output_path)
    else:
        # fail right away instead of hitting an undefined data_source later
        raise ValueError("'%s' is an invalid value for 'method'. It should "
                         "be either 'h5' or 'void'" % method)

    entities = {}
    for k, v in content['entities'].iteritems():
        entities[k] = Entity.from_yaml(k, v)

    for entity in entities.itervalues():
        entity.attach_and_resolve_links(entities)

    global_context = {'__globals__': global_symbols(globals_def),
                      '__entities__': entities}
    parsing_context = global_context.copy()
    parsing_context.update((entity.name,
                            entity.all_symbols(global_context))
                           for entity in entities.itervalues())
    for entity in entities.itervalues():
        parsing_context['__entity__'] = entity.name
        entity.parse_processes(parsing_context)
        entity.compute_lagged_fields()
        # entity.optimize_processes()

    # for entity in entities.itervalues():
    #     entity.resolve_method_calls()

    # Entities are looked up in the dict built above: nothing in this
    # method registers them anywhere else.
    init_def = [d.items()[0] for d in simulation_def.get('init', {})]
    init_processes = []
    init_entities = set()
    for ent_name, proc_names in init_def:
        if ent_name != 'legislation':
            if ent_name not in entities:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entities[ent_name]
            init_entities.add(entity)
            init_processes.extend([(entity.processes[proc_name], 1, 1)
                                   for proc_name in proc_names])
        else:
            # proc1 = ExtProcess('liam2of', ['simulation', None])
            proc2 = ExtProcess('of_on_liam',
                               ['simulation', 2009, 'period'])
            # proc3 = ExtProcess('merge_leg',
            #                    ['simulation', data_source.output_path,
            #                     "C:/Til/output/"+"simul_leg.h5", 'period'])
            # init_processes.append((proc1, 1))
            init_processes.append((proc2, 1, 1))
            # processes.append((proc3, 1))

    processes_def = [d.items()[0] for d in simulation_def['processes']]
    processes = []
    entity_set = set()
    for ent_name, proc_defs in processes_def:
        if ent_name != 'legislation':
            entity = entities[ent_name]
            entity_set.add(entity)
            for proc_def in proc_defs:
                if isinstance(proc_def, basestring):
                    # proc_def is simply a process name: use the default
                    # periodicity and start of 1
                    proc_name, periodicity, start = proc_def, 1, 1
                elif len(proc_def) == 3:
                    proc_name, periodicity, start = proc_def
                elif len(proc_def) == 2:
                    proc_name, periodicity = proc_def
                    start = 1
                else:
                    # do not silently reuse values of the previous process
                    raise ValueError("invalid process definition for "
                                     "entity '%s': %r"
                                     % (ent_name, proc_def))
                processes.append((entity.processes[proc_name],
                                  periodicity, start))
        else:
            # proc1 = ExtProcess('liam2of', ['simulation', None])
            proc2 = ExtProcess('of_on_liam',
                               ['simulation', proc_defs[0], 'period'])
            # proc3 = ExtProcess('merge_leg',
            #                    ['simulation', data_source.output_path,
            #                     "C:/Til/output/"+"simul_leg.h5", 'period'])
            # processes.append((proc1, 1))
            processes.append((proc2, 'year', 12))
            # processes.append((proc3, 1))

    # only entities which have at least one (non init) process are kept
    entities_list = sorted(entity_set, key=lambda e: e.name)

    default_entity = simulation_def.get('default_entity')
    # processes[2][0].subprocesses[0][0]
    return Simulation(globals_def, periods, init_period,
                      init_processes, init_entities, processes,
                      entities_list, data_source, default_entity,
                      legislation, final_stat, time_scale, retro)
def from_str(cls, yaml_str, simulation_dir='',
             input_dir=None, input_file=None,
             output_dir=None, output_file=None,
             start_period=None, periods=None, seed=None,
             skip_shows=None, skip_timings=None, log_level=None,
             assertions=None, autodump=None, autodiff=None, runs=None):
    """Create a Simulation from the YAML text *yaml_str*.

    Every keyword argument left to None falls back to the corresponding
    entry of the 'simulation' section (or to the current ``config`` value
    where the code below says so). *simulation_dir* is the directory
    against which imports and relative input/output paths are resolved.

    Side effects: ``config`` attributes are updated, the random generators
    are seeded when a seed is known, the output directory is created when
    missing and, when no output file is configured, a temporary directory
    is created inside it and only a minimal set of fields is output.

    Raises SyntaxError when the 'simulation' section has neither 'init'
    nor 'processes', and Exception when 'init' names an unknown entity.
    """
    content = yaml.load(yaml_str)
    expand_periodic_fields(content)
    content = handle_imports(content, simulation_dir)
    validate_dict(content, cls.yaml_layout)

    # the goal is to get something like:
    # globals_def = {'periodic': {'fields': [('a': int), ...], ...},
    #                'MIG': {'type': int}}
    globals_def = {}
    for glob_name, glob_def in content.get('globals', {}).iteritems():
        if "type" not in glob_def:
            # TODO: fields should be optional (would use all the fields
            # provided in the file)
            glob_def["fields"] = fields_yaml_to_type(glob_def["fields"])
        else:
            glob_def["type"] = field_str_to_type(glob_def["type"],
                                                 "array '%s'" % glob_name)
        globals_def[glob_name] = glob_def

    simulation_def = content['simulation']

    # --- random seed ---
    if seed is None:
        seed = simulation_def.get('random_seed')
    if seed is not None:
        seed = int(seed)
        print("using fixed random seed: %d" % seed)
        random.seed(seed)
        np.random.seed(seed)

    if periods is None:
        periods = simulation_def['periods']
    if start_period is None:
        start_period = simulation_def['start_period']

    # --- global configuration ---
    config.skip_shows = (simulation_def.get('skip_shows', config.skip_shows)
                         if skip_shows is None else skip_shows)
    # TODO: check that the value is one of "raise", "skip", "warn"
    config.assertions = (simulation_def.get('assertions', config.assertions)
                         if assertions is None else assertions)

    logging_def = simulation_def.get('logging', {})
    config.log_level = (logging_def.get('level', config.log_level)
                        if log_level is None else log_level)
    if config.log_level == 'procedures':
        config.log_level = 'functions'
        warnings.warn("'procedures' logging.level is deprecated, "
                      "please use 'functions' instead",
                      UserDeprecationWarning)

    if 'timings' in simulation_def:
        warnings.warn("simulation.timings is deprecated, please use "
                      "simulation.logging.timings instead",
                      UserDeprecationWarning)
        config.show_timings = simulation_def['timings']
    # an explicit skip_timings argument wins over the file
    config.show_timings = (logging_def.get('timings', config.show_timings)
                           if skip_timings is None else not skip_timings)

    # autodump and autodiff are normalised the same way: True means the
    # default file name and a bare file name means "all rows"
    for option, value in (('autodump', autodump), ('autodiff', autodiff)):
        if value is None:
            value = simulation_def.get(option)
        if value is True:
            value = 'autodump.h5'
        if isinstance(value, basestring):
            value = (value, None)
        setattr(config, option, value)

    # --- input location ---
    input_def = simulation_def['input']
    if input_dir is None:
        input_dir = input_def.get('path', '')
    if not os.path.isabs(input_dir):
        input_dir = os.path.join(simulation_dir, input_dir)
    config.input_directory = input_dir

    if input_file is None:
        input_file = input_def.get('file', '')
    input_path = os.path.join(input_dir, input_file)

    # --- output location ---
    output_def = simulation_def['output']
    if output_dir is None:
        output_dir = output_def.get('path', '')
    if not os.path.isabs(output_dir):
        output_dir = os.path.join(simulation_dir, output_dir)
    if not os.path.exists(output_dir):
        print("creating directory: '%s'" % output_dir)
        os.makedirs(output_dir)
    config.output_directory = output_dir

    if output_file is None:
        output_file = output_def.get('file', '')
    minimal_output = not output_file
    if minimal_output:
        # using a temporary directory instead of a temporary file
        # because tempfile.* only returns file-like objects (which
        # pytables does not support) or directories, not file names.
        tmp_dir = tempfile.mkdtemp(prefix='liam2-', suffix='-tmp',
                                   dir=output_dir)
        output_path = os.path.join(tmp_dir, 'simulation.h5')
    else:
        output_path = os.path.join(output_dir, output_file)

    # --- entities ---
    entities = dict((ent_name, Entity.from_yaml(ent_name, ent_def))
                    for ent_name, ent_def
                    in content['entities'].iteritems())
    for entity in entities.itervalues():
        entity.attach_and_resolve_links(entities)

    global_context = {'__globals__': global_symbols(globals_def),
                      '__entities__': entities}
    parsing_context = global_context.copy()
    for entity in entities.itervalues():
        parsing_context[entity.name] = entity.all_symbols(global_context)

    # compute the lag variable for each entity (an entity can cause fields
    # from other entities to be added via links)
    # dict of sets
    lag_vars_by_entity = defaultdict(set)
    for entity in entities.itervalues():
        parsing_context['__entity__'] = entity.name
        entity.parse_processes(parsing_context)
        found = entity.compute_lagged_fields()
        for target in found:
            lag_vars_by_entity[target.name] |= found[target]

    # store that in entity.lag_fields and create entity.array_lag
    for entity in entities.itervalues():
        lagged = lag_vars_by_entity[entity.name]
        if not lagged:
            entity.lag_fields = []
            continue

        # make sure we have an 'id' column, and that it comes first
        # (makes debugging easier). 'id' is always necessary for lag
        # expressions to be able to "expand" the vector of values to the
        # "current" individuals.
        lagged.discard('id')
        type_of = dict(entity.fields.name_types)
        lag_fields = [(name, type_of[name])
                      for name in ['id'] + sorted(lagged)]
        # FIXME: this should be initialized to the data from
        # start_period - 2, if any so that we can use lag() in an init
        # process
        entity.array_lag = np.empty(0, dtype=np.dtype(lag_fields))
        entity.lag_fields = lag_fields

    # compute minimal fields for each entity and set all which are not
    # minimal to output=False
    if minimal_output:
        min_fields_by_entity = defaultdict(set)
        for entity in entities.itervalues():
            found = entity.compute_lagged_fields(inspect_one_period=False)
            for target in found:
                min_fields_by_entity[target.name] |= found[target]

        for entity in entities.itervalues():
            keep = min_fields_by_entity[entity.name]
            if keep:
                keep.add('id')
                keep.add('period')
            for field in entity.fields.in_output:
                if field.name not in keep:
                    field.output = False

    if not ('init' in simulation_def or 'processes' in simulation_def):
        raise SyntaxError("the 'simulation' section must have at least one "
                          "of 'processes' or 'init' subsection")

    # for entity in entities.itervalues():
    #     entity.resolve_method_calls()

    # --- init and periodic processes ---
    used_entities = set()

    init_def = [d.items()[0] for d in simulation_def.get('init', [])]
    init_processes = []
    for ent_name, proc_names in init_def:
        if ent_name not in entities:
            raise Exception("Entity '%s' not found" % ent_name)
        entity = entities[ent_name]
        used_entities.add(ent_name)
        # init processes always get a periodicity of 1
        for proc_name in proc_names:
            init_processes.append((entity.processes[proc_name], 1))

    processes_def = [d.items()[0]
                     for d in simulation_def.get('processes', [])]
    processes = []
    for ent_name, proc_defs in processes_def:
        entity = entities[ent_name]
        used_entities.add(ent_name)
        for proc_def in proc_defs:
            if isinstance(proc_def, basestring):
                # a bare process name: use the default periodicity of 1
                proc_def = (proc_def, 1)
            proc_name, periodicity = proc_def
            processes.append((entity.processes[proc_name], periodicity))

    entities_list = sorted(entities.values(), key=lambda e: e.name)
    declared_entities = set(e.name for e in entities_list)
    unused_entities = declared_entities - used_entities
    if unused_entities:
        suffix = 'ies' if len(unused_entities) != 1 else 'y'
        print("WARNING: entit%s without any executed process:" % suffix,
              ','.join(sorted(unused_entities)))

    input_method = input_def.get('method', 'h5')
    default_entity = simulation_def.get('default_entity')
    if runs is None:
        runs = simulation_def.get('runs', 1)

    return Simulation(globals_def, periods, start_period, init_processes,
                      processes, entities_list, input_method, input_path,
                      output_path, default_entity, runs, minimal_output)
def from_yaml(cls, fpath, input_dir=None, input_file=None, output_dir=None,
              output_file=None):
    """Create a Simulation from the YAML model file *fpath*.

    The file is loaded, its imports are expanded relative to the directory
    containing it and the result is validated against ``cls.yaml_layout``.

    Side effects: several ``config`` attributes are set (skip_shows,
    assertions, log_level, show_timings, autodump, autodiff,
    input_directory, output_directory), the ``random`` and ``numpy.random``
    generators are seeded when 'random_seed' is present, the output
    directory is created if it does not exist and a warning is printed for
    declared entities without any executed process.

    Parameters
    ----------
    fpath
        Path of the YAML simulation file.
    input_dir, output_dir
        When not None, used instead of simulation.input.path /
        simulation.output.path. Relative directories are resolved against
        the directory containing *fpath*.
    input_file, output_file
        When not None, used instead of simulation.input.file /
        simulation.output.file.

    Raises
    ------
    Exception
        If an 'init' entry names an entity which was not declared.
    ValueError
        If simulation.input.method is neither 'h5' nor 'void'.
    """
    simulation_path = os.path.abspath(fpath)
    simulation_dir = os.path.dirname(simulation_path)
    # NOTE(review): yaml.load without a safe loader can build arbitrary
    # objects; this must only be fed trusted model files.
    with open(fpath) as f:
        content = yaml.load(f)

    expand_periodic_fields(content)
    content = handle_imports(content, simulation_dir)
    validate_dict(content, cls.yaml_layout)

    # the goal is to get something like:
    # globals_def = {'periodic': [('a': int), ...],
    #                'MIG': int}
    globals_def = content.get('globals', {})
    for k, v in globals_def.iteritems():
        # the definitions are converted in place
        if "type" in v:
            v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
        else:
            # TODO: fields should be optional (would use all the fields
            # provided in the file)
            v["fields"] = fields_yaml_to_type(v["fields"])

    simulation_def = content['simulation']
    seed = simulation_def.get('random_seed')
    if seed is not None:
        seed = int(seed)
        print("using fixed random seed: %d" % seed)
        random.seed(seed)
        np.random.seed(seed)

    periods = simulation_def['periods']
    start_period = simulation_def['start_period']
    config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
    # TODO: check that the value is one of "raise", "skip", "warn"
    config.assertions = simulation_def.get('assertions', config.assertions)

    logging_def = simulation_def.get('logging', {})
    config.log_level = logging_def.get('level', config.log_level)
    if 'timings' in simulation_def:
        warnings.warn("simulation.timings is deprecated, please use "
                      "simulation.logging.timings instead",
                      DeprecationWarning)
        config.show_timings = simulation_def['timings']
    config.show_timings = logging_def.get('timings', config.show_timings)

    autodump = simulation_def.get('autodump', None)
    if autodump is True:
        autodump = 'autodump.h5'
    if isinstance(autodump, basestring):
        # by default autodump will dump all rows
        autodump = (autodump, None)
    config.autodump = autodump

    autodiff = simulation_def.get('autodiff', None)
    if autodiff is True:
        autodiff = 'autodump.h5'
    if isinstance(autodiff, basestring):
        # by default autodiff will compare all rows
        autodiff = (autodiff, None)
    config.autodiff = autodiff

    input_def = simulation_def['input']
    if input_dir is not None:
        input_directory = input_dir
    else:
        input_directory = input_def.get('path', '')
    if not os.path.isabs(input_directory):
        input_directory = os.path.join(simulation_dir, input_directory)
    config.input_directory = input_directory

    output_def = simulation_def['output']
    if output_dir is not None:
        output_directory = output_dir
    else:
        output_directory = output_def.get('path', '')
    if not os.path.isabs(output_directory):
        output_directory = os.path.join(simulation_dir, output_directory)
    if not os.path.exists(output_directory):
        print("creating directory: '%s'" % output_directory)
        os.makedirs(output_directory)
    config.output_directory = output_directory

    if output_file is None:
        output_file = output_def['file']
    output_path = os.path.join(output_directory, output_file)

    entities = {}
    for k, v in content['entities'].iteritems():
        entities[k] = Entity.from_yaml(k, v)

    for entity in entities.itervalues():
        entity.attach_and_resolve_links(entities)

    global_context = {'__globals__': global_symbols(globals_def),
                      '__entities__': entities}
    parsing_context = global_context.copy()
    parsing_context.update((entity.name,
                            entity.all_symbols(global_context))
                           for entity in entities.itervalues())
    for entity in entities.itervalues():
        parsing_context['__entity__'] = entity.name
        entity.parse_processes(parsing_context)
        entity.compute_lagged_fields()
        # entity.optimize_processes()

    # for entity in entities.itervalues():
    #     entity.resolve_method_calls()

    # names of the entities which have at least one init or periodic process
    used_entities = set()

    init_def = [d.items()[0] for d in simulation_def.get('init', {})]
    init_processes = []
    for ent_name, proc_names in init_def:
        if ent_name not in entities:
            raise Exception("Entity '%s' not found" % ent_name)

        entity = entities[ent_name]
        used_entities.add(ent_name)
        # init processes always get a periodicity of 1
        init_processes.extend([(entity.processes[proc_name], 1)
                               for proc_name in proc_names])

    processes_def = [d.items()[0] for d in simulation_def['processes']]
    processes = []
    for ent_name, proc_defs in processes_def:
        entity = entities[ent_name]
        used_entities.add(ent_name)
        for proc_def in proc_defs:
            # proc_def is simply a process name
            if isinstance(proc_def, basestring):
                # use the default periodicity of 1
                proc_name, periodicity = proc_def, 1
            else:
                proc_name, periodicity = proc_def
            processes.append((entity.processes[proc_name], periodicity))

    entities_list = sorted(entities.values(), key=lambda e: e.name)
    declared_entities = set(e.name for e in entities_list)
    unused_entities = declared_entities - used_entities
    if unused_entities:
        suffix = 'y' if len(unused_entities) == 1 else 'ies'
        print("WARNING: entit%s without any executed process:" % suffix,
              ','.join(sorted(unused_entities)))

    method = input_def.get('method', 'h5')
    if method == 'h5':
        if input_file is None:
            input_file = input_def['file']
        input_path = os.path.join(input_directory, input_file)
        data_source = H5Data(input_path, output_path)
    elif method == 'void':
        data_source = Void(output_path)
    else:
        # the offending value is interpolated into the message (it used to
        # be left as a literal '%s')
        raise ValueError("'%s' is an invalid value for 'method'. It should "
                         "be either 'h5' or 'void'" % method)

    default_entity = simulation_def.get('default_entity')
    return Simulation(globals_def, periods, start_period,
                      init_processes, processes, entities_list,
                      data_source, default_entity)