Example #1
    def __init__(self, eval_ctx):
        self.eval_ctx = eval_ctx

        globals_def = eval_ctx.simulation.globals_def
        globals_parse_ctx = {'__globals__': global_symbols(globals_def)}
        parse_ctx = globals_parse_ctx.copy()
        parse_ctx.update((entity.name, entity.all_symbols(globals_parse_ctx))
                         for entity in eval_ctx.entities.itervalues())
        parse_ctx['__entity__'] = eval_ctx.entity_name
        self.parse_ctx = parse_ctx
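
For illustration, a minimal sketch of the parsing context this constructor builds, assuming a single entity named 'person' (the entity name and the {...} placeholders are assumptions, not taken from the code above):

# hypothetical shape of self.parse_ctx after __init__
parse_ctx = {
    '__globals__': {...},    # symbols built from globals_def
    '__entity__': 'person',  # name of the entity being evaluated
    'person': {...},         # all symbols (fields, links, ...) of that entity
}
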
Example #2
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': {'fields': [('a', int), ...], ...},
        #                'MIG': {'type': int}}
        globals_def = content.get('globals', {})
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                #TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def['periods']
        time_scale = simulation_def.get('time_scale', 'year')
        retro = simulation_def.get('retro', False)

        start_period = simulation_def.get('start_period', None)
        init_period = simulation_def.get('init_period', None)
        if start_period is None and init_period is None:
            raise Exception("Either start_period or init_period must be given.")
        if start_period is not None:
            if init_period is not None:
                raise Exception("start_period cannot be given if init_period is.")
            step = time_period[time_scale] * (1 - 2 * (retro))
            init_period = addmonth(start_period, step)

        config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        # TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', config.assertions)

        logging_def = simulation_def.get('logging', {})
        config.log_level = logging_def.get('level', config.log_level)
        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          DeprecationWarning)
            config.show_timings = simulation_def['timings']
        config.show_timings = logging_def.get('timings', config.show_timings)

        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        legislation = simulation_def.get('legislation', None)
        final_stat = simulation_def.get('final_stat', None)

        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)

        method = input_def.get('method', 'h5')

        # this must happen before the processes are parsed because, in the
        # legislation case, the input table is needed for now
        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            input_path = None
            data_source = Void(output_path)
        else:
            raise ValueError("'%s' is an invalid value for 'method'. It should "
                             "be either 'h5' or 'void'" % method)

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity.compute_lagged_fields()
            # entity.optimize_processes()

        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        init_entities = set()
        entity_set = set()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes = []
        for ent_name, proc_names in init_def:
            if ent_name != 'legislation':
                if ent_name not in entities:
                    raise Exception("Entity '%s' not found" % ent_name)

                entity = entities[ent_name]
                init_entities.add(entity)
                init_processes.extend([(entity.processes[proc_name], 1, 1)
                                       for proc_name in proc_names])
            else:
                # proc1 = ExtProcess('liam2of', ['simulation', None])
                proc2 = ExtProcess('of_on_liam', ['simulation', 2009, 'period'])
                # proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
                #   "C:/Til/output/"+"simul_leg.h5",'period'])
                # init_processes.append((proc1, 1))
                init_processes.append((proc2, 1, 1))
                # processes.append((proc3, 1))

        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes = []
        for ent_name, proc_defs in processes_def:
            if ent_name != 'legislation':
                entity = entities[ent_name]
                entity_set.add(entity)
                for proc_def in proc_defs:
                    # proc_def is simply a process name
                    if isinstance(proc_def, basestring):
                        # use the default periodicity of 1
                        proc_name, periodicity, start = proc_def, 1, 1
                    else:
                        if len(proc_def) == 3:
                            proc_name, periodicity, start = proc_def
                        elif len(proc_def) == 2:
                            proc_name, periodicity = proc_def
                            start = 1
                    processes.append((entity.processes[proc_name], periodicity, start))
            else:
                # proc1 = ExtProcess('liam2of',['simulation',None])
                proc2 = ExtProcess('of_on_liam', ['simulation', proc_defs[0], 'period'])
                # proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
                #   "C:/Til/output/"+"simul_leg.h5",'period'])

                # processes.append((proc1, 1))
                processes.append((proc2, 'year', 12))
                # processes.append((proc3, 1))
        entities = sorted(entity_set, key=lambda e: e.name)

        default_entity = simulation_def.get('default_entity')
        # processes[2][0].subprocesses[0][0]
        return Simulation(globals_def, periods, init_period,
                          init_processes, init_entities, processes, entities,
                          data_source, default_entity, legislation, final_stat, time_scale, retro)
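
A possible driver sketch for this classmethod; the import path, the file name and the run() call are assumptions for illustration, not taken from the code above:

# hypothetical usage of Simulation.from_yaml
# from simulation import Simulation  # adjust the import to the project layout
simulation = Simulation.from_yaml('simulation.yml')
simulation.run()
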
Example #3
    def from_str(cls, yaml_str, simulation_dir='',
                 input_dir=None, input_file=None,
                 output_dir=None, output_file=None,
                 start_period=None, periods=None, seed=None,
                 skip_shows=None, skip_timings=None, log_level=None,
                 assertions=None, autodump=None, autodiff=None,
                 runs=None):
        content = yaml.load(yaml_str)
        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': {'fields': [('a', int), ...], ...},
        #                'MIG': {'type': int}}
        globals_def = {}
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                # TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        if seed is None:
            seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        if periods is None:
            periods = simulation_def['periods']
        if start_period is None:
            start_period = simulation_def['start_period']

        if skip_shows is None:
            skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        config.skip_shows = skip_shows
        if assertions is None:
            assertions = simulation_def.get('assertions', config.assertions)
        # TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = assertions

        logging_def = simulation_def.get('logging', {})
        if log_level is None:
            log_level = logging_def.get('level', config.log_level)
        config.log_level = log_level
        if config.log_level == 'procedures':
            config.log_level = 'functions'
            warnings.warn("'procedures' logging.level is deprecated, "
                          "please use 'functions' instead",
                          UserDeprecationWarning)

        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          UserDeprecationWarning)
            config.show_timings = simulation_def['timings']

        if skip_timings is None:
            show_timings = logging_def.get('timings', config.show_timings)
        else:
            show_timings = not skip_timings
        config.show_timings = show_timings

        if autodump is None:
            autodump = simulation_def.get('autodump')
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        if autodiff is None:
            autodiff = simulation_def.get('autodiff')
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        input_def = simulation_def['input']
        if input_dir is None:
            input_dir = input_def.get('path', '')
        if not os.path.isabs(input_dir):
            input_dir = os.path.join(simulation_dir, input_dir)
        config.input_directory = input_dir

        if input_file is None:
            input_file = input_def.get('file', '')
        input_path = os.path.join(input_dir, input_file)

        output_def = simulation_def['output']
        if output_dir is None:
            output_dir = output_def.get('path', '')
        if not os.path.isabs(output_dir):
            output_dir = os.path.join(simulation_dir, output_dir)
        if not os.path.exists(output_dir):
            print("creating directory: '%s'" % output_dir)
            os.makedirs(output_dir)
        config.output_directory = output_dir

        minimal_output = False
        if output_file is None:
            output_file = output_def.get('file', '')
            if output_file:
                output_path = os.path.join(output_dir, output_file)
            else:
                # using a temporary directory instead of a temporary file
                # because tempfile.* only returns file-like objects (which
                # pytables does not support) or directories, not file names.
                tmp_dir = tempfile.mkdtemp(prefix='liam2-', suffix='-tmp',
                                           dir=output_dir)
                output_path = os.path.join(tmp_dir, 'simulation.h5')
                minimal_output = True

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        # compute the lagged variables for each entity (an entity can cause
        # fields from other entities to be lagged via links)
        # lag_vars_by_entity is a dict of sets: {entity_name: {field_name, ...}}
        lag_vars_by_entity = defaultdict(set)
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity_lag_vars = entity.compute_lagged_fields()
            for e in entity_lag_vars:
                lag_vars_by_entity[e.name] |= entity_lag_vars[e]

        # store that in entity.lag_fields and create entity.array_lag
        for entity in entities.itervalues():
            entity_lag_vars = lag_vars_by_entity[entity.name]
            if entity_lag_vars:
                # make sure we have an 'id' column, and that it comes first
                # (makes debugging easier). 'id' is always necessary for lag
                # expressions to be able to "expand" the vector of values to the
                # "current" individuals.
                entity_lag_vars.discard('id')
                sorted_vars = ['id'] + sorted(entity_lag_vars)
                field_type = dict(entity.fields.name_types)
                lag_fields = [(v, field_type[v]) for v in sorted_vars]
                # FIXME: this should be initialized to the data from
                # start_period - 2, if any so that we can use lag() in an init
                # process
                entity.array_lag = np.empty(0, dtype=np.dtype(lag_fields))
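                # for example, with lag_fields == [('id', int), ('age', float)]
                # (any field name other than 'id' is hypothetical here), this
                # creates an empty structured array with one column per lagged
                # field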
            else:
                lag_fields = []
            entity.lag_fields = lag_fields

        # compute the minimal set of fields for each entity and mark all other
        # fields as output=False
        if minimal_output:
            min_fields_by_entity = defaultdict(set)
            for entity in entities.itervalues():
                entity_lag_vars = entity.compute_lagged_fields(
                    inspect_one_period=False)
                for e in entity_lag_vars:
                    min_fields_by_entity[e.name] |= entity_lag_vars[e]
            for entity in entities.itervalues():
                minimal_fields = min_fields_by_entity[entity.name]
                if minimal_fields:
                    minimal_fields.add('id')
                    minimal_fields.add('period')
                for field in entity.fields.in_output:
                    if field.name not in minimal_fields:
                        field.output = False

        if 'init' not in simulation_def and 'processes' not in simulation_def:
            raise SyntaxError("the 'simulation' section must have at least one "
                              "of the 'processes' or 'init' subsections")
        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        used_entities = set()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes = []
        for ent_name, proc_names in init_def:
            if ent_name not in entities:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entities[ent_name]
            used_entities.add(ent_name)
            init_processes.extend([(entity.processes[proc_name], 1)
                                   for proc_name in proc_names])

        processes_def = [d.items()[0]
                         for d in simulation_def.get('processes', [])]
        processes = []
        for ent_name, proc_defs in processes_def:
            entity = entities[ent_name]
            used_entities.add(ent_name)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))

        entities_list = sorted(entities.values(), key=lambda e: e.name)
        declared_entities = set(e.name for e in entities_list)
        unused_entities = declared_entities - used_entities
        if unused_entities:
            suffix = 'y' if len(unused_entities) == 1 else 'ies'
            print("WARNING: entit%s without any executed process:" % suffix,
                  ','.join(sorted(unused_entities)))

        input_method = input_def.get('method', 'h5')

        default_entity = simulation_def.get('default_entity')

        if runs is None:
            runs = simulation_def.get('runs', 1)
        return Simulation(globals_def, periods, start_period, init_processes,
                          processes, entities_list, input_method, input_path,
                          output_path, default_entity, runs, minimal_output)
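
A usage sketch for from_str with an inline YAML definition. The top-level keys match what the code above reads (entities, and simulation with input/output/start_period/periods/processes), but the concrete entity, field and process names are assumptions:

# hypothetical minimal simulation definition
yaml_str = """
entities:
    person:
        fields:
            - age: int
        processes:
            age: age + 1
simulation:
    processes:
        - person: [age]
    input:
        file: input.h5
    output:
        file: output.h5
    start_period: 2016
    periods: 10
"""
simulation = Simulation.from_str(yaml_str, simulation_dir='.')
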
Example #4
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': {'fields': [('a', int), ...], ...},
        #                'MIG': {'type': int}}
        globals_def = content.get('globals', {})
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                #TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def['periods']
        start_period = simulation_def['start_period']
        config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        #TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', config.assertions)

        logging_def = simulation_def.get('logging', {})
        config.log_level = logging_def.get('level', config.log_level)
        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          DeprecationWarning)
            config.show_timings = simulation_def['timings']
        config.show_timings = logging_def.get('timings', config.show_timings)

        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None \
                                    else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None \
                                      else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity.compute_lagged_fields()
            # entity.optimize_processes()

        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        used_entities = set()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes = []
        for ent_name, proc_names in init_def:
            if ent_name not in entities:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entities[ent_name]
            used_entities.add(ent_name)
            init_processes.extend([(entity.processes[proc_name], 1)
                                   for proc_name in proc_names])

        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes = []
        for ent_name, proc_defs in processes_def:
            entity = entities[ent_name]
            used_entities.add(ent_name)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))

        entities_list = sorted(entities.values(), key=lambda e: e.name)
        declared_entities = set(e.name for e in entities_list)
        unused_entities = declared_entities - used_entities
        if unused_entities:
            suffix = 'y' if len(unused_entities) == 1 else 'ies'
            print("WARNING: entit%s without any executed process:" % suffix,
                  ','.join(sorted(unused_entities)))

        method = input_def.get('method', 'h5')

        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            data_source = Void(output_path)
        else:
            raise ValueError("'%s' is an invalid value for 'method'. It should "
                             "be either 'h5' or 'void'" % method)

        default_entity = simulation_def.get('default_entity')
        return Simulation(globals_def, periods, start_period, init_processes,
                          processes, entities_list, data_source, default_entity)
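
For illustration, a sketch of what the processes list built above ends up containing, assuming an entity named 'person' with processes 'ageing' and 'census' (all of these names are hypothetical):

# a plain process name in the YAML gets the default periodicity of 1;
# a [name, periodicity] pair keeps its declared periodicity
processes = [
    (entities['person'].processes['ageing'], 1),   # from: ageing
    (entities['person'].processes['census'], 10),  # from: [census, 10]
]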