Example #1
def load_def(localdir, ent_name, section_def, required_fields):
    if 'type' in section_def and 'fields' in section_def:
        raise Exception("invalid structure for '%s': "
                        "type and fields sections are mutually exclusive"
                        % ent_name)

    if 'type' in section_def:
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        str_type = section_def['type']
        if isinstance(str_type, basestring):
            celltype = field_str_to_type(str_type, "array '%s'" % ent_name)
        else:
            assert isinstance(str_type, type)
            celltype = str_type
        return 'ndarray', load_ndarray(csv_filepath, celltype)

    fields_def = section_def.get('fields')
    if fields_def is not None:
        for fdef in fields_def:
            if isinstance(fdef, basestring):
                raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
        if all(isinstance(fdef, dict) for fdef in fields_def):
            fields = fields_yaml_to_type(fields_def)
        else:
            assert all(isinstance(fdef, tuple) for fdef in fields_def)
            fields = fields_def
    else:
        fields = None
    newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                           section_def.get('newnames', {}))
    transpose = section_def.get('transposed', False)

    interpolate_def = section_def.get('interpolate')
    files_def = section_def.get('files')
    if files_def is None:
        #XXX: it might be cleaner to use the same code path as for the
        # multi-file case (however, that would lose the "import any file
        # size" feature that I'm fond of).

        # we can simply return the stream as-is
        #FIXME: stream is not sorted
        # csv file is assumed to be in the correct order (ie by period then id)
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        csv_file = CSV(csv_filepath, newnames,
                       delimiter=',', transpose=transpose)
        if fields is not None:
            fields = required_fields + fields
        stream = csv_file.read(fields)
        if fields is None:
            fields = csv_file.fields
        if interpolate_def is not None:
            raise Exception('interpolate is currently only supported with '
                            'multiple files')
        return 'table', (fields, csv_file.numlines, stream, csv_file)
    else:
        # we have to load all files, merge them and return a stream out of that
        print(" * computing number of rows...")

        # 1) only load required fields
        default_args = dict(newnames=newnames, transpose=transpose)
        if isinstance(files_def, dict):
            files_items = files_def.items()
        elif isinstance(files_def, list) and files_def:
            if isinstance(files_def[0], dict):
                # handle YAML ordered dict structure
                files_items = [d.items()[0] for d in files_def]
            elif isinstance(files_def[0], basestring):
                files_items = [(path, {}) for path in files_def]
            else:
                raise Exception("invalid structure for 'files'")
        else:
            raise Exception("invalid structure for 'files'")

        #XXX: shouldn't we use the "path" defined for the whole entity if any?
        # section_def.get('path')
        files = []
        for path, kwargs in files_items:
            kwargs['newnames'] = \
                merge_dicts(invert_dict(kwargs.pop('oldnames', {})),
                            kwargs.get('newnames', {}))
            f = CSV(complete_path(localdir, path),
                    **merge_dicts(default_args, kwargs))
            files.append(f)
        id_periods = union1d(f.as_array(required_fields) for f in files)

        print(" * reading files...")
        # 2) load all fields
        if fields is None:
            target_fields = merge_items(*[f.fields for f in files])
            fields_per_file = [None for f in files]
        else:
            target_fields = required_fields + fields
            fields_per_file = [[(name, type_) for name, type_ in target_fields
                               if name in f.field_names]
                              for f in files]
            total_fields = set.union(*[set(f.field_names) for f in files])
            missing = set(name for name, _ in target_fields) - total_fields
            if missing:
                raise Exception("the following fields were not found in any "
                                "file: %s" % ", ".join(missing))

        total_lines = len(id_periods)

        # allocate main array
        target = np.empty(total_lines, dtype=np.dtype(target_fields))
        # fill with default values
        target[:] = tuple(missing_values[ftype] for _, ftype in target_fields)
        target['period'] = id_periods['period']
        target['id'] = id_periods['id']

        arrays = [f.as_array(fields_to_load)
                  for f, fields_to_load in zip(files, fields_per_file)]

        # close all files
        for f in files:
            f.close()

        #FIXME: interpolation currently only interpolates missing data points,
        # not data points with their value equal the missing value
        # corresponding to the field type. This can only be fixed once
        # booleans are loaded as int8.
        if interpolate_def is not None:
            if any(v != 'previous_value'
                   for v in interpolate_def.itervalues()):
                raise Exception("currently, only 'previous_value' "
                                "interpolation is supported")
            to_interpolate = [k for k, v in interpolate_def.iteritems()
                              if v == 'previous_value']
        else:
            to_interpolate = []

        interpolate(target, arrays, id_periods, to_interpolate)
        return 'table', (target_fields, total_lines, iter(target), None)
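
A minimal sketch of the renaming logic above, with stand-in implementations of invert_dict and merge_dicts (the real helpers live elsewhere in LIAM2, and the direction of the 'oldnames' mapping is my reading of the inversion, so treat both as assumptions):

def invert_dict(d):
    # {key: value} -> {value: key}
    return dict((v, k) for k, v in d.items())

def merge_dicts(d1, d2):
    # values from d2 take precedence over those from d1
    result = d1.copy()
    result.update(d2)
    return result

# 'oldnames' seems to map new field name -> name used in the CSV file, while
# 'newnames' maps CSV name -> new field name, hence the inversion before both
# are merged into a single CSV name -> new name dictionary.
section_def = {'oldnames': {'age': 'AGE'},
               'newnames': {'WORKSTATE': 'workstate'}}
newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                       section_def.get('newnames', {}))
print(newnames)  # {'AGE': 'age', 'WORKSTATE': 'workstate'}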
Example #2
def file2h5(fpath, input_dir='',
            buffersize=10 * 2 ** 20):
    with open(fpath) as f:
        content = yaml.load(f)

    yaml_layout = {
        '#output': str,
        'compression': str,
        'globals': {
            'periodic': {
                'path': str,
                'fields': [{
                    '*': str
                }],
                'oldnames': {
                    '*': str
                },
                'newnames': {
                    '*': str
                },
                'invert': [str],
                'transposed': bool
            },
            '*': {
                'path': str,
                'type': str,
                'fields': [{
                    '*': str
                }],
                'oldnames': {
                    '*': str
                },
                'newnames': {
                    '*': str
                },
                'invert': [str],
                'transposed': bool
            }
        },
        '#entities': {
            '*': {
                'path': str,
                'fields': [{
                    '*': str
                }],
                'oldnames': {
                    '*': str
                },
                'newnames': {
                    '*': str
                },
                'invert': [str],
                'transposed': bool,
                'files': None,
#                {
#                    '*': None
#                }
                'interpolate': {
                    '*': str
                }
            }
        }
    }

    validate_dict(content, yaml_layout)
    localdir = os.path.dirname(os.path.abspath(fpath))

    h5_filename = content['output']
    compression = content.get('compression')
    h5_filepath = complete_path(localdir, h5_filename)
    print("Importing in", h5_filepath)
    # open the file before the try block, so that the finally clause cannot
    # hit an undefined h5file if opening fails
    h5file = tables.open_file(h5_filepath, mode="w", title="CSV import")
    try:
        globals_def = content.get('globals', {})
        if globals_def:
            print()
            print("globals")
            print("-------")
            const_node = h5file.create_group("/", "globals", "Globals")
            for global_name, global_def in globals_def.iteritems():
                print()
                print(" %s" % global_name)
                req_fields = ([('PERIOD', int)] if global_name == 'periodic'
                                                else [])

                kind, info = load_def(localdir, global_name,
                                      global_def, req_fields)
                if kind == 'ndarray':
                    array_to_disk_array(h5file, const_node, global_name, info,
                                        title=global_name,
                                        compression=compression)
                else:
                    assert kind == 'table'
                    fields, numlines, datastream, csvfile = info
                    stream_to_table(h5file, const_node, global_name, fields,
                                    datastream, numlines,
                                    title="%s table" % global_name,
                                    buffersize=buffersize,
                                    #FIXME: handle invert
                                    compression=compression)
                    if csvfile is not None:
                        csvfile.close()

        print()
        print("entities")
        print("--------")
        ent_node = h5file.create_group("/", "entities", "Entities")
        for ent_name, entity_def in content['entities'].iteritems():
            print()
            print(" %s" % ent_name)
            input_filename = entity_def.get('path',
                                            input_dir + ent_name + ".csv")
            if input_filename.endswith('.csv'):
                kind, info = load_def(localdir, ent_name,
                                      entity_def, [('period', int), ('id', int)])
                assert kind == "table"
                fields, numlines, datastream, csvfile = info

                stream_to_table(h5file, ent_node, ent_name, fields,
                                datastream, numlines,
                                title="%s table" % ent_name,
                                invert=entity_def.get('invert', []),
                                buffersize=buffersize, compression=compression)
                if csvfile is not None:
                    csvfile.close()

            elif input_filename.endswith('.Rdata'):

                files_def = entity_def.get('files')
                if files_def is None:
                    files_def = ent_name
                print(" - reading", input_filename, ",file", files_def)
                rpy.set_default_mode(rpy.NO_CONVERSION)
                msg, filters = compression_str2filter(compression)

                try:
                    rpy.r.load(input_dir + input_filename)
                except Exception:
                    # fall back to the bare filename if the prefixed path
                    # fails
                    rpy.r.load(input_filename)
                print(" - storing %s..." % msg)

                array_pandas = com.load_data(files_def)
                fields_def = entity_def.get('fields')
                if fields_def is not None:
                    for fdef in fields_def:
                        if isinstance(fdef, basestring):
                            raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
                    fields = fields_yaml_to_type(fields_def)
                    columns = [col[0] for col in fields] + ['id', 'period']
                else:
                    fields = None
                    columns = array_pandas.columns

                array_pandas = array_pandas.loc[:, columns]
                # TODO: handle conflicts between the types declared in the
                # YAML file and the dtype inferred by pandas
                dtype = array_pandas.to_records(index=False).dtype
                filters = None
                table = h5file.create_table(ent_node, ent_name, dtype,
                                            title="%s table" % ent_name,
                                            filters=filters)
                table.append(array_pandas.to_records(index=False))
                table.flush()
    finally:
        h5file.close()
    print()
    print("done.")
Example #3
    def from_str(cls, yaml_str, simulation_dir='',
                 input_dir=None, input_file=None,
                 output_dir=None, output_file=None,
                 start_period=None, periods=None, seed=None,
                 skip_shows=None, skip_timings=None, log_level=None,
                 assertions=None, autodump=None, autodiff=None,
                 runs=None):
        content = yaml.load(yaml_str)
        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': {'fields': [('a', int), ...], ...},
        #                'MIG': {'type': int}}
        globals_def = {}
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                # TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        if seed is None:
            seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        if periods is None:
            periods = simulation_def['periods']
        if start_period is None:
            start_period = simulation_def['start_period']

        if skip_shows is None:
            skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        config.skip_shows = skip_shows
        if assertions is None:
            assertions = simulation_def.get('assertions', config.assertions)
        # TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = assertions

        logging_def = simulation_def.get('logging', {})
        if log_level is None:
            log_level = logging_def.get('level', config.log_level)
        config.log_level = log_level
        if config.log_level == 'procedures':
            config.log_level = 'functions'
            warnings.warn("'procedures' logging.level is deprecated, "
                          "please use 'functions' instead",
                          UserDeprecationWarning)

        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          UserDeprecationWarning)
            config.show_timings = simulation_def['timings']

        if skip_timings is None:
            show_timings = logging_def.get('timings', config.show_timings)
        else:
            show_timings = not skip_timings
        config.show_timings = show_timings

        if autodump is None:
            autodump = simulation_def.get('autodump')
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        if autodiff is None:
            autodiff = simulation_def.get('autodiff')
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        input_def = simulation_def['input']
        if input_dir is None:
            input_dir = input_def.get('path', '')
        if not os.path.isabs(input_dir):
            input_dir = os.path.join(simulation_dir, input_dir)
        config.input_directory = input_dir

        if input_file is None:
            input_file = input_def.get('file', '')
        input_path = os.path.join(input_dir, input_file)

        output_def = simulation_def['output']
        if output_dir is None:
            output_dir = output_def.get('path', '')
        if not os.path.isabs(output_dir):
            output_dir = os.path.join(simulation_dir, output_dir)
        if not os.path.exists(output_dir):
            print("creating directory: '%s'" % output_dir)
            os.makedirs(output_dir)
        config.output_directory = output_dir

        minimal_output = False
        if output_file is None:
            output_file = output_def.get('file', '')
            if output_file:
                output_path = os.path.join(output_dir, output_file)
            else:
                # using a temporary directory instead of a temporary file
                # because tempfile.* only returns file-like objects (which
                # pytables does not support) or directories, not file names.
                tmp_dir = tempfile.mkdtemp(prefix='liam2-', suffix='-tmp',
                                           dir=output_dir)
                output_path = os.path.join(tmp_dir, 'simulation.h5')
                minimal_output = True

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        # compute the lag variables for each entity (an entity can cause
        # fields from other entities to be added via links)
        # dict of sets
        lag_vars_by_entity = defaultdict(set)
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity_lag_vars = entity.compute_lagged_fields()
            for e in entity_lag_vars:
                lag_vars_by_entity[e.name] |= entity_lag_vars[e]

        # store that in entity.lag_fields and create entity.array_lag
        for entity in entities.itervalues():
            entity_lag_vars = lag_vars_by_entity[entity.name]
            if entity_lag_vars:
                # make sure we have an 'id' column, and that it comes first
                # (makes debugging easier). 'id' is always necessary for lag
                # expressions to be able to "expand" the vector of values to the
                # "current" individuals.
                entity_lag_vars.discard('id')
                sorted_vars = ['id'] + sorted(entity_lag_vars)
                field_type = dict(entity.fields.name_types)
                lag_fields = [(v, field_type[v]) for v in sorted_vars]
                # FIXME: this should be initialized to the data from
                # start_period - 2, if any so that we can use lag() in an init
                # process
                entity.array_lag = np.empty(0, dtype=np.dtype(lag_fields))
            else:
                lag_fields = []
            entity.lag_fields = lag_fields

        # compute minimal fields for each entity and set all which are not
        # minimal to output=False
        if minimal_output:
            min_fields_by_entity = defaultdict(set)
            for entity in entities.itervalues():
                entity_lag_vars = entity.compute_lagged_fields(
                    inspect_one_period=False)
                for e in entity_lag_vars:
                    min_fields_by_entity[e.name] |= entity_lag_vars[e]
            for entity in entities.itervalues():
                minimal_fields = min_fields_by_entity[entity.name]
                if minimal_fields:
                    minimal_fields.add('id')
                    minimal_fields.add('period')
                for field in entity.fields.in_output:
                    if field.name not in minimal_fields:
                        field.output = False

        if 'init' not in simulation_def and 'processes' not in simulation_def:
            raise SyntaxError("the 'simulation' section must have at least one "
                              "of 'processes' or 'init' subsection")
        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        used_entities = set()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes = []
        for ent_name, proc_names in init_def:
            if ent_name not in entities:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entities[ent_name]
            used_entities.add(ent_name)
            init_processes.extend([(entity.processes[proc_name], 1)
                                   for proc_name in proc_names])

        processes_def = [d.items()[0]
                         for d in simulation_def.get('processes', [])]
        processes = []
        for ent_name, proc_defs in processes_def:
            entity = entities[ent_name]
            used_entities.add(ent_name)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))

        entities_list = sorted(entities.values(), key=lambda e: e.name)
        declared_entities = set(e.name for e in entities_list)
        unused_entities = declared_entities - used_entities
        if unused_entities:
            suffix = 'y' if len(unused_entities) == 1 else 'ies'
            print("WARNING: entit%s without any executed process:" % suffix,
                  ','.join(sorted(unused_entities)))

        input_method = input_def.get('method', 'h5')

        default_entity = simulation_def.get('default_entity')

        if runs is None:
            runs = simulation_def.get('runs', 1)
        return Simulation(globals_def, periods, start_period, init_processes,
                          processes, entities_list, input_method, input_path,
                          output_path, default_entity, runs, minimal_output)
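
The lag bookkeeping above reduces to unioning sets into a defaultdict keyed by entity name, since an entity's lag() expressions can touch fields of other entities. A toy version of that accumulation, with made-up entity and field names (the real code keys on entity objects returned by compute_lagged_fields):

from collections import defaultdict

# each entity reports {target entity: fields used in lag() expressions};
# the sets are unioned per target entity
reported = [
    {'person': {'age', 'workstate'}},                   # from 'person'
    {'person': {'age'}, 'household': {'nb_persons'}},   # from 'household'
]
lag_vars_by_entity = defaultdict(set)
for entity_lag_vars in reported:
    for ent_name, fields in entity_lag_vars.items():
        lag_vars_by_entity[ent_name] |= fields

print(sorted(lag_vars_by_entity['person']))     # ['age', 'workstate']
print(sorted(lag_vars_by_entity['household']))  # ['nb_persons']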
Example #4
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': [('a', int), ...],
        #                'MIG': int}
        globals_def = content.get('globals', {})
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                #TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def['periods']
        time_scale = simulation_def.get('time_scale', 'year')
        retro = simulation_def.get('retro', False)

        start_period = simulation_def.get('start_period', None)
        init_period = simulation_def.get('init_period', None)
        if start_period is None and init_period is None:
            raise Exception("Either start_period either init_period should be given.")
        if start_period is not None:
            if init_period is not None:
                raise Exception("Start_period can't be given if init_period is.")
            step = time_period[time_scale] * (1 - 2 * (retro))
            init_period = addmonth(start_period, step)

        config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        # TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', config.assertions)

        logging_def = simulation_def.get('logging', {})
        config.log_level = logging_def.get('level', config.log_level)
        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          DeprecationWarning)
            config.show_timings = simulation_def['timings']
        config.show_timings = logging_def.get('timings', config.show_timings)

        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        legislation = simulation_def.get('legislation', None)
        final_stat = simulation_def.get('final_stat', None)

        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None \
                                    else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None \
                                      else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)

        method = input_def.get('method', 'h5')

        # this needs to happen before the processes are parsed because, in
        # the legislation case, we need input_table for now
        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            input_path = None
            data_source = Void(output_path)
        else:
            raise ValueError("'%s' is an invalid value for 'method'. It "
                             "should be either 'h5' or 'void'" % method)

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity.compute_lagged_fields()
            # entity.optimize_processes()

        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes, init_entities = [], set()
        for ent_name, proc_names in init_def:
            if ent_name != 'legislation':
                if ent_name not in entities:
                    raise Exception("Entity '%s' not found" % ent_name)

                entity = entities[ent_name]
                init_entities.add(entity)
                init_processes.extend([(entity.processes[proc_name], 1, 1)
                                       for proc_name in proc_names])
            else:
                # proc1 = ExtProcess('liam2of', ['simulation', None])
                proc2 = ExtProcess('of_on_liam', ['simulation', 2009, 'period'])
                # proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
                #   "C:/Til/output/"+"simul_leg.h5",'period'])
                # init_processes.append((proc1, 1))
                init_processes.append((proc2, 1, 1))
                # processes.append((proc3, 1))

        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes, entity_set = [], set()
        for ent_name, proc_defs in processes_def:
            if ent_name != 'legislation':
                entity = entities[ent_name]
                entity_set.add(entity)
                for proc_def in proc_defs:
                    # proc_def is simply a process name
                    if isinstance(proc_def, basestring):
                        # use the default periodicity of 1
                        proc_name, periodicity, start = proc_def, 1, 1
                    else:
                        if len(proc_def) == 3:
                            proc_name, periodicity, start = proc_def
                        elif len(proc_def) == 2:
                            proc_name, periodicity = proc_def
                            start = 1
                    processes.append((entity.processes[proc_name], periodicity, start))
            else:
                # proc1 = ExtProcess('liam2of',['simulation',None])
                proc2 = ExtProcess('of_on_liam', ['simulation', proc_defs[0], 'period'])
                # proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
                #   "C:/Til/output/"+"simul_leg.h5",'period'])

                # processes.append((proc1, 1))
                processes.append((proc2, 'year', 12))
                # processes.append((proc3, 1))
        entities = sorted(entity_set, key=lambda e: e.name)

        default_entity = simulation_def.get('default_entity')
        # processes[2][0].subprocesses[0][0]
        return Simulation(globals_def, periods, init_period,
                          init_processes, init_entities, processes, entities,
                          data_source, default_entity, legislation, final_stat, time_scale, retro)
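
The init_period derivation above hinges on the sign trick (1 - 2 * retro): +1 when retro is False, -1 when it is True. A self-contained sketch, under the assumptions that periods are encoded as YYYYMM integers and that time_period maps each scale to a number of months (the real time_period and addmonth may differ):

time_period = {'year': 12, 'month': 1}  # assumed: months per time scale

def addmonth(period, months):
    # assumed YYYYMM encoding; shift a period by a (possibly negative)
    # number of months
    year, month = divmod(period, 100)
    month += months - 1
    return (year + month // 12) * 100 + month % 12 + 1

for retro in (False, True):
    step = time_period['year'] * (1 - 2 * retro)
    print(addmonth(200901, step))  # 201001, then 200801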
Example #5
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': [('a', int), ...],
        #                'MIG': int}
        globals_def = {}
        for k, v in content.get('globals', {}).iteritems():
            # periodic is a special case
            if k == 'periodic':
                type_ = fields_yaml_to_type(v)
            else:
                # "fields" and "type" are synonyms
                type_def = v.get('fields') or v.get('type')
                if isinstance(type_def, basestring):
                    type_ = field_str_to_type(type_def, "array '%s'" % k)
                else:
                    if not isinstance(type_def, list):
                        raise SyntaxError("invalid structure for globals")
                    type_ = fields_yaml_to_type(type_def)
            globals_def[k] = type_

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def['periods']
        start_period = simulation_def['start_period']
        config.skip_shows = simulation_def.get('skip_shows', False)
        #TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', 'raise')
        config.show_timings = simulation_def.get('timings', True)

        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None \
                                    else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None \
                                      else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)

        for k, v in content['entities'].iteritems():
            entity_registry.add(Entity.from_yaml(k, v))

        for entity in entity_registry.itervalues():
            entity.check_links()
            entity.parse_processes(globals_def)
            entity.compute_lagged_fields()

        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes, init_entities = [], set()
        for ent_name, proc_names in init_def:
            if ent_name not in entity_registry:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entity_registry[ent_name]
            init_entities.add(entity)
            init_processes.extend([(entity.processes[proc_name], 1)
                                   for proc_name in proc_names])

        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes, entity_set = [], set()
        for ent_name, proc_defs in processes_def:
            entity = entity_registry[ent_name]
            entity_set.add(entity)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))
        entities = sorted(entity_set, key=lambda e: e.name)

        method = input_def.get('method', 'h5')

        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            data_source = Void(output_path)
        else:
            raise ValueError("'%s' is an invalid value for 'method'. It "
                             "should be either 'h5' or 'void'" % method)

        default_entity = simulation_def.get('default_entity')
        return Simulation(globals_def, periods, start_period,
                          init_processes, init_entities, processes, entities,
                          data_source, default_entity)
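
The init and processes sections are YAML lists of single-key mappings, which is why each element is unpacked with d.items()[0] (items() returns a list on Python 2; the version-agnostic spelling is list(d.items())[0]). A small illustration with invented process names:

import yaml

processes_yml = """
processes:
    - person: [ageing, birth]
    - household: [dissolution]
"""
simulation_def = yaml.safe_load(processes_yml)
processes_def = [list(d.items())[0] for d in simulation_def['processes']]
for ent_name, proc_defs in processes_def:
    print(ent_name, proc_defs)
# person ['ageing', 'birth']
# household ['dissolution']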
Example #6
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        expand_periodic_fields(content)
        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': [('a', int), ...],
        #                'MIG': int}
        globals_def = content.get('globals', {})
        for k, v in content.get('globals', {}).iteritems():
            if "type" in v:
                v["type"] = field_str_to_type(v["type"], "array '%s'" % k)
            else:
                #TODO: fields should be optional (would use all the fields
                # provided in the file)
                v["fields"] = fields_yaml_to_type(v["fields"])
            globals_def[k] = v

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def['periods']
        start_period = simulation_def['start_period']
        config.skip_shows = simulation_def.get('skip_shows', config.skip_shows)
        #TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', config.assertions)

        logging_def = simulation_def.get('logging', {})
        config.log_level = logging_def.get('level', config.log_level)
        if 'timings' in simulation_def:
            warnings.warn("simulation.timings is deprecated, please use "
                          "simulation.logging.timings instead",
                          DeprecationWarning)
            config.show_timings = simulation_def['timings']
        config.show_timings = logging_def.get('timings', config.show_timings)

        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff

        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None \
                                    else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None \
                                      else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)

        entities = {}
        for k, v in content['entities'].iteritems():
            entities[k] = Entity.from_yaml(k, v)

        for entity in entities.itervalues():
            entity.attach_and_resolve_links(entities)

        global_context = {'__globals__': global_symbols(globals_def),
                          '__entities__': entities}
        parsing_context = global_context.copy()
        parsing_context.update((entity.name, entity.all_symbols(global_context))
                               for entity in entities.itervalues())
        for entity in entities.itervalues():
            parsing_context['__entity__'] = entity.name
            entity.parse_processes(parsing_context)
            entity.compute_lagged_fields()
            # entity.optimize_processes()

        # for entity in entities.itervalues():
        #     entity.resolve_method_calls()
        used_entities = set()
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes = []
        for ent_name, proc_names in init_def:
            if ent_name not in entities:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entities[ent_name]
            used_entities.add(ent_name)
            init_processes.extend([(entity.processes[proc_name], 1)
                                   for proc_name in proc_names])

        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes = []
        for ent_name, proc_defs in processes_def:
            entity = entities[ent_name]
            used_entities.add(ent_name)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))

        entities_list = sorted(entities.values(), key=lambda e: e.name)
        declared_entities = set(e.name for e in entities_list)
        unused_entities = declared_entities - used_entities
        if unused_entities:
            suffix = 'y' if len(unused_entities) == 1 else 'ies'
            print("WARNING: entit%s without any executed process:" % suffix,
                  ','.join(sorted(unused_entities)))

        method = input_def.get('method', 'h5')

        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            data_source = Void(output_path)
        else:
            raise ValueError("'%s' is an invalid value for 'method'. It "
                             "should be either 'h5' or 'void'" % method)

        default_entity = simulation_def.get('default_entity')
        return Simulation(globals_def, periods, start_period, init_processes,
                          processes, entities_list, data_source, default_entity)
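
The path handling above follows one small pattern in every version: relative input/output paths are resolved against the directory of the simulation file, and missing output directories are created. A standalone sketch of that pattern (resolve_dir is a hypothetical name):

import os
import tempfile

def resolve_dir(base_dir, path, create=False):
    # relative paths are interpreted relative to the simulation file's
    # directory; absolute paths are kept as-is
    if not os.path.isabs(path):
        path = os.path.join(base_dir, path)
    if create and not os.path.exists(path):
        print("creating directory: '%s'" % path)
        os.makedirs(path)
    return path

simulation_dir = tempfile.mkdtemp()  # stand-in for the .yml file's directory
input_directory = resolve_dir(simulation_dir, 'input')
output_directory = resolve_dir(simulation_dir, 'output', create=True)
print(output_directory)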
Example #7
def load_def(localdir, ent_name, section_def, required_fields):
    if 'type' in section_def and 'fields' in section_def:
        raise Exception("invalid structure for '%s': "
                        "type and fields sections are mutually exclusive" %
                        ent_name)

    if 'type' in section_def:
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        str_type = section_def['type']
        if isinstance(str_type, basestring):
            celltype = field_str_to_type(str_type, "array '%s'" % ent_name)
        else:
            assert isinstance(str_type, type)
            celltype = str_type
        return 'ndarray', load_ndarray(csv_filepath, celltype)

    fields_def = section_def.get('fields')
    if fields_def is not None:
        for fdef in fields_def:
            if isinstance(fdef, basestring):
                raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
        if all(isinstance(fdef, dict) for fdef in fields_def):
            fields = fields_yaml_to_type(fields_def)
        else:
            assert all(isinstance(fdef, tuple) for fdef in fields_def)
            fields = fields_def
        fnames = {name for name, _ in fields}
        for reqname, reqtype in required_fields[::-1]:
            if reqname not in fnames:
                fields.insert(0, (reqname, reqtype))
    else:
        fields = None
    newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                           section_def.get('newnames', {}))
    transpose = section_def.get('transposed', False)

    interpolate_def = section_def.get('interpolate')
    files_def = section_def.get('files')
    if files_def is None:
        # XXX: it might be cleaner to use the same code path as for the
        # multi-file case (however, that would lose the "import any file
        # size" feature that I'm fond of).

        # we can simply return the stream as-is
        # FIXME: stream is not sorted
        # csv file is assumed to be in the correct order (ie by period then id)
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        csv_file = CSV(csv_filepath,
                       newnames,
                       delimiter=',',
                       transpose=transpose)
        stream = csv_file.read(fields)
        if fields is None:
            fields = csv_file.fields
        if interpolate_def is not None:
            raise Exception('interpolate is currently only supported with '
                            'multiple files')
        return 'table', (fields, csv_file.numlines, stream, csv_file)
    else:
        # we have to load all files, merge them and return a stream out of that
        print(" * computing number of rows...")

        # 1) only load required fields
        default_args = dict(newnames=newnames, transpose=transpose)
        if isinstance(files_def, dict):
            files_items = files_def.items()
        elif isinstance(files_def, list) and files_def:
            if isinstance(files_def[0], dict):
                # handle YAML ordered dict structure
                files_items = [d.items()[0] for d in files_def]
            elif isinstance(files_def[0], basestring):
                files_items = [(path, {}) for path in files_def]
            else:
                raise Exception("invalid structure for 'files'")
        else:
            raise Exception("invalid structure for 'files'")

        # XXX: shouldn't we use the "path" defined for the whole entity if any?
        # section_def.get('path')
        files = []
        for path, kwargs in files_items:
            kwargs['newnames'] = \
                merge_dicts(invert_dict(kwargs.pop('oldnames', {})),
                            kwargs.get('newnames', {}))
            f = CSV(complete_path(localdir, path),
                    **merge_dicts(default_args, kwargs))
            files.append(f)
        id_periods = union1d(f.as_array(required_fields) for f in files)

        print(" * reading files...")
        # 2) load all fields
        if fields is None:
            target_fields = merge_items(*[f.fields for f in files])
            fields_per_file = [None for _ in files]
        else:
            target_fields = fields
            fields_per_file = [[(name, type_) for name, type_ in target_fields
                                if name in f.field_names] for f in files]
            total_fields = set.union(*[set(f.field_names) for f in files])
            missing = set(name for name, _ in target_fields) - total_fields
            if missing:
                raise Exception("the following fields were not found in any "
                                "file: %s" % ", ".join(missing))

        total_lines = len(id_periods)

        # allocate main array
        target = get_default_array(total_lines, np.dtype(target_fields))
        target['period'] = id_periods['period']
        target['id'] = id_periods['id']

        arrays = [
            f.as_array(fields_to_load)
            for f, fields_to_load in zip(files, fields_per_file)
        ]

        # close all files
        for f in files:
            f.close()

        # FIXME: interpolation currently only interpolates missing data points,
        # not data points with their value equal the missing value
        # corresponding to the field type. This can only be fixed once
        # booleans are loaded as int8.
        if interpolate_def is not None:
            if any(v != 'previous_value'
                   for v in interpolate_def.itervalues()):
                raise Exception("currently, only 'previous_value' "
                                "interpolation is supported")
            to_interpolate = [
                k for k, v in interpolate_def.iteritems()
                if v == 'previous_value'
            ]
        else:
            to_interpolate = []

        interpolate(target, arrays, id_periods, to_interpolate)
        return 'table', (target_fields, total_lines, iter(target), None)
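
Unlike example #1, which unconditionally prepends required_fields to the declared fields, this variant only inserts the required fields that are missing, iterating over them in reverse so their relative order is preserved at the front. The core of that loop in isolation:

fields = [('age', int), ('period', int)]
required_fields = [('period', int), ('id', int)]

fnames = set(name for name, _ in fields)
for reqname, reqtype in required_fields[::-1]:
    if reqname not in fnames:
        fields.insert(0, (reqname, reqtype))

# 'id' was inserted at the front; 'period' was already declared and is
# left where the user put it
print([name for name, _ in fields])  # ['id', 'age', 'period']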
Example #8
    def from_yaml(cls, main_fpath, input_dir=None, input_file=None, output_dir=None, output_file=None):
        simulation_path = os.path.abspath(main_fpath)
        simulation_dir = os.path.dirname(simulation_path)

        # TODO: add an instruction in yaml_layout so as to call another YAML
        # file and use it instead of a list.
        with open(main_fpath) as f:
            content = yaml.load(f)
        list_fpath = []
        if "import" in content.keys():
            list_fpath = content["import"].values()

        for fname in list_fpath:
            fpath = os.path.join(simulation_dir, fname)
            with open(fpath) as f:
                content1 = yaml.load(f)
                if "globals" in content1:
                    if "globals" in content:
                        raise Exception("globals can be defined only once")
                    else:
                        content["globals"] = content1["globals"]
                if "entities" in content1:
                    if "entities" in content:
                        for name1 in content1["entities"].keys():
                            if name1 in content["entities"].keys():
                                for name2 in content1["entities"][name1].keys():
                                    if name2 in content["entities"][name1].keys():
                                        if name2 in ("processes", "macros", "links"):
                                            for name3 in content1["entities"][name1][name2].keys():
                                                if name3 in content["entities"][name1][name2].keys():
                                                    raise Exception(
                                                        "%s of %s is defined a second time in %s"
                                                        % (name3, name1, fpath)
                                                    )
                                                else:
                                                    content["entities"][name1][name2][name3] = content1["entities"][
                                                        name1
                                                    ][name2][name3]
                                        if name2 == "fields":
                                            raise Exception(
                                                "fields of entities can be defined only once, it is not the case for %s in %s"
                                                % (name1, fpath)
                                            )
                                    else:
                                        content["entities"][name1][name2] = content1["entities"][name1][name2]
                            else:
                                content["entities"][name1] = content1["entities"][name1]
                    else:
                        content["entities"] = content1["entities"]

                if "simulation" in content1:
                    if "simulation" in content:
                        raise Exception("simualtion can be defined only once")
                    else:
                        content["simulation"] = content1["simulation"]
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': [('a', int), ...],
        #                'MIG': int}
        globals_def = {}
        for k, v in content.get("globals", {}).iteritems():
            # periodic is a special case
            if k == "periodic":
                type_ = fields_yaml_to_type(v)
            else:
                # "fields" and "type" are synonyms
                type_def = v.get("fields") or v.get("type")
                if isinstance(type_def, basestring):
                    type_ = field_str_to_type(type_def, "array '%s'" % k)
                else:
                    if not isinstance(type_def, list):
                        raise SyntaxError("invalid structure for globals")
                    type_ = fields_yaml_to_type(type_def)
            globals_def[k] = type_

        simulation_def = content["simulation"]
        seed = simulation_def.get("random_seed")
        if seed is not None:
            seed = int(seed)
            print "using fixed random seed: %d" % seed
            random.seed(seed)
            np.random.seed(seed)

        periods = simulation_def["periods"]
        start_period = simulation_def["start_period"]
        config.skip_shows = simulation_def.get("skip_shows", False)
        # TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get("assertions", "raise")

        input_def = simulation_def["input"]
        input_directory = input_dir if input_dir is not None else input_def.get("path", "")
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def["output"]
        output_directory = output_dir if output_dir is not None else output_def.get("path", "")
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def["file"]
        output_path = os.path.join(output_directory, output_file)

        for k, v in content["entities"].iteritems():
            entity_registry.add(Entity.from_yaml(k, v))

        for entity in entity_registry.itervalues():
            entity.check_links()
            entity.parse_processes(globals_def)
            entity.compute_lagged_fields()

        init_def = [d.items()[0] for d in simulation_def.get("init", [])]
        init_processes, init_entities = [], set()
        for ent_name, proc_names in init_def:
            if ent_name not in entity_registry:
                raise Exception("Entity '%s' not found" % ent_name)

            entity = entity_registry[ent_name]
            init_entities.add(entity)
            init_processes.extend([(entity.processes[proc_name], 1) for proc_name in proc_names])

        processes_def = [d.items()[0] for d in simulation_def["processes"]]
        processes, entities = [], set()
        for ent_name, proc_defs in processes_def:
            entity = entity_registry[ent_name]
            entities.add(entity)
            for proc_def in proc_defs:
                # proc_def is simply a process name
                if isinstance(proc_def, basestring):
                    # use the default periodicity of 1
                    proc_name, periodicity = proc_def, 1
                else:
                    proc_name, periodicity = proc_def
                processes.append((entity.processes[proc_name], periodicity))
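        # For illustration (hypothetical process names): a "processes" section
        # such as
        #     processes:
        #         - person: [ageing, [migrate, 12]]
        # runs 'ageing' every period and 'migrate' with a periodicity of 12.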

        method = input_def.get("method", "h5")

        if method == "h5":
            if input_file is None:
                input_file = input_def["file"]
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == "void":
            input_path = None
            data_source = Void(output_path)
        else:
            raise Exception("invalid input method: %r" % method)

        default_entity = simulation_def.get("default_entity")
        return Simulation(
            globals_def,
            periods,
            start_period,
            init_processes,
            init_entities,
            processes,
            entities,
            data_source,
            default_entity,
        )
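
For reference, here is a minimal sketch of the YAML layout this loader expects, inferred from the keys it reads above; the file names, entity name and process name are hypothetical and the entity definition is abbreviated:

simulation_yml = """
globals:
    periodic:
        - MINR: float

entities:
    person:
        fields:
            - age: int
        processes:
            age: age + 1

simulation:
    random_seed: 1234
    periods: 10
    start_period: 2015
    input:
        file: input.h5
    output:
        file: output.h5
    init:
        - person: [age]
    processes:
        - person: [age]
"""
# hypothetical usage, assuming this method lives on the Simulation class:
# simulation = Simulation.from_yaml('model.yml')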
Example #9
0
    def from_yaml(cls, fpath,
                  input_dir=None, input_file=None,
                  output_dir=None, output_file=None):
        simulation_path = os.path.abspath(fpath)
        simulation_dir = os.path.dirname(simulation_path)
        with open(fpath) as f:
            content = yaml.load(f)

        content = handle_imports(content, simulation_dir)
        validate_dict(content, cls.yaml_layout)

        # the goal is to get something like:
        # globals_def = {'periodic': [('a', int), ...],
        #                'MIG': int}
        globals_def = {}
        for k, v in content.get('globals', {}).iteritems():
            # periodic is a special case
            if k == 'periodic':
                type_ = fields_yaml_to_type(v)
            else:
                # "fields" and "type" are synonyms
                type_def = v.get('fields') or v.get('type')
                if isinstance(type_def, basestring):
                    type_ = field_str_to_type(type_def, "array '%s'" % k)
                else:
                    if not isinstance(type_def, list):
                        raise SyntaxError("invalid structure for globals")
                    type_ = fields_yaml_to_type(type_def)
            globals_def[k] = type_

        simulation_def = content['simulation']
        seed = simulation_def.get('random_seed')
        if seed is not None:
            seed = int(seed)
            print("using fixed random seed: %d" % seed)
            random.seed(seed)
            np.random.seed(seed)
            
        periods = simulation_def['periods']
        time_scale = simulation_def.get('time_scale', 'year')
        retro = simulation_def.get('retro', False)
        
        start_period = simulation_def.get('start_period')
        init_period = simulation_def.get('init_period')
        if start_period is None and init_period is None:
            raise Exception("either start_period or init_period must be given")
        if start_period is not None:
            if init_period is not None:
                raise Exception("start_period cannot be given if init_period "
                                "is")
            # step forward (or backward when retro is True) by one unit of the
            # chosen time scale
            step = time_period[time_scale] * (1 - 2 * retro)
            init_period = addmonth(start_period, step)
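        # For illustration (assuming time_period maps a scale name to a number
        # of months and addmonth shifts a period by that many months): with
        # time_scale='year' and retro=False, step == 12 and init_period is
        # start_period shifted one year forward; with retro=True, step == -12.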
        
        config.skip_shows = simulation_def.get('skip_shows', False)
        #TODO: check that the value is one of "raise", "skip", "warn"
        config.assertions = simulation_def.get('assertions', 'raise')
        config.show_timings = simulation_def.get('timings', True)
        
        autodump = simulation_def.get('autodump', None)
        if autodump is True:
            autodump = 'autodump.h5'
        if isinstance(autodump, basestring):
            # by default autodump will dump all rows
            autodump = (autodump, None)
        config.autodump = autodump

        autodiff = simulation_def.get('autodiff', None)
        if autodiff is True:
            autodiff = 'autodump.h5'
        if isinstance(autodiff, basestring):
            # by default autodiff will compare all rows
            autodiff = (autodiff, None)
        config.autodiff = autodiff
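        # e.g. (per the code above) "autodump: true" in the YAML becomes
        # ('autodump.h5', None), i.e. dump all rows to the default file, while
        # "autodump: mydump.h5" (hypothetical name) becomes ('mydump.h5', None).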

        legislation = simulation_def.get('legislation', None)
        final_stat = simulation_def.get('final_stat', None)
        
        input_def = simulation_def['input']
        input_directory = input_dir if input_dir is not None \
                                    else input_def.get('path', '')
        if not os.path.isabs(input_directory):
            input_directory = os.path.join(simulation_dir, input_directory)
        config.input_directory = input_directory

        output_def = simulation_def['output']
        output_directory = output_dir if output_dir is not None \
                                      else output_def.get('path', '')
        if not os.path.isabs(output_directory):
            output_directory = os.path.join(simulation_dir, output_directory)
        if not os.path.exists(output_directory):
            print("creating directory: '%s'" % output_directory)
            os.makedirs(output_directory)
        config.output_directory = output_directory

        if output_file is None:
            output_file = output_def['file']
        output_path = os.path.join(output_directory, output_file)
    
        method = input_def.get('method', 'h5')

        # NOTE: this must come before the processes are parsed because, in the
        # legislation case, we currently need the input table.
        if method == 'h5':
            if input_file is None:
                input_file = input_def['file']
            input_path = os.path.join(input_directory, input_file)
            data_source = H5Data(input_path, output_path)
        elif method == 'void':
            input_path = None
            data_source = Void(output_path)
        else:
            raise Exception("invalid input method: %r" % method)

        for k, v in content['entities'].iteritems():
            entity_registry.add(Entity.from_yaml(k, v))
            
        for entity in entity_registry.itervalues():
            entity.check_links()
            entity.parse_processes(globals_def)
            entity.compute_lagged_fields()
            
        init_def = [d.items()[0] for d in simulation_def.get('init', [])]
        init_processes, init_entities = [], set()
        for ent_name, proc_names in init_def:
            if ent_name != 'legislation':
                if ent_name not in entity_registry:
                    raise Exception("Entity '%s' not found" % ent_name)
                
                entity = entity_registry[ent_name]
                init_entities.add(entity)
                init_processes.extend([(entity.processes[proc_name], 1, 1)
                                       for proc_name in proc_names])
            else:
#                 proc1 = ExtProcess('liam2of',['simulation',None])
                proc2 = ExtProcess('of_on_liam',
                                   ['simulation', 2009, 'period'])
#                 proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
#                                                 "C:/Til/output/"+"simul_leg.h5",'period'])
#                 init_processes.append((proc1, 1))
                init_processes.append((proc2, 1, 1))
#                 processes.append((proc3, 1))


        processes_def = [d.items()[0] for d in simulation_def['processes']]
        processes, entity_set = [], set()
        for ent_name, proc_defs in processes_def:
            if ent_name != 'legislation':
                entity = entity_registry[ent_name]
                entity_set.add(entity)
                for proc_def in proc_defs:
                    # proc_def is simply a process name
                    if isinstance(proc_def, basestring):
                        # use the default periodicity of 1
                        proc_name, periodicity, start = proc_def, 1, 1
                    else:
                        if len(proc_def) == 3:
                            proc_name, periodicity, start = proc_def
                        elif len(proc_def) == 2:
                            proc_name, periodicity = proc_def
                            start = 1
                        else:
                            raise SyntaxError("invalid process definition: %r"
                                              % (proc_def,))
                    processes.append((entity.processes[proc_name], periodicity, start))
            else:
#                 proc1 = ExtProcess('liam2of',['simulation',None])
                proc2 = ExtProcess('of_on_liam',
                                   ['simulation', proc_defs[0], 'period'])
#                 proc3 = ExtProcess('merge_leg',['simulation',data_source.output_path,
#                                                 "C:/Til/output/"+"simul_leg.h5",'period'])
#                 processes.append((proc1, 1))
                processes.append((proc2, 'year', 12))
#                 processes.append((proc3, 1))
        entities = sorted(entity_set, key=lambda e: e.name)
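        # For illustration (hypothetical names): a process spec here may also
        # be a [name, periodicity, start] triple, e.g.
        #     - person: [[migrate, 12, 6]]
        # per the unpacking above, 'migrate' then runs with periodicity 12 and
        # start 6 (the exact start semantics depend on the simulation loop).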

        default_entity = simulation_def.get('default_entity')
        return Simulation(globals_def, periods, init_period,
                          init_processes, init_entities, processes, entities,
                          data_source, default_entity, legislation, final_stat,
                          time_scale, retro)
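
A short usage sketch for this variant (the file name is hypothetical; Simulation and from_yaml come from the surrounding module): it takes the same arguments as the previous example, but additionally honours the time_scale, retro and init_period keys from the YAML:

simulation = Simulation.from_yaml('model.yml',
                                  input_dir=None, input_file=None,
                                  output_dir=None, output_file=None)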