def test_expression_2vars(self):
     # test an expression with 2 variables
     expr = "2*sqrt(var1+var2)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='dataset',
                         table_data={
                             "var1": array([4, -8, 0.5, 1]),
                             "var2": array([3, 3, 7, 7]),
                             "id": array([1, 2, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='dataset',
                       id_name="id",
                       dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([5.29150262, 0.0, 5.47722558, 5.65685425])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_expression_2vars")
     # check the dependencies (will depend on two different other variables)
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     # use sets for the equality test, since we don't know in what order the dependencies will be returned
     self.assertEqual(set(var.dependencies()),
                      set(['mydataset.var1', 'mydataset.var2']),
                      msg="dependencies are incorrect")
 def test_expression(self):
     # test an expression.  Also make sure that the generated variable can be accessed
     # using its short name and that dependencies are correct.
     expr = "2*sqrt(my_variable+10)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='dataset',
                         table_data={
                             "my_variable": array([4, -8, 0.5, 1]),
                             "id": array([1, 2, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='dataset',
                       id_name="id",
                       dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([7.48331477, 2.82842712, 6.4807407, 6.63324958])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_expression")
     # check the name
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.name(), expr, msg="name is incorrect")
     # check the dependencies
     self.assertEqual(var.dependencies(), ['mydataset.my_variable'],
                      msg="dependencies are incorrect")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables([v.get_short_name()])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_variable")
Example #3
 def test_lag_variable(self):
     from opus_core.variables.variable_factory import VariableFactory
     from opus_core.variables.variable_name import VariableName
     vf = VariableFactory()
     var_name = VariableName('opus_core.tests.a_test_variable_lag3')
     var = vf.get_variable(var_name, None, index_name='my_id')
     self.assert_(var.is_lag_variable())
     self.assertEqual(var.lag_offset, 3)
Example #5
 def test_sqrt_constant(self):
     # test an expression that is constant -- should have no dependencies
     expr = "sqrt(25)"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="dataset", table_data={"id": array([1, 2])})
     # we don't actually use anything in the dataset
     dataset = Dataset(in_storage=storage, in_table_name="dataset", id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     self.assert_(4.99 < result and result < 5.01, "Error in test_sqrt_constant")
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), [], msg="dependencies are incorrect")
Example #6
    def __init__(self, config, model=None, model_group=None, specification=None, scenario_name=None):
        self.factory = VariableFactory()

        lib = config.get_expression_library()
        self.factory.set_expression_library(lib)

        self.model = model
        self.model_group = model_group
        
        if model is not None:
            if specification is None:
                specification_dict = config.get_estimation_specification(model, model_group)
                if model_group is not None:
                    specification_dict = specification_dict[model_group]
                spec = get_specification_for_estimation(specification_dict)
            else:
                spec = specification
            model_prefix = ''
            if model_group is not None:
                model_prefix = '%s_' % model_group
            self.model_name = '%s%s' % (model_prefix, model)
            
            self.var_list = spec.get_distinct_long_variable_names().tolist()
            
            #check other model nodes, such as agents_filter, submodel_string or filter
#            config_node_path = "model_manager/models/model[@name='%s']" % self.model
#            model_node = config._find_node(config_node_path)
#            controller = config._convert_model_to_dict(model_node)
#            addvars = []
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('filter', None))
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('submodel_string', None))
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('choice_attribute_name', None))
#            addvars.append(controller.get('prepare_for_run', {}).get('arguments', {}).get('agent_filter', None))

            # This assumes that xml nodes contain the tag 'model_dependency_type'
            self.model_structure_dependencies = config.model_dependencies(self.model)
            self.var_list = self.var_list + self.model_structure_dependencies.get('variable', [])
#            for var in addvars:
#                if isinstance(var, str):
#                    self.var_list.append(eval(var)) # eval because these entries are in double quotes, e.g. "'attribute'"
                    
            self.var_tree = []
            l = []
            for var in self.var_list:
                l.append((var, []))
            self.var_tree.append((self.model_name, l))
        else:
            # this is meant to be for all models but is not working yet
            #self.config = config.get_run_configuration(scenario_name)
            self.var_list = []
            self.var_tree = []
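
A side note on the call above (not part of the original example): config.model_dependencies() is consumed only through its 'variable' key, so a sketch of the assumed return shape looks like this, with placeholder variable names taken from the other examples on this page.

# Assumed shape, based only on the .get('variable', []) access above:
model_structure_dependencies = {
    'variable': ['mydataset.var1', 'opus_core.tests.a_test_variable'],  # placeholders
}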
Example #7
 def test_expression_1var_2times(self):
     # test an expression with two occurrences of the same variable
     # (the var should just occur once in dependencies)
     expr = "var1+sqrt(var1)"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="dataset", table_data={"var1": array([4, 25, 0, 1]), "id": array([1, 2, 3, 4])})
     dataset = Dataset(in_storage=storage, in_table_name="dataset", id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([6, 30, 0, 2])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_expression_1var_2times")
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ["mydataset.var1"], msg="dependencies are incorrect")
Example #8
 def test_fully_qualified_name_power(self):
     # test fully qualified name to a power
     expr = "opus_core.tests.a_test_variable**2"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="tests", table_data={"a_dependent_variable": array([1, 0]), "id": array([1, 3])})
     dataset = Dataset(in_storage=storage, in_table_name="tests", id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([100, 0])
     self.assertEqual(
         ma.allclose(result, should_be, rtol=1e-5), True, msg="error in test_fully_qualified_name_power"
     )
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ["opus_core.tests.a_test_variable"], msg="dependencies are incorrect")
Example #9
 def test_sqrt_constant(self):
     # test an expression that is constant -- should have no dependencies
     expr = "sqrt(25)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='dataset', 
         table_data={"id": array([1,2])}
         )
     # we don't actually use anything in the dataset
     dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     self.assert_(4.99<result and result<5.01, "Error in test_sqrt_constant")
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), [], msg="dependencies are incorrect")
Example #10
 def test_expression_1var_2times(self):
     # test an expression with two occurrences of the same variable
     # (the var should just occur once in dependencies)
     expr = "var1+sqrt(var1)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='dataset',
         table_data={"var1": array([4,25,0,1]), "id": array([1,2,3,4])}
         )
     dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([ 6, 30, 0, 2])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_expression_1var_2times")
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ['mydataset.var1'], msg="dependencies are incorrect")
Example #11
def get_variable_dependencies(name, quiet=False):
    """Return a tuple where the first element is a list of variables of the given 'dataset' that the 
    variable given by 'name' is directly as well as indirectly dependent on.
    The second elemet is a list of dependent variables that belong to other datasets. It consists of tuples where the 
    first element is the fully qualified name and the second element is the version. """
    dep = VariableFactory().get_variable(name, None,
                                         quiet=quiet).get_all_dependencies()
    return dep
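
A minimal usage sketch (not from the original listing; the variable name is only illustrative): the helper returns (VariableName-or-AttributeBox, version) pairs that can be iterated directly.

# Illustrative only -- assumes the test variable used elsewhere in these examples.
from opus_core.variables.variable_name import VariableName

name = VariableName('opus_core.tests.a_test_variable')
for dep, version in get_variable_dependencies(name, quiet=True):
    print dep, version  # dep is a VariableName or an AttributeBox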
Example #12
 def test_attr_power(self):
     # Attributes and fully-qualified names to a power require separate parse tree patterns,
     # which are tested in the following two tests.
     # test attribute to a power
     expr = "var1**3"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(
         table_name="dataset", table_data={"var1": array([4, -8, 0.5, 1]), "id": array([1, 2, 3, 4])}
     )
     dataset = Dataset(in_storage=storage, in_table_name="dataset", id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([64, -512, 0.125, 1])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_attr_power")
     # check the dependencies (trickier for ** because we need a separate attribute tree pattern)
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ["mydataset.var1"], msg="dependencies are incorrect")
Example #13
 def test_attr_power(self):
     # Attributes and fully-qualified names to a power require separate parse tree patterns,
     # which are tested in the following two tests.
     # test attribute to a power
     expr = "var1**3"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='dataset',
         table_data={"var1": array([4,-8,0.5,1]), "id": array([1,2,3,4])}
         )
     dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([64, -512, 0.125, 1])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_attr_power")
     # check the dependencies (trickier for ** because we need a separate attribute tree pattern)
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ['mydataset.var1'], msg="dependencies are incorrect")
 def test_expression_2vars(self):
     # test an expression with 2 variables
     expr = "2*sqrt(var1+var2)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='dataset',
         table_data={"var1": array([4,-8,0.5,1]), "var2": array([3,3,7,7]), "id": array([1,2,3,4])}
         )
     dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([ 5.29150262, 0.0,  5.47722558,  5.65685425])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_expression_2vars")
     # check the dependencies (will depend on two different other variables)
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     # use sets for the equality test, since we don't know in what order the dependencies will be returned
     self.assertEqual(set(var.dependencies()), set(['mydataset.var1', 'mydataset.var2']), 
                      msg="dependencies are incorrect")
Example #15
 def test_fully_qualified_name_power(self):
     # test fully qualified name to a power
     expr = "opus_core.tests.a_test_variable**2"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='tests',
         table_data={
             "a_dependent_variable":array([1,0]),
             "id":array([1,3])
             }
         )
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([100,0])
     self.assertEqual(ma.allclose(result, should_be, rtol=1e-5), True, msg="error in test_fully_qualified_name_power")
     # check the dependencies
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.dependencies(), ['opus_core.tests.a_test_variable'], msg="dependencies are incorrect")
Example #16
 def test_expression(self):
     # test an expression.  Also make sure that the generated variable can be accessed
     # using its short name and that dependencies are correct.
     expr = "2*sqrt(my_variable+10)"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(
         table_name="dataset", table_data={"my_variable": array([4, -8, 0.5, 1]), "id": array([1, 2, 3, 4])}
     )
     dataset = Dataset(in_storage=storage, in_table_name="dataset", id_name="id", dataset_name="mydataset")
     result = dataset.compute_variables([expr])
     should_be = array([7.48331477, 2.82842712, 6.4807407, 6.63324958])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_expression")
     # check the name
     v = VariableName(expr)
     var = VariableFactory().get_variable(v, dataset)
     self.assertEqual(var.name(), expr, msg="name is incorrect")
     # check the dependencies
     self.assertEqual(var.dependencies(), ["mydataset.my_variable"], msg="dependencies are incorrect")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables([v.get_short_name()])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_variable")
Example #17
    def _make_indicators_for_dataset(self, dataset, indicators_in_dataset,
                                     source_data, computed_indicators, year):

        for name, indicator in indicators_in_dataset:
            computed_indicator = ComputedIndicator(
                indicator=indicator,
                source_data=source_data,
                dataset_name=dataset.get_dataset_name(),
                primary_keys=copy(dataset.get_id_name()))

            computed_indicators[name] = computed_indicator

        table_name = dataset.get_dataset_name()
        storage_location = os.path.join(self.storage_location, repr(year))

        storage_type = 'flt'
        store = StorageFactory().get_storage(storage_type + '_storage',
                                             storage_location=storage_location)

        already_computed_attributes = []
        if not os.path.exists(storage_location):
            os.mkdir(storage_location)
        else:
            if store.table_exists(table_name=table_name):
                already_computed_attributes = store.get_column_names(
                    table_name=table_name)

        attributes = dataset.get_id_name() + [
            ind.attribute for name, ind in indicators_in_dataset
            if computed_indicators[name].get_computed_dataset_column_name()
            not in already_computed_attributes
        ]
        if self.expression_library is not None:
            VariableFactory().set_expression_library(self.expression_library)
        dataset.compute_variables(names=attributes)

        cols = copy(dataset.get_id_name())
        cols += [
            computed_indicators[name].get_computed_dataset_column_name()
            for name, ind in indicators_in_dataset
            if computed_indicators[name].get_computed_dataset_column_name()
            not in already_computed_attributes
        ]

        data = dict([(attribute, dataset.get_attribute(attribute))
                     for attribute in cols])

        store.write_table(table_name=table_name,
                          table_data=data,
                          mode=Storage.APPEND)

        del dataset
        collect()
Example #18
 def get_all_dependencies(self):
     """Return all variables and attributes needed to compute this variable.  
     This is returned as a list of tuples where the first element is either AttributeBox or 
     VariableName of the dependent variable and the second element is the version for 
     which this variable was computed.
     """ 
     def create_fake_dataset(dataset_name):
         storage = StorageFactory().get_storage('dict_storage')
         
         storage.write_table(
             table_name='fake_dataset',
             table_data={
                 'id':array([], dtype='int32')
                 }
             )
         
         dataset = Dataset(in_storage=storage, in_table_name='fake_dataset', dataset_name=dataset_name, id_name="id")
         return dataset
     
     result_others = []
     dependencies_list = self.get_current_dependencies()
     for i in range(len(dependencies_list)):
         dep_item = dependencies_list[i][0]
         version = dependencies_list[i][1]
         isprimary = 0
         if isinstance(dep_item, str):
             depvar_name = VariableName(dep_item)
             dataset_name = depvar_name.get_dataset_name()
             var = VariableFactory().get_variable(depvar_name, create_fake_dataset(dataset_name), 
                                                            quiet=True)
             result_others = result_others + [(depvar_name, version)]                                              
         else: # dep_item should be an instance of AttributeBox
             var = dep_item.get_variable_instance()           
             result_others = result_others + [(dep_item, version)]
             isprimary = dep_item.is_primary()
             
         if (var is not None) and (not isprimary):
             res = var.get_all_dependencies()
             result_others = result_others + res
     return result_others
Example #19
    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is either AttributeBox or 
        VariableName of the dependent variable and the second element is the version for 
        which this variable was computed.
        """
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='fake_dataset',
                                table_data={'id': array([], dtype='int32')})

            dataset = Dataset(in_storage=storage,
                              in_table_name='fake_dataset',
                              dataset_name=dataset_name,
                              id_name="id")
            return dataset

        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(
                    depvar_name, create_fake_dataset(dataset_name), quiet=True)
                result_others = result_others + [(depvar_name, version)]
            else:  # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()

            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others
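
A hedged sketch of consuming the flattened list above (illustrative only; the split by entry type follows the docstring, nothing else is implied):

# 'var' stands for any Variable instance; split its dependencies by entry type.
from opus_core.variables.variable_name import VariableName

cross_dataset, attribute_boxes = [], []
for item, version in var.get_all_dependencies():
    if isinstance(item, VariableName):
        cross_dataset.append((item, version))    # dependency named on another dataset
    else:
        attribute_boxes.append((item, version))  # AttributeBox of an existing attribute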
 def __init__(self,
              resources=None,
              dataset1=None,
              dataset2=None,
              index1=None,
              index2=None,
              dataset_name=None,
              debug=None):
     """ Argument 'resources' is of type Resources. It is merged with arguments. It should contain:
             dataset1 - agent class
             dataset2 - class of the choice dataset
         Optional:
             index1 - 1D array, indices of dataset1
             index2 - If 2D array: row i contains indices of individuals of dataset2 that belong to
                     i-th individual of dataset1[index1].
                     If 1D array: indices of individuals of dataset2 for all individuals of dataset1[index1].
             dataset_name - subdirectory in which implementation of the interaction variables is placed (default "")
         dataset1.resources and dataset2.resources should contain key 'dataset_name' (see Dataset.get_dataset_name()).
     """
     self.resources = Resources(resources)
     self.resources.merge_if_not_None({
         "dataset1": dataset1,
         "dataset2": dataset2,
         "index1": index1,
         "index2": index2,
         "dataset_name": dataset_name,
         "debug": debug
     })
     self.attribute_boxes = {}
     self.attribute_names = []
     self.debug = self.resources.get("debug", 0)
     if not isinstance(self.debug, DebugPrinter):
         self.debug = DebugPrinter(self.debug)
     self.resources.check_obligatory_keys(["dataset1", "dataset2"])
     self.dataset1 = self.resources["dataset1"]
     self.dataset2 = self.resources["dataset2"]
     self.index1 = self.resources.get("index1", None)
     self.index2 = self.resources.get("index2", None)
     self.dataset_name = self.resources.get("dataset_name", None)
     if self.dataset_name == None:
         self.dataset_name = self.dataset1.get_dataset_name(
         ) + '_x_' + self.dataset2.get_dataset_name()
     self._primary_attribute_names = []
     self.index1_mapping = {}
     if self.index1 is not None:
         self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
     self._id_names = None  # for compatibility with Dataset
     self.variable_factory = VariableFactory()
     self._aliases = {}  # for compatibility with Dataset
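
The index1/index2 convention from the docstring, sketched with plain numpy arrays (values are placeholders, not from the source):

from numpy import array

index1 = array([0, 2])             # pick individuals 0 and 2 of dataset1
# 2D form: row i lists the dataset2 indices belonging to individual index1[i]
index2 = array([[5, 7, 9],
                [1, 3, 4]])
# 1D form: one shared set of dataset2 indices for all individuals of dataset1[index1]
index2_shared = array([5, 7, 9])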
    def on_pb_validate_selected_clicked(self):
        ''' User clicked the validate selected button '''
        # Get all the selected variables
        selected_rows = set()
        map(selected_rows.add,
            [i.row() for i in self.variables_table.selectedIndexes()])

        # Setup GUI for batch run
        self.pb_cancel_validation.setEnabled(True)
        self._set_problem_variables([])
        self.progress_validation.setValue(0)
        self.group_progress.setVisible(True)
        self.variables_table.setEnabled(
            False)  # disable selecting variables during run
        self.group_progress.setTitle('Validating %d variables...' %
                                     len(selected_rows))

        # Set the expression library in VariableFactory to the variables for this configuration.
        # We need to get this from the VariablesTableModel rather than from the xml configuration
        # since newly added variables may not yet have been saved to the xml configuration but we
        # still want to check them.
        VariableFactory().set_expression_library(
            self.model.get_variables_dict())

        # Batch process the selected variables
        variables = [self.model.variables[i] for i in selected_rows]
        func = self.validator.check_data_errors
        var_key = 'dataerror'
        callback = self.update_validation_progress
        cancel_flag = self.cancel_validation_flag
        results = variable_batch_check(variables=variables,
                                       validator_func=func,
                                       variable_key=var_key,
                                       progress_callback=callback,
                                       cancel_flag=cancel_flag)

        # Setup GUI for investigating results
        self.pb_cancel_validation.setEnabled(False)
        self.progress_validation.setValue(100)
        self.variables_table.setEnabled(True)
        failed_variables = [(var, msg) for (var, flag, msg) in results
                            if flag is False]
        self._set_problem_variables(failed_variables)
        self._show_problem_variables()
        self.group_progress.setVisible(False)
        if failed_variables:
            self.pb_problems.setFocus()
Example #22
class DependencyQuery:
    def __init__(self,
                 config,
                 model=None,
                 model_group=None,
                 specification=None,
                 scenario_name=None):
        self.factory = VariableFactory()

        lib = config.get_expression_library()
        self.factory.set_expression_library(lib)

        self.model = model
        self.model_group = model_group

        if model is not None:
            if specification is None:
                specification_dict = config.get_estimation_specification(
                    model, model_group)
                spec = get_specification_for_estimation(specification_dict)
            else:
                spec = specification
            model_prefix = ''
            if model_group is not None:
                model_prefix = '%s_' % model_group
            self.model_name = '%s%s' % (model_prefix, model)

            self.var_list = spec.get_distinct_long_variable_names().tolist()

            #check other model nodes, such as agents_filter, submodel_string or filter
            config_node_path = "model_manager/models/model[@name='%s']" % self.model
            model_node = config._find_node(config_node_path)
            controller = config._convert_model_to_dict(model_node)
            addvars = []
            addvars.append(
                controller.get('init', {}).get('arguments',
                                               {}).get('filter', None))
            addvars.append(
                controller.get('init', {}).get('arguments',
                                               {}).get('submodel_string',
                                                       None))
            addvars.append(
                controller.get('init', {}).get('arguments',
                                               {}).get('choice_attribute_name',
                                                       None))
            addvars.append(
                controller.get('prepare_for_run',
                               {}).get('arguments',
                                       {}).get('agent_filter', None))
            for var in addvars:
                if isinstance(var, str):
                    self.var_list.append(
                        eval(var)
                    )  # eval because these entries are in double quotes, e.g. "'attribute'"

            self.var_tree = []
            l = []
            for var in self.var_list:
                l.append((var, []))
            self.var_tree.append((self.model_name, l))
        else:
            # this is meant to be for all models but is not working yet
            #self.config = config.get_run_configuration(scenario_name)
            self.var_list = []
            self.var_tree = []

        #TODO: there seems to be an issue with the (xml)dictionary approach -there can be
        # multiple, indexed, submodels. This only seems to retrieve the first

        #this collects all the variables models depend on
        #self.var_tree = []


#        self.var_list = []
#        #TODO: if we update to ElementTree 1.3, use
#        # model_manager/model_system//specification/[@type='submodel']/variables
#        for x in config._find_node('model_manager/models//specification'):
#            l = []
#            for y in x:
#                if y.get('type') == 'submodel':
#                    t = y.find('variable_list')
#                    if len(t) > 0:
#                        for z in config._convert_variable_list_to_data(t[0]):
#                            for k in lib.keys():
#                                if k[1] == z:
#                                    l.append((lib[k], []))
#                                    self.var_list.append(lib[k])
#            self.var_tree.append((x, l))

#given a name, return an instance of Variable

    def get_var(self, name):
        #creates a fake dataset, required for variable resolution
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='fake_dataset',
                                table_data={'id': array([], dtype='int32')})

            dataset = Dataset(in_storage=storage,
                              in_table_name='fake_dataset',
                              dataset_name=dataset_name,
                              id_name="id")
            return dataset

        var = VariableName(name)
        dataset = var.get_dataset_name()
        try:
            return self.factory.get_variable(var,
                                             create_fake_dataset(dataset),
                                             quiet=True)
        except LookupError:
            #print "LOOKUP ERROR: " + name
            return None
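
A usage sketch for get_var (assumed, not from the source): it resolves a fully qualified name through the factory and returns None when the lookup fails, which the dependency-tree code below treats as a primary attribute.

# 'dq' is a hypothetical DependencyQuery built from a project configuration.
var = dq.get_var('opus_core.tests.a_test_variable')
if var is None:
    print 'primary attribute or unknown variable'
else:
    print var.name()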

    #given a name, returns the tree with the model at the root, and the vars it depends on as leaves
    def get_model_vars(self, name=None, group=None):
        if name is None:
            name = self.model
        if group is None:
            group = self.model_group
        model_prefix = ''
        if group is not None:
            model_prefix = '%s_' % group

        def find(f, seq):
            for item in seq:
                if f(item):
                    return item

        model = find(lambda x: x[0] == '%s%s' % (model_prefix, name),
                     self.var_tree)
        if model is None:
            raise ValueError("Model " + name + " not found.")
        else:
            return model

    # returns a list of VariableNames a model depends on
    def get_model_var_list(self, name, group=None):
        ret = []

        def rec(xs):
            for x in xs:
                ret.append(VariableName(x[0]))
                rec(x[1])

        rec(
            map(self.get_dep_tree_from_name,
                extract_leaves(self.get_model_vars(name, group)[1])))
        return elim_dups(ret)

    #get a dependency tree for a variable given its name
    def get_dep_tree_from_name(self, name):
        varclass = self.get_var(name)
        if (varclass == None): return (name, [], "primary")
        return self.get_dep_tree(varclass)

    #returns a dependency tree given a particular variable
    def get_dep_tree(self, inp):
        result_others = []
        dependencies_list = inp.get_current_dependencies()
        for x in dependencies_list:
            dep_item = x[0]
            if isinstance(dep_item, str):
                result_others.append(self.get_dep_tree_from_name(dep_item))
            else:
                print "Attribute!"
        return (inp.name(), elim_dups(result_others))

    def all_models_tree(self):
        # var_tree is a list of (model_name, variables) pairs, so build the tagged
        # triples directly rather than calling iteritems() on a list.
        return map(self.get_dep_tree_from_name, extract_leaves(self.var_tree)) \
             + [(model_name, model_vars, "model") for (model_name, model_vars) in self.var_tree]

    def model_tree(self):
        model = self.get_model_vars()
        return map(self.get_dep_tree_from_name, extract_leaves(model[1])) \
                + [(model[0], model[1],"model")]

    def vars_tree(self, vl):
        return map(self.get_dep_tree_from_name, extract_leaves(vl))
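
A sketch of walking the nested trees these methods return (node shapes taken from the code above; the model name is a placeholder):

# Nodes are (name, children) pairs; primary leaves are (name, [], "primary").
def print_tree(node, depth=0):
    print '  ' * depth + node[0]
    for child in node[1]:
        print_tree(child, depth + 1)

# Hypothetical: dq = DependencyQuery(config, model='real_estate_price_model')
for subtree in dq.model_tree():
    print_tree(subtree)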
class DependencyQuery:
    def __init__(self, config, model=None, model_group=None, specification=None, scenario_name=None):
        self.factory = VariableFactory()

        lib = config.get_expression_library()
        self.factory.set_expression_library(lib)

        self.model = model
        self.model_group = model_group
        
        if model is not None:
            if specification is None:
                specification_dict = config.get_estimation_specification(model, model_group)
                if model_group is not None:
                    specification_dict = specification_dict[model_group]
                spec = get_specification_for_estimation(specification_dict)
            else:
                spec = specification
            model_prefix = ''
            if model_group is not None:
                model_prefix = '%s_' % model_group
            self.model_name = '%s%s' % (model_prefix, model)
            
            self.var_list = spec.get_distinct_long_variable_names().tolist()
            
            #check other model nodes, such as agents_filter, submodel_string or filter
#            config_node_path = "model_manager/models/model[@name='%s']" % self.model
#            model_node = config._find_node(config_node_path)
#            controller = config._convert_model_to_dict(model_node)
#            addvars = []
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('filter', None))
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('submodel_string', None))
#            addvars.append(controller.get('init', {}).get('arguments', {}).get('choice_attribute_name', None))
#            addvars.append(controller.get('prepare_for_run', {}).get('arguments', {}).get('agent_filter', None))

            # This assumes that xml nodes contain the tag 'model_dependency_type'
            self.model_structure_dependencies = config.model_dependencies(self.model)
            self.var_list = self.var_list + self.model_structure_dependencies.get('variable', [])
#            for var in addvars:
#                if isinstance(var, str):
#                    self.var_list.append(eval(var)) # eval because these entries are in double quotes, e.g. "'attribute'"
                    
            self.var_tree = []
            l = []
            for var in self.var_list:
                l.append((var, []))
            self.var_tree.append((self.model_name, l))
        else:
            # this is meant to be for all models but is not working yet
            #self.config = config.get_run_configuration(scenario_name)
            self.var_list = []
            self.var_tree = []
            
        #TODO: there seems to be an issue with the (xml)dictionary approach -there can be
        # multiple, indexed, submodels. This only seems to retrieve the first

        #this collects all the variables models depend on
        #self.var_tree = []
#        self.var_list = []
#        #TODO: if we update to ElementTree 1.3, use
#        # model_manager/model_system//specification/[@type='submodel']/variables
#        for x in config._find_node('model_manager/models//specification'):
#            l = []
#            for y in x:
#                if y.get('type') == 'submodel':
#                    t = y.find('variable_list')
#                    if len(t) > 0:
#                        for z in config._convert_variable_list_to_data(t[0]):
#                            for k in lib.keys():
#                                if k[1] == z:
#                                    l.append((lib[k], []))
#                                    self.var_list.append(lib[k])
#            self.var_tree.append((x, l))

    #given a name, return an instance of Variable
    def get_var(self, name):
        #creates a fake dataset, required for variable resolution
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(
                table_name='fake_dataset',
                table_data={
                    'id':array([], dtype='int32')
                    }
                )

            dataset = Dataset(in_storage=storage, in_table_name='fake_dataset', dataset_name=dataset_name, id_name="id")
            return dataset
        var = VariableName(name)
        dataset = var.get_dataset_name()
        try:
            return self.factory.get_variable(var, create_fake_dataset(dataset), quiet=True)
        except LookupError:
            #print "LOOKUP ERROR: " + name
            return None

    #given a name, returns the tree with the model at the root, and the vars it depends on as leaves
    def get_model_vars(self, name=None, group=None):
        if name is None:
            name = self.model
        if group is None:
            group = self.model_group
        model_prefix = ''
        if group is not None:
            model_prefix = '%s_' % group
        def find(f, seq):
            for item in seq:
                if f(item):
                    return item
        model = find(lambda x: x[0] == '%s%s' % (model_prefix, name), self.var_tree)
        if model is None:
            raise ValueError("Model " + name + " not found.")
        else:
            return model

    # returns a list of VariableNames a model depends on
    def get_model_var_list(self, name, group=None):
        ret = []
        def rec(xs):
            for x in xs:
                ret.append(VariableName(x[0]))
                rec(x[1])
        rec(map(self.get_dep_tree_from_name, extract_leaves(self.get_model_vars(name, group)[1])))
        return elim_dups(ret)

    #get a dependency tree for a variable given its name
    def get_dep_tree_from_name(self, name):
        varclass = self.get_var(name)
        if(varclass == None): return (name, [], "primary")
        return self.get_dep_tree(varclass)

    #returns a dependency tree given a particular variable
    def get_dep_tree(self, inp):
        result_others = []
        dependencies_list = inp.get_current_dependencies()
        for x in dependencies_list:
            dep_item = x[0]
            if isinstance(dep_item, str):
                result_others.append(self.get_dep_tree_from_name(dep_item))
            else:
                print "Attribute!"
        return (inp.name(), elim_dups(result_others))

    def all_models_tree(self):
        # var_tree is a list of (model_name, variables) pairs, so build the tagged
        # triples directly rather than calling iteritems() on a list.
        return map(self.get_dep_tree_from_name, extract_leaves(self.var_tree)) \
             + [(model_name, model_vars, "model") for (model_name, model_vars) in self.var_tree]

    def model_tree(self):
        model = self.get_model_vars()
        return map(self.get_dep_tree_from_name, extract_leaves(model[1])) \
                + [(model[0], model[1],"model")]

    def vars_tree(self, vl):
        return map(self.get_dep_tree_from_name, extract_leaves(vl))
    
    def get_model_structure_dependencies(self):
        return self.model_structure_dependencies
Example #24
    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default(object): None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(
                        years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool(
                        ).remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=
                                write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)
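
A hedged sketch of the minimal resources this method expects, using only entries named in the docstring and read by the code above (values, the model name, and the import path are assumptions):

from opus_core.resources import Resources  # import path assumed

resources = Resources({
    'models': ['real_estate_price_model'],   # placeholder model name
    'years': (2001, 2005),                   # (start year, end year), length >= 2
    'debuglevel': 0,
    'cache_directory': '/tmp/opus_cache',    # placeholder path
    # expression_library keys are (dataset_name, variable_name) pairs
    'expression_library': {('mydataset', 'var1'): '2*sqrt(var1+10)'},
})
model_system.run(resources)                  # 'model_system' is the object defining run()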