Example #1
 def test_multiply(self):
     expr = 'test_agent.income*test_location.cost'
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='test_agents', 
         table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
         )
     storage.write_table(
         table_name='test_locations', 
         table_data={'id': array([1,2]), 'cost': array([1000, 2000])}
         )
     dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
     test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
     result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
     should_be = array([[1000, 2000], 
                        [20000, 40000], 
                        [500000, 1000000]])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), msg = "Error in " + expr)
     name = VariableName(expr)
     # since the expression involves both test_agent and test_location, the dataset name should be None
     # and the interaction set names should be (test_agent, test_location) or (test_location, test_agent)
     self.assertEqual(name.get_dataset_name(), None)
     names = name.get_interaction_set_names()
     self.assertEqual(len(names),2)
     self.assert_('test_agent' in names)
     self.assert_('test_location' in names)
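
The behaviour checked above can be summarised more compactly. The following is only an illustrative sketch: the import path is the usual opus_core location for VariableName, and the return values for the single-dataset case are inferred from how the code in Example #2 uses the API.

    from opus_core.variables.variable_name import VariableName  # assumed import path

    # A single-dataset expression reports its dataset directly and is not an interaction.
    single = VariableName('test_agent.income')
    assert single.get_dataset_name() == 'test_agent'
    assert single.get_interaction_set_names() is None

    # An expression mixing two datasets reports no single dataset name;
    # instead both component names are returned (in either order).
    pair = VariableName('test_agent.income*test_location.cost')
    assert pair.get_dataset_name() is None
    assert sorted(pair.get_interaction_set_names()) == ['test_agent', 'test_location']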
Example #2
 def _update_variable_from_fields(self):
     '''Update the variable with values from the GUI widgets.'''
     self.variable['name'] = str(self.leVarName.text())
     self.variable['source'] = str(self.cboVarType.currentText())
     self.variable['definition'] = str(
         self.le_var_def.document().toPlainText())
     try:
         v = VariableName(self.variable['definition'])
         dataset_name = v.get_dataset_name()
         interaction_set_names = v.get_interaction_set_names()
     except (SyntaxError, ValueError):
         MessageBox.error(
             mainwindow=self,
             text='parse error for variable',
             detailed_text='setting dataset name for this variable to <unknown>')
         dataset_name = '<unknown>'
         interaction_set_names = None
     if dataset_name is None and interaction_set_names is not None:
         # It's an interaction set.  Look up possible names in available_datasets
         names = get_available_dataset_names(self.validator.project)
         n1 = interaction_set_names[0] + '_x_' + interaction_set_names[1]
         if n1 in names:
             dataset_name = n1
         else:
             n2 = interaction_set_names[1] + '_x_' + interaction_set_names[0]
             if n2 in names:
                 dataset_name = n2
             else:
                 MessageBox.error(
                     mainwindow=self,
                     text=
                     'unable to find an interaction set in available_datasets for this variable',
                     detailed_text=
                     "tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>"
                     % (n1, n2))
                 dataset_name = '<unknown>'
     self.variable['dataset'] = dataset_name
     if self.rbUseModel.isChecked():
         self.variable['use'] = 'model variable'
     elif self.rbUseIndicator.isChecked():
         self.variable['use'] = 'indicator'
     else:
         self.variable['use'] = 'both'
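
The interaction-set lookup above (try '<a>_x_<b>', then '<b>_x_<a>') can also be read in isolation. This helper is only an illustrative sketch of that logic, not part of the original class:

    def resolve_interaction_dataset_name(interaction_set_names, available_names):
        '''Return whichever '<a>_x_<b>' or '<b>_x_<a>' name appears in
        available_names, or None if neither ordering is found.'''
        a, b = interaction_set_names
        for candidate in (a + '_x_' + b, b + '_x_' + a):
            if candidate in available_names:
                return candidate
        return None

    # e.g. resolve_interaction_dataset_name(('test_agent', 'test_location'),
    #                                       get_available_dataset_names(self.validator.project))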
    def run(self,
            dataset1,
            dataset2,
            index1=None,
            index2=None,
            sample_size=10,
            weight=None,
            include_chosen_choice=False,
            with_replacement=False,
            resources=None,
            dataset_pool=None):
        """this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            try:
                sc = SessionConfiguration()
                dataset_pool = sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool()

        local_resources = Resources(resources)
        local_resources.merge_if_not_None({
            "dataset1": dataset1,
            "dataset2": dataset2,
            "index1": index1,
            "index2": index2,
            "sample_size": sample_size,
            "weight": weight,
            "with_replacement": with_replacement,
            "include_chosen_choice": include_chosen_choice
        })

        local_resources.check_obligatory_keys(
            ['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        choice = local_resources["dataset2"]
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())

        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return None

        include_chosen_choice = local_resources.get("include_chosen_choice",
                                                    False)
        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1

        with_replacement = local_resources.get("with_replacement")

        weight = local_resources.get("weight", None)
        if isinstance(weight, str):
            if weight in choice.get_known_attribute_names():
                weight = choice.get_attribute(weight)
                rank_of_weight = 1
            else:
                varname = VariableName(weight)
                if varname.get_dataset_name() == choice.get_dataset_name():
                    weight = choice.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 1
                elif varname.get_interaction_set_names() is not None:
                    ## weights can be an interaction variable
                    interaction_dataset = InteractionDataset(local_resources)
                    weight = interaction_dataset.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 2
                    assert (len(weight.shape) >= rank_of_weight)
                else:
                    err_msg = ("weight is neither a known attribute name "
                               "nor a simple variable from the choice dataset "
                               "nor an interaction variable: '%s'" % weight)
                    logger.log_error(err_msg)
                    raise ValueError(err_msg)
        elif isinstance(weight, ndarray):
            rank_of_weight = weight.ndim
        elif not weight:  ## weight is None or empty string
            weight = ones(index2.size)
            rank_of_weight = 1
        else:
            err_msg = "unkown weight type"
            logger.log_error(err_msg)
            raise TypeError, err_msg

        if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
            if weight.shape[rank_of_weight - 1] == choice.size():
                if rank_of_weight == 1:
                    weight = take(weight, index2)
                if rank_of_weight == 2:
                    weight = take(weight, index2, axis=1)
            else:
                err_msg = "weight array size doesn't match to size of dataset2 or its index"
                logger.log_error(err_msg)
                raise ValueError, err_msg

        prob = normalize(weight)

        #chosen_choice = ones(index1.size) * UNPLACED_ID
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
        chosen_choice_index = choice.try_get_id_index(
            chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index,
                                               index2,
                                               index_if_not_found=UNPLACED_ID)

        if rank_of_weight == 1:  # if weight_array is 1d, then each agent shares the same weight for choices
            replace = with_replacement  # start from the requested replacement setting
            non_zero_counts = nonzerocounts(weight)
            if non_zero_counts < J:
                logger.log_warning(
                    "weight array doesn't have enough non-zero counts; falling back to sampling with replacement"
                )
                replace = True
            if non_zero_counts > 0:
                sampled_index = prob2dsample(
                    index2,
                    sample_size=(index1.size, J),
                    prob_array=prob,
                    exclude_index=chosen_choice_index_to_index2,
                    replace=replace,
                    return_index=True)
            else:
                # all alternatives have a zero weight
                sampled_index = zeros((index1.size, 0), dtype=DTYPE)
            #return index2[sampled_index]

        if rank_of_weight == 2:
            sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1

            for i in range(index1.size):
                replace = with_replacement  # sampling with/without replacement
                i_prob = prob[i, :]
                if nonzerocounts(i_prob) < J:
                    logger.log_warning(
                        "weight array doesn't have enough non-zero counts; falling back to sampling with replacement"
                    )
                    replace = True

                #exclude_index passed to probsample_noreplace needs to be indexed to index2
                sampled_index[i, :] = probsample_noreplace(
                    index2,
                    sample_size=J,
                    prob_array=i_prob,
                    exclude_index=chosen_choice_index_to_index2[i],
                    return_index=True)
        sampling_prob = take(prob, sampled_index)
        sampled_index_within_prob = sampled_index.copy()
        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack(
                (chosen_choice_index[:, newaxis], sampled_index))
            is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
            is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
            #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
            ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
            sampling_prob_for_chosen_choices = take(
                prob, chosen_choice_index_to_index2[:, newaxis])
            ## if the chosen choice equals unplaced_id then the sampling prob is 0
            sampling_prob_for_chosen_choices[where(
                chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
            sampling_prob = column_stack(
                [sampling_prob_for_chosen_choices, sampling_prob])

        interaction_dataset = self.create_interaction_dataset(
            dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob,
                                          '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

        if local_resources.get("include_mnl_bias_correction_term", False):
            if include_chosen_choice:
                sampled_index_within_prob = column_stack(
                    (chosen_choice_index_to_index2[:, newaxis],
                     sampled_index_within_prob))
            interaction_dataset.add_mnl_bias_correction_term(
                prob, sampled_index_within_prob)

        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)

        return interaction_dataset
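
For intuition, the rank-2 weight branch above amounts to per-agent weighted sampling without replacement over the choice indices. The following self-contained numpy sketch shows just that step; np.random.choice stands in for probsample_noreplace, and the exclusion of the chosen choice is omitted:

    import numpy as np

    n_agents, n_choices, J = 3, 5, 2
    # One weight row per agent over the choices (shape (index1.size, index2.size) above).
    weight = np.array([[1., 0., 2., 1., 0.],
                       [0., 3., 1., 0., 1.],
                       [1., 1., 1., 1., 1.]])
    prob = weight / weight.sum(axis=1, keepdims=True)        # row-wise normalize, like normalize(weight)
    sampled_index = np.zeros((n_agents, J), dtype=int) - 1   # -1 marks "not yet sampled"
    for i in range(n_agents):
        # draw J distinct choice indices for agent i according to its own probability row
        sampled_index[i, :] = np.random.choice(n_choices, size=J, replace=False, p=prob[i, :])
    print(sampled_index)   # one row of sampled choice indices per agent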