def test_multiply(self):
    """Multiply attributes of two different datasets in one expression.

    Builds a 3-row 'test_agents' table and a 2-row 'test_locations' table,
    computes ``test_agent.income*test_location.cost`` on the
    'test_agent_x_test_location' dataset and checks both the resulting
    3x2 array and the names that VariableName parses out of the expression.
    """
    expr = 'test_agent.income*test_location.cost'
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='test_agents',
        table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
    )
    storage.write_table(
        table_name='test_locations',
        table_data={'id': array([1, 2]), 'cost': array([1000, 2000])}
    )
    dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
    test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
    result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
    # expected value at [i, j] is income[i] * cost[j]
    should_be = array([[1000, 2000], [20000, 40000], [500000, 1000000]])
    # assertTrue replaces the deprecated assert_ alias (same semantics)
    self.assertTrue(ma.allclose(result, should_be, rtol=1e-6), msg="Error in " + expr)
    name = VariableName(expr)
    # since the expression involves both test_agent and test_location, the dataset name should be None
    # and the interaction set names should be (test_agent, test_location) or (test_location, test_agent)
    self.assertEqual(name.get_dataset_name(), None)
    names = name.get_interaction_set_names()
    self.assertEqual(len(names), 2)
    self.assertTrue('test_agent' in names)
    self.assertTrue('test_location' in names)
def _update_variable_from_fields(self):
    ''' Copy the current contents of the gui widgets into self.variable.

    Sets the 'name', 'source', 'definition', 'dataset' and 'use' entries.
    The dataset is derived by parsing the definition; when the definition
    cannot be parsed, or an interaction set cannot be matched against the
    available dataset names, an error box is shown and the dataset is
    stored as '<unknown>'.
    '''
    self.variable['name'] = str(self.leVarName.text())
    self.variable['source'] = str(self.cboVarType.currentText())
    self.variable['definition'] = str(self.le_var_def.document().toPlainText())

    try:
        parsed = VariableName(self.variable['definition'])
        dataset_name = parsed.get_dataset_name()
        interaction_set_names = parsed.get_interaction_set_names()
    except (SyntaxError, ValueError):
        MessageBox.error(
            mainwindow=self,
            text='parse error for variable',
            detailed_text='setting dataset name for this variable to <unknown>')
        dataset_name = '<unknown>'
        interaction_set_names = None

    if dataset_name is None and interaction_set_names is not None:
        # It's an interaction set: try "a_x_b" first, then "b_x_a",
        # against the names in available_datasets
        available = get_available_dataset_names(self.validator.project)
        first = interaction_set_names[0]
        second = interaction_set_names[1]
        candidates = (first + '_x_' + second, second + '_x_' + first)
        for candidate in candidates:
            if candidate in available:
                dataset_name = candidate
                break
        else:
            # neither ordering is a known dataset
            MessageBox.error(
                mainwindow=self,
                text='unable to find an interaction set in available_datasets for this variable',
                detailed_text="tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>" % candidates)
            dataset_name = '<unknown>'
    self.variable['dataset'] = dataset_name

    # the radio buttons decide how the variable is used
    if self.rbUseModel.isChecked():
        usage = 'model variable'
    elif self.rbUseIndicator.isChecked():
        usage = 'indicator'
    else:
        usage = 'both'
    self.variable['use'] = usage
def _update_variable_from_fields(self):
    ''' Refresh self.variable from what the user entered in the gui widgets.

    The name, source and definition are read straight from the widgets.
    The dataset is taken from the parsed definition (or resolved to one of
    the two possible interaction set names), falling back to '<unknown>'
    with an error box when that is not possible. The use is taken from the
    radio buttons.
    '''
    self.variable['name'] = str(self.leVarName.text())
    self.variable['source'] = str(self.cboVarType.currentText())
    self.variable['definition'] = str(self.le_var_def.document().toPlainText())

    try:
        var_name = VariableName(self.variable['definition'])
        dataset_name = var_name.get_dataset_name()
        interaction_set_names = var_name.get_interaction_set_names()
    except (SyntaxError, ValueError):
        MessageBox.error(mainwindow=self,
                         text='parse error for variable',
                         detailed_text='setting dataset name for this variable to <unknown>')
        dataset_name = '<unknown>'
        interaction_set_names = None

    is_interaction = dataset_name is None and interaction_set_names is not None
    if is_interaction:
        # Look up both possible orderings of the interaction set name
        # in available_datasets
        known_names = get_available_dataset_names(self.validator.project)
        left = interaction_set_names[0]
        right = interaction_set_names[1]
        forward = '_x_'.join((left, right))
        backward = '_x_'.join((right, left))
        if forward in known_names:
            dataset_name = forward
        elif backward in known_names:
            dataset_name = backward
        else:
            MessageBox.error(mainwindow=self,
                             text='unable to find an interaction set in available_datasets for this variable',
                             detailed_text="tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>" % (forward, backward))
            dataset_name = '<unknown>'
    self.variable['dataset'] = dataset_name

    # model button wins over indicator button; neither checked means 'both'
    self.variable['use'] = ('model variable' if self.rbUseModel.isChecked()
                            else 'indicator' if self.rbUseIndicator.isChecked()
                            else 'both')
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=False, with_replacement=False,
        resources=None, dataset_pool=None):
    """Sample alternatives from dataset2 for the agents of dataset1.

    For every agent, sample_size (a scalar) alternatives are sampled using
    weight as the sampling weight. Also refer to the documentation of
    interaction_dataset.

    Arguments:
    dataset1 -- the agent set.
    dataset2 -- the choice set the alternatives are sampled from.
    index1 -- if not None, only sample alternatives for agents with these
        indices; otherwise all agents are used.
    index2 -- if not None, only sample alternatives from these indices of
        dataset2; otherwise all of dataset2 is used.
    sample_size -- number of alternatives per agent. When
        include_chosen_choice is true the chosen choice takes one of the
        slots, so sample_size - 1 alternatives are drawn.
    weight -- sampling weight: an attribute name of dataset2, a variable
        name of dataset2 or of an interaction set, a 1d array, or a 2d
        array of shape (index1.size, index2.size). An array whose last
        dimension has the size of dataset2 is subset by index2. None or an
        empty string gives every alternative the same weight.
    include_chosen_choice -- if true, the agent's currently chosen
        alternative is put in the first column of the sample.
    with_replacement -- whether to sample with replacement (see the review
        note in the 2d branch below).
    resources -- passed to Resources(); the arguments above are merged into
        it with merge_if_not_None. The optional key
        "include_mnl_bias_correction_term" is also read from it.
    dataset_pool -- pool used for computing variables; when None it is
        taken from SessionConfiguration, or a new DatasetPool is created.

    Returns the interaction dataset built by self.create_interaction_dataset
    with the attributes '__sampling_probability' and 'chosen_choice' added,
    or None when index1 or index2 is empty.

    Raises ValueError if weight is a string that cannot be resolved or its
    size matches neither dataset2 nor index2; TypeError if weight has an
    unsupported type.
    """
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except Exception:
            # no usable session configuration: fall back to an empty pool
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "sample_size": sample_size,
        "weight": weight,
        "with_replacement": with_replacement,
        "include_chosen_choice": include_chosen_choice,
    })
    local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])

    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None

    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        # the chosen choice occupies one of the sample_size slots
        J = J - 1

    with_replacement = local_resources.get("with_replacement")

    # --- resolve weight into an array and determine its rank ---
    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        else:
            varname = VariableName(weight)
            if varname.get_dataset_name() == choice.get_dataset_name():
                weight = choice.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            elif varname.get_interaction_set_names() is not None:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight = interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
                assert (len(weight.shape) >= rank_of_weight)
            else:
                err_msg = ("weight is neither a known attribute name "
                           "nor a simple variable from the choice dataset "
                           "nor an interaction variable: '%s'" % weight)
                logger.log_error(err_msg)
                raise ValueError(err_msg)
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:  ## weight is None or empty string
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unkown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    # weight must line up with index2; if it lines up with the whole
    # choice set instead, subset it by index2
    if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    #chosen_choice = ones(index1.size) * UNPLACED_ID
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
    # index of each agent's chosen choice within the choice set ...
    chosen_choice_index = choice.try_get_id_index(
        chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
    # ... and its position within index2
    chosen_choice_index_to_index2 = lookup(
        chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)

    if rank_of_weight == 1:  # if weight_array is 1d, then each agent shares the same weight for choices
        replace = with_replacement
        non_zero_counts = nonzerocounts(weight)
        if non_zero_counts < J:
            logger.log_warning(
                "weight array dosen't have enough non-zero counts, use sample with replacement")
            replace = True
        if non_zero_counts > 0:
            sampled_index = prob2dsample(
                index2,
                sample_size=(index1.size, J),
                prob_array=prob,
                exclude_index=chosen_choice_index_to_index2,
                replace=replace,
                return_index=True)
        else:
            # all alternatives have a zero weight
            sampled_index = zeros((index1.size, 0), dtype=DTYPE)
        #return index2[sampled_index]

    if rank_of_weight == 2:
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1

        for i in range(index1.size):
            # NOTE(review): `replace` is assigned in this branch but never
            # passed to probsample_noreplace, so the warning's "use sample
            # with replacement" is not acted on here -- confirm intent.
            replace = with_replacement
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                logger.log_warning(
                    "weight array dosen't have enough non-zero counts, use sample with replacement")
                replace = True

            #exclude_index passed to probsample_noreplace needs to be indexed to index2
            sampled_index[i, :] = probsample_noreplace(
                index2,
                sample_size=J,
                prob_array=i_prob,
                exclude_index=chosen_choice_index_to_index2[i],
                return_index=True)

    # NOTE(review): no axis is given; if `take` is numpy.take this indexes
    # the flattened prob -- confirm that is intended when prob is 2d.
    sampling_prob = take(prob, sampled_index)
    # keep the positions within prob/index2 before mapping to choice-set indices
    sampled_index_within_prob = sampled_index.copy()
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
    if include_chosen_choice:
        # prepend the chosen choice as column 0
        sampled_index = column_stack((chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
        ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
        sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
        ## if chosen choice chosen equals unplaced_id then the sampling prob is 0
        sampling_prob_for_chosen_choices[where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    if local_resources.get("include_mnl_bias_correction_term", False):
        if include_chosen_choice:
            sampled_index_within_prob = column_stack(
                (chosen_choice_index_to_index2[:, newaxis], sampled_index_within_prob))
        interaction_dataset.add_mnl_bias_correction_term(prob, sampled_index_within_prob)

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]]=where_chosen[1]
    #return (sampled_index, chosen_choice)

    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=False, with_replacement=False,
        resources=None, dataset_pool=None):
    """Sample alternatives from dataset2 for the agents of dataset1.

    For every agent, sample_size (a scalar) alternatives are sampled using
    weight as the sampling weight. Also refer to the documentation of
    interaction_dataset.

    Arguments:
    dataset1 -- the agent set.
    dataset2 -- the choice set the alternatives are sampled from.
    index1 -- if not None, only sample alternatives for agents with these
        indices; otherwise all agents are used.
    index2 -- if not None, only sample alternatives from these indices of
        dataset2; otherwise all of dataset2 is used.
    sample_size -- number of alternatives per agent. When
        include_chosen_choice is true the chosen choice takes one of the
        slots, so sample_size - 1 alternatives are drawn.
    weight -- sampling weight: an attribute name of dataset2, a variable
        name of dataset2 or of an interaction set, a 1d array, or a 2d
        array of shape (index1.size, index2.size). An array whose last
        dimension has the size of dataset2 is subset by index2. None or an
        empty string gives every alternative the same weight.
    include_chosen_choice -- if true, the agent's currently chosen
        alternative is put in the first column of the sample.
    with_replacement -- whether to sample with replacement (see the review
        note in the 2d branch below).
    resources -- passed to Resources(); the arguments above are merged into
        it with merge_if_not_None. The optional key
        "include_mnl_bias_correction_term" is also read from it.
    dataset_pool -- pool used for computing variables; when None it is
        taken from SessionConfiguration, or a new DatasetPool is created.

    Returns the interaction dataset built by self.create_interaction_dataset
    with the attributes '__sampling_probability' and 'chosen_choice' added,
    or None when index1 or index2 is empty.

    Raises ValueError if weight is a string that cannot be resolved or its
    size matches neither dataset2 nor index2; TypeError if weight has an
    unsupported type.
    """
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except Exception:
            # no usable session configuration: fall back to an empty pool
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None(
        {"dataset1": dataset1, "dataset2": dataset2,
         "index1": index1, "index2": index2,
         "sample_size": sample_size, "weight": weight,
         "with_replacement": with_replacement,
         "include_chosen_choice": include_chosen_choice})

    local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])
    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None

    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        # the chosen choice occupies one of the sample_size slots
        J = J - 1

    with_replacement = local_resources.get("with_replacement")

    # --- resolve weight into an array and determine its rank ---
    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        else:
            varname = VariableName(weight)
            if varname.get_dataset_name() == choice.get_dataset_name():
                weight = choice.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            elif varname.get_interaction_set_names() is not None:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight = interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
                assert(len(weight.shape) >= rank_of_weight)
            else:
                err_msg = ("weight is neither a known attribute name "
                           "nor a simple variable from the choice dataset "
                           "nor an interaction variable: '%s'" % weight)
                logger.log_error(err_msg)
                raise ValueError(err_msg)
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:  ## weight is None or empty string
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unkown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    # weight must line up with index2; if it lines up with the whole
    # choice set instead, subset it by index2
    if (weight.size != index2.size) and (weight.shape[rank_of_weight-1] != index2.size):
        if weight.shape[rank_of_weight-1] == choice.size():
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    #chosen_choice = ones(index1.size) * UNPLACED_ID
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
    # index of each agent's chosen choice within the choice set ...
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
    # ... and its position within index2
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)

    if rank_of_weight == 1:  # if weight_array is 1d, then each agent shares the same weight for choices
        replace = with_replacement
        non_zero_counts = nonzerocounts(weight)
        if non_zero_counts < J:
            logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
            replace = True
        if non_zero_counts > 0:
            sampled_index = prob2dsample(index2, sample_size=(index1.size, J),
                                         prob_array=prob, exclude_index=chosen_choice_index_to_index2,
                                         replace=replace, return_index=True)
        else:
            # all alternatives have a zero weight
            sampled_index = zeros((index1.size, 0), dtype=DTYPE)
        #return index2[sampled_index]

    if rank_of_weight == 2:
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1

        for i in range(index1.size):
            # NOTE(review): `replace` is assigned in this branch but never
            # passed to probsample_noreplace, so the warning's "use sample
            # with replacement" is not acted on here -- confirm intent.
            replace = with_replacement
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
                replace = True

            #exclude_index passed to probsample_noreplace needs to be indexed to index2
            sampled_index[i, :] = probsample_noreplace(index2, sample_size=J, prob_array=i_prob,
                                                       exclude_index=chosen_choice_index_to_index2[i],
                                                       return_index=True)

    # NOTE(review): no axis is given; if `take` is numpy.take this indexes
    # the flattened prob -- confirm that is intended when prob is 2d.
    sampling_prob = take(prob, sampled_index)
    # keep the positions within prob/index2 before mapping to choice-set indices
    sampled_index_within_prob = sampled_index.copy()
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
    if include_chosen_choice:
        # prepend the chosen choice as column 0
        sampled_index = column_stack((chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
        ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
        sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
        ## if chosen choice chosen equals unplaced_id then the sampling prob is 0
        sampling_prob_for_chosen_choices[where(chosen_choice_index == UNPLACED_ID)[0],] = 0.0
        sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    if local_resources.get("include_mnl_bias_correction_term", False):
        if include_chosen_choice:
            sampled_index_within_prob = column_stack((chosen_choice_index_to_index2[:, newaxis], sampled_index_within_prob))
        interaction_dataset.add_mnl_bias_correction_term(prob, sampled_index_within_prob)

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]]=where_chosen[1]
    #return (sampled_index, chosen_choice)

    return interaction_dataset