def test_1d_weight_array_variant_sample_size_using_icc(self):
    """Stratified sampling with a named 1-d weight and the chosen choice included.

    Runs stratified_sampler for the "lucky" households (the first one is
    skipped) against the gridcells selected by "filter", then checks:
    the shape of the returned 2-d index, that the column flagged as
    "chosen_choice" points at each household's current grid_id, and that
    every other sampled alternative is in index2 and has a non-zero weight.
    """
    sample_size = 2
    index1 = where(self.households.get_attribute("lucky"))[0][1:]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    weight = self.gridcells.get_attribute("weight")
    sample_ret = stratified_sampler().run(
        dataset1=self.households, dataset2=self.gridcells,
        index1=index1, index2=index2,
        stratum="stratum_id", sample_size=sample_size,
        weight="weight", include_chosen_choice=True)

    # Recover, per agent, the column that holds the chosen alternative
    # (UNPLACED_ID where no column is flagged).
    sampled_index = sample_ret.get_2d_index()
    chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
    where_chosen = where(sample_ret.get_attribute("chosen_choice"))
    chosen_choices[where_chosen[0]] = where_chosen[1]

    self.assertEqual(sampled_index.shape,
                     (index1.size, self.num_strata * sample_size))
    self.assertEqual(chosen_choices.size, index1.size)

    # The alternative in the chosen column must be the gridcell the
    # household currently occupies.
    placed_agents_index = self.gridcells.try_get_id_index(
        self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
    chosen_choice_index = UNPLACED_ID * ones(index1.shape, dtype=DTYPE)
    w = where(chosen_choices >= 0)[0]
    chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
    self.assertTrue(alltrue(equal(placed_agents_index, chosen_choice_index)))

    # Drop the first column before checking the sampled alternatives.
    sampled_index = sampled_index[:, 1:]
    self.assertTrue(
        alltrue(lookup(sampled_index.ravel(), index2,
                       index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
    self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
def test_1d_weight_array(self):
    """weighted_sampler with a named 1-d weight, with and without the chosen choice.

    For include_chosen_choice in (0, 1) this checks the shape of the
    sampled 2-d index and that all sampled alternatives belong to index2
    and have a non-zero weight. When the chosen choice is included it also
    checks that the flagged column matches each household's grid_id.

    This is a stochastic model, so it may legitimately fail occasionally.
    """
    sample_size = 5
    index1 = where(self.households.get_attribute("lucky"))[0]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    weight = self.gridcells.get_attribute("weight")

    for icc in [0, 1]:  # include_chosen_choice?
        sampler_ret = weighted_sampler().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1, index2=index2,
            sample_size=sample_size, weight="weight",
            include_chosen_choice=icc)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sampler_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        self.assertEqual(sampled_index.shape, (index1.size, sample_size))

        if icc:
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = resize(
                array([UNPLACED_ID], dtype="int32"), index1.shape)
            w = where(chosen_choices >= 0)[0]
            # for 64 bit machines, need to coerce the type to int32 -- on a
            # 32 bit machine the astype(int32) doesn't do anything
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
        self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
def test_1d_weight_array_variant_sample_size(self):
    """Stratified sampling that draws only from the chosen stratum.

    Calls stratified_sampler with sample_size=0 and
    sample_size_from_chosen_stratum=2, with and without the chosen choice.
    The expected width of the sampled index is 2, plus one column when the
    chosen choice is included.
    """
    sample_size_from_chosen_stratum = 2
    index1 = where(self.households.get_attribute("lucky"))[0]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    weight = self.gridcells.get_attribute("weight")

    for icc in [0, 1]:  # include_chosen_choice?
        sample_ret = stratified_sampler().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1, index2=index2,
            stratum="stratum_id", sample_size=0,
            sample_size_from_chosen_stratum=sample_size_from_chosen_stratum,
            weight="weight", include_chosen_choice=icc)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sample_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
        where_chosen = where(sample_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        expected_columns = sample_size_from_chosen_stratum
        if icc:
            expected_columns += 1  # one extra column for the chosen choice
        self.assertEqual(sampled_index.shape,
                         (index1.size, expected_columns))

        if icc:
            self.assertEqual(chosen_choices.size, index1.size)
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = UNPLACED_ID * ones(index1.shape,
                                                     dtype=DTYPE)
            w = where(chosen_choices >= 0)[0]
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
        self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
def test_1d_weight_array(self):
    """stratified_sampler with a named 1-d weight and sample_size=1.

    Runs with and without the chosen choice. The expected width of the
    sampled index is self.num_strata, plus one column when the chosen
    choice is included. Sampled alternatives must belong to index2 and
    have a non-zero weight.

    This is a stochastic model, so it may legitimately fail occasionally.
    """
    index1 = where(self.households.get_attribute("lucky"))[0]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    weight = self.gridcells.get_attribute("weight")

    for icc in [0, 1]:  # include_chosen_choice?
        sample_ret = stratified_sampler().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1, index2=index2,
            stratum="stratum_id", sample_size=1,
            weight="weight", include_chosen_choice=icc)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sample_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
        where_chosen = where(sample_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        expected_columns = self.num_strata
        if icc:
            expected_columns += 1  # one extra column for the chosen choice
        self.assertEqual(sampled_index.shape,
                         (index1.size, expected_columns))

        if icc:
            self.assertEqual(chosen_choices.size, index1.size)
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = UNPLACED_ID * ones(index1.shape,
                                                     dtype=DTYPE)
            w = where(chosen_choices >= 0)[0]
            # for 64 bit machines, need to coerce the type to int32 -- on a
            # 32 bit machine the astype(int32) doesn't do anything
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
        self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
def test_2d_weight_array(self):
    """weighted_sampler with an explicit 2-d weight array.

    The weight matrix has one row per household: the gridcell "weight"
    attribute plus that household's "lucky" value. The sampler is run with
    and without the chosen choice, and the sampled alternatives must belong
    to index2 and have a non-zero weight in the corresponding row.
    """
    sample_size = 5
    n = self.households.size()
    index1 = where(self.households.get_attribute("lucky"))[0]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    lucky = self.households.get_attribute("lucky")
    weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                    n, axis=0)
    # Add each household's "lucky" value to its own row (in place, so the
    # dtype of the weight matrix is kept).
    for row in range(n):
        weight[row, :] += lucky[row]

    for icc in [0, 1]:  # include_chosen_choice?
        sampler_ret = weighted_sampler().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1, index2=index2,
            sample_size=sample_size, weight=weight,
            include_chosen_choice=icc)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sampler_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        self.assertEqual(sampled_index.shape, (index1.size, sample_size))

        if icc:
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = resize(
                array([UNPLACED_ID], dtype="int32"), index1.shape)
            w = where(chosen_choices >= 0)[0]
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

        # NOTE(review): j indexes rows (agents) but runs over
        # range(sample_size); this only checks the first `sample_size`
        # rows and needs index1.size >= sample_size -- confirm intent.
        for j in range(sample_size):
            self.assertTrue(
                all(not_equal(weight[j, sampled_index[j, :]], 0.0)))
def test_1(self):
    """weighted_sampler_by_category with category definitions in resources.

    Agents are categorized by "household.lucky" and choices by
    "gridcell.filter+1". No index2 is passed, so sampled alternatives are
    checked against all gridcells. Runs with and without the chosen choice.

    This is a stochastic model, so it may legitimately fail occasionally.
    """
    sample_size = 5
    index1 = where(self.households.get_attribute("lucky"))[0]
    weight = self.gridcells.get_attribute("weight")
    estimation_config = {
        "agent_category_definition": ["household.lucky"],
        "choice_category_definition": ["gridcell.filter+1"],
    }

    for icc in [0, 1]:  # include_chosen_choice?
        sampler_ret = weighted_sampler_by_category().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1,
            sample_size=sample_size,
            include_chosen_choice=icc,
            resources=estimation_config)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sampler_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
        where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        self.assertEqual(sampled_index.shape, (index1.size, sample_size))

        if icc:
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = resize(
                array([UNPLACED_ID], dtype=DTYPE), index1.shape)
            w = where(chosen_choices >= 0)[0]
            # for 64 bit machines, need to coerce the type to int32 -- on a
            # 32 bit machine the astype(int32) doesn't do anything
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(),
                           arange(self.gridcells.size()),
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
        self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
def test_2d_weight_array(self):
    """weighted_sampler with an explicit 2-d weight array.

    The weight matrix has one row per household: the gridcell "weight"
    attribute plus that household's "lucky" value. The sampler is run with
    and without the chosen choice, and the sampled alternatives must belong
    to index2 and have a non-zero weight in the corresponding row.
    """
    sample_size = 5
    n = self.households.size()
    index1 = where(self.households.get_attribute("lucky"))[0]
    index2 = where(self.gridcells.get_attribute("filter"))[0]
    lucky = self.households.get_attribute("lucky")
    weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                    n, axis=0)
    # Add each household's "lucky" value to its own row (in place, so the
    # dtype of the weight matrix is kept).
    for row in range(n):
        weight[row, :] += lucky[row]

    for icc in [0, 1]:  # include_chosen_choice?
        sampler_ret = weighted_sampler().run(
            dataset1=self.households, dataset2=self.gridcells,
            index1=index1, index2=index2,
            sample_size=sample_size, weight=weight,
            include_chosen_choice=icc)

        # Recover, per agent, the column that holds the chosen alternative.
        sampled_index = sampler_ret.get_2d_index()
        chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
        chosen_choices[where_chosen[0]] = where_chosen[1]

        self.assertEqual(sampled_index.shape, (index1.size, sample_size))

        if icc:
            placed_agents_index = self.gridcells.try_get_id_index(
                self.households.get_attribute("grid_id")[index1],
                UNPLACED_ID)
            chosen_choice_index = resize(
                array([UNPLACED_ID], dtype="int32"), index1.shape)
            w = where(chosen_choices >= 0)[0]
            chosen_choice_index[w] = sampled_index[
                w, chosen_choices[w]].astype(int32)
            self.assertTrue(
                alltrue(equal(placed_agents_index, chosen_choice_index)))
            # Drop the chosen-choice column before the checks below.
            sampled_index = sampled_index[:, 1:]

        self.assertTrue(
            alltrue(lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

        # NOTE(review): j indexes rows (agents) but runs over
        # range(sample_size); this only checks the first `sample_size`
        # rows and needs index1.size >= sample_size -- confirm intent.
        for j in range(sample_size):
            self.assertTrue(
                all(not_equal(weight[j, sampled_index[j, :]], 0.0)))
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=None, with_replacement=True,
        resources=None, dataset_pool=None):
    """Sample alternatives from dataset2 for the agents of dataset1, by category.

    Agents and choices are grouped into categories by
    get_category_and_frequency(); for every agent category the sampling
    probability of a choice is the normalized frequency of the choice's
    category in that agent category's row of the frequency table.

    Arguments:
      dataset1, dataset2 -- agent set and choice set.
      index1 -- indices of the agents to sample for (default: all agents).
      index2 -- indices of the choices to sample from (default: all choices).
      sample_size -- number of alternatives per agent (scalar). When the
          chosen choice is included it occupies one of these slots.
      weight -- merged into the resources but not read by this method.
      include_chosen_choice -- if true, the agent's current choice is put
          in column 0 of the result.
      with_replacement -- must be true; sampling without replacement
          raises NotImplementedError.
      resources -- may additionally provide "agent_category_definition",
          "choice_category_definition", "agent_filter_attribute" and
          "category_inflating_factor" (default 10).
      dataset_pool -- taken from the SessionConfiguration when None.

    Returns the interaction dataset built by create_interaction_dataset(),
    with the attributes '__sampling_probability' and 'chosen_choice'
    added. If either index is empty, a warning is logged and (None, None)
    is returned. Also refer to the documentation of interaction_dataset.
    """
    if dataset_pool is None:
        sc = SessionConfiguration()
        try:
            dataset_pool = sc.get_dataset_pool()
        except Exception:
            # Fall back to a fresh pool built from the session's packages.
            dataset_pool = DatasetPool(sc.package_order)

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "sample_size": sample_size,
        "weight": weight,
        "with_replacement": with_replacement,
        "include_chosen_choice": include_chosen_choice,
    })
    local_resources.check_obligatory_keys(
        ['dataset1', 'dataset2', 'sample_size'])

    agent = local_resources["dataset1"]
    choice = local_resources["dataset2"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        # NOTE(review): the message says None but a 2-tuple is returned
        # (left over from the older tuple return); kept for compatibility.
        return (None, None)

    agent_category_definition = local_resources.get(
        "agent_category_definition", [])
    choice_category_definition = local_resources.get(
        "choice_category_definition", [])
    agent_filter_attribute = local_resources.get(
        "agent_filter_attribute", None)
    category_inflating_factor = local_resources.get(
        "category_inflating_factor", 10)

    frequency, unique_agent_category_id, unique_choice_category_id, \
        agent_category_id, choice_category_id = get_category_and_frequency(
            agent, agent_category_definition,
            choice, choice_category_definition,
            agent_filter_attribute, category_inflating_factor,
            dataset_pool=dataset_pool)

    include_chosen_choice = local_resources.get("include_chosen_choice",
                                                False)
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    chosen_choice_index = choice.try_get_id_index(
        chosen_choice_id, return_value_if_not_found=-1)
    # Position of each chosen choice within index2 (UNPLACED_ID if absent);
    # needed because prob is indexed to index2, not to the choice set.
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    J = local_resources["sample_size"]
    if include_chosen_choice:
        J = J - 1  # one slot is reserved for the chosen choice

    local_resources.merge_with_defaults(
        {'with_replacement': with_replacement})
    with_replacement = local_resources.get("with_replacement")

    sampled_index = empty((index1.size, J), dtype=DTYPE)
    sampling_prob = empty((index1.size, J), dtype="float64")
    # Sampling probability of each agent's chosen choice, filled per
    # category inside the loop because prob differs between categories.
    sampling_prob_for_chosen_choices = zeros((index1.size, 1),
                                             dtype="float64")

    _digitize, _where, _normalize = digitize, where, normalize
    _ncumsum, _rand, _searchsorted = ncumsum, rand, searchsorted  # speed hack
    agent_category_of_index1 = agent_category_id[index1]

    for i in range(unique_agent_category_id.size):
        category_id = unique_agent_category_id[i]
        agents_in_this_category = _where(
            agent_category_of_index1 == category_id)[0]
        num_agents = agents_in_this_category.size
        if num_agents == 0:
            continue

        ## divide frequency by the mean frequency to avoid overflow
        weights = frequency[
            i,
            _digitize(choice_category_id[index2],
                      unique_choice_category_id) - 1
        ] / frequency[i, :].mean()
        prob = _normalize(weights)
        # Inverse-CDF sampling: J uniform draws per agent, with replacement.
        index = _searchsorted(_ncumsum(prob),
                              _rand(num_agents * J)).reshape(-1, J)

        if not with_replacement:
            # An earlier, disabled draft re-drew repeated alternatives row
            # by row for a bounded number of iterations.
            raise NotImplementedError(
                "Sample without replacement is not implemented for this sampler yet.")

        sampled_index[agents_in_this_category, :] = index
        sampling_prob[agents_in_this_category, :] = prob[index]
        if include_chosen_choice:
            sampling_prob_for_chosen_choices[agents_in_this_category, 0] = \
                prob[chosen_choice_index_to_index2[agents_in_this_category]]

    # Translate positions within index2 into indices of the choice set.
    sampled_index = index2[sampled_index]
    if include_chosen_choice:
        sampled_index = column_stack(
            (chosen_choice_index[:, newaxis], sampled_index))

    # Allocated after the chosen column is stacked so that the flag array
    # has the same shape as the final sampled index.
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        ## if the chosen choice is unplaced, its sampling prob is 0
        sampling_prob_for_chosen_choices[
            where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack(
            [sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(
        dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob,
                                      '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]]=where_chosen[1]
    #return (sampled_index, chosen_choice)
    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=None, with_replacement=True,
        resources=None, dataset_pool=None):
    """Sample alternatives from dataset2 for the agents of dataset1, by category.

    Agents and choices are grouped into categories by
    get_category_and_frequency(); for every agent category the sampling
    probability of a choice is the normalized frequency of the choice's
    category in that agent category's row of the frequency table.

    Arguments:
      dataset1, dataset2 -- agent set and choice set.
      index1 -- indices of the agents to sample for (default: all agents).
      index2 -- indices of the choices to sample from (default: all choices).
      sample_size -- number of alternatives per agent (scalar). When the
          chosen choice is included it occupies one of these slots.
      weight -- merged into the resources but not read by this method.
      include_chosen_choice -- if true, the agent's current choice is put
          in column 0 of the result.
      with_replacement -- must be true; sampling without replacement
          raises NotImplementedError.
      resources -- may additionally provide "agent_category_definition",
          "choice_category_definition", "agent_filter_attribute" and
          "category_inflating_factor" (default 10).
      dataset_pool -- taken from the SessionConfiguration when None.

    Returns the interaction dataset built by create_interaction_dataset(),
    with the attributes '__sampling_probability' and 'chosen_choice'
    added. If either index is empty, a warning is logged and (None, None)
    is returned. Also refer to the documentation of interaction_dataset.
    """
    if dataset_pool is None:
        sc = SessionConfiguration()
        try:
            dataset_pool = sc.get_dataset_pool()
        except Exception:
            # Fall back to a fresh pool built from the session's packages.
            dataset_pool = DatasetPool(sc.package_order)

    local_resources = Resources(resources)
    local_resources.merge_if_not_None(
        {"dataset1": dataset1, "dataset2": dataset2,
         "index1": index1, "index2": index2,
         "sample_size": sample_size, "weight": weight,
         "with_replacement": with_replacement,
         "include_chosen_choice": include_chosen_choice})
    local_resources.check_obligatory_keys(
        ['dataset1', 'dataset2', 'sample_size'])

    agent = local_resources["dataset1"]
    choice = local_resources["dataset2"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        # NOTE(review): the message says None but a 2-tuple is returned
        # (left over from the older tuple return); kept for compatibility.
        return (None, None)

    agent_category_definition = local_resources.get(
        "agent_category_definition", [])
    choice_category_definition = local_resources.get(
        "choice_category_definition", [])
    agent_filter_attribute = local_resources.get(
        "agent_filter_attribute", None)
    category_inflating_factor = local_resources.get(
        "category_inflating_factor", 10)

    frequency, unique_agent_category_id, unique_choice_category_id, \
        agent_category_id, choice_category_id = get_category_and_frequency(
            agent, agent_category_definition,
            choice, choice_category_definition,
            agent_filter_attribute, category_inflating_factor,
            dataset_pool=dataset_pool)

    include_chosen_choice = local_resources.get("include_chosen_choice",
                                                False)
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    chosen_choice_index = choice.try_get_id_index(
        chosen_choice_id, return_value_if_not_found=-1)
    # Position of each chosen choice within index2 (UNPLACED_ID if absent);
    # needed because prob is indexed to index2, not to the choice set.
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    J = local_resources["sample_size"]
    if include_chosen_choice:
        J = J - 1  # one slot is reserved for the chosen choice

    local_resources.merge_with_defaults(
        {'with_replacement': with_replacement})
    with_replacement = local_resources.get("with_replacement")

    sampled_index = empty((index1.size, J), dtype="int32")
    sampling_prob = empty((index1.size, J), dtype="float64")
    # Sampling probability of each agent's chosen choice, filled per
    # category inside the loop because prob differs between categories.
    sampling_prob_for_chosen_choices = zeros((index1.size, 1),
                                             dtype="float64")

    _digitize, _where, _normalize = digitize, where, normalize
    _ncumsum, _rand, _searchsorted = ncumsum, rand, searchsorted  # speed hack
    agent_category_of_index1 = agent_category_id[index1]

    for i in range(unique_agent_category_id.size):
        category_id = unique_agent_category_id[i]
        agents_in_this_category = _where(
            agent_category_of_index1 == category_id)[0]
        num_agents = agents_in_this_category.size
        if num_agents == 0:
            continue

        ## divide frequency by the mean frequency to avoid overflow
        weights = frequency[
            i,
            _digitize(choice_category_id[index2],
                      unique_choice_category_id) - 1
        ] / frequency[i, :].mean()
        prob = _normalize(weights)
        # Inverse-CDF sampling: J uniform draws per agent, with replacement.
        index = _searchsorted(_ncumsum(prob),
                              _rand(num_agents * J)).reshape(-1, J)

        if not with_replacement:
            # An earlier, disabled draft re-drew repeated alternatives row
            # by row for a bounded number of iterations.
            raise NotImplementedError(
                "Sample without replacement is not implemented for this sampler yet.")

        sampled_index[agents_in_this_category, :] = index
        sampling_prob[agents_in_this_category, :] = prob[index]
        if include_chosen_choice:
            sampling_prob_for_chosen_choices[agents_in_this_category, 0] = \
                prob[chosen_choice_index_to_index2[agents_in_this_category]]

    # Translate positions within index2 into indices of the choice set.
    sampled_index = index2[sampled_index]
    if include_chosen_choice:
        sampled_index = column_stack(
            (chosen_choice_index[:, newaxis], sampled_index))

    # Allocated after the chosen column is stacked so that the flag array
    # has the same shape as the final sampled index.
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        ## if the chosen choice is unplaced, its sampling prob is 0
        sampling_prob_for_chosen_choices[
            where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack(
            [sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(
        dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob,
                                      '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]]=where_chosen[1]
    #return (sampled_index, chosen_choice)
    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=False, with_replacement=False,
        resources=None, dataset_pool=None):
    """Sample sample_size alternatives from dataset2 for each agent of dataset1.

    Arguments:
      dataset1, dataset2 -- agent set and choice set.
      index1 -- if not None, only sample alternatives for these agents.
      index2 -- if not None, only sample alternatives from these choices.
      sample_size -- number of alternatives per agent (scalar). When the
          chosen choice is included it occupies one of these slots.
      weight -- sampling weight: an attribute name of dataset2, a variable
          expression (of the choice dataset or an interaction variable),
          a 1d array matching index2 (or the whole choice set), or a 2d
          array of shape (index1.size, index2.size). None means equal
          weights.
      include_chosen_choice -- if true, the agent's current choice is put
          in column 0 of the result.
      with_replacement -- whether to sample with replacement; forced on
          (1d case) when there are fewer non-zero weights than needed.
      resources -- extra settings; "include_mnl_bias_correction_term"
          adds the MNL bias correction term to the result.
      dataset_pool -- taken from the SessionConfiguration when None.

    Returns the interaction dataset built by create_interaction_dataset(),
    with the attributes '__sampling_probability' and 'chosen_choice'
    added, or None (after logging a warning) if either index is empty.

    Raises ValueError for an unresolvable weight name or a weight array
    whose size matches neither index2 nor dataset2, and TypeError for an
    unsupported weight type. Also refer to the documentation of
    interaction_dataset.
    """
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except Exception:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "sample_size": sample_size,
        "weight": weight,
        "with_replacement": with_replacement,
        "include_chosen_choice": include_chosen_choice,
    })
    local_resources.check_obligatory_keys(
        ['dataset1', 'dataset2', 'sample_size'])

    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None

    include_chosen_choice = local_resources.get("include_chosen_choice",
                                                False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        J = J - 1  # one slot is reserved for the chosen choice

    with_replacement = local_resources.get("with_replacement")

    # Resolve the weight into an array and record whether it is shared by
    # all agents (rank 1) or given per agent (rank 2).
    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        else:
            varname = VariableName(weight)
            if varname.get_dataset_name() == choice.get_dataset_name():
                weight = choice.compute_variables(
                    weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            elif varname.get_interaction_set_names() is not None:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight = interaction_dataset.compute_variables(
                    weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
                assert (len(weight.shape) >= rank_of_weight)
            else:
                err_msg = ("weight is neither a known attribute name "
                           "nor a simple variable from the choice dataset "
                           "nor an interaction variable: '%s'" % weight)
                logger.log_error(err_msg)
                raise ValueError(err_msg)
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:  ## weight is None (or otherwise false): equal weights
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unkown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    # A weight given for the whole choice set is cut down to index2.
    if (weight.size != index2.size) and \
            (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    chosen_choice_index = choice.try_get_id_index(
        chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
    # Position of each chosen choice within index2 (UNPLACED_ID if absent).
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    if rank_of_weight == 1:
        # 1d weight: every agent shares the same weight for the choices
        replace = with_replacement
        non_zero_counts = nonzerocounts(weight)
        if non_zero_counts < J:
            logger.log_warning(
                "weight array dosen't have enough non-zero counts, use sample with replacement"
            )
            replace = True
        if non_zero_counts > 0:
            sampled_index = prob2dsample(
                index2,
                sample_size=(index1.size, J),
                prob_array=prob,
                exclude_index=chosen_choice_index_to_index2,
                replace=replace,
                return_index=True)
        else:
            # all alternatives have a zero weight
            sampled_index = zeros((index1.size, 0), dtype=DTYPE)

    if rank_of_weight == 2:
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
        for i in range(index1.size):
            replace = with_replacement
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                logger.log_warning(
                    "weight array dosen't have enough non-zero counts, use sample with replacement"
                )
                replace = True
            # NOTE(review): `replace` is computed but not used; rows are
            # always drawn with probsample_noreplace -- confirm intent.
            # exclude_index passed to probsample_noreplace needs to be
            # indexed to index2
            sampled_index[i, :] = probsample_noreplace(
                index2,
                sample_size=J,
                prob_array=i_prob,
                exclude_index=chosen_choice_index_to_index2[i],
                return_index=True)

    # take() without an axis works on the flattened array, which for a 2d
    # prob would read every agent's probabilities from row 0; index each
    # agent's own row instead.
    if rank_of_weight == 2:
        agent_rows = arange(index1.size)[:, newaxis]
        sampling_prob = prob[agent_rows, sampled_index]
    else:
        sampling_prob = take(prob, sampled_index)

    sampled_index_within_prob = sampled_index.copy()
    # Translate positions within index2 into indices of the choice set.
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")

    if include_chosen_choice:
        sampled_index = column_stack(
            (chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        ## this is necessary because prob is indexed to index2, not to
        ## the choice set (as is chosen_choice_index)
        if rank_of_weight == 2:
            sampling_prob_for_chosen_choices = prob[
                agent_rows, chosen_choice_index_to_index2[:, newaxis]]
        else:
            sampling_prob_for_chosen_choices = take(
                prob, chosen_choice_index_to_index2[:, newaxis])
        ## if chosen choice equals unplaced_id then the sampling prob is 0
        sampling_prob_for_chosen_choices[
            where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack(
            [sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(
        dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob,
                                      '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    if local_resources.get("include_mnl_bias_correction_term", False):
        if include_chosen_choice:
            sampled_index_within_prob = column_stack(
                (chosen_choice_index_to_index2[:, newaxis],
                 sampled_index_within_prob))
        interaction_dataset.add_mnl_bias_correction_term(
            prob, sampled_index_within_prob)

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]]=where_chosen[1]
    #return (sampled_index, chosen_choice)
    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10, weight=None,
        include_chosen_choice=False, with_replacement=False, resources=None, dataset_pool=None):
    """Sample alternatives from dataset2 for agents of dataset1, using sampling weights.

    Arguments that are not None override the entries of the same name in
    `resources`.

    dataset1 -- agent dataset.
    dataset2 -- choice (alternative) dataset.
    index1 -- indices of the agents to sample for; all agents when None.
    index2 -- indices of the alternatives to sample from; all alternatives when None.
    sample_size -- number of alternatives per agent. When include_chosen_choice
        is true, one of these slots is taken by the agent's current choice, so
        only sample_size - 1 alternatives are drawn.
    weight -- sampling weight: an attribute or variable name (a name that is
        neither a known attribute of dataset2 nor a dataset2 variable is computed
        on an InteractionDataset and treated as 2-d), a 1-d or 2-d ndarray, or
        None / empty string for uniform weights. An array whose last axis has
        dataset2's size instead of index2's size is subset with index2.
    include_chosen_choice -- if true, the agent's current choice is put in
        column 0 of the sampled index and flagged in 'chosen_choice'.
    with_replacement -- passed to the 1-d sampler; forced to True there when the
        weight has fewer than the required number of non-zero entries.
    resources -- optional settings wrapped in Resources.
    dataset_pool -- pool used to compute weight variables; when None it is taken
        from SessionConfiguration, or a new DatasetPool if that fails.

    Returns the dataset built by self.create_interaction_dataset with the
    attributes '__sampling_probability' and 'chosen_choice' added, or None when
    index1 or index2 is empty.

    Raises TypeError for an unsupported weight type and ValueError when the
    weight size matches neither index2 nor dataset2, or its rank is not 1 or 2.
    """
    if dataset_pool is None:
        # Best-effort lookup of the session's pool; fall back to an empty one.
        try:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        except Exception:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1, "dataset2": dataset2,
        "index1": index1, "index2": index2,
        "sample_size": sample_size, "weight": weight,
        "with_replacement": with_replacement,
        "include_chosen_choice": include_chosen_choice})
    local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])

    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None

    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        # One slot of the choice set is reserved for the chosen alternative.
        J = J - 1

    with_replacement = local_resources.get("with_replacement")

    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        elif VariableName(weight).get_dataset_name() == choice.get_dataset_name():
            weight = choice.compute_variables(weight, dataset_pool=dataset_pool)
            rank_of_weight = 1
        else:
            # Weights can be an interaction variable (agent x alternative).
            interaction_dataset = InteractionDataset(local_resources)
            weight = interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
            rank_of_weight = 2
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:
        # weight is None or an empty string: uniform weights.
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unkown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            # Weight covers all of dataset2; keep only the selectable alternatives.
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    # Position of each agent's current choice within dataset2 ...
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id,
                                                  return_value_if_not_found=UNPLACED_ID)
    # ... and within index2, which is how prob is indexed.
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    if rank_of_weight == 1:
        # A 1-d weight is shared by all agents.
        replace = with_replacement
        if nonzerocounts(weight) < J:
            logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
            replace = True
        sampled_index = prob2dsample(index2, sample_size=(index1.size, J),
                                     prob_array=prob,
                                     exclude_index=chosen_choice_index_to_index2,
                                     replace=replace, return_index=True)
    elif rank_of_weight == 2:
        # A 2-d weight has one row of weights per agent; sample agent by agent.
        sampled_index = zeros((index1.size, J), dtype="int32") - 1
        for i in range(index1.size):
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                # NOTE(review): the warning announces sampling with replacement,
                # but this branch always calls probsample_noreplace; the original
                # only set an unused local here. Confirm the intended sampler.
                logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
            # exclude_index must be indexed to index2, like prob.
            sampled_index[i, :] = probsample_noreplace(
                index2, sample_size=J, prob_array=i_prob,
                exclude_index=chosen_choice_index_to_index2[i],
                return_index=True)
    else:
        raise ValueError("weight must be a 1d or 2d array, got rank %s" % rank_of_weight)

    if rank_of_weight == 2:
        # Read each agent's probabilities from its own row; take() without an
        # axis would index the flattened array, i.e. the first agent's row only.
        agent_rows = arange(index1.size)
        sampling_prob = prob[agent_rows[:, newaxis], sampled_index]
    else:
        sampling_prob = take(prob, sampled_index)

    # Translate positions within index2 into positions within dataset2.
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        sampled_index = column_stack((chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        # prob is indexed to index2, not to the choice set (as chosen_choice_index is).
        if rank_of_weight == 2:
            sampling_prob_for_chosen_choices = \
                prob[agent_rows, chosen_choice_index_to_index2][:, newaxis]
        else:
            sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
        # Agents without a chosen choice get a sampling probability of 0.
        sampling_prob_for_chosen_choices[chosen_choice_index == UNPLACED_ID] = 0.0
        sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, stratum=None, weight=None,
        sample_size=1, sample_size_from_each_stratum=None,
        sample_size_from_chosen_stratum=None, sample_rate=None,
        include_chosen_choice=False, resources=None, with_replacement=False,
        dataset_pool=None, **kwargs):
    """Sample alternatives from dataset2 for agents of dataset1, stratum by stratum.

    Arguments that are not None override the entries of the same name in
    `resources`.

    dataset1 -- agent dataset.
    dataset2 -- choice (alternative) dataset.
    index1 -- indices of the agents to sample for; all agents when None.
    index2 -- indices of the alternatives to sample from; all alternatives when None.
    stratum -- required; an attribute/variable name of dataset2 or an array
        giving the stratum of every alternative of dataset2. Alternatives whose
        stratum equals NO_STRATUM_ID are not sampled.
    weight -- sampling weight: an attribute/variable name of dataset2, a 1-d or
        2-d ndarray, or None for uniform weights. An array whose last axis has
        dataset2's size instead of index2's size is subset with index2.
    sample_size -- number of alternatives to draw from each stratum; overridden
        by sample_size_from_each_stratum when that is not None.
    sample_size_from_chosen_stratum -- number of alternatives to draw from the
        stratum of the agent's current choice. When None and
        include_chosen_choice is true, one less is drawn from that stratum.
    sample_rate -- not implemented; any value other than None raises.
    include_chosen_choice -- if true, the agent's current choice is put in
        column 0 of the sampled index and flagged in 'chosen_choice'.
    with_replacement -- stored in the resources; not read otherwise in this method.
    dataset_pool -- resolved from SessionConfiguration (or a new DatasetPool)
        when None; not used further in this method.
    kwargs -- accepted and ignored.

    Returns the dataset built by self.create_interaction_dataset with the
    attributes '__sampling_probability', 'chosen_choice' and 'stratum_id'
    added, or the tuple (None, None) when index1 or index2 is empty.

    Raises TypeError for an unsupported weight type, ValueError when the weight
    size matches neither index2 nor dataset2 or when stratum is missing, and
    NotImplementedError when sample_rate is given.
    """
    if dataset_pool is None:
        # Best-effort lookup of the session's pool; fall back to an empty one.
        try:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        except Exception:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1, "dataset2": dataset2,
        "index1": index1, "index2": index2,
        "with_replacement": with_replacement,
        "stratum": stratum, "weight": weight,
        "sample_size": sample_size,
        "sample_size_from_each_stratum": sample_size_from_each_stratum,
        "sample_size_from_chosen_stratum": sample_size_from_chosen_stratum,
        "sample_rate": sample_rate,
        "include_chosen_choice": include_chosen_choice})
    local_resources.check_obligatory_keys(['dataset1', 'dataset2'])

    index1 = local_resources.get("index1", None)
    # Fall back to the resources when the dataset was not passed as an argument.
    agent = dataset1 if dataset1 is not None else local_resources["dataset1"]
    if index1 is None:
        agent.get_id_attribute()
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        choice.get_id_attribute()
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        # NOTE(review): the message says None but a 2-tuple is returned; kept
        # as is because callers may unpack it -- confirm against callers.
        return (None, None)

    include_chosen_choice = local_resources.get("include_chosen_choice", False)

    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        choice.compute_variables(weight, resources=local_resources)
        weight = choice.get_attribute(weight)
        rank_of_weight = 1
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif weight is None:
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unknown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    last_axis = rank_of_weight - 1
    if (weight.size != index2.size) and (weight.shape[last_axis] != index2.size):
        if weight.shape[last_axis] == choice.size():
            # Weight covers all of dataset2; keep only the selectable
            # alternatives, along the alternatives axis for 2-d weights too.
            weight = take(weight, index2, axis=last_axis)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    stratum = local_resources.get("stratum", None)
    if stratum is None:
        # ValueError is a StandardError subclass on Python 2, so handlers for
        # the previously raised StandardError still catch it.
        raise ValueError("'stratum' must be defined for stratified sampling.")
    if isinstance(stratum, str):
        choice.compute_variables(stratum, resources=local_resources)
        stratum = choice.get_attribute(stratum)

    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    # Position of each agent's current choice within dataset2 (-1 if none) ...
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id,
                                                  return_value_if_not_found=-1)
    # ... and within index2.
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    ##TODO: check all chosen strata are in selectable strata
    #i.e. chosen_choice_index is in index2
    chosen_stratum = ones(chosen_choice_index.size, dtype=DTYPE) * NO_STRATUM_ID
    has_chosen_choice = chosen_choice_index != -1
    chosen_stratum[has_chosen_choice] = stratum[chosen_choice_index[has_chosen_choice]]

    selectable_strata = stratum[index2]
    unique_strata = unique(selectable_strata)
    unique_strata = unique_strata[unique_strata != NO_STRATUM_ID]

    sample_size = local_resources.get("sample_size", None)
    sample_size_from_each_stratum = local_resources.get("sample_size_from_each_stratum", None)
    if sample_size_from_each_stratum is None:
        sample_size_from_each_stratum = sample_size
    strata_sample_size = ones(unique_strata.size, dtype=DTYPE) * sample_size_from_each_stratum

    sample_rate = local_resources.get("sample_rate", None)
    if sample_rate is not None:
        raise NotImplementedError("sample_rate is not implemented yet.")
        ##TODO: to be finished
        #num_elements_in_strata = histogram(selectable_strata, unique_strata)
        #strata_sample_size = round(num_elements_in_strata * sample_rate)

    sample_size_from_chosen_stratum = local_resources.get("sample_size_from_chosen_stratum", None)
    if sample_size_from_chosen_stratum is None and not include_chosen_choice:
        # Same (stratum, sample size) pairs for every agent.
        strata_sample_pairs = array([[stratum_id, size] for stratum_id, size
                                     in zip(unique_strata, strata_sample_size)])
        if rank_of_weight == 1:
            sampled_index = self._sample_by_stratum(
                index1, index2, selectable_strata, prob,
                chosen_choice_index_to_index2, strata_sample_pairs)
        elif rank_of_weight == 2:
            sampled_index = self._sample_by_agent_and_stratum(
                index1, index2, selectable_strata, prob,
                chosen_choice_index_to_index2, strata_sample_pairs)
    else:
        # Per-agent (stratum, sample size) pairs: the size for the agent's
        # chosen stratum differs from the other strata.
        strata_sample_setting = zeros((index1.size, unique_strata.size, 2), dtype=DTYPE)
        strata_sample_setting[:, :, 0] = unique_strata
        for i in range(index1.size):
            agents_strata_sample_size = strata_sample_size.copy()
            in_chosen_stratum = unique_strata == chosen_stratum[i]
            if sample_size_from_chosen_stratum is None:
                # include_chosen_choice is True here: the chosen choice takes
                # one slot, so sample one less from the chosen stratum.
                agents_strata_sample_size[in_chosen_stratum] -= 1
            else:
                agents_strata_sample_size[in_chosen_stratum] = sample_size_from_chosen_stratum
            strata_sample_setting[i, :, 1] = agents_strata_sample_size

        sampled_index = self._sample_by_agent_and_stratum(
            index1, index2, selectable_strata, prob,
            chosen_choice_index_to_index2, strata_sample_setting)

    # self._sampling_probability and self._stratum_id are read below without
    # being assigned in this method; presumably the _sample_by_* call above
    # sets them -- confirm against those helpers.
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        # The chosen choice goes into column 0 of every per-agent array.
        sampled_index = concatenate((chosen_choice_index[:, newaxis], sampled_index), axis=1)
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1

        # -1 marks agents whose chosen stratum is not among the selectable strata.
        chosen_probability = zeros((chosen_choice_index.size,), dtype=float32) - 1
        for stratum_id in unique_strata:
            w = chosen_stratum == stratum_id
            # NOTE(review): prob is aligned with index2 (it is masked with
            # selectable_strata), while chosen_choice_index holds positions in
            # dataset2; these agree only when index2 spans dataset2 in order.
            # Confirm whether chosen_choice_index_to_index2 is intended here.
            chosen_probability[w] = (prob[chosen_choice_index[w]] /
                                     prob[selectable_strata == stratum_id].sum()).astype(float32)
        self._sampling_probability = concatenate(
            (chosen_probability[:, newaxis], self._sampling_probability), axis=1)
        self._stratum_id = concatenate(
            (chosen_stratum[:, newaxis], self._stratum_id), axis=1)

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(self._sampling_probability, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
    interaction_dataset.add_attribute(self._stratum_id, 'stratum_id')
    return interaction_dataset
def run(self, dataset1, dataset2, index1=None, index2=None, stratum=None, weight=None,
        sample_size=1, sample_size_from_each_stratum=None,
        sample_size_from_chosen_stratum=None, sample_rate=None,
        include_chosen_choice=False, resources=None, with_replacement=False,
        dataset_pool=None, **kwargs):
    """Draw a stratified sample of dataset2 alternatives for agents of dataset1.

    Arguments that are not None override the entries of the same name in
    `resources`.

    dataset1 -- agent dataset.
    dataset2 -- choice (alternative) dataset.
    index1 -- indices of the agents to sample for; all agents when None.
    index2 -- indices of the alternatives to sample from; all alternatives when None.
    stratum -- required; an attribute/variable name of dataset2 or an array
        giving the stratum of every alternative of dataset2. Alternatives whose
        stratum equals NO_STRATUM_ID are not sampled.
    weight -- sampling weight: an attribute/variable name of dataset2, a 1-d or
        2-d ndarray, or None for uniform weights. An array whose last axis has
        dataset2's size instead of index2's size is subset with index2.
    sample_size -- number of alternatives to draw from each stratum; overridden
        by sample_size_from_each_stratum when that is not None.
    sample_size_from_chosen_stratum -- number of alternatives to draw from the
        stratum of the agent's current choice. When None and
        include_chosen_choice is true, one less is drawn from that stratum.
    sample_rate -- not implemented; any value other than None raises.
    include_chosen_choice -- if true, the agent's current choice is put in
        column 0 of the sampled index and flagged in 'chosen_choice'.
    with_replacement -- stored in the resources; not read otherwise in this method.
    dataset_pool -- resolved from SessionConfiguration (or a new DatasetPool)
        when None; not used further in this method.
    kwargs -- accepted and ignored.

    Returns the dataset built by self.create_interaction_dataset with the
    attributes '__sampling_probability', 'chosen_choice' and 'stratum_id'
    added, or the tuple (None, None) when index1 or index2 is empty.

    Raises TypeError for an unsupported weight type, ValueError when the weight
    size matches neither index2 nor dataset2 or when stratum is missing, and
    NotImplementedError when sample_rate is given.
    """
    if dataset_pool is None:
        # Best-effort lookup of the session's pool; fall back to an empty one.
        try:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        except Exception:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1, "dataset2": dataset2,
        "index1": index1, "index2": index2,
        "with_replacement": with_replacement,
        "stratum": stratum, "weight": weight,
        "sample_size": sample_size,
        "sample_size_from_each_stratum": sample_size_from_each_stratum,
        "sample_size_from_chosen_stratum": sample_size_from_chosen_stratum,
        "sample_rate": sample_rate,
        "include_chosen_choice": include_chosen_choice})
    local_resources.check_obligatory_keys(['dataset1', 'dataset2'])

    index1 = local_resources.get("index1", None)
    # Fall back to the resources when the dataset was not passed as an argument.
    agent = dataset1 if dataset1 is not None else local_resources["dataset1"]
    if index1 is None:
        agent.get_id_attribute()
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        choice.get_id_attribute()
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        # NOTE(review): the message says None but a 2-tuple is returned; kept
        # as is because callers may unpack it -- confirm against callers.
        return (None, None)

    include_chosen_choice = local_resources.get("include_chosen_choice", False)

    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        choice.compute_variables(weight, resources=local_resources)
        weight = choice.get_attribute(weight)
        rank_of_weight = 1
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif weight is None:
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unknown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    last_axis = rank_of_weight - 1
    if (weight.size != index2.size) and (weight.shape[last_axis] != index2.size):
        if weight.shape[last_axis] == choice.size():
            # Weight covers all of dataset2; keep only the selectable
            # alternatives, along the alternatives axis for 2-d weights too.
            weight = take(weight, index2, axis=last_axis)
        else:
            err_msg = "weight array size doesn't match to size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    stratum = local_resources.get("stratum", None)
    if stratum is None:
        # ValueError is a StandardError subclass on Python 2, so handlers for
        # the previously raised StandardError still catch it.
        raise ValueError("'stratum' must be defined for stratified sampling.")
    if isinstance(stratum, str):
        choice.compute_variables(stratum, resources=local_resources)
        stratum = choice.get_attribute(stratum)

    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    # Position of each agent's current choice within dataset2 (-1 if none) ...
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id,
                                                  return_value_if_not_found=-1)
    # ... and within index2.
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)

    ##TODO: check all chosen strata are in selectable strata
    #i.e. chosen_choice_index is in index2
    chosen_stratum = ones(chosen_choice_index.size, dtype=DTYPE) * NO_STRATUM_ID
    has_chosen_choice = chosen_choice_index != -1
    chosen_stratum[has_chosen_choice] = stratum[chosen_choice_index[has_chosen_choice]]

    selectable_strata = stratum[index2]
    unique_strata = unique(selectable_strata)
    unique_strata = unique_strata[unique_strata != NO_STRATUM_ID]

    sample_size = local_resources.get("sample_size", None)
    sample_size_from_each_stratum = local_resources.get("sample_size_from_each_stratum", None)
    if sample_size_from_each_stratum is None:
        sample_size_from_each_stratum = sample_size
    strata_sample_size = ones(unique_strata.size, dtype=DTYPE) * sample_size_from_each_stratum

    sample_rate = local_resources.get("sample_rate", None)
    if sample_rate is not None:
        raise NotImplementedError("sample_rate is not implemented yet.")
        ##TODO: to be finished
        #num_elements_in_strata = histogram(selectable_strata, unique_strata)
        #strata_sample_size = round(num_elements_in_strata * sample_rate)

    sample_size_from_chosen_stratum = local_resources.get("sample_size_from_chosen_stratum", None)
    if sample_size_from_chosen_stratum is None and not include_chosen_choice:
        # Same (stratum, sample size) pairs for every agent.
        strata_sample_pairs = array([[stratum_id, size] for stratum_id, size
                                     in zip(unique_strata, strata_sample_size)])
        if rank_of_weight == 1:
            sampled_index = self._sample_by_stratum(
                index1, index2, selectable_strata, prob,
                chosen_choice_index_to_index2, strata_sample_pairs)
        elif rank_of_weight == 2:
            sampled_index = self._sample_by_agent_and_stratum(
                index1, index2, selectable_strata, prob,
                chosen_choice_index_to_index2, strata_sample_pairs)
    else:
        # Per-agent (stratum, sample size) pairs: the size for the agent's
        # chosen stratum differs from the other strata.
        strata_sample_setting = zeros((index1.size, unique_strata.size, 2), dtype=DTYPE)
        strata_sample_setting[:, :, 0] = unique_strata
        for i in range(index1.size):
            agents_strata_sample_size = strata_sample_size.copy()
            in_chosen_stratum = unique_strata == chosen_stratum[i]
            if sample_size_from_chosen_stratum is None:
                # include_chosen_choice is True here: the chosen choice takes
                # one slot, so sample one less from the chosen stratum.
                agents_strata_sample_size[in_chosen_stratum] -= 1
            else:
                agents_strata_sample_size[in_chosen_stratum] = sample_size_from_chosen_stratum
            strata_sample_setting[i, :, 1] = agents_strata_sample_size

        sampled_index = self._sample_by_agent_and_stratum(
            index1, index2, selectable_strata, prob,
            chosen_choice_index_to_index2, strata_sample_setting)

    # self._sampling_probability and self._stratum_id are read below without
    # being assigned in this method; presumably the _sample_by_* call above
    # sets them -- confirm against those helpers.
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        # The chosen choice goes into column 0 of every per-agent array.
        sampled_index = concatenate((chosen_choice_index[:, newaxis], sampled_index), axis=1)
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1

        # -1 marks agents whose chosen stratum is not among the selectable strata.
        chosen_probability = zeros((chosen_choice_index.size,), dtype=float32) - 1
        for stratum_id in unique_strata:
            w = chosen_stratum == stratum_id
            # NOTE(review): prob is aligned with index2 (it is masked with
            # selectable_strata), while chosen_choice_index holds positions in
            # dataset2; these agree only when index2 spans dataset2 in order.
            # Confirm whether chosen_choice_index_to_index2 is intended here.
            chosen_probability[w] = (prob[chosen_choice_index[w]] /
                                     prob[selectable_strata == stratum_id].sum()).astype(float32)
        self._sampling_probability = concatenate(
            (chosen_probability[:, newaxis], self._sampling_probability), axis=1)
        self._stratum_id = concatenate(
            (chosen_stratum[:, newaxis], self._stratum_id), axis=1)

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(self._sampling_probability, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
    interaction_dataset.add_attribute(self._stratum_id, 'stratum_id')
    return interaction_dataset