def calc_probabilities(self, data, choiceset):
    """
    Return the selection probability associated with the different
    choices.

    Inputs:
    data - DataArray object
    choiceset - DataArray object

    Returns a new DataArray holding one column per entry of
    self.specification.actual_choices, built with data.index as its
    third constructor argument.

    Side effect: self.specification.actual_choices is sorted in place.
    """
    exp_expected_utilities = self.calc_exp_choice_utilities(
        data, choiceset)

    # Step 1: inside every branch, replace each child's value by its
    # share of the branch total.
    for parent in self.parent_list:
        child_names = self.specification.child_names(parent)

        # Missing values are converted to zero before summing across the
        # children of a parent, to avoid missing + valid = missing.
        # NOTE(review): .filled(0) suggests the columns are numpy masked
        # arrays - confirm against DataArray.column.
        util_sum = 0
        for child in child_names:
            util_sum = util_sum + exp_expected_utilities.column(
                child).filled(0)

        # NOTE(review): a branch whose total is zero divides by zero here;
        # behaviour is unchanged from the original code.
        for child in child_names:
            exp_expected_utilities.setcolumn(
                child, exp_expected_utilities.column(child) / util_sum)

    # Step 2: multiply every actual choice by the (already normalised)
    # column of each name returned by all_parent_names for that choice.
    for choice in self.specification.actual_choices:
        for parent in self.specification.all_parent_names(choice):
            parent_column = exp_expected_utilities.column(parent)
            choice_column = exp_expected_utilities.column(choice)
            exp_expected_utilities.setcolumn(
                choice, choice_column * parent_column)

    # In-place sort is kept on purpose: the ordering of the shared list is
    # observable by callers and defines the output column order.
    self.specification.actual_choices.sort()
    actual_choices = self.specification.actual_choices

    rows = exp_expected_utilities.rows
    cols = len(actual_choices)
    probabilities = DataArray(zeros((rows, cols)),
                              actual_choices, data.index)

    # Copy only the columns of the actual choices into the result.
    for choice in actual_choices:
        probabilities.setcolumn(
            choice, exp_expected_utilities.column(choice))

    return probabilities
def update_houseids(self, hhldSyn, persSyn, hhldVars, persVars, highestHid):
    """
    Renumber the household ids of the synthetic household and person
    records so that they continue sequentially after highestHid.

    Inputs:
    hhldSyn - household records, wrapped as DataArray(hhldSyn, hhldVars)
    persSyn - person records, wrapped as DataArray(persSyn, persVars)
    hhldVars - column names for hhldSyn; must contain 'hhid'
    persVars - column names for persSyn; must contain 'hhid'
    highestHid - last household id already used; the first household
                 handled here receives highestHid + 1

    Both tables are expected to carry 'hhid' and 'frequency' columns,
    since those are the keys of the coefficient dict handed to
    calculate_equation.

    Returns the tuple (hhldSyn, persSyn) read back from the ``data``
    attribute of the two DataArray wrappers after renumbering.
    """
    hhldSynDataObj = DataArray(hhldSyn, hhldVars)
    persSynDataObj = DataArray(persSyn, persVars)

    # Width (in decimal digits) of the largest frequency value. Counting
    # digits is exact, whereas floor(log(x, 10)) + 1 comes out one too
    # small at exact powers of ten (log(1000, 10) is 2.9999999999999996).
    maxFreqCol = amax(hhldSynDataObj.columns(['frequency']).data)
    numDigits = len(str(abs(int(maxFreqCol))))

    # Temporary combined id: the hhid coefficient shifts hhid left by
    # numDigits decimal places so the frequency fits below it. Kept as a
    # float, as the original floor()-based power was.
    # NOTE(review): presumably calculate_equation returns the weighted sum
    # of the named columns - confirm against DataArray.
    coefficients = {'frequency': 1, 'hhid': 10.0 ** numDigits}

    newHid = hhldSynDataObj.calculate_equation(coefficients)
    hhldSynDataObj.setcolumn('hhid', newHid)

    newHid = persSynDataObj.calculate_equation(coefficients)
    persSynDataObj.setcolumn('hhid', newHid)

    hhldSynDataObj.sort([self.idSpec.hidName])
    persSynDataObj.sort([self.idSpec.hidName, self.idSpec.pidName])

    hidIndex_popgenH = hhldVars.index('hhid')
    hidIndex_popgenP = persVars.index('hhid')

    # NOTE(review): create_indices is assumed to (re)build
    # self.hhldIndicesOfPersons from the sorted person records, in the
    # same household order as the sorted household records - confirm.
    self.create_indices(persSynDataObj)

    hhldSyn = hhldSynDataObj.data
    persSyn = persSynDataObj.data

    # Entries 1 and 2 of each index record are used as the start and stop
    # of the row slice holding that household's persons.
    for row, hhldIndex in enumerate(self.hhldIndicesOfPersons):
        highestHid += 1
        firstPersonRec = hhldIndex[1]
        lastPersonRec = hhldIndex[2]
        hhldSyn[row, hidIndex_popgenH] = highestHid
        persSyn[firstPersonRec:lastPersonRec, hidIndex_popgenP] = highestHid

    return hhldSyn, persSyn
def update_houseids(self, hhldSyn, persSyn, hhldVars, persVars, highestHid):
    """
    Renumber the household ids of the synthetic household and person
    records so that they continue sequentially after highestHid.

    Inputs:
    hhldSyn - household records, wrapped as DataArray(hhldSyn, hhldVars)
    persSyn - person records, wrapped as DataArray(persSyn, persVars)
    hhldVars - column names for hhldSyn; must contain 'hhid'
    persVars - column names for persSyn; must contain 'hhid'
    highestHid - last household id already used; the first household
                 handled here receives highestHid + 1

    Both tables are expected to carry 'hhid' and 'frequency' columns,
    since those are the keys of the coefficient dict handed to
    calculate_equation.

    Returns the tuple (hhldSyn, persSyn) read back from the ``data``
    attribute of the two DataArray wrappers after renumbering.
    """
    hhldSynDataObj = DataArray(hhldSyn, hhldVars)
    persSynDataObj = DataArray(persSyn, persVars)

    # Width (in decimal digits) of the largest frequency value. Counting
    # digits is exact, whereas floor(log(x, 10)) + 1 comes out one too
    # small at exact powers of ten (log(1000, 10) is 2.9999999999999996).
    maxFreqCol = amax(hhldSynDataObj.columns(['frequency']).data)
    numDigits = len(str(abs(int(maxFreqCol))))

    # Temporary combined id: the hhid coefficient shifts hhid left by
    # numDigits decimal places so the frequency fits below it. Kept as a
    # float, as the original floor()-based power was.
    # NOTE(review): presumably calculate_equation returns the weighted sum
    # of the named columns - confirm against DataArray.
    coefficients = {'frequency': 1, 'hhid': 10.0 ** numDigits}

    newHid = hhldSynDataObj.calculate_equation(coefficients)
    hhldSynDataObj.setcolumn('hhid', newHid)

    newHid = persSynDataObj.calculate_equation(coefficients)
    persSynDataObj.setcolumn('hhid', newHid)

    hhldSynDataObj.sort([self.idSpec.hidName])
    persSynDataObj.sort([self.idSpec.hidName, self.idSpec.pidName])

    hidIndex_popgenH = hhldVars.index('hhid')
    hidIndex_popgenP = persVars.index('hhid')

    # NOTE(review): create_indices is assumed to (re)build
    # self.hhldIndicesOfPersons from the sorted person records, in the
    # same household order as the sorted household records - confirm.
    self.create_indices(persSynDataObj)

    hhldSyn = hhldSynDataObj.data
    persSyn = persSynDataObj.data

    # Entries 1 and 2 of each index record are used as the start and stop
    # of the row slice holding that household's persons.
    for row, hhldIndex in enumerate(self.hhldIndicesOfPersons):
        highestHid += 1
        firstPersonRec = hhldIndex[1]
        lastPersonRec = hhldIndex[2]
        hhldSyn[row, hidIndex_popgenH] = highestHid
        persSyn[firstPersonRec:lastPersonRec, hidIndex_popgenP] = highestHid

    return hhldSyn, persSyn