class TestOrderedProbitModel(unittest.TestCase):
    """Tests OrderedModel probabilities/selections for logit and probit links."""

    def setUp(self):
        vehicle_alternatives = ['Veh1', 'Veh2', 'Veh3']
        self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
        self.thresholds = [1.2, 2.1]
        self.data = DataArray(
            array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]]),
            ['CONSTANT', 'VAR1'])
        logit_spec = OLSpecification(vehicle_alternatives,
                                     self.coefficients, self.thresholds)
        self.model = OrderedModel(logit_spec)
        probit_spec = OLSpecification(vehicle_alternatives,
                                      self.coefficients, self.thresholds,
                                      distribution='probit')
        self.model1 = OrderedModel(probit_spec)

    def testprobabilitieslogit(self):
        """Model probabilities must equal manually computed genlogistic CDFs."""
        expected = zeros((4, 3))
        utility = self.data.calculate_equation(self.coefficients[0])
        [shape_param] = [1, ] * genlogistic.numargs
        low_cdf = genlogistic.cdf(self.thresholds[0] - utility, shape_param)
        high_cdf = genlogistic.cdf(self.thresholds[1] - utility, shape_param)
        expected[:, 0] = low_cdf
        expected[:, 1] = high_cdf - low_cdf
        expected[:, 2] = 1 - high_cdf
        model_probs = self.model.calc_probabilities(self.data)
        self.assertEqual(True, all(expected == model_probs))

    def testselectionlogit(self):
        """Chosen alternatives under the logit link for seed-default sampling."""
        expected_choices = array([['veh3'], ['veh3'], ['veh1'], ['veh1']])
        model_choices = self.model.calc_chosenalternative(self.data)
        self.assertEqual(True, all(expected_choices == model_choices))

    def testprobabilitiesprobit(self):
        """Model probabilities must equal manually computed normal CDFs."""
        expected = zeros((4, 3))
        utility = self.data.calculate_equation(self.coefficients[0])
        low_cdf = norm.cdf(self.thresholds[0] - utility)
        high_cdf = norm.cdf(self.thresholds[1] - utility)
        expected[:, 0] = low_cdf
        expected[:, 1] = high_cdf - low_cdf
        expected[:, 2] = 1 - high_cdf
        model_probs = self.model1.calc_probabilities(self.data)
        self.assertEqual(True, all(expected == model_probs))

    def testselectionprobit(self):
        """Chosen alternatives under the probit link."""
        expected_choices = array([['veh3'], ['veh2'], ['veh3'], ['veh2']])
        model_choices = self.model1.calc_chosenalternative(self.data)
        self.assertEqual(True, all(expected_choices == model_choices))
def setUp(self):
    """Two-alternative logit fixture with hand-computed utilities and probs."""
    mode_choices = ['SOV', 'HOV']
    mode_coefficients = [{'Constant': 2, 'Var1': 2.11}, {'Constant': 1.2}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    self.choiceset1 = DataArray(ma.array([[0, 1], [0, 1], [1, 1], [1, 1]]),
                                ['SOV', 'HOV'])
    self.data = DataArray(raw, ['Constant', 'Var1'])
    self.specification = Specification(mode_choices, mode_coefficients)
    self.utils_array_act = zeros(
        (self.data.rows, self.specification.number_choices))
    self.utils_array_act[:, 0] = (self.data.data[:, 0] * 2
                                  + self.data.data[:, 1] * 2.11)
    self.utils_array_act[:, 1] = self.data.data[:, 0] * 1.2
    self.exp_utils_array_act = exp(self.utils_array_act)
    # Row-normalize the exponentiated utilities into probabilities.
    self.prob_array_act = (
        self.exp_utils_array_act.transpose()
        / self.exp_utils_array_act.cumsum(-1)[:, -1]).transpose()
    # for the selected data, and seed = 1, chosen alternatives are
    self.selected_act = array([['sov'], ['hov'], ['sov'], ['sov']])
    self.selected_act1 = array([['hov'], ['hov'], ['sov'], ['sov']])
def setUp(self):
    """Fixture for the Poisson count-regression model."""
    episode_choices = ['Episodes1', 'Episodes2', 'Episodes3']
    self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    self.data = DataArray(raw, ['CONSTANT', 'VAR1'])
    self.specification = CountSpecification(episode_choices,
                                            self.coefficients)
    self.model = CountRegressionModel(self.specification)
def setUp(self):
    """Fixture for the age x travel-time interaction (product) model."""
    interaction_choice = ['age_tt_product']
    interaction_coeffs = [{'age': 1, 'tt': 1}]
    self.data = DataArray(array([[1, 15], [2, 10]]), ['Age', 'TT'])
    self.specification = Specification(interaction_choice,
                                       interaction_coeffs)
def calc_probabilities(self, data, choiceset):
    """ The method returns the selection probability associated with the
    different choices (nested structure).

    Inputs:
    data - DataArray object
    choiceset - DataArray object
    """
    exp_expected_utilities = self.calc_exp_choice_utilities(
        data, choiceset)
    # missing_choices = choiceset.varnames
    #spec_dict = self.specification.specification
    for parent in self.parent_list:
        child_names = self.specification.child_names(parent)
        # calculating the sum of utilities across children in a branch
        util_sum = 0
        for child in child_names:
            # For utils with missing values they are converted to zero
            # before summing the utils across choices in a parent
            # to avoid the case where missing + valid = missing
            child_column = exp_expected_utilities.column(child)
            child_column = child_column.filled(0)
            util_sum = util_sum + child_column
        # calculating the probability of children in a branch
        # (conditional probability of each child given its parent)
        for child in child_names:
            exp_expected_utilities.setcolumn(child,
                                             exp_expected_utilities.column(child) /
                                             util_sum)
        # Dummy check to ensure that within any branch the probs add to one
        # NOTE(review): prob_sum is computed but never asserted or used.
        prob_sum = 0
        for child in child_names:
            prob_sum = prob_sum + exp_expected_utilities.column(child)
    # Unconditional probability of each actual (leaf) choice is the
    # product of its conditional probability with all ancestor probs.
    for choice in self.specification.actual_choices:
        parent_names = self.specification.all_parent_names(choice)
        for parent in parent_names:
            parent_column = exp_expected_utilities.column(parent)
            choice_column = exp_expected_utilities.column(choice)
            exp_expected_utilities.setcolumn(choice,
                                             choice_column * parent_column)
    # Sort in place so the output column order is deterministic.
    self.specification.actual_choices.sort()
    rows = exp_expected_utilities.rows
    cols = len(self.specification.actual_choices)
    probabilities = DataArray(zeros((rows, cols)),
                              self.specification.actual_choices,
                              data.index)
    for choice in self.specification.actual_choices:
        probabilities.setcolumn(
            choice, exp_expected_utilities.column(choice))
    return probabilities
def setUp(self):
    """Fixture for the linear-regression duration model."""
    duration_choice = ['DURATION']
    duration_coefficients = [{'constant': 2, 'Var1': 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    error_variance = array([[1]])
    self.data = DataArray(raw, ['Constant', 'VaR1'])
    self.specification = Specification(duration_choice,
                                       duration_coefficients)
    self.errorspecification = LinearRegErrorSpecification(error_variance)
def setUp(self):
    """Load the activity-schedules fixture CSV into a DataArray."""
    # NOTE(review): absolute user-specific path; breaks on other machines.
    self.data = genfromtxt(
        "/home/kkonduri/simtravel/test/mag_zone/schedule_txt.csv",
        delimiter=",", dtype=int)
    schedule_columns = ['houseid', 'personid', 'scheduleid',
                        'activitytype', 'locationid', 'starttime',
                        'endtime', 'duration']
    self.actSchedules = DataArray(self.data, schedule_columns)
def setUp(self):
    """Fixture for the stochastic-frontier regression model."""
    frontier_choice = ['Frontier']
    frontier_coefficients = [{'constant': 2, 'Var1': 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    error_variance = array([[1., 0], [0, 1.1]])
    self.data = DataArray(raw, ['Constant', 'VaR1'])
    self.specification = Specification(frontier_choice,
                                       frontier_coefficients)
    self.errorspecification = StochasticRegErrorSpecification(
        error_variance, 'start')
class TestOrderedProbitModel(unittest.TestCase):
    """Duplicate ordered-response model tests (logit and probit links)."""

    def setUp(self):
        alternatives = ['Veh1', 'Veh2', 'Veh3']
        self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
        self.thresholds = [1.2, 2.1]
        self.data = DataArray(
            array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]]),
            ['CONSTANT', 'VAR1'])
        spec_logit = OLSpecification(alternatives, self.coefficients,
                                     self.thresholds)
        self.model = OrderedModel(spec_logit)
        spec_probit = OLSpecification(alternatives, self.coefficients,
                                      self.thresholds,
                                      distribution='probit')
        self.model1 = OrderedModel(spec_probit)

    def testprobabilitieslogit(self):
        """Compare model output to genlogistic CDF differences."""
        manual_prob = zeros((4, 3))
        utility = self.data.calculate_equation(self.coefficients[0])
        [shape_param] = [1, ] * genlogistic.numargs
        cut1 = genlogistic.cdf(self.thresholds[0] - utility, shape_param)
        cut2 = genlogistic.cdf(self.thresholds[1] - utility, shape_param)
        manual_prob[:, 0] = cut1
        manual_prob[:, 1] = cut2 - cut1
        manual_prob[:, 2] = 1 - cut2
        self.assertEqual(
            True, all(manual_prob == self.model.calc_probabilities(self.data)))

    def testselectionlogit(self):
        """Chosen alternatives under the logit link."""
        expected = array([['veh3'], ['veh3'], ['veh1'], ['veh1']])
        actual = self.model.calc_chosenalternative(self.data)
        self.assertEqual(True, all(expected == actual))

    def testprobabilitiesprobit(self):
        """Compare model output to normal CDF differences."""
        manual_prob = zeros((4, 3))
        utility = self.data.calculate_equation(self.coefficients[0])
        cut1 = norm.cdf(self.thresholds[0] - utility)
        cut2 = norm.cdf(self.thresholds[1] - utility)
        manual_prob[:, 0] = cut1
        manual_prob[:, 1] = cut2 - cut1
        manual_prob[:, 2] = 1 - cut2
        self.assertEqual(
            True, all(manual_prob == self.model1.calc_probabilities(self.data)))

    def testselectionprobit(self):
        """Chosen alternatives under the probit link."""
        expected = array([['veh3'], ['veh2'], ['veh3'], ['veh2']])
        actual = self.model1.calc_chosenalternative(self.data)
        self.assertEqual(True, all(expected == actual))
def setUp(self):
    """Build logit- and probit-link OrderedModel fixtures."""
    vehicle_choices = ['Veh1', 'Veh2', 'Veh3']
    self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
    self.thresholds = [1.2, 2.1]
    self.data = DataArray(
        array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]]),
        ['CONSTANT', 'VAR1'])
    logit_spec = OLSpecification(vehicle_choices, self.coefficients,
                                 self.thresholds)
    self.model = OrderedModel(logit_spec)
    probit_spec = OLSpecification(vehicle_choices, self.coefficients,
                                  self.thresholds, distribution='probit')
    self.model1 = OrderedModel(probit_spec)
def calc_chosenalternative(self, data, choiceset=None, seed=1):
    """Return the selected choice among the available alternatives.

    Inputs:
    data = DataArray object
    choiceset = DataArray object
    """
    # An empty choiceset means every alternative is available.
    if choiceset is None:
        choiceset = DataArray(array([]), [])
    probability_array = DataArray(
        self.calc_probabilities(data, choiceset),
        self.specification.choices)
    sampler = AbstractProbabilityModel(probability_array, seed)
    return sampler.selected_choice()
def calculate_expected_values(self, data):
    """Return the expected value of each choice from its coefficients.

    Inputs:
    data - DataArray object
    """
    num_choices = self.specification.number_choices
    result = DataArray(zeros((data.rows, num_choices)), self.choices)
    for idx in range(num_choices):
        result.data[:, idx] = data.calculate_equation(
            self.coefficients[idx])
    return result
def setUp(self):
    """Single-alternative (SOV) specification fixture."""
    sov_choice = ['SOV']
    sov_coefficients = [{'constant': 2, 'Var1': 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    self.data = DataArray(raw, ['Constant', 'VaR1'])
    self.specification = Specification(sov_choice, sov_coefficients)
class TestLinearRegressionModel(unittest.TestCase):
    """Checks LinearRegressionModel predictions against a seeded manual draw."""

    def setUp(self):
        duration_choice = ['DURATION']
        duration_coefficients = [{'constant': 2, 'Var1': 2.11}]
        raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
        error_variance = array([[1]])
        self.data = DataArray(raw, ['Constant', 'VaR1'])
        self.specification = Specification(duration_choice,
                                           duration_coefficients)
        self.errorspecification = LinearRegErrorSpecification(error_variance)

    def testvalues(self):
        """Predicted values must equal expected values plus a seeded draw."""
        model = LinearRegressionModel(self.specification,
                                      self.errorspecification)
        pred_value = model.calc_predvalue(self.data)
        expected_act = self.data.calculate_equation(
            self.specification.coefficients[0])
        expected_act.shape = (4, 1)
        dist = RandomDistribution(seed=1)
        pred_act = dist.return_normal_variables(location=expected_act,
                                                scale=1, size=(4, 1))
        self.assertEqual(True, all(pred_value.data == pred_act))
class TestStocFronRegressionModel(unittest.TestCase):
    """Checks StocFronRegressionModel predictions against a seeded manual draw."""

    def setUp(self):
        choice = ['Frontier']
        coefficients = [{'constant': 2, 'Var1': 2.11}]
        data = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
        variance = array([[1., 0], [0, 1.1]])
        self.data = DataArray(data, ['Constant', 'VaR1'])
        self.specification = Specification(choice, coefficients)
        self.errorspecification = StochasticRegErrorSpecification(
            variance, 'start')

    def testvalues(self):
        """Prediction = deterministic expected value + seeded normal error."""
        model = StocFronRegressionModel(self.specification,
                                        self.errorspecification)
        pred_value = model.calc_predvalue(self.data)
        expected_act = self.data.calculate_equation(
            self.specification.coefficients[0])
        expected_act.shape = (4, 1)
        variance = self.errorspecification.variance
        dist = RandomDistribution(seed=1)
        err_norm = dist.return_normal_variables(location=0,
                                                scale=variance[0, 0] ** 0.5,
                                                size=(4, 1))
        pred_act = expected_act + err_norm
        pred_diff = all(pred_value.data == pred_act)
        # Fixed: assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(True, pred_diff)
def calculate_expected_values(self, data):
    """Return the product of variables raised to coefficient powers.

    Inputs:
    data - DataArray object
    """
    num_choices = self.specification.number_choices
    result = DataArray(zeros((data.rows, num_choices)), self.choices)
    # Cache the inverse flag from the specification for reuse elsewhere.
    self.inverse = self.specification.inverse[0]
    for idx in range(num_choices):
        coeffs = self.coefficients[idx]
        result.data[:, idx] = data.calculate_product(
            coeffs, inverse=self.inverse)
    return result
class TestCountRegressionModel(unittest.TestCase):
    """Checks Poisson count-model probabilities and chosen alternatives."""

    def setUp(self):
        episode_choices = ['Episodes1', 'Episodes2', 'Episodes3']
        self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
        raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
        self.data = DataArray(raw, ['CONSTANT', 'VAR1'])
        self.specification = CountSpecification(episode_choices,
                                                self.coefficients)
        self.model = CountRegressionModel(self.specification)

    def testprobabilitiespoisson(self):
        """Model probabilities must equal manual Poisson pmf/cdf values."""
        expected = zeros((4, 3))
        rate = self.data.calculate_equation(self.coefficients[0])
        expected[:, 0] = poisson.pmf(0, rate)
        expected[:, 1] = poisson.pmf(1, rate)
        expected[:, 2] = 1 - poisson.cdf(1, rate)
        model_probs = self.model.calc_probabilities(self.data)
        self.assertEqual(True, all(expected == model_probs))

    def testselectionpoisson(self):
        """Chosen alternatives for the default seed."""
        expected_choices = array([['episodes3'], ['episodes3'],
                                  ['episodes1'], ['episodes2']])
        model_choices = self.model.calc_chosenalternative(self.data)
        self.assertEqual(True, all(expected_choices == model_choices))
def resolve_consistency(self, data, seed):
    """Reconcile household members' activity schedules.

    Builds Household/Person objects from the sorted schedule rows, then
    dispatches to the processing routine selected by
    self.childDepProcessingType (or terminal-episode allocation when the
    specification requests it), and collates the reconciled schedules.

    Inputs:
    data - DataArray object of person schedules
    seed - random seed passed through to the household routines
    Returns a DataArray of reconciled schedule rows (self.colNames).
    """
    print data.varnames
    actList = []
    # NOTE(review): actListJoint is never used below.
    actListJoint = []
    # Sort so each household's persons (and their episodes) are contiguous.
    data.sort([self.activityAttribs.hidName,
               self.activityAttribs.pidName,
               self.activityAttribs.scheduleidName])
    # Create Index Matrix
    self.create_indices(data)
    self.create_col_numbers(data._colnames)
    #if self.childDepProcessingType == 'Dummy':
    #    return data
    for hhldIndex in self.hhldIndicesOfPersons:
        ti = time.time()
        firstPersonRec = hhldIndex[1]
        lastPersonRec = hhldIndex[2]
        persIndicesForActsForHhld = self.personIndicesOfActs[
            firstPersonRec:lastPersonRec, :]
        #if hhldIndex[0] not in [8843,20008,20440,30985,48365,54850,60085,64006,68037,73093,77192,84903,84963]:  # 1047
        #    continue
        householdObject = Household(hhldIndex[0])
        for perIndex in persIndicesForActsForHhld:
            # perIndex layout: [houseid, personid, firstActRow, lastActRow]
            personObject = Person(perIndex[0], perIndex[1])
            schedulesForPerson = data.data[perIndex[2]:perIndex[3], :]
            activityList = self.return_activity_list_for_person(
                schedulesForPerson)
            personObject.add_episodes(activityList)
            workStatus, schoolStatus, childDependency = \
                self.return_status_dependency(schedulesForPerson)
            personObject.add_status_dependency(workStatus, schoolStatus,
                                               childDependency)
            householdObject.add_person(personObject)
        #print 'household object created in - %.4f' %(time.time()-ti)
        # Dispatch to the configured child-dependency processing routine.
        if self.specification.terminalEpisodesAllocation:
            householdObject.allocate_terminal_dependent_activities(seed)
        elif self.childDepProcessingType == 'Allocation':
            householdObject.allocate_dependent_activities(seed)
        elif self.childDepProcessingType == 'Resolution':
            householdObject.lineup_activities(seed)
        elif self.childDepProcessingType == 'Fix Trip Purpose':
            householdObject.fix_trippurpose(seed)
        elif self.childDepProcessingType == 'Extract Tour Attributes':
            householdObject.extract_tripattributes(seed)
            #raw_input('extract tour attributes')
            pass
        reconciledSchedules = householdObject._collate_results()
        actList += reconciledSchedules
        #print 'created and reconciled in - %.4f' %(time.time()-ti)
    return DataArray(actList, self.colNames)
class TestStocFronRegressionModel(unittest.TestCase):
    """Duplicate stochastic-frontier regression test (seeded draw check)."""

    def setUp(self):
        choice = ['Frontier']
        coefficients = [{'constant': 2, 'Var1': 2.11}]
        data = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
        variance = array([[1., 0], [0, 1.1]])
        self.data = DataArray(data, ['Constant', 'VaR1'])
        self.specification = Specification(choice, coefficients)
        self.errorspecification = StochasticRegErrorSpecification(variance,
                                                                  'start')

    def testvalues(self):
        """Prediction = deterministic expected value + seeded normal error."""
        model = StocFronRegressionModel(self.specification,
                                        self.errorspecification)
        pred_value = model.calc_predvalue(self.data)
        expected_act = self.data.calculate_equation(
            self.specification.coefficients[0])
        expected_act.shape = (4, 1)
        variance = self.errorspecification.variance
        dist = RandomDistribution(seed=1)
        err_norm = dist.return_normal_variables(location=0,
                                                scale=variance[0, 0] ** 0.5,
                                                size=(4, 1))
        pred_act = expected_act + err_norm
        pred_diff = all(pred_value.data == pred_act)
        # Fixed: assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(True, pred_diff)
class TestLinearRegressionModel(unittest.TestCase):
    """Duplicate linear-regression model test (seeded draw check)."""

    def setUp(self):
        reg_choice = ['DURATION']
        reg_coefficients = [{'constant': 2, 'Var1': 2.11}]
        raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
        reg_variance = array([[1]])
        self.data = DataArray(raw, ['Constant', 'VaR1'])
        self.specification = Specification(reg_choice, reg_coefficients)
        self.errorspecification = LinearRegErrorSpecification(reg_variance)

    def testvalues(self):
        """Predicted values must equal expected values plus a seeded draw."""
        model = LinearRegressionModel(self.specification,
                                      self.errorspecification)
        pred_value = model.calc_predvalue(self.data)
        expected_act = self.data.calculate_equation(
            self.specification.coefficients[0])
        expected_act.shape = (4, 1)
        dist = RandomDistribution(seed=1)
        pred_act = dist.return_normal_variables(location=expected_act,
                                                scale=1, size=(4, 1))
        self.assertEqual(True, all(pred_value.data == pred_act))
class TestReconcileModel(unittest.TestCase): def setUp(self): self.data = genfromtxt("/home/kkonduri/simtravel/test/mag_zone/schedule_txt.csv", delimiter=",", dtype=int) colNames = [ "houseid", "personid", "scheduleid", "activitytype", "locationid", "starttime", "endtime", "duration", ] self.actSchedules = DataArray(self.data, colNames) def test_retrieve_loop_ids(self): houseIdsCol = self.actSchedules.columns(["houseid"]).data houseIdsUnique = unique(houseIdsCol) print houseIdsUnique for hid in houseIdsUnique: schedulesRowsIndForHh = houseIdsCol == hid schedulesForHh = self.actSchedules.rowsof(schedulesRowsIndForHh) pIdsCol = schedulesForHh.columns(["personid"]).data pIdsUnique = unique(pIdsCol) for pid in pIdsUnique: schedulesRowIndForPer = pIdsCol == pid schedulesForPerson = schedulesForHh.rowsof(schedulesRowIndForPer) # print 'Raw schedules for hid:%s and pid:%s' %(hid, pid) # print schedulesForPerson activityList = [] for sch in schedulesForPerson.data: scheduleid = sch[2] activitytype = sch[3] locationid = sch[4] starttime = sch[5] endtime = sch[6] duration = sch[7] actepisode = ActivityEpisode(scheduleid, activitytype, locationid, starttime, endtime, duration) activityList.append(actepisode) personObject = Person(hid, pid) personObject.add_and_reconcile_episodes(activityList)
class TestReconcileModel(unittest.TestCase):
    """Duplicate reconcile-model test over the schedules CSV fixture."""

    def setUp(self):
        # NOTE(review): absolute user-specific path; breaks elsewhere.
        self.data = genfromtxt(
            "/home/kkonduri/simtravel/test/mag_zone/schedule_txt.csv",
            delimiter=",", dtype=int)
        schedule_columns = ['houseid', 'personid', 'scheduleid',
                            'activitytype', 'locationid', 'starttime',
                            'endtime', 'duration']
        self.actSchedules = DataArray(self.data, schedule_columns)

    def test_retrieve_loop_ids(self):
        """Reconcile each person's episodes, household by household."""
        houseIdsCol = self.actSchedules.columns(['houseid']).data
        for hid in unique(houseIdsCol):
            hh_mask = houseIdsCol == hid
            schedulesForHh = self.actSchedules.rowsof(hh_mask)
            pIdsCol = schedulesForHh.columns(['personid']).data
            for pid in unique(pIdsCol):
                per_mask = pIdsCol == pid
                schedulesForPerson = schedulesForHh.rowsof(per_mask)
                episodes = []
                for sch in schedulesForPerson.data:
                    # Columns: scheduleid, activitytype, locationid,
                    # starttime, endtime, duration.
                    episodes.append(ActivityEpisode(sch[2], sch[3], sch[4],
                                                    sch[5], sch[6], sch[7]))
                personObject = Person(hid, pid)
                personObject.add_and_reconcile_episodes(episodes)
def calculate_expected_values(self, data):
    """Return the expected value for every choice.

    Inputs:
    data - DataArray object
    """
    num_choices = self.specification.number_choices
    expected = DataArray(zeros((data.rows, num_choices)), self.choices)
    for col in range(num_choices):
        expected.data[:, col] = data.calculate_equation(
            self.coefficients[col])
    return expected
def setUp(self):
    """Duplicate fixture for the age x travel-time interaction model."""
    product_choice = ['age_tt_product']
    product_coefficients = [{'age': 1, 'tt': 1}]
    self.data = DataArray(array([[1, 15], [2, 10]]), ['Age', 'TT'])
    self.specification = Specification(product_choice,
                                       product_coefficients)
def setUp(self):
    """Duplicate fixture for the Poisson count-regression model."""
    episode_choices = ["Episodes1", "Episodes2", "Episodes3"]
    self.coefficients = [{"Constant": 2, "Var1": 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    self.data = DataArray(raw, ["CONSTANT", "VAR1"])
    self.specification = CountSpecification(episode_choices,
                                            self.coefficients)
    self.model = CountRegressionModel(self.specification)
def create_choiceset(self, shape, criterion, names):
    """Return a choiceset where every alternative is available (all ones).

    TODO: generate the choiceset dynamically from `criterion`; the
    creation criterion should become an attribute of the SubModel class.
    """
    return DataArray(ones(shape), names)
def resolve_consistency(self, data, seed):
    """Collect fixed-activity vertices for every household.

    Sorts schedules by household/person/schedule id, builds Household and
    Person objects from the index matrices, and accumulates each
    household's fixed-activity vertices.

    Inputs:
    data - DataArray object of person schedules
    seed - random seed forwarded to the household routine
    Returns a DataArray of vertices (self.colNames).
    """
    verts = []
    data.sort([
        self.activityAttribs.hidName, self.activityAttribs.pidName,
        self.activityAttribs.scheduleidName
    ])
    # Create Index Matrix
    self.create_indices(data)
    self.create_col_numbers(data._colnames)
    for hhldIndex in self.hhldIndicesOfPersons:
        firstPersonRec = hhldIndex[1]
        lastPersonRec = hhldIndex[2]
        firstPersonFirstAct = self.personIndicesOfActs[firstPersonRec, 2]
        lastPersonLastAct = self.personIndicesOfActs[lastPersonRec - 1, 3]
        # NOTE(review): schedulesForHhld is built but never used below.
        schedulesForHhld = DataArray(
            data.data[firstPersonFirstAct:lastPersonLastAct, :],
            data.varnames)
        persIndicesForActsForHhld = self.personIndicesOfActs[
            firstPersonRec:lastPersonRec, :]
        householdObject = Household(hhldIndex[0])
        for perIndex in persIndicesForActsForHhld:
            # perIndex layout: [houseid, personid, firstActRow, lastActRow]
            personObject = Person(perIndex[0], perIndex[1])
            schedulesForPerson = DataArray(
                data.data[perIndex[2]:perIndex[3], :], data.varnames)
            activityList = self.return_activity_list_for_person(
                schedulesForPerson)
            personObject.add_episodes(activityList)
            householdObject.add_person(personObject)
        hhldVerts = householdObject.retrieve_fixed_activity_vertices(seed)
        #for i in hhldVerts:
        #    print i
        #raw_input()
        verts += hhldVerts
    return DataArray(verts, self.colNames)
def setUp(self):
    """Duplicate stochastic-frontier fixture (double-quoted variant)."""
    frontier_choice = ["Frontier"]
    frontier_coeffs = [{"constant": 2, "Var1": 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    error_variance = array([[1.0, 0], [0, 1.1]])
    self.data = DataArray(raw, ["Constant", "VaR1"])
    self.specification = Specification(frontier_choice, frontier_coeffs)
    self.errorspecification = StochasticRegErrorSpecification(
        error_variance, "start")
def setUp(self):
    """Duplicate stochastic-frontier fixture."""
    frontier_choice = ['Frontier']
    frontier_coeffs = [{'constant': 2, 'Var1': 2.11}]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    error_variance = array([[1., 0], [0, 1.1]])
    self.data = DataArray(raw, ['Constant', 'VaR1'])
    self.specification = Specification(frontier_choice, frontier_coeffs)
    self.errorspecification = StochasticRegErrorSpecification(
        error_variance, 'start')
def setUp(self):
    """Fixture for the abstract choice Model with two alternatives."""
    choices = ['SOV', 'HOV']
    # Fixed: removed the unused local `variables` list; the specification
    # derives its variable names from the coefficient dictionaries.
    coefficients = [{'Constant': 2, 'Var1': 2.11}, {'Constant': 1.2}]
    self.data = DataArray(array([[1, 1.1], [1, -0.25]]),
                          ['Constant', 'Var1'])
    self.specification = Specification(choices, coefficients)
    self.model = Model(self.specification)
def setUp(self):
    """Fixture comparing OLSpecification against its raw constructor args."""
    vehicle_choices = ['Veh1', 'Veh2', 'Veh3']
    ol_coefficients = [{'Constant': 2, 'Var1': 2.11}]
    ol_thresholds = [1.2, 2.1]
    raw = array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]])
    self.data = DataArray(raw, ['CONSTANT', 'VAR1'])
    self.specification = OLSpecification(vehicle_choices, ol_coefficients,
                                         ol_thresholds)
    self.specification1 = [vehicle_choices, ol_coefficients, ol_thresholds]
def setUp(self):
    """Build and run a three-submodel AbstractComponent pipeline.

    Seeds numpy's RNG, fabricates a 5x8 data table (first four columns
    random, the rest zeros), wires two linear-regression submodels and
    one logit-choice submodel with data filters / run-until conditions,
    then runs the component over the data.
    """
    data_ = zeros((5, 8))
    # Seed so the fabricated inputs (and results) are reproducible.
    random.seed(1)
    data_[:, :4] = random.rand(5, 4)
    data_ = DataArray(data_, ['Const', 'Var1', 'Var2', 'Var3',
                              'choice2_ind', 'choice1', 'choice2',
                              'choice3'])
    var_list = [('table1', 'Var1'), ('table1', 'Var2'),
                ('table1', 'Var3'), ('table1', 'Const')]
    variance = array([[1]])
    choice1 = ['choice1']
    choice2 = ['choice2']
    choice3 = ['SOV', 'HOV']
    coefficients1 = [{'const': 2, 'Var1': 2.11}]
    coefficients2 = [{'const': 1.5, 'var2': -.2, 'var3': 16.4}]
    coefficients3 = [{'Const': 2, 'Var1': 2.11}, {'Const': 1.2}]
    specification1 = Specification(choice1, coefficients1)
    specification2 = Specification(choice2, coefficients2)
    specification3 = Specification(choice3, coefficients3)
    # Both regression submodels share the same error specification.
    errorspecification = LinearRegErrorSpecification(variance)
    model1 = LinearRegressionModel(specification1, errorspecification)
    model2 = LinearRegressionModel(specification2, errorspecification)
    model3 = LogitChoiceModel(specification3)
    data_filter2 = DataFilter('choice2_ind', 'less than', 25,
                              {'choice2_ind': 1, 'choice2': 1})
    #Run Until Condition
    #Subset for the model condition
    data_filter1 = DataFilter('Const', 'less than', 0.3)
    model_seq1 = SubModel(model1, 'regression', 'choice1')
    model_seq2 = SubModel(model2, 'regression', 'choice2',
                          data_filter=data_filter1,
                          run_until_condition=data_filter2)
    model_seq3 = SubModel(model3, 'choice', 'choice3',
                          run_until_condition=data_filter2)
    model_list = [model_seq1, model_seq2, model_seq3]
    # SPECIFY SEED TO REPLICATE RESULTS, DATA FILTER AND
    # RUN UNTIL CONDITION
    component = AbstractComponent('DummyComponent', model_list, var_list)
    component.run(data_)
    print component.data.data
def testmodelresults(self):
    """Probabilities and chosen alternatives must match the fixtures."""
    model = LogitChoiceModel(self.specification)
    empty_choiceset = DataArray(array([]), [])
    model_probabilities = model.calc_probabilities(self.data,
                                                   empty_choiceset)
    self.assertEqual(True, all(self.prob_array_act == model_probabilities))
    model_selection = model.calc_chosenalternative(self.data)
    self.assertEqual(True, all(self.selected_act == model_selection))
def setUp(self):
    """Load the dynamic-zone schedules fixture CSV into a DataArray."""
    # NOTE(review): absolute user-specific path; breaks elsewhere.
    self.data = genfromtxt(
        "/home/karthik/simtravel/test/mag_zone_dynamic/schedule_txt_small.csv",
        delimiter=",", dtype=int)
    schedule_columns = ['scheduleid', 'houseid', 'personid',
                        'activitytype', 'locationid', 'starttime',
                        'endtime', 'duration', 'dependentpersonid']
    self.actSchedules = DataArray(self.data, schedule_columns)
def calc_chosenalternative(self, data, choiceset=None, seed=1):
    """Return the chosen alternative among the count choices.

    Inputs:
    data - DataArray object
    """
    probability_array = DataArray(self.calc_probabilities(data),
                                  self.specification.choices)
    sampler = AbstractProbabilityModel(probability_array, seed)
    return sampler.selected_choice()
def setUp(self):
    """Duplicate fixture building logit- and probit-link OrderedModels."""
    ordered_choices = ['Veh1', 'Veh2', 'Veh3']
    self.coefficients = [{'Constant': 2, 'Var1': 2.11}]
    self.thresholds = [1.2, 2.1]
    self.data = DataArray(
        array([[1, 1.1], [1, -0.25], [1, 3.13], [1, -0.11]]),
        ['CONSTANT', 'VAR1'])
    spec_logit = OLSpecification(ordered_choices, self.coefficients,
                                 self.thresholds)
    self.model = OrderedModel(spec_logit)
    spec_probit = OLSpecification(ordered_choices, self.coefficients,
                                  self.thresholds, distribution='probit')
    self.model1 = OrderedModel(spec_probit)
def setUp(self):
    """Duplicate schedules-CSV fixture (double-quoted variant)."""
    # NOTE(review): absolute user-specific path; breaks elsewhere.
    self.data = genfromtxt(
        "/home/kkonduri/simtravel/test/mag_zone/schedule_txt.csv",
        delimiter=",", dtype=int)
    schedule_columns = ["houseid", "personid", "scheduleid",
                        "activitytype", "locationid", "starttime",
                        "endtime", "duration"]
    self.actSchedules = DataArray(self.data, schedule_columns)
def resolve_consistency(self, data, seed, numberProcesses):
    """Return per-person schedule extent vertices via pandas groupby.

    Groups the schedule frame by its first two index levels
    (presumably household id and person id -- verify against the caller)
    and takes, per person, the min start time and max end time.

    Inputs:
    data - DataArray wrapping a pandas DataFrame (data.data)
    seed - unused here
    numberProcesses - unused here
    """
    pschedulesGrouped = data.data.groupby(level=[0, 1], sort=False)
    verts = df(columns=self.colNames)
    verts[self.hidName] = pschedulesGrouped[self.hidName].min()
    verts[self.pidName] = pschedulesGrouped[self.pidName].min()
    verts[self.starttimeName] = pschedulesGrouped[self.starttimeName].min()
    verts[self.endtimeName] = pschedulesGrouped[self.endtimeName].max()
    return DataArray(verts.values, self.colNames,
                     indexCols=[self.hidName, self.pidName])
# NOTE(review): dangling triple-quote below appears to open a block of
# commented-out legacy code elsewhere in the file -- confirm before removing.
"""
def sample_choices(self, data, destLocSetInd, zoneLabels, count,
                   sampleVarName, seed):
    """Sample `count` destination choices per record without replacement.

    Each iteration normalizes the remaining destination-availability
    indicators into probabilities, draws one destination per record, and
    masks it out so later draws cannot repeat it.

    Inputs:
    data - DataArray to receive one sampled column per draw
    destLocSetInd - masked availability-indicator matrix (first column
        dropped; presumably a leading id column -- verify with caller)
    zoneLabels - column labels for the probability DataArray
    count - number of destinations to sample
    sampleVarName - prefix for the generated column names
    seed - offset combined with self.projectSeed and the draw number
    """
    destLocSetInd = destLocSetInd[:, 1:]
    for i in range(count):
        destLocSetIndSum = destLocSetInd.sum(-1)
        probLocSet = (destLocSetInd.transpose() /
                      destLocSetIndSum).transpose()
        zeroChoices = destLocSetIndSum.mask
        # Skip this draw when no record has any remaining choice.
        if (~zeroChoices).sum() == 0:
            continue
        probDataArray = DataArray(probLocSet, zoneLabels)
        # seed is the count of the sampled destination starting with 1
        probModel = AbstractProbabilityModel(probDataArray,
                                             self.projectSeed + seed + i)
        res = probModel.selected_choice()
        # Results are indexed at 1; actual location id = choice - 1.
        colName = '%s%s' % (sampleVarName, i + 1)
        # Fixed: removed the Py3-incompatible `<>` comparison along with
        # the unused locals (ti, nonZeroRows) that used it.
        data.setcolumn(colName, res.data)
        # Mask the sampled destination so it cannot be drawn again.
        dataCol = data.columns([colName]).data
        rowIndices = array(xrange(dataCol.shape[0]), int)
        colIndices = res.data.astype(int)
        destLocSetInd.mask[rowIndices, colIndices - 1] = True
def returnTable(self, tableName, id_var, colNames):
    """Return (DataArray, uniqueIds) with rows positioned by id value.

    NOTE(review): assumes each id occurs exactly once and that column
    order matches unique(idVarColumn) -- verify against the table layout.
    """
    tableRef = self.returnTableReference(tableName)
    idVarColumn = tableRef.col(id_var)
    uniqueIds = unique(idVarColumn)
    # Row index == id value, hence max(id) + 1 rows.
    table = zeros((max(uniqueIds) + 1, len(colNames)))
    for col_num, col_name in enumerate(colNames):
        table[uniqueIds, col_num] = tableRef.col(col_name)
    #print table
    return DataArray(table, colNames), uniqueIds
def resolve_consistency(self, data, seed, numberProcesses):
    """Reconcile schedules in parallel across worker processes.

    Splits the schedule frame by household id, fans the splits out to
    `numberProcesses` workers running clean_aggregate_schedules, and
    concatenates the per-split results.

    Inputs:
    data - DataArray wrapping a DataFrame (data.data)
    seed - random seed forwarded to each worker
    numberProcesses - worker count for the multiprocessing pool
    """
    print "Number of splits - ", numberProcesses
    splits = split_df(data.data, houseidCol=self.hidName,
                      workers=numberProcesses)
    args = [(split, seed, self.activityAttribs,
             self.dailyStatusAttribs, self.dependencyAttribs)
            for split in splits]
    resultList = []
    resultList += resolve_by_multiprocessing(
        func=clean_aggregate_schedules, args=args,
        workers=numberProcesses)
    return DataArray(resultList, self.colNames)
# NOTE(review): dangling triple-quote below appears to open a block of
# commented-out legacy code elsewhere in the file -- confirm before removing.
"""
class TestInteractionModel(unittest.TestCase):
    """Checks InteractionModel predictions against calculate_product."""

    def setUp(self):
        interaction_choice = ['age_tt_product']
        interaction_coeffs = [{'age': 1, 'tt': 1}]
        self.data = DataArray(array([[1, 15], [2, 10]]), ['Age', 'TT'])
        self.specification = Specification(interaction_choice,
                                           interaction_coeffs)

    def testvalues(self):
        """Predicted column must equal the direct product calculation."""
        model = InteractionModel(self.specification)
        pred_value = model.calc_predvalue(self.data)
        pred_act = self.data.calculate_product(
            self.specification.coefficients[0])
        self.assertEqual(True, all(pred_value.data[:, 0] == pred_act))
def update_houseids(self, hhldSyn, persSyn, hhldVars, persVars, highestHid):
    """Renumber synthetic household ids to be unique and sequential.

    First builds a temporary composite id (hhid shifted left by enough
    decimal digits to hold the largest `frequency` value, plus the
    frequency) so duplicated hhid/frequency rows sort deterministically;
    then walks households in that order assigning fresh ids starting at
    highestHid + 1 to both the household and person tables.

    Inputs:
    hhldSyn/persSyn - raw synthetic household/person arrays
    hhldVars/persVars - column-name lists for the two arrays
    highestHid - largest household id already in use
    Returns the updated (hhldSyn, persSyn) arrays.
    """
    hhldSynDataObj = DataArray(hhldSyn, hhldVars)
    persSynDataObj = DataArray(persSyn, persVars)
    # Number of decimal digits needed to encode the largest frequency.
    maxFreqCol = amax(hhldSynDataObj.columns(['frequency']).data)
    powFreqCol = floor(log(maxFreqCol, 10)) + 1
    # Composite id: hhid * 10**digits + frequency.
    coefficients = {'frequency': 1, 'hhid': 10 ** powFreqCol}
    newHid = hhldSynDataObj.calculate_equation(coefficients)
    hhldSynDataObj.setcolumn('hhid', newHid)
    newHid = persSynDataObj.calculate_equation(coefficients)
    persSynDataObj.setcolumn('hhid', newHid)
    # Sort both tables on the composite id so they stay aligned.
    hhldSynDataObj.sort([self.idSpec.hidName])
    persSynDataObj.sort([self.idSpec.hidName, self.idSpec.pidName])
    hidIndex_popgenH = hhldVars.index('hhid')
    hidIndex_popgenP = persVars.index('hhid')
    self.create_indices(persSynDataObj)
    hhldSyn = hhldSynDataObj.data
    persSyn = persSynDataObj.data
    row = 0
    for hhldIndex in self.hhldIndicesOfPersons:
        firstPersonRec = hhldIndex[1]
        lastPersonRec = hhldIndex[2]
        #print hhldIndex[0], highestHid + 1, firstPersonRec, lastPersonRec
        # Assign the next fresh id to the household row and to every
        # person row belonging to that household.
        hhldSyn[row, hidIndex_popgenH] = highestHid + 1
        persSyn[firstPersonRec:lastPersonRec, hidIndex_popgenP] = \
            highestHid + 1
        highestHid += 1
        row += 1
    return hhldSyn, persSyn
def select_join(self, db_dict, column_names, table_names, max_dict=None, spatialConst_list=None, analysisInterval=None, subsample=None):
    """Build and execute one composite left-join SQL query.

    Joins the requested tables on the key columns in column_names, optionally
    restricting one table to the row holding the maximum of a column
    (max_dict, e.g. the latest scheduleid) and optionally attaching a
    time-space-prism (TSP) construct built from spatialConst_list and
    analysisInterval.

    Example inputs:
        db_dict       {'households': ['htaz', 'numchild', 'inclt35k', 'hhsize'],
                       'persons': ['male', 'schstatus', 'one', 'houseid', 'personid'],
                       'schedule_r': ['scheduleid', 'activitytype']}
        column_names  {'households': ['houseid'],
                       'schedule_r': ['personid', 'houseid']}
        table_names   ['persons', 'households', 'schedule_r']  # first = main table
        max_dict      {'schedule_r': ['scheduleid']}
        analysisInterval  195
        subsample     None (echoed only; not otherwise used here)

    Output: the joined rows as a DataArray sorted on the primary key columns;
    None when a requested column is missing (and implicitly None when the
    query itself fails -- see the except branch at the bottom).
    """
    self.dbcon_obj.new_sessionInstance()
    # Echo all inputs (debug aid)
    print 'DB_DICT', db_dict
    print 'COLUMN_NAMES', column_names
    print 'TABLE_NAMES', table_names
    print 'MAX_DICT', max_dict
    print 'SPATIALCONST_LIST', spatialConst_list
    print 'ANALYSISINTERVAL', analysisInterval
    print 'SUBSAMPLE', subsample
    #db_dict = {'households': ['urb', 'numchild', 'inclt35k', 'ownhome', 'one', 'drvrcnt', 'houseid'],
    #           'vehicles_r': ['vehtype', 'vehid'],
    #           'households_r': ['numvehs']}
    #columns_names = ['houseid']
    #table_names = ['households', 'households_r', 'vehicles_r']
    #max_dict = {'vehicles_r':['vehid']}
    # Prism Query or or just a Travel Time Query with Vertices
    # ADD APPROPRIATE JOIN/?INNER JOINS
    print 'Database Dictionary of Table and Columns - ', db_dict
    print 'Column Names', column_names
    #raw_input()
    #initialize the variables
    final_list = []   # qualified "table.column" entries for the select clause
    table_list = []
    class_list = []   # NOTE(review): never populated or read below
    cols_list = []    # output column names handed to the DataArray
    tabs_list = []    # upper-cased table names for session.query
    #col_name = column_name
    final_col_list = db_dict.values()  # NOTE(review): unused below
    table_list = db_dict.keys()
    """
    #check if the table exists. If not return none
    if chk_table.lower() in [each.lower() for each in table_list]:
        print 'table %s is present in the table list'%chk_table
    else:
        print 'table %s is not present in the table list'%chk_table
        return None
    """
    #similarly check if the table in the list exists
    num_tab = len(list(set(table_list) & set(table_names)))
    # NOTE(review): the intersection size can never exceed len(table_list),
    # so this condition is always true and the "tables missing" branch is
    # unreachable; an equality test against len(table_names) was probably
    # intended -- confirm before changing.
    if num_tab <= len(table_list):
        #print 'Tables exist'
        pass
    else:
        #print 'Tables do not exists'
        return None
    #check for the columns passed in the dictionary
    for i in db_dict.keys():
        clist = self.dbcon_obj.get_column_list(i.lower())
        list1 = db_dict[i]
        #print 'table--', i
        #print 'clist', clist
        #print 'list1', list1
        chk_list = len(list(set(list1) & set(clist)))
        if chk_list == len(list1):
            # every requested column exists; qualify it as "table.column"
            for j in db_dict[i]:
                #print '\tColumn - ', j
                new_str = i.lower() + '.' + j.lower()
                final_list.append(new_str)
        else:
            print ('Column passed in the dictionary does not exist in the table - ')
            print 'Column List in the Table - ', clist
            print 'Actual List of Columns requested from table - ', list1
            return None
    #print 'final_list is %s'%final_list
    #print 'FINAL LIST', final_list
    #print 'TABLE LIST', table_list
    # Generating the left join statements
    mainTable = table_names[0]
    #print 'mainTable ----> ', mainTable
    # primCols: union of all join-key columns; used to sort the result
    primCols = []
    for i in column_names:
        primCols += column_names[i]
    primCols = list(set(primCols))
    joinStrList = []
    for table in table_list:
        if table == mainTable:
            continue
        joinCondition = ''
        for col in column_names[table]:
            joinCondition = (joinCondition + ' %s.%s=%s.%s ' %(mainTable, col, table, col) + 'and')
        joinCondition = joinCondition[:-3]  # strip the trailing 'and'
        joinStr = ' left join %s on (%s)' %(table, joinCondition)
        joinStrList.append(joinStr)
    #print 'JOIN STRING LIST', joinStrList
    #check the max flag
    if max_dict is not None:
        max_flag = 1
        max_table = max_dict.keys()
        max_column = max_dict.values()[0][0]
        #print max_column, max_table
        #for each in max_dict.values():
        #    max_column = each[0]
    else:
        max_flag = 0
    # Index of the table containing the max dict var
    # as it stands only querying for one count variable is
    # provided
    #print max_dict
    if max_dict is not None:
        maxTable = max_dict.keys()[0]
        maxColumn = max_dict.values()[0][0]
        index = table_list.index(maxTable)
        #print 'INDEX--->', index
        #remove the count column from the col list
        # the raw column is replaced by the aliased one from the "temp" subquery
        countVarStr = '%s.%s' %(maxTable, maxColumn)
        final_list.remove(countVarStr)
        final_list.append('temp.%s'%maxColumn)
        #print 'NEW FINAL LIST -->', final_list
        # left join for the count variable
        joinStr = ''
        #grouping string
        grpStr = ''
        joinCondition=''
        #print 'column_names of max TABLE ----->', column_names
        for i in column_names[maxTable]:
            #print 'createing join string for column name - ', i
            grpStr = grpStr + '%s,' %(i)
            joinCondition = (joinCondition + ' temp.%s=%s.%s ' %(i, mainTable, i) + 'and')
        grpStr = grpStr[:-1]            # strip trailing comma
        joinCondition = joinCondition[:-3]  # strip trailing 'and'
        #combine left join along with the count variable/max condition
        # NOTE(review): pop(index-1) assumes the max table's plain left-join
        # sits at that position of joinStrList (mainTable was skipped when the
        # list was built) -- confirm for arbitrary table orderings.
        mJoinStr = joinStrList.pop(index-1)
        mJoinStrIncMaxConditionVar = (mJoinStr[:-1] + 'and %s.%s=temp.%s)' %(maxTable, maxColumn, maxColumn))
        joinStrList.append(""" left join (select %s, max(%s) as %s from """
                           """%s group by %s) as temp on (%s) """ %(grpStr, maxColumn, maxColumn,maxTable, grpStr, joinCondition) + mJoinStrIncMaxConditionVar)
        #print 'LEFT JOIN MAX COL LIST--->', joinStrList
    # Spatial TSP identification
    if spatialConst_list is not None:
        for i in spatialConst_list:
            if i.countChoices is not None:
                # substring for the inner join
                stTable = i.startConstraint.table
                #stLocationField = 'st_' + i.startConstraint.locationField
                stLocationCol = 'stl.%s' %i.startConstraint.locationField
                #stTimeField = 'st_'+ i.startConstraint.timeField
                stTimeCol = 'st.%s' %i.startConstraint.timeField
                enTable = i.endConstraint.table
                #enLocationField = 'en_' + i.endConstraint.locationField
                enLocationCol = 'enl.%s' %i.endConstraint.locationField
                #enTimeField = 'en_' + i.endConstraint.timeField
                enTimeCol = 'en.%s' %i.endConstraint.timeField
                timeCols = [stTimeCol, enTimeCol]
                table_list.append(stTable)
                # left join for end location
                # time cols are part of sptime
                timeColsNewNames = []
                for j in timeCols:
                    # 'st.endtime' -> 'st_endtime' etc. (legal SQL aliases)
                    timeColsNewNames.append(j.replace('.', '_'))
                timeColsStr = ''
                for j in range(len(timeCols)):
                    # minimum of the time cols gives the first prism
                    timeColsStr += 'min(%s) %s,' %(timeCols[j], timeColsNewNames[j])
                timeColsStr = timeColsStr[:-1]
                spGrpNewNameStr = ''
                spGrpStr = ''
                for j in column_names[stTable]:
                    spGrpNewNameStr += 'st.%s %s,' %(j, j)
                    spGrpStr += 'st.%s,' %(j)
                spGrpNewNameStr = spGrpNewNameStr[:-1]
                spGrpStr = spGrpStr[:-1]
                spInnerJoinCondition = ''
                for j in column_names[stTable]:
                    spInnerJoinCondition += ' %s.%s = %s.%s and' %('st', j, 'en', j)
                spInnerJoinCondition = spInnerJoinCondition[:-3]
                spJoinCondition = ''
                for j in column_names[stTable]:
                    spJoinCondition += ' %s.%s = %s.%s and' %('sptime', j, mainTable, j)
                spJoinCondition = spJoinCondition[:-3]
                # Left join condition for prism start location
                stLocJoinCondition = ''
                stLocCondCols = column_names[stTable]
                for j in stLocCondCols:
                    stLocJoinCondition += ' %s.%s = %s.%s and' %('stl', j, mainTable, j)
                stLocJoinCondition += ' sptime.st_%s = %s.%s' %(i.startConstraint.timeField, 'stl', i.startConstraint.timeField)
                final_list.append('stl.%s as st_%s' %(i.startConstraint.locationField, i.startConstraint.locationField))
                cols_list.append('st_%s' %i.startConstraint.locationField)
                #stLocJoinCondition = stLocJoinCondition[:-3]
                # Left join condition for prism end location
                enLocJoinCondition = ''
                enLocCondCols = column_names[stTable]
                for j in enLocCondCols:
                    enLocJoinCondition += ' %s.%s = %s.%s and' %('enl', j, mainTable, j)
                enLocJoinCondition += ' sptime.en_%s = %s.%s' %(i.endConstraint.timeField, 'enl', i.endConstraint.timeField)
                final_list.append('enl.%s as en_%s' %(i.endConstraint.locationField, i.endConstraint.locationField))
                cols_list.append('en_%s' %i.endConstraint.locationField)
                #enLocJoinCondition = enLocJoinCondition[:-3]
                # TSP consistency check
                # nextepisode_starttime > lastepisode_endtime
                #consistencyStr = '%s < %s' %(stTimeCol, endTimeCol)
                analysisPeriodStr = ('%s=%s and %s>%s' %(stTimeCol, analysisInterval, enTimeCol, analysisInterval))
                spatialJoinStr = (""" join (select %s, %s """
                                  """from %s as %s """
                                  """inner join %s as %s """
                                  """on ( %s and %s) group by"""
                                  """ %s) """
                                  """as sptime on (%s)""" % (spGrpNewNameStr, timeColsStr, stTable, 'st', enTable, 'en', spInnerJoinCondition, analysisPeriodStr, spGrpStr, spJoinCondition))
                #print 'SPATIAL JOIN'
                #print spatialJoinStr
                # left join for start location
                stLocJoinStr = (""" left join %s %s on """
                                """(%s) """ %(stTable, 'stl', stLocJoinCondition))
                enLocJoinStr = (""" left join %s %s on """
                                """(%s) """ %(enTable, 'enl', enLocJoinCondition))
                joinStrList.append(spatialJoinStr)
                joinStrList.append(stLocJoinStr)
                joinStrList.append(enLocJoinStr)
                cols_list += timeColsNewNames
                for i in timeColsNewNames:
                    final_list.append('sptime.%s' %(i))
                # Only one time-space prism can be retrieved within a component
                # there cannot be two TSP's in the same component
                break
    # Generating the col list
    colStr = ''
    for i in final_list:
        colStr = colStr + '%s,' %(i)
    colStr = colStr[:-1]  # strip trailing comma
    # Build the SQL string
    allJoinStr = ''
    for i in joinStrList:
        allJoinStr = allJoinStr + '%s' %i
    sql_string = 'select %s from %s %s' %(colStr, mainTable, allJoinStr)
    print 'SQL string for query - ', sql_string
    #convert all the table names to upper case
    for each in table_list:
        tabs_list.append(each.upper())
    #print 'tabs_list is %s'%tabs_list
    #separate all the columns from the lists
    new_keys = db_dict.keys()
    for i in new_keys:
        cols_list = cols_list + db_dict[i]
    #print 'cols_list is %s'%cols_list
    try:
        # Comma-separated upper-cased table list for session.query
        sample_str = ''
        ctr = 0
        for i in tabs_list:
            if ctr==0:
                sample_str = i
                ctr = ctr + 1
            else:
                sample_str = sample_str + ', ' + i
        query = self.dbcon_obj.session.query((sample_str))
        #print 'sample_str is %s'%sample_str
        result = query.from_statement(sql_string).values(*cols_list)
        resultArray = self.createResultArray(result)
        # Returns the query as a DataArray object
        data = DataArray(resultArray, cols_list)
        data.sort(primCols)
        self.dbcon_obj.close_sessionInstance()
        return data
    except Exception, e:
        # NOTE(review): broad catch -- the session is NOT closed on failure
        # and the method implicitly returns None; confirm callers handle both.
        print e
        print 'Error retrieving the information. Query failed.'
def setUp(self):
    """Load the activity-schedule CSV fixture into a DataArray for the tests."""
    # Absolute path to the fixture; the file holds integer schedule records
    scheduleFile = "/home/kkonduri/simtravel/test/mag_zone/schedule_txt.csv"
    colNames = ['houseid', 'personid', 'scheduleid', 'activitytype',
                'locationid', 'starttime', 'endtime', 'duration']
    self.data = genfromtxt(scheduleFile, delimiter=",", dtype=int)
    self.actSchedules = DataArray(self.data, colNames)