def compute_prob_correlation(self, pij, ptij, phij, index, resources): chosen_choice_dummy = resources['chosen_choice'] indices = unique(index.ravel()) correlation = None for j in indices: w = where(index == j) pj = pij[w] ptj = ptij[w] phj = phij[w] sj = chosen_choice_dummy[w] #import pdb; pdb.set_trace() cor = corr(pj, ptj, phj, sj)[([0,0,0,1,1,2],[1,2,3,2,3,3])][newaxis,:] #take the upper triangle if correlation is None: correlation = cor else: correlation = concatenate((correlation, cor), axis=0) return (indices, correlation)
def compute_prob_correlation(self, pij, ptij, phij, index, resources): chosen_choice_dummy = resources['chosen_choice'] indices = unique(index.ravel()) correlation = None for j in indices: w = where(index == j) pj = pij[w] ptj = ptij[w] phj = phij[w] sj = chosen_choice_dummy[w] #import pdb; pdb.set_trace() cor = corr(pj, ptj, phj, sj)[([0, 0, 0, 1, 1, 2], [1, 2, 3, 2, 3, 3])][newaxis, :] #take the upper triangle if correlation is None: correlation = cor else: correlation = concatenate((correlation, cor), axis=0) return (indices, correlation)
def run(self, data, upc_sequence, resources=None): self.mnl_probabilities=upc_sequence.probability_class self.bhhh_estimation = bhhh_mnl_estimation() modified_upc_sequence = UPCFactory().get_model( utilities=None, probabilities="opus_core.mnl_probabilities", choices=None) modified_upc_sequence.utility_class = upc_sequence.utility_class N, neqs, V = data.shape max_iter = resources.get("max_iterations", 100) # default sc = SessionConfiguration() dataset_pool = sc.get_dataset_pool() sample_rate = dataset_pool.get_dataset("sample_rate") CLOSE = sc["CLOSE"] info_filename = sc["info_file"] info_filename = os.path.join('.', info_filename) info_file = open(info_filename, "a") constraint_dict = {1:'constrained', 0:'unconstrained'} swing_cases_fix = 0 #set swing alternatives to constrained (1) or unconstrained (0) prob_correlation = None choice_set = resources['_model_'].choice_set J = choice_set.size() alt_id = choice_set.get_id_attribute() movers = choice_set.get_attribute('movers') resources.check_obligatory_keys(["capacity_string"]) supply = choice_set.get_attribute(resources["capacity_string"]) index = resources.get("index", None) if index is None: # no sampling case, alternative set is the full choice_set index = arange(J) if index.ndim <= 1: index = repeat(index[newaxis,:], N, axis=0) if resources.get('aggregate_to_dataset', None): aggregate_dataset = dataset_pool.get_dataset(resources.get('aggregate_to_dataset')) choice_set_aggregate_id = choice_set.get_attribute(aggregate_dataset.get_id_name()[0]) index = aggregate_dataset.get_id_index(choice_set_aggregate_id[index].ravel()).reshape(index.shape) supply = aggregate_dataset.get_attribute(resources["capacity_string"]) J = aggregate_dataset.size() movers = aggregate_dataset.get_attribute("movers") demand_history = movers[:, newaxis] resources.merge({"index":index}) pi = ones(index.shape, dtype=float32) #initialize pi #average_omega = ones(J,dtype=float32) #initialize average_omega logger.start_block('Outer Loop') for i in range(max_iter): logger.log_status('Outer Loop Iteration %s' % i) result = self.bhhh_estimation.run(data, modified_upc_sequence, resources) del self.bhhh_estimation; collect() self.bhhh_estimation = bhhh_mnl_estimation() probability = modified_upc_sequence.get_probabilities() if data.shape[2] == V: #insert a placeholder for ln(pi) in data data = concatenate((data,ones((N,neqs,1),dtype=float32)), axis=2) coef_names = resources.get("coefficient_names") coef_names = concatenate( (coef_names, array(["ln_pi"])) ) resources.merge({"coefficient_names":coef_names}) else: beta_ln_pi = result['estimators'][where(coef_names == 'ln_pi')][0] logger.log_status("mu = 1/%s = %s" % (beta_ln_pi, 1/beta_ln_pi)) prob_hat = safe_array_divide(probability, pi ** beta_ln_pi) #prob_hat = safe_array_divide(probability, pi) prob_hat_sum = prob_hat.sum(axis=1, dtype=float32) if not ma.allclose(prob_hat_sum, 1.0): logger.log_status("probability doesn't sum up to 1, with minimum %s, and maximum %s" % (prob_hat_sum.min(), prob_hat_sum.max())) probability = normalize(prob_hat) demand = self.mnl_probabilities.get_demand(index, probability, J) * 1 / sample_rate demand_history = concatenate((demand_history, demand[:, newaxis]), axis=1) sdratio = safe_array_divide(supply, demand, return_value_if_denominator_is_zero=2.0) sdratio_matrix = sdratio[index] ## debug info from numpy import histogram from opus_core.misc import unique cc = histogram(index.ravel(), unique(index.ravel()))[0] logger.log_status( "=================================================================") logger.log_status( "Probability min: %s, max: %s" % (probability.min(), probability.max()) ) logger.log_status( "Demand min: %s, max: %s" % (demand.min(), demand.max()) ) logger.log_status( "sdratio min: %s, max: %s" % (sdratio.min(), sdratio.max()) ) logger.log_status( "demand[sdratio==sdratio.min()]=%s" % demand[sdratio==sdratio.min()] ) logger.log_status( "demand[sdratio==sdratio.max()]=%s" % demand[sdratio==sdratio.max()] ) logger.log_status( "Counts of unique submarkets in alternatives min: %s, max: %s" % (cc.min(), cc.max()) ) logger.log_status( "=================================================================") constrained_locations_matrix, omega, info = self.inner_loop(supply, demand, probability, index, sdratio_matrix, J, max_iteration=max_iter) inner_iterations, constrained_locations_history, swing_index, average_omega_history = info for idx in swing_index: logger.log_status("swinging alt with id %s set to %s" % (alt_id[idx], constraint_dict[swing_cases_fix])) constrained_locations_matrix[index==idx] = swing_cases_fix if swing_index.size > 0: info_file.write("swing of constraints found with id %s \n" % alt_id[swing_index]) info_file.write("outer_iteration, %i, " % i + ", ".join([str(i)]*(len(inner_iterations))) + "\n") info_file.write("inner_iteration, , " + ", ".join(inner_iterations) + "\n") info_file.write("id, sdratio, " + ", ".join(["avg_omega"]*len(inner_iterations)) + "\n") for idx in swing_index: line = str(alt_id[idx]) + ',' line += str(sdratio[idx]) + ',' line += ",".join([str(x) for x in average_omega_history[idx,]]) line += "\n" info_file.write(line) info_file.write("\n") info_file.flush() outer_iterations = [str(i)] * len(inner_iterations) prob_min = [str(probability.min())] * len(inner_iterations) prob_max = [str(probability.max())] * len(inner_iterations) pi_new = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix) data[:,:,-1] = ln(pi_new) #diagnostic output if not ma.allclose(pi, pi_new, atol=CLOSE): if i > 0: #don't print this for the first iteration logger.log_status("min of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).min()) logger.log_status("max of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).max()) logger.log_status("mean of pi(l+1) - pi(l): %s" % (pi_new - pi).mean()) logger.log_status('Standard Deviation pi(l+1) - pi(l): %s' % standard_deviation(pi_new - pi)) logger.log_status('correlation of pi(l+1) and pi(l): %s' % corr(pi_new.ravel(), pi.ravel())[0,1]) pi = pi_new probability_old = probability # keep probability of the previous loop, for statistics computation only else: #convergence criterion achieved, quiting outer loop logger.log_status("pi(l) == pi(l+1): Convergence criterion achieved") info_file.write("\nConstrained Locations History:\n") info_file.write("outer_iteration," + ",".join(outer_iterations) + "\n") info_file.write("inner_iteration," + ",".join(inner_iterations) + "\n") info_file.write("minimum_probability," + ",".join(prob_min) + "\n") info_file.write("maximum_probability," + ",".join(prob_max) + "\n") for row in range(J): line = [str(x) for x in constrained_locations_history[row,]] info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n") info_file.flush() info_file.write("\nDemand History:\n") i_str = [str(x) for x in range(i)] info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n") #info_file.write(", ,\n") for row in range(J): line = [str(x) for x in demand_history[row,]] info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n") demand_history_info_criteria = [500, 100, 50, 20] for criterion in demand_history_info_criteria: com_rows_index = where(movers <= criterion)[0] info_file.write("\nDemand History for alternatives with less than or equal to %s movers in 1998:\n" % criterion) i_str = [str(x) for x in range(i)] info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n") #info_file.write(", movers,\n") for row in com_rows_index: line = [str(x) for x in demand_history[row,]] info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n") #import pdb; pdb.set_trace() #export prob correlation history correlation_indices, prob_correlation = self.compute_prob_correlation(probability_old, probability, prob_hat, index, resources) info_file.write("\nCorrelation of Probabilities:\n") c_name = ['corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)', 'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)', 'corr(p^_ij dummy)'] info_file.write("com_id, " + ",".join(c_name) + "\n") #info_file.write(", ,\n") for row in range(correlation_indices.size): line = [str(x) for x in prob_correlation[row,]] info_file.write(str(alt_id[correlation_indices[row]]) + "," + ",".join(line) + "\n") info_file.close() result['pi'] = pi return result logger.end_block() try:info_file.close() except:pass raise RuntimeError, "max iteration reached without convergence."
def run(self, data, upc_sequence, resources=None): self.mnl_probabilities = upc_sequence.probability_class self.bhhh_estimation = bhhh_mnl_estimation() modified_upc_sequence = UPCFactory().get_model( utilities=None, probabilities="opus_core.mnl_probabilities", choices=None) modified_upc_sequence.utility_class = upc_sequence.utility_class N, neqs, V = data.shape max_iter = resources.get("max_iterations", 100) # default sc = SessionConfiguration() dataset_pool = sc.get_dataset_pool() sample_rate = dataset_pool.get_dataset("sample_rate") CLOSE = sc["CLOSE"] info_filename = sc["info_file"] info_filename = os.path.join('.', info_filename) info_file = open(info_filename, "a") constraint_dict = {1: 'constrained', 0: 'unconstrained'} swing_cases_fix = 0 #set swing alternatives to constrained (1) or unconstrained (0) prob_correlation = None choice_set = resources['_model_'].choice_set J = choice_set.size() alt_id = choice_set.get_id_attribute() movers = choice_set.get_attribute('movers') resources.check_obligatory_keys(["capacity_string"]) supply = choice_set.get_attribute(resources["capacity_string"]) index = resources.get("index", None) if index is None: # no sampling case, alternative set is the full choice_set index = arange(J) if index.ndim <= 1: index = repeat(index[newaxis, :], N, axis=0) if resources.get('aggregate_to_dataset', None): aggregate_dataset = dataset_pool.get_dataset( resources.get('aggregate_to_dataset')) choice_set_aggregate_id = choice_set.get_attribute( aggregate_dataset.get_id_name()[0]) index = aggregate_dataset.get_id_index( choice_set_aggregate_id[index].ravel()).reshape(index.shape) supply = aggregate_dataset.get_attribute( resources["capacity_string"]) J = aggregate_dataset.size() movers = aggregate_dataset.get_attribute("movers") demand_history = movers[:, newaxis] resources.merge({"index": index}) pi = ones(index.shape, dtype=float32) #initialize pi #average_omega = ones(J,dtype=float32) #initialize average_omega logger.start_block('Outer Loop') for i in range(max_iter): logger.log_status('Outer Loop Iteration %s' % i) result = self.bhhh_estimation.run(data, modified_upc_sequence, resources) del self.bhhh_estimation collect() self.bhhh_estimation = bhhh_mnl_estimation() probability = modified_upc_sequence.get_probabilities() if data.shape[2] == V: #insert a placeholder for ln(pi) in data data = concatenate((data, ones((N, neqs, 1), dtype=float32)), axis=2) coef_names = resources.get("coefficient_names") coef_names = concatenate((coef_names, array(["ln_pi"]))) resources.merge({"coefficient_names": coef_names}) else: beta_ln_pi = result['estimators'][where( coef_names == 'ln_pi')][0] logger.log_status("mu = 1/%s = %s" % (beta_ln_pi, 1 / beta_ln_pi)) prob_hat = safe_array_divide(probability, pi**beta_ln_pi) #prob_hat = safe_array_divide(probability, pi) prob_hat_sum = prob_hat.sum(axis=1, dtype=float32) if not ma.allclose(prob_hat_sum, 1.0): logger.log_status( "probability doesn't sum up to 1, with minimum %s, and maximum %s" % (prob_hat_sum.min(), prob_hat_sum.max())) probability = normalize(prob_hat) demand = self.mnl_probabilities.get_demand(index, probability, J) * 1 / sample_rate demand_history = concatenate((demand_history, demand[:, newaxis]), axis=1) sdratio = safe_array_divide( supply, demand, return_value_if_denominator_is_zero=2.0) sdratio_matrix = sdratio[index] ## debug info from numpy import histogram from opus_core.misc import unique cc = histogram(index.ravel(), unique(index.ravel()))[0] logger.log_status( "=================================================================" ) logger.log_status("Probability min: %s, max: %s" % (probability.min(), probability.max())) logger.log_status("Demand min: %s, max: %s" % (demand.min(), demand.max())) logger.log_status("sdratio min: %s, max: %s" % (sdratio.min(), sdratio.max())) logger.log_status("demand[sdratio==sdratio.min()]=%s" % demand[sdratio == sdratio.min()]) logger.log_status("demand[sdratio==sdratio.max()]=%s" % demand[sdratio == sdratio.max()]) logger.log_status( "Counts of unique submarkets in alternatives min: %s, max: %s" % (cc.min(), cc.max())) logger.log_status( "=================================================================" ) constrained_locations_matrix, omega, info = self.inner_loop( supply, demand, probability, index, sdratio_matrix, J, max_iteration=max_iter) inner_iterations, constrained_locations_history, swing_index, average_omega_history = info for idx in swing_index: logger.log_status( "swinging alt with id %s set to %s" % (alt_id[idx], constraint_dict[swing_cases_fix])) constrained_locations_matrix[index == idx] = swing_cases_fix if swing_index.size > 0: info_file.write("swing of constraints found with id %s \n" % alt_id[swing_index]) info_file.write("outer_iteration, %i, " % i + ", ".join([str(i)] * (len(inner_iterations))) + "\n") info_file.write("inner_iteration, , " + ", ".join(inner_iterations) + "\n") info_file.write("id, sdratio, " + ", ".join(["avg_omega"] * len(inner_iterations)) + "\n") for idx in swing_index: line = str(alt_id[idx]) + ',' line += str(sdratio[idx]) + ',' line += ",".join( [str(x) for x in average_omega_history[idx, ]]) line += "\n" info_file.write(line) info_file.write("\n") info_file.flush() outer_iterations = [str(i)] * len(inner_iterations) prob_min = [str(probability.min())] * len(inner_iterations) prob_max = [str(probability.max())] * len(inner_iterations) pi_new = self.mnl_probabilities.get_pi( sdratio_matrix, omega, constrained_locations_matrix) data[:, :, -1] = ln(pi_new) #diagnostic output if not ma.allclose(pi, pi_new, atol=CLOSE): if i > 0: #don't print this for the first iteration logger.log_status("min of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).min()) logger.log_status("max of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).max()) logger.log_status("mean of pi(l+1) - pi(l): %s" % (pi_new - pi).mean()) logger.log_status( 'Standard Deviation pi(l+1) - pi(l): %s' % standard_deviation(pi_new - pi)) logger.log_status('correlation of pi(l+1) and pi(l): %s' % corr(pi_new.ravel(), pi.ravel())[0, 1]) pi = pi_new probability_old = probability # keep probability of the previous loop, for statistics computation only else: #convergence criterion achieved, quiting outer loop logger.log_status( "pi(l) == pi(l+1): Convergence criterion achieved") info_file.write("\nConstrained Locations History:\n") info_file.write("outer_iteration," + ",".join(outer_iterations) + "\n") info_file.write("inner_iteration," + ",".join(inner_iterations) + "\n") info_file.write("minimum_probability," + ",".join(prob_min) + "\n") info_file.write("maximum_probability," + ",".join(prob_max) + "\n") for row in range(J): line = [ str(x) for x in constrained_locations_history[row, ] ] info_file.write( str(alt_id[row]) + "," + ",".join(line) + "\n") info_file.flush() info_file.write("\nDemand History:\n") i_str = [str(x) for x in range(i)] info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n") #info_file.write(", ,\n") for row in range(J): line = [str(x) for x in demand_history[row, ]] info_file.write( str(alt_id[row]) + "," + ",".join(line) + "\n") demand_history_info_criteria = [500, 100, 50, 20] for criterion in demand_history_info_criteria: com_rows_index = where(movers <= criterion)[0] info_file.write( "\nDemand History for alternatives with less than or equal to %s movers in 1998:\n" % criterion) i_str = [str(x) for x in range(i)] info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n") #info_file.write(", movers,\n") for row in com_rows_index: line = [str(x) for x in demand_history[row, ]] info_file.write( str(alt_id[row]) + "," + ",".join(line) + "\n") #import pdb; pdb.set_trace() #export prob correlation history correlation_indices, prob_correlation = self.compute_prob_correlation( probability_old, probability, prob_hat, index, resources) info_file.write("\nCorrelation of Probabilities:\n") c_name = [ 'corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)', 'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)', 'corr(p^_ij dummy)' ] info_file.write("com_id, " + ",".join(c_name) + "\n") #info_file.write(", ,\n") for row in range(correlation_indices.size): line = [str(x) for x in prob_correlation[row, ]] info_file.write( str(alt_id[correlation_indices[row]]) + "," + ",".join(line) + "\n") info_file.close() result['pi'] = pi return result logger.end_block() try: info_file.close() except: pass raise RuntimeError, "max iteration reached without convergence."