def main():
    # get and aggregate accessibility from cube using import_acc_results.py file
    # TARGETS = [12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548]  # first set of Cube runs
    TARGETS = [
        20, 33, 36, 137, 142, 143, 144, 151, 152, 159, 166, 167, 171, 173, 183,
        184, 192, 193, 194, 196, 205, 1676, 1692, 2851, 2914,
    ]  # data within: 12-Dec-2013_12_3909_50_0.55556_25.mat
    # indices between 0 and 2110: the scenarios for which you want to save the damaged bridge data
    # TARGETS = [20]
    # y = aggregate_accessibility(TARGETS)
    # util.write_list(time.strftime("%Y%m%d") + '_accessTot.txt', y)
    y = [
        18.2339128119, 18.2338120181, 18.2338952366, 18.2338109314, 18.2270352566,
        18.2177845713, 18.1998501612, 18.2177377231, 18.233770681, 18.2261430987,
        18.1691203163, 18.1849249099, 18.2141010264, 18.2139231104, 18.23383158091398,
        18.2253745585, 18.2155757901, 18.2012935522, 18.2138556128, 18.1758345198,
        18.226103683, 18.2338211763, 18.2260523679, 18.2339486092, 18.2215360497,
    ]
    weights = get_scenario_weights("12-Dec-2013_12_3909_50_0.55556_25_weights.csv")

    # get general x values. These are the various welfare metrics.
    the_filename = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges1eps_extensive2.txt"
    new_x = freq_svm.build_x(TARGETS, the_filename)
    the_filename_full = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    # indices in the first column start at 0
    x_for_predicting = freq_svm.build_x(range(1, 11728), the_filename_full)
    # x_for_predicting = freq_svm.build_x(range(1, 3092), the_filename_full)
    the_x = np.vstack((new_x, x_for_predicting))
    the_x = preprocessing.scale(the_x)
    new_x = the_x[0:new_x.shape[0], :]
    x_for_predicting = the_x[new_x.shape[0]:, :]
    print "built baby x"

    # Pick the threshold. Above this y value the data is called a "match" and below it a "miss".
    # For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "by my method I find the threshold to be: ", threshold
    threshold = 18.19933616  # from the Matlab script called cubeAnalysiswDamagedTransit.m for the 475-year return period  # 18.2139 # 75th quantile
    print "I think the threshold is: ", threshold

    # label events above the threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad, so these are the true high-loss cases
    new_y = freq_svm.label(
        y, threshold, match_label, miss_label
    )  # less than threshold gets the miss label, so high-loss (low-accessibility) events become misses (lower value)
    print "new_y: ", new_y  # should be mostly 1's

    # ############################
    # h = .02  # step size in the mesh
    # # we create an instance of SVM and fit our data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # titles for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, x_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     # Plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c=y_pred, marker='^', cmap=pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################

    # train SVM
    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})

    ######Done using Cube results. Now just use ITA results....#####
    # use the trained svm to predict values from the large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    #     y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY.txt", y_pred)

    # count up annual rates for each bridge when the event is predicted as a miss (high-loss case)
    miss_indices = []
    for index, value in enumerate(y_pred):
        if value == miss_label:  # high loss means low accessibility, which means miss
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses" == "predicted high loss cases": ', len(miss_indices)

    item_indices = range(3152)  # 1743 highway bridges and 1409 BART structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        # each basket lists the bridge indices (MATLAB indices that start from 1) damaged in that scenario
        list_of_baskets = pkl.load(f)
    # for basket in list_of_baskets:
    #     if '609' in basket:
    #         print 'found one: ', basket
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged, to plot in matlab.
    # First column is a counter starting at 1; second column is the support.
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support.txt", support_list)
    pdb.set_trace()
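

# The call to get_scenario_weights() above relies on a helper defined elsewhere in this
# project. As a rough, hedged illustration of what it is expected to return (one annual-rate
# weight per Cube scenario, read from the *_weights.csv file), a minimal sketch might look
# like the following. The single-column CSV layout and the function name with the "_sketch"
# suffix are assumptions for illustration only, not confirmed by this script.
def get_scenario_weights_sketch(csv_path):
    """Hypothetical sketch: read one float weight per row from the first CSV column."""
    import csv
    weights = []
    with open(csv_path, "rb") as f:  # "rb" mode for the csv module under Python 2
        for row in csv.reader(f):
            if row:  # skip blank rows
                weights.append(float(row[0]))
    return weights
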
def main_tt():
    print "chin up"
    # get and aggregate travel time
    # get general x values. These are the various welfare metrics.
    the_filename_full = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    # indices in the first column start at 0
    x_raw = freq_svm.build_x(range(1, 11728), the_filename_full)
    the_x = preprocessing.scale([[row[0]] for row in x_raw])
    the_y = np.array([row[1] for row in x_raw])
    break_point = 9383
    new_x = np.array(the_x[0:break_point])  # 80%
    x_for_predicting = the_x[break_point:]  # 20%
    y = np.array([row[1] for row in x_raw[0:break_point, :]])  # should be as big as the training dataset
    numeps = 3  # the number of epsilons
    tol = 0.00001  # the minimum annual rate that you care about in the original event set (the weight now is the original annual rate / number of epsilons per event)
    lnsas, full_weights = travel_main_simple_simplev3.ground_motions(
        numeps,
        tol,
        "/Users/mahalia/Documents/matlab/Research/Herbst2011/output_data/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt",
    )
    weights = full_weights[0:break_point]
    print "built baby x"

    # Pick the threshold. Above this y value the data is called a "match" and below it a "miss".
    # For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "i thought: ", threshold
    threshold = 346420000  # 18.19933616 # from the Matlab script called cubeAnalysiswDamagedTransit.m for the 475-year return period  # 18.2139 # 75th quantile
    print "I think the threshold is: ", threshold

    # label events above the threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad, so these are the true high-loss cases
    new_y = np.array(freq_svm.label(y, threshold, match_label, miss_label))
    print "new_y: ", new_y

    # ############################
    # h = .02  # step size in the mesh
    # # we create an instance of SVM and fit our data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # titles for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, x_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     # Plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c=y_pred, marker='^', cmap=pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################

    # train SVM
    print new_x.shape
    print new_y.shape
    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})

    ######Done using Cube results. Now just use ITA results....#####
    # use the trained svm to predict values from the large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    #     y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY_tt.txt", y_pred)

    y_test_raw = [row[1] for row in x_raw[break_point:, :]]
    y_test = freq_svm.label(y_test_raw, threshold, match_label, miss_label)
    y_tot_raw = [row[1] for row in x_raw]
    y_tot = freq_svm.label(y_tot_raw, threshold, match_label, miss_label)
    util.write_list(time.strftime("%Y%m%d") + "_actualY_tt.txt", y_test)
    print classification_report(y_test, y_pred)
    print confusion_matrix(y_test, y_pred, labels=range(2))
    scores = cross_validation.cross_val_score(
        svm_object, the_x, freq_svm.label(the_y, threshold, match_label, miss_label), cv=3
    )
    print "Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)

    # count up annual rates for each bridge when the event is labeled as a miss (high-loss case)
    miss_indices = []
    for index, value in enumerate(y_tot):  # cheating and just using the actual data instead of the predicted one
        if value == miss_label:
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses" == "predicted high loss cases": ', len(miss_indices)

    item_indices = range(3152)  # 1743 highway bridges and 1409 BART structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        # each basket lists the bridge indices (MATLAB indices that start from 1) damaged in that scenario
        list_of_baskets = pkl.load(f)
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged, to plot in matlab.
    # First column is a counter starting at 1; second column is the support.
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support_tt.txt", support_list)
    pdb.set_trace()
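

# Both main() and main_tt() rely on get_support(), which is defined elsewhere in this
# project. As a hedged sketch of the behavior described by the comments above (for each
# bridge, sum the weights of the miss-labeled scenarios in which that bridge was damaged,
# then return rows of [1-based bridge counter, support]), it might look roughly like the
# following. The indexing conventions and the string-vs-int storage of bridge IDs in the
# baskets are assumptions for illustration only, not confirmed by this script.
def get_support_sketch(weights, miss_indices, item_indices, list_of_baskets):
    support = [0.0 for _ in item_indices]
    for scenario_index in miss_indices:  # 1-based indices of the high-loss scenarios
        basket = set(str(b) for b in list_of_baskets[scenario_index - 1])
        weight = weights[scenario_index - 1]  # annual rate (weight) of this scenario
        for item in item_indices:  # item_indices = range(3152), a 0-based counter
            if str(item + 1) in basket:  # bridge indices in the baskets start at 1
                support[item] += weight
    # first column: bridge counter starting at 1; second column: summed weight (support)
    return [[item + 1, support[item]] for item in item_indices]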