Example #1
def pick_scenarios(lnsas, weights, multi=True):
  scenarios = []
  wout = []
  index = 0
  easy = True #whether to simply take scenarios of engineering interest or use the alternative criteria in get_praveen_results
  print 'length of lnsas: ', len(lnsas)
  print 'length of weights: ', len(weights)
  numeps = int(round(len(lnsas)/4993.0))
  print 'numeps: ', numeps
  wsum = 0
  if easy == True:
    print 'easy'
    print 0.00001/float(numeps)
    for w in weights:
      wsum += weights[w]
      print weights[w]
      if weights[w] > 0.00001/float(numeps): #10^-5 divided by numeps because the weights get renormalized when taking more than one epsilon realization per scenario
        scenarios.append(index)
        wout.append((index, weights[w]))
      index += 1
  else:
    (scenarios, wout) = get_praveen_results(lnsas)
  util.write_2dlist(time.strftime("%Y%m%d")+'_weights2.txt', wout) #save the weights of the chosen scenarios
  print 'number of chosen scenarios: ', len(scenarios)
  print 'sum of the weights of all scenarios: ', wsum
  print 'the sum of the subset weights: ', sum([ww[1] for ww in wout])
  return scenarios
def pick_scenarios(lnsas, weights, multi, numeps):
  '''selects scenarios whose annual rate of occurrence exceeds 10^-5 (divided by numeps), or chooses them by the alternative criteria in get_praveen_results'''
  scenarios = []
  wout = []
  index = 0
  easy = True #whether to simply take scenarios of engineering interest or use the alternative criteria in get_praveen_results
  print 'length of lnsas: ', len(lnsas)
  print 'length of weights: ', len(weights)
  print 'numeps: ', numeps
  wsum = 0
  if easy == True:
    print 'easy'
    for w in weights:
      wsum += weights[w]
      if weights[w] > 0.00001/float(numeps): #10^-5 divided by numeps because the weights get renormalized when taking more than one epsilon realization per scenario
        scenarios.append(index)
        wout.append((index, weights[w]))
      index += 1
  else:
    (scenarios, wout) = get_praveen_results(lnsas)
  util.write_2dlist(time.strftime("%Y%m%d")+'_weights_' + str(numeps) + 'eps.txt', wout) #save the weights of the chosen scenarios
  print 'number of chosen scenarios: ', len(scenarios)
  print 'sum of the weights of all scenarios: ', wsum
  print 'the sum of the subset weights: ', sum([ww[1] for ww in wout])
  return scenarios
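util.write_list and util.write_2dlist are called throughout these examples but are not shown. A minimal sketch, assuming plain-text output with one row per line and tab-separated columns (the real helpers may differ):

def write_list(filename, values):
  #write one value per line
  with open(filename, 'w') as f:
    for value in values:
      f.write(str(value) + '\n')

def write_2dlist(filename, rows):
  #write each row as tab-separated values
  with open(filename, 'w') as f:
    for row in rows:
      f.write('\t'.join(str(value) for value in row) + '\n')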
Example #3
def main():
  seed(0) #set seed
  #get graph info
  G = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links
  print '|V| = ', len(G.nodes())
  print '|E| = ', len(G.edges())
  G = nx.freeze(G) #prevents edges or nodes from being added or deleted
  #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000.
  demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
  #get earthquake info
  q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #args: totalfilename, magfilename, faultfilename, weightsfilename, scenariofilename; an earlier input set is 20130107_mtc_*1.pkl


  q.num_sites = len(q.lnsas[0])
  #determine which scenarios you want to run
  good_indices = pick_scenarios(q.lnsas, q.weights)
  
  travel_index_times = []
  index = 0
  #loop over scenarios
  for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site
    if index in good_indices:
      print 'index: ', index
      (travel_time, vmt) = run_iteration(G, scenario, demand)
      travel_index_times.append((index, travel_time, vmt))
#      print 'new travel times: ', travel_index_times
      if index%100 ==0:
        util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times)
    index += 1 #IMPORTANT
  util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times)
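QuakeMaps is not defined in this listing. A hypothetical stand-in, consistent only with how it is constructed and used above (five pickle files; attributes lnsas and weights; num_sites set by the caller); the real class may do more:

import pickle

class QuakeMaps(object):
  '''hypothetical sketch: loads the five pickled inputs named by totalfilename, magfilename, faultfilename, weightsfilename, scenariofilename'''
  def __init__(self, totalfilename, magfilename, faultfilename, weightsfilename, scenariofilename):
    self.lnsas = self._load(totalfilename) #one list of lnSa values per scenario, one value per site
    self.magnitudes = self._load(magfilename)
    self.faults = self._load(faultfilename)
    self.weights = self._load(weightsfilename) #annual rate of occurrence of each scenario
    self.scenarios = self._load(scenariofilename)
    self.num_sites = None #set by the caller, e.g. len(q.lnsas[0])

  @staticmethod
  def _load(filename):
    with open(filename, 'rb') as f:
      return pickle.load(f)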
Example #4
def main():
  seed(0) #set seed
  #get graph info
  G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")
  G = nx.freeze(G) #prevents edges or nodes from being added or deleted
  #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000.
  demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
  #get earthquake info
  q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #args: totalfilename, magfilename, faultfilename, weightsfilename, scenariofilename; an earlier input set is 20130107_mtc_*1.pkl
  print 'weights: ', q.weights
  q.num_sites = len(q.lnsas[0])
  #determine which scenarios you want to run
  good_indices = pick_scenarios(q.lnsas, q.weights)
  
  travel_index_times = []
  index = 0
  #loop over scenarios
  print 'size of lnsas: ', len(q.lnsas)
  for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site
    if index in good_indices:
      print 'index: ', index
      (bridges, flow, path, path2) = run_simple_iteration(G, scenario, demand, False)
      travel_index_times.append((index, bridges, flow, path, path2))
#      print 'new travel times: ', travel_index_times
      if index%1000 ==0:
        util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times)
    index += 1 #IMPORTANT
  util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times)
  print 'the number of scenarios I considered doing: ', index
  print 'the number of scenarios I actually did: ', len(travel_index_times)
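bd.build_demand is not shown here; its output format is described in the comment above ('a dict keyed by od, like demand[sd1][sd2] = 200000'). Purely as an illustration of that structure, a toy version assuming a hypothetical long-format trip-table CSV with origin, destination, and trips columns (the real BATS2000 input format is not documented in this listing):

import csv
from collections import defaultdict

def build_demand_sketch(trip_table_csv):
  #hypothetical: build demand[origin][destination] = trips from a long-format CSV
  demand = defaultdict(dict)
  with open(trip_table_csv) as f:
    for row in csv.DictReader(f): #assumed columns: 'origin', 'destination', 'trips'
      demand[row['origin']][row['destination']] = float(row['trips'])
  return dict(demand)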
Example #5
def main():
    TARGETS = [
        20,
        33,
        36,
        137,
        142,
        143,
        144,
        151,
        152,
        159,
        166,
        167,
        171,
        173,
        183,
        184,
        192,
        193,
        194,
        196,
        205,
        1676,
        1692,
        2851,
        2914,
    ]  # data within: 12-Dec-2013_12_3909_50_0.55556_25.mat #indices between 0 and 2110. the scenarios for which you want to save the damaged bridge data
    TARGETS = ["173_high"]  # we have retrofitted the top 25% ranked by accessibility impact
    weights = get_scenario_weights(
        "12-Dec-2013_12_3909_50_0.55556_25_weights.csv"
    )  # the annual likelihood of occurrence of each of the scenarios ("targets")

    y = aggregate_accessibility(TARGETS, True)
    util.write_list(time.strftime("%Y%m%d") + "_accessTotACC_fromMain.txt", y)

    # TODO: implement the 3 functions below so they actually do something
    y_array = aggregate_accessibility_by_income(TARGETS, True)
    util.write_2dlist(time.strftime("%Y%m%d") + "_accessByIncome_fromMain.txt", y_array)

    y = aggregate_accessibility_by_taz(TARGETS, weights, True)
    util.write_list(time.strftime("%Y%m%d") + "_accessbyTAZ_fromMain.txt", y)

    y_array = aggregate_accessibility_by_taz_by_income(TARGETS, weights, True)
    util.write_2dlist(time.strftime("%Y%m%d") + "_accessByTAZByIncome_fromMain.txt", y_array)
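get_scenario_weights is not shown. A minimal sketch, assuming the weights CSV simply holds one annual rate of occurrence per scenario, one row per scenario (the real file layout may differ):

import csv

def get_scenario_weights_sketch(weights_csv):
    # hypothetical: read one annual rate per row, first column
    with open(weights_csv) as f:
        return [float(row[0]) for row in csv.reader(f) if row]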
Example #6
def save_results(bridge_array_internal, bridge_array_new, travel_index_times,
                 numeps, seed):
    util.write_2dlist(
        'output/' + time.strftime("%Y%m%d") +
        '_bridges_flow_path_tt_vmt_bridges_allBridges_roadonly_' +
        str(numeps) + 'eps_extensive_seed' + str(seed) + '.txt',
        travel_index_times)
    with open(
            'output/' + time.strftime("%Y%m%d") + '_' + str(numeps) +
            'sets_damagedBridgesInternal_roadonly_seed' + str(seed) + '.pkl',
            'wb') as f:
        pickle.dump(bridge_array_internal, f)
    with open(
            'output/' + time.strftime("%Y%m%d") + '_' + str(numeps) +
            'sets_damagedBridgesNewID_roadonly_seed' + str(seed) + '.pkl',
            'wb') as f:
        pickle.dump(bridge_array_new, f)
    print bridge_array_new
    print bridge_array_internal
    print travel_index_times
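# The pickles written by save_results above can be read back for later analysis, as the later
# examples do with their damagedBridges files. A minimal sketch; the argument is whatever filename
# save_results produced, e.g. 'output/<date>_<numeps>sets_damagedBridgesInternal_roadonly_seed<seed>.pkl':
import pickle

def load_damaged_bridges(pkl_filename):
    #read back a pickled list of damaged-bridge lists, one entry per scenario run
    with open(pkl_filename, 'rb') as f:
        return pickle.load(f)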
def main():
	# cd /Volumes/bakergroup$/

	########################################
	#get results for the base case
	folder_name = 'base_no_road_damage_but_reduced_transit'
	folder_name = 'no_damage'
	base = aggregate_results(folder_name)
	print base
	base_results = [base]
	# print base_results
	# print ['scenario', 'bridge_per', 'vmt', 'vht', 'low_auto', 'med_auto', 'high_auto', 'veryhighauto', 'autoPeakTotal', 'autoOffPeakTotal'].append(base_results)
	util.write_2dlist(time.strftime("%Y%m%d")+'_scen_bridge_tt_vmt_6acc_vmt_vhtbtop20.txt', base_results)

	########################################
	print 'now the next'
	#get results for all the other runs
	scenario_results = []
	folder_names = TARGETS #[261] #TARGETS
	for folder_name in folder_names:
		scenario_results.append(aggregate_results(folder_name))
	print 'base: ', base_results
	print 'scenario: ', scenario_results
	util.write_2dlist(time.strftime("%Y%m%d")+'_scen_bridge_tt_vmt_6acc_vmt_vhttop20261.txt', scenario_results)
def main():
  '''can change the number of epsilons below'''
  seed(0) #set seed
  simple = False #simple just computes the percentage of bridges out, which is computationally efficient
  #get graph info
  # G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")
  G = nx.read_gpickle("input/graphMTC_CentroidsLength6highways.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links. Directed! only one edge between nodes
  # G1 = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle") #undirected, multiple edges. It is a little funky because it has two links between A and B and two between B and A so is that double-counting?
  # '''a multigraph: An undirected graph class that can store multiedges.
  #   Multiedges are multiple edges between two nodes.  Each edge
  #   can hold optional data or attributes.
  #   A MultiGraph holds undirected edges.  Self loops are allowed.'''
  print 'nodes: ', len(G.nodes())
  G = nx.freeze(G) #prevents edges or nodes from being added or deleted
  # G1 = nx.freeze(G1)
  #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000.
  demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv') #we just take a percentage in ita.py: to get morning flows, take 5.3% of daily driver values, i.e. 11.5/(4.5*6+11.5*10+14*4+4.5*4) from Figure S10 of http://www.nature.com/srep/2012/121220/srep01001/extref/srep01001-s1.pdf
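  #sanity check on the 5.3% figure cited above (arithmetic only, not taken from ita.py):
  #11.5/(4.5*6 + 11.5*10 + 14*4 + 4.5*4) = 11.5/216.0 ~= 0.053, i.e. the AM peak hour carries roughly 5.3% of daily trips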
          #get path
  #get earthquake info #UPDATED May 23, 2013
  #TODO
  q = QuakeMaps('input/20130612_mtc_total_lnsas5.pkl', 'input/20130612_mtc_magnitudes5.pkl', 'input/20130612_mtc_faults5.pkl', 'input/20130612_mtc_weights5.pkl', 'input/20130612_mtc_scenarios5.pkl') #args: totalfilename, magfilename, faultfilename, weightsfilename, scenariofilename; earlier input sets: 20130107_mtc_*1.pkl, 20130210_mtc_*3.pkl
  q.num_sites = len(q.lnsas[0])
  numeps = 5 #CHANGE THIS when using a different number of epsilon realizations!
  #determine which scenarios you want to run
  good_indices = pick_scenarios(q.lnsas, q.weights,True, numeps)
  targets = good_indices #[12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548] #indices between 0 and 2110. the scenarios for which you want to save the damaged bridge data
  print 'the number of scenarios for which I want to save bridge info: ', len(targets)

  travel_index_times = []
  index = 0
  good_index = 0
  # pdb.set_trace()
  #figure out what the travel time and vmt are if no damage to any bridges
  no_damage_travel_time = -1
  no_damage_vmt = -1
  found_no_damage = False
  for scenario in q.lnsas: #each 'scenario' has 1xxx values of lnsa, i.e. one per site
    while found_no_damage == False:
      (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets, True) #since looking for no damage case, it is ok to clean up
      if bridges == 0:
        found_no_damage = True
        print 'found case with no damage so I will save those and save you work later on'
        (no_damage_travel_time, no_damage_vmt) = run_iteration(G, scenario, demand, newG)

  #loop over scenarios
  print 'size of lnsas: ', len(q.lnsas)
  for scenario in q.lnsas: #each 'scenario' has 1xxx values of lnsa, i.e. one per site
    if index in good_indices:
      print 'index: ', index
      if simple == True:
        (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets)
        travel_index_times.append((index, bridges, flow, path, path2, -1, -1, bridges/float(q.num_sites), -1))
      else:
        (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets, False) #doesn't clean up the damage
        print 'what i found for bridges: ', bridges
        if bridges == 0:
          travel_time = no_damage_travel_time
          vmt = no_damage_vmt
        else:
          print 'attempting new'
          (travel_time, vmt) = run_iteration(G, scenario, demand, newG, True)
        print 'what i have for (tt, vmt): ', (travel_time, vmt)
        travel_index_times.append((index, bridges, flow, path, path2, travel_time, vmt, bridges/float(q.num_sites), -1))
      good_index += 1
        # travel_index_times.append((index, travel_time, vmt))
#      print 'new travel times: ', travel_index_times
    if index%1000 ==0:
      print 'index: ', index
      util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths_5eps_extensive.txt',travel_index_times)
    index += 1 #IMPORTANT
  util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths_5eps_extensive.txt',travel_index_times)
  print 'the number of scenarios I considered doing: ', index
  print 'the number of scenarios I actually did: ', len(travel_index_times)
  print 'i.e.: ', good_index
  print 'and now, I will save a dataset of damaged bridges in each scenario'
  util.write_2dlist(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive.txt',BRIDGE_DAMAGE_DATASET)
  with open(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive.pkl', 'wb') as f:
    pickle.dump(BRIDGE_DAMAGE_DATASET, f)
Example #9
def aprioriGen(freq_sets, k):
    "Generate candidate itemsets of size k by joining frequent itemsets whose first k-2 items match."
    retList = []
    lenLk = len(freq_sets)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            L1 = list(freq_sets[i])[:k - 2]
            L2 = list(freq_sets[j])[:k - 2]
            L1.sort()
            L2.sort()
            if L1 == L2:
                retList.append(freq_sets[i] | freq_sets[j])
    return retList


def apriori(dataset, weights, minsupport=0.5):
    "Generate a list of candidate item sets. iven a data set and a support level, it will generate a list of candidate itemsets."
    C1 = createC1(dataset)
    D = map(set, dataset)
    minsupport_weighted = sum(weights)*minsupport
    L1, support_data = scanD(D, C1, weights, minsupport_weighted)
    L = [L1]
    k = 2
    while (len(L[k - 2]) > 0):
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, weights, minsupport_weighted) #use the same weighted threshold as for L1
        support_data.update(supK)
        L.append(Lk)
        k += 1 #TODO: check that we want to do this

    return L, support_data
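# createC1 and scanD are not included in this listing. Hypothetical sketches, consistent only with
# how apriori calls them above: createC1 builds the 1-item candidate sets, and scanD computes the
# weighted support of each candidate (the sum of the scenario weights of the transactions that
# contain it), keeping the candidates that meet the weighted support threshold.
def createC1(dataset):
    items = sorted(set(item for transaction in dataset for item in transaction))
    return [frozenset([item]) for item in items]

def scanD(D, Ck, weights, min_support_weighted):
    weighted_counts = {}
    for tid, weight in zip(D, weights):
        for candidate in Ck:
            if candidate.issubset(tid):
                weighted_counts[candidate] = weighted_counts.get(candidate, 0.0) + weight
    retlist = []
    support_data = {}
    for candidate, support in weighted_counts.items():
        support_data[candidate] = support
        if support >= min_support_weighted:
            retlist.append(candidate)
    return retlist, support_data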
if __name__ == '__main__':
    weights, dataset = load_dataset()
    min_support = 0.4
    r, s = apriori(dataset, weights, min_support)
    print 'r: ', r
    print 's: ', s
    result = sorted(s.iterkeys(), key=lambda k: s[k], reverse=True)
    util.write_2dlist(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive_frequentitemsets'+str(min_support)+'.txt', result)
    print result
def save_results(bridge_array, travel_index_times, numeps):
    util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_path_tt_vmt_bridges' + str(numeps) + 'eps_extensive.txt',travel_index_times)
    with open (time.strftime("%Y%m%d")+'_' + str(numeps) + 'eps_damagedBridges.pkl', 'wb') as f:
      pickle.dump(bridge_array, f)
Example #11
def main():
    # get and aggregate accessibility from cube using import_acc_results.py file
    # TARGETS = [12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548] #first set of Cube runs
    TARGETS = [
        20,
        33,
        36,
        137,
        142,
        143,
        144,
        151,
        152,
        159,
        166,
        167,
        171,
        173,
        183,
        184,
        192,
        193,
        194,
        196,
        205,
        1676,
        1692,
        2851,
        2914,
    ]  # data within: 12-Dec-2013_12_3909_50_0.55556_25.mat #indices between 0 and 2110. the scenarios for which you want to save the damaged bridge data
    # TARGETS = [20]
    # y = aggregate_accessibility(TARGETS)
    # util.write_list(time.strftime("%Y%m%d")+'_accessTot.txt', y)
    y = [
        18.2339128119,
        18.2338120181,
        18.2338952366,
        18.2338109314,
        18.2270352566,
        18.2177845713,
        18.1998501612,
        18.2177377231,
        18.233770681,
        18.2261430987,
        18.1691203163,
        18.1849249099,
        18.2141010264,
        18.2139231104,
        18.23383158091398,
        18.2253745585,
        18.2155757901,
        18.2012935522,
        18.2138556128,
        18.1758345198,
        18.226103683,
        18.2338211763,
        18.2260523679,
        18.2339486092,
        18.2215360497,
    ]
    weights = get_scenario_weights("12-Dec-2013_12_3909_50_0.55556_25_weights.csv")

    # get general x values. These are the various welfare metrics.
    the_filename = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges1eps_extensive2.txt"
    new_x = freq_svm.build_x(TARGETS, the_filename)
    the_filename_full = (
        "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    )  # indices in the first column start at 0
    x_for_predicting = freq_svm.build_x(range(1, 11728), the_filename_full)
    # x_for_predicting = freq_svm.build_x(range(1, 3092), the_filename_full)
    the_x = np.vstack((new_x, x_for_predicting))
    the_x = preprocessing.scale(the_x)
    new_x = the_x[0 : new_x.shape[0], :]
    x_for_predicting = the_x[new_x.shape[0] :, :]
    print "built baby x"

    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "by my method I find the threshold to be: ", threshold
    threshold = (
        18.19933616
    )  # from the Matlab script called cubeAnalysiswDamagedTransit.m for 475 year return period  #18.2139 #75th quantile
    print "I think the threshold is: ", threshold

    # label events above threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad so these are the true high loss cases
    new_y = freq_svm.label(
        y, threshold, match_label, miss_label
    )  # less than threshold is miss label. So, this puts high loss in accessibility as miss (lower value)
    print "new_ y: ", new_y  # should be mostly 1's

    # ############################

    # h = .02  # step size in the mesh

    # # we create an instance of SVM and fit out data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()

    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))

    # # title for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']

    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, m_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')

    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)

    #     #plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c= y_pred, marker='^', cmap = pl.cm.Paired)

    #     pl.title(titles[i])

    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')

    # ####################

    # #train SVM
    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})
    ######Done using Cube results. Now just use ITA results....#####
    # use trained svm to predict values from large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    # 	y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY.txt", y_pred)
    # count up annual rates for each bridge in the list when event predicted as match
    miss_indices = []
    for index, value in enumerate(y_pred):
        if value == miss_label:  # high loss means low accessibility, which means miss
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 bart structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(
            f
        )  # this has list of bridge indices (MATLAB INDICES that start from 1) that are damaged
        # for basket in list_of_baskets:
        # 	if '609' in basket:
        # 		print 'found one: ', basket
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )

    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)

    # output the sum of weights of the scenarios in which each bridge was damaged, to plot in Matlab. First column is a counter starting at 1; second column is the support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support.txt", support_list)
    pdb.set_trace()
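get_support is not defined in this listing. Based on the comment above its call (for each bridge, the sum of the weights of the high-loss scenarios in which that bridge was damaged, output as rows of counter and support), a hedged sketch, assuming MATLAB-style 1-based scenario and bridge indices as the comments state:

def get_support_sketch(weights, miss_indices, item_indices, list_of_baskets):
    # hypothetical sketch only; the real get_support may differ
    miss_set = set(miss_indices)
    support = {}
    for scenario_number, basket in enumerate(list_of_baskets, start=1):
        if scenario_number not in miss_set:
            continue  # only count scenarios labeled/predicted as high loss
        for bridge in basket:
            support[int(bridge)] = support.get(int(bridge), 0.0) + weights[scenario_number - 1]
    # first column: bridge counter starting at 1; second column: weighted support
    return [[i + 1, support.get(i + 1, 0.0)] for i in item_indices]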
Example #12
def main_tt():
    print "chin up"
    # get and aggregate travel time

    # get general x values. These are the various welfare metrics.
    the_filename_full = (
        "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    )  # indices in the first column start at 0
    x_raw = freq_svm.build_x(range(1, 11728), the_filename_full)

    the_x = preprocessing.scale([[row[0]] for row in x_raw])
    the_y = np.array([row[1] for row in x_raw])

    break_point = 9383
    new_x = np.array(the_x[0:break_point])  # 80%
    x_for_predicting = the_x[break_point:]  # 20%
    y = np.array([row[1] for row in x_raw[0:break_point, :]])  # should be as big as the training dataset
    numeps = 3  # the number of epsilons
    tol = (
        0.00001
    )  # the minimum annual rate that you care about in the original event set (the weight now is the original annual rate / number of epsilons per event)
    lnsas, full_weights = travel_main_simple_simplev3.ground_motions(
        numeps,
        tol,
        "/Users/mahalia/Documents/matlab/Research/Herbst2011/output_data/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt",
    )
    weights = full_weights[0:break_point]

    print "built baby x"
    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "i thought: ", threshold
    threshold = (
        346420000
    )  # 18.19933616 #from the Matlab script called cubeAnalysiswDamagedTransit.m for 475 year return period  #18.2139 #75th quantile
    print "I think the threshold is: ", threshold

    # label events above threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad so these are the true high loss cases
    new_y = np.array(freq_svm.label(y, threshold, match_label, miss_label))
    print "new_ y: ", new_y

    # ############################

    # h = .02  # step size in the mesh

    # # we create an instance of SVM and fit out data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()

    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))

    # # title for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']

    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, m_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')

    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)

    #     #plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c= y_pred, marker='^', cmap = pl.cm.Paired)

    #     pl.title(titles[i])

    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')

    # ####################

    # #train SVM
    print new_x.shape
    print new_y.shape

    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})
    ######Done using Cube results. Now just use ITA results....#####
    # use trained svm to predict values from large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    # 	y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY_tt.txt", y_pred)
    y_test_raw = [row[1] for row in x_raw[break_point:, :]]
    y_test = freq_svm.label(y_test_raw, threshold, match_label, miss_label)
    y_tot_raw = [row[1] for row in x_raw]
    y_tot = freq_svm.label(y_tot_raw, threshold, match_label, miss_label)
    util.write_list(time.strftime("%Y%m%d") + "_actualY_tt.txt", y_test)

    print (classification_report(y_test, y_pred))
    print (confusion_matrix(y_test, y_pred, labels=range(2)))
    scores = cross_validation.cross_val_score(
        svm_object, the_x, freq_svm.label(the_y, threshold, match_label, miss_label), cv=3
    )
    print ("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # count up annual rates for each bridge in the list when event predicted as match
    miss_indices = []
    for index, value in enumerate(y_tot):  # cheating and just using the actual data instead of predicted one
        if value == miss_label:
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 bart structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(
            f
        )  # this has list of bridge indices (MATLAB INDICES that start from 1) that are damaged
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)

    # output the sum of weights of the scenarios in which each bridge was damaged, to plot in Matlab. First column is a counter starting at 1; second column is the support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support_tt.txt", support_list)
    pdb.set_trace()
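freq_svm.identify_threshold and freq_svm.label are also not shown. Hedged sketches consistent with how they are used above: label assigns match_label to values at or above the threshold and miss_label below it (the comments say "less than threshold is miss label"), while identify_threshold is assumed to walk the events from the low-y end, accumulating their annual rates until the target rate is reached, then return the y value there (the real functions may differ):

def label_sketch(y, threshold, match_label, miss_label):
    # values below the threshold get miss_label, everything else match_label
    return [match_label if value >= threshold else miss_label for value in y]

def identify_threshold_sketch(target_annual_rate, y, weights):
    # hypothetical: accumulate annual rates over events sorted by y (ascending)
    # and return the y value at which the cumulative rate reaches the target
    total = 0.0
    for value, weight in sorted(zip(y, weights)):
        total += weight
        if total >= target_annual_rate:
            return value
    return max(y)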