def test():
    """Evaluate the classifier over the full data set.

    Returns a two-element list:
      [0] exact-match accuracy  (predicted class == true class)
      [1] "almost" accuracy     (leading digit of the predicted bucket is
                                 within 2 of the true bucket's leading digit)

    Relies on module-level helpers: gd2/gd3 data loaders, getValues()
    and test_single() defined elsewhere in this project.
    """
    # NOTE(review): "unkown" [sic] is the literal label used in the data
    # set; do not "fix" the spelling here or lookups will break.
    classes = ["0-10", "10-20", "20-30", "30-40", "40-50",
               "50-60", "60-70", "70-80", "80-90", "unkown"]
    # classes = ["1800","1810","1820","1830","1840","1850","1860",
    #            "1870","1880","1890","1900","1910","unkown"]
    full_data = gd2.get_data_list_of_dicts()
    test_data = gd3.get_data_list_of_dicts()
    values = getValues(classes, test_data)
    correct = 0
    almost = 0
    total = len(full_data)
    if total == 0:
        # Empty data set: avoid ZeroDivisionError below.
        return [0.0, 0.0]
    for entry in full_data:
        decade = entry["class"]
        entry.pop("class", None)  # classify without the label itself
        result = test_single(entry, values)
        if result == decade:
            correct += 1
        try:
            # "Almost" right: compare the first character of each bucket
            # label as a digit.  "unkown" (or an empty result) has no
            # digit, which we deliberately skip -- but only for the
            # conversion errors, not for every possible exception.
            if abs(int(result[0]) - int(decade[0])) <= 2:
                almost += 1
        except (ValueError, TypeError, IndexError):
            continue
    return [float(correct) / float(total), float(almost) / float(total)]
def make_cadence_map(csv_input, csv_output, index_of_interest, lol):
    """Annotate each row with the preceding/following value of one column.

    For every row in `lol` (a list of row-lists matching gd2's headers),
    find the rows of the same composition (grouped by the "id" column,
    ordered by "start_measure") and append two new cells: the value of
    column `index_of_interest` in the previous row and in the next row
    ("None" at either end).  The result is written to `csv_output`.

    Args:
        csv_input:          unused here; kept for interface compatibility.
        csv_output:         path the annotated CSV is written to.
        index_of_interest:  index of the column to map before/after.
        lol:                list of row-lists to process.
    """
    headers = gd2.get_headers()
    start_measure_index = headers.index("start_measure")
    id_index = headers.index("id")
    header = headers[index_of_interest]
    # Build the extended header row without mutating `headers` in place.
    # BUG FIX: the original mutated `headers` but then passed an undefined
    # name `new_headers` to write_data, raising NameError.
    new_headers = headers + ["{}_before".format(header),
                             "{}_after".format(header)]

    data = lol
    data_by_composition = {row[id_index]: [] for row in data}
    new_data = []

    # Group the rows by composition id.
    for row in data:
        data_by_composition[row[id_index]].append(row)
    # Sort each composition's rows once, ascending by start measure.
    # (The original re-sorted the growing list on every append.)
    for comp_rows in data_by_composition.values():
        comp_rows.sort(key=lambda x: int(x[start_measure_index]))

    # For each row, look up its neighbours within its composition.
    for row in data:
        comp_entries = data_by_composition[row[id_index]]
        # NOTE(review): list.index finds the first *equal* row; duplicate
        # rows within one composition would all map to the first copy.
        i = comp_entries.index(row)
        element_before = comp_entries[i - 1][index_of_interest] if i > 0 else "None"
        element_after = (comp_entries[i + 1][index_of_interest]
                         if i < len(comp_entries) - 1 else "None")
        new_data.append(row + [element_before, element_after])

    print("File written successfully! Added 'before' and 'after' for {}".format(header))
    gd2.write_data(csv_output, new_headers, new_data)
def test():
    """Score the classifier: returns [exact accuracy, 'almost' accuracy].

    'Almost' counts predictions whose bucket's leading digit differs from
    the true bucket's leading digit by at most 2.  Uses the project-level
    loaders gd2/gd3 and the helpers getValues()/test_single().
    """
    # "unkown" [sic] is the label spelling present in the data set.
    classes = ["0-10", "10-20", "20-30", "30-40", "40-50",
               "50-60", "60-70", "70-80", "80-90", "unkown"]
    # classes = ["1800","1810","1820","1830","1840","1850","1860",
    #            "1870","1880","1890","1900","1910","unkown"]
    full_data = gd2.get_data_list_of_dicts()
    test_data = gd3.get_data_list_of_dicts()
    values = getValues(classes, test_data)
    correct = 0
    almost = 0
    total = len(full_data)
    if total == 0:
        return [0.0, 0.0]  # guard against division by zero
    for entry in full_data:
        decade = entry["class"]
        entry.pop("class", None)  # strip the label before classifying
        result = test_single(entry, values)
        if result == decade:
            correct += 1
        try:
            # Narrowed from a bare except: only skip labels whose first
            # character is not a digit (e.g. "unkown") or malformed results.
            if abs(int(result[0]) - int(decade[0])) <= 2:
                almost += 1
        except (ValueError, TypeError, IndexError):
            continue
    return [float(correct) / float(total), float(almost) / float(total)]
import get_data as gd import get_data2 as gd2 import get_data3 as gd3 list_of_dicts = gd.get_data_list_of_dicts() full = gd2.get_data_list_of_dicts() full_headers = gd2.get_headers() headers = gd.get_headers() headers_income = gd3.get_headers() codes = {} full_clean = [] final_headers = [] for h in headers: h = h.split(" - ") code = h[0] try: name = h[1] codes[code] = name except: print h for h2 in headers_income: h2 = h2.split(" - ") code = h2[0] try: if not "Error" in h2[1]: name = h2[1] codes[code] = name except:
# Build a row-major numeric matrix from the data set and query a KD-tree
# for each row's 3 nearest neighbours (Euclidean metric).
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

# One empty row per data record; filled column-by-column below.
num_rows = len(gd.get_data_slice(headers[0], dicts))
rows_lol = [[] for _ in range(num_rows)]

# Transpose the column slices into rows.
# FIX: the original special-cased columns 1 and 4 with an if/else whose
# branches were byte-identical (both called get_data_slice_numbers), so
# the dead conditional has been removed.
for h in headers:
    column = gd.get_data_slice_numbers(h, dicts)
    for j in range(num_rows):
        rows_lol[j].append(column[j])

X = np.array(rows_lol)
#nbrs = NearestNeighbors(n_neighbors=5, algorithm='kd_tree', metric='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
# k=3 includes each point itself as its own nearest neighbour.
kdt.query(X, k=3, return_distance=False)
def find_point(name, points): for p in points: if name == p.getName(): return p sys.exit("Could not find point") list_of_dicts = gd.get_data_list_of_dicts() has_both = [] for entry in list_of_dicts: if not (entry["Place Of Origin"] == "") and not (entry["Destination"] == ""): has_both.append(entry) list_of_places = gd2.get_data_list_of_dicts() places = [] for entry in list_of_places: places.append(entry["Name"]) has_full = [] for item in has_both: # Have to clean the name so it will match the one we have listed # for the places Poo = item["Place Of Origin"].replace(" ", "") Poo = Poo.replace("(", "COMMA") Poo = Poo.replace(")", "COMMA") Poo = Poo.split("COMMA") Poo2 = [] for word in Poo:
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import NearestNeighbors #from sklearn.neighbors.kd_tree import KDTree #from sklearn.neighbors import DistanceMetric import numpy as np import get_data2 as gd import json headers = gd.get_headers() dicts = gd.get_data_list_of_dicts() rows_lol = [] for i in range(len(gd.get_data_slice(headers[0], dicts))): rows_lol.append([]) print len(rows_lol) for i in range(len(headers)): column = gd.get_data_slice(headers[i], dicts) for j in range(len(gd.get_data_slice(headers[0], dicts))): rows_lol[j].append(column[j]) print rows_lol[0] #actually get similarities def compare_rows(row1, row2): counter = 0 for i in range(len(row1)):
return pageRanks def find_point(name, points): for p in points: if name == p.getName(): return p sys.exit("Could not find point") list_of_dicts = gd.get_data_list_of_dicts() has_both = [] for entry in list_of_dicts: if not (entry["Place Of Origin"] == "") and not (entry["Destination"] == ""): has_both.append(entry) list_of_places = gd2.get_data_list_of_dicts() places = [] for entry in list_of_places: places.append(entry["Name"]) has_full = [] for item in has_both: # Have to clean the name so it will match the one we have listed # for the places Poo = item["Place Of Origin"].replace(" ","") Poo = Poo.replace("(","COMMA") Poo = Poo.replace(")","COMMA") Poo = Poo.split("COMMA") Poo2 = [] for word in Poo: