def _CCA(data, graph, n):
    """Fit a CCA model on ``data`` and the affinity matrix of ``graph``, then save both projections.

    Args:
        data: First input handed to ``CCA.fit`` / ``CCA.transform``.
        graph: Passed to ``createAffinityMatrix``; the result is used as the
            second input of the CCA model.
        n: Number of components requested from the CCA model.

    Side effects:
        Calls ``writeCSV`` twice, with the names ``'CCA_X'`` and ``'CCA_Y'``.
        Nothing is returned.

    NOTE(review): ``CCA`` is presumably scikit-learn's
    ``cross_decomposition.CCA`` -- confirm against the file's imports.
    """
    model = CCA(n_components=n)
    affinity = createAffinityMatrix(graph)

    model.fit(data, affinity)
    # transform() is expected to hand back exactly two arrays, one per input.
    dataScores, affinityScores = model.transform(data, affinity)

    writeCSV(dataScores, 'CCA_X')
    writeCSV(affinityScores, 'CCA_Y')
def runClustering(train):
    """Group ``train`` into 10 clusters with agglomerative clustering.

    Args:
        train: Sample matrix handed to ``AgglomerativeClustering.fit``.

    Returns:
        The ``labels_`` attribute of the fitted estimator.
    """
    model = AgglomerativeClustering(n_clusters=10)
    return model.fit(train).labels_


if __name__ == '__main__':
    # Command line: <script> <data path> <output name>
    path = sys.argv[1]
    output = sys.argv[2]

    print('Loading data...')
    seed, similarity, train = loadData(path)

    # Connectivity-matrix step is currently disabled:
    # print 'Creating connectivity matrix...'
    # connectivity = createConnectivityMatrix(similarity)

    print('Running clustering algorithm...')
    labels = runClustering(train)

    print('Creating CSV File...')
    name = 'AgglomerativeLabels'
    writeCSV(labels, name)

    print('Visualing clusters...')
    # Read the labels back from the results folder rather than reusing the
    # in-memory array.
    # NOTE(review): assumes writeCSV stores its output under
    # ./Scripts/Results/ -- confirm.
    labelsFrame = pd.read_csv("./Scripts/Results/" + name + '.csv',
                              header=None)
    result = np.array(labelsFrame)

    cluster = assignClusters(seed, result)
    pred = createPred(seed, cluster, result)
    writeCSV(pred, output, ['Id', 'Label'])
def runSpectralClustering(train):
    """Group ``train`` into 10 clusters with spectral clustering.

    The estimator is built with ``affinity='nearest_neighbors'``.

    Args:
        train: Sample matrix handed to ``SpectralClustering.fit``.

    Returns:
        The ``labels_`` attribute of the fitted estimator.
    """
    model = SpectralClustering(n_clusters=10, affinity='nearest_neighbors')
    return model.fit(train).labels_


if __name__ == '__main__':
    # Command line: <script> <data path>
    start = time.time()
    path = sys.argv[1]

    print('Loading data...')
    seed, train = loadData(path)

    print('Running Spectral Clustering...')
    labels = runSpectralClustering(train)

    print('Creating CSV File...')
    name = 'SCLabels'
    writeCSV(labels, name)

    print('Visualing clusters...')
    # Read the labels back from the results folder rather than reusing the
    # in-memory array.
    # NOTE(review): assumes writeCSV stores its output under
    # ./Scripts/Results/ -- confirm.
    labelsFrame = pd.read_csv("./Scripts/Results/" + name + '.csv',
                              header=None)
    result = np.array(labelsFrame)
    cluster = assignClusters(seed, result)

    # Prediction export is currently disabled:
    # pred = createPred(seed, cluster, result)
    # writeCSV(pred[6000:], 'SCPred', ['Id', 'Label'])

    # Elapsed wall-clock seconds for the whole run.
    print(time.time() - start)
def _loadTrainLabel(tag):
    """Read ``./Scripts/Results/TrainLabel_<tag>.csv`` (no header row) into an array."""
    return np.array(
        pd.read_csv("./Scripts/Results/TrainLabel_" + tag + ".csv",
                    header=None))


def _agreedAssignment(tables, minVotes):
    """Collect the votes of several label tables and keep well-supported keys.

    Every row of every table is unpacked as a ``(key, label)`` pair.  The
    result maps each key that received at least ``minVotes`` labels (over all
    tables together) to the list of those labels.
    """
    votes = {}
    for table in tables:
        for key, label in table:
            votes.setdefault(key, []).append(label)
    # Build a filtered copy instead of deleting while iterating: deleting
    # inside ``for ... in votes.items()`` raises RuntimeError on Python 3.
    return {key: labels
            for key, labels in votes.items()
            if len(labels) >= minVotes}


def combineLabels2(data, seed):
    """Merge the per-digit label files and the seeds into one assignment CSV.

    For digits 0, 1, 2 and 6 a single file ``TrainLabel_(<d>).csv`` is loaded
    and handed to ``makeAssignment``.  For every other digit the two files
    ``TrainLabel_(<d>)1.csv`` and ``TrainLabel_(<d>)2.csv`` are combined and
    only keys present in both votes (two or more entries) are kept.  After
    each digit a line ``'<d> - <number of keys>'`` is printed.

    Args:
        data: Only ``len(data)`` is used; the keys ``1 .. len(data)`` are
            looked up in the per-digit assignments.
        seed: Iterable of ``(key, label)`` pairs; these override whatever the
            label files produced for the same key.

    Side effects:
        Prints the per-digit counts and a ``{label: count}`` dict, then calls
        ``writeCSV`` with the ``[key, label]`` rows and the name
        ``'assignment5'``.  Nothing is returned.

    NOTE(review): ``makeAssignment`` is defined elsewhere; it is only assumed
    to return something that supports ``len()`` and ``in``.
    """
    singleFileDigits = (0, 1, 2, 6)

    assignments = []
    for digit in range(10):
        if digit in singleFileDigits:
            current = makeAssignment(_loadTrainLabel('(%d)' % digit))
        else:
            tables = [_loadTrainLabel('(%d)%d' % (digit, run))
                      for run in (1, 2)]
            current = _agreedAssignment(tables, 2)
        print(str(digit) + ' - ' + str(len(current)))
        assignments.append(current)

    assignment = {}
    for i in range(len(data)):
        # No early exit on purpose: when a key is claimed by several digits
        # the highest digit wins, exactly as before.
        for digit in range(10):
            if i + 1 in assignments[digit]:
                assignment[i + 1] = digit

    # Seeds have the last word.
    for x, y in seed:
        assignment[x] = y

    d = {}
    res = []
    for key, val in assignment.items():
        res.append([key, val])
        d[val] = d.get(val, 0) + 1
    print(d)

    writeCSV(res, 'assignment5')
def _readTrainLabelCsv(tag):
    """Read ``./Scripts/Results/TrainLabel_<tag>.csv`` (no header row) into an array."""
    return np.array(
        pd.read_csv("./Scripts/Results/TrainLabel_" + tag + ".csv",
                    header=None))


def _gatherVotes(tables):
    """Map the first value of every row to the list of second values seen for it.

    Every row of every table is unpacked as a ``(key, label)`` pair.
    """
    votes = {}
    for table in tables:
        for key, label in table:
            votes.setdefault(key, []).append(label)
    return votes


def _keepWithAtLeast(votes, minVotes):
    """Return a new dict with only the keys that collected ``minVotes`` or more labels."""
    # A filtered copy avoids deleting from the dict while iterating over it,
    # which raises RuntimeError on Python 3.
    return {key: labels
            for key, labels in votes.items()
            if len(labels) >= minVotes}


def combineLabels(data):
    """Merge the per-digit label files into one assignment CSV.

    Steps, in order:

    * Digits 0, 1, 2, 3 and 6: ``TrainLabel_(<d>).csv`` is loaded and handed
      to ``makeAssignment``.
    * Digit 5: votes of ``(5)1``, ``(5)2`` and ``(5)3``; every key is kept.
    * Digit 7: votes of ``(7)``, ``(7)1``, ``(7)2`` and ``(7)3``; keys with
      fewer than three votes are dropped.
    * Digit 8: votes of ``(8)1``, ``(8)2`` and ``(8)3``; keys with fewer than
      two votes are dropped.
    * Digits 4 and 9: votes of ``(4)1``, ``(4)2``, ``(9)1`` and ``(49)`` are
      pooled.  A key goes to 4 when it has two or more 4-votes, otherwise to
      9 when it has two or more 9-votes.  An exact 2-vs-2 tie is settled by
      ``random.randint(0, 1)``.

    Progress lines (``'<d> - <count>'``, ``'Assignment <d>'``) are printed
    along the way.

    Args:
        data: Only ``len(data)`` is used; the keys ``1 .. len(data)`` are
            looked up in the per-digit assignments.

    Side effects:
        Prints progress and calls ``writeCSV`` with the ``[key, digit]`` rows
        and the name ``'assignment3'``.  Nothing is returned.

    NOTE(review): ``makeAssignment`` is defined elsewhere; it is only assumed
    to return something that supports ``len()`` and ``in``.
    """
    byDigit = {}

    for digit in (0, 1, 2, 3, 6):
        byDigit[digit] = makeAssignment(_readTrainLabelCsv('(%d)' % digit))
        print(str(digit) + ' - ' + str(len(byDigit[digit])))

    tables = [_readTrainLabelCsv(tag) for tag in ('(5)1', '(5)2', '(5)3')]
    print('Assignment 5')
    byDigit[5] = _gatherVotes(tables)
    print('5 - ' + str(len(byDigit[5])))

    tables = [_readTrainLabelCsv(tag)
              for tag in ('(7)', '(7)1', '(7)2', '(7)3')]
    print('Assignment 7')
    votes7 = _gatherVotes(tables)
    print(len(votes7))
    byDigit[7] = _keepWithAtLeast(votes7, 3)
    print('7 - ' + str(len(byDigit[7])))

    tables = [_readTrainLabelCsv(tag) for tag in ('(8)1', '(8)2', '(8)3')]
    print('Assignment 8')
    byDigit[8] = _keepWithAtLeast(_gatherVotes(tables), 2)
    print('8 - ' + str(len(byDigit[8])))

    tables = [_readTrainLabelCsv(tag)
              for tag in ('(4)1', '(4)2', '(9)1', '(49)')]
    print('Assignment 4 and 9')
    byDigit[4] = {}
    byDigit[9] = {}
    for key, labels in _gatherVotes(tables).items():
        fours = labels.count(4)
        nines = labels.count(9)
        if fours == 2 and nines == 2:
            # Settle the tie directly with the random draw.  Appending an
            # extra vote and then re-testing ``count(4) >= 2`` would always
            # pick 4, because a tied key already has two 4-votes.
            winner = 4 if random.randint(0, 1) == 0 else 9
        elif fours >= 2:
            winner = 4
        elif nines >= 2:
            winner = 9
        else:
            continue
        byDigit[winner][key] = labels
    print('4 - ' + str(len(byDigit[4])))
    print('9 - ' + str(len(byDigit[9])))

    assignments = [byDigit[digit] for digit in range(10)]

    assignment = {}
    for i in range(len(data)):
        # No early exit on purpose: when a key is claimed by several digits
        # the highest digit wins, exactly as before.
        for digit in range(10):
            if i + 1 in assignments[digit]:
                assignment[i + 1] = digit

    res = [[key, val] for key, val in assignment.items()]
    writeCSV(res, 'assignment3')