Beispiel #1
0
def _CCA(data, graph, n):
    """Project ``data`` and the graph's affinity matrix with CCA and save both.

    An ``n``-component ``CCA`` model is fitted on ``data`` against the matrix
    returned by ``createAffinityMatrix(graph)``.  The two transformed outputs
    are handed to ``writeCSV`` under the names ``'CCA_X'`` and ``'CCA_Y'``.

    Args:
        data: Passed to the model as the first argument of ``fit`` and
            ``transform``.
        graph: Input for ``createAffinityMatrix``.
        n: Number of CCA components.

    Returns:
        None.
    """
    affinity = createAffinityMatrix(graph)

    model = CCA(n_components=n)
    model.fit(data, affinity)

    # transform() is expected to yield exactly two outputs here: one for
    # ``data`` and one for the affinity matrix.
    dataScores, affinityScores = model.transform(data, affinity)

    writeCSV(dataScores, 'CCA_X')
    writeCSV(affinityScores, 'CCA_Y')
def runClustering(train, n_clusters=10):
    """Run agglomerative clustering on ``train`` and return the labels.

    Args:
        train: Data passed unchanged to ``AgglomerativeClustering.fit``.
        n_clusters: Number of clusters to form.  Defaults to 10, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The ``labels_`` attribute of the fitted model.
    """
    model = AgglomerativeClustering(n_clusters=n_clusters)
    model.fit(train)
    return model.labels_


if __name__ == '__main__':
    # Command line: argv[1] is the path handed to loadData, argv[2] is the
    # name under which the final predictions are written.
    path, output = sys.argv[1], sys.argv[2]

    print('Loading data...')
    seed, similarity, train = loadData(path)

    # NOTE: building a connectivity matrix from ``similarity``
    # (createConnectivityMatrix) is currently disabled, so ``similarity``
    # is loaded but not used below.

    print('Running clustering algorithm...')
    labels = runClustering(train)

    print('Creating CSV File...')
    name = 'AgglomerativeLabels'
    writeCSV(labels, name)

    print('Visualing clusters...')
    # Read the labels back from disk.
    # NOTE(review): presumably this is the file writeCSV just produced --
    # confirm against writeCSV's output location.
    result = np.array(
        pd.read_csv("./Scripts/Results/" + name + '.csv', header=None)
    )
    cluster = assignClusters(seed, result)

    # Build the final prediction rows and store them with an Id/Label header.
    pred = createPred(seed, cluster, result)
    writeCSV(pred, output, ['Id', 'Label'])
def runSpectralClustering(train, n_clusters=10):
    """Run spectral clustering on ``train`` and return the labels.

    The model is built with ``affinity='nearest_neighbors'``.

    Args:
        train: Data passed unchanged to ``SpectralClustering.fit``.
        n_clusters: Number of clusters to form.  Defaults to 10, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The ``labels_`` attribute of the fitted model.
    """
    model = SpectralClustering(n_clusters=n_clusters,
                               affinity='nearest_neighbors')
    model.fit(train)
    return model.labels_


if __name__ == '__main__':
    # Wall-clock timer for the whole run; the elapsed seconds are printed
    # as the very last step.
    start = time.time()

    # Command line: argv[1] is the path handed to loadData.
    path = sys.argv[1]

    print('Loading data...')
    seed, train = loadData(path)

    print('Running Spectral Clustering...')
    labels = runSpectralClustering(train)

    print('Creating CSV File...')
    name = 'SCLabels'
    writeCSV(labels, name)

    print('Visualing clusters...')
    # Read the labels back from disk.
    # NOTE(review): presumably this is the file writeCSV just produced --
    # confirm against writeCSV's output location.
    result = np.array(
        pd.read_csv("./Scripts/Results/" + name + '.csv', header=None)
    )
    cluster = assignClusters(seed, result)

    # NOTE: writing a prediction file (createPred followed by writeCSV of
    # pred[6000:] as 'SCPred' with an Id/Label header) is currently disabled.

    print(time.time() - start)
Beispiel #4
0
def combineLabels2(data, seed):
    """Merge the per-digit label files into one assignment and write it out.

    For each digit 0-9 candidate ids are read from
    ``./Scripts/Results/TrainLabel_(<digit>)*.csv``:

    * digits 0, 1, 2 and 6 have a single file whose contents are handed to
      ``makeAssignment``;
    * every other digit has two files (suffixes ``1`` and ``2``); an id is
      kept only if it occurs at least twice across those files.

    Ids ``1 .. len(data)`` are then mapped to the digit whose candidate set
    contains them, ``seed`` entries override that mapping, the number of ids
    per label is printed, and the ``[id, label]`` rows are passed to
    ``writeCSV`` under the name ``'assignment5'``.

    Args:
        data: Sized collection; only ``len(data)`` is used.
        seed: Iterable of ``(id, label)`` pairs that take precedence over
            the merged labels.

    Returns:
        None.
    """

    def loadLabels(suffix):
        # Read one header-less label file as a numpy array.
        return np.array(pd.read_csv(
            "./Scripts/Results/TrainLabel_" + suffix + ".csv", header=None))

    def agreedVotes(labelArrays, minVotes):
        # Collect every label seen per id across the given two-column
        # arrays, then keep the ids that were seen at least minVotes times.
        votes = {}
        for labelArray in labelArrays:
            for x, y in labelArray:
                votes.setdefault(x, []).append(y)
        # Build a filtered dict instead of deleting while iterating over
        # .items(): that only works on Python 2 (where items() is a list)
        # and raises RuntimeError on Python 3.
        return {key: val for key, val in votes.items()
                if len(val) >= minVotes}

    # Digits whose candidates come from a single file via makeAssignment.
    singleFileDigits = (0, 1, 2, 6)

    assignments = []
    for digit in range(10):
        tag = '(' + str(digit) + ')'
        if digit in singleFileDigits:
            # NOTE(review): makeAssignment is defined elsewhere; its result
            # only needs to support len() and ``in`` here.
            members = makeAssignment(loadLabels(tag))
        else:
            members = agreedVotes(
                [loadLabels(tag + '1'), loadLabels(tag + '2')], 2)
        print(str(digit) + ' - ' + str(len(members)))
        assignments.append(members)

    # Ids are 1-based.  Digits are checked in ascending order without a
    # break, so when an id is claimed by several digits the highest one wins.
    assignment = {}
    for sampleId in range(1, len(data) + 1):
        for digit, members in enumerate(assignments):
            if sampleId in members:
                assignment[sampleId] = digit

    # Seed labels always take precedence.
    for x, y in seed:
        assignment[x] = y

    # Rows for the output file plus a per-label count for the log.
    d = {}
    res = []
    for key, val in assignment.items():
        res.append([key, val])
        d[val] = d.get(val, 0) + 1

    print(d)

    writeCSV(res, 'assignment5')
Beispiel #5
0
def combineLabels(data):
    """Merge the per-digit label files into one assignment and write it out.

    Candidate ids per digit are read from
    ``./Scripts/Results/TrainLabel_(...).csv``:

    * digits 0, 1, 2, 3 and 6: one file each, handed to ``makeAssignment``;
    * digit 5: files ``(5)1``-``(5)3``, every id that occurs is kept;
    * digit 7: files ``(7)``, ``(7)1``-``(7)3``, ids seen at least 3 times;
    * digit 8: files ``(8)1``-``(8)3``, ids seen at least 2 times;
    * digits 4 and 9: files ``(4)1``, ``(4)2``, ``(9)1`` and ``(49)`` are
      pooled; an id goes to 4 if it has at least two 4-votes, otherwise to 9
      if it has at least two 9-votes.  A 2-2 tie is decided by
      ``random.randint(0, 1)``.

    Ids ``1 .. len(data)`` are then mapped to the digit whose candidate set
    contains them and the ``[id, label]`` rows are passed to ``writeCSV``
    under the name ``'assignment3'``.

    Args:
        data: Sized collection; only ``len(data)`` is used.

    Returns:
        None.
    """

    def loadLabels(suffix):
        # Read one header-less label file as a numpy array.
        return np.array(pd.read_csv(
            "./Scripts/Results/TrainLabel_" + suffix + ".csv", header=None))

    def collectVotes(labelArrays):
        # Map each id to the list of labels it received across the given
        # two-column arrays.
        votes = {}
        for labelArray in labelArrays:
            for x, y in labelArray:
                votes.setdefault(x, []).append(y)
        return votes

    def keepAgreed(votes, minVotes):
        # Return the ids with at least minVotes votes.  A new dict is built
        # instead of deleting while iterating over .items(): that only works
        # on Python 2 and raises RuntimeError on Python 3.
        return {key: val for key, val in votes.items()
                if len(val) >= minVotes}

    assignments = [None] * 10

    # Digits whose candidates come from a single file via makeAssignment.
    # NOTE(review): makeAssignment is defined elsewhere; its result only
    # needs to support len() and ``in`` here.
    for digit in (0, 1, 2, 3, 6):
        members = makeAssignment(loadLabels('(' + str(digit) + ')'))
        print(str(digit) + ' - ' + str(len(members)))
        assignments[digit] = members

    labels5 = [loadLabels('(5)' + part) for part in ('1', '2', '3')]
    print('Assignment 5')
    # No agreement threshold for digit 5: every id that occurs is kept.
    assignment5 = collectVotes(labels5)
    print('5 -  ' + str(len(assignment5)))
    assignments[5] = assignment5

    labels7 = [loadLabels('(7)' + part) for part in ('', '1', '2', '3')]
    print('Assignment 7')
    votes7 = collectVotes(labels7)
    print(len(votes7))
    assignment7 = keepAgreed(votes7, 3)
    print('7 -  ' + str(len(assignment7)))
    assignments[7] = assignment7

    labels8 = [loadLabels('(8)' + part) for part in ('1', '2', '3')]
    print('Assignment 8')
    assignment8 = keepAgreed(collectVotes(labels8), 2)
    print('8 -  ' + str(len(assignment8)))
    assignments[8] = assignment8

    labels49 = [loadLabels(suffix)
                for suffix in ('(4)1', '(4)2', '(9)1', '(49)')]
    print('Assignment 4 and 9')
    votes49 = collectVotes(labels49)

    assignment4 = {}
    assignment9 = {}
    for key, val in votes49.items():
        fours = val.count(4)
        nines = val.count(9)
        if fours == 2 and nines == 2:
            # Tie: let the random vote decide.  Previously the extra vote
            # was appended but the following ``count(4) >= 2`` check was
            # still true, so every tie ended up as a 4.
            winner = 4 if random.randint(0, 1) == 0 else 9
            val.append(winner)
        elif fours >= 2:
            winner = 4
        elif nines >= 2:
            winner = 9
        else:
            # Fewer than two votes for either digit: id stays unassigned.
            continue

        if winner == 4:
            assignment4[key] = val
        else:
            assignment9[key] = val

    print('4 -  ' + str(len(assignment4)))
    print('9 -  ' + str(len(assignment9)))
    assignments[4] = assignment4
    assignments[9] = assignment9

    # Ids are 1-based.  Digits are checked in ascending order without a
    # break, so when an id is claimed by several digits the highest one wins.
    assignment = {}
    for sampleId in range(1, len(data) + 1):
        for digit, members in enumerate(assignments):
            if sampleId in members:
                assignment[sampleId] = digit

    res = [[key, val] for key, val in assignment.items()]

    writeCSV(res, 'assignment3')