Ejemplo n.º 1
0
def main():
    """Predict a label for every dataset row and write them to ``houses.csv``.

    Two paths are expected from ``IOHelper().checkArg(sys.argv)``: the first
    is loaded into a ``Dataset``, the second is read by ``csvToArray`` to get
    the saved weights and the output labels. Feature ids 7 through 18 are
    turned into an input matrix, expanded and rescaled, and every row is
    passed to ``MultiClassifier.predict``. The output file has an
    ``Index,Hogwarts House`` header followed by one ``<row index>,<label>``
    line per row.

    Exits with status 1 when fewer than two paths are available.
    """
    paths = IOHelper().checkArg(sys.argv)
    if len(paths) < 2:
        print("Missing file")
        sys.exit(1)

    dataset = Dataset()
    dataset.loadFile(paths[0])

    features_id = range(7, 19)
    X = generateDataset(dataset, features_id)

    # featureExpand also reports the input count to size the classifier with.
    X, nb_input = dataset.featureExpand(dataset, X)
    X = dataset.featureRescale(dataset, X)

    all_weight, all_output = csvToArray(paths[1])

    classifier = MultiClassifier(nb_input, all_output)
    classifier.initWeight(all_weight)

    # Distinct names for the output handle and the current row: the original
    # reused `file` and `d`, clobbering the argument list and the Dataset.
    with open('houses.csv', 'w') as out:
        out.write("Index,Hogwarts House\n")
        for i, row in enumerate(X):
            name = classifier.predict(row)
            out.write(str(i) + "," + name + "\n")
Ejemplo n.º 2
0
def main():
    """Draw a 13-step series of sub-plots and save it as ``scatter_plot.png``.

    The dataset path comes from ``IOHelper().checkArg(sys.argv)``. For each of
    the 13 steps ``drawOneSub`` is called with an increasing first index
    (6, 7, ...), the fixed index 6, and a consecutive block of 13 integers
    (0..12, 13..25, ...).
    NOTE(review): presumably the indices are feature ids and the integer
    block selects subplot slots -- confirm against ``drawOneSub``.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) == 0:
        print("Missing file")
        exit(1)

    dataset = Dataset()
    dataset.loadFile(args[0])

    figure, _ = plt.subplots(figsize=(18, 10))
    figure.tight_layout()

    first_feature = 6
    span = 13

    for step in range(span):
        block_start = step * span
        drawOneSub(dataset,
                   first_feature + step,
                   first_feature,
                   range(block_start, block_start + span))
    print("")

    plt.savefig('scatter_plot.png')
    plt.show()
Ejemplo n.º 3
0
def main():
    """Plot features 6..18, grouped by the magnitude of their mean and std.

    For every feature index the mean and standard deviation are computed with
    the ``Dataset`` helpers. The feature goes to group 1, 2 or 3 when both
    values lie within +/-10, +/-500 or +/-2000 respectively (first match
    wins), and to group 4 otherwise. The group number is the last argument
    given to ``addFeatureOnSubplot``. The figure is saved as
    ``scatter_plot.png`` and then shown.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) == 0:
        print("Missing file")
        exit(1)

    dataset = Dataset()
    dataset.loadFile(args[0])

    figure, _ = plt.subplots(figsize=(18, 10))
    figure.tight_layout()

    # (group, bound) pairs, tried from the tightest bound to the loosest.
    scale_groups = ((1, 10), (2, 500), (3, 2000))

    for index in range(6, 19):
        mean = dataset.mean(dataset.getFeature(index))
        std = dataset.standardDeviation(dataset.getFeature(index), mean)

        group = 4  # fallback when no bound contains both statistics
        for candidate, bound in scale_groups:
            if -bound <= mean <= bound and -bound <= std <= bound:
                group = candidate
                break

        addFeatureOnSubplot(index,
                            dataset.getFeature(index),
                            dataset.getName(index),
                            group)

    plt.savefig('scatter_plot.png')
    plt.show()
Ejemplo n.º 4
0
def main():
    """Load the dataset given on the command line and print its features.

    The path is taken from ``IOHelper().checkArg(sys.argv)``; the script
    prints "Missing file" and exits with status 1 when none is available.
    Output is produced by ``Dataset.printFeatureHeader`` followed by
    ``Dataset.printAllFeature``.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) == 0:
        print("Missing file")
        exit(1)

    dataset = Dataset()
    dataset.loadFile(args[0])

    dataset.printFeatureHeader()
    dataset.printAllFeature()
Ejemplo n.º 5
0
def main():
    """Train a ``MultiClassifier`` for 30 epochs and report test accuracy.

    Two paths are expected from ``IOHelper().checkArg(sys.argv)``: the
    training dataset and the test dataset. Both are converted with
    ``generateDataset`` on feature ids 7 through 18, then expanded and
    rescaled by their own ``Dataset`` instance. After every epoch the weights
    are saved, predictions are generated on the test set, and a line with the
    four individual losses, their sum and the accuracy is printed.

    The learning rate starts at 10.0 and is divided by 10 whenever the
    absolute summed loss is larger than in the previous epoch, as long as the
    rate is still above 1e-9.
    """
    paths = IOHelper().checkArg(sys.argv)
    if len(paths) < 2:
        print("Missing file")
        exit(1)

    epochs = 30
    feature_ids = range(7, 19)

    # Training data
    train_set = Dataset()
    train_set.loadFile(paths[0])
    X, Y = generateDataset(train_set, feature_ids)
    X, input_count = train_set.featureExpand(train_set, X)
    X = train_set.featureRescale(train_set, X)

    # Test data
    test_set = Dataset()
    test_set.loadFile(paths[1])
    X_test, Y_test = generateDataset(test_set, feature_ids)
    # The input count reported for the test set is the one the classifier
    # is built with.
    X_test, input_count = test_set.featureExpand(test_set, X_test)
    X_test = test_set.featureRescale(test_set, X_test)

    labels = train_set.getFeature(1, uniq=True)
    classifier = MultiClassifier(input_count, labels)

    learning_rate = 10.0
    previous_total = 9e+9
    classifier.setLr(learning_rate)

    report = "epoch: {0:<15.5g} Loss1: {1:<15.5g} Loss2: {2:<15.5g} Loss3: {3:<15.5g} Loss4: {4:<15.5g} LOSS: {5:<15.5g} Accuracy: {6:<g}%"

    for epoch in range(epochs):
        losses = classifier.train(X, Y)
        total = losses.sum()

        got_worse = abs(total) > abs(previous_total)
        if got_worse and learning_rate > 0.000000001:
            learning_rate /= 10
            print("DECREASE TO " + str(learning_rate))
            classifier.setLr(learning_rate)
        previous_total = total

        classifier.saveWeight()

        y_true, y_pred = generatePrediction(classifier, X_test, Y_test)

        accuracy = accuracy_score(y_true, y_pred) * 100
        print(report.format(epoch, losses[0], losses[1], losses[2],
                            losses[3], total, accuracy))
def main():
    """Show one scatter figure per feature (ids 6..18), coloured by house.

    The dataset path comes from ``IOHelper().checkArg(sys.argv)``. For every
    feature the values of each house are fetched with
    ``getFeature(index, 1, house)`` and drawn twice against their position:
    once in the order returned and once sorted ascending. Each figure is
    titled with the feature name and displayed with ``plt.show()``.

    Exits with status 1 when no path is available.
    """
    paths = IOHelper().checkArg(sys.argv)
    if len(paths) < 1:
        print("Missing file")
        sys.exit(1)

    dataset = Dataset()
    dataset.loadFile(paths[0])

    # (house, marker colour) in drawing order.
    houses = (('Gryffindor', 'b'),
              ('Hufflepuff', 'g'),
              ('Ravenclaw', 'c'),
              ('Slytherin', 'r'))

    for index in range(6, 19):
        # Fetch each series once; the sorted copies are derived from it.
        series = [(house, colour, dataset.getFeature(index, 1, house))
                  for house, colour in houses]

        ax = plt.subplot(1, 1, 1)
        plt.tight_layout()
        # Size the x axis on the longest series. Using only the first house
        # clipped every point of a longer series, because set_xlim turns
        # x autoscaling off.
        longest = max(len(values) for _, _, values in series)
        ax.set_xlim([-10, longest + 10])

        for house, colour, values in series:
            plt.scatter(np.arange(len(values)),
                        values,
                        c=colour,
                        s=10,
                        alpha=0.3,
                        label=house)

        # Sorted overlay, left unlabelled so the legend has one entry per house.
        for _, colour, values in series:
            plt.scatter(np.arange(len(values)),
                        sorted(values),
                        c=colour,
                        s=10,
                        alpha=0.3)

        plt.title(dataset.getName(index))
        plt.ylabel('Worst <---> Best')
        plt.xlabel('Evaluation')

        plt.legend()
        plt.tight_layout()
        plt.show()
Ejemplo n.º 7
0
def main():
    """Print per-house statistics for features 6..18 and chart them as bars.

    The dataset path comes from ``IOHelper().checkArg(sys.argv)``. For every
    value returned by ``getFeature(1, uniq=True)`` the script prints the
    value, the feature header, and one line per feature with its count,
    standard deviation, mean, minimum, quartiles and maximum. The mean, min,
    quartiles and max are then drawn as grouped bars for the four houses
    Gryffindor, Hufflepuff, Ravenclaw and Slytherin, with the feature names
    as x tick labels.

    Exits with status 1 when no path is available. Raises ``KeyError`` if one
    of the four plotted houses is absent from the dataset.
    """
    paths = IOHelper().checkArg(sys.argv)
    if len(paths) < 1:
        print("Missing file")
        sys.exit(1)

    dataset = Dataset()
    dataset.loadFile(paths[0])

    feature_ids = range(6, 19)
    # Statistics in the order their bar groups are drawn.
    stat_names = ('mean', 'min', 'q25', 'q50', 'q75', 'max')

    print(dataset.getName(1))
    # Tick labels are the same for every house, so collect them once instead
    # of resetting and refilling the list on every house iteration.
    course_names = [dataset.getName(index) for index in feature_ids]

    stats_by_house = {}
    for house in dataset.getFeature(1, uniq=True):
        stats = {stat: [] for stat in stat_names}
        stats_by_house[house] = stats

        print(house)
        dataset.printFeatureHeader()
        for index, course in zip(feature_ids, course_names):
            # One fetch per (feature, house), shared by all the statistics.
            values = dataset.getFeature(index, 1, house)
            count = dataset.count(values)
            mean = dataset.mean(values)
            std = dataset.standardDeviation(values, mean)
            lowest = dataset.min(values)
            q25, q50, q75 = dataset.quartile(values)
            highest = dataset.max(values)

            # NOTE(review): std is printed before mean -- confirm this matches
            # the column order emitted by printFeatureHeader.
            print("{0:<40s} {1:<15.5g} {2:<15.5g} {3:<15.5g} {4:<15.5g} {5:<15.5g} {6:<15.5g} {7:<15.5g} {8:<15.5g}"
                  .format(course, count, std, mean, lowest, q25, q50, q75,
                          highest))

            for stat, value in zip(stat_names,
                                   (mean, lowest, q25, q50, q75, highest)):
                stats[stat].append(value)
        print("")

    x = np.arange(len(course_names))

    bar_width = 0.035
    # (house, bar colour) in the order bars are placed inside a group.
    palette = (('Gryffindor', 'b'),
               ('Hufflepuff', 'r'),
               ('Ravenclaw', 'g'),
               ('Slytherin', 'c'))

    for group, stat in enumerate(stat_names):
        for slot, (house, colour) in enumerate(palette, start=1):
            # Label only the first group so the legend lists each house once.
            legend = {'label': house} if group == 0 else {}
            plt.bar(x + (bar_width * (group * len(palette) + slot)),
                    stats_by_house[house][stat],
                    bar_width,
                    color=colour,
                    **legend)

    plt.tight_layout()

    plt.xticks(x, course_names, rotation='vertical')

    plt.xlabel('Cours')
    plt.ylabel('Evaluation')

    plt.legend()
    plt.show()