Ejemplo n.º 1
0
def formatFeatures():
    """Build normalized feature columns and numeric house labels.

    Reads the module-level ``rawdata`` and keeps only the rows accepted by
    ``ml.isFormatted``. For each kept row, the house name in ``row[1]`` is
    encoded as a float label and the ``lenFeatures`` values starting at
    column ``indexFeatures`` are converted to float.

    Returns:
        A ``(data, y)`` tuple: ``data`` holds one list per feature (column
        major), each passed through ``ml.normalizeData``; ``y`` holds the
        encoded house labels.
    """
    # Label encoding used for the target vector.
    houseLabels = {
        'Gryffindor': 1.0,
        'Ravenclaw': 2.0,
        'Slytherin': 3.0,
        'Hufflepuff': 4.0,
    }

    columns = [[] for _ in range(lenFeatures)]
    labels = []

    for record in rawdata:
        if not ml.isFormatted(record):
            continue
        # NOTE(review): a row whose house is not one of the four known names
        # contributes features but no label, leaving the two outputs with
        # different lengths -- confirm ml.isFormatted rules this out.
        label = houseLabels.get(record[1])
        if label is not None:
            labels.append(label)
        for offset, column in enumerate(columns):
            column.append(float(record[offset + indexFeatures]))

    # Normalize each feature column independently; ml.getMinMax presumably
    # returns the (min, max) pair of the column.
    for position, column in enumerate(columns):
        low, high = ml.getMinMax(column)
        columns[position] = ml.normalizeData(column, low, high)

    return columns, labels
Ejemplo n.º 2
0
def formatFeatures():
    """Build normalized feature columns from every row of ``rawdata``.

    For each row, the ``lenFeatures`` cells starting at column
    ``indexFeatures`` are converted to float; an empty cell is recorded
    as ``0.0``.

    Returns:
        A list with one list per feature (column major), each passed
        through ``ml.normalizeData``.
    """
    columns = [[] for _ in range(lenFeatures)]

    for record in rawdata:
        for offset, column in enumerate(columns):
            cell = record[offset + indexFeatures]
            # Missing marks are stored as 0.0 before normalization.
            column.append(0.0 if cell == '' else float(cell))

    # Normalize each feature column independently; ml.getMinMax presumably
    # returns the (min, max) pair of the column.
    for position, column in enumerate(columns):
        low, high = ml.getMinMax(column)
        columns[position] = ml.normalizeData(column, low, high)

    return columns
Ejemplo n.º 3
0
def predict():
    """Predict a house for every row of ``X`` and write the result to CSV.

    For each student, one probability per entry of ``housesWeights`` is
    computed as ``sigmoid(np.dot(studMarks, weights))``. The house whose
    probability equals the maximum is selected (on ties, the last matching
    index wins). A header plus one ``index,house`` line per student is
    written to ``assets/houses.csv``.

    Returns:
        A list of four lists holding the predicted house names, grouped in
        the order Gryffindor, Ravenclaw, Slytherin, Hufflepuff.
    """
    # Position of each house inside the returned distribution.
    bucketIndex = {
        'Gryffindor': 0,
        'Ravenclaw': 1,
        'Slytherin': 2,
        'Hufflepuff': 3,
    }
    distribution = [[], [], [], []]

    # The context manager guarantees the file is flushed and closed, even
    # if a prediction raises part-way through.
    with open('assets/houses.csv', 'w') as f:
        f.write('Index,Hogwarts House\n')
        for i, studMarks in enumerate(X):
            probs = [
                sigmoid(np.dot(studMarks, weights))
                for weights in housesWeights
            ]

            # Pick the most probable house once all probabilities are known
            # (previously this search was redone after every single append).
            _, maxV = ml.getMinMax(probs)
            house = ''
            for j, prob in enumerate(probs):
                if prob == maxV:
                    house = houses[j]

            # Names outside the four known houses are written to the file
            # but not counted in the distribution.
            if house in bucketIndex:
                distribution[bucketIndex[house]].append(house)
            f.write(str(i) + ',' + house + '\n')

    return distribution
Ejemplo n.º 4
0
# One list per feature, plus a trailing list for the value in row[1]
# (used as the hue column of the plot; presumably the house name).
marks = [[] for _ in range(lenFeatures + 1)]

# Get data: only rows accepted by ml.isFormatted are kept
for row in rawdata:
    if not ml.isFormatted(row):
        continue
    marks[lenFeatures].append(row[1])
    for i in range(lenFeatures):
        marks[i].append(float(row[i + indexFeatures]))

# Normalize every feature column (the trailing label column is left as-is)
for i in range(lenFeatures):
    minV, maxV = ml.getMinMax(marks[i])
    marks[i] = ml.normalizeData(marks[i], minV, maxV)

# Prepare data for pair plot: transpose column-major marks into one row
# per sample, since the DataFrame below is built row by row.
pairplot = [list(sample) for sample in zip(*marks)]

# Plot
pairplot = pd.DataFrame(pairplot, columns=features)
# `height` is the current name of the per-facet size argument; the former
# `size` keyword is deprecated in seaborn.
sns_plot = sns.pairplot(pairplot, height=2.5, hue=features[lenFeatures])
sns_plot.savefig("pair_plot.png")
Ejemplo n.º 5
0
# Dispatch each accepted row to its house bucket; any house name other
# than the three listed here falls back to the Gryffindor bucket.
houseBuckets = {
    'Ravenclaw': raven,
    'Slytherin': slyth,
    'Hufflepuff': huffle,
}
for record in rawdata:
    if not ml.isFormatted(record):
        continue
    bucket = houseBuckets.get(record[1], gryf)
    for featureIdx in range(lenFeatures):
        bucket[featureIdx].append(record[featureIdx + indexFeatures])

# Get all stats: format and normalize every feature column of every house
# in place, then record its standard deviation and accumulate it per feature.
for houseIdx, houseColumns in enumerate(houses):
    for featureIdx, rawColumn in enumerate(houseColumns):
        column = ml.formatData(rawColumn)
        low, high = ml.getMinMax(column)
        column = ml.normalizeData(column, low, high)
        houseColumns[featureIdx] = column
        housesStd[houseIdx][featureIdx] = ml.getStd(column, ml.getMean(column))
        stdMean[featureIdx] += housesStd[houseIdx][featureIdx]

# Turn the accumulated sums into the mean std of each feature across houses
for featureIdx, _ in enumerate(stdMean):
    stdMean[featureIdx] /= lenHouses

# Get Std: sum of squared deviations of each house's std from the mean std
for houseRow in housesStd:
    for featureIdx, value in enumerate(houseRow):
        deviation = value - stdMean[featureIdx]
        featuresStd[featureIdx] += deviation * deviation

# Divide by (lenHouses - 1), i.e. the sample-variance denominator
for featureIdx, _ in enumerate(featuresStd):
    featuresStd[featureIdx] /= lenHouses - 1