Exemple #1
0
def formatFeatures():
    """Build normalized feature columns and numeric house labels from rawdata.

    Only rows accepted by ``ml.isFormatted`` are used. For each such row the
    house name in ``row[1]`` is mapped to a float code (Gryffindor 1.0,
    Ravenclaw 2.0, Slytherin 3.0, Hufflepuff 4.0) and the ``lenFeatures``
    cells starting at column ``indexFeatures`` are converted to float.

    Returns:
        A ``(data, y)`` pair: ``data`` holds one list per feature, each passed
        through ``ml.normalizeData`` with the bounds from ``ml.getMinMax``;
        ``y`` holds the house codes.

    NOTE(review): a row whose ``row[1]`` is not one of the four names adds
    feature values but no label, so ``y`` can end up shorter than the
    columns - confirm ``ml.isFormatted`` rules that case out.
    """
    houseCodes = {
        'Gryffindor': 1.0,
        'Ravenclaw': 2.0,
        'Slytherin': 3.0,
        'Hufflepuff': 4.0,
    }
    columns = [[] for _ in range(lenFeatures)]
    labels = []

    for record in rawdata:
        if not ml.isFormatted(record):
            continue
        code = houseCodes.get(record[1])
        if code is not None:
            labels.append(code)
        for offset, column in enumerate(columns):
            column.append(float(record[offset + indexFeatures]))

    # Rescale every column with its own min/max bounds
    scaled = []
    for column in columns:
        low, high = ml.getMinMax(column)
        scaled.append(ml.normalizeData(column, low, high))

    return scaled, labels
Exemple #2
0
def formatFeatures():
    """Build normalized feature columns from every row of rawdata.

    For each row, the ``lenFeatures`` cells starting at column
    ``indexFeatures`` are read; an empty-string cell is stored as 0.0 and any
    other cell is converted with ``float``. No rows are filtered out.

    Returns:
        A list with one entry per feature, each the result of
        ``ml.normalizeData`` applied with the bounds from ``ml.getMinMax``.
    """
    columns = [[] for _ in range(lenFeatures)]

    for record in rawdata:
        for offset, column in enumerate(columns):
            cell = record[offset + indexFeatures]
            # Missing values are replaced by 0.0 before normalization
            column.append(0.0 if cell == '' else float(cell))

    # Rescale every column with its own min/max bounds
    scaled = []
    for column in columns:
        low, high = ml.getMinMax(column)
        scaled.append(ml.normalizeData(column, low, high))

    return scaled
Exemple #3
0
# One list per feature column, plus a trailing list holding row[1]
marks = [[] for _ in range(lenFeatures + 1)]

# Gather values from the rows accepted by ml.isFormatted
for record in rawdata:
    if not ml.isFormatted(record):
        continue
    marks[lenFeatures].append(record[1])
    for col in range(lenFeatures):
        marks[col].append(float(record[col + indexFeatures]))

# Rescale each feature column with its own min/max bounds
# (the trailing row[1] list is left untouched)
for col in range(lenFeatures):
    low, high = ml.getMinMax(marks[col])
    marks[col] = ml.normalizeData(marks[col], low, high)

# Transpose the column-wise lists into one record per sample
sampleCount = len(marks[0])
pairplot = [
    [marks[col][sample] for col in range(lenFeatures + 1)]
    for sample in range(sampleCount)
]

# Draw the pair plot, coloured by the column named features[lenFeatures],
# and write it to disk.
# NOTE(review): seaborn 0.9 renamed `size` to `height`; newer releases warn
# about `size` - confirm the installed version before switching.
pairplot = pd.DataFrame(pairplot, columns=features)
sns_plot = sns.pairplot(pairplot, size=2.5, hue=features[lenFeatures])
sns_plot.savefig("pair_plot.png")
Exemple #4
0
    # Only rows accepted by ml.isFormatted contribute values.
    if ml.isFormatted(row):
        # Pick the per-house container from row[1]; gryf is the default for
        # any value that is not one of the three names tested below.
        # NOTE(review): an unrecognised row[1] therefore also lands in gryf -
        # confirm ml.isFormatted rules that out.
        tmp = gryf
        if row[1] == 'Ravenclaw':
            tmp = raven
        elif row[1] == 'Slytherin':
            tmp = slyth
        elif row[1] == 'Hufflepuff':
            tmp = huffle
        # Append the lenFeatures cells starting at column indexFeatures,
        # exactly as they appear in the row (no conversion at this point).
        for j in range(lenFeatures):
            tmp[j].append(row[j + indexFeatures])
# For every house and feature: format, normalize, then record the standard
# deviation and accumulate it per feature
for houseIdx, houseColumns in enumerate(houses):
    for featIdx in range(len(houseColumns)):
        houseColumns[featIdx] = ml.formatData(houseColumns[featIdx])
        low, high = ml.getMinMax(houseColumns[featIdx])
        houseColumns[featIdx] = ml.normalizeData(houseColumns[featIdx], low, high)
        average = ml.getMean(houseColumns[featIdx])
        housesStd[houseIdx][featIdx] = ml.getStd(houseColumns[featIdx], average)
        stdMean[featIdx] += housesStd[houseIdx][featIdx]

# Turn the accumulated sums into the mean standard deviation per feature
for featIdx in range(len(stdMean)):
    stdMean[featIdx] /= lenHouses

# Sum the squared deviations of each house's std from the per-feature mean
for houseRow in housesStd:
    for featIdx, spread in enumerate(houseRow):
        deviation = spread - stdMean[featIdx]
        featuresStd[featIdx] += deviation * deviation

# Divide by (lenHouses - 1) and take the square root
for featIdx in range(len(featuresStd)):
    featuresStd[featIdx] = (featuresStd[featIdx] / (lenHouses - 1)) ** 0.5