def formatFeatures():
    """Build per-feature value columns and numeric house labels.

    Reads the module-level ``rawdata`` rows. A row is used only when
    ``ml.isFormatted(row)`` is truthy AND ``row[1]`` is one of the four
    known house names; any other row is skipped entirely so that the
    feature columns and the label list always stay the same length.

    Each feature column is then passed through ``ml.normalizeData`` with
    the bounds returned by ``ml.getMinMax`` for that column.

    Returns:
        tuple: ``(data, y)`` where ``data`` is a list of ``lenFeatures``
        columns (one per feature, taken from ``row[indexFeatures + i]``)
        and ``y`` holds one float label per kept row
        (Gryffindor=1.0, Ravenclaw=2.0, Slytherin=3.0, Hufflepuff=4.0).
    """
    houseLabels = {
        'Gryffindor': 1.0,
        'Ravenclaw': 2.0,
        'Slytherin': 3.0,
        'Hufflepuff': 4.0,
    }

    data = [[] for _ in range(lenFeatures)]
    # Isolating feature House
    y = []

    for row in rawdata:
        if not ml.isFormatted(row):
            continue
        label = houseLabels.get(row[1])
        if label is None:
            # Unknown house: appending features without a label would
            # shift every following sample against its label, so drop
            # the whole row instead.
            continue
        y.append(label)
        for i in range(lenFeatures):
            data[i].append(float(row[i + indexFeatures]))

    # Normalize
    for i in range(lenFeatures):
        minV, maxV = ml.getMinMax(data[i])
        data[i] = ml.normalizeData(data[i], minV, maxV)

    return data, y
def formatFeatures():
    """Build normalized per-feature value columns from every raw row.

    Every row of the module-level ``rawdata`` is used. For each of the
    ``lenFeatures`` features, the cell at ``row[indexFeatures + i]`` is
    converted with ``float``; an empty-string cell is stored as ``0.0``
    instead. Each column is then passed through ``ml.normalizeData`` with
    the bounds returned by ``ml.getMinMax`` for that column.

    Returns:
        list: ``lenFeatures`` columns, one per feature.
    """
    columns = [[] for _ in range(lenFeatures)]

    for row in rawdata:
        for offset, column in enumerate(columns):
            cell = row[offset + indexFeatures]
            # Empty cells are replaced by 0.0 rather than parsed.
            column.append(0.0 if cell == '' else float(cell))

    # Normalize
    for offset, column in enumerate(columns):
        low, high = ml.getMinMax(column)
        columns[offset] = ml.normalizeData(column, low, high)

    return columns
# Build a pair plot of the normalized features, colored by house, and save
# it to "pair_plot.png".

# Get data: one column per feature, plus a trailing column (index
# lenFeatures) holding row[1] for every row accepted by ml.isFormatted.
marks = [[] for _ in range(lenFeatures + 1)]
for row in rawdata:
    if ml.isFormatted(row):
        marks[lenFeatures].append(row[1])
        for i in range(lenFeatures):
            marks[i].append(float(row[i + indexFeatures]))

# Normalize (feature columns only; the trailing label column is untouched)
for i in range(lenFeatures):
    minV, maxV = ml.getMinMax(marks[i])
    marks[i] = ml.normalizeData(marks[i], minV, maxV)

# Prepare data for pair plot: transpose the columns into one record per row
pairplot = [
    [marks[j][i] for j in range(lenFeatures + 1)]
    for i in range(len(marks[0]))
]

# Plot
pairplot = pd.DataFrame(pairplot, columns=features)
# The facet-size keyword was renamed from `size` to `height` in seaborn 0.9
# and `size` was later removed. 2.5 is the default under both names, so the
# argument is omitted to stay compatible with old and new seaborn releases.
sns_plot = sns.pairplot(pairplot, hue=features[lenFeatures])
sns_plot.savefig("pair_plot.png")
# NOTE(review): this fragment reads `row` before assigning it, so it
# presumably sits inside a loop over the raw rows whose header is not visible
# here -- confirm the enclosing indentation before relying on this layout.
# NOTE(review): the feature loop below is assumed to be nested under the
# isFormatted check -- confirm.
# Route the row's raw feature cells into the bucket of its house.
if ml.isFormatted(row):
    # gryf is the default bucket; it is kept for any house name that is not
    # one of the three tested below.
    tmp = gryf
    if row[1] == 'Ravenclaw':
        tmp = raven
    elif row[1] == 'Slytherin':
        tmp = slyth
    elif row[1] == 'Hufflepuff':
        tmp = huffle
    # Cells are appended as-is (no conversion at this point).
    for j in range(lenFeatures):
        tmp[j].append(row[j + indexFeatures])
# Get all stats
# For every house and every feature column: pass the column through
# ml.formatData, normalize it with its own min/max, then store its standard
# deviation in housesStd[i][j] and add it to the running sum stdMean[j].
# presumably ml.formatData converts the raw cells to numbers -- TODO confirm
for i, house in enumerate(houses):
    for j, row in enumerate(house):
        house[j] = ml.formatData(row)
        minV, maxV = ml.getMinMax(house[j])
        house[j] = ml.normalizeData(house[j], minV, maxV)
        mean = ml.getMean(house[j])
        housesStd[i][j] = ml.getStd(house[j], mean)
        stdMean[j] += housesStd[i][j]
# Turn each per-feature sum into a mean over the houses.
# assumes stdMean started at zero and len(houses) == lenHouses -- TODO confirm
for i, mean in enumerate(stdMean):
    stdMean[i] /= lenHouses
# Get Std
# Accumulate, per feature, the squared deviation of each house's standard
# deviation from the mean computed above.
# assumes featuresStd started at zero -- TODO confirm
for i, house in enumerate(housesStd):
    for j, row in enumerate(house):
        featuresStd[j] += (row - stdMean[j]) * (row - stdMean[j])
# Divide by (lenHouses - 1) and take the square root, giving for each feature
# the spread of the per-house standard deviations.
for i, std in enumerate(featuresStd):
    featuresStd[i] /= lenHouses - 1
    featuresStd[i] = featuresStd[i]**0.5