import get_y
import sklearn.linear_model
from sklearn import preprocessing
from sklearn.linear_model import Lasso
import combineData as cd
import copy

# Builds a numeric feature matrix X and target vector Y from the education
# data set joined with the election results.
#
# NOTE(review): `pd` and `np` are used below but are not imported in this
# span -- presumably `import pandas as pd` / `import numpy as np` appear
# earlier in the file; confirm.


def _to_float(value):
    """Return ``float(value)``, or 0.0 when the value cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric cells (free text, None, ...) are treated as zero.
        return 0.0


folder = './data/'
#X = cd.loadFilesFrom(folder)
X = pd.read_csv('./data/education.csv', encoding='mac_roman')
X = X.set_index("Geography")
cols = X.columns
# Keep an untouched copy of the indexed frame before the target is attached.
originalData = copy.deepcopy(X)

y = pd.read_csv('./election_results.csv')
# Presumably appends the election result as the last column (the slicing
# below treats the last column as the target) -- verify in combineData.
X = cd.addYcol(X, y)

# `DataFrame.as_matrix()` was removed in pandas 1.0; `.values` returns the
# same ndarray on both old and new pandas versions.
raw = X.values

# Coerce every cell to float. Cells that already convert (including NaN)
# keep their value; anything unconvertible becomes 0.0. Building a new array
# with an explicit dtype guarantees a float64 result even when the frame
# holds mixed/object columns, and the reshape keeps the array 2-D when the
# frame has no rows.
mat = np.array(
    [[_to_float(cell) for cell in row] for row in raw],
    dtype=float,
).reshape(raw.shape)
#np.random.shuffle(mat)

# Last column is the target, everything before it is the feature matrix.
X = mat[:, :-1]
Y = mat[:, -1]
#Y = Y * 10000.0
import pdb
import get_y
import sklearn.linear_model
from sklearn import preprocessing
from sklearn.linear_model import Lasso
from sklearn.mixture import GaussianMixture as GM
import combineData as cd
import copy
import matplotlib.pyplot as plt

# Loads the cleaned data files, attaches the election results and produces
# a feature matrix (`X`), a target vector (`Y`) and `temp`, a copy of the
# features in which string cells and NaNs are replaced by 0.0.
#
# NOTE(review): `pd` and `np` are used below but are not imported in this
# span -- presumably `import pandas as pd` / `import numpy as np` appear
# earlier in the file; confirm.

fname = './cleanedData/'
X = cd.loadFilesFrom(fname)
Y = pd.read_csv('./election_results.csv')
# Presumably appends the election result as the last column (the slicing
# below treats the last column as the target) -- verify in combineData.
X = cd.addYcol(X, Y)
# Independent copy of the joined frame, taken before it is split into arrays.
df = X.copy(deep=True)

# `as_matrix()` was removed in pandas 1.0; `.values` returns the same
# ndarray on both old and new pandas versions.
Y = X.iloc[:, -1].values
X = X.iloc[:, 0:-1].values

# nan_to_num returns a cleaned copy, so X itself is left unmodified. The
# loop below handles whatever that call does not: string cells and any NaN
# that is still present (presumably relevant for object-dtype input --
# confirm against the actual data).
temp = np.nan_to_num(X)
n_rows, n_cols = temp.shape
for i in range(n_rows):
    for j in range(n_cols):
        # isinstance (rather than an exact type comparison) also catches str
        # subclasses such as numpy.str_, which would otherwise reach
        # np.isnan and raise TypeError. The `or` short-circuits, so np.isnan
        # is never called on a string cell.
        if isinstance(X[i, j], str) or np.isnan(temp[i, j]):
            temp[i, j] = 0.0