class Model:
    """Polynomial ridge-regression pipeline that persists its fitted parts.

    The regressor, both scalers and the polynomial expander are class
    attributes, so every instance of ``Model`` shares the same fitted state.
    ``sc`` is a scaler class aliased elsewhere in the file.
    """

    lin_reg = Ridge()
    xscaler = sc()
    yscaler = sc()
    poly = PolynomialFeatures(degree=3)

    @staticmethod
    def _save(obj, path):
        """Serialize ``obj`` to ``path`` and always close the file handle."""
        with open(path, 'wb') as handle:
            dump(obj, handle)

    def extract(self, df):
        """Split a DataFrame into features and target.

        Args:
            df: DataFrame containing a ``'Score'`` column plus the identifier
                columns that are dropped below.

        Returns:
            Tuple ``(x, y)``: the remaining feature columns and the
            ``'Score'`` column.
        """
        y = df['Score']
        x = df.drop(
            ['labour_id', 'Name', 'skill', 'Latitudes', 'Longitudes', 'Score'],
            axis=1)
        return x, y

    def preprocess(self, x, y):
        """Fit the preprocessing objects on training data and persist them.

        To be called after :meth:`extract`.

        Args:
            x: feature DataFrame; must contain a ``'Gender'`` column.
            y: target values (one per row of ``x``).

        Returns:
            Tuple ``(x, y)``: a DataFrame of expanded and scaled features
            whose column ``0`` holds the original ``'Gender'`` values, and the
            scaled target as an ``(n, 1)`` array.

        Side effects:
            Writes ``xscaler+ve.pkl``, ``yscaler+ve.pkl`` and ``poly.pkl`` to
            the current working directory.
        """
        gen = x['Gender']
        x = self.poly.fit_transform(x)
        x = self.xscaler.fit_transform(x)
        # The scaler needs a column vector; one reshape replaces the former
        # reshape -> ravel -> reshape round trip.
        y = np.array(y).reshape((len(y), 1))
        y = self.yscaler.fit_transform(y)
        x = pd.DataFrame(x)
        # Assign by position: the new frame has a fresh 0..n-1 index, so
        # assigning the Series itself would align on labels and produce
        # NaN / misplaced values whenever the input index differs.
        x[0] = gen.values
        self._save(self.xscaler, 'xscaler+ve.pkl')
        self._save(self.yscaler, 'yscaler+ve.pkl')
        self._save(self.poly, 'poly.pkl')
        return x, y

    def preprocess_input(self, x):
        """Apply the already-fitted polynomial expansion and feature scaling.

        NOTE(review): unlike :meth:`preprocess`, column 0 is not replaced by
        the gender values here - confirm this difference is intended.
        """
        x = self.poly.transform(x)
        x = self.xscaler.transform(x)
        return x

    def fitmodel(self, x, y):
        """Fit the ridge regressor on ``x``/``y``, persist it and print it.

        Side effects:
            Writes ``model+ve.pkl`` to the current working directory.
        """
        k = self.lin_reg.fit(x, y)
        self._save(self.lin_reg, 'model+ve.pkl')
        print(k)

    def predict(self, x):
        """Predict with the fitted regressor and undo the target scaling."""
        y_pred = self.lin_reg.predict(x)
        return self.yscaler.inverse_transform(y_pred)
def preprocess(self, x, type):
    """Expand and scale features with the scaler of one model variant.

    Args:
        x: feature DataFrame; must contain a ``'Gender'`` column.
        type: ``'+'`` selects ``self.xscaler_positive``, ``'-'`` selects
            ``self.xscaler_negative``.

    Returns:
        DataFrame of the transformed features whose column ``0`` holds the
        original ``'Gender'`` values.

    Raises:
        ValueError: if ``type`` is neither ``'+'`` nor ``'-'``.
    """
    if type == '+':
        xscaler = self.xscaler_positive
    elif type == '-':
        xscaler = self.xscaler_negative
    else:
        # Previously an unknown type fell through to a freshly constructed,
        # never-fitted ``sc()``; fail early with a clear message instead.
        raise ValueError("type must be '+' or '-', got {!r}".format(type))

    gen = x['Gender']
    transformed = pd.DataFrame(xscaler.transform(self.poly.transform(x)))
    # Assign by position: the new frame has a fresh 0..n-1 index, so assigning
    # the Series itself would align on labels and yield NaN / misplaced values
    # whenever the input frame carries a different index.
    transformed[0] = gen.values
    return transformed
def predict(self, x, type, indexes):
    """Predict scores with one model variant and key them by identifier.

    Args:
        x: preprocessed feature matrix accepted by the selected model.
        type: ``'+'`` selects ``self.model_positive`` /
            ``self.yscaler_positive``; ``'-'`` selects the negative pair.
        indexes: mapping (or sequence) from row position ``0..n-1`` to the
            identifier that should label that row's score.

    Returns:
        Dict mapping each identifier in ``indexes`` to the first column of the
        inverse-scaled prediction for its row.

    Raises:
        ValueError: if ``type`` is neither ``'+'`` nor ``'-'``.
    """
    if type == '+':
        model, yscaler = self.model_positive, self.yscaler_positive
    elif type == '-':
        model, yscaler = self.model_negative, self.yscaler_negative
    else:
        # Previously an unknown type fell through to a brand-new ``Ridge()``
        # and ``sc()`` that were never fitted; fail early instead.
        raise ValueError("type must be '+' or '-', got {!r}".format(type))

    y_pred = yscaler.inverse_transform(model.predict(x))
    # Predictions are indexed as 2-D (rows x 1), matching the original access.
    return {indexes[i]: y_pred[i][0] for i in range(len(indexes))}
def preprocess_input(self, x, type):
    """Turn a dict of feature lists into a scaled matrix plus a row index.

    Args:
        x: dict mapping an identifier to that record's list of feature values.
        type: ``'+'`` selects ``self.xscaler_positive``, ``'-'`` selects
            ``self.xscaler_negative``.

    Returns:
        Tuple ``(features, indexes)``: the expanded and scaled feature matrix,
        and a dict mapping each row position to the identifier it came from.

    Raises:
        ValueError: if ``type`` is neither ``'+'`` nor ``'-'``.
    """
    if type == '+':
        xscaler = self.xscaler_positive
    elif type == '-':
        xscaler = self.xscaler_negative
    else:
        # Previously an unknown type fell through to a freshly constructed,
        # never-fitted ``sc()``; fail early with a clear message instead.
        raise ValueError("type must be '+' or '-', got {!r}".format(type))

    # Row position -> identifier, in the dict's iteration order.
    indexes = dict(enumerate(x))
    xs = np.array(list(x.values()))
    features = xscaler.transform(self.poly.transform(xs))
    return features, indexes
x[:, 2] = le_x_2.fit_transform(x[:, 2]) ohec = ohe(categorical_features=[ 1 ]) #index of the column is to be specified for the onehot encoding x = ohec.fit_transform(x).toarray() #now we have to fit the ohec object into x = x[:, 1:] #to eliminate the dummy variable trap(like for three classes a dummy variable set of 2 is fine(third is automatically set)) #data splitting from sklearn.model_selection import train_test_split as tts x_train, x_test, y_train, y_test = tts(x, y, test_size=0.2, random_state=0) #feature scaling from sklearn.preprocessing import StandardScaler as sc sc_x = sc() x_train = sc_x.fit_transform(x_train) #standardization scaling we are doing x_test = sc_x.transform(x_test) import keras as ke from keras.models import Sequential #to initialize the ann from keras.layers import Dense #to build the layers of the ann from keras.layers import Dropout as dr #INITIALIZING THE ANN classifier = Sequential( ) #sequential object created( ann as sequence of layers) #adding the input layer and first hidden layer classifier.add( Dense(activation="relu",
def preprocess_normalize(df):
    """Rescale two column groups of ``df`` in place and return the frame.

    The groups are the module-level column lists ``quantitative_data`` and
    ``numerical_data``; each group is fitted with its own fresh ``sc()``.
    """
    quantitative_scaled = sc().fit_transform(df[quantitative_data])
    df[quantitative_data] = quantitative_scaled

    numerical_scaled = sc().fit_transform(df[numerical_data])
    df[numerical_data] = numerical_scaled

    return df
import numpy as np  # Numerical library
import pandas as pd  # Data-analysis library

filepath = "C:/Users/Condominios Manzano/Desktop/Machine Learning/python-ml-course-master/python-ml-course-master/datasets/iris/iris.csv"
df = pd.read_csv(filepath)

# Last column is the target, everything before it is a feature.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

# Build PCA from scratch.
# Three ways to obtain the eigenvectors and eigenvalues:
#   1) variance-covariance matrix
#   2) correlation matrix
#   3) singular value decomposition

# For method 1 the data is standardized first because the features are
# measured in different units (``sc`` is a scaler aliased elsewhere).
X_std = sc().fit_transform(X)
x_ = X_std.mean(axis=0)

# 1) Variance-covariance matrix
centered = X_std - x_
cov_ = (1 / (len(X) - 1)) * (centered.T @ centered)
ceig_val, ceig_vec = np.linalg.eig(cov_)

# 2) Correlation matrix
corr_ = np.corrcoef(X_std.T)
eig_val, eig_vec = np.linalg.eig(corr_)
cor = np.corrcoef(X.T)

# 3) Singular value decomposition
u, s, v = np.linalg.svd(X_std.T)

# Selecting the principal components: keep the vectors that explain most of
# the data.
sum_com = np.sum(ceig_val)
# Hold-out rows: everything from row 2375 onwards.
test_x_set = dataset.iloc[2375:, 1:125].values
test_y_set = dataset.iloc[2375:, 125:128].values

# Taking care of missing data: replace each NaN with its column average.
col_mean = np.nanmean(training_x_set, axis=0)
inds = np.where(np.isnan(training_x_set))
training_x_set[inds] = col_mean[inds[1]]

# Same treatment for the test features, using the test set's own averages.
col_mean2 = np.nanmean(test_x_set, axis=0)
inds = np.where(np.isnan(test_x_set))
test_x_set[inds] = col_mean2[inds[1]]

# Data scaling (the original note says 0-1; the actual range depends on which
# scaler ``sc`` aliases - confirm).
scaled = sc()
training_x_set = scaled.fit_transform(training_x_set)

#%%
# Building the learning model.
model = Sequential()
# Input size == ((6*9)+(8*1)) * 2
for layer in (
        Dense(100, input_dim=124, activation='relu'),
        Dense(100, activation='relu'),
        Dense(100, activation='relu'),
        Dropout(0.1),
        Dense(100, activation='relu'),
        Dropout(0.2),
        Dense(80, activation='relu'),
        Dropout(0.2),
):
    model.add(layer)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from sklearn.decomposition import PCA
from scipy import linalg as alg

# Load the data and standardize every column.
r = pd.read_csv('bank_contacts.csv')
r_std = sc().fit_transform(r)

""" method to find covariance matrix manually mean_vec=np.mean(r_std,axis=0) cov_mat=(r_std-mean_vec).T.dot((r_std-mean_vec)) / (r_std.shape[0]-1) print("covariance matrix:",cov_mat) """

# Covariance matrix via the library function, then its eigen-decomposition.
cov_mat = np.cov(r_std.T)
eig_val, eig_vec = alg.eig(cov_mat)
print(eig_val)
print(eig_vec)

# Pair each eigenvalue magnitude with its eigenvector (the matching column of
# eig_vec) so the pairs can be confirmed visually.
eig_pairs = [(np.abs(value), eig_vec[:, col])
             for col, value in enumerate(eig_val)]
for magnitude, _vector in eig_pairs:
    print(magnitude)

# Two-component PCA fitted on the raw (unscaled) data first.
t = PCA(n_components=2)
t.fit(r)
print(t.explained_variance_ratio_)

## for cumulative variance
#print(t.explained_variance_ratio_.sum())

# Refit the same PCA object on the standardized data and project it.
u = t.fit(r_std)
f = t.transform(r_std)
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler as sc
import matplotlib.pyplot as plt
from keras.models import model_from_json

# Training
training_data = pd.read_csv('AMZNtrain.csv')
num_samples = len(training_data)
trainX = training_data.iloc[:, 1:-2]

# Scale each dimension/feature with its own scaler so a single column can be
# un-scaled on its own later.
list_sc = [sc(feature_range=(0, 1)) for _ in range(4)]
for col, scaler in enumerate(list_sc):
    column = trainX.iloc[:, col].values.reshape(num_samples, 1)
    trainX.iloc[:, col] = scaler.fit_transform(column)
trainX_scaled = trainX.values

# Build sliding windows of `lookback` consecutive rows.
lookback = 60
X_train = []
y_train = []
for i in range(lookback, num_samples):
    # Grab all features, i.e open, high, low, and close
    window_start = i - lookback
    X_train.append(trainX_scaled[window_start:i])
    # I want to predict the 'close' price, i.e. the last column
@author: Thineth """ import pandas as pd import matplotlib.pyplot as plt import numpy as np dataset = pd.read_csv('Position_salaries.csv') X = dataset.iloc[:, 2:3].values y = dataset.iloc[:, 2].values from sklearn.preprocessing import StandardScaler as sc sc_X = sc() sc_y = sc() X = sc_X.fit_transform(X) y = sc_y.fit_transform(y) from sklearn.svm import SVR regressor = SVR(kernel='rbf') regressor.fit(X, y) y_pred = regressor.predict(sc_X.fit_transform([[6.5]])) y_pred = sc_y.inverse_transform(y_pred) plt.scatter(X, y, color='red')
from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler as sc
from sklearn.cross_validation import train_test_split

# NOTE(review): Imputer, sklearn.cross_validation and the
# `categorical_features` argument belong to legacy scikit-learn releases;
# this script is kept on that API.

# Load the dataset: every column but the last is a feature, column 3 is the
# label.
dataset = pd.read_csv('datasets/Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 3].values

# Missing data: fill NaNs in feature columns 1-2 with the column mean.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0).fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Categorical data: label-encode column 0, then one-hot encode it.
labelEncoder_X = LabelEncoder()
X[:, 0] = labelEncoder_X.fit_transform(X[:, 0])
oneHotEncoder = OneHotEncoder(categorical_features=[0])
X = oneHotEncoder.fit_transform(X).toarray()

# The label column is label-encoded as well.
labelEncoder_Y = LabelEncoder()
Y = labelEncoder_Y.fit_transform(Y)

# Train/test split: 20% held out, fixed seed for reproducibility.
X_train, X_test, Y_Train, Y_Test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Feature scaling: fit on the training split only, then reuse on the test
# split.
sc_X = sc()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
# Preprocess time: keep only the part before the first ':' and store it as a
# float column.
tempTime = cleanData['Time'].str.split(':', expand=True).pop(0)
# Builtin float: `np.float` was merely an alias for it and has been removed
# from NumPy, so the old spelling raises AttributeError on current versions.
tempTime = pd.DataFrame(tempTime, dtype=float)
cleanData = cleanData.drop(columns=['Time'])
cleanData.insert(3, 'Time', tempTime)

# Change type to numpy array.
data = cleanData.values
c = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18]

# Get features and labels: column 12 is the label, the columns in `c` are the
# features.
label = data[:, 12]
data = data[:, c]

# Scale data.
# NOTE(review): this rebinds the imported alias `sc` (and `pca` below) from
# the class to a fitted instance, so the class is no longer reachable under
# that name afterwards. The names are kept because later code may rely on the
# instances.
sc = sc()
data = sc.fit_transform(data)
print(data)

pca = pca(n_components=10)
fit = pca.fit_transform(data)
data = pd.DataFrame(data=fit)  # data after processing by PCA

# Set up the model.
logisticRegressionInstance = lr()
logisticRegressionInstance.fit(data, label.astype('int'))

# Spot-check a fixed window of rows: true labels, model parameters, predicted
# classes and the probability of class 1.
sample = data[182100:182114]
print(label[182100:182114])
print(logisticRegressionInstance.get_params())
print(logisticRegressionInstance.predict(sample))
print(logisticRegressionInstance.predict_proba(sample)[:, 1])