Example #1
import numpy as np
import pandas as pd
from pickle import dump
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler as sc


class Model:
    lin_reg = Ridge()
    xscaler = sc()
    yscaler = sc()
    poly = PolynomialFeatures(degree=3)

    #Function to extract the necessary columns from the DataFrame
    def extract(self, df):
        y = df['Score']
        df = df.drop(
            ['labour_id', 'Name', 'skill', 'Latitudes', 'Longitudes', 'Score'],
            axis=1)
        x = df
        return x, y

    #Function to be called after extract for preprocessing
    def preprocess(self, x, y):
        gen = x['Gender']
        x = self.poly.fit_transform(x)
        x = self.xscaler.fit_transform(x)
        y = np.array(y).reshape(-1, 1)  #column vector, as the scaler expects
        y = self.yscaler.fit_transform(y)
        x = pd.DataFrame(x)
        x[0] = gen  #column 0 of the poly output is the bias term; replace it with the raw Gender flag
        dump(self.xscaler, open('xscaler+ve.pkl', 'wb'))
        dump(self.yscaler, open('yscaler+ve.pkl', 'wb'))
        dump(self.poly, open('poly.pkl', 'wb'))
        return x, y

    #Function for input preprocessing
    def preprocess_input(self, x):
        x = self.poly.transform(x)
        x = self.xscaler.transform(x)
        return x

    #Function to fit the model on x and y
    def fitmodel(self, x, y):
        k = self.lin_reg.fit(x, y)
        dump(self.lin_reg, open('model+ve.pkl', 'wb'))
        print(k)

    #Function to check that the model is predicting as expected
    def predict(self, x):
        y_pred = self.lin_reg.predict(x)
        #print(x.shape)
        return self.yscaler.inverse_transform(y_pred)
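A minimal usage sketch for this class, assuming StandardScaler is aliased as sc (as elsewhere in these examples) and a CSV holding the columns extract() expects; the file name is hypothetical:

model = Model()
df = pd.read_csv('labour.csv')  #hypothetical input file
x, y = model.extract(df)        #split features from the 'Score' target
x, y = model.preprocess(x, y)   #fit poly/scalers and pickle them
model.fitmodel(x, y)            #fit Ridge and dump it to model+ve.pkl
print(model.predict(x)[:5])     #sanity check on the training data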
Example #2
    def preprocess(self, x, scaler_type):
        gen = x['Gender']
        # '+' / '-' pick the scaler fitted for the positive or negative score model
        if scaler_type == '+':
            xscaler = self.xscaler_positive
        elif scaler_type == '-':
            xscaler = self.xscaler_negative

        x = self.poly.transform(x)
        x = xscaler.transform(x)
        x = pd.DataFrame(x)
        x[0] = gen
        return x
Example #3
    def predict(self, x, model_type, indexes):
        # '+' / '-' pick the model/scaler pair fitted for that score sign
        if model_type == '+':
            model = self.model_positive
            yscaler = self.yscaler_positive
        elif model_type == '-':
            model = self.model_negative
            yscaler = self.yscaler_negative

        y_pred = yscaler.inverse_transform(model.predict(x))
        # Map each row back to its original id (built by preprocess_input below)
        return {indexes[i]: y_pred[i][0] for i in range(len(indexes))}
Example #4
    def preprocess_input(self, x, scaler_type):
        #x is a dictionary of feature lists keyed by id
        xs = []
        indexes = {}
        for c, (key, value) in enumerate(x.items()):
            indexes[c] = key  #remember which row belongs to which id
            xs.append(value)

        x = self.poly.transform(np.array(xs))
        if scaler_type == '+':
            xscaler = self.xscaler_positive
        elif scaler_type == '-':
            xscaler = self.xscaler_negative
        x = xscaler.transform(x)
        return x, indexes
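A sketch of how the fragments above fit together at inference time, assuming they are methods of one fitted model object m (the ids and feature values are made up):

candidates = {'id1': [1, 25, 4.5], 'id2': [0, 30, 3.9]}  #hypothetical features per id
x, indexes = m.preprocess_input(candidates, '+')  #dict of lists -> matrix + row-index map
print(m.predict(x, '+', indexes))                 #{'id1': score, 'id2': score}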
Example #5
from sklearn.preprocessing import LabelEncoder, OneHotEncoder as ohe

le_x_2 = LabelEncoder()  #label-encode the categorical column first
x[:, 2] = le_x_2.fit_transform(x[:, 2])
ohec = ohe(categorical_features=[1])  #column index to one-hot encode (legacy sklearn <0.20 API)
x = ohec.fit_transform(x).toarray()
#Drop one dummy column to avoid the dummy variable trap: for three classes,
#two dummy variables are enough (the third is implied).
x = x[:, 1:]

#data splitting
from sklearn.model_selection import train_test_split as tts
x_train, x_test, y_train, y_test = tts(x, y, test_size=0.2, random_state=0)

#feature scaling
from sklearn.preprocessing import StandardScaler as sc
sc_x = sc()
x_train = sc_x.fit_transform(x_train)  #standardize: fit on train only, reuse for test
x_test = sc_x.transform(x_test)

import keras as ke
from keras.models import Sequential  #to initialize the ann
from keras.layers import Dense  #to build the layers of the ann
from keras.layers import Dropout as dr

#INITIALIZING THE ANN
classifier = Sequential()  #the ANN as a sequence of layers

#adding the input layer and first hidden layer (the snippet is cut off mid-call here)
classifier.add(
    Dense(activation="relu",
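The categorical_features argument used above was removed in scikit-learn 0.20; a hedged pandas alternative for the same encode-and-drop-one step (a sketch, not the original code):

import pandas as pd
#One-hot encode column 1 and drop the first dummy level (avoids the dummy variable trap)
x = pd.get_dummies(pd.DataFrame(x), columns=[1], drop_first=True).values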
Example #6
def preprocess_normalize(df):
    #quantitative_data and numerical_data are lists of column names defined
    #elsewhere in the project; each group is standardized in place.
    df[quantitative_data] = sc().fit_transform(df[quantitative_data])
    df[numerical_data] = sc().fit_transform(df[numerical_data])
    return df
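A hypothetical usage sketch (the column lists and values are assumptions):

import pandas as pd
from sklearn.preprocessing import StandardScaler as sc

quantitative_data = ['age', 'income']  #assumed column names
numerical_data = ['visits']
df = pd.DataFrame({'age': [20., 30., 40.],
                   'income': [1e4, 2e4, 3e4],
                   'visits': [1., 2., 3.]})
df = preprocess_normalize(df)  #listed columns now have mean 0 and unit variance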
Example #7
import numpy as np  #numerical library
import pandas as pd  #data analysis library
from sklearn.preprocessing import StandardScaler as sc

filepath = "C:/Users/Condominios Manzano/Desktop/Machine Learning/python-ml-course-master/python-ml-course-master/datasets/iris/iris.csv"
df = pd.read_csv(filepath)
Y = df.iloc[:, -1]
X = df.iloc[:, :-1]
#Implement PCA from scratch
#3 methods to find the eigenvectors and eigenvalues:
#1) Variance-covariance matrix
#2) Correlation matrix
#3) Singular value decomposition

#For method 1 the data is standardized first, since the features are in different units

X_std = sc().fit_transform(X)
x_ = np.mean(X_std, axis=0)
#1) Variance-covariance matrix
cov_ = (1 / (len(X) - 1)) * ((X_std - x_).T @ (X_std - x_))
ceig_val, ceig_vec = np.linalg.eig(cov_)

#2) Correlation matrix
corr_ = np.corrcoef(X_std.T)
eig_val, eig_vec = np.linalg.eig(corr_)
cor = np.corrcoef(X.T)

#3) Singular value decomposition
u, s, v = np.linalg.svd(X_std.T)

#Principal component selection: keep the eigenvectors that explain most of the variance
sum_com = ceig_val.sum()
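The usual next step (a sketch consistent with the variables above) converts the eigenvalues into explained-variance ratios to decide how many components to keep:

#Share of the total variance explained by each component, largest first
var_exp = sorted(ceig_val / sum_com, reverse=True)
cum_var_exp = np.cumsum(var_exp)
print(cum_var_exp)  #keep enough components to reach e.g. 95%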
Example #8
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler as sc

test_x_set = dataset.iloc[2375:, 1:125].values
test_y_set = dataset.iloc[2375:, 125:128].values


# Taking care of missing data: replace NaN with the column average
col_mean = np.nanmean(training_x_set, axis=0)
inds = np.where(np.isnan(training_x_set))
training_x_set[inds] = np.take(col_mean, inds[1])

col_mean2 = np.nanmean(test_x_set, axis=0)
inds = np.where(np.isnan(test_x_set))
test_x_set[inds] = np.take(col_mean2, inds[1])


# feature scaling
scaled = sc()
training_x_set = scaled.fit_transform(training_x_set)

#%%

# making learning model
model = Sequential()
# number of inputs == ((6*9)+(8*1)) * 2 = 124
model.add(Dense(100, input_dim = 124, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(80, activation = 'relu'))
model.add(Dropout(0.2))
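The np.nanmean / np.where / np.take idiom above is easiest to see on a toy array (a self-contained demo, not project data):

import numpy as np

a = np.array([[1.0, np.nan], [3.0, 4.0]])
col_mean = np.nanmean(a, axis=0)      #column means ignoring NaN: [2.0, 4.0]
inds = np.where(np.isnan(a))          #row/column indices of the NaNs
a[inds] = np.take(col_mean, inds[1])  #a -> [[1.0, 4.0], [3.0, 4.0]]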
Example #9
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from sklearn.decomposition import PCA
from scipy import linalg as alg
r = pd.read_csv('bank_contacts.csv')
r_std = sc().fit_transform(r)
"""
method to find covariance matrix manually
mean_vec=np.mean(r_std,axis=0)
cov_mat=(r_std-mean_vec).T.dot((r_std-mean_vec)) / (r_std.shape[0]-1)
print("covariance matrix:",cov_mat)
"""
#by function
cov_mat = np.cov(r_std.T)
eig_val, eig_vec = alg.eig(cov_mat)
print(eig_val)
print(eig_vec)
#for visually confirming eigen pairs
eig_pairs = [(np.abs(eig_val[i]), eig_vec[:, i]) for i in range(len(eig_val))]
for i in eig_pairs:
    print(i[0])

t = PCA(n_components=2)
t.fit(r)
print(t.explained_variance_ratio_)
##for cumulative variance
#print(t.explained_variance_ratio_.sum())
u = t.fit(r_std)  #refit on the standardized data before transforming
f = t.transform(r_std)
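As a quick consistency check (a sketch using the variables above), the commented-out manual formula and np.cov agree:

mean_vec = np.mean(r_std, axis=0)
cov_manual = (r_std - mean_vec).T.dot(r_std - mean_vec) / (r_std.shape[0] - 1)
print(np.allclose(cov_manual, cov_mat))  #True: both compute the sample covariance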
Example #10
File: lstm.py Project: abernal2/AMZ_pred
import pandas as pd
import numpy as np
from keras.models import Sequential, model_from_json
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler as sc
import matplotlib.pyplot as plt

# Training
training_data = pd.read_csv('AMZNtrain.csv')
num_samples = training_data.shape[0]

trainX = training_data.iloc[:, 1:-2]

# Scaling each dimension/feature
list_sc = [sc(feature_range=(0, 1)) for i in range(4)]

for i in range(4):
    trainX.iloc[:, i] = list_sc[i].fit_transform(
        trainX.iloc[:, i].values.reshape(num_samples, 1))

trainX_scaled = trainX.values

lookback = 60
X_train = []
y_train = []
for i in range(lookback, num_samples):
    # Each sample is the previous 60 days of all features (open, high, low, close)
    X_train.append(trainX_scaled[i - lookback:i])
    # The target is the 'close' price, i.e. the last column
    y_train.append(trainX_scaled[i, -1])
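Keras LSTM layers expect input shaped (samples, timesteps, features), so the usual next step (a sketch, not the original code) is:

X_train, y_train = np.array(X_train), np.array(y_train)
print(X_train.shape)  #(num_samples - lookback, 60, 4): samples x timesteps x features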
Example #11
"""
@author: Thineth
"""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

dataset = pd.read_csv('Position_salaries.csv')

X = dataset.iloc[:, 2:3].values
y = dataset.iloc[:, 2].values

from sklearn.preprocessing import StandardScaler as sc

sc_X = sc()
sc_y = sc()

X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y.reshape(-1, 1)).ravel()  #the scaler needs a 2-D column

from sklearn.svm import SVR

regressor = SVR(kernel='rbf')

regressor.fit(X, y)

y_pred = regressor.predict(sc_X.transform([[6.5]]))  #transform, not fit_transform
y_pred = sc_y.inverse_transform(y_pred.reshape(-1, 1))

plt.scatter(X, y, color='red')
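The snippet cuts off here; a typical continuation (an assumption, not the original code) draws the fitted curve on the same scaled axes:

plt.plot(X, regressor.predict(X), color='blue')  #SVR fit in scaled space
plt.title('SVR (RBF kernel)')
plt.show()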
Example #12
import pandas as pd
# Legacy scikit-learn (<0.20) APIs: Imputer, cross_validation, categorical_features
from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler as sc
from sklearn.cross_validation import train_test_split

#Importing Datasets
dataset = pd.read_csv('datasets/Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 3].values

# Handle Missing Data
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])


#Categorical Data handling
labelEncoder_X = LabelEncoder()
X[:, 0] = labelEncoder_X.fit_transform(X[:, 0])

oneHotEncoder = OneHotEncoder(categorical_features=[0])
X = oneHotEncoder.fit_transform(X).toarray()

labelEncoder_Y = LabelEncoder()
Y = labelEncoder_Y.fit_transform(Y)

# Splitting the dataset into the Training set and Test set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Feature Scaling
sc_X  = sc()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
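For scikit-learn 0.20+, where Imputer, sklearn.cross_validation, and categorical_features no longer exist, a hedged sketch of the modern equivalents of the steps above:

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])
# One-hot encode column 0, keep the remaining columns unchanged
ct = ColumnTransformer([('ohe', OneHotEncoder(), [0])], remainder='passthrough')
X = ct.fit_transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)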
Example #13
# aliases implied by the sc()/pca()/lr() calls below
from sklearn.preprocessing import StandardScaler as sc
from sklearn.decomposition import PCA as pca
from sklearn.linear_model import LogisticRegression as lr

# preprocess time: keep only the hour from the 'HH:MM' strings
tempTime = cleanData['Time'].str.split(':', expand=True).pop(0)
tempTime = pd.DataFrame(tempTime, dtype=float)  # np.float was removed from NumPy
cleanData = cleanData.drop(columns=['Time'])
cleanData.insert(3, 'Time', tempTime)

# change type to numpy array
data = cleanData.values
c = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18]

# get features and labels
label = data[:, 12]
data = data[:, c]

# scale data
scaler = sc()  # renamed so the class aliases aren't shadowed
data = scaler.fit_transform(data)
print(data)
pca_model = pca(n_components=10)
fit = pca_model.fit_transform(data)
data = pd.DataFrame(data=fit)  # data after processing by PCA
# set up the model
logisticRegressionInstance = lr()
logisticRegressionInstance.fit(data, label.astype('int'))

print(label[182100:182114])
print(logisticRegressionInstance.get_params())
print(logisticRegressionInstance.predict(data[182100:182114]))
print(logisticRegressionInstance.predict_proba(data[182100:182114])[:, 1])
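The scale -> PCA -> logistic regression chain above is a natural fit for a Pipeline (a sketch reusing cleanData, c, and label from the snippet):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scale', StandardScaler()),
                 ('pca', PCA(n_components=10)),
                 ('clf', LogisticRegression())])
pipe.fit(cleanData.values[:, c], label.astype('int'))
print(pipe.predict(cleanData.values[182100:182114, c]))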