Example #1
import numpy as np
import pandas as pd
from scipy.io import loadmat
from preprocessing import getData
from MLP_auto import MLP as MLP_auto
from MLP import MLP

if __name__ == '__main__':
    X, y_cat = getData('data.mat')

    # m = number of feature vectors
    m = X.shape[0]
    # n = number of features
    n = X.shape[1]

    train_percent = 0.6
    split = int(train_percent * m)
    X_train = X[:split, :]
    y_train = y_cat[:split, :]
    X_test = X[split:, :]
    y_test = y_cat[split:, :]

    # hidden layer sizes
    Layers = [42, 24, 12]
    alpha = 0.5
    max_iter = 30

    # pretraining 3 autoencoders
    model11 = MLP_auto([n, Layers[0]], ['sigmoid'])
    print("pre-training autoencoder 1")
    model11.train(X_train, X_train, alpha, 12, max_iter)
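    # A possible continuation (not part of the original snippet): pretrain the
    # remaining two autoencoders on the previous layer's activations. The
    # encode() method used here is hypothetical -- substitute whatever MLP_auto
    # actually exposes for reading hidden-layer outputs.
    H1 = model11.encode(X_train)  # hypothetical helper
    model12 = MLP_auto([Layers[0], Layers[1]], ['sigmoid'])
    print("pre-training autoencoder 2")
    model12.train(H1, H1, alpha, 12, max_iter)
    H2 = model12.encode(H1)  # hypothetical helper
    model13 = MLP_auto([Layers[1], Layers[2]], ['sigmoid'])
    print("pre-training autoencoder 3")
    model13.train(H2, H2, alpha, 12, max_iter)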
Example #2
import random

import pandas as pd

import preprocessing

cols = ['sentiment','id','date','query_string','user','text']
loc = "C:/Users/risha/Downloads/NLP Data/"
df = pd.read_csv(loc + "emoticon.csv", header=None, names=cols, encoding="ISO-8859-1")
df = df.drop(columns=['id','date','query_string','user'])
# pick random start rows in the negative and positive halves of the corpus,
# then take roughly 100,000 tweets from each to balance the sample
neg = random.randint(1, 650000)
pos = random.randint(800000, 1500000)

neg = df.loc[neg:(neg + 100000)]
pos = df.loc[pos:(pos + 100000)]
df = pd.concat([neg,pos])

print("Data Import Complete")

dataset = preprocessing.getData(df)

from sklearn.feature_extraction.text import TfidfVectorizer
cv = TfidfVectorizer(max_features=1500)
X = cv.fit_transform(dataset).toarray()
y = df.iloc[:, 0].values  # sentiment labels (column 0)

print("Vectorization finished")

# Training the Naive Bayes model on the Training set
import sklearn.naive_bayes as nb
classifier = nb.MultinomialNB()
classifier.fit(X, y)
print("training complete")

# adding tweet functionality
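# A minimal sketch of that functionality, assuming preprocessing.getData
# returns one cleaned string per input row, as it does for the training data
# above; classify_tweet and its one-row DataFrame are hypothetical.
def classify_tweet(tweet):
    cleaned = preprocessing.getData(pd.DataFrame({'sentiment': [0], 'text': [tweet]}))
    vec = cv.transform(cleaned).toarray()
    return classifier.predict(vec)[0]  # predicted sentiment label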
Example #3
#
# pos = twitter_samples.strings('positive_tweets.json')
# neg = twitter_samples.strings('negative_tweets.json')
# posData = preprocessing.getData(pos)
# negData = preprocessing.getData(neg)
#
# df1 = pd.DataFrame(negData,columns = ['Text'])
# df1 = df1.assign(Sent=0)
#
# df2 = pd.DataFrame(posData,columns = ['Text'])
# df2 = df2.assign(Sent=1)  # assign returns a new frame; the result must be kept
#
# df = pd.concat([df1, df2, df3])

import data_structuring
import preprocessing

data = data_structuring.df
dt = preprocessing.getData(data_structuring.arr)
dataset = []

for tokens in dt:  # avoid shadowing the built-in `list`
    dataset.append(" ".join(tokens))

from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1000)
X = cv.fit_transform(dataset).toarray()
y = data.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
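# A plausible completion (the original snippet is cut off here); the 80/20
# split and random_state are assumptions, not the author's values.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)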
Example #4
        5: lambda x, y, param: quadratic(x, y, param),
        6: lambda x, y, param: quadratic(x, y, param),
        7: lambda x, y, param: multiquadric(x, y, param),
        8: lambda x, y, param: multiquadric(x, y, param),
        9: lambda x, y, param: inverse_multiquadric(x, y, param),
        3: lambda x, y, param: sinc(x, y),
        0: lambda x, y, param: laplacean(x, y, param),
    }[i](x, y, param)


for i in range(0, 4):
    f.write("Zbior " + test[i] + ' ' + train[i] + '\n')
    f.write(
        "kernel; c/gamma; tol; Wektory podpierające; jakosc zbioru trenujacego; jakosc zbiory testowego \n"
    )
    X, y = getData(sciezka + train[i])
    X_test, y_test = getData(sciezka + test[i])
    for j in range(0, 10):
        gram = get_kernel(j, X, X, 10)
        clf = SVC(C=10, kernel='precomputed', tol=0.000001).fit(gram, y)
        print('Number of support vectors: ', np.sum(clf.n_support_))
        print('Training-set classification accuracy: ', clf.score(gram, y))
        # build the test Gram matrix with the same kernel j used for training
        # (the original called rbf_kernel here for every j)
        gram_test = get_kernel(j, X_test, X, 10)
        print('Test gram calculated')
        print('Test-set classification accuracy: ',
              clf.score(gram_test, y_test))
        cpredicted = clf.predict(gram_test)
        f.write(kernels[j])
        f.write(';')
        f.write(str(params[j]))
        f.write(';')
Example #5
        A2 = Y_tilda = sigmoid(Z2)
        # Backpropagation
        Delta_2 = (Y_tilda - Output) * sigmoidDerivative(A2)
        Delta_1 = W2.T.dot(Delta_2) * sigmoidDerivative(A1)
        # Weight and bias updates
        W2 = W2 - alpha * np.dot(Delta_2, A1.T)
        W1 = W1 - alpha * np.dot(Delta_1, Input.T)
        b2 = b2 - alpha * np.sum(Delta_2, axis=1, keepdims=True)
        b1 = b1 - alpha * np.sum(Delta_1, axis=1, keepdims=True)
    # Training complete
    return [W1, b1, sigmoid(W1.dot(Input) + b1)]


# %%

X, Y = getData('data.mat')
# Holdout method
train_percent = 0.7
train_size = int(train_percent * X.shape[0])
train_X = X[:train_size, :]
test_X = X[train_size:, :]
train_Y = Y[:train_size, :]
test_Y = Y[train_size:, :]
# transpose so each column is one sample, as the network code expects
train_X = train_X.T
train_Y = train_Y.T
test_X = test_X.T
test_Y = test_Y.T

# %%

# Deep stacked-autoencoder-based extreme learning machine.
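# Illustrative only (none of this is from the original snippet): a minimal
# single-hidden-layer ELM in NumPy -- fixed random hidden weights, output
# weights solved by least squares. The stacked-autoencoder variant would
# pretrain the hidden weights layer by layer instead of drawing them at random.
import numpy as np

def elm_fit(X, Y, hidden=64, seed=0):
    # X: (features, samples), Y: (classes, samples), matching the transposed
    # arrays above
    rng = np.random.default_rng(seed)
    W = rng.standard_normal((X.shape[0], hidden))  # fixed random hidden weights
    H = 1.0 / (1.0 + np.exp(-W.T.dot(X)))          # hidden-layer activations
    beta = Y.dot(np.linalg.pinv(H))                # least-squares output weights
    return W, beta  # predict with beta.dot(sigmoid(W.T.dot(X_new)))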
Example #6
# ---------------- Import ----------------
from preprocessing import getData, boolRain
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import sys

# ---------------- Main ----------------
print("---------------- Rain-Predictor ----------------")
print("Train \t (1)\nTest\t (2)\nUse\t (3)")
menu = int(input())

if menu == 1:
    # Pre-processing of data
    x, y = getData()

    # Model
    model = tf.keras.models.Sequential()

    # Layers
    model.add(tf.keras.layers.Dense(3, input_dim=3, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['binary_accuracy'])

    # Train
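    # A plausible completion (the snippet is cut off here); epochs and
    # batch_size are assumptions, not the author's values.
    model.fit(x, y, epochs=100, batch_size=32)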
Example #7
from preprocessing import getData
from sklearn.svm import SVR
import kernelFunctions as kf

#import matplotlib.pyplot as plt
#from matplotlib import cm
#from mpl_toolkits.mplot3d import Axes3D

sciezka = './dane/'
print('Using path: ' + sciezka)
sets = [['train6.csv', 'test6.csv'], ['train7.csv', 'test7.csv']]

# each entry is a list of C, gamma and degree
configs = [[1, 0.1, 2], [5, 0.2, 4], [20, 0.4, 8]]
for set_num in range(0, len(sets)):
    xlearn, rlearn = getData(sciezka + sets[set_num][0])
    xtest, rtest = getData(sciezka + sets[set_num][1])
    print(sets[set_num])
    for c_num in range(0, len(configs)):
        svr_rbf = SVR(kernel='rbf',
                      C=configs[c_num][0],
                      gamma=configs[c_num][1])
        svr_lin = SVR(kernel='linear', C=configs[c_num][0])
        svr_poly = SVR(kernel='poly',
                       C=configs[c_num][0],
                       degree=configs[c_num][2])
        r_laplacean = SVR(kernel='precomputed').fit(
            kf.laplacean(xlearn, xlearn, configs[c_num][1]),
            rlearn).predict(kf.laplacean(xtest, xlearn, configs[c_num][1]))
        r_sinc = SVR(kernel='precomputed').fit(
            kf.sinc(xlearn, xlearn, configs[c_num][0]),
Example #8
        resMoc = MOC4(c[j % 2], cores[j], tols[j % 4], train_mapped,
                      test_mapped, y_train, y_test)
        resOvo = OneVsAll4(c[j % 2], cores[j], tols[j % 4], train_mapped,
                           test_mapped, y_train, y_test)
    else:
        resMoc = MOC16(c[j % 2], cores[j], tols[j % 4], train_mapped,
                       test_mapped, y_train, y_test)
        resOvo = OneVsAll16(c[j % 2], cores[j], tols[j % 4], train_mapped,
                            test_mapped, y_train, y_test)
    writeRow(cores[j], c[j % 2], tols[j % 4], resMoc, resOvo)


f = open('results.csv', 'w')
# for each data set
kernels = [kf.laplacean, kf.sinc, kf.quadratic, kf.multiquadric]
for i in range(0, 4):
    x_train, y_train = getData(sciezka + train[i])
    x_test, y_test = getData(sciezka + test[i])
    # append a header for this data set to the results file
    f.write(test[i])
    f.write('\n')
    writeHeader(f)
    # for each configuration
    for j in range(0, 8):
        iteration(i, j, x_train, x_test)
    for j in range(8, 12):
        # precomputed test kernels must be evaluated against the training
        # points (the original passed x_test on both sides)
        iteration(i, j, kernels[j - 8](x_train, x_train, 10),
                  kernels[j - 8](x_test, x_train, 10))

f.close()
Example #9
        7: lambda x, y, param: multiquadric(x, y, param),
        8: lambda x, y, param: multiquadric(x, y, param),
        9: lambda x, y, param: inverse_multiquadric(x, y, param),
        3: lambda x, y, param: sinc(x, y, param),
        0: lambda x, y, param: laplacean(x, y, param),
    }[i](x, y, param)


print('Using path: ' + sciezka)
for i in range(0, 4):
    f.write("Zbior " + test[i] + ' ' + train[i] + '\n')
    f.write(
        "kernel; c; tol; gamma; wektory; jakosc zbioru trenujacego; jakosc zbiory testowego \n"
    )
    for j in range(0, 10):
        xlearn, clearn = getData(sciezka + train[i])
        xtest, ctest = getData(sciezka + test[i])

        clf = SVC(C=cs[j], kernel=kernels[j], gamma=gammas[i],
                  tol=tols[j]).fit(xlearn, clearn)

        print('Number of support vectors: ', np.sum(clf.n_support_))
        print('Training-set classification accuracy: ',
              clf.score(xlearn, clearn))
        print('Test-set classification accuracy: ',
              clf.score(xtest, ctest))

        cpredicted = clf.predict(xtest)
        print('Comparing predictions with the actual categories:')
        f.write(kernels[j])
        f.write(';')