def main():
    """Run the Streamlit page: log in to DeGiro, load data, render the view.

    While ``state.degiro`` is unset a login form (username, password and an
    optional 2FA code) is shown inside a placeholder. After a successful
    login, and while ``state.products`` is still ``None``, the placeholder is
    cleared and ``process.get_data(state)`` is called. The page is rendered
    once ``state.products`` is available, and the state is synced at the end.
    """
    st.set_page_config(layout='wide')
    state = _get_state()
    login_slot = st.empty()

    if state.degiro is None:
        with login_slot.form('2FA'):
            user = st.text_input('Username', '')
            secret = st.text_input('Password', '', type='password')
            otp_code = st.text_input('2FA - Leave empty if not needed', '')
            submitted = st.form_submit_button('Submit')
            if submitted:
                state.degiro = degiroapi.DeGiro()
                try:
                    state.degiro.login(user, secret, otp_code)
                except Exception:
                    # Every login failure is reported as bad credentials;
                    # drop the client and halt this script run.
                    st.warning('Wrong credentials')
                    state.degiro = None
                    st.stop()

    needs_data = state.degiro and state.products is None
    if needs_data:
        login_slot.empty()
        process.get_data(state)

    if state.products is not None:
        show_page(state)

    state.sync()
def using_course_code():
    """Fit a linear multi-class classifier with full-batch gradient descent.

    Loads the train/test split from ``get_data``, runs 10000 update steps,
    printing the train/test cost every 1000 steps, and finally prints the
    classification rate on both sets (using the predictions of the last
    iteration).
    """
    Xtrain, Ytrain, Xtest, Ytest = get_data()
    n_features = Xtrain.shape[1]
    n_classes = len(set(Ytrain) | set(Ytest))

    W = np.random.randn(n_features, n_classes)
    b = np.zeros(n_classes)

    # Indicator (one-hot style) targets for the cost and the gradient.
    Ytrain_ind = y2indicator(Ytrain, n_classes)
    Ytest_ind = y2indicator(Ytest, n_classes)

    train_costs, test_costs = [], []
    learning_rate = 0.001
    for step in range(10000):
        pYtrain = forward(Xtrain, W, b)
        pYtest = forward(Xtest, W, b)

        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)
        train_costs.append(ctrain)
        test_costs.append(ctest)

        # Gradient-descent step; the prediction error is shared by W and b.
        residual = pYtrain - Ytrain_ind
        W -= learning_rate * Xtrain.T.dot(residual)
        b -= learning_rate * residual.sum(axis=0)

        if step % 1000 == 0:
            print(step, ctrain, ctest)

    print("Final train classification_rate:",
          classification_rate(Ytrain, predict(pYtrain)))
    print("Final test classification_rate:",
          classification_rate(Ytest, predict(pYtest)))
def _grid_search(hyperparameters):
    """Exhaustively search ``MLPClassifier`` settings on the e-commerce data.

    Args:
        hyperparameters: a mapping (or an iterable of ``(name, candidates)``
            pairs) from an ``MLPClassifier`` keyword argument to the list of
            candidate values to try.

    Every combination of candidate values is fitted on the training split and
    scored on the test split. Progress and the best combination found are
    printed; nothing is returned.
    """
    path = '/Users/rngentry/machinelearning/machine_learning_examples/ann_logistic_extra/ecommerce_data.csv'

    # Get and shuffle ecommerce data
    Xtrain, Ytrain, Xtest, Ytest = get_data(path)
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Xtest, Ytest = shuffle(Xtest, Ytest)

    # Iterating a dict directly yields only its keys, so ask for the
    # (name, candidates) pairs explicitly when a mapping is passed.
    if hasattr(hyperparameters, 'items'):
        pairs = hyperparameters.items()
    else:
        pairs = hyperparameters

    keys = []
    values = []
    for k, v in pairs:
        keys.append(k)
        values.append(v)

    best_params = {}
    best_accuracy = 0
    # The product must run over ALL candidate lists (``*values``); the
    # previous ``*v`` expanded only the last list.
    for combination in itertools.product(*values):
        # Create the classifier model
        params = dict(zip(keys, combination))
        print(params)
        model = MLPClassifier(**params)
        model.fit(Xtrain, Ytrain)

        test_accuracy = model.score(Xtest, Ytest)
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_params = params
            print('Current best accuracy', best_accuracy)
            print('Current best params', params)

    print('The best accuaracy was', best_accuracy)
    print('With these parameters', best_params)
def main():
    """Train a 16-hidden-unit, single-output network batch by batch.

    The first 85% of the rows from ``get_data`` form the training set, which
    is split with ``np.split`` into 50000 equal sections; one section is used
    per update step. Loss and classification-rate curves are plotted, the
    held-out rows are scored, and the learned parameters are written to
    ``matrix.npz``.
    """
    X, Y = get_data()
    n_inputs = X.shape[1]
    n_hidden = 16
    n_outputs = 1

    W1 = np.random.randn(n_inputs, n_hidden)
    B1 = np.random.randn(n_hidden)
    W2 = np.random.randn(n_hidden, n_outputs)
    B2 = np.random.randn(n_outputs)

    train_size = .85
    # One update per section: the number of steps equals the section count.
    epochs = batch_size = 50000

    x_cut = int(X.shape[0] * train_size)
    y_cut = int(Y.shape[0] * train_size)
    X_train, X_test = X[:x_cut, :], X[x_cut:, :]
    Y_train, Y_test = Y[:y_cut], Y[y_cut:]

    X_batch = np.split(X_train, batch_size)
    Y_batch = np.split(Y_train, batch_size)

    learning_rate = 1e-5
    losses = []
    rates = []
    for epoch in range(epochs):
        xb = X_batch[epoch]
        yb = Y_batch[epoch]

        YP, Z1 = feedforward(xb, W1, W2, B1, B2)
        Z2 = YP

        l = loss(YP, yb)
        losses.append(-l)
        r = class_rate(np.rint(YP), yb)
        rates.append(r)
        if epoch % 1000 == 0:
            print("loss: {} class rate: {}".format(l, r))

        # Order matters: W2 is updated in place before the W1/B1 gradients
        # read it.
        W2 += learning_rate * gradient_w2(YP, yb, Z2, Z1)
        B2 += learning_rate * gradient_b2(YP, yb, Z2)
        W1 += learning_rate * gradient_w1(YP, yb, Z2, W2, Z1, xb)
        B1 += learning_rate * gradient_b1(YP, yb, Z2, W2, Z1)

    plt.plot(losses)
    plt.show()
    plt.plot(rates)
    plt.show()

    test_out, _ = feedforward(X_test, W1, W2, B1, B2)
    r = class_rate(np.rint(test_out), Y_test)
    print("test-set class rate: ", r)

    np.savez('matrix.npz', W2=W2, B2=B2, W1=W1, B1=B1)
def main():
    """Train a one-hidden-layer network on the e-commerce data and plot costs.

    Loads the train/test split with ``get_data``, runs 10000 full-batch
    gradient-descent steps using the ``derivative_wrt_*`` helpers, prints the
    cost every 1000 steps, then prints the final classification rates and
    shows the train/test cost curves.
    """
    data_file = "/Users/rngentry/machinelearning/machine_learning_examples/ann_logistic_extra/ecommerce_data.csv"
    Xtrain, Ytrain, Xtest, Ytest = get_data(data_file)

    D = Xtrain.shape[1]
    K = len(set(Ytrain) | set(Ytest))
    M = 5  # hidden units
    print(Xtrain[0:4, :])

    Ytrain_ind = y2indicator(Ytrain, K)
    Ytest_ind = y2indicator(Ytest, K)

    W1 = np.random.randn(D, M)
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K)
    b2 = np.zeros(K)

    traincosts = []
    testcosts = []
    learningrate = 0.001
    for i in range(10000):
        A1, Ytrain_pred = forward(Xtrain, W1, b1, W2, b2)
        _, Ytest_pred = forward(Xtest, W1, b1, W2, b2)

        ctrain = cross_entropy(Ytrain_ind, Ytrain_pred)
        ctest = cross_entropy(Ytest_ind, Ytest_pred)
        traincosts.append(ctrain)
        testcosts.append(ctest)
        if i % 1000 == 0:
            print(i, ctrain, ctest)

        # NOTE(review): the W1/b1 gradients are evaluated with the already
        # updated W2; kept exactly as before.
        W2 -= learningrate * derivative_wrt_w2(Ytrain_ind, Ytrain_pred, A1)
        b2 -= learningrate * derivative_wrt_b2(Ytrain_ind, Ytrain_pred)
        W1 -= learningrate * derivative_wrt_w1(Xtrain, W2, A1, Ytrain_ind, Ytrain_pred)
        b1 -= learningrate * derivative_wrt_b1(Ytrain_ind, Ytrain_pred, W2, A1)

    # classification rate (from the predictions of the last iteration)
    Ptrain = np.argmax(Ytrain_pred, axis=1)
    Ptest = np.argmax(Ytest_pred, axis=1)
    crtrain = classification_rate(Ytrain, Ptrain)
    crtest = classification_rate(Ytest, Ptest)
    print("Final train classification_rate:", crtrain)
    print("Final test classification_rate:", crtest)

    legend1, = plt.plot(traincosts, label='train cost')
    legend2, = plt.plot(testcosts, label='test cost')
    # A single positional argument to legend() is interpreted as labels, so
    # the line artists must be passed by keyword.
    plt.legend(handles=[legend1, legend2])
    plt.show()
def main():
    """Train a linear multi-class classifier on the e-commerce data.

    Loads the train/test split with ``get_data``, runs 10000 full-batch
    gradient-descent steps, prints the costs every 1000 steps, then prints
    the final classification rates and shows the train/test cost curves.
    """
    data_file = "/Users/rngentry/machinelearning/machine_learning_examples/ann_logistic_extra/ecommerce_data.csv"

    # Get data
    Xtrain, Ytrain, Xtest, Ytest = get_data(data_file)
    D = Xtrain.shape[1]
    K = len(set(Ytrain) | set(Ytest))

    # convert to indicator matrices
    Ytrain_ind = y2indicator(Ytrain, K)
    Ytest_ind = y2indicator(Ytest, K)

    # initialize weights and biases
    W = np.random.randn(D, K)
    b = np.zeros(K)

    traincosts = []
    testcosts = []
    alpha = 1e-3
    for i in range(10000):
        # calculate predicted Y given X
        Ytrain_pred = forward(Xtrain, W, b)
        Ytest_pred = forward(Xtest, W, b)

        # The per-iteration classification_rate calls were removed: their
        # results were never used and they compared indicator matrices with
        # raw model outputs.
        traincost = cross_entropy(Ytrain_ind, Ytrain_pred)
        testcost = cross_entropy(Ytest_ind, Ytest_pred)
        traincosts.append(traincost)
        testcosts.append(testcost)

        error = Ytrain_pred - Ytrain_ind
        W -= alpha * Xtrain.T.dot(error)
        b -= alpha * error.sum(axis=0)
        if i % 1000 == 0:
            print(i, traincost, testcost)

    print("Final train classification_rate:",
          classification_rate(Ytrain, predict(Ytrain_pred)))
    print("Final test classification_rate:",
          classification_rate(Ytest, predict(Ytest_pred)))

    legend1, = plt.plot(traincosts, label='train cost')
    legend2, = plt.plot(testcosts, label='test cost')
    # A single positional argument to legend() is interpreted as labels, so
    # the line artists must be passed by keyword.
    plt.legend(handles=[legend1, legend2])
    plt.show()
def get_some_batch():
    """Fetch a single batch from the English training data.

    Builds a ``data.Iterator`` with ``batch_size=3`` over the dataset
    returned by ``get_data`` for ``dataset/train.en`` and takes its first
    batch.

    Returns:
        Element 1 of the first batch's ``en`` attribute.

    Raises:
        ValueError: if the iterator yields no batch at all.
    """
    data_path = "dataset/"
    file_name = "train."
    _, train_en = get_data(file_path=data_path + file_name + 'en',
                           field_name='en')
    train_loader = data.Iterator(
        train_en,
        batch_size=3,
        device=None,  # if using GPU, type "cuda"
        repeat=False)

    # Take the first batch explicitly instead of relying on a loop variable
    # that stays unbound when the iterator is empty.
    batch = next(iter(train_loader), None)
    if batch is None:
        raise ValueError("training iterator produced no batches")
    return batch.en[1]
def main():
    """Train a one-hidden-layer network and report train/test scores.

    The data from ``get_data`` is shuffled; the last 100 rows are held out
    for testing. Training runs for 10000 epochs, logging cost and score every
    100 epochs, then plots the logged costs and prints the test score.
    """
    X, Y = get_data()
    X, Y = shuffle(X, Y)
    Y = Y.astype(np.int32)  # labels are used as integer class ids

    D = X.shape[1]
    K = len(set(Y))
    M = 5

    # Hold out the final 100 samples.
    Xtrain, Xtest = X[:-100], X[-100:]
    Ytrain, Ytest = Y[:-100], Y[-100:]
    T_train = y2indicator(Ytrain, K)
    T_test = y2indicator(Ytest, K)

    W1 = np.random.randn(D, M)
    b1 = np.random.randn(M)
    W2 = np.random.randn(M, K)
    b2 = np.random.randn(K)

    learning_rate = 0.001
    train_costs = []
    for epoch in range(10000):
        output, hidden = forward(Xtrain, W1, b1, W2, b2)

        if epoch % 100 == 0:
            c_train = cost(T_train, output)
            r = score(Ytrain, np.argmax(output, axis=1))
            print(
                f'epoch #: {epoch} Train Cost: {c_train} score: {r}')
            train_costs.append(c_train)

        # The dirv_* helpers are added, not subtracted. W2 is updated in
        # place before the W1/b1 terms read it.
        W2 += learning_rate * dirv_W2(hidden, T_train, output)
        b2 += learning_rate * dirv_b2(T_train, output)
        W1 += learning_rate * dirv_W1(Xtrain, hidden, T_train, output, W2)
        b1 += learning_rate * dirv_b1(hidden, T_train, output, W2)

    plt.plot(train_costs)
    plt.show()

    output, _ = forward(Xtest, W1, b1, W2, b2)
    yhat = np.argmax(output, axis=1)
    print(f'Test Score: {score(Ytest,yhat)}')
def main():
    """Score an untrained one-hidden-layer network on the given data file.

    The data path is read from the first command-line argument. Weights are
    drawn at random (biases start at zero), the first ten rows of the network
    output are printed, and the classification rate of the arg-max
    predictions is reported.
    """
    filepath = sys.argv[1]
    X, Y, _, _ = get_data(filepath)

    # Network dimensions: 5 hidden units, one output per distinct label.
    n_hidden = 5
    n_inputs = X.shape[1]
    n_classes = len(set(Y))

    # randomly initialize weights
    W1 = np.random.randn(n_inputs, n_hidden)
    b1 = np.zeros(n_hidden)
    W2 = np.random.randn(n_hidden, n_classes)
    b2 = np.zeros(n_classes)

    Y_pred = forward(X, W1, b1, W2, b2)
    print(Y_pred[:10, :])

    labels = np.argmax(Y_pred, axis=1)
    print('The classification rate is:', classification_rate(labels, Y))
# # the notes for this class can be found at: # https://deeplearningcourses.com/c/data-science-deep-learning-in-python # https://www.udemy.com/data-science-deep-learning-in-python from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import sys sys.path.append('../ann_logistic_extra') from process import get_data from sklearn.neural_network import MLPClassifier from sklearn.utils import shuffle # get the data Xtrain, Ytrain, Xtest, Ytest = get_data() # create the neural network model = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000) # train the neural network model.fit(Xtrain, Ytrain) # print the train and test accuracy train_accuracy = model.score(Xtrain, Ytrain) test_accuracy = model.score(Xtest, Ytest) print("train accuracy:", train_accuracy, "test accuracy:", test_accuracy)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from process import get_data

X, T = get_data()

# Sizes: N samples, D features, M hidden units, K classes.
N, D = X.shape
# 3 hidden units were tried first (ann_predict uses 5); 5 performed better.
# Picking the number of units is not straightforward; it likely relates to
# the number of features (D) and classes (K).
M = 5
K = len(set(T))

# One-hot encode the targets: put a 1 in column T[i] of row i.
Thot = np.zeros((N, K))
row_index = np.arange(N)
Thot[row_index, T.astype(np.int32)] = 1

# Shuffle inputs, labels and one-hot targets together before splitting.
# The plain labels are kept next to the one-hot matrix for reference; an
# alternative is to keep only the one-hot version and arg-max it when
# computing the prediction rate.
X, T, Thot = shuffle(X, T, Thot)

# Hold out the last 20% of the samples for testing.
Ntest = int(.2 * N)

Xtrain = X[:-Ntest, :]
Ttrain = T[:-Ntest]
THOTtrain = Thot[:-Ntest, :]

Xtest = X[-Ntest:, :]
Ttest = T[-Ntest:]
from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np from process import get_data X, Y, _, _ = get_data() # randomly initialize weights M = 5 D = X.shape[1] K = len(set(Y)) W1 = np.random.randn(D, M) b1 = np.zeros(M) W2 = np.random.randn(M, K) b2 = np.zeros(K) # make predictions def softmax(a): expA = np.exp(a) return expA / expA.sum(axis=1, keepdims=True) def forward(X, W1, b1, W2, b2): Z = np.tanh(X.dot(W1) + b1) return softmax(Z.dot(W2) + b2) P_Y_given_X = forward(X, W1, b1, W2, b2) print("P_Y_given_X.shape:", P_Y_given_X.shape)
#!/usr/bin/env python3
# Section 2.9
# Import sample data, and calculate output of logistic regression with random weights

import numpy as np
from process import get_data
from util import sigmoid


# Calculate sigmoid of weighted sum plus bias
def forward(X, W, b):
    """Return ``sigmoid(X.dot(W) + b)``."""
    return sigmoid(X.dot(W) + b)


# Calculate prediction accuracy
def classification_rate(Y, P):
    """Return the fraction of entries where ``Y`` equals ``P``."""
    return np.mean(Y == P)


# Close the data file as soon as it has been read instead of leaking the
# handle for the lifetime of the script.
# NOTE(review): assumes get_data reads the file fully before returning.
with open('ecommerce_data.csv') as data_file:
    X, Y = get_data(data_file)
D = X.shape[1]

# 1e4 random trials: average the accuracy of randomly drawn weight vectors.
n_trials = 10000
total = 0
for i in range(n_trials):
    W = np.random.randn(D)
    b = 0
    predictions = np.round(forward(X, W, b))
    total += classification_rate(Y, predictions)

print(total / n_trials)
def processpdf(filename):
    """Return the data extracted from an uploaded file as a JSON response.

    ``filename`` is resolved inside the configured ``UPLOAD_FOLDER`` and the
    resulting path is handed to ``get_data``; whatever that returns is passed
    to ``jsonify``.
    """
    upload_dir = app.config['UPLOAD_FOLDER']
    file_path = os.path.join(upload_dir, filename)
    return jsonify(get_data(file_path))
# the graph creation module import graphs # the parameters for filtering from config import variables, crime_types, base_columns app = dash.Dash(__name__) # app.config['suppress_callback_exceptions'] = True app.title = 'Exploring Crimes Dashboard' server = app.server server.secret_key = os.environ.get('SECRET_KEY', 'my-secret-key') # Get and process the data df = process.get_data() app.layout = html.Div(children=[ # The controls html.Div([ # The checkboxes html.Div([ html.H5('Crime Types'), dcc.Checklist( id='crime_checks', options=[{'label': i, 'value': i} for i in crime_types], values=crime_types, labelStyle={'display': 'inline-block' },
from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np from process import get_data X, Y, _, _ = get_data() # randomly initialize weights M = 5 D = X.shape[1] K = len(set(Y)) W1 = np.random.randn(D, M) b1 = np.zeros(M) W2 = np.random.randn(M, K) b2 = np.zeros(K) # make predictions def softmax(a): expA = np.exp(a) return expA / expA.sum(axis=1, keepdims=True) def forward(X, W1, b1, W2, b2): Z = np.tanh(X.dot(W1) + b1) return softmax(Z.dot(W2) + b2)
import numpy as np
from process import get_data

x, y = get_data()

# Hidden units
m = 5
# input shape
d = x.shape[1]
# number of distinct labels
k = len(set(y))

# initialize weights and biases
w1 = np.random.randn(d, m)
b1 = np.zeros(m)
w2 = np.random.randn(m, k)
b2 = np.zeros(k)


def softmax(a):
    """Return the row-wise softmax of the 2-D array ``a``.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing
    (and producing NaN) for large activations.
    """
    shifted = a - a.max(axis=1, keepdims=True)
    expa = np.exp(shifted)
    return expa / expa.sum(axis=1, keepdims=True)


def forward(x, w1, b1, w2, b2):
    """Run a tanh hidden layer followed by a softmax output layer."""
    z = np.tanh(x.dot(w1) + b1)
    return softmax(z.dot(w2) + b2)


P_Y_given_x = forward(x, w1, b1, w2, b2)
predictions = np.argmax(P_Y_given_x, axis=1)
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from process import get_data


def y2indicator(y, K):
    """Return an ``(len(y), K)`` indicator matrix for integer labels ``y``.

    Row ``i`` is all zeros except for a 1 in column ``y[i]``. The assignment
    is vectorized, which also removes the Python-2-only ``xrange`` call.
    """
    N = len(y)
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind


X, Y = get_data()
X, Y = shuffle(X, Y)
Y = Y.astype(np.int32)  # labels are used as column indices
D = X.shape[1]
K = len(set(Y))

# create train and test sets (the last 100 samples are held out)
Xtrain = X[:-100]
Ytrain = Y[:-100]
Ytrain_ind = y2indicator(Ytrain, K)
Xtest = X[-100:]
Ytest = Y[-100:]
Ytest_ind = y2indicator(Ytest, K)

# randomly initialize weights
W = np.random.randn(D, K)
b = np.zeros(K)
def main():
    """Train a tanh/one-hidden-layer classifier with hand-written backprop.

    Loads the train/test split from ``get_data``, runs 10000 full-batch
    gradient-descent steps, prints the costs every 1000 steps, then prints
    the final classification rates and plots both cost curves.
    """
    Xtrain, Ytrain, Xtest, Ytest = get_data()
    Ytrain = Ytrain.astype(np.int32)
    Ytest = Ytest.astype(np.int32)

    D = Xtrain.shape[1]
    M = 5
    # Binary classification in the original note, but handled per class.
    K = len(set(Ytrain) | set(Ytest))

    # Turn the label vectors into per-class indicator targets.
    Ytrain_ind = y2indicatior(Ytrain, K)
    Ytest_ind = y2indicatior(Ytest, K)

    # initialize random weights
    W1 = np.random.randn(D, M)
    W2 = np.random.randn(M, K)
    b1 = np.zeros(M)
    b2 = np.zeros(K)

    train_costs = []
    test_costs = []
    lr = 0.001
    steps = 10000
    for epochs in range(steps):
        pYtrain, Ztrain = forward(Xtrain, W1, W2, b1, b2)
        pYtest, _ = forward(Xtest, W1, W2, b1, b2)

        # Costs need indicator matrices (one column per class) as targets.
        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)

        # Output-layer error, shared by every gradient below.
        delta_out = pYtrain - Ytrain_ind

        W2 -= lr * Ztrain.T.dot(delta_out)
        b2 -= lr * delta_out.sum(axis=0)

        # Error at the hidden nodes; 1 - Z**2 is the derivative of tanh.
        # W2 has already been updated in place at this point.
        dZ = delta_out.dot(W2.T) * (1 - Ztrain * Ztrain)
        W1 -= lr * Xtrain.T.dot(dZ)
        b1 -= lr * dZ.sum(axis=0)

        train_costs.append(ctrain)
        test_costs.append(ctest)
        if epochs % 1000 == 0:
            print("cost train: ", ctrain, "cost test", ctest)

    print("final classification rate for TRANING set is:",
          classification_rate(Ytrain, predict(pYtrain)))
    print("final classification rate for TEST set is:",
          classification_rate(Ytest, predict(pYtest)))

    plt.plot(train_costs)
    plt.plot(test_costs)
    plt.show()
import numpy as np
from sklearn.model_selection import train_test_split
import process as pr
import start as st

# Fraction of every dataset that goes into the training split (75%).
train_s = 0.75

# One array per dataset; the last column holds the target.
all_data = pr.get_data()

data_train = []
target_train = []
data_test = []
target_test = []

# Split each dataset separately (fixed seed), collecting the four parts.
for data in all_data:
    features, labels = data[:, :-1], data[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, train_size=train_s, random_state=0)
    data_train.append(X_train)
    target_train.append(y_train)
    data_test.append(X_test)
    target_test.append(y_test)

# Stack the per-dataset parts into single arrays.
data_train = np.concatenate(data_train)
target_train = np.concatenate(target_train)
data_test = np.concatenate(data_test)
target_test = np.concatenate(target_test)
import numpy as np import matplotlib.pyplot as plt from sklearn.utils import shuffle from process import get_data def y2indicator(y, K): N = len(y) ind = np.zeros((N, K)) for i in range(N): ind[i, y[i]] = 1 return ind Xtrain, Ytrain, Xtest, Ytest = get_data() D = Xtrain.shape[1] K = len(set(Ytrain) | set(Ytest)) # convert to indicator Ytrain_ind = y2indicator(Ytrain, K) Ytest_ind = y2indicator(Ytest, K) # randomly initialize weights W = np.random.randn(D, K) b = np.zeros(K) # make predictions def softmax(a): expA = np.exp(a)
import numpy as np
from process import get_data

path = "ecommerce_data.csv"
X, Y = get_data(path)

M = 5  # hidden units
D = X.shape[1]  # input features
K = len(set(Y))  # distinct labels

# Single-argument print calls work on both Python 2 and Python 3; the
# statement form is a SyntaxError on Python 3.
print(len(Y))
print(K)

w1 = np.random.randn(D, M)
b1 = np.zeros(M)
w2 = np.random.randn(M, K)
b2 = np.zeros(K)


def softmax(a):
    """Return the row-wise softmax of the 2-D array ``a``."""
    expA = np.exp(a)
    return expA / expA.sum(axis=1, keepdims=True)


def forward(X, w1, b1, w2, b2):
    """Run a tanh hidden layer followed by a softmax output layer."""
    Z = np.tanh(X.dot(w1) + b1)
    return softmax(Z.dot(w2) + b2)


P_Y_given_X = forward(X, w1, b1, w2, b2)
from process import get_data
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

# Load the pre-split data and shuffle each split independently.
x_train, y_train, x_test, y_test = get_data()
x_train, y_train = shuffle(x_train, y_train)
x_test, y_test = shuffle(x_test, y_test)

# Two hidden layers of 20 units each, at most 2000 training iterations.
model = MLPClassifier(
    hidden_layer_sizes=(20, 20),
    max_iter=2000,
)
model.fit(x_train, y_train)

# Report accuracy on both splits in a single line.
train_accuracy = model.score(x_train, y_train)
test_accuracy = model.score(x_test, y_test)
train_part = "train accuracy: " + str(train_accuracy)
test_part = " test accuracy: " + str(test_accuracy)
print(train_part + test_part)
skills1 = [x.strip() for x in skills[0].split(',')] for val in skills1: val = '%' + val + '%' c = db.execute("SELECT id from jobs where summary like ?", [val]) v = c.fetchall() if v: for jobid in v: skilljobsid.add(jobid[0]) return list(skilljobsid) else: return [] jobs = process.get_data() #init_db() def process_jobs(): db = get_db() for job in jobs: db.execute( 'INSERT INTO jobs(job_title,company_name,summary,location) VALUES (?,?,?,?)', (job['job_title'], job['organization'], job['job_description'], job['location'])) db.commit() @app.cli.command('preprocess') def preprocess():
# make predictions from ecommerce.csv file using ann # going in the forward direction using randomly initialized weights # (no training) import numpy as np from process import get_data X, Y = get_data() M = 5 D = X.shape[1] K = len(set(Y)) W1 = np.random.randn(D, M) b1 = np.zeros(M) W2 = np.random.randn(M, K) b2 = np.zeros(K) def softmax(A): expA = np.exp(A) return expA / expA.sum(axis = 1, keepdims = True) def forward(X, W1, b1, W2, b2): Z = np.tanh(X.dot(W1) + b1) return softmax(Z.dot(W2) + b2) P_Y_given_X = forward(X, W1, b1, W2, b2) predictions = np.argmax(P_Y_given_X, axis = 1) def classification_rate(Y, P):
def main():
    """Run ``process.get_data`` on the configured data path.

    :return: None
    """
    data_path = settings.PATH_TO_DATA
    process.get_data(data_path)