def main():
    """Train a logistic-regression classifier on DATA_FILEPATH and print accuracy.

    When ``with_validation`` is True, the last 30% of the rows are held out as a
    validation set and training uses early stopping (``sgd_with_validation``);
    otherwise plain SGD runs on the full data set.  Finally a theano function is
    compiled to score the classifier on all of the data.
    """
    df = pd.read_csv(DATA_FILEPATH)
    print(df.columns)

    # FIX: DataFrame.as_matrix() was deprecated and removed from pandas;
    # .to_numpy() is the supported replacement with identical output.
    x_data = df.drop(["clase"], axis=1).to_numpy()
    y_data = np.array(list(map(int, df["clase"])), dtype="int32")
    n_samples = x_data.shape[0]
    print(type(y_data))

    # Symbolic inputs for theano: feature matrix and int32 label vector.
    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")
    clf = lr.LogisticRegression(x, x_data.shape[1], 2)

    with_validation = True
    if with_validation:
        # Hold out the last 30% of rows for validation.
        # NOTE(review): rows are not shuffled first — this assumes the file's
        # row order is already random; confirm with the data source.
        val_frac = 0.3
        val_samples = int(n_samples * val_frac)
        train_samples = n_samples - val_samples

        x_tr, y_tr = x_data[:train_samples, :], y_data[:train_samples]
        x_tr_sh = theano.shared(x_tr, borrow=True)
        y_tr_sh = theano.shared(y_tr, borrow=True)

        x_val = x_data[train_samples:train_samples + val_samples, :]
        y_val = y_data[train_samples:train_samples + val_samples]
        x_val_sh = theano.shared(x_val, borrow=True)
        y_val_sh = theano.shared(y_val, borrow=True)

        print("calling sgd_with_validation")
        sgd.sgd_with_validation(
            clf,
            x_tr_sh, y_tr_sh,
            x_val_sh, y_val_sh,
            learning_rate=0.01, reg_term=0.0001,
            batch_size=32, n_epochs=1000,
            max_its=10000, improv_thresh=0.01, max_its_incr=4,
            rel_val_tol=1e-3,
            verbose=True)
    else:
        x_tr_sh = theano.shared(x_data, borrow=True)
        y_tr_sh = theano.shared(y_data, borrow=True)
        print("calling sgd")
        sgd.sgd(
            clf, x_tr_sh, y_tr_sh, y=y,
            learning_rate=0.01, reg_term=0.0001,
            batch_size=220, rel_tol=2e-3, n_epochs=256,
            verbose=True)

    # Compile a scoring function and evaluate on the full data set
    # (training data included, so this is an optimistic estimate).
    acc = theano.function([x, y], clf.score(y))
    print("accuracy: %.2f%%" % (100 * acc(x_data, y_data)))
def __init__(
        self, inp, n_inp, n_hidden, n_out,
        activation_f=tensor.tanh, w_init_f="xavier_tanh",
        reg="l2", rand_state=42):
    """Build a one-hidden-layer network: a hidden layer feeding a
    logistic-regression output layer.

    Parameters:
    *inp: Input tensor.
    *n_inp: Number of input neurons.
    *n_hidden: Number of hidden neurons.
    *n_out: Number of output neurons.
    *activation_f: Activation function of the hidden layer.
    *w_init_f: Weight initialization method.
    *reg: Regularization method for weights.
    *rand_state: Random state.
    """
    # keep a handle on the symbolic input
    self.inp = inp

    # stack the two layers: hidden layer output feeds the classifier
    hidden = HiddenLayer(
        inp=inp,
        n_inp=n_inp,
        n_out=n_hidden,
        activation_f=activation_f,
        reg=reg)
    classifier = lr.LogisticRegression(
        inp=hidden.output,
        n_inp=n_hidden,
        n_out=n_out,
        reg=reg)
    self.hidden_layer = hidden
    self.log_reg_layer = classifier

    # all trainable parameters, and the combined regularization expression
    self.params = hidden.params + classifier.params
    self.reg = hidden.reg + classifier.reg

    # cost(y), score(y) and pred are delegated to the output layer
    self.cost = classifier.cost
    self.score = classifier.score
    self.pred = classifier.pred
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Build an MLP: tanh hidden layer + logistic-regression output layer.

    Parameters:
    *rng: Random number generator for weight initialization.
    *input: Symbolic input tensor.
    *n_in: Number of input units.
    *n_hidden: Number of hidden units.
    *n_out: Number of output units.
    """
    self.hiddenLayer = HiddenLayer(rng=rng, input=input, n_in=n_in,
                                   n_out=n_hidden, activation=T.tanh)
    self.logRegressionLayer = lr.LogisticRegression(
        input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)

    # L1 norm: sum of absolute values of every weight in both layers.
    # BUG FIX: the original computed np.abs(W.sum()) for the output layer —
    # the absolute value of the *sum* of weights — which lets positive and
    # negative weights cancel instead of being penalized individually.
    self.L1 = (np.abs(self.hiddenLayer.W).sum()
               + np.abs(self.logRegressionLayer.W).sum())

    # Squared L2 norm: sum of squared weights of both layers.
    self.L2_sqr = ((self.hiddenLayer.W ** 2).sum()
                   + (self.logRegressionLayer.W ** 2).sum())

    # Delegate loss/error to the output layer.
    self.negative_log_likelihood = \
        self.logRegressionLayer.negative_log_likelihood
    self.errors = self.logRegressionLayer.error

    # Parameters of both layers, trained jointly.
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
# NOTE(review): this chunk begins mid-statement — the `if` that matches the
# `else` below is outside the visible source; confirm against the full file.
    row[11] = 1
else:
    row[11] = 0

# BREAST CANCER IMPORT #
bcdata = load_data("breast-cancer-wisconsin.csv", ",")

# clean up data: remap class labels — 2 becomes 0, everything else becomes 1
for row in bcdata:
    row[-1] = 0 if row[-1] == 2 else 1

# pick out training points from classes
# (columns 1..9 are the features; the last column holds the 0/1 label)
X = bcdata[:, 1:10]
Y = np.ravel((bcdata[:, -1:]))

# create models
# NOTE(review): this rebinds the imported module name `lda` to an instance,
# shadowing the module for the rest of the script — intentional but fragile.
lda = lda.LDA()
linr = lr.LogisticRegression()

# k-fold accuracy test: time and score LDA for fold counts 1..k-1
k = 10
k_values = [0] * k
ldawineacc = [0.0] * 10
ldawinert = [0.0] * 10
ldabcacc = [0.0] * 10
ldabcrt = [0.0] * 10
for i in range(1, k):
    k_values[i] = i
    print()
    startTime = time.time()
    # k_fold presumably returns mean accuracy over i folds — verify its API
    ldabcacc[i] = k_fold(X, Y, i, lda)
    ldabcrt[i] = time.time() - startTime
    print(str(i) + "-fold LDA accuracy on breast cancer: " + str(ldabcacc[i]))
import lr, fm, deepfm, deepandwide
import dcn, attention_net
import pnn, attention_fm, xdeepfm

# Single-threaded torch so runs are reproducible/comparable across models.
torch.set_num_threads(1)

## load data
# libsvm-format census data: X is a sparse feature matrix, Y the 0/1 labels.
input_path = "./data/census_148d_train.libsvm.tmp"
X, Y = load_svmlight_file(input_path, dtype=np.float32)
size, dim = X.shape

## train
# Pick exactly one model; the commented lines are alternatives kept for
# quick switching between architectures on the same data.
model = lr.LogisticRegression(dim)
# model = fm.FactorizationMachine(dim, 10)
# model = deepfm.DeepFM(dim, 13, 10, [10, 5, 1])
# model = deepandwide.DeepAndWide(dim, 13, 10, [10, 5, 1])
# model = attention_net.AttentionNet(dim, 13, 10, [10, 5, 1])
# model = dcn.DCNet(dim, 13, 10, cross_depth=4, deep_layers=[10, 10, 10])
# model = pnn.PNN(dim, 13, 10, [10, 5, 1])
# model = attention_fm.AttentionFM(dim, 13, 10, 10)
# model = xdeepfm.xDeepFM(dim, 13, 10, [10, 5, 5], [128, 128])

# Adam with lr=0.01; BCELoss implies the model outputs probabilities in [0,1].
optim = torch.optim.Adam(model.parameters(), 0.01)
loss_fn = torch.nn.BCELoss()
batch_size = 30

# NOTE(review): the epoch loop body is cut off in this view after `start = 0`;
# the mini-batch iteration presumably follows.
for epoch in range(10):
    start = 0
'''
Created on 2017/4/9/

@author: jiefisher
'''
import numpy as np
import math
import pandas as pd

import lr as lgr


def _preprocess(df):
    """Drop id/text columns, binary-encode Sex (male=1), fill NaNs with 0.

    Shared by the train and test paths so both see identical features.
    """
    df = df.drop(["PassengerId", "Name", "Ticket", "Cabin", "Embarked"],
                 axis=1)
    df['Sex'] = np.where(df['Sex'] == 'male', 1, 0)
    return df.fillna(0)


if __name__ == '__main__':
    model = lgr.LogisticRegression(learning_rate=.0001, l2=.0005, iters=50000)

    # Training data: pull the label out, then preprocess the features.
    train_df = pd.read_csv("train.csv")
    y_train = train_df["Survived"]
    x_train = _preprocess(train_df.drop(["Survived"], axis=1))
    model.train(x_train.values, y_train.values)

    # Test data: same preprocessing, then predict.
    # (The original also read gender_submission.csv into an unused label
    # variable; that dead code has been removed.)
    x_test = _preprocess(pd.read_csv("test.csv"))
    print(model.predict(x_test.values))