def train_and_test(args, print_config):
    """Build the CNN sentence classifier, train it with AdaDelta and report errors.

    Two weight-sharing paths are constructed: a dropout path whose cost drives
    training, and a dropout-free path (weights rescaled by ``1 - dropout_rate``)
    that is used to measure dev/test error.  For the first
    ``args.embedding_learning_delay_epochs`` epochs the first parameter
    (presumably the embedding matrix of the first layer -- confirm against
    ``WordEmbeddingLayer.params``) is excluded from the updates.

    Args:
        args: configuration object.  Attributes read here: ``corpus_path``,
            ``batch_size``, ``embed_dm``, ``use_pretrained_embedding``,
            ``conv_layer_n``, ``filter_widths``, ``nkerns``, ``ks``,
            ``fold_flags``, ``dropout_rates``, ``L2_regs``, ``use_L2_reg``,
            ``norm_w``, ``conv_activation_unit``, ``rho``, ``epsilon``,
            ``patience``, ``n_epochs``, ``embedding_learning_delay_epochs``,
            ``model_path``, ``task_signature`` and ``output``.
            ``embed_dm`` is overwritten when pretrained embeddings are used.
        print_config: mapping from diagnostic name to a boolean.  Keys read:
            "nnl", "L2_sqr", "grad_abs_mean", "activation_hist",
            "weight_grad_hist", "activation_tracking",
            "weight_grad_tracking" and "error_vs_epoch".

    Side effects:
        Prints progress, dumps the parameters to ``args.model_path`` whenever
        the dev error improves, draws the requested plots, and appends one
        result line to ``args.output``.
    """
    assert (args.conv_layer_n == len(args.filter_widths) == len(args.nkerns)
            == (len(args.L2_regs) - 2) == len(args.fold_flags) == len(args.ks))

    # \mod{dim, 2^{\sum fold_flags}} == 0
    assert args.embed_dm % (2 ** sum(args.fold_flags)) == 0

    ###################
    # get the data    #
    ###################
    datasets = load_data(args.corpus_path)

    train_set_x, train_set_y = datasets[0]
    dev_set_x, dev_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    word2index = datasets[3]
    pretrained_embeddings = datasets[5]

    # Floor division: a trailing partial batch is dropped.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // args.batch_size

    possible_labels = set(train_set_y.get_value().tolist())

    if args.use_pretrained_embedding:
        args.embed_dm = pretrained_embeddings.get_value().shape[1]

    ###################################
    # Symbolic variable definition    #
    ###################################
    x = T.imatrix('x')  # the word indices matrix
    y = T.ivector('y')  # the sentiment labels

    batch_index = T.iscalar('batch_index')

    rng = np.random.RandomState(1234)

    def batch_of(shared_var):
        # Symbolic slice selecting minibatch `batch_index` of a shared dataset.
        return shared_var[batch_index * args.batch_size:
                          (batch_index + 1) * args.batch_size]

    ###############################
    # Construction of the network #
    ###############################
    # Layer 1, the embedding layer
    layer1 = WordEmbeddingLayer(
        rng,
        input=x,
        vocab_size=len(word2index),
        embed_dm=args.embed_dm,
        embeddings=(pretrained_embeddings
                    if args.use_pretrained_embedding else None)
    )

    dropout_layers = [layer1]  # training path
    layers = [layer1]          # prediction path (averaged model)

    for i in xrange(args.conv_layer_n):
        fold_flag = args.fold_flags[i]

        # Dropout on the input of conv layer i.  The rate must be the same
        # one that rescales the shared weights on the prediction path below,
        # hence dropout_rates[i] (it used to be hard-wired to index 0).
        dpl = DropoutLayer(
            input=dropout_layers[-1].output,
            rng=rng,
            dropout_rate=args.dropout_rates[i]
        )
        next_layer_dropout_input = dpl.output
        next_layer_input = layers[-1].output

        # for the conv layer
        filter_shape = (
            args.nkerns[i],
            (1 if i == 0 else args.nkerns[i - 1]),
            1,
            args.filter_widths[i]
        )

        k = args.ks[i]

        print("For conv layer(%s) %d, filter shape = %r, k = %d, dropout_rate = %f and normalized weight init: %r and fold: %d" % (
            args.conv_activation_unit,
            i + 2,
            filter_shape,
            k,
            args.dropout_rates[i],
            args.norm_w,
            fold_flag
        ))

        # we have two layers adding to two paths respectively,
        # one for training
        # the other for prediction (averaged model)
        dropout_conv_layer = ConvFoldingPoolLayer(
            rng,
            input=next_layer_dropout_input,
            filter_shape=filter_shape,
            k=k,
            norm_w=args.norm_w,
            fold=fold_flag,
            activation=args.conv_activation_unit
        )

        # for prediction, sharing weights with the dropout layer
        conv_layer = ConvFoldingPoolLayer(
            rng,
            input=next_layer_input,
            filter_shape=filter_shape,
            k=k,
            activation=args.conv_activation_unit,
            fold=fold_flag,
            W=dropout_conv_layer.W * (1 - args.dropout_rates[i]),  # model averaging
            b=dropout_conv_layer.b
        )

        dropout_layers.append(dropout_conv_layer)
        layers.append(conv_layer)

    # last, the output layer
    # both dropout and without dropout
    # Each fold halves the embedding dimension; 2 ** 0 == 1 covers "no fold".
    n_in = (args.nkerns[-1] * args.ks[-1] * args.embed_dm
            // (2 ** sum(args.fold_flags)))

    print("For output layer, n_in = %d, dropout_rate = %f" % (
        n_in, args.dropout_rates[-1]))

    # NOTE(review): no DropoutLayer is applied in front of the output layer,
    # yet its weights are rescaled by (1 - dropout_rates[-1]) on the
    # prediction path -- confirm this is intended.
    dropout_output_layer = LogisticRegression(
        rng,
        input=dropout_layers[-1].output.flatten(2),
        n_in=n_in,  # divided by 2x(how many times are folded)
        n_out=len(possible_labels)  # one output per label seen in training
    )

    output_layer = LogisticRegression(
        rng,
        input=layers[-1].output.flatten(2),
        n_in=n_in,
        n_out=len(possible_labels),
        W=dropout_output_layer.W * (1 - args.dropout_rates[-1]),  # sharing the parameters, don't forget
        b=dropout_output_layer.b
    )

    dropout_layers.append(dropout_output_layer)
    layers.append(output_layer)

    ###############################
    # Error and cost              #
    ###############################
    # cost and error come from different models!
    dropout_cost = dropout_output_layer.nnl(y)
    errors = output_layer.errors(y)

    def prepare_L2_sqr(param_layers, L2_regs):
        # Weighted sum of squared weights, one coefficient per layer.
        assert len(L2_regs) == len(param_layers)
        return T.sum([
            # 2.0 keeps an integer coefficient from being truncated to zero.
            L2_reg / 2.0 * ((layer.W if hasattr(layer, "W") else layer.embeddings) ** 2).sum()
            for L2_reg, layer in zip(L2_regs, param_layers)
        ])

    L2_sqr = prepare_L2_sqr(dropout_layers, args.L2_regs)
    L2_sqr_no_ebd = prepare_L2_sqr(dropout_layers[1:], args.L2_regs[1:])

    if args.use_L2_reg:
        cost = dropout_cost + L2_sqr
        cost_no_ebd = dropout_cost + L2_sqr_no_ebd
    else:
        cost = dropout_cost
        cost_no_ebd = dropout_cost

    ###############################
    # Parameters to be used       #
    ###############################
    print("Delay embedding learning by %d epochs" % (
        args.embedding_learning_delay_epochs))

    print("param_layers: %r" % (dropout_layers,))
    param_layers = dropout_layers

    ##############################
    # Parameter Update           #
    ##############################
    print("Using AdaDelta with rho = %f and epsilon = %f" % (
        args.rho, args.epsilon))

    params = [param for layer in param_layers for param in layer.params]
    param_shapes = [shape for layer in param_layers for shape in layer.param_shapes]

    param_grads = [T.grad(cost, param) for param in params]

    # AdaDelta parameter update
    # E[g^2], initialized to zero
    egs = [
        theano.shared(
            value=np.zeros(param_shape, dtype=theano.config.floatX),
            borrow=True,
            name="Eg:" + param.name
        )
        for param_shape, param in zip(param_shapes, params)
    ]

    # E[\delta x^2], initialized to zero
    exs = [
        theano.shared(
            value=np.zeros(param_shape, dtype=theano.config.floatX),
            borrow=True,
            name="Ex:" + param.name
        )
        for param_shape, param in zip(param_shapes, params)
    ]

    new_egs = [
        args.rho * eg + (1 - args.rho) * g ** 2
        for eg, g in zip(egs, param_grads)
    ]

    delta_x = [
        -(T.sqrt(ex + args.epsilon) / T.sqrt(new_eg + args.epsilon)) * g
        for new_eg, ex, g in zip(new_egs, exs, param_grads)
    ]

    new_exs = [
        args.rho * ex + (1 - args.rho) * (dx ** 2)
        for ex, dx in zip(exs, delta_x)
    ]

    egs_updates = list(zip(egs, new_egs))
    exs_updates = list(zip(exs, new_exs))
    param_updates = [(p, p + dx) for dx, p in zip(delta_x, params)]

    updates = egs_updates + exs_updates + param_updates

    # updates WITHOUT embedding: drop the first parameter and its
    # accumulators (position 0 is assumed to be the embedding).
    updates_no_emb = egs_updates[1:] + exs_updates[1:] + param_updates[1:]

    def make_train_func(cost, updates):
        return theano.function(
            inputs=[batch_index],
            outputs=[cost],
            updates=updates,
            givens={
                x: batch_of(train_set_x),
                y: batch_of(train_set_y)
            }
        )

    train_model_no_ebd = make_train_func(cost_no_ebd, updates_no_emb)
    train_model = make_train_func(cost, updates)

    def make_error_func(x_val, y_val):
        return theano.function(
            inputs=[],
            outputs=errors,
            givens={
                x: x_val,
                y: y_val
            },
        )

    dev_error = make_error_func(dev_set_x, dev_set_y)
    test_error = make_error_func(test_set_x, test_set_y)

    #############################
    # Debugging purpose code    #
    #############################
    # PARAMETER TUNING NOTE:
    # some demonstration of the gradient vanishing problem
    train_data_at_index = {
        x: batch_of(train_set_x),
    }

    train_data_at_index_with_y = {
        x: batch_of(train_set_x),
        y: batch_of(train_set_y)
    }

    if print_config["nnl"]:
        get_nnl = theano.function(
            inputs=[batch_index],
            outputs=dropout_cost,
            givens={
                x: batch_of(train_set_x),
                y: batch_of(train_set_y)
            }
        )

    if print_config["L2_sqr"]:
        get_L2_sqr = theano.function(
            inputs=[],
            outputs=L2_sqr
        )

        get_L2_sqr_no_ebd = theano.function(
            inputs=[],
            outputs=L2_sqr_no_ebd
        )

    if print_config["grad_abs_mean"]:
        print_grads = theano.function(
            inputs=[],
            outputs=[
                theano.printing.Print(param.name)(T.mean(T.abs_(param_grad)))
                for param, param_grad in zip(params, param_grads)
            ],
            givens={
                x: train_set_x,
                y: train_set_y
            }
        )

    # Diagnostics cover the conv layers only (no embedding, no output layer).
    activations = [l.output for l in dropout_layers[1:-1]]
    weight_grads = [T.grad(cost, l.W) for l in dropout_layers[1:-1]]

    if print_config["activation_hist"]:
        # turn into 1D array
        get_activations = theano.function(
            inputs=[batch_index],
            outputs=[val.flatten(1) for val in activations],
            givens=train_data_at_index
        )

    if print_config["weight_grad_hist"]:
        # turn into 1D array
        get_weight_grads = theano.function(
            inputs=[batch_index],
            outputs=[val.flatten(1) for val in weight_grads],
            givens=train_data_at_index_with_y
        )

    if print_config["activation_tracking"]:
        # get the mean and std of activations for each conv layer
        get_activation_mean = theano.function(
            inputs=[batch_index],
            outputs=[T.mean(val) for val in activations],
            givens=train_data_at_index
        )

        get_activation_std = theano.function(
            inputs=[batch_index],
            outputs=[T.std(val) for val in activations],
            givens=train_data_at_index
        )

    if print_config["weight_grad_tracking"]:
        # get the mean and std of weight gradients for each conv layer
        get_weight_grad_mean = theano.function(
            inputs=[batch_index],
            outputs=[T.mean(g) for g in weight_grads],
            givens=train_data_at_index_with_y
        )

        get_weight_grad_std = theano.function(
            inputs=[batch_index],
            outputs=[T.std(g) for g in weight_grads],
            givens=train_data_at_index_with_y
        )

    # the training loop
    patience = args.patience  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    # Bound up front: the reporting code in `finally` and after it reads this
    # value even when training stops before the first dev improvement.
    test_error_val = np.inf

    start_time = time.clock()

    epoch = 0

    nnls = []
    L2_sqrs = []

    activation_means = [[] for _ in xrange(args.conv_layer_n)]
    activation_stds = [[] for _ in xrange(args.conv_layer_n)]
    weight_grad_means = [[] for _ in xrange(args.conv_layer_n)]
    weight_grad_stds = [[] for _ in xrange(args.conv_layer_n)]
    activation_hist_data = [[] for _ in xrange(args.conv_layer_n)]
    weight_grad_hist_data = [[] for _ in xrange(args.conv_layer_n)]

    train_errors = []
    dev_errors = []
    try:
        print("validation_frequency = %d" % validation_frequency)
        while epoch < args.n_epochs:
            epoch += 1
            print("At epoch {0}".format(epoch))

            train_embedding = epoch >= (args.embedding_learning_delay_epochs + 1)

            if epoch == (args.embedding_learning_delay_epochs + 1):
                print("########################")
                print("Start training embedding")
                print("########################")

            # shuffle the training data
            train_set_x_data = train_set_x.get_value(borrow=True)
            train_set_y_data = train_set_y.get_value(borrow=True)

            permutation = np.random.permutation(train_set_x_data.shape[0])

            train_set_x.set_value(train_set_x_data[permutation])
            train_set_y.set_value(train_set_y_data[permutation])

            for minibatch_index in xrange(n_train_batches):
                if train_embedding:
                    train_model(minibatch_index)
                else:
                    train_model_no_ebd(minibatch_index)

                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    dev_error_val = dev_error()

                    print("At epoch %d and minibatch %d. \nDev error %.2f%%\n" % (
                        epoch,
                        minibatch_index,
                        dev_error_val * 100
                    ))

                    dev_errors.append(dev_error_val)

                    if dev_error_val < best_validation_loss:
                        best_iter = iteration
                        # improve patience if loss improvement is good enough
                        if dev_error_val < best_validation_loss * \
                           improvement_threshold:
                            patience = max(patience, iteration * patience_increase)

                        best_validation_loss = dev_error_val

                        test_error_val = test_error()

                        print(
                            (
                                ' epoch %i, minibatch %i/%i, test error of'
                                ' best dev error %f %%'
                            ) %
                            (
                                epoch,
                                minibatch_index + 1,
                                n_train_batches,
                                test_error_val * 100.
                            )
                        )

                        print("Dumping model to %s" % (args.model_path))
                        dump_params(params, args.model_path)

                if (minibatch_index + 1) % 50 == 0 or minibatch_index == n_train_batches - 1:
                    print("%d / %d minibatches completed" % (
                        minibatch_index + 1, n_train_batches))
                    if print_config["nnl"]:
                        print("`nnl` for the past 50 minibatches is %f" % (
                            np.mean(np.array(nnls))))
                        nnls = []
                    if print_config["L2_sqr"]:
                        print("`L2_sqr`` for the past 50 minibatches is %f" % (
                            np.mean(np.array(L2_sqrs))))
                        L2_sqrs = []

                ##################
                # Plotting stuff #
                ##################
                if print_config["nnl"]:
                    nnls.append(get_nnl(minibatch_index))

                if print_config["L2_sqr"]:
                    if train_embedding:
                        L2_sqrs.append(get_L2_sqr())
                    else:
                        L2_sqrs.append(get_L2_sqr_no_ebd())

                if print_config["activation_tracking"]:
                    layer_means = get_activation_mean(minibatch_index)
                    layer_stds = get_activation_std(minibatch_index)
                    for layer_ms, layer_ss, layer_m, layer_s in zip(
                            activation_means, activation_stds,
                            layer_means, layer_stds):
                        layer_ms.append(layer_m)
                        layer_ss.append(layer_s)

                if print_config["weight_grad_tracking"]:
                    layer_means = get_weight_grad_mean(minibatch_index)
                    layer_stds = get_weight_grad_std(minibatch_index)

                    for layer_ms, layer_ss, layer_m, layer_s in zip(
                            weight_grad_means, weight_grad_stds,
                            layer_means, layer_stds):
                        layer_ms.append(layer_m)
                        layer_ss.append(layer_s)

                if print_config["activation_hist"]:
                    for layer_hist, layer_data in zip(
                            activation_hist_data,
                            get_activations(minibatch_index)):
                        layer_hist += layer_data.tolist()

                if print_config["weight_grad_hist"]:
                    for layer_hist, layer_data in zip(
                            weight_grad_hist_data,
                            get_weight_grads(minibatch_index)):
                        layer_hist += layer_data.tolist()

    except (KeyboardInterrupt, Exception):
        # Best effort: report the failure (or a Ctrl-C) and still fall
        # through to plotting and result reporting.  SystemExit is no longer
        # swallowed.
        import traceback
        traceback.print_exc(file=sys.stdout)
    finally:
        from plot_util import (plot_hist,
                               plot_track,
                               plot_error_vs_epoch,
                               plt)

        if print_config["activation_tracking"]:
            plot_track(activation_means,
                       activation_stds,
                       "activation_tracking")

        if print_config["weight_grad_tracking"]:
            plot_track(weight_grad_means,
                       weight_grad_stds,
                       "weight_grad_tracking")

        if print_config["activation_hist"]:
            plot_hist(activation_hist_data, "activation_hist")

        if print_config["weight_grad_hist"]:
            plot_hist(weight_grad_hist_data, "weight_grad_hist")

        if print_config["error_vs_epoch"]:
            # Training error is not measured; plot a flat zero line for it.
            train_errors = [0] * len(dev_errors)
            ax = plot_error_vs_epoch(
                train_errors, dev_errors,
                title=('Best dev score: %f %% '
                       ' at iter %i with test error %f %%') % (
                           best_validation_loss * 100., best_iter + 1,
                           test_error_val * 100.
                       )
            )

        # NOTE(review): the original layout was collapsed onto one line;
        # confirm that show/save is meant to run regardless of
        # print_config["error_vs_epoch"].
        if not args.task_signature:
            plt.show()
        else:
            plt.savefig("plots/" + args.task_signature + ".png")

    end_time = time.clock()

    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_error_val * 100.))

    # save the result
    with open(args.output, "a") as f:
        f.write("%s\t%f\t%f\n" % (
            args.task_signature, best_validation_loss, test_error_val))

    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def main(train_path, validation_path, save_path):
    """Problem 2: Logistic regression for imbalanced labels.

    Fits the classifier twice and evaluates each fit on the validation set:
        1. on the training set as given ("naive"),
        2. on the training set with the positive examples repeated
           (upsampling the minority class).

    Args:
        train_path: Path to CSV file containing training set.
        validation_path: Path to CSV file containing validation set.
        save_path: Path to save predictions; the WILDCARD placeholder in it
            is replaced by 'naive' / 'upsampling'.
    """
    output_path_naive = save_path.replace(WILDCARD, 'naive')
    output_path_upsampling = save_path.replace(WILDCARD, 'upsampling')

    def fit_and_report(features, labels, out_path):
        # Fit on (features, labels), store validation probabilities in
        # out_path, then plot and print the accuracy figures.
        val_x, val_y = util.load_dataset(validation_path, add_intercept=True)

        model = LogisticRegression()
        model.fit(features, labels)

        probabilities = model.predict(val_x)
        np.savetxt(out_path, probabilities)

        # Threshold the probabilities into hard labels.
        predicted = probabilities >= 0.5
        plot_stem = out_path[:-4]  # out_path without its last four characters
        util.plot(val_x, predicted, model.theta, plot_stem)

        overall = np.mean(predicted == val_y)
        negatives_hit = np.sum((predicted == 0) & (val_y == 0))
        positives_hit = np.sum((predicted == 1) & (val_y == 1))
        acc_class_0 = negatives_hit / np.sum(val_y == 0)
        acc_class_1 = positives_hit / np.sum(val_y == 1)
        balanced = 0.5 * (acc_class_0 + acc_class_1)

        print("Accuracy: {},\nAccuracy for class 0: {},\nAccuracy for class 1: {},"
              "\nBalanced Accuracy: {}".format(overall, acc_class_0,
                                               acc_class_1, balanced))

        # plot the real expected outcome from the validation:
        util.plot(val_x, val_y, model.theta, plot_stem + "validation")

    # Part (b): Vanilla logistic regression
    print("Vanilla Logistic Regression:")
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    fit_and_report(x_train, y_train, output_path_naive)

    # Part (d): Upsampling minority class
    # Every positive example ends up present int(1 / kappa) times in total.
    extra_copies = int(1 / kappa) - 1
    is_positive = y_train == 1
    repeated_x = np.repeat(x_train[is_positive, :], extra_copies, axis=0)
    repeated_y = np.repeat(y_train[is_positive], extra_copies, axis=0)
    upsampled_x = np.concatenate((x_train, repeated_x), axis=0)
    upsampled_y = np.concatenate((y_train, repeated_y), axis=0)
    fit_and_report(upsampled_x, upsampled_y, output_path_upsampling)
plt.plot(costs_train[i], "--", color=color, label="Train, lambda = {:g}".format(lmbda)) plt.plot(costs_test[i], color=color, label="Test, lambda = {:g}".format(lmbda)) plt.legend(loc="upper right") plt.savefig("results/cost_lmbda.pdf") plt.show() if mode == "logreg": batch_size = 100 n_batches = int(Xtrain.shape[0] / batch_size) logReg = LogisticRegression(n_batches=n_batches, allow_early_stop=False) etas = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5] acc_list = [] accuracys_train = [] costs_train = [] accuracys_test = [] costs_test = [] for eta in etas: a, b, c, d = logReg.fit(Xtrain, ytrain, eta=eta, n_epochs=2000, Xtest=Xtest,
def evaluatePerformance(numTrials=1000):
    '''
    Evaluate the performance of decision trees and logistic regression,
    average over 1,000 trials of 10-fold cross validation

    For every fold both models are additionally trained on the first
    10%, 20%, ..., 100% of that fold's training examples; the resulting
    learning curve is saved to 'learningcurve.pdf'.

    Args:
      numTrials: number of shuffled repetitions of the cross validation

    Return:
      a matrix giving the performance that will contain the following entries:
      stats[0,0] = mean accuracy of decision tree
      stats[0,1] = std deviation of decision tree accuracy
      stats[1,0] = mean accuracy of logistic regression
      stats[1,1] = std deviation of logistic regression accuracy

      The statistics are taken from the runs that used the whole training fold.

    ** Note that your implementation must follow this API**
    '''

    # Load Data: first column is the label, the rest are features
    filename = 'data/SPECTF.dat'
    data = np.loadtxt(filename, delimiter=',')
    X = data[:, 1:]
    y = np.array([data[:, 0]]).T  # column vector of labels
    n, d = X.shape

    # Standardize the data
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std[std == 0] = 1.0  # constant features: avoid dividing by zero
    X = (X - mean) / std

    num_folds = 10
    percent_incs = 10
    # One row per (trial, fold), one column per training-set fraction.
    tree_accuracy = np.zeros(shape=[numTrials * num_folds, percent_incs])
    log_accuracy = np.zeros(shape=[numTrials * num_folds, percent_incs])

    # split the data
    k_fold = sklearn.cross_validation.KFold(len(y), n_folds=num_folds)

    for i in xrange(numTrials):
        # for each trial, shuffle the data
        # NOTE(review): reseeding here means the same permutation is
        # re-applied to the already shuffled data on every trial.
        idx = np.arange(n)
        np.random.seed(13)
        np.random.shuffle(idx)
        X = X[idx]
        y = y[idx]

        for j, (train_index, test_index) in enumerate(k_fold):
            row = i * num_folds + j
            Xtest, ytest = X[test_index], y[test_index]

            for k in xrange(percent_incs):
                # Use the first (k+1)/percent_incs of THIS FOLD's training
                # examples.  The size used to be derived from the total
                # sample count n, which overshot the requested fraction.
                n_used = len(train_index) * (k + 1) // percent_incs
                subset = train_index[:n_used]
                Xtrain, ytrain = X[subset], y[subset]

                # train the decision tree
                clf = tree.DecisionTreeClassifier()
                clf = clf.fit(Xtrain, ytrain)

                # output tree predictions on the remaining data and check them
                tree_pred = clf.predict(Xtest)
                tree_accuracy[row, k] = accuracy_score(ytest, tree_pred)

                # train logistic regression
                logregModel = LogisticRegression(alpha=0.1, epsilon=0.005)
                logregModel.fit(Xtrain, ytrain)

                # output logreg predictions on the remaining data and check them
                log_pred = logregModel.predict(Xtest)
                log_accuracy[row, k] = accuracy_score(ytest, log_pred)

    # Headline statistics: the column trained on 100% of the training fold.
    meanDecisionTreeAccuracy = np.mean(tree_accuracy[:, percent_incs - 1])
    stddevDecisionTreeAccuracy = np.std(tree_accuracy[:, percent_incs - 1])
    meanLogisticRegressionAccuracy = np.mean(log_accuracy[:, percent_incs - 1])
    stddevLogisticRegressionAccuracy = np.std(log_accuracy[:, percent_incs - 1])

    # Learning curve: mean and std over all (trial, fold) rows per fraction.
    tree_array = tree_accuracy.mean(axis=0)
    tree_array_std = tree_accuracy.std(axis=0)
    log_array = log_accuracy.mean(axis=0)
    log_array_std = log_accuracy.std(axis=0)

    x_axis = (np.arange(percent_incs) + 1) * 10
    tree_plot = plt.errorbar(x=x_axis, y=tree_array, yerr=tree_array_std)
    log_plot = plt.errorbar(x=x_axis, y=log_array, yerr=log_array_std)
    plt.xlabel('Training Data Used (percentage)')
    plt.ylabel('Accuracy (mean)')
    plt.title('Learning Curve')
    plt.axis([10, 100, 0.0, 1.0])
    plt.grid(True)
    plt.legend([tree_plot, log_plot], ["Decision Tree", "Logistic Regression"], loc=4)
    plt.savefig('learningcurve.pdf')
    #plt.show()

    # make certain that the return value matches the API specification
    stats = np.zeros((2, 2))
    stats[0, 0] = meanDecisionTreeAccuracy
    stats[0, 1] = stddevDecisionTreeAccuracy
    stats[1, 0] = meanLogisticRegressionAccuracy
    stats[1, 1] = stddevLogisticRegressionAccuracy
    return stats
def __init__(self, x, y, batch_size, videos, kernels, pools, n_input, n_output, hidden_input, params=None):
    """Assemble the network and compile its Theano functions.

    The model is two ConvLayers, a tanh HiddenLayer and a two-class
    LogisticRegression output.  When `params` is given, the layers are
    built from those stored values instead of fresh ones; the expected
    order is that of `self.params`:
    [logreg W, logreg b, hidden W, hidden b, conv1 (2 values), conv0 (2 values)].

    Sets `self.params`, `self.train_model` (one SGD step, returns the
    cost), `self.validate_model` (error on a labelled batch) and
    `self.predict` (predicted labels).
    """
    learning_rate = 0.1
    rng = numpy.random.RandomState(1234)

    print('... building the model')
    sys.stdout.flush()

    # Extra constructor arguments used only when restoring stored parameters.
    if params:
        conv0_extra = (True, params[6], params[7])
        conv1_extra = (True, params[4], params[5])
        hidden_extra = {'W': params[2], 'b': params[3]}
        output_extra = {'W': params[0], 'b': params[1]}
    else:
        conv0_extra = ()
        conv1_extra = ()
        hidden_extra = {}
        output_extra = {}

    # Two stacked convolutional layers.
    layer0 = ConvLayer(x, n_input[0], n_output[0], kernels[0], videos[0],
                       pools[0], batch_size, 'L0', rng, *conv0_extra)
    layer1 = ConvLayer(layer0.output, n_input[1], n_output[1], kernels[1],
                       videos[1], pools[1], batch_size, 'L1', rng,
                       *conv1_extra)

    # Fully-connected tanh layer on the flattened feature maps.
    layer2 = HiddenLayer(rng, input=layer1.output.flatten(2),
                         n_in=hidden_input, n_out=batch_size,
                         activation=T.tanh, **hidden_extra)

    # Classify the values of the fully-connected layer.
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size,
                                n_out=2, **output_extra)

    # The cost we minimize during training is the NLL of the model.
    cost = layer3.negative_log_likelihood(y)

    # All model parameters to be fit by gradient descent, output layer first.
    self.params = layer3.params + layer2.params + layer1.params + layer0.params

    # Plain SGD: one (parameter, parameter - learning_rate * gradient)
    # pair per model parameter.
    gradients = T.grad(cost, self.params)
    updates = [(param, param - learning_rate * gradient)
               for param, gradient in zip(self.params, gradients)]

    self.train_model = theano.function([x, y], cost, updates=updates)
    self.validate_model = theano.function(inputs=[x, y], outputs=layer3.errors(y))
    self.predict = theano.function(inputs=[x], outputs=layer3.y_pred)

    print('... building done')
    sys.stdout.flush()
# Demo script: fit the logistic regression model on a two-feature data set and
# draw its predictions over a dense grid covering the feature plane.
if __name__ == "__main__":
    # Load Data: columns 0-1 are the features, column 2 is the label
    filename = 'data/data1.dat'
    data = loadtxt(filename, delimiter=',')
    X = data[:, 0:2]
    y = np.array([data[:, 2]]).T  # labels as an (n, 1) column vector

    n, d = X.shape

    # Standardize the data (zero mean, unit standard deviation per feature)
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mean) / std

    # train logistic regression
    logregModel = LogisticRegression(regLambda=0.0001)
    logregModel.fit(X, y)

    # Plot the decision boundary: predict on every point of a mesh that
    # extends half a unit beyond the data range on each side
    h = .02  # step size in the mesh
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # One row per mesh point: (feature 0, feature 1)
    Z = logregModel.predict(np.c_[xx.ravel(), yy.ravel()])
    print Z

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)  # back to the mesh layout expected by pcolormesh
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
def main(): print "############# Load Datasets ##############" import stanfordSentimentTreebank as sst skip_unknown_words = bool(args.get("--skip")) shuffle_flag = bool(args.get("--shuffle")) datatype = args.get("--datatype") if datatype == 5: # Fine-grained 5-class n_class = 5 elif datatype == 2: # Binary 2-class n_class = 2 # print "skip_unknown_words",skip_unknown_words vocab, index2word, datasets, datasets_all_sentences, funcs = sst.load_stanfordSentimentTreebank_dataset( normalize=True, skip_unknown_words=skip_unknown_words, datatype=datatype ) train_set, test_set, dev_set = datasets train_set_sentences, test_set_sentences, dev_set_sentences = datasets_all_sentences get, sentence2ids, ids2sentence = funcs # 関数を読み込み scores, sentences = zip(*train_set_sentences) sentences = [[word for word in sentence.lower().split()] for sentence in sentences] vocab_size = len(vocab) dev_unknown_count = sum([unknown_word_count for score, (ids, unknown_word_count) in dev_set]) test_unknown_count = sum([unknown_word_count for score, (ids, unknown_word_count) in test_set]) train_set = [(score, ids) for score, (ids, unknown_word_count) in train_set] test_set = [(score, ids) for score, (ids, unknown_word_count) in test_set] dev_set = [(score, ids) for score, (ids, unknown_word_count) in dev_set] print "train_size : ", len(train_set) print "dev_size : ", len(dev_set) print "test_size : ", len(test_set) print "-" * 30 print "vocab_size: ", len(vocab) print "dev_unknown_words : ", dev_unknown_count print "test_unknown_words : ", test_unknown_count print args # EMB_DIM = 50 EMB_DIM = args.get("--emb_size") vocab_size = len(vocab) feat_map_n_1 = args.get("--feat_map_n_1") feat_map_n_final = args.get("--feat_map_n_final") height = 1 width1 = args.get("--width1") width2 = args.get("--width2") k_top = args.get("--k_top") n_class = n_class alpha = args.get("--alpha") n_epoch = args.get("--n_epoch") dropout_rate0 = args.get("--dropout_rate0") dropout_rate1 = args.get("--dropout_rate1") 
dropout_rate2 = args.get("--dropout_rate2") activation = args.get("--activation") learn = args.get("--learn") number_of_convolutinal_layer = 2 pretrain = args.get("--pretrain") if pretrain == "word2vec": print "*Using word2vec" embeddings_W, model = pretrained_embedding.use_word2vec( sentences=sentences, index2word=index2word, emb_dim=EMB_DIM ) # -0.5 ~ 0.5で初期化している elif pretrain == "glove": print "*Using glove" embeddings_W = pretrained_embedding.use_glove( sentences=sentences, index2word=index2word, emb_dim=EMB_DIM, model_file="glove_model/glove_50_iter2900.model", ) else: embeddings_W = np.asarray(rng.normal(0, 0.05, size=(vocab_size, EMB_DIM)), dtype=theano.config.floatX) embeddings_W[0, :] = 0 print np.amax(embeddings_W) print np.amin(embeddings_W) # print "*embeddings" print embeddings_W # print bool(embeddings) # input_x = [1, 3, 4, 5, 0, 22, 4, 5] print "############# Model Setting ##############" x = T.imatrix("x") length_x = T.iscalar("length_x") y = T.ivector("y") # the sentence sentiment label embeddings = WordEmbeddingLayer(rng=rng, input=x, vocab_size=vocab_size, embed_dm=EMB_DIM, embeddings=embeddings_W) def dropout(X, p=0.5): if p > 0: retain_prob = 1 - p X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) # X /= retain_prob return X # number_of_convolutinal_layer = theano.shared(number_of_convolutinal_layer) # dynamic_func = theano.function(inputs=[length_x], outputs=number_of_convolutinal_layer * length_x) # dynamic_func_test = theano.function( # inputs = [length_x], # outputs = dynamic_func(length_x), # ) # print dynamic_func(len([1,2,3])) l1 = DynamicConvFoldingPoolLayer( rng, input=dropout(embeddings.output, p=dropout_rate0), filter_shape=(feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top=k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation=activation, ) l1_no_dropout = DynamicConvFoldingPoolLayer( rng, 
input=embeddings.output, W=l1.W * (1 - dropout_rate0), b=l1.b, filter_shape=(feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top=k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation=activation, ) l2 = DynamicConvFoldingPoolLayer( rng, input=dropout(l1.output, p=dropout_rate1), filter_shape=(feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top=k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation=activation, ) l2_no_dropout = DynamicConvFoldingPoolLayer( rng, input=l1_no_dropout.output, W=l2.W * (1 - dropout_rate1), b=l2.b, filter_shape=(feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top=k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation=activation, ) # l2_output = theano.function( # inputs = [x,length_x], # outputs = l2.output, # # on_unused_input='ignore' # ) # TODO: # check the dimension # input: 1 x 1 x 6 x 4 # out = l2_output( # np.array([input_x], dtype = np.int32), # len(input_x), # ) # test = theano.function( # inputs = [x], # outputs = embeddings.output, # ) # print "--input--" # print np.array([input_x], dtype = np.int32).shape # print "--input embeddings--" # a = np.array([input_x], dtype = np.int32) # print test(a).shape # print "-- output --" # print out # print out.shape # x = T.dscalar("x") # b = T.dscalar("b") # a = 1 # f = theano.function(inputs=[x,b], outputs=b * x + a) # print f(2,2) # expected = (1, feat_map_n, EMB_DIM / 2, k) # assert out.shape == expected, "%r != %r" %(out.shape, expected) ##### Test Part Three ############### # LogisticRegressionLayer ################################# # print "############# LogisticRegressionLayer ##############" l_final = LogisticRegression( rng, 
input=dropout(l2.output.flatten(2), p=dropout_rate2), n_in=feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out=n_class, # five sentiment level ) l_final_no_dropout = LogisticRegression( rng, input=l2_no_dropout.output.flatten(2), W=l_final.W * (1 - dropout_rate2), b=l_final.b, n_in=feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out=n_class, # five sentiment level ) print "n_in : ", feat_map_n_final * k_top * EMB_DIM # print "n_in = %d" %(2 * 2 * math.ceil(EMB_DIM / 2.)) # p_y_given_x = theano.function( # inputs = [x, length_x], # outputs = l_final.p_y_given_x, # allow_input_downcast=True, # # mode = "DebugMode" # ) # print "p_y_given_x = " # print p_y_given_x( # np.array([input_x], dtype=np.int32), # len(input_x) # ) cost = theano.function( inputs=[x, length_x, y], outputs=l_final.nnl(y), allow_input_downcast=True, # mode = "DebugMode" ) # print "cost:\n", cost( # np.array([input_x], dtype = np.int32), # len(input_x), # np.array([1], dtype = np.int32) # ) print "############# Learning ##############" layers = [] layers.append(embeddings) layers.append(l1) layers.append(l2) layers.append(l_final) cost = l_final.nnl(y) params = [p for layer in layers for p in layer.params] param_shapes = [l.param_shapes for l in layers] param_grads = [T.grad(cost, param) for param in params] def sgd(cost, params, lr=0.05): grads = [T.grad(cost, param) for param in params] updates = [] for p, g in zip(params, grads): updates.append([p, p - g * lr]) return updates from sgd import rmsprop, adagrad, adadelta, adam # updates = sgd(cost, l_final.params) # print param_grads if learn == "sgd": updates = sgd(cost, params, lr=0.05) elif learn == "adam": updates = adam(loss_or_grads=cost, params=params, learning_rate=alpha) elif learn == "adagrad": updates = adagrad(loss_or_grads=cost, params=params, learning_rate=alpha) elif learn == "adadelta": updates = 
adadelta(loss_or_grads=cost, params=params) elif learn == "rmsprop": updates = rmsprop(loss_or_grads=cost, params=params, learning_rate=alpha) train = theano.function(inputs=[x, length_x, y], outputs=cost, updates=updates, allow_input_downcast=True) # predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) predict = theano.function( inputs=[x, length_x], outputs=T.argmax(l_final_no_dropout.p_y_given_x, axis=1), allow_input_downcast=True, # mode = "DebugMode" ) def b(x_data): return np.array(x_data, dtype=np.int32) def test(test_set): # print "############# TEST ##############" y_pred = [] test_set_y = [] # for train_x, train_y in zip(X_data, Y_data): # print test_set # Accuracy_count = 0 for test_y, test_x in test_set: test_x = b([test_x]) p = predict(test_x, len(test_x))[0] y_pred.append(p) test_set_y.append(test_y) # if test_y == p: # Accuracy_count += 1 # print "*predict :",predict(train_x, len(train_x)), train_y # Accuracy = float(Accuracy_count) / len(test_set) # print " accuracy : %f" % Accuracy, return accuracy_score(test_set_y, y_pred) # print classification_report(test_set_y, y_pred) # train_set_rand = np.ndarray(train_set) train_set_rand = train_set[:] train_cost_sum = 0.0 for epoch in xrange(n_epoch): print "== epoch : %d ==" % epoch if shuffle_flag: np.random.shuffle(train_set_rand) # train_set_rand = np.random.permutation(train_set) for i, x_y_set in enumerate(train_set_rand): train_y, train_x = x_y_set train_x = b([train_x]) train_y = b([train_y]) train_cost = train(train_x, len(train_x), train_y) train_cost_sum += train_cost if i % 1000 == 0 or i == len(train_set) - 1: print "i : (%d/%d)" % (i, len(train_set)), print " (cost : %f )" % train_cost print " cost :", train_cost_sum print " train_set : %f" % test(train_set) print " dev_set : %f" % test(dev_set) print " test_set : %f" % test(test_set) """
def main(train_path, valid_path, test_path, save_path):
    """Problem 2: Logistic regression for incomplete, positive-only labels.

    Run under the following conditions:
        1. on t-labels,
        2. on y-labels,
        3. on y-labels with correction factor alpha.

    Each part fits a fresh classifier on the training set and scatter-plots
    the validation set coloured by its labels.

    NOTE(review): ``test_path`` and the three ``output_path_*`` values are
    computed but never used here -- predictions are not yet written with
    ``np.savetxt`` and part (f) does not apply a correction factor.

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        save_path: Path to save predictions.
    """
    output_path_true = save_path.replace(WILDCARD, 'true')
    output_path_naive = save_path.replace(WILDCARD, 'naive')
    output_path_adjusted = save_path.replace(WILDCARD, 'adjusted')

    # *** START CODE HERE ***
    # Imported once at function scope (the original repeated this import
    # before every part).
    from logreg import LogisticRegression

    def scatter_validation(x, labels):
        """Scatter columns 1 and 2 of ``x`` coloured by ``labels``; return the axes."""
        fig, ax = plt.subplots(1, 1, figsize=(12, 8))
        # The ``np.int`` alias was removed from NumPy; the builtin ``int`` is
        # the documented replacement and yields the same dtype.
        ax.scatter(x[:, 1], x[:, 2], c=labels.astype(int))
        ax.set_ylim(x[:, 2].min(), x[:, 2].max())
        return ax

    # Part (a): Train and test on true labels
    x_train, y_train = util.load_dataset(train_path, add_intercept=True, label_col='t')
    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True, label_col='t')
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    print(clf.theta)
    ax = scatter_validation(x_valid, y_valid)
    plot_decision_line(clf.theta, x_valid, ax)
    plt.savefig("posonly_all_observed.png")
    plt.show()
    # Make sure to save predicted probabilities to output_path_true using np.savetxt()

    # Part (b): Train on y-labels and test on true labels
    x_train, y_train = util.load_dataset(train_path, add_intercept=True, label_col='y')
    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True, label_col='y')
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    print(clf.theta)
    ax = scatter_validation(x_valid, y_valid)
    plot_decision_line(clf.theta, x_valid, ax)
    plt.savefig("naive_training_partial.png")
    plt.show()
    # Make sure to save predicted probabilities to output_path_naive using np.savetxt()

    # Part (f): Apply correction factor using validation set and test on true labels
    # Re-fit on the y-labelled training data loaded for part (b).
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    # decision
    y_pred = clf.predict(x_valid)
    print(y_pred)
    scatter_validation(x_valid, y_valid)
    plt.show()
#coding:utf-8
import sys
from sklearn.externals import joblib
from question71 import makeStoplist
from question72 import extractFeaturesFromString
from logreg import LogisticRegression

if __name__ == "__main__":
    # Interactive classifier: read one line of text from stdin at a time and
    # print "+1" or "-1" for it.
    vectorizer = joblib.load("tfidf.vec")
    clf = LogisticRegression("logreg")
    stoplist = makeStoplist()

    while True:
        try:
            test = input()
        except EOFError:
            # stdin was closed (end of piped input or Ctrl-D): stop cleanly
            # instead of terminating with a traceback.
            break
        test = extractFeaturesFromString(test, stoplist)
        # The first prediction is used as an index into the two label strings.
        print(["-1", "+1"][clf.predict(vectorizer.transform([" ".join(test)]))[0]])
        # Flush after every answer so a consumer reading our stdout sees it
        # immediately.
        sys.stdout.flush()
index = 27 plt.imshow(train_set_x_orig[index]) plt.show() print ("y = " + str(train_set_y[:, index]) + ", it's a '" + classes[np.squeeze(train_set_y[:, index])].decode("utf-8") + "' picture.") ''' # Flatten the images train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T # Normalise image values train_set_x = train_set_x_flatten / 255. test_set_x = test_set_x_flatten / 255. # Create model instance model = LogisticRegression() # Fit model to the data model.fit(train_set_x, train_set_y) # Train the model model.train(2400, verbose=True) # Predict values predictions = model.predict(test_set_x) # Check accuracy model.print_accuracy(predictions, test_set_y) # Plot training loss model.plot_cost()
def main(): print "############# Load Datasets ##############" import stanfordSentimentTreebank as sst skip_unknown_words = bool(args.get("--skip")) shuffle_flag = bool(args.get("--shuffle")) datatype = args.get("--datatype") if datatype == 5: # Fine-grained 5-class n_class = 5 elif datatype == 2: # Binary 2-class n_class = 2 # print "skip_unknown_words",skip_unknown_words vocab, index2word, datasets, datasets_all_sentences, funcs = sst.load_stanfordSentimentTreebank_dataset(normalize=True, skip_unknown_words=skip_unknown_words, datatype=datatype) train_set, test_set, dev_set = datasets train_set_sentences, test_set_sentences, dev_set_sentences = datasets_all_sentences get,sentence2ids, ids2sentence = funcs # 関数を読み込み scores, sentences = zip(*train_set_sentences) sentences = [[word for word in sentence.lower().split()] for sentence in sentences] vocab_size = len(vocab) dev_unknown_count = sum([unknown_word_count for score,(ids,unknown_word_count) in dev_set]) test_unknown_count = sum([unknown_word_count for score,(ids,unknown_word_count) in test_set]) train_set = [(score, ids) for score,(ids,unknown_word_count) in train_set] test_set = [(score, ids) for score,(ids,unknown_word_count) in test_set] dev_set = [(score, ids) for score,(ids,unknown_word_count) in dev_set] print "train_size : ", len(train_set) print "dev_size : ", len(dev_set) print "test_size : ", len(test_set) print "-"*30 print "vocab_size: ", len(vocab) print "dev_unknown_words : ", dev_unknown_count print "test_unknown_words : ", test_unknown_count print args # EMB_DIM = 50 EMB_DIM = args.get("--emb_size") vocab_size = len(vocab) feat_map_n_1 = args.get("--feat_map_n_1") feat_map_n_final = args.get("--feat_map_n_final") height = 1 width1 = args.get("--width1") width2 = args.get("--width2") k_top = args.get("--k_top") n_class = n_class alpha = args.get("--alpha") n_epoch = args.get("--n_epoch") dropout_rate0 = args.get("--dropout_rate0") dropout_rate1 = args.get("--dropout_rate1") dropout_rate2 = 
args.get("--dropout_rate2") activation = args.get("--activation") learn = args.get("--learn") number_of_convolutinal_layer = 2 use_regular = bool(args.get("--use_regular")) regular_c = args.get("--regular_c") pretrain = args.get('--pretrain') if pretrain == 'word2vec': print "*Using word2vec" embeddings_W, model = pretrained_embedding.use_word2vec(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM) # -0.5 ~ 0.5で初期化している elif pretrain == 'glove': print "*Using glove" embeddings_W = pretrained_embedding.use_glove(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM, model_file='glove_model/glove_50_iter2900.model') else: embeddings_W = np.asarray( rng.normal(0, 0.05, size = (vocab_size, EMB_DIM)), dtype = theano.config.floatX ) embeddings_W[0,:] = 0 print np.amax(embeddings_W) print np.amin(embeddings_W) # print "*embeddings" print embeddings_W # print bool(embeddings) # input_x = [1, 3, 4, 5, 0, 22, 4, 5] print "############# Model Setting ##############" x = T.imatrix('x') length_x = T.iscalar('length_x') y = T.ivector('y') # the sentence sentiment label embeddings = WordEmbeddingLayer(rng=rng, input=x, vocab_size=vocab_size, embed_dm=EMB_DIM, embeddings=embeddings_W) def dropout(X, p=0.5): if p > 0: retain_prob = 1 - p X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) # X /= retain_prob return X # number_of_convolutinal_layer = theano.shared(number_of_convolutinal_layer) # dynamic_func = theano.function(inputs=[length_x], outputs=number_of_convolutinal_layer * length_x) # dynamic_func_test = theano.function( # inputs = [length_x], # outputs = dynamic_func(length_x), # ) # print dynamic_func(len([1,2,3])) l1 = DynamicConvFoldingPoolLayer(rng, input = dropout(embeddings.output, p=dropout_rate0), filter_shape = (feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation = activation ) 
l1_no_dropout = DynamicConvFoldingPoolLayer(rng, input = embeddings.output, W=l1.W * (1 - dropout_rate0), b=l1.b, filter_shape = (feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation = activation ) l2 = DynamicConvFoldingPoolLayer(rng, input = dropout(l1.output, p=dropout_rate1), filter_shape = (feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation = activation ) l2_no_dropout = DynamicConvFoldingPoolLayer(rng, input = l1_no_dropout.output, W=l2.W * (1 - dropout_rate1), b=l2.b, filter_shape = (feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation = activation ) # l2_output = theano.function( # inputs = [x,length_x], # outputs = l2.output, # # on_unused_input='ignore' # ) # TODO: # check the dimension # input: 1 x 1 x 6 x 4 # out = l2_output( # np.array([input_x], dtype = np.int32), # len(input_x), # ) # test = theano.function( # inputs = [x], # outputs = embeddings.output, # ) # print "--input--" # print np.array([input_x], dtype = np.int32).shape # print "--input embeddings--" # a = np.array([input_x], dtype = np.int32) # print test(a).shape # print "-- output --" # print out # print out.shape # x = T.dscalar("x") # b = T.dscalar("b") # a = 1 # f = theano.function(inputs=[x,b], outputs=b * x + a) # print f(2,2) # expected = (1, feat_map_n, EMB_DIM / 2, k) # assert out.shape == expected, "%r != %r" %(out.shape, expected) ##### Test Part Three ############### # LogisticRegressionLayer ################################# # print "############# LogisticRegressionLayer ##############" 
l_final = LogisticRegression( rng, input = dropout(l2.output.flatten(2), p=dropout_rate2), n_in = feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out = n_class, # five sentiment level ) l_final_no_dropout = LogisticRegression( rng, input = l2_no_dropout.output.flatten(2), W = l_final.W * (1 - dropout_rate2), b = l_final.b, n_in = feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out = n_class, # five sentiment level ) print "n_in : ", feat_map_n_final * k_top * EMB_DIM # print "n_in = %d" %(2 * 2 * math.ceil(EMB_DIM / 2.)) # p_y_given_x = theano.function( # inputs = [x, length_x], # outputs = l_final.p_y_given_x, # allow_input_downcast=True, # # mode = "DebugMode" # ) # print "p_y_given_x = " # print p_y_given_x( # np.array([input_x], dtype=np.int32), # len(input_x) # ) cost = theano.function( inputs = [x, length_x, y], outputs = l_final.nnl(y), allow_input_downcast=True, # mode = "DebugMode" ) # print "cost:\n", cost( # np.array([input_x], dtype = np.int32), # len(input_x), # np.array([1], dtype = np.int32) # ) print "############# Learning ##############" from sgd import sgd, rmsprop, adagrad, adadelta, adam from regularizer import regularize_l2 layers = [] layers.append(embeddings) layers.append(l1) layers.append(l2) layers.append(l_final) cost = l_final.nnl(y) params = [p for layer in layers for p in layer.params] param_shapes = [l.param_shapes for l in layers] param_grads = [T.grad(cost, param) for param in params] # regularizer setting regularizers = {} regularizers['c'] = regular_c # 2.0, 4.0, 15.0 regularizers['func'] = [None for _ in range(len(params))] if use_regular: regularizers_func = [] regularizers_func.append([regularize_l2(l=0.0001)]) # [embeddings] regularizers_func.append([regularize_l2(l=0.00003), None]) # [W, b] regularizers_func.append([regularize_l2(l=0.000003), None]) # [W, b] 
regularizers_func.append([regularize_l2(l=0.0001), None]) # [logreg_W, logreg_b] regularizers_func = [r_func for r in regularizers_func for r_func in r] regularizers['func'] = regularizers_func # if third conv layer: 1e-5 print embeddings.params print l1.params print l2.params print l_final.params # updates = sgd(cost, l_final.params) # RegE = 1e-4 # print param_grads if learn == "sgd": updates = sgd(cost, params, lr=0.05) elif learn == "adam": updates = adam(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) elif learn == "adagrad": updates = adagrad(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) elif learn == "adadelta": updates = adadelta(loss_or_grads=cost, params=params, regularizers=regularizers) elif learn == "rmsprop": updates = rmsprop(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) train = theano.function(inputs=[x, length_x, y], outputs=cost, updates=updates, allow_input_downcast=True) # predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) predict = theano.function( inputs = [x, length_x], outputs = T.argmax(l_final_no_dropout.p_y_given_x, axis=1), allow_input_downcast=True, # mode = "DebugMode" ) def b(x_data): return np.array(x_data, dtype=np.int32) def test(test_set): # print "############# TEST ##############" y_pred = [] test_set_y = [] # for train_x, train_y in zip(X_data, Y_data): # print test_set # Accuracy_count = 0 for test_y,test_x in test_set: test_x = b([test_x]) p = predict(test_x, len(test_x))[0] y_pred.append(p) test_set_y.append(test_y) # if test_y == p: # Accuracy_count += 1 # print "*predict :",predict(train_x, len(train_x)), train_y # Accuracy = float(Accuracy_count) / len(test_set) # print " accuracy : %f" % Accuracy, return accuracy_score(test_set_y, y_pred) # print classification_report(test_set_y, y_pred) # train_set_rand = np.ndarray(train_set) train_set_rand = train_set[:] train_cost_sum = 0.0 for epoch 
in xrange(n_epoch): print "== epoch : %d ==" % epoch if shuffle_flag: np.random.shuffle(train_set_rand) # train_set_rand = np.random.permutation(train_set) for i,x_y_set in enumerate(train_set_rand): train_y, train_x = x_y_set train_x = b([train_x]) train_y = b([train_y]) train_cost = train(train_x, len(train_x) , train_y) train_cost_sum += train_cost if i % 1000 == 0 or i == len(train_set)-1: print "i : (%d/%d)" % (i, len(train_set)) , print " (cost : %f )" % train_cost print ' cost :', train_cost_sum print ' train_set : %f' % test(train_set) print ' dev_set : %f' % test(dev_set) print ' test_set : %f' % test(test_set) '''
#coding:utf-8
import numpy as np
from scipy import io
from sklearn.externals import joblib
from sklearn.metrics import precision_score, recall_score
from logreg import LogisticRegression
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Load the training matrix and convert the sparse matrix type to CSR
    # (the same kind that TfidfVectorizer outputs), then load the labels.
    X_train = io.loadmat("X_train")["X_train"].tocsr()
    y_train = np.load("y_train.npy")

    clf = LogisticRegression("logreg")

    # Plot how precision and recall change with the threshold.
    threshold_list = [step * 0.05 for step in range(20)]

    # Lazily predict once per threshold; each prediction is scored for
    # precision and then recall before the next one is computed.
    predictions = (clf.predict(X_train, threshold) for threshold in threshold_list)
    scores = [
        (precision_score(y_train, y_predict), recall_score(y_train, y_predict))
        for y_predict in predictions
    ]
    precision_list = [precision for precision, _ in scores]
    recall_list = [recall for _, recall in scores]

    curves = (
        (precision_list, "precision", "red"),
        (recall_list, "recall", "blue"),
    )
    for values, label, color in curves:
        plt.plot(threshold_list, values, label=label, color=color)

    plt.xlabel("threshold")
    plt.ylabel("rate")
    plt.xlim(0.0, 1.0)
    plt.ylim(0, 1)
import time
import numpy as np
from scipy import io
from sklearn.externals import joblib
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from logreg import LogisticRegression

if __name__ == "__main__":
    # Load the training matrix and convert the sparse matrix type to CSR
    # (the same kind that TfidfVectorizer outputs), then load the labels.
    X_train = io.loadmat("X_train")["X_train"].tocsr()
    y_train = np.load("y_train.npy")

    # Report line templates paired with the metric that fills each one
    # (accuracy, precision, recall, F1), in print order.
    reports = (
        ("正解率: %f", accuracy_score),
        ("適合率: %f", precision_score),
        ("再現率: %f", recall_score),
        ("F1スコア: %f", f1_score),
    )

    splitter = KFold(n_splits=5)
    started_at = time.time()

    # Train on each fold's training indices and score on its held-out indices.
    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X_train), start=1):
        clf = LogisticRegression()
        clf.fit(X_train[train_idx], y_train[train_idx])
        y_predict = clf.predict(X_train[test_idx])
        y_test = y_train[test_idx]

        print("Fold %d" % fold_no)
        for template, metric in reports:
            print(template % metric(y_test, y_predict))
        print("")

    # Wall-clock time for the whole cross-validation run.
    print(str(time.time() - started_at) + "[sec]")
filename = 'data/data2.dat'
data = loadtxt(filename, delimiter=',')
X = data[:, 0:2]
# Labels as a column: wrapping in a list and transposing gives shape (n, 1).
y = np.array([data[:, 2]]).T
n,d = X.shape

# Standardize the data
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

# map features into a higher dimensional feature space
X = mapFeature(X[:,0],X[:,1])

# train logistic regression
logregModel = LogisticRegression()
logregModel.fit(X,y)

# Rebuild the standardized 2-D features and flat labels for plotting.
# `data` is still untouched at this point (everything above works on copies),
# so it is reused with the mean/std computed above rather than re-reading and
# re-standardizing the file.
PX = (data[:, 0:2] - mean) / std
y = data[:, 2]

# Plot the decision boundary
h = .02  # step size in the mesh
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Build a one-hidden-layer perceptron and expose its costs and parameters.

    :type rng: numpy.random.RandomState
    :param rng: random number generator used to initialize the weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable describing the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, i.e. the dimension of the space
    in which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, i.e. the dimension of the space
    in which the labels lie
    """
    # The network is a tanh HiddenLayer feeding a LogisticRegression layer;
    # tanh could be swapped for sigmoid or any other nonlinearity.
    hidden = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        activation=T.tanh
    )
    self.hiddenLayer = hidden

    # The output layer consumes the hidden layer's activations.
    log_reg = LogisticRegression(
        input=hidden.output,
        n_in=n_hidden,
        n_out=n_out
    )
    self.logRegressionLayer = log_reg
    # end-snippet-2 start-snippet-3

    # Regularization option 1: L1 norm of the weight matrices of both layers.
    self.L1 = (
        abs(hidden.W).sum()
        + abs(log_reg.W).sum()
    )

    # Regularization option 2: squared L2 norm of the same weight matrices.
    self.L2_sqr = (
        (hidden.W ** 2).sum()
        + (log_reg.W ** 2).sum()
    )

    # The model's negative log likelihood and error count are those of the
    # logistic regression output layer.
    self.negative_log_likelihood = (
        log_reg.negative_log_likelihood
    )
    self.errors = log_reg.errors

    # Model parameters: hidden-layer parameters followed by output-layer ones.
    self.params = hidden.params + log_reg.params
    # end-snippet-3

    # keep a handle on the symbolic model input
    self.input = input
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    Builds two LeNetConvPoolLayer stages, a tanh HiddenLayer and a
    LogisticRegression output layer, then trains them with minibatch SGD
    and patience-based early stopping. Progress and the final scores are
    printed; nothing is returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (the list is only read,
                   never mutated, in this function)

    :type batch_size: int
    :param batch_size: number of examples per minibatch; also baked into the
                       reshape and image_shape arguments below
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    # NOTE(review): this file uses Python 2 print statements, where `/=` on
    # ints floors; the xrange() calls below need these counts to stay ints.
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer (tanh activation, 500 output units)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    # (`givens` substitutes the index-th minibatch slice for x and y)
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # global count of minibatches processed so far
            # (note: this local name shadows the builtin `iter`)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            # cost_ij is not read again in this function
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # stop once the patience budget has been used up
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # the run-time summary goes to stderr, not stdout
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def main(train_path, valid_path, test_path, save_path):
    """Problem 2: logistic regression with incomplete, positive-only labels.

    Three runs are performed, each saving its test-set predictions with
    np.savetxt and a plot next to them:
        1. trained on t-labels,
        2. trained on y-labels,
        3. trained on y-labels, predictions divided by a correction factor
           alpha estimated on the validation set.

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        save_path: Path to save predictions.
    """
    true_out = save_path.replace(WILDCARD, 'true')
    naive_out = save_path.replace(WILDCARD, 'naive')
    adjusted_out = save_path.replace(WILDCARD, 'adjusted')

    # *** START CODE HERE ***
    def _load(csv_path, label):
        # Every dataset in this problem is loaded with an intercept column.
        return util.load_dataset(csv_path, label_col=label, add_intercept=True)

    def _png(path):
        # Swap the last three characters of the path for "png".
        return path[:-3] + "png"

    # Part (a): train and test on the true labels; save predictions to true_out.
    train_x, train_t = _load(train_path, "t")
    test_x, test_t = _load(test_path, "t")
    clf = LogisticRegression()
    clf.fit(train_x, train_t)
    np.savetxt(true_out, clf.predict(test_x))
    util.plot(test_x, test_t, clf.theta, save_path=_png(true_out))

    # Part (b): train on the y-labels; the plot still uses the true test labels.
    train_x, train_y = _load(train_path, "y")
    test_x, _ = _load(test_path, "y")
    clf = LogisticRegression()
    clf.fit(train_x, train_y)
    np.savetxt(naive_out, clf.predict(test_x))
    util.plot(test_x, test_t, clf.theta, save_path=_png(naive_out))

    # Part (f): estimate alpha on the validation set and rescale predictions.
    val_x, val_y = _load(valid_path, "y")
    clf = LogisticRegression()
    clf.fit(train_x, train_y)
    val_pred = clf.predict(val_x)
    # alpha is the mean prediction over validation samples with y == 1.
    alpha = np.mean(val_pred[val_y == 1])

    np.savetxt(adjusted_out, clf.predict(test_x) / alpha)

    util.plot(test_x, test_t, clf.theta,
              save_path=_png(adjusted_out), correction=alpha)
x = np.random.rand(3, 10) y = np.asarray(np.random.randint(5, size=3), dtype=np.int32) np_l = LogisticRegression(W, b) ######################### # THEANO PART ######################### x_symbol = theano.tensor.dmatrix('x') y_symbol = theano.tensor.ivector('y') th_l = TheanoLogisticRegression(rng=np.random.RandomState(1234), input=x_symbol, n_in=10, n_out=5, W=theano.shared(value=W, name="W"), b=theano.shared(value=b, name="b")) f1 = theano.function(inputs=[x_symbol, y_symbol], outputs=th_l.nnl(y_symbol)) actual = np_l.nnl(x, y) expected = f1(x, y) assert_matrix_eq(actual, expected, "nnl") f2 = theano.function(inputs=[x_symbol, y_symbol], outputs=th_l.errors(y_symbol)) actual = np_l.errors(x, y) expected = f2(x, y)
def main(train_path, valid_path, test_path, save_path):
    """Problem 2: logistic regression with incomplete, positive-only labels.

    Runs three experiments, each plotted against the true test labels and
    saved with np.savetxt:
        1. trained on t-labels,
        2. trained on y-labels,
        3. the y-label model with its predictions divided by a correction
           factor alpha estimated on the validation set.

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        save_path: Path to save predictions.
    """
    output_path_true = save_path.replace(WILDCARD, 'true')
    output_path_naive = save_path.replace(WILDCARD, 'naive')
    output_path_adjusted = save_path.replace(WILDCARD, 'adjusted')

    # *** START CODE HERE ***
    def _load(csv_path, label):
        # All datasets are loaded with an intercept column.
        return util.load_dataset(csv_path, label_col=label, add_intercept=True)

    # Parts (a) and (b) follow the same recipe and differ only in the
    # training label column and the output file: train on t-labels first,
    # then on y-labels, always testing against the true labels.
    runs = (
        ('t', output_path_true),
        ('y', output_path_naive),
    )
    for train_label, out_path in runs:
        x_train, train_labels = _load(train_path, train_label)
        model = LogisticRegression()
        model.fit(x_train, train_labels)

        x_test, t_test = _load(test_path, 't')
        predictions = model.predict(x_test)
        util.plot(x_test, t_test, model.theta, '{}.png'.format(out_path))
        np.savetxt(out_path, predictions)

    # Part (f): apply the correction factor using the validation set and test
    # on true labels. `model`, `x_test` and `t_test` are those left over from
    # the y-label run above.
    x_val, y_val = _load(valid_path, 'y')
    val_predictions = model.predict(x_val)
    # alpha is the mean prediction over validation samples with y == 1.
    alpha = np.mean(val_predictions[y_val == 1])

    corrected = model.predict(x_test) / alpha
    util.plot(x_test, t_test, model.theta,
              '{}.png'.format(output_path_adjusted), correction=alpha)
    np.savetxt(output_path_adjusted, corrected)
filename = 'data/data2.dat'
data = loadtxt(filename, delimiter=',')
X = data[:, 0:2]
# Labels as a column: wrapping in a list and transposing gives shape (n, 1).
y = np.array([data[:, 2]]).T
n, d = X.shape

# Standardize the data
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

# map features into a higher dimensional feature space
X = mapFeature(X[:, 0], X[:, 1])

# train logistic regression
logregModel = LogisticRegression(regLambda=10)
logregModel.fit(X, y)

# Rebuild the standardized 2-D features and flat labels for plotting.
# `data` is still untouched at this point (everything above works on copies),
# so it is reused with the mean/std computed above rather than re-reading and
# re-standardizing the file.
PX = (data[:, 0:2] - mean) / std
y = data[:, 2]

# Plot the decision boundary
h = .02  # step size in the mesh
x_min, x_max = PX[:, 0].min() - .5, PX[:, 0].max() + .5
) np_l = LogisticRegression(W, b) ######################### # THEANO PART ######################### x_symbol = theano.tensor.dmatrix('x') y_symbol = theano.tensor.ivector('y') th_l = TheanoLogisticRegression(rng = np.random.RandomState(1234), input = x_symbol, n_in = 10, n_out = 5, W = theano.shared(value = W, name = "W"), b = theano.shared(value = b, name = "b") ) f1 = theano.function(inputs = [x_symbol, y_symbol], outputs = th_l.nnl(y_symbol) ) actual = np_l.nnl(x, y) expected = f1(x, y) assert_matrix_eq(actual, expected, "nnl")
if __name__ == "__main__":
    # Load Data: the first two columns are the features, the third the label.
    filename = 'data/data1.dat'
    data = loadtxt(filename, delimiter=',')
    X = data[:, 0:2]
    y = np.transpose(np.array([data[:, 2]]))  # labels as an (n, 1) column

    n, d = X.shape

    # Standardize each feature column to zero mean and unit variance.
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    X = (X - mean) / std

    # Train logistic regression.
    logregModel = LogisticRegression(regLambda=1e-8)
    logregModel.fit(X, y)

    # Plot the decision boundary: predict over a mesh that extends 0.5 past
    # the data range in both features.
    h = .02  # step size in the mesh
    x_min = X[:, 0].min() - .5
    x_max = X[:, 0].max() + .5
    y_min = X[:, 1].min() - .5
    y_max = X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = logregModel.predict(grid_points)

    # Put the result into a color plot, shaped like the mesh.
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot the training points
#coding:utf-8
import numpy as np
from scipy import io
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from logreg import LogisticRegression

if __name__ == "__main__":
    # Load the stored training matrix and convert it to CSR, i.e. the same
    # sparse-matrix type that TfidfVectorizer outputs.
    X_train = io.loadmat("X_train")["X_train"].tocsr()
    y_train = np.load("y_train.npy")

    # Load the stored classifier and predict on the training data.
    clf = LogisticRegression("logreg")
    y_predict = clf.predict(X_train)

    # Report accuracy, precision, recall and F1 score, in that order.
    reports = (
        ("正解率: %f", accuracy_score),
        ("適合率: %f", precision_score),
        ("再現率: %f", recall_score),
        ("F1スコア: %f", f1_score),
    )
    for template, metric in reports:
        print(template % metric(y_train, y_predict))
# Show the previous layer's output and verify its shape.
print(out)
actual_shape = out.shape
print(actual_shape)
expected = (1, feat_map_n, EMB_DIM / 2, k)
assert actual_shape == expected, "%r != %r" % (actual_shape, expected)

##### Test Part Three ###############
# LogisticRegressionLayer
#################################
print("############# LogisticRegressionLayer ##############")

# The layer output is flattened to 2D before entering the classifier.
l3 = LogisticRegression(
    rng,
    input=l2.output.flatten(2),
    n_in=feat_map_n * k * EMB_DIM / 2,  # we fold once, so divide by 2
    n_out=5,  # five sentiment levels
)

print("n_in = %d" % (2 * 2 * math.ceil(EMB_DIM / 2.)))

y = T.ivector('y')  # the sentence sentiment label

# Compile and run the class-probability function on two toy sentences.
p_y_given_x = theano.function(inputs=[x],
                              outputs=l3.p_y_given_x,
                              mode="DebugMode")
demo_sentences = np.array([[1, 3, 4, 5], [0, 1, 4, 7]], dtype=np.int32)
print("p_y_given_x = ")
print(p_y_given_x(demo_sentences))

# Compile the cost function from the layer's nnl(y) expression.
cost = theano.function(inputs=[x, y],
                       outputs=l3.nnl(y),
                       mode="DebugMode")
def main(train_path, valid_path, test_path, save_path):
    """Problem 2: Logistic regression for incomplete, positive-only labels.

    Run under the following conditions:
        1. on t-labels,
        2. on y-labels,
        3. on y-labels with correction factor alpha.

    Every model is plotted against the true (t) test labels, and the
    predicted probabilities are written with np.savetxt().

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        save_path: Path to save predictions; WILDCARD is replaced by
            'true', 'naive' and 'adjusted' for the three conditions.
    """
    output_path_true = save_path.replace(WILDCARD, 'true')
    output_path_naive = save_path.replace(WILDCARD, 'naive')
    output_path_adjusted = save_path.replace(WILDCARD, 'adjusted')

    # *** START CODE HERE ***
    # The test set is always scored against the true labels, so load it once.
    x_test, t_test = util.load_dataset(test_path, label_col='t',
                                       add_intercept=True)

    # Part (a): Train and test on true labels
    x_train, t_train = util.load_dataset(train_path, label_col='t',
                                         add_intercept=True)
    model_true = LogisticRegression()
    model_true.fit(x_train, t_train)
    util.plot(x_test, t_test, model_true.theta, 'plot_5a.png')
    np.savetxt(output_path_true, model_true.predict(x_test))

    # Part (b): Train on y-labels and test on true labels
    x_train, y_train = util.load_dataset(train_path, label_col='y',
                                         add_intercept=True)
    model_naive = LogisticRegression()
    model_naive.fit(x_train, y_train)
    # Fix: the plot previously used the y-labels of the test set although
    # this part is evaluated on the true labels.
    util.plot(x_test, t_test, model_naive.theta, 'plot_5b.png')
    np.savetxt(output_path_naive, model_naive.predict(x_test))

    # Part (f): Apply correction factor using validation set and test on
    # true labels.
    # alpha estimates p(y = 1 | t = 1): the mean naive prediction over the
    # validation examples that are *observed* positive (y == 1).  Fix: the
    # validation labels were previously loaded from the 't' column.
    x_valid, y_valid = util.load_dataset(valid_path, label_col='y',
                                         add_intercept=True)
    alpha = np.mean(model_naive.predict(x_valid[y_valid == 1]))

    util.plot(x_test, t_test, model_naive.theta, 'plot_5f.png',
              correction=alpha)
    # Fix: p(t = 1 | x) = p(y = 1 | x) / alpha, so the naive probabilities
    # are divided by alpha (they were previously multiplied).
    np.savetxt(output_path_adjusted, model_naive.predict(x_test) / alpha)
def __init__(self, x, y, vocab_size, embed_dim, label_n):
    """
    Build the symbolic graph of the model and compile its Theano functions.

    x: theano.tensor.imatrix, (minibatch size, 3)
       the tree matrix of the minibatch
       for each row, (node id, left child id, right child id)

    y: theano.tensor.ivector, (minibatch size,)
       the labels

    vocab_size: int
        vocabulary size, including both the words and phrases

    embed_dim: int
        the embedding dimension

    label_n: int
        passed to the logistic regression layer as `n_out`
    """
    assert x.ndim == 2
    assert y.ndim == 1

    # Column 0 is the node itself, the remaining columns are its children.
    node_ids = x[:, 0]
    child_ids = x[:, 1:]

    rng = np.random.RandomState(1234)

    initial_embedding = rng.normal(0, 0.05, (vocab_size, embed_dim))
    self.embedding = theano.shared(
        value=initial_embedding,
        name='embedding',
        borrow=True,
    )

    self.rntn_layer = RNTNLayer(rng, embed_dim)

    # One scan step: forward the embedding bottom-up for a single row of
    # the tree matrix and return the (possibly updated) embedding matrix.
    def forward_node(children, node_id, current):
        assert children.ndim == 1
        assert node_id.ndim == 0

        left_id = children[0]
        right_id = children[1]

        # Embedding matrix with this node's row replaced by the RNTN
        # composition of its two children.
        composed = self.rntn_layer.output(current[left_id],
                                          current[right_id])
        filled_in = T.set_subtensor(current[node_id], composed)

        # NOTE: only the first child is tested (not all()) because all()
        # is non-differentiable.  A first child of -1 means "no child":
        # the word embedding is returned unchanged.
        has_no_child = T.eq(left_id, -1)
        return T.switch(has_no_child, current, filled_in)

    # The whole matrix is threaded through the scan so each step can fill
    # in positions when necessary; scan's own updates are not used.
    embedding_steps, _ = theano.scan(
        fn=forward_node,
        sequences=[child_ids, node_ids],
        outputs_info=self.embedding,
    )
    node_vectors = embedding_steps[-1][node_ids]

    # Writes the propagated node vectors back into the shared embedding.
    self.update_embedding = theano.function(
        inputs=[x],
        updates=[(self.embedding,
                  T.set_subtensor(self.embedding[node_ids], node_vectors))])

    # the logistic regression layer that predicts the label
    self.logreg_layer = LogisticRegression(rng,
                                           input=node_vectors,
                                           n_in=embed_dim,
                                           n_out=label_n)

    cost = self.logreg_layer.nnl(y)

    self.params = (self.logreg_layer.params
                   + self.rntn_layer.params
                   + [self.embedding])
    param_shapes = (self.logreg_layer.param_shapes
                    + self.rntn_layer.param_shapes
                    + [(vocab_size, embed_dim)])

    grads = [T.grad(cost=cost, wrt=param) for param in self.params]

    adadelta_updates = build_adadelta_updates(self.params,
                                              param_shapes,
                                              grads,
                                              epsilon=0.1)

    # TODO: in this step, forward propagation is done again besides the one in `update_embedding`
    # this extra computation should be avoided
    self.train = theano.function(inputs=[x, y], updates=adadelta_updates)
def __init__(self, x, y, vocab_size, embed_dim, label_n):
    """
    Construct the symbolic model graph and compile the Theano functions
    `update_embedding` and `train`.

    x: theano.tensor.imatrix, (minibatch size, 3)
       the tree matrix of the minibatch
       for each row, (node id, left child id, right child id)

    y: theano.tensor.ivector, (minibatch size,)
       the labels

    vocab_size: int
        vocabulary size, including both the words and phrases

    embed_dim: int
        the embedding dimension

    label_n: int
        forwarded to the logistic regression layer as `n_out`
    """
    assert x.ndim == 2
    assert y.ndim == 1

    # Split the tree matrix: node id column vs. the child id columns.
    own_ids = x[:, 0]
    kids = x[:, 1:]

    rng = np.random.RandomState(1234)

    self.embedding = theano.shared(
        value=rng.normal(0, 0.05, (vocab_size, embed_dim)),
        name='embedding',
        borrow=True,
    )

    self.rntn_layer = RNTNLayer(rng, embed_dim)

    def step(kid_pair, own_id, matrix):
        """Scan step: return the embedding matrix after visiting one row."""
        assert kid_pair.ndim == 1
        assert own_id.ndim == 0

        first_kid, second_kid = kid_pair[0], kid_pair[1]

        # otherwise-branch: row `own_id` replaced by the RNTN layer output
        # computed from the two child embeddings
        merged = T.set_subtensor(
            matrix[own_id],
            self.rntn_layer.output(matrix[first_kid], matrix[second_kid]),
        )

        # NOTE: not using all() because it's non-differentiable; a first
        # child id of -1 marks a row without children, for which the
        # embedding is passed through untouched.
        return T.switch(T.eq(first_kid, -1), matrix, merged)

    # we pass the whole matrix and fill in the positions if necessary;
    # the updates returned by scan are discarded
    scan_outputs, _ = theano.scan(
        fn=step,
        sequences=[kids, own_ids],
        outputs_info=self.embedding,
    )
    last_embedding = scan_outputs[-1]

    embedding_write_back = T.set_subtensor(self.embedding[own_ids],
                                           last_embedding[own_ids])
    self.update_embedding = theano.function(
        inputs=[x],
        updates=[(self.embedding, embedding_write_back)],
    )

    # the logistic regression layer that predicts the label
    self.logreg_layer = LogisticRegression(
        rng,
        input=last_embedding[own_ids],
        n_in=embed_dim,
        n_out=label_n,
    )

    cost = self.logreg_layer.nnl(y)

    trainable = self.logreg_layer.params + self.rntn_layer.params + [self.embedding]
    self.params = trainable

    shapes = self.logreg_layer.param_shapes + self.rntn_layer.param_shapes + [(vocab_size, embed_dim)]

    gradients = [T.grad(cost=cost, wrt=param) for param in trainable]

    training_updates = build_adadelta_updates(trainable, shapes, gradients, epsilon=0.1)

    # TODO: in this step, forward propagation is done again besides the one in `update_embedding`
    # this extra computation should be avoided
    self.train = theano.function(inputs=[x, y], updates=training_updates)
fold = 1, W = theano.shared(value = W, name = "W"), b = theano.shared(value = b, name = "b") ) n_in = filter_shape[0] * k * embed_dm / 2 n_out = 5 W_logreg = np.asarray(np.random.rand(n_in, n_out), dtype = theano.config.floatX) b_logreg = np.asarray(np.random.rand(n_out), dtype = theano.config.floatX) layer3 = LogisticRegression(rng = rng, input = layer2.output.flatten(2), n_in = n_in, n_out = n_out, W = theano.shared(value = W_logreg, name = "W_logreg"), b = theano.shared(value = b_logreg, name = "b_logreg") ) f1 = theano.function(inputs = [x_symbol, y_symbol], outputs = layer3.nnl(y_symbol) ) f2 = theano.function(inputs = [x_symbol, y_symbol], outputs = layer3.errors(y_symbol) ) f3 = theano.function(inputs = [x_symbol], outputs = layer3.p_y_given_x )
#coding:utf-8
from sklearn.externals import joblib
from logreg import LogisticRegression

ENCODING = "cp1252"


def _show_terms(terms, clf, indices):
    # Print each selected term next to its coefficient.
    for i in indices:
        print(terms[i], clf.coef_[i])


if __name__ == "__main__":
    # Load the stored vectorizer and classifier.
    vectorizer = joblib.load("tfidf.vec")
    clf = LogisticRegression("logreg")
    terms = vectorizer.get_feature_names()

    # Term indices ordered by ascending coefficient.
    index_list = sorted(range(len(terms)), key=lambda i: clf.coef_[i])

    # Largest coefficients first (at most ten).
    print("top 10")
    _show_terms(terms, clf, reversed(index_list[-10:]))

    print("")

    # Smallest coefficients (at most ten), in ascending order.
    print("worst 10")
    _show_terms(terms, clf, index_list[:10])
# 1) Replace the `create_dataset` function in your dep_parser.py file with the one from dep_parser_fix.py
# 2) Replace parse_dataset.py with the given new version
#

# Create parser
p = Parser()

# Create training dataset
ds = p.create_dataset("en-ud-train-projective.conllu", train=True)

# Train LR model, or reuse a previously pickled one.
# NOTE: pickle.load executes arbitrary code from the file; 'model.pkl' is
# expected to be the cache written by this very script.
if os.path.exists('model.pkl'):
    # if model exists, load from file
    print("Loading existing model...")
    with open('model.pkl', 'rb') as model_file:  # closed even on error
        lr = pickle.load(model_file)
else:
    # train model using minibatch GD
    lr = LogisticRegression()
    lr.fit(*ds.to_arrays())
    with open('model.pkl', 'wb') as model_file:  # flushed and closed on exit
        pickle.dump(lr, model_file)

# Create test dataset
test_ds = p.create_dataset("en-ud-dev.conllu")

# Copy feature maps to ensure that test datapoints are encoded in the same way
test_ds.copy_feature_maps(ds)

# Compute move-level accuracy
lr.classify_datapoints(*test_ds.to_arrays())

# Compute UAS and sentence-level accuracy
t = TreeConstructor(p)
t.evaluate(lr, 'en-ud-dev.conllu', ds)
# Convolution / folding / pooling layer on top of layer1, using the fixed
# W and b arrays wrapped as shared variables.
conv_shared_W = theano.shared(value=W, name="W")
conv_shared_b = theano.shared(value=b, name="b")
layer2 = ConvFoldingPoolLayer(
    rng=rng,
    input=layer1.output,
    filter_shape=filter_shape,
    k=k,
    fold=1,
    W=conv_shared_W,
    b=conv_shared_b,
)

# Classifier dimensions; the division by 2 accompanies fold=1 above.
n_out = 5
n_in = filter_shape[0] * k * embed_dm / 2

# Random classifier parameters (W is drawn first, then b).
W_logreg = np.asarray(np.random.rand(n_in, n_out),
                      dtype=theano.config.floatX)
b_logreg = np.asarray(np.random.rand(n_out),
                      dtype=theano.config.floatX)

logreg_shared_W = theano.shared(value=W_logreg, name="W_logreg")
logreg_shared_b = theano.shared(value=b_logreg, name="b_logreg")
layer3 = LogisticRegression(
    rng=rng,
    input=layer2.output.flatten(2),
    n_in=n_in,
    n_out=n_out,
    W=logreg_shared_W,
    b=logreg_shared_b,
)

# Compiled functions: nnl, errors and class probabilities of layer3 ...
f1 = theano.function(inputs=[x_symbol, y_symbol],
                     outputs=layer3.nnl(y_symbol))
f2 = theano.function(inputs=[x_symbol, y_symbol],
                     outputs=layer3.errors(y_symbol))
f3 = theano.function(inputs=[x_symbol],
                     outputs=layer3.p_y_given_x)

# ... and the raw outputs of layer1 and layer2.
f_el = theano.function(inputs=[x_symbol], outputs=layer1.output)
f_cl = theano.function(inputs=[x_symbol], outputs=layer2.output)

#########################
def main(train_path, valid_path, test_path, save_path):
    """Problem 2: Logistic regression for incomplete, positive-only labels.

    Three models are evaluated in turn:
        1. trained on t-labels,
        2. trained on y-labels,
        3. trained on y-labels, rescaled with the correction factor alpha.

    Args:
        train_path: Path to CSV file containing training set.
        valid_path: Path to CSV file containing validation set.
        test_path: Path to CSV file containing test set.
        save_path: Path to save predictions.
    """
    output_path_true = save_path.replace(WILDCARD, 'true')
    output_path_naive = save_path.replace(WILDCARD, 'naive')
    output_path_adjusted = save_path.replace(WILDCARD, 'adjusted')

    # Part (a): fit on the t-labels; the t test labels are reused for every plot.
    x_train, t_train = util.load_dataset(train_path, 't', add_intercept=True)
    x_test, t_test = util.load_dataset(test_path, 't', add_intercept=True)

    true_model = LogisticRegression()
    true_model.fit(x_train, t_train)
    util.plot(x_test, t_test, true_model.theta, 'posonly-true.jpg')
    true_probs = true_model.predict(x_test)
    np.savetxt(output_path_true, true_probs)

    # Part (b): fit on the labels returned by load_dataset's default label column.
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    x_test, _ = util.load_dataset(test_path, add_intercept=True)
    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True)

    naive_model = LogisticRegression()
    naive_model.fit(x_train, y_train)
    util.plot(x_test, t_test, naive_model.theta, 'posonly-naive.jpg')
    naive_probs = naive_model.predict(x_test)
    np.savetxt(output_path_naive, naive_probs)

    # Part (f): alpha is the mean prediction over validation rows labelled 1;
    # dividing by it rescales the naive probabilities.
    labelled_positive = x_valid[y_valid == 1]
    alpha = np.mean(naive_model.predict(labelled_positive))
    adjusted_probs = naive_model.predict(x_test) / alpha
    np.savetxt(output_path_adjusted, adjusted_probs)

    # Shift the intercept in place so the plotted boundary reflects alpha.
    naive_model.theta[0] += np.log(2 / alpha - 1)
    util.plot(x_test, t_test, naive_model.theta, 'posonly_adjusted.jpg')
def evaluatePerformance(numTrials = 1000):
    '''
    Evaluate the performance of decision trees and logistic regression,
    averaged over numTrials trials (1,000 by default) of 10-fold cross
    validation.

    Within every fold both classifiers are trained on the first 10%, 20%,
    ..., 90% of the training split and scored on the complete held-out
    fold.  One row of nine accuracies is stored per (trial, fold).  The
    per-percentage mean accuracies are saved as a learning curve in
    'learningcurve.png'.

    Side effects: reads 'data/SPECTF.dat', seeds NumPy's global random
    generator with 13, and writes 'learningcurve.png'.

    Return:
      a matrix giving the performance that will contain the following entries:
      stats[0,0] = mean accuracy of decision tree
      stats[0,1] = std deviation of decision tree accuracy
      stats[1,0] = mean accuracy of logistic regression
      stats[1,1] = std deviation of logistic regression accuracy
      (mean and std deviation are taken over all stored accuracies)

    ** Note that your implementation must follow this API**
    '''
    # Load Data: column 0 is the label, the remaining columns are features.
    filename = 'data/SPECTF.dat'
    data = np.loadtxt(filename, delimiter=',')
    X = data[:, 1:]
    y = np.array([data[:, 0]]).T
    n, d = X.shape

    # index vector that is reshuffled before every trial
    idx = np.arange(n)
    np.random.seed(13)

    # number of folds
    k = 10

    # Start / end row of each fold; the last fold also takes the n % k
    # left-over rows.  The same bounds serve both X and y.
    fold_size = n // k
    fold_bounds = np.append(np.arange(k) * fold_size, n)

    # Training-set percentages of the learning curve.
    percent_array = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    n_steps = len(percent_array)

    # One row per (trial, fold), one column per training percentage.
    log_learning = np.zeros((numTrials * k, n_steps))
    tree_learning = np.zeros((numTrials * k, n_steps))

    # making decision tree object and a logistic regression object
    clf = tree.DecisionTreeClassifier()
    lr = LogisticRegression(alpha = 0.0000001, regLambda=0.001, epsilon=0.0001, maxNumIters = 10000)

    row = 0
    for trial in range(numTrials):
        # shuffle the data before each cross validation
        np.random.shuffle(idx)
        X = X[idx]
        y = y[idx]

        for fold in range(k):
            start, stop = fold_bounds[fold], fold_bounds[fold + 1]

            # the current fold is the test set, everything else is training data
            Xtest = X[start:stop, :]
            ytest = y[start:stop, :]
            Xtrain = np.append(X[:start, :], X[stop:, :], axis=0)
            ytrain = np.append(y[:start, :], y[stop:, :], axis=0)

            # size of a 10% block of the training split
            block = Xtrain.shape[0] // 10

            for step in range(1, n_steps + 1):
                # train on the first step*10% of the training split ...
                m = block * step
                Xpart = Xtrain[:m, :]
                ypart = ytrain[:m, :]

                clf = clf.fit(Xpart, ypart)
                lr.fit(Xpart, ypart)

                # ... and always score on the complete held-out fold.  Each
                # cell holds a single accuracy, not a running sum.
                tree_learning[row, step - 1] = accuracy_score(ytest, clf.predict(Xtest))
                log_learning[row, step - 1] = accuracy_score(ytest, lr.predict(Xtest))

            # advance to the next result row (the original `ll+1` was a no-op)
            row += 1

    # make certain that the return value matches the API specification
    stats = np.zeros((2,2))
    stats[0,0] = np.mean(tree_learning)
    stats[0,1] = np.std(tree_learning)
    stats[1,0] = np.mean(log_learning)
    stats[1,1] = np.std(log_learning)

    # mean accuracy of each percentage block over all trials and folds
    plot_log = log_learning.mean(axis=0)
    plot_tree = tree_learning.mean(axis=0)

    plt.figure(1)
    plt.clf()
    plt.title("Learning Curve")
    plt.xlabel("Percentage")
    plt.ylabel("Accuracy")
    plt.axis([0,100, .6,.8])
    plt.plot(percent_array, plot_log, 'rx', label='Logistic Regression')
    plt.plot(percent_array, plot_tree, 'bx', label='Decision Tree')
    plt.legend(loc='lower right')
    plt.savefig('learningcurve.png')

    return stats
# Show the previous layer's output and verify its shape.
print(out)
actual_shape = out.shape
print(actual_shape)
expected = (1, feat_map_n, EMB_DIM / 2, k)
assert actual_shape == expected, "%r != %r" % (actual_shape, expected)

##### Test Part Three ###############
# LogisticRegressionLayer
#################################
print("############# LogisticRegressionLayer ##############")

# The layer output is flattened to 2D before entering the classifier.
l3 = LogisticRegression(
    rng,
    input=l2.output.flatten(2),
    n_in=feat_map_n * k * EMB_DIM / 2,  # we fold once, so divide by 2
    n_out=5,  # five sentiment levels
)

print("n_in = %d" % (2 * 2 * math.ceil(EMB_DIM / 2.0)))

y = T.ivector("y")  # the sentence sentiment label

# Two toy sentences and their labels, shared by both checks below.
demo_sentences = np.array([[1, 3, 4, 5], [0, 1, 4, 7]], dtype=np.int32)
demo_labels = np.array([1, 2], dtype=np.int32)

# Class probabilities.
p_y_given_x = theano.function(inputs=[x],
                              outputs=l3.p_y_given_x,
                              mode="DebugMode")
print("p_y_given_x = ")
print(p_y_given_x(demo_sentences))

# Cost from the layer's nnl(y) expression.
cost = theano.function(inputs=[x, y],
                       outputs=l3.nnl(y),
                       mode="DebugMode")
# Single-argument form of `print "cost:\n", value`: the header ends with a
# newline, so the print statement inserts no separating space.
print("cost:\n%s" % (cost(demo_sentences, demo_labels),))
def train_lenet5(train_set_x, train_set_y, params, batch_size, learning_rate=0.01, nkerns=(20, 50), test=False):
    """
    Trains LeNet-5 on MNIST dataset, and returns trained parameters on completion.

    :param train_set_x: training inputs, substituted for the symbolic
                        matrix `x` through `givens`
    :param train_set_y: training labels, substituted for the symbolic
                        vector `y` through `givens`

    :type params: list of tuples of floats
    :param params: list of tuple of parameters from Supervisor. Takes the form
                   [(W_layer0, b_layer0), (W_layer1, b_layer1),
                    (W_layer2, b_layer2), (W_layer3, b_layer3)]

    :type batch_size: int
    :param batch_size: size of training batch

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each layer

    :param test: accepted for interface compatibility; not used here

    Output:
        tuple of LeNet-5 parameters by layer, in this format:
        ( (W_layer0, b_layer0), ..., (W_layer3, b_layer3) )
    """
    rng = numpy.random.RandomState(23455)

    # number of minibatch steps per epoch (fixed, not derived from the data)
    n_train_batches = 100
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    print(' ... building the model')

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size,1,28,28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size,1,28,28),
            W_values=params[0][0], b_values=params[0][1],
            filter_shape=(nkerns[0],1,5,5), poolsize=(2,2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size,nkerns[0],12,12),
            W_values=params[1][0], b_values=params[1][1],
            filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size,nkerns[1]*4*4)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1]*4*4,
                         n_out=120, activation=T.tanh,
                         W_values=params[2][0], b_values=params[2][1])

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=120, n_out=10,
                                W_values=params[3][0], b_values=params[3][1])

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # all model parameters to be fit by gradient descent; kept under a
    # separate name so the `params` argument is not rebound
    model_params = layer3.params + layer2.params + layer1.params + layer0.params

    # gradients of the cost with respect to every model parameter
    grads = T.grad(cost, model_params)

    # One plain SGD step per parameter.  The updates are passed as an
    # ordered list of (shared variable, new expression) pairs rather than
    # an unordered dict.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(model_params, grads)]

    # NOTE(review): `index` is declared as an input but does not appear in
    # the graph -- every call trains on the complete (train_set_x,
    # train_set_y) supplied through `givens`.  Confirm this is intended.
    train_model = theano.function([index], cost, updates=updates,
            givens={x: train_set_x, y: train_set_y},
            mode='FAST_RUN')

    print(" training lenet-5...")
    start_time = time.clock()

    # the number of epochs is derived from batch_size / 100
    epoch = 0
    while epoch < batch_size/100:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)

    end_time = time.clock()
    print(" worker training complete.")
    print(" %i samples analyzed in %.2fm" % (batch_size, (end_time-start_time)/60.))

    # current (W, b) values of every layer, from layer0 to layer3
    return tuple((layer.params[0].get_value(), layer.params[1].get_value())
                 for layer in (layer0, layer1, layer2, layer3))