def get_trained_model(c, train_sentences, train_labels):
    """Fit an L1-regularized logistic regression on already one-hot encoded data.

    The caller encodes the corpus with helper.batch_to_one_hot_encoded beforehand,
    so a sweep over C does not re-encode the data on every fit.
    """
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear',
                               max_iter=args.epochs, multi_class='ovr', verbose=1)
    model.fit(train_sentences, train_labels)
    return model
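# helper.batch_to_one_hot_encoded is defined elsewhere in the repository. Judging from
# how it is called below, it is assumed to turn a batch of labelled examples into a
# dense 0/1 bag-of-words matrix over `dictionary`, optionally restricted to the columns
# in `non_zero_indices`. The sketch below only illustrates that assumed behaviour; the
# attribute names (sentence1, label, word2idx) are guesses, not the real helper.
def batch_to_one_hot_encoded_sketch(batch, dictionary, non_zero_indices=None):
    import numpy
    sentences = numpy.zeros((len(batch), len(dictionary)), dtype=numpy.float32)
    labels = []
    for row, example in enumerate(batch):
        for token in example.sentence1:              # hypothetical field holding the tokens
            sentences[row, dictionary.word2idx[token]] = 1.0
        labels.append(example.label)                 # hypothetical label field
    if non_zero_indices:
        sentences = sentences[:, non_zero_indices]   # keep only the selected features
    return sentences, numpy.array(labels)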
def eval_routine(corpus, dictionary, model, non_zero_indices=None):
    print('one hot encoding...')
    eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
        corpus.data, dictionary, non_zero_indices=non_zero_indices)
    if not non_zero_indices:
        # Full-vocabulary encodings are huge and mostly zero, so convert to sparse.
        print('converting to CSR sparse matrix...')
        eval_sentences = csr_matrix(eval_sentences)
    print('Testing...')
    acc = model.score(eval_sentences, eval_labels)
    print(' Accuracy: ', acc)
    return acc
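# liblinear also accepts scipy sparse input at training time, so the same CSR
# conversion used above for evaluation could be applied to the training matrix when
# the full vocabulary is used. A minimal sketch; the function and argument names
# below are illustrative placeholders, not part of the original pipeline.
def fit_on_sparse(model, dense_sentences, labels):
    from scipy.sparse import csr_matrix
    # One-hot bag-of-words matrices are mostly zeros, so a CSR view keeps memory
    # roughly proportional to the number of non-zero entries.
    return model.fit(csr_matrix(dense_sentences), labels)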
def get_trained_model2(c, corpus, dictionary, non_zero_indices):
    # Batched variant: encodes and fits one mini-batch at a time to limit memory use.
    # Note that LogisticRegression.fit has no incremental mode, so each call refits
    # from scratch and only the last batch's fit is retained.
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear',
                               max_iter=args.epochs, multi_class='ovr', verbose=0)
    train_batches = helper.batchify(corpus.data, args.batch_size)
    num_batches = len(train_batches)
    print('number of train batches = ', num_batches)
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' training batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
            train_batches[batch_no - 1], dictionary, non_zero_indices=non_zero_indices)
        model.fit(train_sentences1, train_labels)
    return model
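# If genuinely incremental training over batches is wanted, scikit-learn's SGDClassifier
# with a logistic loss is the usual substitute, since LogisticRegression cannot be
# updated batch by batch. A minimal sketch under that assumption (not the original
# method used here); `classes` must list every label because partial_fit needs them
# on the first call.
def get_trained_model_incremental(corpus, dictionary, non_zero_indices, classes, alpha=0.0001):
    from sklearn.linear_model import SGDClassifier
    # loss='log_loss' gives logistic regression trained by SGD (use loss='log' on
    # scikit-learn < 1.1); partial_fit accumulates updates across batches.
    model = SGDClassifier(loss='log_loss', penalty='l1', alpha=alpha)
    train_batches = helper.batchify(corpus.data, args.batch_size)
    for batch in train_batches:
        sentences, labels = helper.batch_to_one_hot_encoded(
            batch, dictionary, non_zero_indices=non_zero_indices)
        model.partial_fit(sentences, labels, classes=classes)
    return model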
def eval_routine2(corpus, dictionary, model, non_zero_indices=None):
    # Batched evaluation, mirroring get_trained_model2; per-batch accuracies are
    # aggregated into an example-weighted total so unequal batch sizes are handled.
    nexamples = len(corpus.data)
    dev_batches = helper.batchify(corpus.data, args.batch_size)
    num_batches = len(dev_batches)
    print('number of eval batches = ', num_batches)
    total_acc = 0.0
    correct = 0.0
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' validation batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
            dev_batches[batch_no - 1], dictionary, non_zero_indices=non_zero_indices)
        acc = model.score(eval_sentences, eval_labels)
        correct += acc * len(eval_labels)
        total_acc += acc
    print(' Correct: ', correct, ' accuracy: ', correct / nexamples,
          ' sanity check (mean batch accuracy): ', total_acc / num_batches)
    return correct / nexamples
# Save the model to disk under a task-specific name.
filename = args.task + '_L1_model.pcl'
best_acc = 0
i = 0
bc = -1
# cs = 0.02 and 0.2 for SST for BCN and LSTM (or the other sequence)
cs = []  # for IMDB

print('one hot encoding...')
train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
    train_corpus.data, dictionary, non_zero_indices=[])
print('Training...')

# for c in cs:
while i < 1:
    # c = numpy.random.uniform(args.c - 5, args.c + 5)
    c = args.c  # 2.5  # numpy.random.uniform(0.7785, 0.7785)
    print('training model with c: ', c, ' in iter: ', i + 1)
    i += 1
    model = get_trained_model(c, train_sentences1, train_labels)
    # print("===" * 20, "\nC: ", c, "\n", "==" * 20)
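    # `filename` above suggests the fitted model is pickled once training and model
    # selection finish; that code is outside this excerpt. A standard sketch for
    # persisting a scikit-learn estimator (an assumption, not necessarily the exact
    # save step used here):
    #     import pickle
    #     with open(filename, 'wb') as f:
    #         pickle.dump(model, f)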