def unigram_model():
    print("The model will be trained on unigrams:")
    train_data = ut.TRAIN_UNI
    dev_data = ut.DEV_UNI
    # test_data = ut.TEST
    in_dim = len(ut.vocab_uni)
    out_dim = len(ut.L2I_UNI)
    hid_dim = int(2 ** (math.floor(math.log(in_dim - out_dim))) / 2)
    num_iterations = 30
    learning_rate = 0.01
    params = model.create_classifier(in_dim, hid_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
def train_xor():
    import xor_data
    train_data = xor_data.data
    dev_data = xor_data.data
    in_dim = 2
    hid_dim = 4
    out_dim = 2
    params = mlp1.create_classifier(in_dim, hid_dim, out_dim)
    num_iterations = 25
    learning_rate = 0.9
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
def xor():
    in_dim = 2
    hidden_dim = 4
    out_dim = 2
    num_iterations = 70
    learning_rate = 1.0
    params = ll.create_classifier(in_dim, hidden_dim, out_dim)
    trained_params = train_xor(xor_data.data, num_iterations, learning_rate,
                               params)
    return trained_params
def xor_model():
    print("The model will be trained on the xor dataset:")
    train_data = xor_data
    dev_data = xor_data
    in_dim = 2
    out_dim = 2
    hid_dim = 4
    epochs = 30
    f2I = lambda x: x
    l2I = {0: 0, 1: 1}
    params = model.create_classifier(in_dim, hid_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, params, f2I, l2I,
                                      feat_parser=lambda feats, x: np.asarray(feats),
                                      num_iterations=epochs)
def main(text_to_ngram):
    if config.debug:
        np.random.seed(config.mlp1.seed)
    train_data = utils.read_data(config.filename_train)
    symbol_dict = tl.initialize_symbol_dict(train_data, text_to_ngram)
    label_dict = tl.initialize_label_dict(train_data)
    xy_train = list(tl.xy_generator(train_data, text_to_ngram, symbol_dict,
                                    label_dict))
    dev_data = utils.read_data(config.filename_dev)
    xy_dev = list(tl.xy_generator(dev_data, text_to_ngram, symbol_dict,
                                  label_dict))
    in_dim = min(config.max_count, len(symbol_dict))
    out_dim = len(label_dict)
    hidden_dim = config.mlp1.hidden_layer_size
    print("problem dimensions are: {}".format((in_dim, hidden_dim, out_dim)))
    params = lp.create_classifier(in_dim, hidden_dim, out_dim)
    params = [randomize_array(p) for p in params]
    trained_params = train_classifier(xy_train, xy_dev,
                                      config.mlp1.num_iterations,
                                      config.mlp1.learning_rate, params)
    return trained_params
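# Note: main() above (and the sanity check below) call randomize_array()
# without defining it. A minimal sketch, assuming it just resamples an array
# of the same shape uniformly from [-1, 1); the exact distribution and range
# used in the original code are assumptions.
import numpy as np

def randomize_array(a):
    # Draw a fresh array with a's shape, entries uniform in [-1, 1).
    return np.random.uniform(-1.0, 1.0, a.shape)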
def mlp1_grad_sanity():
    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    W, b, U, b_tag = mlp1.create_classifier(3, 4, 6)

    def _loss_and_W_grad(W):
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = mlp1.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = mlp1.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[1]

    def _loss_and_U_grad(U):
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = mlp1.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[2]

    def _loss_and_b_tag_grad(b_tag):
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = mlp1.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[3]

    for _ in range(10):
        W = randomize_array(W)
        b = randomize_array(b)
        U = randomize_array(U)
        b_tag = randomize_array(b_tag)
        print("b_tag:")
        gradient_check(_loss_and_b_tag_grad, b_tag)
        print("U:")
        gradient_check(_loss_and_U_grad, U)
        print("b:")
        gradient_check(_loss_and_b_grad, b)
        print("W:")
        gradient_check(_loss_and_W_grad, W)
def mlp_check():
    print("MLP (one hidden layer) gradient checks")
    from mlp1 import create_classifier, loss_and_gradients as mlp1_loss_and_grad
    from train_mlp1 import randomly_initialize_params
    in_dim, hid_dim, out_dim = 5, 3, 2
    initialized_params = create_classifier(in_dim=in_dim, hid_dim=hid_dim,
                                           out_dim=out_dim)
    x = np.random.randn(in_dim)
    y = 0
    for i in range(5):
        random_params = randomly_initialize_params(initialized_params)
        W, b, U, b_tag = random_params

        def _loss_and_W_grad(W_):
            loss, grads = mlp1_loss_and_grad(x, y, [W_, b, U, b_tag])
            return loss, grads[0]

        def _loss_and_b_grad(b_):
            loss, grads = mlp1_loss_and_grad(x, y, [W, b_, U, b_tag])
            return loss, grads[1]

        def _loss_and_U_grad(U_):
            loss, grads = mlp1_loss_and_grad(x, y, [W, b, U_, b_tag])
            return loss, grads[2]

        def _loss_and_b_tag_grad(b_tag_):
            loss, grads = mlp1_loss_and_grad(x, y, [W, b, U, b_tag_])
            return loss, grads[3]

        print(f"Gradient checks for random initialization {i+1}")
        gradient_check(_loss_and_W_grad, W)
        gradient_check(_loss_and_b_grad, b)
        gradient_check(_loss_and_U_grad, U)
        gradient_check(_loss_and_b_tag_grad, b_tag)
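# Both gradient checks above call gradient_check(f, x), which this section
# never shows. A minimal central-difference sketch under the same contract
# (f returns (loss, analytic_gradient)); the course-provided grad_check.py
# may differ in tolerances and reporting.
import numpy as np

def gradient_check(f, x, eps=1e-4, tol=1e-5):
    _, analytic = f(x)  # analytic gradient at the unperturbed point
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + eps
        f_plus, _ = f(x)
        x[ix] = old - eps
        f_minus, _ = f(x)
        x[ix] = old  # restore the original value
        numeric = (f_plus - f_minus) / (2 * eps)
        rel_err = abs(numeric - analytic[ix]) / max(abs(numeric) + abs(analytic[ix]), 1e-8)
        if rel_err > tol:
            print("Gradient check failed at", ix,
                  "- numeric:", numeric, "analytic:", analytic[ix])
            return
        it.iternext()
    print("Gradient check passed!")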
def create_test_pred_file(test_data, params):
    """
    Creates a 'test.pred' file.
    :param test_data: test data to be predicted
    :param params: trained params
    :return: None
    """
    f = open("test.pred", 'w')
    for label, features in test_data:
        x = feats_to_vec(features)
        y_hat = mlp1.predict(x, params)
        for l, i in utils.L2I.items():
            if y_hat == i:
                label = l
                break
        f.write(label + "\n")
    f.close()


LR = 0.001
NUM_ITERATIONS = 15
HIDDEN_LAYER_SIZE = 10

if __name__ == '__main__':
    params = mlp1.create_classifier(len(utils.F2I), HIDDEN_LAYER_SIZE,
                                    len(utils.L2I))
    trained_params = train_classifier(utils.TRAIN, utils.DEV, NUM_ITERATIONS,
                                      LR, params)
    TEST = [(l, utils.text_to_bigrams(t)) for l, t in utils.read_data("test")]
    create_test_pred_file(TEST, trained_params)
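# create_test_pred_file() above (and several training loops below) rely on a
# feats_to_vec() helper that this section never defines. A minimal
# bag-of-bigrams sketch, assuming `features` is a list of bigram strings and
# utils.F2I maps bigram -> column index; any count normalization in the
# original is an assumption left out here.
import numpy as np
import utils

def feats_to_vec(features):
    vec = np.zeros(len(utils.F2I))
    for feat in features:
        idx = utils.F2I.get(feat)
        if idx is not None:
            vec[idx] += 1  # count occurrences of each known bigram
    return vec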
import mlp1 as model
import train_mlp1 as trainer
from utils import I2L

data = [(I2L[1], [0, 0]),
        (I2L[0], [0, 1]),
        (I2L[0], [1, 0]),
        (I2L[1], [1, 1])]

if __name__ == "__main__":
    params = model.create_classifier(2, 10, 2)
    trained_params = trainer.train_classifier(data, data, 200, 0.0765, params)
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec_uni(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            gw, gb, gu, gb_tag = grads
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            W, b, U, b_tag = params
            W -= learning_rate * gw
            b -= learning_rate * gb
            U -= learning_rate * gu
            b_tag -= learning_rate * gb_tag
            params = W, b, U, b_tag
        # if epoch == 4:
        #     learning_rate = 0.001
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(epoch, train_loss, train_accuracy, dev_accuracy)
    return params


if __name__ == '__main__':
    # load the train and dev sets, set up whatever is needed,
    # and call train_classifier.
    params = ll.create_classifier(600, 1000, 6)
    trained_params = train_classifier(TRAIN_UNI, DEV_UNI, 10, 0.0485, params)
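# The loop above reports accuracy_on_dataset(), which the excerpt omits. A
# minimal sketch under the same conventions (feats_to_vec_uni, L2I, and an
# ll.predict() returning a label index); all three names are taken from the
# excerpt above, not from a confirmed implementation.
def accuracy_on_dataset(dataset, params):
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec_uni(features)
        if ll.predict(x, params) == L2I[label]:
            good += 1
        else:
            bad += 1
    return good / (good + bad)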
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params


def run_test(test_data, params):
    pred_file = open("test.pred", 'w')
    for label, features in test_data:
        x = feats_to_vec(features)  # convert features to a vector.
        y_hat = ml.predict(x, params)
        for key, val in ut.L2I.items():
            if val == y_hat:
                label = key
                break
        pred_file.write(str(label) + "\n")
    pred_file.close()


if __name__ == '__main__':
    # load the train and dev sets, set up whatever is needed,
    # and call train_classifier.
    params = ml.create_classifier(len(ut.F2I), HIDDEN_SIZE, len(ut.L2I))
    trained_params = train_classifier(ut.TRAIN, ut.DEV, EPOCHS, ETA, params)
    run_test(ut.TRAIN, trained_params)
from utils import DEV as dev_data
from utils import L2I, F2I
import loglinear as ll
import itertools

STUDENT = {
    'name': 'Royi Rassin',
    'ID': '311334734',
    'name2': 'Shon Otzmagin',
    'ID2': '305394975'
}

param_grid = {'epochs': [300], 'hid_dim': [32, 128], 'lr': [1e-3, 2e-3, 3e-3]}

# Generate all combinations of parameters.
all_params = [
    dict(zip(param_grid.keys(), v))
    for v in itertools.product(*param_grid.values())
]

in_dim = len(F2I)
out_dim = len(L2I)

# Grid-search over all parameter combinations, evaluating on the dev set.
for h_params in all_params:
    print(h_params)
    params = mlp1.create_classifier(in_dim, h_params['hid_dim'], out_dim)
    trained_params = train_classifier(train_data, dev_data, h_params['epochs'],
                                      h_params['lr'], params)
    print()
learning_rate = .001
hid_layer = int(1.2 * vocab_size)
vocab = set([x for x, c in fc.most_common(vocab_size)])

# label strings to IDs
L2I = {l: i for i, l in enumerate(list(sorted(set([l for l, t in TRAIN]))))}
I2L = {i: l for i, l in enumerate(list(sorted(set([l for l, t in TRAIN]))))}
# feature strings (bigrams) to IDs
F2I = {f: i for i, f in enumerate(list(sorted(vocab)))}

# ...
params = mlp1.create_classifier(vocab_size, hid_layer, len(L2I))
trained_params = train_classifier(TRAIN, DEV, epochs, learning_rate, params)

# __________ PART 4 ____________
# After training, the parameters are supposedly set to reach max accuracy.
#
# test_predictions = []
# for label, features in TEST:
#     x = feats_to_vec(features)
#     test_predictions.append(I2L[mlp1.predict(x, params)])  # later change ll to nn.
#
# save_test_prediction("test.pred", test_predictions)
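# The commented-out Part 4 above ends with save_test_prediction(), which is
# not defined anywhere in this section. A minimal sketch, assuming it simply
# writes one predicted label per line:
def save_test_prediction(path, predictions):
    with open(path, 'w') as f:
        for pred in predictions:
            f.write(pred + "\n")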
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params


def test(test_data, params):
    prediction_file = open("test.pred", 'w')
    for label, features in test_data:
        x = feats_to_vec(features)  # convert features to a vector.
        pred = ml.predict(x, params)
        for key, val in ut.L2I.items():
            if val == pred:
                label = key
                break
        prediction_file.write(str(label) + "\n")
    prediction_file.close()


if __name__ == '__main__':
    # load the train and dev sets, set up whatever is needed,
    # and call train_classifier.
    num_iterations = 60
    learning_rate = 0.01
    hidden = 20
    params = ml.create_classifier(len(ut.F2I), hidden, len(ut.L2I))
    trained_params = train_classifier(ut.TRAIN, ut.DEV, num_iterations,
                                      learning_rate, params)
    test(ut.TRAIN, trained_params)
test_bigrams = True

# load the train and dev sets, set up whatever is needed,
# and call train_classifier.
num_iterations = 30
learning_rate = 0.01
hidden_dim = 60
out_dim = len(ut.L2I)
if test_bigrams:
    train_data = ut.TRAIN
    dev_data = ut.DEV
    in_dim = len(ut.F2I)
    params = m1.create_classifier(in_dim, hidden_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
else:
    train_data, dev_data, F2I = ut.get_unigrams()
    in_dim = len(F2I)
    params = m1.create_classifier(in_dim, hidden_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params, F2I)

# Set whether the model should predict the test data and write it to the file.
predictTest = True
if predictTest:
    I2L = {i: l for l, i in ut.L2I.items()}
    print(I2L)
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params


if __name__ == '__main__':
    # load the train and dev sets, set up whatever is needed,
    # and call train_classifier.
    # For the xor data:
    # learning_rate = 5e-1
    # params = mlp.create_classifier(2, 8, 2)
    # trained_params = train_classifier(data, data, 20, learning_rate, params)
    learning_rate = 5e-3
    num_iterations = 10
    hidden_layer_size = 18
    train_data, dev_data = load_data('train', 'dev', 'bigrams')
    # input dimension is the number of features (bigrams)
    in_dim = get_common_features_number()
    # output dimension is the number of labels (languages)
    out_dim = get_labels_number()
    params = mlp.create_classifier(in_dim, hidden_layer_size, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
# ...
train_data = utils.read_data("train")
y_train = [l[0] for l in train_data]
X_train = [l[1] for l in train_data]
dev_data = utils.read_data("dev")
y_dev = [l[0] for l in dev_data]
X_dev = [l[1] for l in dev_data]

num_iterations = 100
out_dim = len(utils.L2I)
in_dim = len(utils.vocab)
learning_rate = 6.46428571e-04

# For the XOR problem:
# from xor_data import data
# train_data = data
# dev_data = data

params = mlp.create_classifier(in_dim, hid_dim=15, out_dim=out_dim)
trained_params = train_classifier(train_data, dev_data, num_iterations,
                                  learning_rate, params)

# predict on the test set
test_data = utils.read_data("test")
f = open("test.pred", "w+")
inv_map = {v: k for k, v in utils.L2I.items()}
index = 0
for label, features in test_data:
    index += 1
    # label = utils.L2I[label]
    one_hot = feats_to_vec(features)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params


if __name__ == '__main__':
    # optional models - xor, unigrams, bigrams (use the utils.<function>
    # associated with the model)
    model = 'xor'
    if model == 'xor':
        in_dim, out_dim, train_data, dev_data = utils.get_xor_params()
        hid_dim = 5
        num_iterations = 50
        learning_rate = 0.2
        params = mlp1.create_classifier(in_dim, hid_dim, out_dim)
        train_classifier(train_data, '', num_iterations, learning_rate, params)
        exit()

    model = 'unigrams'
    # training parameters
    hid_dim = 30
    num_iterations = 30
    learning_rate = 1e-3
    # get params
    vocab_size, num_langs, train_data, dev_data = utils.get_unigrams_params()
    params = mlp1.create_classifier(vocab_size, hid_dim, num_langs)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
# load the train and dev sets, set up whatever is needed,
# and call train_classifier.
#
# For the xor problem:
# data = [(1, [0, 0]),
#         (0, [0, 1]),
#         (0, [1, 0]),
#         (1, [1, 1])]
#
# learning_rate = 0.5
# params = mlp.create_classifier(2, 3, 2)
# trained_params = train_classifier(data, data, 100, learning_rate, params)

# For bigrams:
train, features_size, labels_size = load_train_set('bigrams')
validation = load_validation_set('bigrams')
learning_rate = 0.005
params = mlp.create_classifier(features_size, 18, labels_size)
trained_params = train_classifier(train, validation, 10, learning_rate, params)

# For unigrams:
# train, features_size, labels_size = load_train_set('unigrams')
# validation = load_validation_set('unigrams')
# learning_rate = 0.001
# params = mlp.create_classifier(features_size, 50, labels_size)
# trained_params = train_classifier(train, validation, 100, learning_rate, params)
        dev_accuracy = accuracy_on_dataset(dev_data, [U, W, bu, bw])
        print(I + 1, train_loss, train_accuracy, dev_accuracy)
    return params


if __name__ == '__main__':
    # load the train and dev sets, set up whatever is needed,
    # and call train_classifier.
    train_data = utils.TRAIN
    dev_data = utils.DEV
    learning_rate = 0.01
    # num_iterations = 400
    num_iterations = 2
    out_dim = len(utils.L2I)
    hid_dim = 50
    in_dim = 600
    # in_dim = len(utils.F2I)
    params = mlp1.create_classifier(in_dim, hid_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)

    if EXECUTE_TEST:
        test_results = []
        for val in utils.TEST:
            test_results.append(
                utils.I2L[mlp1.predict(feats_to_vec(val[1]), trained_params)])
        with open("test_results.txt", "w") as f:
            f.writelines(["%s\n" % item for item in test_results])
def fileData(fData):
    data = []
    for [lang, bigrams] in fData:
        features = np.zeros(len(all_bigrams))
        for bigram in bigrams:
            if bigram in all_bigrams:
                features[all_bigrams[bigram]] += 1
        language = all_langs[lang] if lang in all_langs else -1
        data.append([language, features])
    return data


# process the training and dev data and print accuracy
params = mlp.create_classifier(len(all_bigrams),
                               int(math.log(len(all_bigrams) * len(all_langs))),
                               len(all_langs))
trained_params = train_classifier(fileData(utils.TRAIN), fileData(utils.DEV),
                                  num_iterations, learning_rate, params)

# run prediction on the test data
predict = []
for [label, data] in fileData(utils.TEST):
    predict.append(lang_to_id[mlp.predict(data, trained_params)])

""" Commented out so as not to overwrite the file.
# write the prediction to a file
predict_file = open('test.pred', 'w')
predict_file.writelines(["%s\n" % item for item in predict])
predict_file.close()
"""
import xor_data
import train_mlp1
import mlp1
import numpy as np

dataList = []
for label, data in xor_data.data:
    dataList.append([label, np.array(data)])

params = mlp1.create_classifier(2, 2, 2)
trainedParams = train_mlp1.train_classifier(dataList, dataList, 800, 0.075,
                                            params)
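# Several snippets import xor_data, which is never shown in this section. A
# minimal sketch of the module, consistent with the XOR truth table spelled
# out in the comments of an earlier snippet ((label, [x1, x2]) pairs):
# xor_data.py
data = [(1, [0, 0]),
        (0, [0, 1]),
        (0, [1, 0]),
        (1, [1, 1])]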
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        for label, features in train_data:
            loss, grads = mlp.loss_and_gradients(features, label, params)
            cum_loss += loss
            params = [param - grad for param, grad in
                      zip(params, [grad * learning_rate for grad in grads])]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print((I, train_loss, train_accuracy, dev_accuracy))
    return params


if __name__ == '__main__':
    in_dim = 2
    out_dim = 2
    np.random.seed(5)
    train_data = data
    dev_data = data
    num_iterations = 8
    learning_rate = 0.5
    params = mlp.create_classifier(in_dim, 4, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)