def perplexity(rnn_hidden_size):
    """Compute test-set perplexity as a function of prefix length for a trained model."""
    if rnn_hidden_size not in [32, 64, 128, 256]:
        raise ValueError("invalid hidden layer size: {}".format(rnn_hidden_size))

    # model_rnn maps each hidden size to the artifacts returned by create_models_diff_hidden()
    args, dataset, vectorizer, train_state, model = model_rnn[rnn_hidden_size]

    model = SurnameGenerationModel(char_embedding_size=args.char_embedding_size,
                                   char_vocab_size=len(vectorizer.char_vocab),
                                   rnn_hidden_size=args.rnn_hidden_size,
                                   padding_idx=vectorizer.char_vocab.mask_index)
    print(model)

    mask_index = vectorizer.char_vocab.mask_index
    model.load_state_dict(torch.load(f'{rnn_hidden_size}_hidden.pt'))
    model = model.to(args.device)
    model.eval()

    perplexities = []
    for i in range(1, 20):
        # Evaluate on the test split with only the first i characters visible.
        dataset.set_split('test')
        batch_generator = generate_batches(dataset,
                                           batch_size=args.batch_size,
                                           device=args.device)
        running_loss = 0.0
        running_acc = 0.0

        for batch_index, batch_dict in enumerate(batch_generator):
            # Zero out (mask) every position beyond the first i characters.
            x_data = np.array(torch.Tensor.clone(batch_dict['x_data'].cpu()))
            for j in range(i, 19):
                for k in range(x_data.shape[0]):
                    x_data[k][j] = 0
            x_data = torch.Tensor(x_data).type(torch.LongTensor).to(args.device)

            y_pred = model(x_in=x_data)

            # compute the running loss and running accuracy against the full targets
            loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
            running_acc += (acc_t - running_acc) / (batch_index + 1)

        # sequence_loss is a natural-log cross entropy, so perplexity is exp(loss)
        perplexities.append(torch.exp(torch.tensor(running_loss)).item())

    return perplexities
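# A minimal usage sketch (an assumption, not part of the original notebook): it
# presumes model_rnn has already been populated with the tuples returned by
# create_models_diff_hidden() for each hidden size, and prints test perplexity
# as a function of how many characters are visible.
for hidden_size in [32, 64, 128, 256]:
    per_prefix = perplexity(hidden_size)
    for num_chars, ppl in enumerate(per_prefix, start=1):
        print("hidden={:>3}  first {:>2} chars  perplexity={:.2f}".format(
            hidden_size, num_chars, ppl))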
def create_models_diff_hidden(rnn_hidden_size):
    args = Namespace(
        # Data and path information
        surname_csv="https://raw.githubusercontent.com/jasoriya/CS6120-PS2-support/master/data/surnames/surnames_with_splits.csv",
        vectorizer_file="vectorizer.json",
        model_state_file="model.pth",
        save_dir="/",  # give path here
        # Model hyperparameters
        char_embedding_size=32,
        rnn_hidden_size=rnn_hidden_size,  # hidden size under comparison
        # Training hyperparameters
        seed=1337,
        learning_rate=0.001,
        batch_size=128,
        num_epochs=100,
        early_stopping_criteria=5,
        # Runtime options
        catch_keyboard_interrupt=True,
        cuda=True,
        expand_filepaths_to_save_dir=True,
        reload_from_files=False,
    )

    if args.expand_filepaths_to_save_dir:
        args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
        args.model_state_file = os.path.join(args.save_dir, args.model_state_file)
        print("Expanded filepaths: ")
        print("\t{}".format(args.vectorizer_file))
        print("\t{}".format(args.model_state_file))

    # Check CUDA
    if not torch.cuda.is_available():
        args.cuda = False
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seed for reproducibility
    set_seed_everywhere(args.seed, args.cuda)

    # Handle dirs
    handle_dirs(args.save_dir)

    if args.reload_from_files:
        # training from a checkpoint
        dataset = SurnameDataset.load_dataset_and_load_vectorizer(args.surname_csv,
                                                                  args.vectorizer_file)
    else:
        # create dataset and vectorizer
        dataset = SurnameDataset.load_dataset_and_make_vectorizer(args.surname_csv)
        dataset.save_vectorizer(args.vectorizer_file)

    vectorizer = dataset.get_vectorizer()

    model = SurnameGenerationModel(char_embedding_size=args.char_embedding_size,
                                   char_vocab_size=len(vectorizer.char_vocab),
                                   rnn_hidden_size=args.rnn_hidden_size,
                                   padding_idx=vectorizer.char_vocab.mask_index)
    print(model)

    mask_index = vectorizer.char_vocab.mask_index

    model = model.to(args.device)

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min', factor=0.5,
                                                     patience=1)
    train_state = make_train_state(args)

    try:
        for epoch_index in range(args.num_epochs):
            train_state['epoch_index'] = epoch_index

            # Iterate over the training dataset
            # setup: batch generator, set loss and acc to 0, set train mode on
            dataset.set_split('train')
            batch_generator = generate_batches(dataset,
                                               batch_size=args.batch_size,
                                               device=args.device)
            running_loss = 0.0
            running_acc = 0.0
            model.train()

            for batch_index, batch_dict in enumerate(batch_generator):
                # the training routine is these 5 steps:
                # step 1. zero the gradients
                optimizer.zero_grad()
                # step 2. compute the output
                y_pred = model(x_in=batch_dict['x_data'])
                # step 3. compute the loss
                loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
                # step 4. use loss to produce gradients
                loss.backward()
                # step 5. use optimizer to take gradient step
                optimizer.step()

                # compute the running loss and running accuracy
                running_loss += (loss.item() - running_loss) / (batch_index + 1)
                acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
                running_acc += (acc_t - running_acc) / (batch_index + 1)

            train_state['train_loss'].append(running_loss)
            train_state['train_acc'].append(running_acc)

            # Iterate over the val dataset
            # setup: batch generator, set loss and acc to 0, set eval mode on
            dataset.set_split('val')
            batch_generator = generate_batches(dataset,
                                               batch_size=args.batch_size,
                                               device=args.device)
            running_loss = 0.
            running_acc = 0.
            model.eval()

            for batch_index, batch_dict in enumerate(batch_generator):
                # compute the output
                y_pred = model(x_in=batch_dict['x_data'])
                # compute the loss
                loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
                # compute the running loss and running accuracy
                running_loss += (loss.item() - running_loss) / (batch_index + 1)
                acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
                running_acc += (acc_t - running_acc) / (batch_index + 1)

            train_state['val_loss'].append(running_loss)
            train_state['val_acc'].append(running_acc)

            train_state = update_train_state(args=args, model=model,
                                             train_state=train_state)
            scheduler.step(train_state['val_loss'][-1])

            if train_state['stop_early']:
                break

            # move model to cpu for sampling
            model = model.cpu()
            sampled_surnames = decode_samples(
                sample_from_model(model, vectorizer, num_samples=2),
                vectorizer)
            # move model back to whichever device it should be on
            model = model.to(args.device)

    except KeyboardInterrupt:
        print("Exiting loop")

    # save the weights under a per-hidden-size filename so perplexity() can reload them
    train_state['model_filename'] = "./{}_hidden.pt".format(str(rnn_hidden_size))
    torch.save(model.state_dict(), train_state['model_filename'])

    return args, dataset, vectorizer, train_state, model
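# A minimal sketch (an assumption, not part of the original notebook) of how the
# model_rnn dictionary consumed by perplexity() above could be populated: train
# one model per hidden size and keep the returned
# (args, dataset, vectorizer, train_state, model) tuple under that size.
model_rnn = {}
for hidden_size in [32, 64, 128, 256]:
    model_rnn[hidden_size] = create_models_diff_hidden(hidden_size)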
def fit_nn(rnn):
    args = Namespace(
        # Data and path information
        surname_csv="https://raw.githubusercontent.com/jasoriya/CS6120-PS2-support/master/data/surnames/surnames_with_splits.csv",
        vectorizer_file="vectorizer.json",
        model_state_file="model.pth",
        save_dir="./" + str(rnn),  # one save directory per hidden size
        # Model hyperparameters
        char_embedding_size=32,
        rnn_hidden_size=rnn,  # hidden size under comparison
        # Training hyperparameters
        seed=1337,
        learning_rate=0.001,
        batch_size=128,
        num_epochs=100,
        early_stopping_criteria=5,
        # Runtime options
        catch_keyboard_interrupt=True,
        cuda=True,
        expand_filepaths_to_save_dir=True,
        reload_from_files=False,
    )

    if args.expand_filepaths_to_save_dir:
        args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
        args.model_state_file = os.path.join(args.save_dir, args.model_state_file)
        print("Expanded filepaths: ")
        print("\t{}".format(args.vectorizer_file))
        print("\t{}".format(args.model_state_file))

    # Check CUDA
    if not torch.cuda.is_available():
        args.cuda = False
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seed for reproducibility
    set_seed_everywhere(args.seed, args.cuda)

    # Handle dirs
    handle_dirs(args.save_dir)

    if args.reload_from_files:
        # training from a checkpoint
        dataset = SurnameDataset.load_dataset_and_load_vectorizer(
            args.surname_csv, args.vectorizer_file)
    else:
        # create dataset and vectorizer
        dataset = SurnameDataset.load_dataset_and_make_vectorizer(args.surname_csv)
        dataset.save_vectorizer(args.vectorizer_file)

    vectorizer = dataset.get_vectorizer()

    model = SurnameGenerationModel(char_embedding_size=args.char_embedding_size,
                                   char_vocab_size=len(vectorizer.char_vocab),
                                   rnn_hidden_size=args.rnn_hidden_size,
                                   padding_idx=vectorizer.char_vocab.mask_index)

    mask_index = vectorizer.char_vocab.mask_index

    model = model.to(args.device)

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min', factor=0.5,
                                                     patience=1)
    train_state = make_train_state(args)

    epoch_bar = tqdm_notebook(desc='training routine',
                              total=args.num_epochs,
                              position=0)
    dataset.set_split('train')
    train_bar = tqdm_notebook(desc='split=train',
                              total=dataset.get_num_batches(args.batch_size),
                              position=1,
                              leave=True)
    dataset.set_split('val')
    val_bar = tqdm_notebook(desc='split=val',
                            total=dataset.get_num_batches(args.batch_size),
                            position=1,
                            leave=True)

    try:
        for epoch_index in range(args.num_epochs):
            train_state['epoch_index'] = epoch_index

            # Iterate over the training dataset
            # setup: batch generator, set loss and acc to 0, set train mode on
            dataset.set_split('train')
            batch_generator = generate_batches(dataset,
                                               batch_size=args.batch_size,
                                               device=args.device)
            running_loss = 0.0
            running_acc = 0.0
            model.train()

            for batch_index, batch_dict in enumerate(batch_generator):
                # the training routine is these 5 steps:
                # step 1. zero the gradients
                optimizer.zero_grad()
                # step 2. compute the output
                y_pred = model(x_in=batch_dict['x_data'])
                # step 3. compute the loss
                loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
                # step 4. use loss to produce gradients
                loss.backward()
                # step 5. use optimizer to take gradient step
                optimizer.step()

                # compute the running loss and running accuracy
                running_loss += (loss.item() - running_loss) / (batch_index + 1)
                acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
                running_acc += (acc_t - running_acc) / (batch_index + 1)

                # update bar
                train_bar.set_postfix(loss=running_loss,
                                      acc=running_acc,
                                      epoch=epoch_index)
                train_bar.update()

            train_state['train_loss'].append(running_loss)
            train_state['train_acc'].append(running_acc)

            # Iterate over the val dataset
            # setup: batch generator, set loss and acc to 0, set eval mode on
            dataset.set_split('val')
            batch_generator = generate_batches(dataset,
                                               batch_size=args.batch_size,
                                               device=args.device)
            running_loss = 0.
            running_acc = 0.
            model.eval()

            for batch_index, batch_dict in enumerate(batch_generator):
                # compute the output
                y_pred = model(x_in=batch_dict['x_data'])
                # compute the loss
                loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
                # compute the running loss and running accuracy
                running_loss += (loss.item() - running_loss) / (batch_index + 1)
                acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
                running_acc += (acc_t - running_acc) / (batch_index + 1)

                # update bar
                val_bar.set_postfix(loss=running_loss,
                                    acc=running_acc,
                                    epoch=epoch_index)
                val_bar.update()

            train_state['val_loss'].append(running_loss)
            train_state['val_acc'].append(running_acc)

            train_state = update_train_state(args=args, model=model,
                                             train_state=train_state)
            scheduler.step(train_state['val_loss'][-1])

            if train_state['stop_early']:
                break

            # move model to cpu for sampling
            model = model.cpu()
            sampled_surnames = decode_samples(
                sample_from_model(model, vectorizer, num_samples=2),
                vectorizer)
            epoch_bar.set_postfix(sample1=sampled_surnames[0],
                                  sample2=sampled_surnames[1])
            # move model back to whichever device it should be on
            model = model.to(args.device)

            train_bar.n = 0
            val_bar.n = 0
            epoch_bar.update()

    except KeyboardInterrupt:
        print("Exiting loop")

    # Evaluate the best checkpoint on the test split
    model.load_state_dict(torch.load(train_state['model_filename']))
    print(train_state['model_filename'])
    model = model.to(args.device)

    dataset.set_split('test')
    batch_generator = generate_batches(dataset,
                                       batch_size=args.batch_size,
                                       device=args.device)
    running_acc = 0.
    running_loss = 0.
    model.eval()

    for batch_index, batch_dict in enumerate(batch_generator):
        # compute the output
        y_pred = model(x_in=batch_dict['x_data'])
        # compute the loss
        loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)
        # compute the running loss and running accuracy
        running_loss += (loss.item() - running_loss) / (batch_index + 1)
        acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
        running_acc += (acc_t - running_acc) / (batch_index + 1)

    # sequence_loss is a natural-log cross entropy, so perplexity = exp(loss)
    final_perplex = torch.exp(torch.tensor(running_loss))
    train_perplex = torch.exp(torch.tensor(train_state['train_loss'][-1]))
    val_perplex = torch.exp(torch.tensor(train_state['val_loss'][-1]))

    train_state['test_loss'] = running_loss
    train_state['test_acc'] = running_acc

    print("Test loss: {}".format(train_state['test_loss']))
    print("Train perplexity: {}".format(train_perplex.item()))
    print("Validation perplexity: {}".format(val_perplex.item()))
    print("Test perplexity: {}".format(final_perplex.item()))
    print("Test Accuracy: {}".format(train_state['test_acc']))

    # simple (unweighted) average of the train/val/test perplexities
    entire_corpus = (final_perplex.item() + train_perplex.item() + val_perplex.item()) / 3
    print("Perplexity of the entire corpus:", entire_corpus)

    model.load_state_dict(torch.load(train_state['model_filename']))
    model = model.to(args.device)

    ############## PER-PREFIX PERPLEXITY OVER THE ENTIRE TEST SET ##############
    # (An alternative version that zeroes out the remaining columns instead of
    # slicing them off is implemented in perplexity() above.)
    dataset.set_split('test')
    batch_generator = generate_batches(dataset,
                                       batch_size=args.batch_size,
                                       device=args.device)
    running_acc = 0.
    running_loss = 0.
    perplexity_character_dict = {}
    accuracy_character_dict = {}

    model.eval()
    # materialize the batches once so they can be re-used for every prefix length
    enumerated = list(enumerate(batch_generator))
    for i in range(1, 20):
        # note: the running averages carry over across prefix lengths (they are
        # not reset per i), so each entry is a cumulative running average
        for batch_index, batch_dict in enumerated:
            # feed only the first i characters
            y_pred = model(x_in=batch_dict['x_data'][:, 0:i])
            # compute the loss
            loss = sequence_loss(y_pred, batch_dict['y_target'][:, 0:i], mask_index)
            # compute the running loss and running accuracy
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'][:, 0:i], mask_index)
            running_acc += (acc_t - running_acc) / (batch_index + 1)
        accuracy_character_dict[i] = running_acc
        perplexity_character_dict[i] = torch.exp(torch.tensor(running_loss)).item()

    for k in perplexity_character_dict.keys():
        print("Perplexity after looking at", k, "characters:",
              perplexity_character_dict[k])

    ############## FOR ONE SURNAME, UNCOMMENT THIS ##############
    # obj = SurnameVectorizer(vectorizer.char_vocab, vectorizer.nationality_vocab)
    # from_vect, to_vect = obj.vectorize('Singhrathore', 19)
    # from_vect = from_vect.reshape(19, 1)
    # to_vect = to_vect.reshape(19, 1)
    # from_tensor = torch.from_numpy(from_vect).to(args.device)
    # to_tensor = torch.from_numpy(to_vect).to(args.device)
    # running_acc = 0.
    # running_loss = 0.
    # perplexity_character_dict = {}
    # accuracy_character_dict = {}
    # for i in range(1, 20):
    #     y_pred = model(from_tensor[0:i])
    #     loss = sequence_loss(y_pred, to_tensor[0:i], mask_index)
    #     running_loss += (loss.item() - running_loss)
    #     acc_t = compute_accuracy(y_pred, to_tensor[0:i], mask_index)
    #     running_acc += (acc_t - running_acc)
    #     accuracy_character_dict[i] = running_acc
    #     perplexity_character_dict[i] = torch.exp(torch.tensor(running_loss)).item()
    # print(accuracy_character_dict)
    # print(perplexity_character_dict)

    # number of names to generate
    num_names = 10
    model = model.cpu()
    # sample surnames from the trained model
    sampled_surnames = decode_samples(
        sample_from_model(model, vectorizer, num_samples=num_names),
        vectorizer)
    # Show results
    print("-" * 15)
    for i in range(num_names):
        print(sampled_surnames[i])

    return perplexity_character_dict, accuracy_character_dict
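# A minimal usage sketch (an assumption, not part of the original notebook):
# train one model per hidden size with fit_nn() and plot how test perplexity
# changes with the number of characters observed. matplotlib is assumed to be
# available in the notebook environment.
import matplotlib.pyplot as plt

for hidden_size in [32, 64, 128, 256]:
    ppl_by_prefix, acc_by_prefix = fit_nn(hidden_size)
    plt.plot(list(ppl_by_prefix.keys()),
             list(ppl_by_prefix.values()),
             label="hidden={}".format(hidden_size))

plt.xlabel("characters observed")
plt.ylabel("test perplexity")
plt.legend()
plt.show()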