def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
        postprocessing = pick_fix_length(400, PAD_TOKEN)

    TEXT = data.Field(lower=True, postprocessing=postprocessing,
                      pad_token=PAD_TOKEN, include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)
    train, test = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train)
    LABEL.build_vocab(train)
    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu, repeat=False, sort_within_batch=True)

    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab),
                         args.R, args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos, '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()  # loss.data[0] is deprecated
        print(f'Loss: {sum_loss / len(train_iter)}')

        sum_correct = 0
        total = 0
        model.eval()
        for batch in test_iter:
            y = model.inference(*batch.text)
            sum_correct += y.eq(batch.label).sum().float()
            total += batch.label.size(0)
        accuracy = (sum_correct / total).item()
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
        print(f'Max Accuracy: {max_accuracy}')
def train():
    int_to_vocab, vocab_to_int, n_vocab, in_text = get_data_from_file(
        flags.batch_size, flags.seq_size)
    x_batch, y_batch = create_batch(in_text, flags.batch_size, flags.seq_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = LSTM(n_vocab, flags.seq_size, flags.embedding_size,
                 flags.lstm_size).to(device)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_function = nn.CrossEntropyLoss()

    for e in range(flags.num_epochs):
        print(f'epoch #{e}: ', end="")
        batches = get_batches(x_batch, y_batch, flags.batch_size, flags.seq_size)
        (state_h_1, state_c_1), (state_h_2, state_c_2) = model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)

        for i, (x, y) in enumerate(batches):
            model.train()
            optimizer.zero_grad()
            x = torch.tensor(x, dtype=torch.int64).to(device)
            # CrossEntropyLoss expects integer class indices, not one-hot vectors,
            # so the targets are kept as integer labels.
            y = torch.tensor(y, dtype=torch.int64).to(device)

            logits, (state_h_1, state_c_1), (state_h_2, state_c_2) = model(
                x, (state_h_1, state_c_1), (state_h_2, state_c_2))
            loss = loss_function(logits, y)

            # Detach the hidden states so gradients do not flow across batches.
            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()

            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               flags.gradients_norm)
            optimizer.step()
            print(f'batch #{i}:\tloss={loss.item():.10f}')
    return model
def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers,
                 settings.out_dim, settings.drop_out)
    # initialise the word embedding with pre-trained vectors
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()

    best_dev_acc = 0.0
    best_test_acc = 0.0
    for i in range(dataset.size // settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i + 1) % settings.validate_freq == 0:
            print("validating...")
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir,
                                               "sa_{}.model".format(best_dev_acc)))
            log_line = ("epoch: {}, dev acc: {}, test acc: {}, "
                        "batch loss: {}, best dev acc:{}, best test acc:{}".format(
                            i * settings.batch_size / float(dataset.size),
                            dev_acc, test_acc, loss.item(),
                            best_dev_acc, best_test_acc))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write(log_line + "\n")
            print(log_line)
def main(opt):
    model = LSTM(opt, batch_first=True, dropout=opt.dropout)
    if opt.pre_train:
        model.load_state_dict(torch.load(opt.save_path))
    optimizer = optim.Adam(model.parameters(), opt.learning_rate)
    mseloss = nn.MSELoss()

    dataset = PowerDataset(opt, prepocess_path=opt.prepocess_path,
                           transform=transforms.Compose([transforms.ToTensor()]))
    train_dataset = data.Subset(dataset, indices=range(8664))
    test_dataset = data.Subset(dataset, indices=range(8664, len(dataset)))
    train_dataloader = data.dataloader.DataLoader(train_dataset,
                                                  num_workers=opt.n_threads,
                                                  batch_size=opt.batch_size,
                                                  shuffle=True)
    test_sampler = data.SequentialSampler(test_dataset)
    test_dataloader = data.dataloader.DataLoader(test_dataset,
                                                 num_workers=opt.n_threads,
                                                 batch_size=opt.test_batch_size,
                                                 shuffle=False,
                                                 sampler=test_sampler)

    for e in range(opt.epochs):
        if opt.test_only:
            test(model, test_dataloader)
            break
        print('epoch: ', e)
        train(model, mseloss, optimizer, train_dataloader)
        test(model, test_dataloader)
        torch.save(model.state_dict(), opt.save_path)
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim * 5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)
    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': [i for i in range(1, len(train_loss_ls) + 1)],
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
def main(): global args, best_prec1 best_prec1 = 1e6 args = parser.parse_args() args.original_lr = 1e-6 args.lr = 1e-6 args.momentum = 0.95 args.decay = 5 * 1e-4 args.start_epoch = 0 args.epochs = 5000 args.steps = [-1, 1, 100, 150] args.scales = [1, 1, 1, 1] args.workers = 4 args.seed = time.time() args.print_freq = 30 args.feature_size = 100 args.lSeq=5 wandb.config.update(args) wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}" conf = configparser.ConfigParser() conf.read(args.config) # print(conf) TRAIN_DIR = conf.get("lstm", "train") VALID_DIR = conf.get("lstm", "valid") TEST_DIR = conf.get("lstm", "test") LOG_DIR = conf.get("lstm", "log") create_dir_not_exist(LOG_DIR) # TODO: train_list to train_file train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)] val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)] os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu torch.cuda.manual_seed(int(args.seed)) model = LSTM(args.feature_size, args.feature_size, args.feature_size) model = model.cuda() criterion = nn.MSELoss().cuda() optimizer = torch.optim.Adam(model.parameters(), args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.decay) model = DataParallel_withLoss(model, criterion) for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch) train(train_list, model, criterion, optimizer, epoch) prec1 = validate(val_list, model, criterion, epoch) with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f: f.write("epoch " + str(epoch) + " MSELoss: " + str(float(prec1))) f.write("\n") wandb.save(os.path.join(LOG_DIR, args.task + ".txt")) is_best = prec1 < best_prec1 best_prec1 = min(prec1, best_prec1) print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1)) save_checkpoint({ 'epoch': epoch + 1, 'arch': args.pre, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
def create_model():
    model = LSTM(input_size=input_size, num_classes=num_classes,
                 hidden=args.hidden_unit, num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
def load_model():
    print("==> loading existing lstm model")
    model_info = torch.load(model_path)
    model = LSTM(input_size=input_size,
                 num_classes=model_info['num_classes'],
                 hidden=model_info['hidden'],
                 num_layers=model_info['num_layers'],
                 mean_after_fc=model_info['mean_after_fc'],
                 mask_empty_frame=model_info['mask_empty_frame'])
    model.cuda()
    model.load_state_dict(model_info['state_dict'])
    best_acc = model_info['best_acc']
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer.load_state_dict(model_info['optimizer'])
    return (model, optimizer)
def train():
    train_writer = SummaryWriter(os.path.join(LOG_DIR, 'train7-64-LSTM-Doppler'))
    test_writer = SummaryWriter(os.path.join(LOG_DIR, 'test7-64-LSTM-Doppler'))
    train_loader, test_loader = load_data(TRAIN_DIR, TEST_DIR)

    lstm = LSTM().to(DEVICE)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(DEVICE)

    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %3d ****' % (epoch))
        sys.stdout.flush()
        train_one_epoch(epoch, train_writer, train_loader, lstm, loss_func, optimizer)
        eval_one_epoch(epoch, test_writer, test_loader, lstm, loss_func)
        # save model parameters to file
        torch.save(lstm.state_dict(), MODEL_DIR)
def main():
    names_str = read_csv(filname='data/names/names.csv')
    all_char_str = set([char for name in names_str for char in name])
    char2idx = {char: i for i, char in enumerate(all_char_str)}
    char2idx['EOS'] = len(char2idx)

    # save char dictionary
    cPickle.dump(char2idx, open("dic.p", "wb"))

    names_idx = [[char2idx[char_str] for char_str in name_str]
                 for name_str in names_str]

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    n_iters = 5
    for iter in range(1, n_iters + 1):
        # data shuffle
        random.shuffle(names_idx)
        total_loss = 0
        for i, name_idx in enumerate(names_idx):
            input = inputTensor(name_idx)
            target = targetTensor(name_idx, char2idx)
            loss = train(model, criterion, input, target)
            total_loss += loss
            optimizer.step()
        print(iter, "/", n_iters)
        print("loss {:.4}".format(float(total_loss / len(names_idx))))

    # save trained model
    torch.save(model.state_dict(), "model.pt")
def train_initialization(domain, classifier_name, all_data, data_type): train_data, test_data, Final_test, Final_test_original, Final_test_gt, unique_vocab_dict, unique_vocab_list = all_data output_size = 2 batch_size = 32 pre_train = True embedding_tune = True if data_type == 'train': epoch_num = 10 if domain == 'captions' else 4 else: # 'dev' epoch_num = 3 # sample test embedding_length = 300 if domain != 'captions' else 50 hidden_size = 256 if domain != 'captions' else 32 learning_rate = collections.defaultdict(dict) learning_rate['amazon'] = {'LSTM': 0.001, 'LSTMAtten': 0.0002, 'RNN': 0.001, 'RCNN': 0.001, 'SelfAttention': 0.001, 'CNN': 0.001} learning_rate['yelp'] = {'LSTM': 0.002, 'LSTMAtten': 0.0002, 'RNN': 0.0001, 'RCNN': 0.001, 'SelfAttention': 0.0001, 'CNN': 0.001} learning_rate['captions'] = {'LSTM': 0.005, 'LSTMAtten': 0.005, 'RNN': 0.01, 'RCNN': 0.01, 'SelfAttention': 0.005, 'CNN': 0.001} TEXT, vocab_size, word_embeddings, train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter = load_dataset(train_data, test_data, Final_test, Final_test_original, Final_test_gt, embedding_length, batch_size) if classifier_name == 'LSTM': model = LSTM(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) elif classifier_name == 'LSTMAtten': model = LSTM_AttentionModel(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) elif classifier_name == 'RNN': model = RNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) elif classifier_name == 'RCNN': model = RCNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) elif classifier_name == 'SelfAttention': model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) elif classifier_name == 'CNN': model = CNN(batch_size, output_size, 1, 32, [2,4,6], 1, 0, 0.6, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune) else: raise ValueError('Not a valid classifier_name!!!') loss_fn = F.cross_entropy optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate[domain][classifier_name]) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2, gamma=0.1) return train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter, epoch_num, model, loss_fn, optimizer, scheduler
def main():
    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    notes = loade_data('./notes.json')['notes']
    validation = loade_data('./validation.json')['notes']
    test = loade_data('./test.json')['notes']
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')

    seq_length = 100
    # refactor this, we only need a one-hot for the input
    # select a sequence or whatever here, use predefined for now (testing)
    learning_rate = 0.001

    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    criterion = nn.CrossEntropyLoss()
    network.to(device)  # move network to GPU
    optimizer = optim.Adam(network.parameters(), learning_rate)
    scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001,
                                            max_lr=0.001, cycle_momentum=False)
    print(device)

    # network, _, losses, best_net = trainLoop(network, criterion, notes, optimizer,
    #                                          3, seq_length, sign_to_int, scheduler)
    best_net = network
    # plt.plot(losses)
    # plt.savefig('losses.png')
    # plt.close('all')

    print('saving network....')
    # save_network(best_net, "net.pth")
    print('evaluating on test data...')
    evaluateAccuracy(test, best_net, seq_length, sign_to_int)
    print("eval done!")
class Train(): def __init__(self, difficulty): self.data_path = "../data" self.model_path = "../models" self.output_path = "../outputs" self.difficulty = difficulty self.timestamp = str(int(time.time())) self.model_name = "lstm_" + self.difficulty self.data = Data(difficulty=self.difficulty, data_path=self.data_path) (self.img_features, self.w2i, self.i2w, self.nwords, self.UNK, self.PAD) = self.data() self.train = list(self.data.get_train_data()) self.dev = list(self.data.get_validation_data()) self.test = list(self.data.get_test_data()) self.image_feature_size = 2048 self.output_vector_size = 10 def __call__(self, number_of_iterations = 2, learning_rate = 0.005, embedding_size = 300, hidden_size=100, batch_size=100): print("Starting 'Image Retrieval' in 'LSTM' mode with '" + self.difficulty + "' data") self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty" self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv" self.number_of_iterations = number_of_iterations self.learning_rate = learning_rate self.embedding_size = embedding_size self.hidden_size = hidden_size self.batch_size = batch_size self.model = LSTM(self.nwords, self.embedding_size, self.image_feature_size, self.output_vector_size, self.hidden_size, self.batch_size) self.criterion = nn.CrossEntropyLoss() self.evaluate = Evaluate(self.model, self.img_features, self.minibatch, self.preprocess, self.image_feature_size, self.output_vector_size) print(self.model) self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) self.train_loss_values = [] self.magic() self.save_model() self.save_data() def minibatch(self, data, batch_size = 50): for i in range(0, len(data), batch_size): yield data[i:i+batch_size] def preprocess(self, batch): """Helper function for functional batches""" correct_indexes = [observation[2] for observation in batch] img_ids = [observation[1] for observation in batch] text_features = [observation[0] for observation in batch] last_words = [len(dialog) for dialog in text_features] #Add Padding to max len of sentence in batch max_length = max(map(len, text_features)) text_features = [txt + [self.PAD] * (max_length - len(txt)) for txt in text_features] #return in "stacked" format, added last_words for excluding padding effects on LSTM return text_features, img_ids, correct_indexes, last_words def magic(self): for ITER in range(self.number_of_iterations): random.shuffle(self.train) train_loss = 0.0 start = time.time() iteration = 0 for batch in self.minibatch(self.train, self.batch_size): self.model.zero_grad() self.optimizer.zero_grad() self.model.hidden = self.model.init_hidden() #Load data for model text_features, h5_ids, correct_index, last_words = self.preprocess(batch) lookup_text_tensor = Variable(torch.LongTensor([text_features])).squeeze() full_img_batch = np.empty([len(batch), self.output_vector_size, self.image_feature_size]) for obs, img_ids in enumerate(h5_ids): for index, h5_id in enumerate(img_ids): full_img_batch[obs, index] = self.img_features[h5_id] full_img_batch = Variable(torch.from_numpy(full_img_batch).type(torch.FloatTensor)) #Target target = Variable(torch.LongTensor([correct_index])).squeeze() #Vector for excluding padding effects last_words = Variable(torch.LongTensor(last_words)) #Run model and calculate loss prediction = self.model(lookup_text_tensor, full_img_batch, last_words) loss = 
                loss = self.criterion(prediction, target)
                train_loss += loss.item()  # loss.data[0] is deprecated
                iteration += self.batch_size
                print(iteration)
                loss.backward()
                self.optimizer.step()

            print("ITERATION %r: train loss/sent=%.4f, time=%.2fs" %
                  (ITER + 1, train_loss / len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss / len(self.train))

    def save_model(self):
        # Save model
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score", self.evaluate(self.test, self.batch_size))

    def plot(self):
        plt.plot(self.train_loss_values, label="Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name + " - has loss with lr = %.4f, embedding size = %r" %
                  (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        with open(self.output_file_name, "w") as file:
            file.write(", ".join(map(str, self.train_loss_values)))
            file.write("\n")
            file.write(str(self.evaluate(self.test, self.batch_size)))
            file.write("\n")
    batch_size=cfg['model']['batch_size'],
    output_dim=cfg['model']['data_dim'],
    num_layers_lstm=cfg['model']['lstm_layers'],
    inference=False).cuda()

print('-> READ DATA')
dataset = MusicDataset(cfg['data']['processed_numpy_file'],
                       cfg['hyperparams']['sequence_length'],
                       cfg['data']['data_augmentation'])
dataloader = DataLoader(dataset, batch_size=cfg['model']['batch_size'], shuffle=False)

print('-> START TRAINING')
if cfg['hyperparams']['optimiser'] == 'adam':
    optimiser = torch.optim.Adam(lstm_model.parameters(),
                                 lr=cfg['hyperparams']['learning_rate'])

for batch_idx, batch_data in enumerate(dataloader):
    # zero grad model
    optimiser.zero_grad()
    # re-init hidden states
    lstm_model.hidden = lstm_model.init_hidden()
    # sort batch based on sequence length
    sort_batch(batch_data)
    # put batch on GPU
    batch_data = to_cuda(batch_data)
def ToVariable(x):
    tmp = torch.FloatTensor(x)
    return Variable(tmp)


use_gpu = torch.cuda.is_available()
# print(use_gpu)

input_size = 900
output_size = 900
hidden_dim = 2000
num_layer = 4

model = LSTM(input_size, hidden_dim, num_layer, output_size)
loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9)
if use_gpu:
    model = model.cuda()

root_path = "dataset"
data_num = 100
time_step = 5
datalist = create_datalist(root_path)
train_data, test_data = create_dataset(data_num, datalist, time_step)
# print(len(train_data))  # 17*80
# print(len(test_data))   # 17*20


def train(epoch):
    for step, input_data in enumerate(train_data, 1):
seq_len = TrainX.shape[1]
net = LSTM(input_dim, output_dim, seq_len, n_hidden, n_layers,
           fixed_pt_quantize=fixed_pt_quantize)
lossfunc = nn.MSELoss()
lr = 0.002
if fixed_pt_quantize:
    lr = 0.003
optimizer = torch.optim.Adamax(net.parameters(), lr=lr)

# ###################################### PRUNING ######################################
if pruning:
    print("Pruning============================================================================")
    figure_name = "/Subject_" + str(Idx_subject) + "_Finger_" + str(Finger) + "_pruning"
    PATH_pre_trained = (checkpoint_path + '/s' + str(Idx_subject) +
                        '_f' + str(Finger) + '_trained_model')
    net.load_state_dict(torch.load(PATH_pre_trained))
    net.train()
    net.threshold_pruning()
    # train the pruned model:
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the data loader
    dataset, targets = load_dataset()
    print('\nThe data are loaded')

    # Build the model
    lstm = LSTM(args.input_size, args.output_size)
    print('The model is built')
    print(lstm)
    if torch.cuda.is_available():
        lstm.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    # Train the model
    total_time = 0
    sm = 50  # start saving models after epoch 50
    for epoch in range(args.num_epochs):
        print('\nepoch ' + str(epoch) + ':')
        avg_loss = 0
        start = time.time()
        for i in range(0, len(dataset), args.batch_size):
            lstm.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = bi.view(-1, 1, 32)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = lstm(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()
        epoch_avg_loss = avg_loss / (len(dataset) / args.batch_size)
        print('--average loss:', epoch_avg_loss)

        end = time.time()
        epoch_time = end - start
        total_time = total_time + epoch_time
        print('time per epoch:', epoch_time)

        # save the loss into a csv file
        data = [epoch_avg_loss]
        with open(args.model_path + 'lstm_loss.csv', 'a+') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(data)

        if epoch == sm:
            model_path = 'lstm_' + str(sm) + '.pkl'
            torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
            sm = sm + args.save_step

    model_path = 'lstm_final.pkl'
    torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
def main(trial_num): # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model_type = "lstm" # Hyper-parameters sequence_length = 28 input_size = 28 num_layers = 1 hidden_size = 128 num_classes = 10 batch_size = 100 num_epochs = 20 learning_rate = 0.01 num_trials = 100 a_range = [1.0, 3.0] # a_s = [1.5, 2.0, 2.2, 2.5, 2.7, 3.0] # just for testing # num_trials = 1 # num_epochs = 20 # a_s = [1.0] # for a in a_s: trials = {} for num_trial in range(num_trials): a = random.random() * (a_range[1] - a_range[0]) + a_range[0] print('trial Num: ', trial_num, "a: ", a, "num_trial: ", num_trial) trial = {} trial['a'] = a # define model if model_type == 'lstm': model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device) elif model_type == 'gru': model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device) # Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) train_dataloader = MNIST_dataloader(batch_size, train=True) test_dataloader = MNIST_dataloader(batch_size, train=False) # Train the model total_step = len(train_dataloader.dataloader) total = 0 total_loss = 0 for epoch in range(num_epochs): model.train() for i, (images, labels) in enumerate(train_dataloader.dataloader): images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) # Forward pass outputs, hts = model(images) loss = criterion(outputs, labels) total_loss += loss * labels.size(0) total += labels.size(0) # print(LEs, rvals) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() # if (i + 1) % 300 == 0: # print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' # .format(epoch + 1, num_epochs, i + 1, total_step, total_loss / total)) # for i, (name, param) in enumerate(model.named_parameters()): # if i == 3: # print(name, param) # Test the model model.eval() with torch.no_grad(): correct = 0 total = 0 total_loss = 0 for i, (images, labels) in enumerate(test_dataloader.dataloader): images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) outputs, _ = model(images) # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device) # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device) # params = (images, (h, c)) # if i == 0: # LEs, rvals = calc_LEs_an(*params, model=model) loss = criterion(outputs, labels) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() total_loss += loss * labels.size(0) if epoch == (num_epochs - 1): print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, total_loss / total, 100 * correct / total)) saved_model = copy.deepcopy(model) trial[epoch] = {"model": saved_model, "accuracy": 100 * correct / total, "loss": total_loss / total} del saved_model trials[num_trial] = trial pickle.dump(trials, open('trials/{}/models/{}_{}_trials_{}.pickle'.format(model_type, model_type, hidden_size, trial_num), 'wb'))
def train(args): prefix = '' f_prefix = '.' if not os.path.isdir("log/"): print("Directory creation script is running...") subprocess.call([f_prefix+'/make_directories.sh']) args.freq_validation = np.clip(args.freq_validation, 0, args.num_epochs) validation_epoch_list = list(range(args.freq_validation, args.num_epochs+1, args.freq_validation)) validation_epoch_list[-1]-=1 # Create the data loader object. This object would preprocess the data in terms of # batches each of size args.batch_size, of length args.seq_length dataloader = DataLoader(f_prefix, args.batch_size, args.seq_length, args.num_validation, forcePreProcess=True) method_name = "VANILLALSTM" model_name = "LSTM" save_tar_name = method_name+"_lstm_model_" if args.gru: model_name = "GRU" save_tar_name = method_name+"_gru_model_" # Log directory log_directory = os.path.join(prefix, 'log/') plot_directory = os.path.join(prefix, 'plot/', method_name, model_name) plot_train_file_directory = 'validation' # Logging files log_file_curve = open(os.path.join(log_directory, method_name, model_name,'log_curve.txt'), 'w+') log_file = open(os.path.join(log_directory, method_name, model_name, 'val.txt'), 'w+') # model directory save_directory = os.path.join(f_prefix, 'model') # Save the arguments int the config file with open(os.path.join(save_directory, method_name, model_name,'config.pkl'), 'wb') as f: pickle.dump(args, f) # Path to store the checkpoint file def checkpoint_path(x): return os.path.join(save_directory, method_name, model_name, save_tar_name+str(x)+'.tar') # model creation net = LSTM(args) if args.use_cuda: net = net.cuda() # optimizer = torch.optim.Adagrad(net.parameters(), weight_decay=args.lambda_param) optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate) loss_f = torch.nn.MSELoss() learning_rate = args.learning_rate best_val_loss = 100 best_val_data_loss = 100 smallest_err_val = 100000 smallest_err_val_data = 100000 best_epoch_val = 0 best_epoch_val_data = 0 best_err_epoch_val = 0 best_err_epoch_val_data = 0 all_epoch_results = [] grids = [] num_batch = 0 # Training for epoch in range(args.num_epochs): print('****************Training epoch beginning******************') if dataloader.additional_validation and (epoch-1) in validation_epoch_list: dataloader.switch_to_dataset_type(True) dataloader.reset_batch_pointer(valid=False) loss_epoch = 0 # For each batch # num_batches 資料可以被分多少批 要跑幾個iter for batch in range(dataloader.num_batches): start = time.time() # print(dataloader.num_batches, dataloader.batch_size) # Get batch data x, y, d = dataloader.next_batch(randomUpdate=False) loss_batch = 0 # x_cat = Variable(torch.from_numpy(np.array(x[0])).float()) x_seq = np.array(x) y_seq = np.array(y) x_seq = Variable(torch.from_numpy(x_seq).float()) y_seq = Variable(torch.from_numpy(y_seq).float()) temp = x_seq[:,:,-2:] x_seq = x_seq[:,:,:-2] y_seq = y_seq[:,:,:3] hidden_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size)) cell_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size)) if args.use_cuda: x_seq = x_seq.cuda() y_seq = y_seq.cuda() temp = temp.cuda() hidden_states = hidden_states.cuda() cell_states = cell_states.cuda() # Zero out gradients net.zero_grad() optimizer.zero_grad() outputs, _, _ = net(x_seq, temp, hidden_states, cell_states) loss = loss_f(outputs, y_seq) loss_batch = loss.detach().item() # Compute gradients loss.backward() # Clip gradients torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip) # Update parameters optimizer.step() end = time.time() loss_epoch += 
loss_batch print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format((batch+1) * dataloader.batch_size, dataloader.num_batches * dataloader.batch_size, epoch, loss_batch, end - start)) loss_epoch /= dataloader.num_batches print("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch)) #Log loss values log_file_curve.write("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch)+'\n') # Validation dataset if dataloader.additional_validation and (epoch) in validation_epoch_list: dataloader.switch_to_dataset_type() print('****************Validation with dataset epoch beginning******************') dataloader.reset_batch_pointer(valid=False) dataset_pointer_ins = dataloader.dataset_pointer validation_dataset_executed = True loss_epoch = 0 err_epoch = 0 num_of_batch = 0 smallest_err = 100000 #results of one epoch for all validation datasets epoch_result = [] #results of one validation dataset results = [] # For each batch for batch in range(dataloader.num_batches): # Get batch data x, y, d = dataloader.next_batch(randomUpdate=False) # Loss for this batch loss_batch = 0 err_batch = 0 # For each sequence for sequence in range(len(x)): # Get the sequence x_seq = x[sequence] y_seq = y[sequence] x_seq= np.array(x_seq) y_seq= np.array(y_seq)[:,:3] x_seq = Variable(torch.from_numpy(x_seq).float()) y_seq = Variable(torch.from_numpy(y_seq).float()) temp = x_seq[:,-2:] x_seq = x_seq[:,:-2] y_seq = y_seq[:,:3] if args.use_cuda: x_seq = x_seq.cuda() y_seq = y_seq.cuda() temp = temp.cuda() #will be used for error calculation orig_x_seq = y_seq.clone() # print(x_seq.size(), args.seq_length) with torch.no_grad(): hidden_states = Variable(torch.zeros(1, args.rnn_size)) cell_states = Variable(torch.zeros(1, args.rnn_size)) ret_x_seq = Variable(torch.zeros(args.seq_length, net.input_size)) # all_outputs = Variable(torch.zeros(1, args.seq_length, net.input_size)) # Initialize the return data structure if args.use_cuda: ret_x_seq = ret_x_seq.cuda() hidden_states = hidden_states.cuda() cell_states = cell_states.cuda() total_loss = 0 # For the observed part of the trajectory for tstep in range(args.seq_length): outputs, hidden_states, cell_states = net(x_seq[tstep].view(1, 1, net.input_size), temp[tstep].view(1, 1, temp.size()[-1]), hidden_states, cell_states) ret_x_seq[tstep, 0] = outputs[0,0,0] ret_x_seq[tstep, 1] = outputs[0,0,1] ret_x_seq[tstep, 2] = outputs[0,0,2] print(outputs.size(), ) loss = loss_f(outputs, y_seq[tstep].view(1, 1, y_seq.size()[1])) total_loss += loss total_loss = total_loss / args.seq_length #get mean and final error # print(ret_x_seq.size(), y_seq.size()) err = get_mean_error(ret_x_seq.data, y_seq.data, args.use_cuda) loss_batch += total_loss.item() err_batch += err print('Current file : ',' Batch : ', batch+1, ' Sequence: ', sequence+1, ' Sequence mean error: ', err, 'valid_loss: ',total_loss.item()) results.append((y_seq.data.cpu().numpy(), ret_x_seq.data.cpu().numpy())) loss_batch = loss_batch / dataloader.batch_size err_batch = err_batch / dataloader.batch_size num_of_batch += 1 loss_epoch += loss_batch err_epoch += err_batch epoch_result.append(results) all_epoch_results.append(epoch_result) if dataloader.num_batches != 0: loss_epoch = loss_epoch / dataloader.num_batches err_epoch = err_epoch / dataloader.num_batches # avarage_err = (err_epoch + f_err_epoch)/2 # Update best validation loss until now if loss_epoch < best_val_data_loss: best_val_data_loss = loss_epoch best_epoch_val_data = epoch if err_epoch<smallest_err_val_data: # Save the model after each epoch 
                    print('Saving model')
                    torch.save({
                        'epoch': epoch,
                        'state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, checkpoint_path(epoch))
                    smallest_err_val_data = err_epoch
                    best_err_epoch_val_data = epoch

            print('(epoch {}), valid_loss = {:.3f}, valid_mean_err = {:.3f}'.format(
                epoch, loss_epoch, err_epoch))
            print('Best epoch', best_epoch_val_data, 'Best validation loss', best_val_data_loss,
                  'Best error epoch', best_err_epoch_val_data, 'Best error', smallest_err_val_data)
            log_file_curve.write("Validation dataset epoch: " + str(epoch) + " loss: " + str(loss_epoch) +
                                 " mean_err: " + str(err_epoch.data.cpu().numpy()) + '\n')

        optimizer = time_lr_scheduler(optimizer, epoch, lr_decay_epoch=args.freq_optimizer)

    if dataloader.valid_num_batches != 0:
        print('Best epoch', best_epoch_val, 'Best validation Loss', best_val_loss,
              'Best error epoch', best_err_epoch_val, 'Best error', smallest_err_val)
        # Log the best epoch and best validation loss
        log_file.write('Validation Best epoch:' + str(best_epoch_val_data) + ',' +
                       ' Best validation Loss: ' + str(best_val_data_loss))

    if dataloader.additional_validation:
        print('Best epoch according to validation dataset', best_epoch_val_data,
              'Best validation Loss', best_val_data_loss,
              'Best error epoch', best_err_epoch_val_data,
              'Best error', smallest_err_val_data)
        # write() takes a single string, so the fields are concatenated
        log_file.write("Validation dataset Best epoch: " + str(best_epoch_val_data) + ',' +
                       ' Best validation Loss: ' + str(best_val_data_loss) +
                       ' Best error epoch: ' + str(best_err_epoch_val_data) + '\n')
        # dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)
    # elif dataloader.valid_num_batches != 0:
    #     dataloader.write_to_plot_file(all_epoch_results[best_epoch_val], plot_directory)
    # else:
    if validation_dataset_executed:
        dataloader.switch_to_dataset_type(load_data=False)
        create_directories(plot_directory, [plot_train_file_directory])
        dataloader.write_to_plot_file(all_epoch_results[len(all_epoch_results) - 1],
                                      os.path.join(plot_directory, plot_train_file_directory))

    # Close logging files
    log_file.close()
    log_file_curve.close()
seed_everything(seed)

df = get_df(DATA_PATH, columns).reset_index(drop=True)
price = df['Closing_price']
train_len = len(df) - valid_len
_, train, valid, scaler = get_data(DATA_PATH, columns, valid_len)
train_loader = generate_dataset(train, seq_length)
valid_loader = generate_dataset(valid, seq_length)
test_inputs = train[-seq_length:].tolist()

model = LSTM()
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=10, cooldown=5)


def main():
    train_losses = []
    valid_losses = []
    best_loss = np.inf

    for i in range(epochs):
        train_loss = 0
        valid_loss = 0
        if i % 10 == 0:
            print('-----------------------')
            print(f'epoch: {i+1} / {epochs}')
            print('-----------------------')
        for seq, label in (train_loader):
for shop in tqdm(range(_SHOP_NUM)):
    if X_train is None:
        X_train, y_train = (np.load(_TITLE_TRAIN_FEATURES.format(shop)),
                            np.load(_TITLE_TRAIN_LABELS.format(shop)))
    else:
        X, y = (np.load(_TITLE_TRAIN_FEATURES.format(shop)),
                np.load(_TITLE_TRAIN_LABELS.format(shop)))
        X_train, y_train = np.vstack([X_train, X]), np.vstack([y_train, y])

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
print(X_train.shape, y_train.shape)

train_data = TensorDataset(X_train, y_train)
trainloader = DataLoader(train_data, batch_size=64, shuffle=True)

net = LSTM()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
criterion = nn.MSELoss()
running_loss = 0.0

print('Start training...')
for epoch in range(EPOCHS):
    print('{} epoch begins...'.format(epoch + 1))
    for i, data in enumerate(trainloader):
        net.zero_grad()
        X, y = data
        X = X.view(-1, 34, 3)
        outputs = net(X)
        labels = y[:, -2:-1]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
def main(): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(device) idx_to_word, word_to_idx, vocab_size, in_text, out_text = read_file( train_file, batch_size, seq_size) num_batches, _ = in_text.shape val_index = np.random.choice(np.arange(num_batches), int(num_batches * val_data_proportion), replace=False) train_index = np.delete(np.arange(num_batches), val_index) train_in_text = in_text[train_index, :] train_out_text = out_text[train_index, :] val_in_text = in_text[val_index, :] val_out_text = out_text[val_index, :] # print(num_batches) # print(train_in_text.shape) # print(val_in_text.shape) # print(vocab_size) lstm_model = LSTM(vocab_size, seq_size, emb_size, hidden_size) lstm_model = lstm_model.to(device) lstm_optim = optim.Adam(lstm_model.parameters(), lr=l_rate) loss_function = torch.nn.CrossEntropyLoss() train_set_loss = [] val_set_loss = [] for i in range(epoch): train_batches = generate_batch(train_in_text, train_out_text, batch_size, seq_size) val_batches = generate_batch(val_in_text, val_out_text, batch_size, seq_size) h0, c0 = lstm_model.initial_state(batch_size) h0 = h0.to(device) c0 = c0.to(device) total_loss, iterations, val_loss, val_iterations = 0, 0, 0, 0 # training_batch for x, y in train_batches: iterations += 1 lstm_model.train() # shape of x is (batch_size, seq_size) x = torch.tensor(x).to(device) y = torch.tensor(y).to(device) lstm_optim.zero_grad() logits, (h0, c0) = lstm_model(x, (h0, c0)) _, _, n_cat = logits.shape loss = loss_function(logits.view(-1, n_cat), y.view(-1)) total_loss += loss.item() loss.backward() # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. h0 = h0.detach() c0 = c0.detach() _ = torch.nn.utils.clip_grad_norm_(lstm_model.parameters(), gradients_norm) lstm_optim.step() # break for x_val, y_val in val_batches: val_iterations += 1 lstm_model.eval() x_val = torch.tensor(x_val).to(device) y_val = torch.tensor(y_val).to(device) logits, (h0, c0) = lstm_model(x_val, (h0, c0)) _, _, n_cat = logits.shape loss = loss_function(logits.view(-1, n_cat), y_val.view(-1)) val_loss += loss.item() avg_loss = total_loss / iterations val_avg_loss = val_loss / val_iterations train_set_loss.append(avg_loss) val_set_loss.append(val_avg_loss) print('Epoch: {}'.format(i), 'Loss: {}'.format(avg_loss), 'Validation Loss: {}'.format(val_avg_loss)) # if i % 10 == 0: # torch.save(lstm_model.state_dict(),'checkpoint_pt/model-{}.pth'.format(i)) _ = predict(device, lstm_model, vocab_size, word_to_idx, idx_to_word, top_k=predict_top_k) return train_set_loss, val_set_loss
class StockPrediction(): def __init__(self, stock, time_window, batch_size, learning_rate=0.001): self.stock = stock self.time_window = time_window self.batch_size = batch_size self.learning_rate = learning_rate self.input_size = 4 self.output_size = 1 self.nb_neurons = 200 self.prepare_data() self.output = "/Users/baptiste/Desktop/training" def validate(self): self.lstm_model.eval() error = [] loss_function = nn.MSELoss() it = iter(self.real_data_dataloader) real_data = next(it) loss = [] for i, (x, _) in enumerate(self.testing_dataloader): try: with torch.no_grad(): pred = self.lstm_model(x.float()) pred = self.data.unnormalizeData(pred) real_data = real_data.view(-1, 1) error = self.compute_error(error, pred, real_data) real_data = next(it) except: pass error_mean = np.mean(error) * 100 print("Mean error percentage : ", error_mean) self.lstm_model.train() def compute_error(self, error, pred, target): for i in range(self.batch_size): error.append(abs(pred[i, 0] - target[i, 0]) / target[i, 0]) return (error) def prepare_data(self): validation_split = 0 test_split = 0.1 train_split = 1 - validation_split - test_split self.data = Data(self.stock) df = self.data.getData() df_normalized = self.data.normalizeData(df) df_normalized = torch.FloatTensor(df_normalized.to_numpy()) train_split = int(train_split * df.shape[0]) validation_split = int(validation_split * df.shape[0]) test_split = int(test_split * df.shape[0]) training_split = df_normalized[:train_split, :] training_data = Dataset(training_split, self.time_window) self.training_dataloader = DataLoader(training_data, batch_size=self.batch_size) #testing_data real_data_tensor = torch.FloatTensor(df.to_numpy()) self.real_data_test = torch.FloatTensor( real_data_tensor[-test_split:-self.time_window, 3]) testing_dataset = Dataset(df_normalized[-test_split:, :], self.time_window) self.testing_dataloader = DataLoader(testing_dataset, batch_size=self.batch_size) self.real_data_dataloader = DataLoader(self.real_data_test, batch_size=self.batch_size) def train(self): #Model self.lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons) self.lstm_model.load_state_dict( torch.load("/Users/baptiste/Desktop/training/AAPL_36.pth")) loss_function = nn.MSELoss() optimizer = torch.optim.Adam(self.lstm_model.parameters(), lr=self.learning_rate) print("Start training") for epoch in range(nb_epochs): for (x, y) in self.training_dataloader: optimizer.zero_grad() self.lstm_model.hidden_cell = (torch.zeros( 1, self.batch_size, self.lstm_model.nb_neurons), torch.zeros( 1, self.batch_size, self.lstm_model.nb_neurons)) pred = self.lstm_model(x.float()) y = y.view(self.batch_size, 1) loss = loss_function(pred, y) loss.backward() optimizer.step() print("epoch n°%s : loss = %s" % (epoch, loss.item())) self.validate() if epoch % 5 == 1: model_name = "%s_%s.pth" % (self.stock, epoch) torch.save(self.lstm_model.state_dict(), os.path.join(output_path, model_name)) def show_result(self): files = os.listdir(self.output) for file in files: if ".pth" in file: path = os.path.join(self.output, file) lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons) lstm_model.load_state_dict(torch.load(path)) lstm_model.eval() print("model : %s loaded" % path) predictions = [] for (x, _) in self.testing_dataloader: if x.shape[0] == self.batch_size: with torch.no_grad(): lstm_model.hidden_cell = ( torch.zeros(1, self.batch_size, lstm_model.nb_neurons), torch.zeros(1, self.batch_size, lstm_model.nb_neurons)) output = lstm_model(x.float()) output = 
                            output = self.data.unnormalizeData(output).squeeze()
                            predictions += output.tolist()
                plt.plot(predictions, label="prediction")
                plt.plot(self.real_data_test, label="target")
                plt.title(file)
                plt.legend()
                plt.show()
epochs = 10
data, targets = get_data()
data = torch.FloatTensor(data)
data = data.unsqueeze(-1)
data = data.permute(1, 2, 0)
targets = torch.FloatTensor(targets).unsqueeze(-1).unsqueeze(-1)

# L1 Norm, sum to 1
# s_targets = data[1:, :, :-1]
# data = f.normalize(data, p=1, dim=2)
# targets = data[1:, :, :-1]

l = LSTM()
# optimizer = optim.SGD(l.parameters(), lr=0.001)
optimizer = optim.Adam(l.parameters(), lr=0.02)
loss_function = nn.MSELoss()
# loss_function = nn.L1Loss()

import random

sequence = 10


def sample(data, targets, s):
    n = random.randint(0, len(data) - (s + 5))
    # return data[n: n + s + 1], targets[n: n + s + 1]


for _ in range(1, epochs + 1):
    epoch_loss = 0
    l.reset_hidden()
from model import Resnet50
from model import LSTM
from model import Loss

# loading features extracted by the pretrained model
train_features = torch.load('train_features.pt')  # list of tensors
valid_features = torch.load('valid_features.pt')  # list of tensors
train_vals = torch.load('train_vals.pt')    # list of tensors
valid_vals = torch.load('valid_vals.pt')    # list of tensors

# model, optimizer, loss function
feature_size = 2048
learning_rate = 0.0001
model = LSTM(feature_size).cuda()
model = torch.load("../problem2/best_rnnbased.pth")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.5, patience=5,
                                                       verbose=True)
loss_function = Loss()

for param_group in optimizer.param_groups:
    param_group['lr'] = learning_rate

# some training parameters
BATCH_SIZE = 32
EPOCH_NUM = 500
datalen = len(train_features)
datalen_valid = len(valid_features)
def train(feature,label, epochs, model, layer, hidden, save,postfix, index2char, index2phone, phone_map, phone2index): dataset = Feature_Dataset(feature,'train') train_size = int(0.9*len(dataset)) if feature == 'mfcc': feature_dim = 39 elif feature == 'fbank': feature_dim = 69 elif feature == 'all': feature_dim = 108 print("Building model and optimizer...") if model == 'LSTM': train_model = LSTM(feature_dim,hidden,layer) elif model == 'C_RNN': group_size = 5 train_model = C_RNN(group_size,feature_dim,hidden,layer) elif model == 'BiLSTM': train_model = LSTM(feature_dim, hidden, layer, bi = True) if USE_CUDA: train_model = train_model.cuda() optimizer = optim.Adam(train_model.parameters(), lr = 0.005) #optimizer = optim.SGD(train_model.parameters(),lr = 0.1) criterion = nn.NLLLoss() if USE_CUDA: criterion = criterion.cuda() for epoch in range(1,epochs+1): print("Epoch {}".format(epoch)) epoch_loss = 0 epoch_edit = 0 for i in tqdm(range(1,train_size+1)): data = dataset[i-1] speaker = data[0] train_model.zero_grad() input_hidden = train_model.init_hidden() train_feature = Variable(data[1].float()) output = train_model(train_feature,input_hidden) output_seq = test_trim(index2char, index2phone, phone_map, phone2index, torch.max(output,1)[1].data.cpu().numpy()) target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index, [[int(l)] for l in label[speaker]]) target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int'))) target = target.cuda() if USE_CUDA else target loss = criterion(output,target) edit = editdistance.eval(output_seq,target_seq) epoch_loss += loss.data[0]/train_size epoch_edit += edit/train_size loss.backward() optimizer.step() print("Negative log-likelihood: {}".format(epoch_loss)) print("Edit distance: {} ".format(epoch_edit)) val_loss = 0 val_edit = 0 for i in tqdm(range(train_size+1,len(dataset)+1)): data = dataset[i-1] speaker = data[0] val_feature = Variable(data[1].float()) output = train_model(val_feature,train_model.init_hidden()) target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int'))) target = target.cuda() if USE_CUDA else target val_loss += criterion(output,target).data[0] output_seq = test_trim(index2char,index2phone, phone_map, phone2index,torch.max(output,1)[1].data.cpu().numpy()) target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index,[[int(l)] for l in label[speaker]]) val_edit += editdistance.eval(output_seq,target_seq) print("Validation loss: {}".format(val_loss/(len(dataset)-train_size))) print("Validation edit distance: {}".format(val_edit/(len(dataset)-train_size))) if epoch%save == 0: directory = os.path.join(SAVE_DIR, feature, model, '{}-{}{}'.format(layer,hidden,postfix)) if not os.path.exists(directory): os.makedirs(directory) torch.save({ 'model': train_model.state_dict(), 'opt': optimizer.state_dict(), 'val_loss': val_loss/(len(dataset)-train_size), 'val_edit': val_edit/(len(dataset)-train_size), }, os.path.join(directory, '{}.tar'.format(epoch))) print("Finish training")
LABEL.build_vocab(train)
train_iter, val_iter = data.BucketIterator.splits((train, val),
                                                  batch_size=BATCH_SIZE,
                                                  device='cuda')
train_iter.repeat = False
val_iter.repeat = False

# Instantiate the classifier
net = LSTM(layer_dim=N_LAYERS,
           hidden_dim=100,
           vocab_size=len(TEXT.vocab),
           embedding_dim=EMBEDDING_DIM,
           output_dim=N_CLASSES,
           dropout_proba=0.2).cuda()

# Define loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-08)

# train model
epochs = 5
for epoch in range(epochs):
    train_loss = 0
    val_loss = 0
    net.train()
    train_correct = 0
    for batch in tqdm(train_iter):
        optimizer.zero_grad()
        text, target = batch.text, batch.label
        output = net(text)
            os.path.join(opt['train']['pretrained'], 'short_discriminator.pkl')))
    long_discriminator.load_state_dict(
        torch.load(
            os.path.join(opt['train']['pretrained'], 'long_discriminator.pkl')))
    print('discriminator model loaded')

# print('ztta:', ztta.size())
# assert 0

## initialize optimizer_g ##
optimizer_g = optim.Adam(lr=opt['train']['lr'],
                         params=list(state_encoder.parameters()) +
                                list(offset_encoder.parameters()) +
                                list(target_encoder.parameters()) +
                                list(lstm.parameters()) +
                                list(decoder.parameters()),
                         betas=(opt['train']['beta1'], opt['train']['beta2']),
                         weight_decay=opt['train']['weight_decay'])
if len(opt['train']['pretrained']) > 0:
    optimizer_g.load_state_dict(
        torch.load(
            os.path.join(opt['train']['pretrained'], 'optimizer_g.pkl')))
    print('optimizer_g model loaded')

## initialize optimizer_d ##
if opt['train']['use_adv']:
    optimizer_d = optim.Adam(lr=opt['train']['lr'] * 0.1,
                             params=list(short_discriminator.parameters()) +
                                    list(long_discriminator.parameters()),
                             betas=(opt['train']['beta1'], opt['train']['beta2']),
                             weight_decay=opt['train']['weight_decay'])
    if len(opt['train']['pretrained']) > 0:
from Parameters import Parameter
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from DataProcessing import DataProcessing
from model import LSTM
import numpy as np
import time

models = LSTM().double()
models = models.cuda()
# size_average/reduce are deprecated; reduction='mean' is the equivalent setting
loss_function = nn.BCELoss(reduction='mean')
optimizer = optim.Adam(models.parameters())

# See what the scores are before training.
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
DataObject = DataProcessing()

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    print("Beginning a batch")
    StepsOfEpoch = 0
    DataMethodObject = DataObject.FetchInputsAndLabels()
    for wav, label in DataMethodObject:
        then = time.time()
        StepsOfEpoch += 1
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
def train(config, start_epoch=1, best_validation_loss=np.inf): """Trains AWD-LSTM model using parameters from config.""" print(f'Training for {config.epochs} epochs using the {config.dataset}', f'dataset with lambda value of {config.encoding_lmbd}') device = torch.device(config.device) dataLoader = DataLoader(config.dataset, config.batch_size, device, config.bptt) model = LSTM(embedding_size=config.embedding_size, hidden_size=config.hidden_size, lstm_num_layers=config.lstm_num_layers, vocab_size=len(dataLoader.corpus.dictionary), batch_size=config.batch_size, dropoute=config.dropoute, dropouti=config.dropouti, dropouth=config.dropouth, dropouto=config.dropouto, weight_drop=config.weight_drop, tie_weights=config.tie_weights, device=device) # D is set of gendered words, N is neutral words (not entirely correct, but close enough) D, N = get_gendered_words(config.dataset, dataLoader.corpus) criterion = torch.nn.CrossEntropyLoss(reduction='mean') optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay) def using_asgd(): """Checks if optimizer is using ASGD""" return 't0' in optimizer.param_groups[0] if not config.overwrite and check_model_exists(config): print("Loading model from precious state") model, optimizer, start_epoch, best_validation_loss = load_current_state( model, optimizer, config) if using_asgd(): temp = torch.optim.ASGD(model.parameters(), lr=config.learning_rate, t0=0, lambd=0., weight_decay=config.weight_decay) temp.load_state_dict(optimizer.state_dict()) optimizer = temp print("start epoch", start_epoch) params = list(model.parameters()) + list(criterion.parameters()) val_losses = deque(maxlen=config.nonmono) for e in range(start_epoch, config.epochs + 1): epoch_done = False model.train() model.initialize_hidden() epoch_loss = 0 # Loss over the epoch n_batch = 0 # Number of batches that have been done t_start = time.time() print(f"starting epoch {e}/{config.epochs}") while not epoch_done: lr = optimizer.param_groups[0]['lr'] # tr_batch, tr_labels are matrices with horizontal sequences. # seq_len is the sequence length in this iteration of the epoch, # see the openreviewpaper mentioned in the dataloader file tr_batch, tr_labels, seq_len, epoch_done = dataLoader.get_train_minibatch( ) # Rescale learning rate for sequence length optimizer.param_groups[0]['lr'] = lr * seq_len / config.bptt n_batch += 1 model.detach_hidden() # Need to prevent improper backprop optimizer.zero_grad() out, _, lstm_raw_out, lstm_drop_out = model(tr_batch, return_out=True) loss = criterion(out.permute(0, 2, 1), tr_labels.t()) # AR optimisation if config.alpha: loss += config.alpha * lstm_drop_out.pow(2).mean() # TAR optimisation if config.beta: loss += config.beta * (lstm_raw_out[1:] - lstm_raw_out[:-1]).pow(2).mean() # Encoding bias regularization if config.encoding_lmbd > 0: loss += bias_regularization_term(model.embed.weight, D, N, config.bias_variation, config.encoding_lmbd) # Decoding bias regularization if config.decoding_lmbd > 0: loss += bias_regularization_term(model.decoder.weight, D, N, config.bias_variation, config.decoding_lmbd) loss.backward() # Gradient clipping added to see effects. 
Turned off by default if config.clip: torch.nn.utils.clip_grad_norm_(params, config.clip) optimizer.step() # Add current loss to epoch loss epoch_loss += loss.item() # Return learning rate to default optimizer.param_groups[0]['lr'] = lr # Evaluate the training if n_batch % config.batch_interval == 0: cur_loss = epoch_loss / n_batch elapsed = float(time.time() - t_start) examples_per_second = n_batch / elapsed print( '| epoch {:3d} | {:5d} batch | lr {:05.5f} | batch/s {:5.2f} | ' 'train loss {:5.2f} | perplexity {:5.2f} |'.format( e, n_batch, optimizer.param_groups[0]['lr'], examples_per_second, cur_loss, np.exp(cur_loss))) print("Saving current model") save_current_state(model, optimizer, e, best_validation_loss, config) # Evaluate the model on the validation set for early stopping if e % config.eval_interval == 0: print('Evaluating on validation for early stopping criterion') test_done = False model.initialize_hidden() model.eval() epoch_loss = 0 n_batch = 0 tot_seq_len = 0 while not test_done: n_batch += 1 va_batch, va_labels, seq_len, test_done = dataLoader.get_validation_minibatch( ) tot_seq_len += seq_len out, _ = model(va_batch) model.detach_hidden() loss = criterion(out.permute(0, 2, 1), va_labels.t()) epoch_loss += loss.item() cur_loss = epoch_loss / n_batch if best_validation_loss > cur_loss: print("best_validation_loss > cur_loss") best_validation_loss = cur_loss val_losses.append(cur_loss) save_for_early_stopping(model, config, best_validation_loss) print( '| epoch {:3d} | lr {:05.5f} | validation loss {:5.2f} | perplexity {:5.2f} |' .format(e, optimizer.param_groups[0]['lr'], cur_loss, np.exp(cur_loss))) if not config.no_asgd and not using_asgd() and ( len(val_losses) == val_losses.maxlen and cur_loss > min(val_losses)): print('Switching to ASGD') optimizer = torch.optim.ASGD(model.parameters(), lr=config.learning_rate, t0=0, lambd=0., weight_decay=config.weight_decay) # Evaluate the model on the test set if e % config.eval_interval == 0: print('Evaluating on test') test_done = False model.eval() model.initialize_hidden() epoch_loss = 0 n_batch = 0 while not test_done: n_batch += 1 te_batch, te_labels, seq_len, test_done = dataLoader.get_test_minibatch( ) out, _ = model(te_batch) model.detach_hidden() loss = criterion(out.permute(0, 2, 1), te_labels.t()) epoch_loss += loss.item() cur_loss = epoch_loss / n_batch print( '| epoch {:3d} | lr {:05.5f} | test loss {:5.2f} | perplexity {:5.2f} |' .format(e, optimizer.param_groups[0]['lr'], cur_loss, np.exp(cur_loss))) print( f'Training is done. Best validation loss: {best_validation_loss}, validation perplexity: {np.exp(best_validation_loss)}' )