def load_and_cache_examples(args, tokenizer, evaluate=False):
    if args.not_pretrain:
        dataset = finetuneDataset(tokenizer, args, logger,
                                  file_type='dev' if evaluate else 'train',
                                  block_size=args.block_size)
    else:
        dataset = TextDataset(tokenizer, args, logger,
                              file_type='dev' if evaluate else 'train',
                              block_size=args.block_size)
    return dataset
def generate(config):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, drop_last=True)

    # Initialize the model and load the trained weights
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=86,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(device)
    model.load_state_dict(torch.load(config.model))

    for l in ["In 1776 ", "Liberty is ", "Democracy is "]:
        char_id = torch.tensor([dataset._char_to_ix[ch] for ch in l]).reshape(-1, 1).to(device)
        hidden = (torch.zeros((config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device),
                  torch.zeros((config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device))
        sequence = sample(model=model, dataset=dataset, init_seq=char_id,
                          init_hidden=hidden, seq_length=200, device=device,
                          temp=config.temp)
        print(dataset.convert_to_string(char_id.cpu().numpy().reshape(-1)) + sequence)
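# Illustrative sketch, not part of the original source: the `temp=config.temp`
# argument above controls temperature sampling. The helper below shows what
# such a sampler typically does over a 1-D logits tensor; the name
# `sample_char` is an assumption, not the repo's `sample` function.
import torch

def sample_char(logits, temp=1.0):
    """Pick one vocabulary index from a 1-D logits tensor."""
    if temp <= 0:
        # a temperature of 0 degenerates to greedy decoding
        return torch.argmax(logits).item()
    probs = torch.softmax(logits / temp, dim=-1)
    return torch.multinomial(probs, 1).item()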
def build_text_graph_dataset(dataset, window_size):
    if "small" in dataset or "presplit" in dataset or 'sentiment' in dataset:
        dataset_name = "_".join(dataset.split("_")[:-1])
    else:
        dataset_name = dataset
    clean_text_path = join(get_corpus_path(), dataset_name + '_sentences_clean.txt')
    labels_path = join(get_corpus_path(), dataset_name + '_labels.txt')
    labels = pd.read_csv(labels_path, header=None, sep='\t')

    doc_list = []
    with open(clean_text_path, 'rb') as f:
        for line in f.readlines():
            doc_list.append(line.strip().decode())
    assert len(labels) == len(doc_list)

    if 'presplit' not in dataset:
        labels_list = labels.iloc[0:, 0].tolist()
        split_dict = None
    else:
        labels_list = labels.iloc[0:, 2].tolist()
        split = labels.iloc[0:, 1].tolist()
        split_dict = {i: v for i, v in enumerate(split)}

    if "small" in dataset:
        doc_list = doc_list[:200]
        labels_list = labels_list[:200]

    word_freq = get_vocab(doc_list)
    vocab = list(word_freq.keys())
    if not exists(join(get_corpus_path(), dataset + '_vocab.txt')):
        with open(join(get_corpus_path(), dataset + '_vocab.txt'), 'w') as f:
            f.write('\n'.join(vocab))

    words_in_docs, word_doc_freq = build_word_doc_edges(doc_list)
    word_id_map = {word: i for i, word in enumerate(vocab)}
    sparse_graph = build_edges(doc_list, word_id_map, vocab, word_doc_freq, window_size)
    docs_dict = {i: doc for i, doc in enumerate(doc_list)}
    return TextDataset(dataset, sparse_graph, labels_list, vocab, word_id_map,
                       docs_dict, None, train_test_split=split_dict)
def construct_dictionary(data_train, data_val, data_test):
    if args.use_val:
        dataset = pd.concat([data_train, data_val], axis=0)
        if args.use_test:
            dataset = pd.concat([dataset, data_test], axis=0)
    else:
        dataset = data_train
    print("constructing dictionary...")
    dictionary = Dictionary()
    dictionary.word2idx, dictionary.idx2word = TextDataset.assign_word_ids(
        args.emsize, dataset)
    print("----processed %d word_2_id entries----" % len(dictionary.word2idx))
    with open(p.dict_path, 'wb') as f:
        pickle.dump(dictionary, f)
    return dictionary
def get_prediction_slength(grouped):
    preds = []
    label_list = []
    slength_list = []
    # for every batch (one document group at a time)
    for name, group in grouped:
        s = np.sum(list(map(lambda x: len(x), group.tokens)))
        slength_list.append(s)
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        if config.pooling == 'attn':
            y_pred, _, _ = model.forward(tokens)
        else:
            y_pred = model.forward(tokens)
        _, y_pred = torch.max(y_pred, 1)
        preds.append(y_pred.item())
        label_list.append(labels[0].item())
    return preds, label_list, slength_list
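# Illustrative sketch, not part of the original source: this function and the
# two check_loss_and_accuracy variants below iterate over `grouped`, which
# behaves like a pandas GroupBy whose groups expose `tokens` and `label`
# columns. Something like the following would produce such an object; the
# column and key names here are assumptions.
import pandas as pd

df = pd.DataFrame({
    'doc_id': [0, 0, 1],
    'tokens': [['a', 'b'], ['c', 'd'], ['e']],
    'label':  [1, 1, 0],
})
grouped = df.groupby('doc_id')  # one group of sentences per document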
def check_loss_and_accuracy(grouped):
    losses = []
    preds = []
    all_labels = []
    for name, group in grouped:
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        y_pred = model.forward(tokens)
        # compute the loss for this group before taking the argmax
        loss = criterion(y_pred.cuda(), labels[0])
        losses.append(loss.item())
        _, y_pred = torch.max(y_pred, 1)
        preds.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
        all_labels.append(np.ndarray.flatten(labels[0]))
    preds = np.array([item for sublist in preds for item in sublist])
    all_labels = np.array([item for sublist in all_labels for item in sublist])
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, preds)
    return (np.mean(np.array(losses)), accuracy_score(all_labels, preds),
            precision, recall, f1, confusion_matrix(all_labels, preds))
def check_loss_and_accuracy(grouped, model, dictionary):
    preds = []
    label_list = []
    for name, group in grouped:
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        if config.pooling == 'attn':
            y_pred, _, _ = model.forward(tokens)
        elif config.pooling == 'ensem':
            y_pred = model.forward(tokens)
            labels = labels.view(labels.shape[0], -1)
        _, y_pred = torch.max(y_pred, 1)
        preds.append(y_pred.item())
        label_list.append(labels[0].item())
    preds = np.array(preds)
    label_list = np.array(label_list)
    precision, recall, f1, _ = precision_recall_fscore_support(label_list, preds)
    return (accuracy_score(label_list, preds), precision, recall, f1,
            confusion_matrix(label_list, preds))
def main():
    args = parse_args()
    print('BATCH_SIZE: {}'.format(args.batch_size))
    print('SEQ_LENGTH: {}'.format(args.seq_length))
    print('EMBEDDING_DIM: {}'.format(args.embedding_dim))
    print('HIDDEN_DIM: {}'.format(args.hidden_dim))
    print('LR: {}'.format(args.lr))
    print('DROPOUT: {}'.format(args.dropout))
    print('EPOCHS: {}'.format(args.epochs))
    print('LOG_INTERVAL: {}'.format(args.log_interval))
    print('----------------------------')

    # Prepare data & split
    dataset = TextDataset(args.corpus, seq_length=args.seq_length)
    train_set_size = int(len(dataset) * 0.8)
    train_set, test_set = random_split(
        dataset, [train_set_size, len(dataset) - train_set_size])
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size)

    # Create model & optimizer
    model = Net(len(dataset.chars), args.embedding_dim, args.hidden_dim,
                dropout=args.dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Train
    train(model, optimizer, train_dataloader, args)

    # Save model
    torch.save(model.state_dict(), args.output_model)

    # Test
    test(model, test_dataloader, args)
    # (tail of the loop that encodes the visual features into vis_emb)
    id_str, tensor = vis_ds[i]
    with torch.no_grad():
        tensor = tensor.view([1, -1]).cuda()
        tensor = net.encode_visual(tensor)
        tensor = tensor.cpu().numpy()[0]
    vis_ids.append(id_str)
    vis_emb[i] = tensor

""" ENCODE TEXT QUERIES """
txt_ds = TextDataset(args.text_ds)
bow_encoder = Text2BoWEncoder(args.bow_vocab)
w2v_encoder = Text2W2VEncoder(args.w2v_weights)
txt_ids = []
txt_emb = np.empty([len(txt_ds), 2048])
for i in tqdm.trange(len(txt_ds)):
    id_str, tensor = txt_ds[i]
    bow_tensor = bow_encoder.encode(tensor)
    w2v_tensor = w2v_encoder.encode(tensor)
    roberta_tensor = net.roberta.encode(tensor)
    with torch.no_grad():
        roberta_tensor_len = torch.LongTensor([len(roberta_tensor)]).cuda()
        roberta_tensor = roberta_tensor.view([1, -1]).cuda()
        static_tensor = torch.cat([bow_tensor, w2v_tensor], 0).view([1, -1]).cuda()
def main(args): fix_seeds() # if os.path.exists('./logs'): # shutil.rmtree('./logs') # os.mkdir('./logs') # writer = SummaryWriter(log_dir='./logs') vis = visdom.Visdom() val_avg_loss_window = create_plot_window(vis, '#Epochs', 'Loss', 'Average Loss', legend=['Train', 'Val']) val_avg_accuracy_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Average Accuracy', legend=['Val']) size = (args.height, args.width) train_transform = transforms.Compose([ transforms.Resize(size), # transforms.RandomResizedCrop(size=size, scale=(0.5, 1)), transforms.RandomHorizontalFlip(), transforms.RandomAffine(10, translate=(0.1, 0.1), scale=(0.8, 1.2), resample=PIL.Image.BILINEAR), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) val_transform = transforms.Compose([ transforms.Resize(size), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) train_dataset = TextDataset(args.data_path, 'train.txt', size=args.train_size, transform=train_transform) val_dataset = TextDataset(args.data_path, 'val.txt', size=args.val_size, transform=val_transform) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.workers, shuffle=False) model = models.resnet18(pretrained=False) model.fc = nn.Linear(512, 16) model.load_state_dict(torch.load(args.resume_from)['model']) device = 'cpu' if args.cuda: device = 'cuda' print(device) metrics = {'accuracy': Accuracy(), 'loss': Loss(criterion)} evaluator = create_supervised_evaluator(model, metrics, device=device) @trainer.on(Events.ITERATION_COMPLETED) def lr_step(engine): if model.training: scheduler.step() global pbar, desc pbar, desc = None, None @trainer.on(Events.EPOCH_STARTED) def create_train_pbar(engine): global desc, pbar if pbar is not None: pbar.close() desc = 'Train iteration - loss: {:.4f} - lr: {:.4f}' pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0, lr)) @trainer.on(Events.EPOCH_COMPLETED) def create_val_pbar(engine): global desc, pbar if pbar is not None: pbar.close() desc = 'Validation iteration - loss: {:.4f}' pbar = tqdm(initial=0, leave=False, total=len(val_loader), desc=desc.format(0)) # desc_val = 'Validation iteration - loss: {:.4f}' # pbar_val = tqdm(initial=0, leave=False, total=len(val_loader), desc=desc_val.format(0)) log_interval = 1 e = Events.ITERATION_COMPLETED(every=log_interval) train_losses = [] @trainer.on(e) def log_training_loss(engine): lr = optimizer.param_groups[0]['lr'] train_losses.append(engine.state.output) pbar.desc = desc.format(engine.state.output, lr) pbar.update(log_interval) # writer.add_scalar("training/loss", engine.state.output, engine.state.iteration) # writer.add_scalar("lr", lr, engine.state.iteration) @evaluator.on(e) def log_validation_loss(engine): label = engine.state.batch[1].to(device) output = engine.state.output[0] pbar.desc = desc.format(criterion(output, label)) pbar.update(log_interval) # if args.resume_from is not None: # @trainer.on(Events.STARTED) # def _(engine): # pbar.n = engine.state.iteration # @trainer.on(Events.EPOCH_COMPLETED(every=1)) # def log_train_results(engine): # evaluator.run(train_loader) # eval on train set to check for overfitting # metrics = evaluator.state.metrics # avg_accuracy = metrics['accuracy'] # avg_nll = metrics['loss'] # tqdm.write( # "Train Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}" # .format(engine.state.epoch, 
avg_accuracy, avg_nll)) # pbar.n = pbar.last_print_n = 0 @trainer.on(Events.EPOCH_COMPLETED) def log_validation_results(engine): pbar.refresh() evaluator.run(val_loader) metrics = evaluator.state.metrics avg_accuracy = metrics['accuracy'] avg_nll = metrics['loss'] tqdm.write( "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}" .format(engine.state.epoch, avg_accuracy, avg_nll)) # pbar.n = pbar.last_print_n = 0 # writer.add_scalars("avg losses", {"train": statistics.mean(train_losses), # "valid": avg_nll}, engine.state.epoch) # # writer.add_scalar("valdation/avg_loss", avg_nll, engine.state.epoch) # writer.add_scalar("avg_accuracy", avg_accuracy, engine.state.epoch) vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]), win=val_avg_accuracy_window, update='append') vis.line(X=np.column_stack( (np.array([engine.state.epoch]), np.array([engine.state.epoch]))), Y=np.column_stack((np.array([statistics.mean(train_losses)]), np.array([avg_nll]))), win=val_avg_loss_window, update='append', opts=dict(legend=['Train', 'Val'])) del train_losses[:] objects_to_checkpoint = { "trainer": trainer, "model": model, "optimizer": optimizer, "scheduler": scheduler } training_checkpoint = Checkpoint(to_save=objects_to_checkpoint, save_handler=DiskSaver( args.snapshot_dir, require_empty=False)) trainer.add_event_handler(Events.EPOCH_COMPLETED(every=1), training_checkpoint) if args.resume_from not in [None, '']: tqdm.write("Resume from a checkpoint: {}".format(args.resume_from)) checkpoint = torch.load(args.resume_from) Checkpoint.load_objects(to_load=objects_to_checkpoint, checkpoint=checkpoint) try: trainer.run(train_loader, max_epochs=args.epochs) pbar.close() except Exception as e: import traceback print(traceback.format_exc())
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataset import TextDataset
from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10

lang_dataset = TextDataset()
# batch_size = 1
lang_dataloader = DataLoader(lang_dataset, shuffle=True)

# input words num
input_size = lang_dataset.input_lang_words
hidden_size = 256
# output words num
output_size = lang_dataset.output_lang_words
total_epoch = 20

encoder = EncoderRNN(input_size, hidden_size)
decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
attn_decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
use_attn = True
def train(config): # Initialize the device which to run the model on device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle=True, num_workers=1) vocab_size = dataset.vocab_size # char2i = dataset._char_to_ix # i2char = dataset._ix_to_char # ---------------------------------------- # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, \ config.lstm_num_hidden, config.lstm_num_layers, device) # fixme model.to(device) # Setup the loss and optimizer criterion = nn.NLLLoss() # fixme optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate) # fixme logSoftmax = nn.LogSoftmax(dim=2) # Learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, \ step_size=config.learning_rate_step, gamma=config.learning_rate_decay) step = 1 if config.resume: if os.path.isfile(config.resume): print("Loading checkpoint '{}'".format(config.resume)) checkpoint = torch.load(config.resume) step = checkpoint['step'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) print("Checkpoint loaded '{}', steps {}".format(config.resume, checkpoint['step'])) if not os.path.isdir(config.summary_path): os.makedirs(config.summary_path) if config.sampling =="greedy": f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+".txt"), "w+") else: f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+"_"+str(config.temp)+".txt"), "w+") best_accuracy = 0.0 pl_loss =[] average_loss =[] acc =[] for epochs in range(30): if step == config.train_steps: print('Done training.') break for (batch_inputs, batch_targets) in data_loader: if config.batch_size!=batch_inputs.size()[0]: print("batch mismatch") break # Only for time measurement of step through network t1 = time.time() model.hidden = model.init_hidden(config.batch_size) model.zero_grad() ####################################################### # Add more code here ... 
#convert batch inputs to one-hot vector batch_inputs= torch.zeros(config.batch_size, config.seq_length, vocab_size).scatter_(2,batch_inputs.unsqueeze(-1),1.0) batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device) predictions, _ = model(batch_inputs) if config.sampling=="greedy": predictions = logSoftmax(predictions) else: predictions = logSoftmax(predictions/config.temp) loss = criterion(predictions.transpose(2,1), batch_targets) # fixme _, predictions = torch.max(predictions, dim=2, keepdim=True) predictions = (predictions.squeeze(-1) == batch_targets).float() accuracy = torch.mean(predictions) loss.backward() torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) optimizer.step() lr_scheduler.step() ####################################################### # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) pl_loss.append(loss.item()) average_loss.append(np.mean(pl_loss[:-100:-1])) acc.append(accuracy) if step % config.print_every == 0: print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss.item() )) if step % config.sample_every == 0: model.eval() with torch.no_grad(): char_ix = generate_sample(model, vocab_size, config.seq_length, device, config) sentence = dataset.convert_to_string(char_ix) f.write("--------------"+str(step)+"----------------\n") f.write(sentence+"\n") print(sentence) print() model.train() # ########################################################################### # save training loss plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5) plt.plot(average_loss,'g-', label="Average loss", alpha=0.5) plt.legend() plt.xlabel("Iterations") plt.ylabel("Loss") plt.title("Training Loss") plt.grid(True) # plt.show() if config.sampling == "greedy": plt.savefig("loss_"+config.sampling+".png") else: plt.savefig("loss_"+config.sampling+"_"+str(config.temp)+".png") plt.close() ################################training################################################## plt.plot(acc,'g-', alpha=0.5) plt.xlabel("Iterations") plt.ylabel("Accuracy") plt.title("Train Accuracy") plt.grid(True) if config.sampling == "greedy": plt.savefig("accuracy_"+config.sampling+".png") else: plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png") plt.close() if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break step+=1 save_checkpoint({ 'epoch': epochs + 1, 'step': step, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler':lr_scheduler.state_dict(), 'accuracy': accuracy }, config) f.close()
def train(config): def acc(predictions, targets): hotvec = predictions.argmax(-2) == targets accuracy = torch.mean(hotvec.float()) return accuracy # Initialize the device which to run the model on device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=0) print('batch', config.batch_size) vocabulary_size = dataset.vocab_size print('vocab', vocabulary_size) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocabulary_size=vocabulary_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, dropout=1 - config.dropout_keep_prob, device=device) model = model.to(device) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=1e-5) gamma = 1 - config.learning_rate_decay lr_optim = torch.optim.lr_scheduler.StepLR(optimizer, config.learning_rate_step, gamma=gamma, last_epoch=-1) print('Hi') acc_list = [] loss_list = [] step_list = [] text_list = [] epoch = 100 offset = 2380 temperature = 1 policy = 'greedy' for e in range(epoch): torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt') for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network lr_optim.step() optimizer.zero_grad() t1 = time.time() inputs = torch.stack([*batch_inputs], dim=1) targets = torch.stack([*batch_targets], dim=1) inputs = inputs.to(device) targets = targets.to(device) out = model.forward(inputs)[0] out = out.permute(0, 2, 1) loss = criterion(out, targets) accuracy = acc(out, targets) torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) loss.backward() optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print('accuracy, loss, step: \n', np.around(accuracy.item(), 4), np.around(loss.item(), 4), step, '\n') acc_list.append(accuracy.item()) loss_list.append(loss.item()) step_list.append(step + offset * e) if step % config.sample_every == 0: # Generate some sentences by sampling from the model generator = torch.randint(low=0, high=vocabulary_size, size=(1, 1)).to(device) hidden = None char_list = [] for _ in range(config.seq_length): generator, hidden = model.forward(generator, hidden) if policy == 'greedy': idx = torch.argmax(generator).item() else: pass generator = torch.Tensor([idx]).unsqueeze(-1) generator = generator.to(device) char_list.append(idx) char = dataset.convert_to_string(char_list) with open("MyTunedBook.txt", "a") as text_file: print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char, file=text_file) print('Epoch. 
', e, 'Stahp: ', step, '\n Output: ', char) text_list.append((str((step + offset * e)) + '\n' + char)) pass if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') with open('FinalTunedBook.txt', 'w+') as f: for item in text_list: f.write("%s\n" % item) # save with pandas header = ['accuracy', 'length', 'loss', 'step'] savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list, step_list) df = pd.DataFrame(list(savefiles), columns=header) df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv') print('I am Loaded') temp_list = [0., 0.5, 1., 2.] policy_list = ['greedy', 'temp'] seq_length = 111 alice_string = list('Alice') # Generate some sentences by sampling from the model for policy in policy_list: for temperature in temp_list: char_list = [] hidden = None for alice in alice_string: idx = dataset.convert_to_idx(alice) char_list.append(idx) generator = torch.tensor([idx]).unsqueeze(-1) generator = generator.to(device) generator, hidden = model.forward(generator, hidden) for _ in range(seq_length): if policy == 'greedy': idx = torch.argmax(generator).item() else: temp = generator.squeeze() / temperature soft = torch.softmax(temp, dim=0) idx = torch.multinomial(soft, 1)[-1].item() generator = torch.tensor([idx]).unsqueeze(-1) generator = generator.to(device) generator, hidden = model.forward(generator, hidden) char_list.append(idx) char = dataset.convert_to_string(char_list) with open( "BonusTemp" + str(int(np.floor(temperature))) + "Book.txt", "w+") as text_file: print(policy + ': ', temperature, '\n Output: ', char, file=text_file) print(policy + ': ', temperature, '\n Output: ', char) print('Finito!')
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # should we do +1?? torch.save(dataset, config.save_dataset) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, 1 - config.dropout_keep_prob, device) criterion = nn.CrossEntropyLoss() optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate) losses = [] accuracies = [] # run through the dataset several times till u reach max_steps step = 0 while step < config.train_steps: for (batch_inputs, batch_targets) in data_loader: step += 1 # Only for time measurement of step through network t1 = time.time() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets, dim=1).to( device) #dim=1 to avoid transposing batch_predictions, (_, _) = model.forward(batch_inputs) batch_predictions = batch_predictions.permute(1, 2, 0) loss = criterion(batch_predictions, batch_targets) losses.append(loss.item()) model.zero_grad() # should we do this?? loss.backward() torch.nn.utils.clip_grad_norm( model.parameters(), max_norm=config.max_norm) # prevents maximum gradient problem optimizer.step() accuracy = accuracy_(batch_predictions, batch_targets) accuracies.append(accuracy) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print( "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), int(step), int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) if step % config.sample_every == 0: for temperature in [0]: for length in [30, 60, 90, 120]: sentence = generate_sentence(model, dataset, temperature, length, device) with open(config.save_generated_text, 'a', encoding='utf-8') as file: file.write("{};{};{};{}\n".format( step, temperature, length, sentence)) if step % config.save_every == 0: torch.save(model.state_dict(), config.save_model) if step == config.train_steps: # save only the model parameters torch.save(model.state_dict(), config.save_model) # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break # revive the model # model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size(), # config.lstm_num_hidden, config.lstm_num_layers, device) # model.load_state_dict(torch.load(config.save_model)) print('Done training.')
def train(config): if not os.path.isdir(CHECKPOINTS_FOLDER): os.mkdir(CHECKPOINTS_FOLDER) # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length, config.batch_size, config.train_steps) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size).to(device=device) # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=config.learning_rate) generated_sentences = [] for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() optimizer.zero_grad() batch_inputs = torch.unsqueeze(torch.stack(batch_inputs), 2).float().to(device=device) batch_targets = torch.cat(batch_targets).to(device=device) predictions = model(batch_inputs, config.batch_size) torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) loss = criterion(predictions, batch_targets) accuracy = get_accuracy(predictions, batch_targets) loss.backward() optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print( "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) if step % config.sample_every == 0: # Generate some sentences by sampling from the model sentence = generate_sentence(model, dataset, config) generated_sentences.append(sentence) state = { 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } torch.save( state, 'checkpoints/{}'.format( config.txt_file.split("/", 1)[1].replace('.txt', ''))) filename = config.txt_file.replace('.txt', '') + 'generated_sentences.txt' f = open(filename, 'w') output_string = '\n'.join(generated_sentences) f.write(output_string) print('Done training.')
            t0 = time.time()
            cur_cost = self.train_model(b, lr, mb)
            print("Time calculating minibatch cost: {:.4f}. Cost: {}".format(time.time() - t0, cur_cost))
            if b % 20 == 0 and b != 0:
                t0 = time.time()
                r = np.random.randint(0, in_test.shape[0] - 1001)
                err_test = self.error(in_test[r:r+1000], obs_test[r:r+1000])
                err_train = self.error(in_train[r:r+1000], obs_train[r:r+1000])
                print("Current cost: {}".format(cur_cost))
                print("Current Test Error: {}".format(err_test))
                print("Current Train Error: {}".format(err_train))
                print("Time calculating errors: {:.4f}".format(time.time() - t0))


if __name__ == '__main__':
    dataset = TextDataset('shakespeare.hdf5')
    dataset.cut_by_sequence(10, classify=False)
    # x = T.matrix('x')
    # y = T.matrix('y')
    x = T.tensor3('x')
    y = T.tensor3('y')
    foo = np.random.rand(10, 50)  # random data
    nhid = 200
    rnn = RNN(x, dataset.seq_len, [dataset.char_len, dataset.char_len, nhid],
              mode='LSTM', bptt_truncate=-1)
    trainer = Trainer(rnn, dataset)
    trainer.compile_functions(x, y)
    trainer.gradient_descent(0.01, 200, 10)
    # print(trainer.feed_forward(foo).shape)
    def __init__(self, K, hidden_size):
        self.K = K
        self.hidden_size = hidden_size

    def build(self):
        conv_bank = list()
        batch_norm_list = list()
        conv_bank.append(nn.Conv1d(1, self.hidden_size, 1))
        batch_norm_list.append(nn.BatchNorm1d(self.hidden_size))
        for k in range(2, self.K + 1):
            conv_bank.append(nn.Conv1d(self.hidden_size, self.hidden_size, k))
            batch_norm_list.append(nn.BatchNorm1d(self.hidden_size))

    def forward(self, x):
        pass


if __name__ == '__main__':
    transcript_path = 'kss/transcript.txt'
    txt_dataset = TextDataset(transcript_path)
    data_loader = DataLoader(dataset=txt_dataset, batch_size=32, shuffle=True,
                             num_workers=2)
    print('Dataset making and Loading Success')
    prenet = Prenet()
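# Illustrative sketch, not part of the original source: plain Python lists
# built inside build() above are never registered as sub-modules, so their
# weights would not appear in model.parameters(). A hedged alternative using
# nn.ModuleList; the layer sizes mirror the snippet above, and the class name
# ConvBank is an assumption.
import torch.nn as nn

class ConvBank(nn.Module):
    def __init__(self, K, hidden_size):
        super().__init__()
        # kernel sizes 1..K, first conv maps a single input channel
        self.convs = nn.ModuleList(
            [nn.Conv1d(1 if k == 1 else hidden_size, hidden_size, k)
             for k in range(1, K + 1)])
        self.norms = nn.ModuleList(
            [nn.BatchNorm1d(hidden_size) for _ in range(1, K + 1)])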
def main(): #DataGenerator imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1)) #64, 3 image_transform = transforms.Compose([ transforms.Resize(int(imsize * 76 / 64)), transforms.RandomCrop(imsize), transforms.RandomHorizontalFlip() ]) #cfg.DATA_DIR = "data/birds" dataset = TextDataset(cfg.DATA_DIR, "train", base_size=cfg.TREE.BASE_SIZE, transform=image_transform) assert dataset traingenerator = DataGenerator(dataset, batchsize=cfg.TRAIN.BATCH_SIZE) ##Create model G_model, D_model, GRD_model, CR_model, RNN_model = model_create(dataset) print("loadmodel_completed") #Preparation for learning total_epoch = cfg.TRAIN.MAX_EPOCH batch_size = traingenerator.batchsize step_epoch = int(len(dataset) / batch_size) wrong_step = 3 wrong_step_epoch = int(step_epoch / wrong_step) image_list, captions_ar, captions_ar_prezeropad, \ z_code, eps_code, mask, keys_list, captions_label, \ real_label, fake_label = next(traingenerator) traingenerator.count = 0 #for image plot test_noise = deepcopy(z_code[:20]) test_eps = deepcopy(eps_code[:20]) test_cap_pd = deepcopy(captions_ar_prezeropad[:20]) test_cap = deepcopy(captions_ar[:20]) test_mask = deepcopy(mask[:20]) test_mask = np.where(test_mask == 1, -float("inf"), 0) #Start learning print("batch_size: {} step_epoch : {} srong_step_epoch {}".format( batch_size, step_epoch, wrong_step_epoch)) for epoch in range(total_epoch): total_D_loss = 0 total_D_acc = 0 total_D_wrong_loss = 0 total_D_wrong_acc = 0 total_G_loss = 0 total_G_des_loss = 0 total_G_enc_loss = 0 print("----------------EPOCH: {} START----------------".format(epoch)) for batch in tqdm(range(step_epoch)): image_list, captions_ar, captions_ar_prezeropad, \ z_code, eps_code, mask, keys_list, captions_label, \ real_label, fake_label = next(traingenerator) mask = np.where(mask == 1, -float("inf"), 0) if cfg.TREE.BRANCH_NUM == 1: real_image = image_list[0] if cfg.TREE.BRANCH_NUM == 2: real_image = image_list[1] if cfg.TREE.BRANCH_NUM == 3: real_image = image_list[2] #D learning if cfg.TREE.BRANCH_NUM == 1: fake_image = G_model.predict( [captions_ar_prezeropad, eps_code, z_code]) else: # 2 or 3 fake_image = G_model.predict( [captions_ar_prezeropad, eps_code, z_code, mask]) if batch % 1 == 0: histDr = D_model.train_on_batch( [real_image, captions_ar_prezeropad], [real_label, real_label], ) total_D_loss += histDr[0] total_D_acc += (histDr[3] + histDr[4]) / 2 histDf = D_model.train_on_batch( [fake_image, captions_ar_prezeropad], [fake_label, fake_label], ) total_D_loss += histDf[0] total_D_acc += (histDf[3] + histDf[4]) / 2 if batch % wrong_step == 0: histDw = D_model.train_on_batch( [real_image[:-1], captions_ar_prezeropad[1:]], [fake_label[:-1], fake_label[:-1]], ) total_D_wrong_loss += histDw[0] total_D_wrong_acc += (histDw[3] + histDw[4]) / 2 #G learning if cfg.TREE.BRANCH_NUM == 1: histGRD = GRD_model.train_on_batch( [captions_ar_prezeropad, eps_code, z_code, captions_ar], [real_label, real_label, captions_label], ) else: # 2 or 3 histGRD = GRD_model.train_on_batch( [ captions_ar_prezeropad, eps_code, z_code, mask, captions_ar ], [real_label, real_label, captions_label], ) total_G_loss += histGRD[0] total_G_des_loss += (histGRD[1] + histGRD[2]) / 2 total_G_enc_loss += histGRD[3] #Calculation of loss D_loss = total_D_loss / step_epoch / 2 D_acc = total_D_acc / step_epoch / 2 D_wrong_loss = total_D_wrong_loss / wrong_step_epoch D_wrong_acc = total_D_wrong_acc / wrong_step_epoch G_loss = total_G_loss / step_epoch G_des_loss = total_G_des_loss / step_epoch G_enc_loss = total_G_enc_loss / step_epoch 
print( "D_loss: {:.5f} D_wrong_loss: {:.5f} D_acc: {:.5f} D_wrong_acc: {:.5f}" .format(D_loss, D_wrong_loss, D_acc, D_wrong_acc)) print( "G_loss: {:.5f} G_discriminator_loss: {:.5f} G_encoder_loss: {:.5f}" .format(G_loss, G_des_loss, G_enc_loss)) if epoch % 4 == 0: G_save_path = "model/G_epoch{}.h5".format(epoch) G_model.save_weights(G_save_path) D_save_path = "model/D_epoch{}.h5".format(epoch) D_model.save_weights(D_save_path) #Save image if epoch % 1 == 0: sample_images(epoch, test_noise, test_eps, test_cap_pd, test_mask, G_model)
def main(): device = torch.device('cuda') embedding_vectors = torch.load(f'{EMBEDDINGS_DIR}/vectors.pkl') text_processor = TextProcessor( wti=pickle.load(open(f'{EMBEDDINGS_DIR}/wti.pkl', 'rb')), tokenizer=get_tokenizer('basic_english'), standardize=True, min_len=3, ) dataset = TextDataset(CORPUS_DIR, text_processor) # split into training and test set # TODO: fix this splitting sometimes failing when corpus size changes train_set, test_set = torch.utils.data.random_split( dataset, [ int(len(dataset) * DATA_SPLIT), int(len(dataset) * (1.0 - DATA_SPLIT)) ]) # count number of samples in each class class_count = [0, 0] for data, label in dataset: class_count[int(label.item())] += 1 # get relative weights for classes _sum = sum(class_count) class_count[0] /= _sum class_count[1] /= _sum # reverse the weights since we're getting the inverse for the sampler class_count = list(reversed(class_count)) # set weight for every sample weights = [class_count[int(x[1].item())] for x in train_set] # weighted sampler sampler = torch.utils.data.WeightedRandomSampler( weights=weights, num_samples=len(train_set), replacement=True) train_loader = DataLoader(dataset=train_set, batch_size=32, collate_fn=Sequencer(SEQUENCE_LEN), sampler=sampler) test_loader = DataLoader(dataset=test_set, batch_size=32, collate_fn=Sequencer(SEQUENCE_LEN)) # number of filters in each convolutional filter N_FILTERS = 64 # sizes and number of convolutional layers FILTER_SIZES = [2, 3] # dropout for between conv and dense layers DROPOUT = 0.5 model = TextCNN( embeddings=embedding_vectors, n_filters=N_FILTERS, filter_sizes=FILTER_SIZES, dropout=DROPOUT, ).to(device) print(model) print('Trainable params:', sum(p.numel() for p in model.parameters() if p.requires_grad)) criterion = nn.BCELoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) EPOCHS = 12 best_acc = 0.0 # training loop for epoch in range(EPOCHS): print('Epoch', epoch + 1) for i, data in tqdm(enumerate(train_loader), total=len(train_loader)): # get word indices vector and corresponding labels x, labels = data # send to device x = x.to(device) labels = labels.to(device) # make predictions predictions = model(x).squeeze() # calculate loss loss = criterion(predictions, labels) # learning stuff... optimizer.zero_grad() loss.backward() optimizer.step() # evaluate with torch.no_grad(): model.eval() correct = 0 wrong = 0 m = [[0, 0], [0, 0]] for data in test_loader: x, label = data x = x.to(device) predictions = model(x).squeeze() for truth, prediction in zip(label, predictions): y = int(truth.item()) y_pred = 1 if prediction.item() > 0.5 else 0 m[y][y_pred] += 1 if y == y_pred: correct += 1 else: wrong += 1 model.train() acc = correct / (correct + wrong) if acc > best_acc: best_acc = acc for file in glob.glob('models/model_*.pth'): os.remove(file) torch.save(model.state_dict(), f'models/state_{epoch}.pth') print() print('Correct:', f'{correct}/{correct + wrong}', 'Accuracy:', acc) print('[[TN, FP], [FN, TP]]') print(m) print() # put into evaluation mode model.eval() text_processor.do_standardize = True with torch.no_grad(): while True: text = input('Prompt: ') x = text_processor.process(text) x = torch.tensor(x).unsqueeze(dim=0) print(model(x.to(device)).squeeze())
def train(config, CHOICES): # Initialize the device which to run the model on #device = torch.device(config.device)# fix this! device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) # Initialize the model that we are going to use # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length ); # fixme model = TextGenerationModel( config.batch_size, config.seq_length, dataset.vocab_size, config.temperature).cuda(); if (CHOICES['LOAD_BEST_MODEL']): model.load_state_dict(torch.load('./model_parameter.txt')); #print(model.state_dict()); data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss(); optimizer = torch.optim.RMSprop(model.parameters(),lr=config.learning_rate); if (CHOICES['LOAD_BEST_MODEL']): optimizer.load_state_dict(torch.load('./model_optimizer.txt')); accuracy_list = []; loss_list = []; string_list = []; tmp_accuracy = 0; a = 76; while (tmp_accuracy == 0) or (accuracy_list[-1] >0.85): for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() batch_inputs = torch.stack(batch_inputs)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size batch_targets = torch.stack(batch_targets)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size if not((int(batch_inputs.size()[1])) == config.batch_size): continue; #print(dataset.convert_to_string(batch_inputs[:, 0].cpu().numpy())); batch_inputs_onehot = one_hot(batch_inputs, dataset.vocab_size); # seq_length * batch_size * vacab_size; optimizer.zero_grad(); torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm); out = model(batch_inputs_onehot); values, indices = torch.max(out, 1); loss_criterion = criterion(out,batch_targets); loss_criterion.backward(); optimizer.step(); loss = loss_criterion.data[0]/(config.seq_length); values, indices = torch.max(out, 1); accuracy = ((indices[indices == batch_targets].size())[0])/(config.batch_size*config.seq_length); # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) # generate sentences if step % 50000 == 0 and CHOICES['GENERATE_FIVE_SENTENCES']: model.eval(); test_input = (torch.Tensor(batch_inputs.size())).type(torch.LongTensor).to(device); a = a + 1; test_input = test_input.fill_(a); output_string = generate_new_stings(model, test_input, dataset.vocab_size, config.seq_length); tmp = dataset.convert_to_string(output_string.cpu().numpy().tolist()); string_list += [tmp]; print(tmp); print('---') model.train(); # save parameter torch.save(model.state_dict(), './model_parameter{:d}.txt'.format(step)); torch.save(optimizer.state_dict(), './model_optimizer{:d}.txt'.format(step)); if (CHOICES['DRAW_ACCURACY_PLOT']): accuracy_list += [accuracy]; loss_list += [loss]; if step == config.sample_every: # Generate some sentences by sampling from the model pass if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5): break; if 
(CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5): break; print("============ finish {} epoch ============ ".format(len(accuracy_list))); torch.save(model.state_dict(), './model_parameter.txt'); torch.save(optimizer.state_dict(), './model_optimizer.txt'); print('Done training.'); if (CHOICES['GENERATE_FIVE_SENTENCES']): if (CHOICES['DRAW_ACCURACY_PLOT']): fig, ax = plt.subplots(); ax.plot(np.arange(len(accuracy_list)), accuracy_list, 'r', label = 'accuracy'); ax.plot(np.arange(len(accuracy_list)), loss_list, 'b', label = 'loss'); legend = ax.legend(loc='upper center'); plt.xlabel('Steps'); plt.title('loss and accuracy of LSTM in 2000 steps'); plt.show(); for idx in range(5): print('====') print(string_list[idx]);
def weights_init(m):
    classname = m.__class__.__name__
    if classname == 'LSTM':
        nn.init.orthogonal_(m.weight_ih_l0)
        nn.init.orthogonal_(m.weight_hh_l0)
        nn.init.orthogonal_(m.weight_ih_l1)
        nn.init.orthogonal_(m.weight_hh_l1)


label_size = 8
batch_size = 64
learning_rate = 0.001
epochs = 10

chapters = choose_chapters2()
cp = Corpus(chapters)
train_set = TextDataset(cp, train=True)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_set = TextDataset(cp, train=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

rnn = 'gru'
if rnn == 'lstm':
    model = LSTMNet(512, 128, vocab_size=len(cp.vocab), label_size=label_size,
                    batch_size=batch_size).cuda()
    model.apply(weights_init)
else:
    model = GRUNet(512, 128, vocab_size=len(cp.vocab), label_size=label_size,
                   batch_size=batch_size).cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
loss_function = nn.CrossEntropyLoss()
record = {}
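# Illustrative sketch, not part of the original source: a minimal epoch loop
# that the objects above could feed. The forward signature, batch layout, and
# the structure stored in `record` are assumptions made purely for illustration.
for epoch in range(epochs):
    total_loss, correct, seen = 0.0, 0, 0
    for tokens, labels in train_loader:
        tokens, labels = tokens.cuda(), labels.cuda()
        optimizer.zero_grad()
        logits = model(tokens)
        loss = loss_function(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (logits.argmax(dim=1) == labels).sum().item()
        seen += labels.size(0)
    record[epoch] = {'loss': total_loss / len(train_loader), 'acc': correct / seen}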
def train(config): # Print all configs to confirm parameter settings print_flags() assert config.sampling_method in ('greedy', 'random') assert config.generate_mode in ('generate', 'finish') # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(filename=config.txt_file, seq_length=config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=dataset.vocab_size, dropout=1-config.dropout_keep_prob, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, device=device) model.to(device) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=config.learning_rate) epoch = 10 # Store some measures los = list() iteration = list() acc = list() max_step = 0 for i in range(epoch): for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() model.train() optimizer.zero_grad() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets).to(device) h_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device) c_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device) pred, _, _ = model(batch_inputs, h_0, c_0) accuracy = compute_accuracy(pred, batch_targets) pred = pred.permute(1, 2, 0) batch_targets = batch_targets.permute(1, 0) loss = criterion(pred, batch_targets) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if (step + i * max_step) % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step + i * max_step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss )) iteration.append(step + i * max_step) acc.append(accuracy) los.append(loss) if max_step < step: max_step = step if (step + i * max_step) % config.sample_every == 0: model.eval() batch_sample = 5 if config.generate_mode == 'finish': generated = [dataset._char_to_ix[c] for c in config.input_seq] generated = torch.LongTensor(generated).view(-1, 1).to(device) for l in range(config.generate_length): if l == 0: h_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device) c_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device) gen, h_s, c_s = model(generated, h_s, c_s) gen = torch.unsqueeze(gen[-1], 0) else: gen, h_s, c_s = model(gen, h_s, c_s) if config.sampling_method == 'greedy': gen = gen.argmax(dim=2) else: gen = nn.functional.softmax(gen/config.temperature, dim=2) dist = torch.distributions.categorical.Categorical(gen) gen = dist.sample() generated = torch.cat((generated, gen)) else: generated = [dataset._char_to_ix[random.choice(dataset._chars)] for c in range(batch_sample)] generated = torch.LongTensor(generated).view(-1, batch_sample).to(device) for l in range(config.generate_length - 1): if l == 0: h_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device) c_s = torch.zeros(config.lstm_num_layers, 
batch_sample, config.lstm_num_hidden).to(device) gen, h_s, c_s = model(generated, h_s, c_s) else: gen, h_s, c_s = model(gen, h_s, c_s) if config.sampling_method == 'greedy': gen = gen.argmax(dim=2) else: gen = nn.functional.softmax(gen/config.temperature, dim=2) dist = torch.distributions.categorical.Categorical(gen) gen = dist.sample() generated = torch.cat((generated, gen)) generated = generated.t() sentence = [dataset.convert_to_string(idx) for idx in generated.tolist()] if config.sampling_method == 'random': with open('{}/{}_{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method, config.temperature), 'a', encoding='utf-8') as file: file.write('--------------\n') file.write('Training Step: {}\n'.format(step + i * max_step)) file.write('--------------\n') for sen in sentence: file.write('{}\n'.format(sen)) file.write('\n') file.close() else: with open('{}/{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method), 'a', encoding='utf-8') as file: file.write('--------------\n') file.write('Training Step: {}\n'.format(step + i * max_step)) file.write('--------------\n') for sen in sentence: file.write('{}\n'.format(sen)) file.write('\n') file.close() if (step + i * max_step) == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break if (step + i * max_step) == config.train_steps: break print('Done training.') fig, axs = plt.subplots(1, 2, figsize=(10,5)) axs[0].plot(iteration, acc) axs[0].set_xlabel('Iteration') axs[0].set_ylabel('Accuracy') axs[1].plot(iteration, los) axs[1].set_xlabel('Iteration') axs[1].set_ylabel('Loss') fig.tight_layout() plt.show()
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

fixed_noise = Variable(fixed_noise)

if not opt.eval:
    # Completed - TODO: Make a new DataLoader and Dataset to include embeddings
    train_dataset = TextDataset(opt.dataroot, transform=image_transform)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=opt.batchSize,
                                                   shuffle=True,
                                                   num_workers=int(opt.workers))

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
def train(config): # determine the filename (to be used for saving results, checkpoints, models, etc.) filename = Path(config.txt_file).stem # Initialize the device which to run the model on if config.device == 'cuda': if torch.cuda.is_available(): device = torch.device(config.device) else: device = torch.device('cpu') else: device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset( filename=config.txt_file, seq_length=config.seq_length ) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # get the vocabulary size and int2char and char2int dictionaries for use later VOCAB_SIZE = dataset.vocab_size # Initialize the model that we are going to use model = TextGenerationModel( batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=VOCAB_SIZE, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, device=device, batch_first=config.batch_first, dropout=1.0-config.dropout_keep_prob ) # Setup the loss and optimizer and learning rate scheduler criterion = nn.CrossEntropyLoss() optimizer = optim.Adam( model.parameters(), config.learning_rate ) # Load the latest checkpoint, if any exist checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt')) if len(checkpoints) > 0: # load the latest checkpoint checkpoints.sort(key=os.path.getctime) latest_checkpoint_path = checkpoints[-1] start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer) else: # initialize the epoch, results and best_accuracy start_step = 0 results = { 'step': [], 'accuracy': [], 'loss': [], } sequences = { 'step': [], 't': [], 'temperature': [], 'sequence': [] } for step in range(start_step, int(config.train_steps)): # reinitialize the data_loader iterater if we have iterated over all available mini-batches if step % len(data_loader) == 0 or step == start_step: data_iter = iter(data_loader) # get the mini-batch batch_inputs, batch_targets = next(data_iter) # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... 
####################################################### # put the model in training mode model.train() # convert the data and send to device X = torch.stack(batch_inputs, dim=1) X = X.to(device) Y = torch.stack(batch_targets, dim=1) Y = Y.to(device) # forward pass the mini-batch Y_out, _ = model.forward(X) Y_pred = Y_out.argmax(dim=-1) # (re)set the optimizer gradient to 0 optimizer.zero_grad() # compute the accuracy and the loss accuracy = get_accuracy(Y_pred, Y) loss = criterion.forward(Y_out.transpose(2, 1), Y) # backwards propogate the loss loss.backward() # clip the gradients (to preven them from exploding) torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) # tune the model parameters optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}') # append the accuracy and loss to the results results['step'].append(step) results['accuracy'].append(accuracy.item()) results['loss'].append(loss.item()) if step % config.sample_every == 0: for T in [20, 30, 60, 120]: for temperature in [0.0, 0.5, 1.0, 2.0]: # Generate some sentences by sampling from the model sequence = sample_sequence( model=model, vocab_size=VOCAB_SIZE, T=T, char=None, temperature=temperature, device=device ) sequence_str = dataset.convert_to_string(sequence) print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}') # append the generated sequence to the sequences sequences['step'].append(step) sequences['t'].append(T) sequences['temperature'].append(temperature) sequences['sequence'].append(sequence_str) if step % config.checkpoint_every == 0: # create a checkpoint create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences) # save the results save_results(RESULTS_DIR, filename, results, sequences, model) # save the model save_model(MODELS_DIR, filename, model) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.')
def train_early_stopping(epoch_number): global best_val_loss, best_acc loss_epoch = [] i = 1 batch_start = time.time() for name, group in train_grouped: # print(group.tokens.values) tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx) labels = np.array(group.label.values) try: tokens, labels = process_batch(tokens, labels) except: print(tokens) sys.exit(0) loss = train_data(tokens, labels) loss_epoch.append(loss) # print loss every n passes if i % (p.print_loss_every * 5) == 0: print('| epoch %d | %d/%d batches | ms/batch (%s) | loss %f' % (epoch_number, i % (num_batches + 1), num_batches, time_since(batch_start), np.mean(loss_epoch))) batch_start = time.time() i += 1 # word_encoder.eval() # sent_encoder.eval() model.eval() print('-' * 89) val_loss, val_acc, precision, recall, f1, conf_matrix = check_loss_and_accuracy( val_grouped) print( '| val set result | valid loss (pure) {:5.4f} | Acc {:8.4f} | Precision {:8.4f} | Recall {:8.4f} ' '| F1-score {:8.4f}'.format(val_loss, val_acc, precision, recall, f1)) print('The confusion matrix is: ') print(str(conf_matrix)) print('-' * 89) test_loss, test_acc, precision, recall, f1, conf_matrix = check_loss_and_accuracy( test_grouped) print( '| test set result | valid loss (pure) {:5.4f} | Acc {:8.4f} | Precision {:8.4f} | Recall {:8.4f} ' '| F1-score {:8.4f}'.format(test_loss, test_acc, precision, recall, f1)) print('The confusion matrix is: ') print(str(conf_matrix)) print('-' * 89) directory = "./experiments/%s/models/" % config.exp_num if not os.path.exists(directory): os.makedirs(directory) if not best_val_loss or val_loss < best_val_loss: best_val_loss = val_loss else: # if loss doesn't go down, divide the learning rate by 5. for param_group in optimizer.param_groups: param_group['lr'] = param_group['lr'] * 0.2 if not best_acc or val_acc > best_acc: with open( directory + 'para_{}.best_acc.pt'.format(config.para_pooling), 'wb') as f: torch.save(model, f) best_acc = val_acc with open( directory + 'para_{}.epoch-{:02d}.pt'.format( config.para_pooling, epoch_number), 'wb') as f: torch.save(model, f) with open("./experiments/{}/optimizer.pt".format(config.exp_num), 'wb') as f: torch.save(optimizer.state_dict(), f)
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Save the instantiated dataset. with open('model_ckpt/train.dataset', 'wb') as dataset_file: pickle.dump(dataset, dataset_file) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, device, config.dropout_keep_prob) # fixme # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() # reduction='mean'(default) - average over all timesteps and all batches as they are merged. optimizer = optim.RMSprop(model.parameters(), config.learning_rate) # fixme # optimizer = optim.Adam(model.parameters(), config.learning_rate) # Create a tensor to hold the one-hot encoding for the batch inputs. onehot_batch_inputs = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size) onehot_batch_inputs = onehot_batch_inputs.to(device) h_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device) c_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device) # Record the learning rate steps individually for learning rate decay. lr_step = 0 lr = 1 for epoch in np.arange(config.epochs): losses = [] accs = [] for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... ####################################################### model.train() # Convert the DataLoader output from list of tensors to tensors. batch_inputs = torch.stack(batch_inputs) batch_inputs = batch_inputs.to(device) # If the epoch is finished and there is not enough character to extract, break the loop if batch_inputs.shape[0] * batch_inputs.shape[1] != onehot_batch_inputs.shape[0] * onehot_batch_inputs.shape[1]: break # Zero the one-hot encoding and encode according to batch_inputs. onehot_batch_inputs.zero_() onehot_batch_inputs.scatter_(2, batch_inputs.unsqueeze_(-1), 1) # Convert the DataLoader output from list of tensors to tensors. batch_targets = torch.stack(batch_targets) batch_targets = batch_targets.to(device) # Learning rate decay. if lr_step % config.learning_rate_step == 0: optimizer = optim.RMSprop(model.parameters(), config.learning_rate * lr) lr *= config.learning_rate_decay optimizer.zero_grad() logits, _, _ = model(onehot_batch_inputs, h_init, c_init) # The seq_length dimension and batch_size dimension of the logits and batch_targets are merged together, and the mean is computed over this new dimension. 
loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1)) # fixme loss.backward() torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) accuracy = accuracy_fn(logits.view(-1, dataset.vocab_size), batch_targets.view(-1)) # fixme optimizer.step() losses.append(loss.item()) accs.append(accuracy) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print("[{}] Epoch {}/{}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1, config.epochs, step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss )) if step % config.sample_every == 0: # Generate some sentences by sampling from the model model.eval() # Create tensor to hold the generated samples. samples = torch.zeros((5, config.sample_length), dtype=torch.int, device=device) # Initialize the first characters for the samples. start_chars = torch.randint(dataset.vocab_size, size=(1, 5, 1), dtype=torch.long, device=device) samples[:, 0] = start_chars.squeeze() # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step). onehot_chars = torch.zeros((1, 5, dataset.vocab_size), device=device) onehot_chars.scatter_(2, start_chars, 1) last_h = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device) last_c = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device) for t in np.arange(config.sample_length - 1): logits, last_h, last_c = model(onehot_chars, last_h, last_c) next_chars = logits.squeeze().argmax(-1) onehot_chars.zero_() onehot_chars.scatter_(2, next_chars.view(1, 5, 1), 1) samples[:, t + 1] = next_chars samples = samples.tolist() samples = [dataset.convert_to_string(sample) for sample in samples] # Output the samples into a text file. with open(config.summary_path + 'samples.txt', 'a') as txt_file: txt_file.write('Epoch: {}\nStep: {}\n'.format(epoch + 1, step)) txt_file.writelines(map(lambda x: x + '\n', samples)) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break lr_step += 1 # After each training epoch, save the model and the training loss and accuracy. model.train() torch.save(model.state_dict(), 'model_ckpt/lstm_gen_epoch{}.ckpt'.format(epoch + 1)) with open(config.summary_path + 'train_epoch{}.csv'.format(epoch + 1), 'w', newline='') as csv_file: csv_writer = csv.writer(csv_file) csv_writer.writerow(losses) csv_writer.writerow(accs) print('Done training.')
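# The training loop above builds one-hot LSTM inputs in place with Tensor.scatter_, both for
# the training batches and for the characters generated during sampling. A standalone sketch
# of that pattern with made-up sizes (not the shapes used above):
import torch

seq_length, batch_size, vocab_size = 4, 3, 10                     # illustrative sizes
indices = torch.randint(vocab_size, (seq_length, batch_size))     # (T, B) character ids

onehot = torch.zeros(seq_length, batch_size, vocab_size)
# scatter_(dim, index, value): write a 1 at each character id along the vocabulary dim.
onehot.scatter_(2, indices.unsqueeze(-1), 1)

assert torch.equal(onehot.argmax(dim=2), indices)
# In recent PyTorch, torch.nn.functional.one_hot(indices, vocab_size).float() is equivalent.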
start = time.time()

if args.model == 'lstmcnn':
    vocab_list = list(
        """abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
    )
else:
    vocab_list = list(
        """abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
    )

print('==> download dataset ' + args.dataset)
download_dataset(args.data_path)

print('==> make dataset')
train_dataset = TextDataset(args.data_path, args.seq_length, vocab_list, is_train=True)
test_dataset = TextDataset(args.data_path, args.seq_length, vocab_list, is_train=False)
train_loader = data_utils.DataLoader(train_dataset, batch_size=args.batch_size,
                                     shuffle=True, num_workers=args.num_workers)
test_loader = data_utils.DataLoader(test_dataset, batch_size=args.batch_size,
                                    shuffle=True, num_workers=args.num_workers)

print('==> make model')
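# The script above feeds a fixed character list into TextDataset. A small sketch of the usual
# character-to-index encoding such a dataset performs; how this particular TextDataset handles
# unknown characters and padding is not shown here, so the choices below (drop unknowns,
# right-pad with index 0) are assumptions for illustration only.
charset = list("abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} ")
char_to_ix = {ch: i for i, ch in enumerate(charset)}

def encode(text, seq_length):
    ids = [char_to_ix[ch] for ch in text.lower() if ch in char_to_ix]  # drop unknown chars
    ids = ids[:seq_length]                        # truncate to the fixed sequence length
    ids += [0] * (seq_length - len(ids))          # right-pad with index 0
    return ids

print(encode("Hello, world!", 20))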
def train(config): def compute_accuracy(outputs, targets): """ Compute the accuracy of the predicitions. """ outputs = torch.argmax(outputs, -1) return (outputs == targets).float().mean() # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=4) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, device, config.dropout_keep_prob).to(device) learning_rate = config.learning_rate # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() # fixme optimizer = optim.Adam(model.parameters(), learning_rate) # fixme x_onehot = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size).to(device) y_onehot = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size).to(device) # HACK: config.train_steps seems to be of type 'float' instead of 'int'. config.train_steps = int(config.train_steps) step = 0 loss_list = [] accuracy_list = [] while step < config.train_steps: for batch_inputs, batch_targets in data_loader: # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... ####################################################### optimizer.zero_grad() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets).to(device) # print(dataset.convert_to_string(batch_inputs.t()[0].cpu().numpy())) try: x_onehot.zero_() x_onehot.scatter_(2, batch_inputs.unsqueeze(-1), 1) except RuntimeError: continue y = model(x_onehot) loss = criterion(y.view(-1, dataset.vocab_size), batch_targets.view(-1)) loss.backward() optimizer.step() loss = loss.item() # fixme accuracy = compute_accuracy(y, batch_targets) # fixme # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) loss_list.append(loss) accuracy_list.append(accuracy) if step % config.learning_rate_step == 0: learning_rate = config.learning_rate_decay * learning_rate print(learning_rate) optimizer = optim.Adam(model.parameters(), learning_rate) if step % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, " "Examples/Sec = {:.2f}, Accuracy = {:.2f}, " "Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss)) # Save an image of loss and accuracy during training. plt.figure() plt.subplot(121) plt.plot(loss_list) plt.xlabel("Steps") plt.ylabel("Loss") plt.subplot(122) plt.plot(accuracy_list) plt.xlabel("Steps") plt.ylabel("Accuracy") plt.tight_layout() plt.savefig('loss.png') plt.close() if step % config.sample_every == 0: # Generate some sentences by sampling from the model inputs = sample_text(dataset, x_onehot) output = sample_text(dataset, y) sample = sample_text(dataset, model.sample()) for idx in range(5): print(f"{inputs[idx]} | {output[idx]} | {sample[idx]}") # Save some sampled sequences. 
with open('samples.csv', 'a') as file: for line in sample[:5]: file.write(f"{step};'{line}'\n") torch.save( { 'step': step + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(config.summary_path, f"model_{step}.pth.tar")) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this # bug report: https://github.com/pytorch/pytorch/pull/9655 break else: step += 1 print('Done training.')
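# The loop above bundles the step counter, model weights and optimizer state into a single
# checkpoint dictionary. A minimal sketch of writing and restoring such a checkpoint; the
# small placeholder model and file name are illustrative, not the ones used above.
import torch
from torch import nn, optim

model = nn.Linear(8, 8)                              # placeholder for the real model
optimizer = optim.Adam(model.parameters(), lr=1e-3)

torch.save({'step': 100,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, 'model_100.pth.tar')

# Restoring: rebuild the objects first, then load the stored states into them.
ckpt = torch.load('model_100.pth.tar')
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['optimizer'])
start_step = ckpt['step']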
def train(config): # Initialize the device which to run the model on use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") #path to save the model path = "results/" # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # print("Data file:", dataset._data[0:5]) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset, config.lstm_num_hidden, config.lstm_num_layers, device) # model = torch.load("results/book_EN_grimms_fairy_tails_final_model.pt") # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate) # Store Accuracy and losses: results = {'accuracy': [], 'loss': []} # Training: total_steps = 0 while total_steps <= config.train_steps: for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() optimizer.zero_grad() # Stacking and One-hot encoding: batch_inputs = torch.stack(batch_inputs, dim=1).to(device) batch_targets = torch.stack(batch_targets, dim=1).to(device) # print("Inputs and targets:", x_onehot.size(), batch_targets.size()) # forward inputs to the model: pred_targets, _ = model.forward( index_to_onehot(batch_inputs, dataset.vocab_size)) # print("pred_targets trans shape:", pred_targets.transpose(2,1).size()) loss = criterion(pred_targets.transpose(2, 1), batch_targets) #Backward pass loss.backward(retain_graph=True) optimizer.step() #Accuracy # argmax along the vocab dimension accuracy = (pred_targets.argmax( dim=2) == batch_targets).float().mean().item() #Update the accuracy and losses for visualization: results['accuracy'].append(accuracy) results['loss'].append(loss.item()) # Just for time measurement t2 = time.time() # examples_per_second = config.batch_size/float(t2-t1) total_steps += 1 if step % config.print_every == 0: # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " # "Accuracy = {:.2f}, Loss = {:.3f}".format( # datetime.now().strftime("%Y-%m-%d %H:%M"), step, # config.train_steps, config.batch_size, examples_per_second, # accuracy, loss # )) print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, total_steps, config.batch_size, results['accuracy'][-1], results['loss'][-1])) if step % config.sample_every == 0: # Generate some sentences by sampling from the model print('GENERATED NO TEMP:') print(model.generate_sentence(100)) print('__________________') print('GENERATED 0.5 TEMP:') print(model.generate_sentence(100, 0.5)) print('__________________') print('GENERATED 1 TEMP:') print(model.generate_sentence(100, 1)) print('__________________') print('GENERATED 2 TEMP:') print(model.generate_sentence(100, 2)) # save model for individual timesteps torch.save( model, path + config.txt_file.split('/')[1].split('.')[0] + str(step) + "_model.pt") if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') #Save the final model torch.save( model, path + config.txt_file.split('/')[1].split('.')[0] + "_final_model.pt") print("saving results in folder...") np.save(path + "loss_train", results['loss']) np.save(path + "accuracy_train", results['accuracy'])
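# generate_sentence above is a method of this particular model; its second argument is a
# sampling temperature (0.5, 1 and 2 in the calls above). A generic sketch of temperature
# sampling from a vector of logits, as an illustration rather than the author's implementation:
import torch

def sample_with_temperature(logits, temperature=1.0):
    # temperature < 1 sharpens the distribution (closer to greedy argmax),
    # temperature > 1 flattens it, producing more random output.
    if temperature <= 0:
        return int(torch.argmax(logits))
    probs = torch.softmax(logits / temperature, dim=-1)
    return int(torch.multinomial(probs, num_samples=1))

logits = torch.randn(87)                  # made-up vocabulary size
for t in (0.5, 1.0, 2.0):
    print(t, sample_with_temperature(logits, t))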
def train(config, lr): # Initialize the device which to run the model on #device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel( batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=dataset.vocab_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers) # fixme if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' print('Currently using: ', device) model = model.to(device) # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() # fixme #optimizer = torch.optim.Adam(model.parameters(), lr = config.learning_rate, amsgrad = True) # fixme #optimizer = torch.optim.Adam(model.parameters(), lr = lr, amsgrad = True) acc_list = [] loss_list = [] test_batches_in = [] test_batches_ta = [] test_acc = [] best_accuracy = 0 ### Flag for temperature temp = True temp_value = 2 for runs in range(3): optimizer = torch.optim.RMSprop(model.parameters(), lr=lr) for step, (batch_inputs, batch_targets) in enumerate(data_loader): if step % config.print_every != 0 or step == 0: t1 = time.time() #print(type(step)) #model.train() ####################################################### torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) zerox = create_zerox(batch_inputs, dataset.vocab_size, device) output, _ = model.forward(zerox) #.to(device) targets = torch.stack(batch_targets).to(device) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) #print(output.shape, targets.shape) #return 'a' #print(output.transpose(0,2).shape, targets.t().shape) #return 'a' loss_for_backward = criterion(output.transpose(0, 2), targets.t()).to(device) optimizer.zero_grad() loss_for_backward.backward() optimizer.step() correct_indices = output_indices == targets.transpose( 0, 1).to(device) #return correct_indices ####################################################### #loss = criterion.forward(output, targets) #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]* #correct_indices.shape[1]) #print(type(accuracy),type(loss)) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0 and step != 0: #model.eval() zerox = create_zerox(batch_inputs, dataset.vocab_size, device) output, _ = model.forward(zerox) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) targets = torch.stack(batch_targets).to(device) #loss_for_backward = criterion(output,targets).to(device) loss_for_backward = criterion(output.transpose(0, 2), targets.t()).to(device) correct_indices = output_indices == targets.transpose( 0, 1) #.to(device) #return output_indices, targets.transpose(0,1) #print(correct_indices.shape) #accuracy = sum(acc_list) / len(acc_list) #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.numel()) accuracy = np.array(correct_indices.detach().cpu()).mean() #print("[{}] Train Step {:04d}/{:f}, Batch Size = {}, Examples/Sec = {:.2f}, " # "Accuracy = {:.2f}, Loss = {:.3f}".format( # datetime.now().strftime("%Y-%m-%d %H:%M"), step, # config.train_steps, config.batch_size, examples_per_second, # accuracy, # loss_for_backward #)) acc_list.append(accuracy) 
loss_list.append(float(loss_for_backward)) if accuracy > best_accuracy: torch.save( { 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict() }, 'model.pth') if step % config.sample_every == 0: # Generate some sentences by sampling from the model ## Generate a good sample instead of the same one over and over again #model.eval() ### Append every modulo batch to a list of test batches and run ### over that list to test zerox = create_zerox(batch_inputs, dataset.vocab_size, device) test_batches_in.append(zerox) targets = torch.stack(batch_targets).to(device) test_batches_ta.append(targets) batch_inputz = torch.stack(batch_inputs).to(device) batch_input = batch_inputz.transpose(1, 0).to(device) output, _ = model.forward(zerox) #.to(device) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) loss_for_backward = criterion(output, targets).to(device) correct_indices = output_indices == targets.transpose( 0, 1).to(device) best_sample = np.argmax( np.asarray(sum(correct_indices.t().detach().cpu()))) print( 'Real: ', dataset.convert_to_string( np.asarray(batch_input[best_sample].cpu()))) output, _ = model.forward(zerox) #.to(device) output_indices = torch.argmax(output, dim=2).to(device) print( 'prediction: ', dataset.convert_to_string( np.asarray(output_indices[best_sample].cpu()))) bc = int(sum(correct_indices.t().detach().cpu()) [best_sample]) / config.seq_length print('This sample had:', bc, 'characters right') output = np.random.randint(dataset.vocab_size) letters = [output] greedy_output = np.random.randint(dataset.vocab_size) greedy_letters = [greedy_output] Temperature_time(runs, step, dataset, device, model) for i in range(config.seq_length - 1): #if temp: # ============================================================================= # # soft = torch.nn.Softmax(dim=2) # # # # # zerol = torch.zeros([1,1,dataset.vocab_size]) # one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) # zerol.scatter_(2,one_hot_letter,1) # zerol = zerol.to(device) # if i == 0: # output, h = model.forward(zerol) # # else: # output, h = model.forward(zerol, h) # # tempered = soft(output/temp_value) # #print(tempered) # output = int(torch.multinomial(tempered[0][0],1).detach().cpu()) # #print(output) # letters.append(output) # ============================================================================= greedy_zerol = torch.zeros([1, 1, dataset.vocab_size]) greedy_one_hot_letter = torch.tensor( greedy_output).unsqueeze(-1).unsqueeze(-1).unsqueeze( -1) greedy_zerol.scatter_(2, greedy_one_hot_letter, 1) greedy_zerol = greedy_zerol.to(device) if i == 0: greedy_output, greedy_h = model.forward(greedy_zerol) else: greedy_output, greedy_h = model.forward( greedy_zerol, greedy_h) greedy_output = int( torch.argmax(greedy_output, dim=2).detach().cpu()) greedy_letters.append(greedy_output) print('Greedy Generation ', dataset.convert_to_string(greedy_letters)) abs_step = (runs * 10000) + step line = ' '.join(('Step:', str(abs_step), dataset.convert_to_string(letters))) with open('GreedyGeneration.txt', 'a') as file: file.write(line + '\n') # ============================================================================= # if step % (config.sample_every*1000) ==0: # avg = [] # print('Testing over ', len(test_batches_in), 'batches') # for z in range(len(test_batches_in)): # ##OUTPUT # output,_ = model.forward(test_batches_in[z]) # output_indices = torch.argmax(output, dim=2).to(device) # output = 
output.transpose(0,1).transpose(1,2).to(device) # # ##LOSS AND ACCURACY # loss_for_backward = criterion(output,targets).to(device) # correct_indices = output_indices == test_batches_ta[z].transpose(0,1).to(device) # # accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]* # correct_indices.shape[1]) # # avg.append(accuracy) # # this_test_acc = sum(avg)/len(avg) # print('The test accuracy over ',len(test_batches_in), 'is: ', this_test_acc) # test_acc.append(this_test_acc) # #if bc > 0.8: # # print(bc) # # #return correct_indices # # ============================================================================= if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') line = ' '.join( ('Test accuracy:', str(test_acc.append), 'Learning rate:', str(lr), 'Accuracy:', str(acc_list), 'Loss:', str(loss_list))) with open('textresults.txt', 'a') as file: file.write(line + '\n') #hiddenstates = [None]*30 output = np.random.randint(dataset.vocab_size) letters = [output] for i in range(400): zerol = torch.zeros([1, 1, dataset.vocab_size]) one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze( -1).unsqueeze(-1) zerol.scatter_(2, one_hot_letter, 1) zerol = zerol.to(device) if i == 0: output, h = model.forward(zerol) output = int(torch.argmax(output, dim=2).detach().cpu()) letters.append(output) #hiddenstates[i] = h else: output, h = model.forward(zerol, h) output = int(torch.argmax(output, dim=2).detach().cpu()) letters.append(output) #hiddenstates[i % 30] = h print('Final generation: ', dataset.convert_to_string(letters)) line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list))) with open('PrideAndPrejudice2.txt', 'a') as file: file.write(line + '\n')
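# The training loop in the function above calls torch.nn.utils.clip_grad_norm before the
# forward and backward passes, so it acts on stale (or not yet existing) gradients, and uses
# the deprecated spelling (current PyTorch exposes clip_grad_norm_). A sketch of the usual
# ordering inside one training step, with a placeholder model and batch rather than the ones
# defined above:
import torch
from torch import nn, optim

lstm = nn.LSTM(input_size=86, hidden_size=64, num_layers=2)      # placeholder sizes
head = nn.Linear(64, 86)
params = list(lstm.parameters()) + list(head.parameters())
optimizer = optim.RMSprop(params, lr=2e-3)
criterion = nn.CrossEntropyLoss()

x = torch.randn(30, 16, 86)                   # (seq_len, batch, vocab) one-hot-like inputs
targets = torch.randint(86, (30, 16))         # (seq_len, batch) target character ids

optimizer.zero_grad()
out, _ = lstm(x)                              # (seq_len, batch, hidden)
logits = head(out)                            # (seq_len, batch, vocab)
loss = criterion(logits.reshape(-1, 86), targets.reshape(-1))
loss.backward()                               # gradients exist only after backward()
torch.nn.utils.clip_grad_norm_(params, max_norm=5.0)   # clip, then take the step
optimizer.step()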
model_config = yaml.load(open(args.model_config, "r"), Loader=yaml.FullLoader)
train_config = yaml.load(open(args.train_config, "r"), Loader=yaml.FullLoader)
configs = (preprocess_config, model_config, train_config)

# Get model
model = get_model(args, configs, device, train=False)

# Load vocoder
vocoder = get_vocoder(model_config, device)

# Preprocess texts
if args.mode == "batch":
    # Get dataset
    dataset = TextDataset(args.source, preprocess_config)
    batchs = DataLoader(
        dataset,
        batch_size=8,
        collate_fn=dataset.collate_fn,
    )
if args.mode == "single":
    ids = raw_texts = [args.text[:100]]
    speakers = np.array([args.speaker_id])
    if preprocess_config["preprocessing"]["text"]["language"] == "en":
        texts = np.array([preprocess_english(args.text, preprocess_config)])
    text_lens = np.array([len(texts[0])])
    batchs = [(ids, raw_texts, speakers, texts, text_lens, max(text_lens))]

synthesize(model, args.restore_step, configs, vocoder, batchs)
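# In batch mode the DataLoader above needs the dataset's own collate_fn because the phoneme
# sequences in a batch have different lengths. A minimal sketch of what such a collate
# function typically does (right-pad to the longest item and keep the original lengths); this
# is an illustration, not the collate_fn of this TextDataset.
import numpy as np

def pad_collate(batch):
    """batch: list of (id, raw_text, speaker, phoneme_ids) tuples."""
    ids = [item[0] for item in batch]
    raw_texts = [item[1] for item in batch]
    speakers = np.array([item[2] for item in batch])
    texts = [item[3] for item in batch]

    text_lens = np.array([len(t) for t in texts])
    max_len = int(text_lens.max())
    padded = np.zeros((len(texts), max_len), dtype=np.int64)
    for i, t in enumerate(texts):
        padded[i, :len(t)] = t                # zero-pad each sequence on the right

    return ids, raw_texts, speakers, padded, text_lens, max_len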