def deep_rnn_model(input_dim, units, recur_layers, output_dim=29):
    """ Build a deep recurrent network for speech """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_layers=recur_layers,
                      rnn_units=units).model(input_shape=(None, input_dim),
                                             output_dim=output_dim)
def rnn_model(input_dim, units, activation, output_dim=29):
    """ Build a recurrent network for speech """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units,
                      activation=activation).model(input_shape=(None, input_dim),
                                                   output_dim=output_dim)
def cnn_rnn_model(input_dim, filters, kernel_size, conv_stride,
                  conv_border_mode, units, output_dim=29):
    """ Build a recurrent + convolutional network for speech """
    return M.RNNModel(cnn_config=M.CNNConfig(filters=filters,
                                             kernel_size=kernel_size,
                                             conv_stride=conv_stride,
                                             conv_border_mode=conv_border_mode),
                      bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units).model(input_shape=(None, input_dim),
                                             output_dim=output_dim)
def final_model():
    """ Build a deep network for speech """
    return M.RNNModel(cnn_config=M.CNNConfig(kernel_size=3,
                                             conv_stride=1,
                                             conv_border_mode="same",
                                             cnn_layers=12,
                                             cnn_dropout_rate=0.25,
                                             cnn_activation_before_bn_do=True,
                                             cnn_do_bn_order=True),
                      bd_merge=M.BidirectionalMerge.concat,
                      rnn_type=M.RNNType.GRU,
                      rnn_dense=True,
                      rnn_units=250,
                      rnn_layers=4,
                      rnn_dropout_rate=0.2,
                      dropout_rate=0.3,
                      name_suffix="Final").model(input_shape=(None, 26),
                                                 output_dim=29)
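# A minimal usage sketch for the builder functions above. It assumes the `M`
# wrapper module from this repo is importable and that the returned object
# behaves like a Keras model; the feature dimension (161 spectrogram bins)
# and the hyperparameter values are illustrative, not taken from the source.
model = cnn_rnn_model(input_dim=161, filters=200, kernel_size=11,
                      conv_stride=2, conv_border_mode="valid", units=200)
model.summary()

deep = final_model()  # input_dim is fixed to 26 features inside the builder
deep.summary()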
def main():
    preprocessing = PreProcessing()
    rnn_model = models.RNNModel()

    if config.char_or_word == config.character_model:
        data = None
        if config.data_type == "cmu_dict":
            data = datasets.getCMUDictData(config.data_src_cmu)
        preprocessing.loadDataCharacter(data=data)
    else:
        preprocessing.loadData()
    preprocessing.prepareLMdata()

    # Get model
    params = {}
    params['embeddings_dim'] = config.embeddings_dim
    params['lstm_cell_size'] = config.lstm_cell_size
    if config.char_or_word == config.character_model:
        params['vocab_size'] = preprocessing.vocab_size
    else:
        params['vocab_size'] = len(preprocessing.word_index)
    params['inp_length'] = config.inp_length - 1
    model = rnn_model.getModel(params)

    x_train, y_train = preprocessing.x_train, preprocessing.y_train
    x_val, y_val = preprocessing.x_val, preprocessing.y_val
    x_test, y_test = preprocessing.x_test, preprocessing.y_test

    # Train, checkpointing on the best validation loss.
    checkpointer = ModelCheckpoint(
        filepath="./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
        verbose=1, save_best_only=True)
    model.fit(x_train, y_train,
              validation_data=(x_val, y_val),
              nb_epoch=config.num_epochs,  # `epochs` in Keras >= 2
              batch_size=config.batch_size,
              callbacks=[checkpointer])
    saveEmbeddings(model, preprocessing.word_index)

    # Evaluate
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

    # Sample a few sequences.
    print("--- Sampling a few sequences... ")
    for _ in range(5):
        pred = utilities.generateSentence(model, preprocessing.word_index,
                                          preprocessing.sent_start,
                                          preprocessing.sent_end,
                                          preprocessing.unknown_word)
        sent = [preprocessing.index_word[i] for i in pred]
        if config.char_or_word == config.character_model:
            print(''.join(sent))
        else:
            print(' '.join(sent))
def main():
    ###########################################################################
    # Load command line options.
    ###########################################################################
    opts = options()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)
    if opts.temperature < 1e-3:
        parser.error("--temperature has to be greater or equal 1e-3")

    ###########################################################################
    # Load the dictionary
    ###########################################################################
    with open(opts.dict, "rb") as f:
        dictionary = pickle.load(f)

    ###########################################################################
    # Build a model
    ###########################################################################
    with open(opts.load + ".params", 'rb') as f:
        params = pickle.load(f)

    # Model check: generation requires a unidirectional model.
    if params["direction"] == "both":
        print("WARNING: Bidirectional language model is not supported "
              "by this generator.")
        raise SystemExit(1)

    model = models.RNNModel(params)
    model.load_state_dict(torch.load(opts.load + ".pt"))
    if torch.cuda.is_available() and not opts.cuda:
        print("WARNING: You have a CUDA device, "
              "so you should probably run with --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model.to(device)
    model.eval()

    generate(opts, params, dictionary, model, device)
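# `generate` is not shown in this snippet. A minimal sketch of the usual
# temperature-sampling loop (in the style of the PyTorch word_language_model
# example); `opts.words`, the model's (input, hidden) call signature, and
# `dictionary.idx2word` are assumptions, not confirmed by the source.
with torch.no_grad():
    hidden = model.init_hidden(1)
    inp = torch.randint(len(dictionary), (1, 1), dtype=torch.long).to(device)
    for _ in range(opts.words):
        output, hidden = model(inp, hidden)
        # Divide logits by the temperature, exponentiate, and sample.
        word_weights = output.squeeze().div(opts.temperature).exp()
        word_idx = torch.multinomial(word_weights, 1)[0]
        inp.fill_(word_idx)
        print(dictionary.idx2word[word_idx], end=' ')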
def predict(testdata, path_to_weights, vocab_size, n_classes):
    '''
    Creates, loads and initialises the model, and makes predictions on the
    test data.

    Input:
        testdata        - data loader of the test data (type: DataLoader)
        path_to_weights - relative path and file name of the saved model
                          weights with .pth extension (type: string)
        vocab_size      - size of the vocabulary (type: int)
        n_classes       - number of labels/classes that can be predicted
                          (type: int)
    Output:
        preds_prob_list   - probabilities with which the model predicted the
                            corresponding label (type: list of floats)
        preds_status_list - the re-encoded labels that were predicted
                            (type: list of strings)
    '''
    rnn_params = train.rnn_params
    model = models.RNNModel(rnn_type=rnn_params.rnn_type,
                            nr_layers=rnn_params.nr_layers,
                            voc_size=vocab_size,
                            emb_dim=rnn_params.emb_dim,
                            rnn_size=rnn_params.rnn_size,
                            dropout=rnn_params.dropout,
                            n_classes=n_classes)
    models.ModelUtils.load_model(path_to_weights, model)
    model.to(rnn_params.device)
    model.eval()  # disable dropout for inference

    batch_size = 1
    h = model.init_hidden(batch_size, device=rnn_params.device)

    preds_prob_list, preds_status_list = [], []
    with torch.no_grad():
        for x_test in testdata:
            x_test = x_test.to(rnn_params.device)
            h = tuple(each.data for each in h)
            out, h = model(x_test, h)
            # Use the raw sigmoid output as the confidence; rounding first
            # would always yield a probability of 1.0.
            p = out.squeeze().item()
            pred_status = "depressive" if p < 0.5 else "non-depressive"
            prob = (1 - p) if pred_status == "depressive" else p
            preds_status_list.append(pred_status)
            preds_prob_list.append(prob)
    return preds_prob_list, preds_status_list
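# A hedged usage sketch for `predict`. The weights path, `test_tensors`, and
# `vocab` are hypothetical placeholders; only the call signature comes from
# the function above.
test_loader = DataLoader(test_tensors, batch_size=1)  # hypothetical data
probs, statuses = predict(test_loader,
                          path_to_weights="weights/rnn_best.pth",  # hypothetical path
                          vocab_size=len(vocab),
                          n_classes=1)
for p, s in zip(probs[:5], statuses[:5]):
    print("{} (confidence {:.2f})".format(s, p))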
def data_model(args):
    """ Load data and model """
    train_data, val_data, test_data, corpus = data.get_data(args)
    print("Dataset: {}".format(args.dataset))
    print("Dataset path: {}".format(args.data))
    print("Dataset stats:")
    print("Train samples/tokens: {}".format(len(corpus.train)))
    print("Dev samples/tokens: {}".format(len(corpus.valid)))
    print("Test samples/tokens: {}".format(len(corpus.test)))  # was corpus.valid
    print("Vocabulary size: {}".format(len(corpus.dictionary.idx2word)))

    # Build or load the model
    ntokens = len(corpus.dictionary)
    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                            args.nlayers, args.dropout, args.tied).to(device)
    if args.load_model:
        model = torch.load(args.saved_model_path)
        print("Loaded saved model from: {}".format(args.saved_model_path))

    return train_data, val_data, test_data, corpus, model
def __init__(self, args):
    option, checkpoint_fname, action = args
    rnn_model = models.RNNModel()
    preprocessing = PreProcessing()

    if config.char_or_word == config.character_model:
        data = None
        if config.data_type == "cmu_dict":
            data = datasets.getCMUDictData(config.data_src_cmu)
        preprocessing.loadDataCharacter(data=data)
    else:
        preprocessing.loadData()
    preprocessing.prepareLMdata()
    self.preprocessing = preprocessing

    # Get model
    params = {}
    params['embeddings_dim'] = config.embeddings_dim
    params['lstm_cell_size'] = config.lstm_cell_size
    if config.char_or_word == config.character_model:
        params['vocab_size'] = preprocessing.vocab_size
    else:
        params['vocab_size'] = len(preprocessing.word_index)
    params['inp_length'] = config.inp_length - 1
    model = rnn_model.getModel(params)

    if option == "train":
        x_train, y_train = preprocessing.x_train, preprocessing.y_train
        x_val, y_val = preprocessing.x_val, preprocessing.y_val
        x_test, y_test = preprocessing.x_test, preprocessing.y_test

        # Train, checkpointing on the best validation loss.
        checkpointer = ModelCheckpoint(
            filepath="./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
            verbose=1, save_best_only=True)
        model.fit(x_train, y_train,
                  validation_data=(x_val, y_val),
                  nb_epoch=config.num_epochs,  # `epochs` in Keras >= 2
                  batch_size=config.batch_size,
                  callbacks=[checkpointer])

        # Evaluate
        scores = model.evaluate(x_test, y_test, verbose=0)
        print("Accuracy: %.2f%%" % (scores[1] * 100))

        # Sample sequences
        print("--- Sampling a few sequences... ")
        for _ in range(5):
            pred = utilities.generateSentence(model, preprocessing.word_index,
                                              preprocessing.sent_start,
                                              preprocessing.sent_end,
                                              preprocessing.unknown_word)
            sent = [preprocessing.index_word[i] for i in pred]
            if config.char_or_word == config.character_model:
                print(''.join(sent))
            else:
                print(' '.join(sent))
    else:
        model.load_weights(checkpoint_fname)

    try:
        with open('lm_cache', 'rb') as f:  # binary mode for pickle
            cache = pickle.load(f)
        print("Loaded cache")
    except (IOError, OSError, pickle.UnpicklingError):
        cache = {}
        print("cache not found. Starting with empty cache")
    self.cache = {} if 'cache_clean' in args else cache
    self.model = model

    # Action
    if action == "save_embeddings":
        saveEmbeddings(model, preprocessing.word_index)
def train_language_model(CFG, train_nums, test_nums, valid_nums, num_tokens):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Using device: {device}')
    if CFG.seed is not None:
        torch.manual_seed(CFG.seed)
        print(f'using seed {CFG.seed}')

    train_data = batchify(train_nums, CFG.batch_size, device=device)
    val_data = batchify(valid_nums, bsz=1, device=device)
    test_data = batchify(test_nums, bsz=1, device=device)

    if CFG.model == 'Transformer':
        model = models.TransformerModel(num_tokens, CFG.emb_size, CFG.nhead,
                                        CFG.n_hid, CFG.n_layers,
                                        CFG.dropout_p).to(device)
    else:
        model = models.RNNModel(CFG.model, num_tokens, CFG.emb_size,
                                CFG.n_hid, CFG.n_layers, CFG.dropout_p,
                                CFG.tied).to(device)
    criterion = nn.CrossEntropyLoss()

    lr = CFG.lr
    best_val_loss = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, CFG.epochs + 1):
            epoch_start_time = time.time()
            train_epoch(epoch, CFG, model, num_tokens, train_data, criterion, lr)
            val_loss = evaluate(CFG, model, num_tokens, criterion, val_data)
            _saved = False
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(CFG.save_path, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
                _saved = True
            else:
                # Anneal the learning rate if no improvement has been seen
                # in the validation dataset.
                lr = 0.9 * lr
            if epoch % 20 == 0:
                print('-' * 120)
                _s = ('| end of epoch {:3d} | time: {:5.2f}s '
                      '| valid loss {:8.5f} | valid ppl {:10.5f}').format(
                          epoch, (time.time() - epoch_start_time),
                          val_loss, np.exp(val_loss))
                if _saved:
                    _s += ' | * saved best model'
                print(_s)
                print('-' * 120)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    with open(CFG.save_path, 'rb') as f:
        model = torch.load(f)
        # After load, the rnn params are not a continuous chunk of memory;
        # this makes them a continuous chunk, and will speed up forward pass.
        # Currently, only the rnn model supports flatten_parameters.
        if CFG.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
            model.rnn.flatten_parameters()

    # Run on test data.
    test_loss = evaluate(CFG, model, num_tokens, criterion, test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, np.exp(test_loss)))
    print('=' * 89)
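# `batchify` is not shown in this snippet. A sketch of the conventional
# implementation from the PyTorch word_language_model example, which the
# call sites above appear to follow (1-D token tensor in, [seq_len, bsz]
# tensor out); treat it as an assumption about the actual helper.
def batchify(data, bsz, device):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    return data.view(bsz, -1).t().contiguous().to(device)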
def bidirectional_rnn_model(input_dim, units, output_dim=29):
    """ Build a bidirectional recurrent network for speech """
    return M.RNNModel(bd_merge=M.BidirectionalMerge.concat,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units).model(input_shape=(None, input_dim),
                                             output_dim=output_dim)
# Build a matrix of size num_batch * args.bsz containing the index of each
# observation.
np.random.seed(args.seed)
index = data.subsample_index(train_data[1], args.bptt, args.nsample)
train_batch = data.batch_index(index, args.bsz)
valid_batch = data.batch_index(np.arange(args.bptt - 1, len(valid_data[1])),
                               args.bsz)
test_batch = data.batch_index(np.arange(args.bptt - 1, len(test_data[1])),
                              args.bsz)
classes = ['Downward', 'Stationary', 'Upward']

###############################################################################
# Build the model
###############################################################################

model = models.RNNModel(args.model, args.ninp, args.ntag, args.nhid,
                        args.nlayers, args.dropout).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
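# A small demonstration of `repackage_hidden` (shapes are illustrative only):
# detaching between BPTT windows keeps the hidden-state values but stops
# gradients from flowing back into earlier windows.
h = (torch.zeros(2, 4, 8, requires_grad=True),   # hypothetical (h_n, c_n) pair
     torch.zeros(2, 4, 8, requires_grad=True))
h = repackage_hidden(h)
assert all(not t.requires_grad for t in h)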
def simple_rnn_model(input_dim, output_dim=29):
    """ Build a recurrent network for speech """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.SimpleRNN,
                      time_distributed_dense=False).model(
                          input_shape=(None, input_dim), output_dim=output_dim)
def run(
    batch_size: int,
    bptt: int,
    clip: float,
    cuda: bool,
    data: Path,
    dry_run: bool,
    em_size: int,
    epochs: int,
    log_interval: int,
    model: str,
    n_heads: int,
    report: callable,
    save: Path,
    lr: float,
    seed: int,
    tied: bool,
    warmup: int,
    load: Optional[Path] = None,
    onnx_export: Optional[Path] = None,
    **kwargs,
):
    # Set the random seed manually for reproducibility.
    torch.manual_seed(seed)
    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")
    print("Running with device:", device)

    ###########################################################################
    # Load data
    ###########################################################################
    eval_batch_size = 10
    if data.name == "debug.npz":
        if not data.exists():
            DebugDataset.generate(data, seed=seed, n_seq=10000, seq_len=bptt,
                                  n_tokens=10, p=0.8)
        dataset = DebugDataset(data, device)
        assert bptt == dataset.bptt
        ntokens = dataset.n_tokens + 1
        n_seq = len(dataset)
        size_valid = int(n_seq * 0.2)
        size_test = int(n_seq * 0.1)
        train_data, val_data, test_data = torch.utils.data.random_split(
            dataset, [n_seq - size_test - size_valid, size_valid, size_test])
    else:
        corpus = Corpus(data)
        train_data = LMDataset(corpus.train, bptt, batch_size=batch_size,
                               device=device)  # [104431, 20]
        val_data = LMDataset(corpus.valid, bptt, batch_size=batch_size,
                             device=device)  # [21764, 10]
        test_data = LMDataset(corpus.test, bptt, batch_size=batch_size,
                              device=device)  # [24556, 10]
        ntokens = len(corpus.dictionary)
    train_data = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_data = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    test_data = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    ###########################################################################
    # Build the model
    ###########################################################################
    # Round the embedding size down to a multiple of the number of heads.
    em_size = (em_size // n_heads) * n_heads
    kwargs.update(n_tokens=ntokens, em_size=em_size)
    recurrent = False
    if model == "transformer":
        model = models.TransformerModel(n_head=n_heads, **kwargs).to(device)
    elif model == "ours":
        model = ours.TransformerModel(n_head=n_heads, **kwargs).to(device)
    else:
        model = models.RNNModel(model, tied, **kwargs).to(device)
        recurrent = True
    if load is not None:
        with load.open("rb") as f:
            model.load_state_dict(torch.load(f))
        # After load, the rnn params are not a continuous chunk of memory;
        # this makes them a continuous chunk, and will speed up forward pass.
        # Currently, only the rnn model supports flatten_parameters.
        if recurrent:
            model.rnn.flatten_parameters()

    ###########################################################################
    # Training code
    ###########################################################################
    def evaluate(data_source):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        hidden = model.init_hidden(eval_batch_size) if recurrent else None
        with torch.no_grad():
            for (inputs, targets) in data_source:
                targets = targets.flatten()
                if hidden is None:
                    output = model(inputs)
                    output = output.reshape(-1, ntokens)
                else:
                    output, hidden = model(inputs, hidden)
                    hidden = repackage_hidden(hidden)
                yield len(inputs) * criterion(output, targets).item()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = (None if warmup is None else optim.lr_scheduler.LambdaLR(
        optimizer,
        # Guard e == 0: the scheduler evaluates the lambda at step 0, and
        # 0 ** -0.5 would raise ZeroDivisionError.
        lambda e: em_size ** (-0.5) * min(max(e, 1) ** (-0.5),
                                          e * warmup ** (-1.5)),
    ))

    def train():
        # Turn on training mode which enables dropout.
        model.train()
        hidden = model.init_hidden(batch_size) if recurrent else None
        for i, (inputs, targets) in enumerate(train_data):
            targets = targets.flatten()
            optimizer.zero_grad()
            # Starting each batch, we detach the hidden state from how it was
            # previously produced. If we didn't, the model would try
            # backpropagating all the way to the start of the dataset.
            if hidden is None:
                outputs = model(inputs)
                outputs = outputs.reshape(-1, ntokens)
            else:
                hidden = repackage_hidden(hidden)
                outputs, hidden = model(inputs, hidden)
            is_accurate = outputs.max(-1).indices == targets
            assert isinstance(is_accurate, torch.Tensor)
            accuracy = torch.mean(is_accurate.float())
            loss = criterion(outputs, targets)
            loss.backward()
            # `clip_grad_norm_` helps prevent the exploding gradient problem
            # in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            logs = dict(epoch=epoch, batches=i)
            if scheduler is not None:
                logs.update(lr=scheduler.get_lr())
            means = dict(accuracy=accuracy.item(), loss=loss.item())
            writes = dict(inputs=inputs[0], outputs=outputs[0],
                          targets=targets[0])
            yield logs, means, writes
            if dry_run:
                break

    def export_onnx(path, bsz, seq_len):
        print("The model is also exported in ONNX format at {}".format(
            path.absolute()))
        model.eval()
        dummy_input = (torch.LongTensor(seq_len * bsz).zero_()
                       .view(-1, bsz).to(device))
        hidden = model.init_hidden(bsz)
        torch.onnx.export(model, (dummy_input, hidden), str(path))

    # Loop over epochs.
    best_val_loss = None
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, epochs + 1):
            aggregator = MeanAggregator()
            for batch, (to_log, to_mean, to_write) in enumerate(train()):
                aggregator.update(**to_mean)
                if batch % log_interval == 0 and batch > 0:
                    report(**to_log, **dict(aggregator.items()))
                    aggregator = MeanAggregator()
            val_loss = np.mean(list(evaluate(val_data)))
            report(val_loss=val_loss)
            if not best_val_loss or val_loss < best_val_loss:
                with save.open("wb") as f:
                    torch.save(model.state_dict(), f)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print("-" * 89)
        print("Exiting from training early")

    # Load the best saved model.
    with save.open("rb") as f:
        model.load_state_dict(torch.load(f))
    # After load, the rnn params are not a continuous chunk of memory;
    # this makes them a continuous chunk, and will speed up forward pass.
    # Currently, only the rnn model supports flatten_parameters.
    if recurrent:
        model.rnn.flatten_parameters()

    # Run on test data.
    test_loss = np.mean(list(evaluate(test_data)))
    report(test_loss=test_loss, test_ppl=math.exp(test_loss))

    if onnx_export:
        # Export the model in ONNX format.
        export_onnx(onnx_export, bsz=1, seq_len=bptt)
if args.output_file == 'gospel_output.txt':
    '''
    corpus = data.Corpus(
        train_path='../../data/version2/gospel_dataset_single_file_v2_train.txt',
        test_path='../../data/version2/gospel_dataset_single_file_v2_test.txt',
        valid_path='../../data/version2/gospel_dataset_single_file_v2_valid.txt')
    ntokens = len(corpus.dictionary)
    filehandler = open('gospel_corpus.txt', 'wb')
    pickle.dump(corpus, filehandler)
    '''
    with open('gospel_corpus.txt', 'rb') as filehandler:
        corpus = pickle.load(filehandler)
    print('corpus loaded')
    ntokens = len(corpus.dictionary)
    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                            args.nlayers, args.dropout, args.tied)
    model.load_state_dict(torch.load('gospel_lm_v2.pt'))
elif args.output_file == 'hiphop_output.txt':
    '''
    corpus = data.Corpus(
        train_path='../../data/version2/hip_hop_dataset_single_file_v2_train.txt',
        test_path='../../data/version2/hip_hop_dataset_single_file_v2_test.txt',
        valid_path='../../data/version2/hip_hop_dataset_single_file_v2_valid.txt')
    ntokens = len(corpus.dictionary)
    '''
    with open('hip_hop_corpus.txt', 'rb') as filehandler:
        corpus = pickle.load(filehandler)
    print('corpus loaded')
    ntokens = len(corpus.dictionary)
    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                            args.nlayers, args.dropout, args.tied)
    model.load_state_dict(torch.load('hiphop_lm_v2.pt'))
def main():
    ###########################################################################
    # Load command line options.
    ###########################################################################
    opts = options()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    ###########################################################################
    # Load data
    ###########################################################################
    corpus = data.Corpus(opts)
    if opts.pretrain == "":
        corpus.make_dict(opts.data)
    else:
        corpus.load_dict()
    corpus.load_data(opts.data)
    with open(opts.dict, mode='wb') as f:
        pickle.dump(corpus.dictionary, f)

    ###########################################################################
    # Build a model
    ###########################################################################
    if opts.pretrain == "":
        # convert to parameters
        params = models.opts2params(opts, corpus.dictionary)
        # construct model
        model = models.RNNModel(params)
    # For fine-tuning
    else:
        # load parameters
        with open(opts.pretrain + ".params", 'rb') as f:
            params = pickle.load(f)
        # construct model
        model = models.RNNModel(params)
        # load pretrained model
        model.load_state_dict(torch.load(opts.pretrain + ".pt"))
        model.freeze_emb()

    # save parameters
    with open(opts.save + ".params", mode='wb') as f:
        pickle.dump(params, f)

    if torch.cuda.is_available():
        if not opts.cuda:
            print("WARNING: You have a CUDA device, "
                  "so you should probably run with --cuda")
    else:
        if opts.cuda:
            print("Error: No CUDA device. Remove the option --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model = model.to(device)

    # loss function (ignore padding id)
    criterion = nn.CrossEntropyLoss(ignore_index=corpus.dictionary.pad_id())

    ###########################################################################
    # Train the model
    ###########################################################################
    # Loop over epochs.
    lr = opts.lr
    best_val_loss = None

    # Select an optimizer
    try:
        optimizer = getattr(torch.optim, opts.optim_type)(model.parameters(),
                                                          lr=lr)
    except AttributeError:
        raise ValueError("An invalid option for `--optim_type` was supplied.")

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, opts.epochs + 1):
            train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
            val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                torch.save(model.state_dict(), opts.save + ".pt")
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in
                # the validation dataset. Setting `optimizer.lr` directly has
                # no effect; the learning rate lives in the param groups.
                lr /= 4.0
                for group in optimizer.param_groups:
                    group['lr'] = lr
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
def main():
    ###########################################################################
    # Load command line options.
    ###########################################################################
    opts = options()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    ###########################################################################
    # Build a model
    ###########################################################################
    with open(opts.load + ".params", 'rb') as f:
        params = pickle.load(f)
    model = models.RNNModel(params)
    model.load_state_dict(torch.load(opts.load + ".pt"))
    if torch.cuda.is_available() and not opts.cuda:
        print("WARNING: You have a CUDA device, "
              "so you should probably run with --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model.to(device)
    model.eval()

    ###########################################################################
    # Load dictionary
    ###########################################################################
    corpus = data.Corpus(opts)
    corpus.load_dict()
    criterion = nn.CrossEntropyLoss(reduction="none",
                                    ignore_index=corpus.dictionary.pad_id())

    ###########################################################################
    # Run as a server
    ###########################################################################
    if opts.server:
        app = FastAPI()

        @app.post('/lm', response_model=List[Score],
                  description="get several scores with POST method")
        def predict(req: InSentences):
            print(req)
            stream = []
            for sent in req.sentences:
                seq = ["<s>"] + sent.split(" ") + ["</s>"]
                stream.append(corpus.sent2ids(seq))
            return [Score(lm_score=ppl)
                    for ppl in evaluate(opts, corpus, stream, model,
                                        criterion, device)]

        logzero.loglevel(10)  # log_level = DEBUG
        uvicorn.run(app, host=opts.host, port=opts.port, workers=1,
                    logger=logger, debug=True)

    ###########################################################################
    # Calculate perplexities for sentences in the input file
    ###########################################################################
    else:
        input_texts = corpus.tokenize(opts.input_text)
        with open(opts.outf, 'w') as f_out:
            for ppl in evaluate(opts, corpus, input_texts, model, criterion,
                                device):
                f_out.write(str(ppl) + "\n")
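# A hedged client-side sketch for the `/lm` endpoint above. The host, port,
# and the use of the `requests` library are assumptions; the request shape
# ({"sentences": [...]}) is inferred from the `InSentences` handler.
import requests

resp = requests.post("http://localhost:8000/lm",  # hypothetical address
                     json={"sentences": ["this is a test",
                                         "colorless green ideas"]})
for score in resp.json():
    print(score["lm_score"])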
def main():
    ###########################################################################
    # Load command line options.
    ###########################################################################
    global verbose, opts
    opts = options()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    hvd.init()
    if opts.cuda:
        # Horovod: pin GPU to local rank.
        torch.cuda.set_device(hvd.local_rank())
        #torch.cuda.manual_seed(opts.seed)
        cudnn.benchmark = True

    # Horovod: print logs on the first worker.
    verbose = 1 if hvd.rank() == 0 else 0

    if opts.lms:
        torch.cuda.set_enabled_lms(True)
        if verbose:
            print('LMS is enabled')

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    for try_epoch in range(opts.epochs, 0, -1):
        filepath = opts.save + "checkpoint-" + str(try_epoch) + ".pth.tar"
        if os.path.exists(filepath):
            resume_from_epoch = try_epoch
            break

    # Horovod: broadcast resume_from_epoch from rank 0 (which will have
    # checkpoints) to other ranks.
    resume_from_epoch = hvd.broadcast(torch.tensor(resume_from_epoch),
                                      root_rank=0,
                                      name='resume_from_epoch').item()

    ###########################################################################
    # Load data
    ###########################################################################
    corpus = data.Corpus(opts)
    if opts.pretrain == "":
        corpus.make_dict(opts.data)
    else:
        corpus.load_dict()
    corpus.load_data(opts.data)
    with open(opts.dict, mode='wb') as f:
        pickle.dump(corpus.dictionary, f)

    ###########################################################################
    # Build a model
    ###########################################################################
    if opts.pretrain == "":
        # convert to parameters
        params = models.opts2params(opts, corpus.dictionary)
        # construct model
        model = models.RNNModel(params)
    # For fine-tuning
    else:
        # load parameters
        with open(opts.pretrain + ".params", 'rb') as f:
            params = pickle.load(f)
        # construct model
        model = models.RNNModel(params)
        # load pretrained model
        model.load_state_dict(torch.load(opts.pretrain + ".pt"))
        model.freeze_emb()

    # save parameters
    #with open(opts.save + ".params", mode='wb') as f:
    #    pickle.dump(params, f)
    save_params(params, opts.save)

    if torch.cuda.is_available():
        if not opts.cuda:
            print("WARNING: You have a CUDA device, "
                  "so you should probably run with --cuda")
    else:
        if opts.cuda:
            print("Error: No CUDA device. Remove the option --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model = model.to(device)

    # loss function (ignore padding id)
    criterion = nn.CrossEntropyLoss(ignore_index=corpus.dictionary.pad_id())

    ###########################################################################
    # Train the model
    ###########################################################################
    # Loop over epochs.
    lr = opts.lr
    best_val_loss = None

    # Select an optimizer
    try:
        optimizer = getattr(torch.optim, opts.optim_type)(model.parameters(),
                                                          lr=lr)
    except AttributeError:
        raise ValueError("An invalid option for `--optim_type` was supplied.")

    # Horovod: (optional) compression algorithm.
    compression = (hvd.Compression.fp16 if opts.fp16_allreduce
                   else hvd.Compression.none)

    # Horovod: wrap optimizer with DistributedOptimizer. The fallback covers
    # older Horovod versions whose DistributedOptimizer lacks the `op` argument.
    try:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=compression,
            backward_passes_per_step=opts.batches_per_allreduce,
            op=hvd.Adasum if opts.use_adasum else hvd.Average)
    except TypeError:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=compression,
            backward_passes_per_step=opts.batches_per_allreduce)

    # Restore from a previous checkpoint, if initial_epoch is specified.
    # Horovod: restore on the first worker which will broadcast weights
    # to other workers.
    if (resume_from_epoch > 0) and (hvd.rank() == 0):
        filepath = opts.save + "checkpoint-" + str(resume_from_epoch) + ".pth.tar"
        #filepath = args.checkpoint_format.format(epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Horovod: broadcast parameters & optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(resume_from_epoch, opts.epochs):
            train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
            val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
            save_checkpoint(model, optimizer, epoch)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                #torch.save(model.state_dict(), opts.save + ".pt")
                save_checkpoint(model, optimizer, -1)
                best_val_loss = val_loss
            #else:
            #    # Anneal the learning rate if no improvement has been seen
            #    # in the validation dataset.
            #    lr /= 4.0
            #    optimizer.lr = lr
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
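# `save_checkpoint` and `save_params` are not shown in this snippet. A
# minimal sketch of what they presumably look like, inferred from the resume
# logic above (the "checkpoint-{epoch}.pth.tar" filename and the
# 'model'/'optimizer' keys) and from the commented-out save code; treat the
# details as assumptions.
def save_checkpoint(model, optimizer, epoch):
    # Only the first worker writes checkpoints; the others would race.
    if hvd.rank() != 0:
        return
    filepath = opts.save + "checkpoint-" + str(epoch) + ".pth.tar"
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}, filepath)

def save_params(params, prefix):
    with open(prefix + ".params", mode='wb') as f:
        pickle.dump(params, f)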