def load_all_model(root_dir, device=0):
    model_corpus = []
    for i in range(17):
        config_file = os.path.join(root_dir, str(i), "config.json")
        with open(config_file, 'r') as fin:
            config = json.load(fin)
        args = argparse.Namespace(**config)
        item = []
        for j in range(args.model_num):
            if args.model_type == 'lstm':
                model = models.LSTMModel(args)
            elif args.model_type == 'conv':
                model = models.ConvModel(args)
            elif args.model_type == 'char':
                model = models.CharCNNModel(args)
            elif args.model_type == 'base':
                model = models.BaseModel(args)
            else:
                raise NotImplementedError
            model_path = os.path.join(
                args.checkpoint_path, str(i),
                "%s_%s" % (args.model_type, args.type_suffix),
                "model_%d.pth" % j)
            if not os.path.isfile(model_path):
                print("No model to test")
                exit(1)
            model.load_state_dict(torch.load(model_path))
            model = model.cuda(device)
            model.eval()
            item.append(model)
        model_corpus.append(item)
    return model_corpus
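# Hypothetical usage sketch (not part of the original source): run one batch through
# every model in each per-class ensemble and average the outputs. The call signature
# model(sentence_ids, sentence_lengths) mirrors how the models are invoked in train()
# below; the helper name ensemble_predict is an assumption.
def ensemble_predict(model_corpus, sentence_ids, sentence_lengths):
    class_scores = []
    for class_models in model_corpus:
        with torch.no_grad():
            outputs = [m(sentence_ids, sentence_lengths) for m in class_models]
        # Average the ensemble members' predictions for this class
        class_scores.append(torch.stack(outputs).mean(dim=0))
    return class_scores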
def get_model(model_name, **kwargs):
    if model_name == 'lstm':
        return models.LSTMModel(**kwargs)
    if model_name == 'cnn':
        return models.CNNModel(**kwargs)
    if model_name == "split_cnn":
        return models.SplitCNN(**kwargs)
    raise ValueError('Invalid Model Type: {}'.format(model_name))
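# Minimal usage sketch; the keyword argument below is an assumption, since each model
# class defines its own constructor signature in the original project.
lstm = get_model('lstm', vector_size=300)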
def get_lstm_classifier():
    global y_train_df
    # Convert to a format suitable for RNNs
    to_tensor = FunctionTransformer(torch.from_numpy)
    # Type cast to float tensor (our classifier doesn't seem to work with the
    # default double tensor)
    to_float = FunctionTransformer(lambda t: t.type(dtype=torch.FloatTensor))
    _, vector_size = glove_vectorize.get_instance_dims()
    lstm_model = models.LSTMModel(vector_size=vector_size)
    return Pipeline([('to_tensor', to_tensor),
                     ('to_float', to_float),
                     ('lstm', lstm_model.get_sklearn_compatible_estimator())])
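# Hedged usage sketch: fit the sklearn Pipeline on GloVe-vectorised numpy arrays and
# predict on a held-out split. X_train, y_train, and X_test are placeholder names,
# not identifiers from the original source.
clf = get_lstm_classifier()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)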
elif config['model'] == 'mlp':
    model = models.MLPModel(input_shape=(config['history_length'], ),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'])
elif config['model'] == 'gru':
    model = models.GRUModel(input_shape=(config['history_length'], 1),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'],
                            dropout=config['dropout'],
                            recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm':
    model = models.LSTMModel(input_shape=(config['history_length'], 1),
                             nb_output_units=1,
                             nb_hidden_units=config['nb_hidden_units'],
                             nb_layers=config['nb_layers'],
                             dropout=config['dropout'],
                             recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm_attention':
    model = models.LSTMAttentionModel(
        input_shape=(config['history_length'], 1),
        nb_output_units=1,
        nb_hidden_units=config['nb_hidden_units'],
        dropout=config['dropout'],
        recurrent_dropout=config['recurrent_dropout'],
        nb_attention_units=config['nb_attention_units'])
elif config["model"] == "seq2seq":
    model = models.Seq2seqModel(input_shape=(config['history_length'], 1),
                                kernel_size=4,
                                n_block=4,
                                nb_hidden_units=64,
def train(args, model_id, tb):
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    train_data = MedicalEasyEnsembleDataloader(args.train_data, args.class_id,
                                               args.batch_size, True,
                                               args.num_workers)
    val_data = MedicalEasyEnsembleDataloader(args.val_data, args.class_id,
                                             args.batch_size, False,
                                             args.num_workers)
    if os.path.exists(args.w2v_file):
        embedding = utils.load_embedding(args.w2v_file,
                                         vocab_size=args.vocab_size,
                                         embedding_size=args.embedding_size)
    else:
        embedding = None
    if args.model_type == 'lstm':
        model = models.LSTMModel(args, embedding)
    elif args.model_type == 'conv':
        model = models.ConvModel(args, embedding)
    elif args.model_type == 'char':
        model = models.CharCNNModel(args, embedding)
    elif args.model_type == 'base':
        model = models.BaseModel(args, embedding)
    else:
        raise NotImplementedError
    model_file = os.path.join(args.checkpoint_path, str(args.class_id),
                              "%s_%s" % (args.model_type, args.type_suffix),
                              "model_%d.pth" % model_id)
    if os.path.isfile(model_file):
        print("Loading class %d %s model %d from a previous step"
              % (args.class_id, args.model_type, model_id))
        model.load_state_dict(torch.load(model_file))
    iteration = 0
    model = model.cuda(args.device)
    model.train()
    optimizer = utils.build_optimizer(args, model)
    loss_func = MultiBceLoss()
    cur_worse = 1000
    bad_times = 0
    for epoch in range(args.epochs):
        if epoch >= args.start_epoch:
            factor = (epoch - args.start_epoch) // args.decay_every
            decay_factor = args.decay_rate ** factor
            current_lr = args.lr * decay_factor
            utils.set_lr(optimizer, current_lr)
        # if epoch != 0 and epoch % args.sample_every == 0:
        #     train_data.re_sample()
        for i, data in enumerate(train_data):
            tmp = [_.cuda(args.device) if isinstance(_, torch.Tensor) else _
                   for _ in data]
            report_ids, sentence_ids, sentence_lengths, output_vec = tmp
            optimizer.zero_grad()
            loss = loss_func(model(sentence_ids, sentence_lengths), output_vec)
            loss.backward()
            train_loss = loss.item()
            optimizer.step()
            iteration += 1
            if iteration % args.print_every == 0:
                print("iter %d epoch %d loss: %.3f"
                      % (iteration, epoch, train_loss))
            if iteration % args.save_every == 0:
                torch.save(model.state_dict(), model_file)
                with open(os.path.join(args.checkpoint_path, str(args.class_id),
                                       "config.json"),
                          'w', encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
                with open(os.path.join(args.checkpoint_path, str(args.class_id),
                                       "%s_%s" % (args.model_type,
                                                  args.type_suffix),
                                       "config.json"),
                          'w', encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
            if iteration % args.val_every == 0:
                val_loss = eval_model(model, loss_func, val_data, epoch)
                tb.add_scalar("model_%d val_loss" % model_id, val_loss, iteration)
                # Early stopping: compare against the previous validation loss
                if val_loss > cur_worse:
                    print("Validation loss got worse")
                    cur_worse = val_loss
                    bad_times += 1
                else:
                    cur_worse = val_loss
                    bad_times = 0
                if bad_times > args.patient:
                    print("Early stopping")
                    return
            if iteration % args.loss_log_every == 0:
                tb.add_scalar("model_%d train_loss" % model_id, loss.item(),
                              iteration)
    print("Training finished")
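# Hedged driver sketch (not from the original source): create a TensorBoard writer and
# train every member of the per-class ensemble. parse_args is a hypothetical CLI parser
# for the attributes train() reads from args; model_num follows the usage in
# load_all_model() above.
from torch.utils.tensorboard import SummaryWriter

if __name__ == '__main__':
    args = parse_args()  # hypothetical; must supply the fields train() expects
    tb = SummaryWriter(log_dir=os.path.join(args.checkpoint_path, "tb"))
    for model_id in range(args.model_num):
        train(args, model_id, tb)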
def main(argv):
    parser = argparse.ArgumentParser(
        description='WikiText-2 language modeling')
    parser.add_argument('--batch-size', type=int, default=70, metavar='N',
                        help='input batch size for training (default: 70)')
    parser.add_argument('--eval-batch-size', type=int, default=50, metavar='N',
                        help='input batch size for evaluation (default: 50)')
    parser.add_argument('--save-directory', type=str,
                        default='output/wikitext-2', help='output directory')
    parser.add_argument('--model-save-directory', type=str, default='models/',
                        help='model checkpoint directory')
    parser.add_argument('--epochs', type=int, default=5, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--base-seq-len', type=int, default=70, metavar='N',
                        help='base sequence length')
    parser.add_argument('--min-seq-len', type=int, default=50, metavar='N',
                        help='minimum sequence length')
    parser.add_argument('--seq-prob', type=float, default=0.99, metavar='N',
                        help='probability of halving the sequence length')
    parser.add_argument('--seq-std', type=int, default=1, metavar='N',
                        help='sequence length std')
    parser.add_argument('--hidden-dim', type=int, default=1150, metavar='N',
                        help='hidden dim')
    parser.add_argument('--embedding-dim', type=int, default=400, metavar='N',
                        help='embedding dim')
    parser.add_argument('--lr', type=float, default=20, metavar='N',
                        help='learning rate')
    parser.add_argument('--weight-decay', type=float, default=2e-6, metavar='N',
                        help='weight decay')
    parser.add_argument('--tag', type=str, default='lr-1e-2-base.pt',
                        metavar='N', help='checkpoint file name')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    # load dataset
    train_data, val_data, vocabulary = (np.load('./dataset/wiki.train.npy'),
                                        np.load('./dataset/wiki.valid.npy'),
                                        np.load('./dataset/vocab.npy'))
    word_count = len(vocabulary)
    loss_fn = models.CrossEntropyLoss3D()
    model = models.LSTMModel(word_count, args)
    checkpoint_path = os.path.join(args.model_save_directory, args.tag)
    if os.path.exists(checkpoint_path):
        print("Using pre-trained model")
        print("*" * 90)
        model.load_state_dict(torch.load(checkpoint_path))
    if args.cuda:
        model = model.cuda()
        loss_fn = loss_fn.cuda()
    generated = utils.generate(model, sequence_length=10, batch_size=2,
                               stochastic=True,
                               args=args).data.cpu().numpy()
    utils.print_generated(utils.to_text(preds=generated,
                                        vocabulary=vocabulary))
    print('Model: ', model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    logging = dict()
    logging['loss'] = []
    logging['train_acc'] = []
    logging['val_loss'] = []
    model.train()
    for epoch in range(args.epochs):
        epoch_time = time.time()
        np.random.shuffle(train_data)
        train_data_ = utils.batchify(
            utils.to_tensor(np.concatenate(train_data)), args.batch_size)
        val_data_ = utils.batchify(utils.to_tensor(np.concatenate(val_data)),
                                   args.eval_batch_size)
        train_data_loader = utils.custom_data_loader(train_data_, args)
        val_data_loader = utils.custom_data_loader(val_data_, args,
                                                   evaluation=True)
        # number of words
        train_size = train_data_.size(0) * train_data_.size(1)
        val_size = val_data_.size(0) * val_data_.size(1)
        n_batchs = len(train_data_)
        n_batchs_val = len(val_data_)
        correct = 0
        epoch_loss = 0
        batch_index = 0
        seq_len = 0
        counter = 0
        while batch_index < n_batchs - 1:
            optimizer.zero_grad()
            X, y, seq_len = next(train_data_loader)
            out = model(X)
            loss = loss_fn(out, y)
            loss.backward()
            # scale lr with respect to the size of the seq_len
            utils.adjust_learning_rate(optimizer, args, seq_len)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            # manual SGD-style update scaled by the adjusted learning rate,
            # applied in addition to the Adam step below
            for p in model.parameters():
                p.data.add_(p.grad.data, alpha=-args.lr)
            optimizer.step()
            utils.adjust_learning_rate(optimizer, args, args.base_seq_len)
            epoch_loss += loss.data.sum()
            batch_index += seq_len
            if counter % 30 == 0 and counter != 0:
                print('|batch {:3d}|train loss {:5.2f}|'.format(
                    counter, epoch_loss / counter))
            counter += 1
        train_loss = epoch_loss / counter
        val_loss = validate(model, val_data_loader, loss_fn, n_batchs_val)
        logging['loss'].append(train_loss)
        logging['val_loss'].append(val_loss)
        utils.save_model(model, checkpoint_path)
        print('=' * 83)
        print('|epoch {:3d}|time: {:5.2f}s|valid loss {:5.2f}|'
              'train loss {:8.2f}'.format(epoch + 1,
                                          (time.time() - epoch_time),
                                          val_loss, train_loss))
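# Minimal entry-point sketch (an assumption; the original module may already define one):
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])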