def main(args):
    if args.dataset == 'mr':
        # data, label = dataloader.read_MR(args.path)
        # train_x, train_y, test_x, test_y = dataloader.cv_split2(
        #     data, label,
        #     nfold=10,
        #     valid_id=args.cv
        # )
        #
        # if args.save_data_split:
        #     save_data(train_x, train_y, args.path, 'train')
        #     save_data(test_x, test_y, args.path, 'test')
        train_x, train_y = dataloader.read_corpus(
            '/data/medg/misc/jindi/nlp/datasets/mr/train.txt')
        test_x, test_y = dataloader.read_corpus(
            '/data/medg/misc/jindi/nlp/datasets/mr/test.txt')
    elif args.dataset == 'imdb':
        train_x, train_y = dataloader.read_corpus(os.path.join(
            '/data/medg/misc/jindi/nlp/datasets/imdb', 'train_tok.csv'),
            clean=False, MR=True, shuffle=True)
        test_x, test_y = dataloader.read_corpus(os.path.join(
            '/data/medg/misc/jindi/nlp/datasets/imdb', 'test_tok.csv'),
            clean=False, MR=True, shuffle=True)
    else:
        train_x, train_y = dataloader.read_corpus(
            '/afs/csail.mit.edu/u/z/zhijing/proj/to_di/data/{}/'
            'train_tok.csv'.format(args.dataset),
            clean=False, MR=False, shuffle=True)
        test_x, test_y = dataloader.read_corpus(
            '/afs/csail.mit.edu/u/z/zhijing/proj/to_di/data/{}/'
            'test_tok.csv'.format(args.dataset),
            clean=False, MR=False, shuffle=True)

    nclasses = max(train_y) + 1
    # elif args.dataset == 'subj':
    #     data, label = dataloader.read_SUBJ(args.path)
    # elif args.dataset == 'cr':
    #     data, label = dataloader.read_CR(args.path)
    # elif args.dataset == 'mpqa':
    #     data, label = dataloader.read_MPQA(args.path)
    # elif args.dataset == 'trec':
    #     train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
    #     data = train_x + test_x
    #     label = None
    # elif args.dataset == 'sst':
    #     train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.read_SST(args.path)
    #     data = train_x + valid_x + test_x
    #     label = None
    # else:
    #     raise Exception("unknown dataset: {}".format(args.dataset))

    # if args.dataset == 'trec':
    # elif args.dataset != 'sst':
    #     train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.cv_split(
    #         data, label,
    #         nfold = 10,
    #         test_id = args.cv
    #     )

    model = Model(args.embedding, args.d, args.depth, args.dropout,
                  args.cnn, nclasses).cuda()

    need_grad = lambda x: x.requires_grad
    optimizer = optim.Adam(filter(need_grad, model.parameters()), lr=args.lr)

    train_x, train_y = dataloader.create_batches(
        train_x, train_y,
        args.batch_size,
        model.word2id,
    )
    # valid_x, valid_y = dataloader.create_batches(
    #     valid_x, valid_y,
    #     args.batch_size,
    #     emb_layer.word2id,
    # )
    test_x, test_y = dataloader.create_batches(
        test_x, test_y,
        args.batch_size,
        model.word2id,
    )

    best_test = 0
    # test_err = 1e+8
    for epoch in range(args.max_epoch):
        best_test = train_model(
            epoch, model, optimizer,
            train_x, train_y,
            # valid_x, valid_y,
            test_x, test_y,
            best_test, args.save_path)
        if args.lr_decay > 0:
            optimizer.param_groups[0]['lr'] *= args.lr_decay

    # sys.stdout.write("best_valid: {:.6f}\n".format(
    #     best_valid
    # ))
    sys.stdout.write("test_err: {:.6f}\n".format(best_test))
def main(args):
    if args.dataset == 'mr':
        data, label = dataloader.read_MR(args.path)
    elif args.dataset == 'subj':
        data, label = dataloader.read_SUBJ(args.path)
    elif args.dataset == 'cr':
        data, label = dataloader.read_CR(args.path)
    elif args.dataset == 'mpqa':
        data, label = dataloader.read_MPQA(args.path)
    elif args.dataset == 'trec':
        train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
        data = train_x + test_x
        label = None
    elif args.dataset == 'sst':
        train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.read_SST(args.path)
        data = train_x + valid_x + test_x
        label = None
    else:
        raise Exception("unknown dataset: {}".format(args.dataset))

    emb_layer = modules.EmbeddingLayer(
        args.d, data,
        embs=dataloader.load_embedding(args.embedding)
    )

    if args.dataset == 'trec':
        train_x, train_y, valid_x, valid_y = dataloader.cv_split2(
            train_x, train_y,
            nfold=10,
            valid_id=args.cv
        )
    elif args.dataset != 'sst':
        train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.cv_split(
            data, label,
            nfold=10,
            test_id=args.cv
        )

    nclasses = max(train_y) + 1

    train_x, train_y = dataloader.create_batches(
        train_x, train_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )
    valid_x, valid_y = dataloader.create_batches(
        valid_x, valid_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )
    test_x, test_y = dataloader.create_batches(
        test_x, test_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )

    model = Model(args, emb_layer, nclasses).cuda()

    need_grad = lambda x: x.requires_grad
    optimizer = optim.Adam(
        filter(need_grad, model.parameters()),
        lr=args.lr
    )

    best_valid = 1e+8
    test_err = 1e+8
    for epoch in range(args.max_epoch):
        best_valid, test_err = train_model(epoch, model, optimizer,
            train_x, train_y,
            valid_x, valid_y,
            test_x, test_y,
            best_valid, test_err
        )
        if args.lr_decay > 0:
            optimizer.param_groups[0]['lr'] *= args.lr_decay

    sys.stdout.write("best_valid: {:.6f}\n".format(
        best_valid
    ))
    sys.stdout.write("test_err: {:.6f}\n".format(
        test_err
    ))
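# ---------------------------------------------------------------------------
# Illustrative sketch only (an assumption, not part of the original script):
# a minimal argparse wiring that could drive the main(args) above. The flag
# names are assumed to mirror the attributes the function reads
# (args.dataset, args.path, args.embedding, args.d, ...); defaults are
# placeholders, not recommended values.
# ---------------------------------------------------------------------------
import argparse

if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--dataset", type=str, default="mr")     # mr/subj/cr/mpqa/trec/sst
    argparser.add_argument("--path", type=str, required=True)       # dataset directory
    argparser.add_argument("--embedding", type=str, required=True)  # pretrained word vectors file
    argparser.add_argument("--d", type=int, default=128)            # hidden / embedding dimension
    argparser.add_argument("--batch_size", type=int, default=32)
    argparser.add_argument("--max_epoch", type=int, default=70)
    argparser.add_argument("--lr", type=float, default=0.001)
    argparser.add_argument("--lr_decay", type=float, default=0.0)
    argparser.add_argument("--cv", type=int, default=0)             # cross-validation fold id
    argparser.add_argument("--cnn", action="store_true")
    argparser.add_argument("--dropout", type=float, default=0.3)
    main(argparser.parse_args())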
def main(args):
    max_length = args.max_length

    if args.dataset == 'mr':
        # data, label = dataloader.read_MR(args.path)
        # train_x, train_y, test_x, test_y = dataloader.cv_split2(
        #     data, label,
        #     nfold=10,
        #     valid_id=args.cv
        # )
        #
        # if args.save_data_split:
        #     save_data(train_x, train_y, args.path, 'train')
        #     save_data(test_x, test_y, args.path, 'test')
        # train_x, train_y = dataloader.read_corpus('/data/medg/misc/jindi/nlp/datasets/mr/train.txt', max_length=max_length)
        # test_x, test_y = dataloader.read_corpus('/data/medg/misc/jindi/nlp/datasets/mr/test.txt', max_length=max_length)
        train_x, train_y = dataloader.read_corpus(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/mr/train.txt',
            max_length=max_length)
        test_x, test_y = dataloader.read_corpus(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/mr/test.txt',
            max_length=max_length)
    elif args.dataset == 'imdb':
        train_x, train_y = dataloader.read_corpus(os.path.join(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/imdb',
            'train_tok.csv'),
            clean=False, MR=True, shuffle=False, max_length=max_length)
        test_x, test_y = dataloader.read_corpus(os.path.join(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/imdb',
            'test_tok.csv'),
            clean=False, MR=True, shuffle=False, max_length=max_length)
    else:
        fix_labels = False
        if args.dataset == "yelp" or args.dataset == "fake" or args.dataset == "ag":
            fix_labels = True
        train_x, train_y = dataloader.read_corpus(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/{}/'
            'train_tok.csv'.format(args.dataset),
            clean=False, MR=True, shuffle=False, fix_labels=fix_labels, max_length=max_length)
        test_x, test_y = dataloader.read_corpus(
            '/home/mahmoudm/pb90_scratch/mahmoud/TextFooler-master/data/adversary_training_corpora/{}/'
            'test_tok.csv'.format(args.dataset),
            clean=False, MR=True, shuffle=False, fix_labels=fix_labels, max_length=max_length)

    nclasses = max(train_y) + 1
    # elif args.dataset == 'subj':
    #     data, label = dataloader.read_SUBJ(args.path)
    # elif args.dataset == 'cr':
    #     data, label = dataloader.read_CR(args.path)
    # elif args.dataset == 'mpqa':
    #     data, label = dataloader.read_MPQA(args.path)
    # elif args.dataset == 'trec':
    #     train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
    #     data = train_x + test_x
    #     label = None
    # elif args.dataset == 'sst':
    #     train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.read_SST(args.path)
    #     data = train_x + valid_x + test_x
    #     label = None
    # else:
    #     raise Exception("unknown dataset: {}".format(args.dataset))

    # if args.dataset == 'trec':
    # elif args.dataset != 'sst':
    #     train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.cv_split(
    #         data, label,
    #         nfold = 10,
    #         test_id = args.cv
    #     )

    log_file = open(
        os.path.join(os.path.dirname(args.save_path),
                     f'{os.path.basename(args.save_path)}.log'), 'a')

    model = Model(args.embedding, args.d, args.depth, args.dropout,
                  args.cnn, nclasses).cuda()

    need_grad = lambda x: x.requires_grad
    optimizer = optim.Adam(filter(need_grad, model.parameters()), lr=args.lr)

    train_x, train_y = dataloader.create_batches(train_x, train_y, args.batch_size,
                                                 model.word2id, max_len=max_length)
    # valid_x, valid_y = dataloader.create_batches(
    #     valid_x, valid_y,
    #     args.batch_size,
    #     emb_layer.word2id, max_len=max_length)
    test_x, test_y = dataloader.create_batches(test_x, test_y, args.batch_size,
                                               model.word2id, max_len=max_length)

    lengths = np.array([len(seq)
                        for batch in train_x for seq in batch.t().contiguous()])
    log_file.write("Run with command:\n" + " ".join([arg for arg in sys.argv[1:]]) + "\n")
    log_file.write("\n")
    log_file.write(f"Max seq length found = {np.max(lengths)}\n")
    log_file.flush()

    best_test = 0
    # test_err = 1e+8
    progress = tqdm(total=args.max_epoch)
    for epoch in range(args.max_epoch):
        best_test = train_model(
            epoch, model, optimizer,
            train_x, train_y,
            # valid_x, valid_y,
            test_x, test_y,
            best_test, args.save_path, log_file)
        if args.lr_decay > 0:
            optimizer.param_groups[0]['lr'] *= args.lr_decay
        if epoch % 20 == 0:
            progress.update(20)
            log_file.write(str(progress) + '\n')
            log_file.flush()

    # sys.stdout.write("best_valid: {:.6f}\n".format(
    #     best_valid
    # ))
    sys.stdout.write("test_acc: {:.6f}\n".format(best_test))
    log_file.write("test_acc: {:.6f}\n".format(best_test))
    log_file.flush()
    log_file.close()
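# ---------------------------------------------------------------------------
# Illustrative sketch only (an assumption, not part of the original script):
# the "Max seq length found" computation above transposes each batch with
# .t(), which suggests create_batches returns LongTensors shaped
# (seq_len, batch_size). The snippet below reproduces that length scan on
# synthetic batches of that assumed shape.
# ---------------------------------------------------------------------------
import numpy as np
import torch

fake_train_x = [torch.zeros(12, 4, dtype=torch.long),   # a 12-token batch of 4 examples
                torch.zeros(37, 4, dtype=torch.long)]   # a 37-token batch of 4 examples
fake_lengths = np.array([len(seq)
                         for batch in fake_train_x
                         for seq in batch.t().contiguous()])
print(np.max(fake_lengths))  # prints 37, the longest (padded) sequence length seen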
def main(config, progress):
    # save config file
    with open("./log/config_history.txt", "a+") as f:
        f.write(json.dumps(config) + "\n")

    logging.info("*" * 80)
    logging.info("Experiment progress: {0:.2f}%".format(progress * 100))
    logging.info("*" * 80)

    train_all = bool(config["train_all"])

    # data dir
    data_dir = config["data_dir"]  # data dir
    train_csv = data_dir + config["train_csv"]  # train.csv or train_val.csv
    val_csv = data_dir + config["val_csv"]  # val.csv or testc.csv

    # path to save model
    model_dir = config["save_dir"]  # dir to save model
    f1_criteria = config["f1_criteria"]  # f1 criteria to save model

    # data preprocessing settings
    min_freq = config["min_freq"]  # min frequency in vocabulary
    pretrained_embedding = config["embedding_name"]  # embedding name provided in torchtext
    batch_size = config["batch_size"]

    # model settings
    twitter_embedding = config["twitter_embedding"]  # 0: default to word2vec or glove; 1: from datastories; 2: from trained sentiment classifier
    twitter_embedding_file = config["twitter_embedding_file"]  # the saved sentiment classifier
    use_deepmoji = bool(config["use_deepmoji"])
    use_infersent = bool(config["infersent_file"])
    infersent_file = config["infersent_file"]  # the infersent embedding in numpy
    use_elmo = bool(config["use_elmo"])
    use_bert_word = bool(config["use_bert_word"])
    embedding_size = config["embedding_size"]
    embedding_size = int(pretrained_embedding[-4:-1])
    if twitter_embedding > 0:
        embedding_size = 100
    freeze_epochs = config["freeze_epochs"]  # freeze embedding for a few epochs
    kmaxpooling = config["kmaxpooling"]  # top k max pooling
    hidden_size = config["hidden_size"]
    additional_hidden_size = config["additional_hidden_size"]  # an additional hidden layer before softmax
    output_size = config["output_size"]  # 4-class classification
    n_layers = config["n_layers"]
    bidirectional = bool(config["bidirectional"])
    dropout = config["dropout"]
    weight_decay = config["weight_decay"]
    recurrent_dropout = config["recurrent_dropout"]
    gradient_clip = config["gradient_clip"]

    # training settings
    num_epochs = config["epochs"]
    learning_rate = config["lr"]
    epoch_to_lower_lr = config["epoch_to_lower_lr"]  # scheduled lr decay
    lr_gamma = config["lr_gamma"]  # scheduled lr decay rate
    device = torch.device(config["device"])  # gpu id or "cpu"
    exp = config["exp"]  # experiment number or code
    seed = config["seed"]
    config_id = config["config_id"]

    # set seed
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    ######################
    #### Process data ####
    ######################
    # tokenization
    logging.info("Tokenizing data {0}, {1}...".format(train_csv, val_csv))
    TEXT = Field(sequential=True, tokenize=tokenizer, batch_first=True)
    LABEL = Field(sequential=False, use_vocab=False, batch_first=True)
    train_set = TabularDataset(path=train_csv,
                               format="csv",
                               fields=[("text", TEXT), ("label", LABEL)],
                               skip_header=False)
    val_set = TabularDataset(path=val_csv,
                             format="csv",
                             fields=[("text", TEXT), ("label", LABEL)],
                             skip_header=False)

    ########################
    #### Load embedding ####
    ########################
    deepmoji_train = [None]
    deepmoji_val = [None]
    if use_deepmoji:
        # load deepmoji representation
        deepmoji_file = data_dir + "deepmoji/train.npy"
        logging.info("Loading deepmoji representation from {0}".format(deepmoji_file))
        with open(deepmoji_file, "rb") as f:
            deepmoji_train = np.load(f)
        if config["val_csv"].startswith("val"):
            with open(data_dir + "deepmoji/val.npy", "rb") as f:
                deepmoji_val = np.load(f)
        elif config["val_csv"].startswith("test"):
            with open(data_dir + "deepmoji/test.npy", "rb") as f:
                deepmoji_val = np.load(f)
        if train_all:
            deepmoji_train = np.concatenate((deepmoji_train, deepmoji_val), axis=0)

    infersent_train = [None]
    infersent_val = [None]
    if use_infersent:
        infersent_file = data_dir + "infersent/" + infersent_file
        logging.info("Loading infersent representation from {0}".format(infersent_file))
        with open(infersent_file + "_train.npy", "rb") as f:
            infersent_train = np.load(f)
        if config["val_csv"].startswith("val"):
            with open(infersent_file + "_val.npy", "rb") as f:
                infersent_val = np.load(f)
        elif config["val_csv"].startswith("test"):
            with open(infersent_file + "_test.npy", "rb") as f:
                infersent_val = np.load(f)

    elmo_train = [None]
    elmo_val = [None]
    if use_elmo:
        elmo_file = data_dir + "elmo/"
        logging.info("Loading elmo representation from {0}".format(elmo_file))
        with open(elmo_file + "elmo_train.pkl", "rb") as f:
            elmo_train = np.load(f)
        if config["val_csv"].startswith("val"):
            with open(elmo_file + "elmo_val.pkl", "rb") as f:
                elmo_val = np.load(f)
        elif config["val_csv"].startswith("test"):
            with open(elmo_file + "elmo_test.pkl", "rb") as f:
                elmo_val = np.load(f)

    bert_word_train = [None]
    bert_word_val = [None]
    if use_bert_word:
        bert_file = data_dir + "bert/"
        logging.info("Loading bert representation from {0}".format(bert_file))
        with open(bert_file + "bert_train.pkl", "rb") as f:
            bert_word_train = np.load(f)
        if config["val_csv"].startswith("val"):
            with open(bert_file + "bert_val.pkl", "rb") as f:
                bert_word_val = np.load(f)
        elif config["val_csv"].startswith("test"):
            with open(bert_file + "bert_test.pkl", "rb") as f:
                bert_word_val = np.load(f)

    # build vocab
    logging.info("Building vocabulary...")
    if twitter_embedding == 0:
        TEXT.build_vocab(train_set, min_freq=min_freq, vectors=pretrained_embedding)
    else:
        TEXT.build_vocab(train_set, min_freq=min_freq)
    vocab_size = len(TEXT.vocab.itos)

    # use pretrained twitter embedding
    if twitter_embedding > 0:
        if twitter_embedding == 1:
            with open(data_dir + "datastories.twitter.100d.pkl", "rb") as f:
                tweet_embedding_raw = pickle.load(f)
        elif twitter_embedding == 2:
            checkpoint = torch.load("./saved_model/" + twitter_embedding_file)
            embedding = checkpoint["embedding"]
            vocab = checkpoint["vocab"]

        tweet_vectors = torch.zeros(vocab_size, embedding_size)
        if twitter_embedding != 2:
            for w, idx in TEXT.vocab.stoi.items():
                if w in tweet_embedding_raw:
                    tweet_vectors[idx] = torch.Tensor(tweet_embedding_raw[w])
                else:
                    tweet_vectors[idx] = torch.Tensor(tweet_embedding_raw["<unk>"])
        if twitter_embedding == 2:
            for w, idx in TEXT.vocab.stoi.items():
                if w in vocab.stoi:
                    tweet_vectors[idx] = torch.Tensor(embedding[vocab.stoi[w]])
                else:
                    tweet_vectors[idx] = torch.Tensor(embedding[vocab.stoi["<unk>"]])
        TEXT.vocab.vectors = tweet_vectors

    logging.info("Vocab size: {0}".format(vocab_size))

    #######################
    ### Model Training ####
    #######################
    metrics = {
        "accuracy": [],
        "microPrecision": [],
        "microRecall": [],
        "microF1": []
    }

    # create model
    logging.info("Building model...")
    model_kwargs = {
        "embed_size": embedding_size,
        "hidden_size": hidden_size,
        "output_size": output_size,
        "vocab_size": vocab_size,
        "n_layers": n_layers,
        "dropout": dropout,
        "bidirection": bidirectional,
        "use_deepmoji": use_deepmoji,
        "use_infersent": use_infersent,
        "use_elmo": use_elmo,
        "use_bert_word": use_bert_word,
        "additional_hidden_size": additional_hidden_size,
        "recurrent_dropout": recurrent_dropout,
        "kmaxpooling": kmaxpooling,
    }
    model = globals()[config["model"]](**model_kwargs)

    logging.info("Initializing model weight...")
    for name, param in model.named_parameters():
        if "weight" in name and len(param.shape) >= 2:
            xavier_uniform_(param)
    if use_elmo == False:
        model.init_embedding(TEXT.vocab.vectors, config)  # load GloVe 100d embedding
    logging.info(model)
    logging.info("Number of model params: {0}".format(count_parameters(model)))
    model.to(device)

    # weighted crossentropy loss
    label_weights = torch.tensor(label_weight[config["train_csv"]]).to(device)
    criterion = nn.CrossEntropyLoss(weight=label_weights)
    optimizer = optim.Adam(model.parameters(), learning_rate, weight_decay=weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=epoch_to_lower_lr, gamma=lr_gamma)

    train_losses = []
    train_epoch_losses = []
    val_losses = []
    val_epoch_losses = []

    # train
    logging.info("Start training...")
    # freeze embedding
    model.embedding.weight.requires_grad = False
    for epoch in range(1, num_epochs + 1):
        # load data
        train_batches = create_batches(
            train_set, TEXT.vocab, batch_size,
            [deepmoji_train, infersent_train, elmo_train, bert_word_train],
            shuffle=True, use_elmo=use_elmo)
        val_batches = create_batches(
            val_set, TEXT.vocab, 1,
            [deepmoji_val, infersent_val, elmo_val, bert_word_val],
            shuffle=False, use_elmo=use_elmo)

        logging.info("-" * 80)
        logging.critical("config_id: {0}".format(config_id))
        logging.info("Epoch {0}/{1}".format(epoch, num_epochs))
        train_epoch_loss = []
        val_epoch_loss = []

        # unfreeze embedding
        if epoch >= freeze_epochs:
            model.embedding.weight.requires_grad = True

        # lr scheduler
        scheduler.step()

        model.train()
        for batch_idx, ((batch_x, batch_y), [
                batch_deepmoji, batch_infersent, batch_elmo, batch_bert
        ]) in enumerate(train_batches):
            batch_x = torch.from_numpy(batch_x).to(device)
            batch_y = torch.from_numpy(batch_y).to(device)
            if use_deepmoji:
                batch_deepmoji = torch.from_numpy(batch_deepmoji).float().to(device)
            if use_infersent:
                batch_infersent = torch.from_numpy(batch_infersent).float().to(device)
            if use_elmo:
                batch_elmo = torch.from_numpy(batch_elmo).float().to(device)
            if use_bert_word:
                batch_bert = torch.from_numpy(batch_bert).float().to(device)

            optimizer.zero_grad()
            additional_sent_representations = {
                "deepmoji": None,
                "infersent": None,
                "elmo": None,
                "bert_word": None
            }
            if use_deepmoji:
                additional_sent_representations["deepmoji"] = batch_deepmoji
            if use_infersent:
                additional_sent_representations["infersent"] = batch_infersent
            if use_elmo:
                additional_sent_representations["elmo"] = batch_elmo
            if use_bert_word:
                additional_sent_representations["bert_word"] = batch_bert

            output = model(batch_x, config, **additional_sent_representations)
            loss = criterion(output, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
            optimizer.step()

            # log
            train_epoch_loss.append(loss.item())
            train_losses.append(loss.item())

        logging.info("Training loss: {0:.4f}".format(np.mean(train_epoch_loss)))
        train_epoch_losses.append(np.mean(train_epoch_loss))

        # val
        if train_all == False:
            model.eval()
            eval_epoch_outputs = np.zeros((len(val_batches), output_size))
            eval_epoch_labels = np.zeros((len(val_batches), ))
            with torch.no_grad():
                for batch_idx, ((batch_x, batch_y), [
                        batch_deepmoji, batch_infersent, batch_elmo, batch_bert
                ]) in enumerate(val_batches):
                    batch_x = torch.from_numpy(batch_x).to(device)
                    batch_y = torch.from_numpy(batch_y).to(device)
                    if use_deepmoji:
                        batch_deepmoji = torch.from_numpy(batch_deepmoji).float().to(device)
                    if use_infersent:
                        batch_infersent = torch.from_numpy(batch_infersent).float().to(device)
                    if use_elmo:
                        batch_elmo = torch.from_numpy(batch_elmo).float().to(device)
                    if use_bert_word:
                        batch_bert = torch.from_numpy(batch_bert).float().to(device)

                    additional_sent_representations = {
                        "deepmoji": None,
                        "infersent": None,
                        "elmo": None,
                        "bert_word": None
                    }
                    if use_deepmoji:
                        additional_sent_representations["deepmoji"] = batch_deepmoji
                    if use_infersent:
                        additional_sent_representations["infersent"] = batch_infersent
                    if use_elmo:
                        additional_sent_representations["elmo"] = batch_elmo
                    if use_bert_word:
                        additional_sent_representations["bert_word"] = batch_bert

                    output = model(batch_x, config, **additional_sent_representations)
                    loss = criterion(output, batch_y)

                    # log
                    val_epoch_loss.append(loss.item())
                    val_losses.append(loss.item())

                    # save predictions and labels for metrics computation
                    eval_epoch_outputs[batch_idx:batch_idx + 1, :] = output.cpu().detach().numpy()
                    eval_epoch_labels[batch_idx:batch_idx + 1] = batch_y.cpu().detach().numpy()

            logging.info("Validation loss: {0:.4f}".format(np.mean(val_epoch_loss)))
            val_epoch_losses.append(np.mean(val_epoch_loss))

            # get metrics
            logging.critical("config_id: {0}".format(config_id))
            accuracy, microPrecision, microRecall, microF1 = getMetrics(
                eval_epoch_outputs, eval_epoch_labels, output_size)
            # scheduler.step(microF1)

            # save model upon improvement and F1 beyond f1_criteria
            if microF1 > f1_criteria and (metrics["microF1"] == [] or microF1 > max(metrics["microF1"])):
                model_path = "{0}{1}_id_{4}_e{2}_F1_{3:.4f}.pt".format(
                    model_dir, exp, epoch, microF1, config_id)
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'config': config,
                        'model_kwargs': model_kwargs
                    }, model_path)

            metrics["accuracy"].append(accuracy)
            metrics["microPrecision"].append(microPrecision)
            metrics["microRecall"].append(microRecall)
            metrics["microF1"].append(microF1)

        if train_all:
            # save model
            model_path = "{0}{1}_id_{2}_e{3}.pt".format(model_dir, exp, config_id, epoch)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'config': config,
                    'model_kwargs': model_kwargs
                }, model_path)

    config.pop("seed")
    config.pop("device")
    config.pop("config_id")
    metrics["config"] = config
    return metrics
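# ---------------------------------------------------------------------------
# Illustrative sketch only (values are placeholders chosen for the example,
# not part of the original configuration): a config dict containing every key
# that main(config, progress) above reads. Note that "embedding_name" must end
# in a pattern like "...100d", since embedding_size is re-derived from its
# characters [-4:-1]; "model" must name a model class visible in globals().
# ---------------------------------------------------------------------------
example_config = {
    "config_id": 1, "exp": "demo", "seed": 42, "device": "cuda:0",
    "train_all": 0,
    "data_dir": "./data/", "train_csv": "train.csv", "val_csv": "val.csv",
    "save_dir": "./saved_model/", "f1_criteria": 0.70,
    "min_freq": 2, "embedding_name": "glove.twitter.27B.100d", "batch_size": 32,
    "twitter_embedding": 0, "twitter_embedding_file": "",
    "use_deepmoji": 0, "infersent_file": "", "use_elmo": 0, "use_bert_word": 0,
    "embedding_size": 100, "freeze_epochs": 2, "kmaxpooling": 1,
    "hidden_size": 256, "additional_hidden_size": 0, "output_size": 4,
    "n_layers": 2, "bidirectional": 1, "dropout": 0.3, "weight_decay": 0.0,
    "recurrent_dropout": 0.0, "gradient_clip": 5.0,
    "epochs": 10, "lr": 1e-3, "epoch_to_lower_lr": [5, 8], "lr_gamma": 0.5,
    "model": "ExampleClassifier",  # hypothetical class name, for illustration only
}
# metrics = main(example_config, progress=0.0)  # requires the data files above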
def main(args):
    datasetList = ['mr', 'subj', 'cr', 'mpqa', 'trec', 'sst']
    numberOfTest = 5
    args.max_epoch = 100

    for dset in datasetList:
        if dset == 'mr':
            data, label = dataloader.read_MR(args.path)
        elif dset == 'subj':
            data, label = dataloader.read_SUBJ(args.path)
        elif dset == 'cr':
            data, label = dataloader.read_CR(args.path)
        elif dset == 'mpqa':
            data, label = dataloader.read_MPQA(args.path)
        elif dset == 'trec':
            train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
            data = train_x + test_x
            label = None
        elif dset == 'sst':
            train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.read_SST(args.path)
            data = train_x + valid_x + test_x
            label = None
        else:
            raise Exception("unknown dataset: {}".format(dset))

        emb_layer = modules.EmbeddingLayer(
            args.d, data,
            embs=dataloader.load_embedding(args.embedding)
        )

        if dset == 'trec':
            train_x, train_y, valid_x, valid_y = dataloader.cv_split2(
                train_x, train_y,
                nfold=10,
                valid_id=args.cv
            )
        elif dset != 'sst':
            train_x, train_y, valid_x, valid_y, test_x, test_y = dataloader.cv_split(
                data, label,
                nfold=10,
                test_id=args.cv
            )
        nclasses = max(train_y) + 1

        train_x, train_y = dataloader.create_batches(train_x, train_y, args.batch_size,
                                                     emb_layer.word2id, sort=dset == 'sst')
        valid_x, valid_y = dataloader.create_batches(valid_x, valid_y, args.batch_size,
                                                     emb_layer.word2id, sort=dset == 'sst')
        test_x, test_y = dataloader.create_batches(test_x, test_y, args.batch_size,
                                                   emb_layer.word2id, sort=dset == 'sst')

        for models in range(3):
            if models == 1:
                args.cnn = True
                modelName = 'CNN'
            elif models == 2:
                args.cnn = False
                args.lstm = True
                modelName = 'LSTM'
            else:
                args.lstm = False
                modelName = 'SRU'
            sys.stdout.write("Training {} with {} architecture: \n".format(dset, modelName))
            args.dropout = 0.5

            for testNo in range(numberOfTest):
                model = Model(args, emb_layer, nclasses).cuda()
                need_grad = lambda x: x.requires_grad
                optimizer = optim.Adam(filter(need_grad, model.parameters()), lr=args.lr)

                best_valid = 1e+8
                test_err = 1e+8
                results = []
                for epoch in range(args.max_epoch):
                    results.append(train_model(epoch, model, optimizer,
                                               train_x, train_y,
                                               valid_x, valid_y,
                                               test_x, test_y,
                                               best_valid, test_err))

                with open('results_{d}_{m}_{i}.csv'.format(d=dset, m=modelName, i=(testNo + 1)), 'wb') as dump:
                    wr = csv.writer(dump, delimiter=',')
                    wr.writerow(['Epoch', 'Training Loss', 'Validation Error', 'Test Error', 'Duration'])
                    for idx, value in enumerate(results):
                        wr.writerow(value)