def __init__(self, embedding, hidden_size=150, depth=1, dropout=0.3,
             cnn=False, nclasses=2):
    """Text classifier: embedding layer + (CNN | BiLSTM) encoder + linear head.

    Args:
        embedding: identifier/path handed to ``dataloader.load_embedding``
            to build the pretrained embedding table.
        hidden_size: total encoder output width.  The BiLSTM uses
            ``hidden_size // 2`` per direction; the CNN produces
            ``3 * hidden_size`` features (one ``hidden_size``-wide map per
            filter width).
        depth: number of stacked LSTM layers (unused by the CNN encoder).
        dropout: dropout probability for the input dropout layer and the
            inter-layer LSTM dropout.
        cnn: if True use ``modules.CNN_Text``; otherwise a bidirectional
            ``nn.LSTM``.
        nclasses: number of output classes for the final linear layer.

    NOTE(review): callers elsewhere in this file construct the model as
    ``Model(args, emb_layer, nclasses)``, which does not match this
    signature — confirm which version of ``Model`` is current.
    """
    super(Model, self).__init__()
    self.cnn = cnn
    self.drop = nn.Dropout(dropout)
    self.emb_layer = modules.EmbeddingLayer(
        embs=dataloader.load_embedding(embedding))
    self.word2id = self.emb_layer.word2id
    if cnn:
        self.encoder = modules.CNN_Text(
            self.emb_layer.n_d, widths=[3, 4, 5], filters=hidden_size)
        d_out = 3 * hidden_size
    else:
        # hidden_size // 2 per direction so the concatenated bidirectional
        # output is hidden_size wide.  batch_first is left at its default
        # (False): inputs are expected as (seq_len, batch, dim).
        self.encoder = nn.LSTM(
            self.emb_layer.n_d,
            hidden_size // 2,
            depth,
            dropout=dropout,
            bidirectional=True)
        d_out = hidden_size
    self.out = nn.Linear(d_out, nclasses)
def main(args):
    """Train one model on ``args.dataset`` and print the best validation
    error and the corresponding test error.

    Expects on ``args``: dataset, path, d, embedding, cv, batch_size, lr,
    lr_decay, max_epoch (plus whatever ``Model`` reads).
    """
    # --- load the raw corpus ------------------------------------------
    if args.dataset == 'mr':
        data, label = dataloader.read_MR(args.path)
    elif args.dataset == 'subj':
        data, label = dataloader.read_SUBJ(args.path)
    elif args.dataset == 'cr':
        data, label = dataloader.read_CR(args.path)
    elif args.dataset == 'mpqa':
        data, label = dataloader.read_MPQA(args.path)
    elif args.dataset == 'trec':
        # TREC ships a fixed test split; validation is carved out below.
        train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
        data = train_x + test_x
        label = None
    elif args.dataset == 'sst':
        # SST ships fixed train/valid/test splits.
        train_x, train_y, valid_x, valid_y, test_x, test_y = \
            dataloader.read_SST(args.path)
        data = train_x + valid_x + test_x
        label = None
    else:
        raise Exception("unknown dataset: {}".format(args.dataset))

    # Vocabulary is built over every split so no token is OOV at eval time.
    emb_layer = modules.EmbeddingLayer(
        args.d, data,
        embs=dataloader.load_embedding(args.embedding)
    )

    # --- build train/valid/test splits --------------------------------
    if args.dataset == 'trec':
        train_x, train_y, valid_x, valid_y = dataloader.cv_split2(
            train_x, train_y,
            nfold=10,
            valid_id=args.cv
        )
    elif args.dataset != 'sst':
        train_x, train_y, valid_x, valid_y, test_x, test_y = \
            dataloader.cv_split(
                data, label,
                nfold=10,
                test_id=args.cv
            )

    nclasses = max(train_y) + 1

    train_x, train_y = dataloader.create_batches(
        train_x, train_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )
    valid_x, valid_y = dataloader.create_batches(
        valid_x, valid_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )
    test_x, test_y = dataloader.create_batches(
        test_x, test_y,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst'
    )

    model = Model(args, emb_layer, nclasses).cuda()
    need_grad = lambda x: x.requires_grad
    # Only optimize trainable parameters (frozen embeddings are skipped).
    optimizer = optim.Adam(
        filter(need_grad, model.parameters()),
        lr=args.lr
    )

    best_valid = 1e+8
    test_err = 1e+8
    for epoch in range(args.max_epoch):
        best_valid, test_err = train_model(
            epoch, model, optimizer,
            train_x, train_y,
            valid_x, valid_y,
            test_x, test_y,
            best_valid, test_err
        )
        if args.lr_decay > 0:
            # Exponential LR decay, applied once per epoch.
            optimizer.param_groups[0]['lr'] *= args.lr_decay

    sys.stdout.write("best_valid: {:.6f}\n".format(best_valid))
    # BUG FIX: the original string literal was split across a physical
    # newline ("test_err: <newline>{:.6f}..."), which is a syntax error.
    sys.stdout.write("test_err: {:.6f}\n".format(test_err))
def main(args):
    """Benchmark SRU / CNN / LSTM encoders on every supported dataset.

    For each dataset, each of the three architectures is trained
    ``number_of_tests`` times; the per-epoch metrics of every run are
    written to ``results_<dataset>_<model>_<run>.csv``.

    Mutates ``args`` in place (``max_epoch``, ``cnn``, ``lstm``,
    ``dropout``) to steer model construction.
    """
    dataset_list = ['mr', 'subj', 'cr', 'mpqa', 'trec', 'sst']
    number_of_tests = 5
    args.max_epoch = 100
    for dset in dataset_list:
        # --- load the raw corpus --------------------------------------
        if dset == 'mr':
            data, label = dataloader.read_MR(args.path)
        elif dset == 'subj':
            data, label = dataloader.read_SUBJ(args.path)
        elif dset == 'cr':
            data, label = dataloader.read_CR(args.path)
        elif dset == 'mpqa':
            data, label = dataloader.read_MPQA(args.path)
        elif dset == 'trec':
            train_x, train_y, test_x, test_y = dataloader.read_TREC(args.path)
            data = train_x + test_x
            label = None
        elif dset == 'sst':
            train_x, train_y, valid_x, valid_y, test_x, test_y = \
                dataloader.read_SST(args.path)
            data = train_x + valid_x + test_x
            label = None
        else:
            raise Exception("unknown dataset: {}".format(dset))

        emb_layer = modules.EmbeddingLayer(
            args.d, data,
            embs=dataloader.load_embedding(args.embedding)
        )

        # --- build train/valid/test splits ----------------------------
        if dset == 'trec':
            train_x, train_y, valid_x, valid_y = dataloader.cv_split2(
                train_x, train_y,
                nfold=10,
                valid_id=args.cv
            )
        elif dset != 'sst':
            train_x, train_y, valid_x, valid_y, test_x, test_y = \
                dataloader.cv_split(
                    data, label,
                    nfold=10,
                    test_id=args.cv
                )

        nclasses = max(train_y) + 1

        train_x, train_y = dataloader.create_batches(
            train_x, train_y, args.batch_size, emb_layer.word2id,
            sort=dset == 'sst')
        valid_x, valid_y = dataloader.create_batches(
            valid_x, valid_y, args.batch_size, emb_layer.word2id,
            sort=dset == 'sst')
        test_x, test_y = dataloader.create_batches(
            test_x, test_y, args.batch_size, emb_layer.word2id,
            sort=dset == 'sst')

        # --- one pass per architecture: 0=SRU, 1=CNN, 2=LSTM ----------
        for arch in range(3):
            if arch == 1:
                args.cnn = True
                model_name = 'CNN'
            elif arch == 2:
                args.cnn = False
                args.lstm = True
                model_name = 'LSTM'
            else:
                args.lstm = False
                model_name = 'SRU'
            sys.stdout.write(
                "Training {} with {} architecture: \n".format(dset, model_name))
            args.dropout = 0.5
            for test_no in range(number_of_tests):
                model = Model(args, emb_layer, nclasses).cuda()
                need_grad = lambda x: x.requires_grad
                optimizer = optim.Adam(
                    filter(need_grad, model.parameters()), lr=args.lr)
                best_valid = 1e+8
                test_err = 1e+8
                results = []
                # NOTE(review): best_valid / test_err are passed in but never
                # updated from train_model's return (unlike the other main()
                # in this file) — confirm train_model tracks them itself.
                for epoch in range(args.max_epoch):
                    results.append(train_model(
                        epoch, model, optimizer,
                        train_x, train_y,
                        valid_x, valid_y,
                        test_x, test_y,
                        best_valid, test_err))
                # BUG FIX: csv.writer needs a text-mode file in Python 3;
                # the original 'wb' raises TypeError on the first writerow.
                # newline='' prevents blank lines on Windows.
                with open('results_{d}_{m}_{i}.csv'.format(
                        d=dset, m=model_name, i=test_no + 1),
                        'w', newline='') as dump:
                    wr = csv.writer(dump, delimiter=',')
                    wr.writerow(['Epoch', 'Training Loss', 'Validation Error',
                                 'Test Error', 'Duration'])
                    wr.writerows(results)