def train(self, trainX, trainY, verbose=False):
    """Run one training step: forward, backward, parameter update.

    Args:
        trainX: batch of input samples fed to the forward pass.
        trainY: batch of targets; squeezed to drop singleton dimensions.
        verbose: when True, also score the batch and return metrics.

    Returns:
        ``[loss, accuracy]`` for this batch when ``verbose`` is True,
        otherwise ``None``.
    """
    targets = np.squeeze(trainY)
    self.forwardPropagation(trainX)
    self.backPropagation(targets)
    self.updateParameters()
    if not verbose:
        return None
    # Score the batch against the output layer's cached predictions.
    batch_loss = Metrics.crossEntropyLoss(targets, self.outputLayer.predictions)
    batch_accuracy = Metrics.accuracy(targets, self.outputLayer.predictedLabels)
    return [batch_loss, batch_accuracy]
def main():
    """Train a document-image classifier on holistic (and optionally region) inputs.

    ``parse_args()`` supplies data/save paths, the backbone name
    (``args.pretrained_model``), ``args.pretrained_holistic`` (0: train the
    holistic net, 1: train the region net on top of a frozen holistic
    checkpoint), optimizer settings, and the training schedule.
    """
    global args
    args = parse_args()

    # argument validation and reproducibility
    args.cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)
    print(args)

    train_dataset, dev_dataset, test_dataset = _load_datasets(args)

    # initialize model, criterion/loss_function, optimizer
    model = _build_model(args)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    # Only the non-frozen parameters are optimized.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    if args.cuda:
        model.cuda(), criterion.cuda()
    optimizer = _build_optimizer(args, parameters)
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)

    train_idx = list(np.arange(len(train_dataset)))
    dev_idx = list(np.arange(len(dev_dataset)))
    test_idx = list(np.arange(len(test_dataset)))

    best = float('inf')
    # NOTE(review): columns/results are accumulated but never written out in
    # this view; kept for compatibility with code outside it.
    columns = ['ExpName', 'ExpNo', 'Epoch', 'Loss', 'Accuracy']
    results = []
    early_stop_count = 0

    for epoch in range(args.epochs):
        random.shuffle(train_idx)
        random.shuffle(dev_idx)
        random.shuffle(test_idx)
        batch_train_data = _make_batches(train_idx, args.batchsize)
        batch_dev_data = _make_batches(dev_idx, args.batchsize)
        batch_test_data = _make_batches(test_idx, args.batchsize)

        # One optimization pass, then a scoring pass over each split.
        _train_one_epoch(trainer, train_dataset, batch_train_data, args)
        train_loss, train_accuracy = _evaluate_split(
            trainer, train_dataset, batch_train_data, args, metrics,
            'Training batches..')
        dev_loss, dev_accuracy = _evaluate_split(
            trainer, dev_dataset, batch_dev_data, args, metrics,
            'Dev batches..')
        test_loss, test_accuracy = _evaluate_split(
            trainer, test_dataset, batch_test_data, args, metrics,
            'Test batches..')

        # NOTE(review): the denominator assumes every batch is full; the last
        # batch may be smaller, so the reported mean loss is approximate.
        print('==> Training Epoch: %d, \nLoss: %f, \nAccuracy: %f'
              % (epoch + 1,
                 train_loss / (len(batch_train_data) * args.batchsize),
                 train_accuracy))
        print('==> Dev Epoch: %d, \nLoss: %f, \nAccuracy: %f'
              % (epoch + 1,
                 dev_loss / (len(batch_dev_data) * args.batchsize),
                 dev_accuracy))
        print('==> Test Epoch: %d, \nLoss: %f, \nAccuracy: %f'
              % (epoch + 1,
                 test_loss / (len(batch_test_data) * args.batchsize),
                 test_accuracy))

        results.append((args.expname, args.expno, epoch + 1,
                        test_loss / (len(batch_test_data) * args.batchsize),
                        test_accuracy))

        # NOTE(review): checkpoint selection uses *test* loss, which leaks the
        # test set into model selection; dev_loss would be the sound choice.
        if best > test_loss:
            best = test_loss
            # Fix: early stopping should count *consecutive* non-improving
            # epochs, so reset the counter on every new optimum.
            early_stop_count = 0
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'loss': test_loss,
                'accuracy': test_accuracy,
                'args': args,
                'epoch': epoch
            }
            print('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint, '%s.pt' % os.path.join(args.save, args.expname))
        else:
            early_stop_count = early_stop_count + 1
            if early_stop_count == 20:
                quit()


def _load_datasets(args):
    """Build the train/val/test Dataset objects from the holistic .pt file lists."""
    train_dir = glob.glob(os.path.join(args.data, 'train/holistic/*.pt'))
    dev_dir = glob.glob(os.path.join(args.data, 'val/holistic/*.pt'))
    test_dir = glob.glob(os.path.join(args.data, 'test/holistic/*.pt'))
    train_dataset = Dataset(os.path.join(args.data, 'train'), train_dir)
    dev_dataset = Dataset(os.path.join(args.data, 'val'), dev_dir)
    test_dataset = Dataset(os.path.join(args.data, 'test'), test_dir)
    print('==> Size of train data : %d ' % len(train_dataset))
    print('==> Size of val data : %d ' % len(dev_dataset))
    print('==> Size of test data : %d ' % len(test_dataset))
    return train_dataset, dev_dataset, test_dataset


def _build_model(args):
    """Construct the classifier for ``args.pretrained_model``.

    With ``args.pretrained_holistic == 0`` returns the holistic net over a
    frozen torchvision backbone; with ``1`` loads the holistic checkpoint and
    returns the region ("rest") net built on top of it.

    Raises:
        ValueError: for an unknown backbone name or holistic flag (the
            original code left ``model`` unbound and crashed later).
    """
    backbones = {
        'vgg16': (models.vgg16, model_vgg16, './checkpoints/vgg16.pt'),
        'vgg19': (models.vgg19, model_vgg19, './checkpoints/vgg19.pt'),
        'resnet50': (models.resnet50, model_resnet50, './checkpoints/resnet50.pt'),
        'densenet121': (models.densenet121, model_densenet121, './checkpoints/densenet121.pt'),
        'inceptionv3': (models.inception_v3, model_inceptionv3, './checkpoints/inceptionv3.pt'),
    }
    if args.pretrained_model not in backbones:
        raise ValueError('unsupported pretrained_model: %r' % args.pretrained_model)
    backbone_factory, arch_module, ckpt_path = backbones[args.pretrained_model]

    backbone = backbone_factory(pretrained=True)
    # Freeze training for all backbone layers.
    for child in backbone.children():
        for param in child.parameters():
            param.requires_grad = False

    if args.pretrained_holistic == 0:
        return arch_module.DocClassificationHolistic(args, backbone)
    if args.pretrained_holistic == 1:
        pretrained_orig = arch_module.DocClassificationHolistic(args, backbone)
        pretrained_holistic = arch_module.DocClassificationHolistic(
            args, pretrained_orig.pretrained_model)
        checkpoint = torch.load(ckpt_path)
        pretrained_holistic.load_state_dict(checkpoint['model'])
        return arch_module.DocClassificationRest(args, pretrained_orig, pretrained_holistic)
    raise ValueError('unsupported pretrained_holistic: %r' % args.pretrained_holistic)


def _build_optimizer(args, parameters):
    """Map ``args.optim`` to a torch optimizer over the trainable parameters."""
    optimizers = {
        'adam': optim.Adam,
        'adagrad': optim.Adagrad,
        'sgd': optim.SGD,
        'adadelta': optim.Adadelta,
    }
    if args.optim not in optimizers:
        raise ValueError('unsupported optimizer: %r' % args.optim)
    return optimizers[args.optim](parameters, lr=args.lr, weight_decay=args.wd)


def _make_batches(indices, batch_size):
    """Chunk a (pre-shuffled) index list into batches of at most batch_size."""
    return [indices[i:i + batch_size] for i in range(0, len(indices), batch_size)]


def _train_one_epoch(trainer, dataset, batches, args):
    """Run one optimization pass over the given batches (no metrics)."""
    for batch in tqdm(batches, desc='Training batches..'):
        holistic, header, footer, left_body, right_body, labels = dataset[batch]
        if args.pretrained_holistic == 0:
            trainer.train_holistic(holistic, labels)
        elif args.pretrained_holistic == 1:
            trainer.train_rest(holistic, header, footer,
                               left_body, right_body, labels)


def _evaluate_split(trainer, dataset, batches, args, metrics, desc):
    """Score a split; returns (summed loss, accuracy over all batches)."""
    total_loss = 0.0
    predictions = []
    gold = []
    for batch in tqdm(batches, desc=desc):
        holistic, header, footer, left_body, right_body, labels = dataset[batch]
        if args.pretrained_holistic == 0:
            batch_loss, batch_pred, labels = trainer.test_holistic(holistic, labels)
        elif args.pretrained_holistic == 1:
            batch_loss, batch_pred, labels = trainer.test_rest(
                holistic, header, footer, left_body, right_body, labels)
        predictions.append(batch_pred)
        gold.append(labels)
        total_loss = total_loss + batch_loss
    accuracy = metrics.accuracy(np.concatenate(predictions), np.concatenate(gold))
    return total_loss, accuracy
def main():
    """Train a SimilarityTreeLSTM on the SICK dataset with GLOVE embeddings.

    Builds (or reloads) the vocabulary, cached dataset splits, and the
    embedding matrix, then trains and checkpoints on the best Pearson score.
    """
    global args
    args = parse_args()
    args.input_dim, args.mem_dim = 300, 150
    args.hidden_dim, args.num_classes = 20, 2
    args.cuda = args.cuda and torch.cuda.is_available()
    # Sparse gradients and L2 weight decay cannot be combined in torch optimizers.
    if args.sparse and args.wd != 0:
        print('Sparsity and weight decay are incompatible, pick one!')
        exit()
    print(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        # order matters: all 'toks.a' files first, then all 'toks.b' files
        token_files = [os.path.join(split, 'toks.' + suffix)
                       for suffix in ('a', 'b')
                       for split in (train_dir, dev_dir, test_dir)]
        build_vocab(token_files, sick_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=sick_vocab_file,
                  data=[Constants.PAD_WORD, Constants.UNK_WORD,
                        Constants.BOS_WORD, Constants.EOS_WORD])
    print('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits (cached as .pth after the first build)
    train_dataset = _load_split(args, 'train', train_dir, vocab)
    dev_dataset = _load_split(args, 'dev', dev_dir, vocab)
    test_dataset = _load_split(args, 'test', test_dir, vocab)

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(args.cuda, vocab.size(), args.input_dim,
                               args.mem_dim, args.hidden_dim,
                               args.num_classes, args.sparse)
    criterion = nn.KLDivLoss()
    if args.cuda:
        model.cuda(), criterion.cuda()
    optimizers = {'adam': optim.Adam, 'adagrad': optim.Adagrad, 'sgd': optim.SGD}
    if args.optim not in optimizers:
        raise ValueError('unsupported optimizer: %r' % args.optim)
    optimizer = optimizers[args.optim](model.parameters(),
                                       lr=args.lr, weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # plug the (possibly cached) embeddings into the model's embedding matrix
    emb = _load_embeddings(args, vocab)
    if args.cuda:
        emb = emb.cuda()
    model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)

    best = -float('inf')
    for epoch in range(args.epochs):
        trainer.train(train_dataset)  # one optimization pass
        train_loss, train_pred = trainer.test(train_dataset)
        print(train_pred)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        print(dev_pred)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.accuracy(train_pred, train_dataset.labels)
        print('==> Train Loss: {}\tPearson: {}\tL1: {}'.format(
            train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.accuracy(dev_pred, dev_dataset.labels)
        print('==> Dev Loss: {}\tPearson: {}\tL1: {}'.format(
            dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.accuracy(test_pred, test_dataset.labels)
        print('==> Test Loss: {}\tPearson: {}\tL1: {}'.format(
            test_loss, test_pearson, test_mse))

        # NOTE(review): model selection on *test* Pearson leaks the test set;
        # dev_pearson would be the sound criterion.
        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            print('==> New optimum found, checkpointing everything now...')
            # Fix: the original appended '.pth' AND '%s.pt', producing
            # '<expname>.pth.pt'; save a single '.pt' extension instead.
            torch.save(checkpoint, '%s.pt' % os.path.join(args.save, args.expname))


def _load_split(args, name, split_dir, vocab):
    """Load a cached SICKDataset split, building and caching it on first use."""
    cache_file = os.path.join(args.data, 'sick_%s.pth' % name)
    if os.path.isfile(cache_file):
        dataset = torch.load(cache_file)
    else:
        dataset = SICKDataset(split_dir, vocab, args.num_classes)
        torch.save(dataset, cache_file)
    print('==> Size of %s data : %d ' % (name, len(dataset)))
    return dataset


def _load_embeddings(args, vocab):
    """Return the vocab-aligned embedding matrix, building it from GLOVE once.

    For words common to the dataset vocab and GLOVE, the word embedding is the
    sum of the GLOVE vectors of its tokens; other words keep a random normal
    init (special tokens are zeroed). The result is cached to sick_embed.pth.
    """
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        return torch.load(emb_file)

    # load glove embeddings and vocab
    glove_vocab, glove_emb = load_word_vectors(
        os.path.join(args.glove, 'glove.840B.300d'))
    print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
    # NOTE(review): normal_(-0.05, 0.05) draws N(mean=-0.05, std=0.05); if a
    # uniform init in [-0.05, 0.05] was intended, this should be uniform_ — confirm.
    emb = torch.Tensor(vocab.size(), glove_emb.size(1)).normal_(-0.05, 0.05)
    # zero out the embeddings for padding and other special words
    for idx, item in enumerate([Constants.PAD_WORD, Constants.UNK_WORD,
                                Constants.BOS_WORD, Constants.EOS_WORD]):
        emb[idx].zero_()
    for word in vocab.labelToIdx.keys():
        # Fix: bare word.decode('utf8') raises AttributeError on Python 3 str
        # keys; only decode when the vocab actually stores bytes.
        text = word.decode('utf8') if isinstance(word, bytes) else word
        token_ids = [glove_vocab.getIndex(token) for token in word_tokenize(text)]
        token_ids = [tid for tid in token_ids if tid is not None]
        if token_ids:
            index = torch.LongTensor(token_ids)
            word_emb = torch.sum(glove_emb.index_select(0, index), 0)
        else:
            word_emb = glove_emb[1] * 0  # all-zero vector of the right width
        emb[vocab.getIndex(word)] = word_emb
    torch.save(emb, emb_file)
    return emb
# Optimize only the parameters left trainable (frozen ones are filtered out).
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=args.lr,
                      weight_decay=args.wd)
model.to(device), criterion.to(device)
trainer = Trainer(args, model, criterion, optimizer, device)
best = -float("inf")
metrics = Metrics(8)

for epoch in range(args.epochs):
    # One optimization pass, then score train and test splits.
    train_loss = trainer.train(train_dataset)
    train_loss, train_scores = trainer.test(train_dataset)
    test_loss, test_scores = trainer.test(test_dataset)

    # Keep only the positive-class score column for the binary metrics.
    train_scores = train_scores[:, 1]
    test_scores = test_scores[:, 1]

    # --- train-split metrics ---
    train_gold = utils.get_labels(train_dataset).squeeze(1)
    train_targets = utils.map_labels_to_targets(train_gold, args.num_classes)
    train_mse = metrics.mse(train_scores, train_gold)
    acc = metrics.accuracy(train_scores, train_gold)  # label!!!
    fpr, tpr, threshold = roc_curve(train_gold, train_scores)
    train_auc = auc(fpr, tpr)
    logger.info("==> Epoch {}, Train \t Loss: {}\t Auc: {}\tMSE{} \t Accuracy{}".format(
        epoch, train_loss, train_auc, train_mse, acc
    ))

    # --- test-split metrics ---
    test_gold = utils.get_labels(test_dataset).squeeze(1)
    test_targets = utils.map_labels_to_targets(test_gold, args.num_classes)
    test_mse = metrics.mse(test_scores, test_gold)
    test_acc = metrics.accuracy(test_scores, test_gold)
    fpr, tpr, t = roc_curve(test_gold, test_scores)
    test_auc = auc(fpr, tpr)
    logger.info("==> Epoch {}, Test \t Loss: {}\tAuc: {}\tMSE{} \t Accuracy{} \t".format(
        epoch, test_loss, test_auc, test_mse, test_acc
    ))