def run(args):
    """Train CNNModel on an ImageFolder dataset, validating and checkpointing every epoch.

    Expects ``args`` to carry: data, batch_size, device, checkpoint, lr,
    step_size, epochs, log_interval. Saves ``model_<epoch>.pth`` after each
    epoch and closes the module-level ``writer`` when done.
    """
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.data + '/train', transform=data_transforms),
        batch_size=args.batch_size, shuffle=True, num_workers=16)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.data + '/val', transform=validation_data_transforms),
        batch_size=args.batch_size, shuffle=False, num_workers=16)

    model = CNNModel()
    model = nn.DataParallel(model)
    model = model.to(args.device)
    if args.checkpoint is not None:
        # BUGFIX: map_location makes restoring a GPU-saved checkpoint work on
        # CPU-only hosts (and vice versa) instead of crashing.
        model.load_state_dict(torch.load(args.checkpoint, map_location=args.device))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, train_loader, args.log_interval)
        validation(epoch, model, val_loader)
        # BUGFIX: step the scheduler AFTER the epoch's optimizer updates.
        # Calling it first (as the original did) skips the initial learning
        # rate entirely — the deprecated pre-PyTorch-1.1 ordering.
        scheduler.step()
        model_file = 'model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
    writer.close()
# NOTE(review): dangling tail of a function whose `def` lies above this chunk;
# `z` is defined in the unseen part. Left untouched.
return z

# Upload mode: 'image' uploads only images; 'caption' creates captions and
# uploads them (behaviour of the caption branch lies beyond this chunk).
upload_mode = sys.argv[1]
# NOTE(review): argv[2] and argv[3] are consumed elsewhere (argv[2] is read
# below only in 'image' mode) — verify the CLI contract with the caller.
selected_model = sys.argv[4]

# Prefer the first CUDA device when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the encoder; fall back to default pretrained weights when the local
# vgg16.hdf5 snapshot cannot be used.
# NOTE(review): bare `except:` also swallows KeyboardInterrupt/SystemExit —
# consider narrowing to `except Exception:` (or the specific load error).
try:
    encoder = CNNModel(pretrained=True, path=f'{DATA_PATH}/vgg16.hdf5')
except:
    encoder = CNNModel(pretrained=True)
encoder.to(device)

# Load metadata for the trained caption model, then the weights themselves;
# fall back to final_model.hdf5 when the selected checkpoint fails to load.
with open(f"{DATA_PATH}/results/model_info.json", 'r') as f:
    model_info = json.load(f)
# NOTE(review): same bare-except concern as above.
try:
    caption_model = torch.load(f"{DATA_PATH}/results/{selected_model}.hdf5", map_location=device)
except:
    caption_model = torch.load(f"{DATA_PATH}/results/final_model.hdf5", map_location=device)

if upload_mode == "image":
    # Image path comes straight from the command line.
    # NOTE(review): this branch continues beyond the visible chunk.
    image_fullpath = sys.argv[2]
def main():
    """CLI entry point: parse arguments, build the selected model, then train
    and/or evaluate it on the chosen task.

    Side effects: creates ``output_dir``/``cache_dir``, seeds RNGs via
    ``set_seed``, configures root logging, and reads/writes checkpoints named
    ``<train_type>_<task>_<model>_checkpoint-<step>`` under ``output_dir``.
    """
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument('--data_dir', default=None, type=str, required=True,
                        help="The input data dir.")
    parser.add_argument('--model_type', default=None, type=str, required=True,
                        help="Model type selected in [bert, xlnet, xlm, cnn, lstm]")
    parser.add_argument('--model_name_or_path', default='bert-base-uncased', type=str,
                        help="Shortcut name is selected in [bert-base-uncased, ]")
    parser.add_argument('--task_name', default=None, type=str, required=True,
                        help="The name of task is selected in [imdb, amazon]")
    parser.add_argument('--output_dir', default='../out', type=str,
                        help="The output directory where the model predictions and checkpoints will be written.")
    # Other parameters
    parser.add_argument("--cache_dir", default='../cache', type=str,
                        help="Store the cache files.")
    parser.add_argument("--max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    # BUGFIX: the original help string contained a raw line break inside the
    # literal (invalid syntax); rejoined into one string.
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm. Avoiding over-fitting.")
    parser.add_argument("--num_train_epochs", default=20, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed", default=42, type=int,
                        help="Random seed for initialization.")
    parser.add_argument("--train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--eval", action='store_true',
                        help="Whether to run eval on dev set.")
    parser.add_argument("--ckpt", default=-1, type=int,
                        help="Which ckpt to load.")
    parser.add_argument("--from_scratch", action='store_true',
                        help="Whether to train from scratch.")
    parser.add_argument("--train_type", default='normal', type=str,
                        help="Train type is selected in [normal, rs].")
    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        # BUGFIX: error message was ungrammatical ("is not exist").
        raise ValueError("input data dir does not exist.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger.warning("model type: %s, task name: %s, device: %s, ",
                   args.model_type, args.task_name, device)

    # Seed all RNGs before any model/data construction.
    set_seed(args)

    # Prepare task: resolve the processor and derive the label space.
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % args.task_name)
    task_class = processors[args.task_name]()
    label_list = task_class.get_labels()
    num_labels = len(label_list)
    args.num_labels = num_labels

    # Build the requested model. The BERT tokenizer's vocabulary also sizes
    # the embedding tables of the cnn/lstm baselines.
    model = None
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path, do_lower_case=True)
    args.vocab_size = tokenizer.vocab_size
    if args.model_type == 'bert':
        config = BertConfig.from_pretrained(args.model_name_or_path,
                                            num_labels=num_labels,
                                            finetuning_task=args.task_name)
        model = BertForSequenceClassification.from_pretrained(args.model_name_or_path, config=config)
    elif args.model_type == 'cnn':
        args.embed_size = 300
        args.num_filters = 100
        args.filter_sizes = (3,)
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                         num_classes=num_labels, num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes, device=args.device)
    elif args.model_type == 'lstm':
        args.embed_size = 300
        args.hidden_size = 100
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                          num_classes=num_labels, hidden_size=args.hidden_size,
                          device=args.device)
    elif args.model_type == 'char-cnn':
        # Character-level CNN: the feature set is this fixed alphabet and the
        # input length l0 follows the char-CNN paper's default of 1014.
        args.alphabets = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"\\/|_@#$%^&*~`+-=<>()[]{}\n'
        args.num_features = len(args.alphabets)
        args.l0 = 1014
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
    else:
        raise ValueError('model type is not found!')
    model.to(device)

    logger.info("Training/evaluation parameters %s", args)

    # Create output / cache directories if needed.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    if not os.path.exists(args.cache_dir):
        os.makedirs(args.cache_dir)

    # Datasets: char-cnn consumes raw characters, everything else tokenized
    # examples. The eval split is always loaded (needed by both modes).
    train_dataset = None
    if args.model_type != 'char-cnn':
        if args.train:
            train_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=False)
        eval_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=True)
    else:
        if args.train:
            train_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=False)
        eval_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=True)

    # Training: either from scratch, or resumed from the newest (or an
    # explicitly requested) checkpoint.
    if args.train:
        if args.from_scratch:  # default False
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset)
        else:
            if args.ckpt < 0:
                checkpoints = glob.glob(
                    args.output_dir + '/normal_{}_{}_checkpoint-*'.format(args.task_name, args.model_type))
                # ROBUSTNESS: an empty glob used to fail with an opaque
                # IndexError on checkpoints[-1]; fail with a clear message.
                if not checkpoints:
                    raise ValueError("no checkpoint found in {} to resume from; "
                                     "use --from_scratch to train anew.".format(args.output_dir))
                checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
                checkpoint = checkpoints[-1]
                ckpt = int(checkpoint.split('-')[-1])
            else:
                checkpoint = os.path.join(args.output_dir,
                                          'normal_{}_{}_checkpoint-{}'.format(args.task_name, args.model_type, args.ckpt))
                ckpt = args.ckpt
            model = load(args, checkpoint)
            print("Load model from {}".format(checkpoint))
            # Resume numbering from the step after the restored checkpoint.
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset, ckpt + 1)
        logger.info(" global_step = %s, average loss = %s", global_step, train_loss)

    # Evaluation: load the newest (or requested) checkpoint and report accuracy.
    if args.eval:
        if args.ckpt < 0:
            checkpoints = glob.glob(
                args.output_dir + '/{}_{}_{}_checkpoint-*'.format(args.train_type, args.task_name, args.model_type))
            # ROBUSTNESS: same empty-glob guard as in the training path.
            if not checkpoints:
                raise ValueError("no checkpoint found in {} to evaluate.".format(args.output_dir))
            checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
            checkpoint = checkpoints[-1]
        else:
            checkpoint = os.path.join(args.output_dir,
                                      '{}_{}_{}_checkpoint-{}'.format(args.train_type, args.task_name, args.model_type, args.ckpt))
        model = load(args, checkpoint)
        print("Evaluation result, load model from {}".format(checkpoint))
        acc = evaluate(args, model, eval_dataset)
        print("acc={:.4f}".format(acc))
from model import CNNModel
from evaluate import evaluate
from data_helper import trainset, devset, testset, word_vectors, word2id, rel2id

if __name__ == "__main__":
    # Wrap each split in a DataLoader; only the training split is shuffled.
    train_loader = DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)
    dev_loader = DataLoader(devset, shuffle=False, batch_size=BATCH_SIZE)
    test_loader = DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)

    # The model receives the pretrained word vectors (as a float32 tensor)
    # and the relation-label mapping from data_helper.
    model = CNNModel(torch.tensor(word_vectors, dtype=torch.float32), rel2id)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()
    model.to(DEVICE)
    criterion.to(DEVICE)

    # Best micro-F1 seen so far; waste_epoch presumably counts epochs without
    # improvement for early stopping — its use lies beyond this chunk, verify.
    best_f1_micro = 0.0
    waste_epoch = 0
    for epoch in range(EPOCH):
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            model.train()
            # Each batch yields token ids, two position-feature tensors and
            # the gold label; move everything onto DEVICE before the forward.
            tokens, pos1, pos2, label = data
            tokens = tokens.to(DEVICE)
            pos1 = pos1.to(DEVICE)
            pos2 = pos2.to(DEVICE)
            label = label.to(DEVICE)
            # NOTE(review): loop body continues beyond this chunk
            # (forward/backward/optimizer step not visible here).