Example 1
def run(args):
    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        args.data + '/train', transform=data_transforms),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=16)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        args.data + '/val', transform=validation_data_transforms),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=16)

    model = CNNModel()
    model = nn.DataParallel(model)
    model = model.to(args.device)

    if args.checkpoint is not None:
        model.load_state_dict(torch.load(args.checkpoint))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, train_loader, args.log_interval)
        validation(epoch, model, val_loader)
        # step the LR scheduler once per epoch, after the optimizer has updated the parameters
        scheduler.step()
        model_file = 'model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
    # `writer` (e.g. a TensorBoard SummaryWriter) is created elsewhere in the original file
    writer.close()
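
# Note: Example 1 relies on train(...) and validation(...) helpers (and on data_transforms /
# validation_data_transforms) defined elsewhere in the original file. The code below is a
# minimal sketch of what such helpers might look like, assuming a plain cross-entropy
# classification setup; it is not the original implementation.
import torch
import torch.nn.functional as F

def train(epoch, model, optimizer, train_loader, log_interval):
    device = next(model.parameters()).device
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print(f'train epoch {epoch} batch {batch_idx}: loss {loss.item():.4f}')

def validation(epoch, model, val_loader):
    device = next(model.parameters()).device
    model.eval()
    correct, total, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss_sum += F.cross_entropy(output, target, reduction='sum').item()
            correct += (output.argmax(dim=1) == target).sum().item()
            total += target.size(0)
    print(f'validation epoch {epoch}: loss {loss_sum / total:.4f}, acc {correct / total:.4f}')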
Example 2
    return z


# upload mode; if upload mode is 'image' then only images will be uploaded
#              if upload mode is 'caption' then captions will be created and uploaded
upload_mode = sys.argv[1]

selected_model = sys.argv[4]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# load encoder
try:
    encoder = CNNModel(pretrained=True, path=f'{DATA_PATH}/vgg16.hdf5')
except Exception:  # fall back to the default pretrained weights if the local file cannot be loaded
    encoder = CNNModel(pretrained=True)
encoder.to(device)

# load the trained caption model

with open(f"{DATA_PATH}/results/model_info.json", 'r') as f:
    model_info = json.load(f)
try:
    caption_model = torch.load(f"{DATA_PATH}/results/{selected_model}.hdf5",
                               map_location=device)
except Exception:  # fall back to the final model if the selected checkpoint cannot be loaded
    caption_model = torch.load(f"{DATA_PATH}/results/final_model.hdf5",
                               map_location=device)

if upload_mode == "image":
    # get our data as an array from sys
    image_fullpath = sys.argv[2]
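
# Note: the snippet above reads positional arguments straight from sys.argv. A minimal sketch
# of validating them up front (a hypothetical helper, not part of the original; the meaning of
# argv[3] is not shown in the snippet and is left untouched):
import sys

def parse_cli(argv):
    if len(argv) < 5:
        raise SystemExit("usage: <script> <upload_mode> <image_path> <...> <selected_model>")
    upload_mode = argv[1]
    if upload_mode not in ("image", "caption"):
        raise SystemExit(f"unknown upload mode: {upload_mode!r} (expected 'image' or 'caption')")
    return upload_mode, argv[2], argv[4]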
Example 3
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--data_dir', default=None, type=str, required=True, help="The input data dir.")
    parser.add_argument('--model_type', default=None, type=str, required=True,
                        help="Model type selected from [bert, cnn, lstm, char-cnn]")
    parser.add_argument('--model_name_or_path', default='bert-base-uncased', type=str,
                        help="Shortcut name selected from [bert-base-uncased, ]")
    parser.add_argument('--task_name', default=None, type=str, required=True,
                        help="The name of the task, selected from [imdb, amazon]")
    parser.add_argument('--output_dir', default='../out', type=str,
                        help="The output directory where the model predictions and checkpoints will be written.")
    # other parameters
    parser.add_argument("--cache_dir", default='../cache', type=str, help="Store the cache files.")
    parser.add_argument("--max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm. Avoiding over-fitting.")
    parser.add_argument("--num_train_epochs", default=20, type=int, help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed", default=42, type=int, help="Random seed for initializaiton.")
    parser.add_argument("--train", action='store_true', help="Whether to run training.")
    parser.add_argument("--eval", action='store_true', help="Whether to run eval on dev set.")
    parser.add_argument("--ckpt", default=-1, type=int, help="Which ckpt to load.")
    parser.add_argument("--from_scratch", action='store_true', help="Whether to train from scratch.")
    parser.add_argument("--train_type", default='normal', type=str, help="Train type is selected in [normal, rs].")

    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        raise ValueError("input data dir is not exist.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger.warning("model type: %s, task name: %s, device: %s, ", args.model_type, args.task_name, device)

    # set seed
    set_seed(args)
    # Prepare task
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % args.task_name)

    task_class = processors[args.task_name]()
    label_list = task_class.get_labels()
    num_labels = len(label_list)
    args.num_labels = num_labels

    # load model.
    # MODEL_CLASSES = {
    #     'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
    #     # 'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer),
    #     # 'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
    # }
    model = None
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path, do_lower_case=True)
    args.vocab_size = tokenizer.vocab_size
    if args.model_type == 'bert':
        config = BertConfig.from_pretrained(args.model_name_or_path, num_labels=num_labels,
                                            finetuning_task=args.task_name)
        model = BertForSequenceClassification.from_pretrained(args.model_name_or_path, config=config)
    elif args.model_type == 'cnn':
        args.embed_size = 300
        args.num_filters = 100
        args.filter_sizes = (3,)
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=num_labels,
                         num_filters=args.num_filters, filter_sizes=args.filter_sizes, device=args.device)
    elif args.model_type == 'lstm':
        args.embed_size = 300
        args.hidden_size = 100
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=num_labels,
                          hidden_size=args.hidden_size, device=args.device)
    elif args.model_type == 'char-cnn':
        args.alphabets = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"\\/|_@#$%^&*~`+-=<>()[]{}\n'
        args.num_features = len(args.alphabets)
        args.l0 = 1014
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
    else:
        raise ValueError('model type not found: %s' % args.model_type)

    model.to(device)
    logger.info("Training/evaluation parameters %s", args)

    # Create output directory if needed
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Create cache directory if needed
    if not os.path.exists(args.cache_dir):
        os.makedirs(args.cache_dir)

    train_dataset = None
    if args.model_type != 'char-cnn':
        if args.train:
            train_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=False)
        eval_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=True)
    else:
        if args.train:
            train_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=False)
        eval_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=True)
    # Training
    if args.train:
        if args.from_scratch:  # default False
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset)
        else:
            if args.ckpt < 0:
                checkpoints = glob.glob(
                    args.output_dir + '/normal_{}_{}_checkpoint-*'.format(args.task_name, args.model_type))
                checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
                checkpoint = checkpoints[-1]
                ckpt = int(checkpoint.split('-')[-1])
            else:
                checkpoint = os.path.join(args.output_dir, 'normal_{}_{}_checkpoint-{}'.format(args.task_name, args.model_type, args.ckpt))
                ckpt = args.ckpt
            model = load(args, checkpoint)
            print("Load model from {}".format(checkpoint))
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset, ckpt + 1)
        logger.info(" global_step = %s, average loss = %s", global_step, train_loss)

        # logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        # if args.model_type == 'bert':
        #     model_to_save.save_pretrained(args.output_dir)
        # else:
        #     torch.save({'state_dict': model_to_save.state_dict()}, os.path.join(args.output_dir, '{}_{}_normal_checkpoint.pth.tar'.format(args.task_name, args.model_type)))
        # tokenizer.save_pretrained(args.output_dir)
        # # Good practice: save your training arguments together with the trained model
        # torch.save(args, os.path.join(args.output_dir, '{}_{}_normal_training_args.bin'.format(args.task_name, args.model_type)))

    # The model can be saved in two ways: model_to_save.save_pretrained(output_dir), or
    # torch.save({'state_dict': model.state_dict()}, output_file). Loading differs accordingly:
    # use BertForSequenceClassification.from_pretrained(output_dir) for the former, or build the
    # model and call model.load_state_dict(torch.load(output_file)['state_dict']) for the latter.
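    # A minimal sketch of those two patterns (illustrative only, not executed here;
    # `output_file` is a hypothetical path):
    #   model_to_save = model.module if hasattr(model, 'module') else model
    #   # (1) HuggingFace-style:
    #   model_to_save.save_pretrained(args.output_dir)
    #   model = BertForSequenceClassification.from_pretrained(args.output_dir)
    #   # (2) plain state_dict:
    #   torch.save({'state_dict': model_to_save.state_dict()}, output_file)
    #   model.load_state_dict(torch.load(output_file, map_location=args.device)['state_dict'])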

    # Evaluation
    if args.eval:
        if args.ckpt < 0:
            checkpoints = glob.glob(
                args.output_dir + '/{}_{}_{}_checkpoint-*'.format(args.train_type, args.task_name, args.model_type))
            checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
            checkpoint = checkpoints[-1]
        else:
            checkpoint = os.path.join(args.output_dir, '{}_{}_{}_checkpoint-{}'.format(args.train_type, args.task_name, args.model_type, args.ckpt))
        model = load(args, checkpoint)
        print("Evaluation result, load model from {}".format(checkpoint))
        acc = evaluate(args, model, eval_dataset)
        print("acc={:.4f}".format(acc))
Example 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from model import CNNModel
from evaluate import evaluate
from data_helper import trainset, devset, testset, word_vectors, word2id, rel2id

# BATCH_SIZE, LEARNING_RATE, DEVICE and EPOCH are hyperparameters defined or imported elsewhere in the original file.


if __name__ == "__main__":

    train_loader = DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)
    dev_loader = DataLoader(devset, shuffle=False, batch_size=BATCH_SIZE)
    test_loader = DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)

    model = CNNModel(torch.tensor(word_vectors, dtype=torch.float32), rel2id)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    model.to(DEVICE)
    criterion.to(DEVICE)

    best_f1_micro = 0.0
    waste_epoch = 0
    for epoch in range(EPOCH):
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            model.train()
            tokens, pos1, pos2, label = data

            tokens = tokens.to(DEVICE)
            pos1 = pos1.to(DEVICE)
            pos2 = pos2.to(DEVICE)
            label = label.to(DEVICE)
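
# The snippet above is cut off before the forward/backward pass. A minimal sketch of how such
# a training step typically continues (an assumption; the call signature
# model(tokens, pos1, pos2) returning logits is not confirmed by the snippet):
def train_step(model, optimizer, criterion, tokens, pos1, pos2, label):
    optimizer.zero_grad()                   # clear gradients from the previous step
    logits = model(tokens, pos1, pos2)      # forward pass (assumed signature)
    loss = criterion(logits, label)         # cross-entropy against the relation labels
    loss.backward()                         # backpropagate
    optimizer.step()                        # update the parameters
    return loss.item()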