def main():
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = get_args()
    print_args(args)

    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)

    initialization.init_output_dir(args)
    initialization.save_args(args)

    # build classifier and move it to the target device
    classifier = simple_classifier(n_classes=args.n_classes, n_hidden=args.fc_dim)
    classifier = classifier.to(device)

    # build optimizer and runner
    optimizer = SGD(classifier.parameters(), lr=0.001, momentum=0.9)
    runner = Runner(classifier=classifier,
                    optimizer=optimizer,
                    device=device,
                    rparams=RunnerParameters(
                        num_train_epochs=args.num_train_epochs,
                        train_batch_size=args.train_batch_size,
                        eval_batch_size=args.eval_batch_size,
                    ))

    # dataset
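    # (assumed to be pre-featurized datasets saved by an earlier preprocessing step)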
    train_dataset = torch.load(os.path.join(args.data_dir, "train.dataset"))
    eval_dataset = torch.load(os.path.join(args.data_dir, "dev.dataset"))
    if args.mnli:
        mm_eval_dataset = torch.load(os.path.join(args.data_dir, "mm_dev.dataset"))
    else:
        mm_eval_dataset = None

    # run training and validation
    to_save = runner.run_train_val(
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        mm_eval_set=mm_eval_dataset,
    )

    # save training state to output dir.
    torch.save(to_save, os.path.join(args.output_dir, "training.info"))
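
# assumed entry point (not shown in the excerpt) so the script can be run directly
if __name__ == "__main__":
    main()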
Example #2
def main():
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = get_args()
    print_args(args)
    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_train_batch_size(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)
    use_cuda = not args.no_cuda
    verbose = args.verbose

    # classifier config
    config = {
        'dropout_prob': args.dropout_prob,
        'n_classes': args.n_classes,
        'fc_dim': args.fc_dim,
        'enc_dim': 512,
    }

    # load model
    print("loading Universal Sentence Encoder......")
    USE = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
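    # USE v4 returns 512-dim sentence embeddings, matching enc_dim above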

    classifier = SimpleClassifier(config)
    classifier = classifier.cuda() if use_cuda else classifier

    # get train examples
    train_examples = task.get_train_examples()
    # calculate t_total
    t_total = initialization.get_opt_train_steps(len(train_examples), args)

    # build optimizer.
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

    # create running parameters
    r_params = RunnerParameters(
        local_rank=args.local_rank,
        n_gpu=n_gpu,
        learning_rate=5e-5,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        t_total=t_total,
        warmup_proportion=args.warmup_proportion,
        num_train_epochs=args.num_train_epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_size=args.eval_batch_size,
        verbose=verbose)

    # create runner class for training and evaluation tasks.
    runner = GlueTaskClassifierRunner(encoder_model=USE,
                                      classifier_model=classifier,
                                      optimizer=optimizer,
                                      label_list=task.get_labels(),
                                      device=device,
                                      rparams=r_params)

    if args.do_train:
        runner.run_train_classifier(train_examples)

    if args.do_val:
        val_examples = task.get_dev_examples()
        results = runner.run_val(val_examples,
                                 task_name=task.name,
                                 verbose=verbose)

        df = pd.DataFrame(results["logits"])
        df.to_csv(os.path.join(args.output_dir, "val_preds.csv"),
                  header=False,
                  index=False)
        metrics_str = json.dumps(
            {
                "loss": results["loss"],
                "metrics": results["metrics"]
            }, indent=2)
        print(metrics_str)
        with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
            f.write(metrics_str)

        # HACK for MNLI-mismatched
        if task.name == "mnli":
            mm_val_examples = MnliMismatchedProcessor().get_dev_examples(
                task.data_dir)
            mm_results = runner.run_val(mm_val_examples,
                                        task_name=task.name,
                                        verbose=verbose)

            df = pd.DataFrame(mm_results["logits"])
            df.to_csv(os.path.join(args.output_dir, "mm_val_preds.csv"),
                      header=False,
                      index=False)
            combined_metrics = {}
            for k, v in results["metrics"].items():
                combined_metrics[k] = v
            for k, v in mm_results["metrics"].items():
                combined_metrics["mm-" + k] = v
            combined_metrics_str = json.dumps(
                {
                    "loss": results["loss"],
                    "metrics": combined_metrics,
                },
                indent=2)
            print(combined_metrics_str)
            with open(os.path.join(args.output_dir, "val_metrics.json"),
                      "w") as f:
                f.write(combined_metrics_str)
Example #3
def main():
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = get_args()
    print_args(args)

    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)

    initialization.init_output_dir(args)
    initialization.save_args(args)

    # build classifier and move it to the target device
    classifier = simple_classifier(n_classes=args.n_classes, n_hidden=args.fc_dim)
    classifier = classifier.to(device)

    # build optimizer and runner
    optimizer = SGD(classifier.parameters(), lr=0.001, momentum=0.9)
    runner = InfersentRunner(classifier=classifier,
                             optimizer=optimizer,
                             device=device,
                             rparams=RunnerParameters(
                                 num_train_epochs=args.num_train_epochs,
                                 train_batch_size=args.train_batch_size,
                                 eval_batch_size=args.eval_batch_size,
                             ))

    # dataset
    train_datasets = []
    for i in range(TRAIN_SET_NUM_MAP[args.task_name]):
        train_dataset = torch.load(os.path.join(args.data_dir, "train-{}.dataset".format(i)))
        train_datasets.append(train_dataset)

    eval_dataset = torch.load(os.path.join(args.data_dir, "dev-0.dataset"))

    if args.mnli:
        mm_eval_dataset = torch.load(os.path.join(args.data_dir, "mm_dev-0.dataset"))
    else:
        mm_eval_dataset = None

    # run training and validation
    eval_info, state_dicts = runner.run_train_val_with_state_dict_returned(
        train_dataset=train_datasets,
        eval_dataset=eval_dataset,
        mm_eval_set=mm_eval_dataset,
    )

    # save training state to output dir.
    torch.save(eval_info, os.path.join(args.output_dir, "training.info"))

    # find the best validation result, load that model state dict, and run prediction on the test set.
    val_acc = []
    mm_val_acc = []
    if args.mnli:
        for item in eval_info:
            val_acc.append(item[0]['accuracy'])
            mm_val_acc.append(item[1]['accuracy'])
        idx = val_acc.index(max(val_acc))
        print("highest accuracy on validation is: {}, index = {}. \n"
              "mis-matched is: {} \n"
              "Load state dicts and run testing...".format(val_acc[idx], idx, mm_val_acc[idx]))
    else:
        for item in eval_info:
            val_acc.append(item['accuracy'])
        idx = val_acc.index(max(val_acc))
        print("highest accuracy on validation is: {}, index = {}. \n"
              "Load state dicts and run testing...".format(val_acc[idx], idx))

    torch.save(state_dicts[idx], os.path.join(args.output_dir, "state.p"))

    test_datasets = []
    for i in range(TEST_SET_NUM_MAP[args.task_name]):
        test_dataset = torch.load(os.path.join(args.data_dir, "test-{}.dataset".format(i)))
        test_datasets.append(test_dataset)

    runner.classifier.load_state_dict(torch.load(os.path.join(args.output_dir, "state.p")))
    logits = runner.run_test(test_datasets)

    df = pd.DataFrame(logits)
    df.to_csv(os.path.join(args.output_dir, "test_preds.csv"), header=False, index=False)
    # HACK for MNLI-mismatched
    if args.mnli:
        mm_test_dataset = torch.load(os.path.join(args.data_dir, "mm_test-0.dataset"))
        logits = runner.run_test([mm_test_dataset])
        df = pd.DataFrame(logits)
        df.to_csv(os.path.join(args.output_dir, "mm_test_preds.csv"), header=False, index=False)
Example #4
def main():
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    args = get_args()
    print_args(args)
    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_train_batch_size(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)
    use_cuda = not args.no_cuda
    verbose = args.verbose

    # model config
    config = {
        'word_emb_dim': args.word_emb_dim,
        'enc_lstm_dim': args.enc_lstm_dim,
        'n_enc_layers': args.n_enc_layers,
        'dpout_model': args.dpout_model,
        'dpout_fc': args.dpout_fc,
        'fc_dim': args.fc_dim,
        'bsize': args.batch_size,
        'n_classes': args.n_classes,
        'pool_type': args.pool_type,
        'nonlinear_fc': args.nonlinear_fc,
        'use_cuda': use_cuda,
        'version': args.model_version,
        'dropout_prob': args.dropout_prob,
    }

    # load model
    if verbose:
        print('loading model...')
    model = InferSent(config)
    model.load_state_dict(torch.load(args.model_path))
    model = model.cuda() if not args.no_cuda else model
    model.set_w2v_path(args.word_emb_path)
    model.build_vocab_k_words(K=args.k_freq_words, verbose=verbose)

    # load classifier
    classifier = SimpleClassifier(config)
    classifier = classifier.cuda() if not args.no_cuda else classifier

    # get train examples
    train_examples = task.get_train_examples()
    # calculate t_total
    t_total = initialization.get_opt_train_steps(len(train_examples), args)

    # build optimizer.
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

    # create running parameters
    r_params = RunnerParameters(
        local_rank=args.local_rank,
        n_gpu=n_gpu,
        learning_rate=5e-5,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        t_total=t_total,
        warmup_proportion=args.warmup_proportion,
        num_train_epochs=args.num_train_epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_size=args.eval_batch_size,
        verbose=verbose)

    # create runner class for training and evaluation tasks.
    runner = GlueTaskClassifierRunner(encoder_model=model,
                                      classifier_model=classifier,
                                      optimizer=optimizer,
                                      label_list=task.get_labels(),
                                      device=device,
                                      rparams=r_params)

    if args.do_train:
        runner.run_train_classifier(train_examples)

    if args.do_val:
        val_examples = task.get_dev_examples()
        results = runner.run_val(val_examples,
                                 task_name=task.name,
                                 verbose=verbose)

        df = pd.DataFrame(results["logits"])
        df.to_csv(os.path.join(args.output_dir, "val_preds.csv"),
                  header=False,
                  index=False)
        metrics_str = json.dumps(
            {
                "loss": results["loss"],
                "metrics": results["metrics"]
            }, indent=2)
        print(metrics_str)
        with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
            f.write(metrics_str)

        # HACK for MNLI-mismatched
        if task.name == "mnli":
            mm_val_examples = MnliMismatchedProcessor().get_dev_examples(
                task.data_dir)
            mm_results = runner.run_val(mm_val_examples,
                                        task_name=task.name,
                                        verbose=verbose)

            df = pd.DataFrame(mm_results["logits"])
            df.to_csv(os.path.join(args.output_dir, "mm_val_preds.csv"),
                      header=False,
                      index=False)
            combined_metrics = {}
            for k, v in results["metrics"].items():
                combined_metrics[k] = v
            for k, v in mm_results["metrics"].items():
                combined_metrics["mm-" + k] = v
            combined_metrics_str = json.dumps(
                {
                    "loss": results["loss"],
                    "metrics": combined_metrics,
                },
                indent=2)
            print(combined_metrics_str)
            with open(os.path.join(args.output_dir, "val_metrics.json"),
                      "w") as f:
                f.write(combined_metrics_str)