Example #1
0
def main():
    """Fine-tune a RoBERTa encoder plus a transformer fusion head on CosmosQA.

    Relies on module-level helpers defined elsewhere in this file/package:
    ``parse_args``, ``set_seed``, ``read_features``, ``train``, ``logger``,
    ``grg_dir``, ``RobertaConfig`` — TODO confirm their contracts at the
    definition site.
    """
    args = parse_args()
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    # Per-GPU batch sizes are tied to model size so each variant fits in
    # GPU memory; the checkpoint path is derived from the same choice.
    if args.model_choice == "large":
        args.per_gpu_train_batch_size = 1
        args.per_gpu_eval_batch_size = 2
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-large")
    elif args.model_choice == "base":
        args.per_gpu_train_batch_size = 3
        args.per_gpu_eval_batch_size = 4
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-base")
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # Effective batch size scales with GPU count; max(1, ...) keeps CPU-only
    # runs working when device_count() == 0.
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    os.makedirs(args.output_dir, exist_ok=True)

    # Imported lazily so the heavy model module is only loaded when this
    # entry point actually runs.
    from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn_debug import Roberta_Encoder as RobertaModel
    from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn_debug import \
        TransformerForMultipleChoice_Fusion_Layer as TransformerForMultipleChoice

    config = RobertaConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=4,  # CosmosQA is 4-way multiple choice
        finetuning_task=args.task_name)
    roberta_model = RobertaModel.from_pretrained(args.model_name_or_path)
    transformer_model = TransformerForMultipleChoice(config=config)

    train_dataset, dev_dataset = read_features(args)

    # Training
    if args.do_train:
        print("[TIME] --- time: {} ---, start train".format(
            time.ctime(time.time())))
        global_step, tr_loss, best_step = train(args, train_dataset,
                                                dev_dataset, roberta_model,
                                                transformer_model)
        logger.info(" global_step = %s, average loss = %s, best_step = %s",
                    global_step, tr_loss, best_step)
Example #2
0
def main():
    """Fine-tune a stock ``RobertaForMultipleChoice`` model on CosmosQA.

    Relies on module-level helpers defined elsewhere in this file/package:
    ``parse_args``, ``set_seed``, ``read_features``, ``train``, ``logger``,
    ``grg_dir``, ``RobertaConfig``, ``RobertaForMultipleChoice``,
    ``RobertaTokenizer`` — TODO confirm their contracts at the definition
    site.
    """
    args = parse_args()
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    # Per-GPU batch sizes are tied to model size so each variant fits in
    # GPU memory; the checkpoint path is derived from the same choice.
    if args.model_choice == "large":
        args.per_gpu_train_batch_size = 1
        args.per_gpu_eval_batch_size = 2
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-large")
    elif args.model_choice == "base":
        args.per_gpu_train_batch_size = 3
        args.per_gpu_eval_batch_size = 4
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-base")
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # max(1, ...) keeps CPU-only runs working when device_count() == 0.
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    os.makedirs(args.output_dir, exist_ok=True)

    config_class, model_class, tokenizer_class = RobertaConfig, RobertaForMultipleChoice, RobertaTokenizer
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=4,  # CosmosQA is 4-way multiple choice
        finetuning_task=args.task_name)
    # NOTE(review): `tokenizer` is never passed to train(); kept only so the
    # vocabulary load still runs — confirm whether it can be dropped.
    tokenizer = tokenizer_class.from_pretrained(
        args.model_name_or_path, do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(args.model_name_or_path)

    train_dataset, dev_dataset = read_features(args)

    # Training
    if args.do_train:
        print("[TIME] --- time: {} ---, start train".format(
            time.ctime(time.time())))
        global_step, tr_loss, best_step = train(args, train_dataset,
                                                dev_dataset, model)
        logger.info(" global_step = %s, average loss = %s, best_step = %s",
                    global_step, tr_loss, best_step)
Example #3
0
def main():
    """Fine-tune a fusion-variant RoBERTa multiple-choice model on CosmosQA.

    The concrete model class is selected at runtime from
    ``args.model_choice`` (base/large) and ``args.bert_model_choice``
    (fusion_head / fusion_layer / fusion_head_bert_attn). Relies on
    module-level helpers defined elsewhere: ``parse_args``, ``set_seed``,
    ``read_features``, ``train``, ``logger``, ``grg_dir``, ``RobertaConfig``,
    ``RobertaTokenizer`` — TODO confirm their contracts at the definition
    site.
    """
    args = parse_args()
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    # Per-GPU batch sizes are tied to model size so each variant fits in
    # GPU memory; the checkpoint path is derived from the same choice.
    if args.model_choice == "large":
        args.per_gpu_train_batch_size = 1
        args.per_gpu_eval_batch_size = 2
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-large")
    elif args.model_choice == "base":
        args.per_gpu_train_batch_size = 3
        args.per_gpu_eval_batch_size = 4
        args.model_name_or_path = os.path.join(
            grg_dir, "pretrained_model/roberta-base")
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # max(1, ...) keeps CPU-only runs working when device_count() == 0.
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    print("args.train_batch_size = ", args.train_batch_size)
    print("args.eval_batch_size = ", args.eval_batch_size)

    # makedirs(..., exist_ok=True) is race-free and creates intermediate
    # directories, unlike the exists()+mkdir() pattern.
    os.makedirs(args.output_dir, exist_ok=True)

    # Persist the full run configuration next to the outputs for
    # reproducibility.
    args_path = os.path.join(args.output_dir, "args.json")
    with open(args_path, "w", encoding="utf-8") as writer:
        json.dump(args.__dict__, writer, ensure_ascii=False, indent=4)

    # Lazily import the one model variant requested; each branch binds the
    # chosen class to the common name RobertaForMultipleChoice.
    if args.model_choice == "base":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    elif args.model_choice == "large":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    config_class, model_class, tokenizer_class = RobertaConfig, RobertaForMultipleChoice, RobertaTokenizer
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=4,  # CosmosQA is 4-way multiple choice
        finetuning_task=args.task_name)
    # NOTE(review): `tokenizer` is never passed to train(); kept only so the
    # vocabulary load still runs — confirm whether it can be dropped.
    tokenizer = tokenizer_class.from_pretrained(
        args.model_name_or_path, do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(args.model_name_or_path)

    train_dataset, dev_dataset = read_features(args)

    # Training
    if args.do_train:
        global_step, tr_loss, best_step = train(args, train_dataset,
                                                dev_dataset, model)
        logger.info(" global_step = %s, average loss = %s, best_step = %s",
                    global_step, tr_loss, best_step)
Example #4
0
def main():
    """Evaluate a fine-tuned fusion-variant RoBERTa checkpoint on the dev set.

    Selects the model class from ``args.model_choice`` / ``args.bert_model_choice``,
    then loads weights from a hard-coded best checkpoint directory (overriding
    ``args.model_name_or_path``). Relies on module-level helpers defined
    elsewhere: ``parse_args``, ``set_seed``, ``read_features``, ``eval``,
    ``grg_dir``, ``gra_dir``, ``RobertaConfig``, ``RobertaTokenizer`` — TODO
    confirm their contracts at the definition site.
    """
    args = parse_args()
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    # Per-GPU batch sizes are tied to model size so each variant fits in
    # GPU memory; the pretrained path is derived from the same choice.
    if args.model_choice == "large":
        args.per_gpu_train_batch_size = 1
        args.per_gpu_eval_batch_size  = 2
        args.model_name_or_path = os.path.join(grg_dir, "pretrained_model/roberta-large")
    elif args.model_choice == "base":
        args.per_gpu_train_batch_size = 4
        args.per_gpu_eval_batch_size  = 4
        args.model_name_or_path = os.path.join(grg_dir, "pretrained_model/roberta-base")
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # max(1, ...) keeps CPU-only runs working when device_count() == 0.
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    args.eval_batch_size  = args.per_gpu_eval_batch_size  * max(1, args.n_gpu)

    # Lazily import the one model variant requested; each branch binds the
    # chosen class to the common name RobertaForMultipleChoice.
    if args.model_choice == "base":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_all":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_All as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    elif args.model_choice == "large":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_all":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_All as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # Override the pretrained path with the fine-tuned best checkpoint.
    # NOTE(review): hard-coded to a fusion_layer run regardless of
    # args.bert_model_choice — confirm this is intentional.
    model_name = "checkpoint/best/output_base_lr_1e-5_bz_12_epoch_5_adamw_warmup_step_0_fusion_layer"
    args.model_name_or_path = os.path.join(gra_dir, model_name)
    config_class, model_class, tokenizer_class = RobertaConfig, RobertaForMultipleChoice, RobertaTokenizer
    config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
                                          num_labels=4, finetuning_task=args.task_name)
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(args.model_name_or_path)
    model.cuda()
    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        gpu_ids = list(range(args.n_gpu))
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    train_dataset, dev_dataset = read_features(args)
    print("args.train_batch_size = ", args.train_batch_size)
    print("args.eval_batch_size = ",  args.eval_batch_size)
    print("args.learning_rate = ",    args.learning_rate)
    print("args.n_gpu = ",            args.n_gpu)

    if args.do_eval:
        print("[TIME] --- time: {} ---, start eval".format(time.ctime(time.time())))
        # NOTE(review): `eval` here is a project-level evaluation function
        # that shadows the builtin — confirm it is imported in this module.
        result = eval(args, model, dev_dataset)
        print("eval_acc: {}, eval_loss: {}".format(result["eval_acc"], result["eval_loss"]))
def main():
    """Run test-set prediction with a fine-tuned fusion-variant RoBERTa model
    and write the predicted labels to ``./prediction.lst``.

    NOTE(review): this redefines ``main`` if it shares a module with the
    earlier definition — only the last one wins; confirm which entry point
    is intended. Relies on module-level helpers defined elsewhere:
    ``parse_args``, ``set_seed``, ``read_features_test``, ``predict``,
    ``RobertaConfig``, ``RobertaTokenizer`` — TODO confirm their contracts.
    """
    args = parse_args()
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    # Per-GPU batch sizes are tied to model size so each variant fits in
    # GPU memory; the pretrained path is derived from the same choice.
    if args.model_choice == "large":
        args.per_gpu_train_batch_size = 1
        args.per_gpu_eval_batch_size = 2
        args.model_name_or_path = "../../pretrained_model/_roberta-large"
    elif args.model_choice == "base":
        args.per_gpu_train_batch_size = 3
        args.per_gpu_eval_batch_size = 4
        args.model_name_or_path = "../../pretrained_model/_roberta-base"
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    # max(1, ...) keeps CPU-only runs working when device_count() == 0.
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)

    # makedirs(..., exist_ok=True) is race-free and creates intermediate
    # directories, unlike the exists()+mkdir() pattern.
    os.makedirs(args.output_dir, exist_ok=True)

    # Lazily import the one model variant requested; each branch binds the
    # chosen class to the common name RobertaForMultipleChoice.
    if args.model_choice == "base":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_base_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    elif args.model_choice == "large":
        if args.bert_model_choice == "fusion_head":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_layer":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Layer as RobertaForMultipleChoice
        elif args.bert_model_choice == "fusion_head_bert_attn":
            from baseline_cosmosqa_mask.model.model_mask_roberta_all.model_large_attn import \
                RobertaForMultipleChoice_Fusion_Head_Bert_Self_Attn as RobertaForMultipleChoice
        else:
            raise ValueError(
                "unsupported bert_model_choice: {!r}".format(
                    args.bert_model_choice))
    else:
        raise ValueError(
            "unsupported model_choice: {!r} (expected 'base' or 'large')"
            .format(args.model_choice))

    config_class, model_class, tokenizer_class = RobertaConfig, RobertaForMultipleChoice, RobertaTokenizer

    # Load weights from a hard-coded fine-tuned checkpoint instead of
    # args.model_name_or_path.
    model_dir = "../../output_model/output_cosmosqa_mask/output_roberta_transformer_ensemble_attn/" \
        "output_base_lr_1e-5_bz_12_epoch_5_adamw_warmup_step_0_fusion_layer/"

    # model = model_class.from_pretrained(args.model_name_or_path)
    model = model_class.from_pretrained(model_dir)

    model.cuda()
    if args.n_gpu > 1:
        gpu_ids = list(range(args.n_gpu))
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)
    model.eval()  # inference only — disable dropout/batch-norm updates

    test_dataset = read_features_test(args)
    # train_dataset, dev_dataset = read_features(args)

    # Prediction. NOTE(review): gated on args.do_train although it predicts —
    # confirm whether args.do_eval (or a do_predict flag) was intended.
    if args.do_train:
        print("[TIME] --- time: {} ---, start predict".format(
            time.ctime(time.time())))
        # results = eval(args, model, dev_dataset, prefix="", test=False)
        # print("eval results = ", results)

        preds = predict(args, model, test_dataset, prefix="", test=False)
        # One predicted label per line, no header/index — the CosmosQA
        # submission format.
        df = pandas.DataFrame(preds, columns=["one"])
        df.to_csv("./prediction.lst",
                  columns=["one"],
                  index=False,
                  header=False)