Example #1
def main():
    # if args.use_cuda is True:
    # use deterministic algorithm for cnn
    # torch.backends.cudnn.deterministic = True
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
    # create a timestamped save directory
    mulu = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    args.mulu = mulu
    args.save_dir = os.path.join(args.save_dir, mulu)
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    Temp_Test_Result = "./Temp_Test_Result"
    # start from an empty temporary result directory for this run
    if os.path.exists(Temp_Test_Result):
        shutil.rmtree(Temp_Test_Result)
    os.makedirs(Temp_Test_Result)

    pretrained_text_field = data.Field(lower=True)
    pretrained_label_field = data.Field(sequential=False)
    build_Pretrained_vocab(pretrained_text_field,
                           pretrained_label_field,
                           path_file=args.word_Embedding_Path)
    # print(text_field.vocab.stoi)
    args.pretrained_text_field = pretrained_text_field
    args.pretrained_label_field = pretrained_label_field
    args.embed_num = len(pretrained_text_field.vocab)
    embed, pretrained_embed_dim = model_SumPooling_Pretrained.load_pretrain(
        file=args.word_Embedding_Path, args=args)
    args.embed = embed
    args.pretrained_embed_dim = pretrained_embed_dim
    cv_result = []
    # CV loop start
    for fold_id in range(args.nfold):
        print("\nCV fold {}".format(fold_id))
        # build vocab and iterator
        text_field = data.Field(lower=True)
        label_field = data.Field(sequential=False)

        # write per-fold train/test split files for this fold
        cv_spilit_file(args.train_path, args.nfold, test_id=fold_id)
        train_iter, test_iter = load_data(text_field,
                                          label_field,
                                          path_file=args.train_path,
                                          device=args.gpu_device,
                                          repeat=False,
                                          shuffle=args.epochs_shuffle,
                                          sort=False)

        args.text_field = text_field
        args.class_num = len(label_field.vocab) - 1
        args.PaddingID = pretrained_text_field.vocab.stoi[
            pretrained_text_field.pad_token]
        print("embed_num : {}, class_num : {}".format(args.embed_num,
                                                      args.class_num))
        print("PaddingID {}".format(args.PaddingID))
        # pretrained word embedding
        # if args.word_Embedding:
        #     pretrain_embed = load_pretrained_emb_zeros(path=args.word_Embedding_Path,
        #                                                text_field_words_dict=text_field.vocab.itos,
        #                                                pad=text_field.pad_token)
        #     calculate_oov(path=args.word_Embedding_Path, text_field_words_dict=text_field.vocab.itos,
        #                   pad=text_field.pad_token)
        #     args.pretrained_weight = pretrain_embed

        # print params
        show_params()

        # load model and start train
        if args.CNN is True:
            print("loading SumPooling model.....")
            # model = model_CNN.CNN_Text(args)
            model = model_SumPooling_Pretrained.SumPooling(args)
            # for param in model.parameters():
            #     param.requires_grad = False
            shutil.copy("./models/model_SumPooling_Pretrained.py",
                        args.save_dir)
            print(model)
            if args.use_cuda is True:
                print("using cuda......")
                model = model.cuda()
            print("CNN training start......")
            if os.path.exists("./Test_Result.txt"):
                os.remove("./Test_Result.txt")
            model_count = train_CV.train(train_iter, test_iter, model, args)
        cv_result.append(calculate_result(id=fold_id))

    print(cv_result)
    cv_mean = cal_mean(cv_result)
    print("The best result is {:.6f} ".format(cv_mean))
    with open("./Temp_Test_Result/Final_Result.txt", "a") as result_file:
        for index, value in enumerate(cv_result):
            result_file.write(str(index + 1) + "   " + str(value))
            result_file.write("\n")
        result_file.write("mean_value  " + str(cv_mean))
    shutil.copytree("./Temp_Test_Result/",
                    "./snapshot/" + mulu + "/Temp_Test_Result")
Example #2
def main():
    # if args.use_cuda is True:
    # use deterministic algorithm for cnn
    # torch.backends.cudnn.deterministic = True
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
    # create a timestamped save directory
    mulu = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    args.mulu = mulu
    args.save_dir = os.path.join(args.save_dir, mulu)
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    # build vocab and iterator
    text_field = data.Field(lower=True)
    label_field = data.Field(sequential=False)
    if args.SST_1 is True:
        print("loading sst-1 dataset......")
        train_iter, dev_iter, test_iter = load_SST_1(
            text_field,
            label_field,
            train_path=args.train_path,
            dev_path=args.dev_path,
            test_path=args.test_path,
            device=args.gpu_device,
            repeat=False,
            shuffle=args.epochs_shuffle,
            sort=False)
    if args.SST_2 is True:
        print("loading sst-2 dataset......")
        train_iter, dev_iter, test_iter = load_SST_2(
            text_field,
            label_field,
            train_path=args.train_path,
            dev_path=args.dev_path,
            test_path=args.test_path,
            device=args.gpu_device,
            repeat=False,
            shuffle=args.epochs_shuffle,
            sort=False)
    if args.TREC is True:
        print("loading TREC dataset......")
        train_iter, test_iter = load_TREC(text_field,
                                          label_field,
                                          train_path=args.train_path,
                                          test_path=args.test_path,
                                          device=args.gpu_device,
                                          repeat=False,
                                          shuffle=args.epochs_shuffle,
                                          sort=False)

    args.embed_num = len(text_field.vocab)
    args.class_num = len(label_field.vocab) - 1
    args.PaddingID = text_field.vocab.stoi[text_field.pad_token]
    print("embed_num : {}, class_num : {}".format(args.embed_num,
                                                  args.class_num))
    print("PaddingID {}".format(args.PaddingID))
    # pretrained word embedding
    if args.word_Embedding:
        pretrain_embed = load_pretrained_emb_zeros(
            path=args.word_Embedding_Path,
            text_field_words_dict=text_field.vocab.itos,
            pad=text_field.pad_token)
        calculate_oov(path=args.word_Embedding_Path,
                      text_field_words_dict=text_field.vocab.itos,
                      pad=text_field.pad_token)
        args.pretrained_weight = pretrain_embed

    # print params
    show_params()

    # load model and start train
    if args.CNN is True:
        print("loading CNN model.....")
        # model = model_CNN.CNN_Text(args)
        model = model_SumPooling.SumPooling(args)
        # for param in model.parameters():
        #     param.requires_grad = False
        shutil.copy("./models/model_CNN.py", args.save_dir)
        print(model)
        if args.use_cuda is True:
            print("using cuda......")
            model = model.cuda()
        print("CNN training start......")
        if os.path.exists("./Test_Result.txt"):
            os.remove("./Test_Result.txt")
        if args.SST_1 is True or args.SST_2 is True:
            model_count = train_ALL_CNN.train(train_iter, dev_iter, test_iter,
                                              model, args)
        if args.TREC is True:
            model_count = train_CV.train(train_iter, test_iter, model, args)

    # calculate the best result
    cal_result()
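
The helpers load_pretrained_emb_zeros and calculate_oov are defined elsewhere in the repository. As an illustrative sketch only (build_embed_matrix and word_vectors are hypothetical names, and the pretrained vectors are assumed to have been parsed into a token-to-vector dict beforehand), a vocabulary-aligned matrix with zero rows for out-of-vocabulary tokens could be built like this:

import numpy as np

def build_embed_matrix(word_vectors, itos, dim):
    # itos: index-to-string list from text_field.vocab
    matrix = np.zeros((len(itos), dim), dtype=np.float32)
    oov_count = 0
    for idx, token in enumerate(itos):
        vec = word_vectors.get(token)
        if vec is not None:
            matrix[idx] = vec
        else:
            oov_count += 1  # token keeps a zero row, matching the "zeros" in the helper's name
    return matrix, oov_count
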
Example #3
def main():
    assert args.test_interval == args.dev_interval
    assert args.CV is False, "cross-validation dataset: run main_hyperparams_CV.py instead"
    # if args.use_cuda is True:
    # use deterministic algorithm for cnn
    # torch.backends.cudnn.deterministic = True
    # create a timestamped save directory
    mulu = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    args.mulu = mulu
    args.save_dir = os.path.join(args.save_dir, mulu)
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    # build vocab and iterator
    text_field = data.Field(lower=True)
    label_field = data.Field(sequential=False)
    if args.IMDB is True:
        print("loading IMDB dataset......")
        train_iter, test_iter = load_IMDB(
            text_field,
            label_field,
            train_path=args.train_path,
            test_path=args.test_path,
            device=args.gpu_device,
            repeat=False,
            shuffle=args.epochs_shuffle,
            sort=False,
        )

    args.embed_num = len(text_field.vocab)
    args.class_num = len(label_field.vocab) - 1
    args.PaddingID = text_field.vocab.stoi[text_field.pad_token]
    print("embed_num : {}, class_num : {}".format(args.embed_num,
                                                  args.class_num))
    print("PaddingID {}".format(args.PaddingID))
    # pretrained word embedding
    if args.word_Embedding:
        pretrain_embed = load_pretrained_emb_zeros(
            path=args.word_Embedding_Path,
            text_field_words_dict=text_field.vocab.itos,
            pad=text_field.pad_token)
        calculate_oov(path=args.word_Embedding_Path,
                      text_field_words_dict=text_field.vocab.itos,
                      pad=text_field.pad_token)
        args.pretrained_weight = pretrain_embed

    # print params
    show_params()

    # load model and start train
    if args.SumPooling is True:
        print("loading SumPooling model.....")
        model = model_SumPooling.SumPooling(args)
        shutil.copy("./models/model_SumPooling.py", args.save_dir)
        print(model)
        if args.use_cuda is True:
            print("using cuda......")
            model = model.cuda()
        print("CNN training start......")
        if os.path.exists("./Test_Result.txt"):
            os.remove("./Test_Result.txt")
        if args.IMDB is True:
            model_count = train_CV.train(train_iter, test_iter, model, args)

    # calculate the best result
    cal_result()
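
model_SumPooling.SumPooling is defined in the repository's models package and is not shown in these examples. A minimal, hypothetical sketch of a sum-pooling text classifier of this kind (embedding lookup, sum over the sequence dimension, then a linear classifier) might look like:

import torch.nn as nn

class SumPoolingSketch(nn.Module):
    # sketch only: embed token ids, sum-pool over the sequence, classify
    def __init__(self, embed_num, embed_dim, class_num, padding_id):
        super().__init__()
        self.embed = nn.Embedding(embed_num, embed_dim, padding_idx=padding_id)
        self.fc = nn.Linear(embed_dim, class_num)

    def forward(self, x):
        # x: LongTensor of shape (seq_len, batch), as produced by a torchtext iterator
        emb = self.embed(x)        # (seq_len, batch, embed_dim)
        pooled = emb.sum(dim=0)    # sum over the sequence dimension
        return self.fc(pooled)     # (batch, class_num) logits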