# Replace the raw label column of each split with whatever encode() returns.
for split_df in (train, dev):
    split_df['labels'] = encode(split_df["labels"])

n_folds = args["n_fold"]

# Dev texts as a plain list, plus a (num_dev_rows, n_fold) zero matrix.
# Presumably one column per fold is filled with predictions further down;
# that part is not visible in this section.
dev_sentences = dev['text'].tolist()
dev_preds = np.zeros(shape=(len(dev), n_folds))

if args["evaluate_during_training"]:
    for i in range(n_folds):
        # Wipe the output directory left behind by the previous fold (or an
        # earlier run). os.path.isdir() is already False for missing paths.
        out_dir = args['output_dir']
        if os.path.isdir(out_dir):
            shutil.rmtree(out_dir)

        print("Started Fold {}".format(i))

        # Checked once per fold and reused for both model constructions.
        use_gpu = torch.cuda.is_available()

        # You can set class weights by using the optional weight argument
        model = ClassificationModel(MODEL_TYPE,
                                    MODEL_NAME,
                                    args=args,
                                    use_cuda=use_gpu)

        # Hold out 10% of the training data for evaluation in this fold.
        # NOTE(review): SEED * i yields random_state 0 for fold 0 whatever
        # SEED is (assuming SEED is numeric) - confirm this is intended.
        train_df, eval_df = train_test_split(
            train, test_size=0.1, random_state=SEED * i)

        # Extra metric callables forwarded to train_model as keyword args.
        extra_metrics = dict(macro_f1=macro_f1,
                             weighted_f1=weighted_f1,
                             accuracy=sklearn.metrics.accuracy_score)
        model.train_model(train_df, eval_df=eval_df, **extra_metrics)

        # Rebuild the model from args["best_model_dir"] (presumably the best
        # checkpoint written during training - confirm against the library).
        model = ClassificationModel(MODEL_TYPE,
                                    args["best_model_dir"],
                                    args=args,
                                    use_cuda=use_gpu)
Exemple #2
0
# Replace the raw label column of each split with whatever encode() returns.
for split_df in (train, test):
    split_df['labels'] = encode(split_df["labels"])

n_folds = args["n_fold"]

# Test texts as a plain list, plus a (num_test_rows, n_fold) zero matrix.
# Presumably one column per fold is filled with predictions further down;
# that part is not visible in this section.
test_sentences = test['text'].tolist()
test_preds = np.zeros(shape=(len(test), n_folds))

if args["evaluate_during_training"]:
    for i in range(n_folds):
        # Wipe the output directory left behind by the previous fold (or an
        # earlier run). os.path.isdir() is already False for missing paths.
        out_dir = args['output_dir']
        if os.path.isdir(out_dir):
            shutil.rmtree(out_dir)

        print("Started Fold {}".format(i))

        # Checked once per fold and reused for both model constructions.
        use_gpu = torch.cuda.is_available()

        # You can set class weights by using the optional weight argument
        model = ClassificationModel(MODEL_TYPE,
                                    MODEL_NAME,
                                    args=args,
                                    use_cuda=use_gpu)

        # Hold out 10% of the training data for evaluation in this fold.
        # NOTE(review): SEED * i yields random_state 0 for fold 0 whatever
        # SEED is (assuming SEED is numeric) - confirm this is intended.
        train_df, eval_df = train_test_split(
            train, test_size=0.1, random_state=SEED * i)

        # Extra metric callables forwarded to train_model as keyword args.
        extra_metrics = dict(macro_f1=macro_f1,
                             weighted_f1=weighted_f1,
                             accuracy=sklearn.metrics.accuracy_score)
        model.train_model(train_df, eval_df=eval_df, **extra_metrics)

        # Rebuild the model from args["best_model_dir"] (presumably the best
        # checkpoint written during training - confirm against the library).
        model = ClassificationModel(MODEL_TYPE,
                                    args["best_model_dir"],
                                    args=args,
                                    use_cuda=use_gpu)