예제 #1
0
def prediction_all(test_dir=FilePath.validation_data_path[0],
                   train_file_dir=FilePath.data_path[0],
                   prediction_file_path=None,
                   config=None,
                   temp_path=None):
    """Train one classifier per label and write the merged predictions.

    The training data and its one-hot labels are first re-saved as
    per-label files under ``train_file_dir``. Then, for every label
    returned by ``get_all_labels``, a fresh ``TransformerClassifier`` is
    trained on ``<train_file_dir>/<label>.csv`` and applied to the test
    texts. Per-label probability and label predictions are stored in
    ``temp_path``; finally all per-label results found there are merged
    and handed to ``save_result_in_dir``.

    Args:
        test_dir: First element of the path tuple passed to
            ``Dataset.read_original_data`` for the data to predict on.
        train_file_dir: Directory used to discover the labels, to read
            the original training data/labels, and to read the per-label
            ``<label>.csv`` training files.
        prediction_file_path: Passed unchanged to ``save_result_in_dir``
            as the destination of the merged result.
            NOTE(review): the default ``None`` is forwarded as-is; confirm
            that ``save_result_in_dir`` accepts it.
        config: Mapping of keyword arguments for ``TransformerClassifier``.
            ``None`` (the default) means "no keyword arguments".
        temp_path: Directory for the intermediate per-label prediction
            files. Defaults to ``root_dir()`` when ``None``.

    Returns:
        None. Results are written through the ``Dataset`` / classifier
        save helpers and ``save_result_in_dir``.
    """
    # ``TransformerClassifier(**None)`` raises TypeError, so the documented
    # default must be turned into an empty kwargs mapping before use.
    classifier_kwargs = {} if config is None else config

    labels = get_all_labels(train_file_dir)
    test_text = Dataset.read_original_data((test_dir, FilePath.data_path[1]),
                                           mode="test")

    # Regenerate the per-label training files that the loop below reads.
    # NOTE(review): the training inputs are read with mode="test" exactly
    # as in the original code - confirm this is the intended reader mode.
    train_text = Dataset.read_original_data(
        (train_file_dir, FilePath.data_path[1]), mode="test")
    train_one_hot = Dataset.read_one_hot_label(
        (train_file_dir, FilePath.label_path[1]), mode="test")
    Dataset.save_splitted_file(train_text, train_one_hot,
                               dir_path=train_file_dir)

    if temp_path is None:
        temp_path = root_dir()

    for label in labels:
        print(label)
        train_data = Dataset.read_splitted_train_file(
            os.path.join(train_file_dir, label + ".csv"))
        cls = TransformerClassifier(**classifier_kwargs).train(
            train_data["data"], train_data["label"])

        # Keep both the raw probabilities and the derived label decision
        # so the merge step can pick them up from temp_path.
        prob = cls.predict_prob(test_text["data"].values.tolist())
        cls.save_prob_prediction_result(prob, label, temp_path)

        label_prediction_result = cls.prob_convert_to_label(prob)
        Dataset.save_label_prediction_result(label_prediction_result, label,
                                             temp_path)

    final = Dataset.merge_all_prediction_in_dir(temp_path, labels)
    save_result_in_dir(final, prediction_file_path)
예제 #2
0
def classifier_ensemble(prediction_paths, output_path):
    """Combine several prediction files into one and write it as CSV.

    Each path is loaded with ``Dataset.read_one_hot_label`` and the column
    named ``''`` is dropped. The resulting frames are added together
    element-wise, and every cell with a sum greater than zero counts as
    set. The boolean result is passed through ``reverse_transform`` and
    written to ``output_path`` without header or index, UTF-8 encoded.

    Args:
        prediction_paths: Iterable of prediction file paths to combine.
        output_path: Destination of the combined CSV file.

    Returns:
        None. The result is written to ``output_path``.
    """
    from functools import reduce

    def _load(path):
        # Drop the column named '' so that only the remaining columns
        # take part in the sum.
        frame = Dataset.read_one_hot_label(label_path=(path, ), mode="test")
        return frame.drop(columns=[''])

    frames = list(map(_load, prediction_paths))

    # A cell is positive in the ensemble as soon as one input marks it.
    summed = reduce(lambda left, right: left + right, frames)
    voted = summed > 0

    merged_labels = reverse_transform(voted)
    out_frame = pd.DataFrame(merged_labels)
    out_frame.to_csv(output_path,
                     header=False,
                     index=False,
                     encoding="utf-8")