def prediction_all(test_dir=FilePath.validation_data_path[0],
                   train_file_dir=FilePath.data_path[0],
                   prediction_file_path=None,
                   config=None,
                   temp_path=None):
    """Train one classifier per label and save the merged test predictions.

    Steps, in order:

    1. Obtain the label names via ``get_all_labels(train_file_dir)``.
    2. Read the test data from ``(test_dir, FilePath.data_path[1])``.
    3. Read the training data and one-hot labels from ``train_file_dir`` and
       pass them to ``Dataset.save_splitted_file`` with
       ``dir_path=train_file_dir``.
    4. For every label, read ``<train_file_dir>/<label>.csv``, train a
       ``TransformerClassifier`` on its ``"data"`` / ``"label"`` entries,
       predict probabilities for the test ``"data"`` column, and save both
       the probabilities and the converted labels under ``temp_path``.
    5. Merge the per-label results found in ``temp_path`` and hand the merged
       result to ``save_result_in_dir`` together with ``prediction_file_path``.

    Args:
        test_dir: Directory of the test data.
        train_file_dir: Directory of the training data and label files; the
            per-label CSV files are also read from here.
        prediction_file_path: Forwarded unchanged to ``save_result_in_dir``.
        config: Keyword arguments for ``TransformerClassifier``. ``None``
            (the default) means "no keyword arguments".
        temp_path: Directory for the intermediate per-label results. When
            ``None``, the value returned by ``root_dir()`` is used.

    Returns:
        None.
    """
    # The declared default is None, and ``**None`` raises TypeError, so fall
    # back to an empty mapping instead of crashing on the default call.
    classifier_kwargs = {} if config is None else config

    labels = get_all_labels(train_file_dir)
    test_text = Dataset.read_original_data(
        (test_dir, FilePath.data_path[1]), mode="test")

    # NOTE(review): the training files are read with mode="test" in the
    # original code as well; presumably that mode only affects parsing --
    # confirm against Dataset.
    Dataset.save_splitted_file(
        Dataset.read_original_data(
            (train_file_dir, FilePath.data_path[1]), mode="test"),
        Dataset.read_one_hot_label(
            (train_file_dir, FilePath.label_path[1]), mode="test"),
        dir_path=train_file_dir)

    if temp_path is None:
        temp_path = root_dir()

    for label in labels:
        print(label)
        train_data = Dataset.read_splitted_train_file(
            os.path.join(train_file_dir, label + ".csv"))
        # ``train`` is expected to return the fitted classifier, since the
        # result is used for prediction below.
        cls = TransformerClassifier(**classifier_kwargs).train(
            train_data["data"], train_data["label"])
        prob = cls.predict_prob(test_text["data"].values.tolist())
        cls.save_prob_prediction_result(prob, label, temp_path)
        label_prediction_result = cls.prob_convert_to_label(prob)
        Dataset.save_label_prediction_result(
            label_prediction_result, label, temp_path)

    final = Dataset.merge_all_prediction_in_dir(temp_path, labels)
    save_result_in_dir(final, prediction_file_path)
def classifier_ensemble(prediction_paths, output_path):
    """Combine several prediction files into one and write it as CSV.

    Each path in ``prediction_paths`` is loaded with
    ``Dataset.read_one_hot_label`` (``mode="test"``) and the column named
    ``''`` is dropped. The loaded frames are added together, every cell
    greater than zero is marked ``True``, and the resulting boolean frame is
    passed to ``reverse_transform``. The outcome is written to
    ``output_path`` without header or index, encoded as UTF-8.

    Args:
        prediction_paths: Iterable of prediction file paths to combine.
        output_path: Destination of the combined CSV file.

    Returns:
        None.
    """
    from functools import reduce

    frames = []
    for path in prediction_paths:
        one_hot = Dataset.read_one_hot_label(label_path=(path, ), mode="test")
        frames.append(one_hot.drop(columns=['']))

    # A cell is positive after summation when any input file marked it.
    summed = reduce(lambda acc, frame: acc + frame, frames)
    any_positive = summed > 0

    merged_labels = reverse_transform(any_positive)
    out_frame = pd.DataFrame(merged_labels)
    out_frame.to_csv(output_path, header=False, index=False,
                     encoding="utf-8")