def transform(self, multilabel_dataset):
    """Apply the fitted selectors to both splits of ``multilabel_dataset``.

    The training instance features are passed through
    ``self.selector_train`` and the test instance features through
    ``self.selector_test``. Each reduced result is stored back on the
    dataset via the matching setter, after which the corresponding
    ``filter_total_features_and_update_*`` method is invoked.

    Returns the same ``multilabel_dataset`` object that was passed in.
    """
    # --- training split ---
    train_input = get_instance_features_train(multilabel_dataset)
    set_instance_features_train(
        multilabel_dataset,
        self.selector_train.transform(train_input),
    )
    self.filter_total_features_and_update_train(multilabel_dataset)

    # --- test split (handled only after the training split is complete) ---
    test_input = get_instance_features_test(multilabel_dataset)
    set_instance_features_test(
        multilabel_dataset,
        self.selector_test.transform(test_input),
    )
    self.filter_total_features_and_update_test(multilabel_dataset)

    return multilabel_dataset
def classify(self, multilabel_dataset):
    """Predict on the test instance features of ``multilabel_dataset``.

    The features are read with ``get_instance_features_test`` and handed to
    ``self.classifier.predict``; whatever iterable that yields is
    materialised into a plain ``list`` and returned.
    """
    test_features = get_instance_features_test(multilabel_dataset)
    predictions = self.classifier.predict(test_features)
    return list(predictions)
def create_features_dataframe(multilabel_dataset):
    """Build a ``DataFrame`` from the test instance features of a dataset.

    Column names are taken from the first element of every entry returned
    by ``get_total_features_test``; the rows are the value returned by
    ``get_instance_features_test``.
    """

    def _leading_item(entry):
        # Each total-features entry is indexable; position 0 is used as the
        # column name.
        return entry[0]

    rows = get_instance_features_test(multilabel_dataset)
    feature_entries = get_total_features_test(multilabel_dataset)
    column_names = Stream(feature_entries).map(_leading_item).as_list()
    return DataFrame(rows, columns=column_names)
def test_get_total_features_test(self):
    """Check that the value read from the fixture dataset is a ``list``.

    The value is obtained by calling ``get_instance_features_test`` on
    ``self.multilabel_dataset``.
    """
    # NOTE(review): the test name and the failure message both refer to
    # "total_features_test", yet the accessor called here is
    # get_instance_features_test -- confirm which accessor was intended.
    features = get_instance_features_test(self.multilabel_dataset)
    # isinstance is the idiomatic type check (it also accepts list
    # subclasses, unlike the previous exact ``type(...) == type([])`` test).
    assert isinstance(features, list), "total_features_test should be a list"