class FeatureExtractorClassifier(object):
    """Two-stage workflow: a feature extractor followed by a classifier.

    Difference with the FeatureExtractorClassifier from ramp-workflow:
    `test_submission` does not return the bare ``y_proba``; it returns an
    array whose first column is the index of ``X_df`` (converted to
    minute-resolution integers) followed by the columns of ``y_proba``.
    """

    def __init__(self):
        """Create the two sub-workflows, one per submission element."""
        fe_name, clf_name = 'feature_extractor', 'classifier'
        self.element_names = [fe_name, clf_name]
        self.feature_extractor_workflow = FeatureExtractor([fe_name])
        self.classifier_workflow = Classifier([clf_name])

    def train_submission(self, module_path, X_df, y_array, train_is=None):
        """Train the feature extractor, then the classifier.

        Parameters
        ----------
        module_path
            Forwarded unchanged to both sub-workflows.
        X_df
            Input data; rows are selected positionally with ``.iloc``.
        y_array
            Targets; indexed with the same selector as ``X_df``.
        train_is : optional
            Selector of the training rows. ``None`` (the default) selects
            every row.

        Returns
        -------
        tuple
            ``(fe, clf)``: the trained feature extractor and classifier.
        """
        # An all-inclusive slice stands in for "use every row".
        rows = slice(None) if train_is is None else train_is

        extractor = self.feature_extractor_workflow
        fe = extractor.train_submission(module_path, X_df, y_array, rows)

        # The classifier is fitted on the features of the training rows only.
        train_features = extractor.test_submission(fe, X_df.iloc[rows])
        clf = self.classifier_workflow.train_submission(
            module_path, train_features, y_array[rows])
        return fe, clf

    def test_submission(self, trained_model, X_df):
        """Predict with a trained ``(fe, clf)`` pair.

        Parameters
        ----------
        trained_model : tuple
            The ``(fe, clf)`` pair returned by `train_submission`.
        X_df
            Input data; its index values must be convertible to
            ``datetime64[m]``.

        Returns
        -------
        numpy.ndarray
            The index of ``X_df`` as integer minutes in the first column,
            followed by the classifier output (presumably a 2-D array of
            class probabilities -- confirm against the classifier workflow).
        """
        fe, clf = trained_model
        features = self.feature_extractor_workflow.test_submission(fe, X_df)
        y_proba = self.classifier_workflow.test_submission(clf, features)

        # Encode each timestamp of the index as an integer count of minutes.
        minutes = X_df.index.values.astype('datetime64[m]').astype(int)
        # Put the encoded index in front of the predictions, column-wise.
        return np.concatenate((minutes.reshape(-1, 1), y_proba), axis=1)
def __init__(self):
    """Create the feature-extractor and classifier sub-workflows.

    Each sub-workflow is built with a one-item list holding its own
    element name; both names are also stored in ``self.element_names``.
    """
    fe_name, clf_name = 'feature_extractor', 'classifier'
    self.element_names = [fe_name, clf_name]
    self.feature_extractor_workflow = FeatureExtractor([fe_name])
    self.classifier_workflow = Classifier([clf_name])