def test_num_features(self):
        """Check the column count of the feature matrix built from two text columns.

        Configures the feature pipeline with ``min_df=1``, fits it on the
        ``text1`` / ``text2`` columns of the fixture together with its
        ``label`` column, transforms the same input, and expects 70 columns.
        """
        # TODO THIS TEST REDUNDANT
        configure_pipeline(self.__experiment_dir, 'data', min_df=1)
        feature_pipe = PipelineFeatures(self.__experiment_dir, 'train.pkl.bz2')

        # Each text column is passed as its own single-column selection.
        X_text = [self.__text[['text1']], self.__text[['text2']]]
        feature_pipe.fit(X_text, self.__text['label'])
        X = feature_pipe.transform(X_text, self.__text['label'])

        # The third positional argument of assertEqual is the failure message,
        # not a tolerance, so the stray ``0`` that used to be passed is dropped.
        self.assertEqual(X.shape[1], 70)
    def test_features_dims_two_text_columns(self):
        """Check both dimensions of the feature matrix for two text columns.

        After configuring the pipeline with ``min_df=1``, fitting and
        transforming the ``text1`` / ``text2`` columns, the result is expected
        to have 10 rows and 70 columns.
        """
        configure_pipeline(self.__experiment_dir, 'data', min_df=1)
        pipeline = PipelineFeatures(self.__experiment_dir, 'train.pkl.bz2')

        labels = self.__text['label']
        # Each text column is passed as its own single-column selection.
        text_columns = [self.__text[['text1']], self.__text[['text2']]]
        pipeline.fit(text_columns, labels)
        features = pipeline.transform(text_columns, self.__text['label'])

        # Read both dimensions before asserting on either of them.
        dims = features.shape
        row_count, col_count = dims[0], dims[1]

        self.assertEqual(row_count, 10)
        self.assertEqual(col_count, 70)
Example #3
0
# ---- Text processing --- #

# Configure the text stage for this experiment, then fit it on the training
# split and apply it to the validation split.
pt.configure_pipeline(experiment_path, data_path, spell=True, split_words=True, text_headers=['RECDESC', 'EXPDESC'],
                      stop_words=True, lemmatize=False, stemm=False)

pipe_text = pt.PipelineText(experiment_path, 'train.pkl.bz2')
pipe_text.fit_transform()

pipe_text = pt.PipelineText(experiment_path, 'valid.pkl.bz2')
pipe_text.transform()


# ---- Feature engineering --- #

pf.configure_pipeline(experiment_path, feature_set=['frequency_matrix'], num_features=0, idf=True,
                       feature_selection_type='Logistic', min_df=3, min_ngram=1, max_ngram=3)

# Use the same experiment directory that was just configured instead of a
# hard-coded 'exp_1', so this stage cannot drift from the text and model
# stages when experiment_path points somewhere else.
pipe_features = pf.PipelineFeatures(experiment_path, 'train.pkl.bz2')
pipe_features.fit_transform()

pipe_features = pf.PipelineFeatures(experiment_path, 'valid.pkl.bz2')
pipe_features.transform()


# ---- Model training --- #

pm.configure_pipeline(experiment_path)


# ---- MODEL 1 --- #
# The classifier is looked up by name through the factory and trained on the
# training split.
cls = MLFactory.factory(classifier_name)
pipe_ml = pm.MLTrainTest(experiment_path, 'train.pkl.bz2', classifier=cls)
pipe_ml.fit_transform()
    def config_location(self):
        """Check where the features config file is expected to live.

        Runs ``configure_pipeline`` for the experiment directory, builds the
        path ``<experiment_dir>/features/config.json`` and compares it with the
        literal ``/tests/exp/features/config.json``; the comparison therefore
        only holds when the experiment directory is ``/tests/exp`` and the
        platform uses ``/`` as path separator.

        NOTE(review): the name has no ``test_`` prefix, so default unittest
        discovery presumably does not run this method - confirm whether that
        is intentional.
        """
        configure_pipeline(self.__experiment_dir, 'data')

        path_parts = (self.__experiment_dir, 'features', 'config.json')
        location = os.path.join(*path_parts)

        self.assertEqual(location, '/tests/exp/features/config.json')
 def test_config_file_non_empty(self):
     """Check that the features config file is not empty after configuration.

     Runs ``configure_pipeline`` for the experiment directory and asserts
     that ``<experiment_dir>/features/config.json`` has a non-zero size.
     """
     configure_pipeline(self.__experiment_dir, 'data')

     features_dir = os.path.join(self.__experiment_dir, 'features')
     config_file = os.path.join(features_dir, 'config.json')

     # getsize raises OSError if the file is missing, which also fails the test.
     self.assertNotEqual(os.path.getsize(config_file), 0)