Ejemplo n.º 1
0
    def test_treefoo_duh(self):
        """Check that labeled and unlabeled prediction runs score identically.

        The holdout frame is first predicted as-is with ``labeled=True``. It
        is then contracted, widened back out to ``s.ALL_COLUMNS`` and
        predicted again with ``labeled=False``. Both prediction sets are
        scored against the ``y_test`` returned by the first run, and the two
        proportions must be equal.
        """
        bundle, datasets, stations_df = bltu.make_basic_minimal_model()
        holdout_df = datasets['holdout_df']

        # First pass: labeled holdout data, which also yields the reference
        # labels used to score both passes.
        labeled_predictions, y_test, _ = blc.run_model_predict(
            bundle, holdout_df, stations_df, labeled=True)
        labeled_score = bltu.get_basic_proportion_correct(
            y_test, labeled_predictions)

        # Second pass: same rows, contracted and re-widened, run unlabeled.
        widened_df = blc.widen_df_with_other_cols(
            blc.contract_df(holdout_df), s.ALL_COLUMNS)
        unlabeled_predictions, _, _ = blc.run_model_predict(
            bundle, widened_df, stations_df, labeled=False)
        unlabeled_score = bltu.get_basic_proportion_correct(
            y_test, unlabeled_predictions)

        # The evaluation must not depend on whether the input was labeled.
        assert unlabeled_score == labeled_score
Ejemplo n.º 2
0
    def test_basic(self):
        """Smoke-test an unlabeled prediction run on two raw CSV rows.

        There is no assertion on the predictions themselves: the test passes
        as long as hydration, model construction and prediction do not raise.
        """
        raw_rows = '10/1/2015 00:00:02,W 26 St & 10 Ave,Subscriber,1973,1\n10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'

        widened_df = blc.hydrate_and_widen(raw_rows)
        bundle, _, stations_df = bltu.make_basic_minimal_model()

        # The result is still unpacked as a 3-tuple so that a change in the
        # return shape of run_model_predict keeps failing this test.
        _, _, _ = blc.run_model_predict(
            bundle, widened_df, stations_df, labeled=False)
Ejemplo n.º 3
0
    def test_assertion_happens_for_nan(self):
        """Expect ``pl.make_simple_df_from_raw`` to raise on a 'nan' value.

        A one-column frame holding the strings ``'foo'`` and ``'nan'`` under
        ``s.NEW_END_NEIGHBORHOOD`` is passed in with a ``str`` encoding for
        that column. The test passes if any ``Exception`` is raised.
        """
        _, _, stations_df = bltu.make_basic_minimal_model()
        raw_df = pd.DataFrame({s.NEW_END_NEIGHBORHOOD: ['foo', 'nan']})
        encodings = {s.NEW_END_NEIGHBORHOOD: str}

        # NOTE(review): the test name suggests an AssertionError is expected,
        # but any Exception subclass satisfies this check - confirm whether
        # it should be narrowed.
        try:
            _, _ = pl.make_simple_df_from_raw(raw_df, stations_df, encodings)
            raised = False
        except Exception:
            raised = True

        assert raised
Ejemplo n.º 4
0
    def test_treefoo_with_pure_input_data(self):
        """Smoke-test prediction on raw CSV rows hydrated and widened by hand.

        The rows are turned into a frame with ``blc.hydrate_csv_to_df``,
        widened to ``s.ALL_COLUMNS`` and predicted with ``labeled=False``.
        There is no assertion on the output: the test passes as long as
        nothing raises.
        """
        # The rows carry no header line. An earlier variant of this input
        # started with the header
        # 'starttime,start station name,usertype,birth year,gender'.
        raw_rows = '10/1/2015 00:00:02,W 26 St & 10 Ave,Subscriber,1973,1\n10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'

        hydrated_df = blc.hydrate_csv_to_df(raw_rows)
        bundle, _, stations_df = bltu.make_basic_minimal_model()
        widened_df = blc.widen_df_with_other_cols(hydrated_df, s.ALL_COLUMNS)

        # The result is still unpacked as a 3-tuple so that a change in the
        # return shape of run_model_predict keeps failing this test.
        _, _, _ = blc.run_model_predict(
            bundle, widened_df, stations_df, labeled=False)
Ejemplo n.º 5
0
    def test_basic(self):
        """Smoke-test ``clf.decision_path`` on a single unlabeled CSV row.

        One raw row is hydrated and widened, converted to classifier input
        with ``blc.df_to_np_for_clf`` (``labeled=False``) and passed to the
        ``decision_path`` method of the classifier stored under
        ``bundle['clf']``. There is no assertion on the returned path: the
        test passes as long as nothing raises.

        A leftover ``nose.tools.set_trace()`` breakpoint used to sit at the
        top of this test. It was removed because it stops the run at a
        debugger prompt, which blocks or fails non-interactive test runs.
        """
        csvdata = '10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'

        df = blc.hydrate_and_widen(csvdata)

        bundle, _, stations_df = bltu.make_basic_minimal_model()

        # With labeled=False only the second element of the returned pair
        # (the feature matrix) is used here.
        _, X_test = blc.df_to_np_for_clf(bundle,
                                         df,
                                         stations_df,
                                         labeled=False)
        clf = bundle['clf']

        # Result intentionally discarded: this only checks that the call
        # succeeds for the prepared input.
        clf.decision_path(X_test)
Ejemplo n.º 6
0
    def test_predict_matches_proba(self):
        """Check predictions against top-1 probabilities and rank-k scores.

        Two properties are verified on the holdout data:

        * the ``rank_k_proba_scores`` metric, read in ascending key order,
          never decreases;
        * the first entry of each top-1 output derived from
          ``y_predict_proba`` equals the corresponding prediction returned
          by ``blc.run_model_predict``.
        """
        bundle, datasets, stations_df = bltu.make_basic_minimal_model()
        holdout_df = datasets['holdout_df']
        classifier = bundle['clf']

        prepared = blc.predict_prepare(
            bundle, holdout_df, stations_df, labeled=True)
        top1_outputs = blmu.get_sorted_predict_proba_predictions(
            prepared['y_predict_proba'], classifier.classes_, k=1)

        y_predictions, _, metrics = blc.run_model_predict(
            bundle, holdout_df, stations_df, labeled=True)

        # Scores ordered by their key in the metrics dict.
        scores_by_rank = metrics['rank_k_proba_scores']
        ordered_scores = [scores_by_rank[rank] for rank in sorted(scores_by_rank)]

        # Each score must be at least as large as the one before it.
        assert all(
            [lower <= higher
             for lower, higher in zip(ordered_scores, ordered_scores[1:])])

        assert [output[0] for output in top1_outputs] == list(y_predictions)