Esempi in Python per get_dev_split

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: soln.dataset

Metodo/funzione: get_dev_split

Esempi su hotexamples.com: 6

get_dev_split in Python: 6 esempi trovati. Questi sono i migliori esempi reali in Python per soln.dataset.get_dev_split, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: tests.py Progetto: xign/kaggle-caterpillar

    def test_csv_merge(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        X_test_ext = get_extended_X(X_test, raw)

        # Check join with tube.csv.
        taid = 'TA-00034'
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        ref_diams = raw['tube'][raw['tube']['tube_assembly_id'] == taid]
        nose.tools.assert_equal(len(df['diameter'].unique()), 1)
        nose.tools.assert_equal(len(ref_diams), 1)
        nose.tools.assert_equal(
            df['diameter'].unique()[0],
            ref_diams['diameter'].values[0])

        # Check join with specs.csv.
        taid = 'TA-00207'
        ref_specs = ['SP-0063', 'SP-0070', 'SP-0080']
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        assert len(df['specs']) >= 1
        for val in df['specs']:
            nose.tools.assert_equal(val, ref_specs)

        # Check join with bill_of_materials.csv.
        taid = 'TA-00249'
        ref_components = [
            ('C-1536', 2.0),
            ('C-1642', 1.0),
            ('C-1649', 1.0),
        ]
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        assert len(df['components']) >= 1
        for val in df['components']:
            nose.tools.assert_equal(val, ref_components)

Esempio n. 2

Mostra file

File: tests.py Progetto: zhilangtaosha/kaggle-caterpillar

    def test_csv_merge(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        X_test_ext = get_extended_X(X_test, raw)

        # Check join with tube.csv.
        taid = 'TA-00034'
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        ref_diams = raw['tube'][raw['tube']['tube_assembly_id'] == taid]
        nose.tools.assert_equal(len(df['diameter'].unique()), 1)
        nose.tools.assert_equal(len(ref_diams), 1)
        nose.tools.assert_equal(df['diameter'].unique()[0],
                                ref_diams['diameter'].values[0])

        # Check join with specs.csv.
        taid = 'TA-00207'
        ref_specs = ['SP-0063', 'SP-0070', 'SP-0080']
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        assert len(df['specs']) >= 1
        for val in df['specs']:
            nose.tools.assert_equal(val, ref_specs)

        # Check join with bill_of_materials.csv.
        taid = 'TA-00249'
        ref_components = [
            ('C-1536', 2.0),
            ('C-1642', 1.0),
            ('C-1649', 1.0),
        ]
        df = X_test_ext[X_test['tube_assembly_id'] == taid]
        assert len(df['components']) >= 1
        for val in df['components']:
            nose.tools.assert_equal(val, ref_components)

Esempio n. 3

Mostra file

File: tests.py Progetto: xign/kaggle-caterpillar

    def test_custom_features(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        X_train_ext = get_extended_X(X_train, raw)
        featurizer = CustomFeaturizer()
        featurizer.fit(X_train_ext)
        X_train_feats = featurizer.transform(X_train_ext)

        # Check adj_quantity feature.
        taid = 'TA-01916'
        df = X_train_feats[X_train['tube_assembly_id'] == taid]
        nose.tools.assert_equal(len(df), 2)
        nose.tools.assert_true(np.all(df.min_order_quantity == [1, 4]))
        nose.tools.assert_true(np.all(df.quantity == [1, 1]))
        nose.tools.assert_true(np.all(df.adj_quantity == [1, 4]))

Esempio n. 4

Mostra file

File: tests.py Progetto: zhilangtaosha/kaggle-caterpillar

    def test_custom_features(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        X_train_ext = get_extended_X(X_train, raw)
        featurizer = CustomFeaturizer()
        featurizer.fit(X_train_ext)
        X_train_feats = featurizer.transform(X_train_ext)

        # Check adj_quantity feature.
        taid = 'TA-01916'
        df = X_train_feats[X_train['tube_assembly_id'] == taid]
        nose.tools.assert_equal(len(df), 2)
        nose.tools.assert_true(np.all(df.min_order_quantity == [1, 4]))
        nose.tools.assert_true(np.all(df.quantity == [1, 1]))
        nose.tools.assert_true(np.all(df.adj_quantity == [1, 4]))

Esempio n. 5

Mostra file

File: tests.py Progetto: xign/kaggle-caterpillar

    def test_dev_split_no_overlap(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)

        # All instances assigned to either test or train.
        nose.tools.assert_equal(len(X_train), len(y_train))
        nose.tools.assert_equal(len(X_test), len(y_test))
        nose.tools.assert_equal(
            len(X_train) + len(X_test), len(raw['train_set']))

        # Test and train contain no `tube_assembly_id`s in common.
        train_ids = set(X_train['tube_assembly_id'])
        test_ids = set(X_test['tube_assembly_id'])
        nose.tools.assert_false(train_ids.intersection(test_ids))

        # Split is roughly 90% / 10%.
        test_frac = 1.0 * len(X_test) / len(raw['train_set'])
        print test_frac
        nose.tools.assert_almost_equals(test_frac, 0.1, delta=0.05)

Esempio n. 6

Mostra file

File: tests.py Progetto: zhilangtaosha/kaggle-caterpillar

    def test_dev_split_no_overlap(self):
        X_train, y_train, X_test, y_test = get_dev_split(raw)

        # All instances assigned to either test or train.
        nose.tools.assert_equal(len(X_train), len(y_train))
        nose.tools.assert_equal(len(X_test), len(y_test))
        nose.tools.assert_equal(
            len(X_train) + len(X_test), len(raw['train_set']))

        # Test and train contain no `tube_assembly_id`s in common.
        train_ids = set(X_train['tube_assembly_id'])
        test_ids = set(X_test['tube_assembly_id'])
        nose.tools.assert_false(train_ids.intersection(test_ids))

        # Split is roughly 90% / 10%.
        test_frac = 1.0 * len(X_test) / len(raw['train_set'])
        print test_frac
        nose.tools.assert_almost_equals(test_frac, 0.1, delta=0.05)