Esempio n. 1
0
    def test_csv_merge(self):
        """Spot-check the columns that get_extended_X adds to the test split.

        One tube assembly is inspected per joined table (tube, specs, bill
        of materials) and compared against a reference value.
        """
        _, _, X_test, _ = get_dev_split(raw)
        extended = get_extended_X(X_test, raw)

        def rows_for(assembly_id):
            # The boolean mask is built on X_test and applied to the
            # extended frame, exactly as each individual check needs it.
            return extended[X_test['tube_assembly_id'] == assembly_id]

        # Join with tube.csv: the merged diameter must be unique and equal
        # to the single matching row of the raw tube table.
        tube_taid = 'TA-00034'
        merged = rows_for(tube_taid)
        tube_table = raw['tube']
        expected_rows = tube_table[tube_table['tube_assembly_id'] == tube_taid]
        merged_diams = merged['diameter'].unique()
        nose.tools.assert_equal(len(merged_diams), 1)
        nose.tools.assert_equal(len(expected_rows), 1)
        nose.tools.assert_equal(
            merged_diams[0], expected_rows['diameter'].values[0])

        # Join with specs.csv: every selected row carries the full spec list.
        expected_specs = ['SP-0063', 'SP-0070', 'SP-0080']
        spec_values = rows_for('TA-00207')['specs']
        assert len(spec_values) >= 1
        for actual_specs in spec_values:
            nose.tools.assert_equal(actual_specs, expected_specs)

        # Join with bill_of_materials.csv: every selected row carries the
        # full list of (component, quantity) pairs.
        expected_components = [
            ('C-1536', 2.0),
            ('C-1642', 1.0),
            ('C-1649', 1.0),
        ]
        component_values = rows_for('TA-00249')['components']
        assert len(component_values) >= 1
        for actual_components in component_values:
            nose.tools.assert_equal(actual_components, expected_components)
Esempio n. 2
0
    def test_csv_merge(self):
        """Verify the per-table joins performed by get_extended_X.

        The tube join is checked against the raw tube table; the specs and
        bill-of-materials joins are checked against hard-coded references.
        """
        _, _, X_test, _ = get_dev_split(raw)
        X_test_ext = get_extended_X(X_test, raw)
        assembly_ids = X_test['tube_assembly_id']

        # Check join with tube.csv.
        tube_taid = 'TA-00034'
        joined = X_test_ext[assembly_ids == tube_taid]
        raw_tube = raw['tube']
        ref_rows = raw_tube[raw_tube['tube_assembly_id'] == tube_taid]
        distinct_diams = joined['diameter'].unique()
        nose.tools.assert_equal(len(distinct_diams), 1)
        nose.tools.assert_equal(len(ref_rows), 1)
        nose.tools.assert_equal(distinct_diams[0],
                                ref_rows['diameter'].values[0])

        # The remaining joins produce list-valued columns; each case is
        # (assembly id, column name, expected list) and is checked the
        # same way: at least one row, and every row equal to the reference.
        list_column_cases = [
            # Check join with specs.csv.
            ('TA-00207', 'specs', ['SP-0063', 'SP-0070', 'SP-0080']),
            # Check join with bill_of_materials.csv.
            ('TA-00249', 'components', [
                ('C-1536', 2.0),
                ('C-1642', 1.0),
                ('C-1649', 1.0),
            ]),
        ]
        for taid, column, reference in list_column_cases:
            column_values = X_test_ext[assembly_ids == taid][column]
            assert len(column_values) >= 1
            for val in column_values:
                nose.tools.assert_equal(val, reference)
Esempio n. 3
0
    def test_custom_features(self):
        """Spot-check the adj_quantity column produced by CustomFeaturizer.

        The featurizer is fitted on the extended training split and then
        applied to that same data; the rows of one tube assembly are
        compared against hard-coded expected values.
        """
        X_train, _, _, _ = get_dev_split(raw)
        extended = get_extended_X(X_train, raw)
        featurizer = CustomFeaturizer()
        featurizer.fit(extended)
        feats = featurizer.transform(extended)

        # Check adj_quantity feature.
        rows = feats[X_train['tube_assembly_id'] == 'TA-01916']
        nose.tools.assert_equal(len(rows), 2)
        expected_by_column = (
            ('min_order_quantity', [1, 4]),
            ('quantity', [1, 1]),
            ('adj_quantity', [1, 4]),
        )
        for column, expected in expected_by_column:
            # Attribute-style column access, element-wise comparison.
            matches = getattr(rows, column) == expected
            nose.tools.assert_true(np.all(matches))
Esempio n. 4
0
    def test_custom_features(self):
        """Check CustomFeaturizer output for one known tube assembly.

        Fits and transforms the extended training split, then compares
        min_order_quantity, quantity and adj_quantity for the selected rows
        against hard-coded values.
        """
        X_train, _, _, _ = get_dev_split(raw)
        train_ext = get_extended_X(X_train, raw)

        custom = CustomFeaturizer()
        custom.fit(train_ext)
        train_feats = custom.transform(train_ext)

        # Check adj_quantity feature.
        is_target = X_train['tube_assembly_id'] == 'TA-01916'
        picked = train_feats[is_target]
        nose.tools.assert_equal(len(picked), 2)

        min_order_ok = np.all(picked.min_order_quantity == [1, 4])
        nose.tools.assert_true(min_order_ok)

        quantity_ok = np.all(picked.quantity == [1, 1])
        nose.tools.assert_true(quantity_ok)

        adjusted_ok = np.all(picked.adj_quantity == [1, 4])
        nose.tools.assert_true(adjusted_ok)
Esempio n. 5
0
    def test_dev_split_no_overlap(self):
        """Check that get_dev_split partitions raw['train_set'] cleanly.

        Verifies that features and labels have matching lengths in each
        split, that the two splits together account for every row of
        ``raw['train_set']``, that no ``tube_assembly_id`` appears in both
        splits, and that the test split holds roughly 10% of the rows.
        """
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        total_rows = len(raw['train_set'])

        # All instances assigned to either test or train.
        nose.tools.assert_equal(len(X_train), len(y_train))
        nose.tools.assert_equal(len(X_test), len(y_test))
        nose.tools.assert_equal(len(X_train) + len(X_test), total_rows)

        # Test and train contain no `tube_assembly_id`s in common.
        train_ids = set(X_train['tube_assembly_id'])
        test_ids = set(X_test['tube_assembly_id'])
        nose.tools.assert_false(train_ids.intersection(test_ids))

        # Split is roughly 90% / 10%.
        test_frac = float(len(X_test)) / total_rows
        # Parenthesized single-argument print gives the same output under
        # the Python 2 print statement and the Python 3 print function.
        print(test_frac)
        # `assert_almost_equal` replaces the deprecated `..._equals` alias.
        nose.tools.assert_almost_equal(test_frac, 0.1, delta=0.05)
Esempio n. 6
0
    def test_dev_split_no_overlap(self):
        """Validate the train/test split returned by get_dev_split.

        Three properties are asserted: each split's features and labels
        have equal length and the splits together cover every row of
        ``raw['train_set']``; the sets of ``tube_assembly_id`` values in the
        two splits are disjoint; and the test fraction is within 0.05 of
        0.1.
        """
        X_train, y_train, X_test, y_test = get_dev_split(raw)
        n_train, n_test = len(X_train), len(X_test)
        n_total = len(raw['train_set'])

        # All instances assigned to either test or train.
        nose.tools.assert_equal(n_train, len(y_train))
        nose.tools.assert_equal(n_test, len(y_test))
        nose.tools.assert_equal(n_train + n_test, n_total)

        # Test and train contain no `tube_assembly_id`s in common.
        shared_ids = (
            set(X_train['tube_assembly_id']) & set(X_test['tube_assembly_id']))
        nose.tools.assert_false(shared_ids)

        # Split is roughly 90% / 10%.
        test_frac = float(n_test) / n_total
        # Function-call form of print is valid on both Python 2 and 3; the
        # bare `print x` statement is a SyntaxError on Python 3.
        print(test_frac)
        # Use the non-deprecated spelling of the almost-equal assertion.
        nose.tools.assert_almost_equal(test_frac, 0.1, delta=0.05)