Example #1
from keras import Model
from keras.layers import Dense

from keras_pandas.Automater import Automater
from keras_pandas.lib import load_titanic


def main():
    observations = load_titanic()

    # Transform the data set, using keras_pandas
    categorical_vars = ['pclass', 'sex', 'survived']
    numerical_vars = [
        'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
    ]
    text_vars = ['name']

    auto = Automater(categorical_vars=categorical_vars,
                     numerical_vars=numerical_vars,
                     text_vars=text_vars,
                     response_var='survived')
    X, y = auto.fit_transform(observations)

    # Start model with provided input nub
    x = auto.input_nub

    # Fill in your own hidden layers
    x = Dense(256)(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(256)(x)

    # End model with provided output nub
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

    # Train model
    model.fit(X, y, epochs=15, validation_split=.2)
Example #2
    def test_whole(self):
        # Create datatype
        datatype = Boolean()

        # Load observations
        observations = lib.load_titanic()
        variable_name = 'survived'

        # Transform observations
        mapper = DataFrameMapper(
            [([variable_name], datatype.default_transformation_pipeline)],
            df_out=True)
        transformed_df = mapper.fit_transform(observations)

        # Create network
        input_layer, input_nub = datatype.input_nub_generator(
            variable_name, transformed_df)
        output_nub = datatype.output_nub_generator(variable_name,
                                                   transformed_df)

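        # Connect the input nub directly to the output nub (no hidden layers)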
        x = input_nub
        x = output_nub(x)

        model = Model(input_layer, x)
        model.compile(optimizer='adam', loss=datatype.output_suggested_loss())

Example #3
    def test_empty_strings(self):
        data = lib.load_titanic()
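        # Keep only the free-text 'name' column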
        data = data[['name']]

        ev = EmbeddingVectorizer()

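        # Fitting on the raw text column should tolerate empty strings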
        ev.fit(data)
Example #4
    def test_create_input_nub(self):
        data = lib.load_titanic()

        # One variable
        text_vars = ['name']
        auto = Automater(text_vars=text_vars)
        auto.fit(data)

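        # A single text variable should produce exactly one input layer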
        self.assertEqual(1, len(auto.input_layers))
Example #5
from keras import Model
from keras.layers import Dense
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():

    # Load data
    observations = lib.load_titanic()
    # observations = lib.load_lending_club(test_run=False)
    print('Observation columns: {}'.format(list(observations.columns)))
    print('Class balance:\n {}'.format(
        observations['survived'].value_counts()))

    # List out variable types
    numerical_vars = [
        'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
    ]
    categorical_vars = ['survived', 'pclass', 'sex']
    text_vars = ['name']

    for var in categorical_vars:
        observations[var] = observations[var].astype(str)

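    # Hold out a test set; the copies avoid SettingWithCopyWarning when the
    # prediction column is added later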
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # Create and fit Automater
    auto = Automater(numerical_vars=numerical_vars,
                     categorical_vars=categorical_vars,
                     text_vars=text_vars,
                     response_var='survived')
    auto.fit(train_observations)

    # Create and fit a Keras (deep learning) model. auto.transform, auto.input_nub,
    # auto.input_layers, and auto.loss are provided by keras-pandas; everything else
    # is core Keras
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

    model.fit(train_X, train_y)

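    # Predict on the held-out test set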
    test_y_pred = model.predict(test_X)

    # Inverse transform model output, to get usable results and save all results
    test_observations[auto.response_var +
                      '_pred'] = auto.inverse_transform_output(test_y_pred)
    print('Predictions: {}'.format(test_observations[auto.response_var +
                                                     '_pred']))

Example #6
    def test_mapper(self):
        data = lib.load_titanic()

        transformation_list = [(['name'],
                                [EmbeddingVectorizer(max_sequence_length=12)])]

        mapper = DataFrameMapper(transformation_list, df_out=True)

        mapper.fit(data)

        data_transformed = mapper.transform(data)

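        # First row: the name's token indices, padded out to max_sequence_length=12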
        assert_array_equal([2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1],
                           data_transformed.values[0, :])
Example #7
    def test_fit(self):
        data = lib.load_titanic()
        # One variable
        text_vars = ['name']

        auto = Automater(text_vars=text_vars)
        auto.fit(data)

        self.assertEqual(Automater, type(auto))
        self.assertEqual(text_vars, auto._user_provided_variables)
        self.assertTrue(auto.fitted)

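        # The input mapper should have built exactly one feature, for the 'name' column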
        self.assertEqual([['name']],
                         list(
                             map(lambda x: x[0],
                                 auto.input_mapper.built_features)))
Example #8
    def test_whole(self):
        # Create datatype
        datatype = Numerical()

        # Load observations
        observations = lib.load_titanic()

        # Transform observations
        mapper = DataFrameMapper(
            [(['fare'], datatype.default_transformation_pipeline)],
            df_out=True)
        transformed_df = mapper.fit_transform(observations)

        # Create network
        input_layer, input_nub = datatype.input_nub_generator(
            'fare', transformed_df)
        output_nub = datatype.output_nub_generator('fare', transformed_df)

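        # Minimal network: the input nub feeds straight into the output nub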
        x = input_nub
        x = output_nub(x)

        model = Model(input_layer, x)
        model.compile(optimizer='adam', loss=datatype.output_suggested_loss())
Example #9
    def test_whole(self):
        data = lib.load_titanic()

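        # Random ~95/5 train/test split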
        msk = numpy.random.rand(len(data)) < 0.95
        data_train = data[msk]
        data_test = data[~msk]

        text_vars = ['name']
        categorical_vars = ['survived']

        # Create auto
        auto = Automater(text_vars=text_vars,
                         categorical_vars=categorical_vars,
                         response_var='survived')

        # Train auto
        auto.fit(data_train)
        X_train, y_train = auto.transform(data_train)

        # Create model

        x = auto.input_nub
        x = Dense(30, activation='relu')(x)
        x = auto.output_nub(x)

        model = Model(inputs=auto.input_layers, outputs=x)
        model.compile(optimizer='Adam', loss=auto.loss)

        # Train DL model
        model.fit(X_train, y_train)

        # Transform test set
        data_test = data_test.drop('survived', axis=1)
        X_test, y_test = auto.transform(data_test)
        model.predict(X_test)

Example #10
    def test_whole(self):
        datatype = Text()

        # Load observations
        observations = lib.load_titanic()

        # Transform observations
        mapper = DataFrameMapper(
            [(['name'], datatype.default_transformation_pipeline),
             (['fare'], None)],
            df_out=True)
        transformed_df = mapper.fit_transform(observations)

        # Create network
        input_layer, input_nub = datatype.input_nub_generator(
            'name', transformed_df)
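        # Use a single Dense unit as the output, rather than a datatype-supplied output nub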
        output_nub = Dense(1)

        x = input_nub
        x = output_nub(x)

        model = Model(input_layer, x)
        model.compile(optimizer='adam', loss='mse')
Example #11
import os
import pickle

from keras import Model
from keras.layers import Dense
from sklearn.model_selection import train_test_split

from keras_pandas import lib
from keras_pandas.Automater import Automater


def main():
    # Components supplied by the Automater in this example: the generated X and y,
    # auto.input_nub, auto.input_layers, auto.output_nub, and auto.suggest_loss

    save_results = False

    # Load data
    observations = lib.load_titanic()
    print('Observation columns: {}'.format(list(observations.columns)))

    # Train/test split
    train_observations, test_observations = train_test_split(observations)
    train_observations = train_observations.copy()
    test_observations = test_observations.copy()

    # List out variable types

    data_type_dict = {'numerical': ['age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'],
                      'categorical': ['survived', 'pclass', 'sex'],
                      'text': ['name'],
                      'timeseries': []
                      }
    output_var = 'survived'

    # Create and fit Automater
    auto = Automater(data_type_dict=data_type_dict, output_var=output_var)
    auto.fit(train_observations)

    # Transform data
    train_X, train_y = auto.transform(train_observations)
    test_X, test_y = auto.transform(test_observations)

    # Create and fit keras (deep learning) model

    x = auto.input_nub
    x = Dense(32)(x)
    x = Dense(32)(x)
    x = auto.output_nub(x)

    model = Model(inputs=auto.input_layers, outputs=x)
    print(f'Suggested loss: {auto.suggest_loss()}\n\n')
    model.compile(optimizer='adam', loss=auto.suggest_loss(), metrics=['acc'])

    model.summary()

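    # Debug output: inspect the transformed inputs and targets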
    print('\n\n' + '^' * 21)
    print(train_X)

    print('\n\n' + '^' * 21)
    print(train_y)
    model.fit(train_X, train_y, batch_size=32, epochs=1, validation_split=0.1)

    # Make model predictions and inverse transform model predictions, to get usable results
    pred_test_y = model.predict(test_X)
    auto.inverse_transform_output(pred_test_y)

    # Save all results
    if save_results:
        temp_dir = lib.get_temp_dir()
        model.save(os.path.join(temp_dir, 'model.h5'))
        pickle.dump(train_X, open(os.path.join(temp_dir, 'train_X.pkl'), 'wb'))
        pickle.dump(train_y, open(os.path.join(temp_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(test_X, open(os.path.join(temp_dir, 'test_X.pkl'), 'wb'))
        pickle.dump(test_y, open(os.path.join(temp_dir, 'test_y.pkl'), 'wb'))
        pickle.dump(pred_test_y, open(os.path.join(temp_dir, 'pred_test_y.pkl'), 'wb'))
Example #12
from keras import Model
from keras.layers import Dense

from keras_pandas.Automater import Automater
from keras_pandas.lib import load_titanic

observations = load_titanic()

# Transform the data set, using keras_pandas
categorical_vars = ['pclass', 'sex', 'survived']
numerical_vars = [
    'age', 'siblings_spouses_aboard', 'parents_children_aboard', 'fare'
]
text_vars = ['name']

auto = Automater(categorical_vars=categorical_vars,
                 numerical_vars=numerical_vars,
                 text_vars=text_vars,
                 response_var='survived')
X, y = auto.fit_transform(observations)

# Start model with provided input nub
x = auto.input_nub

# Fill in your own hidden layers
x = Dense(32)(x)
x = Dense(32, activation='relu')(x)
x = Dense(32)(x)

# End model with provided output nub
x = auto.output_nub(x)
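
# Create, compile, and train the model, as in Example #1
model = Model(inputs=auto.input_layers, outputs=x)
model.compile(optimizer='Adam', loss=auto.loss, metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=15, validation_split=.2)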