Example #1
def func():
    ct = ColumnTransformer([("OneHot", OneHotEncoder(), [3])],
                           remainder='passthrough')

    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk(
        '50_Startups.csv', col_transformers=[ct])

    model = LinearRegression()
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    PlotUtil.display_multiple_linear_result(y_test,
                                            y_pred,
                                            x_label='#',
                                            y_label='Profit')

    print('intercept {}'.format(model.intercept_))
    print('coef {}'.format(model.coef_))
    print('y_pred = {}'.format(y_pred))
    print('MSE: {}'.format(metrics.mean_squared_error(y_test, y_pred)))
    print('RMSE: {}'.format(np.sqrt(metrics.mean_squared_error(y_test,
                                                               y_pred))))

    # OneHotEncoder on the State column yields three columns (plus the three
    # passthrough features), so there are six coefficients.
    cdf = pd.DataFrame(
        model.coef_,
        ['state1', 'state2', 'state3', 'R&D Spend', 'Administration',
         'Marketing Spend'],
        columns=['Coefficients'])
    print(cdf)
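
LoadUtil.load_data_sk is a project helper that is not shown in this listing. A minimal sketch of what it presumably does, assuming the '../../resource/' path seen in Example #5 and an 80/20 split (both assumptions):

import pandas as pd
from sklearn.model_selection import train_test_split

def load_data_sk(filename, col_transformers=None, test_size=0.2):
    dataset = pd.read_csv('../../resource/' + filename)  # path is an assumption
    x = dataset.iloc[:, :-1].values  # features: every column but the last
    y = dataset.iloc[:, -1].values   # target: the last column
    if col_transformers:
        for ct in col_transformers:
            x = ct.fit_transform(x)
    return train_test_split(x, y, test_size=test_size, random_state=0)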
Example #2
def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk(
        'studentscores.csv')

    model = LinearRegression()
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour',
                                          'Score')
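
PlotUtil.display_simple_linear_result is likewise a project helper; a plausible minimal implementation with matplotlib (the exact styling is an assumption):

import matplotlib.pyplot as plt

def display_simple_linear_result(x_test, y_test, y_pred, x_label, y_label):
    plt.scatter(x_test, y_test, color='red')  # actual observations
    plt.plot(x_test, y_pred, color='blue')    # fitted regression line
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()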
Example #3
def func():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    # x_train is 60000 rows of 28x28 values; we reshape it to 60000 x 784.
    RESHAPED = 784
    x_train = x_train.reshape(60000, RESHAPED)
    x_test = x_test.reshape(10000, RESHAPED)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # Normalize inputs to be within [0, 1].
    x_train, x_test = x_train / 255.0, x_test / 255.0
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Labels have one-hot representation.
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)
    # Build the model.
    model = tf.keras.models.Sequential()
    model.add(
        tf.keras.layers.Dense(128,
                              input_shape=(RESHAPED, ),
                              name='dense_layer',
                              activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    model.add(
        tf.keras.layers.Dense(128, name='dense_layer_2', activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    model.add(
        tf.keras.layers.Dense(10, name='dense_layer_3', activation='softmax'))
    # Summary of the model.
    model.summary()
    # Compiling the model. With RMSProp instead of SGD the model converges
    # faster, so the number of epochs could be increased to 250.
    model.compile(optimizer='SGD',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Training the model, logging to TensorBoard.
    callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')]
    history = model.fit(x_train,
                        y_train,
                        batch_size=128,
                        epochs=50,
                        verbose=1,
                        validation_split=0.2,
                        callbacks=callbacks)
    # Evaluating the model.
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print('Test accuracy:', test_acc)
    PlotUtil.display_loss(history)

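With the TensorBoard callback now passed to fit (the original snippet constructed it after training, where it had no effect), the training curves can be inspected afterwards by running tensorboard --logdir ./logs from the working directory.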
Example #4
def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_simple_linear_data_sk(
        'studentscores.csv')

    PlotUtil.display_x_y(x_train, y_train, 'Hour', 'Score')

    W = sum(y_train * (x_train - np.mean(x_train))) / sum(
        (x_train - np.mean(x_train))**2)
    b = np.mean(y_train) - W * np.mean(x_train)
    print("The regression coefficients are ", W, b)

    y_pred = W * x_test + b

    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour',
                                          'Score')
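
The hand-rolled W and b above are the closed-form ordinary least squares estimates: since sum(x - mean(x)) is zero, sum(y * (x - mean(x))) equals the usual covariance form sum((y - mean(y)) * (x - mean(x))). A quick check on synthetic data (made up here purely for illustration) that the formulas agree with np.polyfit:

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 50)
y = 3.0 * x + 2.0 + rng.normal(0, 1, 50)  # noisy line, slope 3, intercept 2

W = sum(y * (x - np.mean(x))) / sum((x - np.mean(x)) ** 2)
b = np.mean(y) - W * np.mean(x)
slope, intercept = np.polyfit(x, y, 1)  # least-squares fit of degree 1
assert np.isclose(W, slope) and np.isclose(b, intercept)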
Example #5
def func():
    dataset = pd.read_csv('../../resource/Social_Network_Ads.csv')
    x = dataset.iloc[:, [2, 3]].values
    y = dataset.iloc[:, 4].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,
                                                        random_state=0)

    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    classifier = PolynomialLogisticRegression(5)
    classifier.fit(x_train, y_train)
    print(classifier.score(x_train, y_train))
    print(classifier.score(x_test, y_test))

    y_pred = classifier.predict(x_test)

    PlotUtil.display_confusion_matrix(y_test, y_pred)
    PlotUtil.display_decision_boundary(x_test, y_test, classifier)
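
PolynomialLogisticRegression is not defined in this listing. A plausible minimal implementation is a scikit-learn pipeline that expands the (already scaled) features to the given degree before fitting a logistic regression; the structure below is an assumption:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression

def PolynomialLogisticRegression(degree):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('log_reg', LogisticRegression(max_iter=1000)),
    ])

A Pipeline exposes fit, score, and predict, so it drops into the code above unchanged.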
Example #6
def func():
    train_df, test_df = LoadUtil.load_data_df_sk('Social_Network_Ads.csv')
    x_train_df = train_df.iloc[:, 1:4]
    y_train_df = train_df.iloc[:, 4]
    x_test_df = test_df.iloc[:, 1:4]
    y_test_df = test_df.iloc[:, 4]

    PlotUtil.pairplot(x_train_df, hue='Gender')

    feature_columns = [
        fc.categorical_column_with_vocabulary_list('Gender', vocabulary_list=['Male', 'Female']),
        fc.numeric_column('Age', dtype=tf.float32, normalizer_fn=lambda x: (x / np.float32(100))),
        fc.numeric_column('EstimatedSalary', dtype=tf.float32, normalizer_fn=lambda x: (x / np.float32(100000)))]

    classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns, n_classes=2, model_dir='logs/')

    train_input_fn = pandas_input_fn(
        x=x_train_df,
        y=y_train_df,
        num_epochs=None,
        shuffle=True)

    classifier.train(train_input_fn, steps=1000)

    test_input_fn = pandas_input_fn(
        x=x_test_df,
        y=y_test_df,
        num_epochs=1,
        shuffle=False)

    y_pred = list()
    probabilities = classifier.predict(input_fn=test_input_fn)
    for i in range(len(y_test_df.values)):
        ret = next(probabilities)
        classes = ret['classes']
        y_pred.append(int(classes[0]))  # classes is a length-1 byte array
        print('test: {}  predict: {}'.format(y_test_df.values[i], classes))

    eval_results = classifier.evaluate(input_fn=test_input_fn)
    for key, value in sorted(eval_results.items()):
        print('%s: %s' % (key, value))

    PlotUtil.display_confusion_matrix(y_test_df.values, y_pred)
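
pandas_input_fn is the TF1-style helper from tf.compat.v1.estimator.inputs. An equivalent input function can be built directly with tf.data; a sketch, assuming the same DataFrames as above:

import tensorflow as tf

def make_input_fn(x_df, y_df, shuffle=True, num_epochs=None, batch_size=128):
    def input_fn():
        ds = tf.data.Dataset.from_tensor_slices((dict(x_df), y_df))
        if shuffle:
            ds = ds.shuffle(1000)
        # num_epochs=None repeats indefinitely, matching the training setup.
        return ds.repeat(num_epochs).batch(batch_size)
    return input_fn

# e.g. classifier.train(make_input_fn(x_train_df, y_train_df), steps=1000)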
Example #7
def func():
    train_df, test_df = LoadUtil.load_data_df_sk('50_Startups.csv')
    x_train_df = train_df.iloc[:, :4]
    y_train_df = train_df.iloc[:, 4]
    x_test_df = test_df.iloc[:, :4]
    y_test_df = test_df.iloc[:, 4]

    PlotUtil.pairplot(x_train_df)

    feature_columns = [
        fc.numeric_column('RnDSpend', dtype=tf.float32),
        fc.numeric_column('Administration', dtype=tf.float32),
        fc.numeric_column('MarketingSpend', dtype=tf.float32),
        fc.categorical_column_with_vocabulary_list(
            'State', vocabulary_list=['New York', 'California', 'Florida'])
    ]

    train_input_fn = pandas_input_fn(x=x_train_df,
                                     y=y_train_df,
                                     num_epochs=None,
                                     shuffle=True)

    linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns,
                                              model_dir='logs/')
    linear_est.train(train_input_fn, steps=100)

    test_input_fn = pandas_input_fn(x=x_test_df,
                                    y=y_test_df,
                                    num_epochs=1,
                                    shuffle=False)
    predictions = linear_est.predict(test_input_fn)

    y_pred = list()
    for i in range(len(y_test_df.values)):
        # Each prediction dict holds a length-1 array; keep the scalar.
        predict = next(predictions)['predictions']
        y_pred.append(predict[0])

    PlotUtil.display_multiple_linear_result(y_test_df.values,
                                            y_pred,
                                            x_label='#',
                                            y_label='Profit')
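
As in Example #1, the quality of the fit can be summarized with RMSE once the predictions are collected, e.g. rmse(y_test_df.values, y_pred) with a small helper:

import numpy as np

def rmse(y_true, y_pred):
    # Root mean squared error between actual and predicted values.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.sqrt(np.mean((y_pred - y_true) ** 2))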
Example #8
def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk(
        'studentscores.csv')

    layer0 = tf.keras.layers.Dense(units=1, input_shape=[1])
    model = tf.keras.Sequential([layer0])

    # Accuracy is not meaningful for regression; track mean absolute error.
    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(0.5),
                  metrics=['mae'])
    history = model.fit(x_train, y_train, epochs=100, verbose=False)

    weights = layer0.get_weights()
    print('weight: {} bias: {}'.format(weights[0], weights[1]))

    y_pred = model.predict(x_test)
    test_loss, test_mae = model.evaluate(x_test, y_test)
    print('Test MAE: {}'.format(test_mae))

    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour',
                                          'Score')
    PlotUtil.display_loss(history)
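
Because the whole network is a single Dense unit, the learned weight and bias play the same role as the closed-form W and b from Example #4 and should end up close to those values on this dataset.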
Example #9
def func():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    # reshape
    x_train = x_train.reshape((60000, 28, 28, 1))
    x_test = x_test.reshape((10000, 28, 28, 1))
    # normalize
    x_train, x_test = x_train / 255.0, x_test / 255.0
    # cast
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # convert class vectors to binary class matrices
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

    # initialize the optimizer and model
    model = build_model(input_shape=(28, 28, 1), classes=10)
    model.compile(loss="categorical_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=["accuracy"])
    model.summary()

    callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')]

    # fit
    history = model.fit(x_train,
                        y_train,
                        batch_size=128,
                        epochs=20,
                        verbose=1,
                        validation_split=0.2,
                        callbacks=callbacks)
    score = model.evaluate(x_test, y_test, verbose=1)
    print("\nTest score:", score[0])
    print('Test accuracy:', score[1])
    PlotUtil.display_loss(history)
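
build_model is not shown in this listing. A minimal convolutional network that matches the expected signature and the MNIST input shape; the layer choices are an assumption, not necessarily the author's architecture:

import tensorflow as tf

def build_model(input_shape, classes):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                               input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(classes, activation='softmax'),
    ])
    return model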