# Multiple linear regression on the 50_Startups dataset with scikit-learn.
# LoadUtil / PlotUtil are project-local helper modules from this repo.
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder


def func():
    # One-hot encode the categorical 'State' column (index 3); pass the rest through unchanged.
    ct = ColumnTransformer([('OneHot', OneHotEncoder(), [3])],
                           remainder='passthrough')
    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk(
        '50_Startups.csv', col_transformers=[ct])

    model = LinearRegression()
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    PlotUtil.display_multiple_linear_result(y_test, y_pred,
                                            x_label='#', y_label='Profit')

    print('intercept {}'.format(model.intercept_))
    print('coef {}'.format(model.coef_))
    print('y_pred = {}'.format(y_pred))
    print('MSE: {}'.format(metrics.mean_squared_error(y_test, y_pred)))
    print('RMSE: {}'.format(np.sqrt(metrics.mean_squared_error(y_test, y_pred))))

    # Label each coefficient: ColumnTransformer emits the encoded state columns
    # first, followed by the passthrough numeric columns.
    cdf = pd.DataFrame(
        model.coef_,
        ['state1', 'state2', 'R&D Spend', 'Administration', 'Marketing Spend'],
        columns=['Coefficients'])
    print(cdf)
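# Optional sanity check for the coefficient labels above: with remainder='passthrough',
# ColumnTransformer places the transformed (one-hot) columns first and appends the
# untouched columns after them. A minimal sketch to confirm the ordering, assuming
# scikit-learn >= 1.0 and that the CSV sits at this relative path (adjust to the
# repo's resource directory).
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

raw = pd.read_csv('50_Startups.csv')          # path assumed; same dataset as above
ct = ColumnTransformer([('OneHot', OneHotEncoder(), [3])], remainder='passthrough')
ct.fit(raw.iloc[:, :4])                        # features only, target column excluded
print(ct.get_feature_names_out())              # encoded State columns first, passthrough after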
# Simple linear regression on the studentscores dataset with scikit-learn.
# LoadUtil / PlotUtil are project-local helper modules from this repo.
from sklearn.linear_model import LinearRegression


def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk('studentscores.csv')

    model = LinearRegression()
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour', 'Score')
# Classify MNIST digits with a fully connected network (tf.keras).
# PlotUtil is a project-local helper module from this repo.
import tensorflow as tf
from tensorflow.keras import datasets


def func():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    # X_train is 60000 rows of 28x28 values; we reshape it to 60000 x 784.
    RESHAPED = 784
    x_train = x_train.reshape(60000, RESHAPED)
    x_test = x_test.reshape(10000, RESHAPED)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # Normalize inputs to be within [0, 1].
    x_train, x_test = x_train / 255.0, x_test / 255.0
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Labels have one-hot representation.
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

    # Build the model.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(128,
                                    input_shape=(RESHAPED,),
                                    name='dense_layer',
                                    activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(128,
                                    name='dense_layer_2',
                                    activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(10,
                                    name='dense_layer_3',
                                    activation='softmax'))

    # Summary of the model.
    model.summary()

    # Compile the model. With RMSProp the training converges faster,
    # so the number of epochs could be increased to 250.
    model.compile(optimizer='SGD',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model.
    history = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=50,
                        verbose=1,
                        validation_split=0.2)

    # Evaluate the model.
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print('Test accuracy:', test_acc)

    PlotUtil.display_loss(history)

    # Note: constructing the TensorBoard callback here has no effect unless it is
    # passed to model.fit(..., callbacks=[...]).
    tf.keras.callbacks.TensorBoard(log_dir='./logs')
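# The TensorBoard callback constructed at the end of func() above is never registered.
# A minimal sketch of wiring it into training (same pattern as the CNN example below);
# variable names refer to the ones defined inside func().
callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')]
history = model.fit(x_train, y_train,
                    batch_size=128,
                    epochs=50,
                    verbose=1,
                    validation_split=0.2,
                    callbacks=callbacks)
# Then inspect the run with: tensorboard --logdir ./logs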
# Simple linear regression "by hand": closed-form least-squares fit.
# LoadUtil / PlotUtil are project-local helper modules from this repo.
import numpy as np


def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_simple_linear_data_sk(
        'studentscores.csv')
    PlotUtil.display_x_y(x_train, y_train, 'Hour', 'Score')

    # Least-squares slope and intercept.
    W = sum(y_train * (x_train - np.mean(x_train))) / sum(
        (x_train - np.mean(x_train)) ** 2)
    b = np.mean(y_train) - W * np.mean(x_train)
    print("The regression coefficients are ", W, b)

    y_pred = W * x_test + b
    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour', 'Score')
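# Why the numerator above works: the usual least-squares slope is
#   W = sum((x_i - mean(x)) * (y_i - mean(y))) / sum((x_i - mean(x))**2),
# and since sum(mean(y) * (x_i - mean(x))) == 0, using y_i instead of (y_i - mean(y))
# gives the same value. A hedged cross-check against scikit-learn, assuming
# x_train / y_train are the 1-D arrays returned inside func() above.
import numpy as np
from sklearn.linear_model import LinearRegression

lr = LinearRegression().fit(x_train.reshape(-1, 1), y_train)
print('sklearn slope / intercept:', lr.coef_[0], lr.intercept_)   # should match W, b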
# Polynomial logistic regression on the Social_Network_Ads dataset.
# PolynomialLogisticRegression and PlotUtil are project-local helpers from this repo.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def func():
    dataset = pd.read_csv('../../resource/Social_Network_Ads.csv')
    x = dataset.iloc[:, [2, 3]].values   # Age, EstimatedSalary
    y = dataset.iloc[:, 4].values        # Purchased

    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.25,
                                                        random_state=0)

    # Standardize the two numeric features.
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    classifier = PolynomialLogisticRegression(5)
    classifier.fit(x_train, y_train)
    print(classifier.score(x_train, y_train))
    print(classifier.score(x_test, y_test))

    y_pred = classifier.predict(x_test)
    PlotUtil.display_confusion_matrix(y_test, y_pred)
    PlotUtil.display_decision_boundary(x_test, y_test, classifier)
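# PolynomialLogisticRegression is defined elsewhere in the repo and not shown here.
# A hypothetical sketch of what such a helper typically looks like (polynomial
# feature expansion feeding a plain logistic regression); the repo's actual
# implementation may differ.
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler


def PolynomialLogisticRegression(degree):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('scaler', StandardScaler()),
        ('log_reg', LogisticRegression()),
    ])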
# Binary classification with a tf.estimator.LinearClassifier and feature columns.
# LoadUtil / PlotUtil are project-local helper modules from this repo;
# pandas_input_fn is imported from the estimator inputs API (see the note below).
import numpy as np
import tensorflow as tf
from tensorflow import feature_column as fc


def func():
    train_df, test_df = LoadUtil.load_data_df_sk('Social_Network_Ads.csv')
    x_train_df = train_df.iloc[:, 1:4]   # Gender, Age, EstimatedSalary
    y_train_df = train_df.iloc[:, 4]     # Purchased
    x_test_df = test_df.iloc[:, 1:4]
    y_test_df = test_df.iloc[:, 4]

    PlotUtil.pairplot(x_train_df, hue='Gender')

    # Describe the raw DataFrame columns to the estimator.
    feature_columns = [
        fc.categorical_column_with_vocabulary_list(
            'Gender', vocabulary_list=['Male', 'Female']),
        fc.numeric_column('Age', dtype=tf.float32,
                          normalizer_fn=lambda x: x / np.float32(100)),
        fc.numeric_column('EstimatedSalary', dtype=tf.float32,
                          normalizer_fn=lambda x: x / np.float32(100000)),
    ]

    classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns,
                                               n_classes=2,
                                               model_dir='logs/')

    train_input_fn = pandas_input_fn(x=x_train_df, y=y_train_df,
                                     num_epochs=None, shuffle=True)
    classifier.train(train_input_fn, steps=1000)

    test_input_fn = pandas_input_fn(x=x_test_df, y=y_test_df,
                                    num_epochs=1, shuffle=False)

    y_pred = list()
    probabilities = classifier.predict(input_fn=test_input_fn)
    for i in range(len(y_test_df.values)):
        ret = next(probabilities)
        classes = ret['classes']
        y_pred.append(int(classes))
        print('test: {} predict: {}'.format(y_test_df.values[i], classes))

    eval_results = classifier.evaluate(input_fn=test_input_fn)
    for key, value in sorted(eval_results.items()):
        print('%s: %s' % (key, value))

    PlotUtil.display_confusion_matrix(y_test_df.values, y_pred)
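# The estimator examples above and below use pandas_input_fn without showing its
# import. The exact location depends on the TensorFlow version the repo targets
# (an assumption here); the TF 2.x path is active below, the TF 1.x path is commented.
import tensorflow as tf

# TF 1.x:
# pandas_input_fn = tf.estimator.inputs.pandas_input_fn
# TF 2.x (estimator input helpers live behind the compat shim):
pandas_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn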
# Linear regression with a tf.estimator.LinearRegressor and feature columns.
# LoadUtil / PlotUtil are project-local helper modules from this repo.
import tensorflow as tf
from tensorflow import feature_column as fc


def func():
    train_df, test_df = LoadUtil.load_data_df_sk('50_Startups.csv')
    x_train_df = train_df.iloc[:, :4]   # RnDSpend, Administration, MarketingSpend, State
    y_train_df = train_df.iloc[:, 4]    # Profit
    x_test_df = test_df.iloc[:, :4]
    y_test_df = test_df.iloc[:, 4]

    PlotUtil.pairplot(x_train_df)

    feature_columns = [
        fc.numeric_column('RnDSpend', dtype=tf.float32),
        fc.numeric_column('Administration', dtype=tf.float32),
        fc.numeric_column('MarketingSpend', dtype=tf.float32),
        fc.categorical_column_with_vocabulary_list(
            'State', vocabulary_list=['New York', 'California', 'Florida']),
    ]

    train_input_fn = pandas_input_fn(x=x_train_df, y=y_train_df,
                                     num_epochs=None, shuffle=True)
    linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns,
                                              model_dir='logs/')
    linear_est.train(train_input_fn, steps=100)

    test_input_fn = pandas_input_fn(x=x_test_df, y=y_test_df,
                                    num_epochs=1, shuffle=False)
    predictions = linear_est.predict(test_input_fn)

    y_pred = list()
    for i in range(len(y_test_df.values)):
        predict = next(predictions)['predictions']
        y_pred.append(predict)

    PlotUtil.display_multiple_linear_result(y_test_df.values, y_pred,
                                            x_label='#', y_label='Profit')
# Simple linear regression with a single Dense unit in tf.keras.
# LoadUtil / PlotUtil are project-local helper modules from this repo.
import tensorflow as tf


def func():
    x_train, x_test, y_train, y_test = LoadUtil.load_data_sk('studentscores.csv')

    # One input, one output: the layer learns a single weight and bias.
    layer0 = tf.keras.layers.Dense(units=1, input_shape=[1])
    model = tf.keras.Sequential([layer0])
    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(0.5),
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train, epochs=100, verbose=False)

    weights = layer0.get_weights()
    print('weight: {} bias: {}'.format(weights[0], weights[1]))

    y_pred = model.predict(x_test)
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print('Test accuracy: {}'.format(test_acc))

    PlotUtil.display_simple_linear_result(x_test, y_test, y_pred, 'Hour', 'Score')
    PlotUtil.display_loss(history)
# Classify MNIST digits with a convolutional network (tf.keras).
# build_model and PlotUtil are project-local helpers from this repo.
import tensorflow as tf
from tensorflow.keras import datasets


def func():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    # Reshape to NHWC with a single channel.
    x_train = x_train.reshape((60000, 28, 28, 1))
    x_test = x_test.reshape((10000, 28, 28, 1))

    # Normalize to [0, 1].
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Cast to float32.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # Convert class vectors to binary class matrices (one-hot).
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

    # Initialize the optimizer and model.
    model = build_model(input_shape=(28, 28, 1), classes=10)
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    model.summary()

    callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')]

    # Fit the model.
    history = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=20,
                        verbose=1,
                        validation_split=0.2,
                        callbacks=callbacks)

    score = model.evaluate(x_test, y_test, verbose=1)
    print('\nTest score:', score[0])
    print('Test accuracy:', score[1])

    PlotUtil.display_loss(history)
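# build_model is defined elsewhere in the repo and not shown here. A hypothetical
# sketch of a small LeNet-style builder that matches the call signature used above
# (input_shape, classes); the repo's actual implementation may differ.
import tensorflow as tf


def build_model(input_shape, classes):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(20, (5, 5), activation='relu',
                               input_shape=input_shape),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        tf.keras.layers.Conv2D(50, (5, 5), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='relu'),
        tf.keras.layers.Dense(classes, activation='softmax'),
    ])
    return model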