from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix

# In[43]:

# Multinomial Naive Bayes baseline
nb = MultinomialNB()
nb.fit(train_x, train_y)
pred_y = nb.predict(test_x)
print(confusion_matrix(test_y, pred_y))

# In[44]:

# Linear support vector classifier
sv = LinearSVC()
sv.fit(train_x, train_y)
pred = sv.predict(test_x)
print(confusion_matrix(test_y, pred))

# In[45]:

from sklearn.linear_model import LogisticRegression

# In[46]:

# Logistic regression
lr = LogisticRegression()
lr.fit(train_x, train_y)
pp = lr.predict(test_x)
print(confusion_matrix(test_y, pp))

# In[ ]:
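# A compact way to compare the three classifiers above is held-out accuracy
# on the same split. A minimal sketch, assuming train_x, train_y, test_x,
# test_y are the splits already defined in this notebook.
from sklearn.metrics import accuracy_score

for name, model in [('MultinomialNB', MultinomialNB()),
                    ('LinearSVC', LinearSVC()),
                    ('LogisticRegression', LogisticRegression())]:
    model.fit(train_x, train_y)
    acc = accuracy_score(test_y, model.predict(test_x))
    print('%s: %.3f' % (name, acc))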
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# # Logistic Regression Classification
# The target variable (or output), y, can take only discrete values for a
# given set of features (or inputs), X.

# In[37]:

# Logistic regression
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# In[38]:

# confusion_matrix expects (y_true, y_pred) in that order
cm = confusion_matrix(y_test, y_pred)
cm

# In[39]:

a_s = accuracy_score(y_test, y_pred)
a_s

# # Decision Tree Classification
# A decision tree solves the problem with a tree representation in which each
# leaf node corresponds to a class label and each internal node represents an
# attribute test (see the sketch below).
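# A minimal sketch of the decision tree classifier described above, assuming
# the same x_train/x_test/y_train/y_test split used for logistic regression.
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
y_pred_dt = dt.predict(x_test)
print(confusion_matrix(y_test, y_pred_dt))
print(accuracy_score(y_test, y_pred_dt))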
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Fit one LabelEncoder per categorical column and keep each fitted encoder,
# so the same label-to-integer mapping can be reused later.
encoders = {}
for col in ['Education', 'Self_Employed', 'Credit_History',
            'Property_Area', 'Dependents']:
    encoders[col] = LabelEncoder()
    data[col] = encoders[col].fit_transform(data[col])
print(data.head())

x = data.drop(['Loan_Status'], axis=1)
y = data['Loan_Status']
sns.countplot(data['Loan_Status'], label='count')

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
# print(x_train)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
prediction = lr.predict(x_test)
print(prediction)
print(accuracy_score(y_test, prediction))
# print(y_train.head())

from sklearn.tree import DecisionTreeClassifier
dc = DecisionTreeClassifier()
dc.fit(x_train, y_train)
prediction = dc.predict(x_test)
print(prediction)
print(accuracy_score(y_test, prediction))
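# Because each column keeps its own fitted encoder in `encoders`, new records
# can be mapped with the same label-to-integer scheme at prediction time.
# A hypothetical illustration; `new_applicant` and its values are made up.
new_applicant = {'Education': 'Graduate', 'Property_Area': 'Urban'}
for col, value in new_applicant.items():
    new_applicant[col] = encoders[col].transform([value])[0]
print(new_applicant)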
# Divide the data into dependent and independent variables
y = df['expenses']
x = df.drop(['expenses'], axis=1)

# Split the data
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# We could scale down the data, but the regression model can handle unscaled
# features, so no scaling is applied here.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)

# Predict
y_pred = lr.predict(x_test)

# Find residuals for diagnostic checks (see the plot sketched below)
residual = y_test - y_pred

# Check the accuracy of the model
score = lr.score(x_test, y_test)

# Accuracy metrics
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
acc = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
# The model gives ~73.9% accuracy (R² score)
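# The residuals computed above are most useful visually: a residual plot with
# no obvious pattern suggests the linear fit is reasonable. A minimal sketch,
# assuming matplotlib is available.
import matplotlib.pyplot as plt

plt.scatter(y_pred, residual, alpha=0.5)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Predicted expenses')
plt.ylabel('Residual (actual - predicted)')
plt.title('Residuals vs. predictions')
plt.show()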
    # 'nthread': 6,
    # 'tree_method': 'gpu_hist',
    # 'n_gpus': 2,
    'colsample_bytree': 0.7
}


def evalerror(preds, dtrain):
    # Custom metric: root mean squared percentage error (RMSPE) on the
    # original scale; labels were stored as log(sales + 1).
    labels = dtrain.get_label()
    # return a pair (metric_name, result)
    y = [math.exp(x) - 1 for x in labels[labels > 0]]
    yhat = [math.exp(x) - 1 for x in preds[labels > 0]]
    ssquare = [math.pow((y[i] - yhat[i]) / y[i], 2) for i in range(len(y))]
    return 'rmspe', math.sqrt(np.mean(ssquare))


watchlist = [(dtrain, 'train_rmspe'), (dvalid, 'valid_rmspe')]
clf = xgb.train(param, dtrain, 10000, watchlist, feval=evalerror,
                verbose_eval=100)

dtest = xgb.DMatrix(test[test['Open'] == 1][features].values)
test['Sales'] = 0
test.loc[test['Open'] == 1, 'Sales'] = [math.exp(x) - 1
                                        for x in clf.predict(dtest)]

print("-> Write submission file ... ")
print(datetime.now(), datetime.now() - start)
test[['Id', 'Sales']].to_csv("submission.csv", index=False)
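# The same RMSPE metric can be computed without Python-level loops. A minimal
# vectorized sketch, assuming preds and the labels are numpy arrays with the
# same log(x + 1)-style encoding as above; evalerror_vectorized is a name
# introduced here for illustration.
def evalerror_vectorized(preds, dtrain):
    labels = dtrain.get_label()
    mask = labels > 0
    y = np.exp(labels[mask]) - 1
    yhat = np.exp(preds[mask]) - 1
    return 'rmspe', float(np.sqrt(np.mean(((y - yhat) / y) ** 2)))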
def run():
    hidden_layers = [35, 49, 4]
    batch_size = 219
    dropout = 0.2
    seq_len = 25
    epochs_pre = [625, 115, 933]
    epochs_finetune = 197
    window_size = 0
    features = 8

    series = read_csv('pollution.csv', header=0, index_col=0)
    raw_values = series.values

    # integer encode wind direction
    encoder = LabelEncoder()
    raw_values[:, 4] = encoder.fit_transform(raw_values[:, 4])

    # transform data to be stationary
    diff = difference(raw_values, 1)
    dataset = diff.values
    dataset = create_dataset(dataset, features, window_size)

    # frame as supervised learning
    reframed = series_to_supervised(dataset, features, seq_len, 1)
    drop = [i for i in range(seq_len * features + 1,
                             ((seq_len + 1) * features))]
    reframed.drop(reframed.columns[drop], axis=1, inplace=True)
    reframed = reframed.values

    # split into train and test sets
    train_size = 365 * 24 * 4
    train, test = reframed[0:train_size], reframed[train_size:]

    # transform the scale of the data
    scaler, train_scaled, test_scaled = scale(train, test)

    # split into inputs and outputs
    x_train, y_train = train_scaled[:, 0:-1], train_scaled[:, -1]
    x_test, y_test = test_scaled[:, 0:-1], test_scaled[:, -1]

    # reshape input to be 3D [samples, timesteps, features]
    x_train = x_train.reshape(x_train.shape[0], seq_len, features)
    x_test = x_test.reshape(x_test.shape[0], seq_len, features)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

    print('\nstart pretraining')
    print('=================')
    timesteps = x_train.shape[1]
    input_dim = x_train.shape[2]
    trained_encoder = []
    x_train_temp = x_train
    i = 1
    for hidden, epochs in zip(hidden_layers, epochs_pre):
        print('pretrain Autoencoder: {} ----> Encoder: {} ----> Epochs: {}'
              .format(i, hidden, epochs))
        print(x_train_temp.shape)
        print('=============================================================')
        inputs = Input(batch_shape=(batch_size, timesteps,
                                    x_train_temp.shape[2]))
        encoded = CuDNNLSTM(hidden,
                            batch_input_shape=(batch_size, timesteps,
                                               x_train_temp.shape[2]),
                            stateful=False)(inputs)
        decoded = RepeatVector(timesteps)(encoded)
        decoded = CuDNNLSTM(input_dim, stateful=False,
                            return_sequences=True)(decoded)
        AE = Model(inputs, decoded)
        encoder = Model(inputs, encoded)
        AE.compile(loss='mean_squared_error', optimizer='Adam')
        encoder.compile(loss='mean_squared_error', optimizer='Adam')
        AE.fit(x_train_temp, x_train, epochs=epochs, batch_size=batch_size,
               shuffle=True, verbose=1)

        # store the trained encoder layer and its weights
        trained_encoder.append((AE.layers[1], AE.layers[1].get_weights()))

        # update training data: feed the encoded output to the next layer
        x_train_temp = encoder.predict(x_train_temp, batch_size=batch_size)

        # reshape encoded input to 3D
        inputs = Input(shape=(x_train_temp.shape[1], ))
        reshape = RepeatVector(timesteps)(inputs)
        Repeat = Model(inputs, reshape)
        x_train_temp = Repeat.predict(x_train_temp, batch_size=batch_size)
        i = i + 1

    # Fine-tuning
    print('\nFine-tuning')
    print('===========')
    l = len(trained_encoder)

    # build the fine-tuning model from the pretrained encoder layers
    model = Sequential()
    for i, encod in enumerate(trained_encoder):
        model.add(encod[0])
        model.layers[-1].set_weights(encod[1])
        model.add(Dropout(dropout))
        if (i + 1 != l):
            model.add(RepeatVector(timesteps))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='Adam')
    model.fit(x_train, y_train, epochs=epochs_finetune,
              batch_size=batch_size, verbose=1, shuffle=True)

    # save trained model
    model.save('3layer_25.h5')

    # redefine the model in order to test with one sample at a time
    # (batch_size = 1)
    new_model = Sequential()
    new_model.add(CuDNNLSTM(hidden_layers[0],
                            batch_input_shape=(1, timesteps, input_dim),
                            stateful=False))
    for layer in model.layers[1:]:
        new_model.add(layer)

    # copy weights
    old_weights = model.get_weights()
    new_model.set_weights(old_weights)

    # forecast the entire training dataset to build up state for forecasting
    print('Forecasting Training Data')
    predictions_train = list()
    for i in range(len(y_train)):
        # make one-step forecast
        X = x_train[i]
        y = y_train[i]
        yhat = forecast_lstm(new_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(raw_values) - i)
        # store forecast
        predictions_train.append(yhat)
        expected = raw_values[:, 0][i + 1]
        # print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))

    # report performance using RMSE
    rmse_train = sqrt(
        mean_squared_error(raw_values[:, 0][1:len(train_scaled) + 1],
                           predictions_train))
    print('Train RMSE: %.5f' % rmse_train)
    # # report performance using RMSPE
    # RMSPE_train = RMSPE(raw_values[:, 0][1:len(train_scaled) + 1],
    #                     predictions_train)
    # print('Train RMSPE: %.5f' % RMSPE_train)
    MAE_train = mean_absolute_error(raw_values[:, 0][1:len(train_scaled) + 1],
                                    predictions_train)
    print('Train MAE: %.5f' % MAE_train)
    # MAPE_train = MAPE(raw_values[:, 0][1:len(train_scaled) + 1],
    #                   predictions_train)
    # print('Train MAPE: %.5f' % MAPE_train)
    SMAPE_train = smape(raw_values[:, 0][1:len(train_scaled) + 1],
                        predictions_train)
    print('Train SMAPE: %.5f' % SMAPE_train)

    # forecast the test data
    print('Forecasting Testing Data')
    predictions_test = list()
    for i in range(len(y_test)):
        # make one-step forecast
        X = x_test[i]
        y = y_test[i]
        yhat = forecast_lstm(new_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
        # store forecast
        predictions_test.append(yhat)
        expected = raw_values[:, 0][len(train) + i + 1]
        # print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))

    # report performance using RMSE
    rmse_test = sqrt(
        mean_squared_error(raw_values[:, 0][-len(test_scaled):],
                           predictions_test))
    print('Test RMSE: %.5f' % rmse_test)
    # # report performance using RMSPE
    # RMSPE_test = RMSPE(raw_values[:, 0][-len(test_scaled):],
    #                    predictions_test)
    # print('Test RMSPE: %.5f' % RMSPE_test)
    MAE_test = mean_absolute_error(raw_values[:, 0][-len(test_scaled):],
                                   predictions_test)
    print('Test MAE: %.5f' % MAE_test)
    # MAPE_test = MAPE(raw_values[:, 0][-len(test_scaled):], predictions_test)
    # print('Test MAPE: %.5f' % MAPE_test)
    SMAPE_test = smape(raw_values[:, 0][-len(test_scaled):], predictions_test)
    print('Test SMAPE: %.5f' % SMAPE_test)

    # predictions = np.concatenate((predictions_train, predictions_test),
    #                              axis=0)

    # line plot of observed vs predicted
    fig, ax = plt.subplots(1)
    ax.plot(raw_values[:, 0][-80:], 'mo-', label='original', linewidth=2)
    ax.plot(predictions_test[-80:], 'co-', label='predictions', linewidth=2)
    # ax.axvline(x=len(train_scaled) + 1, color='k', linestyle='--')
    ax.legend(loc='upper right')
    ax.set_title('PM2.5 hourly concentration prediction '
                 'from 28/12/2014 to 31/12/2014')
    ax.set_ylabel('PM2.5 concentration')
    plt.show()
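# forecast_lstm, invert_scale, and inverse_difference are helpers defined
# elsewhere in this project. A minimal sketch of what forecast_lstm is
# assumed to look like, given the (1, timesteps, features) batch shape used
# above; this is an assumption, not the project's actual definition.
def forecast_lstm(model, batch_size, X):
    # reshape one (timesteps, features) sample to the 3D input the network
    # expects, and return the scalar one-step forecast
    X = X.reshape(1, X.shape[0], X.shape[1])
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0, 0]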
decoded = Dense(16384, activation='tanh')(encoder_output)

# build the autoencoder model
autoencoder = Model(inputs=input_img, outputs=decoded)
# build the encoder model
encoder = Model(inputs=input_img, outputs=encoder_output)

# compile the model
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

# training
autoencoder.fit(X_train, X_train, epochs=40, batch_size=256, shuffle=True)

auto_trainX = encoder.predict(X_train)
auto_testX = encoder.predict(X_test)
print(auto_trainX.shape)
print(auto_testX.shape)

# decision tree on the encoded features
print("Training---------- decision tree")
clf = tree.DecisionTreeClassifier()
clf.fit(auto_trainX, train_y)
score = clf.score(auto_testX, test_y)
print(score)

# BP (multilayer perceptron) network on the encoded features
print("Training---------- BP")
bp_c = MLPClassifier(solver='lbfgs', alpha=0.00001,
                     hidden_layer_sizes=(5, 5, 4), activation='relu')
bp_c.fit(auto_trainX, train_y)
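# The snippet stops after fitting the BP network; scoring it mirrors the
# decision tree evaluation above. A minimal sketch, assuming the same encoded
# test features.
bp_score = bp_c.score(auto_testX, test_y)
print(bp_score)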
classification.fit(x_train, dummy_y, shuffle=False, epochs=30,
                   batch_size=batch_size)
scores = classification.evaluate(x_test, dummy_y_test)
print("\nResult on evaluation set...\n%s: %.2f%%" %
      (classification.metrics_names[1], scores[1] * 100))

# build a model to project inputs onto the latent space
encoder = Model(x, z_mean)

# display a 2D plot of the digit classes in the latent space
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
plt.colorbar()
plt.show()

# build a digit generator that can sample from the learned distribution
decoder_input = Input(shape=(latent_dim, ))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

# display a 2D manifold of the digits
n = 15  # figure with 15x15 digits
digit_size = 28
figure = np.zeros((digit_size * n, digit_size * n))
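# The manifold grid set up above is typically filled by decoding a grid of
# latent points. A minimal sketch in the style of the standard Keras VAE
# example, assuming a 2D latent space and scipy available.
from scipy.stats import norm

# map the unit square through the inverse Gaussian CDF so the grid covers
# the latent space with equally probable points
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi]])
        x_decoded = generator.predict(z_sample)
        digit = x_decoded[0].reshape(digit_size, digit_size)
        figure[i * digit_size:(i + 1) * digit_size,
               j * digit_size:(j + 1) * digit_size] = digit

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()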
                activity_regularizer=regularizers.l1(10e-5))(input_dim)
decoded = Dense(ncol, activation='sigmoid')(encoded)

autoencoder = Model(inputs=input_dim, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')
history = autoencoder.fit(S_X_train, S_X_train, epochs=1000, batch_size=15,
                          shuffle=True,
                          validation_data=(S_X_test, S_X_test), verbose=0)

# the encoder extracts the reduced dimension from the autoencoder above
encoder = Model(inputs=input_dim, outputs=encoded)
encoded_input = Input(shape=(encoding_dim, ))
encoded_out = encoder.predict(S_X_test)
encoded_out2 = encoder.predict(S_X_train)
result = encoder.predict(Xs)

# print shapes
print(encoded_out.shape, encoded_out2.shape)

# plot all losses
print(history.history.keys())
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
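# A quick sanity check on the trained autoencoder: per-sample reconstruction
# error on the held-out set. A minimal sketch, assuming numpy is imported as
# np and S_X_test is a 2D array.
reconstructed = autoencoder.predict(S_X_test)
recon_mse = np.mean((S_X_test - reconstructed) ** 2, axis=1)
print('mean reconstruction MSE: %.6f' % recon_mse.mean())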