def predictResult(x_train, y_train, y_test, x_test):
    """Fit the global random-forest model, print its test-set metrics, and
    classify the single record stored in /tmp/predict_result.csv.

    Returns the prediction array for that single record.
    """
    # Load the record to be classified and keep every column except the label.
    sample = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_cols = sample.columns[sample.columns != columnResultName]
    sample_features = sample[feature_cols]
    sample_features = Normalizer().fit_transform(sample_features)

    # Train, persist, then reload the model (round-trips through disk so the
    # serialized artifact is what actually gets used for prediction).
    randomForest.fit(x_train, y_train)
    dump(randomForest, 'randomForest.model')
    model = load('randomForest.model')

    # Evaluate on the held-out test split.
    test_predictions = model.predict(x_test)
    print("predicao:", test_predictions)
    print("Matriz de Confusao LR:")
    print(cfm(y_test, test_predictions))
    print("F1 score LR:")
    print(f1s(y_test, test_predictions))
    print("Precision score LR:")
    print(ps(y_test, test_predictions))
    print("Recall score LR:")
    print(rs(y_test, test_predictions))
    print("Classification Report")
    print(cr(y_test, test_predictions))

    # Classify the single external record and hand it back to the caller.
    single_prediction = model.predict(sample_features)
    print("predico unica", single_prediction)
    return single_prediction
def predictResult(betterN, x_train, y_train, y_test, x_test):
    """Optionally (re)fit the global KNN model, print its test-set metrics,
    and classify the single record stored in /tmp/predict_result.csv.

    betterN > 0 sets n_neighbors and triggers a fit; betterN <= 0 means the
    model was already loaded elsewhere, so fitting is skipped.
    Returns the prediction array for the single record.
    """
    # Load the record to be classified and keep every column except the label.
    sample = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_cols = sample.columns[sample.columns != columnResultName]
    sample_features = np.array(sample[feature_cols])

    # When no betterN is given the model came from a saved file — don't refit.
    if betterN > 0:
        knn.n_neighbors = betterN
        knn.fit(x_train, y_train)

    # Evaluate on the held-out test split.
    test_predictions = knn.predict(x_test)
    print("predicao: a", test_predictions)
    print("Matriz de Confusao NB:")
    print(cfm(y_test, test_predictions))
    print("F1 score NB:")
    print(f1s(y_test, test_predictions))
    print("Precision score NB:")
    print(ps(y_test, test_predictions))
    print("Recall score NB:")
    print(rs(y_test, test_predictions))
    print("Classification Report")
    print(cr(y_test, test_predictions))

    # Classify the single external record and hand it back to the caller.
    single_prediction = knn.predict(sample_features)
    print("predico unica", int(single_prediction[0]))
    print("predicao unica score")
    print(single_prediction)
    return single_prediction
def predictResult(x_train, y_train, y_test, x_test): data2 = pd.read_csv("/tmp/predict_result.csv", header=0) # vamos percorrer o arquivo com o valor a ser testado, onde vamos pegar as colunas e jogar os valores numa array cols2 = data2.columns[(data2.columns != columnResultName)] fts2 = data2[cols2] fts2 = Normalizer().fit_transform(fts2) scores = cross_val_score(logisticR, x_train, y_train, n_jobs=30) print("scores cross val") print(scores) logisticR.fit(x_train, y_train) dump(logisticR, 'logistic.model') logisticLoaded = load('logistic.model') prFit = logisticLoaded.predict(x_test) print("predicao:", prFit) print("Matriz de Confusao LR:") print(cfm(y_test, prFit)) print("F1 score LR:") print(f1s(y_test, prFit)) print("Precision score LR:") print(ps(y_test, prFit)) print("Recall score LR:") print(rs(y_test, prFit)) print("Classification Report") print(cr(y_test, prFit)) print("Accuracy score") print(asc(y_test, prFit)) class_names = [0, 1] # name of classes fig, ax = plt.subplots() tick_marks = np.arange(len(class_names)) plt.xticks(tick_marks, class_names) plt.yticks(tick_marks, class_names) # create heatmap sns.heatmap(pd.DataFrame(cfm(y_test, prFit)), annot=True, cmap="YlGnBu", fmt='g') ax.xaxis.set_label_position("top") plt.tight_layout() plt.title('Confusion matrix', y=1.1) plt.ylabel('Actual label') plt.xlabel('Predicted label') plt.show() y_pred_proba = logisticLoaded.predict_proba(x_test)[::, 1] fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba) auc = metrics.roc_auc_score(y_test, y_pred_proba) plt.plot(fpr, tpr, label="data 1, auc=" + str(auc)) plt.legend(loc=4) plt.show() pr1 = logisticLoaded.predict(fts2) print("predico unica", pr1) return pr1
# Per-position report: one tab-separated row per trade taken.
print('timestamp, outcome, bars, stoch osc, spread, volume')
for ts, outcome, bars, osc, spread, vol, boll in zip(
        candles.loc[take, 'timestamp'], results, rwmin[direction][take],
        so[take], spreads, volume, bollinger):
    print('{}\t{}\t{}\t{:.0f}\t{:.4f}\t{}\t{}'.format(
        ts, outcome, bars, osc, spread, vol, boll))

# Precision of each signal — and of both combined — against actual outcomes.
print()
print('Presision score: \t\t{:.02}\t from {}'.format(
    results.mean(), results.shape[0]))
print('Bollinger Presision score: \t{:.2f}\t from {}'.format(
    ps(results, bollinger), bollinger.sum()))
print('Stochastic Presision score: \t{:.2f}\t from {}'.format(
    ps(results, stochastic), stochastic.sum()))
both_signals = np.logical_and(stochastic, bollinger)
print('Combined Presision score: \t{:.2f}\t from {}'.format(
    ps(results, both_signals), both_signals.sum()))

# Binomial-test p-value for the overall outcome.
print('Binom: \t{:.6f}'.format(binom))

# Stochastic-oscillator bucket statistics (mean, count, binomial p-value).
print('so 10: {:.2f}, {}, {:.2f}'.format(
    so10.mean(), so10.shape[0],
    binom_test(so10.sum(), so10.shape[0], results.mean())))
# Fit the three candidate models on the training split.
rfc = RFC().fit(X_train, y_train)
lr = LR().fit(X_train, y_train)
knn = KNN().fit(X_train, y_train)

# Predict on the held-out test split.
y_pred_rfc = rfc.predict(X_test)
y_pred_lr = lr.predict(X_test)
y_pred_knn = knn.predict(X_test)

# Collect metrics for each model.
# BUG FIX: the original called the metrics as (y_pred, y_test), but the
# sklearn signature — and every other call site in this file — is
# (y_true, y_pred). For precision/recall the swap silently exchanges the
# two metrics; arguments are now ordered (y_test, y_pred).
accs_rfc.append(acc(y_test, y_pred_rfc))
accs_lr.append(acc(y_test, y_pred_lr))
accs_knn.append(acc(y_test, y_pred_knn))
ps_rfc.append(ps(y_test, y_pred_rfc))
ps_lr.append(ps(y_test, y_pred_lr))
ps_knn.append(ps(y_test, y_pred_knn))
rs_rfc.append(rs(y_test, y_pred_rfc))
rs_lr.append(rs(y_test, y_pred_lr))
rs_knn.append(rs(y_test, y_pred_knn))
print(i)  # progress marker for the current iteration

# ==============================
# examine performances of all models
"""
Note
- can see that across all metrics, logistic regression performs best
"""
print('candles') candles = get_candles(currency, granularity, _from, _to, ) print('rwo') rwo, rwmin = get_position_bars(candles, position) print('so') ''' so = stochastic_oscillator(candles, 15, 5) # 10 5 seems descent so far #print(cr(rwo[1], so > 98)) #print(ps(rwo[1], so > 98)) max_get = 0 tracker = () for i in range(1, 250): for j in range(1, 250): so = stochastic_oscillator(candles, i, j) if ps(rwo[1], so > 80) > max_get: max_get = ps(rwo[1], so > 80) tracker = (i, j) print(tracker) if __name__ == '__maine_':
bollinger = (bb.sma < bb.midclose)[take] else: bollinger = (bb.sma >= bb.midclose)[take] # Print Results for each position print('timestamp, outcome, bars, stoch osc, spread, volume') for each in list( zip(candles.loc[take, 'timestamp'], results, rwmin[direction][take], so[take], spreads, volume, bollinger)): print('{}\t{}\t{}\t{:.0f}\t{:.4f}\t{}\t{}'.format( each[0], each[1], each[2], each[3], each[4], each[5], each[6])) # Print Precision Scores print() print('Presision score: \t\t{:.02}\t from {}'.format( results.mean(), results.shape[0])) print('Bollinger Presision score: \t{:.2f}\t from {}'.format( ps(results, bollinger), bollinger.sum())) print('Stochastic Presision score: \t{:.2f}\t from {}'.format( ps(results, stochastic), stochastic.sum())) print('Combined Presision score: \t{:.2f}\t from {}'.format( ps(results, np.logical_and(stochastic, bollinger)), np.logical_and(stochastic, bollinger).sum())) # Print Binom print('Binom: \t{:.6f}'.format(binom)) # Print stochastic Indicator Statsitcs print('so 10: {:.2f}, {}, {:.2f}'.format( so10.mean(), so10.shape[0], binom_test(so10.sum(), so10.shape[0], results.mean()))) print('so 20: {:.2f}, {}, {:.2f}'.format( so20.mean(), so20.shape[0], binom_test(so20.sum(), so20.shape[0], results.mean())))
# Feature matrix and target column for the COVID-result classifier.
X = data[['gender','age','fever','dry cough','difficulty in breathing','tiredness','soar_throat','nasal_congestion','diff_symptoms']]
Y = data['result']

# Split the data set into training and testing partitions.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Train the model on the TRAINING split only.
rf = RandomForestClassifier(n_estimators=50, random_state=1)
# BUG FIX: the original called rf.fit(X, Y), training on the full dataset —
# X_test was therefore seen during training and every score below was
# inflated by data leakage. Fit on the training partition instead (as the
# commented-out line in the original already intended).
rf.fit(X_train, Y_train)

# Predict the held-out test values and compute the evaluation metrics.
pred = np.array(rf.predict(X_test))
recall = rs(Y_test, pred)
precision = ps(Y_test, pred)
f1 = fs(Y_test, pred)
ma = rf.score(X_test, Y_test)  # mean accuracy on the test split

# Print all scores.
print('*** Evaluation metrics for test dataset ***\n')
print('Recall Score: ', recall)
print('Precision Score: ', precision)
print('F1 Score: ', f1)
print('Accuracy: ', ma)

# Side-by-side table of actual vs. predicted labels.
a = pd.DataFrame(Y_test)
a['pred'] = rf.predict(X_test)
print('\n\tTable 3\n')
print(a.head())
# Evaluate a KNN classifier for the current neighbour count, then a linear SVM.
# (Python 2 print statements.)
print "Quantidade Vizinhos:", neighbors[n]
knn3 = KNeighborsClassifier(n_neighbors=neighbors[n])
knn3.fit(x_train, y_train)
# Training accuracy, then metrics on the held-out test split.
print "Accuracy Training KNN:", knn3.score(x_train, y_train)
predictions = knn3.predict(x_test)
accuracy = metrics.accuracy_score(y_test, predictions)
print "Accuracy Test KNN:", accuracy
print "Matriz de Confusao KNN:"
print cfm(y_test, predictions)
print "F1 score KNN:"
print f1s(y_test, predictions)
print "Precision score KNN:"
print ps(y_test, predictions)
print "Recall score KNN:"
print rs(y_test, predictions)
# SVM with a linear kernel.
# NOTE(review): this assignment rebinds the name `svm` from the imported
# module to a classifier instance, so any later `svm.SVC(...)` call in this
# file would fail. Rename the instance (e.g. svm_linear) once all of its
# uses are visible.
svm = svm.SVC(kernel='linear', C=1.0)
svm.fit(x_train, y_train)
predictionsSvm = svm.predict(x_test)
# NOTE(review): accuracy_score args are (pred, true) here vs (true, pred)
# above — harmless for accuracy (symmetric) but inconsistent.
accuracySvm = metrics.accuracy_score(predictionsSvm, y_test)
print "SVM LINEAR Accuracy Test:", accuracySvm
print "Matriz de Confusao SVM LINEAR:"