def runOnFullData(trainFile, testFile=None, NO_OF_EXECUTION=1, flag=None):
    """
    Desc : Run all the classifiers on the full train/test data, based on the
           model selected.

    Args:
        trainFile: trainfilePath
        testFile: testFilePath
        NO_OF_EXECUTION: Number of executions (default=1)
        flag: Flag=1 -- Model_1 , Flag_2=2 -- Model_2

    Returns:
        None
    """
    if flag == 1:
        ### Business Features
        TrainDataset = loadData(trainFile)
        # BUG FIX: the test set was previously loaded from trainFile, so the
        # model was evaluated on its own training data.
        TestDataset = loadData(testFile)
    else:
        ### Sentiment Features
        TrainDataset = loadDataSenti(trainFile)
        TestDataset = loadDataSenti(testFile)

    print('\n Train dataset after loading : {0}'.format(TrainDataset.shape))
    print('\n Test dataset after loading : {0}'.format(TestDataset.shape))

    # Last column is the target; everything before it is the feature matrix.
    No_Of_Cols = TrainDataset.shape[1] - 1
    Xtrain = TrainDataset[:, 0:No_Of_Cols]
    Ytrain = TrainDataset[:, No_Of_Cols]
    Xtest = TestDataset[:, 0:No_Of_Cols]
    Ytest = TestDataset[:, No_Of_Cols]

    # NOTE(review): 'SVM_linear' is built with the same default-kernel call
    # as 'SVM_rbf' (algs.SVM()); it probably should pass 'linear' the way
    # 'SVM_Sigmoid' passes 'sigmoid' -- confirm against algs.SVM's signature.
    classalgs = {  # 'Random': algs.Classifier(),
        'Naive Bayes': algs.NaiveBayes(),
        'Logistic Regression': algs.LogisticRegression(),
        'GradientBoostingRegressor': algs.GradientBoost(),
        'SVM_rbf': algs.SVM(),
        'SVM_linear': algs.SVM(),
        'SVM_Sigmoid': algs.SVM('sigmoid'),
        'DecisionTreeRegressor': algs.DecisionTreeReg(),
        'DecisionTreeClassifier': algs.DecisionTreeClassifier(),
        'RandomForestRegressor': algs.RandForest()
    }

    for learnername, learner in classalgs.items():
        print('Running learner = {0}'.format(learnername))
        # Train model
        learner.learn(Xtrain, Ytrain)
        # Test model
        predictions = learner.predict(Xtest)
        accuracy = getAccuracy(Ytest, predictions)
        print('Accuracy for ' + learnername + ': ' + str(accuracy))
        accuracy = RMSE_Evaluation(Ytest, predictions)
        print('RMSE Error for ' + learnername + ': ' + str(accuracy))
        accuracy = r_square_Evaluation(Ytest, predictions)
        print('R2 Score for ' + learnername + ': ' + str(accuracy))
def runClassifiers(self, dataset, testDataset):
    """Train each configured learner on *dataset* and write its
    predictions for *testDataset* to a per-learner output CSV.

    The last column of *dataset* is the label; every preceding column is
    a feature.  *testDataset* is used as-is for prediction.
    """
    n_features = dataset.shape[1] - 1
    feats = dataset[:, 0:n_features]
    labels = dataset[:, n_features]
    print('Split into train={0} and test={1} '.format(feats.shape, testDataset.shape))

    # Hyper-parameter sweep: several regularization strengths per family.
    classalgs = {
        'Logistic Regression_10': algs.LogisticRegression(C=10),
        'Logistic Regression_1': algs.LogisticRegression(C=1),
        'Logistic Regression_.1': algs.LogisticRegression(C=0.1),
        'Logistic Regression_.01': algs.LogisticRegression(C=0.01),
        'Logistic Regression_.001': algs.LogisticRegression(C=0.001),
        'Gauassian SVM_300': algs.SVM(C=300),
        'Gauassian SVM_200': algs.SVM(C=200),
        'Gauassian SVM_100': algs.SVM(C=100),
        'Gauassian SVM_50': algs.SVM(C=50),
        'Gauassian SVM_20': algs.SVM(C=20),
    }

    # Run every learner and dump its predictions to its own CSV file.
    for name, model in classalgs.items():
        print('Running learner = ' + name)
        model.learn(feats, labels)
        preds = model.predict(testDataset)
        self.prediction = preds
        print(preds)
        self.writeFile('output_' + name + '.csv')
Ytest = test[:,numinputs] print('\n XTrain : {0} \n YTrain : {1} \n Xtest : {2} \n YTest : {3}').format(Xtrain.shape,Ytrain.shape,Xtest.shape,Ytest.shape) ### Run the classifiers classalgs = {#'Logistic Regression_10' : algs.LogisticRegression(C=10), #'Logistic Regression_1' : algs.LogisticRegression(C=1), 'Logistic Regression_.1' : algs.LogisticRegression(C=0.1), #'Logistic Regression_.01' : algs.LogisticRegression(C=0.01), #'Logistic Regression_.001' : algs.LogisticRegression(C=0.001), #'GradientBoostingClassifier_10' : algs.GradientBoost(n_estimators=10), #'GradientBoostingClassifier_5' : algs.GradientBoost(n_estimators=5), #'GradientBoostingClassifier_4' : algs.GradientBoost(n_estimators=4), #'GradientBoostingClassifier_3' : algs.GradientBoost(n_estimators=3), #'GradientBoostingClassifier_2' : algs.GradientBoost(n_estimators=2), #'Gauassian SVM_300' :algs.SVM(C=300), #'Gauassian SVM_200' :algs.SVM(C=200), 'Gauassian SVM_100' :algs.SVM(C=100), #'Gauassian SVM_50' :algs.SVM(C=50), #'Gauassian SVM_20' :algs.SVM(C=20), #'Neural Net_4':algs.backPropogation((Xtrain.shape[1],4,1)), #'Neural Net_8':algs.backPropogation((Xtrain.shape[1],8,1)), #'Neural Net_16':algs.backPropogation((Xtrain.shape[1],16,1)), #'Neural Net_32':algs.backPropogation((Xtrain.shape[1],32,1)), #'Neural Net_64':algs.backPropogation((Xtrain.shape[1],64,1)) } accuracyD={} for learnername, learner in classalgs.iteritems(): print 'Running learner = ' + learnername learner.learn(Xtrain, Ytrain) predictions = learner.predict(Xtest) recall = util.getRecall(Ytest, predictions)
def execute_with_algorithm(alg, X, y, fname, headers, out_dir, record_id, target_id, feature_selection):
    '''Execute the learning task using the specified algorithm.

    Args:
        alg: one of 'DT', 'RF', 'RFsmall', 'SVM', 'LR'
        X: feature matrix (n_samples x n_features)
        y: target vector
        fname: base name used for every output file
        headers: column names for X
        out_dir: directory prefix where result files are written
        record_id, target_id: carried by the caller; unused in this function
        feature_selection: if truthy, keep at most k Pearson-selected,
            mutually de-correlated features before training

    Returns:
        (model, best_features, [fname, fpr, tpr, auc]) on success, or
        None when the algorithm is unknown or produced no results.
    '''
    k = 30  # maximum number of features kept by feature selection

    if feature_selection:
        print(' ...performing feature selection')
        if X.shape[1] < k:
            k = X.shape[1]

        # Pearson correlation of every feature with the target; all-zero
        # columns get correlation 0 so they are never selected.
        pearsons = []
        pearsons_print = []
        for i in range(X.shape[1]):
            if sum(np.asarray(X[:, i])) != 0:
                p = pearsonr(np.squeeze(np.asarray(X[:, i])), y)
                pearsons.append(abs(p[0]))
                pearsons_print.append(p[0])
            else:
                pearsons.append(0)
                pearsons_print.append(0)

        # Feature indices ordered by decreasing |correlation| with the target.
        sorted_features = np.array(pearsons).argsort()[:][::-1]

        # Greedily keep the strongest features, dropping any later candidate
        # that is highly correlated (|r| >= 0.7) with a kept feature.
        best_features = []
        remove_list = []
        i = 0
        # BUG FIX: also stop when the candidate list is exhausted; the
        # original loop could index past the end of sorted_features when
        # more than (n_features - k) candidates were removed as correlated.
        while len(best_features) < k and i < len(sorted_features):
            if not i in remove_list:
                best_features.append(sorted_features[i])
                for j in range(i, X.shape[1]):
                    p = pearsonr(np.asarray(X[:, sorted_features[i]]).tolist(),
                                 np.asarray(X[:, sorted_features[j]]).tolist())
                    if abs(p[0]) >= 0.7:
                        remove_list.append(j)
            i += 1

        old_headers = list(headers)
        headers = [headers[i] for i in best_features]

        # Persist the signed correlation of each selected feature.
        f = open(out_dir + "correlations_" + fname + '.csv', 'w')
        try:
            for header in headers:
                f.write(str(header) + ' & ' + str(float("{0:.2f}".format(pearsons_print[old_headers.index(header)]))) + '\n')
        finally:
            f.close()

        new_X = X[:, best_features]
    else:
        new_X = X
        best_features = 'all'

    print(alg)

    # Dispatch to the requested learning algorithm.
    if alg == 'DT':
        results, model = ML.CART(new_X, y, best_features, out_dir + "{}.dot".format(fname), headers)
    elif alg == 'RF':
        results, features, model = ML.RF(new_X, y, best_features, n_estimators=100)
    elif alg == 'RFsmall':
        results, features, model = ML.RF(new_X, y, best_features, n_estimators=10)
    elif alg == 'SVM':
        results, model = ML.SVM(new_X, y, best_features)
    elif alg == 'LR':
        results, features, model = ML.LR(new_X, y, best_features)
    else:
        # BUG FIX: an unknown algorithm previously raised NameError on the
        # unbound 'results'; report it and bail out explicitly instead.
        print('unknown algorithm: ' + str(alg))
        return

    if not results:
        return

    in_out.save_results(out_dir + fname + '.csv', ["fpr", "tpr", "auc", "cm"], results, [sum(y), len(y)])

    # Feature importances exist only for the algorithms that set 'features'
    # (RF / RFsmall / LR above).
    if 'features' in locals():
        features = features.flatten()
        in_out.save_features(out_dir + "features_" + fname + '.csv', zip(headers[1:-1], features))

    return model, best_features, [fname] + results[0:3]
def runClassifier(trainFile, testFile=None, NO_OF_EXECUTION=1, flag=None):
    """
    Desc : K-fold cross validation followed by classifier executions.
           Display results 'Accuracy','RMSE score','R2-Score'

    Args:
        trainFile: trainfilePath
        testFile: testFilePath
        NO_OF_EXECUTION: Number of executions (default=1)
        flag: Flag=1 -- Model_1 , Flag_2=2 -- Model_2

    Returns:
        None
    """
    # Model_1 uses business features; anything else uses sentiment features.
    if flag == 1:
        dataset = loadData(trainFile)
    else:
        dataset = loadDataSenti(trainFile)

    print('\n Size of dataset after loading : {0}'.format(dataset.shape))

    fold_no = 1
    for train_index, test_index in KfoldCrossValidation(dataset):
        # Evaluate every learner once per fold.
        print('\n For Fold : {0}'.format(fold_no))
        fold_no += 1

        fold_train, fold_test = dataset[train_index], dataset[test_index]
        print('\n Train : {0} \n Test : {1}'.format(fold_train.shape, fold_test.shape))

        trainset, testset = getSplitNew(fold_train, fold_test)
        print('Running on train={0} and test={1} samples'.format(trainset[0].shape, testset[0].shape))

        learners = {
            'Naive Bayes': algs.NaiveBayes(),
            'Logistic Regression': algs.LogisticRegression(),
            'GradientBoostingRegressor': algs.GradientBoost(),
            'SVM_rbf': algs.SVM(),
            'SVM_linear': algs.SVM(),
            'SVM_Sigmoid': algs.SVM('sigmoid'),
            'DecisionTreeRegressor': algs.DecisionTreeReg(),
            'DecisionTreeClassifier': algs.DecisionTreeClassifier(),
            'RandomForestRegressor': algs.RandForest(),
        }

        for name, learner in learners.items():
            print('Running learner = {0}'.format(name))
            # Train on the fold's train split, score on its test split.
            learner.learn(trainset[0], trainset[1])
            predictions = learner.predict(testset[0])
            print('Accuracy for ' + name + ': ' + str(getAccuracy(testset[1], predictions)))
            print('RMSE Error for ' + name + ': ' + str(RMSE_Evaluation(testset[1], predictions)))
            print('R2 Score for ' + name + ': ' + str(r_square_Evaluation(testset[1], predictions)))