def getData(fileNameParam):
    """Load the dataset named by *fileNameParam* and return it split in two.

    Returns a (trainData, testData) pair taken from the first two elements
    of IO_.giveTestAndTrainingData's result.
    """
    split = IO_.giveTestAndTrainingData(fileNameParam)
    return split[0], split[1]
def experiemnt_random_forest(fileNameParam): import exp_x_classifiers , IO_ testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) #print testAndTrainData print "This is 'experiemnt_random_forest' " # settign up train data trainData = testAndTrainData[0] #print trainData original_rows = trainData.shape[0] original_cols = trainData.shape[1] print "Size of training data : rows: {}, columns: {}".format( original_rows , original_cols ) # settign up test data testData = testAndTrainData[1] #print testData for selCount in xrange(original_cols): count_ = selCount + 1 if count_ <= original_cols: slected_training_data = giveSelectedTrainingData(trainData, testData, count_ ) print "################# No. of features to work with={} ############".format(count_) print "Size of selected training data : ", slected_training_data.shape emperiemntSplitters=[float(x)/float(10) for x in xrange(10) if x > 0] for elem in emperiemntSplitters: #print "Training size: {} %".format(float(elem*100)) exp_x_classifiers.runRandomForest(slected_training_data, testData, elem)
def experiemnt_CART(fileNameParam): import exp_x_classifiers, IO_ testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) print "This is 'experiemnt_CART' " # settign up train data trainData = testAndTrainData[0] original_rows = trainData.shape[0] original_cols = trainData.shape[1] print "Size of training data : rows: {}, columns: {}".format( original_rows, original_cols) # settign up test data testData = testAndTrainData[1] # for selCount in xrange(original_cols): # count_ = selCount + 1 # if count_ < original_cols: slected_training_data = giveSelectedTrainingData(trainData, testData, original_cols) print "################# No. of features to work with={} ############".format( original_cols) print "Size of selected training data : ", slected_training_data.shape emperiemntSplitters = [float(x) / float(10) for x in xrange(10) if x > 0] for elem in emperiemntSplitters: #print "Training size: {} %".format(float(elem*100)) param_exp_classifier.runCART(slected_training_data, testData, elem)
def experiemnt_gaussian_naive_bayes(fileNameParam): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) print "This is 'experiemnt_gaussian_naive_bayes' " # settign up train data trainData = testAndTrainData[0] original_rows = trainData.shape[0] original_cols = trainData.shape[1] print "Size of training data : rows: {}, columns: {}".format( original_rows, original_cols) # settign up test data testData = testAndTrainData[1] for selCount in xrange(original_cols): count_ = selCount + 1 if count_ <= original_cols: slected_training_data = giveSelectedTrainingData( trainData, testData, count_) print "################# No. of features to work with={} ############".format( count_) print "Size of selected training data : ", slected_training_data.shape emperiemntSplitters = [ float(x) / float(10) for x in xrange(10) if x > 0 ] for elem in emperiemntSplitters: #print "Training size: {} %".format(float(elem*100)) exp_x_classifiers.runGNB(slected_training_data, testData, elem)
def experiment_mobilesoft_knn(fileNameParam): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) trainData = testAndTrainData[0] testData = testAndTrainData[1] #print trainData selected_training_data = getPCAedFeatures(trainData) print "Size of selected training data : ", np.shape(selected_training_data) print "=" * 50 exp_x_classifiers.runKNN(selected_training_data, testData, 0.90) print "=" * 50
def mobilesoft_cart(fileNameParam, fileToWriteP): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) trainData = testAndTrainData[0] testData = testAndTrainData[1] selected_training_data = pca_mobilesoft.getPCAedFeatures(trainData) print "Size of selected training data : ", np.shape(selected_training_data) print "=" * 50 dict_of_results = param_exp_classifier.runCART(selected_training_data, testData, 0.90) reportStr = param_exp_analysis.analyzeThis(dict_of_results) IO_.writeStrToFile(fileToWriteP, reportStr)
def speedup_random_forest(fileNameParam, fileToWriteP): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) trainData = testAndTrainData[0] testData = testAndTrainData[1] #print trainData selected_training_data = pca_mobilesoft.getPCAedFeatures(trainData) print "Size of selected training data : ", np.shape(selected_training_data) print "="*50 dict_of_results = runRandomForest(selected_training_data, testData) reportStr = param_exp_analysis.analyzeThis(dict_of_results) IO_.writeStrToFile(fileToWriteP, reportStr)
def mobilesoft_cart(fileNameParam, fileToWriteP): indexVector = [0, 5, 10, 12, 13, 18, 19, 20] testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) trainData = testAndTrainData[0] testData = testAndTrainData[1] selected_training_data = createMobileSoftFeatures(trainData, indexVector) print "Size of selected training data : ", np.shape(selected_training_data) print "="*50 print "Glimpse at selected features (10th entry): \n", selected_training_data.iloc[9, :] print "="*50 print "Glimpse at labels (10th entry): \n", testData.iloc[9] print "="*50 dict_of_results = param_exp_classifier.runCART(selected_training_data, testData, 0.90) reportStr = param_exp_analysis.analyzeThis(dict_of_results) IO_.writeStrToFile(fileToWriteP, reportStr)
def experiment_mobilesoft_knn(fileNameParam, indexVector): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) trainData = testAndTrainData[0] testData = testAndTrainData[1] #print trainData selected_training_data = createMobileSoftFeatures(trainData, indexVector) print "Size of selected training data : ", np.shape(selected_training_data) print "=" * 50 print "Glimpse at selected features (10th entry): \n", selected_training_data.iloc[ 9, :] print "=" * 50 print "Glimpse at labels (10th entry): \n", testData.iloc[9] print "=" * 50 exp_x_classifiers.runKNN(selected_training_data, testData, 0.90) print "=" * 50
def experiemnt_random_forest(fileNameParam, fileToWriteP): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) print "This is 'experiemnt_random_forest' " # settign up train data trainData = testAndTrainData[0] original_rows = trainData.shape[0] original_cols = trainData.shape[1] print "Size of training data : rows: {}, columns: {}".format( original_rows , original_cols ) # settign up test data testData = testAndTrainData[1] dict_of_results = param_exp_classifier.runRandomForest(trainData, testData) reportStr = param_exp_analysis.analyzeThis(dict_of_results) IO_.writeStrToFile(fileToWriteP, reportStr)
def experiemnt_random_forest(fileNameParam, fileToWriteP): testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam) print "This is 'experiemnt_random_forest' " # settign up train data trainData = testAndTrainData[0] original_rows = trainData.shape[0] original_cols = trainData.shape[1] print "Size of training data : rows: {}, columns: {}".format( original_rows, original_cols) # settign up test data testData = testAndTrainData[1] dict_of_results = param_exp_classifier.runRandomForest(trainData, testData) reportStr = param_exp_analysis.analyzeThis(dict_of_results) IO_.writeStrToFile(fileToWriteP, reportStr)
def runSVM(fileNamaParam, trainizingSizeParam):
    """Train and evaluate an RBF-kernel SVM on the named dataset.

    *trainizingSizeParam* is the training fraction; the complement is passed
    to cross_validation.train_test_split (old sklearn API) as test_size with
    random_state=0, then svm.SVC(kernel='rbf', C=1) is fit and scored via
    evalClassifier.
    """
    # complement of the training fraction
    testSplitSize = 1.0 - trainizingSizeParam
    pair = IO_.giveTestAndTrainingData(fileNamaParam)
    trainData = pair[0]
    testData = pair[1]
    # split features/labels into train and test portions
    (features_train, features_test,
     labels_train, labels_test) = cross_validation.train_test_split(
        trainData, testData, test_size=testSplitSize, random_state=0)
    # fit the model and predict on the held-out portion
    model = svm.SVC(kernel='rbf', C=1).fit(features_train, labels_train)
    predictions = model.predict(features_test)
    evalClassifier(labels_test, predictions)
def runCART(fileNamaParam, trainizingSizeParam):
    """Train and evaluate a CART decision tree on the named dataset.

    *trainizingSizeParam* is the training fraction; the complement goes to
    cross_validation.train_test_split (old sklearn API) with random_state=0.
    A DecisionTreeClassifier is fit and scored via evalClassifier.
    (The original's local name 'theQDAModel' was misleading -- it is a
    decision tree, so it is renamed here.)
    """
    # complement of the training fraction
    testSplitSize = 1.0 - trainizingSizeParam
    pair = IO_.giveTestAndTrainingData(fileNamaParam)
    trainData = pair[0]
    testData = pair[1]
    # split features/labels into train and test portions
    (features_train, features_test,
     labels_train, labels_test) = cross_validation.train_test_split(
        trainData, testData, test_size=testSplitSize, random_state=0)
    # fit the tree and predict on the held-out portion
    cart_model = DecisionTreeClassifier()
    cart_model.fit(features_train, labels_train)
    predictions = cart_model.predict(features_test)
    evalClassifier(labels_test, predictions)