def readCross(num,type,select_feature,numtrees):
    """Run 10-fold cross-validation of RF on the combined ("_all") CSV file.

    The input path is assembled as
    ``resultFile + '_' + type + '_' + num + '_' + select_feature + '_all.csv'``
    (``resultFile`` is a module-level name defined outside this function).
    The last attribute of the loaded data set is used as the class attribute.

    Parameters:
        num            -- string fragment of the file name
        type           -- string fragment of the file name
        select_feature -- string fragment of the file name
        numtrees       -- value handed to ``RF.setNumTrees``

    Side effects:
        Prints the evaluation summary, the per-class details and the
        confusion matrix to standard output.

    Returns:
        A list with, for class index 1 and in this order: precision, recall,
        F-measure, Matthews correlation coefficient, true positives,
        false positives, true negatives, false negatives, area under ROC.
    """
    csv_path = '_'.join([resultFile, type, num, select_feature]) + '_all.csv'

    csv_loader = CSVLoader()
    csv_loader.setSource(File(csv_path))
    dataset = csv_loader.getDataSet()
    # The class attribute is the last column of the CSV.
    dataset.setClassIndex(dataset.numAttributes() - 1)

    forest = RF()
    forest.setNumTrees(numtrees)

    # Per-instance predictions are collected in a buffer (never printed here).
    prediction_buffer = StringBuffer()
    prediction_sink = PlainText()
    prediction_sink.setHeader(dataset)
    prediction_sink.setBuffer(prediction_buffer)
    prediction_sink.setOutputDistribution(True)
    attribute_range = Range()
    with_distributions = Boolean(True)

    cv_eval = Evaluation(dataset)
    # Fixed seed (1) and 10 folds.
    cv_eval.crossValidateModel(
        forest,
        dataset,
        10,
        Random(1),
        [prediction_sink, attribute_range, with_distributions],
    )

    # A parenthesised single-argument print behaves exactly like the
    # Python 2 print statement.
    print(cv_eval.toSummaryString())
    print(cv_eval.toClassDetailsString())
    print(cv_eval.toMatrixString())

    target = 1  # class index every returned metric refers to
    return [
        cv_eval.precision(target),
        cv_eval.recall(target),
        cv_eval.fMeasure(target),
        cv_eval.matthewsCorrelationCoefficient(target),
        cv_eval.numTruePositives(target),
        cv_eval.numFalsePositives(target),
        cv_eval.numTrueNegatives(target),
        cv_eval.numFalseNegatives(target),
        cv_eval.areaUnderROC(target),
    ]
def readFeature(num_features,type,select_feature,numtrees):
    """Train RF on the "_train" CSV file and evaluate it on the "_test" file.

    Both paths share the prefix
    ``resultFile + '_' + type + '_' + num_features + '_' + select_feature``
    (``resultFile`` is a module-level name defined outside this function)
    and end in ``'_train.csv'`` and ``'_test.csv'`` respectively. In each
    data set the last attribute is used as the class attribute.

    Parameters:
        num_features   -- string fragment of the file names
        type           -- string fragment of the file names
        select_feature -- string fragment of the file names
        numtrees       -- value handed to ``RF.setNumTrees``

    Side effects:
        Prints the evaluation summary, the per-class details and the
        confusion matrix to standard output.

    Returns:
        A list with, for class index 1 and in this order: precision, recall,
        F-measure, Matthews correlation coefficient, true positives,
        false positives, true negatives, false negatives, area under ROC.
    """
    path_prefix = '_'.join([resultFile, type, num_features, select_feature])
    train_path = path_prefix + '_train.csv'
    test_path = path_prefix + '_test.csv'

    csv_loader = CSVLoader()
    csv_loader.setSource(File(train_path))
    train_set = csv_loader.getDataSet()
    # The class attribute is the last column of the CSV.
    train_set.setClassIndex(train_set.numAttributes() - 1)

    forest = RF()
    forest.setNumTrees(numtrees)
    forest.buildClassifier(train_set)

    # The same loader object is pointed at the test file.
    # NOTE(review): the two CSV files are parsed independently and nothing
    # here checks that their headers are compatible -- confirm upstream.
    csv_loader.setSource(File(test_path))
    test_set = Instances(csv_loader.getDataSet())
    test_set.setClassIndex(test_set.numAttributes() - 1)

    # Per-instance predictions are collected in a buffer (never printed here).
    prediction_buffer = StringBuffer()
    prediction_sink = PlainText()
    prediction_sink.setHeader(test_set)
    prediction_sink.setBuffer(prediction_buffer)
    attribute_range = Range()
    with_distribution = Boolean(True)

    # The Evaluation object is constructed from the training data and then
    # applied to the held-out test data.
    holdout_eval = Evaluation(train_set)
    holdout_eval.evaluateModel(
        forest,
        test_set,
        [prediction_sink, attribute_range, with_distribution],
    )

    # A parenthesised single-argument print behaves exactly like the
    # Python 2 print statement.
    print(holdout_eval.toSummaryString())
    print(holdout_eval.toClassDetailsString())
    print(holdout_eval.toMatrixString())

    target = 1  # class index every returned metric refers to
    return [
        holdout_eval.precision(target),
        holdout_eval.recall(target),
        holdout_eval.fMeasure(target),
        holdout_eval.matthewsCorrelationCoefficient(target),
        holdout_eval.numTruePositives(target),
        holdout_eval.numFalsePositives(target),
        holdout_eval.numTrueNegatives(target),
        holdout_eval.numFalseNegatives(target),
        holdout_eval.areaUnderROC(target),
    ]