def simple_logistic(trainData,testData,params,exparams):
    heuristicStop = int(float(params[0]))
    numBoostingIterations = int(float(params[1]))
    simplelogistic = SimpleLogistic()
    simplelogistic.setHeuristicStop(heuristicStop)
    simplelogistic.setNumBoostingIterations(numBoostingIterations)
    if (trainData.numInstances()<5):   # special case for small sample size
        simplelogistic.setUseCrossValidation(False) 
    simplelogistic.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(simplelogistic, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(simplelogistic, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
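# The snippets in this file rely on a few helpers that are not shown here:
# str2bool, makeTrainEvalSummary and util.get_buffer_for_predictions. Below is a
# minimal, hypothetical sketch of what they might look like (assuming a Jython
# setup with weka.jar on the classpath); the original project's implementations
# may differ.
from java.lang import StringBuffer
from weka.classifiers.evaluation.output.prediction import PlainText

def str2bool(s):
    # treat "True"/"1"/"yes" (any case) as True, everything else as False
    return str(s).strip().lower() in ('true', '1', 'yes')

def makeTrainEvalSummary(evaluation):
    # assumed to condense the training-set evaluation into something printable
    return evaluation.toSummaryString()

# assumed to live in the util module:
def get_buffer_for_predictions(data):
    # PlainText prediction output backed by a StringBuffer, matching the
    # (output, buffer) pairs unpacked by the callers in this file
    output = PlainText()
    output.setHeader(data)
    buffer = StringBuffer()
    output.setBuffer(buffer)
    return output, buffer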
def bayesian(trainData,testData,params,exparams):
    IsOptMultinomialBayes   = str2bool(params[0]) 
    IsOptNaiveKernelDensity = str2bool(params[1]) 
    if IsOptMultinomialBayes:    # optimal bayesian classifier is multinomial
        bayes = NaiveBayesMultinomial()
    else:
        bayes = NaiveBayes()
        if IsOptNaiveKernelDensity:   # use kernel density estimation
            bayes.setUseKernelEstimator(Boolean(True))   
    bayes.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bayes, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bayes, testData, [testOutput, attRange, outputDistribution])   
    return trainBuffer, testBuffer, trainSummary
def bagging_logistic(trainData,testData,params,exparams):
    IsOptBagOnOptLog = str2bool(params[0])
    logistic = Logistic()
    bagging = Bagging()
    if IsOptBagOnOptLog:    # optimal bagging is based on optimal logistic
        ridge = float(exparams[0])
        maxIt = int(float(exparams[1]))
        logistic.setMaxIts(maxIt)
        bagSizePercent = int(float(params[1]))
        bagging.setBagSizePercent(bagSizePercent)
    else:   # ridge parameter is also optimized in the process
        ridge = float(params[1])
    numIterations = int(float(params[2]))
    bagging.setNumIterations(numIterations)
    logistic.setRidge(ridge)
    bagging.setClassifier(logistic)
    bagging.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
def smo(trainData,testData,params,exparams):
    kerType = str2bool(params[0]) 
    cValue = float(params[1])
    kerParam = float(params[2])
    if kerType:     # RBF kernel
        kernel = RBFKernel()
        kernel.setGamma(kerParam)
    else:       # Polynomial kernel
        kernel = PolyKernel()
        kernel.setExponent(kerParam)
    smo = SMO()
    smo.setKernel(kernel)
    smo.setC(cValue)
    smo.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(smo, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(smo, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
# Example 5
def evaluate_dataset(classifier,data):
    evaluation = Evaluation(data)
    output = PlainText()
    output.setHeader(data)
    eval_buffer = StringBuffer() # buffer to use
    output.setBuffer(eval_buffer)
    options = [output]
    evaluation.evaluateModel(classifier,data,options)
    return evaluation
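# Minimal usage sketch for evaluate_dataset; 'iris.arff' is a made-up file name,
# and FileReader/Instances/J48 are used exactly as in the other examples here.
def example_evaluate_dataset():
    file = FileReader('iris.arff')    # hypothetical ARFF file
    data = Instances(file)
    data.setClassIndex(data.numAttributes() - 1)
    j48 = J48()
    j48.buildClassifier(data)         # only a trained classifier can be evaluated
    print evaluate_dataset(j48, data).toSummaryString()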
def readFeature(num_features,type,select_feature,numtrees):
    #filename1=resultFileTest
    #filename2=resultFileTest2
    filename1=resultFile+'_'+type+'_'+num_features+'_'+select_feature+'_train.csv'
    filename2=resultFile+'_'+type+'_'+num_features+'_'+select_feature+'_test.csv'
    #print filename1
    loader=CSVLoader()
    loader.setSource(File(filename1))
    data=loader.getDataSet()
    #print data.numAttributes()    
    
    data.setClassIndex(data.numAttributes()-1)

    rf=RF()
    rf.setNumTrees(numtrees)
    
    rf.buildClassifier(data)
   
    #print rf
    loader.setSource(File(filename2))
    

    test_data=Instances(loader.getDataSet())
    
    test_data.setClassIndex(test_data.numAttributes()-1)

    
    '''
    num = test_data.numInstances()
    print num
    for i in xrange(num):
        r1 = rf.distributionForInstance(test_data.instance(i))
        r2 = rf.classifyInstance(test_data.instance(i))
        print r1
        print r2
    '''
    buffer = StringBuffer()  # buffer for the predictions
    output=PlainText()
    output.setHeader(test_data)
    output.setBuffer(buffer)
    
    attRange = Range()  # attributes to output
    outputDistribution = Boolean(True)
    evaluator=Evaluation(data)
    evaluator.evaluateModel(rf,test_data,[output,attRange,outputDistribution])
    #print evaluator.evaluateModel(RF(),['-t',filename1,'-T',filename2,'-I',str(numtrees)])
    #evaluator1=Evaluation(test_data)
    print evaluator.toSummaryString()
    print evaluator.toClassDetailsString()
    print evaluator.toMatrixString()
    return [evaluator.precision(1), evaluator.recall(1), evaluator.fMeasure(1),
            evaluator.matthewsCorrelationCoefficient(1),
            evaluator.numTruePositives(1), evaluator.numFalsePositives(1),
            evaluator.numTrueNegatives(1), evaluator.numFalseNegatives(1),
            evaluator.areaUnderROC(1)]
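# Hypothetical usage of readFeature: resultFile is assumed to be a module-level
# path prefix, so these made-up arguments would read
# <resultFile>_rf_100_chi2_train.csv / _test.csv and grow 500 trees.
def example_readFeature():
    metrics = readFeature('100', 'rf', 'chi2', 500)
    precision, recall, fmeasure, mcc, tp, fp, tn, fn, auc = metrics
    print 'precision=%.3f recall=%.3f AUC=%.3f' % (precision, recall, auc)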
def baggin_smo(trainData,testData,params,exparams):
    IsOptBagOnOptSMO =  str2bool(params[0]) 
    if IsOptBagOnOptSMO:    # optimal bagging is based on the optimal SMO, so its extra params are used
        kerType = str2bool(exparams[0])   # kernel type comes from the optimal SMO's extra params
        cValue = float(exparams[1])
        kerParam = float(exparams[2])
        if kerType:     # RBF kernel
            kernel = RBFKernel()
            kernel.setGamma(kerParam)
        else:       # Polynomial kernel
            kernel = PolyKernel()
            kernel.setExponent(kerParam)
        bagSizePercent = int(float(params[1]))
        numIterations = int(float(params[2]))
        smo = SMO()
        bagging = Bagging()
        smo.setKernel(kernel)
        smo.setC(cValue)
        bagging.setBagSizePercent(bagSizePercent)
        bagging.setNumIterations(numIterations)
        bagging.setClassifier(smo)
    else:   # optimal bagging is based on linear SMO
        cValue = float(params[1])
        numIterations = int(float(params[2]))
        smo = SMO()
        bagging = Bagging()
        kernel = PolyKernel()
        smo.setKernel(kernel)
        smo.setC(cValue)
        bagging.setNumIterations(numIterations)
        bagging.setClassifier(smo)
    bagging.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
def do_temporal_cv(t_selector, instances, num_folds):
    num_instances = instances.numInstances()
    results = []
    # Split folds
    for f in xrange(2, num_folds+1):
        print "fold:%d"%f
        for pair in split_temporal_train_test(f, num_instances):
    #        train_start = pair.train_start
    #        train_end = pair.train_end

            train_set = Instances(instances, int(pair.train_start), int(pair.train_end - pair.train_start+1))
            test_set = Instances(instances, int(pair.test_start), int(pair.test_end - pair.test_start +1))

            t_selector.buildClassifier(train_set)
            e = Evaluation(train_set)
            e.evaluateModel(t_selector, test_set)

            if e.recall(0) > 0 and e.precision(0) > 0:
                results.append(Result(instances.numAttributes(), e))


            #            print "precision: %.2f"%evalTest.precision(0)
#            print "recall: %.2f"%evalTest.recall(0)
#            print evalTest.toSummaryString()
    #        System.out.println(strSummary);
    sum_precision = 0
    sum_recall = 0
    for r in results:
#        print "precision:"
#        print r.precision
#        print "recall:"
#        print r.recall
        sum_precision += r.precision
        sum_recall +=r.recall


    precision = sum_precision*1.0/len(results)
    recall = sum_recall*1.0/len(results)
    avg_fmeasure = harmonic_mean([precision, recall])
    print "f_measure:%.2f"%avg_fmeasure
def random_forest(trainData,testData,params,exparams):
    numTrees = int(float(params[0]))
    numFeatures = int(float(params[1]))
    randomforest = RandomForest()
    randomforest.setNumTrees(numTrees)
    randomforest.setNumFeatures(numFeatures)
    randomforest.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(randomforest, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(randomforest, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
def runClassifierAlgo(algo, training_filename, test_filename, do_model, do_eval, do_predict):
    """ Run classifier algorithm <algo> on training data in <training_filename> to build a model
        then run in on data in <test_filename> (equivalent of WEKA "Supplied test set") """
    training_file = FileReader(training_filename)
    training_data = Instances(training_file)
    test_file = FileReader(test_filename)
    test_data = Instances(test_file)

    # set the class Index - the index of the dependent variable
    training_data.setClassIndex(class_index)
    test_data.setClassIndex(class_index)

    # create the model
    algo.buildClassifier(training_data)

    evaluation = None
    # only a trained classifier can be evaluated
    if do_eval or do_predict:
        evaluation = Evaluation(test_data)
        buffer = StringBuffer()  # buffer for the predictions
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        evaluation.evaluateModel(algo, test_data, [buffer, attRange, outputDistribution])

    if verbose:
        if do_model:
            print "--> Generated model:\n"
            print algo.toString()
        if do_eval:
            print "--> Evaluation:\n"
            print evaluation.toSummaryString()
        if do_predict:
            print "--> Predictions:\n"
            print buffer

    return {"model": str(algo), "eval": str(evaluation.toSummaryString()), "predict": str(buffer)}
def adaboostM1_simple_logistic(trainData,testData,params,exparams):
    IsOptBoostOnOptSimpLog = str2bool(params[0])  
    simplelogistic = SimpleLogistic()
    adaboostm = AdaBoostM1()
    if IsOptBoostOnOptSimpLog:  # optimal adaboost is based on the optimal simple logistic
        heuristicStop = int(float(exparams[0]))
        numBoostingIterations = int(float(exparams[1]))
        weightThreshold = int(float(params[1]))
        numIterations = int(float(params[2]))
        simplelogistic.setHeuristicStop(heuristicStop)
        simplelogistic.setNumBoostingIterations(numBoostingIterations)
        adaboostm.setWeightThreshold(weightThreshold)
        adaboostm.setNumIterations(numIterations)       
    else:
        numBoostingIterations = int(float(params[1]))
        numIterations = int(float(params[2]))
        simplelogistic.setNumBoostingIterations(numBoostingIterations)
        adaboostm.setNumIterations(numIterations)       
    adaboostm.setClassifier(simplelogistic)
    adaboostm.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(adaboostm, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(adaboostm, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
def logistic(trainData,testData,params,exparams):
    ridge = float(params[0])
    maxIt = int(float(params[1]))
    print "Ridge=%s, maxIt=%s" %(str(ridge),str(maxIt))
    logistic = Logistic()
    logistic.setMaxIts(maxIt)
    logistic.setRidge(ridge)
    logistic.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(logistic, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(logistic, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
def runClassifierAlgo(algo, class_index, training_filename, test_filename, do_model, do_eval, do_predict):
    """ If <test_filename>
            Run classifier algorithm <algo> on training data in <training_filename> to build a model
            then test on data in <test_filename> (equivalent of Weka "Supplied test set") 
        else
            do 10 fold CV lassifier algorithm <algo> on data in <training_filename>
        
        <class_index> is the column containing the dependent variable 
        
        http://weka.wikispaces.com/Generating+classifier+evaluation+output+manually
        http://weka.sourceforge.net/doc.dev/weka/classifiers/Evaluation.html
    """
    print ' runClassifierAlgo: training_filename= ', training_filename, ', test_filename=', test_filename
    misc.checkExists(training_filename)

    training_file = FileReader(training_filename)
    training_data = Instances(training_file)
    if test_filename:
        test_file = FileReader(test_filename)
        test_data = Instances(test_file)
    else:
        test_data = training_data

    # set the class Index - the index of the dependent variable
    training_data.setClassIndex(class_index)
    test_data.setClassIndex(class_index)

    # create the model
    if test_filename:
        algo.buildClassifier(training_data)

    evaluation = None
    # only a trained classifier can be evaluated
    if do_eval or do_predict:
        evaluation = Evaluation(test_data)
        buffer = StringBuffer()             # buffer for the predictions
        attRange = Range()                  # no additional attributes output
        outputDistribution = Boolean(False) # we don't want distribution
        if test_filename:
            evaluation.evaluateModel(algo, test_data, [buffer, attRange, outputDistribution])
        else:
           # evaluation.evaluateModel(algo, [String('-t ' + training_filename), String('-c 1')])
           # print evaluation.toSummaryString()
            rand = Random(1)
            evaluation.crossValidateModel(algo, training_data, 4, rand)
            if False:
                print 'percentage correct =', evaluation.pctCorrect()
                print 'area under ROC =', evaluation.areaUnderROC(class_index)
                confusion_matrix = evaluation.confusionMatrix()
                for l in confusion_matrix:
                    print '** ', ','.join('%2d'%int(x) for x in l)

    if verbose:
        if do_model:
            print '--> Generated model:\n'
            print algo.toString()
        if do_eval:
            print '--> Evaluation:\n'
            print evaluation.toSummaryString()
        if do_predict:
            print '--> Predictions:\n'
            print buffer

    return {'model':str(algo), 'eval':str(evaluation.toSummaryString()), 'predict':str(buffer) }
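# Hypothetical call of this second runClassifierAlgo variant: passing None as
# test_filename takes the cross-validation branch (crossValidateModel trains its
# own copies of the classifier, so the model is not built explicitly there).
# 'train.arff' and the class index 0 are made-up; misc.checkExists is another
# helper assumed to exist elsewhere in the project.
def example_runClassifierAlgo_cv():
    result = runClassifierAlgo(J48(), 0, 'train.arff', None,
                               do_model=False, do_eval=True, do_predict=False)
    print result['eval']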
# Example 14
file = FileReader(sys.argv[1])
file2 = FileReader(sys.argv[2])
data = Instances(file)
test = Instances(file2)
data.setClassIndex(data.numAttributes() - 1)
test.setClassIndex(test.numAttributes() - 1)
evaluation = Evaluation(data)
buffer = StringBuffer()
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution
nn = MultilayerPerceptron()
nn.buildClassifier(data)  # only a trained classifier can be evaluated

#print evaluation.evaluateModel(nn, ['-t', sys.argv[1], '-T', sys.argv[2]])#;, [buffer, attRange, outputDistribution])
res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution])
f = open('predictions/' + data.relationName(), 'w')
for d in res:
	f.write(str(d) + '\n')
f.close()

SerializationHelper.write("models/" + data.relationName() + ".model", nn)

# print out the built model
#print "--> Generated model:\n"
#print nn

#print "--> Evaluation:\n"
#print evaluation.toSummaryString()

#print "--> Predictions:\n"
# train classifiers
print "Training classifiers..."
for key in algo_keys :
   algo = algo_dict[key]
   algo.buildClassifier(data)

# evaluate classifiers and print a result summary including confusion matrix
my_evaluations = []
for key in algo_keys :
   evaluation = Evaluation(data)
   algo = algo_dict[key]
   buffer = StringBuffer()             # buffer for the predictions
   attRange = Range()                  # no additional attributes output
   outputDistribution = Boolean(False) # we don't want distribution
   evaluation.evaluateModel(algo, data, [buffer, attRange, outputDistribution])
   my_evaluations.append(evaluation)
   print "------------------------------------"
   print algo.__class__.__name__
   print evaluation.toSummaryString()
   confusion_matrix = evaluation.confusionMatrix()  # confusion matrix
   print "Confusion Matrix:"
   for l in confusion_matrix:
       print '** ', ','.join('%2d'%int(x) for x in l)

# example to collect an individual statistic for all evaluated classifiers
print "------------------------------------"
print "Example to collect an individual statistic for all evaluated classifiers"
print "Kappa"
for index in range(len(algo_keys)):
   evaluation = my_evaluations[index]
   print algo_dict[algo_keys[index]].__class__.__name__, evaluation.kappa()
# Example 16
file = FileReader(sys.argv[1])
file2 = FileReader(sys.argv[2])
data = Instances(file)
test = Instances(file2)
data.setClassIndex(data.numAttributes() - 1)
test.setClassIndex(test.numAttributes() - 1)
evaluation = Evaluation(data)
buffer = StringBuffer()
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution
nn = MultilayerPerceptron()
nn.buildClassifier(data)  # only a trained classifier can be evaluated

#print evaluation.evaluateModel(nn, ['-t', sys.argv[1], '-T', sys.argv[2]])#;, [buffer, attRange, outputDistribution])
res = evaluation.evaluateModel(nn, test,
                               [buffer, attRange, outputDistribution])
f = open('predictions/' + data.relationName(), 'w')
for d in res:
    f.write(str(d) + '\n')
f.close()

SerializationHelper.write("models/" + data.relationName() + ".model", nn)

# print out the built model
#print "--> Generated model:\n"
#print nn

#print "--> Evaluation:\n"
#print evaluation.toSummaryString()

#print "--> Predictions:\n"
# Example 17
	buffers = [] ## List of per fold predictions
	weights = [] ## List of per fold weights per attribute


	for fld in range(0,folds):
		train =  folds_train[fld]
		test =  folds_test[fld]
		train.setClassIndex(data.numAttributes() - 1)
		test.setClassIndex(data.numAttributes() - 1)
		lr = LR()
		lr.buildClassifier(train)
		buf= StringBuffer()  # buffer for the predictions
		attRange = Range()  # no additional attributes output
		outputDistribution = Boolean(False)
		evaluation = Evaluation(test)
		evaluation.evaluateModel(lr, test, [buf, attRange, outputDistribution])
		buffers.append(buf)
		## Writing Evaluation Summaries
		f = open(''.join([directory , ''.join(['summary_',str(fld),'.report'])]) , 'w')
		f.write(evaluation.toSummaryString(True))
		f.close()

		f = open(''.join([directory , ''.join(['coeff_',str(fld),'.report'])]) , 'w')
		f.write(str(lr))
		f.close()

	## Writing predictions in a file

	f = open(''.join([directory , 'prediction.weka']) , 'w')
	for prediction in buffers:
		f.write(str(prediction))
# loop for different values of x using full dataset
data.setClassIndex(data.numAttributes() - 1)
for num in [x * 0.05 for x in range(0, 10)]:
   log.write("---------------------------------\nCF: " + str(num) + "\n")
   algo = J48()
   algo.setConfidenceFactor(num)   # set CF before building, so it actually affects the tree
   x = time.time()
   algo.buildClassifier(data)
   log.write("Time to build classifier: " + str(time.time() - x) + "\n")
   evaluation = Evaluation(data)
   output = PlainText()  # plain text output for predictions
   output.setHeader(data)
   buffer = StringBuffer() # buffer to use
   output.setBuffer(buffer)
   attRange = Range()                  # no additional attributes output
   outputDistribution = Boolean(False) # we don't want distribution
   x = time.time()
   evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
   #evaluation.crossValidateModel(algo, data, 10, rand, [output, attRange, outputDistribution]) 
   log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
   log.write(evaluation.toSummaryString())
   file.write(str(num) + "," + str(evaluation.rootMeanSquaredError()) + "\n")
   # create graph
   graphfilename = "image/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
   str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_" + str(num) + ".dot"
   graphfile = open(graphfilename, 'wb')
   graphfile.write(algo.graph())
   graphfile.close()
file.close()
log.close()
# Example 19
                  str(fulltrainset.numInstances()) + "\n")
        for dataset in [testset, fulltrainset]:
            algo = LibSVM()
            tag = SelectedTag(
                str(kerneltype), algo.TAGS_KERNELTYPE
            )  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
            algo.setKernelType(tag)
            algo.setCost(int(p['svm.C']))
            algo.buildClassifier(trainset)
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if (int(crossvalidate)):
                evaluation.crossValidateModel(
                    algo, dataset, 10, rand,
                    [output, attRange, outputDistribution])
            else:
                evaluation.evaluateModel(
                    algo, dataset, [output, attRange, outputDistribution])
            log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
            log.write(evaluation.toSummaryString())
            filelimit.write("," + str(evaluation.pctIncorrect()))
    filelimit.write("\n")
filelimit.close()
log.close()
# Example 20
# load data file
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)

# set the class Index - the index of the dependent variable
data.setClassIndex(data.numAttributes() - 1)

# create the model
evaluation = Evaluation(data)
output = PlainText()  # plain text output for predictions
output.setHeader(data)
buffer = StringBuffer()  # buffer to use
output.setBuffer(buffer)
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution
j48 = J48()
j48.buildClassifier(data)  # only a trained classifier can be evaluated
evaluation.evaluateModel(j48, data, [output, attRange, outputDistribution])

# print out the built model
print "--> Generated model:\n"
print j48

print "--> Evaluation:\n"
print evaluation.toSummaryString()

print "--> Predictions:\n"
print buffer
# Example 21
# check commandline parameters
if (not (len(sys.argv) == 2)):
    print "Usage: UsingJ48Ext.py <ARFF-file>"
    sys.exit()

# load data file
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)

# set the class Index - the index of the dependent variable
data.setClassIndex(data.numAttributes() - 1)

# create the model
evaluation = Evaluation(data)
buffer = StringBuffer()  # buffer for the predictions
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution
j48 = J48()
j48.buildClassifier(data)  # only a trained classifier can be evaluated
evaluation.evaluateModel(j48, data, [buffer, attRange, outputDistribution])

# print out the built model
print "--> Generated model:\n"
print j48

print "--> Evaluation:\n"
print evaluation.toSummaryString()

print "--> Predictions:\n"
print buffer
# Example 22
    ## Prediction
    buffers = []  ## List of per fold predictions
    weights = []  ## List of per fold weights per attribute

    for fld in range(0, folds):
        train = folds_train[fld]
        test = folds_test[fld]
        train.setClassIndex(data.numAttributes() - 1)
        test.setClassIndex(data.numAttributes() - 1)
        lr = LR()
        lr.buildClassifier(train)
        buf = StringBuffer()  # buffer for the predictions
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)
        evaluation = Evaluation(test)
        evaluation.evaluateModel(lr, test, [buf, attRange, outputDistribution])
        buffers.append(buf)
        ## Writing Evaluation Summaries
        f = open(
            ''.join([directory, ''.join(['summary_',
                                         str(fld), '.report'])]), 'w')
        f.write(evaluation.toSummaryString(True))
        f.close()

        f = open(
            ''.join([directory, ''.join(['coeff_',
                                         str(fld), '.report'])]), 'w')
        f.write(str(lr))
        f.close()

    ## Writing predictions in a file