# J48 learning-curve run: train on growing prefixes of fulltrainset and, for
# each prefix size, evaluate on testset and on fulltrainset.  A summary of each
# evaluation goes to the general log; filelimit receives one line per prefix
# size of the form "<size>,<pctIncorrect testset>,<pctIncorrect fulltrainset>".
logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log"
log = open(logfile, 'w', bufsize)  # open general log file

try:
    total_instances = fulltrainset.numInstances()
    # Floor division keeps the step an integer; clamp it to 1 because range()
    # rejects a zero step, which would otherwise happen whenever there are
    # fewer instances than requested data points.
    step = max(1, total_instances // int(p['j48.numdatapoints']))

    for num in range(int(p['j48.initial']), total_instances, step):
        filelimit.write(str(num))
        trainset = Instances(fulltrainset, 0, num)  # copy of the first num instances
        trainset.setClassIndex(trainset.numAttributes() - 1)
        log.write(
            "---------------------------------\nTraining Set Size: "
            + str(trainset.numInstances())
            + ", Test Set Size: " + str(testset.numInstances())
            + ", Full data set size: " + str(total_instances) + "\n")

        # Options must be set BEFORE buildClassifier; setting the confidence
        # factor afterwards leaves the already-built tree unaffected.
        # The model depends only on trainset, so it is built once per prefix
        # size and shared by both evaluations below.
        algo = J48()
        algo.setConfidenceFactor(float(p['j48.C']))
        algo.buildClassifier(trainset)

        for dataset in [testset, fulltrainset]:
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if int(crossvalidate):
                evaluation.crossValidateModel(
                    algo, dataset, 10, rand,
                    [output, attRange, outputDistribution])
            else:
                evaluation.evaluateModel(
                    algo, dataset, [output, attRange, outputDistribution])
            log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
            log.write(evaluation.toSummaryString())
            filelimit.write("," + str(evaluation.pctIncorrect()))
        filelimit.write("\n")
finally:
    # Close both files even when training or evaluation raises, so buffered
    # results that were already written are not lost.
    filelimit.close()
    log.close()
Exemple #2
0
                  str(fulltrainset.numInstances()) + "\n")
        # For each evaluation set (testset, then fulltrainset): configure and
        # train a LibSVM model on trainset, evaluate it, log the timing and
        # summary, and append the error percentage to the filelimit row.
        for eval_data in (testset, fulltrainset):
            svm = LibSVM()
            # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
            svm.setKernelType(SelectedTag(str(kerneltype), svm.TAGS_KERNELTYPE))
            svm.setCost(int(p['svm.C']))
            svm.buildClassifier(trainset)

            evaluator = Evaluation(trainset)

            # Plain-text prediction printer writing into an in-memory buffer.
            printer = PlainText()
            printer.setHeader(trainset)
            printer.setBuffer(StringBuffer())
            # Printer, empty attribute range (no additional attributes output)
            # and a False flag (no class distribution output).
            printing_args = [printer, Range(), Boolean(False)]

            started = time.time()
            if not int(crossvalidate):
                evaluator.evaluateModel(svm, eval_data, printing_args)
            else:
                evaluator.crossValidateModel(
                    svm, eval_data, 10, rand, printing_args)
            elapsed = time.time() - started

            log.write("Time to evaluate model: " + str(elapsed) + "\n")
            log.write(evaluator.toSummaryString())
            filelimit.write("," + str(evaluator.pctIncorrect()))
    # Terminate the current line of the filelimit output.
    filelimit.write("\n")
# All runs are done: close both output files.
filelimit.close()
log.close()