# Learning-curve experiment for the J48 classifier: train on increasingly
# large leading slices of the full training set and, for every slice size,
# record the percentage of incorrect predictions on the test set and on the
# full training set.
#
# Names supplied by the surrounding script: classifiername, dataname,
# crossvalidate, bufsize, p, fulltrainset, testset, rand and filelimit.
# filelimit is opened elsewhere but is closed by this section.
#
# Each row written to filelimit has the form
#     <training size>,<pctIncorrect on testset>,<pctIncorrect on fulltrainset>
logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log"
log = open(logfile, 'w', bufsize)  # open general log file
try:
    total_instances = fulltrainset.numInstances()
    confidence_factor = float(p['j48.C'])
    use_cross_validation = int(crossvalidate)

    # Integer distance between consecutive training-set sizes.  Clamp it to 1:
    # when there are fewer instances than requested data points the division
    # truncates to 0 and range() would raise ValueError on a zero step.
    step = max(1, total_instances // int(p['j48.numdatapoints']))

    for num in range(int(p['j48.initial']), total_instances, step):
        filelimit.write(str(num))

        # create training set from `num` instances starting at index 0
        trainset = Instances(fulltrainset, 0, num)
        # the last attribute is used as the class attribute
        trainset.setClassIndex(trainset.numAttributes() - 1)

        log.write("---------------------------------\nTraining Set Size: "
                  + str(trainset.numInstances())
                  + ", Test Set Size: " + str(testset.numInstances())
                  + ", Full data set size: " + str(total_instances) + "\n")

        for dataset in [testset, fulltrainset]:
            algo = J48()
            # Options must be set BEFORE the tree is built; setting the
            # confidence factor afterwards leaves the already-built model
            # untouched, so the configured value would be ignored whenever
            # the model is evaluated directly (non-cross-validation path).
            algo.setConfidenceFactor(confidence_factor)
            algo.buildClassifier(trainset)

            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            predictions_buffer = StringBuffer()  # buffer to use
            output.setBuffer(predictions_buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            prediction_args = [output, attRange, outputDistribution]

            x = time.time()
            if use_cross_validation:
                # 10-fold cross-validation on `dataset`
                evaluation.crossValidateModel(algo, dataset, 10, rand,
                                              prediction_args)
            else:
                evaluation.evaluateModel(algo, dataset, prediction_args)
            log.write("Time to evaluate model: " + str(time.time() - x) + "\n")

            log.write(evaluation.toSummaryString())
            filelimit.write("," + str(evaluation.pctIncorrect()))

        filelimit.write("\n")
finally:
    # Close both files even if training or evaluation raised part-way through.
    try:
        filelimit.close()
    finally:
        log.close()
str(fulltrainset.numInstances()) + "\n") for dataset in [testset, fulltrainset]: algo = LibSVM() tag = SelectedTag( str(kerneltype), algo.TAGS_KERNELTYPE ) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid algo.setKernelType(tag) algo.setCost(int(p['svm.C'])) algo.buildClassifier(trainset) evaluation = Evaluation(trainset) output = PlainText() # plain text output for predictions output.setHeader(trainset) buffer = StringBuffer() # buffer to use output.setBuffer(buffer) attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution x = time.time() if (int(crossvalidate)): evaluation.crossValidateModel( algo, dataset, 10, rand, [output, attRange, outputDistribution]) else: evaluation.evaluateModel( algo, dataset, [output, attRange, outputDistribution]) log.write("Time to evaluate model: " + str(time.time() - x) + "\n") log.write(evaluation.toSummaryString()) filelimit.write("," + str(evaluation.pctIncorrect())) filelimit.write("\n") filelimit.close() log.close()