# General log file: logs/<script name>_<data file name>_tunable.log, where
# both names are base names with their extensions stripped.
logfile = "".join([
    "logs/",
    str(os.path.splitext(os.path.basename(__file__))[0]),
    "_",
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]),
    "_tunable.log",
])
# Opened for writing with the buffering value held in `bufsize`.
log = open(logfile, "w", bufsize)

# Sweep the LibSVM cost parameter over powers of two (2**-10, 2**-8, ...,
# 2**8) and, for every cost value, build one classifier per kernel type
# (0..3) on the full data set, logging how long each build takes.
# The last attribute of `data` is used as the class attribute.
data.setClassIndex(data.numAttributes() - 1)
for num in range(-10, 10, 2):
    c = 2 ** (num)
    # `file` is a handle defined outside this excerpt; the cost value is
    # written without any separator or newline here.
    file.write(str(c))
    for kerneltype in range(0, 4):
        # A fresh LibSVM instance per (c, kerneltype) combination.
        algo = LibSVM()
        tag = SelectedTag(
            str(kerneltype), algo.TAGS_KERNELTYPE
        )  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
        algo.setKernelType(tag)
        algo.setCost(c)
        log.write("---------------------------------\nC: " + str(c) + ", KernelType: " + str(kerneltype) + "\n")
        # Wall-clock timing of the training step only.
        x = time.time()
        algo.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - x) + "\n")
        # The objects below are prepared for an evaluation run, but nothing
        # in the visible lines uses them yet.
        evaluation = Evaluation(data)
        output = PlainText()  # plain text output for predictions
        output.setHeader(data)
        buffer = StringBuffer()  # buffer to use
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes output
    # NOTE(review): this exit is inside the outer loop, so the script stops
    # after the first cost value; it looks like an excerpt artefact - confirm.
    sys.exit()

# Load the ARFF file named on the command line into a Weka Instances object.
print("Loading data...")
# NOTE: `file` shadows the Python 2 builtin of the same name; the name is kept
# because code outside this section may still refer to it.
file = FileReader(sys.argv[1])
try:
    data = Instances(file)
finally:
    # Weka's Instances(Reader) constructor reads the whole ARFF stream, so the
    # reader is no longer needed; close it instead of leaking the file handle.
    file.close()

# set the class index - the index of the dependent variable (last attribute)
data.setClassIndex(data.numAttributes() - 1)

# Classifiers to run, each paired with the label used to look it up.
algo_list = [
    (NaiveBayes(), 'NaiveBayes'),
    (BayesNet(), 'BayesNet'),
    (J48(), 'J48'),
    (JRip(), 'JRip'),
    (KStar(), 'KStar'),
    (RandomForest(), 'RandomForest'),
    (AdaBoostM1(), 'AdaBoostM1'),
    (MultilayerPerceptron(), 'MultilayerPerceptron'),
    (LibSVM(), 'LibSVM'),
]
# Label -> classifier instance.
algo_dict = dict((label, classifier) for classifier, label in algo_list)
# Order in which the classifiers are processed.
algo_keys = [
    'NaiveBayes',
    'J48',
    'BayesNet',
    'JRip',
    'RandomForest',
    'KStar',
    'AdaBoostM1',
    'LibSVM',
    'MultilayerPerceptron',
]

# example to set kernel type on libsvm.  Default is 2
#algo = algo_dict['LibSVM']
#tag = SelectedTag("1",algo.TAGS_KERNELTYPE)  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
#algo.setKernelType(tag)

# train classifiers but filter out the name column first
print "Training classifiers..."
for key in algo_keys:
    algo = algo_dict[key]
# Example #3
# 0
# General log file: logs/<classifier>_<data set><crossvalidate flag>.log
logfile = "".join(["logs/", classifiername, "_", dataname, crossvalidate, ".log"])
# Opened for writing with the buffering value held in `bufsize`.
log = open(logfile, 'w', bufsize)

for num in range(int(p['svm.initial']), fulltrainset.numInstances(),
                 (fulltrainset.numInstances() / int(p['svm.numdatapoints']))):
    trainset = Instances(fulltrainset, 0, num)  # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)

    filelimit.write(str(num))
    for kerneltype in range(0, 4):
        log.write("---------------------------------\nTraining Set Size: " +
                  str(trainset.numInstances()) + ", Test Set Size: " +
                  str(testset.numInstances()) + ", Full data set size: " +
                  str(fulltrainset.numInstances()) + "\n")
        for dataset in [testset, fulltrainset]:
            algo = LibSVM()
            tag = SelectedTag(
                str(kerneltype), algo.TAGS_KERNELTYPE
            )  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
            algo.setKernelType(tag)
            algo.setCost(int(p['svm.C']))
            algo.buildClassifier(trainset)
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if (int(crossvalidate)):
# Data set name: base name of the input file without its extension.
dataname = str(os.path.splitext(os.path.basename(sys.argv[1]))[0])

# CSV output with one column for the instance count and one test/train pair
# per kernel type (linear, polynomial, radial, sigmoid).
datafilelimit = "".join(
    ["data/plot/", classifiername, "_", dataname, crossvalidate, "_instances.csv"]
)
filelimit = open(datafilelimit, 'w', bufsize)
filelimit.write("instances,lineartest,lineartrain,polytest,polytrain,radialtest,radialtrain,sigmoidtest,sigmoidtrain\n")

# General log file for this run.
logfile = "".join(["logs/", classifiername, "_", dataname, crossvalidate, ".log"])
log = open(logfile, 'w', bufsize)

for num in range(int(p['svm.initial']),fulltrainset.numInstances(),(fulltrainset.numInstances() / int(p['svm.numdatapoints']))):
   trainset = Instances(fulltrainset,0,num)   # create training set
   trainset.setClassIndex(trainset.numAttributes() - 1)

   filelimit.write(str(num))
   for kerneltype in range(0,4):
      log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full data set size: " + str(fulltrainset.numInstances()) + "\n")
      for dataset in [testset, fulltrainset]:
          algo = LibSVM()
          tag = SelectedTag(str(kerneltype),algo.TAGS_KERNELTYPE)  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
          algo.setKernelType(tag)
          algo.setCost(int(p['svm.C']))
          algo.buildClassifier(trainset)
          evaluation = Evaluation(trainset)
          output = PlainText()  # plain text output for predictions
          output.setHeader(trainset)
          buffer = StringBuffer() # buffer to use
          output.setBuffer(buffer)
          attRange = Range()                  # no additional attributes output
          outputDistribution = Boolean(False) # we don't want distribution
          x = time.time()
          if (int(crossvalidate)):
              evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
          else:
# Example #5
# 0
# CSV file collecting one row per cost value with a column per kernel type:
# data/plot/<script name>_<data file name>_rmse.csv
datafile = "".join([
    "data/plot/",
    str(os.path.splitext(os.path.basename(__file__))[0]),
    "_",
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]),
    "_rmse.csv",
])
file = open(datafile, 'w', bufsize)
file.write("c,linear,polynomial,radial,sigmoid\n")

# General log file: logs/<script name>_<data file name>_tunable.log
logfile = "".join([
    "logs/",
    str(os.path.splitext(os.path.basename(__file__))[0]),
    "_",
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]),
    "_tunable.log",
])
log = open(logfile, 'w', bufsize)

# Sweep the LibSVM cost parameter over powers of two (2**-10, 2**-8, ...,
# 2**8) and, for every cost value, build one classifier per kernel type
# (0..3) on the full data set, logging how long each build takes.
# The last attribute of `data` is used as the class attribute.
data.setClassIndex(data.numAttributes() - 1)
for num in range(-10,10,2):
   c = 2 ** (num)
   # First CSV column of the row for this cost value (the header written
   # to `file` starts with "c").
   file.write(str(c))
   for kerneltype in range(0,4):
      # A fresh LibSVM instance per (c, kerneltype) combination.
      algo = LibSVM()
      tag = SelectedTag(str(kerneltype),algo.TAGS_KERNELTYPE)  # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
      algo.setKernelType(tag)
      algo.setCost(c)
      log.write("---------------------------------\nC: " + str(c) + ", KernelType: " + str(kerneltype) + "\n")
      # Wall-clock timing of the training step only.
      x = time.time()
      algo.buildClassifier(data) 
      log.write("Time to build classifier: " + str(time.time() - x) + "\n")
      # Set up the objects for an evaluation run; the call that uses them
      # lies beyond the end of this excerpt.
      evaluation = Evaluation(data)
      output = PlainText()  # plain text output for predictions
      output.setHeader(data)
      buffer = StringBuffer() # buffer to use
      output.setBuffer(buffer)
      attRange = Range()                  # no additional attributes output
      outputDistribution = Boolean(False) # we don't want distribution
      # Restart the timer - presumably for the evaluation step that follows
      # (not visible here; confirm).
      x = time.time()
# Require exactly one command-line argument (the ARFF file). On misuse, show
# the usage line and stop with a non-zero exit status so that calling shells
# and scripts can tell the run did not succeed.
if len(sys.argv) != 2:
    print("Usage: supervised.py <ARFF-file>")
    sys.exit(1)

# Load the ARFF file named on the command line into a Weka Instances object.
print("Loading data...")
# NOTE: `file` shadows the Python 2 builtin of the same name; the name is kept
# because code outside this section may still refer to it.
file = FileReader(sys.argv[1])
try:
    data = Instances(file)
finally:
    # Weka's Instances(Reader) constructor reads the whole ARFF stream, so the
    # reader is no longer needed; close it instead of leaking the file handle.
    file.close()

# set the class index - the index of the dependent variable (last attribute)
data.setClassIndex(data.numAttributes() - 1)

# Classifiers to run, each paired with the label used to look it up.
algo_list = [
    (NaiveBayes(), 'NaiveBayes'),
    (BayesNet(), 'BayesNet'),
    (J48(), 'J48'),
    (JRip(), 'JRip'),
    (KStar(), 'KStar'),
    (RandomForest(), 'RandomForest'),
    (AdaBoostM1(), 'AdaBoostM1'),
    (MultilayerPerceptron(), 'MultilayerPerceptron'),
    (LibSVM(), 'LibSVM'),
]
# Label -> classifier instance.
algo_dict = dict((label, classifier) for classifier, label in algo_list)
# Order in which the classifiers are processed.
algo_keys = [
    'NaiveBayes',
    'J48',
    'BayesNet',
    'JRip',
    'RandomForest',
    'KStar',
    'AdaBoostM1',
    'LibSVM',
    'MultilayerPerceptron',
]

# Select the kernel type of the registered LibSVM instance (default is 2).
# Kernel codes: 0 = linear, 1 = polynomial, 2 = radial basis function,
# 3 = sigmoid.
algo = algo_dict['LibSVM']
tag = SelectedTag("1", algo.TAGS_KERNELTYPE)
algo.setKernelType(tag)

# Build every registered classifier on the loaded data, in algo_keys order.
print("Training classifiers...")
for key in algo_keys:
    algo = algo_dict[key]
    algo.buildClassifier(data)

# evaluate classifiers and print a result summary including confusion matrix