def createPerceptronClassifier(img, samples, class_names, n_samples, ops=None, filepath=None, params=None):
    """Train a Weka MultilayerPerceptron on the given samples.

    img, samples, class_names, n_samples, ops and filepath are forwarded
    unchanged to trainClassifier. params is an optional dict of perceptron
    settings:
      "learning_rate": float in (0, 1], passed to setLearningRate.
      "hidden_layers": nodes per layer as comma-separated values
        (default "10,5"), or the Weka shorthands:
        'a' = (number of attributes + number of classes) / 2
        'i' = number of attributes
        'o' = number of classes
        't' = number of attributes + number of classes
        See MultilayerPerceptron.setHiddenLayers:
        https://weka.sourceforge.io/doc.dev/weka/classifiers/functions/MultilayerPerceptron.html#setHiddenLayers-java.lang.String-

    Returns whatever trainClassifier returns (the trained classifier).
    """
    # Default the dict here, not in the signature: a mutable default
    # argument is created once and shared across all calls.
    if params is None:
        params = {}
    mp = MultilayerPerceptron()
    if "learning_rate" in params:
        # Key is known to be present, so read it directly (the previous
        # .get(..., default) fallback inside this guard was dead code).
        mp.setLearningRate(params["learning_rate"])
    mp.setHiddenLayers(params.get("hidden_layers", "10,5"))
    return trainClassifier(mp, img, samples, class_names, n_samples, ops=ops, filepath=filepath)
# NOTE(review): this chunk begins mid-script -- the usage/exit lines below are
# presumably inside an argument-count check whose "if" is outside the visible
# region -- and the final "for" header's body continues past the view.
print "Usage: supervised.py <ARFF-file>"
sys.exit()

# load data file named on the command line
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)

# set the class Index - the index of the dependent variable
# (Weka convention: the last attribute is the class)
data.setClassIndex(data.numAttributes() - 1)

# define the algorithms to be used: (classifier instance, display name) pairs
algo_list = [(NaiveBayes(), 'NaiveBayes'), (BayesNet(), 'BayesNet'), (J48(), 'J48'), (JRip(), 'JRip'), (KStar(), 'KStar'), (RandomForest(), 'RandomForest'), (AdaBoostM1(), 'AdaBoostM1'), (MultilayerPerceptron(), 'MultilayerPerceptron'), (LibSVM(), 'LibSVM')]
# name -> classifier instance lookup
algo_dict = dict([(x[1], x[0]) for x in algo_list])
# processing order, independent of algo_list order
algo_keys = [ 'NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron' ]

# example to set kernel type on libsvm. Default is 2
#algo = algo_dict['LibSVM']
#tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
#algo.setKernelType(tag)

# train classifiers but filter out the name column first
print "Training classifiers..."
for key in algo_keys:
# NOTE(review): chunk begins mid-script; "file", "bufsize", "data", "rand",
# time/os/sys imports and the Weka classes are all defined outside this view.
file.write("epochs,rmse\n")

# CSV of wall-clock training time per epoch count:
# data/plot/<script-name>_<dataset-name>_wall.csv
wallfile = "data/plot/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_wall.csv"
filewall=open(wallfile, 'w', bufsize) # open a file for wall clock time
filewall.write("epochs,seconds\n")

# general log file: logs/<script-name>_<dataset-name>_tunable.log
logfile = "logs/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_tunable.log"
log=open(logfile, 'w', bufsize) # open general log file

# loop for different number of training epochs
data.setClassIndex(data.numAttributes() - 1) # last attribute is the class
for num in range(1,1000,50):
    log.write("---------------------------------\nEpoch: " + str(num) + "\n")
    # fresh perceptron each iteration, trained for exactly "num" epochs
    algo = MultilayerPerceptron()
    algo.setTrainingTime(num)
    x = time.time()
    algo.buildClassifier(data)
    log.write("Time to build classifier: " + str(time.time() - x) + "\n")
    filewall.write(str(num) + "," + str(time.time() - x) + "\n")
    evaluation = Evaluation(data)
    output = PlainText() # plain text output for predictions
    output.setHeader(data)
    buffer = StringBuffer() # buffer to use
    output.setBuffer(buffer)
    attRange = Range() # no additional attributes output
    outputDistribution = Boolean(False) # we don't want distribution
    x = time.time()
    #evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
    # 10-fold cross-validation; "rand" (random source) comes from outside the view
    evaluation.crossValidateModel(algo, data, 10, rand, [output, attRange, outputDistribution])
# NOTE(review): chunk begins mid-script ("filelimit", "classifiername",
# "dataname", "crossvalidate", "bufsize", "p", "fulltrainset", "testset",
# "rand" are defined outside the view) and is cut off after the final "else:".
filelimit.write("instances,pctincorrecttest,pctincorrecttrain\n")
logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log"
log=open(logfile, 'w', bufsize) # open general log file
timefilename = "data/plot/" + classifiername + "_" + dataname + crossvalidate + "_traintime.csv"
timefile = open(timefilename, 'w', bufsize)
timefile.write("instances,timetest,timetrain\n")

# grow the training set from mlp.initial toward the full set in
# mlp.numdatapoints roughly equal steps (integer division step size)
for num in range(int(p['mlp.initial']),fulltrainset.numInstances(),(fulltrainset.numInstances() / int(p['mlp.numdatapoints']))):
    trainset = Instances(fulltrainset,0,num) # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)
    log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full data set size: " + str(fulltrainset.numInstances()) + "\n")
    filelimit.write(str(trainset.numInstances()))
    timefile.write(str(num))
    # evaluate against both the held-out test set and the full training set
    for dataset in [testset, fulltrainset]:
        algo = MultilayerPerceptron()
        algo.setTrainingTime(int(p['mlp.N'])) # epoch count from config dict p
        x = time.time()
        algo.buildClassifier(trainset)
        evaluation = Evaluation(trainset)
        timefile.write("," + str(time.time() - x)) # training wall-clock time
        output = PlainText() # plain text output for predictions
        output.setHeader(trainset)
        buffer = StringBuffer() # buffer to use
        output.setBuffer(buffer)
        attRange = Range() # no additional attributes output
        outputDistribution = Boolean(False) # we don't want distribution
        x = time.time()
        if (int(crossvalidate)):
            evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
        else:
# NOTE(review): near-duplicate of the preceding learning-curve chunk; begins
# mid-script ("timefilename", "bufsize", "p", "fulltrainset", "testset",
# "log", "filelimit", "crossvalidate", "rand" come from outside the view)
# and is cut off mid-call to crossValidateModel.
timefile = open(timefilename, 'w', bufsize)
timefile.write("instances,timetest,timetrain\n")

# grow the training set from mlp.initial toward the full set in
# mlp.numdatapoints roughly equal steps (integer division step size)
for num in range(int(p['mlp.initial']), fulltrainset.numInstances(), (fulltrainset.numInstances() / int(p['mlp.numdatapoints']))):
    trainset = Instances(fulltrainset, 0, num) # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)
    log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full data set size: " + str(fulltrainset.numInstances()) + "\n")
    filelimit.write(str(trainset.numInstances()))
    timefile.write(str(num))
    # evaluate against both the held-out test set and the full training set
    for dataset in [testset, fulltrainset]:
        algo = MultilayerPerceptron()
        algo.setTrainingTime(int(p['mlp.N'])) # epoch count from config dict p
        x = time.time()
        algo.buildClassifier(trainset)
        evaluation = Evaluation(trainset)
        timefile.write("," + str(time.time() - x)) # training wall-clock time
        output = PlainText() # plain text output for predictions
        output.setHeader(trainset)
        buffer = StringBuffer() # buffer to use
        output.setBuffer(buffer)
        attRange = Range() # no additional attributes output
        outputDistribution = Boolean(False) # we don't want distribution
        x = time.time()
        if (int(crossvalidate)):
            evaluation.crossValidateModel(
                algo, dataset, 10, rand,
# check commandline parameters if (not (len(sys.argv) == 3)): print "Usage: weka.py <ARFF-file>" sys.exit() file = FileReader(sys.argv[1]) file2 = FileReader(sys.argv[2]) data = Instances(file) test = Instances(file2) data.setClassIndex(data.numAttributes() - 1) test.setClassIndex(test.numAttributes() - 1) evaluation = Evaluation(data) buffer = StringBuffer() attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution nn = MultilayerPerceptron() nn.buildClassifier(data) # only a trained classifier can be evaluated #print evaluation.evaluateModel(nn, ['-t', sys.argv[1], '-T', sys.argv[2]])#;, [buffer, attRange, outputDistribution]) res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution]) f = open('predictions/' + data.relationName(), 'w') for d in res: f.write(str(d) + '\n') f.close() SerializationHelper.write("models/" + data.relationName() + ".model", nn) # print out the built model #print "--> Generated model:\n" #print nn
# Train a battery of Weka classifiers on one ARFF file.
# NOTE(review): module-level names defined here (data, algo_dict, algo_keys)
# are presumably used by the evaluation section beyond this view.

# check that exactly one argument (the ARFF file) was supplied
if (not (len(sys.argv) == 2)):
    print "Usage: supervised.py <ARFF-file>"
    sys.exit()

# load data file
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)

# set the class Index - the index of the dependent variable
# (Weka convention: the last attribute is the class)
data.setClassIndex(data.numAttributes() - 1)

# define the algorithms to be used: (classifier instance, display name) pairs
algo_list = [(NaiveBayes(), 'NaiveBayes'), (BayesNet(),'BayesNet'), (J48(),'J48'), (JRip(), 'JRip'), (KStar(), 'KStar'), (RandomForest(), 'RandomForest'), (AdaBoostM1(),'AdaBoostM1'), (MultilayerPerceptron(),'MultilayerPerceptron'), (LibSVM(), 'LibSVM')]
# name -> classifier instance lookup
algo_dict = dict([(x[1], x[0]) for x in algo_list])
# processing order, independent of algo_list order
algo_keys = ['NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron']

# example to set kernel type on libsvm. Default is 2
algo = algo_dict['LibSVM']
tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
algo.setKernelType(tag)

# train classifiers
print "Training classifiers..."
for key in algo_keys :
    algo = algo_dict[key]
    algo.buildClassifier(data)
# evaluate classifiers and print a result summary including confusion matrix
# check commandline parameters if (not (len(sys.argv) == 3)): print "Usage: weka.py <ARFF-file>" sys.exit() file = FileReader(sys.argv[1]) file2 = FileReader(sys.argv[2]) data = Instances(file) test = Instances(file2) data.setClassIndex(data.numAttributes() - 1) test.setClassIndex(test.numAttributes() - 1) evaluation = Evaluation(data) buffer = StringBuffer() attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution nn = MultilayerPerceptron() nn.buildClassifier(data) # only a trained classifier can be evaluated #print evaluation.evaluateModel(nn, ['-t', sys.argv[1], '-T', sys.argv[2]])#;, [buffer, attRange, outputDistribution]) res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution]) f = open('predictions/' + data.relationName(), 'w') for d in res: f.write(str(d) + '\n'); f.close() SerializationHelper.write("models/" + data.relationName() + ".model", nn) # print out the built model #print "--> Generated model:\n" #print nn
# NOTE(review): chunk begins mid-script; "variances", "imgvars", "channels",
# "stack", "featuresArray", "image", "posroi", "negroi", "folder" and the
# Fiji/Weka classes are defined outside the visible region.

# Build the feature stack: one slice per variance image, then one per channel.
# zip/enumerate replace the range(len(...)) indexing; visitation order is the
# same (original would only differ by raising IndexError on mismatched lengths).
for variance, var_img in zip(variances, imgvars):
    stack.addSlice("Variance" + str(variance), var_img.getProcessor())
for idx, channel in enumerate(channels):
    stack.addSlice("channel" + str(idx + 1), channel.getProcessor())

# create empty feature stack (False: don't compute default features)
features = FeatureStack(stack.getWidth(), stack.getHeight(), False)
# set my features to the feature stack
features.setStack(stack)
# put my feature stack into the array and mirror its enabled-feature flags
featuresArray.set(features, 0)
featuresArray.setEnabledFeatures(features.getEnabledFeatures())

# Configure the perceptron: three hidden layers of 20, 14 and 8 nodes
# ("20,14,8" replaces the needless "%i,%i,%i" % (20, 14, 8) formatting),
# learning rate 0.7 with decay, momentum 0.3, 200 training epochs.
mp = MultilayerPerceptron()
mp.setHiddenLayers("20,14,8")
mp.setLearningRate(0.7)
mp.setDecay(True)
mp.setTrainingTime(200)
mp.setMomentum(0.3)

wekaSegmentation = WekaSegmentation(image)
wekaSegmentation.setFeatureStackArray(featuresArray)
wekaSegmentation.setClassifier(mp)
# add one example ROI per class -- presumably (class index, roi, slice number);
# TODO confirm against WekaSegmentation.addExample javadoc
wekaSegmentation.addExample(0, posroi, 1)
wekaSegmentation.addExample(1, negroi, 1)
wekaSegmentation.trainClassifier()
wekaSegmentation.saveClassifier(folder + "\\vessel-classifier_big.model")