def _getInstances(self, classAttr):
    """Build a WEKA ``Instances`` object from the data held by this object.

    One attribute is declared per name in ``self.numericAttributes`` and one
    nominal attribute (with its list of values) per entry of
    ``self.attName2Domain``.  Every item of ``self.instances`` is then
    converted with ``self._makeInstance`` and appended to the data set.

    Side effects: stores ``classAttr`` in ``self.classAttr`` and the
    name -> Attribute mapping in ``self.attName2Obj``.

    classAttr: name of the attribute to use as the class attribute; it is
        looked up in the name -> Attribute mapping built here.
    return: the populated ``Instances`` with its class attribute set.
    """
    self.classAttr = classAttr

    # Declare the attributes: the plain ones first, then the nominal ones.
    declared = FastVector()
    byName = {}

    for numericName in self.numericAttributes:
        numericAttr = Attribute(numericName)
        declared.addElement(numericAttr)
        byName[numericName] = numericAttr

    for nominalName, values in self.attName2Domain.iteritems():
        labels = FastVector(len(values))
        for value in values:
            labels.addElement(String(str(value)))
        nominalAttr = Attribute(nominalName, labels)
        declared.addElement(nominalAttr)
        byName[nominalName] = nominalAttr

    # Published before the items are converted, as _makeInstance may rely on it.
    self.attName2Obj = byName

    # Fill the data set, one converted item at a time.
    dataset = Instances("instances", declared, len(self.instances))
    for item in self.instances:
        dataset.add(self._makeInstance(item))

    dataset.setClass(byName[classAttr])
    return dataset
def readDataFromResultsTable(attributes, rt):
    """Copy the rows of a results table into a WEKA ``Instances`` object.

    attributes: sequence of WEKA Attribute objects; each attribute's
        ``name()`` is the column key passed to ``rt.getValue``.
    rt: table object providing ``size()`` and ``getValue(column, row)``.

    return: an ``Instances`` named "results" holding one ``DenseInstance``
        per table row, with one value per attribute.
    """
    table = Instances("results", ArrayList(attributes), rt.size())
    featureCount = len(attributes)
    for row in range(rt.size()):
        instance = DenseInstance(featureCount)
        for attribute in attributes:
            instance.setValue(attribute, rt.getValue(attribute.name(), row))
        table.add(instance)
    return table
def createTrainingInstances(matchingExamples, mismatchingExamples):
    """Build the WEKA training set from matching and mismatching examples.

    matchingExamples: sequence of feature vectors (each a sequence of
        numbers); stored with class value 1.
    mismatchingExamples: sequence of feature vectors; stored with class
        value 0.

    All vectors are expected to have the same length.  That length is read
    from the first matching example or, when there are no matching examples,
    from the first mismatching one.

    return: an ``Instances`` named "matches" with one attribute per feature
        plus a trailing nominal "class" attribute set as the class index.
    raises: IndexError when both sequences are empty.
    """
    if len(matchingExamples) > 0:
        reference = matchingExamples[0]
    elif len(mismatchingExamples) > 0:
        reference = mismatchingExamples[0]
    else:
        # IndexError is kept on purpose: it is what indexing the empty
        # matchingExamples used to raise.
        raise IndexError("createTrainingInstances needs at least one example")
    numFeatures = len(reference)

    attributes = [Attribute(str(i) + " numeric") for i in range(numFeatures)]
    # NOTE(review): a nominal value is stored as the index of its label, so
    # with the label order below class value 1 is named "false" and 0 is
    # named "true" -- the opposite of the intent stated on the loops further
    # down.  The order is left untouched because code outside this function
    # may rely on these indices; confirm before swapping the labels.
    attributes.append(Attribute("class", ArrayList(["true", "false"])))

    trainingData = Instances("matches", ArrayList(attributes),
                             len(matchingExamples) + len(mismatchingExamples))
    trainingData.setClassIndex(len(attributes) - 1)  # the last index

    # list(f) lets tuples and other sequences be used as feature vectors.
    for f in matchingExamples:
        trainingData.add(DenseInstance(1.0, list(f) + [1]))  # 1 is True
    for f in mismatchingExamples:
        trainingData.add(DenseInstance(1.0, list(f) + [0]))  # 0 is False
    return trainingData
def classify(classifier, matches):
    """Classify feature vectors with a trained WEKA classifier.

    classifier: object providing ``classifyInstance`` and
        ``distributionForInstance``.
    matches: non-empty list of feature vectors (lists of numbers).  The
        first vector is handed to ``createAttributes`` to declare the
        attributes used for all of them.

    return: ``[result, dist[1]]`` where ``result`` is the value returned by
        ``classifyInstance`` and ``dist[1]`` the second entry of the class
        distribution (presumably the weight of class index 1 -- confirm
        against the training labels).
    raises: IndexError when ``matches`` is empty.

    NOTE(review): every vector is classified, but only the pair computed for
    the LAST vector is returned; earlier pairs are discarded.  Confirm
    whether callers pass a single vector or expect one pair per vector.
    """
    attributes = createAttributes(matches[0])
    instances = Instances("tests", attributes, 1)
    instances.setClassIndex(len(attributes) - 1)

    for match in matches:
        # The trailing 0 only fills the class slot of the test instance.
        instances.add(DenseInstance(1.0, match + [0]))

    for index in range(len(matches)):
        # Read the instance back from the data set rather than reusing the
        # object that was added.
        instance = instances.instance(index)
        result = classifier.classifyInstance(instance)
        dist = classifier.distributionForInstance(instance)
        results = [result, dist[1]]
    return results
def build_instances(state, dataset):
    """Train a J48 decision tree on weather records and return it parsed.

    state: value handed back unchanged as the first element of the result.
    dataset: non-empty sequence of records laid out like ``header`` below:
        the leading values are converted with ``float`` and the last value
        is stored as the class label of the "toClassify" attribute.

    return: the tuple ``(state, parse_tree(str(j48)))``.
    raises: IndexError when ``dataset`` is empty.
    """
    class_attributes = ["Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado"]
    header = ["state", "lat", "lon", "day", "temp", "dewp", "weather"]

    # Build attributes based on the header: every column but the last one.
    attributes = [Attribute(h) for h in header[:-1]]

    # Add the classification attribute with its fixed set of labels.
    classification_vector = FastVector(len(class_attributes))
    for c in class_attributes:
        classification_vector.addElement(c)
    attributes.append(Attribute("toClassify", classification_vector))

    fvWekaAttributes = FastVector(len(dataset[0]))
    for a in attributes:
        fvWekaAttributes.addElement(a)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header) - 1)

    for d in dataset:
        class_position = len(d) - 1
        inst = Instance(len(d))
        for i in range(class_position):
            # Best effort: a value that cannot be read as a number is skipped
            # and its attribute is never set on this instance (presumably it
            # then counts as missing -- confirm).  Only conversion failures
            # are ignored, so interrupts and real errors still surface.
            try:
                value = float(d[i])
            except (TypeError, ValueError):
                continue
            inst.setValue(fvWekaAttributes.elementAt(i), value)
        inst.setValue(fvWekaAttributes.elementAt(class_position), d[-1])
        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    return state, parse_tree(str(j48))
def createTrainingData(img, samples, class_names, n_samples=0, ops=None):
    """Assemble WEKA training data by sampling a bank of filtered images.

    img: a 2D RandomAccessibleInterval.
    samples: a sequence of (position, class_index) pairs, where position is
        a long[] (or int numeric sequence or Localizable); can be a generator.
    class_names: a list of class names, as many as different class_index.
    n_samples: optional, the number of samples (needed when samples is e.g.
        a generator); when 0, len(samples) is used.
    ops: optional, the sequence of ImgMath ops to apply to the img;
        defaults to filterBank(img).

    return: an instance of WEKA Instances with one attribute per op
        ("attr-1", "attr-2", ...) followed by the "class" attribute.
    """
    if not ops:
        ops = filterBank(img)
    if n_samples == 0:
        n_samples = len(samples)

    # One attribute per feature (one per op), then the class attribute last.
    attributes = ArrayList()
    for number in xrange(1, len(ops) + 1):
        attributes.add(Attribute("attr-%i" % number))
    attributes.add(Attribute("class", class_names))

    # The container for the training vectors.
    training_data = Instances("training", attributes, n_samples)
    training_data.setClassIndex(len(attributes) - 1)

    # Compute each op into its own float image of the same 2D size as img.
    opImgs = []
    for op in ops:
        computation = compute(op)
        opImgs.append(computation.into(
            ArrayImgs.floats([img.dimension(0), img.dimension(1)])))

    # Stack the results so one access yields all feature values of a pixel.
    cursor = Views.collapse(Views.stack(opImgs)).randomAccess()
    featureCount = len(opImgs)
    for position, class_index in samples:
        cursor.setPosition(position)
        composite = cursor.get()
        values = [composite.get(c).getRealDouble() for c in xrange(featureCount)]
        values.append(class_index)  # the class goes in the last slot
        training_data.add(DenseInstance(1.0, array(values, 'd')))

    return training_data
target = ArrayImgs.floats([width, height]) interval = FinalInterval([14, 14], [17, 17]) n_samples = Intervals.numElements(interval) for ci, v in enumerate([fillValue, backgroundValue]): for _ in xrange(training_data.size() / 4): # the other 2/4 are the membrane and mit boundary other = syntheticEM([], width, height, 0, v, noise=True) vectors = [zeros(len(attributes), 'd') for _ in xrange(n_samples)] for k, op in enumerate(filterBank(IL.wrap(other), sumType=DoubleType())): imgOp = compute(op).into(target) for i, v in enumerate(Views.interval(imgOp, interval)): vectors[i][k] = v.getRealDouble() for vector in vectors: vector[-1] = ci + 2 # class index training_data.add(DenseInstance(1.0, vector)) # Create a classifier: support vector machine (SVM, an SMO in WEKA) classifier = SMO() classifier.buildClassifier(training_data) print classifier.toString() # Save the trained classifier for later SerializationHelper.write("/tmp/svm-em-mem-mit", classifier) """ imp = WindowManager.getImage("180-220-sub512x512-30.tif") # IJ.getImage() # e.g. 8-bit EM of Drosophila neurons 180-220-sub512x512-30.tif #imp = IJ.openImage("/home/albert/lab/TEM/abd/microvolumes/Seg/180-220-sub/180-220-sub512x512-30.tif") img = IL.wrap(imp) """ """ from hr.irb.fastRandomForest import FastRandomForest