def classify(img, classifier, class_names, ops=None, distribution_class_index=-1):
    """ Classify every pixel of a 2D image with a WEKA classifier.

        img: a 2D RandomAccessibleInterval.
        classifier: a WEKA Classifier instance, like SMO or FastRandomForest, etc. Any.
                    If it's a string, interprets it as a file path and attempts
                    to deserialize a previously saved trained classifier.
        class_names: the list of names of each class to learn.
        ops: the filter bank of ImgMath ops for the img; defaults to filterBank(img).
        distribution_class_index: defaults to -1, meaning return the class index for each pixel.
                    When larger than -1, it's interpreted as a class index, and
                    returns instead the floating-point value of each pixel in
                    the distribution of that particular class index.

        Returns a new float image with the first two dimensions of img, holding
        for each pixel either the predicted class index or the requested
        class probability.
    """
    # Accept both str and unicode file paths (Python 2 / Jython).
    if isinstance(classifier, basestring):
        classifier = SerializationHelper.read(classifier)

    ops = ops if ops else filterBank(img)
    n_features = len(ops)

    # One numeric attribute per op in the filter bank, named like
    # createTrainingData names them ("attr-1", "attr-2", ...), so that the
    # structure used for classification matches the one used for training.
    attributes = ArrayList()
    for i in xrange(n_features):
        attributes.add(Attribute("attr-%i" % (i + 1)))
    # The nominal class attribute goes last
    attributes.add(Attribute("class", class_names))

    # Empty dataset that only describes the structure of each instance
    info = Instances("structure", attributes, 1)
    info.setClassIndex(len(attributes) - 1)

    # Compute each op into its own float image, then view them as a single
    # image whose pixels are the feature vectors
    dims = [img.dimension(0), img.dimension(1)]
    opImgs = [compute(op).into(ArrayImgs.floats(dims)) for op in ops]
    cs_opImgs = Views.collapse(Views.stack(opImgs))

    result = ArrayImgs.floats(dims)
    cr = result.cursor()
    # NOTE(review): assumes both cursors visit pixels in the same order -- confirm
    cop = Views.iterable(cs_opImgs).cursor()

    # Decide once, not per pixel, which kind of value to write
    want_distribution = distribution_class_index > -1

    while cr.hasNext():
        tc = cop.next()
        vector = array((tc.get(i).getRealDouble() for i in xrange(n_features)), 'd')
        vector += array([0], 'd')  # placeholder value for the class attribute
        di = DenseInstance(1.0, vector)
        di.setDataset(info)  # the list of attributes
        if want_distribution:
            cr.next().setReal(classifier.distributionForInstance(di)[distribution_class_index])
        else:
            cr.next().setReal(classifier.classifyInstance(di))

    return result
def createTrainingData(img, samples, class_names, n_samples=0, ops=None):
    """ Build the WEKA training data for a set of sampled pixels of an image.

        img: a 2D RandomAccessibleInterval.
        samples: a sequence of long[] (or int numeric sequence or Localizable)
                 and class_index pairs; can be a generator.
        class_names: a list of class names, as many as different class_index.
        n_samples: optional, the number of samples (in case samples is e.g. a generator).
                   When left at 0 it is taken from len(samples); if samples has
                   no length (e.g. a generator) it is first read into a list.
        ops: optional, the sequence of ImgMath ops to apply to the img,
             defaults to filterBank(img)

        return an instance of WEKA Instances
    """
    ops = ops if ops else filterBank(img)
    n_features = len(ops)

    if 0 == n_samples:
        try:
            n_samples = len(samples)
        except TypeError:
            # A generator has no len(): materialize it so it can be counted
            # and still iterated below
            samples = list(samples)
            n_samples = len(samples)

    # Define a WEKA Attribute for each feature (one for op in the filter bank, plus the class)
    attributes = ArrayList()
    for i in xrange(n_features):
        attributes.add(Attribute("attr-%i" % (i + 1)))
    # Add an attribute at the end for the classification classes
    attributes.add(Attribute("class", class_names))

    # Create the training data structure; n_samples is its initial capacity
    training_data = Instances("training", attributes, n_samples)
    training_data.setClassIndex(len(attributes) - 1)

    # Compute each op into its own float image, then view them as a single
    # image whose pixels are the feature vectors
    dims = [img.dimension(0), img.dimension(1)]
    opImgs = [compute(op).into(ArrayImgs.floats(dims)) for op in ops]
    ra = Views.collapse(Views.stack(opImgs)).randomAccess()

    for position, class_index in samples:
        ra.setPosition(position)
        tc = ra.get()
        # Feature values at the sample position, followed by the class index
        vector = array((tc.get(i).getRealDouble() for i in xrange(n_features)), 'd')
        vector += array([class_index], 'd')
        training_data.add(DenseInstance(1.0, vector))

    return training_data