class ClassificationModelFingerprint(ClassificationModel):
    """
    Class to run the survey response classification task with Cortical.io
    fingerprint encodings.

    From the experiment runner, the methods expect to be fed one sample at a
    time.
    """

    def __init__(self,
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 k=1,
                 classifierMetric="rawOverlap",
                 cacheRoot=None,
                 **kwargs):
        """
        Build the KNN classifier and the Cortical.io encoder for this model.

        @param fingerprintType  Must be EncoderTypes.document or
                                EncoderTypes.word; forwarded to CioEncoder.
        @param unionSparsity    Forwarded to CioEncoder as unionSparsity.
        @param retinaScaling    Forwarded to CioEncoder as retinaScaling.
        @param retina           Forwarded to CioEncoder as retina.
        @param apiKey           Forwarded to CioEncoder as apiKey.
        @param k                Forwarded to KNNClassifier as k.
        @param classifierMetric Forwarded to KNNClassifier as distanceMethod.
        @param cacheRoot        Forwarded to CioEncoder as cacheDir.
        @param kwargs           Forwarded to the base class constructor.

        @raise ValueError if fingerprintType is not one of the two supported
               EncoderTypes values.
        """
        super(ClassificationModelFingerprint, self).__init__(**kwargs)

        # Membership test: the earlier form compared fingerprintType by
        # identity against the boolean produced by `not ... or not ...`, which
        # never checked the argument against the allowed encoder types.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligble types.")

        self.classifier = KNNClassifier(k=k,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=self.verbosity - 1)

        # Need a valid API key for the Cortical.io encoder (see CioEncoder
        # constructor for details).
        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey,
                                  cacheDir=cacheRoot)

        # Tokens accumulated for the document in progress; None between
        # documents.
        self.currentDocument = None

    def trainToken(self, token, labels, sampleId, reset=0):
        """
        Train the model with the given text token, associated labels, and
        sampleId. Tokens are buffered until a call arrives with reset == 1;
        at that point the buffered tokens are joined with single spaces,
        encoded, and learned once per label.

        See base class for params and return type descriptions.
        """
        if self.currentDocument is None:
            # start of a new document
            self.currentDocument = [token]
        else:
            # accumulate text for this document
            self.currentDocument.append(token)

        if reset == 1:
            # all text accumulated, proceed w/ training on this document
            document = " ".join(self.currentDocument)
            bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

            if self.verbosity >= 2:
                # Single-argument print(...) emits the same text under
                # Python 2 and Python 3.
                print("CioFP model training with: '{}'".format(document))
                print("\tBitmap: {}".format(bitmap))

            for label in labels:
                # The bitmap is a list of active positions, so isSparse is
                # given the encoder width (see the KNNClassifier.learn docs
                # for how a nonzero isSparse is interpreted).
                self.classifier.learn(bitmap,
                                      label,
                                      isSparse=self.encoder.n,
                                      partitionId=sampleId)

            self.currentDocument = None

    def inferToken(self, token, reset=0, returnDetailedResults=False,
                   sortResults=True):
        """
        Classify the token (i.e. run inference on the model with this
        document) and return classification results and (optionally) a list
        of sampleIds and distances. Repeated sampleIds are NOT removed from
        the results.

        Tokens are buffered until a call arrives with a nonzero reset. While
        reset == 0 the return value is a placeholder triple: a zero vector of
        length self.numLabels, an empty list, and an empty array.

        Once inference runs, the return value is a triple
        (inferenceResult, idList, distances):
          - returnDetailedResults False: (inferenceResult, None, None)
          - returnDetailedResults True, sortResults False: ids and distances
            in the classifier's own order
          - returnDetailedResults True, sortResults True: ids and distances
            ordered by ascending distance

        See base class for params and return type descriptions.
        """
        if self.currentDocument is None:
            # start of a new document
            self.currentDocument = [token]
        else:
            # accumulate text for this document
            self.currentDocument.append(token)

        if reset == 0:
            return numpy.zeros(self.numLabels), [], numpy.zeros(0)

        # With reset=1, all text accumulated, proceed w/ classifying this
        # document
        document = " ".join(self.currentDocument)
        bitmap = self.encoder.encode(document)["fingerprint"]["positions"]
        # Inference is fed the densified form of the bitmap.
        densePattern = self.encoder.densifyPattern(bitmap)

        (_, inferenceResult, dist, _) = self.classifier.infer(densePattern)

        if self.verbosity >= 2:
            print("CioFP model inference with: '{}'".format(document))
            print("\tBitmap: {}".format(bitmap))
            print("\tInference result= {}".format(inferenceResult))
            print("\tDistances= {}".format(dist))

        self.currentDocument = None

        # Figure out format of returned results
        if not returnDetailedResults:
            # Return non-detailed results.
            return inferenceResult, None, None

        if not sortResults:
            idList = [self.classifier.getPartitionId(i)
                      for i in range(len(dist))]
            return inferenceResult, idList, dist

        # Return sorted results
        sortedIndices = dist.argsort()
        idList = [self.classifier.getPartitionId(i) for i in sortedIndices]
        sortedDistances = dist[sortedIndices]
        return inferenceResult, idList, sortedDistances

    def getEncoder(self):
        """
        Returns the encoder instance for the model.
        """
        return self.encoder

    def getClassifier(self):
        """
        Returns the classifier instance for the model.
        """
        return self.classifier
class ClassificationModelFingerprint(ClassificationModel):
    """
    Class to run the survey response classification task with Cortical.io
    fingerprint encodings.

    From the experiment runner, the methods expect to be fed one sample at a
    time.
    """

    def __init__(self,
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 k=1,
                 classifierMetric="rawOverlap",
                 cacheRoot=None,
                 **kwargs):
        """
        Build the KNN classifier and the Cortical.io encoder for this model.

        @param fingerprintType  Must be EncoderTypes.document or
                                EncoderTypes.word; forwarded to CioEncoder.
        @param unionSparsity    Forwarded to CioEncoder as unionSparsity.
        @param retinaScaling    Forwarded to CioEncoder as retinaScaling.
        @param retina           Forwarded to CioEncoder as retina.
        @param apiKey           Forwarded to CioEncoder as apiKey.
        @param k                Forwarded to KNNClassifier as k.
        @param classifierMetric Forwarded to KNNClassifier as distanceMethod.
        @param cacheRoot        Forwarded to CioEncoder as cacheDir.
        @param kwargs           Forwarded to the base class constructor.

        @raise ValueError if fingerprintType is not one of the two supported
               EncoderTypes values.
        """
        super(ClassificationModelFingerprint, self).__init__(**kwargs)

        # Membership test: the earlier form compared fingerprintType by
        # identity against the boolean produced by `not ... or not ...`, which
        # never checked the argument against the allowed encoder types.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligble types.")

        self.classifier = KNNClassifier(k=k,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=self.verbosity - 1)

        # Need a valid API key for the Cortical.io encoder (see CioEncoder
        # constructor for details).
        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey,
                                  cacheDir=cacheRoot)

        # Tokens accumulated for the document in progress; None between
        # documents.
        self.currentDocument = None

    def trainToken(self, token, labels, sampleId, reset=0):
        """
        Train the model with the given text token, associated labels, and
        sampleId. Tokens are buffered until a call arrives with reset == 1;
        at that point the buffered tokens are joined with single spaces,
        encoded, and learned once per label.

        See base class for params and return type descriptions.
        """
        if self.currentDocument is None:
            # start of a new document
            self.currentDocument = [token]
        else:
            # accumulate text for this document
            self.currentDocument.append(token)

        if reset == 1:
            # all text accumulated, proceed w/ training on this document
            document = " ".join(self.currentDocument)
            bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

            if self.verbosity >= 2:
                # Single-argument print(...) emits the same text under
                # Python 2 and Python 3.
                print("CioFP model training with: '{}'".format(document))
                print("\tBitmap: {}".format(bitmap))

            for label in labels:
                # The bitmap is a list of active positions, so isSparse is
                # given the encoder width (see the KNNClassifier.learn docs
                # for how a nonzero isSparse is interpreted).
                self.classifier.learn(bitmap,
                                      label,
                                      isSparse=self.encoder.n,
                                      partitionId=sampleId)

            self.currentDocument = None

    def inferToken(self, token, reset=0, returnDetailedResults=False,
                   sortResults=True):
        """
        Classify the token (i.e. run inference on the model with this
        document) and return classification results and (optionally) a list
        of sampleIds and distances. Repeated sampleIds are NOT removed from
        the results.

        Tokens are buffered until a call arrives with a nonzero reset. While
        reset == 0 the return value is a placeholder triple: a zero vector of
        length self.numLabels, an empty list, and an empty array.

        Once inference runs, the return value is a triple
        (inferenceResult, idList, distances):
          - returnDetailedResults False: (inferenceResult, None, None)
          - returnDetailedResults True, sortResults False: ids and distances
            in the classifier's own order
          - returnDetailedResults True, sortResults True: ids and distances
            ordered by ascending distance

        See base class for params and return type descriptions.
        """
        if self.currentDocument is None:
            # start of a new document
            self.currentDocument = [token]
        else:
            # accumulate text for this document
            self.currentDocument.append(token)

        if reset == 0:
            return numpy.zeros(self.numLabels), [], numpy.zeros(0)

        # With reset=1, all text accumulated, proceed w/ classifying this
        # document
        document = " ".join(self.currentDocument)
        bitmap = self.encoder.encode(document)["fingerprint"]["positions"]
        # Inference is fed the densified form of the bitmap.
        densePattern = self.encoder.densifyPattern(bitmap)

        (_, inferenceResult, dist, _) = self.classifier.infer(densePattern)

        if self.verbosity >= 2:
            print("CioFP model inference with: '{}'".format(document))
            print("\tBitmap: {}".format(bitmap))
            print("\tInference result= {}".format(inferenceResult))
            print("\tDistances= {}".format(dist))

        self.currentDocument = None

        # Figure out format of returned results
        if not returnDetailedResults:
            # Return non-detailed results.
            return inferenceResult, None, None

        if not sortResults:
            idList = [self.classifier.getPartitionId(i)
                      for i in range(len(dist))]
            return inferenceResult, idList, dist

        # Return sorted results
        sortedIndices = dist.argsort()
        idList = [self.classifier.getPartitionId(i) for i in sortedIndices]
        sortedDistances = dist[sortedIndices]
        return inferenceResult, idList, sortedDistances

    def getEncoder(self):
        """
        Returns the encoder instance for the model.
        """
        return self.encoder

    def getClassifier(self):
        """
        Returns the classifier instance for the model.
        """
        return self.classifier