class ClassificationModelFingerprint(ClassificationModel):
  """
  Class to run the survey response classification task with Cortical.io
  fingerprint encodings.

  Tokens are accumulated into a document until a call arrives with reset=1;
  the whole document is then encoded into a fingerprint bitmap and handed to
  a KNNClassifier for training or inference.

  From the experiment runner, the methods expect to be fed one sample at a time.
  """

  def __init__(self,
               fingerprintType=EncoderTypes.word,
               unionSparsity=0.20,
               retinaScaling=1.0,
               retina="en_associative",
               apiKey=None,
               k=1,
               classifierMetric="rawOverlap",
               cacheRoot=None,
               **kwargs):
    """
    Build the KNN classifier and the Cortical.io encoder used by the model.

    @param fingerprintType  Must be EncoderTypes.document or
                            EncoderTypes.word; forwarded to CioEncoder.
    @param unionSparsity    Forwarded to CioEncoder.
    @param retinaScaling    Forwarded to CioEncoder.
    @param retina           Forwarded to CioEncoder.
    @param apiKey           Forwarded to CioEncoder (see the CioEncoder
                            constructor for details).
    @param k                Forwarded to KNNClassifier as k.
    @param classifierMetric Forwarded to KNNClassifier as distanceMethod.
    @param cacheRoot        Forwarded to CioEncoder as cacheDir.
    @param kwargs           Forwarded to the base class constructor.

    @raises ValueError if fingerprintType is not one of the supported types.
    """
    super(ClassificationModelFingerprint, self).__init__(**kwargs)

    # The allowed types must be compared by value with a membership test; an
    # identity comparison against a boolean expression would never match.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
      raise ValueError("Invalid type of fingerprint encoding; see the "
                       "EncoderTypes class for eligble types.")

    self.classifier = KNNClassifier(k=k,
                                    distanceMethod=classifierMetric,
                                    exact=False,
                                    verbosity=self.verbosity - 1)

    # Need a valid API key for the Cortical.io encoder (see CioEncoder
    # constructor for details).
    self.encoder = CioEncoder(retinaScaling=retinaScaling,
                              fingerprintType=fingerprintType,
                              unionSparsity=unionSparsity,
                              retina=retina,
                              apiKey=apiKey,
                              cacheDir=cacheRoot)

    # Tokens of the document currently being accumulated; None between
    # documents.
    self.currentDocument = None


  def _accumulateToken(self, token):
    """
    Append the token to the document being built, starting a new document if
    none is in progress.
    """
    if self.currentDocument is None:
      self.currentDocument = []
    self.currentDocument.append(token)


  def trainToken(self, token, labels, sampleId, reset=0):
    """
    Train the model with the given text token, associated labels, and
    sampleId.

    The token is only accumulated unless reset == 1, in which case the
    accumulated tokens are joined with spaces, encoded, and learned once per
    label, and the accumulated document is cleared.

    See base class for params and return type descriptions.
    """
    self._accumulateToken(token)

    if reset != 1:
      # Still accumulating text for this document.
      return

    # All text accumulated, proceed w/ training on this document.
    document = " ".join(self.currentDocument)
    bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

    if self.verbosity >= 2:
      print("CioFP model training with: '{}'".format(document))
      print("\tBitmap: {}".format(bitmap))

    for label in labels:
      # The bitmap is a list of positions, so isSparse is given the encoder
      # width (self.encoder.n) -- see KNNClassifier.learn for its meaning.
      self.classifier.learn(
          bitmap, label, isSparse=self.encoder.n, partitionId=sampleId)

    self.currentDocument = None


  def inferToken(self, token, reset=0, returnDetailedResults=False,
                 sortResults=True):
    """
    Classify the token (i.e. run inference on the model with this document) and
    return classification results and (optionally) a list of sampleIds and
    distances.   Repeated sampleIds are NOT removed from the results.

    While reset == 0 the token is only accumulated and a placeholder
    (zeros of length numLabels, empty list, empty array) is returned. On any
    other reset value the accumulated document is classified and cleared:
      - returnDetailedResults False: (inferenceResult, None, None)
      - sortResults False: (inferenceResult, idList, dist) in classifier order
      - otherwise: (inferenceResult, idList, distances) sorted by ascending
        distance

    See base class for params and return type descriptions.
    """
    self._accumulateToken(token)

    if reset == 0:
      return numpy.zeros(self.numLabels), [], numpy.zeros(0)

    # With reset=1, all text accumulated, proceed w/ classifying this document
    document = " ".join(self.currentDocument)
    bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

    # Inference is run on the densified form of the fingerprint positions.
    densePattern = self.encoder.densifyPattern(bitmap)

    (_, inferenceResult, dist, _) = self.classifier.infer(densePattern)

    if self.verbosity >= 2:
      print("CioFP model inference with: '{}'".format(document))
      print("\tBitmap: {}".format(bitmap))
      print("\tInference result= {}".format(inferenceResult))
      print("\tDistances= {}".format(dist))

    self.currentDocument = None

    # Figure out format of returned results

    if not returnDetailedResults:
      # Return non-detailed results.
      return inferenceResult, None, None

    if not sortResults:
      idList = [self.classifier.getPartitionId(i) for i in range(len(dist))]
      return inferenceResult, idList, dist

    # Return sorted results
    sortedIndices = dist.argsort()
    idList = [self.classifier.getPartitionId(i) for i in sortedIndices]
    sortedDistances = dist[sortedIndices]
    return inferenceResult, idList, sortedDistances


  def getEncoder(self):
    """
    Returns the encoder instance for the model.
    """
    return self.encoder


  def getClassifier(self):
    """
    Returns the classifier instance for the model.
    """
    return self.classifier
class ClassificationModelFingerprint(ClassificationModel):
    """
    Class to run the survey response classification task with Cortical.io
    fingerprint encodings.

    Tokens are accumulated into a document until a call arrives with reset=1;
    the whole document is then encoded into a fingerprint bitmap and handed
    to a KNNClassifier for training or inference.

    From the experiment runner, the methods expect to be fed one sample at a
    time.
    """
    def __init__(self,
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 k=1,
                 classifierMetric="rawOverlap",
                 cacheRoot=None,
                 **kwargs):
        """
        Build the KNN classifier and the Cortical.io encoder for the model.

        @param fingerprintType  Must be EncoderTypes.document or
                                EncoderTypes.word; forwarded to CioEncoder.
        @param unionSparsity    Forwarded to CioEncoder.
        @param retinaScaling    Forwarded to CioEncoder.
        @param retina           Forwarded to CioEncoder.
        @param apiKey           Forwarded to CioEncoder (see the CioEncoder
                                constructor for details).
        @param k                Forwarded to KNNClassifier as k.
        @param classifierMetric Forwarded to KNNClassifier as distanceMethod.
        @param cacheRoot        Forwarded to CioEncoder as cacheDir.
        @param kwargs           Forwarded to the base class constructor.

        @raises ValueError if fingerprintType is not a supported type.
        """
        super(ClassificationModelFingerprint, self).__init__(**kwargs)

        # The allowed types must be compared by value with a membership
        # test; an identity comparison against a boolean expression would
        # never match.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligble types.")

        self.classifier = KNNClassifier(k=k,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=self.verbosity - 1)

        # Need a valid API key for the Cortical.io encoder (see CioEncoder
        # constructor for details).
        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey,
                                  cacheDir=cacheRoot)

        # Tokens of the document currently being accumulated; None between
        # documents.
        self.currentDocument = None

    def _accumulateToken(self, token):
        """
        Append the token to the document being built, starting a new
        document if none is in progress.
        """
        if self.currentDocument is None:
            self.currentDocument = []
        self.currentDocument.append(token)

    def trainToken(self, token, labels, sampleId, reset=0):
        """
        Train the model with the given text token, associated labels, and
        sampleId.

        The token is only accumulated unless reset == 1, in which case the
        accumulated tokens are joined with spaces, encoded, and learned once
        per label, and the accumulated document is cleared.

        See base class for params and return type descriptions.
        """
        self._accumulateToken(token)

        if reset != 1:
            # Still accumulating text for this document.
            return

        # All text accumulated, proceed w/ training on this document.
        document = " ".join(self.currentDocument)
        bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

        if self.verbosity >= 2:
            print("CioFP model training with: '{}'".format(document))
            print("\tBitmap: {}".format(bitmap))

        for label in labels:
            # The bitmap is a list of positions, so isSparse is given the
            # encoder width (self.encoder.n) -- see KNNClassifier.learn for
            # its meaning.
            self.classifier.learn(bitmap,
                                  label,
                                  isSparse=self.encoder.n,
                                  partitionId=sampleId)

        self.currentDocument = None

    def inferToken(self,
                   token,
                   reset=0,
                   returnDetailedResults=False,
                   sortResults=True):
        """
        Classify the token (i.e. run inference on the model with this
        document) and return classification results and (optionally) a list
        of sampleIds and distances.   Repeated sampleIds are NOT removed from
        the results.

        While reset == 0 the token is only accumulated and a placeholder
        (zeros of length numLabels, empty list, empty array) is returned. On
        any other reset value the accumulated document is classified and
        cleared:
          - returnDetailedResults False: (inferenceResult, None, None)
          - sortResults False: (inferenceResult, idList, dist) in classifier
            order
          - otherwise: (inferenceResult, idList, distances) sorted by
            ascending distance

        See base class for params and return type descriptions.
        """
        self._accumulateToken(token)

        if reset == 0:
            return numpy.zeros(self.numLabels), [], numpy.zeros(0)

        # With reset=1, all text accumulated, proceed w/ classifying this
        # document
        document = " ".join(self.currentDocument)
        bitmap = self.encoder.encode(document)["fingerprint"]["positions"]

        # Inference is run on the densified form of the fingerprint positions.
        densePattern = self.encoder.densifyPattern(bitmap)

        (_, inferenceResult, dist, _) = self.classifier.infer(densePattern)

        if self.verbosity >= 2:
            print("CioFP model inference with: '{}'".format(document))
            print("\tBitmap: {}".format(bitmap))
            print("\tInference result= {}".format(inferenceResult))
            print("\tDistances= {}".format(dist))

        self.currentDocument = None

        # Figure out format of returned results

        if not returnDetailedResults:
            # Return non-detailed results.
            return inferenceResult, None, None

        if not sortResults:
            idList = [
                self.classifier.getPartitionId(i) for i in range(len(dist))
            ]
            return inferenceResult, idList, dist

        # Return sorted results
        sortedIndices = dist.argsort()
        idList = [self.classifier.getPartitionId(i) for i in sortedIndices]
        sortedDistances = dist[sortedIndices]
        return inferenceResult, idList, sortedDistances

    def getEncoder(self):
        """
        Returns the encoder instance for the model.
        """
        return self.encoder

    def getClassifier(self):
        """
        Returns the classifier instance for the model.
        """
        return self.classifier