class FluentWrapper(object):
  """ Facade over a nupic.fluent model trained on a sample data file """

  def __init__(self, dataPath):
    """
    Creates the nupic.fluent runner and trains its model on every sample
    found in the given data file.

    :param str dataPath: Path to sample data file.
                         Must be a CSV file having 'ID' and 'Sample' columns
    """
    g_log.info("Initialize nupic.fluent")

    # Runner configuration gathered in one place
    runnerOptions = dict(
      dataPath=dataPath,
      resultsDir="",
      experimentName="imbu_fingerprints",
      load=False,
      modelName="ClassificationModelFingerprint",
      modelModuleName="fluent.models.classify_fingerprint",
      numClasses=1,  # must be >0 to go through training
      plots=0,
      orderedSplit=False,
      trainSizes=[],
      verbosity=0,
    )
    runner = FluentRunner(**runnerOptions)
    self._fluent = runner

    # Prepare the model and the data, using all loaded samples for training
    runner.initModel()
    runner.setupData()
    runner.trainSize = len(runner.samples)
    runner.encodeSamples()
    runner.resetModel(0)

    # Feed the samples to the model one index at a time
    for sampleIndex in range(runner.trainSize):
      runner.model.trainModel(sampleIndex)


  def query(self, text):
    """ Queries the fluent model for the samples matching the given text.

    :param str text: The text to match.

    :returns: a list with one entry per sample reported by the model, in the
              order the model returned them. Empty when ``text`` is empty.

    ::
    [
        {"id": <sample id>, "text": <sample text>, "score": <distance>},
        ...
    ]
    """
    # Nothing to look up for empty / missing text
    if not text:
      return []

    g_log.info("Query model for : %s", text)
    matchedIDs, matchedDists = self._fluent.model.queryModel(text, False)
    samplesByID = self._fluent.dataDict

    # The first element of each dataDict entry is used as the sample text;
    # the distance is converted through its .item() method
    return [{"id": matchID,
             "text": samplesByID[matchID][0],
             "score": matchDist.item()}
            for matchID, matchDist in zip(matchedIDs, matchedDists)]
Exemple #2
0
class FluentWrapper(object):
    """ Facade over a nupic.fluent model trained on a sample data file """

    def __init__(self, dataPath):
        """
        Creates the nupic.fluent runner and trains its model on every sample
        found in the given data file.

        :param str dataPath: Path to sample data file.
                             Must be a CSV file having 'ID' and 'Sample' columns
        """
        g_log.info("Initialize nupic.fluent")

        # Runner configuration gathered in one place
        runnerOptions = {
            "dataPath": dataPath,
            "resultsDir": "",
            "experimentName": "imbu_fingerprints",
            "load": False,
            "modelName": "ClassificationModelFingerprint",
            "modelModuleName": "fluent.models.classify_fingerprint",
            "numClasses": 1,  # must be >0 to go through training
            "plots": 0,
            "orderedSplit": False,
            "trainSizes": [],
            "verbosity": 0,
        }
        fluent = FluentRunner(**runnerOptions)
        self._fluent = fluent

        # Prepare the model and the data, using all loaded samples for training
        fluent.initModel()
        fluent.setupData()
        fluent.trainSize = len(fluent.samples)
        fluent.encodeSamples()
        fluent.resetModel(0)

        # Feed the samples to the model one index at a time
        for sampleIndex in range(fluent.trainSize):
            fluent.model.trainModel(sampleIndex)

    def query(self, text):
        """ Queries the fluent model for the samples matching the given text.

        :param str text: The text to match.

        :returns: a list with one entry per sample reported by the model, in
                  the order the model returned them. Empty when ``text`` is
                  empty.

        ::
        [
            {"id": <sample id>, "text": <sample text>, "score": <distance>},
            ...
        ]
        """
        # Nothing to look up for empty / missing text
        if not text:
            return []

        g_log.info("Query model for : %s", text)
        matchedIDs, matchedDists = self._fluent.model.queryModel(text, False)
        samplesByID = self._fluent.dataDict

        # The first element of each dataDict entry is used as the sample text;
        # the distance is converted through its .item() method
        return [
            {"id": matchID, "text": samplesByID[matchID][0], "score": matchDist.item()}
            for matchID, matchDist in zip(matchedIDs, matchedDists)
        ]
Exemple #3
0
def run(args):
  """
  Runs a single experiment from start to finish, printing progress and
  elapsed times along the way.

  Steps, in order: build the Runner, initialize the model, set up the data,
  encode the samples, run the experiment, calculate results, save, and
  optionally validate against expected classifications.

  :param args: object exposing the attributes read below: dataPath,
               resultsDir, experimentName, load, modelName, modelModuleName,
               numClasses, plots, orderedSplit, trainSize, verbosity and
               validation (presumably a parsed command-line namespace;
               verify against the caller).
  """
  # NOTE: every print uses the single-argument parenthesized form, which
  # produces the same output under the Python 2 print statement and the
  # Python 3 print function.
  start = time.time()

  # Results directory is joined onto this script's directory; an absolute
  # args.resultsDir is used as is (os.path.join semantics)
  root = os.path.dirname(os.path.realpath(__file__))
  resultsDir = os.path.join(root, args.resultsDir)

  runner = Runner(dataPath=args.dataPath,
                  resultsDir=resultsDir,
                  experimentName=args.experimentName,
                  load=args.load,
                  modelName=args.modelName,
                  modelModuleName=args.modelModuleName,
                  numClasses=args.numClasses,
                  plots=args.plots,
                  orderedSplit=args.orderedSplit,
                  trainSize=args.trainSize,
                  verbosity=args.verbosity)

  runner.initModel()

  print("Reading in data and preprocessing.")
  dataTime = time.time()
  runner.setupData()
  print("Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding "
        "the data".format(time.time() - dataTime))

  encodeTime = time.time()
  runner.encodeSamples()
  print("Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the "
        "experiment.".format(time.time() - encodeTime))

  runner.runExperiment()

  runner.calculateResults()

  runner.save()

  print("Experiment complete in {0:.2f} seconds.".format(time.time() - start))

  # Validation only runs when a validation argument was supplied
  if args.validation:
    print("Validating experiment against expected classifications...")
    print(runner.validateExperiment(args.validation))