class FluentWrapper(object):
    """ Wraps nupic.fluent Model """

    def __init__(self, dataPath):
        """ Initializes nupic.fluent model with given sample data.

        :param str dataPath: Path to sample data file. Must be a CSV file
                             having 'ID' and 'Sample' columns.
        """
        g_log.info("Initialize nupic.fluent")

        # Initialize nupic.fluent model runner
        self._fluent = FluentRunner(
            dataPath=dataPath,
            resultsDir="",
            experimentName="imbu_fingerprints",
            load=False,
            modelName="ClassificationModelFingerprint",
            modelModuleName="fluent.models.classify_fingerprint",
            numClasses=1,  # must be >0 to go through training
            plots=0,
            orderedSplit=False,
            trainSizes=[],
            verbosity=0)

        # Train model with given sample data: use every available sample for
        # training, then feed them to the model one at a time.
        self._fluent.initModel()
        self._fluent.setupData()
        self._fluent.trainSize = len(self._fluent.samples)
        self._fluent.encodeSamples()
        self._fluent.resetModel(0)
        for i in range(self._fluent.trainSize):
            self._fluent.model.trainModel(i)

    def query(self, text):
        """ Queries fluent model and returns an ordered list of matching
        documents.

        :param str text: The text to match.

        :returns: a sequence of matching samples.

        ::

            [
                {"id": "1", "text": "sampleText", "score": "0.75"},
                ...
            ]
        """
        results = []
        if text:
            g_log.info("Query model for : %s", text)
            sampleIDs, sampleDists = self._fluent.model.queryModel(text, False)
            for sampleID, dist in zip(sampleIDs, sampleDists):
                results.append({"id": sampleID,
                                "text": self._fluent.dataDict[sampleID][0],
                                "score": dist.item()})
        return results
class FluentWrapper(object):
    """ Wraps nupic.fluent Model """

    def __init__(self, dataPath):
        """ Build and train a nupic.fluent model from the given sample file.

        :param str dataPath: Path to sample data file. Must be a CSV file
                             having 'ID' and 'Sample' columns.
        """
        g_log.info("Initialize nupic.fluent")

        # Set up the nupic.fluent experiment runner
        self._fluent = FluentRunner(
            dataPath=dataPath,
            resultsDir="",
            experimentName="imbu_fingerprints",
            load=False,
            modelName="ClassificationModelFingerprint",
            modelModuleName="fluent.models.classify_fingerprint",
            numClasses=1,  # must be >0 to go through training
            plots=0,
            orderedSplit=False,
            trainSizes=[],
            verbosity=0,
        )

        # Prepare the data, then train on every sample in order.
        self._fluent.initModel()
        self._fluent.setupData()
        sampleCount = len(self._fluent.samples)
        self._fluent.trainSize = sampleCount
        self._fluent.encodeSamples()
        self._fluent.resetModel(0)
        for sampleIndex in range(sampleCount):
            self._fluent.model.trainModel(sampleIndex)

    def query(self, text):
        """ Queries fluent model and returns an ordered list of matching
        documents.

        :param str text: The text to match.

        :returns: a sequence of matching samples.

        ::

            [
                {"id": "1", "text": "sampleText", "score": "0.75"},
                ...
            ]
        """
        # Empty/None query text yields no matches.
        if not text:
            return []

        g_log.info("Query model for : %s", text)
        matchIDs, matchDists = self._fluent.model.queryModel(text, False)
        dataDict = self._fluent.dataDict
        return [{"id": matchID,
                 "text": dataDict[matchID][0],
                 "score": distance.item()}
                for matchID, distance in zip(matchIDs, matchDists)]
def run(args): start = time.time() root = os.path.dirname(os.path.realpath(__file__)) resultsDir = os.path.join(root, args.resultsDir) runner = Runner(dataPath=args.dataPath, resultsDir=resultsDir, experimentName=args.experimentName, load=args.load, modelName=args.modelName, modelModuleName=args.modelModuleName, numClasses=args.numClasses, plots=args.plots, orderedSplit=args.orderedSplit, trainSize=args.trainSize, verbosity=args.verbosity) runner.initModel() print "Reading in data and preprocessing." dataTime = time.time() runner.setupData() print ("Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding " "the data".format(time.time() - dataTime)) encodeTime = time.time() runner.encodeSamples() print ("Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the " "experiment.".format(time.time() - encodeTime)) runner.runExperiment() runner.calculateResults() runner.save() print "Experiment complete in {0:.2f} seconds.".format(time.time() - start) if args.validation: print "Validating experiment against expected classifications..." print runner.validateExperiment(args.validation)