def instantiateModel(args):
  """
  Fill in the experiment-specific fields on ``args`` and construct the
  model from the resulting argument set.

  :param args: argparse-style namespace; must provide networkConfigPath
               and modelName, plus whatever createModel() consumes.
  :returns: the model instance built by createModel().
  """
  # NOTE(review): kValues is expected to be a module-level dict mapping
  # model names to K values — confirm it is defined in this module.
  setattr(args, "networkConfig", getNetworkConfig(args.networkConfigPath))
  setattr(args, "k", kValues.get(args.modelName, 1))
  return createModel(**vars(args))
def instantiateModel(args):
  """
  Set the model-specific arguments this experiment needs and return an
  instance of the model we will use.

  :param args: argparse-style namespace; must provide networkConfigPath
               and modelName, plus whatever createModel() consumes.
  :returns: the model instance built by createModel().
  """
  # vars(args) is the live __dict__ of the namespace, so writing through
  # it mutates args exactly like direct attribute assignment would.
  params = vars(args)
  params["networkConfig"] = getNetworkConfig(args.networkConfigPath)
  # NOTE(review): kValues is presumably a module-level dict of per-model
  # K values — confirm.
  params["k"] = kValues.get(args.modelName, 1)
  return createModel(**params)
def _loadNetworkConfig(jsonName=None):
  """
  Load a network config JSON by resolving its path relative to this file,
  parsing it with
  htmresearch.frameworks.nlp.model_factory.getNetworkConfig().

  :param jsonName: file name of the config inside
                   projects/nlp/data/network_configs.
  :raises RuntimeError: if no config name is given.
  """
  if not jsonName:
    raise RuntimeError("Need a config file to build the network model.")
  # Walk four directory levels up from this file to reach the repo root.
  root = os.path.realpath(__file__)
  for _ in range(4):
    root = os.path.dirname(root)
  configPath = os.path.join(root, "projects/nlp/data/network_configs",
                            jsonName)
  return getNetworkConfig(configPath)
def instantiateModel(args):
  """
  Return an instance of the model we will use.

  :param args: argparse-style namespace; must provide networkConfigPath
               and modelName, plus whatever createModel() consumes.
  """
  # K values known to work well for this problem, per model type;
  # anything not listed falls back to K=1.
  bestK = {"keywords": 21, "docfp": 3}

  # Set the arguments this experiment requires before building the model.
  args.networkConfig = getNetworkConfig(args.networkConfigPath)
  args.numLabels = 2
  args.k = bestK.get(args.modelName, 1)
  return createModel(**vars(args))
def instantiateModel(args):
  """
  Set experiment-specific arguments and return an instance of the model
  we will use.

  :param args: argparse-style namespace; must provide networkConfigPath
               and modelName, plus whatever createModel() consumes.
  """
  # Per-model K values that are known to work well for this problem.
  knownK = dict(keywords=21, docfp=3)

  overrides = {
      "networkConfig": getNetworkConfig(args.networkConfigPath),
      "numLabels": 2,
      "k": knownK.get(args.modelName, 1),
  }
  for name, value in overrides.items():
    setattr(args, name, value)
  return createModel(**vars(args))
def testSensorSimpleUPKNN(self):
  # Build the htm model with the sensor_simple_TP_knn network config.
  modelName = "htm"
  checkpointDir = os.path.join(self.modelDir, "htm.checkpoint")
  configPath = os.path.join(
      _ROOT,
      "projects/nlp/data/network_configs/sensor_simple_TP_knn.json")
  self.modelParams.update(
      networkConfig=getNetworkConfig(configPath),
      numLabels=2,
      modelDir=checkpointDir)
  model = self._executeModelLifecycle(modelName, checkpointDir)

  # Exercise model inference on the trained/reloaded model.
  self._validateInference(model, modelName)
  self._inferWithFirstDocument(model, modelName)
def _loadNetworkConfig(jsonName=None):
  """
  Locate the repository root four directory levels above this file and
  load the named network configuration with
  htmresearch.frameworks.nlp.model_factory.getNetworkConfig().

  :param jsonName: file name of the config inside
                   projects/nlp/data/network_configs.
  :raises RuntimeError: if no config name is given.
  """
  if not jsonName:
    raise RuntimeError("Need a config file to build the network model.")
  up = os.path.dirname
  root = up(up(up(up(os.path.realpath(__file__)))))
  return getNetworkConfig(
      os.path.join(root, "projects/nlp/data/network_configs", jsonName))
def runExperiment(args):
  """
  Train a classification model on the deduplicated corpus, hierarchically
  cluster the trained KNN patterns, print the prototype documents of each
  cluster with their bucket labels, and produce summary plots.

  :param args: argparse-style namespace; must provide networkConfigPath,
               numClusters, numPrototypes, and knnTest, plus whatever
               createModel()/readDataAndReshuffle() consume.
  """
  if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)
  (trainingDataDup, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args)

  # remove duplicates from training data, keyed on the document id in
  # record[2]; only the first occurrence of each document is kept
  includedDocIds = set()
  trainingData = []
  for record in trainingDataDup:
    if record[2] not in includedDocIds:
      includedDocIds.add(record[2])
      trainingData.append(record)

  args.networkConfig = getNetworkConfig(args.networkConfigPath)
  model = createModel(numLabels=1, **vars(args))
  model = trainModel(args, model, trainingData, labelRefs)

  # NOTE(review): reaches into the classifier's private _numPatterns and
  # treats it as "one stored pattern per trained document" — confirm.
  numDocs = model.getClassifier()._numPatterns
  print "Model trained with %d documents" % (numDocs,)

  knn = model.getClassifier()
  hc = HierarchicalClustering(knn)
  hc.cluster("complete")
  protos, clusterSizes = hc.getClusterPrototypes(args.numClusters, numDocs)

  # Run test to ensure consistency with KNN, then bail out early
  if args.knnTest:
    knnTest(protos, knn)
    return

  # Summary statistics
  # bucketCounts[i, j] is the number of occurrences of bucket j in cluster i
  bucketCounts = numpy.zeros((args.numClusters, len(labelRefs)))

  for clusterId in xrange(len(clusterSizes)):
    print
    print "Cluster %d with %d documents" % (clusterId, clusterSizes[clusterId])
    print "==============="
    prototypeNum = 0
    for index in protos[clusterId]:
      # -1 entries appear to be padding from getClusterPrototypes() — skip
      if index != -1:
        docId = trainingData[index][2]
        prototypeNum += 1
        # Only the first numPrototypes prototypes of a cluster are printed;
        # bucketCounts is still accumulated for all of them.
        display = prototypeNum <= args.numPrototypes
        if display:
          print "(%d) %s" % (docId, trainingData[index][0])
          print "Buckets:"
        # The docId keys in documentCategoryMap are strings rather than ints
        if docId in documentCategoryMap:
          for bucketId in documentCategoryMap[docId]:
            bucketCounts[clusterId, bucketId] += 1
            if display:
              print "  ", labelRefs[bucketId]
        elif display:
          print "  <None>"
        if display:
          print "\n\n"

  createBucketClusterPlot(args, bucketCounts)
  create2DSVDProjection(args, protos, trainingData, documentCategoryMap, knn)
def runExperiment(args):
  """
  Train a classification model on the deduplicated corpus, hierarchically
  cluster the trained KNN patterns, print each cluster's prototype
  documents with their bucket labels, and produce summary plots.

  :param args: argparse-style namespace; must provide networkConfigPath,
               numClusters, numPrototypes, and knnTest, plus whatever
               createModel()/readDataAndReshuffle() consume.
  """
  if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)
  (trainingDataDup, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args)

  # remove duplicates from training data; record[2] is the document id and
  # only the first occurrence of each document is kept
  includedDocIds = set()
  trainingData = []
  for record in trainingDataDup:
    if record[2] not in includedDocIds:
      includedDocIds.add(record[2])
      trainingData.append(record)

  args.networkConfig = getNetworkConfig(args.networkConfigPath)
  model = createModel(numLabels=1, **vars(args))
  model = trainModel(args, model, trainingData, labelRefs)

  # NOTE(review): relies on the classifier's private _numPatterns being
  # one stored pattern per trained document — confirm.
  numDocs = model.getClassifier()._numPatterns
  print "Model trained with %d documents" % (numDocs, )

  knn = model.getClassifier()
  hc = HierarchicalClustering(knn)
  hc.cluster("complete")
  protos, clusterSizes = hc.getClusterPrototypes(args.numClusters, numDocs)

  # Run test to ensure consistency with KNN, then return without plotting
  if args.knnTest:
    knnTest(protos, knn)
    return

  # Summary statistics
  # bucketCounts[i, j] is the number of occurrences of bucket j in cluster i
  bucketCounts = numpy.zeros((args.numClusters, len(labelRefs)))

  for clusterId in xrange(len(clusterSizes)):
    print
    print "Cluster %d with %d documents" % (clusterId, clusterSizes[clusterId])
    print "==============="
    prototypeNum = 0
    for index in protos[clusterId]:
      # -1 entries appear to be padding from getClusterPrototypes() — skip
      if index != -1:
        docId = trainingData[index][2]
        prototypeNum += 1
        # Print only the first numPrototypes prototypes per cluster; the
        # bucket counts below are still accumulated for every prototype.
        display = prototypeNum <= args.numPrototypes
        if display:
          print "(%d) %s" % (docId, trainingData[index][0])
          print "Buckets:"
        # The docId keys in documentCategoryMap are strings rather than ints
        if docId in documentCategoryMap:
          for bucketId in documentCategoryMap[docId]:
            bucketCounts[clusterId, bucketId] += 1
            if display:
              print "  ", labelRefs[bucketId]
        elif display:
          print "  <None>"
        if display:
          print "\n\n"

  createBucketClusterPlot(args, bucketCounts)
  create2DSVDProjection(args, protos, trainingData, documentCategoryMap, knn)