def testCartMustBuildCategoricalCategorical(self):
    # Grow a tree predicting "c" from the categorical fields "a" and "b"
    # (split to depth 2), then verify its PFA rendering both structurally
    # and by scoring through an engine.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    dataset = Dataset.fromIterable(
        ((a, b, c) for (x, y, z, a, b, c) in TestProducerCart.data()),
        100000,
        ("a", "b", "c"))
    tree = TreeNode.fromWholeDataset(dataset, "c")
    tree.splitMaxDepth(2)

    doc = tree.pfaDocument(
        {"type": "record",
         "name": "Datum",
         "fields": [{"name": "a", "type": "string"},
                    {"name": "b", "type": "string"}]},
        "TreeNode")

    # root split
    root = doc["cells"]["tree"]["init"]
    self.assertEqual(root["field"], "a")
    self.assertEqual(root["value"], ["A0", "A1", "A2", "A3"])

    # subtree under the root's "pass" branch
    passSide = root["pass"]["TreeNode"]
    self.assertEqual(passSide["field"], "b")
    self.assertEqual(passSide["value"], ["B6", "B8"])
    self.assertEqual(passSide["pass"]["string"], "C6")
    self.assertEqual(passSide["fail"]["string"], "C3")

    # subtree under the root's "fail" branch
    failSide = root["fail"]["TreeNode"]
    self.assertEqual(failSide["field"], "b")
    self.assertEqual(failSide["value"], ["B0"])
    self.assertEqual(failSide["pass"]["string"], "C0")
    self.assertEqual(failSide["fail"]["string"], "C0")

    # the compiled engine must agree with the structure checked above
    engine, = PFAEngine.fromJson(doc)
    scoringCases = [
        ({"a": "A1", "b": "B6"}, "C6"),
        ({"a": "A1", "b": "B2"}, "C3"),
        ({"a": "A5", "b": "B0"}, "C0"),
        ({"a": "A5", "b": "B4"}, "C0"),
    ]
    for datum, expected in scoringCases:
        self.assertEqual(engine.action(datum), expected)

    # the document must still load with every optional annotation switched on
    doc = tree.pfaDocument(
        {"type": "record",
         "name": "Datum",
         "fields": [{"name": "a", "type": "string"},
                    {"name": "b", "type": "string"}]},
        "TreeNode",
        nodeScores=True,
        datasetSize=True,
        predictandDistribution=True,
        predictandUnique=True,
        entropy=True,
        gain=True)
    engine, = PFAEngine.fromJson(doc)
def testCartMustBuildNumericalCategorical(self):
    # Grow a tree predicting "c" from the numerical fields "x" and "y"
    # (split to depth 2), then verify its PFA rendering both structurally
    # and by scoring through an engine.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    dataset = Dataset.fromIterable(
        ((x, y, c) for (x, y, z, a, b, c) in TestProducerCart.data()),
        100000,
        ("x", "y", "c"))
    tree = TreeNode.fromWholeDataset(dataset, "c")
    tree.splitMaxDepth(2)

    doc = tree.pfaDocument(
        {"type": "record",
         "name": "Datum",
         "fields": [{"name": "x", "type": "double"},
                    {"name": "y", "type": "double"}]},
        "TreeNode")

    # root split: thresholds are only compared to two decimal places
    root = doc["cells"]["tree"]["init"]
    self.assertEqual(root["field"], "x")
    self.assertAlmostEqual(root["value"], 4.00, places=2)

    # subtree under the root's "pass" branch
    passSide = root["pass"]["TreeNode"]
    self.assertEqual(passSide["field"], "y")
    self.assertAlmostEqual(passSide["value"], 6.00, places=2)
    self.assertEqual(passSide["pass"]["string"], "C3")
    self.assertEqual(passSide["fail"]["string"], "C6")

    # subtree under the root's "fail" branch
    failSide = root["fail"]["TreeNode"]
    self.assertEqual(failSide["field"], "y")
    self.assertAlmostEqual(failSide["value"], 2.00, places=2)
    self.assertEqual(failSide["pass"]["string"], "C0")
    self.assertEqual(failSide["fail"]["string"], "C0")

    # the compiled engine must agree with the structure checked above
    engine, = PFAEngine.fromJson(doc)
    scoringCases = [
        ({"x": 2.0, "y": 3.0}, "C3"),
        ({"x": 2.0, "y": 8.0}, "C6"),
        ({"x": 7.0, "y": 1.0}, "C0"),
        ({"x": 7.0, "y": 5.0}, "C0"),
    ]
    for datum, expected in scoringCases:
        self.assertEqual(engine.action(datum), expected)

    # the document must still load with every optional annotation switched on
    doc = tree.pfaDocument(
        {"type": "record",
         "name": "Datum",
         "fields": [{"name": "x", "type": "double"},
                    {"name": "y", "type": "double"}]},
        "TreeNode",
        nodeScores=True,
        datasetSize=True,
        predictandDistribution=True,
        predictandUnique=True,
        entropy=True,
        gain=True)
    engine, = PFAEngine.fromJson(doc)
def fromPFA(cls, pfaDoc, ext):
    """Wrap the first engine built from a PFA document in a new ``cls``.

    :param pfaDoc: PFA document, handed unchanged to the titus loader
    :param ext: file extension; ".yml" and ".yaml" select the YAML loader,
        anything else the JSON loader
    :return: ``cls(engine, "PFA")``
    """
    from titus.genpy import PFAEngine

    isYaml = ext in (".yml", ".yaml")
    loader = PFAEngine.fromYaml if isYaml else PFAEngine.fromJson
    # the loaders return a sequence of engines; only the first one is used
    return cls(loader(pfaDoc)[0], "PFA")
def test_main_classification(mock_parameters, mock_save_results, mock_get_results, mock_fetch_data, method, name):
    """Run ``main`` with PFA generation and score one record with the saved document."""
    # stand-ins for what would come from the database
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_classification(include_categorical=True)
    mock_get_results.return_value = None

    main(job_id=None, generate_pfa=True)

    # first positional argument of the last save_results call is the PFA text
    saved_args = mock_save_results.call_args[0]
    pfa_dict = json.loads(saved_args[0])

    # NOTE: this does not work due to bug in jsonpickle
    # deserialize model
    # estimator = deserialize_sklearn_estimator(pfa_dict['metadata']['estimator'])
    # assert estimator.__class__.__name__ == name

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'agegroup': '50-59y'}
    engine.action(sample)
def testTop5List(self):
    # Maintain a running list of the five stars with the most planets and
    # return their names; the result of running the engine is compared with
    # the expected list of names.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # Each "//" comment in the PrettyPFA text sits on its own line so that it
    # cannot swallow the code that follows it.
    pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: array(string)
cells:
  mostPlanets(array(Star)) = []
action:
  // update the list of stars, keeping only the 5 with the most planets
  var currentList =
    mostPlanets to fcn(old: array(Star) -> array(Star))
      stat.sample.topN(input, old, 5, u.morePlanets);

  // map this top 5 list of stars to their names
  a.map(currentList, fcn(x: Star -> string) x.name)
fcns:
  // our comparison function
  morePlanets = fcn(x: Star, y: Star -> boolean) a.len(x.planets) < a.len(y.planets)
'''.replace("<<INPUT>>", inputSchema), check=False, lineNumbers=False)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.assertEqual(
        self.runEngine(engine),
        ["KOI-351", "HD 40307", "GJ 667C", "Kepler-11", "HD 10180"])
def testTree(self):
    # Time the engine from test/hipparcos_numerical_10.pfa over every record
    # of test/hipparcos_numerical.csv, printing elapsed seconds and the record
    # count after every 5000 records.

    # both files are opened with context managers so the handles are closed
    with open("test/hipparcos_numerical_10.pfa") as pfaFile:
        engine, = PFAEngine.fromJson(pfaFile)

    # load everything first so that parsing is not part of the timing
    data = []
    with open("test/hipparcos_numerical.csv") as csvFile:
        for line in csvFile:
            # exactly 12 comma-separated columns are required; the last one
            # (spectrum) is not part of the engine's input record
            ra, dec, dist, mag, absmag, x, y, z, vx, vy, vz, spectrum = line.split(",")
            data.append({
                "ra": float(ra),
                "dec": float(dec),
                "dist": float(dist),
                "mag": float(mag),
                "absmag": float(absmag),
                "x": float(x),
                "y": float(y),
                "z": float(z),
                "vx": float(vx),
                "vy": float(vy),
                "vz": float(vz),
            })

    startTime = time.time()
    for count, datum in enumerate(data, 1):
        engine.action(datum)
        if count % 5000 == 0:
            # parenthesized single-argument print behaves the same in Python 2 and 3
            print("{0}, {1}".format(time.time() - startTime, count))
def testSimpleKMeansWithStrings(self):
    # Score each input against the k-means clusters and return the id of the
    # closest one as a string, or "MISSING" when any of the four inputs is null.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # define the workflow, leaving clusters as an empty array for now
    # (each "//" comment in the PrettyPFA text sits on its own line so that it
    # cannot swallow the action that follows it)
    pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string
cells:
  clusters(array(record(id: string, center: array(double)))) = []
action:
  // ifnotnull runs the first block if all four expressions are not null
  // input.mag has type union(double, null) while mag has type double, etc.
  ifnotnull(mag: input.mag,
            dist: input.dist,
            mass: input.mass,
            radius: input.radius)
    model.cluster.closest(new(array(double), mag, dist, mass, radius),
                          clusters,
                          metric.simpleEuclidean)["id"]
  else
    "MISSING"
'''.replace("<<INPUT>>", inputSchema))

    # fill in the clusters with the k-means result
    if self.kmeansResult is None:
        self.doKmeans()
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    # build a scoring engine and test it
    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testTop5List(self):
    # Maintain a running list of the five stars with the most planets and
    # return their names; the result of running the engine is compared with
    # the expected list of names.

    # Each "//" comment in the PrettyPFA text sits on its own line so that it
    # cannot swallow the code that follows it.
    pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: array(string)
cells:
  mostPlanets(array(Star)) = []
action:
  // update the list of stars, keeping only the 5 with the most planets
  var currentList =
    mostPlanets to fcn(old: array(Star) -> array(Star))
      stat.sample.topN(input, old, 5, u.morePlanets);

  // map this top 5 list of stars to their names
  a.map(currentList, fcn(x: Star -> string) x.name)
fcns:
  // our comparison function
  morePlanets = fcn(x: Star, y: Star -> boolean) a.len(x.planets) < a.len(y.planets)
'''.replace("<<INPUT>>", TestClustering.recordSchema), check=False, lineNumbers=False)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.assertEqual(
        self.runEngine(engine),
        ["KOI-351", "HD 40307", "GJ 667C", "Kepler-11", "HD 10180"])
def testSimpleKMeansWithStrings(self):
    # Score each input against the k-means clusters and return the id of the
    # closest one as a string, or "MISSING" when any of the four inputs is null.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # define the workflow, leaving clusters as an empty array for now
    # (each "//" comment in the PrettyPFA text sits on its own line so that it
    # cannot swallow the action that follows it)
    pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string
cells:
  clusters(array(record(id: string, center: array(double)))) = []
action:
  // ifnotnull runs the first block if all four expressions are not null
  // input.mag has type union(double, null) while mag has type double, etc.
  ifnotnull(mag: input.mag,
            dist: input.dist,
            mass: input.mass,
            radius: input.radius)
    model.cluster.closest(new(array(double), mag, dist, mass, radius),
                          clusters,
                          metric.simpleEuclidean)["id"]
  else
    "MISSING"
'''.replace("<<INPUT>>", inputSchema))

    # fill in the clusters with the k-means result
    if self.kmeansResult is None:
        self.doKmeans()
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    # build a scoring engine and test it
    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testSimpleKMeansWithEnums(self):
    # Closest-cluster workflow whose output is the enum ClusterId rather than
    # a string; ClusterId@MISSING is returned when any input is null.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # the clusters cell starts empty and is filled in below
    pfaDocument = titus.prettypfa.jsonNode(''' input: <<INPUT>> output: enum([cluster0, cluster1, cluster2, cluster3, cluster4, MISSING], ClusterId) cells: clusters(array(record(id: ClusterId, center: array(double)))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["id"] else ClusterId@MISSING '''.replace("<<INPUT>>", inputSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testPopulationOfClosestCluster(self):
    # Closest-cluster workflow that returns the "population" field of the
    # closest cluster (or null when any input is null), so each cluster record
    # carries a population count in addition to its id and center.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # the clusters cell starts empty and is filled in below
    pfaDocument = titus.prettypfa.jsonNode(''' input: <<INPUT>> output: union(int, null) cells: clusters(array(record(id: string, center: array(double), population: int))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["population"] else null '''.replace("<<INPUT>>", inputSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    # populations=True is requested because the record type declares a population field
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames, populations=True)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testDistanceToClosestCluster(self):
    # The ifnotnull clause here holds three expressions, so it is wrapped in
    # curly brackets and the expressions are separated by semicolons (the
    # final semicolon is optional: semicolons delimit, they do not terminate).
    source = ''' input: <<INPUT>> output: union(double, null) cells: clusters(array(record(id: string, center: array(double)))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) { var datum = new(array(double), mag, dist, mass, radius); var closestCluster = model.cluster.closest(datum, clusters, metric.simpleEuclidean); metric.simpleEuclidean(datum, closestCluster["center"]) } else null '''
    pfaDocument = titus.prettypfa.jsonNode(source.replace("<<INPUT>>", TestClustering.recordSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    clusterCell = pfaDocument["cells"]["clusters"]
    clusterCell["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def test_aggregate_knn(mock_save_results, mock_get_results, mock_fetch_data):
    """Aggregate the same PFA document twice and check the combined codebook and engine."""
    # get one PFA
    mock_fetch_data.return_value = fx.inputs_regression(include_integer=True)
    mock_get_results.return_value = None
    compute()
    pfa = mock_save_results.call_args[0][0]

    def mock_results(job_id):
        # both known job ids yield the same document; any other id yields None
        if job_id in ('1', '2'):
            return mock.MagicMock(data=pfa, error='')
        return None

    mock_get_results.side_effect = mock_results
    aggregate_knn(['1', '2'])

    # the combined codebook must be twice as long as the single one
    pfa_combined = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa_combined)
    combined_size = len(pfa_dict['cells']['codebook']['init'])
    single_size = len(json.loads(pfa)['cells']['codebook']['init'])
    assert combined_size == 2 * single_size

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'subjectageyears': 70}
    engine.action(sample)
def test_aggregate_kmeans(mock_save_results, mock_load_intermediate_json_results, mock_fetch_data):
    """Aggregate two intermediate results and check the centroids and one prediction."""
    mock_fetch_data.return_value = fx.inputs_regression(include_categorical=True)
    mock_load_intermediate_json_results.return_value = [intermediate_data_1(), intermediate_data_2()]

    aggregate_kmeans([1, 2])

    saved_pfa = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(saved_pfa)

    # centroids are stored as a JSON string inside the document metadata
    expected_centroids = np.array([
        [-0.12348661147125002, 0.20922071836500003, 0.0, 1.0],
        [-0.1852486658437501, 0.09447887226000021, 0.5, 0.0],
    ])
    actual_centroids = json.loads(pfa_dict['metadata']['centroids'])
    np.testing.assert_allclose(actual_centroids, expected_centroids, rtol=1e-5)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'}
    ret = engine.action(sample)
    assert ret == 1
def testKMeansTransform(self):
    # Cluster data whose axes were scaled by 10, 20 and 30, undoing the
    # scaling with a Transformation, and check that the PFA document (given
    # the same transformation as preprocessing) labels scaled inputs.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
    generated = TestProducerKMeans.data([2, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])
    for row, (x, y, z) in enumerate(generated):
        # stop once the preallocated array is full
        if row >= dataset.shape[0]:
            break
        dataset[row, :] = [x * 10.0, y * 20.0, z * 30.0]

    trans = Transformation("x/10.0", "y/20.0", "z/30.0")
    kmeans = KMeans(5, trans.transform(dataset, ["x", "y", "z"]))
    kmeans.optimize(whileall(moving(), maxIterations(1000)))

    expectedCenters = [
        [1.01, 1.00, 9.01],
        [2.00, 1.01, 1.00],
        [3.01, 2.01, 5.00],
        [4.99, 8.00, 4.99],
        [8.02, 2.00, 7.01],
    ]
    names = ["one", "two", "three", "four", "five"]

    centers = kmeans.centers()
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(centers[index], expected, places=1)

    doc = kmeans.pfaDocument(
        "Cluster",
        list(names),
        preprocess=trans.new(AvroArray(AvroDouble()), x="input[0]", y="input[1]", z="input[2]"))

    # the document's initial clusters must carry the same centers
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][index]["center"], expected, places=1)

    # feed each expected center, re-scaled like the raw data, through the engine
    engine, = PFAEngine.fromJson(doc)
    for (cx, cy, cz), name in zip(expectedCenters, names):
        self.assertEqual(engine.action([cx * 10, cy * 20, cz * 30]), name)
def get_engine(json_string):
    """Build and validate a PFA scoring engine from a JSON string.

    The document must parse as JSON, compile into an engine, use the ``map``
    method, and take a record with at least one field as input. Every failure
    is logged and ends the process with exit status 1.

    :param json_string: text of the PFA document in JSON form
    :return: the engine built from the document
    """
    # Parse the JSON on its own so that only genuine JSON errors are reported
    # as such; a ValueError raised while building the engine is a different
    # problem and must not be labelled "invalid JSON".
    try:
        document = json.loads(json_string)
    except ValueError as ex:
        # JSON validation
        logging.error(
            "The file provided does not contain a valid JSON document: " + str(ex))
        sys.exit(1)
    except Exception as ex:
        # Other exceptions (for instance an argument that is not text)
        logging.error("An unknown exception occurred: " + str(ex))
        sys.exit(1)

    try:
        # pylint: disable=unbalanced-tuple-unpacking
        engine, = PFAEngine.fromJson(document)
    except PFASyntaxException as ex:
        # Syntax validation
        logging.error(
            "The file provided does not contain a valid PFA compliant document: " + str(ex))
        sys.exit(1)
    except PFASemanticException as ex:
        # PFA semantic check
        logging.error(
            "The file provided contains inconsistent PFA semantics: " + str(ex))
        sys.exit(1)
    except PFAInitializationException as ex:
        # Scoring engine check
        logging.error(
            "It wasn't possible to build a valid scoring engine from the PFA document: " + str(ex))
        sys.exit(1)
    except Exception as ex:
        # Other exceptions
        logging.error("An unknown exception occurred: " + str(ex))
        sys.exit(1)

    # Check that the PFA file uses the "map" method. Other methods are not supported
    # (because irrelevant) by the MIP
    if engine.config.method != "map":
        logging.error(
            "The PFA method you used is not supported. Please use the PFA 'map' method"
        )
        sys.exit(1)

    # Check that the PFA file uses a "record" type as input
    if not isinstance(engine.config.input, AvroRecord):
        logging.error("The PFA document must take a record as input parameter. "
                      "Each field of the record must describe a variable")
        sys.exit(1)

    # Check that the PFA file has at least one input field
    if not engine.config.input.fields:
        logging.error(
            "The PFA document must describe an input record with at least one field"
        )
        sys.exit(1)

    return engine
def __init__(self, config):
    """Load the PFA document in ``./iris-pfa.json`` and build ``self.engine`` from it.

    :param config: accepted but not used by this constructor
    """
    # the context manager closes the file even when parsing raises
    with open('./iris-pfa.json') as pfaFile:
        # json.load returns the document as a dictionary
        pfaDocument = json.load(pfaFile)

    # fromJson returns a sequence; exactly one engine is expected
    self.engine, = PFAEngine.fromJson(pfaDocument)
def on_put_model(self,request,response):
    """Handle PUT of a model: overwrite an existing one or create it in an existing directory.

    When the path exists, its content is replaced and a new engine is started.
    When only the parent directory exists, the model is inserted there and
    inherits the directory's owners and rights. Otherwise the response is 404.

    :param request: incoming request; the body is the PFA document as JSON
    :param response: response object whose status/body are set
    """
    url = request.path
    segments = url.split("/")
    parentpath = os.path.join('/', *segments[:-1])
    modelname = segments[-1]

    if self.path_exists(url):
        _path, file_id, parent_id, _content, _engine = self.cache.getFile(url)

        # overwrite content, restart engine
        newcontent = request.stream.read(request.content_length or 0)
        newengine, = PFAEngine.fromJson(json.loads(newcontent))
        newengine.begin()
        self.queries.updateFile(file_id=file_id, content=newcontent)

        # update cache
        self.cache.path2file[url] = (url, file_id, parent_id, newcontent, newengine)

    elif self.path_exists(parentpath) and self.is_path_dir(parentpath):
        # create model in dir, if parent directory exists, start engine
        _path, dir_id, _dir_parent_id, _content, _engine = self.cache.getFile(parentpath)
        content = request.stream.read(request.content_length or 0)
        engine, = PFAEngine.fromJson(json.loads(content))
        engine.begin()

        # read the file-rights of the directory
        (read_owner, write_owner, execute_owner,
         read_group, write_group, execute_group,
         read_other, write_other, execute_other) = self.cache.getFileRights(dir_id)
        owner_id, group_id = self.cache.getFileOwners(dir_id)

        new_file_id = self.queries.insertFile(
            content=content,
            name=modelname,
            parent_id=dir_id,
            owner_id=owner_id,
            group_id=group_id,
            read_owner=read_owner,
            write_owner=write_owner,
            execute_owner=execute_owner,
            read_group=read_group,
            write_group=write_group,
            execute_group=execute_group,
            read_other=read_other,
            write_other=write_other,
            execute_other=execute_other,
        )

        # insert new file in cache; its parent is the directory itself (the
        # same id written to the database above), not the directory's parent
        self.cache.path2file[url] = (url, new_file_id, dir_id, content, engine)

        # metadata is stored in the column order used by __load_cache
        # (owner, group, then r/w/x for owner, group, other)
        # NOTE(review): assumes getFileRights returns the rights in the order
        # unpacked above - confirm against the cache implementation
        self.cache.filesMetaData[new_file_id] = (
            owner_id, group_id,
            read_owner, write_owner, execute_owner,
            read_group, write_group, execute_group,
            read_other, write_other, execute_other,
        )
        response.status = falcon.HTTP_200

    else:
        response.body = "%s not found" % url
        response.status = falcon.HTTP_404
def get_engine(self):
    """Return the cached PFA engine, building it from ``self.json_string`` on first use.

    When ``self.engine`` is already set (truthy) it is returned as is;
    otherwise an engine is built, stored on ``self.engine`` and returned.
    """
    if self.engine:
        return self.engine

    # pylint: disable=unbalanced-tuple-unpacking
    (built,) = PFAEngine.fromJson(json.loads(self.json_string))
    self.engine = built
    return self.engine
def score_model(partition):
    """Score every row of ``partition`` with an engine built from ``pfa_model.value``.

    :param partition: iterable of rows exposing ``asDict()``
    :return: list holding one single-element list (the engine output) per row
    """
    # Create PFA engine
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_model.value)

    # Score partition data row-by-row
    return [[engine.action(row.asDict())] for row in partition]
def __load_cache(self):
    """Fill ``self.filesMetaData`` and ``self.path2file`` from the database.

    ``filesMetaData`` maps file_id to the remaining selected columns (owner,
    group, then the nine permission flags in select order). ``path2file`` maps
    a path to ``(path, file_id, parent_id, content, engine)``; rows without
    content get ``None`` for both content and engine.
    """
    metaRows = self.cursor.execute("select file_id, owner_id, group_id, read_owner,write_owner,execute_owner,read_group,write_group,execute_group,read_other,write_other,execute_other from s_file;").fetchall()
    self.filesMetaData = {row[0]: row[1:] for row in metaRows}

    fileRows = self.cursor.execute("select gr.path,f.file_id,f.parent_id, f.content from s_file f left join global_rights gr on f.file_id = gr.file_id where gr.uid = 1").fetchall()
    self.path2file = {}
    for path, file_id, parent_id, content in fileRows:
        if content is None:
            self.path2file[path] = (path, file_id, parent_id, None, None)
            continue
        # rows with content are compiled into an engine that is started immediately
        content = str(content)
        engine, = PFAEngine.fromJson(json.loads(content))
        engine.begin()
        self.path2file[path] = (path, file_id, parent_id, content, engine)
def testKMeans(self):
    # Cluster generated data into five clusters and check the centers, the
    # PFA document built from them, and the labels the engine assigns.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
    generated = TestProducerKMeans.data([2, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])
    for row, point in enumerate(generated):
        # stop once the preallocated array is full
        if row >= dataset.shape[0]:
            break
        dataset[row, :] = point

    kmeans = KMeans(5, dataset)
    kmeans.optimize(whileall(moving(), maxIterations(1000)))

    expectedCenters = [
        [1.01, 1.00, 9.01],
        [2.00, 1.01, 1.00],
        [3.01, 2.01, 5.00],
        [4.99, 8.00, 4.99],
        [8.02, 2.00, 7.01],
    ]
    names = ["one", "two", "three", "four", "five"]

    centers = kmeans.centers()
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(centers[index], expected, places=1)

    doc = kmeans.pfaDocument("Cluster", list(names))

    # the document's initial clusters must carry the same centers
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][index]["center"], expected, places=1)

    # each expected center must be labelled with its own cluster name
    engine, = PFAEngine.fromJson(doc)
    for (cx, cy, cz), name in zip(expectedCenters, names):
        self.assertEqual(engine.action([cx, cy, cz]), name)
def test_compute(mock_get_param, mock_save_results, mock_fetch_data):
    """Run ``compute`` and score one record with the PFA document it saves."""
    # stand-ins for what would come from the database
    mock_get_param.return_value = 2
    mock_fetch_data.return_value = fx.inputs_regression(include_categorical=True)

    compute()

    # first positional argument of the last save_results call is the PFA text
    saved_args = mock_save_results.call_args[0]
    pfa_dict = json.loads(saved_args[0])

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'}
    engine.action(sample)
def _predict_pfa(X, types, pfa):
    """Score every row of ``X`` with the engine built from ``pfa``.

    :param X: iterable of rows; values are matched to columns by position
    :param types: sequence of ``(column, type)`` pairs
    :param pfa: PFA document handed to ``PFAEngine.fromJson``
    :return: numpy array with one engine output per row
    """
    engine, = PFAEngine.fromJson(pfa)
    columns = [column for column, _ in types]

    def as_record(row):
        # values of columns typed 'integer' are cast to int, others pass through
        return {
            col: (int(value) if typ == 'integer' else value)
            for col, value, (_, typ) in zip(columns, row, types)
        }

    return np.array([engine.action(as_record(row)) for row in X])
def testTree(self):
    # Time the engine from test/hipparcos_numerical_10.pfa over every record
    # of test/hipparcos_numerical.csv, printing elapsed seconds and the record
    # count after every 5000 records.

    # both files are opened with context managers so the handles are closed
    with open("test/hipparcos_numerical_10.pfa") as pfaFile:
        engine, = PFAEngine.fromJson(pfaFile)

    # load everything first so that parsing is not part of the timing
    data = []
    with open("test/hipparcos_numerical.csv") as csvFile:
        for line in csvFile:
            # exactly 12 comma-separated columns are required; the last one
            # (spectrum) is not part of the engine's input record
            ra, dec, dist, mag, absmag, x, y, z, vx, vy, vz, spectrum = line.split(",")
            data.append({
                "ra": float(ra),
                "dec": float(dec),
                "dist": float(dist),
                "mag": float(mag),
                "absmag": float(absmag),
                "x": float(x),
                "y": float(y),
                "z": float(z),
                "vx": float(vx),
                "vy": float(vy),
                "vz": float(vz),
            })

    startTime = time.time()
    for count, datum in enumerate(data, 1):
        engine.action(datum)
        if count % 5000 == 0:
            # parenthesized single-argument print behaves the same in Python 2 and 3
            print("{0}, {1}".format(time.time() - startTime, count))
def on_post_dir(self,request,response):
    """Handle POST of a new ``.pfa`` model into the directory at the request path.

    The model name comes from the "filename" header. If no file with that name
    exists in the directory, the model is inserted with the directory's owners
    and rights and its engine is started; otherwise the response is 409. Any
    exception is reported in the response body.

    :param request: incoming request; the body is the PFA document as JSON
    :param response: response object whose status/body are set
    """
    url = request.path
    try:
        name = request.get_header("filename")
        # a missing header makes name None; the resulting AttributeError is
        # reported by the handler below, like any other failure
        if not name.endswith(".pfa"):
            raise Exception("not supported file: %s" % name)

        _path, dir_id, _dir_parent_id, _content, _engine = self.cache.getFile(url)
        newfilepath = os.path.join(url, name)

        # post model to directory if it does not exist
        if not self.path_exists(newfilepath):
            content = request.stream.read(request.content_length or 0)
            engine, = PFAEngine.fromJson(json.loads(content))
            engine.begin()

            # read the file-rights of the directory
            (read_owner, write_owner, execute_owner,
             read_group, write_group, execute_group,
             read_other, write_other, execute_other) = self.cache.getFileRights(dir_id)
            owner_id, group_id = self.cache.getFileOwners(dir_id)

            new_file_id = self.queries.insertFile(
                content=content,
                name=name,
                parent_id=dir_id,
                owner_id=owner_id,
                group_id=group_id,
                read_owner=read_owner,
                write_owner=write_owner,
                execute_owner=execute_owner,
                read_group=read_group,
                write_group=write_group,
                execute_group=execute_group,
                read_other=read_other,
                write_other=write_other,
                execute_other=execute_other,
            )

            # insert new file in cache; its parent is the directory itself (the
            # same id written to the database above), not the directory's parent
            self.cache.path2file[newfilepath] = (newfilepath, new_file_id, dir_id, content, engine)

            # metadata is stored in the column order used by __load_cache
            # (owner, group, then r/w/x for owner, group, other)
            # NOTE(review): assumes getFileRights returns the rights in the
            # order unpacked above - confirm against the cache implementation
            self.cache.filesMetaData[new_file_id] = (
                owner_id, group_id,
                read_owner, write_owner, execute_owner,
                read_group, write_group, execute_group,
                read_other, write_other, execute_other,
            )
            response.status = falcon.HTTP_200
        else:
            response.body = "%s already exists. try PUT %s" % (newfilepath, newfilepath)
            response.status = falcon.HTTP_409  # Conflict, model already exists
    except Exception as e:
        # NOTE(review): 505 means "HTTP Version Not Supported"; 500 (or 400 for
        # a bad filename) looks intended, but the status is left unchanged in
        # case clients depend on it - confirm before changing
        response.body = "%s an error occured, %s" % (url, e)
        response.status = falcon.HTTP_505
def test_compute_regression(mock_save_results, mock_get_results, mock_fetch_data):
    """Run ``compute`` on regression inputs and score one record with the saved PFA."""
    # stand-ins for what would come from the database
    mock_fetch_data.return_value = fx.inputs_regression(include_integer=True)
    mock_get_results.return_value = None

    compute()

    # first positional argument of the last save_results call is the PFA text
    saved_args = mock_save_results.call_args[0]
    pfa_dict = json.loads(saved_args[0])

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'subjectageyears': 70}
    engine.action(sample)
def test_main_distributed(mock_parameters, mock_save_results, mock_get_results, mock_fetch_data, method, name):
    """Run ``main`` as an intermediate job, then again to generate PFA from its result."""
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_regression()
    mock_get_results.return_value = None

    # run intermediate job
    main(job_id=None, generate_pfa=False)

    # hand what the first run saved to the second run as the job result
    intermediate = mock_save_results.call_args[0][0]
    mock_get_results.return_value = mock.MagicMock(data=intermediate)

    # generate PFA
    main(job_id='1', generate_pfa=True)

    # the second save_results call carries the PFA text
    second_call_args = mock_save_results.call_args_list[1][0]
    pfa_dict = json.loads(second_call_args[0])

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    sample = {'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'}
    engine.action(sample)
def testKMeansTransform(self):
    # Cluster data whose axes were scaled by 10, 20 and 30, undoing the
    # scaling with a Transformation, and check that the PFA document (given
    # the same transformation as preprocessing) labels scaled inputs.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
    generated = TestProducerKMeans.data([1, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])
    for row, (x, y, z) in enumerate(generated):
        # stop once the preallocated array is full
        if row >= dataset.shape[0]:
            break
        dataset[row, :] = [x * 10.0, y * 20.0, z * 30.0]

    trans = Transformation("x/10.0", "y/20.0", "z/30.0")
    kmeans = KMeans(5, trans.transform(dataset, ["x", "y", "z"]))
    kmeans.optimize(whileall(moving(), maxIterations(1000)))

    expectedCenters = [
        [1.00, 1.01, 1.00],
        [1.01, 1.00, 9.01],
        [3.01, 2.01, 5.00],
        [4.99, 8.00, 4.99],
        [8.02, 2.00, 7.01],
    ]
    names = ["one", "two", "three", "four", "five"]

    centers = kmeans.centers()
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(centers[index], expected, places=1)

    doc = kmeans.pfaDocument(
        "Cluster",
        list(names),
        preprocess=trans.new(AvroArray(AvroDouble()), x="input[0]", y="input[1]", z="input[2]"))

    # the document's centers are compared more strictly (two places) than
    # the raw centers above
    for index, expected in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][index]["center"], expected, places=2)

    # feed each expected center, re-scaled like the raw data, through the engine
    engine, = PFAEngine.fromJson(doc)
    for (cx, cy, cz), name in zip(expectedCenters, names):
        self.assertEqual(engine.action([cx * 10, cy * 20, cz * 30]), name)
def testSimpleKMeansEmitExample(self):
    # the emit method allows us to ignore the "else" clause in ifnotnull:
    # a cluster id is emitted only when all four inputs are not null

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # the clusters cell starts empty and is filled in below
    pfaDocument = titus.prettypfa.jsonNode(''' input: <<INPUT>> output: string method: emit cells: clusters(array(record(id: string, center: array(double)))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) emit(model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["id"]) '''.replace("<<INPUT>>", inputSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testSimpleKMeansWithEnums(self):
    # Closest-cluster workflow whose output is the enum ClusterId rather than
    # a string; ClusterId@MISSING is returned when any input is null.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # the clusters cell starts empty and is filled in below
    pfaDocument = titus.prettypfa.jsonNode(''' input: <<INPUT>> output: enum([cluster0, cluster1, cluster2, cluster3, cluster4, MISSING], ClusterId) cells: clusters(array(record(id: ClusterId, center: array(double)))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["id"] else ClusterId@MISSING '''.replace("<<INPUT>>", inputSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testPopulationOfClosestCluster(self):
    # Closest-cluster workflow that returns the "population" field of the
    # closest cluster (or null when any input is null), so each cluster record
    # carries a population count in addition to its id and center.

    # read the input schema with a context manager so the handle is closed
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # the clusters cell starts empty and is filled in below
    pfaDocument = titus.prettypfa.jsonNode(''' input: <<INPUT>> output: union(int, null) cells: clusters(array(record(id: string, center: array(double), population: int))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["population"] else null '''.replace("<<INPUT>>", inputSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    # populations=True is requested because the record type declares a population field
    pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames, populations=True)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testSimpleKMeansEmitExample(self):
    # With method "emit" the ifnotnull needs no "else" clause: a cluster id
    # is emitted only when all four inputs are not null.
    source = ''' input: <<INPUT>> output: string method: emit cells: clusters(array(record(id: string, center: array(double)))) = [] action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) emit(model.cluster.closest(new(array(double), mag, dist, mass, radius), clusters, metric.simpleEuclidean)["id"]) '''
    pfaDocument = titus.prettypfa.jsonNode(source.replace("<<INPUT>>", TestClustering.recordSchema))

    # run k-means once and reuse the cached result afterwards
    if self.kmeansResult is None:
        self.doKmeans()
    clusterCell = pfaDocument["cells"]["clusters"]
    clusterCell["init"] = self.kmeansResult.pfaValue(self.clusterNames)

    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def testHistogram2d(self):
    # Fills a 10x10 two-dimensional histogram of (mass, radius) held in a
    # cell, emitting the updated histogram for every record that has both
    # fields, and checks the value returned by runEngine.

    # PrettyPFA source; <<INPUT>> is replaced by the input schema below.
    template = (
        ' input: <<INPUT>>'
        ' output: Histogram'
        ' cells: histogram(record(Histogram, xnumbins: int, xlow: double, xhigh: double, ynumbins: int, ylow: double, yhigh: double, values: array(array(double)))) ='
        ' { xnumbins: 10, xlow: 0.0, xhigh: 3.0, ynumbins: 10, ylow: 0.0, yhigh: 3.0,'
        ' values: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}'
        ' method: emit'
        ' action: ifnotnull(mass: input.mass, radius: input.radius)'
        ' emit(histogram to fcn(old: Histogram -> Histogram) stat.sample.fillHistogram2d(mass, radius, 1.0, old)) '
    )

    # Read the schema through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    pfaDocument = titus.prettypfa.jsonNode(
        template.replace("<<INPUT>>", inputSchema),
        check=False,
        lineNumbers=False)

    engine, = PFAEngine.fromJson(pfaDocument)

    # Expected histogram contents after the whole dataset has been scored.
    expected = {
        "values": [
            [6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [3.0, 33.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 8.0, 118.0, 28.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 33.0, 184.0, 72.0, 25.0, 8.0, 4.0, 0.0, 1.0],
            [0.0, 0.0, 1.0, 12.0, 45.0, 34.0, 20.0, 3.0, 4.0, 1.0],
            [0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        ],
        "xhigh": 3.0,
        "yhigh": 3.0,
        "ynumbins": 10,
        "xnumbins": 10,
        "ylow": 0.0,
        "xlow": 0.0,
    }
    self.assertEqual(self.runEngine(engine), expected)
def testKMeans(self):
    # Fits five clusters to generated 3-d data and checks the fitted centers,
    # the centers written into the PFA document, and the labels the resulting
    # scoring engine assigns to those centers.
    random.seed(12345)
    numpy.seterr(divide="ignore", invalid="ignore")

    # Fill a fixed-size array from the data producer, stopping once it is full.
    dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
    samples = TestProducerKMeans.data([1, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])
    for rowIndex, sample in enumerate(samples):
        if rowIndex >= dataset.shape[0]:
            break
        dataset[rowIndex, :] = sample

    kmeans = KMeans(5, dataset)
    kmeans.optimize(whileall(moving(), maxIterations(1000)))

    # Expected center and label for each cluster index.
    expectedCenters = [
        [1.00, 1.01, 1.00],
        [1.01, 1.00, 9.01],
        [3.01, 2.01, 5.00],
        [4.99, 8.00, 4.99],
        [8.02, 2.00, 7.01],
    ]
    labels = ["one", "two", "three", "four", "five"]

    centers = kmeans.centers()
    for index, wanted in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(centers[index], wanted, places=2)

    doc = kmeans.pfaDocument("Cluster", list(labels))
    # look(doc, maxDepth=8)

    clusterCells = doc["cells"]["clusters"]["init"]
    for index, wanted in enumerate(expectedCenters):
        self.assertArrayAlmostEqual(clusterCells[index]["center"], wanted, places=2)

    # Scoring a center itself must return that cluster's label.
    engine, = PFAEngine.fromJson(doc)
    for index, label in enumerate(labels):
        self.assertEqual(engine.action(list(expectedCenters[index])), label)
def testHistogram2d(self):
    # Fills a 10x10 two-dimensional histogram of (mass, radius) held in a
    # cell, emitting the updated histogram for every record that has both
    # fields, and checks the value returned by runEngine.

    # PrettyPFA source; <<INPUT>> is replaced by the shared record schema.
    template = (
        ' input: <<INPUT>>'
        ' output: Histogram'
        ' cells: histogram(record(Histogram, xnumbins: int, xlow: double, xhigh: double, ynumbins: int, ylow: double, yhigh: double, values: array(array(double)))) ='
        ' { xnumbins: 10, xlow: 0.0, xhigh: 3.0, ynumbins: 10, ylow: 0.0, yhigh: 3.0,'
        ' values: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],'
        ' [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}'
        ' method: emit'
        ' action: ifnotnull(mass: input.mass, radius: input.radius)'
        ' emit(histogram to fcn(old: Histogram -> Histogram) stat.sample.fillHistogram2d(mass, radius, 1.0, old)) '
    )
    source = template.replace("<<INPUT>>", TestClustering.recordSchema)
    document = titus.prettypfa.jsonNode(source, check=False, lineNumbers=False)

    scoringEngine, = PFAEngine.fromJson(document)

    # Expected bin contents after the whole dataset has been scored.
    expectedCounts = [
        [6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [3.0, 33.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 8.0, 118.0, 28.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 33.0, 184.0, 72.0, 25.0, 8.0, 4.0, 0.0, 1.0],
        [0.0, 0.0, 1.0, 12.0, 45.0, 34.0, 20.0, 3.0, 4.0, 1.0],
        [0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ]
    expectedHistogram = {
        "xnumbins": 10, "xlow": 0.0, "xhigh": 3.0,
        "ynumbins": 10, "ylow": 0.0, "yhigh": 3.0,
        "values": expectedCounts,
    }

    observed = self.runEngine(scoringEngine)
    self.assertEqual(observed, expectedHistogram)
inputFile, = sys.argv[1:] # Failures that I'm giving up on: # # prob.dist.binomialQF({"p": 0.99999, "prob": 1e-05, "size": 1}) should be 1, is 0 (rounding in count) # {"p": 0.9, "prob": 0.1, "size": 1} should be 1, is 0 (same reason) # prob.dist.hypergeometricPDF \ # prob.dist.hypergeometricCDF } many errors! and the QF has a long or infinite loop # prob.dist.hypergeometricQF / # prob.dist.negativeBinomialPDF({"x": 17, "prob": 0.9, "size": 100}) should be 0.00245, is 0.02715 # {"x": 100, "prob": 0.1, "size": 17} should be 0.00245, is 0.00462 # {"x": 100, "prob": 0.5, "size": 100} should be 5.7e42, is 0.02817 # prob.dist.negativeBinomialQF has many errors (though not as many as the hypergeometric) for counter, example in enumerate(getExamples(open(inputFile))): engine, = PFAEngine.fromJson(example["engine"]) if example["function"] in ("prob.dist.binomialQF", "prob.dist.hypergeometricPDF", "prob.dist.hypergeometricCDF", "prob.dist.hypergeometricQF", "prob.dist.negativeBinomialPDF", "prob.dist.negativeBinomialQF"): continue functionWritten = False def maybeWriteFunction(functionWritten): if not functionWritten: print "%4d %-20s%s" % (counter + 1, example["function"], json.dumps(example["engine"])) return True for trial in example["trials"]: trialWritten = False try: result = {"success": convertOut(engine.action(trial["sample"]), engine.outputType.jsonNode(set()), dobase64=True)} except PFARuntimeException as err:
# Command-line arguments: Kafka bootstrap server, path to the PFA model (JSON),
# topic to consume from, topic to produce to.  Exit with a usage message rather
# than an IndexError traceback when any of them is missing.
if len(sys.argv) < 5:
    sys.exit("Usage: %s <bootstrap_server> <pfa_model> <topic_in> <topic_out>" % sys.argv[0])
server_address, pfa_model, kafka_topic_in, kafka_topic_out = sys.argv[1:5]

# Single-argument print(...) produces the same output with the Python 2 print
# statement and with the print function.
print("================")
print("Kafka Scoring")
print("Bootstrap server:\t%s" % server_address)
print("PFA model:\t\t%s" % pfa_model)
print("Topic consumed:\t\t%s" % kafka_topic_in)
print("Topic produced:\t\t%s" % kafka_topic_out)

# Create PFA engine (the model file is closed as soon as it has been parsed)
try:
    with open(pfa_model) as model_file:
        pfa_engine, = PFAEngine.fromJson(json.load(model_file))
except Exception:
    sys.exit("Failed to create scoring engine")

# Initialize PFA engine
pfa_engine.begin()

# Configure Kafka connection
try:
    consumer = KafkaConsumer(kafka_topic_in, bootstrap_servers=server_address)
    producer = KafkaProducer(bootstrap_servers=server_address)
except Exception:
    sys.exit("Failed to configure Kafka")

# Counters, both starting at zero; they are not updated within this span.
count = 0
bad_data = 0
def testNormalized(self):
    # for k-means on normalized data, we have to explicitly normalize,
    # re-compute the clusters, and put the same transformation into PFA

    # get a dataset for the k-means generator, keeping only complete records;
    # the Avro container is binary, so it is opened in "rb" mode, and the
    # context manager closes the file once the records have been read
    rows = []
    with open("test/prettypfa/exoplanets.avro", "rb") as avroFile:
        for record in DataFileReader(avroFile, DatumReader()):
            mag, dist, mass, radius = record.get("mag"), record.get("dist"), record.get("mass"), record.get("radius")
            if mag is not None and dist is not None and mass is not None and radius is not None:
                rows.append([mag, dist, mass, radius])
    dataset = numpy.array(rows)

    # compute the normalization (1st to 99th percentile instead of strict min/max)
    maglow, maghigh = numpy.percentile(dataset[:, 0], [1, 99])
    distlow, disthigh = numpy.percentile(dataset[:, 1], [1, 99])
    masslow, masshigh = numpy.percentile(dataset[:, 2], [1, 99])
    radiuslow, radiushigh = numpy.percentile(dataset[:, 3], [1, 99])

    # transform the data
    normalized = numpy.empty_like(dataset)
    normalized[:, 0] = (dataset[:, 0] - maglow) / (maghigh - maglow)
    normalized[:, 1] = (dataset[:, 1] - distlow) / (disthigh - distlow)
    normalized[:, 2] = (dataset[:, 2] - masslow) / (masshigh - masslow)
    normalized[:, 3] = (dataset[:, 3] - radiuslow) / (radiushigh - radiuslow)

    # set up and run the k-means generator
    kmeansResult = KMeans(len(self.clusterNames), normalized)
    kmeansResult.optimize(whileall(moving(), maxIterations(1000)))

    # put the transformation into PFA by string replacement:
    # French quotes (<< >>) in the template are replaced with the values below
    with open("test/prettypfa/exoplanetsSchema.ppfa") as schemaFile:
        inputSchema = schemaFile.read()

    # explicit substitution table (instead of locals()) so that only the
    # intended names can be interpolated into the document
    substitutions = {
        "inputSchema": inputSchema,
        "maglow": maglow,
        "maghigh": maghigh,
        "distlow": distlow,
        "disthigh": disthigh,
        "masslow": masslow,
        "masshigh": masshigh,
        "radiuslow": radiuslow,
        "radiushigh": radiushigh,
    }

    template = (
        ' input: <<inputSchema>>'
        ' output: string'
        ' cells: clusters(array(record(id: string, center: array(double)))) = []'
        ' action: ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) {'
        ' var normmag = (mag - <<maglow>>) / (<<maghigh>> \n- <<maglow>>);'
        ' var normdist = (dist - <<distlow>>) / (<<disthigh>> - <<distlow>>);'
        ' var normmass = (mass - <<masslow>>) / (<<masshigh>> - <<masslow>>);'
        ' var normradius = (radius - <<radiuslow>>) / (<<radiushigh>> - <<radiuslow>>);'
        ' model.cluster.closest(new(array(double), normmag, normdist, normmass, normradius), clusters, metric.simpleEuclidean)["id"]'
        ' } else "MISSING" '
    )

    # a capture group extracts the placeholder name directly; lstrip/rstrip
    # take a set of characters, not a prefix/suffix, so they are avoided here
    pfaDocument = titus.prettypfa.jsonNode(
        re.sub("<<([A-Za-z0-9]+)>>",
               lambda match: str(substitutions[match.group(1)]),
               template))

    # now put the clusters in and run the scoring engine
    pfaDocument["cells"]["clusters"]["init"] = kmeansResult.pfaValue(self.clusterNames)
    engine, = PFAEngine.fromJson(pfaDocument)
    self.runEngine(engine)
def post(self, model_name):
    """Score the JSON request body with the named model and write the result.

    The model is fetched from ``db`` by ``model_name``, turned into a PFA
    engine, and applied to the decoded request body; the usage statistics
    for the model are updated before scoring.  The score is written to the
    response as ``str(score)``.
    """
    model_document = db.get_model(model_name)
    scoring_engine, = PFAEngine.fromJson(model_document)

    payload = tornado.escape.json_decode(self.request.body)
    db.update_usage_stats(model_name)

    score = scoring_engine.action(payload)
    self.write(str(score))
inputFile, = sys.argv[1:] # Failures that I'm giving up on: # # prob.dist.binomialQF({"p": 0.99999, "prob": 1e-05, "size": 1}) should be 1, is 0 (rounding in count) # {"p": 0.9, "prob": 0.1, "size": 1} should be 1, is 0 (same reason) # prob.dist.hypergeometricPDF \ # prob.dist.hypergeometricCDF } many errors! and the QF has a long or infinite loop # prob.dist.hypergeometricQF / # prob.dist.negativeBinomialPDF({"x": 17, "prob": 0.9, "size": 100}) should be 0.00245, is 0.02715 # {"x": 100, "prob": 0.1, "size": 17} should be 0.00245, is 0.00462 # {"x": 100, "prob": 0.5, "size": 100} should be 5.7e42, is 0.02817 # prob.dist.negativeBinomialQF has many errors (though not as many as the hypergeometric) for counter, example in enumerate(getExamples(open(inputFile))): engine, = PFAEngine.fromJson(example["engine"]) if example["function"] in ("prob.dist.binomialQF", "prob.dist.hypergeometricPDF", "prob.dist.hypergeometricCDF", "prob.dist.hypergeometricQF", "prob.dist.negativeBinomialPDF", "prob.dist.negativeBinomialQF"): continue functionWritten = False def maybeWriteFunction(functionWritten): if not functionWritten: print "%4d %-20s%s" % (counter + 1, example["function"], json.dumps(example["engine"]))