Example #1
    def testCartMustBuildCategoricalCategorical(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")
        dataset = Dataset.fromIterable(((a, b, c) for (x, y, z, a, b, c) in TestProducerCart.data()), 100000, ("a", "b", "c"))

        tree = TreeNode.fromWholeDataset(dataset, "c")
        tree.splitMaxDepth(2)

        doc = tree.pfaDocument({"type": "record", "name": "Datum", "fields": [{"name": "a", "type": "string"}, {"name": "b", "type": "string"}]}, "TreeNode")
        # look(doc, maxDepth=8)

        self.assertEqual(doc["cells"]["tree"]["init"]["field"], "a")
        self.assertEqual(doc["cells"]["tree"]["init"]["value"], ["A0", "A1", "A2", "A3"])
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["field"], "b")
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["value"], ["B6", "B8"])
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["pass"]["string"], "C6")
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["fail"]["string"], "C3")
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["field"], "b")
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["value"], ["B0"])
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["pass"]["string"], "C0")
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["fail"]["string"], "C0")

        engine, = PFAEngine.fromJson(doc)
        self.assertEqual(engine.action({"a": "A1", "b": "B6"}), "C6")
        self.assertEqual(engine.action({"a": "A1", "b": "B2"}), "C3")
        self.assertEqual(engine.action({"a": "A5", "b": "B0"}), "C0")
        self.assertEqual(engine.action({"a": "A5", "b": "B4"}), "C0")

        doc = tree.pfaDocument(
            {"type": "record", "name": "Datum", "fields": [{"name": "a", "type": "string"}, {"name": "b", "type": "string"}]},
            "TreeNode",
            nodeScores=True, datasetSize=True, predictandDistribution=True, predictandUnique=True, entropy=True, gain=True)
        # look(doc, maxDepth=8)
        engine, = PFAEngine.fromJson(doc)
Example #3
    def testCartMustBuildNumericalCategorical(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")
        dataset = Dataset.fromIterable(((x, y, c) for (x, y, z, a, b, c) in TestProducerCart.data()), 100000, ("x", "y", "c"))

        tree = TreeNode.fromWholeDataset(dataset, "c")
        tree.splitMaxDepth(2)

        doc = tree.pfaDocument({"type": "record", "name": "Datum", "fields": [{"name": "x", "type": "double"}, {"name": "y", "type": "double"}]}, "TreeNode")
        # look(doc, maxDepth=8)

        self.assertEqual(doc["cells"]["tree"]["init"]["field"], "x")
        self.assertAlmostEqual(doc["cells"]["tree"]["init"]["value"], 4.00, places=2)
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["field"], "y")
        self.assertAlmostEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["value"], 6.00, places=2)
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["pass"]["string"], "C3")
        self.assertEqual(doc["cells"]["tree"]["init"]["pass"]["TreeNode"]["fail"]["string"], "C6")
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["field"], "y")
        self.assertAlmostEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["value"], 2.00, places=2)
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["pass"]["string"], "C0")
        self.assertEqual(doc["cells"]["tree"]["init"]["fail"]["TreeNode"]["fail"]["string"], "C0")

        engine, = PFAEngine.fromJson(doc)
        self.assertEqual(engine.action({"x": 2.0, "y": 3.0}), "C3")
        self.assertEqual(engine.action({"x": 2.0, "y": 8.0}), "C6")
        self.assertEqual(engine.action({"x": 7.0, "y": 1.0}), "C0")
        self.assertEqual(engine.action({"x": 7.0, "y": 5.0}), "C0")

        doc = tree.pfaDocument(
            {"type": "record", "name": "Datum", "fields": [{"name": "x", "type": "double"}, {"name": "y", "type": "double"}]},
            "TreeNode",
            nodeScores=True, datasetSize=True, predictandDistribution=True, predictandUnique=True, entropy=True, gain=True)
        # look(doc, maxDepth=8)
        engine, = PFAEngine.fromJson(doc)
Example #5
 def fromPFA(cls, pfaDoc, ext):
     from titus.genpy import PFAEngine
     if ext in (".yml", ".yaml"):
         engine = PFAEngine.fromYaml(pfaDoc)[0]
     else:
         engine = PFAEngine.fromJson(pfaDoc)[0]
     return cls(engine, "PFA")
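
Both PFAEngine.fromYaml and PFAEngine.fromJson return a list of engine instances, which is why the helper above takes element [0]. A minimal usage sketch of such a classmethod; the class name Scorer and the file names are hypothetical stand-ins, not identifiers from the source:

# Hypothetical caller of the fromPFA classmethod above; "Scorer" and the
# file names are illustrative only.
with open("model.yaml") as f:
    scorer = Scorer.fromPFA(f.read(), ".yaml")   # routed to PFAEngine.fromYaml
with open("model.pfa") as f:
    scorer = Scorer.fromPFA(f.read(), ".pfa")    # routed to PFAEngine.fromJson
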
Example #6
def test_main_classification(mock_parameters, mock_save_results,
                             mock_get_results, mock_fetch_data, method, name):
    # create mock objects from database
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_classification(
        include_categorical=True)
    mock_get_results.return_value = None

    main(job_id=None, generate_pfa=True)

    pfa = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa)

    # NOTE: this does not work due to bug in jsonpickle
    # deserialize model
    # estimator = deserialize_sklearn_estimator(pfa_dict['metadata']['estimator'])
    # assert estimator.__class__.__name__ == name

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({
        'stress_before_test1': 10.,
        'iq': 10.,
        'agegroup': '50-59y'
    })
Example #7
    def testTop5List(self):
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: array(string)

cells:
  mostPlanets(array(Star)) = []

action:
  // update the list of stars, keeping only the 5 with the most planets
  var currentList =
    mostPlanets to fcn(old: array(Star) -> array(Star))
        stat.sample.topN(input, old, 5, u.morePlanets);

  // map this top 5 list of stars to their names
  a.map(currentList, fcn(x: Star -> string) x.name)

fcns:
  // our comparison function
  morePlanets = fcn(x: Star, y: Star -> boolean) a.len(x.planets) < a.len(y.planets)

'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()), check=False, lineNumbers=False)

        engine, = PFAEngine.fromJson(pfaDocument)
        self.assertEqual(self.runEngine(engine), ["KOI-351", "HD 40307", "GJ 667C", "Kepler-11", "HD 10180"])
Example #8
    def testTree(self):
        engine, = PFAEngine.fromJson(open("test/hipparcos_numerical_10.pfa"))

        data = []
        for line in open("test/hipparcos_numerical.csv"):
            ra, dec, dist, mag, absmag, x, y, z, vx, vy, vz, spectrum = line.split(
                ",")
            data.append({
                "ra": float(ra),
                "dec": float(dec),
                "dist": float(dist),
                "mag": float(mag),
                "absmag": float(absmag),
                "x": float(x),
                "y": float(y),
                "z": float(z),
                "vx": float(vx),
                "vy": float(vy),
                "vz": float(vz)
            })

        i = 0
        startTime = time.time()
        for datum in data:
            engine.action(datum)
            i += 1
            if i % 5000 == 0:
                print "{0}, {1}".format(time.time() - startTime, i)
Example #9
    def testSimpleKMeansWithStrings(self):
        # define the workflow, leaving clusters as an empty array for now
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string

cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    // ifnotnull runs the first block if all four expressions are not null
    // input.mag has type union(double, null) while mag has type double, etc.

    ifnotnull(mag: input.mag,
              dist: input.dist,
              mass: input.mass,
              radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["id"]
    else
        "MISSING"

'''.replace("<<INPUT>>",
            open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        # fill in the clusters with the k-means result
        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(
            self.clusterNames)

        # build a scoring engine and test it
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
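
The "clusters" cell must be initialised with a value matching its declared type, array(record(id: string, center: array(double))). When a k-means result is not at hand, the cell can be filled by hand before building the engine; a minimal sketch (the ids and centre values below are made up for illustration, not the k-means result used in the test):

# Hand-filled "clusters" init matching the declared cell type; values are illustrative.
pfaDocument["cells"]["clusters"]["init"] = [
    {"id": "cluster0", "center": [0.1, 0.2, 0.3, 0.4]},
    {"id": "cluster1", "center": [0.9, 0.8, 0.7, 0.6]},
]
engine, = PFAEngine.fromJson(pfaDocument)  # builds once the cell is populated
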
Example #10
    def testTop5List(self):
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: array(string)

cells:
  mostPlanets(array(Star)) = []

action:
  // update the list of stars, keeping only the 5 with the most planets
  var currentList =
    mostPlanets to fcn(old: array(Star) -> array(Star))
        stat.sample.topN(input, old, 5, u.morePlanets);

  // map this top 5 list of stars to their names
  a.map(currentList, fcn(x: Star -> string) x.name)

fcns:
  // our comparison function
  morePlanets = fcn(x: Star, y: Star -> boolean) a.len(x.planets) < a.len(y.planets)

'''.replace("<<INPUT>>", TestClustering.recordSchema),
                                               check=False,
                                               lineNumbers=False)

        engine, = PFAEngine.fromJson(pfaDocument)
        self.assertEqual(
            self.runEngine(engine),
            ["KOI-351", "HD 40307", "GJ 667C", "Kepler-11", "HD 10180"])
Example #11
    def testSimpleKMeansWithStrings(self):
        # define the workflow, leaving clusters as an empty array for now
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string

cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    // ifnotnull runs the first block if all four expressions are not null
    // input.mag has type union(double, null) while mag has type double, etc.

    ifnotnull(mag: input.mag,
              dist: input.dist,
              mass: input.mass,
              radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["id"]
    else
        "MISSING"

'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        # fill in the clusters with the k-means result
        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)

        # build a scoring engine and test it
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #12
    def testSimpleKMeansWithEnums(self):
        # same as the above using enums rather than strings and compacted a bit
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: enum([cluster0, cluster1, cluster2, cluster3, cluster4, MISSING], ClusterId)

cells:
    clusters(array(record(id: ClusterId, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["id"]
    else
        ClusterId@MISSING

'''.replace("<<INPUT>>",
            open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(
            self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #13
    def testPopulationOfClosestCluster(self):
        # now that the ifnotnull clause has become three lines long, notice that it needs to be
        # surrounded by curly brackets and expressions must be separated by semicolons
        # (the last semicolon is optional: they're delimiters, not line terminators)
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: union(int, null)
cells:
    clusters(array(record(id: string, center: array(double), population: int))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["population"]
    else
        null
'''.replace("<<INPUT>>",
            open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(
            self.clusterNames, populations=True)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #14
    def testDistanceToClosestCluster(self):
        # now that the ifnotnull clause has become three lines long, notice that it needs to be
        # surrounded by curly brackets and expressions must be separated by semicolons
        # (the last semicolon is optional: they're delimiters, not line terminators)
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: union(double, null)
cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) {
        var datum = new(array(double), mag, dist, mass, radius);
        var closestCluster = model.cluster.closest(datum, clusters, metric.simpleEuclidean);
        metric.simpleEuclidean(datum, closestCluster["center"])
    }
    else
        null
'''.replace("<<INPUT>>", TestClustering.recordSchema))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(
            self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #15
def test_aggregate_knn(mock_save_results, mock_get_results, mock_fetch_data):
    # get one PFA
    mock_fetch_data.return_value = fx.inputs_regression(include_integer=True)
    mock_get_results.return_value = None
    compute()
    pfa = mock_save_results.call_args[0][0]

    def mock_results(job_id):
        if job_id == '1':
            return mock.MagicMock(data=pfa, error='')
        elif job_id == '2':
            return mock.MagicMock(data=pfa, error='')

    mock_get_results.side_effect = mock_results

    aggregate_knn(['1', '2'])

    pfa_combined = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa_combined)
    assert len(pfa_dict['cells']['codebook']['init']) == 2 * len(
        json.loads(pfa)['cells']['codebook']['init'])

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({
        'stress_before_test1': 10.,
        'iq': 10.,
        'subjectageyears': 70
    })
Example #16
def test_aggregate_kmeans(mock_save_results,
                          mock_load_intermediate_json_results,
                          mock_fetch_data):
    mock_fetch_data.return_value = fx.inputs_regression(
        include_categorical=True)

    mock_load_intermediate_json_results.return_value = [
        intermediate_data_1(), intermediate_data_2()
    ]

    aggregate_kmeans([1, 2])
    pfa_dict = json.loads(mock_save_results.call_args[0][0])

    np.testing.assert_allclose(
        json.loads(pfa_dict['metadata']['centroids']),
        np.array([[-0.12348661147125002, 0.20922071836500003, 0.0, 1.0],
                  [-0.1852486658437501, 0.09447887226000021, 0.5, 0.0]]), 1e-5)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    ret = engine.action({
        'stress_before_test1': 10.,
        'iq': 10.,
        'agegroup': '-50y'
    })
    assert ret == 1
Example #17
    def testKMeansTransform(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        for i, (x, y, z) in enumerate(
                TestProducerKMeans.data([2, 1, 1], [3, 2, 5], [8, 2, 7],
                                        [5, 8, 5], [1, 1, 9])):
            if i >= dataset.shape[0]:
                break
            dataset[i, :] = [x * 10.0, y * 20.0, z * 30.0]

        trans = Transformation("x/10.0", "y/20.0", "z/30.0")
        kmeans = KMeans(5, trans.transform(dataset, ["x", "y", "z"]))
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        self.assertArrayAlmostEqual(centers[0], [1.01, 1.00, 9.01], places=1)
        self.assertArrayAlmostEqual(centers[1], [2.00, 1.01, 1.00], places=1)
        self.assertArrayAlmostEqual(centers[2], [3.01, 2.01, 5.00], places=1)
        self.assertArrayAlmostEqual(centers[3], [4.99, 8.00, 4.99], places=1)
        self.assertArrayAlmostEqual(centers[4], [8.02, 2.00, 7.01], places=1)

        doc = kmeans.pfaDocument("Cluster",
                                 ["one", "two", "three", "four", "five"],
                                 preprocess=trans.new(AvroArray(AvroDouble()),
                                                      x="input[0]",
                                                      y="input[1]",
                                                      z="input[2]"))
        # look(doc, maxDepth=10)

        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][0]["center"], [1.01, 1.00, 9.01],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][1]["center"], [2.00, 1.01, 1.00],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][2]["center"], [3.01, 2.01, 5.00],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][3]["center"], [4.99, 8.00, 4.99],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][4]["center"], [8.02, 2.00, 7.01],
            places=1)

        engine, = PFAEngine.fromJson(doc)

        self.assertEqual(engine.action([1.01 * 10, 1.00 * 20, 9.01 * 30]),
                         "one")
        self.assertEqual(engine.action([2.00 * 10, 1.01 * 20, 1.00 * 30]),
                         "two")
        self.assertEqual(engine.action([3.01 * 10, 2.01 * 20, 5.00 * 30]),
                         "three")
        self.assertEqual(engine.action([4.99 * 10, 8.00 * 20, 4.99 * 30]),
                         "four")
        self.assertEqual(engine.action([8.02 * 10, 2.00 * 20, 7.01 * 30]),
                         "five")
Example #18
def get_engine(json_string):
    """Creates a PFA scoring engine from a PFA document given as a JSON string,
    validating the document and exiting with an error message if it is invalid."""

    try:
        # pylint: disable=unbalanced-tuple-unpacking
        engine, = PFAEngine.fromJson(json.loads(json_string))

    except ValueError as ex:
        # JSON validation
        logging.error(
            "The file provided does not contain a valid JSON document: " +
            str(ex))
        sys.exit(1)
    except PFASyntaxException as ex:
        # Syntax validation
        logging.error(
            "The file provided does not contain a valid PFA compliant document: "
            + str(ex))
        sys.exit(1)
    except PFASemanticException as ex:
        # PFA semantic check
        logging.error(
            "The file provided contains inconsistent PFA semantics: " +
            str(ex))
        sys.exit(1)
    except PFAInitializationException as ex:
        # Scoring engine check
        logging.error(
            "It wasn't possible to build a valid scoring engine from the PFA document: "
            + str(ex))
        sys.exit(1)
    except Exception as ex:
        # Other exceptions
        logging.error("An unknown exception occurred: " + str(ex))
        sys.exit(1)

    # Check that the PFA file uses the "map" method. Other methods are not supported
    # (because irrelevant) by the MIP
    if not engine.config.method == "map":
        logging.error(
            "The PFA method you used is not supported. Please use the PFA 'map' method"
        )
        sys.exit(1)

    # Check that the PFA file uses a "record" type as input
    if not isinstance(engine.config.input, AvroRecord):
        logging.error("The PFA document must take a record as input parameter. " \
                      "Each field of the record must describe a variable")
        sys.exit(1)

    # Check that the PFA file has a least one input field
    if not engine.config.input.fields:
        logging.error(
            "The PFA document must describe an input record with at least one field"
        )
        sys.exit(1)

    return engine
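
A minimal sketch of a document that passes the checks above: the PFA "method" defaults to "map", the input is a record with at least one field, and the action adds 1.0 to that field. The document itself is a made-up illustration, not one of the models these checks were written for:

import json

# Hypothetical PFA document satisfying get_engine's checks (map method,
# record input with one field); the action returns input.x + 1.0.
doc = {
    "input": {"type": "record", "name": "Datum",
              "fields": [{"name": "x", "type": "double"}]},
    "output": "double",
    "action": [{"+": [{"attr": "input", "path": [{"string": "x"}]}, 1.0]}]
}
engine = get_engine(json.dumps(doc))
print(engine.action({"x": 2.0}))  # 3.0
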
Example #19
 def __init__(self, config):
     # load the PFA document (JSON parsed into a dict) and build a scoring engine
     with open('./iris-pfa.json') as f:
         pfaDocument = json.load(f)
     self.engine, = PFAEngine.fromJson(pfaDocument)
Example #20
 def on_put_model(self,request,response):
     url = request.path
     splitted = url.split("/")
     parentpath = os.path.join('/',*splitted[0:-1])
     modelname = splitted[-1]
     if self.path_exists(url):
         path,file_id,parent_id,content,engine = self.cache.getFile(url)
         # overwrite content, restart engine
         newcontent = request.stream.read(request.content_length or 0)
         newengine, = PFAEngine.fromJson(json.loads(newcontent))
         newengine.begin()
         self.queries.updateFile(file_id = file_id,content = newcontent)
         # update cache
         self.cache.path2file[url] = (url,file_id,parent_id,newcontent,newengine)
     elif self.path_exists(parentpath) and self.is_path_dir(parentpath): # create model in dir, if parent directory exists, start engine
         path,file_id,parent_id,content,engine = self.cache.getFile(parentpath)
         content = request.stream.read(request.content_length or 0)
         engine, = PFAEngine.fromJson(json.loads(content))
         engine.begin()
         # read the file-rights of the directory
         read_owner,write_owner,execute_owner,read_group,write_group,execute_group,read_other,write_other,execute_other = self.cache.getFileRights(file_id)
         owner_id, group_id = self.cache.getFileOwners(file_id)
         new_file_id = self.queries.insertFile(
            content = content,
            name = modelname,
            parent_id = file_id,
            owner_id = owner_id,
            group_id = group_id,
            read_owner = read_owner,
            write_owner = write_owner,
            execute_owner = execute_owner,
            read_group = read_group,
            write_group = write_group,
            execute_group = execute_group,
            read_other = read_other,
            write_other = write_other,
            execute_other = execute_other
         )
         # insert new file in cache
         self.cache.path2file[url] = (url,new_file_id,parent_id,content,engine)
         self.cache.filesMetaData[new_file_id] = (owner_id, group_id, read_owner,read_group,read_other,write_owner,write_group,write_other,execute_owner,execute_group,execute_other)            
         response.status = falcon.HTTP_200
     else:
         response.body = "%s not found" % url
         response.status = falcon.HTTP_404
Example #21
    def get_engine(self):
        """Creates a PFA engine from the JSON string provided to the constructor.
        If an engine was already created, this method does nothing."""
        if not self.engine:
            # pylint: disable=unbalanced-tuple-unpacking
            engine, = PFAEngine.fromJson(json.loads(self.json_string))
            self.engine = engine

        return self.engine
Example #22
def score_model(partition):
    # Create PFA engine
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_model.value)

    # Score Partition data row-by-row
    score_results = list()
    for row in partition:
        score_results.append([engine.action(row.asDict())])
    return score_results
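
score_model reads the PFA document from a broadcast variable and builds one engine per partition, so it is meant to be passed to mapPartitions. A sketch of the driver-side wiring; sc (SparkContext), df (DataFrame) and pfa_json are assumptions, not names from the source:

# Hypothetical wiring for score_model; `sc`, `df`, and `pfa_json` are assumed to exist.
pfa_model = sc.broadcast(pfa_json)                   # read inside score_model as pfa_model.value
scores = df.rdd.mapPartitions(score_model).collect()
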
Example #23
 def __load_cache(self):
     fileMetaData = self.cursor.execute("select file_id, owner_id, group_id, read_owner,write_owner,execute_owner,read_group,write_group,execute_group,read_other,write_other,execute_other from s_file;").fetchall()
     self.filesMetaData = dict([ (fileMetaData[i][0],fileMetaData[i][1:]) for i in range(len(fileMetaData))] )
     files = self.cursor.execute("select gr.path,f.file_id,f.parent_id, f.content from s_file f  left join global_rights gr on f.file_id = gr.file_id where gr.uid = 1").fetchall()
     self.path2file = {}
     for file in files:
         path,file_id,parent_id,content = file
         if not content is None:
             content = str(content)
             engine, = PFAEngine.fromJson(json.loads(content))
             engine.begin()
             self.path2file[path] =  (path,file_id,parent_id,content, engine)
         else:
             self.path2file[path] = (path,file_id,parent_id,None,None)
Example #24
    def testKMeans(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        for i, x in enumerate(
                TestProducerKMeans.data([2, 1, 1], [3, 2, 5], [8, 2, 7],
                                        [5, 8, 5], [1, 1, 9])):
            if i >= dataset.shape[0]:
                break
            dataset[i, :] = x

        kmeans = KMeans(5, dataset)
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        self.assertArrayAlmostEqual(centers[0], [1.01, 1.00, 9.01], places=1)
        self.assertArrayAlmostEqual(centers[1], [2.00, 1.01, 1.00], places=1)
        self.assertArrayAlmostEqual(centers[2], [3.01, 2.01, 5.00], places=1)
        self.assertArrayAlmostEqual(centers[3], [4.99, 8.00, 4.99], places=1)
        self.assertArrayAlmostEqual(centers[4], [8.02, 2.00, 7.01], places=1)

        doc = kmeans.pfaDocument("Cluster",
                                 ["one", "two", "three", "four", "five"])
        # look(doc, maxDepth=8)

        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][0]["center"], [1.01, 1.00, 9.01],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][1]["center"], [2.00, 1.01, 1.00],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][2]["center"], [3.01, 2.01, 5.00],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][3]["center"], [4.99, 8.00, 4.99],
            places=1)
        self.assertArrayAlmostEqual(
            doc["cells"]["clusters"]["init"][4]["center"], [8.02, 2.00, 7.01],
            places=1)

        engine, = PFAEngine.fromJson(doc)

        self.assertEqual(engine.action([1.01, 1.00, 9.01]), "one")
        self.assertEqual(engine.action([2.00, 1.01, 1.00]), "two")
        self.assertEqual(engine.action([3.01, 2.01, 5.00]), "three")
        self.assertEqual(engine.action([4.99, 8.00, 4.99]), "four")
        self.assertEqual(engine.action([8.02, 2.00, 7.01]), "five")
Example #25
def test_compute(mock_get_param, mock_save_results, mock_fetch_data):
    # create mock objects from database
    mock_get_param.return_value = 2
    mock_fetch_data.return_value = fx.inputs_regression(
        include_categorical=True)

    compute()

    pfa = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'})
Example #26
def _predict_pfa(X, types, pfa):
    engine, = PFAEngine.fromJson(pfa)
    columns = [c for c, _ in types]

    pfa_pred = []
    for x in X:
        p = {}
        for col, e, (_, typ) in zip(columns, x, types):
            if typ == 'integer':
                p[col] = int(e)
            else:
                p[col] = e

        pfa_pred.append(engine.action(p))
    return np.array(pfa_pred)
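
_predict_pfa expects types as (column, type) pairs aligned with the columns of X, and casts the columns flagged as 'integer' before scoring. A hypothetical call (column names, values, and pfa_doc are illustrative, not from the source):

# Hypothetical invocation of _predict_pfa; all names and values are made up.
types = [("iq", "real"), ("subjectageyears", "integer")]
X = [[102.5, 70], [98.0, 45]]
predictions = _predict_pfa(X, types, pfa_doc)  # pfa_doc: a parsed PFA document
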
Example #27
    def testTree(self):
        engine, = PFAEngine.fromJson(open("test/hipparcos_numerical_10.pfa"))

        data = []
        for line in open("test/hipparcos_numerical.csv"):
            ra, dec, dist, mag, absmag, x, y, z, vx, vy, vz, spectrum = line.split(",")
            data.append({"ra": float(ra), "dec": float(dec), "dist": float(dist), "mag": float(mag), "absmag": float(absmag), "x": float(x), "y": float(y), "z": float(z), "vx": float(vx), "vy": float(vy), "vz": float(vz)})

        i = 0
        startTime = time.time()
        for datum in data:
            engine.action(datum)
            i += 1
            if i % 5000 == 0:
                print "{0}, {1}".format(time.time() - startTime, i)
Example #28
 def on_post_dir(self,request,response):
     url = request.path
     try:
         name = request.get_header("filename")
         username = request.get_header("username")
         if not name.endswith(".pfa"):
             raise Exception("not supported file: %s" % name)
         path,file_id,parent_id,content,engine = self.cache.getFile(url)
         # post model to directory if it does not exist
         if not self.path_exists(os.path.join(url,name)):
             content = request.stream.read(request.content_length or 0)
             engine, = PFAEngine.fromJson(json.loads(content))
             engine.begin()
             # read the file-rights of the directory
             read_owner,write_owner,execute_owner,read_group,write_group,execute_group,read_other,write_other,execute_other = self.cache.getFileRights(file_id)
             owner_id, group_id = self.cache.getFileOwners(file_id)
             new_file_id = self.queries.insertFile(
                content = content,
                name = name,
                parent_id = file_id,
                owner_id = owner_id,
                group_id = group_id,
                read_owner = read_owner,
                write_owner = write_owner,
                execute_owner = execute_owner,
                read_group = read_group,
                write_group = write_group,
                execute_group = execute_group,
                read_other = read_other,
                write_other = write_other,
                execute_other = execute_other
             )
             # insert new file in cache
             newfilepath = os.path.join(url,name)
             self.cache.path2file[newfilepath] = (newfilepath,new_file_id,parent_id,content,engine)
             self.cache.filesMetaData[new_file_id] = (owner_id, group_id, read_owner,read_group,read_other,write_owner,write_group,write_other,execute_owner,execute_group,execute_other)
             response.status = falcon.HTTP_200
         else:
             modelpath = os.path.join(url,name)
             response.body = "%s already exists. try PUT %s" % (modelpath,modelpath)
             response.status = falcon.HTTP_409 # Conflict, model already exists
     except Exception as e:
         response.body = "%s an error occurred, %s" % (url, e)
         response.status = falcon.HTTP_505
Example #29
def test_compute_regression(mock_save_results, mock_get_results,
                            mock_fetch_data):
    # create mock objects from database
    mock_fetch_data.return_value = fx.inputs_regression(include_integer=True)
    mock_get_results.return_value = None

    compute()

    pfa = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({
        'stress_before_test1': 10.,
        'iq': 10.,
        'subjectageyears': 70
    })
Example #30
def test_main_distributed(mock_parameters, mock_save_results, mock_get_results, mock_fetch_data, method, name):
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_regression()
    mock_get_results.return_value = None

    # run intermediate job
    main(job_id=None, generate_pfa=False)

    mock_get_results.return_value = mock.MagicMock(data=mock_save_results.call_args[0][0])

    # generate PFA
    main(job_id='1', generate_pfa=True)

    pfa = mock_save_results.call_args_list[1][0][0]
    pfa_dict = json.loads(pfa)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'})
Example #31
    def testKMeansTransform(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        for i, (x, y, z) in enumerate(TestProducerKMeans.data([1, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])):
            if i >= dataset.shape[0]:
                break
            dataset[i,:] = [x * 10.0, y * 20.0, z * 30.0]

        trans = Transformation("x/10.0", "y/20.0", "z/30.0")
        kmeans = KMeans(5, trans.transform(dataset, ["x", "y", "z"]))
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        self.assertArrayAlmostEqual(centers[0], [1.00, 1.01, 1.00], places=1)
        self.assertArrayAlmostEqual(centers[1], [1.01, 1.00, 9.01], places=1)
        self.assertArrayAlmostEqual(centers[2], [3.01, 2.01, 5.00], places=1)
        self.assertArrayAlmostEqual(centers[3], [4.99, 8.00, 4.99], places=1)
        self.assertArrayAlmostEqual(centers[4], [8.02, 2.00, 7.01], places=1)

        doc = kmeans.pfaDocument("Cluster",
                                 ["one", "two", "three", "four", "five"],
                                 preprocess=trans.new(AvroArray(AvroDouble()),
                                                      x="input[0]", y="input[1]", z="input[2]"))
        # look(doc, maxDepth=10)

        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][0]["center"], [1.00, 1.01, 1.00], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][1]["center"], [1.01, 1.00, 9.01], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][2]["center"], [3.01, 2.01, 5.00], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][3]["center"], [4.99, 8.00, 4.99], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][4]["center"], [8.02, 2.00, 7.01], places=2)

        engine, = PFAEngine.fromJson(doc)

        self.assertEqual(engine.action([1.00 * 10, 1.01 * 20, 1.00 * 30]), "one")
        self.assertEqual(engine.action([1.01 * 10, 1.00 * 20, 9.01 * 30]), "two")
        self.assertEqual(engine.action([3.01 * 10, 2.01 * 20, 5.00 * 30]), "three")
        self.assertEqual(engine.action([4.99 * 10, 8.00 * 20, 4.99 * 30]), "four")
        self.assertEqual(engine.action([8.02 * 10, 2.00 * 20, 7.01 * 30]), "five")
Example #32
    def testSimpleKMeansEmitExample(self):
        # the emit method allows us to ignore the "else" clause in ifnotnull
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string
method: emit

cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        emit(model.cluster.closest(new(array(double), mag, dist, mass, radius),
                                   clusters,
                                   metric.simpleEuclidean)["id"])

'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #33
    def testSimpleKMeansWithEnums(self):
        # same as the above using enums rather than strings and compacted a bit
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: enum([cluster0, cluster1, cluster2, cluster3, cluster4, MISSING], ClusterId)

cells:
    clusters(array(record(id: ClusterId, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["id"]
    else
        ClusterId@MISSING

'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #34
    def testPopulationOfClosestCluster(self):
        # now that the ifnotnull clause has become three lines long, notice that it needs to be
        # surrounded by curly brackets and expressions must be separated by semicolons
        # (the last semicolon is optional: they're delimiters, not line terminators)
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: union(int, null)
cells:
    clusters(array(record(id: string, center: array(double), population: int))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        model.cluster.closest(new(array(double), mag, dist, mass, radius),
                              clusters,
                              metric.simpleEuclidean)["population"]
    else
        null
'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(self.clusterNames, populations=True)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #35
    def testSimpleKMeansEmitExample(self):
        # the emit method allows us to ignore the "else" clause in ifnotnull
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: string
method: emit

cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius)
        emit(model.cluster.closest(new(array(double), mag, dist, mass, radius),
                                   clusters,
                                   metric.simpleEuclidean)["id"])

'''.replace("<<INPUT>>", TestClustering.recordSchema))

        if self.kmeansResult is None: self.doKmeans()
        pfaDocument["cells"]["clusters"]["init"] = self.kmeansResult.pfaValue(
            self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #36
    def testHistogram2d(self):
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: Histogram

cells:
  histogram(record(Histogram,
                   xnumbins: int,
                   xlow: double,
                   xhigh: double,
                   ynumbins: int,
                   ylow: double,
                   yhigh: double,
                   values: array(array(double)))) = {
      xnumbins: 10, xlow: 0.0, xhigh: 3.0,
      ynumbins: 10, ylow: 0.0, yhigh: 3.0,
      values: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}

method: emit      
action:
  ifnotnull(mass: input.mass, radius: input.radius)
      emit(histogram to fcn(old: Histogram -> Histogram)
          stat.sample.fillHistogram2d(mass, radius, 1.0, old))

'''.replace("<<INPUT>>", open("test/prettypfa/exoplanetsSchema.ppfa").read()), check=False, lineNumbers=False)

        engine, = PFAEngine.fromJson(pfaDocument)
        self.assertEqual(self.runEngine(engine), {"values": [[6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.0, 33.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 8.0, 118.0, 28.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0], [0.0, 0.0, 33.0, 184.0, 72.0, 25.0, 8.0, 4.0, 0.0, 1.0], [0.0, 0.0, 1.0, 12.0, 45.0, 34.0, 20.0, 3.0, 4.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]], "xhigh": 3.0, "yhigh": 3.0, "ynumbins": 10, "xnumbins": 10, "ylow": 0.0, "xlow": 0.0})
Example #37
    def testKMeans(self):
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        for i, x in enumerate(TestProducerKMeans.data([1, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])):
            if i >= dataset.shape[0]:
                break
            dataset[i,:] = x

        kmeans = KMeans(5, dataset)
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        self.assertArrayAlmostEqual(centers[0], [1.00, 1.01, 1.00], places=2)
        self.assertArrayAlmostEqual(centers[1], [1.01, 1.00, 9.01], places=2)
        self.assertArrayAlmostEqual(centers[2], [3.01, 2.01, 5.00], places=2)
        self.assertArrayAlmostEqual(centers[3], [4.99, 8.00, 4.99], places=2)
        self.assertArrayAlmostEqual(centers[4], [8.02, 2.00, 7.01], places=2)

        doc = kmeans.pfaDocument("Cluster", ["one", "two", "three", "four", "five"])
        # look(doc, maxDepth=8)

        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][0]["center"], [1.00, 1.01, 1.00], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][1]["center"], [1.01, 1.00, 9.01], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][2]["center"], [3.01, 2.01, 5.00], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][3]["center"], [4.99, 8.00, 4.99], places=2)
        self.assertArrayAlmostEqual(doc["cells"]["clusters"]["init"][4]["center"], [8.02, 2.00, 7.01], places=2)

        engine, = PFAEngine.fromJson(doc)

        self.assertEqual(engine.action([1.00, 1.01, 1.00]), "one")
        self.assertEqual(engine.action([1.01, 1.00, 9.01]), "two")
        self.assertEqual(engine.action([3.01, 2.01, 5.00]), "three")
        self.assertEqual(engine.action([4.99, 8.00, 4.99]), "four")
        self.assertEqual(engine.action([8.02, 2.00, 7.01]), "five")
Example #38
    def testHistogram2d(self):
        pfaDocument = titus.prettypfa.jsonNode('''
input: <<INPUT>>
output: Histogram

cells:
  histogram(record(Histogram,
                   xnumbins: int,
                   xlow: double,
                   xhigh: double,
                   ynumbins: int,
                   ylow: double,
                   yhigh: double,
                   values: array(array(double)))) = {
      xnumbins: 10, xlow: 0.0, xhigh: 3.0,
      ynumbins: 10, ylow: 0.0, yhigh: 3.0,
      values: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}

method: emit      
action:
  ifnotnull(mass: input.mass, radius: input.radius)
      emit(histogram to fcn(old: Histogram -> Histogram)
          stat.sample.fillHistogram2d(mass, radius, 1.0, old))

'''.replace("<<INPUT>>", TestClustering.recordSchema),
                                               check=False,
                                               lineNumbers=False)

        engine, = PFAEngine.fromJson(pfaDocument)
        self.assertEqual(
            self.runEngine(engine), {
                "values": [[6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [3.0, 33.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 8.0, 118.0, 28.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0],
                           [0.0, 0.0, 33.0, 184.0, 72.0, 25.0, 8.0, 4.0, 0.0, 1.0],
                           [0.0, 0.0, 1.0, 12.0, 45.0, 34.0, 20.0, 3.0, 4.0, 1.0],
                           [0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
                "xhigh": 3.0, "yhigh": 3.0,
                "ynumbins": 10, "xnumbins": 10,
                "ylow": 0.0, "xlow": 0.0
            })
Example #39
inputFile, = sys.argv[1:]

# Failures that I'm giving up on:
# 
# prob.dist.binomialQF({"p": 0.99999, "prob": 1e-05, "size": 1}) should be 1, is 0 (rounding in count)
#                      {"p": 0.9, "prob": 0.1, "size": 1}        should be 1, is 0 (same reason)
# prob.dist.hypergeometricPDF  \
# prob.dist.hypergeometricCDF   }  many errors! and the QF has a long or infinite loop
# prob.dist.hypergeometricQF   /
# prob.dist.negativeBinomialPDF({"x": 17, "prob": 0.9, "size": 100}) should be 0.00245, is 0.02715
#                               {"x": 100, "prob": 0.1, "size": 17}  should be 0.00245, is 0.00462
#                               {"x": 100, "prob": 0.5, "size": 100} should be 5.7e42, is 0.02817
# prob.dist.negativeBinomialQF has many errors (though not as many as the hypergeometric)

for counter, example in enumerate(getExamples(open(inputFile))):
    engine, = PFAEngine.fromJson(example["engine"])

    if example["function"] in ("prob.dist.binomialQF", "prob.dist.hypergeometricPDF", "prob.dist.hypergeometricCDF", "prob.dist.hypergeometricQF", "prob.dist.negativeBinomialPDF", "prob.dist.negativeBinomialQF"):
        continue

    functionWritten = False
    def maybeWriteFunction(functionWritten):
        if not functionWritten:
            print "%4d    %-20s%s" % (counter + 1, example["function"], json.dumps(example["engine"]))
        return True

    for trial in example["trials"]:
        trialWritten = False
        try:
            result = {"success": convertOut(engine.action(trial["sample"]), engine.outputType.jsonNode(set()), dobase64=True)}
        except PFARuntimeException as err:
Example #40
server_address = sys.argv[1]
pfa_model = sys.argv[2]
kafka_topic_in = sys.argv[3]
kafka_topic_out = sys.argv[4]

print "================"
print "Kafka Scoring"
print "Bootstrap server:\t%s" % server_address
print "PFA model:\t\t%s" % pfa_model
print "Topic consumed:\t\t%s" % kafka_topic_in
print "Topic produced:\t\t%s" % kafka_topic_out

# Create PFA engine
try:
    pfa_engine, = PFAEngine.fromJson(json.load(open(pfa_model)))
except Exception:
    sys.exit("Failed to create scoring engine")

# Initialize PFA engine
pfa_engine.begin()

# Configure Kafka connection
try:
    consumer = KafkaConsumer(kafka_topic_in, bootstrap_servers=server_address)
    producer = KafkaProducer(bootstrap_servers=server_address)
except Exception:
    sys.exit("Failed to configure Kafka")

count = 0
bad_data = 0
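
The snippet ends after initialising the counters; a sketch of the consume-score-produce loop such a setup typically feeds into (one JSON record per Kafka message is an assumption, not something stated in the source):

# Sketch of the scoring loop, assuming each message value is a JSON-encoded record.
for message in consumer:
    try:
        datum = json.loads(message.value)
        result = pfa_engine.action(datum)
        producer.send(kafka_topic_out, json.dumps(result).encode("utf-8"))
        count += 1
    except Exception:
        bad_data += 1
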
Example #41
    def testNormalized(self):
        # for k-means on normalized data, we have to explicitly normalize,
        # re-compute the clusters, and put the same transformation into PFA

        # get a dataset for the k-means generator
        dataset = []
        for record in DataFileReader(open("test/prettypfa/exoplanets.avro", "r"), DatumReader()):
            mag, dist, mass, radius = record.get("mag"), record.get("dist"), record.get("mass"), record.get("radius")
            if mag is not None and dist is not None and mass is not None and radius is not None:
                dataset.append([mag, dist, mass, radius])
        dataset = numpy.array(dataset)

        # compute the normalization (1st to 99th percentile instead of strict min/max)
        maglow,    maghigh    = numpy.percentile(dataset[:,0], [1, 99])
        distlow,   disthigh   = numpy.percentile(dataset[:,1], [1, 99])
        masslow,   masshigh   = numpy.percentile(dataset[:,2], [1, 99])
        radiuslow, radiushigh = numpy.percentile(dataset[:,3], [1, 99])

        # transform the data
        normalized = numpy.empty_like(dataset)
        normalized[:,0] = (dataset[:,0] - maglow) / (maghigh - maglow)
        normalized[:,1] = (dataset[:,1] - distlow) / (disthigh - distlow)
        normalized[:,2] = (dataset[:,2] - masslow) / (masshigh - masslow)
        normalized[:,3] = (dataset[:,3] - radiuslow) / (radiushigh - radiuslow)

        # set up and run the k-means generator
        kmeansResult = KMeans(len(self.clusterNames), normalized)
        kmeansResult.optimize(whileall(moving(), maxIterations(1000)))

        # put the transformation into PFA by string replacement:
        # this re.sub call replaces French quotes (<< >>) with Python variable values
        inputSchema = open("test/prettypfa/exoplanetsSchema.ppfa").read()
        namesToSubstitute = locals()
        pfaDocument = titus.prettypfa.jsonNode(
            re.sub("<<[A-Za-z0-9]+>>",
                   lambda x: str(namesToSubstitute[x.group().lstrip("<<").rstrip(">>")]),
                   '''
input: <<inputSchema>>
output: string
cells:
    clusters(array(record(id: string, center: array(double)))) = []

action:
    ifnotnull(mag: input.mag, dist: input.dist, mass: input.mass, radius: input.radius) {
        var normmag = (mag - <<maglow>>) / (<<maghigh>> - <<maglow>>);
        var normdist = (dist - <<distlow>>) / (<<disthigh>> - <<distlow>>);
        var normmass = (mass - <<masslow>>) / (<<masshigh>> - <<masslow>>);
        var normradius = (radius - <<radiuslow>>) / (<<radiushigh>> - <<radiuslow>>);

        model.cluster.closest(new(array(double), normmag, normdist, normmass, normradius),
                              clusters,
                              metric.simpleEuclidean)["id"]
    }
    else
        "MISSING"
'''))

        # now put the clusters in and run the scoring engine
        pfaDocument["cells"]["clusters"]["init"] = kmeansResult.pfaValue(self.clusterNames)
        engine, = PFAEngine.fromJson(pfaDocument)
        self.runEngine(engine)
Example #42
 def post(self, model_name):
     pfa_model = db.get_model(model_name)
     pfa_engine, = PFAEngine.fromJson(pfa_model)
     data_to_score = tornado.escape.json_decode(self.request.body)
     db.update_usage_stats(model_name)
     self.write(str(pfa_engine.action(data_to_score)))
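
The handler looks the model up by name, scores the decoded JSON body, and writes the stringified result back. A hypothetical client call (the URL, port, and payload fields are made up for illustration):

# Hypothetical client-side request against the handler above; route and fields are illustrative.
import requests
response = requests.post("http://localhost:8888/models/my_model",
                         json={"x": 1.0, "y": 2.0})
print(response.text)  # str(pfa_engine.action(...)) returned by the handler
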
Example #43
inputFile, = sys.argv[1:]

# Failures that I'm giving up on:
#
# prob.dist.binomialQF({"p": 0.99999, "prob": 1e-05, "size": 1}) should be 1, is 0 (rounding in count)
#                      {"p": 0.9, "prob": 0.1, "size": 1}        should be 1, is 0 (same reason)
# prob.dist.hypergeometricPDF  \
# prob.dist.hypergeometricCDF   }  many errors! and the QF has a long or infinite loop
# prob.dist.hypergeometricQF   /
# prob.dist.negativeBinomialPDF({"x": 17, "prob": 0.9, "size": 100}) should be 0.00245, is 0.02715
#                               {"x": 100, "prob": 0.1, "size": 17}  should be 0.00245, is 0.00462
#                               {"x": 100, "prob": 0.5, "size": 100} should be 5.7e42, is 0.02817
# prob.dist.negativeBinomialQF has many errors (though not as many as the hypergeometric)

for counter, example in enumerate(getExamples(open(inputFile))):
    engine, = PFAEngine.fromJson(example["engine"])

    if example["function"] in ("prob.dist.binomialQF",
                               "prob.dist.hypergeometricPDF",
                               "prob.dist.hypergeometricCDF",
                               "prob.dist.hypergeometricQF",
                               "prob.dist.negativeBinomialPDF",
                               "prob.dist.negativeBinomialQF"):
        continue

    functionWritten = False

    def maybeWriteFunction(functionWritten):
        if not functionWritten:
            print "%4d    %-20s%s" % (counter + 1, example["function"],
                                      json.dumps(example["engine"]))