def test_parse_type_value_continuous(self):
    """Check the Python type returned by parse_type for continuous fields.

    Every sample value is parsed against a DataField declared as
    ``integer``, ``float`` and ``double``; the result must be an instance
    of ``int``, ``float`` and ``float`` respectively.
    """
    template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )

    values = ["1234", 1234, 12.34, True]
    cases = [
        ('continuous', 'integer', int),
        ('continuous', 'float', float),
        ('continuous', 'double', float),
    ]

    # Unpack in the loop header instead of binding a variable called
    # `type`, which would shadow the builtin for the rest of the method.
    for optype, pmml_type, expected_type in cases:
        # The estimator and its DataField depend only on the declared
        # types, so build them once per case rather than once per value.
        clf = PMMLBaseEstimator(pmml=StringIO(template.format(optype, pmml_type)))
        data_dictionary = clf.find(clf.root, "DataDictionary")
        data_field = clf.find(data_dictionary, "DataField")

        for value in values:
            result = clf.parse_type(value, data_field)
            # The message pinpoints which combination failed.
            assert isinstance(result, expected_type), (
                "parse_type({!r}) with dataType={!r} returned {!r}".format(
                    value, pmml_type, result
                )
            )
def test_parse_type_value_continuous_boolean(self):
    """Check that boolean-typed fields parse truthy/falsy spellings.

    Each input is parsed against a DataField with ``dataType="boolean"``;
    the result must be a ``Boolean`` instance equal to the expected value.
    """
    template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )

    # A list of pairs, not a dict: `1 == True` and they hash alike, so as
    # dict keys `1` and `True` collapse into a single entry and the `True`
    # input would silently never be exercised.
    cases = [
        ("1", True),
        ("True", True),
        ("YES", True),
        (1, True),
        (True, True),
        ("0", False),
        ("False", False),
        (0, False),
    ]

    # The field declaration is the same for every case; build it once.
    clf = PMMLBaseEstimator(pmml=StringIO(template.format('continuous', 'boolean')))
    data_dictionary = clf.find(clf.root, "DataDictionary")
    data_field = clf.find(data_dictionary, "DataField")

    for value, expected in cases:
        result = clf.parse_type(value, data_field)
        assert isinstance(result, Boolean)
        assert result == expected
def test_parse_type_value_categorical(self):
    """Check parse_type against a categorical string field.

    A value that is not among the field's declared ``Value`` entries must
    raise with the message "Value does not match any category."; each
    declared category must be returned unchanged.
    """
    pmml_source = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="Class" optype="categorical" dataType="string">'
        ' <Value value="setosa"/>'
        ' <Value value="versicolor"/>'
        ' <Value value="virginica"/>'
        ' </DataField>'
        ' </DataDictionary>'
        ' </PMML>'
    )

    estimator = PMMLBaseEstimator(pmml=StringIO(pmml_source))
    field = estimator.find(
        estimator.find(estimator.root, "DataDictionary"),
        "DataField",
    )

    # Unknown category: the call must raise, and the message is checked
    # after the context manager has captured the exception.
    with self.assertRaises(Exception) as caught:
        estimator.parse_type("not_in_category", field)
    assert str(caught.exception) == "Value does not match any category."

    # Declared categories round-trip as-is.
    for category in ("setosa", "versicolor", "virginica"):
        assert estimator.parse_type(category, field) == category
def test_parse_type_interval(self):
    """Check parse_type against an ordinal field described by intervals.

    Values falling inside a declared ``Interval`` must compare equal to an
    ``Interval`` built from that value and the interval's margins/closure.
    The value 1 is expected to raise: in the template it is the open right
    margin of the first interval and the open left margin of the second.
    """
    pmml_source = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="ordinal" dataType="float">'
        ' <Interval closure="openOpen" rightMargin="1"/>'
        ' <Interval closure="openClosed" leftMargin="1" rightMargin="1.5"/>'
        ' <Interval closure="openOpen" leftMargin="1.5" rightMargin="2.5"/>'
        ' <Interval closure="closedOpen" leftMargin="2.5" rightMargin="3.5"/>'
        ' <Interval closure="closedClosed" leftMargin="3.5" />'
        ' </DataField>'
        ' </DataDictionary>'
        ' </PMML>'
    )

    estimator = PMMLBaseEstimator(pmml=StringIO(pmml_source))
    field = estimator.find(
        estimator.find(estimator.root, "DataDictionary"),
        "DataField",
    )

    # (input value, keyword arguments of the expected Interval).
    # `None` marks an input for which parse_type is expected to raise.
    cases = [
        (-1, {'rightMargin': 1, 'closure': 'openOpen'}),
        (1, None),
        (2, {'leftMargin': 1.5, 'rightMargin': 2.5, 'closure': 'openOpen'}),
        (2.5, {'leftMargin': 2.5, 'rightMargin': 3.5, 'closure': 'closedOpen'}),
        (3.5, {'leftMargin': 3.5, 'closure': 'closedClosed'}),
    ]

    for value, bounds in cases:
        if bounds is None:
            with self.assertRaises(Exception):
                estimator.parse_type(value, field)
            continue

        assert estimator.parse_type(value, field) == Interval(value, **bounds)
def test_parse_type_value_exception(self):
    """Check the errors raised for unsupported field declarations.

    An unknown ``dataType`` must raise "Unsupported data type." and an
    unknown ``optype`` must raise "Unsupported operation type.".
    """
    pmml_template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )

    # (optype, dataType, expected exception message)
    cases = (
        # Invalid data type
        ("continuous", "does_not_exist", "Unsupported data type."),
        # Invalid operation type
        ("does_not_exist", "string", "Unsupported operation type."),
    )

    for optype, data_type, message in cases:
        estimator = PMMLBaseEstimator(
            pmml=StringIO(pmml_template.format(optype, data_type))
        )
        field = estimator.find(
            estimator.find(estimator.root, "DataDictionary"),
            "DataField",
        )

        with self.assertRaises(Exception) as caught:
            estimator.parse_type("test", field)
        assert str(caught.exception) == message