def test_get_type_exception(self):
    """Check the errors get_type raises for unknown dataType and optype.

    Each scenario builds a PMML document with a single DataField, expects
    get_type to raise on that field, and compares the exception message.
    """
    template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )
    # (optype, dataType, expected message): the invalid data type scenario
    # runs first, then the invalid operation type scenario.
    scenarios = (
        ("continuous", "does_not_exist", "Unsupported data type."),
        ("does_not_exist", "string", "Unsupported operation type."),
    )
    for op_type, pmml_type, message in scenarios:
        document = StringIO(template.format(op_type, pmml_type))
        estimator = PMMLBaseEstimator(document)
        field = estimator.root.find("DataDictionary").find("DataField")

        with self.assertRaises(Exception) as caught:
            get_type(field)

        # Checked after the context manager exits, once the exception has
        # been captured.
        assert str(caught.exception) == message
def test_parse_type_value_continuous(self):
    """Check the Python type produced for continuous numeric fields.

    Every sample value is passed through the callable returned by get_type
    for each (optype, dataType) pair, and the converted value must be an
    instance of the paired Python type.
    """
    template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )
    samples = ["1234", 1234, 12.34, True]
    # (optype, PMML dataType, Python type the converted value must have)
    expectations = (
        ("continuous", "integer", int),
        ("continuous", "float", float),
        ("continuous", "double", float),
    )
    for raw in samples:
        for op_type, pmml_type, python_type in expectations:
            document = StringIO(template.format(op_type, pmml_type))
            estimator = PMMLBaseEstimator(document)
            field = estimator.root.find("DataDictionary").find("DataField")
            converted = get_type(field)(raw)
            assert isinstance(converted, python_type)
def test_parse_type_value_continuous_boolean(self):
    """Check boolean conversion for a continuous field of dataType boolean.

    Each input is passed through the callable returned by get_type; the
    result must be a bool equal to the expected value.
    """
    template = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="test" optype="{}" dataType="{}"/>'
        ' </DataDictionary>'
        ' </PMML>'
    )
    # Kept as a list of pairs, not a dict: 1 == True and both hash alike,
    # so as dict keys they collapse into a single entry and the bool input
    # True would silently never be exercised.
    cases = [
        ("1", True),
        ("True", True),
        ("YES", True),
        (1, True),
        (True, True),
        ("0", False),
        ("False", False),
        (0, False),
    ]
    for value, expected in cases:
        pmml = StringIO(template.format('continuous', 'boolean'))
        clf = PMMLBaseEstimator(pmml)
        data_dictionary = clf.root.find("DataDictionary")
        data_field = data_dictionary.find("DataField")
        result = get_type(data_field)(value)
        assert isinstance(result, bool)
        assert result == expected
def test_get_type_ordinal(self):
    """Check get_type on an ordinal string field with three Value entries.

    The result must list the values in document order and be flagged as
    ordered.
    """
    document = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="Volume" optype="ordinal" dataType="string">'
        ' <Value value="loud"/>'
        ' <Value value="louder"/>'
        ' <Value value="loudest"/>'
        ' </DataField>'
        ' </DataDictionary>'
        ' </PMML>'
    )
    estimator = PMMLBaseEstimator(pmml=StringIO(document))
    field = estimator.root.find("DataDictionary").find("DataField")
    category: Category = get_type(field)
    assert category.categories == ['loud', 'louder', 'loudest']
    assert category.ordered
def test_get_type_categorical(self):
    """Check get_type on a categorical string field with three Value entries.

    The result must list the values in document order and must not be
    flagged as ordered.
    """
    document = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="Class" optype="categorical" dataType="string">'
        ' <Value value="setosa"/>'
        ' <Value value="versicolor"/>'
        ' <Value value="virginica"/>'
        ' </DataField>'
        ' </DataDictionary>'
        ' </PMML>'
    )
    estimator = PMMLBaseEstimator(pmml=StringIO(document))
    field = estimator.root.find("DataDictionary").find("DataField")
    category: Category = get_type(field)
    assert category.categories == ['setosa', 'versicolor', 'virginica']
    assert not category.ordered