def test_xml2(self):
    """Import an element that mixes an attribute, a child element and a text node.

    The <field> element carries a ``name`` attribute, a <key> child and the free
    text "United States". The expected set holds one couplet for each of them,
    with the text stored under the left ``'$'``.
    """
    document = (
        '<?xml version="1.0" encoding="utf-8"?> '
        '<Root> <record> '
        '<field name="Country or Area"> <key>USA</key> United States </field> '
        '</record> </Root> '
    )
    imported = import_xml(io.StringIO(document))

    # Assemble the expected structure from the innermost element outwards.
    field_content = Set(
        Couplet('$', 'United States'),
        Couplet('key', 'USA'),
        Couplet('name', 'Country or Area'),
    )
    record_content = Set(Couplet('field', field_content))
    root_content = Set(Couplet('record', record_content))
    expected = Set(Couplet('Root', root_content))

    self.assertEqual(imported, expected)
def get_nations(regionname):
    """Return a clan with the nations of the region named `regionname`.

    Equivalent to this XQuery statement (shown with "MIDDLE EAST" as the region name), with the
    resulting XML converted into a clan:

        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
        return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>

    Every stage is reported to a `FunctionTimer` under its own label.
    """
    stopwatch = FunctionTimer()
    brief = True

    # Import the XML file. Multiplicity and sequence are not used; this data doesn't need them.
    document = xml.import_xml("regions.xml", convert_numerics=True)
    stopwatch.lap("regions", short=brief)

    # Clan with one row per region.
    all_regions = document("regions")["region"]
    stopwatch.lap("regions_clan", short=brief)

    # Keep only the region whose name is `regionname`.
    name_filter = clans.from_dict({"name": regionname})
    selected_region = clans.superstrict(all_regions, name_filter)
    stopwatch.lap("target_region", short=brief)

    # Take the 'nation' lefts of the selected region: a clan with one row per nation.
    nation_rows = selected_region["nation"]
    stopwatch.lap("nations_clan", short=brief)

    # Project 'nationkey' and 'nationname', renaming 'name' to 'nationname' ('comment' is dropped).
    projection = clans.from_dict({"nationkey": "nationkey", "nationname": "name"})
    result = clans.compose(nation_rows, projection)
    stopwatch.end("nations", short=brief)

    return result
def get_nations(regionname):
    """Build the clan of nations that belong to the region called `regionname`.

    This is the data-algebra counterpart of the following XQuery statement (the statement uses
    "MIDDLE EAST" where this function uses `regionname`):

        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
        return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>
    """
    timer = FunctionTimer()
    short_prints = True

    def mark(stage):
        # Record the end of one intermediate stage on the timer.
        timer.lap(stage, short=short_prints)

    # Load the XML document (no multiplicity or sequence; the data doesn't require them).
    regions = xml.import_xml('regions.xml', convert_numerics=True)
    mark('regions')

    # A clan in which every row is one region.
    regions_clan = regions('regions')['region']
    mark('regions_clan')

    # Narrow the clan down to the region named `regionname`.
    wanted_name = clans.from_dict({'name': regionname})
    target_region = clans.superstrict(regions_clan, wanted_name)
    mark('target_region')

    # Extract the 'nation' lefts; every row of the result is one nation's data.
    nations_clan = target_region['nation']
    mark('nations_clan')

    # Rename 'name' to 'nationname' and keep only 'nationkey' and 'nationname' (no 'comment').
    renaming = clans.from_dict({'nationkey': 'nationkey', 'nationname': 'name'})
    nations = clans.compose(nations_clan, renaming)
    timer.end('nations', short=short_prints)

    return nations
def test_xml(self):
    """Import matching XML and JSON documents and inspect the imported employee record.

    The two imports must compare equal. The single top-level couplet must have the
    left 'employee' and a right side with four members, from which the 'id', 'name'
    and 'project' couplets are selected with `compose` and checked individually.
    """
    xml_source = (
        '<?xml version="1.0"?> '
        '<employee id="0034404"> '
        '<name first="john" last="doe"></name> '
        '<project lead="Foo"> <name>GAaaS</name> </project> '
        '<project name="Data Algebra"> <lead>Bar</lead> </project> '
        '</employee> '
    )
    xml_set = import_xml(io.StringIO(xml_source))

    json_source = (
        ' { "employee": { "id": "0034404", '
        '"name": { "first": "john", "last": "doe" }, '
        '"project": [ '
        '{ "lead": "Foo", "name": "GAaaS" }, '
        '{ "name": "Data Algebra", "lead": "Bar" } '
        '] } } '
    )
    json_set = import_json(io.StringIO(json_source))
    self.assertEqual(json_set, xml_set)

    from algebraixlib.algebras.relations import compose

    # The document has exactly one root couplet: 'employee' -> its content.
    root = next(iter(xml_set))
    self.assertEqual(Atom('employee'), root.left)
    employee = root.right
    self.assertEqual(4, len(employee))

    # The 'id' attribute.
    id_selector = Set(Couplet('id', 'id'))
    id_couplets = compose(employee, id_selector)
    self.assertEqual(id_couplets, Set(Couplet('id', '0034404')))

    # The single 'name' element with its two attributes.
    name_selector = Set(Couplet('name', 'name'))
    name_couplet = next(iter(compose(employee, name_selector)))
    expected_name = Set([Couplet('first', 'john'), Couplet('last', 'doe')])
    self.assertEqual(name_couplet.right, expected_name)

    # The two 'project' elements.
    project_selector = Set(Couplet('project', 'project'))
    projects = compose(employee, project_selector)
    self.assertEqual(2, len(projects))
    expected_projects = Set([
        Couplet('project', Set([Couplet('name', title), Couplet('lead', lead)]))
        for title, lead in (('GAaaS', 'Foo'), ('Data Algebra', 'Bar'))
    ])
    self.assertEqual(projects, expected_projects)
# algebraixlib is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along with algebraixlib.
# If not, see <http://www.gnu.org/licenses/>.
# --------------------------------------------------------------------------------------------------
from algebraixlib.io.xml import import_xml, xml_to_str
from algebraixlib.util.mathobjectprinter import mo_to_str

# The XML file used by every step below.
REGIONS_XML = 'TPC-H_Query5/regions.xml'

# Show the XML data, truncated and prettified.
pretty_xml = xml_to_str(REGIONS_XML)
print(pretty_xml)

# Import the same file with numeric conversion enabled and print the result.
regions_document = import_xml(REGIONS_XML, convert_numerics=True)
print('regions_document:\n' + mo_to_str(regions_document))

# Get all regions.
# Step 1 -- /regions
regions = regions_document('regions')
print('regions_relation:\n' + mo_to_str(regions))

# Step 2 -- /regions/region
regions = regions_document('regions')['region']
print('regions:\n' + mo_to_str(regions))

# Get all region keys.
# /regions/region/regionkey
region_keys = regions['regionkey']
print('region_keys:\n' + mo_to_str(region_keys))
def test_xml(self):
    """Check that an XML document and its JSON counterpart import to the same set.

    After comparing the two imports, the 'employee' couplet is taken apart: its
    right side must contain four members, and the couplets selected by the lefts
    'id', 'name' and 'project' must match the values in the source document.
    """
    xml_text = (
        '<?xml version="1.0"?> <employee id="0034404"> '
        '<name first="john" last="doe"></name> '
        '<project lead="Foo"> <name>GAaaS</name> </project> '
        '<project name="Data Algebra"> <lead>Bar</lead> </project> '
        '</employee> '
    )
    xml_set = import_xml(io.StringIO(xml_text))

    json_text = (
        ' { "employee": { '
        '"id": "0034404", '
        '"name": { "first": "john", "last": "doe" }, '
        '"project": [ { "lead": "Foo", "name": "GAaaS" }, '
        '{ "name": "Data Algebra", "lead": "Bar" } ] '
        '} } '
    )
    json_set = import_json(io.StringIO(json_text))
    self.assertEqual(json_set, xml_set)

    from algebraixlib.algebras.relations import compose

    employee_couplet = next(iter(xml_set))
    self.assertEqual(Atom('employee'), employee_couplet.left)
    self.assertEqual(4, len(employee_couplet.right))

    def pick(left):
        # Select the couplets of the employee record whose left is `left`.
        return compose(employee_couplet.right, Set(Couplet(left, left)))

    self.assertEqual(pick('id'), Set(Couplet('id', '0034404')))

    name_couplet = next(iter(pick('name')))
    self.assertEqual(
        name_couplet.right,
        Set([Couplet('first', 'john'), Couplet('last', 'doe')]),
    )

    projects = pick('project')
    self.assertEqual(2, len(projects))

    project_specs = [
        {'name': 'GAaaS', 'lead': 'Foo'},
        {'lead': 'Bar', 'name': 'Data Algebra'},
    ]
    expected_projects = Set([
        Couplet('project', Set([Couplet('name', spec['name']), Couplet('lead', spec['lead'])]))
        for spec in project_specs
    ])
    self.assertEqual(projects, expected_projects)