def __init__(self, xslt_processor='lxml'):

    '''
    Initiates MODSMapper and prepares the selected XSLT processor.

    Processor options:
        - lxml: faster, but does not provide XSLT 2.0 support (the included
          stylesheet does not require it)
        - pyjxslt: slower, but offers XSLT 2.0 support

    Args:
        xslt_processor (str)['lxml','pyjxslt']: Selects which XSLT processor to use.

    Raises:
        ValueError: if xslt_processor is not one of the supported values.
    '''

    self.xslt_processor = xslt_processor
    self.xslt_filepath = '/opt/combine/inc/xslt/MODS_extract.xsl'

    if self.xslt_processor == 'lxml':

        # parse the stylesheet once and keep a reusable transformer
        xslt_tree = etree.parse(self.xslt_filepath)
        self.xsl_transform = etree.XSLT(xslt_tree)

    elif self.xslt_processor == 'pyjxslt':

        # open the pyjxslt gateway and register the stylesheet under a fixed name
        self.gw = pyjxslt.Gateway(6767)
        with open(self.xslt_filepath, 'r') as f:
            self.gw.add_transform('xslt_transform', f.read())

    else:

        # fail fast: otherwise neither self.xsl_transform nor self.gw exists
        # and the problem only surfaces later as an unrelated AttributeError
        raise ValueError(
            "unsupported xslt_processor %r, expected 'lxml' or 'pyjxslt'"
            % (xslt_processor,))
class XMLToJsonTestCase(unittest.TestCase):
    """Quick check of XML-to-JSON conversion through the pyjxslt gateway.

    The transform itself is tested elsewhere; the job here is only to make
    sure the expected output comes back through the gateway.
    """

    # One gateway shared by every test in the class; warn at class-definition
    # time when it is not reachable.
    gw = pyjxslt.Gateway()
    if not gw.gateway_connected(reconnect=False):
        print("Gateway must be running on port 25333")

    def compare_jsons(self, json1, json2):
        # Decode both documents and delegate the comparison to dict_compare.
        # A second document that is not valid JSON counts as a mismatch; the
        # first document is decoded outside the guard, so its errors propagate.
        first = json.loads(json1)
        try:
            second = json.loads(json2)
        except json.JSONDecodeError as exc:
            print(str(exc))
            return False

        matched, report = dict_compare(first, second)
        if matched:
            return matched
        # Show the difference report before handing back the falsy result.
        print(report)
        return matched

    def test1(self):
        # Well-formed XML converts to the expected JSON structure.
        converted = self.gw.to_json(xml1)
        self.assertTrue(self.compare_jsons(expected_json, converted))

        # Bad XML must come back exactly as the expected error payload.
        self.assertEqual(expected_bad, self.gw.to_json(bad_xml))

        # XML containing a processing instruction converts as expected too.
        converted_pi = self.gw.to_json(xml_with_processing_instruction)
        self.assertTrue(self.compare_jsons(expected_pi, converted_pi))
def transform_xml_udf(job_id, row, xslt_string):

    '''
    Transform one record's document with an XSLT stylesheet through the
    pyjxslt gateway, capturing any failure on the returned Row instead of raising.

    Args:
        job_id: job identifier, stored on the result as int(job_id)
        row: input record; its record_id, document and oai_set attributes are read
        xslt_string: stylesheet handed to the gateway's add_transform

    Returns:
        Row: record_id, document, error, job_id, oai_set, success
            - on success: document is the transform result, error is '', success is 1
            - on any exception: document is '', error is str(exception), success is 0
    '''

    try:

        # register the stylesheet with the pyjxslt gateway
        gw = pyjxslt.Gateway(6767)
        gw.add_transform('xslt_transform', xslt_string)

        try:
            result = gw.transform('xslt_transform', row.document)
        finally:
            # always unregister, even when the transform raises, so the
            # stylesheet is not left behind on the gateway
            gw.drop_transform('xslt_transform')

        document, error, success = result, '', 1

    # deliberately broad: every failure is recorded on the row
    except Exception as e:
        document, error, success = '', str(e), 0

    return Row(
        record_id=row.record_id,
        document=document,
        error=error,
        job_id=int(job_id),
        oai_set=row.oai_set,
        success=success)
def _transform_xslt(self, row):

    '''
    Apply the XSLT held in self.payload to row.document through the pyjxslt gateway.

    Args:
        row: record whose document attribute is transformed

    Returns:
        The gateway's transform result, or str(exception) if parsing the
        stylesheet or any gateway call raises.
    '''

    try:

        # parse the stylesheet locally first, so malformed XML is reported
        # before anything is submitted to pyjxslt
        etree.fromstring(self.payload.encode('utf-8'))

        # register the stylesheet with the pyjxslt gateway
        gateway = pyjxslt.Gateway(6767)
        gateway.add_transform('xslt_transform', self.payload)

        try:
            return gateway.transform('xslt_transform', row.document)
        finally:
            # always unregister, even when the transform raises, so the
            # stylesheet is not left behind on the gateway
            gateway.drop_transform('xslt_transform')

    # deliberately broad: callers receive the error text as the return value
    except Exception as err:
        return str(err)
def test_gw_down(self):
    # A gateway pointed at a non-existent port should yield None from
    # to_json (asserted below).
    unreachable = pyjxslt.Gateway(port=23456)
    converted = unreachable.to_json(xml1)
    self.assertIsNone(converted)
class TestGateway(unittest.TestCase):
    """Exercises add_transform / transform on a shared pyjxslt gateway."""

    # One gateway connection shared by every test in the class.
    gw = pyjxslt.Gateway()

    def testSimple(self):
        # Register a stylesheet from a string and apply it.
        self.gw.add_transform('k1', xsl1)
        expected = """<?xml version="1.0" encoding="UTF-8"?> ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected, self.gw.transform('k1', xml1))

    def testParms(self):
        # Stylesheet parameters are passed to transform as keyword arguments.
        self.gw.add_transform('k2', xsl2)

        # Only p2 supplied; the expected output shows Parm1 as DEVEL.
        expected_one_parm = """<?xml version="1.0" encoding="UTF-8"?> Parm1: DEVEL Parm2: 42 ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected_one_parm, self.gw.transform('k2', xml1, p2=42))

        # Both parameters supplied.
        expected_two_parms = """<?xml version="1.0" encoding="UTF-8"?> Parm1: PROD Parm2: 42 ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(
            expected_two_parms,
            self.gw.transform('k2', xml1, p2=42, p1="PROD"))

    def testFile(self):
        # add_transform is given a file path instead of stylesheet text.
        first_path = os.path.join('data', 'file3.xsl')
        self.gw.add_transform('k3', first_path)
        expected_upper = """<?xml version="1.0" encoding="UTF-8"?> ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected_upper, self.gw.transform('k3', xml1))

        # Re-registering the same key with a different file changes the output.
        second_path = os.path.join('data', 'file3a.xsl')
        self.gw.add_transform('k3', second_path)
        expected_lower = """<?xml version="1.0" encoding="UTF-8"?> entry: 17:FOO entry: 42:BAR"""
        self.assertEqual(expected_lower, self.gw.transform('k3', xml1))

    def testReplace(self):
        # Registering a second stylesheet under the same key replaces the first.
        self.gw.add_transform('k4', xsl1)
        expected_before = """<?xml version="1.0" encoding="UTF-8"?> ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected_before, self.gw.transform('k4', xml1))

        self.gw.add_transform('k4', xsl2)
        expected_after = """<?xml version="1.0" encoding="UTF-8"?> Parm1: DEVEL Parm2: 17 ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected_after, self.gw.transform('k4', xml1))

    def testBadXSL(self):
        # xsl3 is expected to be rejected with ValueError.
        with self.assertRaises(ValueError):
            self.gw.add_transform('e1', xsl3)

        # xsl4 is expected to register without raising, returning None.
        self.assertIsNone(self.gw.add_transform('e2', xsl4))

        # The same key is then re-registered with xsl2 and transforms as expected.
        self.gw.add_transform('e2', xsl2)
        expected = """<?xml version="1.0" encoding="UTF-8"?> Parm1: DEVEL Parm2: 17 ENTRY: 17:FOO ENTRY: 42:BAR"""
        self.assertEqual(expected, self.gw.transform('e2', xml1))

    def testBadXML(self):
        # Bad input XML is reported in the returned text, prefixed 'ERROR:'.
        self.gw.add_transform('k1', xsl1)
        outcome = self.gw.transform('k1', xml2)
        self.assertTrue(outcome.startswith('ERROR:'))