Example #1
0
	def __init__(self, xslt_processor='lxml'):

		'''
		Initiates MODSMapper, with option of what XSLT processor to use.
			- lxml: faster, but does not provide XSLT 2.0 support (though the included stylesheet does not require it)
			- pyjxslt: slower, but offers XSLT 2.0 support

		Args:
			xslt_processor (str)['lxml','pyjxslt']: Selects which XSLT processor to use.

		Raises:
			ValueError: if xslt_processor is not one of the supported values.
		'''

		self.xslt_processor = xslt_processor
		self.xslt_filepath = '/opt/combine/inc/xslt/MODS_extract.xsl'

		if self.xslt_processor == 'lxml':

			# parse the stylesheet and build the lxml XSLT transformer
			xslt_tree = etree.parse(self.xslt_filepath)
			self.xsl_transform = etree.XSLT(xslt_tree)

		elif self.xslt_processor == 'pyjxslt':

			# prepare pyjxslt gateway and register the stylesheet under a fixed key
			# NOTE(review): 6767 is presumably the gateway port -- confirm
			self.gw = pyjxslt.Gateway(6767)
			with open(self.xslt_filepath,'r') as f:
				self.gw.add_transform('xslt_transform', f.read())

		else:

			# fail fast: previously an unknown value left the instance with
			# neither self.xsl_transform nor self.gw set
			raise ValueError(
				"Unsupported xslt_processor {!r}; expected 'lxml' or 'pyjxslt'".format(xslt_processor))
Example #2
0
class XMLToJsonTestCase(unittest.TestCase):
    # Smoke test only: the transform itself is exercised elsewhere.  All we
    # verify here is that the expected results come back through the gateway.
    gw = pyjxslt.Gateway()
    if not gw.gateway_connected(reconnect=False):
        print("Gateway must be running on port 25333")

    def compare_jsons(self, json1, json2):
        # json1 is decoded outside the try block, so a malformed first
        # argument raises; a malformed json2 is printed and reported as a
        # mismatch instead.
        expected = json.loads(json1)
        try:
            actual = json.loads(json2)
        except json.JSONDecodeError as exc:
            print(str(exc))
            return False

        # dict_compare yields (success, text); the text is printed on mismatch
        matched, diff_text = dict_compare(expected, actual)
        if not matched:
            print(diff_text)
        return matched

    def test1(self):
        # xml1 converted through the gateway must match expected_json
        converted = self.gw.to_json(xml1)
        self.assertTrue(self.compare_jsons(expected_json, converted))

        # bad_xml must produce exactly expected_bad
        bad_result = self.gw.to_json(bad_xml)
        self.assertEqual(expected_bad, bad_result)

        # XML carrying a processing instruction must match expected_pi
        pi_result = self.gw.to_json(xml_with_processing_instruction)
        self.assertTrue(self.compare_jsons(expected_pi, pi_result))
Example #3
0
        def transform_xml_udf(job_id, row, xslt_string):
            """Transform a single record's document with the supplied XSLT.

            Args:
                job_id: job identifier; converted with int() for the output Row.
                row: input record; its record_id, document and oai_set
                    attributes are read.
                xslt_string: stylesheet registered with the pyjxslt gateway
                    under the key 'xslt_transform'.

            Returns:
                Row with record_id, document, error, job_id, oai_set and
                success fields. On success document holds the transform
                result, error is '' and success is 1; if any exception is
                raised document is '', error is str(exception) and success
                is 0.
            """

            # attempt transformation and save output to 'document'
            try:

                # transform with pyjxslt gateway
                gw = pyjxslt.Gateway(6767)
                gw.add_transform('xslt_transform', xslt_string)
                try:
                    result = gw.transform('xslt_transform', row.document)
                finally:
                    # always unregister the stylesheet, even when the
                    # transform raises, so it is not left behind on failure
                    gw.drop_transform('xslt_transform')

                # NOTE(review): only raised exceptions are treated as failures
                # here; confirm whether pyjxslt can also signal errors through
                # the returned value.
                document, error, success = result, '', 1

            # catch transformation exception and save exception to 'error'
            except Exception as e:
                document, error, success = '', str(e), 0

            # return Row
            return Row(record_id=row.record_id,
                       document=document,
                       error=error,
                       job_id=int(job_id),
                       oai_set=row.oai_set,
                       success=success)
Example #4
0
    def _transform_xslt(self, row):
        """Apply the XSLT held in self.payload to row.document via pyjxslt.

        Args:
            row: record whose `document` attribute is the XML to transform.

        Returns:
            The value returned by the gateway's transform call, or
            str(exception) if parsing the stylesheet or any gateway call
            raises.
        """

        try:

            # attempt to parse xslt prior to submitting to pyjxslt; a parse
            # failure is reported through the handler below
            etree.fromstring(self.payload.encode('utf-8'))

            # transform with pyjxslt gateway
            gateway = pyjxslt.Gateway(6767)
            gateway.add_transform('xslt_transform', self.payload)
            try:
                return gateway.transform('xslt_transform', row.document)
            finally:
                # always unregister the stylesheet, even when the transform
                # raises, so it is not left behind on failure
                gateway.drop_transform('xslt_transform')

        except Exception as err:
            return str(err)
Example #5
0
 def test_gw_down(self):
     # With nothing listening on the chosen port, to_json must return None.
     unreachable_gw = pyjxslt.Gateway(port=23456)  # a non-existent port
     converted = unreachable_gw.to_json(xml1)
     self.assertIsNone(converted)
Example #6
0
class TestGateway(unittest.TestCase):
    # Every test shares this single class-level gateway.
    gw = pyjxslt.Gateway()

    # Expected transform outputs, shared by the tests below.
    _UPPER_ENTRIES = """<?xml version="1.0" encoding="UTF-8"?>
ENTRY: 17:FOO
ENTRY: 42:BAR"""

    _LOWER_ENTRIES = """<?xml version="1.0" encoding="UTF-8"?>
entry: 17:FOO
entry: 42:BAR"""

    # Output of xsl2 when no parameters are passed to transform().
    _PARMS_NONE_PASSED = """<?xml version="1.0" encoding="UTF-8"?>
Parm1: DEVEL
Parm2: 17
ENTRY: 17:FOO
ENTRY: 42:BAR"""

    _PARMS_P2_ONLY = """<?xml version="1.0" encoding="UTF-8"?>
Parm1: DEVEL
Parm2: 42
ENTRY: 17:FOO
ENTRY: 42:BAR"""

    _PARMS_P1_AND_P2 = """<?xml version="1.0" encoding="UTF-8"?>
Parm1: PROD
Parm2: 42
ENTRY: 17:FOO
ENTRY: 42:BAR"""

    def testSimple(self):
        # register a stylesheet and run it against xml1
        self.gw.add_transform('k1', xsl1)
        produced = self.gw.transform('k1', xml1)
        self.assertEqual(self._UPPER_ENTRIES, produced)

    def testParms(self):
        # keyword arguments to transform() show up as Parm1/Parm2 in the output
        self.gw.add_transform('k2', xsl2)

        with_p2 = self.gw.transform('k2', xml1, p2=42)
        self.assertEqual(self._PARMS_P2_ONLY, with_p2)

        with_both = self.gw.transform('k2', xml1, p2=42, p1="PROD")
        self.assertEqual(self._PARMS_P1_AND_P2, with_both)

    def testFile(self):
        # add_transform is given a file path here instead of stylesheet text
        first_path = os.path.join('data', 'file3.xsl')
        self.gw.add_transform('k3', first_path)
        self.assertEqual(self._UPPER_ENTRIES, self.gw.transform('k3', xml1))

        # registering a second file under the same key must change the output
        second_path = os.path.join('data', 'file3a.xsl')
        self.gw.add_transform('k3', second_path)
        self.assertEqual(self._LOWER_ENTRIES, self.gw.transform('k3', xml1))

    def testReplace(self):
        # adding a different stylesheet under an existing key replaces it
        self.gw.add_transform('k4', xsl1)
        before = self.gw.transform('k4', xml1)
        self.assertEqual(self._UPPER_ENTRIES, before)

        self.gw.add_transform('k4', xsl2)
        after = self.gw.transform('k4', xml1)
        self.assertEqual(self._PARMS_NONE_PASSED, after)

    def testBadXSL(self):
        # xsl3 must be rejected with ValueError; xsl4 must make add_transform
        # return None
        with self.assertRaises(ValueError):
            self.gw.add_transform('e1', xsl3)
        rejected = self.gw.add_transform('e2', xsl4)
        self.assertIsNone(rejected)

        # the same key must still accept a good stylesheet afterwards
        self.gw.add_transform('e2', xsl2)
        self.assertEqual(self._PARMS_NONE_PASSED, self.gw.transform('e2', xml1))

    def testBadXML(self):
        # transforming xml2 must yield a string starting with 'ERROR:'
        self.gw.add_transform('k1', xsl1)
        outcome = self.gw.transform('k1', xml2)
        self.assertTrue(outcome.startswith('ERROR:'))