def test_extract_pair():
    """Neighbouring EXTRACT elements are joined, one pair per call.

    The tree under test holds three EXTRACT siblings in a row followed by
    a TAG4 and a final EXTRACT. ``extract_pair`` is invoked twice on the
    second child of the root; after each call the tree is compared with a
    hand-built expectation.
    """
    with XMLBuilder("ROOT") as actual:
        actual.TAG1()
        actual.EXTRACT("contents1")
        with actual.EXTRACT("contents2"):
            actual.TAG2()
            actual.TAG3()
        actual.EXTRACT("contents3")
        actual.TAG4()
        actual.EXTRACT("contents4")

    extract_template = '<EXTRACT>{0}</EXTRACT>'

    # Expectation after the first call: only the first two EXTRACTs are
    # merged, the third one is still a separate sibling.
    merged_text = "contents1\ncontents2<TAG2/><TAG3/>"
    with XMLBuilder("ROOT") as after_first:
        after_first.TAG1()
        after_first.child_from_string(extract_template.format(merged_text))
        after_first.EXTRACT("contents3")    # left alone by the first pass
        after_first.TAG4()
        after_first.EXTRACT("contents4")
    first_result = preprocessors.ExtractTags().extract_pair(actual.xml[1])
    assert first_result
    assert after_first.xml_str == actual.xml_str

    # Expectation after the second call: the third EXTRACT is folded in too.
    merged_text = merged_text + "\ncontents3"
    with XMLBuilder("ROOT") as after_second:
        after_second.TAG1()
        after_second.child_from_string(extract_template.format(merged_text))
        after_second.TAG4()
        after_second.EXTRACT("contents4")
    second_result = preprocessors.ExtractTags().extract_pair(actual.xml[1])
    assert second_result
    assert after_second.xml_str == actual.xml_str
def test_sandwich_last_tag():
    """An EXTRACT that is the final child must not trigger a sandwich.

    ``sandwich`` is expected to return a falsy value and leave the XML
    exactly as it was before the call.
    """
    with XMLBuilder("ROOT") as builder:
        builder.GPOTABLE()
        builder.EXTRACT()
    snapshot = builder.xml_copy()

    extractor = preprocessors.ExtractTags()
    changed = extractor.sandwich(builder.xml[1])

    assert not changed
    assert builder.xml_str == etree.tounicode(snapshot)
def test_extract_pair_last_node():
    """``extract_pair`` on a trailing EXTRACT leaves the XML untouched.

    The EXTRACT is the last child of the root, so the call should report
    a falsy result and the serialized tree should equal the snapshot
    taken beforehand.
    """
    with XMLBuilder("ROOT") as builder:
        builder.TAG1()
        builder.EXTRACT("contents")
    snapshot = builder.xml_copy()

    extractor = preprocessors.ExtractTags()
    changed = extractor.extract_pair(builder.xml[1])

    assert not changed
    assert builder.xml_str == etree.tounicode(snapshot)
def test_extract_pair_not_pair():
    """No EXTRACT/EXTRACT pair means a falsy result and unchanged XML.

    Here the EXTRACT is followed by a TAG1 rather than another EXTRACT.
    """
    with XMLBuilder("ROOT") as builder:
        builder.EXTRACT("contents")
        builder.TAG1()
    snapshot = builder.xml_copy()

    extractor = preprocessors.ExtractTags()
    changed = extractor.extract_pair(builder.xml[0])

    assert not changed
    assert builder.xml_str == etree.tounicode(snapshot)
def test_sandwich_bad_filling():
    """A sandwich needs one of a few specific tags between the EXTRACTs.

    A P element sits between the two EXTRACTs here, so ``sandwich``
    should return a falsy value and leave the XML as it was.
    """
    with XMLBuilder("ROOT") as builder:
        builder.EXTRACT()
        builder.P()
        builder.EXTRACT()
    snapshot = builder.xml_copy()

    extractor = preprocessors.ExtractTags()
    changed = extractor.sandwich(builder.xml[0])

    assert not changed
    assert builder.xml_str == etree.tounicode(snapshot)
def test_sandwich_no_bread():
    """A sandwich needs exactly one tag between the two EXTRACTs.

    Two GPOTABLE elements separate the EXTRACTs here, so ``sandwich``
    should return a falsy value and leave the XML as it was.
    """
    with XMLBuilder("ROOT") as builder:
        builder.EXTRACT()
        builder.GPOTABLE()
        builder.GPOTABLE()
        builder.EXTRACT()
    snapshot = builder.xml_copy()

    extractor = preprocessors.ExtractTags()
    changed = extractor.sandwich(builder.xml[0])

    assert not changed
    assert builder.xml_str == etree.tounicode(snapshot)
def test_sandwich():
    """A GPOTABLE between two EXTRACTs is merged into the first EXTRACT.

    After the call the first EXTRACT should hold its own text, a newline
    and the GPOTABLE; the trailing EXTRACT remains as a sibling.
    """
    with XMLBuilder("ROOT") as actual:
        actual.TAG1()
        actual.EXTRACT("extract contents")
        actual.GPOTABLE("table contents")
        actual.EXTRACT()

    merged_extract = (
        '<EXTRACT>extract contents\n<GPOTABLE>table contents'
        '</GPOTABLE></EXTRACT>'
    )
    with XMLBuilder("ROOT") as expected:
        expected.TAG1()
        expected.child_from_string(merged_extract)
        expected.EXTRACT()

    extractor = preprocessors.ExtractTags()
    changed = extractor.sandwich(actual.xml[1])

    assert changed
    assert actual.xml_str == expected.xml_str
def setUp(self):
    """Run the parent ``setUp`` and store a new ``ExtractTags`` on ``self.et``."""
    parent = super(ExtractTagsTests, self)
    parent.setUp()
    self.et = preprocessors.ExtractTags()