def test_xml2dict_invalid_xml():
    """Malformed XML (mismatched closing tag) must raise ``ET.ParseError``."""
    broken = "<tag>1</ta>"
    with pytest.raises(ET.ParseError):
        # The return value is irrelevant here - only the exception matters.
        xml2dict(broken)
def test_xml2dict_smoke():
    """Smoke test: a small nested document maps onto the expected dict shape."""
    document = (
        " <root>"
        " <tag1>1</tag1>"
        " <nested>"
        " <tagn>111</tagn>"
        " <tagn>222</tagn>"
        " </nested>"
        " </root>"
    )
    parsed = xml2dict(document)

    # root: the document element is the only top-level key
    assert len(parsed) == 1
    assert "root" in parsed

    # first level: one scalar child and one nested element
    top = parsed["root"]
    assert len(top) == 2
    assert "tag1" in top
    assert "nested" in top
    assert top["tag1"] == "1"

    # second level: repeated tags are collected under a single key
    inner = top["nested"]
    assert len(inner) == 1
    assert "tagn" in inner
    repeated = inner["tagn"]
    assert len(repeated) == 2
    assert repeated[0] == "111"
    assert repeated[1] == "222"
def test_xml2dict_value_is_case_sensitive():
    """Element text keeps its original letter case."""
    parsed = xml2dict("<tag>Val</tag>")
    assert parsed["tag"] == "Val"
def test_xml2dict_value_is_striped():
    """Leading and trailing whitespace is removed from element text."""
    padded = "<tag>\t1 \n</tag>"
    parsed = xml2dict(padded)
    assert parsed["tag"] == "1"
def test_xml2dict_value_is_string():
    """Element text is returned as ``str`` even when it looks numeric."""
    value = xml2dict("<tag>1</tag>")["tag"]
    # Exact type check on purpose: the value must be a plain str.
    assert type(value) is str
def test_xml2dict_tag_is_case_sensitive():
    """Tag names are used as keys verbatim, without case folding."""
    parsed = xml2dict("<tag>1</tag>")
    assert "tag" in parsed
    assert "TAG" not in parsed
def test_xml2dict_empty():
    """Whitespace-only input yields an empty dict."""
    blank = " \t\n"
    parsed = xml2dict(blank)

    # the result must be a real dict with nothing in it
    assert type(parsed) is dict
    assert len(parsed) == 0
def test_xml2dict_real():
    """Shortened example of the metadata XML found in a QVD file."""
    document = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        " <QvdTableHeader>"
        " <QvBuildNo>7314</QvBuildNo>"
        " <Fields>"
        " <QvdFieldHeader>"
        " <FieldName>ID</FieldName>"
        " </QvdFieldHeader>"
        " <QvdFieldHeader>"
        " <FieldName>NAME</FieldName>"
        " </QvdFieldHeader>"
        " </Fields>"
        " </QvdTableHeader>"
    )
    parsed = xml2dict(document)

    # repeated <QvdFieldHeader> elements are addressed by position
    headers = parsed["QvdTableHeader"]["Fields"]["QvdFieldHeader"]
    assert headers[0]["FieldName"] == "ID"
    assert headers[1]["FieldName"] == "NAME"
def __init__(self, name):
    """
    Open an existing QVD file for reading, or load an XML template for a new one.

    A QvdFile object has two mutually exclusive modes - we either read an
    existing file or create a new one. The mode is determined from the
    extension of ``name``:

    - if it is ".qvd" (any letter case) we are reading;
    - otherwise (normally ".xml") we are creating a new QVD file with the
      structure defined in the XML file.

    The XML file has exactly the same structure as the metadata section of
    a QVD file. There is a tool which extracts the metadata section from a
    QVD file and clears the necessary data (e.g. the number of rows is
    unknown); this tool is also capable of creating an XML template with
    just one field.

    Attributes set here: ``mode``, ``fName``, ``fp`` (open descriptor in
    read mode, ``None`` otherwise), ``stPos``, ``xml``, ``root``,
    ``attribs``, ``fields`` and ``NoneValueStr``.

    Args:
        name: path of the ".qvd" file to read or of the XML template.

    Raises:
        FileExistsError: in create mode, when a readable ".qvd" file with
            the same path stem as ``name`` already exists.
        BadFormat: when the file has no ``</QvdTableHeader>`` closing tag
            or no ``<QvdTableHeader>`` opening tag before it.
        OSError: when ``name`` cannot be opened or read.
    """
    start_tag = b'<QvdTableHeader>'
    end_tag = b'</QvdTableHeader>'
    chunk_size = 100000

    # splitext() only inspects the last path component, so dots in directory
    # names or inside the file stem cannot confuse mode detection.
    stem, ext = os.path.splitext(name)
    self.mode = os.O_RDONLY if ext.lower() == '.qvd' else os.O_WRONLY

    # We never ever want to erase an existing QVD file, so we check for the
    # QVD file's presence and raise an exception if it exists.
    qvd_name = stem + '.qvd'
    if self.mode == os.O_WRONLY and os.access(qvd_name, os.R_OK):
        raise FileExistsError(qvd_name)

    f = os.open(name, os.O_RDONLY)
    keep_open = False
    try:
        # Read the file in chunks, looking for the end of the metadata.
        bta = bytearray()
        end_pos = -1
        while end_pos < 0:
            buf = os.read(f, chunk_size)
            if not buf:
                # EOF without a closing tag: malformed QVD file
                raise BadFormat
            # Re-scan the tail of the data already read so that a closing
            # tag split across two chunks is still found.
            scan_from = max(0, len(bta) - len(end_tag) + 1)
            bta.extend(buf)
            end_pos = bta.find(end_tag, scan_from)

        self.fName = name

        # form metadata bytes
        xml = bytes(bta[:end_pos]) + b'\n' + end_tag
        pieces = xml.split(start_tag)
        if len(pieces) < 2:
            # no opening tag before the closing one: malformed as well
            raise BadFormat

        if self.mode == os.O_RDONLY:
            # we need the QVD file to stay open - we are going to read from it
            self.fp = f
            self.xml = None  # do not need XML in case of reading
            # Offset of the first byte after the closing tag in the file.
            # (len(xml) would be one too many: the b'\n' above exists only
            # in memory.)
            self.stPos = end_pos + len(end_tag)
            # there might be some symbols at the end of metadata - skip them
            os.lseek(f, self.stPos, os.SEEK_SET)
            while True:
                b = os.read(f, 1)
                if b not in [b'\r', b'\n', b'\0']:
                    break
                self.stPos += 1
        else:
            # in the case of creating a new QVD the XML file itself is not
            # needed any longer; the descriptor is closed below
            self.fp = None
            # save metadata - needed for the metadata section of the new QVD
            self.xml = xml
            # placeholder: the real value is known only when writing the file
            self.stPos = 0

        # convert XML to dict and add "shortcuts"
        self.root = xml2dict(start_tag + pieces[1])
        # dict of attributes - save some typing
        self.attribs = self.root["QvdTableHeader"]
        # list of fields - save typing
        # NOTE(review): if xml2dict returns a single repeated element as a
        # plain dict rather than a one-item list, this is not a list for
        # one-field files - confirm against xml2dict.
        self.fields = self.root["QvdTableHeader"]["Fields"]["QvdFieldHeader"]

        # some configurable stuff
        # value of the field with 0 values (if requested)
        self.NoneValueStr = "(None)"

        keep_open = self.mode == os.O_RDONLY
    finally:
        if not keep_open:
            # create mode, or any failure above: do not leak the descriptor
            os.close(f)
            self.fp = None