def parse_flatfilenames(path, reference):
    """Yield the names of the flatFile elements that point at `reference`.

    The ADDML document at `path` is read and every flatFile element
    found under its root is inspected. For each one whose
    @definitionReference attribute value equals `reference`, the value of
    its @name attribute is yielded.

    :path: Path of the ADDML file to read
    :reference: Reference value the flatFile elements are matched against
    :returns: Generator of the matching flatFile names
    """
    addml_root = readfile(path).getroot()
    # Lazy filter: elements are still inspected one at a time, in order.
    matching = (
        elem for elem in iter_flatfiles(addml_root)
        if parse_reference(elem) == reference
    )
    for elem in matching:
        yield parse_name(elem)
def test_iter_flatfiles():
    """Check that iter_flatfiles only walks through the flatFile elements.

    The test document holds three flatFile elements and two
    flatFileDefinition elements. Iteration must produce exactly the three
    flatFile elements, named file1, file2 and file3 in that order.
    """
    children = [
        f.definition_elems('flatFile', 'file1', reference='def1'),
        f.definition_elems('flatFile', 'file2', reference='def2'),
        f.definition_elems('flatFile', 'file3', reference='def3'),
        f.definition_elems('flatFileDefinition', 'def1', reference='type1'),
        f.definition_elems('flatFileDefinition', 'def2', reference='type2'),
    ]
    xml = a.addml(child_elements=children)

    # Pre-initialised so that an empty iteration fails the final assertion
    # instead of raising a NameError.
    count = 0
    for count, elem in enumerate(f.iter_flatfiles(xml), start=1):
        assert elem.get('name') == 'file' + six.text_type(count)

    assert count == 3
def create_new_addml(root, flatfiledefinition):
    """Build a standalone addml document for one flatFileDefinition.

    The new document contains deep copies of only those parts of `root`
    that are tied to the given `flatfiledefinition`:

    - every flatFile whose reference equals the definition's name,
    - the definition itself, wrapped in flatFileDefinitions,
    - the flatFileType named by the definition's reference,
    - the recordType named by each recordDefinition that has a reference
      (each one wrapped in its own recordTypes element),
    - every fieldTypes section found in `root`.

    :root: Root element of the original addml metadata
    :flatfiledefinition: The flatFileDefinition element to build from
    :returns: The newly created addml data
    """
    # flatFile elements that refer to this definition by name.
    definition_name = parse_name(flatfiledefinition)
    flatfiles_children = [
        copy.deepcopy(candidate)
        for candidate in iter_flatfiles(root)
        if parse_reference(candidate) == definition_name
    ]

    # The definition itself, copied so the original tree is left untouched.
    type_name = parse_reference(flatfiledefinition)
    definitions_wrapper = wrapper_elems(
        'flatFileDefinitions',
        child_elements=[copy.deepcopy(flatfiledefinition)])
    flatfiles_children.append(definitions_wrapper)

    # structureTypes content starts with the referenced flatFileType.
    referenced_type = find_section_by_name(root, 'flatFileType', type_name)
    structure_children = [
        wrapper_elems('flatFileTypes',
                      child_elements=[copy.deepcopy(referenced_type)]),
    ]

    # One recordTypes wrapper per recordDefinition that carries a reference.
    for record_def in iter_sections(definitions_wrapper, 'recordDefinition'):
        if not parse_reference(record_def):
            continue
        referenced_record = find_section_by_name(
            root, 'recordType', parse_reference(record_def))
        structure_children.append(
            wrapper_elems('recordTypes',
                          child_elements=[copy.deepcopy(referenced_record)]))

    # All fieldTypes sections are carried over as they are.
    structure_children.extend(
        copy.deepcopy(section)
        for section in iter_sections(root, 'fieldTypes'))

    flatfiles_children.append(
        wrapper_elems('structureTypes', child_elements=structure_children))

    flatfiles_wrapper = wrapper_elems('flatFiles',
                                      child_elements=flatfiles_children)
    return addml(child_elements=[flatfiles_wrapper])
def get_charset_with_filename(path, filename):
    """Look up the charset declared in the ADDML data for a given file.

    The first flatFile element whose name equals `filename` is used. Its
    reference leads to a flatFileDefinition, whose own reference leads to
    a flatFileType; the charset parsed from that flatFileType is returned
    formatted as ``charset=<value>``.

    :path: Path of the ADDML file to read
    :filename: Name to match against the flatFile elements
    :returns: The charset string, or None when no flatFile has that name
    """
    addml_root = readfile(path).getroot()

    for candidate in iter_flatfiles(addml_root):
        if parse_name(candidate) != filename:
            continue

        # flatFile -> flatFileDefinition -> flatFileType
        matched_definition = find_section_by_name(
            addml_root, 'flatFileDefinition', parse_reference(candidate))
        matched_type = find_section_by_name(
            addml_root, 'flatFileType', parse_reference(matched_definition))
        return 'charset=%s' % parse_charset(matched_type)

    return None
def test_parse_flatfiledefinitions_medium():
    """Check parse_flatfiledefinitions against the medium test data.

    Three addml data sets are expected. Each one must hold exactly one
    flatFile, one flatFileDefinition, one flatFileType, one recordType
    and one fieldTypes section, and the flatFile in the n:th data set
    must be named csvfile<n>.csv.
    """
    addml_path = 'tests/data/addml_medium.xml'

    # Pre-initialised so that an empty result fails the final assertion.
    produced = 0
    for produced, new_addml in enumerate(
            s.parse_flatfiledefinitions(addml_path), start=1):
        assert f.flatfile_count(new_addml) == 1
        assert f.flatfiledefinition_count(new_addml) == 1
        for section in ('flatFileType', 'recordType', 'fieldTypes'):
            assert a.sections_count(new_addml, section) == 1

        expected_name = 'csvfile' + six.text_type(produced) + '.csv'
        for flatfile in f.iter_flatfiles(new_addml):
            assert a.parse_name(flatfile) == expected_name

    assert produced == 3