Example #1
0
def eml_data_table(tables: list) -> list:
    """Summarize data-table elements as a list of display dictionaries.

    Args:
        tables: Element objects, one per data table. Each must provide
            ``find``/``findall`` and its children an ``xpath`` method
            (presumably lxml elements).

    Returns:
        One dict per table, in input order. "Entity Name" is always set;
        "Entity Description", "Physical" and "Methods" are only set when
        the corresponding child elements are present.
    """
    data_table = list()
    for table in tables:
        t = dict()
        t["Entity Name"] = clean(table.find("./entityName").xpath("string()"))
        description = table.find("./entityDescription")
        if description is not None:
            # The XPath expression has to be the complete call "string()";
            # an unbalanced "string(" is rejected by the XPath evaluator.
            t["Entity Description"] = clean(description.xpath("string()"))
        physical = table.findall(".//physical")
        if physical:
            t["Physical"] = eml_physical(physical)
        methods = table.find("./methods")
        if methods is not None:
            t["Methods"] = eml_methods(methods)
        # Append only once the entry is fully populated.
        data_table.append(t)
    return data_table
def eml_responsible_party(parties: list, position: bool) -> list:
    """Collect the names of responsible parties and post-process them.

    For every party element the individual names (surname plus given
    names), organization names and position names are gathered into one
    dict; the list of those dicts is then handed to
    ``process_responsible_party``.

    Args:
        parties: Responsible-party element objects (for example the
            ``creator`` elements of a resource).
        position: Passed through unchanged to
            ``process_responsible_party``.

    Returns:
        The result of ``process_responsible_party`` for the collected
        name groups.
    """
    responsible_party = list()
    for party in parties:
        individual_names = list()
        for _individual_name in party.findall("./individualName"):
            given_names = [
                clean(_given_name.xpath("string()")).strip()
                for _given_name in _individual_name.findall("./givenName")
            ]
            _sur_name = _individual_name.find("./surName")
            sur_name = clean(_sur_name.xpath("string()")).strip()
            individual_names.append(
                {"sur_name": sur_name, "given_names": given_names}
            )

        organization_names = [
            clean(_organization_name.xpath("string()")).strip()
            for _organization_name in party.findall("./organizationName")
        ]

        position_names = [
            clean(_position_name.xpath("string()"))
            for _position_name in party.findall("./positionName")
        ]

        responsible_party.append({
            "individual_names": individual_names,
            "organization_names": organization_names,
            "position_names": position_names
        })

    # Post-process exactly once, after every party has been collected.
    # Doing this inside the loop re-processed a partially built list on
    # every iteration and left the result unbound for an empty input.
    return process_responsible_party(responsible_party, position)
Example #3
0
def eml_resource(element_obj) -> dict:
    """Build the resource-level summary of an element.

    Args:
        element_obj: Element providing ``find``/``findall``; its
            ``title`` child must exist.

    Returns:
        Dict with "Title", "Creator" and "Alternate Identifier" always
        set, plus "Abstract" and "Intellectual Rights" when those child
        elements are present.
    """
    # Dict displays evaluate their entries top to bottom, so the lookups
    # happen in the same order as the keys are listed.
    resource = {
        "Title": clean(element_obj.find("./title").xpath("string()")),
        "Creator": eml_responsible_party(
            element_obj.findall("./creator"), position=False
        ),
        "Alternate Identifier": [
            clean(identifier.xpath("string()"))
            for identifier in element_obj.findall("./alternateIdentifier")
        ],
    }

    # Optional free-text sections are rendered through the text helpers.
    optional_sections = (
        ("Abstract", "./abstract"),
        ("Intellectual Rights", "./intellectualRights"),
    )
    for label, path in optional_sections:
        section = element_obj.find(path)
        if section is not None:
            resource[label] = html_text(eml_text(section))

    return resource
Example #4
0
def eml_physical(phys: list) -> list:
    """Summarize ``physical`` elements as a list of display dictionaries.

    Args:
        phys: Element objects, one per physical description.

    Returns:
        One dict per element, in input order. "Object Name" and "Data
        Format" are always set; "Size", "Checksum(s)", "Compression
        Method", "Encoding Method" and "Character Encoding" are only set
        when the corresponding elements are present. The three list-valued
        entries contain one item per matching element.
    """
    physical = list()
    for phy in phys:
        p = dict()
        p["Object Name"] = phy.find("./objectName").text.strip()
        size = phy.find("./size")
        if size is not None:
            value = clean(size.xpath("string()"))
            # NOTE(review): the EML schema appears to declare ``unit`` as
            # optional with default "byte"; fall back to that instead of
            # raising KeyError when it is omitted -- confirm against schema.
            unit = size.attrib.get("unit", "byte").strip()
            p["Size"] = f"{value} ({unit})"
        checksums = phy.findall(".//authentication")
        if checksums:
            # Build the list once, outside the loop, so that every
            # checksum is kept rather than only the last one.
            c = list()
            for checksum in checksums:
                value = clean(checksum.xpath("string()"))
                method = checksum.attrib.get("method")
                if method is None:
                    # No method attribute: show the bare value.
                    c.append(value)
                else:
                    c.append(f"{value} ({method.strip()})")
            p["Checksum(s)"] = c
        compression_methods = phy.findall(".//compressionMethod")
        if compression_methods:
            p["Compression Method"] = [
                clean(compression_method.xpath("string()"))
                for compression_method in compression_methods
            ]
        encoding_methods = phy.findall(".//encodingMethod")
        if encoding_methods:
            p["Encoding Method"] = [
                clean(encoding_method.xpath("string()"))
                for encoding_method in encoding_methods
            ]
        character_encoding = phy.find("./characterEncoding")
        if character_encoding is not None:
            p["Character Encoding"] = clean(
                character_encoding.xpath("string()"))
        p["Data Format"] = eml_data_format(phy.find("./dataFormat"))
        physical.append(p)
    return physical
Example #5
0
def eml_method_step(element_obj) -> dict:
    """Convert a method-step element into a nested display dictionary.

    Args:
        element_obj: Element providing ``find``/``findall``.

    Returns:
        Dict with "Description" always set, "Instrumentation" (list of
        strings) when instrumentation elements exist, and "Sub Step(s)"
        (list of dicts built by recursing into each ``subStep``) when
        sub-steps exist.
    """
    method_step = {
        "Description": html_text(
            eml_text(element_obj.find("./description"))
        ),
    }

    instruments = element_obj.findall("./instrumentation")
    if instruments:
        method_step["Instrumentation"] = [
            clean(node.xpath("string()")) for node in instruments
        ]

    # Sub-steps have the same structure as the step itself.
    nested_steps = element_obj.findall("./subStep")
    if nested_steps:
        method_step["Sub Step(s)"] = [
            eml_method_step(node) for node in nested_steps
        ]

    return method_step
Example #6
0
def _data_format_values(elements) -> list:
    """Return the cleaned string value of every element in *elements*."""
    return [clean(element.xpath("string()")) for element in elements]


def _data_format_optional(target: dict, key: str, element) -> None:
    """Store the cleaned string value of *element* under *key* if present."""
    if element is not None:
        target[key] = clean(element.xpath("string()"))


def _data_format_complex(complex_format) -> list:
    """Describe each textFixed/textDelimited child of a ``complex`` element."""
    fields = list()
    for child in complex_format:
        # Compare the tag, not the element itself, with the tag name.
        if child.tag == "textFixed":
            fixed = dict()
            fixed["Field Width"] = clean(
                child.find("./fieldWidth").xpath("string()"))
            _data_format_optional(
                fixed, "Line Number", child.find("./lineNumber"))
            _data_format_optional(
                fixed, "Field Start Column",
                child.find("./fieldStartColumn"))
            fields.append({"textFixed": fixed})
        elif child.tag == "textDelimited":
            delimited = dict()
            delimited["Field Delimiter"] = clean(
                child.find("./fieldDelimiter").xpath("string()"))
            _data_format_optional(
                delimited, "Collapse Delimiters",
                child.find("./collapseDelimiters"))
            _data_format_optional(
                delimited, "Line Number", child.find("./lineNumber"))
            delimited["Quote Character"] = _data_format_values(
                child.findall("./quoteCharacter"))
            delimited["Literal Character"] = _data_format_values(
                child.findall("./literalCharacter"))
            fields.append({"Text Delimited": delimited})
        # Any other node (e.g. a comment) carries no field description.
    return fields


def _data_format_text(f) -> dict:
    """Describe a ``textFormat`` element."""
    tf = dict()
    _data_format_optional(
        tf, "Header Lines", f.find("./numHeaderLines"))
    _data_format_optional(
        tf, "Footer Lines", f.find("./numFooterLines"))
    # findall() always returns a list, so this key is always present
    # (possibly with an empty list).
    tf["Record Delimiter(s)"] = _data_format_values(
        f.findall("./recordDelimiter"))
    physical_line_delimiters = f.findall(".//physicalLineDelimiter")
    if physical_line_delimiters:
        tf["Physical Line Delimiter(s)"] = _data_format_values(
            physical_line_delimiters)
    _data_format_optional(
        tf, "Physical Lines Per Record",
        f.find("./numPhysicalLinesPerRecord"))
    _data_format_optional(
        tf, "Maximum Record Length", f.find("./maxRecordLength"))
    tf["Attribute Orientation"] = clean(
        f.find("./attributeOrientation").xpath("string()"))

    simple_delimited = f.find("./simpleDelimited")
    if simple_delimited is not None:
        td = dict()
        td["Field Delimiter"] = _data_format_values(
            simple_delimited.findall("./fieldDelimiter"))
        _data_format_optional(
            td, "Collapse Delimiters",
            simple_delimited.find("./collapseDelimiters"))
        td["Quote Character"] = _data_format_values(
            simple_delimited.findall("./quoteCharacter"))
        td["Literal Character"] = _data_format_values(
            simple_delimited.findall("./literalCharacter"))
        # Keep the result; it used to be computed and then dropped.
        tf["Simple Delimited"] = td

    complex_format = f.find("./complex")
    if complex_format is not None:
        # Built in its own helper with its own locals so the text-format
        # dict above is not overwritten; the result is kept, not dropped.
        tf["Complex"] = _data_format_complex(complex_format)
    return tf


def _data_format_binary_raster(f) -> dict:
    """Describe a ``binaryRasterFormat`` element."""
    brf = dict()
    brf["Row/Column Orientation"] = clean(
        f.find("./rowColumnOrientation").xpath("string()"))
    multi_band = f.find("./multiBand")
    if multi_band is not None:
        nbands = multi_band.find("./nbands")
        layout = multi_band.find("./layout")
        brf["Multi-band"] = {
            "Number of bands": clean(nbands.xpath("string()")),
            "Layout": clean(layout.xpath("string()"))
        }
    brf["Number of bits"] = clean(f.find("./nbits").xpath("string()"))
    brf["Byte Order"] = clean(f.find("./byteorder").xpath("string()"))
    _data_format_optional(brf, "Skip bytes", f.find("./skipbytes"))
    _data_format_optional(
        brf, "Band row bytes", f.find("./bandrowbytes"))
    _data_format_optional(
        brf, "Total row bytes", f.find("./totalrowbytes"))
    _data_format_optional(
        brf, "Band-gap bytes", f.find("./bandgapbytes"))
    return brf


def eml_data_format(df) -> dict:
    """Convert a ``dataFormat`` element into a display dictionary.

    Only the first child of *df* is inspected. Its tag selects the single
    key of the result: "Text" for ``textFormat``, "Externally Defined" for
    ``externallyDefinedFormat`` and "Binary Raster" for anything else.

    Args:
        df: The ``dataFormat`` element, or ``None``.

    Returns:
        A dict with one entry describing the format, or an empty dict when
        *df* is ``None`` or has no children.
    """
    data_format = dict()
    if df is None or len(df) == 0:
        # Nothing to describe; avoid failing on a missing/empty element.
        return data_format
    f = df[0]  # Can only be one child
    if f.tag == "textFormat":
        data_format["Text"] = _data_format_text(f)
    elif f.tag == "externallyDefinedFormat":
        edf = dict()
        edf["Format Name"] = clean(
            f.find("./formatName").xpath("string()"))
        _data_format_optional(
            edf, "Format Version", f.find("./formatVersion"))
        # The key must be a plain string; a set literal is unhashable.
        data_format["Externally Defined"] = edf
    else:  # f.tag == binaryRasterFormat
        data_format["Binary Raster"] = _data_format_binary_raster(f)
    return data_format