Exemple #1
0
    def test_replace_child(self):
        """Check that ``replace_child`` swaps children and rejects a type mismatch.

        The old child must disappear both from the parent's children and from
        ``Node.store``; replacing a surName with a givenName must raise
        ``ValueError``.
        """
        individual_name = Node(names.INDIVIDUALNAME)
        sur_name_1 = Node(names.SURNAME, parent=individual_name)
        sur_name_1.content = 'Gaucho'
        individual_name.add_child(sur_name_1)
        # sur_name_2 names individual_name as parent but is not yet attached.
        sur_name_2 = Node(names.SURNAME, parent=individual_name)
        sur_name_2.content = 'Carroll'
        self.assertIn(sur_name_1, individual_name.children)
        self.assertNotIn(sur_name_2, individual_name.children)
        individual_name.replace_child(old_child=sur_name_1,
                                      new_child=sur_name_2)
        self.assertIn(sur_name_2, individual_name.children)
        self.assertNotIn(sur_name_1, individual_name.children)

        # Test for old child removal from node store
        self.assertNotIn(sur_name_1.id, Node.store)

        # Test for child node type mismatch.  assertRaises fails the test when
        # no ValueError is raised; a bare try/except would pass silently.
        given_name = Node(names.GIVENNAME)
        given_name.content = 'Chase'
        with self.assertRaises(ValueError):
            individual_name.replace_child(old_child=sur_name_2,
                                          new_child=given_name)
Exemple #2
0
def _process_element(e, clean, literals) -> Node:
    """
    Process an lxml etree element into a Metapype node. If the clean attribute is true, then
    remove leading and trailing whitespace from the element content and collapse internal
    runs of whitespace to a single space.

    Text (or tail) made up solely of spaces and/or non-breaking spaces is kept verbatim;
    any other whitespace-only text becomes None.

    Args:
        e: lxml etree element
        clean: boolean to clean leading and trailing whitespace from node content
        literals: tuple of XML elements whose content should not be altered

    Returns: Node

    """

    def _normalize(text):
        # fullmatch (rather than search with ^...$) so that a trailing newline
        # does not make e.g. " \n" count as "spaces only": `$` also matches
        # just before a final newline.
        if re.fullmatch("[ \xA0]+", text):
            return text
        return None if text.strip() == '' else " ".join(text.split())

    tag = e.tag[e.tag.find("}") + 1:]  # Remove any prepended namespace

    node = Node(tag)
    node.nsmap = e.nsmap
    node.prefix = e.prefix

    if clean:
        if e.text is not None:
            # Literal elements keep their text untouched; the tail is always cleaned.
            node.content = e.text if tag in literals else _normalize(e.text)
        if e.tail is not None:
            node.tail = _normalize(e.tail)
    else:
        node.content = e.text
        node.tail = e.tail

    for name, value in e.attrib.items():
        if "{" not in name:
            node.add_attribute(name, value)
        else:
            # Namespace-qualified attribute names are stored as "extras".
            nsname = _format_extras(name, node.nsmap)
            node.add_extras(nsname, value)

    for child_element in e:
        # Comments, processing instructions and entities expose a factory
        # function as .tag instead of a string; only real elements are converted.
        if isinstance(child_element.tag, str):
            node.add_child(_process_element(child_element, clean, literals))
    for child in node.children:
        child.parent = node
        if child.nsmap == node.nsmap:
            child.nsmap = node.nsmap  # Map to single instance of nsmap
    return node
Exemple #3
0
def load_other_entity(dataset_node: Node = None,
                      uploads_path: str = None,
                      data_file: str = ''):
    """
    Build an otherEntity subtree for an uploaded file and attach it to the dataset.

    Args:
        dataset_node: node that receives the new otherEntity child
        uploads_path: directory holding the uploaded file
        data_file: name of the uploaded file inside uploads_path

    Returns:
        The newly created otherEntity node.
    """

    def attach(node_name, parent_node):
        # Create a node under parent_node and register it as a child.
        created = Node(node_name, parent=parent_node)
        add_child(parent_node, created)
        return created

    full_path = f'{uploads_path}/{data_file}'

    other_entity_node = attach(names.OTHERENTITY, dataset_node)

    physical_node = attach(names.PHYSICAL, other_entity_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, other_entity_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = attach(names.OBJECTNAME, physical_node)
    object_name_node.content = data_file

    # Size and checksum are recorded only when the helpers return a value.
    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = attach(names.AUTHENTICATION, physical_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / externallyDefinedFormat / formatName
    data_format_node = attach(names.DATAFORMAT, physical_node)
    externally_defined_format_node = attach(names.EXTERNALLYDEFINEDFORMAT,
                                            data_format_node)
    format_name_node = attach(names.FORMATNAME,
                              externally_defined_format_node)
    format_name_node.content = format_name_from_data_file(data_file)

    entity_type_node = new_child_node(names.ENTITYTYPE,
                                      parent=other_entity_node)
    entity_type_node.content = format_name_from_data_file(data_file)

    delete_data_files(uploads_path)

    return other_entity_node
Exemple #4
0
def test_validate_annotation():
    """An annotation holding a labelled propertyURI and valueURI validates."""
    uri = "http://purl.obolibrary.org/obo/IAO_0000136"
    annotation = Node(names.ANNOTATION)
    labelled_children = (
        (names.PROPERTYURI, "some property label"),
        (names.VALUEURI, "some value label"),
    )
    for child_name, label in labelled_children:
        uri_node = Node(child_name)
        uri_node.content = uri
        uri_node.add_attribute("label", label)
        annotation.add_child(uri_node)
    validate.tree(annotation)
Exemple #5
0
def to_xml(node: Node, level: int = 0) -> str:
    """
    Render a node and, recursively, its children as indented XML text.

    Args:
        node: node to serialize; its name, attributes, content and children are read
        level: nesting depth; the indent is the module-level ``space`` repeated
            ``level`` times, and 0 marks the root

    Returns:
        The XML text for this node and its subtree, one element per line.

    A root node named "eml" is written as ``eml:eml`` with the EML 2.2.0
    namespace/schema boilerplate appended to its attributes.  String content is
    escaped on a local copy, so serializing never alters ``node.content``.
    """
    boiler = ('xmlns:eml="https://eml.ecoinformatics.org/eml-2.2.0" '
              'xmlns:stmml="http://www.xml-cml.org/schema/stmml-1.2" '
              'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
              'xsi:schemaLocation="https://eml.ecoinformatics.org/eml-2.2.0 '
              'https://nis.lternet.edu/schemas/EML/eml-2.2.0/xsd/eml.xsd"')
    name = node.name
    # NOTE(review): attribute values are written verbatim (not escaped).
    attributes = "".join(
        ' {0}="{1}"'.format(attribute, node.attributes[attribute])
        for attribute in node.attributes)
    if level == 0:
        indent = ""
        if name == "eml":
            name = node.name + ":" + node.name
            attributes += " " + boiler
    else:
        indent = space * level
    open_tag = "<" + name + attributes + ">"
    close_tag = "</" + name + ">"

    pieces = [indent, open_tag]
    content = node.content
    has_children = len(node.children) > 0

    if content is not None:
        if isinstance(content, str):
            # if it hasn't been escaped already, escape it
            if all(x not in content for x in ('&amp;', '&lt;', '&gt;')):
                content = escape(content)
                # Hopefully, this is a temporary hack. Need to figure out a better way...
                # The problem is that <para> tags are treated idiosyncratically because their
                # rules aren't fully supported. They appear within node content, unlike other tags.
                content = content.replace('&lt;para&gt;',
                                          '<para>').replace(
                                              '&lt;/para&gt;',
                                              '</para>')
        # An element with content is closed on the same line; any children are
        # still emitted afterwards, exactly as before.
        pieces.append(str(content) + close_tag + "\n")
    elif has_children:
        pieces.append("\n")

    pieces.extend(to_xml(child, level + 1) for child in node.children)

    if content is None:
        if has_children:
            pieces.append(indent)
        pieces.append(close_tag + "\n")

    return "".join(pieces)
Exemple #6
0
def test_taxonid():
    """A taxonId node validates only once it carries a provider attribute."""
    taxon_id = Node(names.TAXONID, parent=None)
    taxon_id.content = "42"

    # No provider attribute yet: validation must be rejected.
    with pytest.raises(MetapypeRuleError):
        validate.node(taxon_id)

    # Once the provider is present the same node validates cleanly.
    taxon_id.add_attribute("provider", "https://www.itis.gov")
    validate.node(taxon_id)
Exemple #7
0
 def test_to_json(self):
     """``mp_io.to_json`` returns a string for a small eml/access tree."""
     eml = Node(names.EML)
     for attr_name, attr_value in (('packageId', 'edi.23.1'),
                                   ('system', 'metapype')):
         eml.add_attribute(attr_name, attr_value)

     access = Node(names.ACCESS, parent=eml)
     for attr_name, attr_value in (('authSystem', 'pasta'),
                                   ('order', 'allowFirst')):
         access.add_attribute(attr_name, attr_value)
     eml.add_child(access)

     allow = Node(names.ALLOW, parent=access)
     access.add_child(allow)

     # principal and permission leaves, in document order
     leaf_specs = (
         (names.PRINCIPAL, 'uid=gaucho,o=EDI,dc=edirepository,dc=org'),
         (names.PERMISSION, 'all'),
     )
     for leaf_name, leaf_text in leaf_specs:
         leaf = Node(leaf_name, parent=allow)
         leaf.content = leaf_text
         allow.add_child(leaf)

     serialized = mp_io.to_json(eml)
     self.assertIsInstance(serialized, str)
Exemple #8
0
def test_delete_node_no_children():
    """Deleting a node with ``children=False`` leaves descendants in the store."""
    eml = Node(names.EML)
    for attr_name, attr_value in (("packageId", "edi.23.1"),
                                  ("system", "metapype")):
        eml.add_attribute(attr_name, attr_value)

    access = Node(names.ACCESS, parent=eml)
    for attr_name, attr_value in (("authSystem", "pasta"),
                                  ("order", "allowFirst")):
        access.add_attribute(attr_name, attr_value)
    eml.add_child(access)

    allow = Node(names.ALLOW, parent=access)
    access.add_child(allow)

    leaves = {}
    leaf_specs = (
        (names.PRINCIPAL, "uid=gaucho,o=EDI,dc=edirepository,dc=org"),
        (names.PERMISSION, "all"),
    )
    for leaf_name, leaf_text in leaf_specs:
        leaf = Node(leaf_name, parent=allow)
        leaf.content = leaf_text
        allow.add_child(leaf)
        leaves[leaf_name] = leaf
    principal = leaves[names.PRINCIPAL]

    # The store hands back the very same instance for the principal's id.
    looked_up = Node.get_node_instance(principal.id)
    assert principal is looked_up

    # Remove only the root; the deep descendant must still be registered.
    Node.delete_node_instance(eml.id, children=False)
    assert principal.id in Node.store
Exemple #9
0
 def test_delete_node_no_children(self):
     """Deleting the root with ``children=False`` keeps descendants stored."""
     # Root element and its attributes
     eml = Node(names.EML)
     eml.add_attribute('packageId', 'edi.23.1')
     eml.add_attribute('system', 'metapype')

     # access element
     access = Node(names.ACCESS, parent=eml)
     access.add_attribute('authSystem', 'pasta')
     access.add_attribute('order', 'allowFirst')
     eml.add_child(access)

     # allow rule with its principal and permission
     allow = Node(names.ALLOW, parent=access)
     access.add_child(allow)

     principal = Node(names.PRINCIPAL, parent=allow)
     principal.content = 'uid=gaucho,o=EDI,dc=edirepository,dc=org'
     allow.add_child(principal)

     permission = Node(names.PERMISSION, parent=allow)
     permission.content = 'all'
     allow.add_child(permission)

     # The store returns the identical principal instance.
     self.assertIs(principal, Node.get_node_instance(principal.id))

     # Only the root is deleted; the principal stays in the store.
     Node.delete_node_instance(eml.id, children=False)
     self.assertIn(principal.id, Node.store)
Exemple #10
0
def test_replace_child():
    """``replace_child`` swaps same-typed children and rejects other types."""
    individual_name = Node(names.INDIVIDUALNAME)

    original = Node(names.SURNAME, parent=individual_name)
    original.content = "Gaucho"
    individual_name.add_child(original)

    # The replacement names the same parent but is not attached yet.
    replacement = Node(names.SURNAME, parent=individual_name)
    replacement.content = "Carroll"

    assert original in individual_name.children
    assert replacement not in individual_name.children

    individual_name.replace_child(old_child=original, new_child=replacement)

    assert replacement in individual_name.children
    assert original not in individual_name.children

    # The replaced node must also be gone from the node store.
    assert original.id not in Node.store

    # A givenName cannot stand in for a surName.
    wrong_type = Node(names.GIVENNAME)
    wrong_type.content = "Chase"
    with pytest.raises(ValueError):
        individual_name.replace_child(old_child=replacement,
                                      new_child=wrong_type)
Exemple #11
0
def test_missing_numerical_unit():
    """A childless unit node is invalid; adding a customUnit makes it valid."""
    unit = Node(names.UNIT, parent=None)
    unit_rule = rule.get_rule(names.UNIT)
    with pytest.raises(MetapypeRuleError):
        unit_rule.validate_rule(unit)

    # Collecting errors instead of raising should report exactly one problem.
    collected = []
    validate.tree(unit, collected)
    assert len(collected) == 1
    err_code, _msg, _node, *details = collected[0]
    assert err_code == ValidationError.MIN_CHOICE_UNMET
    assert details[0] == 'unit'

    # Supplying a customUnit child satisfies the rule.
    custom_unit = Node(names.CUSTOMUNIT, parent=unit)
    custom_unit.content = 'bushels per parsec'
    unit.add_child(custom_unit)
    validate.tree(unit)
Exemple #12
0
def from_xml_element(xml_elem, metapype_node, metapype_parent):
    """
    Populate a metapype node from an xml element, then recurse into its children.

    Args:
        xml_elem:  the xml element to convert.
        metapype_node:  an existing node to fill in for xml_elem, or None to
                        have one created (recursive calls always pass None).
        metapype_parent:  the node the result is attached to; None means the
                          node is not attached to anything.
    """
    current = metapype_node
    if current is None:
        current = Node(name=xml_elem.tag, parent=metapype_parent)

    # Attribute names containing '}' are skipped (presumably
    # namespace-qualified names in "{uri}name" form).
    for attr_name, attr_value in xml_elem.attrib.items():
        if '}' in attr_name:
            continue
        current.add_attribute(attr_name, attr_value)

    # Empty or missing text leaves the node's content untouched.
    text = xml_elem.text
    if text:
        current.content = text

    if metapype_parent is not None:
        metapype_parent.add_child(current)

    for child_elem in xml_elem:
        from_xml_element(child_elem, None, current)
Exemple #13
0
def from_json(json_node: dict, parent: Node = None) -> Node:
    """
    Recursively traverse Python JSON and build a metapype model
    instance.

    The input structure is only read, never modified, so the same parsed
    JSON can be converted more than once.

    Args:
        json_node: JSON converted to Python structure; a dict mapping the node
            name to a body list of the form
            [{'id': ...}, {'attributes': ...}, {'content': ...}, {'children': ...}]
        parent: parent node reference to child

    Returns:
        Node: Child node of decomposed and parsed JSON

    Raises:
        KeyError: if json_node is empty

    """
    if not json_node:
        raise KeyError('from_json(): dictionary is empty')

    # Take the name/body pair without popping it. dict.popitem() would empty
    # the caller's structure as a side effect. The last item is used, which is
    # what popitem() returned.
    name, body = list(json_node.items())[-1]
    node = Node(name, id=body[0]['id'])

    if parent is not None:
        node.parent = parent

    attributes = body[1]['attributes']
    if attributes is not None:
        for attribute in attributes:
            node.add_attribute(attribute, attributes[attribute])

    content = body[2]['content']
    if content is not None:
        node.content = content

    # Treat a null children entry like an empty list, mirroring the None
    # handling of attributes and content above.
    children = body[3]['children']
    for child in children or ():
        child_node = from_json(child, node)
        node.add_child(child_node)

    return node
Exemple #14
0
def test_bounding_altitudes():
    """boundingAltitudes validates only with all three children present and filled."""
    bounding_coordinates = Node(names.BOUNDINGCOORDINATES, parent=None)

    def assert_tree_invalid():
        # The tree in its current state must be rejected by the validator.
        with pytest.raises(MetapypeRuleError):
            validate.tree(bounding_coordinates)

    edge_names = (
        names.WESTBOUNDINGCOORDINATE,
        names.EASTBOUNDINGCOORDINATE,
        names.NORTHBOUNDINGCOORDINATE,
        names.SOUTHBOUNDINGCOORDINATE,
    )
    edges = [Node(edge_name, parent=bounding_coordinates)
             for edge_name in edge_names]
    for edge in edges:
        edge.content = "0.0"
    for edge in edges:
        bounding_coordinates.add_child(edge)

    # Four coordinates and no boundingAltitudes: valid.
    validate.node(bounding_coordinates)

    # An empty boundingAltitudes is rejected ...
    bounding_altitudes = Node(names.BOUNDINGALTITUDES,
                              parent=bounding_coordinates)
    bounding_coordinates.add_child(bounding_altitudes)
    assert_tree_invalid()

    # ... as is one holding only altitudeMinimum, with or without content.
    altitude_minimum = Node(names.ALTITUDEMINIMUM, parent=bounding_altitudes)
    bounding_altitudes.add_child(altitude_minimum)
    assert_tree_invalid()
    altitude_minimum.content = "0.0"
    assert_tree_invalid()

    # All three children present, but two of them still lack content.
    altitude_maximum = Node(names.ALTITUDEMAXIMUM, parent=bounding_altitudes)
    bounding_altitudes.add_child(altitude_maximum)
    altitude_units = Node(names.ALTITUDEUNITS, parent=bounding_altitudes)
    bounding_altitudes.add_child(altitude_units)
    assert_tree_invalid()

    # Everything filled in: the tree validates.
    altitude_maximum.content = "1000.0"
    altitude_units.content = "meter"
    validate.tree(bounding_coordinates)
Exemple #15
0
def keyword(filename=None, node_id=None):
    """
    Request handler for the keyword add/edit page of an EML document.

    On POST with the cancel button, redirects to the keyword-select page.
    On any other POST, saves the submitted keyword when the form is dirty
    (replacing the node identified by node_id, or adding a new keyword node)
    and then redirects. Otherwise (GET) it fills the form, either for a new
    keyword or from the existing keyword node whose id equals node_id, and
    renders 'keyword.html'.

    Args:
        filename: name of the EML document, passed to load_eml and
            save_both_formats.
        node_id: id of the keyword node being edited; '1' appears to be the
            sentinel for "new keyword" (TODO confirm against callers).

    Returns:
        A redirect response or the rendered template.

    Raises:
        Exception: if node_id is given but no such node is in the node store.
    """
    eml_node = load_eml(filename=filename)
    dataset_node = eml_node.find_child(names.DATASET)

    # Make sure there is a dataset node to hang keyword sets on.
    if not dataset_node:
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    form = KeywordForm(filename=filename, node_id=node_id)
    form.init_keywords()

    # Process POST
    # Cancel: go back to the keyword list without touching the document.
    if request.method == 'POST' and BTN_CANCEL in request.form:
        url = url_for(PAGE_KEYWORD_SELECT, filename=filename)
        return redirect(url)

    # if request.method == 'POST' and form.validate_on_submit():
    if request.method == 'POST':
        form_value = request.form
        form_dict = form_value.to_dict(flat=False)
        # Default destination; a submitted value may redirect elsewhere
        # (presumably hidden navigation buttons; see check_val_for_hidden_buttons).
        new_page = PAGE_KEYWORD_SELECT
        if form_dict:
            for key in form_dict:
                val = form_dict[key][0]  # value is the first list element
                new_page = check_val_for_hidden_buttons(
                    val, new_page, new_page)

        # Only save when is_dirty_form reports the form as changed.
        submit_type = None
        if is_dirty_form(form):
            submit_type = 'Save Changes'
        # flash(f'submit_type: {submit_type}')

        if submit_type == 'Save Changes':
            # NOTE: this local shadows the enclosing function's name.
            keyword = form.keyword.data
            keyword_type = form.keyword_type.data
            keyword_thesaurus = form.keyword_thesaurus.data

            # If no thesaurus was specified, see if the LTER Controlled Vocabulary applies
            if not keyword_thesaurus:
                lter_keywords = get_keywords('LTER')
                if keyword in lter_keywords:
                    keyword_thesaurus = 'LTER Controlled Vocabulary'

            keyword_set_nodes = []
            eml_node.find_all_descendants(names.KEYWORDSET, keyword_set_nodes)

            # Find an existing keywordSet for this thesaurus: either its
            # thesaurus content matches, or both the set and the form have none.
            keyword_set_node = None
            for kws_node in keyword_set_nodes:
                keyword_thesaurus_node = kws_node.find_child(
                    names.KEYWORDTHESAURUS)
                if keyword_thesaurus_node and keyword_thesaurus_node.content == keyword_thesaurus:
                    keyword_set_node = kws_node
                    break
                if not keyword_thesaurus_node and not keyword_thesaurus:
                    keyword_set_node = kws_node
                    break
            # No matching set: create one under the dataset, with a
            # keywordThesaurus child when a thesaurus is known.
            if not keyword_set_node:
                keyword_set_node = Node(names.KEYWORDSET, parent=dataset_node)
                add_child(dataset_node, keyword_set_node)
                if keyword_thesaurus:
                    keyword_thesaurus_node = Node(names.KEYWORDTHESAURUS,
                                                  parent=keyword_set_node)
                    keyword_thesaurus_node.content = keyword_thesaurus
                    # Appended directly to the children list rather than via add_child.
                    keyword_set_node.children.append(keyword_thesaurus_node)

            keyword_node = Node(names.KEYWORD, parent=keyword_set_node)
            create_keyword(keyword_node, keyword, keyword_type)

            # A node_id longer than one character is treated as an existing
            # node to replace; otherwise the keyword is added as a new child.
            if node_id and len(node_id) != 1:
                old_keyword_node = Node.get_node_instance(node_id)

                if old_keyword_node:
                    # The replacement goes into the old keyword's parent,
                    # which is not necessarily keyword_set_node.
                    keyword_parent_node = old_keyword_node.parent
                    keyword_parent_node.replace_child(old_keyword_node,
                                                      keyword_node)
                else:
                    msg = f"No keyword node found in the node store with node id {node_id}"
                    raise Exception(msg)
            else:
                add_child(keyword_set_node, keyword_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        url = url_for(new_page, filename=filename)
        return redirect(url)

    # Process GET
    if node_id == '1':
        # New keyword: nothing to populate from the document.
        form.init_md5()
    else:
        # Editing: locate the keyword node with this id and populate the
        # form from it and from its set's thesaurus node (which may be None).
        keyword_set_nodes = []
        eml_node.find_all_descendants(names.KEYWORDSET, keyword_set_nodes)
        found = False
        for keyword_set_node in keyword_set_nodes:
            keyword_nodes = keyword_set_node.find_all_children(names.KEYWORD)
            keyword_thesaurus_node = keyword_set_node.find_child(
                names.KEYWORDTHESAURUS)
            if keyword_nodes:
                for kw_node in keyword_nodes:
                    if node_id == kw_node.id:
                        populate_keyword_form(form, kw_node,
                                              keyword_thesaurus_node)
                        found = True
                        break
            if found:
                break

    set_current_page('keyword')
    # NOTE: `help` shadows the builtin of the same name within this function.
    help = [get_help('keywords')]
    return render_template('keyword.html',
                           title='Keyword',
                           form=form,
                           filename=filename,
                           help=help)
Exemple #16
0
def load_data_table(uploads_path: str = None,
                    data_file: str = '',
                    num_header_rows: str = '1',
                    delimiter: str = ',',
                    quote_char: str = '"'):
    """
    Build a dataTable subtree describing an uploaded CSV file.

    The physical description (object name, size, MD5, text format) is created
    first; the file is then read with pandas and one attribute node is created
    per column, with a measurement scale chosen from the inferred column type.

    Args:
        uploads_path: directory holding the uploaded file
        data_file: name of the CSV file inside uploads_path
        num_header_rows: value stored as the numHeaderLines content
        delimiter: field delimiter, also passed to pandas as ``sep``
        quote_char: quote character, also passed to pandas as ``quotechar``

    Returns:
        A 6-tuple ``(datatable_node, column_vartypes, column_names,
        column_categorical_codes, data_frame, missing_value_code)``. The three
        lists are parallel, one entry per column. ``missing_value_code`` is the
        value guessed for the LAST column processed, or None if no column was
        processed.

    Raises:
        DataTableError: if the file is empty or pandas cannot parse it.
    """
    log_info(f'Entering load_data_table: {data_file}')

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = metapype_client.new_child_node(names.DATATABLE,
                                                    parent=None)

    physical_node = metapype_client.new_child_node(names.PHYSICAL,
                                                   parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = metapype_client.new_child_node(names.ENTITYNAME,
                                                      parent=datatable_node)
    entity_name = entity_name_from_data_file(data_file)
    entity_name_node.content = entity_name

    object_name_node = metapype_client.new_child_node(names.OBJECTNAME,
                                                      parent=physical_node)
    object_name_node.content = data_file

    # Size and checksum are recorded only when the helpers return a value.
    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = metapype_client.new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = Node(names.AUTHENTICATION, parent=physical_node)
        metapype_client.add_child(physical_node, hash_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / textFormat / {numHeaderLines, numFooterLines, simpleDelimited}
    data_format_node = Node(names.DATAFORMAT, parent=physical_node)
    metapype_client.add_child(physical_node, data_format_node)

    text_format_node = Node(names.TEXTFORMAT, parent=data_format_node)
    metapype_client.add_child(data_format_node, text_format_node)

    num_header_lines_node = Node(names.NUMHEADERLINES, parent=text_format_node)
    metapype_client.add_child(text_format_node, num_header_lines_node)
    num_header_lines_node.content = num_header_rows

    num_footer_lines_node = Node(names.NUMFOOTERLINES, parent=text_format_node)
    metapype_client.add_child(text_format_node, num_footer_lines_node)
    num_footer_lines_node.content = '0'

    simple_delimited_node = Node(names.SIMPLEDELIMITED,
                                 parent=text_format_node)
    metapype_client.add_child(text_format_node, simple_delimited_node)

    field_delimiter_node = Node(names.FIELDDELIMITER,
                                parent=simple_delimited_node)
    metapype_client.add_child(simple_delimited_node, field_delimiter_node)
    field_delimiter_node.content = delimiter

    quote_character_node = Node(names.QUOTECHARACTER,
                                parent=simple_delimited_node)
    metapype_client.add_child(simple_delimited_node, quote_character_node)
    quote_character_node.content = quote_char

    # Checked before the file is read: next() below would otherwise fail on
    # an empty file.
    if file_size == 0:
        raise DataTableError("The CSV file is empty.")

    check_column_name_uniqueness(full_path, delimiter)

    # Read one line so the file object records which line ending it saw, then
    # store that ending in escaped form (e.g. \n or \r\n) as the record delimiter.
    # NOTE(review): if the first line has no line ending, newlines is None and
    # the stored content becomes the text "None"; confirm the desired fallback.
    with open(full_path) as csv_file:
        next(csv_file)
        line_terminator = repr(csv_file.newlines).replace("'", "")
    record_delimiter_node = Node(names.RECORDDELIMITER,
                                 parent=text_format_node)
    metapype_client.add_child(text_format_node, record_delimiter_node)
    record_delimiter_node.content = line_terminator

    try:
        data_frame = pd.read_csv(full_path,
                                 encoding='utf8',
                                 sep=delimiter,
                                 quotechar=quote_char)
    except pd.errors.ParserError as e:
        # Chain the cause so the pandas traceback is kept for debugging.
        raise DataTableError(e.args[0]) from e

    column_vartypes = []
    column_names = []
    column_categorical_codes = []
    # Bound up front so the return below cannot hit an unbound local when no
    # column is processed.
    missing_value_code = None

    if data_frame is not None:

        number_of_records = Node(names.NUMBEROFRECORDS, parent=datatable_node)
        metapype_client.add_child(datatable_node, number_of_records)
        record_count = data_frame.shape[0]
        number_of_records.content = f'{record_count}'

        attribute_list_node = Node(names.ATTRIBUTELIST, parent=datatable_node)
        metapype_client.add_child(datatable_node, attribute_list_node)

        columns = data_frame.columns

        for col in columns:
            # dtype is inferred from every row except the first.
            dtype = data_frame[col][1:].infer_objects().dtype

            var_type, codes = infer_col_type(data_frame, col)
            log_info(f'col: {col}  var_type: {var_type}')

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = metapype_client.new_child_node(
                names.ATTRIBUTE, attribute_list_node)
            attribute_name_node = metapype_client.new_child_node(
                names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            att_label_node = Node(names.ATTRIBUTELABEL, parent=attribute_node)
            metapype_client.add_child(attribute_node, att_label_node)
            att_label_node.content = col

            # The attribute definition is created empty.
            att_def_node = Node(names.ATTRIBUTEDEFINITION,
                                parent=attribute_node)
            metapype_client.add_child(attribute_node, att_def_node)

            ms_node = Node(names.MEASUREMENTSCALE, parent=attribute_node)
            metapype_client.add_child(attribute_node, ms_node)

            missing_value_code = guess_missing_value_code(
                full_path, delimiter, quote_char, col)

            if missing_value_code:
                mv_node = Node(names.MISSINGVALUECODE, parent=attribute_node)
                metapype_client.add_child(attribute_node, mv_node)
                code_node = Node(names.CODE, parent=mv_node)
                metapype_client.add_child(mv_node, code_node)
                code_node.content = missing_value_code

            if var_type == metapype_client.VariableType.CATEGORICAL:
                codes = force_categorical_codes(attribute_node, dtype, codes)
                codes = force_missing_value_code(missing_value_code, dtype,
                                                 codes)

                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = metapype_client.new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = metapype_client.new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = metapype_client.new_child_node(
                        names.CODE, code_definition_node)
                    code_node.content = str(code)
                    # The definition is created empty.
                    metapype_client.new_child_node(
                        names.DEFINITION, code_definition_node)

            elif var_type == metapype_client.VariableType.NUMERICAL:
                # ratio / numericDomain
                ratio_node = metapype_client.new_child_node(
                    names.RATIO, ms_node)
                numeric_domain_node = metapype_client.new_child_node(
                    names.NUMERICDOMAIN, ratio_node)
                number_type = 'real'
                if str(dtype).startswith(
                        'int'):  # FIXME - we can do better than this
                    number_type = 'integer'
                number_type_node = metapype_client.new_child_node(
                    names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = number_type
                # ratio / unit, created empty
                metapype_client.new_child_node(names.UNIT, ratio_node)

            elif var_type == metapype_client.VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = metapype_client.new_child_node(
                    names.TEXTDOMAIN, non_numeric_domain_node)
                # The definition is created empty.
                metapype_client.new_child_node(
                    names.DEFINITION, text_domain_node)

            elif var_type == metapype_client.VariableType.DATETIME:
                # dateTime / formatString
                datetime_node = Node(names.DATETIME, parent=ms_node)
                metapype_client.add_child(ms_node, datetime_node)

                format_string_node = Node(names.FORMATSTRING,
                                          parent=datetime_node)
                metapype_client.add_child(datetime_node, format_string_node)
                # For datetime columns the value from infer_col_type is stored
                # as the format string.
                format_string_node.content = codes

    return datatable_node, column_vartypes, column_names, column_categorical_codes, data_frame, missing_value_code
Exemple #17
0
def node():
    """
    Build a sample EML tree and return its root node.

    The tree contains an access section (one allow and one deny rule), a
    dataset (title, creator, pubDate, abstract, keywordSet, coverage and
    contact) and an additionalMetadata section.

    Returns: Node (the root eml node)

    """

    def add(parent, name, content=None, attributes=None):
        # Set the child's parent link and register it in the parent's child
        # list in one place, so the two can never disagree.
        child = Node(name, parent=parent)
        for attribute_name, attribute_value in (attributes or {}).items():
            child.add_attribute(attribute_name, attribute_value)
        if content is not None:
            child.content = content
        parent.add_child(child)
        return child

    eml = Node(names.EML)
    eml.add_attribute("packageId", "edi.23.1")
    eml.add_attribute("system", "metapype")

    # access: one allow rule and one deny rule
    access = add(
        eml,
        names.ACCESS,
        attributes={"authSystem": "pasta", "order": "allowFirst"},
    )

    allow = add(access, names.ALLOW)
    add(allow, names.PRINCIPAL, "uid=gaucho,o=EDI,dc=edirepository,dc=org")
    add(allow, names.PERMISSION, "all")

    deny = add(access, names.DENY)
    add(deny, names.PRINCIPAL, "public")
    add(deny, names.PERMISSION, "write")

    # dataset
    dataset = add(eml, names.DATASET)
    add(dataset, names.TITLE, "Green sea turtle counts: Tortuga Island 20017")

    # dataset / creator
    creator = add(dataset, names.CREATOR)

    individual_name_creator = add(creator, names.INDIVIDUALNAME)
    add(individual_name_creator, names.SALUTATION, "Mr.")
    add(individual_name_creator, names.GIVENNAME, "Chase")
    sur_name_creator = add(individual_name_creator, names.SURNAME, "Gaucho")
    add(sur_name_creator, names.VALUE, "Gaucho", {"lang": "en"})

    address = add(creator, names.ADDRESS)
    add(address, names.DELIVERYPOINT, "100 Maple St")
    add(address, names.DELIVERYPOINT, "Apt. 10-B")
    add(address, names.CITY, "Gotham City")
    add(address, names.ADMINISTRATIVEAREA, "New York")
    add(address, names.POSTALCODE, "11111")
    add(address, names.COUNTRY, "USA")

    add(creator, names.PHONE, "555-555-5555", {"phonetype": "voice"})
    add(creator, names.ELECTRONICMAILADDRESS, "*****@*****.**")
    add(creator, names.ONLINEURL, "https://www.somecollege.edu/people/cgaucho")
    add(
        creator,
        names.USERID,
        "uid=jgaucho,o=EDI,dc=edirepository,dc=org",
        {"directory": "ldap:///ldap.edirepository.org/dc=edirepository,dc=org"},
    )

    add(dataset, names.PUBDATE, "2018")

    # dataset / abstract / section / para
    abstract = add(dataset, names.ABSTRACT, attributes={"lang": "en"})
    section = add(abstract, names.SECTION)
    # para is a child of section, so section (not abstract) is its parent
    add(section, names.PARA, "para section")

    # dataset / keywordSet
    keyword_set = add(dataset, names.KEYWORDSET)
    add(keyword_set, names.KEYWORD, "phytoplankton ecology")
    add(keyword_set, names.KEYWORD, "lake", {"keywordType": "place"})
    add(keyword_set, names.KEYWORDTHESAURUS, "IRIS keyword thesaurus")

    # dataset / coverage / taxonomicCoverage
    coverage = add(dataset, names.COVERAGE)
    taxonomic_coverage = add(coverage, names.TAXONOMICCOVERAGE)

    general_taxonomic_coverage = add(
        taxonomic_coverage, names.GENERALTAXONOMICCOVERAGE
    )
    # NOTE: the backslash continuations inside this literal keep the leading
    # spaces of each continuation line as part of the content; the literal is
    # left untouched so the stored text does not change.
    general_taxonomic_coverage.content = "All vascular plants were \
        identified to family or species, mosses and lichens were \
        identified as moss or lichen."

    # Genus classification with a nested species classification
    taxonomic_classification_genus = add(
        taxonomic_coverage, names.TAXONOMICCLASSIFICATION
    )
    add(taxonomic_classification_genus, names.TAXONRANKNAME, "Genus")
    add(taxonomic_classification_genus, names.TAXONRANKVALUE, "Escherichia")

    taxonomic_classification_species = add(
        taxonomic_classification_genus, names.TAXONOMICCLASSIFICATION
    )
    add(taxonomic_classification_species, names.TAXONRANKNAME, "Species")
    add(taxonomic_classification_species, names.TAXONRANKVALUE, "coli")

    # dataset / contact
    contact = add(dataset, names.CONTACT)
    individual_name_contact = add(contact, names.INDIVIDUALNAME)
    add(individual_name_contact, names.SURNAME, "Gaucho")

    # additionalMetadata / metadata / fictitious
    additional_metadata = add(eml, names.ADDITIONALMETADATA)
    metadata = add(additional_metadata, names.METADATA)
    # "fictitious" is a literal element name, not a names.* constant
    add(metadata, "fictitious", "<tag>more fictitious content</tag>")

    return eml
Exemple #18
0
def load_other_entity(dataset_node: Node = None,
                      uploads_path: str = None,
                      data_file: str = '',
                      node_id: str = None):
    """
    Create or update the otherEntity node describing an uploaded file.

    When node_id names an existing node (anything other than None or '1'),
    that node is updated in place; otherwise a new otherEntity node is
    created under dataset_node. On an update, any descendant that should
    hold a value but is missing is created instead of being dereferenced
    as None.

    Args:
        dataset_node: parent for a newly created otherEntity node
        uploads_path: directory that holds the uploaded file
        data_file: name of the uploaded file within uploads_path
        node_id: id of an existing otherEntity node to update; None or '1'
            means a new node is created

    Returns: Node (the otherEntity node)

    """
    full_path = f'{uploads_path}/{data_file}'

    doing_reupload = node_id is not None and node_id != '1'

    def existing(name):
        # Only a re-upload has nodes to look up; a fresh entity has none yet.
        if doing_reupload:
            return other_entity_node.find_descendant(name)
        return None

    def create(name, parent):
        child = Node(name, parent=parent)
        metapype_client.add_child(parent, child)
        return child

    if doing_reupload:
        other_entity_node = Node.get_node_instance(node_id)
    else:
        other_entity_node = create(names.OTHERENTITY, dataset_node)

    # physical is the parent of objectName, size, authentication and
    # dataFormat, so it is needed whenever one of those has to be created.
    physical_node = existing(names.PHYSICAL)
    if physical_node is None:
        physical_node = create(names.PHYSICAL, other_entity_node)
        physical_node.add_attribute('system', 'EDI')

    # The entity name is only set for a new entity; a re-upload leaves the
    # existing entity name untouched.
    if not doing_reupload:
        entity_name_node = create(names.ENTITYNAME, other_entity_node)
        entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = existing(names.OBJECTNAME)
    if object_name_node is None:
        object_name_node = create(names.OBJECTNAME, physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = existing(names.SIZE)
        if size_node is None:
            size_node = create(names.SIZE, physical_node)
            size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = existing(names.AUTHENTICATION)
        if hash_node is None:
            hash_node = create(names.AUTHENTICATION, physical_node)
            hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / externallyDefinedFormat / formatName: the chain is only
    # built (reusing whatever part of it already exists) when formatName
    # itself is missing.
    format_name_node = existing(names.FORMATNAME)
    if format_name_node is None:
        data_format_node = existing(names.DATAFORMAT)
        if data_format_node is None:
            data_format_node = create(names.DATAFORMAT, physical_node)

        externally_defined_format_node = existing(
            names.EXTERNALLYDEFINEDFORMAT)
        if externally_defined_format_node is None:
            externally_defined_format_node = create(
                names.EXTERNALLYDEFINEDFORMAT, data_format_node)

        format_name_node = create(names.FORMATNAME,
                                  externally_defined_format_node)
    format_name_node.content = format_name_from_data_file(data_file)

    entity_type_node = existing(names.ENTITYTYPE)
    if entity_type_node is None:
        entity_type_node = metapype_client.new_child_node(
            names.ENTITYTYPE, parent=other_entity_node)
    # The entity type is given the same derived value as the format name
    entity_type_node.content = format_name_from_data_file(data_file)

    user_data.add_data_table_upload_filename(data_file)

    # NOTE(review): presumably removes the uploaded files now that their
    # metadata has been captured -- confirm against delete_data_files
    delete_data_files(uploads_path)

    return other_entity_node
Exemple #19
0
def load_data_table(dataset_node: Node = None,
                    uploads_path: str = None,
                    data_file: str = ''):
    """
    Build a dataTable node for a CSV file and attach it to dataset_node.

    The table gets physical metadata (object name, size, text format with
    one header line and no footer lines), a record count, and one attribute
    per CSV column whose measurement scale is chosen from the column's
    pandas dtype.

    Args:
        dataset_node: node that becomes the parent of the new dataTable
        uploads_path: directory that holds the CSV file
        data_file: name of the CSV file within uploads_path

    Returns: Node (the dataTable node)

    """

    def attach(name, parent):
        # Create a node under parent and register it as parent's child
        child = Node(name, parent=parent)
        add_child(parent, child)
        return child

    csv_path = f'{uploads_path}/{data_file}'

    datatable_node = attach(names.DATATABLE, dataset_node)

    physical_node = attach(names.PHYSICAL, datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = attach(names.OBJECTNAME, physical_node)
    object_name_node.content = data_file

    size_in_bytes = get_file_size(csv_path)
    if size_in_bytes is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(size_in_bytes)

    # physical / dataFormat / textFormat with fixed header and footer counts
    data_format_node = attach(names.DATAFORMAT, physical_node)
    text_format_node = attach(names.TEXTFORMAT, data_format_node)

    header_lines_node = attach(names.NUMHEADERLINES, text_format_node)
    header_lines_node.content = '1'

    footer_lines_node = attach(names.NUMFOOTERLINES, text_format_node)
    footer_lines_node.content = '0'

    table = pd.read_csv(csv_path, comment='#')

    if table is not None:

        record_count_node = attach(names.NUMBEROFRECORDS, datatable_node)
        record_count_node.content = f'{table.shape[0]}'

        attribute_list_node = attach(names.ATTRIBUTELIST, datatable_node)

        for col in table.columns:
            dtype = str(table[col].dtype)
            print(f'{col}: {dtype}')

            attribute_node = attach(names.ATTRIBUTE, attribute_list_node)

            name_node = attach(names.ATTRIBUTENAME, attribute_node)
            name_node.content = col

            label_node = attach(names.ATTRIBUTELABEL, attribute_node)
            label_node.content = col

            definition_node = attach(names.ATTRIBUTEDEFINITION,
                                     attribute_node)
            definition_node.content = f'Attribute definition for {col}'

            ms_node = attach(names.MEASUREMENTSCALE, attribute_node)

            # Columns of any other dtype keep an empty measurementScale
            if dtype == 'object' and is_datetime_column(col):
                # dateTime / formatString (left blank)
                datetime_node = attach(names.DATETIME, ms_node)
                format_string_node = attach(names.FORMATSTRING,
                                            datetime_node)
                format_string_node.content = ''

            elif dtype in ('bool', 'object'):
                # nominal / nonNumericDomain
                nominal_node = attach(names.NOMINAL, ms_node)
                attach(names.NONNUMERICDOMAIN, nominal_node)

            elif dtype.startswith(('float', 'int')):
                # ratio / numericDomain / numberType
                ratio_node = attach(names.RATIO, ms_node)
                numeric_domain_node = attach(names.NUMERICDOMAIN, ratio_node)
                number_type_node = attach(names.NUMBERTYPE,
                                          numeric_domain_node)
                number_type_node.content = (
                    'integer' if dtype.startswith('int') else 'real')

    delete_data_files(uploads_path)

    return datatable_node
Exemple #20
0
 def test_copy(self):
     """Validate a givenName node and then a copy of it made with copy()."""
     original = Node(names.GIVENNAME)
     original.content = 'Chase'
     validate.node(original)
     # The copy is validated on its own, independently of the original
     validate.node(original.copy())
Exemple #21
0
def load_data_table(uploads_path: str = None,
                    data_file: str = '',
                    num_header_rows: int = 1,
                    delimiter: str = ',',
                    quote_char: str = '"'):
    """
    Build a stand-alone dataTable node describing a delimited text file.

    The dataTable node is created with no parent. It receives physical
    metadata (object name, size, MD5 checksum, text format details), a
    record count, and one attribute per column whose measurement scale
    follows the variable type reported by infer_col_type.

    Args:
        uploads_path: directory that holds the data file
        data_file: name of the data file within uploads_path
        num_header_rows: value recorded as the number of header lines
        delimiter: field delimiter, recorded and used to parse the file
        quote_char: quote character, recorded and used to parse the file

    Returns: tuple of (dataTable node, list of column variable types,
        list of column names, list of per-column codes from infer_col_type)

    """

    def log_debug(message):
        # A Flask application context is created just for the log call
        if Config.LOG_DEBUG:
            app = Flask(__name__)
            with app.app_context():
                current_app.logger.info(message)

    log_debug('Entering load_data_table')

    full_path = f'{uploads_path}/{data_file}'

    # datatable_node = new_child_node(names.DATATABLE, parent=dataset_node)
    datatable_node = new_child_node(names.DATATABLE, parent=None)

    physical_node = new_child_node(names.PHYSICAL, parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = new_child_node(names.ENTITYNAME, parent=datatable_node)
    entity_name = entity_name_from_data_file(data_file)
    entity_name_node.content = entity_name

    object_name_node = new_child_node(names.OBJECTNAME, parent=physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = Node(names.AUTHENTICATION, parent=physical_node)
        add_child(physical_node, hash_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    data_format_node = Node(names.DATAFORMAT, parent=physical_node)
    add_child(physical_node, data_format_node)

    text_format_node = Node(names.TEXTFORMAT, parent=data_format_node)
    add_child(data_format_node, text_format_node)

    num_header_lines_node = Node(names.NUMHEADERLINES, parent=text_format_node)
    add_child(text_format_node, num_header_lines_node)
    # Stored as text, like every other content value set in this function
    num_header_lines_node.content = str(num_header_rows)

    num_footer_lines_node = Node(names.NUMFOOTERLINES, parent=text_format_node)
    add_child(text_format_node, num_footer_lines_node)
    num_footer_lines_node.content = '0'

    simple_delimited_node = Node(names.SIMPLEDELIMITED,
                                 parent=text_format_node)
    add_child(text_format_node, simple_delimited_node)

    field_delimiter_node = Node(names.FIELDDELIMITER,
                                parent=simple_delimited_node)
    add_child(simple_delimited_node, field_delimiter_node)
    field_delimiter_node.content = delimiter

    quote_character_node = Node(names.QUOTECHARACTER,
                                parent=simple_delimited_node)
    add_child(simple_delimited_node, quote_character_node)
    quote_character_node.content = quote_char

    # Reading one line lets the file object report the line ending it saw;
    # repr() with the quotes stripped turns it into escaped text.
    with open(full_path) as file:
        next(file)
        line_terminator = repr(file.newlines).replace("'", "")
    record_delimiter_node = Node(names.RECORDDELIMITER,
                                 parent=text_format_node)
    add_child(text_format_node, record_delimiter_node)
    record_delimiter_node.content = line_terminator

    data_frame = pd.read_csv(full_path,
                             comment='#',
                             encoding='utf8',
                             sep=delimiter,
                             quotechar=quote_char)

    column_vartypes = []
    column_names = []
    column_categorical_codes = []

    if data_frame is not None:

        number_of_records = Node(names.NUMBEROFRECORDS, parent=datatable_node)
        add_child(datatable_node, number_of_records)
        number_of_records.content = f'{data_frame.shape[0]}'

        attribute_list_node = Node(names.ATTRIBUTELIST, parent=datatable_node)
        add_child(datatable_node, attribute_list_node)

        for col in data_frame.columns:
            # The dtype is inferred from every row except the first one
            dtype = data_frame[col][1:].infer_objects().dtype

            var_type, codes = infer_col_type(data_frame, col)

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = new_child_node(names.ATTRIBUTE,
                                            attribute_list_node)
            attribute_name_node = new_child_node(names.ATTRIBUTENAME,
                                                 attribute_node)
            attribute_name_node.content = col

            att_label_node = Node(names.ATTRIBUTELABEL, parent=attribute_node)
            add_child(attribute_node, att_label_node)
            att_label_node.content = col

            # Created exactly once; the definition text is left empty
            att_def_node = Node(names.ATTRIBUTEDEFINITION,
                                parent=attribute_node)
            add_child(attribute_node, att_def_node)

            ms_node = Node(names.MEASUREMENTSCALE, parent=attribute_node)
            add_child(attribute_node, ms_node)

            if var_type == VariableType.CATEGORICAL:
                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = new_child_node(names.CODE,
                                               code_definition_node)
                    code_node.content = code
                    # Each code gets an empty definition
                    new_child_node(names.DEFINITION, code_definition_node)

            elif var_type == VariableType.NUMERICAL:
                # ratio / numericDomain / numberType, plus an empty unit
                ratio_node = new_child_node(names.RATIO, ms_node)
                numeric_domain_node = new_child_node(names.NUMERICDOMAIN,
                                                     ratio_node)
                number_type = 'real'
                if str(dtype).startswith(
                        'int'):  # FIXME - we can do better than this
                    number_type = 'integer'
                number_type_node = new_child_node(names.NUMBERTYPE,
                                                  numeric_domain_node)
                number_type_node.content = number_type
                new_child_node(names.UNIT, ratio_node)

            elif var_type == VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain / definition
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = new_child_node(names.TEXTDOMAIN,
                                                  non_numeric_domain_node)
                new_child_node(names.DEFINITION, text_domain_node)

            elif var_type == VariableType.DATETIME:
                # dateTime / formatString
                datetime_node = Node(names.DATETIME, parent=ms_node)
                add_child(ms_node, datetime_node)

                format_string_node = Node(names.FORMATSTRING,
                                          parent=datetime_node)
                add_child(datetime_node, format_string_node)
                # For a datetime column, the codes value from infer_col_type
                # is what gets stored as the format string
                format_string_node.content = codes

    log_debug('Leaving load_data_table')

    return datatable_node, column_vartypes, column_names, column_categorical_codes