def _process_nodes(nodes):
    """Yield one couplet for every supported DOM node in ``nodes``.

    An element node becomes a couplet whose left is obtained from its tag name via
    ``_util.get_left_cached``. Its right is normally a set containing the couplets of the
    element's attributes and of its (recursively processed) child nodes. When that set has
    exactly one member and ``_contains_text_node`` accepts it, the right of that single member
    is used directly instead, which removes one layer of couplets.

    A text node becomes a couplet from ``text_left`` to an atom built from its stripped text;
    text that is empty after stripping produces nothing.

    :param nodes: An iterable of DOM nodes; each node must expose ``nodeType`` together with
        the ``ELEMENT_NODE`` and ``TEXT_NODE`` constants.
    :return: A generator of ``_mo.Couplet`` instances.
    :raise AssertionError: If a node is neither an element node nor a text node.
    """
    for node in nodes:
        if node.nodeType == node.ELEMENT_NODE:
            # Attributes and children are both sets of couplets.
            attributes = set(_process_attributes(node))
            children = set(_process_nodes(node.childNodes))  # May include text (text_left).
            members = children.union(attributes)
            # A single member that is a text node is unwrapped (removes one layer of couplets).
            unwrap = len(members) == 1 and _contains_text_node(members)
            left = _util.get_left_cached(node.tagName)
            if unwrap:
                right = _misc.get_single_iter_elem(members).right
            else:
                right = _mo.Set(members, direct_load=True)
            yield _mo.Couplet(left=left, right=right, direct_load=True)
        elif node.nodeType == node.TEXT_NODE:
            text = node.data.strip()
            if text:
                yield _mo.Couplet(
                    left=text_left,
                    right=_mo.Atom(_get_atom_value(text), direct_load=True),
                    direct_load=True)
        else:
            # Raised explicitly (rather than ``assert False``) so that unsupported nodes are
            # still rejected when assertions are stripped with ``python -O``.
            raise AssertionError('Node type not supported: {!r}'.format(node.nodeType))
def _process_nodes(nodes):
    """Convert DOM nodes into couplets, descending into the children of element nodes.

    For an element node the attribute couplets and the couplets of its child nodes are merged
    into one set. If that set consists of exactly one member and ``_contains_text_node``
    accepts it, the element is represented as ``tag -> <right of that member>``; otherwise it
    is represented as ``tag -> <set of all members>``. The left of the element's couplet comes
    from ``_util.get_left_cached(node.tagName)``.

    For a text node the stripped text is wrapped in an atom and paired with ``text_left``;
    nodes whose text is empty after stripping are skipped.

    :param nodes: An iterable of DOM nodes; each node must expose ``nodeType`` together with
        the ``ELEMENT_NODE`` and ``TEXT_NODE`` constants.
    :return: A generator of ``_mo.Couplet`` instances.
    :raise AssertionError: If a node is neither an element node nor a text node.
    """
    for node in nodes:
        if node.nodeType == node.TEXT_NODE:
            stripped_text = node.data.strip()
            if stripped_text:
                atom = _mo.Atom(_get_atom_value(stripped_text), direct_load=True)
                yield _mo.Couplet(left=text_left, right=atom, direct_load=True)
            continue

        if node.nodeType != node.ELEMENT_NODE:
            # An explicit raise instead of ``assert False``: assertions are removed under
            # ``python -O``, which would let unsupported nodes be dropped silently.
            raise AssertionError('Node type not supported: {!r}'.format(node.nodeType))

        # Attributes and children are both sets of couplets.
        attributes = set(_process_attributes(node))
        children = set(_process_nodes(node.childNodes))  # May include text (text_left).
        content = children.union(attributes)
        if len(content) == 1 and _contains_text_node(content):
            # We have a single child that is a text node. Remove one layer of couplets.
            yield _mo.Couplet(
                left=_util.get_left_cached(node.tagName),
                right=_miscellaneous.get_single_iter_elem(content).right,
                direct_load=True)
        else:
            yield _mo.Couplet(
                left=_util.get_left_cached(node.tagName),
                right=_mo.Set(content, direct_load=True),
                direct_load=True)
def _import_csv(csv_file):
    """Yield one set of couplets for every row read from ``csv_file``.

    The first ``skip_rows`` lines are discarded; the remaining lines are parsed by a
    ``DictReader`` that uses ``columns`` as field names. Every row is passed through
    ``_filter_row``, the converters in ``types`` are applied to the columns that are still
    present, and, when ``index_column`` is not ``None``, a zero-based running row number is
    stored under that key.

    ``import_csv.regular`` is set to ``False`` as soon as a row is seen whose filtered form has
    a different number of entries than the row that was read.

    :param csv_file: An iterator over the lines of the CSV input (for example an open text
        file).
    :return: A generator of ``_mo.Set`` instances, one per row; each has been passed through
        ``cache_relation`` and ``cache_functional`` with ``_mo.CacheStatus.IS``.
    """
    # ``next`` gets a default here: a bare ``next`` raises ``StopIteration`` when the input has
    # fewer than ``skip_rows`` lines, and inside a generator Python 3.7+ (PEP 479) converts that
    # into a ``RuntimeError``. Input that is too short now simply produces no rows.
    for _ in range(skip_rows):
        next(csv_file, None)

    reader = _csv.DictReader(csv_file, fieldnames=columns)
    for row_number, row in enumerate(reader):
        filtered_row = dict(_filter_row(row))
        if import_csv.regular and len(row) != len(filtered_row):
            import_csv.regular = False

        # Convert the values of the typed columns that survived filtering.
        for key, convert in types.items():
            if key in filtered_row:
                filtered_row[key] = convert(filtered_row[key])

        if index_column is not None:
            filtered_row[index_column] = row_number

        couplets = (
            _mo.Couplet(left=_util.get_left_cached(left), right=_mo.Atom(right), direct_load=True)
            for left, right in filtered_row.items())
        row_set = _mo.Set(couplets, direct_load=True)
        yield row_set.cache_relation(_mo.CacheStatus.IS).cache_functional(_mo.CacheStatus.IS)
def _import_csv(csv_file):
    """Yield one set of couplets for every data row read from ``csv_file``.

    The first ``skip_rows`` lines are discarded; the remaining lines are parsed by a
    ``csv.DictReader`` (which takes the field names from the first line it reads). Every row is
    passed through ``_filter_row``, the converters in ``types`` are applied to the columns that
    are still present, and, when ``index_column`` is not ``None``, a zero-based running row
    number is stored under that key.

    :param csv_file: An iterator over the lines of the CSV input (for example an open text
        file).
    :return: A generator of ``_mo.Set`` instances, one per row; each has been passed through
        ``cache_is_relation(True)`` and ``cache_is_left_functional(True)``.
    """
    import csv

    # ``next`` gets a default here: a bare ``next`` raises ``StopIteration`` when the input has
    # fewer than ``skip_rows`` lines, and inside a generator Python 3.7+ (PEP 479) converts that
    # into a ``RuntimeError``. Input that is too short now simply produces no rows.
    for _ in range(skip_rows):
        next(csv_file, None)

    reader = csv.DictReader(csv_file)
    for row_number, row in enumerate(reader):
        filtered_row = dict(_filter_row(row))

        # Convert the values of the typed columns that survived filtering.
        for key, convert in types.items():
            if key in filtered_row:
                filtered_row[key] = convert(filtered_row[key])

        if index_column is not None:
            filtered_row[index_column] = row_number

        couplets = (
            _mo.Couplet(left=_util.get_left_cached(left), right=_mo.Atom(right), direct_load=True)
            for left, right in filtered_row.items())
        row_set = _mo.Set(couplets, direct_load=True)
        yield row_set.cache_is_relation(True).cache_is_left_functional(True)
def _process_attributes(node):
    """Yield a couplet for each attribute of ``node``.

    The left of every couplet is obtained from the attribute name via
    ``_util.get_left_cached``; the right is an atom built from ``_get_atom_value`` applied to
    the attribute value.

    :param node: A node whose ``attributes`` member provides ``items()`` yielding
        ``(name, value)`` pairs.
    :return: A generator of ``_mo.Couplet`` instances.
    """
    yield from (
        _mo.Couplet(
            left=_util.get_left_cached(attr_name),
            right=_mo.Atom(_get_atom_value(attr_value), True),
            direct_load=True)
        for attr_name, attr_value in node.attributes.items())
def _process_attributes(node):
    """Turn the attributes of ``node`` into couplets, one per attribute.

    :param node: A node whose ``attributes`` member provides ``items()`` yielding
        ``(name, value)`` pairs.
    :return: A generator of ``_mo.Couplet`` instances; the left comes from
        ``_util.get_left_cached(name)`` and the right is an atom wrapping
        ``_get_atom_value(value)``.
    """
    for attribute in node.attributes.items():
        name, value = attribute
        left = _util.get_left_cached(name)
        right = _mo.Atom(_get_atom_value(value), True)
        yield _mo.Couplet(left=left, right=right, direct_load=True)