예제 #1
0
def read_pgf(pgf_file, graph=None, pickle_graph=False):
    """
    Import graph from Graph Python Format file

    PGF format is the modules own file format consisting of a serialized
    graph data, nodes and edges dictionaries. Import either as plain text
    serialized dictionary or pickled graph object.
    The format is feature rich with good performance but is not portable.

    :param pgf_file:      PGF data to parse
    :type pgf_file:       File, string, stream or URL
    :param graph:         Graph object to import to or Graph by default
    :type graph:          :graphit:Graph
    :param pickle_graph:  PGF format is a pickled graph
    :type pickle_graph:   :py:bool

    :return:              Graph instance
    :rtype:               :graphit:Graph

    :raises GraphitException: if `graph` is not a Graph (plain text mode),
                              if the parsed data is not a dictionary or if
                              one of the 'data', 'nodes' or 'edges' keys
                              is missing.
    """

    # Unpickle pickled PGF format
    # NOTE(security): unpickling executes arbitrary code embedded in the
    # data, only use pickle_graph=True with PGF data from a trusted source.
    if pickle_graph:
        pgf_file = open_anything(pgf_file, mode='rb')
        pgraph = pickle.load(pgf_file)

        # Transfer data from unpickled graph to graph if defined.
        # Test against None explicitly: a truthiness test could skip a
        # user supplied graph that is still empty.
        if graph is not None:
            graph.origin.nodes, graph.origin.edges, graph.origin.adjacency, graph.origin.data = graph.storagedriver(
                pgraph.nodes, pgraph.edges, pgraph.data)
            return graph
        return pgraph

    pgf_file = open_anything(pgf_file)

    # Import graph from serialized Graph Python Format
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # literal_eval only accepts Python literals, no code is executed
    pgf_eval = ast.literal_eval(pgf_file.read())
    if not isinstance(pgf_eval, dict):
        raise GraphitException('Invalid PGF file format')

    # Check the parsed dictionary (not the exhausted file object) for the
    # three required top-level keys
    missing_data = [d for d in ('data', 'nodes', 'edges') if d not in pgf_eval]
    if missing_data:
        raise GraphitException(
            'Invalid PGF file format, missing required attributes: {0}'.format(
                ','.join(missing_data)))

    graph.origin.nodes, graph.origin.edges, graph.origin.adjacency, graph.origin.data = graph.storagedriver(
        pgf_eval['nodes'], pgf_eval['edges'], pgf_eval['data'])

    return graph
예제 #2
0
def graph_join(graph1, graph2, links=None, run_node_new=True, run_edge_new=True):
    """
    Add graph2 as subgraph to graph1

    All nodes and edges of graph 2 are added to graph 1. Final links between
    nodes in graph 1 and newly added nodes of graph 2 are defined by the edges
    in the `links` list.

    :param graph1:       graph to add to
    :type graph1:        GraphAxis
    :param graph2:       graph added
    :type graph2:        GraphAxis
    :param links:        links between nodes in graph1 and graph2 as
                         (graph1 node ID, graph2 node ID) pairs
    :type links:         :py:list
    :param run_edge_new: run the custom initiation method (new method) of
                         the new edge once.
    :type run_edge_new:  :py:bool
    :param run_node_new: run the custom initiation method (new method) of
                         the new node once.
    :type run_node_new:  :py:bool

    :return:             node mapping (graph2 node ID -> new graph1 node ID)
    :rtype:              :py:dict

    :raises GraphitException: if a link refers to a node that is not in
                              graph1 (first item) or graph2 (second item)
    """

    # Validate if all arguments are Graphs
    check_graphbase_instance(graph1, graph2)

    # Check if the link nodes exist before anything is added to graph 1
    links = links or []
    for link in links:
        if link[0] not in graph1.nodes:
            raise GraphitException('Link node {0} not in graph1'.format(link[0]))
        if link[1] not in graph2.nodes:
            raise GraphitException('Link node {0} not in graph2'.format(link[1]))

    # Add all nodes and attributes of graph 2 to 1 and register node mapping
    mapping = {}
    for nid, attr in graph2.nodes.items():
        mapping[nid] = graph1.add_node(node=nid, run_node_new=run_node_new, **attr)

    # Transfer edges and attributes from graph 2 to graph 1 and map node IDs.
    # Every edge ID of graph 2 is added as a directed edge.
    for eid, attr in graph2.edges.items():
        if eid[0] in mapping and eid[1] in mapping:
            graph1.add_edge(mapping[eid[0]], mapping[eid[1]], run_edge_new=run_edge_new, directed=True, **attr)

    # Link graph 2 nodes to graph 1 using the directionality of graph 1
    for link in links:
        graph1.add_edge(link[0], mapping[link[1]], run_edge_new=run_edge_new, directed=graph1.directed)

    return mapping
예제 #3
0
def write_xml(graph):
    """
    Export a graph to an XML data format

    The graph NodeTools class is temporarily replaced by XMLNodeTools to
    provide the `serialize` method and restored afterwards, also when the
    export fails.

    :param graph:  Graph to export
    :type graph:   :graphit:GraphAxis

    :return:       result of `et.tostring` on the serialized element tree

    :raises TypeError:        if `graph` is not a GraphAxis
    :raises GraphitException: if the graph has no root node defined
    """

    # Graph should be of type GraphAxis with a root node nid defined
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))
    if graph.root is None:
        raise GraphitException('No graph root node defines')

    # Set current NodeTools aside and register new one
    curr_nt = graph.node_tools
    graph.node_tools = XMLNodeTools

    try:
        # Define start node for recursive export
        if len(graph) > 1:
            root = graph.getnodes(resolve_root_node(graph))
        else:
            root = graph.getnodes(graph.nid)

        # Start recursive parsing
        tree = root.serialize()
    finally:
        # Restore original NodeTools, even if serialization failed
        graph.node_tools = curr_nt

    return et.tostring(tree)
예제 #4
0
def write_xml(graph, node_tools=XMLNodeTools):
    """
    Export a graph to an XML data format

    Custom XML serializers may be introduced as a custom NodeTools
    class using the `node_tools` attribute. In addition, the graph
    ORM may be used to inject tailored `serialize` methods in specific
    nodes or edges.

    The graph NodeTools class is replaced for the duration of the export
    and restored afterwards, also when the export fails.

    :param graph:       Graph to export
    :type graph:        :graphit:Graph
    :param node_tools:  NodeTools class with node serialize method
    :type node_tools:   :graphit:NodeTools

    :return:            Graph exported as a hierarchical XML node structure
    :rtype:             :py:str

    :raises TypeError:        if `graph` is not a GraphAxis
    :raises GraphitException: if the graph has no root node defined or
                              `node_tools` does not inherit from NodeTools
    """

    # Graph should be of type GraphAxis with a root node nid defined
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))
    if graph.root is None:
        raise GraphitException('No graph root node defines')

    # Set current NodeTools aside and register new one
    if not issubclass(node_tools, NodeTools):
        raise GraphitException('Node_tools ({0}) needs to inherit from the NodeTools class'.format(type(node_tools)))
    curr_nt = graph.node_tools
    graph.node_tools = node_tools

    try:
        # Define start node for recursive export
        if len(graph) > 1:
            root = graph.getnodes(resolve_root_node(graph))
        else:
            root = graph.getnodes(graph.nid)

        # Start recursive parsing. Build adjacency only once
        with root.adjacency:
            tree = root.serialize()
    finally:
        # Restore original NodeTools, even if serialization failed
        graph.node_tools = curr_nt

    # Return pretty printed XML using minidom.parseString.
    # Serialize the element tree that was built, not the graph node object.
    return minidom.parseString(et.tostring(tree)).toprettyxml(indent="   ")
예제 #5
0
def read_xml(xml_file, graph=None):
    """
    Parse hierarchical XML data structure to a graph

    Uses the Python build-in etree cElementTree parser to parse the XML
    document and convert the elements into nodes.
    The XML element tag becomes the node key, XML text becomes the node
    value and XML attributes are added to the node as additional attributes.

    If the graph is empty, the XML root element becomes the graph root node.
    If the graph is not empty, the XML elements are added with the XML root
    element as parent of the new nodes; the existing graph root is left
    unchanged.

    :param xml_file:       XML data to parse
    :type xml_file:        File, string, stream or URL
    :param graph:          Graph object to import dictionary data in
    :type graph:           :graphit:Graph

    :return:               GraphAxis object or None if the XML could not
                           be parsed (the parse error is logged)
    :rtype:                :graphit:GraphAxis

    :raises GraphitException: if `graph` is not a GraphAxis
    """

    xml_file = open_anything(xml_file)

    # User defined or default GraphAxis object
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Try parsing the string using default Python cElementTree parser
    try:
        tree = et.fromstring(xml_file.read())
    except et.ParseError as error:
        logging.error('Unable to parse XML file. cElementTree error: {0}'.format(error))
        return

    is_empty = graph.empty()

    # Add root element. Work on a copy of the attributes so the parsed
    # element itself is not modified when the text value is added.
    element_data = dict(tree.attrib)
    if tree.text and len(tree.text.strip()):
        element_data[graph.data.value_tag] = tree.text.strip()
    rid = graph.add_node(tree.tag, **element_data)

    if is_empty:
        graph.root = rid

    # Recursive add XML elements as nodes. The children belong to the XML
    # root element just added (rid), which equals graph.root only when the
    # graph was empty before the import.
    walk_element_tree(tree, graph, parent=rid)

    return graph
예제 #6
0
def read_gml(gml, graph=None):
    """
    Read graph in GML format

    Only the first 'graph' record in the GML data is imported, a warning
    is logged if there are more.

    :param gml:             GML graph data.
    :type gml:              File, string, stream or URL
    :param graph:           Graph object to import GML data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph

    :raises GraphitException: if `graph` is not a Graph or if the GML data
                              does not contain a 'graph' record
    """

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Parse GML into nested structure of Record class instances
    gml_stream = StreamReader(open_anything(gml))
    records = Record(gml_stream, name='root')

    gml_graphs = [g for g in records if g.name == 'graph']
    if not gml_graphs:
        # Fail with a descriptive error instead of an IndexError below
        raise GraphitException("GML file does not contain a 'graph' object")
    if len(gml_graphs) > 1:
        logging.warning("GML file contains {0} 'graph' objects. Only parse first".format(len(gml_graphs)))
    gml_graph_record = gml_graphs[0]

    # GML node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    # Set graph meta-data and attributes.
    # The graph is set to directed unless the GML 'directed' attribute is
    # present with a value other than 1.
    graph_attr = gml_graph_record.to_dict({})
    graph.directed = True
    if 'directed' in graph_attr:
        directed = graph_attr.pop('directed')
        graph.directed = directed == 1
        graph.data['directed'] = graph.directed

    graph.data.update(graph_attr)

    # Build graph from records
    build_nodes(graph, gml_graph_record)
    build_edges(graph, gml_graph_record)

    return graph
예제 #7
0
    def _resolve_nid(self):
        """
        Return the node ID (nid) to use as the current node

        Returns the `nid` attribute of the object if it has one and falls
        back to the graph `root` otherwise.

        :return: node ID of the current node or the root node ID

        :raises GraphitException: if no root node is defined
        """

        # A root node is required regardless of which nid gets returned
        if self.root is None:
            raise GraphitException(
                'Graph node descendant requires a root node')

        try:
            resolved = self.nid
        except AttributeError:
            # No `nid` attribute available, use the root instead
            resolved = self.root

        return resolved
예제 #8
0
    def update(self, edges=None, nodes=None):
        """
        Add nodes and/or edges to the graph

        If only `edges` is given and it exposes both a `nodes` and an `edges`
        attribute, every item of these two mappings is added one by one using
        `add_node` and `add_edge`. Otherwise `edges` is passed to `add_edges`.

        :param edges: edges to add, or an object with `nodes` and `edges`
                      attributes
        :param nodes: nodes to add, passed to `add_nodes`

        :raises GraphitException: if both `edges` and `nodes` are None
        """

        # Nothing to do without any input
        if edges is None and nodes is None:
            raise GraphitException("update needs nodes or edges input")

        # Nodes only
        if edges is None:
            self.add_nodes(nodes)
            return

        # Nodes and edges, nodes go first
        if nodes is not None:
            self.add_nodes(nodes)
            self.add_edges(edges)
            return

        # Edges only: plain edge collection
        graph_like = hasattr(edges, 'nodes') and hasattr(edges, 'edges')
        if not graph_like:
            self.add_edges(edges)
            return

        # Edges only: object carrying its own nodes and edges mappings
        for node_id, node_attr in edges.nodes.items():
            self.add_node(node_id, **node_attr)
        for edge_id, edge_attr in edges.edges.items():
            self.add_edge(*edge_id, **edge_attr)
예제 #9
0
def read_adl(adl_file, graph=None):
    """
    Build a graph from adjacency list (ADL) data

    Every non-empty line lists whitespace separated node labels. All labels
    on a line are added as nodes and the first label is connected by an edge
    to each of the other labels on that line. Everything following a '#'
    character on a line is ignored.

    .. note:: the adjacency list does not define the directionality of the
              graph, the edges are added using the settings of the `graph`
              that is used for import.

    :param adl_file:        ADL graph data.
    :type adl_file:         File, string, stream or URL
    :param graph:           Graph object to import ADL data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph

    :raises GraphitException: if `graph` is not a Graph
    """

    adl_stream = open_anything(adl_file)

    # Fall back to a default Graph, reject anything that is not a Graph
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # The labels in the file are the node IDs, no automatic IDs
    graph.data['auto_nid'] = False

    for raw_line in adl_stream.readlines():

        # Drop the comment part (# ..) and skip lines without content
        content = raw_line.split('#')[0].strip()
        if not content:
            continue

        labels = content.split()
        graph.add_nodes(labels)

        # First label is the source, remaining labels are its neighbours
        if len(labels) > 1:
            source = labels[0]
            graph.add_edges([(source, target) for target in labels[1:]])

    return graph
def write_flattened(graph, sep='.', default=None, allow_none=False, **kwargs):
    """
    Export a graph as flattened text, one line per leaf node

    Every line is the path of a leaf node (`node.path`) followed by `sep`
    and the node value obtained using the graph `value_tag`.
    The graph NodeTools class is replaced by NodeAxisTools for the duration
    of the export and restored afterwards, also when the export fails.

    :param graph:       Graph object to export
    :type graph:        :graphit:GraphAxis
    :param sep:         separator used in the node path and between path
                        and value
    :type sep:          :py:str
    :param default:     value to use when the node has no value
    :param allow_none:  export leaf nodes for which the value is None
    :type allow_none:   :py:bool
    :param kwargs:      additional keyword arguments passed to `node.path`

    :return:            flattened text, or an empty dictionary if the graph
                        is empty

    :raises TypeError:        if `graph` is not a GraphAxis
    :raises GraphitException: if the graph has no root node defined
    """

    # No nodes, return empty dict
    if graph.empty():
        logging.info('Graph is empty: {0}'.format(repr(graph)))
        return {}

    # Graph should be of type GraphAxis with a root node nid defined
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))
    if graph.root is None:
        raise GraphitException('No graph root node defines')

    # Set current NodeTools aside and register new one
    curr_nt = graph.node_tools
    graph.origin.node_tools = NodeAxisTools

    # Create empty file buffer
    string_buffer = StringIO()

    try:
        value_tag = graph.data.value_tag
        for leaf in node_leaves(graph):
            node = graph.getnodes(leaf)
            value = node.get(value_tag, default=default)

            if value is None and not allow_none:
                continue

            path = '{0}{1}{2}\n'.format(node.path(sep=sep, **kwargs), sep, value)
            string_buffer.write(path)
    finally:
        # Restore original NodeTools, even if the export failed
        graph.origin.node_tools = curr_nt

    return string_buffer.getvalue()
예제 #11
0
def write_gexf(graph, node_tools=GEXFNodeTools, edge_tools=GEXFEdgeTools):
    """
    Export a graph to an GEXF data format

    Custom XML serializers may be introduced as a custom NodeTools
    class using the `node_tools` attribute. In addition, the graph
    ORM may be used to inject tailored `serialize` methods in specific
    nodes or edges.

    The graph NodeTools and EdgeTools classes are replaced for the duration
    of the export and restored afterwards, also when the export fails.

    :param graph:       Graph to export
    :type graph:        :graphit:Graph
    :param node_tools:  NodeTools class with node serialize method
    :type node_tools:   :graphit:NodeTools
    :param edge_tools:  EdgeTools class with edge serialize method
    :type edge_tools:   :graphit:EdgeTools

    :return:            Graph exported as a hierarchical XML node structure
    :rtype:             :py:str

    :raises GraphitException: if `node_tools` does not inherit from
                              NodeTools or `edge_tools` does not inherit
                              from EdgeTools
    """

    # Validate both tool classes before the graph is modified
    if not issubclass(node_tools, NodeTools):
        raise GraphitException(
            'Node_tools ({0}) needs to inherit from the NodeTools class'.
            format(type(node_tools)))
    if not issubclass(edge_tools, EdgeTools):
        raise GraphitException(
            'Edge_tools ({0}) needs to inherit from the EdgeTools class'.
            format(type(edge_tools)))

    # Set current NodeTools and EdgeTools aside and register new one
    curr_nt = graph.node_tools
    curr_et = graph.edge_tools
    graph.node_tools = node_tools
    graph.edge_tools = edge_tools

    try:
        # Create GEXF root element and add meta-data
        root = et.Element('gexf')
        root.attrib = {
            'xmlns':
            'http://www.gexf.net/1.2draft',
            'version':
            '1.2',
            'xmlns:xsi':
            graph.data.get('xmlns:xsi',
                           'http://www.w3.org/2001/XMLSchema-instance'),
            'xsi.schemaLocation':
            graph.data.get(
                'xsi.schemaLocation',
                'http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd'
            )
        }

        # Every graph.data item except 'mode' and 'lastmodifieddate' is
        # exported as a child element of <meta>
        root_meta = et.SubElement(root, 'meta')
        root_meta.attrib = {'lastmodifieddate': str(datetime.date.today())}
        for key, value in graph.data.items():
            if key not in ('mode', 'lastmodifieddate'):
                meta = et.SubElement(root_meta, key)
                meta.text = str(value)

        root_graph = et.SubElement(root, 'graph')
        root_graph.attrib = {
            'mode': graph.data.get('mode', 'static'),
            'defaultedgetype': 'directed' if graph.directed else 'undirected'
        }

        # Add nodes
        if len(graph.nodes):
            root_nodes = et.SubElement(root_graph, 'nodes')
            for node in graph.iternodes():
                node.serialize(tree=root_nodes)

        # Add edges
        if len(graph.edges):
            root_edges = et.SubElement(root_graph, 'edges')
            for edge in graph.iteredges():
                edge.serialize(tree=root_edges)
    finally:
        # Restore original NodeTools and EdgeTools, even if export failed
        graph.node_tools = curr_nt
        graph.edge_tools = curr_et

    # Return pretty printed XML using minidom.parseString
    return minidom.parseString(et.tostring(root)).toprettyxml(indent="   ")
예제 #12
0
def write_pydata(graph, default=None, allow_none=True, export_all=False, include_root=False):
    """
    Export a graph to a (nested) dictionary

    Convert graph representation of the dictionary tree into a dictionary
    using a nested representation of the dictionary hierarchy.

    Dictionary keys and values are obtained from the node attributes using
    `key_tag` and `value_tag`. The key_tag is set to graph key_tag by default.
    Export using these primary key_tag/value_tag pairs is the default
    behaviour. If a node contains more data these can be exported as part of
    a dictionary using the `export_all` argument.

    .. note:: `export_all` is important when dictionary data structures were
              imported using level=1 in `read_pydata`. In this case, all key
              value pairs at the same dictionary level are contained in the
              same node.

    Node values that are 'None' are exported by default unless `allow_none`
    equals False.
    If the key_tag exists but value_tag is absent use `default` as default.

    .. note:: if a graph is composed out of multiple, independent subgraphs
              only the subgraph for which the root node is defined will be
              exported. To export all, iterate over the subgraphs and define
              the appropriate root for each of them.

    The graph ORM and NodeTools are replaced for the duration of the export
    and restored afterwards, also when the export fails.

    :param graph:          Graph object to export
    :type graph:           :graphit:GraphAxis
    :param default:        value to use when node value was not found using
                           value_tag.
    :type default:         mixed
    :param allow_none:     allow None values in the output
    :type allow_none:      :py:bool
    :param export_all:     Export the full node storage dictionary.
    :type export_all:      :py:bool
    :param include_root:   Include the root node in the hierarchy
    :type include_root:    :py:bool

    :return:               exported data, an empty dictionary if the graph
                           is empty
    :rtype:                :py:dict

    :raises TypeError:        if `graph` is not a GraphAxis
    :raises GraphitException: if the graph has no root node defined
    """

    # No nodes, return empty dict
    if graph.empty():
        logging.info('Graph is empty: {0}'.format(repr(graph)))
        return {}

    # Graph should be of type GraphAxis with a root node nid defined
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))
    if graph.root is None:
        raise GraphitException('No graph root node defines')

    # Build ORM with format specific conversion classes
    pydataorm = GraphORM(inherit=False)
    pydataorm.node_mapping.add(ParseDictionaryType, lambda x: x.get('format') == 'dict')
    pydataorm.node_mapping.add(ParseListType, lambda x: x.get('format') == 'list')
    pydataorm.node_mapping.add(ParseSetType, lambda x: x.get('format') == 'set')
    pydataorm.node_mapping.add(ParseTupleType, lambda x: x.get('format') == 'tuple')

    # Set current ORM and NodeTools aside and register new ones
    curr_orm = graph.orm
    curr_nt = graph.node_tools
    graph.orm = pydataorm
    graph.node_tools = PyDataNodeTools

    try:
        # Define start node for recursive export
        if len(graph) > 1:
            root = graph.getnodes(resolve_root_node(graph))
        else:
            root = graph.getnodes(list(graph.nodes.keys()))

        # Start recursive parsing
        # If we export the full node dictionary, also export None key/value pairs
        root_key, data = root.serialize(allow_none=True if export_all else allow_none,
                                        export_all=export_all, default=default)
    finally:
        # Restore original ORM and NodeTools, even if serialization failed
        graph.node_tools = curr_nt
        graph.orm = curr_orm

    # Include root_key or not
    if include_root and root_key:
        data = {root_key: data}

    return data
예제 #13
0
def read_pydata(data, graph=None, parser_classes=None, level=0):
    """
    Parse (hierarchical) python data structures to a graph

    Many data formats are first parsed to a python structure before they are
    converted to a graph using the `read_pydata` function.
    The function supports any object that is an instance of, or behaves as, a
    Python dictionary, list, tuple or set and converts these (nested)
    structures to graph nodes and edges for connectivity. Data is stored in
    nodes using the node and edge 'key_tag' and 'value_tag' attributes in the
    Graph class.

    Data type and format information are also stored as part of the nodes to
    enable reconstruction of the Python data structure on export using the
    `write_pydata` function. Changing type and format on a node or edge
    allows for customized data export.

    Parsing of data structures to nodes and edges is handled by parser classes
    that need to define the methods `deserialize` for reading and `serialize`
    for writing. In `write_pydata` these classes are registered with the ORM
    to fully customize the use of the `serialize` method. In the `read_pydata`
    function the ORM cannot be used because the nodes/edges themselves do not
    yet exist. Instead they are provided as a dictionary through the
    `parser_classes` argument. The dictionary defines the string representation
    of the Python data type as key and parser class as value.

    Parser customization is important as Python data structures can be
    represented as a graph structure in different ways. This is certainly true
    for dictionaries where key/value pairs can be part of the node attributes,
    as separate nodes or as a combination of the two.
    `read_pydata` has quick support for two scenarios using the `level`
    argument:

        * level 0: every dictionary key/value pair is represented as a node
          regardless of its position in the nested data structure
        * level 1: all keys at the same level in the hierarchy that have a
          primitive type value are stored as part of the node attributes.

    If the `graph` is empty, the first node added to the graph is assigned
    as root node. If the `graph` is not empty, new nodes and edges will be
    added to it as subgraph. Edge connections between the two will have to be
    made afterwards.

    :param data:            Python (hierarchical) data structure
    :param graph:           GraphAxis object to import dictionary data in
    :type graph:            :graphit:GraphAxis
    :param parser_classes:  parser class definition for different Python data
                            types. Updates default classes for level 0 or 1
    :type parser_classes:   :py:dict
    :param level:           dictionary parsing mode
    :type level:            :py:int

    :return:                GraphAxis object
    :rtype:                 :graphit:GraphAxis

    :raises GraphitException: if `graph` is not a GraphAxis or `level` is
                              not 0 or 1
    """

    # User defined or default GraphAxis object
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Determine parser classes to use based on level.
    # Raise explicitly: an assert statement is removed when Python runs
    # with -O, which would silently accept any level.
    if level not in (0, 1):
        raise GraphitException('Unsupported level {0}. Required to be 0 or 1'.format(level))
    if level == 0:
        parser_class_dict = copy.copy(ORMDEFS_LEVEL0)
    else:
        parser_class_dict = copy.copy(ORMDEFS_LEVEL1)

    # Update parser_class_dict with custom classes if any
    if isinstance(parser_classes, dict):
        parser_class_dict.update(parser_classes)

    # Define root
    if graph.empty():
        graph.root = graph.data.nodeid

    # Start recursive parsing by calling the `deserialize` method on the
    # parser object. Unknown data types use the 'fallback' parser class.
    parser = parser_class_dict.get(return_instance_type(data), parser_class_dict['fallback'])
    p = parser()
    p.deserialize(data, graph, parser_class_dict)

    return graph
예제 #14
0
def read_jgf(jgf_format, graph=None):
    """
    Read JSON graph format (.jgf)

    This is a proprietary format in which the graph meta-data, the nodes,
    edges and their data dictionaries are stored in JSON format.

    Format description. Primary key/value pairs:
    * graph: Graph class meta-data. Serializes all class attributes of type
             int, float, bool, long, str or unicode.
    * data:  Graph data dictionary, passed to the Graph constructor
    * nodes: Graph node identifiers (keys) and attributes (values)
    * edges: Graph enumerated edge identifiers
    * edge_attr: Graph edge attributes

    :param jgf_format:  JSON encoded graph data to parse or an already
                        parsed dictionary
    :type jgf_format:   :py:str
    :param graph:       Graph object to import JGF data in
    :type graph:        :graphit:Graph

    :return:            Graph object, or None if the data could not be
                        decoded, misses required keys or fails the graphit
                        version check
    :rtype:             Graph or GraphAxis object

    :raises GraphitException: if `graph` is not a Graph or GraphAxis
    """

    # Try parsing the string using default Python json parser
    if isinstance(jgf_format, dict):
        parsed = jgf_format
    else:
        jgf_format = open_anything(jgf_format)
        try:
            parsed = json.load(jgf_format)
        except (IOError, ValueError):
            # Invalid JSON is reported by the json module as a ValueError
            logger.error('Unable to decode JSON string')
            return

    # Check format validity first, the version check below needs 'data'
    keywords = ['graph', 'data', 'nodes', 'edges', 'edge_attr']
    if not isinstance(parsed, dict) or not set(keywords).issubset(set(parsed.keys())):
        logger.error('JSON format does not contain required graph data')
        return

    # Check graphit version
    if not check_graphit_version(parsed['data'].get('graphit_version')):
        return

    # User defined or default Graph object
    # A root defined in the graph meta-data selects a GraphAxis
    if graph is None:
        if parsed['graph'].get('root') is not None:
            graph = GraphAxis(data=parsed['data'])
        else:
            graph = Graph(data=parsed['data'])
    elif not isinstance(graph, (Graph, GraphAxis)):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Init graph meta-data attributes
    for key, value in parsed['graph'].items():
        setattr(graph, key, value)

    # Init graph nodes
    for node_key, node_value in parsed['nodes'].items():

        # JSON objects don't accept integers as dictionary keys
        # If graph.auto_nid equals True, coerce node_key to integer
        if graph.data.auto_nid:
            node_key = int(node_key)

        graph.nodes[node_key] = node_value

    # Init graph edges
    # 'edges' maps an enumerated key to the edge ID, the same key is used
    # to look up the attributes in 'edge_attr'
    for edge_key, edge_value in parsed['edges'].items():
        edge_value = tuple(edge_value)
        graph.edges[edge_value] = parsed['edge_attr'].get(edge_key, {})

    # Set auto nid
    graph._set_auto_nid()

    return graph
예제 #15
0
def read_tgf(tgf, graph=None):
    """
    Read graph in Trivial Graph Format

    TGF format dictates that nodes to be listed in the file first with each
    node on a new line. A '#' character signals the end of the node list and
    the start of the edge list.

    Node and edge ID's can be integers, float or strings. They are parsed
    automatically to their most likely format.
    Simple node and edge labels are supported in TGF as all characters that
    follow the node or edge ID's. They are parsed and stored as text in the
    Graph node and edge data stores using the graphs default or custom
    'key_tag'.

    TGF data is imported into a default Graph object if no custom Graph
    instance is provided. The graph behaviour and the data import process is
    influenced and can be controlled using a (custom) Graph class.

    .. note:: TGF format always defines edges in a directed fashion.
              This is enforced even for custom graphs.

    :param tgf:             TGF graph data.
    :type tgf:              File, string, stream or URL
    :param graph:           Graph object to import TGF data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph

    :raises GraphitException: if `graph` is not a Graph
    """

    tgf_file = open_anything(tgf)

    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # TGF defines edges in a directed fashion. Enforce but restore later
    default_directionality = graph.directed
    graph.directed = True

    # TGF node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    # Start parsing. First extract nodes
    nodes = True
    node_dict = {}
    try:
        for line in tgf_file.readlines():

            line = line.strip()
            if not line:
                continue

            # Reading '#' character means switching from node
            # definition to edges
            if line.startswith('#'):
                nodes = False
                continue

            # Only the ID's are coerced to types. The label words are kept
            # as text, joining type coerced words fails for numeric words.
            fields = line.split()

            # Parse nodes
            if nodes:
                node_id = coarse_type(fields[0])

                attr = {}
                # Has node data
                if len(fields) > 1:
                    attr = {graph.data.key_tag: ' '.join(fields[1:])}
                node_dict[node_id] = graph.add_node(node_id, **attr)

            # Parse edges
            else:
                e1 = node_dict[coarse_type(fields[0])]
                e2 = node_dict[coarse_type(fields[1])]

                attr = {}
                # Has edge data
                if len(fields) > 2:
                    attr = {graph.data.key_tag: ' '.join(fields[2:])}
                graph.add_edge(e1, e2, **attr)
    finally:
        # Close the file and restore directionality, even if parsing failed
        tgf_file.close()
        graph.directed = default_directionality

    return graph
예제 #16
0
def write_gml(graph, node_tools=None, edge_tools=None):
    """
    Export a graphit graph to GML format

    Export graphit Graph data, nodes and edges in Graph Modelling Language
    (GML) format. The function replaces the graph NodeTools and EdgeTools
    with a custom version exposing a `serialize` method responsible for
    serializing the node/edge attributes in a GML format. The NodeTools
    class is also used to export Graph.data attributes.
    The original NodeTools and EdgeTools are restored afterwards, also when
    the export fails.

    Custom serializers may be introduced as custom NodeTools or EdgeTools
    classes using the `node_tools` and/or `edge_tools` attributes.
    In addition, the graph ORM may be used to inject tailored `serialize`
    methods in specific nodes or edges.

    :param graph:       Graph object to export
    :type graph:        :graphit:Graph
    :param node_tools:  NodeTools class with node serialize method
    :type node_tools:   :graphit:NodeTools
    :param edge_tools:  EdgeTools class with edge serialize method
    :type edge_tools:   :graphit:EdgeTools

    :return:            GML graph representation
    :rtype:             :py:str

    :raises GraphitException: if `node_tools` does not inherit from
                              NodeTools or `edge_tools` does not inherit
                              from EdgeTools
    """

    # Validate both custom tool classes before the graph is modified, a
    # failed check should not leave the graph with replaced node tools
    if node_tools and not issubclass(node_tools, NodeTools):
        raise GraphitException('Node_tools ({0}) needs to inherit from the NodeTools class'.format(type(node_tools)))
    if edge_tools and not issubclass(edge_tools, EdgeTools):
        raise GraphitException('Edge_tools ({0}) needs to inherit from the EdgeTools class'.format(type(edge_tools)))

    # Set current node and edge tools aside and register GML ones for export
    curr_nt = graph.node_tools
    curr_et = graph.edge_tools
    graph.node_tools = node_tools or type('GMLNodeTools', (GMLTools, NodeTools), {})
    graph.edge_tools = edge_tools or type('GMLEdgeTools', (GMLTools, EdgeTools), {})

    # Create empty file buffer
    string_buffer = StringIO()

    try:
        # Serialize main graph instance
        gs = graph.node_tools()
        string_buffer.write('graph [\n')
        gs.serialize(graph.data.to_dict(), string_buffer, indent=2)

        # Serialize nodes
        for node in graph.iternodes(sort_key=int):
            node.serialize(node.nodes[node.nid], string_buffer, indent=2, class_name='node')

        # Serialize edges
        for edge in graph.iteredges():
            edge.serialize(edge.edges[edge.nid], string_buffer, indent=2, class_name='edge')

        string_buffer.write(']\n')
    finally:
        # Restore original node and edge tools, even if the export failed
        graph.node_tools = curr_nt
        graph.edge_tools = curr_et

    logger.info('Graph {0} exported in GML format'.format(repr(graph)))

    return string_buffer.getvalue()
예제 #17
0
def read_p2g(p2g_file, graph=None):
    """
    Read graph in P2G format

    The first non-empty line is used as graph name, the second non-empty
    line holds the declared number of nodes and edges. Of the remaining
    lines, a line with a single field starts a new node and a line with two
    integer fields lists the (zero based) indices of the nodes the current
    node points to. Indices refer to the order in which the nodes were
    defined in the file.

    :param p2g_file:      P2G data to parse
    :type p2g_file:       File, string, stream or URL
    :param graph:         Graph object to import to or Graph by default
    :type graph:          :graphit:Graph

    :return:              Graph instance
    :rtype:               :graphit:Graph
    :raises:              GraphitException if the graph type is not
                          supported or the P2G data is malformed
    """

    p2g_file = open_anything(p2g_file)

    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # P2G graphs are directed
    graph.directed = True

    graph_name = None
    graph_layout = None
    curr_node = None
    nodes = {}

    # Node labels in order of definition. Edge targets are positional
    # indices so the order must not depend on dictionary ordering.
    node_order = []

    for i, line in enumerate(p2g_file.readlines()):

        line = line.strip()
        if not line:
            continue

        sline = line.split()

        # Parse p2g graph name (first line)
        if not graph_name:
            graph_name = line
            continue

        # Parse number of nodes and edges (second line).
        # Build a real list: a lazy `map` object can not be indexed on
        # Python 3 and would postpone the ValueError past this handler.
        if not graph_layout:
            try:
                graph_layout = [int(n) for n in sline]
            except ValueError:
                raise GraphitException(
                    'P2G import error: line {0} - {1}'.format(i, line))
            continue

        # Parse nodes and edges
        if len(sline) == 1:
            if line not in nodes:
                node_order.append(line)
            nodes[line] = []
            curr_node = line
        elif len(sline) == 2:
            try:
                targets = [int(n) for n in sline]
            except ValueError:
                raise GraphitException(
                    'P2G import error: malformed edge on line {0} - {1}'.
                    format(i, line))
            if curr_node not in nodes:
                node_order.append(curr_node)
            nodes[curr_node] = targets
        else:
            raise GraphitException(
                'P2G import error: line {0} - {1}'.format(i, line))

    # The node/edge count line is required for the consistency check below
    if graph_layout is None or len(graph_layout) < 2:
        raise GraphitException(
            'P2G import error: missing node and edge count line')

    graph.data['name'] = graph_name

    # Add nodes
    mapped_nodes = graph.add_nodes(node_order)

    # Add edges
    for i, nid in enumerate(node_order):
        for e in nodes[nid]:
            # Reject negative indices too, they would silently wrap around
            if 0 <= e < len(mapped_nodes):
                graph.add_edge(mapped_nodes[i], mapped_nodes[e])
            else:
                raise GraphitException(
                    'P2G import error: edge node index {0} not in graph'.
                    format(e))

    if len(nodes) != graph_layout[0] or (len(graph.edges)) != graph_layout[1]:
        logging.warning(
            'P2G import warning: declared number of nodes and edges {0}-{1} does not match {2}-{3}'
            .format(graph_layout[0], graph_layout[1], len(nodes),
                    len(graph.edges)))

    return graph
예제 #18
0
def read_json_schema(schema, graph=None, exclude_args=None, resolve_ref=True):
    """
    Import hierarchical data structures defined in a JSON schema format

    The schema is walked recursively. Every entry in a 'properties' block
    (and 'definitions' block if `resolve_ref` is True) becomes a child node
    of the node representing the enclosing schema block. Non-dictionary
    schema attributes are stored as node attributes. The `schema` passed in
    is not modified.

    :param schema:            JSON Schema data format to import
    :type schema:             dict, file, string, stream or URL
    :param graph:             graph object to import JSON schema data in
    :type graph:              :graphit:GraphAxis
    :param exclude_args:      JSON schema arguments to exclude from import
    :type exclude_args:       :py:list
    :param resolve_ref:       Parse JSON schema 'definitions'
    :type resolve_ref:        :py:bool

    :return:                  Graph object or None if the JSON data could
                              not be decoded
    :rtype:                   :graphit:GraphAxis
    :raises:                  GraphitException if `graph` is not a GraphAxis
    """

    json_schema = schema
    if not isinstance(schema, dict):

        # Try parsing the string using default Python json parser
        json_schema = open_anything(schema)
        try:
            json_schema = json.load(json_schema)
        except (IOError, ValueError) as error:
            logger.error('Unable to decode JSON string: {0}'.format(error))
            return

    # User defined or default Graph object
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    if graph.empty():
        rid = graph.add_node('root')
        graph.root = rid

    # Build JSON schema parser ORM with format specific conversion classes
    graph.node_tools = JSONSchemaValidatorDraft07
    graph.orm = JSONSchemaORMDraft07

    # What data-blocks to parse, properties by default, definitions if required
    datablock = ['properties']
    if resolve_ref:
        datablock.append('definitions')

    if exclude_args is None:
        exclude_args = []

    def walk_schema(schema_block, parent=None, is_required=False):
        """
        Store the attributes of `schema_block` in node `parent` and recurse
        into its data blocks. `is_required` flags that the enclosing block
        lists this element as required.
        """

        # Get all JSON schema definitions for this data instance
        attributes = {
            k: v
            for k, v in schema_block.items()
            if not isinstance(v, dict) and k not in exclude_args
        }

        # Get 'required' attribute listing the required child elements.
        # Read it from the schema block itself so that the list survives
        # when this element is flagged as required by its own parent.
        required = schema_block.get('required', [])
        if not isinstance(required, list):
            required = []

        # Element is required by its parent, store as boolean flag
        if is_required and 'required' not in exclude_args:
            attributes['required'] = True

        node = graph.getnodes(parent)
        node.update(attributes)

        # Store default data or None
        if attributes.get('default') is not None:
            node.set(graph.data.value_tag, attributes.get('default'))

        # For all child elements in datablock, make new node
        # and parse using recursive calls to walk_schema
        for block in schema_block.keys():
            if block in datablock:
                for child, attr in schema_block[block].items():
                    nid = graph.add_node(child)

                    # Register block_name in a copy of the child attributes
                    # to leave the schema of the caller untouched
                    child_block = dict(attr)
                    child_block['schema_label'] = block

                    graph.add_edge(parent, nid)
                    walk_schema(child_block,
                                parent=nid,
                                is_required=child in required)

    walk_schema(json_schema, graph.root)

    # Parse schema meta data
    document_path = ''
    if isinstance(schema, PY_STRING):
        document_path = os.path.abspath(schema)

    root = graph.get_root()
    root.set('document_path', document_path)
    parse_schema_meta_data(root)

    # Resolve JSON Schema $ref
    if resolve_ref:
        resolve_json_ref(graph, exclude_args=exclude_args)

    return graph
예제 #19
0
def read_lgr(lgr, graph=None, edge_label='label'):
    """
    Read graph in LEDA format

    Nodes are added to the graph using a unique ID or with the node data
    as label depending if the graph.data.auto_nid is True or False.
    Edge data is added to the edge attributes using `edge_label` as key.
    The data types for both nodes and edges is set according to the
    specifications in the LEDA header as either string, int, float or bool.

    The data is split in a header, nodes and edges section using lines
    starting with '#header', '#nodes' and '#edges'. The first line of the
    nodes and edges section (the element count) is skipped.

    :param lgr:             LEDA graph data.
    :type lgr:              File, string, stream or URL
    :param graph:           Graph object to import LEDA data in
    :type graph:            :graphit:Graph
    :param edge_label:      edge data label name
    :type edge_label:       :py:str

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException in case of malformed LEDA file
                            or unsupported graph type. Errors raised by the
                            node/edge type conversion are not caught.
    """

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Parse LEDA file, collect lines per section
    lgr_file = open_anything(lgr)
    header = []
    nodes = []
    edges = []
    sections = (('#header', header), ('#nodes', nodes), ('#edges', edges))
    container = header
    for line in lgr_file.readlines():
        line = line.strip()
        if not line:
            continue

        # Section marker lines switch the container, they hold no data
        for marker, section in sections:
            if line.startswith(marker):
                container = section
                break
        else:
            container.append(line)

    # Parse LEDA header: format identifier, node type, edge type and
    # directionality. Empty or truncated data is reported as format error
    # instead of failing with an IndexError.
    if len(header) < 4 or header[0] != 'LEDA.GRAPH':
        raise GraphitException('File is not a valid LEDA graph format')

    # Node and edge data types and graph directionality
    node_type = data_types.get(header[1])
    edge_type = data_types.get(header[2])
    graph.directed = int(header[3]) == -1

    # Parse LEDA nodes, map 1-based position in file to graph node ID
    node_mapping = {}
    for i, node in enumerate(nodes[1:], start=1):
        data = node.strip('|{}|') or None
        if node_type and data:
            data = node_type(data)
        nid = graph.add_node(data)
        node_mapping[i] = nid

    # Parse LEDA edges
    for edge in edges[1:]:

        # Split in at most four fields, the label is the remainder of the
        # line and may contain white space itself
        try:
            source, target, reversal, label = edge.split(None, 3)
        except ValueError:
            raise GraphitException(
                'Too few fields in LEDA edge {0}'.format(edge))

        attr = {edge_label: label.strip('|{}|') or None}
        if edge_type and attr[edge_label]:
            attr[edge_label] = edge_type(attr[edge_label])
        graph.add_edge(node_mapping[int(source)], node_mapping[int(target)],
                       **attr)

    return graph
예제 #20
0
def write_web(graph, orm_data_tag='haddock_type', indent=2, root_nid=None):
    """
    Export a graph in Spyder .web format

    Leaf nodes without a value (Python None) are not exported unless they
    define `orm_data_tag`, in which case an empty data block is written.

    .. note::
    Web graph export uses the Graph iternodes and iteredges methods to retrieve
    nodes and edges and 'get' the data labels. The behaviour of this process is
    determined by the single node/edge mixin classes and the ORM mapper.

    The node tools and ORM of the graph are replaced for the duration of the
    export and restored afterwards, also when the export fails.

    :param graph:          Graph object to export
    :type graph:           :graphit:Graph
    :param orm_data_tag:   data key to use for .web data identifier
    :type orm_data_tag:    :py:str
    :param indent:         .web file white space indentation level
    :type indent:          :py:int
    :param root_nid:       Root node ID in graph hierarchy. Resolved using
                           `resolve_root_node` if not defined.

    :return:               Spyder .web graph representation
    :rtype:                :py:str
    :raises:               GraphitException if `root_nid` is not in the
                           graph or no root node could be resolved
    """

    # Build ORM with format specific conversion classes
    weborm = GraphORM()
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'activereslist')
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'passivereslist')

    # Resolve the root node (if any) for hierarchical data structures.
    # A user defined root node is validated and used as is, only resolve
    # the root automatically when none was defined.
    if root_nid:
        if root_nid not in graph.nodes:
            raise GraphitException(
                'Root node ID {0} not in graph'.format(root_nid))
    else:
        root_nid = resolve_root_node(graph)
        if root_nid is None:
            raise GraphitException('Unable to resolve root node ID')

    # Set current NodeTools and ORM aside and register new ones
    curr_nt = graph.node_tools
    curr_orm = graph.orm
    graph.node_tools = WebNodeTools
    graph.orm = weborm

    # Create empty file buffer
    string_buffer = StringIO()

    # Traverse node hierarchy
    def _walk_dict(node, indent_level):

        padding = ' ' * indent_level

        # First, collect all leaf nodes and write. Sort according to 'key'
        leaves = [n for n in node.children(include_self=True) if n.isleaf]
        for leaf in sorted(leaves, key=lambda obj: obj.key):

            # Do not export nodes that have no data or None but do export
            # empty data blocks (has orm_data_tag)
            if leaf.get(graph.data.value_tag, None) is None:
                if leaf.get(orm_data_tag):
                    string_buffer.write('{0}{1} = {2} (\n'.format(
                        padding, leaf.get(graph.data.key_tag),
                        leaf.get(orm_data_tag)))
                    string_buffer.write('{0}),\n'.format(padding))
                continue

            # Format 'Array' types when they are list style leaf nodes
            if leaf.get('is_array', False) or leaf.get('type') == 'array':
                string_buffer.write('{0}{1} = {2} (\n'.format(
                    padding, leaf.get(graph.data.key_tag),
                    leaf.get(orm_data_tag)))

                array_indent = indent_level + indent
                for array_type in leaf.get(graph.data.value_tag, default=[]):
                    string_buffer.write('{0}{1},\n'.format(
                        ' ' * array_indent, array_type))

                string_buffer.write('{0}),\n'.format(padding))

            # Format key, value pairs
            else:
                string_buffer.write('{0}{1} = {2},\n'.format(
                    padding, leaf.get(graph.data.key_tag),
                    leaf.get(graph.data.value_tag, default='')))

        # Second, process child non-leaf nodes
        for child in [n for n in node.children() if not n.isleaf]:

            # Write block header.
            # NOTE(review): this evaluates as
            # `(not is_array) or type == 'array'` which differs from the
            # array test used for leaf nodes above - confirm intent.
            key = ''
            if not child.get('is_array',
                             False) or child.get('type') == 'array':
                key = '{0} = '.format(child.get(graph.data.key_tag))
            string_buffer.write('{0}{1}{2} (\n'.format(
                padding, key, child.get(orm_data_tag)))

            # Walk children with the data block indented one level down
            _walk_dict(child, indent_level + indent)

            # Close data block at the indentation level of its header
            string_buffer.write('{0}),\n'.format(padding))

    try:
        # Build adjacency only once
        with graph.adjacency:
            rootnode = graph.getnodes(root_nid)

        if rootnode.isleaf:
            _walk_dict(rootnode, 0)
        else:
            string_buffer.write('{0} (\n'.format(rootnode.get(orm_data_tag)))
            _walk_dict(rootnode, indent)
            string_buffer.write(')\n')
    finally:
        # Restore original ORM and NodeTools, also if the export failed
        graph.node_tools = curr_nt
        graph.orm = curr_orm

    logger.info('Graph {0} exported in WEB format'.format(repr(graph)))

    # Reset buffer cursor
    string_buffer.seek(0)
    return string_buffer.read()
예제 #21
0
def read_web(web,
             graph=None,
             orm_data_tag='haddock_type',
             auto_parse_format=True):
    """
    Import hierarchical data structures defined in the Spider .web format

    The data block type identifiers used in the .web format are stored in
    the nodes using the `orm_data_tag` attribute. These can be used by the
    Graph ORM mapper for custom data exchange in the graph.

    The data is parsed line by line: a line ending with '(' opens a new
    object block, a line starting with ')' closes the current one, a
    'key = value' line is added as leaf node and any other single value is
    collected as array item of the current object.

    The ORM of the graph is replaced for the duration of the import and
    restored afterwards, also when the import fails.

    :param web:               Spider .web data format to import
    :type web:                file, string, stream or URL
    :param graph:             graph object to import .web data in
    :type graph:              :graphit:GraphAxis
    :param orm_data_tag:      data key to use for .web data identifier
    :type orm_data_tag:       :py:str
    :param auto_parse_format: automatically detect basic format types using JSON decoding
    :type auto_parse_format:  :py:bool

    :return:                  Graph object
    :rtype:                   :graphit:GraphAxis
    :raises:                  GraphitException if `graph` is not a GraphAxis,
                              AssertionError if the object block opening and
                              closing tags are not balanced
    """

    web_file = open_anything(web)
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Build .web parser ORM with format specific conversion classes
    weborm = GraphORM()
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'activereslist')
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'passivereslist')

    # Set current ORM aside and register parser ORM.
    curr_orm = graph.orm
    graph.orm = weborm

    curr_obj_nid = None
    object_open_tags = 0
    object_close_tags = 0
    array_key_counter = 1
    array_store = []
    try:
        for i, line in enumerate(web_file.readlines()):
            line = line.strip()
            if not line:
                continue

            # Detect start of new object definition
            if line.endswith('('):

                # Process data
                meta_data = [n.strip() for n in line.strip('(').split('=', 1)]
                ddict = {orm_data_tag: meta_data[-1], 'is_array': False}
                if len(meta_data) > 1:
                    node_key = meta_data[0]
                else:
                    # Object without key is an array item, number it
                    node_key = 'item{0}'.format(array_key_counter)
                    ddict['is_array'] = True
                    array_key_counter += 1

                # Clear the array store
                array_store = []

                # First object defines graph root
                if graph.empty():
                    curr_obj_nid = graph.add_node(node_key, **ddict)
                    graph.root = curr_obj_nid

                # Add new object as child of current object
                else:
                    child_obj_nid = graph.add_node(node_key, **ddict)
                    graph.add_edge(curr_obj_nid, child_obj_nid)
                    curr_obj_nid = child_obj_nid

                object_open_tags += 1

            # Detect end of object definition
            elif line.startswith(')'):

                # If there is data in the array store, add it to node
                if array_store:
                    array_node = graph.getnodes(curr_obj_nid)
                    array_node.is_array = True
                    array_node.set(graph.data.value_tag, array_store)

                # Reset array key counter
                array_key_counter = 1

                # Move one level up the object tree
                curr_obj_nid = node_parent(graph, curr_obj_nid,
                                           graph.root) or graph.root
                object_close_tags += 1

            # Parse object parameters
            else:

                # Parse key,value pairs and add as leaf node
                params = [n.strip() for n in line.rstrip(',').split('=', 1)]

                if '=' in line and len(params) == 2:
                    leaf_nid = graph.add_node(params[0])
                    graph.add_edge(curr_obj_nid, leaf_nid)

                    value = params[1]
                    if auto_parse_format:
                        value = json_decode_params(params[1])

                    leaf_node = graph.getnodes(leaf_nid)
                    leaf_node.set(graph.data.value_tag, value)

                # Parse single values as array data
                elif len(params) == 1:

                    value = params[0]
                    if auto_parse_format:
                        value = json_decode_params(params[0])

                    # Store array items as nodes
                    array_store.append(value)

                else:
                    logger.warning(
                        'Unknown .web data formatting on line: {0}, {1}'.
                        format(i, line))

        # Object blocks opening '(' and closing ')' tag count should be
        # balanced
        if object_open_tags != object_close_tags:
            raise AssertionError(
                'Unbalanced object block, something is wrong with the file format'
            )
    finally:
        # Always release the file and restore the original ORM so a failed
        # import does not leave the parser ORM registered on the graph
        web_file.close()
        graph.orm = curr_orm

    # Root is of type 'array', rename key from 'item1' to 'project'
    root = graph.getnodes(graph.root)
    root.key = 'project'

    return graph
예제 #22
0
def read_dot(dot, graph=None):
    """
    Read graph in DOT format

    The data is consumed from a `StreamReader` in chunks that end at one of
    the characters newline, ';', '[', ']' or '}'. Every non-empty chunk is
    classified as comment, group, subgraph header, node attribute block,
    edge statement, attribute assignment or node statement and handled
    accordingly. Attributes collected for a 'subgraph' or 'node' block are
    applied to the nodes gathered since the block started once the block
    ends.

    :param dot:             DOT graph data.
    :type dot:              File, string, stream or URL
    :param graph:           Graph object to import DOT data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph
    """

    dot_stream = StreamReader(open_anything(dot))

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Parser state: name of the attribute block being parsed ('subgraph',
    # 'node' or None), the attributes collected for it and the edges and
    # nodes parsed since the block started
    block = None
    node_attr = {}
    edges = []
    nodes = []

    # Keep reading until a chunk did not match any rule (parse = False) and
    # the stream reports no more data
    parse = True
    init_graph = False
    while parse or dot_stream.has_more:

        # Parse start graph block
        # NOTE(review): the result replaces `graph` even when it is None,
        # in which case the code below works with graph = None until the
        # next iteration - confirm parse_graph_type never returns None
        # after the data has been consumed.
        if not init_graph:
            graph = parse_graph_type(dot_stream, graph)
            if graph is not None:
                init_graph = True

        # Parse up to DOT reserved chars
        line, char = dot_stream.read_upto_char(('\n', ';', '[', ']', '}'))
        line = line.strip() if line else ''

        if line:

            # Comment line
            if line[0] in ('/', '#'):
                logging.info('Skip DOT comment line: "{0}"'.format(line))

                # Read till end of line
                if char != '\n':
                    dot_stream.read_upto_char('\n')

            # Grouping not supported
            elif line[0] == '{':
                nxt_line, nxt_char = dot_stream.read_upto_char('}')
                logging.info('Skip group: {0}{1}{2}{3}'.format(line, char, nxt_line, nxt_char))

            # Subgraphs, second token of the line is stored as attribute
            # using 'subgraph' as key
            elif 'subgraph' in line:
                block = 'subgraph'
                node_attr[block] = shlex.split(line)[1]

            # Node attribute block
            # NOTE(review): substring match, any chunk containing 'node'
            # ends up here - confirm this is intended.
            elif 'node' in line:
                block = 'node'
                node_attr = {}

            # Parse edges
            elif '--' in line or '->' in line:

                # Edge attributes follow between '[' and ']'
                attr = {}
                if char == '[':
                    attr = parse_attributes(dot_stream.read_upto_char(']')[0])
                edges.extend(parse_edge(line, graph, attr=attr))

            else:
                # Attribute assignment, stored with the current block if
                # any or as graph attribute otherwise
                if '=' in line:
                    if block in ('subgraph', 'node'):
                        node_attr.update(parse_attributes(line))
                    else:
                        graph.data.update(parse_attributes(line))
                else:
                    nodes.extend(parse_nodes(line, graph))

        # Empty chunk: end of a subgraph block (closing '}') or any empty
        # chunk while in a 'node' block
        elif (char == '}' and block == 'subgraph') or block == 'node':
            logging.info('Stop parsing {0} group at position: {1}'.format(block, dot_stream.block_pos[1]))

            # Flatten the edge tuples to unique node IDs and apply the
            # collected block attributes to all nodes of the block
            nodes.extend(list(set(sum(edges, ()))))
            for node in nodes:
                graph.nodes[node].update(node_attr)

            # Reset block state
            node_attr = {}
            edges = []
            nodes = []
            block = None

        # Empty chunk outside a block, stop unless the stream has more data
        else:
            parse = False

    return graph
예제 #23
0
def write_pydata(graph, nested=True, sep='.', default=None, allow_none=True, export_all=False, include_root=False):
    """
    Export a graph to a (nested) dictionary

    Convert graph representation of the dictionary tree into a dictionary
    using a nested or flattened representation of the dictionary hierarchy.

    In a flattened representation, the keys are concatenated using the `sep`
    separator.
    Dictionary keys and values are obtained from the node attributes using
    `key_tag` and `value_tag`. The key_tag is set to
    graph key_tag by default.

    Exporting only primary key_tag/value_tag pairs is default
    behaviour. Use the 'export_all' argument to export the full node
    dictionary.

    The ORM and node tools of the graph are replaced for the duration of the
    export and restored afterwards, also when the export fails.

    TODO: include ability to export multiple isolated subgraphs

    :param graph:          Graph object to export
    :type graph:           :graphit:GraphAxis
    :param nested:         return a nested or flattened dictionary
    :type nested:          :py:bool
    :param sep:            key separator used in flattening the dictionary
    :type sep:             :py:str
    :param default:        value to use when node value was not found using
                           value_tag.
    :type default:         mixed
    :param allow_none:     allow None values in the output
    :type allow_none:      :py:bool
    :param export_all:     Export the full node storage dictionary. Implies
                           `allow_none`.
    :type export_all:      :py:bool
    :param include_root:   wrap the exported data in a dictionary using the
                           key of the root node, if it has one
    :type include_root:    :py:bool

    :rtype:                :py:dict
    :raises:               TypeError if graph is not a GraphAxis,
                           GraphitException if the graph has no root node
    """

    # No nodes, return empty dict
    if graph.empty():
        logging.info('Graph is empty: {0}'.format(repr(graph)))
        return {}

    # Graph should be of type GraphAxis with a root node nid defined
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))
    if graph.root is None:
        raise GraphitException('No graph root node defines')

    # Build ORM with format specific conversion classes
    pydataorm = GraphORM(inherit=False)
    pydataorm.node_mapping.add(ParseDictionaryType, lambda x: x.get('format') == 'dict')
    pydataorm.node_mapping.add(ParseListType, lambda x: x.get('format') == 'list')
    pydataorm.node_mapping.add(ParseSetType, lambda x: x.get('format') == 'set')
    pydataorm.node_mapping.add(ParseTupleType, lambda x: x.get('format') == 'tuple')

    # Set current ORM and NodeTools aside and register new ones
    curr_orm = graph.orm
    curr_nt = graph.node_tools
    graph.orm = pydataorm
    graph.node_tools = PyDataNodeTools

    try:
        # Define start node for recursive export
        if len(graph) > 1:
            root = graph.getnodes(resolve_root_node(graph))
        else:
            root = graph.getnodes(list(graph.nodes.keys()))

        # If we export the full node dictionary, also export None key/value
        # pairs
        if export_all:
            allow_none = True

        # Start recursive parsing
        root_key, data = root.serialize(allow_none=allow_none, export_all=export_all, default=default)

        # Include root_key or not
        if include_root and root_key:
            data = {root_key: data}

        # Flatten the dictionary if needed
        if not nested:
            data = flatten_nested_dict(data, sep=sep)
    finally:
        # Restore original ORM and NodeTools, also if the export failed
        graph.node_tools = curr_nt
        graph.orm = curr_orm

    return data
예제 #24
0
def read_lgf(lgf, graph=None):
    """
    Read graph in LEMON Graph Format (LGF)

    The data is parsed line by line. A section line (starting with '@')
    selects the parser for the table that follows, the first line after it
    holds the table column headers and every next line is a table row.
    Empty lines and comment lines (starting with '#') end the current
    table. Sections other than nodes, edges and arcs are skipped.

    :param lgf:             LGF graph data.
    :type lgf:              File, string, stream or URL
    :param graph:           Graph object to import LGF data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph
    """

    lgf_file = open_anything(lgf)

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # LGF node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    parser = None
    header = None
    did_parse_nodes = False
    is_directed = False
    for line in lgf_file.readlines():
        line = line.strip()

        # Skip empty lines and comment lines
        if not line or line.startswith('#'):
            parser = None
            continue

        # Define parser
        if line.startswith('@') or parser is None:
            if 'nodes' in line:
                parser = parse_nodes
                did_parse_nodes = True
            elif line.startswith('@edges'):
                parser = parse_edges
            elif line.startswith('@arcs'):
                parser = parse_arcs
                is_directed = True
            else:
                if line.startswith('@attributes'):
                    logging.warning(
                        'Not importing LGF @attributes. Graph attributes not supported by graphit'
                    )

                # Unsupported section. Drop the parser of the previous
                # table, otherwise the rows of this section are imported
                # as nodes or edges when no empty line separates them.
                parser = None
            header = None
            continue

        # Immediately after parser definition, parse table column headers
        if header is None:
            header = split_line(line)
            continue

        parser(line, header, graph, did_parse_nodes=did_parse_nodes)

    # Set graph to 'directed' if arcs where parsed
    if is_directed:
        graph.directed = True

    return graph
예제 #25
0
def read_gexf(gexf_file, graph=None):
    """
    Read graphs in GEXF format

    Uses the Python build-in etree cElementTree parser to parse the XML
    document. The content and attributes of the 'meta' elements and the
    attributes of the 'graph' element are stored as graph data, 'node'
    elements are added as nodes using their 'id' attribute as node ID and
    'edge' elements are added as edges between their 'source' and 'target'.
    Remaining XML attributes are added as node or edge attributes.

    :param gexf_file:      XML data to parse
    :type gexf_file:       File, string, stream or URL
    :param graph:          Graph object to import GEXF data in
    :type graph:           :graphit:Graph

    :return:               Graph object or None if the XML could not be
                           parsed
    :rtype:                :graphit:Graph
    :raises:               GraphitException if the graph type is not
                           supported or the document has no 'gexf', 'graph'
                           or 'node' elements
    """

    gexf_file = open_anything(gexf_file)

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Try parsing the string using default Python cElementTree parser
    try:
        tree = et.fromstring(gexf_file.read())
    except et.ParseError as error:
        logging.error(
            'Unable to parse GEXF file. cElementTree error: {0}'.format(error))
        return

    # Get XMLNS namespace from root. ElementTree prefixes tags with the
    # namespace as '{uri}tag', documents without namespace have plain tags.
    xmlns = None
    for elem in tree.iter():
        if elem.tag.endswith('gexf'):
            if '}' in elem.tag:
                xmlns = elem.tag.split('}')[0] + '}'
            else:
                xmlns = ''
            break

    if xmlns is None:
        raise GraphitException(
            'Invalid GEXF file format, "gexf" tag not found')

    # Add graph meta-data and XMLNS namespace
    for meta in tree.iter('{0}meta'.format(xmlns)):
        graph.data.update(meta.attrib)
        for meta_data in meta:
            # Strip the namespace prefix (if any) from the tag name
            tag = meta_data.tag.split('}')[-1]
            graph.data[tag] = meta_data.text

    # GEXF node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    graph_tag = tree.find('{0}graph'.format(xmlns))
    if graph_tag is None:
        raise GraphitException(
            'Invalid GEXF file format, "graph" tag not found')

    # NOTE(review): a missing 'defaultedgetype' is treated as directed here,
    # confirm against the GEXF specification default.
    graph.directed = graph_tag.get('defaultedgetype', 'directed') == 'directed'
    graph.data.update(graph_tag.attrib)

    # Parse all nodes
    nodes = tree.findall('.//{0}node'.format(xmlns))
    if not nodes:
        raise GraphitException('GEXF file containes no "node" elements')
    for node in nodes:
        attr = parse_attvalue_elements(node, node.attrib, xmlns=xmlns)
        node_attr = {k: v for k, v in attr.items() if k != 'id'}
        graph.add_node(attr['id'], **node_attr)

    # Parse all edges
    for edge in tree.findall('.//{0}edge'.format(xmlns)):
        attr = edge.attrib

        # Edge direction differs from global graph directionality
        edge_directed = graph.directed
        if 'type' in attr:
            edge_directed = attr['type'] == 'directed'

        attr = parse_attvalue_elements(edge, attr, xmlns=xmlns)
        edge_attr = {
            k: v
            for k, v in attr.items() if k not in ('source', 'target')
        }
        graph.add_edge(attr['source'],
                       attr['target'],
                       directed=edge_directed,
                       **edge_attr)

    logger.info('Import graph in GEXF format. XMLNS: {0}'.format(xmlns))

    return graph
예제 #26
0
def write_lgr(graph,
              node_key=None,
              edge_key=None,
              node_data_type='string',
              edge_data_type='void'):
    """
    Serialize a graph to the LEDA graph (LGR) text format

    The output consists of three sections:

    * header: the `LEDA.GRAPH` marker, the node data type, the edge data
      type and a direction flag (-1 if `graph.directed` is true, else -2).
    * nodes:  the node count followed by one `|{value}|` line per node.
      Nodes are numbered starting from 1 in `iternodes` order.
    * edges:  the edge count followed by one
      `<source nr> <target nr> 0 |{value}|` line per edge in `iteredges`
      order, where the numbers refer to the node numbering above.

    LEDA supports a single data type for all nodes and a single data type
    for all edges (written as: |{data type}|). The exported value is
    whatever the `get` method of the node or edge object returns for
    `node_key` or `edge_key` (empty string if absent), passed through
    `str`. Because values are fetched using `get`, overriding that method
    in the 'NodeTools' or 'EdgeTools' classes, or in custom classes
    registered with the graphit ORM, changes what is exported.

    `node_data_type` and `edge_data_type` are written to the header as is;
    'void' means no data. A key may only be exported when the matching
    data type is not 'void'.

    :param graph:           Graph to export
    :type graph:            :graphit:Graph
    :param node_key:        key name of node data to export, defaults to
                            `graph.data.key_tag` when None
    :type node_key:         :py:str
    :param edge_key:        key name of edge data to export
    :type edge_key:         :py:str
    :param node_data_type:  primitive data type of exported node data
    :type node_data_type:   :py:str
    :param edge_data_type:  primitive data type of exported edge data
    :type edge_data_type:   :py:str

    :return:                Graph exported as LGR format
    :rtype:                 :py:str
    :raises:                GraphitException if a node or edge key is set
                            while the matching data type is 'void'
    """

    # Fall back to the graph-wide key tag when no node key was requested
    if node_key is None:
        node_key = graph.data.key_tag

    # A key can only be exported if its data type is declared (not 'void')
    node_type_undefined = node_key is not None and node_data_type == 'void'
    edge_type_undefined = edge_key is not None and edge_data_type == 'void'
    if node_type_undefined or edge_type_undefined:
        raise GraphitException('Define node_data_type and/or edge_data_type')

    # LEDA direction flag: -1 for directed graphs, -2 otherwise
    if graph.directed:
        direction_flag = -1
    else:
        direction_flag = -2

    out = StringIO()

    # Header section
    header = '#header section\nLEDA.GRAPH\n{0}\n{1}\n'.format(
        node_data_type, edge_data_type)
    out.write(header)
    out.write('{0}\n'.format(direction_flag))

    # Nodes section, remember the 1-based line number given to every nid
    # so the edges section can refer to nodes by that number.
    out.write('#nodes section\n{0}\n'.format(len(graph.nodes)))
    nid_to_number = {}
    for number, node in enumerate(graph.iternodes(), start=1):
        node_value = str(node.get(node_key, default=''))
        out.write('|{{{0}}}|\n'.format(node_value))
        nid_to_number[node.nid] = number

    # Edges section, the literal 0 in every line fills the third column
    out.write('#edges section\n{0}\n'.format(len(graph.edges)))
    for edge in graph.iteredges():
        source_nid, target_nid = edge.nid
        source_number = nid_to_number[source_nid]
        target_number = nid_to_number[target_nid]
        edge_value = str(edge.get(edge_key, default=''))
        out.write('{0} {1} 0 |{{{2}}}|\n'.format(
            source_number, target_number, edge_value))

    logger.info('Graph {0} exported in LEDA format'.format(repr(graph)))

    # Return everything written to the buffer as a single string
    return out.getvalue()