Ejemplo n.º 1
0
    def test_manual_build(self):
        """
        Build a small network by hand with NiceCXBuilder, upload it and
        check the attributes that ended up on the network.

        The network has two nodes joined by one edge. Attributes are added
        with valid values, with repeated (duplicate) values and with
        deliberately invalid values; the final assertions expect exactly two
        attributes on the first node and one on the edge.
        """
        print('Testing: Manual build with NiceCXBuilder')
        nice_cx_builder = NiceCXBuilder()

        node_id_1 = nice_cx_builder.add_node(name=1, represents=2)
        node_id_2 = nice_cx_builder.add_node(name='node%s' % str(2), represents='DEF')

        # NOTE(review): this probe (and the two ValueError probes below) only
        # reports when the exception is raised; it does not fail the test if
        # the builder silently accepts the bad value. The attribute-count
        # assertions at the end are what actually catch that case.
        try:
            nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr', None)
        except TypeError:
            print('Correctly identified bad node value')

        nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr_double', [1.2, 2.5, 2.7])

        nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr_int', [16, 4, 8])
        nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr_int', [16, 4, 8]) # duplicate - should be ignored
        nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr_int', [16, 4, 8]) # duplicate - should be ignored

        try:
            nice_cx_builder.add_node_attribute(node_id_1, 'testing_attr2', [10.2, 20.5, 'abc'], type='list_of_float')
        except ValueError:
            print('Correctly identified bad value in node attribute list')

        edge_id = nice_cx_builder.add_edge(id=1, source=node_id_1, target=node_id_2, interaction='test-relationship')

        nice_cx_builder.add_edge_attribute(edge_id, 'testing_attr', [1.2, 2.5, '2.7'], type='list_of_float')
        nice_cx_builder.add_edge_attribute(edge_id, 'testing_attr', [1.2, 2.5, '2.7'], type='list_of_float') # duplicate - should be ignored
        nice_cx_builder.add_edge_attribute(edge_id, 'testing_attr', [1.2, 2.5, '2.7'], type='list_of_float') # duplicate - should be ignored

        try:
            nice_cx_builder.add_edge_attribute(edge_id, 'testing_attr2', [10.2, 20.5, 'abc'], type='list_of_float')
        except ValueError:
            print('Correctly identified bad value in list')

        nice_cx_builder.set_name('Network manual build')
        nice_cx_builder.nice_cx.set_namespaces({'ndex context': 'http://dev.ndexbio.org'})
        nice_cx = nice_cx_builder.get_nice_cx()

        # Captured before the upload, asserted on afterwards
        node_attrs = nice_cx.get_node_attributes(node_id_1)
        edge_attrs = nice_cx.get_edge_attributes(edge_id)

        upload_message = nice_cx.upload_to(upload_server, upload_username, upload_password)

        self.assertTrue(upload_message)

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails
        node1_attr_double = nice_cx.get_node_attribute(node_id_1, 'testing_attr_double')
        self.assertEqual(node1_attr_double.get('d'), 'list_of_double')

        node1_attr_int = nice_cx.get_node_attribute(node_id_1, 'testing_attr_int')
        self.assertEqual(node1_attr_int.get('d'), 'list_of_integer')

        # Expected survivors: testing_attr_double and testing_attr_int on the
        # node, testing_attr on the edge
        self.assertEqual(len(node_attrs), 2)
        self.assertEqual(len(edge_attrs), 1)
Ejemplo n.º 2
0
def load_matrix_to_ndex(x, x_cols, x_rows, server, username, password, name):
    """
    Store a numpy matrix in a network and upload that network.

    The raw bytes of the matrix (C order) are base64-encoded and saved,
    together with the column headers, row headers and dtype name, as the
    opaque aspects ``matrix``, ``matrix_cols``, ``matrix_rows`` and
    ``matrix_dtype`` on a network that contains a single node named
    ``Sim Matrix``.

    :param x: matrix to upload
    :type x: :py:class:`numpy.ndarray`
    :param x_cols: column headers
    :type x_cols: list
    :param x_rows: row headers
    :type x_rows: list
    :param server: server passed through to ``upload_to``
    :type server: str
    :param username: account name passed through to ``upload_to``
    :type username: str
    :param password: password passed through to ``upload_to``
    :type password: str
    :param name: name to give the created network
    :type name: str
    :raises TypeError: if **x** is not a :py:class:`numpy.ndarray` or
                       **x_cols** / **x_rows** is not a :py:class:`list`
    :return: whatever ``upload_to`` returns (presumably the URL of the
             uploaded network -- confirm against ``upload_to``)
    """
    # TypeError is a subclass of Exception, so callers that caught the
    # previously raised bare Exception keep working.
    if not isinstance(x, np.ndarray):
        raise TypeError('Provided matrix is not of type numpy.ndarray')
    if not isinstance(x_cols, list):
        raise TypeError(
            'Provided column header is not in the correct format.  Please provide a list of strings'
        )
    if not isinstance(x_rows, list):
        raise TypeError(
            'Provided row header is not in the correct format.  Please provide a list of strings'
        )

    if not x.flags['C_CONTIGUOUS']:
        x = np.ascontiguousarray(x)

    serialized = base64.b64encode(x.tobytes())

    nice_cx_builder = NiceCXBuilder()
    nice_cx_builder.set_name(name)
    # Single placeholder node; the matrix itself lives in the opaque aspects
    nice_cx_builder.add_node(name='Sim Matrix', represents='Sim Matrix')

    nice_cx_builder.add_opaque_aspect('matrix', [{'v': serialized}])
    nice_cx_builder.add_opaque_aspect('matrix_cols', [{'v': x_cols}])
    nice_cx_builder.add_opaque_aspect('matrix_rows', [{'v': x_rows}])
    nice_cx_builder.add_opaque_aspect('matrix_dtype', [{'v': x.dtype.name}])

    nice_cx = nice_cx_builder.get_nice_cx()

    ont_url = nice_cx.upload_to(server, username, password)

    return ont_url
Ejemplo n.º 3
0
    def test_load_edges(self):
        """
        Build a two-node network joined by one 'neighbor' edge and check
        that uploading it returns a truthy result.
        """
        print('Testing: NiceCXBuilder')
        builder = NiceCXBuilder()

        source_id = builder.add_node(name='node%s' % str(1), represents='ABC')
        target_id = builder.add_node(name='node%s' % str(2), represents='DEF')
        builder.add_edge(id=1,
                         source=source_id,
                         target=target_id,
                         interaction='neighbor')

        builder.set_name('Network manual build')
        namespaces = {'ndex context': 'http://dev.ndexbio.org'}
        builder.nice_cx.set_namespaces(namespaces)

        network = builder.get_nice_cx()
        upload_result = network.upload_to(upload_server,
                                          upload_username,
                                          upload_password)
        self.assertTrue(upload_result)
Ejemplo n.º 4
0
def load_matrix_to_ndex(x, x_cols, x_rows, server, username, password, name):
    """
    Store a numpy matrix in a network and upload that network.

    The raw bytes of the matrix (C order) are base64-encoded and saved,
    together with the column headers, row headers and dtype name, as the
    opaque aspects ``matrix``, ``matrix_cols``, ``matrix_rows`` and
    ``matrix_dtype`` on a network that contains a single node named
    ``Sim Matrix``.

    :param x: matrix to upload
    :type x: :py:class:`numpy.ndarray`
    :param x_cols: column headers
    :type x_cols: list
    :param x_rows: row headers
    :type x_rows: list
    :param server: server passed through to ``upload_to``
    :type server: str
    :param username: account name passed through to ``upload_to``
    :type username: str
    :param password: password passed through to ``upload_to``
    :type password: str
    :param name: name to give the created network
    :type name: str
    :raises TypeError: if **x** is not a :py:class:`numpy.ndarray` or
                       **x_cols** / **x_rows** is not a :py:class:`list`
    :return: whatever ``upload_to`` returns (presumably the URL of the
             uploaded network -- confirm against ``upload_to``)
    """
    # TypeError is a subclass of Exception, so callers that caught the
    # previously raised bare Exception keep working.
    if not isinstance(x, np.ndarray):
        raise TypeError('Provided matrix is not of type numpy.ndarray')
    if not isinstance(x_cols, list):
        raise TypeError('Provided column header is not in the correct format.  Please provide a list of strings')
    if not isinstance(x_rows, list):
        raise TypeError('Provided row header is not in the correct format.  Please provide a list of strings')

    if not x.flags['C_CONTIGUOUS']:
        x = np.ascontiguousarray(x)

    serialized = base64.b64encode(x.tobytes())

    nice_cx_builder = NiceCXBuilder()
    nice_cx_builder.set_name(name)
    # Single placeholder node; the matrix itself lives in the opaque aspects
    nice_cx_builder.add_node(name='Sim Matrix', represents='Sim Matrix')

    nice_cx_builder.add_opaque_aspect('matrix', [{'v': serialized}])
    nice_cx_builder.add_opaque_aspect('matrix_cols', [{'v': x_cols}])
    nice_cx_builder.add_opaque_aspect('matrix_rows', [{'v': x_rows}])
    nice_cx_builder.add_opaque_aspect('matrix_dtype', [{'v': x.dtype.name}])

    nice_cx = nice_cx_builder.get_nice_cx()

    ont_url = nice_cx.upload_to(server, username, password)

    return ont_url
Ejemplo n.º 5
0
def convert_pandas_to_nice_cx_with_load_plan(pandas_dataframe,
                                             load_plan,
                                             max_rows=None,
                                             name=None,
                                             description=None,
                                             network_attributes=None,
                                             provenance=None):
    """
    Build a network from a DataFrame by applying a load plan to every row.

    The load plan is first validated against ``loading_plan_schema.json``
    (read from the directory containing this module). Each row is then handed
    to ``process_row`` together with the builder and a shared node lookup.

    :param pandas_dataframe: rows to convert
    :type pandas_dataframe: :py:class:`pandas.DataFrame`
    :param load_plan: load plan; if it has a truthy ``context`` entry, that
                      entry is JSON-encoded and added as the ``@context``
                      network attribute
    :type load_plan: dict
    :param max_rows: if truthy, stop processing once the number of processed
                     rows exceeds ``max_rows + 2``
    :type max_rows: int
    :param name: network name; overrides a ``name`` entry found in
                 **network_attributes**
    :type name: str
    :param description: if truthy, added as the ``description`` network
                        attribute
    :type description: str
    :param network_attributes: dicts with keys ``n`` (name), ``v`` (value)
                               and optionally ``d`` (data type). This list is
                               not modified.
    :type network_attributes: list
    :param provenance: accepted for interface compatibility; not used
    :raises jsonschema.ValidationError: if **load_plan** does not satisfy the
                                        schema
    :return: the network produced by the builder's ``get_nice_cx()``
    """
    # Validate the load plan against the schema before doing any work
    here = path.abspath(path.dirname(__file__))
    with open(path.join(here, 'loading_plan_schema.json')) as json_file:
        plan_schema = json.load(json_file)

    jsonschema.validate(load_plan, plan_schema)

    node_lookup = {}
    nice_cx_builder = NiceCXBuilder()

    # Work on a private copy so that appending "@context" below never leaks
    # into the list object owned by the caller.
    network_attributes = list(network_attributes) if network_attributes else []

    # Add context if it is defined
    context = load_plan.get('context')
    if context:
        network_attributes.append({"n": "@context", "v": json.dumps(context)})

    total_row_count = pandas_dataframe.shape[0]
    row_count = 0
    for _, row in pandas_dataframe.iterrows():
        # process_row adds whatever the load plan extracts from this row to
        # the builder, sharing node_lookup across rows
        process_row(nice_cx_builder, load_plan, row, node_lookup)
        row_count += 1

        # NOTE(review): this lets max_rows + 3 rows through before stopping;
        # the "+ 2" is kept as found -- confirm whether it is intentional.
        if max_rows and row_count > max_rows + 2:
            break

        if row_count % 2500 == 0:
            logger.info('processing %s out of %s edges',
                        row_count, total_row_count)

    for attribute in network_attributes:
        if attribute.get("n") == "name":
            nice_cx_builder.set_name(attribute.get("v"))
        else:
            nice_cx_builder.add_network_attribute(
                name=attribute.get('n'),
                values=attribute.get('v'),
                type=attribute.get('d'))

    # name and description take precedence over any prior values
    if name:
        nice_cx_builder.set_name(name)
    if description:
        nice_cx_builder.add_network_attribute(name='description',
                                              values=description)

    return nice_cx_builder.get_nice_cx()
Ejemplo n.º 6
0
def create_nice_cx_from_pandas(df,
                               source_field=None,
                               target_field=None,
                               source_node_attr=[],
                               target_node_attr=[],
                               edge_attr=[],
                               edge_interaction=None,
                               source_represents=None,
                               target_represents=None):
    """
    Create a :py:func:`~ndex2.nice_cx_network.NiceCXNetwork` from a :py:class:`pandas.DataFrame`
    in which each row specifies one edge in the network.

    .. versionchanged:: 3.5.0
        Removed print statements showing progress and network name is
        now being set

    If **source_field** and **target_field** are not both provided the
    :py:class:`pandas.DataFrame` is treated as 'SIF' format, where the first
    two columns (by position) specify the source and target node of the edge.
    If a third column exists its value is used as the edge interaction,
    otherwise the interaction is "interacts-with". Any further columns are
    ignored.

    If both the source_field and target_field arguments are provided, then those and any other
    arguments refer to headers in the :py:class:`pandas.DataFrame`, controlling the
    mapping of columns to the attributes of nodes, and edges in the resulting
    :py:func:`~ndex2.nice_cx_network.NiceCXNetwork`.

    If a header is not mapped, the corresponding column is ignored.

    If the edge_interaction is not specified, interaction is set to "interacts-with"

    .. code-block:: python

        import ndex2
        import pandas as pd

        data = {'source': ['Node 1','Node 2'],
                'target': ['Node 2','Node 3'],
                'interaction': ['helps', 'hurts']}
        df = pd.DataFrame.from_dict(data)

        net = ndex2.create_nice_cx_from_pandas(df, source_field='source',
                                               target_field='target',
                                               edge_interaction='interaction')

        print(net.get_nodes())
        print(net.get_edges())

    .. note::
        Node names, `represents` values and node attribute values are
        converted with :py:func:`str`. Edge attribute values are passed on
        as found in the row. A column named ``citation`` whose value is not
        already a :py:class:`list` is stored as a one element list with
        type ``list_of_string``.


    :param df: Pandas dataframe to process
    :type df: :py:class:`pandas.DataFrame`
    :param source_field: header name specifying the name of the source node.
    :type source_field: str
    :param target_field: header name specifying the name of the target node.
    :type target_field: str
    :param source_node_attr: list of header names specifying attributes of the source node.
    :type source_node_attr: list
    :param target_node_attr: list of header names specifying attributes of the target node.
    :type target_node_attr: list
    :param edge_attr: list of header names specifying attributes of the edge.
    :type edge_attr: list
    :param edge_interaction: header name holding the interaction of each edge;
                             if the row has no truthy value under that header
                             the argument itself is used as the interaction.
                             Defaults to "interacts-with"
    :type edge_interaction: str
    :param source_represents: header name specifying the `represents` value
                              of the source node; the node name is used if
                              ``None``
    :type source_represents: str
    :param target_represents: header name specifying the `represents` value
                              of the target node; the node name is used if
                              ``None``
    :type target_represents: str
    :return: NiceCXNetwork
    :rtype: :py:func:`~ndex2.nice_cx_network.NiceCXNetwork`
    """
    # NOTE: the list defaults above are only ever iterated, never mutated,
    # so sharing them between calls is harmless.
    cx_builder = NiceCXBuilder()
    cx_builder.set_name('created from pandas by '
                        'ndex2.create_nice_cx_from_pandas()')

    if source_field and target_field:
        for index, row in df.iterrows():

            # =============
            # ADD NODES
            # =============
            source_name = str(row[source_field])
            if source_represents is not None:
                source_rep = str(row[source_represents])
            else:
                source_rep = source_name
            source_node_id = cx_builder.add_node(name=source_name,
                                                 represents=source_rep)

            target_name = str(row[target_field])
            if target_represents is not None:
                target_rep = str(row[target_represents])
            else:
                target_rep = target_name
            target_node_id = cx_builder.add_node(name=target_name,
                                                 represents=target_rep)

            # =============
            # ADD EDGES
            # =============
            if edge_interaction:
                if row.get(edge_interaction):
                    use_this_interaction = row[edge_interaction]
                else:
                    use_this_interaction = edge_interaction
            else:
                use_this_interaction = 'interacts-with'

            cx_builder.add_edge(id=index,
                                source=source_node_id,
                                target=target_node_id,
                                interaction=use_this_interaction)

            # ==========================================
            # ADD SOURCE AND TARGET NODE ATTRIBUTES
            # ==========================================
            # TODO - need to be smarter about how data type is inferred
            for node_id, attr_names in ((source_node_id, source_node_attr),
                                        (target_node_id, target_node_attr)):
                for attr_name in attr_names:
                    raw_value = row[attr_name]
                    if attr_name == 'citation' and \
                            not isinstance(raw_value, list):
                        # Pass a real one element list of strings (the same
                        # shape create_nice_cx_from_networkx uses) instead of
                        # the str() of a list, and leave ``row`` untouched so
                        # later loops over the same column see the raw value.
                        cx_builder.add_node_attribute(node_id,
                                                      attr_name,
                                                      [str(raw_value)],
                                                      type='list_of_string')
                    else:
                        cx_builder.add_node_attribute(node_id,
                                                      attr_name,
                                                      str(raw_value),
                                                      type=None)

            # ==============================
            # ADD EDGE ATTRIBUTES
            # ==============================
            # TODO - need to be smarter about how data type is inferred
            for attr_name in edge_attr:
                edge_value = row[attr_name]
                attr_type = None
                if attr_name == 'citation' and \
                        not isinstance(edge_value, list):
                    edge_value = [edge_value]
                    attr_type = 'list_of_string'

                cx_builder.add_edge_attribute(property_of=index,
                                              name=attr_name,
                                              values=edge_value,
                                              type=attr_type)

    else:
        for index, row in df.iterrows():
            # =============
            # ADD NODES
            # =============
            # .iloc is explicitly positional; plain row[0] is looked up by
            # label first and its positional fallback is deprecated in pandas
            source_name = str(row.iloc[0])
            target_name = str(row.iloc[1])

            source_node_id = cx_builder.add_node(name=source_name,
                                                 represents=source_name)

            target_node_id = cx_builder.add_node(name=target_name,
                                                 represents=target_name)

            # =============
            # ADD EDGES
            # =============
            if len(row) > 2:
                interaction = row.iloc[2]
            else:
                interaction = 'interacts-with'

            cx_builder.add_edge(id=index,
                                source=source_node_id,
                                target=target_node_id,
                                interaction=interaction)

    return cx_builder.get_nice_cx()
Ejemplo n.º 7
0
def create_nice_cx_from_networkx(G):
    """
    Creates a :py:class:`~ndex2.nice_cx_network.NiceCXNetwork` based on a
    :class:`networkx.Graph` graph.

    .. versionchanged:: 3.5.0
       Major refactor to fix multiple bugs #83, #84, #90

    .. code-block:: python

        import ndex2
        import networkx as nx

        G = nx.Graph()
        G.add_node(1, someval=1.5, name='node 1')
        G.add_node(2, someval=2.5, name='node 2')
        G.add_edge(1, 2, weight=5)

        print(ndex2.create_nice_cx_from_networkx(G).to_cx())

    The resulting :py:class:`~ndex2.nice_cx_network.NiceCXNetwork`
    contains the nodes, edges and their attributes from the
    :class:`networkx.Graph`
    graph and also preserves the graph 'pos' attribute as a CX
    cartesian coordinates aspect
    :py:const:`~ndex2.constants.CARTESIAN_LAYOUT_ASPECT`
    with the values of `Y` inverted

    Description of how conversion is performed:

    **Network:**

    * Network name is set to the value of ``G.graph.get('name')`` or to
      ``created from networkx by ndex2.create_nice_cx_networkx()`` if
      `name` is ``None`` or not present

    **Nodes:**

    * Node id is value of ``n`` from this for loop:
      ``for n, d in G.nodes(data=True):`` if ``n`` is **NOT** an
      :py:class:`int`, new ids starting from ``0`` are used

    * Node name is value of `name` attribute on the node or is
      set to id of node if `name` is not present.

    * Node `represents` is value of `represents` attribute on the
      node or is set to node `name` if ``None`` or not present

    **Edges:**

    * Interaction is value of `interaction` attribute on the edge
      or is set to ``neighbor-of`` if ``None`` or not present

    * Each edge endpoint is resolved on its own: an :py:class:`int`
      endpoint is used directly as the node id, any other endpoint is
      looked up in the ids assigned while adding nodes

    .. note::

        Data types are inferred by using :py:func:`isinstance` and
        converted to corresponding CX data types. For list items,
        only the 1st item is examined to determine type

    :param G: Graph to convert
    :type G: :class:`networkx.Graph`
    :raises Exception: if **G** parameter is ``None`` or there is another error
                       in conversion
    :return: Converted network
    :rtype: :py:class:`~ndex2.nice_cx_network.NiceCXNetwork`
    """
    if G is None:
        raise Exception('Networkx input is empty')

    cx_builder = NiceCXBuilder()

    network_name = G.graph.get('name')
    if network_name is not None:
        cx_builder.set_name(network_name)
    else:
        cx_builder.set_name('created from networkx by '
                            'ndex2.create_nice_cx_networkx()')

    def _resolve_node_id(node):
        """Return the CX node id for one edge endpoint."""
        if isinstance(node, int):
            return node
        return cx_builder.node_id_lookup.get(node)

    for n, d in G.nodes(data=True):
        if isinstance(n, int):
            n_name = d.get('name')
            if n_name is None:
                n_name = str(n)
            node_id = cx_builder.add_node(name=n_name,
                                          represents=d.get('represents'),
                                          id=n,
                                          map_node_ids=True)
        else:
            node_id = cx_builder.add_node(name=n,
                                          represents=d.get('represents'),
                                          map_node_ids=True)

        # ======================
        # ADD NODE ATTRIBUTES
        # ======================
        for k, v in d.items():

            # if node attribute is 'name' skip it cause that will be used
            # for name of node, also skip 'represents'
            # fix for https://github.com/ndexbio/ndex2-client/issues/84
            if k == 'name' or k == 'represents':
                continue

            use_this_value, attr_type = cx_builder._infer_data_type(
                v, split_string=False)

            # This might go away, waiting on response to
            # https://ndexbio.atlassian.net/browse/UD-2181
            if k == 'citation' and not isinstance(use_this_value, list):
                use_this_value = [str(use_this_value)]
                attr_type = constants.LIST_OF_STRING
            if use_this_value is not None:
                cx_builder.add_node_attribute(node_id,
                                              k,
                                              use_this_value,
                                              type=attr_type)

    # Edge ids are assigned sequentially in iteration order
    for index, (u, v, d) in enumerate(G.edges(data=True)):
        # =============
        # ADD EDGES
        # =============
        interaction = d.get('interaction')
        if interaction is None or interaction == 'null':
            interaction = 'neighbor-of'

        # Resolve source and target independently so a graph mixing int and
        # non-int node keys does not get the id strategy of ``u`` applied
        # to ``v`` as well.
        cx_builder.add_edge(source=_resolve_node_id(u),
                            target=_resolve_node_id(v),
                            interaction=interaction,
                            id=index)

        # ==============================
        # ADD EDGE ATTRIBUTES
        # ==============================
        for k, val in d.items():
            if k == 'interaction':
                continue
            use_this_value, attr_type = cx_builder._infer_data_type(
                val, split_string=False)

            # This might go away, waiting on response to
            # https://ndexbio.atlassian.net/browse/UD-2181
            if k == 'citation' and not isinstance(use_this_value, list):
                use_this_value = [str(use_this_value)]
                attr_type = constants.LIST_OF_STRING

            if use_this_value is not None:
                cx_builder.add_edge_attribute(property_of=index,
                                              name=k,
                                              values=use_this_value,
                                              type=attr_type)

    if hasattr(G, 'pos'):
        aspect = _create_cartesian_coordinates_aspect_from_networkx(G)
        cx_builder.add_opaque_aspect(constants.CARTESIAN_LAYOUT_ASPECT, aspect)

    return cx_builder.get_nice_cx()