예제 #1
0
def minimum_weight_full_matching(G, top_nodes=None, weight='weight'):
    """Return a minimum weight full matching of a complete bipartite graph.

    Parameters
    ----------
    G : NetworkX graph
        Bipartite graph in which every node of one side is joined to every
        node of the other side.
    top_nodes : container, optional
        Nodes of one bipartite side; forwarded to ``nx.bipartite.sets``.
    weight : string, optional (default='weight')
        Edge data key forwarded to ``biadjacency_matrix`` to fill the
        weight matrix.

    Returns
    -------
    dict
        Maps every matched node to its partner, in both directions.

    Raises
    ------
    ValueError
        If some pair of nodes from opposite sides is not joined by an edge.
    ImportError
        If SciPy cannot be imported.
    """
    try:
        import scipy.optimize
    except ImportError:
        raise ImportError('minimum_weight_full_matching requires SciPy: ' +
                          'https://scipy.org/')
    left, right = nx.bipartite.sets(G, top_nodes)
    row_nodes = list(left)
    col_nodes = list(right)
    # The assignment is computed on a dense matrix, so a missing edge would
    # be indistinguishable from a zero weight; reject incomplete graphs.
    edge_view = G.edges()
    for u, v in itertools.product(row_nodes, col_nodes):
        # Look the pair up in both orientations.
        if not ((u, v) in edge_view or (v, u) in edge_view):
            raise ValueError('The bipartite graph must be complete.')
    sparse_weights = biadjacency_matrix(G, row_order=row_nodes,
                                        column_order=col_nodes, weight=weight)
    dense_weights = sparse_weights.toarray()
    row_idx, col_idx = scipy.optimize.linear_sum_assignment(dense_weights)
    # First record left -> right partners ...
    matching = {}
    for r, c in zip(row_idx, col_idx):
        matching[row_nodes[r]] = col_nodes[c]
    # ... then mirror them so right-side nodes can be looked up as well.
    mirrored = {partner: node for node, partner in matching.items()}
    matching.update(mirrored)
    return matching
def get_optimal_alignment(dict_pairs, model):
    """Align the content words of each sentence pair.

    For every pair, a weighted complete bipartite graph is built between the
    content words of the two sentences (keys that are neither in
    ``stop_words`` nor in ``punct``), weighted by ``model.similarity``. The
    graph's biadjacency matrix is then handed to a linear sum assignment
    solver (the Hungarian method).

    Parameters
    ----------
    dict_pairs : iterable
        Each element is indexable with ``pair[0]`` and ``pair[1]``, two
        mappings whose keys are the words of the two sentences.
    model : object
        Must provide ``similarity(word_1, word_2)`` returning a number.

    Returns
    -------
    list of list of tuple
        One list per input pair, holding ``(word_1, word_2)`` tuples. A pair
        in which either sentence has no content words yields an empty list,
        so the result stays index-aligned with ``dict_pairs``.
    """
    aligned_sents = []

    for pair in dict_pairs:
        words_1 = [
            w for w in pair[0].keys()
            if w not in stop_words and w not in punct
        ]
        words_2 = [
            w for w in pair[1].keys()
            if w not in stop_words and w not in punct
        ]

        # Nothing to align when one side has no content words; building the
        # matrix from an empty side would raise and abort the whole batch.
        if not words_1 or not words_2:
            aligned_sents.append([])
            continue

        # Distinct suffixes keep a word that occurs in both sentences from
        # collapsing into a single graph node.
        nodes_1 = [w + "_&" for w in words_1]
        nodes_2 = [w + "_@" for w in words_2]

        edges = [
            (node_1, node_2, {"weight": model.similarity(w_1, w_2)})
            for node_1, w_1 in zip(nodes_1, words_1)
            for node_2, w_2 in zip(nodes_2, words_2)
        ]

        B = nx.Graph()
        B.add_nodes_from(nodes_1, bipartite=0)
        B.add_nodes_from(nodes_2, bipartite=1)
        B.add_edges_from(edges)

        M = biadjacency_matrix(B, row_order=nodes_1,
                               column_order=nodes_2).todense()
        M = np.array(M)
        # Shift by the magnitude of the smallest similarity, then scale and
        # truncate to integers before building the cost matrix.
        minimum = M.min()
        M += abs(minimum)
        M *= 100
        M = M.astype(int)
        # NOTE(review): make_cost_matrix is presumably munkres' helper that
        # inverts a profit matrix so that minimizing cost maximizes
        # similarity -- confirm against its definition.
        cost_matrix = np.array(make_cost_matrix(M))
        row_index, col_index = linear_sum_assignment(cost_matrix)

        aligned_sents.append(
            [(words_1[i], words_2[j]) for i, j in zip(row_index, col_index)]
        )

    return aligned_sents
예제 #3
0
def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
    r"""Compute a minimum weight full matching of the bipartite graph `G`.

    Given a weighted bipartite graph :math:`G = ((U, V), E)` whose edge
    weights are real numbers :math:`w : E \to \mathbb{R}`, find a matching
    :math:`M \subseteq E` of size

    .. math::
       \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),

    whose total weight :math:`\sum_{e \in M} w(e)` is as small as possible.
    An error is raised when no matching of that size exists.

    If :math:`\lvert U \rvert = \lvert V \rvert` such a matching is usually
    called perfect. Because the two sides may differ in size here, the term
    *full* is used instead, following Karp [1]_.

    Parameters
    ----------
    G : NetworkX graph

      Undirected bipartite graph

    top_nodes : container

      Container holding every node of one of the two bipartite node sets.
      Computed when not given.

    weight : string, optional (default='weight')

       Name of the edge attribute supplying the matrix entries.

    Returns
    -------
    matches : dictionary

      Dictionary `matches` in which ``matches[v] == w`` whenever node `v`
      is matched to node `w`. Nodes left unmatched are not keys of
      `matches`.

    Raises
    ------
    ValueError
      Raised if no full matching exists.

    ImportError
      Raised if SciPy is not available.

    Notes
    -----
    Finding a minimum weight full matching is the rectangular linear
    assignment problem. The assignment itself is computed by SciPy.

    References
    ----------
    .. [1] Richard Manning Karp:
       An algorithm to Solve the m x n Assignment Problem in Expected Time
       O(mn log n).
       Networks, 10(2):143–152, 1980.

    """
    try:
        import numpy as np
        import scipy.optimize
    except ImportError as err:
        raise ImportError("minimum_weight_full_matching requires SciPy: " +
                          "https://scipy.org/") from err
    left, right = nx.bipartite.sets(G, top_nodes)
    row_nodes = list(left)
    col_nodes = list(right)
    sparse_weights = biadjacency_matrix(
        G,
        row_order=row_nodes,
        column_order=col_nodes,
        weight=weight,
        format="coo",
    )
    # Start from an all-infinite matrix and copy in only the stored entries,
    # so that a missing edge is marked unusable. Densifying with toarray
    # would turn missing edges into zero weights instead.
    dense_weights = np.full(sparse_weights.shape, np.inf)
    dense_weights[sparse_weights.row, sparse_weights.col] = sparse_weights.data
    row_idx, col_idx = scipy.optimize.linear_sum_assignment(dense_weights)
    # First record left -> right partners ...
    matching = {}
    for r, c in zip(row_idx, col_idx):
        matching[row_nodes[r]] = col_nodes[c]
    # ... then mirror them so right-side nodes can be looked up as well.
    mirrored = {partner: node for node, partner in matching.items()}
    matching.update(mirrored)
    return matching
예제 #4
0
def minimum_weight_full_matching(G, top_nodes=None, weight='weight'):
    r"""Compute the minimum weight full matching of the bipartite graph `G`.

    Given a complete weighted bipartite graph :math:`G = ((U, V), E)` whose
    edge weights are real numbers :math:`w : E \to \mathbb{R}`, find a
    maximum matching :math:`M \subseteq E`. Because the graph is assumed to
    be complete, that matching has size

    .. math::
       \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),

    and among all such matchings the one returned has the smallest total
    weight :math:`\sum_{e \in M} w(e)`.

    If :math:`\lvert U \rvert = \lvert V \rvert` such a matching is usually
    called perfect. Because the two sides may differ in size here, the term
    *full* is used instead, following Karp [1]_.

    Parameters
    ----------
    G : NetworkX graph

      Undirected bipartite graph

    top_nodes : container

      Container holding every node of one of the two bipartite node sets.
      Computed when not given.

    weight : string, optional (default='weight')

       Name of the edge attribute supplying the matrix entries.

    Returns
    -------
    matches : dictionary

      Dictionary `matches` in which ``matches[v] == w`` whenever node `v`
      is matched to node `w`. Nodes left unmatched are not keys of
      `matches`.

    Raises
    ------
    ValueError
      Raised if the input bipartite graph is not complete.

    ImportError
      Raised if SciPy is not available.

    Notes
    -----
    Finding a minimum weight full matching is the rectangular linear
    assignment problem. The assignment itself is computed by SciPy.

    References
    ----------
    .. [1] Richard Manning Karp:
       An algorithm to Solve the m x n Assignment Problem in Expected Time
       O(mn log n).
       Networks, 10(2):143–152, 1980.

    """
    try:
        import scipy.optimize
    except ImportError:
        raise ImportError('minimum_weight_full_matching requires SciPy: ' +
                          'https://scipy.org/')
    left, right = nx.bipartite.sets(G, top_nodes)
    row_nodes = list(left)
    col_nodes = list(right)
    # The assignment is computed on a dense matrix, so a missing edge would
    # be indistinguishable from a zero weight; reject incomplete graphs.
    edge_view = G.edges()
    for u, v in itertools.product(row_nodes, col_nodes):
        # Look the pair up in both orientations.
        if not ((u, v) in edge_view or (v, u) in edge_view):
            raise ValueError('The bipartite graph must be complete.')
    sparse_weights = biadjacency_matrix(G, row_order=row_nodes,
                                        column_order=col_nodes, weight=weight)
    dense_weights = sparse_weights.toarray()
    row_idx, col_idx = scipy.optimize.linear_sum_assignment(dense_weights)
    # First record left -> right partners ...
    matching = {}
    for r, c in zip(row_idx, col_idx):
        matching[row_nodes[r]] = col_nodes[c]
    # ... then mirror them so right-side nodes can be looked up as well.
    mirrored = {partner: node for node, partner in matching.items()}
    matching.update(mirrored)
    return matching
예제 #5
0
 def tosparse(self):
     """Return the biadjacency matrix built from this object's graph.

     ``self.g`` is passed as the graph; ``self.cn_nodes`` and
     ``self.vn_nodes`` are passed as the second and third positional
     arguments (presumably the row and column node orders -- confirm
     against the ``biadjacency_matrix`` in scope).
     """
     sparse_matrix = biadjacency_matrix(self.g, self.cn_nodes, self.vn_nodes)
     return sparse_matrix

# Script: build a tumor/gene bipartite graph from a CSV edge list, turn it
# into a dense biadjacency matrix and set up per-fold bookkeeping.
# NOTE: this snippet uses Python 2 syntax (print statement, xrange).
df1 = pd.read_csv("tumor_interaction_gene.txt", sep=",")
tumor_nodes_col = df1["tumor"]
gene_nodes_col = df1["gene"]

#print tumor_nodes_col,gene_nodes_col
# Each CSV row contributes one (tumor, gene) edge.
edgelist = zip(tumor_nodes_col, gene_nodes_col)
B = nx.DiGraph()
# Tag each node with the side it belongs to: 0 = tumor, 1 = gene.
B.add_nodes_from(tumor_nodes_col, bipartite=0)
B.add_nodes_from(gene_nodes_col, bipartite=1)
B.add_edges_from(edgelist)
# Recover the two node sets from the 'bipartite' attribute.
# NOTE(review): these are sets, so the row/column order of the matrix below
# follows set iteration order -- confirm that this is acceptable downstream.
tum_nodes = set(n for n, d in B.nodes(data=True) if d['bipartite'] == 0)
gene_nodes = set(n for n, d in B.nodes(data=True) if d['bipartite'] == 1)

# Rows are taken from tum_nodes and columns from gene_nodes.
matrix = biadjacency_matrix(B, row_order=tum_nodes, column_order=gene_nodes)
# presumably converts the sparse result to a dense array -- TODO confirm
matrix = matrix.A

m = matrix.shape[0]
n = matrix.shape[1]
FOLDS = 10
# Total number of matrix cells, and the integer number of cells per fold.
sz = m * n
fsz = int(sz / FOLDS)
#np.random.shuffle(IDX)
offset = 0
print "Fold size", fsz
# One zero-initialised score slot per fold.
AUC_test = np.zeros(FOLDS)
AUC_roc_test = np.zeros(FOLDS)

# NOTE(review): only the progress print of the per-fold loop is visible in
# this snippet; the remainder of the loop body appears to be missing.
for f in xrange(FOLDS):
    print "Fold:", f
예제 #7
0
                           names=col_names,
                           header=None)

# Script: build a weighted drug-similarity graph from df_drugs_sim and pick
# the biadjacency-matrix row that belongs to one side-effect query.
# NOTE: this snippet uses Python 2 syntax (print statement).
# Columns of df_drugs_sim: the two edge endpoints and the edge weight.
source = df_drugs_sim["left_side"]
destination = df_drugs_sim["right_side"]
similarity = df_drugs_sim["similairity"]

###Drugs similarity Network#####
# (source, destination, similarity) triples become weighted edges of G.
edge_list = zip(source, destination, similarity)
#print edge_list
print "Side effect graph information loading....."
G = nx.Graph()
G.add_weighted_edges_from(edge_list)

# NOTE(review): B, side_effect_nodes and drug_nodes are not defined in the
# visible part of this snippet; presumably they are created earlier -- confirm.
# Rows follow side_effect_nodes, columns follow drug_nodes.
matrix = biadjacency_matrix(B,
                            row_order=side_effect_nodes,
                            column_order=drug_nodes)
# presumably converts the sparse result to a dense array -- TODO confirm
matrix = matrix.A
m = matrix.shape[0]
n = matrix.shape[1]

# Alternative queries, kept for quick switching.
#query = "Gastric ulcer"
#query = "Angioedema"
#query = "Suicide"
#query = "Nausea"
#query = "Diarrhoea"
#query = "Constipation"
query = "Anaemia"
#query = "Anaemia megaloblastic"
# Position of the query within side_effect_nodes, i.e. its matrix row.
idx_query = side_effect_nodes.index(query)
# Full matrix row for the queried side effect (one entry per column node).
GR_TR = matrix[idx_query, :]