def MP_graph(D, x):
    """
    Iteratively build a coefficient vector for x over the columns of D using
    a bipartite SGraph (presumably "MP" stands for matching pursuit -- confirm).

    N "x" vertices (ids 0..N-1) and M "z" vertices (ids N..N+M-1) are created
    with an edge for every (i, j) pair. Each of the 30 iterations runs
    ``inner_prod`` over all edges, marks the z vertex with the largest
    'dummy' value via its 'max' field, then runs ``update_z`` over all edges.

    :param D: 2-D array-like exposing ``.shape == (N, M)`` and ``D[i][j]`` indexing
    :param x: sequence indexable by ``i`` in ``range(N)``
    :return: numpy array of shape (M, 1) filled from the 'value' field of the
             local z vertex objects
    """
    N, M = D.shape
    z = np.zeros((M, 1))
    z_temp = np.zeros(M)
    # NOTE(review): r is assigned here but never read in the visible code.
    r = np.copy(x)
    num_iter = 30
    # Create bipartite graph: x vertices use ids 0..N-1, z vertices use ids N..N+M-1
    G = SGraph()
    x_vertices = [Vertex(i) for i in xrange(N)]
    z_vertices = [Vertex(j + N) for j in xrange(M)]
    D_edges = [Edge(i, j) for i in xrange(N) for j in xrange(N, N + M)]
    # NOTE(review): the return values of add_vertices/add_edges are discarded and
    # add_vertices receives two positional lists -- confirm against the SGraph API
    # that this actually populates G.
    G.add_vertices(x_vertices, z_vertices)
    G.add_edges(D_edges)
    # Seed the vertex fields on the local Vertex objects.
    for i in xrange(N):
        x_vertices[i]["value"] = x[i]
    for j in xrange(M):
        z_vertices[j]["value"] = 0.0
        z_vertices[j]["dummy"] = 0.0
        z_vertices[j]["max"] = 0.0
    for i in xrange(N):
        for j in xrange(M):
            # NOTE(review): this builds a fresh Edge that is not stored anywhere;
            # it looks like D[i][j] never reaches the edges held in D_edges/G -- confirm.
            Edge(x_vertices[i], z_vertices[j])["value"] = D[i][j]

    # triple_apply callbacks; s = source vertex, e = edge, t = target vertex.
    # NOTE(review): none of these return anything -- confirm whether
    # triple_apply expects the (s, e, t) triple to be returned.
    def inner_prod(s, e, t):
        # Accumulate edge value * source value into the target's 'dummy' field.
        t["dummy"] += e["value"] * s["value"]

    def update_z(s, e, t):
        # Only targets whose 'max' field is non-zero are updated.
        if not t["max"] == 0.0:
            t["value"] += e["value"] * s["value"]

    # NOTE(review): compute_residual is defined but never passed to
    # triple_apply in the visible code.
    def compute_residual(s, e, t):
        if not t["max"] == 0.0:
            s["value"] -= t["value"] * e["value"]

    for itr in xrange(num_iter):
        # Compute inner products with r
        print "NUM ITR = ", itr
        # NOTE(review): 'dummy' is only zeroed before this loop, so it keeps
        # accumulating across iterations -- confirm that is intended.
        G = G.triple_apply(inner_prod, mutated_fields=["value", "dummy"])
        # Clear every 'max' flag and collect the 'dummy' values for the argmax.
        # NOTE(review): these reads use the local Vertex objects, not G's
        # vertices -- confirm triple_apply results are visible through them.
        for i in xrange(M):
            z_vertices[i]["max"] = 0.0
            z_temp[i] = z_vertices[i]["dummy"]
        # Flag only the z vertex with the largest 'dummy' value.
        max_pos = np.argmax(z_temp)
        z_vertices[max_pos]["max"] = z_temp[max_pos]
        G = G.triple_apply(update_z, mutated_fields=["max", "value"])
    # Copy the per-vertex 'value' fields into the (M, 1) result array.
    for i in xrange(M):
        z[i] = z_vertices[i]["value"]
    return z
def extract_backbone(flavor_network, vertices, edges, alpha):
    """
    Prune the flavor network down to the edges that pass the significance test.

    A working graph is built from the network's vertices and the supplied
    edges, every vertex's 'deg' field is incremented once per incident edge,
    and an edge is kept when its weight is at least ``abs(mean + alpha*sigma)``
    for either of its endpoints.

    :param flavor_network: flavor-ingredient network to prune
    :param vertices: separate list of vertices (not read by this function)
    :param edges: list of edge objects exposing ``src_vid``, ``dst_vid`` and
                  ``attr['weight']``
    :param alpha: multiplier applied to sigma in the significance threshold
    :return: tuple ``(significant_edges, pruned_network)``
    """
    def bump_degrees(src, connecting_edge, dst):
        """
        triple_apply callback: add one to 'deg' on both endpoints of the edge
        :return: the (src, edge, dst) triple with updated degrees
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, connecting_edge, dst

    def moments_for_degree(k):
        """
        mean and standard deviation for a node of degree k
        :param k: node degree (as a float)
        :return: mean and sigma
        """
        mean = 2*k/(k+1)
        first_term = (20 + 4*k)/((k + 1)*(k + 2)*(k + 3))
        second_term = 4/(k + 1)**2
        sigma = sqrt(k**2*(first_term - second_term))
        return mean, sigma

    def passes_threshold(edge, degree_of):
        """
        check the edge weight against the threshold of each endpoint
        :param edge: edge to test
        :param degree_of: dict mapping vertex id -> degree
        :return: truthy when the weight reaches either endpoint's threshold
        """
        weight = edge.attr['weight']
        dst_degree = degree_of[edge.dst_vid]
        src_degree = degree_of[edge.src_vid]
        dst_mean, dst_sigma = moments_for_degree(float(dst_degree))
        src_mean, src_sigma = moments_for_degree(float(src_degree))
        dst_threshold = abs(dst_mean + alpha*dst_sigma)
        src_threshold = abs(src_mean + alpha*src_sigma)
        return weight >= dst_threshold or weight >= src_threshold

    # Vertices with any missing 'deg' replaced by 0 seed both graphs.
    seeded_vertices = flavor_network.vertices.fillna('deg', 0)

    # Working graph: count the degree of every vertex via triple_apply.
    degree_graph = SGraph().add_vertices(seeded_vertices).add_edges(edges)
    degree_graph = degree_graph.triple_apply(bump_degrees, mutated_fields=['deg'])

    # Hash-table lookup: vertex id -> degree.
    vertex_frame = degree_graph.vertices.to_dataframe().set_index('__id')
    degree_by_id = vertex_frame.to_dict()['deg']

    kept_edges = [edge for edge in edges if passes_threshold(edge, degree_by_id)]

    backbone = SGraph().add_vertices(seeded_vertices)
    backbone = backbone.add_edges(kept_edges)
    return kept_edges, backbone
def extract_backbone(flavor_network, alpha):
    """
    makes a new graph with only the edges with weights that exceed the
    threshold for statistical significance

    Every vertex's 'deg' field is incremented once per incident edge; an edge
    is kept when its 'weight' is at least ``abs(mean + alpha * sigma)`` for
    either of its endpoints.

    :param flavor_network: full flavor ingredient network (SGraph) to prune
    :param alpha: multiplier applied to sigma in the significance threshold
    :return: the pruned SGraph, holding the input vertices (missing 'deg'
             filled with 0) and only the significant edges
    """
    def degree_count_fn(src, edge, dst):
        """
        increments the degree of the nodes on this edge (triple_apply callback)
        :param src: source vertex
        :param edge: connecting edge
        :param dst: destination vertex
        :return: the (src, edge, dst) triple with 'deg' incremented on both ends
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """
        computes mean and standard deviation for a node of degree node_k
        :param node_k: node degree (as a float)
        :return: mean and sigma
        """
        mean = 2 * node_k / (node_k + 1)
        sigma = sqrt(node_k**2 * ((20 + 4 * node_k) /
                                  ((node_k + 1) * (node_k + 2) * (node_k + 3)) -
                                  4 / (node_k + 1)**2))
        return mean, sigma

    def test_for_significance(edge, degree_lookup, alpha):
        """
        tests an Edge object against the threshold of each of its endpoints
        :param edge: Edge to test
        :param degree_lookup: dict mapping vertex id -> degree
        :param alpha: multiplier applied to sigma
        :return: significance boolean check
        """
        y_obs = edge.attr['weight']
        node1_k = degree_lookup[edge.dst_vid]
        node2_k = degree_lookup[edge.src_vid]
        m1, sig1 = compute_node_moments(float(node1_k))
        m2, sig2 = compute_node_moments(float(node2_k))
        return (y_obs >= abs(m1 + alpha * sig1) or
                y_obs >= abs(m2 + alpha * sig2))

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)

    # Working copy of the graph used only to count vertex degrees.
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn,
                                                 mutated_fields=['deg'])
    degree_lookup = flav_net_w_deg.vertices.to_dataframe().set_index(
        '__id').to_dict()['deg']

    # Materialise the edges once as Edge objects. Looking each kept edge up
    # again by (src, dst) costs one graph query per edge and, with parallel
    # edges, always yields the first match rather than the edge just tested.
    significant_edges = []
    for edge in flav_net_w_deg.get_edges(format='list'):
        if test_for_significance(edge, degree_lookup, alpha):
            significant_edges.append(edge)

    pruned_network = SGraph().add_vertices(new_node_list)
    pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network
def extract_backbone(flavor_network, alpha):
    """
    makes a new graph with only the edges with weights that exceed the
    threshold for statistical significance

    Vertex degrees are counted with a triple_apply pass, a mean and sigma are
    derived from each endpoint's degree, and an edge survives when its
    'weight' reaches ``abs(mean + alpha*sigma)`` for at least one endpoint.

    :param flavor_network: full flavor ingredient network (SGraph) to prune
    :param alpha: multiplier applied to sigma in the significance threshold
    :return: the pruned SGraph, holding the input vertices (missing 'deg'
             filled with 0) and only the significant edges
    """
    def degree_count_fn(src, edge, dst):
        """
        increments the degree of the nodes on this edge (triple_apply callback)
        :param src: source vertex
        :param edge: connecting edge
        :param dst: destination vertex
        :return: the (src, edge, dst) triple with 'deg' incremented on both ends
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """
        computes mean and standard deviation for a node of degree node_k
        :param node_k: node degree (as a float)
        :return: mean and sigma
        """
        mean = 2*node_k/(node_k+1)
        sigma = sqrt(node_k**2*((20 + 4*node_k)/((node_k + 1)*(node_k + 2)*(node_k + 3))
                                - 4/(node_k + 1)**2))
        return mean, sigma

    def test_for_significance(edge, weights_lookup, alpha):
        """
        tests an Edge object against the threshold of each of its endpoints
        :param edge: Edge to test
        :param weights_lookup: dict mapping vertex id -> degree
        :param alpha: multiplier applied to sigma
        :return: significance boolean check
        """
        y_obs = edge.attr['weight']
        node1_k = weights_lookup[edge.dst_vid]
        node2_k = weights_lookup[edge.src_vid]
        m1, sig1 = compute_node_moments(float(node1_k))
        m2, sig2 = compute_node_moments(float(node2_k))
        return y_obs >= abs(m1 + alpha*sig1) or y_obs >= abs(m2 + alpha*sig2)

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)

    # Degree-counting copy of the network.
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn, mutated_fields=['deg'])
    weights_dict = flav_net_w_deg.vertices.to_dataframe().set_index('__id').to_dict()['deg']

    # Pull every edge once as an Edge object and filter in a single pass; a
    # per-edge get_edges(src_ids=..., dst_ids=...)[0] lookup would issue one
    # graph query per kept edge and cannot tell parallel edges apart.
    significant_edges = [edge for edge in flav_net_w_deg.get_edges(format='list')
                         if test_for_significance(edge, weights_dict, alpha)]

    pruned_network = SGraph().add_vertices(new_node_list)
    pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network