Ejemplo n.º 1
0
def extract_backbone(flavor_network, alpha):
    """
    Build a pruned copy of the flavor network that keeps only the edges whose
    weight exceeds the threshold for statistical significance.

    The degree of every vertex is counted with ``triple_apply``. An edge is
    kept when its ``weight`` is at least ``abs(mean + alpha * sigma)`` for the
    degree of at least one of its two endpoints, where ``mean`` and ``sigma``
    are produced by ``compute_node_moments``.

    :param flavor_network: full flavor ingredient network (an SGraph). ``deg``
        is filled with 0 where missing on the vertices and ``weight`` is read
        from every edge, so both fields are expected to exist.
    :param alpha: multiplier applied to sigma when forming the threshold
    :return: a new SGraph holding the vertices of the input (with ``deg``
        filled) and only the significant edges
    """
    def degree_count_fn(src, edge, dst):
        """
        increments the degree of the nodes on this edge
        :param src: source vertex of the edge
        :param edge: the edge itself (returned unchanged)
        :param dst: destination vertex of the edge
        :return: the (src, edge, dst) triple with both ``deg`` fields bumped
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """
        :param node_k: degree of a node, as a float
        :return: (mean, sigma) pair used to build the threshold for a node
            of that degree
        """
        mean = 2 * node_k / (node_k + 1)
        sigma = sqrt(node_k**2 * ((20 + 4 * node_k) /
                                  ((node_k + 1) * (node_k + 2) *
                                   (node_k + 3)) - 4 / (node_k + 1)**2))
        return mean, sigma

    def test_for_significance(edge, degree_lookup, alpha):
        """
        :param edge: an Edge object (``src_vid``, ``dst_vid``, ``attr``)
        :param degree_lookup: dict mapping vertex id -> degree
        :param alpha: multiplier applied to sigma
        :return: True when the edge weight reaches the threshold of either
            endpoint
        """
        y_obs = edge.attr['weight']
        for vertex_id in (edge.dst_vid, edge.src_vid):
            # float() keeps the divisions in compute_node_moments from
            # truncating when the degree is an integer.
            mean, sigma = compute_node_moments(
                float(degree_lookup[vertex_id]))
            if y_obs >= abs(mean + alpha * sigma):
                return True
        return False

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(
        edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn,
                                                 mutated_fields=['deg'])
    vertex_frame = flav_net_w_deg.vertices
    degree_by_id = dict(zip(vertex_frame['__id'], vertex_frame['deg']))

    # Fetch the Edge objects once instead of re-querying the graph for each
    # significant edge; this also keeps parallel edges distinct rather than
    # always picking the first match for a (src, dst) pair.
    significant_edges = [
        edge for edge in flav_net_w_deg.get_edges(format='list')
        if test_for_significance(edge, degree_by_id, alpha)
    ]
    pruned_network = SGraph().add_vertices(new_node_list)
    pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network
Ejemplo n.º 2
0
def extract_backbone(flavor_network, alpha):
    """
    makes a new graph with only the edges with weights that exceed the threshold for statistical significance

    Vertex degrees are counted with ``triple_apply``; an edge survives when its
    ``weight`` is at least ``abs(mean + alpha*sigma)`` computed from the degree
    of either of its endpoints.

    :param flavor_network: full flavor ingredient network (an SGraph); the code
        fills ``deg`` on its vertices and reads ``weight`` from its edges
    :param alpha: multiplier applied to sigma when forming the threshold
    :return: the pruned SGraph (input vertices with ``deg`` filled, significant
        edges only)
    """
    def degree_count_fn(src, edge, dst):
        """
        increments the degree of the nodes on this edge
        :param src: source vertex
        :param edge: edge being visited (not modified)
        :param dst: destination vertex
        :return: the updated (src, edge, dst) triple
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """Return the (mean, sigma) pair for a node of degree ``node_k`` (a float)."""
        mean = 2*node_k/(node_k+1)
        sigma = sqrt(node_k**2*((20 + 4*node_k)/((node_k + 1)*(node_k + 2)*(node_k + 3)) - 4/(node_k + 1)**2))
        return mean, sigma

    def weight_threshold(degree):
        """Return the minimum significant weight for an endpoint with this degree."""
        # float() avoids truncating division when the degree is an integer.
        mean, sigma = compute_node_moments(float(degree))
        return abs(mean + alpha*sigma)

    def test_for_significance(edge, degree_lookup):
        """Return True when the Edge's weight reaches the threshold of either endpoint."""
        y_obs = edge.attr['weight']
        return (y_obs >= weight_threshold(degree_lookup[edge.dst_vid])
                or y_obs >= weight_threshold(degree_lookup[edge.src_vid]))

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn, mutated_fields=['deg'])
    counted_vertices = flav_net_w_deg.vertices
    degree_by_id = dict(zip(counted_vertices['__id'], counted_vertices['deg']))

    # One bulk fetch of Edge objects replaces a per-edge get_edges() query,
    # and parallel edges between the same pair of vertices stay distinct.
    all_edges = flav_net_w_deg.get_edges(format='list')
    significant_edges = [edge for edge in all_edges if test_for_significance(edge, degree_by_id)]

    pruned_network = SGraph().add_vertices(new_node_list)
    pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network