Ejemplo n.º 1
0
def MP_graph(D, x):
    """Sparse-code ``x`` over the dictionary ``D`` by matching pursuit on an SGraph.

    A bipartite graph is built with one vertex per signal entry (ids
    ``0 .. N-1``) and one vertex per dictionary atom (ids ``N .. N+M-1``); the
    edge from signal vertex ``i`` to atom vertex ``N + j`` stores ``D[i, j]``.
    Each of the 30 iterations:

    1. accumulates, for every atom, the inner product of the current residual
       with that atom (``triple_apply`` into the ``dummy`` field),
    2. picks the atom whose inner product has the largest absolute value,
    3. adds that inner product to the atom's coefficient, and
    4. subtracts the selected atom, scaled by the inner product, from the
       residual stored on the signal vertices.

    NOTE(review): the update assumes the columns of ``D`` have unit norm, as is
    usual for matching pursuit -- confirm against the caller.

    :param D: dictionary, a 2-D numpy array of shape ``(N, M)``
    :param x: signal with ``N`` entries (shape ``(N,)`` or ``(N, 1)``)
    :return: numpy array of shape ``(M, 1)`` with the coefficient of each atom
    """
    N, M = D.shape
    num_iter = 30
    z = np.zeros((M, 1))
    z_temp = np.zeros(M)
    if N == 0 or M == 0:
        # Nothing to decompose; also avoids building a graph from empty lists.
        return z

    signal = np.asarray(x, dtype=float).reshape(-1)

    # Create bipartite graph.  Every vertex carries the same fields so that the
    # triple_apply callbacks never meet a missing value.
    vertices = [Vertex(i, attr={"value": float(signal[i]), "dummy": 0.0, "max": 0.0})
                for i in range(N)]
    vertices.extend(Vertex(N + j, attr={"value": 0.0, "dummy": 0.0, "max": 0.0})
                    for j in range(M))
    D_edges = [Edge(i, N + j, attr={"weight": float(D[i, j])})
               for i in range(N) for j in range(M)]
    # add_vertices / add_edges return a new graph, so the result must be kept.
    G = SGraph().add_vertices(vertices).add_edges(D_edges)

    def inner_prod(s, e, t):
        # s: signal vertex (residual), t: atom vertex.
        t["dummy"] += e["weight"] * s["value"]
        return (s, e, t)

    def compute_residual(s, e, t):
        # "max" is non-zero only on the atom selected in this iteration.
        if not t["max"] == 0.0:
            s["value"] -= t["max"] * e["weight"]
        return (s, e, t)

    for itr in range(num_iter):
        # Compute inner products with r
        print("NUM ITR =  %d" % itr)
        G.vertices["dummy"] = 0.0  # start each accumulation from zero
        G = G.triple_apply(inner_prod, mutated_fields=["dummy"])

        for row in G.vertices:
            vid = row["__id"]
            if vid >= N:
                z_temp[vid - N] = row["dummy"]

        max_pos = int(np.argmax(np.abs(z_temp)))
        step = float(z_temp[max_pos])
        if step == 0.0:
            # Residual is orthogonal to every atom: nothing left to explain.
            break
        z[max_pos, 0] += step

        # Flag the selected atom with its step size, then update the residual.
        picked_id = N + max_pos
        G.vertices["max"] = G.vertices["__id"].apply(
            lambda vid: step if vid == picked_id else 0.0, dtype=float)
        G = G.triple_apply(compute_residual, mutated_fields=["value"])

    return z
Ejemplo n.º 2
0
def MP_graph(D, x):
    """Sparse-code ``x`` over the dictionary ``D`` by matching pursuit on an SGraph.

    A bipartite graph is built with one vertex per signal entry (ids
    ``0 .. N-1``) and one vertex per dictionary atom (ids ``N .. N+M-1``); the
    edge from signal vertex ``i`` to atom vertex ``N + j`` stores ``D[i, j]``.
    Each of the 30 iterations:

    1. accumulates, for every atom, the inner product of the current residual
       with that atom (``triple_apply`` into the ``dummy`` field),
    2. picks the atom whose inner product has the largest absolute value,
    3. adds that inner product to the atom's coefficient, and
    4. subtracts the selected atom, scaled by the inner product, from the
       residual stored on the signal vertices.

    NOTE(review): the update assumes the columns of ``D`` have unit norm, as is
    usual for matching pursuit -- confirm against the caller.

    :param D: dictionary, a 2-D numpy array of shape ``(N, M)``
    :param x: signal with ``N`` entries (shape ``(N,)`` or ``(N, 1)``)
    :return: numpy array of shape ``(M, 1)`` with the coefficient of each atom
    """
    N, M = D.shape
    num_iter = 30
    z = np.zeros((M, 1))
    z_temp = np.zeros(M)
    if N == 0 or M == 0:
        # Nothing to decompose; also avoids building a graph from empty lists.
        return z

    signal = np.asarray(x, dtype=float).reshape(-1)

    # Create bipartite graph.  Every vertex carries the same fields so that the
    # triple_apply callbacks never meet a missing value.
    vertices = [Vertex(i, attr={"value": float(signal[i]), "dummy": 0.0, "max": 0.0})
                for i in range(N)]
    vertices.extend(Vertex(N + j, attr={"value": 0.0, "dummy": 0.0, "max": 0.0})
                    for j in range(M))
    D_edges = [Edge(i, N + j, attr={"weight": float(D[i, j])})
               for i in range(N) for j in range(M)]
    # add_vertices / add_edges return a new graph, so the result must be kept.
    G = SGraph().add_vertices(vertices).add_edges(D_edges)

    def inner_prod(s, e, t):
        # s: signal vertex (residual), t: atom vertex.
        t["dummy"] += e["weight"] * s["value"]
        return (s, e, t)

    def compute_residual(s, e, t):
        # "max" is non-zero only on the atom selected in this iteration.
        if not t["max"] == 0.0:
            s["value"] -= t["max"] * e["weight"]
        return (s, e, t)

    for itr in range(num_iter):
        # Compute inner products with r
        print("NUM ITR =  %d" % itr)
        G.vertices["dummy"] = 0.0  # start each accumulation from zero
        G = G.triple_apply(inner_prod, mutated_fields=["dummy"])

        for row in G.vertices:
            vid = row["__id"]
            if vid >= N:
                z_temp[vid - N] = row["dummy"]

        max_pos = int(np.argmax(np.abs(z_temp)))
        step = float(z_temp[max_pos])
        if step == 0.0:
            # Residual is orthogonal to every atom: nothing left to explain.
            break
        z[max_pos, 0] += step

        # Flag the selected atom with its step size, then update the residual.
        picked_id = N + max_pos
        G.vertices["max"] = G.vertices["__id"].apply(
            lambda vid: step if vid == picked_id else 0.0, dtype=float)
        G = G.triple_apply(compute_residual, mutated_fields=["value"])

    return z
Ejemplo n.º 3
0
def create_initial_bayesian_network():
  '''
  Build the starting Bayesian network for the structure search.

  The data graph is loaded from 'data_graph'; every distinct
  (destination id, relation) pair among its edges becomes a feature vertex of
  the network.  Twenty edges are then added from randomly drawn features
  (fixed seed 1234) to the hard-coded feature 'E8498'.

  NOTE(review): the original description also asks for a blacklist that
  forbids edges between variables of the same type; that is not implemented
  here.

  Returns the Bayesian network SGraph.  (The previous version returned the
  loaded data graph, which discarded everything built in this function.)
  '''
  g = load_sgraph('data_graph')
  edges = g.get_edges()
  features = edges[['__dst_id', 'relation']].unique()
  # Keep the returned frame: depending on the SFrame version rename() either
  # mutates in place and returns self, or returns a renamed copy.
  features = features.rename({'__dst_id': 'feature_id', 'relation': 'feature_type'})

  bn = SGraph().add_vertices(features, vid_field='feature_id')
  n_features = features.num_rows()
  n_patients = edges['__src_id'].unique().size()

  feature_ids = features['feature_id']
  random.seed(1234)
  for _ in range(20):
    src = feature_ids[random.randint(0, n_features - 1)]
    dst = 'E8498'
    bn = bn.add_edges(Edge(src, dst))
    # %s formatting also works when the ids are not strings.
    print("Added edge between %s and %s" % (src, dst))

  # The score is not used here; the call is kept in case get_bic_score has
  # side effects (e.g. logging) -- TODO confirm and drop if it is pure.
  bic = get_bic_score(g, bn, n_patients)
  return bn
Ejemplo n.º 4
0
    def get_subgraph(self, ids, radius=1, full_subgraph=True):
        """Return the neighbourhood of ``ids`` as a new ``GlGraph``.

        Starting from ``ids``, the vertex set is grown ``radius`` times by
        following outgoing edges.  The seed vertices always stay in the set,
        even when they have no outgoing edges.

        :param ids: iterable of seed vertex ids
        :param radius: number of outgoing-edge hops to expand
        :param full_subgraph: when ``True``, add the (de-duplicated) edges
            that start in the collected vertex set and have a seed vertex as
            source or destination; otherwise only vertices are returned
        :return: ``GlGraph`` wrapping the new ``SGraph``
        """
        seeds = list(ids)
        verts = list(seeds)

        # find the vertices within radius
        for _ in range(radius):
            if not verts:
                # NOTE(review): an empty id list is understood to mean
                # "no filter" for get_edges, which would pull in every edge.
                break
            edges_out = self._graph.get_edges(src_ids=verts)
            reached = set(verts)
            reached.update(list(edges_out['__src_id']))
            reached.update(list(edges_out['__dst_id']))
            verts = list(reached)

        # make a new graph to return and add the vertices
        g = SGraph()
        if not verts:
            return GlGraph(is_directed=self.is_directed, graph_obj=g)
        g = g.add_vertices(self._graph.get_vertices(verts), vid_field='__id')

        # add the requested edge set
        if full_subgraph is True:
            df_induced = self._graph.get_edges(src_ids=verts)
            # Grouping by every column with no aggregation drops duplicate rows.
            df_induced = df_induced.groupby(df_induced.column_names(), {})

            verts_sa = SArray(seeds)
            edges = df_induced.filter_by(verts_sa, "__src_id")
            # append() returns a new SFrame; the result has to be kept.
            edges = edges.append(df_induced.filter_by(verts_sa, "__dst_id"))
            # An edge between two seeds matches both filters; de-duplicate.
            edges = edges.groupby(edges.column_names(), {})

            g = g.add_edges(edges, src_field='__src_id', dst_field='__dst_id')
        return GlGraph(is_directed=self.is_directed, graph_obj=g)
Ejemplo n.º 5
0
def fillSGraph(matchedBills):
    """Build an SGraph linking each model bill to the state bills it matches.

    For every record whose 'modelBill' and 'stateBill' differ, the last path
    component of each value becomes a vertex and an edge is added between
    them, weighted by the inverse of 'matchPrecent' (stored in the edge's
    'weight' field).  Records that are missing a key or have a zero or
    non-numeric 'matchPrecent' are skipped instead of aborting the build.

    :param matchedBills: iterable of dicts with the keys 'modelBill',
        'stateBill' and 'matchPrecent'
    :return: the SGraph, or ``None`` when the graph could not be created at
        all (for instance because GraphLab is unavailable)
    """
    if not graphlab_is_installed:
        print("GraphLab is not installed!")

    Gmatches = None
    try:
        Gmatches = SGraph()
        vertices = []
        edges = []
        seen_labels = set()
        for mb in matchedBills:
            try:
                if mb['modelBill'] == mb['stateBill']:
                    continue
                label1 = mb['modelBill'].split("/")[-1]
                label2 = mb['stateBill'].split("/")[-1]
                # use inverse similarity as weight
                weight = 1. / float(mb['matchPrecent'])
            except (KeyError, TypeError, ValueError, AttributeError,
                    ZeroDivisionError):
                continue  # malformed record: skip it, keep the rest

            for label in (label1, label2):
                if label not in seen_labels:
                    seen_labels.add(label)
                    vertices.append(Vertex(label))
            # Storing the weight on the edge keeps it paired with its own
            # edge, independent of the row order of the graph's edge table.
            edges.append(Edge(label1, label2, attr={'weight': weight}))

        # Add everything in two calls instead of copying the graph per record.
        if vertices:
            Gmatches = Gmatches.add_vertices(vertices)
        if edges:
            Gmatches = Gmatches.add_edges(edges)
    except Exception as exc:
        # Best effort, as before: report the problem and return what exists.
        print("fillSGraph: could not build the graph: %s" % (exc,))

    return Gmatches
Ejemplo n.º 6
0
def create_initial_bayesian_network():
    '''
    Build the starting Bayesian network for the structure search.

    The data graph is loaded from 'data_graph'; every distinct
    (destination id, relation) pair among its edges becomes a feature vertex
    of the network.  Twenty edges are then added from randomly drawn features
    (fixed seed 1234) to the hard-coded feature 'E8498'.

    NOTE(review): the original description also asks for a blacklist that
    forbids edges between variables of the same type; that is not implemented
    here.

    Returns the Bayesian network SGraph.  (The previous version returned the
    loaded data graph, which discarded everything built in this function.)
    '''
    g = load_sgraph('data_graph')
    edges = g.get_edges()
    features = edges[['__dst_id', 'relation']].unique()
    # Keep the returned frame: depending on the SFrame version rename() either
    # mutates in place and returns self, or returns a renamed copy.
    features = features.rename({'__dst_id': 'feature_id', 'relation': 'feature_type'})

    bn = SGraph().add_vertices(features, vid_field='feature_id')
    n_features = features.num_rows()
    n_patients = edges['__src_id'].unique().size()

    feature_ids = features['feature_id']
    random.seed(1234)
    for _ in range(20):
        src = feature_ids[random.randint(0, n_features - 1)]
        dst = 'E8498'
        bn = bn.add_edges(Edge(src, dst))
        # %s formatting also works when the ids are not strings.
        print("Added edge between %s and %s" % (src, dst))

    # The score is not used here; the call is kept in case get_bic_score has
    # side effects (e.g. logging) -- TODO confirm and drop if it is pure.
    bic = get_bic_score(g, bn, n_patients)
    return bn
Ejemplo n.º 7
0
def extract_backbone(flavor_network, vertices, edges, alpha):
    """
    Keep only the edges of the flavor network whose weight passes the significance test.

    :param flavor_network: flavor-ingredient network to prune
    :param vertices: separate list of vertices (accepted but not read here)
    :param edges: list of Edge objects of the network; each needs a 'weight' attribute
    :param alpha: multiplier applied to the standard deviation in the threshold
    :return: tuple ``(significant_edges, pruned_network)`` -- the kept Edge
             objects and an SGraph containing all vertices plus those edges
    """
    def degree_count_fn(src, connecting_edge, dst):
        """
        triple_apply callback: count this edge towards both of its endpoints
        :param src: source node
        :param connecting_edge: connecting edge
        :param dst: destination node
        :return: the triple with 'deg' incremented on both nodes
        """
        for endpoint in (src, dst):
            endpoint['deg'] += 1
        return src, connecting_edge, dst

    def compute_node_moments(node_k):
        """
        mean and standard deviation used for a node of degree node_k
        :param node_k: node degree (passed in as a float)
        :return: mean and sigma
        """
        spread = (20 + 4*node_k)/((node_k + 1)*(node_k + 2)*(node_k + 3)) - 4/(node_k + 1)**2
        return 2*node_k/(node_k+1), sqrt(node_k**2*spread)

    def test_for_significance(edge, weights_lookup, alpha):
        """
        check one edge against the thresholds of its two end nodes
        :param edge: Edge to test
        :param weights_lookup: dict mapping vertex id to degree
        :param alpha: significance multiplier
        :return: True when the weight reaches at least one node's threshold
        """
        observed = edge.attr['weight']
        dst_degree = weights_lookup[edge.dst_vid]
        src_degree = weights_lookup[edge.src_vid]
        dst_mean, dst_sigma = compute_node_moments(float(dst_degree))
        src_mean, src_sigma = compute_node_moments(float(src_degree))
        if observed >= abs(dst_mean + alpha*dst_sigma):
            return True
        return observed >= abs(src_mean + alpha*src_sigma)

    # Copy of the network whose vertices all start with a numeric 'deg'.
    nodes_with_degree = flavor_network.vertices.fillna('deg', 0)
    degree_graph = SGraph().add_vertices(nodes_with_degree).add_edges(edges)
    degree_graph = degree_graph.triple_apply(degree_count_fn, mutated_fields=['deg'])

    # vertex id -> degree
    degree_frame = degree_graph.vertices.to_dataframe().set_index('__id')
    degree_by_id = degree_frame.to_dict()['deg']

    significant_edges = [candidate for candidate in edges
                         if test_for_significance(candidate, degree_by_id, alpha)]

    pruned_network = SGraph().add_vertices(nodes_with_degree)
    pruned_network = pruned_network.add_edges(significant_edges)
    return significant_edges, pruned_network
Ejemplo n.º 8
0
def extract_backbone(flavor_network, alpha):
    """
    Make a new graph with only the edges whose weight passes the significance test.

    :param flavor_network: full flavor-ingredient SGraph; edges carry 'weight'
    :param alpha: multiplier applied to the standard deviation in the threshold
    :return: the pruned SGraph (all vertices, significant edges only)
    """
    def degree_count_fn(src, edge, dst):
        """
        triple_apply callback: increments the degree of both nodes on this edge
        :param src: source node
        :param edge: connecting edge
        :param dst: destination node
        :return: the triple with 'deg' incremented on both nodes
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """Mean and standard deviation used for a node of degree ``node_k`` (a float)."""
        mean = 2 * node_k / (node_k + 1)
        sigma = sqrt(node_k**2 * ((20 + 4 * node_k) /
                                  ((node_k + 1) * (node_k + 2) *
                                   (node_k + 3)) - 4 / (node_k + 1)**2))
        return mean, sigma

    def test_for_significance(edge, weights_lookup, alpha):
        """True when the edge weight reaches the threshold of either end node."""
        y_obs = edge['weight']
        node1_k = weights_lookup[edge['__dst_id']]
        node2_k = weights_lookup[edge['__src_id']]
        m1, sig1 = compute_node_moments(float(node1_k))
        m2, sig2 = compute_node_moments(float(node2_k))

        return y_obs >= abs(m1 + alpha * sig1) or y_obs >= abs(m2 +
                                                               alpha * sig2)

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn,
                                                 mutated_fields=['deg'])
    # vertex id -> degree
    weights_dict = flav_net_w_deg.vertices.to_dataframe().set_index(
        '__id').to_dict()['deg']

    id_columns = ('__src_id', '__dst_id')
    significant_edges = []
    for edge in flav_net_w_deg.get_edges():
        if test_for_significance(edge, weights_dict, alpha):
            # Build the Edge straight from the row instead of issuing one
            # get_edges() query per edge; this also keeps each parallel
            # edge's own attributes.
            attrs = dict((key, value) for key, value in edge.items()
                         if key not in id_columns)
            significant_edges.append(
                Edge(edge['__src_id'], edge['__dst_id'], attr=attrs))

    pruned_network = SGraph().add_vertices(new_node_list)
    if significant_edges:
        # Skip the call entirely when nothing is significant.
        pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network
Ejemplo n.º 9
0
def extract_backbone(flavor_network, alpha):
    """
    Make a new graph with only the edges whose weight passes the significance test.

    :param flavor_network: full flavor-ingredient SGraph; edges carry 'weight'
    :param alpha: multiplier applied to the standard deviation in the threshold
    :return: the pruned SGraph (all vertices, significant edges only)
    """
    def degree_count_fn(src, edge, dst):
        """
        triple_apply callback: increments the degree of both nodes on this edge
        :param src: source node
        :param edge: connecting edge
        :param dst: destination node
        :return: the triple with 'deg' incremented on both nodes
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def compute_node_moments(node_k):
        """Mean and standard deviation used for a node of degree ``node_k`` (a float)."""
        mean = 2*node_k/(node_k+1)
        sigma = sqrt(node_k**2*((20 + 4*node_k)/((node_k + 1)*(node_k + 2)*(node_k + 3)) - 4/(node_k + 1)**2))
        return mean, sigma

    def test_for_significance(edge, weights_lookup, alpha):
        """True when the edge weight reaches the threshold of either end node."""
        y_obs = edge['weight']
        node1_k = weights_lookup[edge['__dst_id']]
        node2_k = weights_lookup[edge['__src_id']]
        m1, sig1 = compute_node_moments(float(node1_k))
        m2, sig2 = compute_node_moments(float(node2_k))

        return y_obs >= abs(m1 + alpha*sig1) or y_obs >= abs(m2 + alpha*sig2)

    edge_list = flavor_network.get_edges()
    new_node_list = flavor_network.vertices.fillna('deg', 0)
    flav_net_w_deg = SGraph().add_vertices(new_node_list).add_edges(edge_list)
    flav_net_w_deg = flav_net_w_deg.triple_apply(degree_count_fn, mutated_fields=['deg'])
    # vertex id -> degree
    weights_dict = flav_net_w_deg.vertices.to_dataframe().set_index('__id').to_dict()['deg']

    id_columns = ('__src_id', '__dst_id')
    significant_edges = []
    for edge in flav_net_w_deg.get_edges():
        if test_for_significance(edge, weights_dict, alpha):
            # Build the Edge straight from the row instead of issuing one
            # get_edges() query per edge; this also keeps each parallel
            # edge's own attributes.
            attrs = dict((key, value) for key, value in edge.items()
                         if key not in id_columns)
            significant_edges.append(Edge(edge['__src_id'], edge['__dst_id'], attr=attrs))

    pruned_network = SGraph().add_vertices(new_node_list)
    if significant_edges:
        # Skip the call entirely when nothing is significant.
        pruned_network = pruned_network.add_edges(significant_edges)
    return pruned_network
Ejemplo n.º 10
0
def build_weighted_graph(ing_comp_dict):
    """
    builds the weighted undirected graph that is the flavor network

    Every ingredient becomes one vertex with a 'deg' attribute of 0.  Each
    unordered pair of ingredients that shares at least one compound gets a
    single edge whose 'weight' is the number of shared compounds.

    :param ing_comp_dict: ingredient:compound dictionary (ingredient -> iterable of compounds)
    :return: SGraph that represents the flavor network
    """
    # Convert each compound collection to a set once rather than once per pair.
    ingr_sets = [(ingr, set(compounds))
                 for ingr, compounds in ing_comp_dict.items()]

    # One vertex per ingredient.  The earlier version appended a second,
    # attribute-less Vertex for every ingredient.
    vert_list = [Vertex(ingr, attr={'deg': 0}) for ingr, _ in ingr_sets]

    edge_list = []
    n_ingrs = len(ingr_sets)
    for first in range(n_ingrs):
        node_1_ingr, compounds_1 = ingr_sets[first]
        # Only look at later ingredients so each pair is visited once.
        for second in range(first + 1, n_ingrs):
            node_2_ingr, compounds_2 = ingr_sets[second]
            weight = len(compounds_2.intersection(compounds_1))
            if weight > 0:
                edge_list.append(Edge(node_1_ingr, node_2_ingr, attr={'weight': weight}))

    flav_network = SGraph()
    if vert_list:
        flav_network = flav_network.add_vertices(vert_list)
    if edge_list:
        flav_network = flav_network.add_edges(edge_list)
    return flav_network
Ejemplo n.º 11
0
def parallel(A, B, prior, observationSequence):
	"""Train an HMM with GraphLab on a fully connected state graph and print the result.

	One vertex is created per hidden state (id ``"<i>a"``) and one directed
	edge per ordered pair of distinct states; the graph is then handed to
	``hmm.train`` and the resulting vertex and edge tables are printed.

	:param A: transition matrix, indexed as ``A[i, j]``
	:param B: emission matrix, row ``B[i, :]`` is stored on state ``i``
	:param prior: initial state probabilities, indexed as ``prior[i]``
	:param observationSequence: observations passed through to ``hmm.train``
	"""
	def state_name(state):
		return str(state) + "a"

	# Per-state data; the list-valued fields have OBSERVATION_LENGTH + 1 slots.
	state_vertices = []
	for i in xrange(NUM_STATES):
		fields = {
			'i': i,
			'ait': [prior[i]] + ([0] * OBSERVATION_LENGTH),
			'bit': ([0] * OBSERVATION_LENGTH) + [1],
			'b': B[i, :],
			'git': [0] * (OBSERVATION_LENGTH + 1),
			'self': A[i, i],
			'git_sum': 0.0,
		}
		state_vertices.append(Vertex(state_name(i), attr=fields))

	# Transitions between distinct states; self transitions live on the vertex.
	transitions = [
		Edge(state_name(i), state_name(j), attr={'aij': A[i, j], 'xi': 0.0})
		for i in xrange(NUM_STATES)
		for j in xrange(NUM_STATES)
		if i != j
	]

	g = SGraph()
	g = g.add_vertices(state_vertices)
	g = g.add_edges(transitions)
	g = hmm.train(g, observationSequence, NITERS, NUM_STATES, NUM_OBSERVATIONS)
	print(g.vertices)
	print(g.edges)
Ejemplo n.º 12
0
def build_weighted_graph(ing_comp_dict):
    """
    Builds the weighted, undirected graph that is the flavor network

    Every ingredient becomes one vertex with a 'deg' attribute of 0.  Each
    unordered pair of ingredients that shares at least one compound gets a
    single edge whose 'weight' is the number of shared compounds.

    :param ing_comp_dict: ingredient:compound dictionary (ingredient -> iterable of compounds)
    :return: tuple ``(flavor_network, vertices, edge_list)`` -- the SGraph and
             the Vertex / Edge lists it was built from
    """
    # Convert each compound collection to a set once rather than once per pair.
    ingredient_sets = [(ingredient, set(compounds))
                       for ingredient, compounds in ing_comp_dict.items()]

    # One vertex per ingredient.  The earlier version appended a second,
    # attribute-less Vertex for every ingredient.
    vertices = [Vertex(ingredient, attr={'deg': 0})
                for ingredient, _ in ingredient_sets]

    edge_list = []
    n_ingredients = len(ingredient_sets)
    for first in range(n_ingredients):
        ingredient_node_1, compounds_1 = ingredient_sets[first]
        # Only look at later ingredients so each pair is visited once.
        for second in range(first + 1, n_ingredients):
            ingredient_node_2, compounds_2 = ingredient_sets[second]
            weight = len(compounds_2.intersection(compounds_1))
            if weight > 0:
                edge_list.append(Edge(ingredient_node_1, ingredient_node_2, attr={'weight': weight}))

    flavor_network = SGraph()
    if vertices:
        flavor_network = flavor_network.add_vertices(vertices)
    if edge_list:
        flavor_network = flavor_network.add_edges(edge_list)
    return flavor_network, vertices, edge_list
Ejemplo n.º 13
0
def build_weighted_graph(ing_comp_dict):
    """
    builds the weighted undirected graph that is the flavor network

    Every ingredient becomes one vertex with a 'deg' attribute of 0.  Each
    unordered pair of ingredients that shares at least one compound gets a
    single edge whose 'weight' is the number of shared compounds.

    :param ing_comp_dict: ingredient:compound dictionary (ingredient -> iterable of compounds)
    :return: SGraph that represents the flavor network
    """
    # Convert each compound collection to a set once rather than once per pair.
    ingr_sets = [(ingr, set(compounds))
                 for ingr, compounds in ing_comp_dict.items()]

    # One vertex per ingredient.  The earlier version appended a second,
    # attribute-less Vertex for every ingredient.
    vert_list = [Vertex(ingr, attr={'deg': 0}) for ingr, _ in ingr_sets]

    edge_list = []
    n_ingrs = len(ingr_sets)
    for first in range(n_ingrs):
        node_1_ingr, compounds_1 = ingr_sets[first]
        # Only look at later ingredients so each pair is visited once.
        for second in range(first + 1, n_ingrs):
            node_2_ingr, compounds_2 = ingr_sets[second]
            weight = len(compounds_2.intersection(compounds_1))
            if weight > 0:
                edge_list.append(
                    Edge(node_1_ingr, node_2_ingr, attr={'weight': weight}))

    flav_network = SGraph()
    if vert_list:
        flav_network = flav_network.add_vertices(vert_list)
    if edge_list:
        flav_network = flav_network.add_edges(edge_list)
    return flav_network
Ejemplo n.º 14
0
    "FundingRound", "HQ", "keywords", "Member", "Office", "organizations",
    "PrimaryImage", "TeamMember", "Website", "companies_acquired_by_sap"
]
edgesFiles = [
    "GeoInformation", "acquisitions", "categories_keywords_edges",
    "investments", "keywords_descriptions_edges", "keywords_webpages_edges",
    "relationships", "companies_acquired_by_sap_edges"
]
g = SGraph()

# Vertex tables: the id column is 'path' when present, otherwise 'url'.
# A table with neither column stops the script.
for f in vertexFiles:
    content = SFrame.read_csv(path + f + '.csv',
                              na_values='null',
                              verbose=verbose)
    columns = content.column_names()
    for vid_field in ('path', 'url'):
        if vid_field in columns:
            g = g.add_vertices(content, vid_field=vid_field)
            break
    else:
        print("Unknown vid field:  %s" % (columns,))
        sys.exit()

# Edge tables: endpoints are named either src/dst or source/target.
# Tables using neither naming are not added.
for f in edgesFiles:
    content = SFrame.read_csv(path + f + '.csv',
                              na_values='null',
                              verbose=verbose)
    columns = content.column_names()
    for src_field, dst_field in (('src', 'dst'), ('source', 'target')):
        if src_field in columns and dst_field in columns:
            g = g.add_edges(content, src_field=src_field, dst_field=dst_field)
            break
Ejemplo n.º 15
0
def main():
    """Run label propagation on the Karate network and display the result.

    Builds a 34-vertex SGraph in which every vertex starts with its own id as
    label, loads the edges from ./karate.txt (one "src dst" pair per line),
    then repeatedly visits the vertices in random order and replaces each
    vertex's label with the value returned by ``LPA`` until a full pass
    changes nothing.  The pass counter is printed after every pass; the total
    propagation time and the number of passes are printed at the end, and the
    graph is shown with the labels.
    """
    vertex_count = 34

    # initialize the Karate graph with unique label fields for each node
    verts = [Vertex(i, attr={'label': str(i)}) for i in range(vertex_count)]
    g = SGraph().add_vertices(verts)

    # read the edges from karate.txt and add them to the SGraph in one call
    # (add_edges returns a new graph, so adding per line copies it each time)
    fname = "./karate.txt"
    edges = []
    with open(fname) as f:
        for l in f:
            ids = l.split()
            if len(ids) < 2:
                continue  # blank or incomplete line
            edges.append(Edge(int(ids[0]), int(ids[1])))
    if edges:
        g = g.add_edges(edges)

    ids = list(range(vertex_count))

    # label propagation loop
    flag = False
    iteration = 0
    # Started once, before the loop, so the reported time covers every pass
    # (it used to be reset at the top of each pass).
    start = time.time()
    while not flag:
        # pick vertex iteration order randomly
        rn.shuffle(ids)
        flag = True
        for index in ids:
            cur_max = LPA(g, index)
            if str(cur_max) != g.get_vertices(ids=[index])['label'][0]:
                # a label changed, so another full pass is needed
                flag = False
                g.vertices['label'] = g.vertices.apply(lambda x: str(
                    cur_max) if x['__id'] == index else x['label'])

        iteration += 1
        print(iteration)
    end = time.time()
    print(end - start)
    print(iteration)
    g.show(vlabel='label')