def load_graph(fn):
    """Read the weighted edge list at ``fn`` into a directed graph_tool graph.

    The loader is asked to skip the first row of the file, to use columns
    2 and 3 as the edge endpoint columns (``ecols``), and to create three
    edge properties: ``fromId`` and ``toId`` as strings and ``weight`` as a
    double.

    Args:
        fn: File name handed straight to ``gt.load_graph_from_csv``.

    Returns:
        Whatever ``gt.load_graph_from_csv`` returns for these options.
    """
    csv_settings = dict(
        directed=True,
        eprop_types=('string', 'string', 'double'),
        eprop_names=('fromId', 'toId', 'weight'),
        string_vals=True,
        hashed=True,
        skip_first=True,
        ecols=(2, 3),
    )
    return gt.load_graph_from_csv(fn, **csv_settings)
import graph_tool.all as gt
# from graph_tool.all import *
import math
import matplotlib

# Load the test edge list with hashed vertex values and named edge properties.
g = gt.load_graph_from_csv(
    "csv/graphdata_test.csv",
    hashed=True,
    # NOTE(review): two property types are listed here but three names are
    # listed below -- confirm that a "weight" edge property really gets created.
    eprop_types=['string', 'string'],
    eprop_names=["source_handle", "target_handle", "weight"])

# import pdb; pdb.set_trace()
# Look up the edge property maps by name; neither is referenced by the
# draw call below.
source_handle = g.ep["source_handle"]
weight = g.ep["weight"]

# Render the graph to graph-draw.png with a 4000x4000 output size.
gt.graph_draw(g,
              vertex_font_size=12,
              # NOTE(review): this is a quoted string, not the property map
              # g.ep.source_handle -- presumably a per-vertex label was
              # intended; confirm what should be drawn here.
              vertex_text="g.ep.source_handle",
              vertex_shape="double_circle",
              vertex_fill_color="#729fcf",
              vertex_pen_width=3,
              edge_pen_width=1,
              output="graph-draw.png",
              output_size=(4000, 4000))
# Read the processed edge list into a DataFrame and preview its first rows.
edgelist_df = pd.read_csv(processed_edge_list)
edgelist_df.head()

# In this "generic gene map", 1 denotes a generic gene and 0 is all other genes. A gene is considered generic if it had a high percentile from SOPHIE and the manually curated set based on the correlation plot seen [here](../pseudomonas_analysis/2_identify_generic_genes_pathways.ipynb).

# In[4]:

# Read the tab-separated generic gene map, using its first column as the
# index, and preview its first rows.
annot_df = pd.read_csv(generic_gene_map, sep='\t', index_col=0)
annot_df.head()

# In[5]:

# Build an undirected graph from the same edge list file. The first row is
# skipped (presumably a header -- confirm) and a single float edge property
# named 'weight' is requested.
G = gt.load_graph_from_csv(processed_edge_list,
                           skip_first=True,
                           directed=False,
                           hashed=True,
                           eprop_names=['weight'],
                           eprop_types=['float'])

# In[6]:

# Add a boolean vertex property flagging generic genes: each vertex's 'name'
# is looked up in annot_df and the matching 'label' value is stored on it.
vprop_generic = G.new_vertex_property('bool')
# Iterate the vertices directly; the positional index was never used.
for v in G.vertices():
    v_name = G.vp['name'][v]
    v_label = annot_df.loc[v_name, 'label']
    vprop_generic[v] = v_label

# Register the map as an internal property of the graph under 'is_generic'.
G.vertex_properties['is_generic'] = vprop_generic

# In[7]:
                n_clusters=G.graph['number_communities'])
        except:
            D = [100]
        zsvd.append(np.mean(D))

        Y = fct.mds_shortest_paths(G, dimension)
        D = fct.comp_clusters_communities(
            Y,
            G.graph['labels_communities'],
            algo=False,
            n_clusters=G.graph['number_communities'])
        zmds.append(np.mean(D))

        g = gt.load_graph_from_csv(G.graph['edgelist'],
                                   directed=isDirected,
                                   csv_options={
                                       "delimiter": " ",
                                       "quotechar": '"'
                                   })
        block = gt.minimize_nested_blockmodel_dl(
            g,
            B_min=G.graph['number_communities'],
            B_max=G.graph['number_communities'])
        num_block = block.levels[0].get_B()
        block = block.levels[0].get_blocks()
        partition = [0 for i in range(G.number_of_nodes())]
        for i in range(G.number_of_nodes()):  #for every node
            partition[i] = block[i]
        zsbm.append(ami(partition, G.graph['labels_communities']))

        igraph = ig.Read_Edgelist(G.graph['edgelist'])
        part = igraph.community_infomap()
Exemple #5
0
    pending = len(iterables)
    nexts = cycle(iter(it).next for it in iterables)
    while pending:
        try:
            for next in nexts:
                yield next()
        except StopIteration:
            pending -= 1
            nexts = cycle(islice(nexts, pending))


# Location of the English ConceptNet CSV dump under the user's home directory.
conceptnet_path = os.path.expanduser(
    '~/project/KB_dump/conceptnet/conceptnet-en.csv')

# Load the dump as an undirected graph with two string edge properties.
g = load_graph_from_csv(
    conceptnet_path,
    directed=False,
    eprop_types=['string', 'string'],
    string_vals=True,
)

# Prefix prepended to a bare word before it is matched against vertex names.
prefix = '/c/en/'
entities = [
    ['capoeira', 'hand', 'cartwheel', 'shirt', 'handstand'],
    ['sunscreen', 'skateboarding', 'soccer', 'tan', 'rubbing'],
    ['cream', 'mascara', 'writing', 'lifting', 'dictaphone'],
]

# First vertex whose 'name' property matches each of the two listed words.
blackListVertex = {
    find_vertex(g, prop=g.properties[('v', 'name')], match=prefix + b)[0]
    for b in ('object', 'thing')
}

# Relation labels collected alongside the black-listed vertices.
blackListEdge = {'/r/DerivedFrom', '/r/RelatedTo'}
Exemple #6
0
def plot_log_log_dist(g, fname):
    """Scatter-plot the degree distribution of ``g`` on log-log axes.

    The total degrees of all vertices are passed to ``deg_frequency``
    (presumably returning degree values and their counts -- confirm against
    its definition). The second returned sequence is divided by its sum and
    plotted against the first.

    Args:
        g: Graph providing ``get_vertices`` and ``get_total_degrees``.
        fname: Output path handed to ``plt.savefig``.
    """
    total_degrees = g.get_total_degrees(g.get_vertices())
    data_xs, data_ys = deg_frequency(total_degrees)
    fractions = np.divide(data_ys, np.sum(data_ys))

    # Start from a clean figure so earlier plots do not bleed into this one.
    plt.clf()
    plt.scatter(data_xs, fractions, alpha=0.5, color='b', label='Dataset')
    plt.legend(loc='lower left')

    # Both axes are logarithmic, with fixed viewing windows.
    plt.xscale('log')
    plt.yscale('log')
    plt.xlim(0.5, 1500)
    plt.ylim(0.0001, 0.5)

    plt.xlabel("degree")
    plt.ylabel("fraction of nodes")
    plt.savefig(fname)


# Load the tab-delimited edge list and plot its degree distribution.
G = gt.load_graph_from_csv(FILENAME, csv_options={"delimiter": "\t"})
plot_log_log_dist(G, "dist.png")
# state1 = gt.minimize_blockmodel_dl(G, verbose=True)
# Ask the graph for its counts directly instead of materialising the full
# vertex and edge arrays only to take their length.
N = G.num_vertices()
print(G.num_edges())

# Number of vertices to knock out: KNOCKOUT times N, truncated to an int
# (KNOCKOUT is presumably a fraction -- confirm where it is defined).
knock_count = int(KNOCKOUT * N)
# to_remove = np.random.randint(0, N, knock_count)
# G.remove_vertex(to_remove)

# NOTE(review): if the experiment below is re-enabled, the last list
# comprehension keeps i[1] (the degree) rather than i[0] (the vertex index),
# so remove_vertex would be handed degrees instead of vertices.
# top_degree_nodes = [[idx[0], elem] for idx, elem in np.ndenumerate(G.get_total_degrees(G.get_vertices()))]
# top_degree_nodes.sort(key=lambda x: x[1], reverse=True)
# top_degree_nodes = top_degree_nodes[0:knock_count]
# top_degree_nodes = [i[1] for i in top_degree_nodes]
# G.remove_vertex(top_degree_nodes)
Exemple #7
0
                         squeeze=True)

for i, node in enumerate(essentials.values):
    # Remove the '-' characters from the protein name.
    node_ = node.replace('-', '')
    # Drop the useless spaces, upper-case the name and store it back.
    essentials[i] = node_.replace(' ', '').upper()

############################################################################################################
####        Graph construction
############################################################################################################
if args.format != 'csv':  # any non-csv format is read directly by the generic loader
    graph = gp.load_graph(args.data, fmt=args.format)
else:  # a list of links (csv) has to be handled differently
    graph = gp.load_graph_from_csv(
        args.data,
        string_vals=True,
        directed=args.is_directed,
        csv_options={"delimiter": "\t", "quotechar": "#"},
    )

############################################################################################################
####        Degree distribution (all nodes)
############################################################################################################
# NOTE(review): out_degree() is used here -- if the graph was loaded as
# directed this presumably counts out-edges only; confirm that is intended.
v_degrees = np.array([v.out_degree() for v in graph.vertices()
                      ])  # degree of each node, in vertex iteration order
degrees, hist = np.unique(
    v_degrees,  # degrees: sorted distinct degree values present in the network
    return_counts=True
)  # hist: hist[j] is the number of nodes whose degree equals degrees[j]