Example #1
def to_igraph(network):
    """
    Convert cytoscape.js style graphs to igraph object.

    :param network: the cytoscape.js style network.

    :return: the igraph object.
    """
    nodes = network['elements']['nodes']
    edges = network['elements']['edges']
    network_attr = network['data']

    node_count = len(nodes)
    edge_count = len(edges)

    g = ig.Graph()

    # Graph attributes
    for key in network_attr.keys():
        g[key] = network_attr[key]

    g.add_vertices(node_count)  # add_vertices expects a count here, not the node dicts

    # Add node attributes
    node_attributes = {}
    node_id_dict = {}
    for i, node in enumerate(nodes):
        data = node['data']
        for key in data.keys():
            if key not in node_attributes:
                node_attributes[key] = [None] * node_count

            # Save index to map
            if key == 'id':
                node_id_dict[data[key]] = i

            node_attributes[key][i] = data[key]

    for key in node_attributes.keys():
        g.vs[key] = node_attributes[key]

    # Create edges
    edge_tuples = []
    edge_attributes = {}
    for i, edge in enumerate(edges):
        data = edge['data']
        source = data['source']
        target = data['target']
        edge_tuple = (node_id_dict[source], node_id_dict[target])
        edge_tuples.append(edge_tuple)
        for key in data.keys():
            if key not in edge_attributes:
                edge_attributes[key] = [None] * edge_count

            # Save index to map
            edge_attributes[key][i] = data[key]

    g.add_edges(edge_tuples)

    # Assign edge attributes
    for key in edge_attributes.keys():
        if key == 'source' or key == 'target':
            continue
        else:
            g.es[key] = edge_attributes[key]

    return g
Example #2
def make_graph_from_post_processor(postprocessor):
    """
    Create a directed graph from a postprocessor of an OpenFOAM simulation.

    The edges hold dictionary attributes ('edge_index', 'original_edge_index')
    that correspond to the edge indices on RBC paths. These indices need not be
    the same as the igraph indices.

    Args:
        postprocessor (HemoglobinOnSegmentsPostProcessor): postprocessor object

    Returns:
        igraph.Graph instance
    """
    path_analyzer = postprocessor.rbcDataPostProcessor.rbc_path_analyzer
    graph = igraph.Graph(directed=True)
    for ei in postprocessor.edge_ids():
        try:
            positive_flow = path_analyzer.positive_flow(ei)
        except FlowReversalError:
            warnings.warn("Skipping edge {:d} due to flow reversal".format(ei))
            continue
        # add vertices
        graph.add_vertex()
        graph.add_vertex()
        v0 = graph.vs[graph.vcount() - 2]
        v1 = graph.vs[graph.vcount() - 1]
        # add oriented edges along flow direction
        oriented_tuple = (v0, v1) if positive_flow else (v1, v0)
        original_ei = postprocessor.segment_index_adapter.segment_to_edge_index(
            ei)
        graph.add_edge(*oriented_tuple,
                       edge_index=ei,
                       original_edge_index=original_ei)

    # merge the vertices which are connected through RBC paths by a connecting node
    mapping = list(range(graph.vcount()))  # list, so it can be mutated below
    vids_to_delete = set()
    for e in graph.es:
        # build edges that flow in and out of the upstream vertex
        edges_in, edges_out = incident_edges_to_upstream_vertex(
            e, path_analyzer)
        # update mapping from the list of vertices that build a connecting node
        node_vids = [e_out.tuple[0] for e_out in edges_out
                     ] + [e_in.tuple[1] for e_in in edges_in]
        new_node_vi = min(node_vids)  # unambiguous choice of a new vertex id
        for vi in node_vids:
            mapping[vi] = new_node_vi
        vids_to_delete.update(node_vids)
        vids_to_delete.remove(new_node_vi)
    graph.contract_vertices(mapping)
    # if contract_vertices deleted vertices, remove these indices from vids_to_delete
    vids_to_delete.difference_update(range(graph.vcount(), len(mapping)))
    graph.delete_vertices(list(vids_to_delete))

    # add edge attributes
    edge_attribute_names = [
        'length', 'radius_rbc', 'radius_plasma', 'radius_wall', 'ld',
        'rbc_velocity', 'rbc_flow'
    ]
    for name in edge_attribute_names:
        graph.es[name] = None
    for e in graph.es:
        ei = e['edge_index']
        original_ei = e['original_edge_index']
        e['length'] = postprocessor.scoord_interval_length(ei)
        e['radius_plasma'] = postprocessor.graph_data.edge_radius(original_ei)
        e['radius_rbc'] = max(
            postprocessor.rbc_radius_factor * e['radius_plasma'],
            postprocessor.rbc_radius_min)
        e['radius_wall'] = postprocessor.wall_radius_factor * e['radius_plasma']
        e['ld'] = postprocessor.mean_linear_density(ei)
        e['rbc_velocity'] = postprocessor.mean_velocity(ei)
        e['rbc_flow'] = postprocessor.mean_rbc_flow(ei)
    if not graph.is_dag():
        warnings.warn('The produced graph is not a directed acyclic graph',
                      UserWarning)
    return graph
Example #3
ID2Index = {id: index for index, id in enumerate(index2ID)}

# Hack to account for datasets where the "From" column is fully capitalized as "FROM"
fromKey = "From"
if (fromKey not in edgesData):
    fromKey = "FROM"

# Converting edges from IDs to new indices
# Invert each edge so it points from the cited item to the one citing it
edgesZip = zip(edgesData[fromKey].tolist(), edgesData["To"].tolist())
edgesList = [(ID2Index[toID], ID2Index[fromID]) for fromID, toID in edgesZip
             if fromID in ID2Index and toID in ID2Index]

vertexAttributes = {key: nodesData[key].tolist() for key in nodesData}

graph = ig.Graph(n=len(index2ID),
                 edges=edgesList,
                 directed=True,
                 vertex_attrs=vertexAttributes)

# verticesToDelete = np.where(np.logical_or(np.array(graph.indegree())==0,np.array(graph.degree())==0))[0]
# graph.delete_vertices(verticesToDelete)

graph.vs["KCore"] = graph.shell_index(mode="IN")
graph.vs["year"] = [int(s[0:4]) for s in graph.vs["date"]]
# graph.vs["Community"] = [str(c) for c in graph.community_infomap().membership];
os.makedirs("../networks", exist_ok=True)
xn.igraph2xnet(graph, "../networks/" + queryID + ".xnet")
Example #4
def build_igraph_from_pp(net, respect_switches=False):
    """
    This function uses the igraph library to create an igraph graph for a given pandapower network.
    Lines, transformers and switches are respected.
    Performance vs. networkx: https://graph-tool.skewed.de/performance

    Input:

        **net** - pandapower network

    Example:

        graph, meshed, roots = build_igraph_from_pp(net)

    """
    try:
        import igraph as ig
    except (DeprecationWarning, ImportError):
        raise ImportError("Please install python-igraph")
    g = ig.Graph(directed=True)
    g.add_vertices(net.bus.shape[0])
    g.vs["label"] = net.bus.index.tolist(
    )  # [s.encode('unicode-escape') for s in net.bus.name.tolist()]
    pp_bus_mapping = dict(
        list(zip(net.bus.index, list(range(net.bus.index.shape[0])))))

    # add lines
    nogolines = set(net.switch.element[(net.switch.et == "l") & (net.switch.closed == 0)]) \
                if respect_switches else set()
    for lix in (ix for ix in net.line.index if ix not in nogolines):
        line = net.line.loc[lix]
        g.add_edge(pp_bus_mapping[line.from_bus], pp_bus_mapping[line.to_bus],
                   weight=line.length_km)

    # add trafos
    for _, trafo in net.trafo.iterrows():
        g.add_edge(pp_bus_mapping[trafo.hv_bus],
                   pp_bus_mapping[trafo.lv_bus],
                   weight=0.01)

    for _, trafo3w in net.trafo3w.iterrows():
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.lv_bus],
                   weight=0.01)
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.mv_bus],
                   weight=0.01)

    # add switches
    bs = net.switch[(net.switch.et == "b") & (net.switch.closed == 1)] if respect_switches else \
                    net.switch[(net.switch.et == "b")]
    for fb, tb in zip(bs.bus, bs.element):
        g.add_edge(pp_bus_mapping[fb], pp_bus_mapping[tb], weight=0.001)

    meshed = False
    for i in range(1, net.bus.shape[0]):
        if len(g.get_all_shortest_paths(0, i, mode="ALL")) > 1:
            meshed = True
            break

    roots = [pp_bus_mapping[s] for s in net.ext_grid.bus.values]
    return g, meshed, roots  # g, (not g.is_dag())
Example #5
def displayGraph(data):
    """ nodes{keyname, name, size}, links{source, target, frequence}
    format: nodes: keynames
    nodes_thickness : occurrences
    edges : relation
    thickness_edges: frequence"""
    N = len(data['nodes'])  # number of nodes

    L = len(data['links'])  # number of edges
    Edges = [(data['links'][k]['source'], data['links'][k]['target'])
             for k in range(L)]  # list of edges
    print(Edges)

    G = ig.Graph(Edges, directed=False)

    keynames = []
    occurrences = []
    lcolors = []
    lwidth = []
    lname = []
    for node in data['nodes']:
        keynames.append(node['name'])
        occurrences.append(node['size'] * 2)
        lcolors.append(node['keyname'] * 2)
        lname.append(node['name'])
    for edge in data['links']:
        lwidth.append(edge['width'])

    layt = G.layout('kk', dim=3)
    Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
    Yn = [layt[k][1] for k in range(N)]  # y-coordinates
    Zn = [layt[k][2] for k in range(N)]  # z-coordinates
    Xe = []
    Ye = []
    Ze = []
    for e in Edges:
        Xe += [layt[e[0]][0], layt[e[1]][0],
               None]  # x-coordinates of edge ends
        Ye += [layt[e[0]][1], layt[e[1]][1], None]
        Ze += [layt[e[0]][2], layt[e[1]][2], None]

    trace1 = Scatter3d(x=Xe,
                       y=Ye,
                       z=Ze,
                       mode='lines',
                       name='sentence',
                       line=Line(color='rgb(125,125,125)', width=1),
                       text=lwidth,
                       hoverinfo='text')
    trace2 = Scatter3d(
        x=Xn,
        y=Yn,
        z=Zn,
        mode='markers',
        name='keyword',
        marker=Marker(
            symbol='dot',
            size=occurrences,
            color=lcolors,
            #color=group,
            colorscale='Viridis',
            line=Line(color='rgb(50,50,50)', width=0.5)),
        text=lname,
        hoverinfo='text')
    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')

    layout = Layout(
        title="visualization of your text",
        width=1000,
        height=1000,
        showlegend=False,
        scene=Scene(
            xaxis=XAxis(axis),
            yaxis=YAxis(axis),
            zaxis=ZAxis(axis),
        ),
        margin=Margin(t=100),
        hovermode='closest',
        annotations=Annotations([
            Annotation(
                showarrow=False,
                text=
                "Data source: <a href='http://bost.ocks.org/mike/miserables/miserables.json'>[1] miserables.json</a>",
                xref='paper',
                yref='paper',
                x=0,
                y=0.1,
                xanchor='left',
                yanchor='bottom',
                font=Font(size=14))
        ]),
    )

    fig_data = Data([trace1, trace2])
    plotly.offline.plot({'data': fig_data, 'layout': layout})
Example #6
    while (i < total_snapshots):

        originalfile_prefix = './powlaw_degree_small_snapshot_graph_for_streaming_sampling/' + 'new_' + dataset[
            0] + '_u_20/'
        #        rewriteEdgelistFromZero(originalfile_prefix+ 'output-prefix.t0000' + str(i) + '.graph', ' ')
        #        rewriteClusteringFromZero(originalfile_prefix+ 'output-prefix.t0000'+ str(i) + '.comms', ' ')

        originalfile = originalfile_prefix + 'output-prefix.t0000' + str(
            i) + '.graph'
        print "original_snapshot" + originalfile
        fp_originalfile = open(originalfile, 'r')
        original_snapshot_graph = nx.read_edgelist(fp_originalfile,
                                                   nodetype=int)
        fp_originalfile.close()
        igr = igraph.Graph(n=original_snapshot_graph.number_of_nodes(),
                           edges=nx.convert_node_labels_to_integers(
                               original_snapshot_graph).edges())
        #        igr = igraph.Graph(n = original_snapshot_graph.number_of_nodes(), edges = nx.convert_node_labels_to_integers(original_snapshot_graph, first_label=0).edges())

        org_complete_tcommsfile = originalfile
        if org_complete_tcommsfile not in org_complete_tcomms:
            org_complete_tcomms[org_complete_tcommsfile] = []

        ground_truthfile = originalfile_prefix + 'output-prefix.t0000' + str(
            i) + '.comms'
        fp_complete_commsfile = open(ground_truthfile, 'r')
        for line in fp_complete_commsfile.readlines():
            org_complete_tcomms[org_complete_tcommsfile].append(
                str(i) + ' ' + line)
        fp_complete_commsfile.close()
Example #7
def run():
    cg_dpath = dpaths['baseline', '2009', 'countGraph']
    cg_prefix = prefixs['baseline', '2009', 'countGraph']
    gp_dpath = dpaths['baseline', '2009', 'groupPartition']
    gp_prefix = prefixs['baseline', '2009', 'groupPartition']
    #
    check_dir_create(gp_dpath)
    #
    gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)
    gp_original_fpath = '%s/%soriginal.pkl' % (gp_dpath, gp_prefix)
    gp_drivers_fpath = '%s/%sdrivers.pkl' % (gp_dpath, gp_prefix)
    #
    with open(gp_summary_fpath, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile, lineterminator='\n')
        writer.writerow([
            'groupName', 'numDrivers', 'numRelations', 'graphComplexity',
            'tieStrength', 'contribution', 'benCon'
        ])
    #
    logger.info('Start handling SP_group_dpath')
    if not check_path_exist(gp_original_fpath):
        original_graph = {}
        for fn in get_all_files(cg_dpath, '%s*' % cg_prefix):
            count_graph = load_pickle_file('%s/%s' % (cg_dpath, fn))
            logger.info('Start handling; %s' % fn)
            numEdges = len(count_graph)
            moduloNumber = max(1, numEdges // 10)
            for i, ((did0, did1), w) in enumerate(count_graph.items()):
                if i % moduloNumber == 0:
                    logger.info('Handling; %.2f' % (i / float(numEdges)))
                original_graph[did0, did1] = w
        save_pickle_file(gp_original_fpath, original_graph)
    else:
        original_graph = load_pickle_file(gp_original_fpath)
    #
    logger.info('igraph converting')
    igid, did_igid = 0, {}
    igG = ig.Graph(directed=True)
    numEdges = len(original_graph)
    moduloNumber = max(1, numEdges // 10)
    for i, ((did0, did1), w) in enumerate(original_graph.items()):
        if i % moduloNumber == 0:
            logger.info('Handling; %.2f' % (i / float(numEdges)))
        if did0 not in did_igid:
            igG.add_vertex(did0)
            did_igid[did0] = igid
            igid += 1
        if did1 not in did_igid:
            igG.add_vertex(did1)
            did_igid[did1] = igid
            igid += 1
        igG.add_edge(did_igid[did0], did_igid[did1], weight=abs(w))
    #
    logger.info('Partitioning')
    part = louvain.find_partition(igG, method='Modularity', weight='weight')
    logger.info('Each group pickling and summary')
    gn_drivers = {}
    for i, sg in enumerate(part.subgraphs()):
        gn = 'G(%d)' % i
        group_fpath = '%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn)
        sg.write_pickle(group_fpath)
        #
        drivers = [v['name'] for v in sg.vs]
        weights = [e['weight'] for e in sg.es]
        graphComplexity = len(weights) / float(len(drivers))
        tie_strength = sum(weights) / float(len(drivers))
        contribution = sum(weights) / float(len(weights))
        benCon = tie_strength / float(len(drivers))
        with open(gp_summary_fpath, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow([
                gn,
                len(drivers),
                len(weights), graphComplexity, tie_strength, contribution,
                benCon
            ])
        gl_img_fpath = '%s/%simg-%s.pdf' % (gp_dpath, gp_prefix, gn)
        layout = sg.layout("kk")
        if len(drivers) < 100:
            ig.plot(sg, gl_img_fpath, layout=layout, vertex_label=drivers)
        else:
            ig.plot(sg, gl_img_fpath, layout=layout)
        gn_drivers[gn] = drivers
        gc_fpath = '%s/%scoef-%s.csv' % (gp_dpath, gp_prefix, gn)
        with open(gc_fpath, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow(['groupName', 'did0', 'did1', 'coef'])
            for e in sg.es:
                did0, did1 = [sg.vs[nIndex]['name'] for nIndex in e.tuple]
                coef = e['weight']
                writer.writerow([gn, did0, did1, coef])
    save_pickle_file(gp_drivers_fpath, gn_drivers)
Example #8
def graphgen(N,
             directed=True,
             noigraph_gen=False,
             return_layout_as_object=True,
             graph_shape='rectangle'):
    """
    This function creates a directed lattice in d=2 where edges go up or right.
    The ig.Graph.Lattice function does not appear to create directed graphs well.
    Use plot_graph to test with a small N.

    Returns: a tuple (g,l) where g is an igraph object, and l is an igraph layout

    directed=True   produces a directed lattice with only up/right paths. Currently the other functions cannot handle the undirected lattice

    noigraph_gen=True does not generate the igraph object. This is mostly used for debugging. 

    return_layout_as_object=True    returns the second return value as an igraph object

    graph_shape='rectangle' or 'triangle'
    If chosen to be a triangle, this helps cut down on computation time for limit shape computations, since you do not want the limit shape to be truncated.

    igraph does not check for uniqueness when adding vertices by name.

    Oct 25 2017 The for loop in this function is very slow. An iterator that yields is definitely better since the for loop is run by the igraph creation routine.

    Oct 24 2017 This is a fairly inefficient function. It is probably easier to add vertices by generating a list of names first. noigraph_gen simply returns the edges and vertices.
    """

    if dbg >= 1:
        print('Start generating graph: ' + time.asctime())

    verts = vertgen(N, graph_shape=graph_shape)
    edges = edgegen(N, graph_shape=graph_shape)

    if dbg >= 1:
        print('Done generating vertex and edge lists: ' + time.asctime())

    # make a graph layout for plotting
    if not noigraph_gen:
        if dbg >= 3:
            print('generating new graph')
        try:
            g = ig.Graph(directed=directed)
            #import ipdb; ipdb.set_trace()
            g.add_vertices(verts)
            g.add_edges(edges)
        except Exception as err:
            print('Error in generating igraph: {}'.format(err))
            raise

        # make layout for plotting
        if graph_shape == 'rectangle':
            layoutlist = [(x, y) for x in range(N) for y in range(N)]
        elif graph_shape == 'triangle':
            layoutlist = [(x, y) for x in range(N) for y in range(N - x)]

        if dbg >= 1:
            print('Done generating igraph object and layout: ' +
                  time.asctime())

        if return_layout_as_object:
            return g, ig.Layout(layoutlist)
        else:
            return g, layoutlist
    else:
        # if noigraph_gen == True
        return ([x for x in verts], [x for x in edges])
Example #9
def orthoFromSampleRecs(nfrec, outortdir, nsample=[], methods=['mixed'], \
                        foutdiffog=None, outputOGperSampledRecGT=True, colourTreePerSampledRecGT=False, \
                        graphCombine=None, majRuleCombine=None, **kw):
    """"""
    verbose = kw.get('verbose')
    fam = os.path.basename(nfrec).split('-', 1)[0]
    if verbose: print "\n# # # %s" % fam
    # collect the desired sample from the reconciliation file
    dparserec = parseALERecFile(nfrec,
                                skipLines=True,
                                skipEventFreq=True,
                                nsample=nsample,
                                returnDict=True)
    lrecgt = dparserec['lrecgt']
    if kw.get('userefspetree'):
        refspetree = dparserec['spetree']
    else:
        refspetree = None
    colourCombinedTree = kw.get('colourCombinedTree')

    ddogs = {}
    dnexustrans = {}
    drevnexustrans = {}
    ltaxnexus = []
    llabs = []
    for i, recgenetree in enumerate(lrecgt):
        if nsample: g = nsample[i]
        else: g = i
        if verbose: print(recgenetree)
        if verbose: print("\n# # reconciliation sample %d" % g)
        N = recgenetree.nb_leaves()
        dlabs = {}
        if set(['strict', 'mixed']) & set(methods):
            if verbose: print "\n# strict_ogs:\n"
            strict_ogs, unclassified, dlabs = getOrthologues(
                recgenetree,
                method='strict',
                refspetree=refspetree,
                dlabs=dlabs,
                **kw)
            n1 = summaryOGs(strict_ogs, dlabs, N, verbose)
        else:
            strict_ogs = unclassified = None
            n1 = 'NA'
        if 'unicopy' in methods:
            if verbose: print "\n# unicopy_ogs:\n"
            unicopy_ogs, notrelevant, dlabs = getOrthologues(
                recgenetree,
                method='unicopy',
                refspetree=refspetree,
                dlabs=dlabs,
                **kw)
            n2 = summaryOGs(unicopy_ogs, dlabs, N, verbose)
        else:
            unicopy_ogs = None
            n2 = 'NA'
        if 'mixed' in methods:
            if verbose: print "\n# mixed_ogs:\n"
            mixed_ogs, unclassified, dlabs = getOrthologues(
                recgenetree,
                method='mixed',
                strict_ogs=strict_ogs,
                unclassified=unclassified,
                refspetree=refspetree,
                dlabs=dlabs,
                **kw)  #
            n3 = summaryOGs(mixed_ogs, dlabs, N, verbose)
        else:
            mixed_ogs = None
            n3 = 'NA'

        if foutdiffog or verbose:
            o12 = str(sum([int(o in strict_ogs) for o in unicopy_ogs
                           ])) if (strict_ogs and unicopy_ogs) else 'NA'
            o13 = str(sum([int(o in strict_ogs) for o in mixed_ogs
                           ])) if (strict_ogs and mixed_ogs) else 'NA'
            o23 = str(sum([int(o in unicopy_ogs) for o in mixed_ogs
                           ])) if (mixed_ogs and unicopy_ogs) else 'NA'
        if verbose:
            print("\n# summary:\n")
            print("overlap strict_ogs with unicopy_ogs:", o12)
            print("overlap strict_ogs with mixed_ogs:", o13)
            print("overlap unicopy_ogs with mixed_ogs:", o23)
        if foutdiffog:
            foutdiffog.write(
                '\t'.join([fam, str(g), n1, n2, n3, o12, o13, o23]) + '\n')

        if colourTreePerSampledRecGT or colourCombinedTree:
            if i == 0:
                recgenetree, dnexustrans, drevnexustrans, ltaxnexus = indexCleanTreeLabels(
                    recgenetree, dlabs)
            else:
                recgenetree, dnexustrans, drevnexustrans, ltaxnexus = indexCleanTreeLabels(recgenetree, dlabs, \
                         dnexustrans=dnexustrans, drevnexustrans=drevnexustrans, ltaxnexus=ltaxnexus, update=False)

        ddogs[g] = {
            'strict': strict_ogs,
            'unicopy': unicopy_ogs,
            'mixed': mixed_ogs
        }
        if verbose: print "\n# # # # # # # #"
        if i == 0:
            # collect the leaf labels; just do once
            llabs = sorted(dlabs.values())

    R = len(lrecgt)
    gs = nsample if nsample else range(R)
    for method in methods:
        ltrees = []
        nfoutrad = os.path.join(outortdir, method, "%s_%s" % (fam, method))
        if colourTreePerSampledRecGT:
            logs = [ddogs[g][method] for g in gs]
            writeRecGeneTreesColouredByOrthologs(lrecgt, logs, nfoutrad+"_orthologous_groups.nex", drevnexustrans, \
             treenames=["tree_%d" for g in gs], ltax=ltaxnexus, dtranslate=dnexustrans, figtree=True)
        if outputOGperSampledRecGT:
            with open(nfoutrad + ".orthologs.per_sampled_tree",
                      'w') as foutort:
                for g in gs:
                    ogs = ddogs[g][method]
                    foutort.write('\n'.join([' '.join(x)
                                             for x in ogs]) + '\n#\n')

        if graphCombine or majRuleCombine:
            ## for later output
            recgt0 = lrecgt[0] if colourCombinedTree else None
            # could also use the ALE consensus tree, which has branch supports but has no lengths
            ## first make a dict of edge frequencies
            dedgefreq = {}
            for g in gs:
                ogs = ddogs[g][method]
                for og in ogs:
                    if len(og) == 1:
                        orfan = og[0]
                        combo = (orfan, orfan)
                        dedgefreq[combo] = dedgefreq.get(combo, 0) + 1
                    else:
                        # get all pairs of genes in the OG
                        combogs = combinations(sorted(og), 2)
                        # add the counts
                        for combo in combogs:
                            dedgefreq[combo] = dedgefreq.get(combo, 0) + 1
            ## build a graph of connectivity of the genes in OGs, integrating over the sample
            gOG = igraph.Graph()
            gOG.add_vertices(len(llabs))
            gOG.vs['name'] = llabs
            # first make a full weighted graph
            # add the edges to the graph
            edges, freqs = zip(*dedgefreq.items())
            gOG.add_edges(edges)
            gOG.es['weight'] = freqs
            if majRuleCombine:
                ## make a majority rule unweighted graph
                mjgOG = gOG.copy()
                # select edges with frequency below the threshold
                mjdropedges = []
                minfreq = majRuleCombine * R
                for e in mjgOG.es:
                    # use strict majority (assuming the parameter majRuleCombine=0.5, the default) to avoid obtaining family-wide single components
                    if e['weight'] <= minfreq: mjdropedges.append(e.index)
                # remove the low-freq edges to the graph
                mjgOG.delete_edges(mjdropedges)
                if verbose:
                    print("Majority Rule Consensus network: dropped %d edges with weight <= %d from the full network (%d edges)" % (
                        len(mjdropedges), minfreq, len(gOG.es)))
                # find connected components (i.e. perform clustering)
                compsOGs = mjgOG.components()
                # resolve conflicts in orthology classification
                mjgOG, compsOGs = enforceUnicity(mjgOG,
                                                 compsOGs,
                                                 getVertexClustering,
                                                 communitymethod='components',
                                                 **kw)
                # write results
                writeGraphCombinedOrthologs(nfoutrad, "majrule_combined_%f"%majRuleCombine, mjgOG, compsOGs, llabs, \
                                                         colourCombinedTree=colourCombinedTree, recgt=recgt0, drevnexustrans=drevnexustrans, \
                                                         ltax=ltaxnexus, dtranslate=dnexustrans, ltreenames=["tree_0"], figtree=True)
            if graphCombine:
                # find communities (i.e. perform clustering) in full weighted graph
                commsOGs = getVertexClustering(gOG, graphCombine)
                # resolve conflicts in orthology classification
                gOG, commsOGs = enforceUnicity(gOG,
                                               commsOGs,
                                               getVertexClustering,
                                               maxdrop=20,
                                               communitymethod=graphCombine,
                                               **kw)
                # write results
                writeGraphCombinedOrthologs(nfoutrad, 'graph_combined_%s'%graphCombine, gOG, commsOGs, llabs, \
                                                         colourCombinedTree=colourCombinedTree, recgt=recgt0, drevnexustrans=drevnexustrans, \
                                                         ltax=ltaxnexus, dtranslate=dnexustrans, ltreenames=["tree_0"], figtree=True)
Example #10
def parse_obo_graph(path):
    stored_pickle_file_prefix = 'obo.graphs'
    stored_pickles_found = False

    g = {
        'biological_process': igraph.Graph(directed=True),
        'cellular_component': igraph.Graph(directed=True),
        'molecular_function': igraph.Graph(directed=True)
    }

    for ns in g:
        pickle_file_path = "{0}.{1}".format(stored_pickle_file_prefix, ns)
        if os.path.exists(pickle_file_path):
            print("Using stored ontology graph: {0}".format(pickle_file_path))
            g[ns] = igraph.Graph.Read_Pickle(fname=pickle_file_path)
            stored_pickles_found = True

    # key: GO:ID, value = {'ns': 'biological_process', 'idx': 25}
    terms = dict()

    if stored_pickles_found is True:
        print(
            "Using stored terms data structure: {0}".format(pickle_file_path))
        with open("{0}.terms".format(stored_pickle_file_prefix), 'rb') as f:
            terms = pickle.load(f)

    # key: namespace, value=int
    next_idx = {
        'biological_process': 0,
        'cellular_component': 0,
        'molecular_function': 0
    }

    id = None
    namespace = None
    name = None

    # Pass through the file once just to get all the GO terms and their namespaces
    #  This makes the full pass far easier, since terms can be referenced which haven't
    #  been seen yet.

    if stored_pickles_found is False:
        for line in open(path):
            line = line.rstrip()
            if line.startswith('[Term]'):
                if id is not None:
                    # error checking
                    if namespace is None:
                        raise Exception(
                            "Didn't find a namespace for term {0}".format(id))

                    g[namespace].add_vertices(1)
                    idx = next_idx[namespace]
                    g[namespace].vs[idx]['id'] = id
                    g[namespace].vs[idx]['name'] = name
                    next_idx[namespace] += 1
                    terms[id] = {'ns': namespace, 'idx': idx}

                # reset for next term
                id = None
                namespace = None
                name = None

            elif line.startswith('id:'):
                id = line.split(' ')[1]

            elif line.startswith('namespace:'):
                namespace = line.split(' ')[1]

            elif line.startswith('name:'):
                m = re.match('name: (.+)', line)
                if m:
                    name = m.group(1).rstrip()
                else:
                    raise Exception(
                        "Failed to regex this line: {0}".format(line))

    id = None
    alt_ids = list()
    namespace = None
    name = None
    is_obsolete = False
    is_a = list()

    # Now actually parse the rest of the properties
    if stored_pickles_found is False:
        for line in open(path):
            line = line.rstrip()
            if line.startswith('[Term]'):
                if id is not None:
                    # make any edges in the graph
                    for is_a_id in is_a:
                        # these two terms should be in the same namespace
                        if terms[id]['ns'] != terms[is_a_id]['ns']:
                            raise Exception(
                                "is_a relationship found with terms in different namespaces"
                            )

                        # g[namespace].add_edges([(terms[id]['idx'], terms[is_a_id]['idx']), ])
                        # According to the documentation, the commented
                        # add_edges call above should work too, but it fails
                        # here, so use add_edge instead:
                        g[namespace].add_edge(terms[id]['idx'],
                                              terms[is_a_id]['idx'])

                # reset for this term
                id = None
                alt_ids = list()
                namespace = None
                is_obsolete = False
                is_a = list()

            elif line.startswith('id:'):
                id = line.split(' ')[1]

            elif line.startswith('namespace:'):
                namespace = line.split(' ')[1]

            elif line.startswith('is_a:'):
                is_a.append(line.split(' ')[1])

    if stored_pickles_found is False:
        for ns in g:
            pickle_file_path = "{0}.{1}".format(stored_pickle_file_prefix, ns)
            g[ns].write_pickle(fname=pickle_file_path)

        ## save the terms too so we don't have to redo that parse
        with open("{0}.terms".format(stored_pickle_file_prefix), 'wb') as f:
            pickle.dump(terms, f, pickle.HIGHEST_PROTOCOL)

    return terms, g
Example #11
def plot_3D(adj_list, df_node_label, title='Graph'):
    # Importing libs:

    # Copy to networkx:
    graph1 = nx.Graph()
    graph1.add_edges_from(adj_list.values)
    N = graph1.number_of_nodes()
    L = graph1.number_of_edges()
    Edges = [tuple(i) for i in adj_list.values]
    G = ig.Graph(Edges, directed=True)
    # Node labels:
    group = df_node_label['label'].tolist()
    # Setting plotly
    layt = G.layout('kk', dim=3)

    Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
    Yn = [layt[k][1] for k in range(N)]  # y-coordinates
    Zn = [layt[k][2] for k in range(N)]  # z-coordinates
    Xe = []
    Ye = []
    Ze = []
    for e in Edges:
        Xe += [layt[e[0]][0], layt[e[1]][0],
               None]  # x-coordinates of edge ends
        Ye += [layt[e[0]][1], layt[e[1]][1], None]
        Ze += [layt[e[0]][2], layt[e[1]][2], None]

    # Parameters:
    trace1 = go.Scatter3d(x=Xe,
                          y=Ye,
                          z=Ze,
                          mode='lines',
                          line=dict(color='rgb(125,125,125)', width=1),
                          hoverinfo='none')

    trace2 = go.Scatter3d(x=Xn,
                          y=Yn,
                          z=Zn,
                          mode='markers',
                          name='actors',
                          marker=dict(symbol='circle',
                                      size=6,
                                      color=group,
                                      colorscale='Viridis',
                                      line=dict(color='rgb(50,50,50)',
                                                width=0.5)),
                          text=group,
                          hoverinfo='text')

    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')

    layout = go.Layout(
        title=title,
        width=1000,
        height=1000,
        showlegend=False,
        scene=dict(
            xaxis=dict(axis),
            yaxis=dict(axis),
            zaxis=dict(axis),
        ),
        margin=dict(t=100),
        hovermode='closest',
        annotations=[
            dict(showarrow=False,
                 text="Data source: {}".format(title),
                 xref='paper',
                 yref='paper',
                 x=0,
                 y=0.1,
                 xanchor='left',
                 yanchor='bottom',
                 font=dict(size=14))
        ],
    )

    data = [trace1, trace2]
    #     plotly.offline.plot({'data': data, 'layout': layout},
    #              auto_open=True, image = 'png', image_filename='graph',
    #              output_type='file', image_width=800, image_height=600,
    #              filename='temp-plot.html', validate=False)

    dload = os.path.expanduser('~/Downloads')
    title_png = title + '.png'
    f_load = os.path.join(dload, title_png)
    f_save = os.path.join(
        '/Users/marcelogutierrez/Projects/Gamma/capsuleSans/diagrams',
        title_png)
    html_file = '{}.html'.format(title)

    plotly.offline.plot({
        "data": data,
        "layout": layout
    },
                        image='png',
                        filename=html_file,
                        image_filename=title,
                        auto_open=True)

    sleep(3)

    shutil.move(f_load, f_save)
Example #12
def create_item_graph(mode='train'):
    """
        Creates graph, whose vertices correspond to items. 
        For each purchase, an edge is added from each searched item to the one that was bought. 
        Edges may be repeated.
    """
    """
        Fetch data
    """
    TRAIN_LINES = 413163
    TEST_LINES = 177070
    df = read_item_data()
    df['item_id'] = df.index
    dct_title = df['title'].to_dict()
    dct_domain = df['domain_id'].to_dict()
    dct_price = df['price'].to_dict()
    """ Ratio stuff """
    from input.create_ratio import get_ratio
    dct_ratio_dom = get_ratio(which='domain_id')

    ratio_df = get_ratio(which='item_id', full=True)
    ratio_df['popularity'] = 100.0 * ratio_df['bought'] + ratio_df['searched']
    dct_ratio_item_b = ratio_df['popularity'].to_dict()
    """
        JSON
    
    """
    if mode == 'train':
        check = lambda x: x <= np.round(TRAIN_LINES * 0.8).astype(np.int32)
    elif mode == 'val':
        check = lambda x: x > np.round(TRAIN_LINES * 0.8).astype(np.int32)
    else:
        check = lambda x: True

    DATA_PATH = path.join(
        DATA_DIR, 'test_dataset.jl' if mode == 'test' else 'train_dataset.jl')
    line_i = 0
    """
        Create graph vertices
    """
    g = ig.Graph()

    counter, f_map_func, r_map_func = get_mappings()

    for k in dct_title.keys():
        g.add_vertex(value=k,
                     deg=dct_ratio_item_b[k],
                     domain_id=dct_domain[k],
                     price=dct_price[k],
                     cat='item_id')
    """ ['item_id','domain_id','category_id','product_id'] """

    for k in pd.unique(df['domain_id']):
        g.add_vertex(value=k, cat='domain_id')

    for k in pd.unique(df['category_id']):
        g.add_vertex(value=k, cat='category_id')

    for k in pd.unique(df['product_id']):
        g.add_vertex(value=k, cat='product_id')
    """
        Create edges
    """
    E1 = []
    E2 = []

    with jsonlines.open(DATA_PATH) as reader:
        for line_i, obj in enumerate(reader):
            if check(line_i):
                print(line_i)
                L = []
                for h in obj['user_history']:
                    if h['event_type'] == 'view':
                        #print("Viewed {}".format(dct[h['event_info']]))
                        L.append(h['event_info'])
                    elif h['event_type'] == 'search':
                        #print("Searched {}".format(h['event_info']))
                        pass
                L = pd.unique(L)
                #L_domain = [dct_domain[k] for k in L]
                for i in range(len(L)):
                    E1.append(L[i])
                    E2.append(obj['item_bought'])

    E1 = f_map_func['item_id'](E1)
    E2 = f_map_func['item_id'](E2)

    E = list(zip(E1, E2))
    g.add_edges(E)

    #g  = g.as_undirected()

    g.write_pickle(fname=path.join(DATA_DIR, 'graph_domain_id.pkl'))
Example #13
                _sid, _did = int(line['sourceid']), int(line['dstid'])
                edge = (_sid, _did) if _sid < _did else (_did, _sid)
                if edge in weighted_edges:  # consider multiple edges between the same two nodes
                    total_weight, num_edges = weighted_edges[edge]
                    weighted_edges[edge] = (float(line['mean_travel_time']) +
                                            total_weight, num_edges + 1)
                else:
                    weighted_edges[edge] = (float(line['mean_travel_time']), 1)

        # merge duplicated edges by averaging their weights
        for edge in weighted_edges.keys():
            total_weight, num_edges = weighted_edges[edge]
            weighted_edges[edge] = total_weight / num_edges

        g = ig.Graph(
            [e for e in weighted_edges.keys()],
            edge_attrs=dict(weight=[w for w in weighted_edges.values()]))
        # set vertex indices attribute to keep track of indices in later graph manipulations
        for index, vertex in enumerate(g.vs):
            vertex['index'] = index
        print("The graph has {0} vertices and {1} edges.".format(
            g.vcount(), g.ecount()))
        g_gcc = g.clusters().giant()

        pickle.dump(g_gcc, file_object)
        print("Graph and its GCC generated...")

print("The Giant Connected Component has {0} vertices and {1} edges.".format(
    g_gcc.vcount(), g_gcc.ecount()))
g_gcc_indices_lookup = {
    vertex['index']: i
Example #14
def plotly_graph(
    kmgraph,
    graph_layout="kk",
    colorscale=default_colorscale,
    showscale=True,
    factor_size=3,
    edge_linecolor="rgb(180,180,180)",
    edge_linewidth=1.5,
    node_linecolor="rgb(255,255,255)",
    node_linewidth=1.0,
):
    """Generate Plotly data structures that represent the mapper graph

    Parameters
    ----------
    kmgraph: dict representing the mapper graph,
             returned by the function get_mapper_graph()
    graph_layout: igraph layout; recommended 'kk' (kamada-kawai)
                  or 'fr' (fruchterman-reingold)
    colorscale: a Plotly colorscale(colormap) to color graph nodes
    showscale: boolean to display or not the colorbar
    factor_size: a factor for the node size

    Returns
    -------
    The plotly traces (dicts) representing the graph edges and nodes
    """
    # define an igraph.Graph instance of n_nodes
    n_nodes = len(kmgraph["nodes"])
    if n_nodes == 0:
        raise ValueError("Your graph has 0 nodes")
    G = ig.Graph(n=n_nodes)
    links = [(e["source"], e["target"]) for e in kmgraph["links"]]
    G.add_edges(links)
    layt = G.layout(graph_layout)

    hover_text = [node["name"] for node in kmgraph["nodes"]]
    color_vals = [node["color"] for node in kmgraph["nodes"]]
    node_size = np.array(
        [factor_size * node["size"] for node in kmgraph["nodes"]],
        dtype=int)  # np.int is removed in recent NumPy versions
    Xn, Yn, Xe, Ye = _get_plotly_data(links, layt)

    edge_trace = dict(
        type="scatter",
        x=Xe,
        y=Ye,
        mode="lines",
        line=dict(color=edge_linecolor, width=edge_linewidth),
        hoverinfo="none",
    )

    node_trace = dict(
        type="scatter",
        x=Xn,
        y=Yn,
        mode="markers",
        marker=dict(
            size=node_size.tolist(),
            color=color_vals,
            opacity=1.0,
            colorscale=colorscale,
            showscale=showscale,
            line=dict(color=node_linecolor, width=node_linewidth),
            colorbar=dict(thickness=20,
                          ticklen=4,
                          x=1.01,
                          tickfont=dict(size=10)),
        ),
        text=hover_text,
        hoverinfo="text",
    )

    return [edge_trace, node_trace]
Example #15
args = parser.parse_args()

if not args.prefix:
    args.prefix = os.path.splitext(os.path.basename(args.dataset))[0]

# Load csv file
dataset = np.loadtxt(args.dataset)

# Compute nearest neighbors
print('Building kd-tree index...')
flann = FLANN()
flann.build_index(dataset)

# Create the state-space graph
graph = ig.Graph(directed=args.directed)
graph.add_vertices(len(dataset))  # np.alen is removed in recent NumPy versions

def heat_kernel(i, j, dists, knn=None):
    return np.exp(-1*dists[i,j]/args.sigma)

def locally_scaled_heat_kernel(i, j, dists, knn):
    return np.exp(-1*dists[i,j]/(np.sqrt(dists[i,args.local_scaling-1])*np.sqrt(dists[knn[i,j],args.local_scaling-1])))

if args.local_scaling:
    print('Scaling locally')
    similarity = locally_scaled_heat_kernel
else:
    similarity = heat_kernel

if args.radius:
Example #16
def build_igraph_from_pp(net, respect_switches=False, buses=None):
    """
    This function uses the igraph library to create an igraph graph for a given pandapower network.
    Lines, transformers and switches are respected.
    Performance vs. networkx: https://graph-tool.skewed.de/performance

    :param net: pandapower network
    :type net: pandapowerNet
    :param respect_switches: if True, exclude edges for open switches (also lines that are \
        connected via line switches)
    :type respect_switches: bool, default False

    :Example:
        graph, meshed, roots = build_igraph_from_pp(net)
    """
    try:
        import igraph as ig
    except (DeprecationWarning, ImportError):
        raise ImportError("Please install python-igraph with "
                          "`pip install python-igraph` or "
                          "`conda install python-igraph` "
                          "or from https://www.lfd.uci.edu/~gohlke/pythonlibs")
    g = ig.Graph(directed=True)
    bus_index = net.bus.index if buses is None else np.array(buses)
    nr_buses = len(bus_index)
    g.add_vertices(nr_buses)
    # g.vs["label"] = [s.encode('unicode-escape') for s in net.bus.name.tolist()]
    g.vs["label"] = list(bus_index)
    pp_bus_mapping = dict(list(zip(bus_index, list(range(nr_buses)))))
    if respect_switches:
        open_switches = ~net.switch.closed.values.astype(bool)
    # add lines
    mask = _get_element_mask_from_nodes(net, "line", ["from_bus", "to_bus"],
                                        buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "line", "l", open_switches)
    for line in net.line[mask].itertuples():
        g.add_edge(pp_bus_mapping[line.from_bus],
                   pp_bus_mapping[line.to_bus],
                   weight=line.length_km)

    # add trafos
    mask = _get_element_mask_from_nodes(net, "trafo", ["hv_bus", "lv_bus"],
                                        buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "trafo", "t", open_switches)
    for trafo in net.trafo[mask].itertuples():
        g.add_edge(pp_bus_mapping[trafo.hv_bus],
                   pp_bus_mapping[trafo.lv_bus],
                   weight=0.01)

    # add trafo3w
    mask = _get_element_mask_from_nodes(net, "trafo3w",
                                        ["hv_bus", "mv_bus", "lv_bus"], buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "trafo3w", "t3", open_switches)
    for trafo3w in net.trafo3w[mask].itertuples():
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.lv_bus],
                   weight=0.01)
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.mv_bus],
                   weight=0.01)

    # add switches
    mask = net.switch.et.values == "b"
    if respect_switches:
        mask &= ~open_switches
    for switch in net.switch[mask].itertuples():
        g.add_edge(pp_bus_mapping[switch.element],
                   pp_bus_mapping[switch.bus],
                   weight=0.001)

    meshed = _igraph_meshed(g)

    roots = [
        pp_bus_mapping[b] for b in net.ext_grid.bus.values if b in bus_index
    ]
    return g, meshed, roots  # g, (not g.is_dag())
Example #17
    def compute_communities(self):
        '''Compute communities from a matrix with fixed nodes

        Returns:
            None, but SemiAnnotate.membership is set as an array with
            size N - n_fixed with the atlas cell types of all cells from the
            new dataset.
        '''
        import inspect
        import igraph as ig
        import leidenalg

        # Check whether this version of Leiden has fixed nodes support
        opt = leidenalg.Optimiser()
        sig = inspect.getfullargspec(opt.optimise_partition)
        if 'fixed_nodes' not in sig.args:
            raise ImportError('This version of the leidenalg module does not support fixed nodes. Please update to a later (development) version')

        matrix = self.matrix
        sizes = self.sizes
        n_fixed = self.n_fixed
        clustering_metric = self.clustering_metric
        resolution_parameter = self.resolution_parameter
        neighbors = self.neighbors

        L, N = matrix.shape
        n_fixede = int(np.sum(sizes[:n_fixed]))
        Ne = int(np.sum(sizes))

        # Construct graph from the lists of neighbors
        edges_d = set()
        for i, neis in enumerate(neighbors):
            for n in neis:
                edges_d.add(frozenset((i, n)))

        edges = [tuple(e) for e in edges_d]
        g = ig.Graph(n=N, edges=edges, directed=False)

        # NOTE: initial membership is singletons except for atlas nodes, which
        # get the membership they have.
        initial_membership = []
        for isi in range(N):
            if isi < n_fixed:
                for ii in range(int(self.sizes[isi])):
                    initial_membership.append(isi)
            else:
                initial_membership.append(isi)

        if len(initial_membership) != Ne:
            raise ValueError('initial_membership list has wrong length!')

        # Compute communities with semi-supervised Leiden
        if clustering_metric == 'cpm':
            partition = leidenalg.CPMVertexPartition(
                    g,
                    resolution_parameter=resolution_parameter,
                    initial_membership=initial_membership,
                    )
        elif clustering_metric == 'modularity':
            # ModularityVertexPartition takes no resolution parameter
            partition = leidenalg.ModularityVertexPartition(
                    g,
                    initial_membership=initial_membership,
                    )
        else:
            raise ValueError(
                'clustering_metric not understood: {:}'.format(clustering_metric))

        fixed_nodes = [int(i < n_fixede) for i in range(Ne)]
        opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
        membership = partition.membership[n_fixede:]

        # Convert the known cell types
        lstring = len(max(self.cell_types, key=len))
        self.membership = np.array(
                [str(x) for x in membership],
                dtype='U{:}'.format(lstring))
        for i, ct in enumerate(self.cell_types):
            self.membership[self.membership == str(i)] = ct
Example #18
def networkx_to_igraph(G):
    mapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
    reverse_mapping = dict(zip(range(G.number_of_nodes()), G.nodes()))
    G = nx.relabel_nodes(G, mapping)
    G_ig = ig.Graph(len(G), [(u, v) for u, v, _ in nx.to_edgelist(G)])
    return G_ig, reverse_mapping
#------------------------------------------------------------------

# we will use three basic features:

# number of overlapping words in title
overlap_title = []

# temporal distance between the papers
temp_diff = []

# number of common authors
comm_auth = []

# author citation history
cit_hist = []
auth_graph = igraph.Graph(directed=True)
#------------------------------------------------------------------
# document similarity for abstract
similarity = []
doc_similarity = features_TFIDF.dot(features_TFIDF.T)
#------------------------------------------------------------------
# inverse shortest-path (to avoid distance = infinity)
inverse_shortest_distances = []
#------------------------------------------------------------------
# keyword overlap
overlap_keyword = []


def inverse_shortest_dist(g, source, target):
    try:
        return 1. / (len(nx.shortest_path(g, source=source, target=target)) +
Example #20
for edge_idx in range(edge_attr.shape[0]):
    if edge_attr[edge_idx, 5] == 1:
        new_edge = tuple(edge_index[:, edge_idx].tolist())

        if new_edge[0] not in node_to_colo:
            node_to_colo[new_edge[0]] = colo_node_iter
            colo_node_iter += 1
        if new_edge[1] not in node_to_colo:
            node_to_colo[new_edge[1]] = colo_node_iter
            colo_node_iter += 1

        colonial_edges.append(tuple([node_to_colo[new_edge[0]], node_to_colo[new_edge[1]]]))

N = colo_node_iter

G = ig.Graph(colonial_edges, directed=True)
layt = G.layout('kk')

Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
Yn = [layt[k][1] for k in range(N)]  # y-coordinates
Xe = []
Ye = []
for e in colonial_edges:
    Xe += [layt[e[0]][0], layt[e[1]][0], None]  # x-coordinates of edge ends
    Ye += [layt[e[0]][1], layt[e[1]][1], None]



trace1 = go.Scatter(x=Xe,
                    y=Ye,
                    mode='lines',
Example #21
                    st.subheader("val")
                    if len(val) > 0:
                        st.write(
                            i.score(val.drop(y_label, axis=1), val[y_label]))
                    else:
                        st.write("No Data")
                    st.subheader("test")
                    st.write(
                        i.score(test.drop(y_label, axis=1), test[y_label]))

                    trained_models.append(i)

            elif option == "Network Graph":
                st.markdown("### Example ")
                # https://plotly.com/python/v3/igraph-networkx-comparison/
                S = igraph.Graph(directed=True)
                S.add_vertices(8)
                S.vs["id"] = [1, 2, 3, 4, 5, 6, 7, 8]
                S.vs["label"] = [1, 2, 3, 4, 5, 6, 7, 8]
                S.add_edges([(1, 2), (2, 3), (4, 5), (1, 6)])
                # igraph.drawing.plot(S,'test.png',layout=S.layout_lgl())

                # import matplotlib.pyplot as plt
                # fig,ax=plt.subplots()
                # igraph.plot(S,target=ax)
                out_png = igraph.drawing.plot(
                    S, "temp.png", layout=S.layout_lgl())
                out_png.save("temp.png")
                st.image("temp.png")

                if option2 == "Chain":
Example #22
def main(df):
    global model, g

    texts = df.text.to_list()
    # 1. Split the whole collection of reviews into sentences. Lemmatize all words.

    # helper to remove stop words
    mystopwords = stopwords.words('russian') + [
        'это', 'наш', 'тыс', 'млн', 'млрд', 'также', 'т', 'д', 'который',
        'прошлый', 'сей', 'свой', 'мочь', 'в', 'я', '-', 'мой', 'ваш', 'и', '5'
    ]

    def remove_stopwords(text, mystopwords=mystopwords):
        try:
            return " ".join([
                token for token in text.lower().split()
                if token not in mystopwords
            ])
        except Exception:
            return ""

    # lemmatization helper
    def lemmatize(text, morph=MorphAnalyzer()):
        try:
            lemmas = [
                morph.parse(word)[0].normal_form for word in text.split()
            ]
            return ' '.join(lemmas)
        except Exception:
            return ""

    pattern = re.compile('[а-яА-Я]+')

    def only_words(text, p=pattern):
        return ' '.join(p.findall(text)).strip()

    # split each text into sentences and collect them into a single list
    sentences = []
    pattern = re.compile('[а-яА-Я]+')
    for text in texts:
        text = lemmatize(remove_stopwords(text))
        text_sentences = sent_tokenize(text)
        for sentence in text_sentences:
            sentences.append(pattern.findall(sentence))

    model = Word2Vec(min_count=1)
    model.build_vocab(sentences)
    model.train(sentences,
                total_examples=model.corpus_count,
                epochs=model.iter)
    # find the most similar words
    result1 = model.wv.most_similar(positive="банк", topn=10)
    print('result1 = ', result1)
    # analogies
    result2 = model.wv.most_similar(positive=['кредит', 'вклад'],
                                    negative=['долг'])
    print('result2 = ', result2)
    # find the odd word out
    result3 = model.wv.doesnt_match("банк перевод счет отделение".split())
    print('result3 = ', result3)

    # Results obtained (AMOUNT=10000)
    #result1 =  [('клик', 0.655503511428833), ('банком', 0.6381771564483643), ('банка', 0.5996867418289185), ('мобайл', 0.5682080984115601), ('банку', 0.5554714202880859), ('клике', 0.5553926229476929), ('клика', 0.5493252873420715), ('беларусь', 0.545136570930481), ('банке', 0.5433052778244019), ('терроризирует', 0.5427347421646118)]
    #result2 =  [('депозит', 0.7282594442367554), ('посочувствовали', 0.6706955432891846), ('вклада', 0.6341916918754578), ('автокопилку', 0.6184648871421814), ('депозита', 0.6164340972900391), ('вклады', 0.6158702373504639), ('преддефолтный', 0.6100568771362305), ('баррикадной', 0.6076372861862183), ('ргают', 0.6062372922897339), ('потребкредить', 0.5898075103759766)]
    #result3 =  отделение

    df['text_without_stopwords'] = df.text.apply(remove_stopwords)
    df['lemmas'] = df['text_without_stopwords'].apply(lemmatize)
    df['lemmas'] = df['lemmas'].apply(remove_stopwords)

    vectors = TfidfVectorizer(max_features=500).fit_transform(
        df['lemmas'][:AMOUNT])
    X_reduced = TruncatedSVD(n_components=5,
                             random_state=40).fit_transform(vectors)
    X_embedded = TSNE(n_components=2, perplexity=5,
                      verbose=0).fit_transform(X_reduced)

    vis_df = pd.DataFrame({
        'X': X_embedded[:200, 0],
        'Y': X_embedded[:200, 1],
        'topic': df.title[:200]
    })

    # t-SNE visualization
    # 'size' was renamed to 'height' in seaborn 0.9
    g = sns.FacetGrid(vis_df, hue="topic", height=10).map(plt.scatter, "X",
                                                          "Y").add_legend()
    g.savefig("tsne.png")

    # red: problems with online service, green: refusals by the bank

    # place the banks on a 2-D plane
    v1 = model.wv['хороший'] - model.wv['плохой']     # 'good' - 'bad' axis
    v2 = model.wv['быстрый'] - model.wv['медленный']  # 'fast' - 'slow' axis
    banks = [
        'сбербанк', 'втб', 'тинькофф', 'россельхозбанк', 'росбанк', 'авангард',
        'ситибанк', 'альфабанк'
    ]
    banks_x = []
    banks_y = []
    for bank in banks:
        banks_x.append(np.dot(v1, model.wv[bank]))
        banks_y.append(np.dot(v2, model.wv[bank]))

    fig, ax = plt.subplots()
    ax.scatter(banks_x, banks_y)

    for i, txt in enumerate(banks):
        ax.annotate(txt, (banks_x[i], banks_y[i]))

    ax.set(xlabel='плохо-хорошо', ylabel='медленно-быстро')  # 'bad-good' / 'slow-fast'
    fig.savefig('plane.png')

    #plt.show()

    # example: building a word graph

    keys = list(model.wv.key_to_index)[:AMOUNT]  # model.wv.vocab in gensim < 4

    g = ig.Graph(directed=True)
    labels = []
    fixes = []
    weights = []

    positive_words = [
        'любезно', 'готовый', 'хороший', 'уважаемый', 'положительный', 'выбор'
    ]
    negative_words = [
        'беспокоить', 'достает', 'неважно', 'неграмотность', 'никак',
        'просрочить'
    ]

    for word in keys:
        label = -1  # unlabeled word
        fix = False
        if word in positive_words:
            label = 1  # positive seed label
            fix = True
        if word in negative_words:
            label = 0  # negative seed label
            fix = True

        labels.append(label)
        fixes.append(fix)  # only the seed words stay fixed during propagation

        g.add_vertex(word)

    for word in keys:
        node = g.vs.select(name=word).indices[0]
        similar_words = model.wv.most_similar(word, topn=10)
        for sim in similar_words:
            try:
                word1 = sim[0]
                val = sim[1]
                new_node = g.vs.select(name=word1).indices[0]
                g.add_edge(node, new_node, weight=val)
                weights.append(val)

            except Exception as err:
                print('Error', err)

    m = g.community_label_propagation(initial=labels,
                                      weights=weights,
                                      fixed=fixes)
    print('membership = ', m.membership)  # per-word community labels
    print('labels = ', labels)
    print('weights = ', weights)
    print('len weights = ', len(weights))
    return m
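
# A tiny self-contained demonstration of the community_label_propagation
# call used above, to clarify the initial/fixed semantics (values are
# illustrative; label propagation is stochastic, so membership can vary):
def _demo_label_propagation():
    demo = ig.Graph([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
    seed = [1, -1, -1, -1, -1, 0]   # -1 marks an unlabeled vertex
    fixed = [True, False, False, False, False, True]
    clusters = demo.community_label_propagation(initial=seed, fixed=fixed)
    print(clusters.membership)      # e.g. [1, 1, 1, 0, 0, 0]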
Ejemplo n.º 23
0
def write_stats(stats, file_obj):
    """Writes a dictionary of statistics as an igraph graph."""
    stats_graph = igraph.Graph()
    for k, v in stats.items():
        stats_graph[k] = v
    write_graph(stats_graph, file_obj)
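
# A hedged read-side counterpart (read_graph is assumed to mirror the
# undefined write_graph helper): graph-level attributes set with
# stats_graph[k] = v come back via Graph.attributes().
def read_stats(file_obj):
    """Reads a statistics dictionary back from an igraph graph."""
    stats_graph = read_graph(file_obj)  # assumed counterpart of write_graph
    return {k: stats_graph[k] for k in stats_graph.attributes()}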
Ejemplo n.º 24
0
# -*- coding: utf-8 -*-

import igraph as gr

g = gr.Graph()

with open("dic.txt", "r") as file:
    source = file.read()
# dic.txt is expected to define the dictionary 'dic' when executed
code = compile(source, '<string>', 'exec')
exec(code)

#print (dic['\xd9\x85\xd8\xac\xdb\x8c\xd8\xaf \xd8\xa8\xd8\xb1\xd8\xb2\xda\xaf\xd8\xb1'])
#print (dic["مسعود کیمیایی"][9])

actors = []
f = open("r.txt", "a")
for i in dic.keys():
    g.add_vertex(i)

    for j in dic[i]:
        if j not in actors:
            g.add_vertex(j)
            actors.append(j)

        eid = g.get_eid(i, j, error=False)
        if eid != -1:
            g.es[eid]["weight"] += 1
#            f.write(str(i.encode("utf-8"))+"  "+str(j.encode("utf-8")))
#            f.write("\n")
#            print(i, "  ", j)
        else:
            g.add_edge(i, j, weight=1)  # first co-appearance of this pair; completion of the truncated else
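
# With the graph built, the strongest co-appearance pairs can be listed
# straight from the edge weights (a small hedged sketch):
top = sorted(g.es, key=lambda e: e["weight"], reverse=True)[:10]
for e in top:
    print(g.vs[e.source]["name"], g.vs[e.target]["name"], e["weight"])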
Ejemplo n.º 25
0
def load_citation_network_igraph(data_dir, court_name, directed=True):
    jurisdictions = pd.read_csv(data_dir + 'clean/jurisdictions.csv',
                                index_col='abbrev')

    all_courts = set(jurisdictions.index)
    if not((court_name in all_courts) or (court_name == 'all')):
        raise ValueError('invalid court_name')

    start = time.time()
    if court_name == 'all':
        case_metadata = pd.read_csv(data_dir + 'clean/case_metadata_master.csv')

        edgelist = pd.read_csv(data_dir + 'clean/edgelist_master.csv')
    else:
        net_dir = data_dir + 'clean/' + court_name + '/'
        if not os.path.exists(net_dir):
            os.makedirs(net_dir)
            make_court_subnetwork(court_name, data_dir)

        case_metadata = pd.read_csv(net_dir + 'case_metadata.csv')

        edgelist = pd.read_csv(net_dir + 'edgelist.csv')
        edgelist.drop('Unnamed: 0', inplace=True, axis=1)

    # create a dictionary that maps CourtListener ids to igraph ids
    cl_to_ig_id = {}
    cl_ids = case_metadata['id'].tolist()
    for i in range(case_metadata['id'].size):
        cl_to_ig_id[cl_ids[i]] = i

    # add nodes
    V = case_metadata.shape[0]
    g = ig.Graph(n=V, directed=directed)
    # g.vs['date'] = case_metadata['date'].tolist()
    g.vs['name'] = case_metadata['id'].tolist()

    # create igraph edgelist
    cases_w_metadata = set(cl_to_ig_id.keys())
    ig_edgelist = []
    missing_cases = 0
    start = time.time()
    for row in edgelist.itertuples():

        cl_ing = row[1]
        cl_ed = row[2]

        if (cl_ing in cases_w_metadata) and (cl_ed in cases_w_metadata):
            ing = cl_to_ig_id[cl_ing]
            ed = cl_to_ig_id[cl_ed]
            ig_edgelist.append((ing, ed))
        else:
            missing_cases += 1  # skip edges whose endpoints lack metadata

    # add edges to graph
    g.add_edges(ig_edgelist)

    # add vertex attributes
    g.vs['court'] = case_metadata['court'].tolist()
    g.vs['year'] = [int(d.split('-')[0]) for d in case_metadata['date'].tolist()]

    end = time.time()
    g.simplify(multiple=True)

    return g
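
# Hedged usage sketch ('data/' and 'scotus' are illustrative placeholders
# for the data directory and a court abbreviation from jurisdictions.csv):
# g = load_citation_network_igraph('data/', 'scotus')
# print(g.vcount(), g.ecount())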
Ejemplo n.º 26
0
    f.close()

    # get the dataset size
    num_nodes = len(data['nodes'])
    num_edges = len(data['links'])
    print('nodes:', num_nodes, 'edges:', num_edges)

    # read the node names, links and weights
    edges = [(data['links'][k]['source'], data['links'][k]['target']) for k in range(num_edges)]
    weights = [(data['links'][k]['value']) for k in range(num_edges)]
    edges_weights = [(data['links'][k]['source'],
                      data['links'][k]['target'],
                      data['links'][k]['value']) for k in range(num_edges)]

    # build a Graph with the node and edge information
    G_ig = ig.Graph(n=num_nodes, edges=edges, directed=False)

    # run community detection
    result, num_comm = comm_detection(g=G_ig, algorithm=k_algorithm, weights_input=weights)
    print(len(result[0]), len(result[1]), len(result[2]))

    # run repeated clustering
    result = multiple_clustering(comm=result, edge_weight=edges_weights, max_n=max_nodes)
    print(len(result[0]), len(result[1]), len(result[2]))

    # classify the edges: intra-community, inter-community, and all links
    edges_inside, edges_outside, edges_comm = edges_sort(comm=result, links=edges)

    # build a Graph with the nodes and the intra-community edges
Ejemplo n.º 27
0
import igraph
# print(igraph.__version__)

g = igraph.Graph()

#34 Members of the club
g.add_vertices(34)

#Connections from the first matrix
g.add_edges([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8)])
g.add_edges([(0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21),
             (0, 31)])
g.add_edges([(1, 2), (1, 3), (1, 7), (1, 13), (1, 17), (1, 19), (1, 21),
             (1, 30)])
g.add_edges([(2, 3), (2, 7), (2, 8), (2, 9), (2, 13), (2, 27), (2, 28),
             (2, 32)])
g.add_edges([(3, 7), (3, 12), (3, 13)])
g.add_edges([(4, 6), (4, 10)])
g.add_edges([(5, 6), (5, 10), (5, 16)])
g.add_edges([(6, 16)])
g.add_edges([(8, 30), (8, 32), (8, 33)])
g.add_edges([(9, 33)])
g.add_edges([(13, 33)])
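
# The same Zachary karate-club network also ships with igraph, so a hedged
# one-line alternative to typing the adjacency in by hand:
g2 = igraph.Graph.Famous("Zachary")
print(g2.vcount(), g2.ecount())  # 34 78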
Ejemplo n.º 28
0
"""
Created on Wed Apr 18 15:26:11 2018

@author: Jinglin
"""

import csv
import igraph
import numpy
import matplotlib.pyplot as plt
import louvain
import pylab as pl
import time


graph9bus = igraph.Graph()

with open('9busnode.csv', newline='') as csvfileNode:  # 'rb' is a Python 2 idiom; csv wants text mode in Python 3
    csvreaderNode = csv.reader(csvfileNode)
    mycsvNode = list(csvreaderNode)
    for row in mycsvNode:
        graph9bus.add_vertex(name=row[0])
        
nodeNumber = graph9bus.vcount()

SVQ = numpy.zeros((nodeNumber, nodeNumber))
with open('9busbranch.csv', newline='') as csvfileBranch:
    csvreaderBranch = csv.reader(csvfileBranch)
    mycsvBranch = list(csvreaderBranch)
    for row in mycsvBranch:
        B = (1 / complex(float(row[2]), float(row[3]))).imag  # branch susceptance from R, X
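
# The susceptance above is the imaginary part of the branch admittance
# 1/(R + jX); a quick worked check with illustrative values:
R, X = 0.01, 0.1
print((1 / complex(R, X)).imag)  # about -9.901, i.e. Im(1/(0.01+0.1j))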
Ejemplo n.º 29
0
# solution to exercise 1.4 a

def component(network, node):
    """Return every vertex of the connected component containing `node` (recursive DFS)."""
    vizsgalt = []  # visited vertices
    def magic(node_list):
        for i in node_list:
            if i not in vizsgalt:
                vizsgalt.append(i)
                magic(network.neighbors(i))
    magic([node])
    return vizsgalt

# solution to exercise 1.4 b

def delta(network, node_list):
    """Edge count minus vertex count of the subgraph induced by `node_list`."""
    cc = network.subgraph(node_list)
    return cc.ecount() - cc.vcount()

if __name__ == "__main__":
    import igraph
    print("Exercise A: list every vertex of the component that contains the given vertex:")
    net = igraph.Graph(8, directed=False)
    net.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (3, 5), (6, 7)])
    node = 2
    comp = component(net, node)
    print(comp)
    print("Exercise B: edge count minus vertex count of the subgraph induced by the given vertices:")
    #net2 = igraph.Graph.Erdos_Renyi(20,0.08)
    #nodes= [3,5,15,18,11,6]
    print(delta(net, comp))
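
    # Cross-check against igraph's built-in: subcomponent() returns the same
    # vertex set as the hand-rolled component() above.
    print(sorted(comp) == sorted(net.subcomponent(node)))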
Ejemplo n.º 30
0
def from_graph6(graph6_str):
    """Convert a graph6 ASCII-encoded string to an igraph Graph."""
    nx_graph = networkx.readwrite.from_graph6_bytes(
        b">>graph6<<" + graph6_str.strip().encode("ascii"))
    edges = list(nx_graph.edges)
    return igraph.Graph(edges)
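
# Hedged usage example: "A_" is the graph6 encoding of K2 (two vertices,
# one edge).
g = from_graph6("A_")
print(g.vcount(), g.ecount())  # 2 1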