Esempio n. 1
0
def mag_query_input_to_xnet(nodes_file, edges_file, output_file):
    """Convert MAG query CSV exports into an xnet citation network.

    Reads the node and edge tables, builds a directed igraph graph with one
    vertex per row of the node table, adds a ``KCore`` attribute
    (``shell_index`` with ``mode="IN"``), an integer ``year`` attribute and,
    on the largest weakly connected component only, a multilevel
    ``Community`` label. That component is what gets written out.

    Args:
        nodes_file: CSV of papers, read with ``dtype=MAGColumnTypes``. It
            needs a ``paper_id`` column and either a ``year`` column or a
            ``date`` column whose first four characters are the year.
        edges_file: CSV of citations. The endpoint columns may be named
            ``From``/``FROM``/``Citing`` and ``To``/``TO``/``Cited``.
        output_file: Destination path handed to ``xn.igraph2xnet``.

    Raises:
        KeyError: If ``paper_id`` or every accepted endpoint column name is
            missing from the corresponding table.
    """
    edgesData = pd.read_csv(edges_file)
    nodesData = pd.read_csv(nodes_file, dtype=MAGColumnTypes)

    # Replace NaN with the empty string. The result is assigned back instead
    # of calling fillna(inplace=True) on the column selection: the chained
    # in-place form is deprecated and leaves the frame untouched once pandas
    # copy-on-write is active.
    for key in MAGColumnTypes:
        if key in nodesData:
            nodesData[key] = nodesData[key].fillna("")

    # Continuous vertex indices for the papers, in node-table order.
    index2ID = nodesData["paper_id"].tolist()
    ID2Index = {paperID: index for index, paperID in enumerate(index2ID)}

    # The exports are inconsistent about header spelling; take the first
    # spelling that exists. Falling back to the last candidate keeps the
    # KeyError on the column lookup below when none of them is present.
    fromKey = next(
        (name for name in ("From", "FROM", "Citing") if name in edgesData),
        "Citing")
    toKey = next(
        (name for name in ("To", "TO", "Cited") if name in edgesData),
        "Cited")

    # Translate endpoints from paper IDs to vertex indices, dropping edges
    # with an endpoint that is not in the node table. Each pair is stored
    # reversed, i.e. as (index of "to", index of "from").
    edgesZip = zip(edgesData[fromKey].tolist(), edgesData[toKey].tolist())
    edgesList = [(ID2Index[toID], ID2Index[fromID])
                 for fromID, toID in edgesZip
                 if fromID in ID2Index and toID in ID2Index]

    # Every column of the node table becomes a vertex attribute.
    vertexAttributes = {key: nodesData[key].tolist() for key in nodesData}

    graph = ig.Graph(n=len(index2ID),
                     edges=edgesList,
                     directed=True,
                     vertex_attrs=vertexAttributes)

    graph.vs["KCore"] = graph.shell_index(mode="IN")

    if "year" in graph.vertex_attributes():
        graph.vs["year"] = [int(year) for year in graph.vs["year"]]
    else:
        graph.vs["year"] = [int(s[0:4]) for s in graph.vs["date"]]

    giantComponent = graph.clusters(mode="WEAK").giant()
    # Communities are detected on an undirected copy so the exported
    # component keeps its edge directions.
    giantCopy = giantComponent.copy()
    giantCopy.to_undirected()
    giantComponent.vs["Community"] = [
        str(c) for c in giantCopy.community_multilevel().membership
    ]
    xn.igraph2xnet(giantComponent, output_file)
Esempio n. 2
0
def get_nets_by_3months(search, year, output_dir):
    """Write one xnet file per month of ``year``.

    For each month from 1 to 12 a graph is requested from
    ``generate_graph_year_3months`` (with the module-level
    ``transition_years``). Truthy results are saved as
    ``dep_<year>_<month>.xnet``; the name is appended to ``output_dir`` by
    plain string concatenation, so ``output_dir`` must already end with a
    path separator.
    """
    for month in range(1, 13):  # 12 months
        print("Current year:", year, "Current month:", month)
        graph = generate_graph_year_3months(search, year, month,
                                            transition_years)
        if graph:
            target = output_dir + 'dep_' + str(year) + '_' + str(month)
            xnet.igraph2xnet(graph, target + '.xnet')
        print()
Esempio n. 3
0
def calculate_dist(filenames):
    """Add a ``distance`` edge attribute derived from ``weight`` to each net.

    Every file is loaded with ``xnet.xnet2igraph`` and each edge weight ``w``
    is mapped to ``sqrt(2 * (1 - w))``. A network with at least one edge is
    saved under the input name with its last five characters replaced by
    ``_dist.xnet``; a network without edges is reported on stdout and not
    written.
    """
    for path in filenames:
        network = xnet.xnet2igraph(path)
        distances = [math.sqrt(2 * (1 - w)) for w in network.es['weight']]
        if not distances:
            print('error', path)
            continue
        network.es['distance'] = distances
        xnet.igraph2xnet(network, path[:-5] + "_dist.xnet")
Esempio n. 4
0
def get_nets_by_year(search, years, output_dir):
    """Write one ``dep_<year>_obstr.xnet`` network per year.

    For every year the propositions in ``search[year]`` are fed to
    ``get_votes``, which accumulates into a zero-defaulting mapping. A graph
    is generated and saved only when the year has more than six
    propositions; otherwise the year and its proposition count are printed.
    """
    for year in years:
        props = search[year]
        n_props = len(props)

        common_votes = defaultdict(int)
        for prop in props:
            get_votes(common_votes, prop)

        # Guard clause: years with too few propositions are only reported.
        if n_props <= 6:
            print('Problem in year', year)
            print('Number of propositions', n_props)
            continue

        g = generate_graph(common_votes, n_props)
        xnet.igraph2xnet(g, output_dir + 'dep_' + str(year) + '_obstr.xnet')
def create_nets_by_year():
    """Build one subject-area co-occurrence network per start year.

    Loads ``data/plos_one_2019_subj_areas.json`` and, for each start year
    from 2008 to 2017, collects the papers whose first ``time_series`` month
    value falls in ``[year, year + 4]``. Subject areas become vertices and
    every pair of areas listed on the same paper becomes an edge; parallel
    edges are merged by summing their unit weights. Each vertex gets a
    ``comm`` attribute holding the index of its multilevel community, and
    the graph is written to
    ``data/subj_areas/nets/all_with_comm_<year>_4.xnet``.
    """
    source_path = 'data/plos_one_2019_subj_areas.json'
    # Context manager so the handle is closed as soon as parsing is done.
    with open(source_path, 'r') as handle:
        json_content = json.load(handle)

    for year in range(2008, 2018):
        vertices = set()
        edges = []
        for paper in json_content.values():
            months = paper['time_series']['months']
            if not months:
                continue
            c_year = int(float(months[0]))
            if year <= c_year <= year + 4:
                subs = paper['infos']['subj_areas']
                vertices.update(subs)
                edges.extend(combinations(subs, 2))

        g = Graph()
        g.add_vertices(len(vertices))
        # Sorted so vertex order (and therefore the written file) does not
        # depend on set iteration order, which varies between runs.
        g.vs['name'] = sorted(vertices)
        g.add_edges(edges)
        g.es['weight'] = 1
        g.simplify(combine_edges=sum)

        communities = g.community_multilevel(weights='weight')
        for comm_id, members in enumerate(communities):
            for idx in members:
                g.vs[idx]['comm'] = comm_id

        xnet.igraph2xnet(g,
                         'data/subj_areas/nets/all_with_comm_%d_4.xnet' % year)
Esempio n. 6
0
def get_largest_component(g):
	"""Return the giant component of ``g`` as given by ``g.components()``."""
	return g.components().giant()

def identify_communities_leidenalg(net):
	"""Return a copy of ``net`` labelled with Leiden modularity communities.

	The partition is computed on the giant component only. In the returned
	copy every vertex carries a string ``community`` attribute: "1", "2", ...
	for members of the detected communities (matched back by ``name``) and
	"-1" for every vertex that is in no detected community.
	"""
	giant = get_largest_component(net)
	partition = leidenalg.find_partition(giant, leidenalg.ModularityVertexPartition)
	subgraphs = partition.subgraphs()  # one subgraph per community
	print('Number of communities identified:',len(subgraphs))

	labelled = net.copy()
	labelled.vs['community'] = "-1"
	for label, subgraph in enumerate(subgraphs, start=1):
		for member in subgraph.vs:
			target = labelled.vs.find(name=member['name'])
			target['community'] = str(label)
	return labelled

# Label every yearly network with its Leiden communities and save the result
# next to the input, with the last five characters of the name replaced by
# "_leidenalg.xnet".
filenames = sorted(glob.glob("data/1991-2019/by_year/*.xnet"))

graphs = []
for filename in filenames:
	print(filename)
	net = identify_communities_leidenalg(xnet.xnet2igraph(filename))

	output = filename[:-5] + '_leidenalg.xnet'
	xnet.igraph2xnet(net, output)
Esempio n. 7
0
def main():
    """Compute vertex accessibility for a graph file and plot it.

    Command-line arguments:
        --graphml: path of the graph to load (required).
        --level: value passed to the external tool's ``-l`` option.
        --outdir: directory that receives every output file.

    Steps:
        1. Load the graph and drop the vertex attributes ``ref``,
           ``highway``, ``osmid`` and ``id`` (when present) plus all edge
           attributes.
        2. Unless ``<outdir>/accessibility.txt`` already exists, export the
           graph to ``<outdir>/out.xnet`` and run
           ``Build_Linux/CVAccessibility`` to produce it.
        3. Read one accessibility value per line from that file.
        4. Plot the bare edges (``skel.pdf``), the vertices shaded by
           accessibility (``acc_all.pdf``) and one black/white plot per
           threshold (``acc_thresh_<t>.pdf``).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--graphml', required=True, help='graphml')
    parser.add_argument('--level', default=5, help='Acessibility param')
    parser.add_argument('--outdir', default='/tmp/out/', help='outdir')
    args = parser.parse_args()

    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    os.makedirs(args.outdir, exist_ok=True)

    logging.basicConfig(format='[%(asctime)s] %(message)s',
                        datefmt='%Y%m%d %H:%M',
                        level=logging.INFO)

    xnetgraphpath = os.path.join(args.outdir, 'out.xnet')
    accessibilitypath = os.path.join(args.outdir, 'accessibility.txt')

    info('Loading graph...')
    g = igraph.Graph.Read(args.graphml)

    # Strip attributes before exporting; vertex attributes that the input
    # does not carry are skipped rather than raising.
    present = set(g.vs.attributes())
    for attr in ('ref', 'highway', 'osmid', 'id'):
        if attr in present:
            del g.vs[attr]
    for attr in g.es.attributes():
        del g.es[attr]

    if not os.path.exists(accessibilitypath):
        info('Converting to xnet...')
        xnet.igraph2xnet(g, xnetgraphpath)

        # Argument list instead of a formatted string run through
        # shlex.split, so paths containing spaces stay intact.
        cmd = ['Build_Linux/CVAccessibility', '-l', str(args.level),
               xnetgraphpath, accessibilitypath]

        info('Running {}'.format(' '.join(cmd)))
        # stderr must be piped as well, otherwise communicate() always
        # returns None for it and the tool's errors are never logged.
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _, err = proc.communicate()
        if err:
            info('err:{}'.format(err.decode('utf-8', errors='replace')))
        if proc.returncode != 0:
            info('CVAccessibility exited with status {}'.format(
                proc.returncode))

    with open(accessibilitypath) as fh:
        aux = fh.read().strip().split('\n')
        acc = np.array([float(a) for a in aux])

    g.simplify()
    g.to_undirected()

    visual = dict(bbox=(1200, 1200),
                  vertex_size=1.5,
                  vertex_shape='circle',
                  vertex_frame_width=0,
                  edge_arrow_width=.5,
                  edge_arrow_size=.5)

    info('accessibility {} ({})'.format(np.mean(acc), np.std(acc)))
    plotalpha = 1
    mincolour = 0.3
    # Scale into [0, 1 - mincolour] so the blue channel mincolour + c never
    # exceeds 1.
    acc1 = (acc / np.max(acc)) * (1 - mincolour)
    colours = [[mincolour, mincolour, mincolour + c, plotalpha] for c in acc1]
    # y is negated for the plot layout.
    coords = [(float(x), -float(y)) for x, y in zip(g.vs['x'], g.vs['y'])]

    # Skeleton: edges only.
    visual['vertex_size'] = 0.0
    visual['edge_width'] = 1
    igraph.plot(g,
                os.path.join(args.outdir, 'skel.pdf'),
                layout=coords,
                **visual)

    # Vertices only, coloured by accessibility. The same vertex/edge sizes
    # are reused for the threshold plots below.
    visual['vertex_size'] = 2.0
    visual['edge_width'] = 0
    igraph.plot(g,
                os.path.join(args.outdir, 'acc_all.pdf'),
                layout=coords,
                vertex_color=colours,
                **visual)

    ###########################################################
    # generate plots for diff levels
    for thresh in [0.35, 0.5, 0.75]:
        # The cut-off is a fraction of the maximum accessibility.
        quantile = thresh * np.max(acc)
        info('accessibility quantile: {:.2f} ({}%)'.format(
            quantile, int(thresh * 100)))
        # Grey level 0 (black) above the cut-off, 1 (white) otherwise.
        acc1 = np.ones(len(acc))
        acc1[acc > quantile] = 0

        colours = [[c, c, c, plotalpha] for c in acc1]
        gpath = os.path.join(args.outdir, 'acc_thresh_{}.pdf'.format(thresh))
        igraph.plot(g, gpath, layout=coords, vertex_color=colours, **visual)
Esempio n. 8
0
# Some exports spell the source column header in capitals ("FROM").
fromKey = "From"
if fromKey not in edgesData:
    fromKey = "FROM"

# Translate edge endpoints from paper IDs to vertex indices, dropping edges
# with an endpoint missing from ID2Index. Each pair is stored reversed, i.e.
# as (index of "To", index of "From").
edgesZip = zip(edgesData[fromKey].tolist(), edgesData["To"].tolist())
edgesList = [(ID2Index[toID], ID2Index[fromID]) for fromID, toID in edgesZip
             if fromID in ID2Index and toID in ID2Index]

# Every column of the node table becomes a vertex attribute.
vertexAttributes = {key: nodesData[key].tolist() for key in nodesData}

graph = ig.Graph(n=len(index2ID),
                 edges=edgesList,
                 directed=True,
                 vertex_attrs=vertexAttributes)

graph.vs["KCore"] = graph.shell_index(mode="IN")
# The year is the first four characters of the "date" attribute.
graph.vs["year"] = [int(s[0:4]) for s in graph.vs["date"]]
os.makedirs("../networks", exist_ok=True)
xn.igraph2xnet(graph, "../networks/" + queryID + ".xnet")