# Shared imports for the snippets below. The `xnet` I/O helper used throughout
# this project is imported elsewhere under both the `xnet` and `xn` aliases.
import glob
import math
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud


def read_nets_by_years(path):
    filenames = sorted(glob.glob(path))
    dates = []
    nets = []
    for filename in filenames:
        net = xnet.xnet2igraph(filename)
        net.vs['political_party'] = [
            filter_pp_name(p) for p in net.vs['political_party']
        ]
        # Keep only the giant component of each yearly network.
        nets.append(net.components().giant())
        # The year is encoded in the filename, e.g. ".../dep_2008_....xnet".
        date = float(filename.split('_')[2].split('.')[0])
        dates.append(date)
    # Sort networks and dates chronologically.
    dates = np.asarray(dates)
    nets = np.asarray(nets)
    sorted_idxs = np.argsort(dates)
    dates = dates[sorted_idxs]
    nets = nets[sorted_idxs]
    return dates, nets
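# Hypothetical usage: load the yearly deputy networks in chronological order.
# The glob pattern is illustrative; the real paths are defined further below.
dates, nets = read_nets_by_years('data/1991-2019/by_year/dep_*_dist.xnet')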
def xnet_input_to_wordcloud(input_file, output_file):
    graph = xn.xnet2igraph(input_file)
    wc = WordCloud(background_color="white",
                   max_words=2000,
                   scale=10,
                   contour_width=3,
                   contour_color='white')
    wc.generate("\n".join(graph.vs["paper_abstract"]))
    wc.to_file(output_file)
def calculate_dist(filenames):
    for filename in filenames:
        net = xnet.xnet2igraph(filename)
        weights = net.es['weight']
        # Convert edge similarity weights in [0, 1] to distances:
        # d = sqrt(2 * (1 - w)), so w = 1 gives d = 0 and w = 0 gives d = sqrt(2).
        weights = [math.sqrt(2 * (1 - w)) for w in weights]
        if len(weights) > 0:
            net.es['distance'] = weights
            # Strip the ".xnet" extension and save next to the original file.
            xnet.igraph2xnet(net, filename[:-5] + "_dist.xnet")
        else:
            print('error', filename)
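# A quick sanity check of the similarity-to-distance mapping used above,
# assuming the weights lie in [0, 1]:
for w in (1.0, 0.5, 0.0):
    print(w, '->', math.sqrt(2 * (1 - w)))  # 0.0, 1.0, ~1.414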
def xnet_input_to_communities_wordcloud(input_file,
                                        output_file,
                                        minYear=minYear,
                                        minKCore=minKCore):
    graph = xn.xnet2igraph(input_file)
    # Drop vertices older than minYear or below the k-core threshold,
    # then keep the giant weakly connected component.
    verticesToDelete = np.where(
        np.logical_or(
            np.array(graph.vs["year"]) < minYear,
            np.array(graph.vs["KCore"]) < minKCore))[0]
    graph.delete_vertices(verticesToDelete)
    graph = graph.clusters(mode="WEAK").giant()
    communities = graph.vs["Community"]
    sortedCommunities = sortByFrequency(communities)[0:maxCommunities]
    fig = plt.figure(figsize=(20, 5 * math.ceil(len(sortedCommunities) / 2)))
    # Word frequencies over all abstracts, used below to normalize each
    # community's word frequencies.
    allAbstracts = "\n".join(graph.vs["paper_abstract"])
    allFrequencies = WordCloud(
        max_words=maxAllWords).process_text(allAbstracts)
    # Mask with a 10 px border so each cloud gets a colored frame.
    amask = np.zeros((500, 1000), dtype='B')
    amask[:10, :] = 255
    amask[-10:, :] = 255
    amask[:, :10] = 255
    amask[:, -10:] = 255
    for index, community in enumerate(sortedCommunities):
        communityColor = (_styleColors[index]
                          if index < len(_styleColors) else "#aaaaaa")
        abstracts = "\n".join([
            vertex["paper_abstract"] for vertex in graph.vs
            if vertex["Community"] == community
        ])
        plt.subplot(math.ceil(len(sortedCommunities) / 2), 2, index + 1)
        wc = WordCloud(background_color="white",
                       max_words=maxInternalWords,
                       width=1000,
                       height=500,
                       mask=amask,
                       contour_width=10,
                       contour_color=communityColor,
                       random_state=3,
                       color_func=generateColorFunction(communityColor))
        inCommunityFrequency = wc.process_text(abstracts)
        # Down-weight words that are frequent across the whole corpus.
        relativeFrequencies = {
            key: frequency / math.log(allFrequencies[key] + 1)
            for key, frequency in inCommunityFrequency.items()
            if key in allFrequencies
        }
        wc.generate_from_frequencies(relativeFrequencies)
        plt.imshow(wc, interpolation='bilinear')
        plt.axis("off")
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close(fig)
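# `sortByFrequency` is called above (and in xnet_input_to_figure below) but
# defined elsewhere in the project; a minimal sketch of the behaviour these
# calls rely on, assuming it returns unique values ordered most frequent first:
from collections import Counter

def sortByFrequency(values):
    return [value for value, _ in Counter(values).most_common()]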
def get_areas_all_years():
    f = 'data/subj_areas/nets/all.xnet'
    g = xnet.xnet2igraph(f)
    print(g.vs.attributes())
    # Print the members of each Louvain community, one community per line.
    i = 0
    for c in g.community_multilevel(weights='weight'):
        for idx in c:
            # g.vs[idx]['comm'] = i
            print(g.vs[idx]['name'], end=' ')
        i += 1
        print()
    return g
def get_areas_by_year():
    # Sort the files so consecutive indices correspond to consecutive years.
    files = sorted(glob.glob('data/subj_areas/nets/all_with_comm_*.xnet'))
    gs = []
    for file in files:
        gs.append(xnet.xnet2igraph(file))
    year = 2008
    comms = set(gs[0].vs['comm'])
    # Each entry of map_to_all is a chain of (year, community) pairs that
    # tracks one community through consecutive years.
    map_to_all = [[(year, c)] for c in comms]
    for i in range(len(gs) - 3):
        g1 = gs[i]
        g2 = gs[i + 1]
        comms1 = defaultdict(list)
        comms2 = defaultdict(list)
        for v in g1.vs:
            comms1[v['comm']].append(v['name'])
        for v in g2.vs:
            comms2[v['comm']].append(v['name'])
        if len(comms1) == 0 or len(comms2) == 0:
            continue
        for c1, vtxs1 in comms1.items():
            # Match c1 to the most similar community of the following year.
            sims = []
            for c2, vtxs2 in comms2.items():
                sim = jaccard(vtxs1, vtxs2)
                sims.append((sim, c2))
            s_max = max(sims)
            if s_max[0] < 0.2:
                continue
            print(sims)
            # Extend an existing chain that ends at (year, c1), or start a
            # new one.
            stop = False
            N = len(map_to_all)
            for j in range(N):
                m = map_to_all[j]
                if m[-1][1] == c1 and year == m[-1][0]:
                    m.append((year + 1, s_max[1]))
                    stop = True
                    break
            if not stop:
                map_to_all.append([(year, c1), (year + 1, s_max[1])])
        year += 1
    return map_to_all
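# `jaccard` is called above but defined elsewhere in the project; a minimal
# sketch of the standard set-overlap measure it presumably implements,
# assuming both arguments are iterables of subject-area names:
def jaccard(a, b):
    a, b = set(a), set(b)
    return len(a & b) / len(a | b) if (a | b) else 0.0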
def xnet_input_to_wordcloud(input_file, output_file):
    graph = xn.xnet2igraph(input_file)
    wc = WordCloud(background_color="white",
                   max_words=2000,
                   scale=10,
                   contour_width=3,
                   contour_color='white')
    # Fall back to titles alone when abstracts are not available.
    if "paper_abstract" not in graph.vertex_attributes():
        textData = graph.vs["original_title"]
    else:
        textData = [
            "%s. %s" % (title, abstract) for title, abstract in zip(
                graph.vs["original_title"], graph.vs["paper_abstract"])
        ]
    wc.generate("\n".join(textData))
    wc.to_file(output_file)
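# Hypothetical usage of the word-cloud helper above (paths are illustrative):
xnet_input_to_wordcloud('data/citation_net.xnet', 'figures/wordcloud.png')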
def main():
    file_path = '../data/wosAPSWithPACS_WithMAG_raw.xnet'
    g = xnet2igraph(file_path)
    data = {
        'abstract': g.vs['Title and Abstract'],
        'year': g.vs['Year Published'],
        'language': g.vs['Language'],
        'adj_list': g.get_adjlist()
    }
    df = pd.DataFrame.from_dict(data)
    # Stringify the adjacency lists so the column has a plain string dtype
    # in the Feather file.
    df['adj_list'] = df['adj_list'].astype(str)
    df.to_feather('data/all_data.feather')
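# Reading the dump back (a sketch, assuming pandas with pyarrow installed).
# The adjacency lists were stringified above, so ast.literal_eval is needed
# to turn them back into Python lists:
import ast

df = pd.read_feather('data/all_data.feather')
df['adj_list'] = df['adj_list'].apply(ast.literal_eval)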
def plot_(gauss, bins, map_to_all):
    fig, axs = plt.subplots(len(map_to_all),
                            1,
                            sharex=True,
                            sharey=True,
                            figsize=(12, 3 * len(map_to_all)))
    for i, group in enumerate(map_to_all):
        if len(group) == 1:
            continue
        print(i, group)
        incr, decr = [], []
        for year, c in group:
            # Collect the DOIs of papers tagged with any subject area that
            # belongs to community c in this year's network.
            dois = []
            f = glob.glob('data/subj_areas/nets/all_with_comm_%s.xnet' % year)[0]
            g = xnet.xnet2igraph(f)
            words = set(g.vs.select(comm_eq=c)['name'])
            for doi, paper in complete_data.items():
                for w in paper['infos']['subj_areas']:
                    if w in words:
                        dois.append(doi)
                        break
            dois = set(dois)
            incr_temp, decr_temp = incr_decr(dois, year, year + 1)
            incr += incr_temp
            decr += decr_temp
        range0 = (2008, 2020)
        hist0, bins_edges0 = np.histogram(incr, bins=bins, range=range0)
        hist1, bins_edges1 = np.histogram(decr, bins=bins, range=range0)
        # Smooth both histograms with the supplied kernel.
        y0 = np.convolve(hist0, gauss, mode='same')
        y1 = np.convolve(hist1, gauss, mode='same')
        # linspace avoids the off-by-one element count that np.arange can
        # produce with a floating-point step.
        x = np.linspace(bins_edges0[0], bins_edges0[-1], len(y0),
                        endpoint=False)
        axs[i].set_title("%d - %d" % (group[0][0], group[-1][0]))
        axs[i].bar(x - 0.05, y0, width=0.05, label='incr')
        axs[i].bar(x, y1, width=0.05, label='decr')
        axs[i].legend()
    plt.tight_layout()
    plt.savefig('hist_areas.pdf')
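# Hypothetical call: `gauss` is the smoothing kernel that np.convolve applies
# to the yearly histograms above; a small normalized Gaussian built with numpy
# (kernel width and bin count are illustrative):
kernel = np.exp(-0.5 * np.linspace(-2.0, 2.0, 9) ** 2)
kernel /= kernel.sum()
plot_(kernel, bins=48, map_to_all=get_areas_by_year())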
def xnet_input_to_figure(input_file,
                         output_file,
                         minYear=minYear,
                         minKCore=minKCore):
    originalGraph = xn.xnet2igraph(input_file)
    graph = originalGraph.clusters(mode="WEAK").giant()
    verticesToDelete = np.where(
        np.logical_or(
            np.array(graph.vs["year"]) < minYear,
            np.array(graph.vs["KCore"]) < minKCore))[0]
    graph.delete_vertices(verticesToDelete)
    graph = graph.clusters(mode="WEAK").giant()
    # Scale vertex sizes by in-degree.
    indegree = graph.indegree()
    maxIndegree = max(indegree)
    graph.vs["vertex_size"] = [x / maxIndegree * 10 + 4 for x in indegree]
    colormap = plt.get_cmap("plasma")
    if "Community" not in graph.vertex_attributes():
        # No community data: color vertices by log-scaled in-degree.
        graph.vs["color"] = [
            convertColorToRGBAString(*colormap(math.log(value + 1)))
            for value in indegree
        ]
    else:
        communities = graph.vs["Community"]
        sortedCommunities = sortByFrequency(communities)
        communityToColor = {
            community: (_styleColors[index]
                        if index < len(_styleColors) else "#aaaaaa")
            for index, community in enumerate(sortedCommunities)
        }
        graph.vs["color"] = [
            communityToColor[community] for community in communities
        ]
    # Edges inherit the source vertex color, with "20" appended as a hex
    # alpha value for transparency.
    for edgeIndex in range(graph.ecount()):
        sourceIndex = graph.es[edgeIndex].source
        graph.es[edgeIndex]['color'] = graph.vs["color"][sourceIndex] + "20"
    fig, ax = plt.subplots(figsize=(10, 10))
    drawGraph(graph, ax)
    plt.axis("off")
    plt.savefig(output_file)
    plt.close()
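# Hypothetical usage (the file names are illustrative; minYear and minKCore
# fall back to the module-level defaults):
xnet_input_to_figure('data/citation_net.xnet', 'figures/citation_net.pdf')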
source_by_year = 'data/1991-2019/by_year/dep_*_obstr_0.8_leidenalg'
source_by_mandate = 'data/1991-2019/mandate/dep_*_0.8'

# Called only once: adds the "distance" attribute to every yearly network.
source = 'data/1991-2019/by_year/dep_*_obstr_0.8_leidenalg'
filenames = glob.glob(source + '.xnet')
calculate_dist(filenames)

filenames_by_year = sorted(glob.glob(source_by_year + '_dist.xnet'))
filenames_by_mandate = sorted(glob.glob(source_by_mandate + '_dist.xnet'))

dates_by_year, dates_by_mandate = [], []
nets_by_year, nets_by_mandate = [], []
for filename in filenames_by_year:
    net = xnet.xnet2igraph(filename)
    net.vs['political_party'] = [
        filter_pp_name(p) for p in net.vs['political_party']
    ]
    nets_by_year.append(net.components().giant())
    date = int(filename.split('dep_')[1].split('_')[0])
    dates_by_year.append(date)

for filename in filenames_by_mandate:
    net = xnet.xnet2igraph(filename)
    net.vs['political_party'] = [
        filter_pp_name(p) for p in net.vs['political_party']
    ]
    nets_by_mandate.append(net.components().giant())