def export_to_graphml(self, filename: str):
    """Save the network as .graphml file.

    Parameters
    ----------
    filename
        Specify filename for exporting the graph.
    """
    nx.write_graphml_lxml(self.graph, filename)
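As a quick orientation for the pattern these snippets share, here is a minimal round-trip sketch (illustrative data and file path, not from any snippet above): `write_graphml_lxml` serialises the graph using lxml, and `nx.read_graphml` loads it back.

import networkx as nx

# Build a small graph with node and edge attributes (made-up example data).
G = nx.Graph()
G.add_node("a", content="first")
G.add_node("b", content="second")
G.add_edge("a", "b", weight=1.0)

# Write with the lxml-based writer, then read the file back.
nx.write_graphml_lxml(G, "example.graphml")
H = nx.read_graphml("example.graphml")
assert set(H.nodes) == {"a", "b"}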
def main_entry_point(argv=None):
    arguments = docopt(__doc__, version=__version__, argv=argv)
    # Since there is no support for default positional arguments in
    # docopt yet. Might be useful for complex default values, too
    if arguments["<out-file>"] is None:
        arguments["<out-file>"] = "-"
    coref_tags = set([arguments["--tag"]])
    with smart_open(arguments["<in-file>"]) as in_stream:
        inpt_dict = json.load(in_stream)
    sys_antecedents = (
        (
            mention,
            (
                (
                    antecedent["id"],
                    float(antecedent["score"]),
                    antecedent["gold"] in coref_tags,
                )
                for antecedent in v["candidates"]
                if antecedent["sys"] in coref_tags
            ),
        )
        for mention, v in inpt_dict.items()
    )
    if arguments["--algo"] == "best":
        sys_links = best_first(sys_antecedents)
    elif arguments["--algo"] == "all":
        sys_links = transitive(sys_antecedents)
    else:
        # Interpolate the offending value (the original f-string had no braces)
        raise ValueError(f"Invalid algo: {arguments['--algo']}")
    S = nx.DiGraph()
    S.add_nodes_from(
        (
            m,
            {
                "content": v["content"],
                "color": (
                    "black"
                    if any(a["gold"] in coref_tags for a in v["candidates"])
                    else "yellow"
                ),
            },
        )
        for m, v in inpt_dict.items()
    )
    # S.add_edges_from(gold_antecedents(inpt_dict, coref_tags))
    S.add_edges_from(sys_links)
    with smart_open(arguments["<out-file>"], "wb") as out_stream:
        nx.write_graphml_lxml(S, out_stream)
    if arguments["--gold"] is not None:
        gold_links = [
            (m, a["id"])
            for m, v in inpt_dict.items()
            for a in v["candidates"]
            if a["gold"] in coref_tags
        ]
        S = nx.DiGraph()
        S.add_nodes_from((m, {"content": v["content"]}) for m, v in inpt_dict.items())
        S.add_edges_from(gold_links)
        with smart_open(arguments["--gold"], "wb") as out_stream:
            nx.write_graphml_lxml(S, out_stream)
def build_network_from_df(df, key, threshold=None, k1="k1", k2="k2",
                          save_path="/Users/iris/Documents/QMUL-2018/Individual_Project/coding/final_networks/"):
    if threshold is not None:
        df = df[df[key] > threshold]
    output_net = nx.from_pandas_edgelist(df, k1, k2, [key])
    # quick view of the networks
    nx.draw_networkx(output_net, with_labels=True)
    plt.show()
    nx.write_graphml_lxml(output_net, save_path + "net_{}.graphml".format(key))
    return output_net
def export_to_graphml(self, filename: str):
    """Save the network as .graphml file.

    Parameters
    ----------
    filename
        Specify filename for exporting the graph.
    """
    if not self.graph:
        raise ValueError("No network found. Make sure to first run .create_network() step")
    nx.write_graphml_lxml(self.graph, filename)
def save(G, fName):
    """suggest different file formats"""
    nx.write_gml(G, fName + ".gml")
    # G = nx.read_gml(fName + ".gml")
    if False:
        ox.save_graphml(G, filename=fName, folder=baseDir + "gis/graph/")
        ox.save_gdf_shapefile(graph, filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        ox.save_graphml(G, filename=gName, folder=baseDir + "gis/graph/")
        G = ox.load_graphml(filename=gName, folder=baseDir + "gis/graph/")
        ox.save_gdf_shapefile(G, filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        G = ox.load_graphml(filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        nx.readwrite.write_shp(G, baseDir + "gis/graph/berlin_street")
        # original had the literal string "fName" here instead of the variable
        nx.write_graphml_lxml(G, baseDir + fName + ".graphml")
def dumpDataset(dataset_name, dataset, lxml=False):
    # add a folder for the dataset; bail out if it already exists
    try:
        os.mkdir(os.getcwd() + '/' + dataset_name)
    except Exception:
        return
    # dump all graphml files; both writers accept a path directly, so the
    # original redundant open(..., 'w') wrapper around each call is dropped
    for i, graph in enumerate(dataset):
        path = f"{dataset_name}/{dataset_name}_{i + 1}.graphml"
        if lxml:
            nx.write_graphml_lxml(graph, path)
        else:
            nx.write_graphml_xml(graph, path)
def load_graph(self, keep_isolates=False, output=False):
    path = os.path.join(self.dir, self.filename)
    df = pd.read_csv(path, index_col=0)
    # use '==' for string comparison; 'is' checks identity and is unreliable here
    if self.source == 'comentions':
        G = nx.from_pandas_adjacency(df)
    elif self.source == 'topics':
        G = nx.Graph()
        MPs = df.index.values.tolist()
        topics = list(df.columns)
        G.add_nodes_from(MPs, bipartite=0)
        G.add_nodes_from(topics, bipartite=1)
        weighted_edges = [(idx, topic, df.loc[idx, topic])
                          for topic in topics
                          for idx, row in df.iterrows()
                          if df.loc[idx, topic] > 0]
        G.add_weighted_edges_from(weighted_edges)
    if keep_isolates is False:
        G = remove_isolates(G)
    if output:
        nx.write_graphml_lxml(G, os.path.join(self.dir, f"{self.source}_{self.convocation_id}_graph.graphml"))
    self.G = G
def main():
    # Get list of chunks as generic chunks divorced from type
    genericChunkList = readInputFile()
    typedChunkList = []

    # Process the generic chunks into their actual chunk types
    for chunk in genericChunkList:
        newChunk = processChunk(chunk)
        typedChunkList.append(newChunk)

    # Clear the generic chunk list to free up memory
    genericChunkList.clear()

    # DEBUG: Print out the chunks
    for chunk in typedChunkList:
        print(chunk)

    # Generate graph based on chunks
    graph = generateGraph(typedChunkList)
    networkx.write_graphml_lxml(graph, CONST_INPUT_FILE_NAME + ".graphml")
def build_network_from_df(df, key, threshold=None, k1="k1", k2="k2",
                          save_path="/Users/iris/Documents/QMUL-2018/Individual_Project/coding/"):
    if threshold is not None:
        df = df[df[key] > threshold]
    output_net = nx.from_pandas_edgelist(df, k1, k2, [key])

    # add edge weights to network
    kk1 = tuple(df[k1])
    kk2 = tuple(df[k2])
    kk3 = tuple(df[key])
    tuple_edgeweight = zip(kk1, kk2, kk3)
    output_net.add_weighted_edges_from(tuple_edgeweight, weight='weight')

    # quick view of the networks
    nx.draw_networkx(output_net, with_labels=True)
    plt.show()
    nx.write_graphml_lxml(output_net, save_path + "net_{}.graphml".format(key))
    return output_net
        f.write('\t'.join([str(x) for x in line]) + '\n')
print('[i] most frequent 10 changes')
print(
    tabulate(
        [line[:-1] for line in table][:10],
        tablefmt='pipe',
        headers=['Source', 'Target', 'Frequency', 'Relative Frequency']))
print('[i] most rare 10 changes')
print(
    tabulate(
        [line[:-1] for line in table[::-1]][:10],
        tablefmt='pipe',
        headers=['Source', 'Target', 'Frequency', 'Relative Frequency']))
if "network" in argv:
    nx.write_graphml_lxml(G, argv[1][:-4] + '.graphml')

# if 'suffixes' in argv:
#     data = defaultdict(list)
#     for idx, doculect, tokens, relation, process in wl.iter_rows(
#             'doculect', 'tokens', 'relation', 'process'):
#         if doculect == "Indo-European" and relation == 'derivation':
#             if ',' in process:
#                 grade, suffix = process.split(', ')
#                 if suffix in ['suffix', 'prefix']:
#                     affix_type, affix_form = suffix.split(': ')
#                 else:
#                     grade = ''
#             else:
#                 grade, affix_type, affix_form = process, '', ''
#             if grade:
def DRSToItem():
    import matplotlib.pyplot as plt

    # Declare relevant variables
    DRSGraph = None
    DRSLines = []

    # Read in DRS instructions from file
    DRSFile = open("DRS_read_in.txt", "r")
    for line in DRSFile:
        # Get DRS command and remove any leading and ending whitespace
        DRSLines.append(line.strip())

    # Get numbers of which lines are headers ([A, B, C, ...]) and conditionals (=>)
    symbolLines = getSymbolLines(DRSLines)
    categorizedDRSLines = categorizeDRSLines(DRSLines, symbolLines)

    # Get all if-then sets
    conditionalSets = getConditionals(DRSLines, categorizedDRSLines)
    # print(conditionalSets)

    # Set up the predicate switcher
    predSwitcher = predicateSwitcher()

    # Set up counter for question response
    questionCounter = 1

    # Iterate through the DRS instructions
    for index, currentInstruction in enumerate(DRSLines):
        # take next instruction or exit
        nextStep = ''
        # As long as no "exit" given
        if nextStep != 'exit':
            print(currentInstruction)
            # If the current line is an instruction
            if categorizedDRSLines.get(index) == CONST_INSTRUCTION_TAG:
                # Get the predicate type and contents
                instructionCountInMatchingIfBlock, conditionalWithMatchingIfBlock = \
                    checkCurrentInstructionIf(DRSLines, index, currentInstruction, conditionalSets)
                if instructionCountInMatchingIfBlock == 0:
                    DRSGraph = splitAndRun(currentInstruction, predSwitcher)
        # Break out of loop with exit
        else:
            break

    # On end of reading in instructions, process conditionals first:
    for conditional in conditionalSets:
        if not conditional.processed:
            DRSGraph = runFullConditional(conditional, predSwitcher, DRSGraph, conditionalSets)

    # Set up questionSwitcher
    qSwitcher = questionSwitcher()
    questionInput = input('Please enter a question')
    # "exit" is trigger word to end questioning
    while questionInput != 'exit':
        questionLines = APEWebserviceCall(questionInput)
        while questionLines is None:
            questionInput = input('There was an error with the ACE entered - please try again.')
            questionLines = APEWebserviceCall(questionInput)
        for currentLine in questionLines:
            predicateSplit = currentLine.split('(', 1)
            predicateType = predicateSplit[0]
            predicateContents = predicateSplit[1]
            # print(categorizedDRSLines.get(index))
            qSwitcher.callFunction(predicateType, predicateContents, DRSGraph)
            print(currentInstruction)
        result = qSwitcher.resolveQuestion()
        if result:
            print("Question", str(questionCounter), "Answer: Yes")
        elif not result and result is not None:
            print("Question", str(questionCounter), "Answer: No")
        else:
            print("Question", str(questionCounter), "Answer: Unknown")
        questionCounter = questionCounter + 1
        # I have my doubts about these lines below but they seem to work
        DRSGraph = qSwitcher.returnDRSGraph()
        predSwitcher.updateDRSGraph(DRSGraph.graph)
        # Reset qSwitcher to be a new question switcher
        qSwitcher = questionSwitcher()
        questionInput = input('Please enter a DRS line for your question')

    # Once "exit" has been entered:
    # at end of program, if an ontology was built at all, print it out and export it in GraphML
    if DRSGraph is not None:
        # networkx.draw(DRSGraph.graph, labels=networkx.get_node_attributes(DRSGraph.graph, CONST_NODE_VALUE_KEY))
        # plt.show()
        jsonFile = open("jsonFile.txt", "w")
        jsonSerializable = networkx.readwrite.json_graph.node_link_data(DRSGraph.graph)
        jsonOutput = json.dumps(jsonSerializable)
        jsonFile.write(jsonOutput)
        networkx.write_graphml_lxml(DRSGraph.graph, "DRSGraph.graphml")
def write_adj_matrix(gnx, file_name):
    # Note: despite the name, this writes the graph in GraphML format,
    # not an adjacency matrix.
    nx.write_graphml_lxml(gnx, file_name)
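Given the misleading name above, a sketch of what a literal adjacency-matrix export could look like (hypothetical helper, not part of the original code; `nx.to_pandas_adjacency` is the standard networkx conversion, and the CSV output is an assumption):

import networkx as nx

def write_adj_matrix_csv(gnx, file_name):
    # Convert the graph to a dense adjacency-matrix DataFrame and save it as CSV.
    adj = nx.to_pandas_adjacency(gnx)
    adj.to_csv(file_name)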
# build network
import networkx as nx

net = nx.from_pandas_edgelist(relationships, create_using=nx.DiGraph())

# make sure isolates are in the network
net.add_nodes_from(twusers.twitter)

# add attributes from data frame
### data frame as dictionary
attributes = twusers.set_index('twitter').to_dict('index')
### add attributes of nodes to network
nx.set_node_attributes(net, attributes)

#%%
nx.write_graphml_lxml(net, "presiAmericas.graphml")

#%%
nx.draw_networkx(net)

#%%
pos = nx.nx_pydot.graphviz_layout(net)
nx.draw_networkx(net, pos=pos)

#%%
import matplotlib.pyplot as plt
def main(simulated_time):
    # Originally Python 2 (print statements, itertools.izip); ported to Python 3 here.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    """
    TOPOLOGY from a json
    """
    t = Topology()
    t.G = nx.read_graphml("Euclidean.graphml")
    t.G = nx.convert_node_labels_to_integers(t.G, first_label=0, ordering='default', label_attribute=None)

    print("Nodes: %i" % len(t.G.nodes()))
    print("Edges: %i" % len(t.G.edges()))

    # MANDATORY fields of a link
    # Default values = {"BW": 1, "PR": 1}
    valuesOne = dict(zip(t.G.edges(), np.ones(len(t.G.edges()))))
    nx.set_edge_attributes(t.G, name='BW', values=valuesOne)
    nx.set_edge_attributes(t.G, name='PR', values=valuesOne)

    centrality = nx.betweenness_centrality(t.G)
    nx.set_node_attributes(t.G, name="centrality", values=centrality)

    sorted_clustMeasure = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True)
    top20_devices = sorted_clustMeasure[:20]
    main_fog_device = copy.copy(top20_devices[0][0])

    print("-" * 20)
    print("Top 20 centralised nodes:")
    for item in top20_devices:
        print(item)
    print("-" * 20)

    """
    APPLICATION
    """
    app1 = create_application("app1")

    """
    PLACEMENT algorithm
    """
    # There are no modules to place.
    placement = NoPlacementOfModules("NoPlacement")

    """
    POPULATION algorithm
    """
    number_generators = int(len(t.G) * 0.1)
    print(number_generators)

    # you can use whatever function to change the topology
    dStart = deterministicDistributionStartPoint(0, 100, name="Deterministic")
    dStart2 = exponentialDistributionStartPoint(500, 100.0, name="Deterministic")
    pop = Pop_and_Failures(name="mttf-nodes", srcs=number_generators, activation_dist=dStart2)
    pop.set_sink_control({
        "ids": top20_devices,
        "number": 1,
        "module": app1.get_sink_modules()
    })

    dDistribution = deterministicDistribution(name="Deterministic", time=10)
    # In addition, a source includes a distribution function:
    pop.set_src_control({
        "number": 1,
        "message": app1.get_message("M.Action"),
        "distribution": dDistribution
    })

    """--
    SELECTOR algorithm
    """
    selectorPath = BroadPath()

    """
    SIMULATION ENGINE
    """
    s = Sim(t, default_results_path="Results_%s_exp" % (simulated_time))
    s.deploy_app(app1, placement, pop, selectorPath)
    s.run(simulated_time, test_initial_deploy=False, show_progress_monitor=False)
    # s.draw_allocated_topology()  # for debugging

    print("Total nodes available in the topology %i" % len(s.topology.G.nodes()))
    print("Total edges available in the topology %i" % len(s.topology.G.edges()))
    print(pop.nodes_removed)
    nx.write_graphml_lxml(s.topology.G, "final_network.graphml")
}, {
    "Reasoning": ["Reasoning", "Memory", "Learning"]
}, {
    "Sentiment": ["Language", "Memory", "Attention", "Emotion"]
}, {
    "Size": ["Spatial", "Perceptual", "Attention"]
}, {
    "State": ["Reasoning", "Categorization", "Perceptual"]
}, {
    "Numerosity": ["Reasoning", "Perceptual", "Attention"]
}]

graph = networkx.Graph()
graph.add_nodes_from(capacities)

# Go through each gap type
for gapType in vqa_gaps:
    for gapTypeName in gapType:
        # Get the list of capabilities associated with it
        gapCapabilityList = gapType.get(gapTypeName)
        print(gapTypeName)
        print(gapCapabilityList)
        # Create a node for the gap type
        graph.add_node(gapTypeName)
        # Iterate through the capabilities and connect each one to the gap type via an edge
        for capability in gapCapabilityList:
            graph.add_edge(gapTypeName, capability, label="affects")

networkx.write_graphml_lxml(graph, fileLocation)
def main():
    # params
    IEXCLOUD_TOKEN = os.getenv("IEXCLOUD_TOKEN")

    # files that must be provided up front
    # files generated automatically during the run
    output_folder = './outputs'
    downlaods_folder = './downloads'
    entities_json = f"{output_folder}/wd_entities.json"
    tk_csv = './downloads/bats_symbols_traded_byx.csv'
    tk_info_json = "./downloads/iex_ticker_info.json"
    urls_json = f"{output_folder}/wiki_urls.json"
    mentions_json = f"{output_folder}/wiki_mentions.json"
    sent_cooccurs_json = f"{output_folder}/corpus_mentions_sent_cooccurs.json"
    atk_cooccurs_json = f"{output_folder}/corpus_mentions_atk_cooccurs.json"
    atk_bags_json = f"{output_folder}/corpus_mentions_atk_bags.json"
    freqs_json = f"{output_folder}/corpus_mentions_freqs.json"

    # Wiki processor requires:
    explore_n_wk_depth: int = 2  # number of wiki levels to explore
    adpot_n_wk_depth: int = 1  # wk-titles within this depth are actually adopted (the rest are used for graph computation)
    wkd_dump_json = "./downloads/latest-all.json.bz2"
    seeded_wk_titles = []
    sp500_csv = f"{downlaods_folder}/s_and_p_500.csv"

    # Wiki processor outputs:
    wk_titles_graphml = f"{output_folder}/wk_titles.graphml.bz2"
    wk_pagerank_json = f"{output_folder}/wk_pagerank.json"
    wk_cat_tags_json = f"{output_folder}/wk_cat_tags.json"
    # wk_tags_json = f"{output_folder}/wk_tags.json"
    wk_tags_pagerank_csv = f"{output_folder}/wk_tags_pagerank.csv"
    wkd_filtered_entities_json = f"{output_folder}/wkd_filtered_entities.json"
    wk_ranked_titles_json = f"{output_folder}/wk_ranked_titles.json"
    wkd_entites_by_ranked_titles_json = f"{output_folder}/wkd_entites_by_ranked_titles.json"

    pathlib.Path(output_folder).mkdir(exist_ok=True)

    # print(get_matched_wkd_entities(titles, wkd_dump_path=wkd_dump_json))
    # entities = load_or_run(wkd_entites_by_ranked_titles_json,
    #                        lambda: get_matched_wkd_entities(titles, wkd_dump_path=wkd_dump_json),
    #                        forcerun=True)

    # print("fetch entities that have a symbol property from wikidata")
    # results = load_or_run(
    #     entities_json, lambda: query_wikidata_by_property())
    # comp_wdids = [e['item']['value'].split('/')[-1]
    #               for e in results['results']['bindings']]

    # print("find the wikipage matching each wikidata entity")
    # comp_titles = load_or_run(
    #     comp_titles_json, lambda: query_wikipage_title(comp_wdids))
    # return

    # print("load tickers")
    # df = pd.read_csv(tk_csv)
    # tickers = list(df['Symbols'])
    # # tickers = ['ADBE', 'BA', 'RXT', 'TTOO']
    # print(f"number of tickers loaded: {len(tickers)}")

    # print("fetch ticker info from iexcloud")
    # infos = load_or_run(
    #     tk_info_json, lambda: download_ticker_info_from_iexcloud(tickers, IEXCLOUD_TOKEN))
    # names = [v['companyName'] for k, v in infos.items()]

    # print("search for the wikipage matching each company name in the ticker info")
    # urls = load_or_run(
    #     urls_json, lambda: search_wikipage(names))

    # Scan the wikipedia dump, starting from each company's wiki page, and collect the mentions inside.
    # Newly added mentions become next_entities; repeat n times (= crawl n levels).
    # print(f"get mentions related to the companies - {depth} levels")
    # titles = [v.split('/')[-1].replace("_", " ")
    #           for _, v in urls.items() if v is not None]

    print(f"# connect to elasticsearch (stores wiki pages and the news corpus)")
    es.connect(["es:9200"])

    print(f"# starting from the S&P 500 wikipage, crawl '{explore_n_wk_depth}' levels of titles and build the graph")
    # seedtitles = ["List of S&P 500 companies"]
    seedtitles = ["Wilson (company)"]
    try:
        # raise FileNotFoundError
        g = gt.load_graph(wk_titles_graphml)
        print(f"File loaded: {wk_titles_graphml}")
    except FileNotFoundError:
        print(f"File not found, create new one")
        g = get_wktitles_graph(seedtitles, n_depth=explore_n_wk_depth)
        for n in g:
            g.nodes[n]['mentions'] = json.dumps(g.nodes[n]['mentions'], ensure_ascii=False, default=serialize_sets)
        nx.write_graphml_lxml(g, wk_titles_graphml)
        g = gt.load_graph(wk_titles_graphml)

    print("# run pagerank on the full graph (using the graph-tool library to avoid running out of memory)")
    ranks = load_or_run(wk_pagerank_json,
                        lambda: calc_pagerank(g), forcerun=True)

    print(f"# pick out the wiki categories in the graph, then find the wiki title that mainly describes each category")

    def _cat_tags() -> Iterable[str]:
        _, wk_title, _ = zip(*ranks)
        cats = filter(lambda e: "Category:" in e, wk_title)
        # print(list(cats))
        # print([c for c in cats])
        tags = [es.get_corresponded_wktitles(cat_title=c) for c in cats]
        tags = set(itertools.chain(*tags))
        # tags &= set(tags)
        return tags

    cat_tags = load_or_run(wk_cat_tags_json, lambda: _cat_tags(), forcerun=True)

    print(f"# look up the wkd-entity for each wk-title")
    # tags = ["Technology", "Internet", "Metal"]
    cattag_entity = get_matched_wkd_entities(cat_tags)
    ranks_by_tags = []
    for _, wk_title, pagerank in ranks:
        try:
            e = cattag_entity[wk_title]
            ranks_by_tags.append((e.entity_id, e.get_enwiki_title(), e.get_label("zh"), pagerank))
        except KeyError:
            pass

    print("# save the ranks in csv format")
    wkd_id, wk_title, zh_label, pagerank = zip(*ranks_by_tags)
    tags = wk_title
    df = pd.DataFrame({
        'wkd_id': wkd_id,
        'wk_title': wk_title,
        'zh_label': zh_label,
        'pagerank': pagerank
    })
    df.to_csv(wk_tags_pagerank_csv, index=False)
    return

    print("# find the tags for a ticker")

    def get_neighbors(v: gt.Vertex, n_expands: int = 2):
        seeds = set([v])
        traveled = set()
        for i in range(n_expands):
            nextseeds = set()
            for v in seeds:
                nextseeds |= set(v.out_neighbors())
            nextseeds -= seeds
            traveled |= seeds
            seeds = nextseeds
        return traveled

    # tags = set(["joint venture"])
    tickers = ["Wilson (company)"]
    tags_by_tickers = []
    for tk in tickers:
        v = gt.find_vertex(g, g.vp['_graphml_vertex_id'], tk)[0]
        neighbors = get_neighbors(v, n_expands=2)
        neighbors = set([g.vp['_graphml_vertex_id'][v] for v in neighbors])
        tags_by_tickers.append((tk, tags & neighbors))
    print(tags_by_tickers)
    return

    print(f"rank tags by order, importance, and redundancy (using approaches like max_flow or n_path)")
    # for tk in tickers:
    #     neighbors = get_neighbors(tk)

    print(f"TODO: walk through all the news and count mention term frequencies")
    # print(f"walk through all the news and count mention term frequencies")
    # TODO: expand synonyms (for use with flashtext)

    # print(f"load the S&P 500 as seed-wk-titles")
    # df = pd.read_csv(sp500_csv)
    # seedtitles = list(df['Name'])

    # print(f"starting from the seed-wk-titles, crawl '{explore_n_wk_depth}' levels of wk-titles and build the graph")
    # try:
    #     # raise FileNotFoundError
    #     g = gt.load_graph(wk_titles_graphml)
    #     print(f"File loaded: {wk_titles_graphml}")
    # except FileNotFoundError:
    #     print(f"File not found, create new one")
    #     g = get_wktitles_graph(seedtitles, n_depth=explore_n_wk_depth)
    #     for n in g:
    #         g.nodes[n]['mentions'] = json.dumps(
    #             g.nodes[n]['mentions'], ensure_ascii=False, default=serialize_sets)
    #     nx.write_graphml_lxml(g, wk_titles_graphml)
    #     g = gt.load_graph(wk_titles_graphml)

    # print(f"only adopt wk-titles within {adpot_n_wk_depth}-depth")
    # vp_label = g.vp['_graphml_vertex_id']
    # vp_depth = g.vp['depth']
    # wktitles = [vp_label[v]
    #             for v in g.vertices() if vp_depth[v] <= adpot_n_wk_depth]

    # print("scan the wkd dump and drop wk-titles that lack a Chinese label, have a location claim (very likely places), or are people")
    # try:
    #     raise FileNotFoundError
    #     entities = WikidataJsonDump(wkd_filtered_entities_json)
    #     filtered_wktitles = set([e.get_enwiki_title() for e in entities])
    #     print(f"File loaded: {wkd_filtered_entities_json}")
    # except FileNotFoundError:
    #     print(f"File not found, create new one")
    #     entities = get_matched_wkd_entities(
    #         wktitles, wkd_dump_path=wkd_dump_json)
    #     dump_entities_to_json(entities, wkd_filtered_entities_json)
    #     filtered_wktitles = set([e.get_enwiki_title() for e in entities])

    # print("run pagerank on the full graph (using the graph-tool library to avoid running out of memory)")
    # load_or_run(wk_filtered_pagerank_json,
    #             lambda: calc_pagerank(g, included_wktitles=filtered_wktitles), forcerun=True)
    return
def generate_graph(start_number=None, end_number=None, graph="new",
                   blocks_path=os.path.dirname(os.path.realpath(__file__)),
                   continuation=True):
    # Add the ability to split a number of blocks into many graphs then combine,
    # to save memory and prevent hangups
    blocks_added = []  # list of added blocks for the graph_meta.json file,
    block_file_list = []  # so that added blocks are ignored
    # debug vars__ to be deleted
    debug_zero_div = []

    os.chdir(blocks_path + "/blocks")
    if "meta.json" in os.listdir():
        print(str(len(os.listdir()) - 2) + " Blocks Found")
        block_file_list = natsorted(os.listdir())
        print("Popped", block_file_list.pop(), block_file_list.pop())  # popping the meta.json file
    else:
        print(str(len(os.listdir())) + " Blocks Found")

    if graph == "new":
        graph = nx.MultiDiGraph()
        graph.add_node("Coinbase")
        graph_meta = {}
        graph_meta["blocks_added"] = blocks_added
        if start_number is None:
            start_number = int(block_file_list[0].split('_')[1].split('.')[0])
        graph_meta["starting_block"] = start_number
        if end_number is None:
            end_number = int(block_file_list[len(block_file_list) - 1].split('_')[1].split('.')[0])
        # original stored this under "starting_block", overwriting the start value
        graph_meta["ending_block"] = end_number
        with open("graph_meta.json", "w+") as graph_meta_file:
            json.dump(graph_meta, graph_meta_file)
    else:
        with open("graph_meta.json") as graph_meta_file:
            graph_meta = json.load(graph_meta_file)
            blocks_added = graph_meta["blocks_added"]

    for block_file2open in block_file_list:
        # Make sure to remove the blocks from memory every X blocks
        block_number = block_file2open.split('_')[1].split('.')[0]
        # if graph != "new" and (block_number in blocks_added):  # If block is already added and we
        #     continue                                           # aren't creating a new graph, skip
        # reimplemented below
        # print("Graphing block " + block_number)
        with open(block_file2open) as block_file:
            current_block_file = json.load(block_file)
        current_ntxs = current_block_file['blocks'][0]['n_tx']
        current_block = current_block_file['blocks'][0]["height"]
        current_input_address = None
        current_input_value = None
        current_output_address = None
        current_output_value = None
        if (current_block in blocks_added) and (continuation == True):
            continue
        for tx in range(current_ntxs):
            # Add support for coinbase txs.
            # Find out what to do with segregated witness (are there other weird tx types?)
            # note to self: nx doesn't add duplicate nodes
            current_transaction = current_block_file['blocks'][0]["tx"][tx]
            inputs = current_block_file['blocks'][0]["tx"][tx]["inputs"]
            outputs = current_block_file['blocks'][0]["tx"][tx]["out"]
            n_of_inputs = int(current_block_file['blocks'][0]["tx"][tx]["vin_sz"])  # faster than len?
            n_of_outputs = int(current_block_file['blocks'][0]["tx"][tx]["vout_sz"])  # faster than len?
            is_coinbase = False
            input_vals = {}
            output_vals = {}
            tx_vals_temp = []
            tx_vals = []
            # Adding input addresses to graph, doesn't duplicate
            for inputx in range(len(inputs)):
                try:
                    current_input_address = inputs[inputx]["prev_out"]["addr"]
                    current_input_value = inputs[inputx]["prev_out"]["value"]
                    input_vals[current_input_address] = current_input_value
                    graph.add_node(current_input_address)
                except:
                    if (n_of_inputs == 1) and (n_of_outputs == 1):
                        # input_vals["Coinbase"] = outputs[0]["value"]  # --> which is output value
                        is_coinbase = True
                        graph.add_weighted_edges_from([
                            ("Coinbase", outputs[0]["addr"], outputs[0]["value"])
                        ])
                    else:
                        continue
                # Add checks for values of difficulty vs reward, for checking authenticity
                # and to catch similar txs that are not Coinbase.
                # No need to add Coinbase multiple times; already added when creating graph.
                # Only add inputs here, then add outputs and edges together;
                # this seems to be rather inefficient O(n^2).
                # Be careful not to assume seg witness et al. are coinbase just because
                # there might not be an input address or an input value.
            if is_coinbase == True:
                # Then skip output loop because we already added the Tx
                break
            for out in range(len(outputs)):
                current_output_address = outputs[out]["addr"]
                current_output_value = outputs[out]["value"]
                if current_output_address in input_vals:
                    # remove from output list and remove the returned
                    # change value from the transaction
                    input_vals[current_output_address] -= current_output_value  # Removed change from tx
                    n_of_outputs -= 1  # coinbase txs can still be recorded
                    continue  # don't add to output_vals and later to edges
                else:
                    # If output is not a change tx, add node then generate edges
                    graph.add_node(current_output_address)
                    output_vals[current_output_address] = current_output_value
                    tx_vals_temp.append([current_input_address, current_output_address, None])
            # if n_of_outputs <= 0:  # If it is one of the weird reorganizing txs
            #     continue           # then skip (refer to block 546)
            try:
                current_input_value = current_input_value / n_of_outputs
            except ZeroDivisionError:
                debug_zero_div.append((block_number, tx))
                print("alarm ____________________________________________ zero division error")
            # The edge generating loop O(n^2);
            # original iterated the still-empty tx_vals instead of tx_vals_temp
            for tx_tuple in tx_vals_temp:
                tx_tuple[2] = current_input_value
                tx_vals.append(tuple(tx_tuple))
            graph.add_weighted_edges_from(tx_vals)
        blocks_added.append(block_number)

    print("-----Nodes in graph: " + str(graph.number_of_nodes()))
    print("-----Edges in graph: " + str(graph.number_of_edges()))
    print("Zero div list ", debug_zero_div)
    print("writing edgelist please wait as this might take a while")
    # Modify nx to add status updates in the write process?
    nx.readwrite.edgelist.write_edgelist(graph, "../" + str(start_number) + "to" + str(end_number) + ".edgelist")
    print("Edgelist written")
    print("writing graphML please wait as this might take a while")
    nx.write_graphml_lxml(graph, "../" + str(start_number) + "to" + str(end_number) + ".graphml")
    iterations = int((percent / 100) * len(list(G.edges)))
    for x in range(iterations):
        edges = sorted(list(G.edges))
        non_edges = list(nx.non_edges(G))
        chosen_non_edge = random.choice(non_edges)
        print('Chosen edge to add: ', chosen_non_edge)
        print('List of current edges: ', edges)
        G.add_edge(*chosen_non_edge)
    print('Total edge count end: ', len(list(G.edges)))

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 1)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-1-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 5)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-5-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 10)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-10-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    # note: this loop writes "hiv-add-*" files but still calls remove_percent_edges
    remove_percent_edges(G, 1)
    nx.write_graphml_lxml(G, "graphs/hiv-add-1-" + str(x + 1) + ".graphml")
# NOTE: these assignments alias the same graph object; use G.copy() if
# independent per-measure copies are actually needed.
C_2005_2013_btwness = C_2005_2013
C_2005_2013_sprding = C_2005_2013

C_2007_2015_eigenvec = C_2007_2015
C_2007_2015_harmnic = C_2007_2015
C_2007_2015_btwness = C_2007_2015
C_2007_2015_sprding = C_2007_2015

C_2010_2018_eigenvec = C_2010_2018
C_2010_2018_harmnic = C_2010_2018
C_2010_2018_btwness = C_2010_2018
C_2010_2018_sprding = C_2010_2018

# graph-tool conversions: write with networkx, re-load with graph-tool
C_2010_2018_gt = C_2010_2018
nx.write_graphml_lxml(C_2010_2018_gt, "mea_2018_gt.graphml")
gt_2018_univ = graph_tool.load_graph("mea_2018_gt.graphml", fmt='auto',
                                     ignore_vp=None, ignore_ep=None, ignore_gp=None)

# extra
C_2010_2018_eigenvec_gt = C_2010_2018
C_2010_2018_harmnic_gt = C_2010_2018

# Eigenvector centrality graph-tool
nx.write_graphml_lxml(C_2010_2018_eigenvec_gt, "mea_eigen_ctrlty_2018_gt.graphml")
gt_eigen_2018_new = graph_tool.load_graph("mea_eigen_ctrlty_2018_gt.graphml", fmt='auto',
                                          ignore_vp=None, ignore_ep=None, ignore_gp=None)

# harmonic centrality graph-tool
nx.write_graphml_lxml(C_2010_2018_harmnic_gt, "mea_harmnic_ctrlty_2018_gt.graphml")
gt_harmnic_2018_new = graph_tool.load_graph("mea_harmnic_ctrlty_2018_gt.graphml", fmt='auto',
                                            ignore_vp=None, ignore_ep=None, ignore_gp=None)

# Eigenvector centrality
sys.path.append('/scratch/kzltin001/sabcom/')
from SABCoModel import *

# load parameters
with open('parameters.json') as json_file:
    parameters = json.load(json_file)

# load neighbourhood data
with open('neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# Monte Carlo simulation
pos = int(os.getenv('SLURM_ARRAY_TASK_ID'))
seed = pos

# initialization
environment = EnvironmentNetwork(seed, parameters, neighbourhood_data)

# running the simulation
runner = Runner()
runner.baseline(environment, seed)

# save network
if not parameters["high_performance"]:
    for idx, network in enumerate(environment.infection_states):
        # replace agent objects with their status so the graph is GraphML-serialisable
        for i, node in enumerate(network.nodes):
            network.nodes[i]['agent'] = network.nodes[i]['agent'].status
        nx.write_graphml_lxml(network, "measurement/" + str(pos) + "/network_time{}.graphml".format(idx))
def make_classyfire_network(classyfire_obo_filename='ChemOnt_2_1.obo',
                            nodetable_filename='classyfire_chemont_nodetable.tab',
                            network_filename="classyfire_ontology_network_template.graphml",
                            parentchild_filename='classyfire_chemont_network.tab',
                            save=True):
    from copy import deepcopy
    import pandas as pd
    import numpy as np
    import networkx as nx

    with open(classyfire_obo_filename, 'r') as fid:
        chemont = fid.read()
    chemont = chemont.split('[Term]')
    chemont = [c.strip() for c in chemont]
    chemont.pop(0)  # remove header
    print('There are %d entries' % len(chemont))

    # make an empty dict that has all possible chemont terms
    attributes = {}
    for c in chemont:
        for a in c.split('\n'):
            attributes[a.split(': ')[0]] = np.nan

    chemont_df = []
    for c in chemont:
        chemont_df.append(deepcopy(attributes))
        chemont_df[-1]['synonym'] = ''
        for a in c.split('\n'):
            split_str = a.split(': ')
            attr = split_str[0]
            value = split_str[-1]
            # there are many synonyms for each entry. make delimited list
            if attr in ['synonym', 'xref']:
                chemont_df[-1][attr] = '%s; %s' % (chemont_df[-1]['synonym'], value)
            elif ' ! ' in value:
                chemont_df[-1][attr] = value.split(' ! ')[0].strip()
            else:
                chemont_df[-1][attr] = value.strip()

    chemont_df = pd.DataFrame(chemont_df)
    if save is True:
        chemont_df[['id', 'is_a']].to_csv(parentchild_filename, index=None, sep='\t')

    chemont_info = chemont_df[['id', 'name', 'def', 'synonym', 'xref', 'alt_id', 'comment']]
    chemont_info = chemont_info.drop_duplicates(['id', 'name'])
    chemont_info.rename(columns={'name': 'ontology_name'}, inplace=True)
    if save is True:
        chemont_info.to_csv(nodetable_filename, index=None, sep='\t')

    G = nx.from_pandas_edgelist(chemont_df, 'is_a', 'id')
    nx.set_node_attributes(G, chemont_info.set_index('id').to_dict('index'))
    G.remove_node('CHEMONTID:0000000')
    if save is True:
        nx.write_graphml_lxml(G, network_filename)
    return G
def write_graphml(self, file_name):
    # Writes the instance itself, which assumes this class subclasses a networkx graph.
    nx.write_graphml_lxml(self, file_name)
from examples.coviidnetwork.src.environment import Environment
from examples.coviidnetwork.src.runner import Runner
import networkx as nx

args = ["configs/environments/", "config_coviid", "log/", 1]

# initialization
environment_directory = str(args[0])
identifier = str(args[1])
log_directory = str(args[2])
runs = args[3]

# Monte Carlo Simulations
for i in range(runs):
    # initialize environment and runner from files
    environment = Environment(environment_directory, identifier, seed=i)
    runner = Runner(environment)

    # do the run
    runner.do_run(environment, seed=i)

    # save network
    for idx, network in enumerate(environment.infection_states):
        for idx2, node in enumerate(network.nodes):
            network.nodes[idx2]['agent'] = network.nodes[idx2]['agent'].status
        nx.write_graphml_lxml(network, "measurements/{}-network_time{}.graphml".format(i, idx))
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for i, node in enumerate(network.nodes):
                network.nodes[i]['agent'] = network.nodes[i]['agent'].status
            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
def dump_graph(_graph, out='transactions'):
    """Dump a networkx graph to GraphML using the lxml writer."""
    nx.write_graphml_lxml(_graph, f'output/{out}.graphml')