def formatNetwork2(filePath):
    """Read a network file and convert it to a dict of vis.js-style
    node and edge records: {"nodes": [...], "edges": [...]}."""
    graph = nx.convert_node_labels_to_integers(
        linkpred.read_network(filePath), 1, "default", "label")

    nodes = []
    edges = []
    # nx.jit_data serialises the graph as a JSON adjacency structure.
    for record in json.loads(nx.jit_data(graph)):
        node_id = record['data']['id']
        nodes.append({
            "id": int(node_id),
            "label": record['data']['label'],
        })
        for adjacency in record.get('adjacencies', []):
            edges.append({
                "from": int(node_id),
                "to": int(adjacency['nodeTo']),
                "value": int(adjacency['data']['weight']),
                "edgeID": str(node_id) + 'to' + str(adjacency['nodeTo']),
            })

    return {"nodes": nodes, "edges": edges}
def test_read_network():
    """read_network should accept both a filename and an open file handle."""
    with temp_file(suffix=".net") as fname:
        with open(fname, "w") as fh:
            fh.write("""*vertices 2
1 "A"
2 "B"
*arcs 2
1 2
2 1""")

        expected = nx.DiGraph()
        expected.add_edges_from([("A", "B"), ("B", "A")])

        # Read by filename.
        G = linkpred.read_network(fname)
        assert_equal(set(G.edges()), set(expected.edges()))

        # BUG FIX: this branch was clearly meant to exercise reading from an
        # open file handle, but passed the file *name* again, making it a
        # duplicate of the check above. Pass the handle instead.
        with open(fname) as fh:
            G = linkpred.read_network(fh)
            assert_equal(set(G.edges()), set(expected.edges()))
def hit(rate, batch, option):
    """Run the *option* predictor on the dbpedia50 network for split *rate*,
    compute the top-*batch* hit rate, and append timings/results to ./log.txt.

    BUG FIX: all files are now opened via ``with`` blocks — the original
    leaked the two input handles and the log handle on any exception.
    """
    t_start = datetime.now()

    # Tab-separated (e1, relation, e2) triples.
    # NOTE(review): `triples` is built but never used in this variant —
    # presumably kept for parity with the Freebase version; confirm.
    triples = []
    with open('./data/dbpedia50/smallScaleTrain' + str(rate) + '.txt', 'r') as in_file:
        for line in in_file:
            inputs = line.strip().split('\t')
            triples.append([inputs[0], inputs[1], inputs[2]])

    notInTriples = []
    with open('./data/dbpedia50/notSimpleData' + str(rate) + '.txt', 'r') as in_file2:
        for line in in_file2:
            inputs = line.strip().split('\t')
            notInTriples.append([inputs[0], inputs[1], inputs[2]])

    G = linkpred.read_network('./data/dbpedia50_' + str(rate) + '.net')
    load_data_time = datetime.now() - t_start

    t_start = datetime.now()
    predictor = getPredictor(option, G)
    results = predictor.predict()
    rooted_pagerank_time = datetime.now() - t_start

    t_start = datetime.now()
    batchList_without_check = get_batch_without_check(results, batch)
    hitRate_without_check, hitRate_without_check2 = getHitRate(
        batchList_without_check, notInTriples)
    top_check_time = datetime.now() - t_start

    with open('./log.txt', 'a') as out_file:
        out_file.write('\n')
        out_file.write('rate: ' + rate + ' batch: ' + str(batch)
                       + ' option ' + option + '\n')
        out_file.write('load_time: ' + str(load_data_time) + '\n')
        out_file.write('rooted_pagerank_time: ' + str(rooted_pagerank_time) + '\n')
        out_file.write('top_check_time: ' + str(top_check_time) + '\n')
        out_file.write('The hit rate of ' + option + ' is '
                       + str(hitRate_without_check) + ' '
                       + str(hitRate_without_check2) + '\n')
def remove():
    """Reload the network at *filename_path* into global H, strip any
    self-loops, and report the removal in the txt widget."""
    global H
    global filename_path
    # recherche
    G = linkpred.read_network(filename_path)
    H = G.copy()
    num_loops = nx.number_of_selfloops(G)
    if num_loops:
        # BUG FIX: the original built the message as
        # '"Network contains {} self-loops. Removing..." + format(num_loops)',
        # which appended the count after the text and left the "{}" literal.
        # Use str.format so the placeholder is substituted.
        sentence = ("Network contains {} self-loops. "
                    "Removing...".format(num_loops))
        H.remove_edges_from(nx.selfloop_edges(G))
        # BUG FIX: only touch the widget when a message exists — previously
        # `sentence` was undefined (NameError) when there were no self-loops.
        txt.delete(0.0, END)
        txt.insert(0.0, sentence)
def downloadAsCSV():
    """Flask endpoint: rebuild the uploaded network, add the user's new
    connections as weight-1 edges, and send the result back as a file."""
    data = request.json["data"]
    filename = data['filename']
    G = linkpred.read_network(os.path.join(
        app.config['UPLOAD_FOLDER'], filename))
    H = G.copy()
    num_loops = nx.number_of_selfloops(G)
    if num_loops:
        H.remove_edges_from(nx.selfloop_edges(G))
    # Integer ids let the frontend's 'from'/'to' indices map back to labels.
    G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
    for newConnection in data['newConnections']:
        source = G.nodes[int(newConnection['from'])]['label']
        target = G.nodes[int(newConnection['to'])]['label']
        H.add_edge(source, target, weight=1.0)
    # BUG FIX: build the path with os.path.join (consistent with the read
    # above) instead of '+' concatenation, which silently produced a wrong
    # path whenever UPLOAD_FOLDER lacked a trailing separator.
    path = os.path.join(app.config['UPLOAD_FOLDER'],
                        filename + 'FutureLinks' + '.csv')
    # NOTE(review): despite the .csv extension this writes Pajek format —
    # presumably so the file can be re-uploaded; confirm with the frontend.
    nx.write_pajek(H, path)
    return send_file(path)
'''
CPU(intel xeon cpu e5-2673 v3 @ 2.40ghz), 8GB RAM, Windows OS
Python==3.6.8, Stellargraph==1.2.1, Tensorflow==2.1.0, linkpred==0.5.1.
'''
import linkpred
# BUG FIX: np.argmax/max below require numpy, which was never imported.
import numpy as np

G_train = linkpred.read_network("train1.net")    # train dataset
G_entire = linkpred.read_network("whole.net")    # entire dataset
test = G_entire
training = G_train
test.remove_edges_from(training.edges())  # create testset

###### 1. Jaccard
jaccard = linkpred.predictors.Jaccard(training, excluded=training.edges())  # train model based on jaccard index
jaccard_results = jaccard.predict()  # predict testset based on jaccard index
test_set = set(linkpred.evaluation.Pair(u, v) for u, v in test.edges())
evaluation1 = linkpred.evaluation.EvaluationSheet(jaccard_results, test_set)  # compare predicted value with real value
p1 = evaluation1.precision()
f1 = evaluation1.f_score()
r1 = evaluation1.recall()
print("Jaccard p : ", p1[np.argmax(f1)])
print("Jaccard r : ", r1[np.argmax(f1)])  # extract precision and recall when f1-score is optimal
print("Jaccard f1 : ", max(f1))  # extract optimal f1-score

###### 2. CommonNeighbours
CN = linkpred.predictors.CommonNeighbours(training, excluded=training.edges())  # train model based on Common Neighbours
CN_results = CN.predict()  # predict testset based on Common Neighbours
test_set = set(linkpred.evaluation.Pair(u, v) for u, v in test.edges())  # compare predicted value with real value
def test_read_unknown_network_type():
    """An unrecognised file extension should raise LinkPredError."""
    fd, fname = tempfile.mkstemp(suffix=".foo")
    # BUG FIX: clean up in a finally block — the original leaked the fd and
    # the temp file whenever the assertion failed.
    try:
        with assert_raises(linkpred.exceptions.LinkPredError):
            linkpred.read_network(fname)
    finally:
        os.close(fd)
        os.unlink(fname)
def test_read_unknown_network_type():
    """An unrecognised file extension should raise LinkPredError."""
    with temp_file(suffix=".foo") as fname:
        # BUG FIX: the original simply called read_network with no raises
        # assertion, so the expected LinkPredError would *error* the test
        # instead of passing it. Assert the exception explicitly.
        try:
            linkpred.read_network(fname)
        except linkpred.exceptions.LinkPredError:
            pass
        else:
            raise AssertionError(
                "read_network should raise LinkPredError for unknown type")
def view_file():
    """Flask endpoint: render a previously uploaded network file together
    with CNGF (Common-Neighbours Graph-based Features) link predictions.

    404s when *filename* is not registered in the Stuff table.
    """
    filename = request.args.get('filename')
    if Stuff.query.filter_by(title=filename).first():
        initialGraphJson = formatNetwork2(
            os.path.join(app.config['UPLOAD_FOLDER'], filename))
        G = linkpred.read_network(
            os.path.join(app.config['UPLOAD_FOLDER'], filename))
        H = G.copy()
        num_loops = nx.number_of_selfloops(G)
        if num_loops:
            H.remove_edges_from(nx.selfloop_edges(G))
        CommonNeighbours = mypred.predictors.CommonNeighboursGF(
            H, excluded=H.edges())
        CommonNeighbours_results = CommonNeighbours.predict()
        top = CommonNeighbours_results.top()
        sentence = []
        sentenceunsorted = []
        newLinks = []
        jsonDict = []
        # Re-run on the integer-labelled graph so node ids match the JSON view.
        G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
        CommonNeighboursG = mypred.predictors.CommonNeighboursGF(
            G, excluded=G.edges())
        CommonNeighbours_resultsG = CommonNeighboursG.predict()
        topG = CommonNeighbours_resultsG.top()
        for authors, score in topG.items():
            authorsArray = [authors[0], authors[1]]
            common = intersection(list(G.neighbors(authors[0])),
                                  list(G.neighbors(authors[1]))) + authorsArray
            subG = G.subgraph(common)
            # CNGF score: sum of subgraph-degree / log10(global degree) over
            # the pair's common neighbours.
            cngfScore = 0
            for nodeID, nodeInfo in subG.nodes(data=True):
                if nodeID not in authorsArray:
                    cngfScore = cngfScore + (
                        subG.degree[nodeID] / math.log10(G.degree[nodeID]))
            authorOne = G.nodes[authorsArray[1]]
            authorTwo = G.nodes[authorsArray[0]]
            sentenceunsorted.append({
                "text": authorOne['label'] + " - " + authorTwo['label']
                        + " le score est :" + str(cngfScore),
                "score": cngfScore
            })
            newLinks.append({
                "from": authorOne['id'],
                "to": authorTwo['id'],
                "value": float(1.0),
                "authOne": authorOne,
                "authTwo": authorTwo,
                "score": float("{:.2f}".format(cngfScore))
            })
        for s in sentenceunsorted:
            sentence.append(s['text'])
        for authors, score in top.items():
            jsonDict.append({
                "authorSource": str(authors).split(' - ')[0],
                "authorDest": str(authors).split(' - ')[1],
                # BUG FIX: use this pair's own score — the original reused
                # `cngfScore`, the stale value from the last iteration of the
                # previous loop.
                "score": score
            })
        return render_template('viewGraph.html', newLinks=newLinks,
                               predictions=sentence, data=initialGraphJson,
                               filename=filename)
    else:
        abort(404)
import linkpred
from datetime import datetime

# Benchmark script: time five neighbourhood-based linkpred predictors on the
# Freebase13 network. Existing edges are excluded so only new links are scored.
G = linkpred.read_network("./data/Freebase13.net")

a = datetime.now()
neighbour_rank1 = linkpred.predictors.CommonNeighbours(G, excluded=G.edges())
neighbour_rank_results1 = neighbour_rank1.predict()
b = datetime.now()
print("The time of CommonNeighbores: " + str(b - a))

a = datetime.now()
neighbour_rank2 = linkpred.predictors.AdamicAdar(G, excluded=G.edges())
neighbour_rank_results2 = neighbour_rank2.predict()
b = datetime.now()
print("The time of AdamicAdar: " + str(b - a))

a = datetime.now()
neighbour_rank3 = linkpred.predictors.Jaccard(G, excluded=G.edges())
neighbour_rank_results3 = neighbour_rank3.predict()
b = datetime.now()
print("The time of Jaccard: " + str(b - a))

a = datetime.now()
neighbour_rank4 = linkpred.predictors.ResourceAllocation(G, excluded=G.edges())
neighbour_rank_results4 = neighbour_rank4.predict()
b = datetime.now()
print("The time of ResourceAllocation: " + str(b - a))

a = datetime.now()
# NOTE(review): the snippet appears truncated here — the predict()/print
# pair for DegreeProduct is not visible in this chunk.
neighbour_rank5 = linkpred.predictors.DegreeProduct(G, excluded=G.edges())
def graph():
    """Flask endpoint: accept an uploaded .net file, run CNGF link
    prediction, and render the generated graph page."""
    if request.method == 'POST':
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('Veillez choisir un fichier')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            initialGraphJson = formatNetwork2(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            G = linkpred.read_network(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            H = G.copy()
            num_loops = nx.number_of_selfloops(G)
            if num_loops:
                H.remove_edges_from(nx.selfloop_edges(G))
            CommonNeighbours = mypred.predictors.CommonNeighboursGF(
                H, excluded=H.edges())
            CommonNeighbours_results = CommonNeighbours.predict()
            top = CommonNeighbours_results.top()
            sentence = []
            sentenceunsorted = []
            newLinks = []
            jsonDict = []
            # Re-run on the integer-labelled graph so ids match the JSON view.
            G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
            CommonNeighboursG = mypred.predictors.CommonNeighboursGF(
                G, excluded=G.edges())
            CommonNeighbours_resultsG = CommonNeighboursG.predict()
            topG = CommonNeighbours_resultsG.top()
            for authors, score in topG.items():
                authorsArray = [authors[0], authors[1]]
                common = intersection(list(G.neighbors(authors[0])),
                                      list(G.neighbors(authors[1]))) + authorsArray
                subG = G.subgraph(common)
                # CNGF score: sum of subgraph-degree / log10(global degree)
                # over the pair's common neighbours.
                cngfScore = 0
                for nodeID, nodeInfo in subG.nodes(data=True):
                    if nodeID not in authorsArray:
                        cngfScore = cngfScore + (
                            subG.degree[nodeID] / math.log10(G.degree[nodeID]))
                authorOne = G.nodes[authorsArray[1]]
                authorTwo = G.nodes[authorsArray[0]]
                sentenceunsorted.append({
                    "text": authorOne['label'] + " - " + authorTwo['label']
                            + " le score est :" + str(cngfScore),
                    "score": cngfScore
                })
                newLinks.append({
                    "from": authorOne['id'],
                    "to": authorTwo['id'],
                    "value": float(1.0),
                    "authOne": authorOne,
                    "authTwo": authorTwo,
                    "score": cngfScore
                })
            for s in sentenceunsorted:
                sentence.append(s['text'])
            for authors, score in top.items():
                jsonDict.append({
                    "authorSource": str(authors).split(' - ')[0],
                    "authorDest": str(authors).split(' - ')[1],
                    # BUG FIX: use this pair's own score — the original reused
                    # the stale `cngfScore` from the previous loop.
                    "score": score
                })
            return render_template('generatedGraph.html', newLinks=newLinks,
                                   predictions=sentence,
                                   data=initialGraphJson, filename=filename,
                                   DL_AS_NET_URL=DL_AS_NET_URL)
        else:
            flash(
                "format inccorecte, veillez sélectionner un fichier .net valide "
            )
            return redirect(request.url)
    return render_template('downloads.html')
def hit(rate, batch, option):
    """Run the *option* predictor on the Freebase13 network for split *rate*,
    compute the top-*batch* hit rates with and without entity-cluster
    checking, append timings to ./log.txt, and dump the checked batch to
    a results file.

    BUG FIX: all files are now opened via ``with`` blocks — the original
    leaked every handle (inputs, log, results) on any exception.
    """
    t0 = datetime.now()

    # Tab-separated (e1, relation, e2) triples.
    triples = []
    with open('./data/Freebase13/smallScaleTrain' + str(rate) + '.txt', 'r') as in_file:
        for line in in_file:
            inputs = line.strip().split('\t')
            triples.append([inputs[0], inputs[1], inputs[2]])

    notInTriples = []
    with open('./data/Freebase13/notSimpleData' + str(rate) + '.txt', 'r') as in_file2:
        for line in in_file2:
            inputs = line.strip().split('\t')
            notInTriples.append([inputs[0], inputs[1], inputs[2]])

    G = linkpred.read_network('./data/Freebase13_' + str(rate) + '.net')
    load_data_time = datetime.now() - t0

    t0 = datetime.now()
    predictor = getPredictor(option, G)
    results = predictor.predict()
    rooted_pagerank_time = datetime.now() - t0

    # Top-k selection *with* entity-cluster checking.
    t0 = datetime.now()
    getEntitiesSet(triples)
    batchList_with_check = get_batch_with_check(results, batch)
    hitRate_with_check, hitRate_with_check2 = getHitRate(
        batchList_with_check, notInTriples)
    entities_cluster_check_time = datetime.now() - t0

    # Plain top-k selection, no checking.
    t0 = datetime.now()
    batchList_without_check = get_batch_without_check(results, batch)
    hitRate_without_check, hitRate_without_check2 = getHitRate(
        batchList_without_check, notInTriples)
    top_check_time = datetime.now() - t0

    with open('./log.txt', 'a') as out_file:
        out_file.write('\n')
        out_file.write('rate: ' + rate + ' batch: ' + str(batch)
                       + ' option ' + option + '\n')
        out_file.write('load_time: ' + str(load_data_time) + '\n')
        out_file.write('rooted_pagerank_time: ' + str(rooted_pagerank_time) + '\n')
        out_file.write('entities_cluster_check_time: '
                       + str(entities_cluster_check_time) + '\n')
        out_file.write('top_check_time: ' + str(top_check_time) + '\n')
        out_file.write('The hit rate without check of ' + option + ' is '
                       + str(hitRate_without_check) + ' '
                       + str(hitRate_without_check2) + '\n')
        out_file.write('The hit rate with check of ' + option + ' is '
                       + str(hitRate_with_check) + ' '
                       + str(hitRate_with_check2) + '\n')

    with open('./data/results/Freebase13_' + str(rate) + '_' + str(batch)
              + '_results.txt', 'w') as results_file:
        for pair in batchList_with_check:
            x, y = pair
            results_file.write(x + '\t' + y + '\n')
def upload_graph():
    """Flask endpoint: save an uploaded .net file under a timestamped name,
    run CNGF link prediction, write all pair scores to a CSV next to the
    upload, and render the generated graph page."""
    if request.method == 'POST':
        if 'file' not in request.files:
            flash('No file part', 'netErrors')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('Veillez choisir un fichier', 'netErrors')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file_save_name = datetime.utcnow().strftime(
                "%Y_%d_%m_%H_%M_%S_") + filename
            file_path = os.path.join(app.config['UPLOAD_FOLDER'],
                                     file_save_name)
            file.save(file_path)
            originalFile = Stuff(title=file_save_name, type="net",
                                 user=current_user)
            db.session.add(originalFile)
            initialGraphJson = formatNetwork2(file_path)
            G = linkpred.read_network(file_path)
            H = G.copy()
            num_loops = nx.number_of_selfloops(G)
            if num_loops:
                H.remove_edges_from(nx.selfloop_edges(G))
            CommonNeighbours = mypred.predictors.CommonNeighboursGF(
                H, excluded=H.edges())
            CommonNeighbours_results = CommonNeighbours.predict()
            # top(0) == all scored pairs (the default-size `top` the original
            # also computed was never used and has been dropped).
            topAll = CommonNeighbours_results.top(0)
            sentence = []
            sentenceunsorted = []
            newLinks = []
            jsonDict = []
            # Re-run on the integer-labelled graph so ids match the JSON view.
            G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
            CommonNeighboursG = mypred.predictors.CommonNeighboursGF(
                G, excluded=G.edges())
            CommonNeighbours_resultsG = CommonNeighboursG.predict()
            topG = CommonNeighbours_resultsG.top(13)
            for authors, score in topG.items():
                authorsArray = [authors[0], authors[1]]
                common = intersection(list(G.neighbors(authors[0])),
                                      list(G.neighbors(authors[1]))) + authorsArray
                subG = G.subgraph(common)
                # CNGF score: sum of subgraph-degree / log10(global degree)
                # over the pair's common neighbours.
                cngfScore = 0
                for nodeID, nodeInfo in subG.nodes(data=True):
                    if nodeID not in authorsArray:
                        cngfScore = cngfScore + (
                            subG.degree[nodeID] / math.log10(G.degree[nodeID]))
                authorOne = G.nodes[authorsArray[1]]
                authorTwo = G.nodes[authorsArray[0]]
                sentenceunsorted.append({
                    "text": authorOne['label'] + " - " + authorTwo['label']
                            + " le score est :" + str(cngfScore),
                    "score": cngfScore
                })
                newLinks.append({
                    "from": authorOne['id'],
                    "to": authorTwo['id'],
                    "value": float(1.0),
                    "authOne": authorOne,
                    "authTwo": authorTwo,
                    "score": float("{:.4f}".format(cngfScore))
                })
            for s in sentenceunsorted:
                sentence.append(s['text'])
            for authors, score in topAll.items():
                jsonDict.append({
                    # BUG FIX: pass plain str values — the original called
                    # .encode("utf-8"), and csv.DictWriter renders bytes as
                    # b'...' literals in the output file.
                    "authorSource": str(authors).split(' - ')[0],
                    "authorDest": str(authors).split(' - ')[1],
                    "score": score
                })
            csv_columns = ['authorSource', 'authorDest', 'score']
            csv_file = file_path + ".csv"
            try:
                # BUG FIX: newline='' avoids blank rows on Windows; explicit
                # utf-8 keeps accented author names intact.
                with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
                    writer.writeheader()
                    for data in jsonDict:
                        writer.writerow(data)
                CSV_results = Stuff(title=file_save_name + ".csv", type="csv",
                                    user=current_user)
                db.session.add(CSV_results)
            except IOError:
                print("I/O error")
            db.session.commit()
            return render_template('generatedGraph.html', newLinks=newLinks,
                                   predictions=sentence,
                                   data=initialGraphJson,
                                   filename=file_save_name)
        else:
            flash(
                "format inccorecte, veillez sélectionner un fichier .net valide ",
                'netErrors')
            return redirect(request.url)
    return render_template('upload_graph.html', title='Upload')
import linkpred
import random
from matplotlib import pyplot as plt

emb = input("Select the dataset")
random.seed(100)

# Read network
G = linkpred.read_network(emb)

# Create test network.
# BUG FIX: random.sample requires a sequence; G.nodes() is a NodeView and
# raises TypeError on Python 3.11+, so materialise it first.
test = G.subgraph(random.sample(list(G.nodes()), 20))

# Exclude test network from learning phase
training = G.copy()
training.remove_edges_from(test.edges())

simrank = linkpred.predictors.SimRank(training, excluded=training.edges())
simrank_results = simrank.predict(c=0.5)

test_set = set(linkpred.evaluation.Pair(u, v) for u, v in test.edges())
evaluation = linkpred.evaluation.EvaluationSheet(simrank_results, test_set)

recall = evaluation.recall()
precision = evaluation.precision()

plt.clf()
plt.plot(recall, precision, color='navy', label='Precision-Recall curve')
plt.xlabel('Recall')
def upload_file():
    """Flask endpoint: save an uploaded .net file, run CommonNeighbours +
    CNGF link prediction, and render the generated graph page with the
    predicted links sorted by descending score."""
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # if user does not select file, browser also submits an empty part
        # without filename
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            initialGraphJson = formatNetwork2(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            G = linkpred.read_network(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            H = G.copy()
            num_loops = nx.number_of_selfloops(G)
            if num_loops:
                H.remove_edges_from(nx.selfloop_edges(G))
            CommonNeighbours = linkpred.predictors.CommonNeighbours(
                H, excluded=H.edges())
            CommonNeighbours_results = CommonNeighbours.predict()
            top = CommonNeighbours_results.top()
            sentence = []
            sentenceunsorted = []
            newLinks = []
            jsonDict = []
            # Re-run on the integer-labelled graph so ids match the JSON view.
            G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
            CommonNeighboursG = linkpred.predictors.CommonNeighbours(
                G, excluded=G.edges())
            CommonNeighbours_resultsG = CommonNeighboursG.predict()
            topG = CommonNeighbours_resultsG.top(10)
            for authors, score in topG.items():
                authorsArray = [authors[0], authors[1]]
                common = intersection(list(G.neighbors(authors[0])),
                                      list(G.neighbors(authors[1]))) + authorsArray
                subG = G.subgraph(common)
                # CNGF score: sum of subgraph-degree / log10(global degree)
                # over the pair's common neighbours.
                cngfScore = 0
                for nodeID, nodeInfo in subG.nodes(data=True):
                    if nodeID not in authorsArray:
                        cngfScore = cngfScore + (
                            subG.degree[nodeID] / math.log10(G.degree[nodeID]))
                authorOne = G.nodes[authorsArray[1]]
                authorTwo = G.nodes[authorsArray[0]]
                sentenceunsorted.append({
                    "text": authorOne['label'] + " - " + authorTwo['label']
                            + " le score est :" + str(cngfScore),
                    "score": cngfScore
                })
                newLinks.append({
                    "from": authorOne['id'],
                    "to": authorTwo['id'],
                    "value": float(1.0),
                    "authOne": authorOne,
                    "authTwo": authorTwo,
                    "score": cngfScore
                })
            # Present best predictions first.
            newLinks = sorted(newLinks, key=lambda i: i['score'], reverse=True)
            sentenceunsorted = sorted(sentenceunsorted,
                                      key=lambda i: i['score'], reverse=True)
            for s in sentenceunsorted:
                sentence.append(s['text'])
            for authors, score in top.items():
                jsonDict.append({
                    "authorSource": str(authors).split(' - ')[0],
                    "authorDest": str(authors).split(' - ')[1],
                    # BUG FIX: use this pair's own score — the original reused
                    # the stale `cngfScore` from the previous loop.
                    "score": score
                })
            return render_template('generatedGraph.html', newLinks=newLinks,
                                   predictions=sentence,
                                   data=initialGraphJson, filename=filename)
        else:
            flash(
                "format inccorecte, veillez sélectionner un fichier .net valide "
            )
            return redirect(request.url)
    return render_template('downloads.html')
def test_read_unknown_network_type():
    """Reading a file with an unrecognised extension raises LinkPredError."""
    with temp_file(suffix=".foo") as fname, \
            pytest.raises(linkpred.exceptions.LinkPredError):
        linkpred.read_network(fname)