def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Compute the WCNFI/WAAFI features for every unlinked pair and persist them.

    For each candidate pair, the 'WCN' and 'WAA' contributions of every
    common neighbor (as produced by get_partOfWeightCalculating — presumably
    weighted common-neighbors / Adamic-Adar; confirm there) are summed and
    stored as one row in a pydblite database backed by *calculatingFile*.

    Returns the committed pydblite Base handle.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'WCNFI', 'WAAFI')
    pdb.create_index('node1', 'node2')
    element = 0
    qtyofNodesToProcess = len(nodesnotLinked)
    for pair in nodesnotLinked:
        element = element + 1
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        # fix: dropped the unused len(...) locals the original computed.
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        WCNFI = 0
        WAAFI = 0
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            WCNFI = WCNFI + item['WCN']
            WAAFI = WAAFI + item['WAA']
        pdb.insert(str(pair[0]), str(pair[1]), WCNFI, WAAFI)
    pdb.commit()
    return pdb
def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank): print "Starting Analysing the results", datetime.today() absFilePath = filePathResults absfilePathAnalyseResult = filePathAnalyseResult #FormatingDataSets.get_abs_file_path(filePathAnalyseResult) fResult = open(absFilePath, 'r') with open(absfilePathAnalyseResult, 'w') as fnodes: self.success = 0 element = 0 for line in fResult: element = element+1 FormatingDataSets.printProgressofEvents(element, topRank, "Analysing the results: ") cols = line.strip().replace('\n','').split('\t') if len(list(networkx.common_neighbors(preparedParameters.testGraph, cols[len(cols)-2] , cols[len(cols)-1] ))) != 0: self.success = self.success + 1 fnodes.write(cols[len(cols)-2] + '\t' + cols[len(cols)-1] + '\t' + 'SUCCESS \r\n') else: fnodes.write(cols[len(cols)-2] + '\t' + cols[len(cols)-1] + '\t' + 'FAILED \r\n') if element == topRank: break result = float(self.success) / float(topRank) *100 strResult = 'Final Result: \t' + str(result) + '%' fnodes.write(strResult) fnodes.write('\n#\t'+str(self.success)) fnodes.close() print "Analysing the results finished", datetime.today()
def get_pair_nodes_not_linked(self, graph, file, min_papers):
    """For each author node ('N') with at least *min_papers* neighbors,
    collect the later-ordered authors it shares no common neighbor with
    but is still reachable from within a bounded path length, streaming
    '<node>\\t<repr(set)>' lines to *file* in batches of 2000 nodes.
    """
    print "Starting getting pair of nodes that is not liked", datetime.today( )
    results = []
    # authors only; papers are 'E' nodes and are skipped here
    nodesinGraph = set(n for n, d in graph.nodes(data=True) if d['node_type'] == 'N')
    currentNodes = set()
    for n in nodesinGraph:
        papers = set(networkx.all_neighbors(graph, n))
        print papers
        if (len(papers) >= min_papers):
            currentNodes.add(n)
    print 'qty of authors: ', len(currentNodes)
    nodesOrdered = sorted(currentNodes)
    element = 0
    totalnodesOrdered = len(nodesOrdered)
    for node1 in nodesOrdered:
        element = element + 1
        FormatingDataSets.printProgressofEvents( element, totalnodesOrdered, "Checking Node not liked: ")
        # only pairs (node1, other) with other > node1, so each pair is seen once
        others = set(n for n in nodesOrdered if n > node1)
        notLinked = set()
        for other_node in others:
            if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0:
                #notLinked.add(other_node) # as it was before
                # the check below keeps only pairs that are "close" to each other
                if networkx.has_path(graph, node1, other_node):
                    # -2 because shortest_path includes both endpoints
                    tamanho_caminho = len( networkx.shortest_path(graph, node1, other_node)) - 2
                    #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho)
                    #print repr(networkx.shortest_path(graph, node1, other_node));
                    # *2+1 presumably converts the people limit into hops through
                    # intermediate paper nodes — TODO confirm against graph layout
                    if (tamanho_caminho > 0) and (tamanho_caminho <= self.MAX_NUMBER_OF_PEOPLE_BETWEEN * 2 + 1):
                        print "adicionando %s - %s" % (node1, other_node)
                        notLinked.add(other_node)
        if len(notLinked) > 0:
            results.append([node1, notLinked])
        # flush periodically so memory stays bounded on large graphs
        if element % 2000 == 0:
            for item in results:
                file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
            results = []
    # flush whatever remains after the last full batch
    for item in results:
        file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
    results = []
    print "getting pair of nodes that is not liked finished", datetime.today( )
def readingOrginalDataset(self):
    """Parse the '#*'/'#t'/'#@'/'#!'-tagged dataset file and build self.Graph
    with paper nodes ('E'), author nodes ('N') and authorship edges.
    """
    print "Starting Reading Original Dataset", datetime.today()
    with open(self.OriginalDataSet) as f:
        self.OrignalContent = f.readlines()
        f.close()
    articleid = 0
    articles = []
    authornames = []       # unique author names; index+1 doubles as the author id
    authorofArticles = []  # (articleid, authorid) links collected during the scan
    authors = []
    article = None
    element = 0
    for line in self.OrignalContent:
        element = element + 1
        FormatingDataSets.printProgressofEvents( element, len(self.OrignalContent), "Reading File Content to Generate Graph: " )
        line = line.strip()
        # '#*' starts a new record with the paper title
        if line.startswith("#*"):
            articleid = articleid + 1
            article = Article("p_" + str(articleid))
            article.articlename = line.replace("#*", "").replace("\r\n", "")
        # '#t' carries the publication year/time
        if line.startswith("#t"):
            article.time = line.replace("#t", "").replace("\r\n", "")
        # '#@' carries the comma-separated author list
        if line.startswith("#@"):
            authorsofArticle = line.replace("#@", "").replace("\r\n", "").split(",")
            for author in authorsofArticle:
                author = author.strip()
                if not author in authornames:
                    authornames.append(author)
                articleauthor = AuthorInArticle(article.articleid, authornames.index(author) + 1)
                authorofArticles.append(articleauthor)
        # '#!' is treated as the record terminator — presumably the abstract
        # marker closing each entry; records without it are dropped (confirm)
        if line.startswith("#!"):
            articles.append(article)
    for index in range(len(authornames)):
        author = Author(index + 1, authornames[index])
        authors.append(author)
    self.Graph = networkx.Graph()
    for item_article in articles:
        self.Graph.add_node(
            item_article.articleid,
            {"node_type": "E", "title": item_article.articlename.decode("latin_1"), "time": int(item_article.time)},
        )
    for item_author in authors:
        self.Graph.add_node(
            int(item_author.authorid),
            {"node_type": "N", "name": item_author.name.decode("latin_1")}
        )
    for item_edge in authorofArticles:
        self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))
    print "Reading Original Dataset finished", datetime.today()
def readingOrginalDataset(self): print "Starting Reading Original Dataset", datetime.today() con = None try: con = psycopg2.connect(database='projetomestrado', user='******', password='******') curPublicacao = con.cursor() curPublicacao.execute("select distinct p.idpublicacao, p.titulo, p.ano from projetomestrado.publicacao p inner join projetomestrado.autorpublicacao a on a.idpublicacao = p.idpublicacao where a.idautor in (select idautor from projetomestrado.autor where afiliacao = 'Instituto Militar de Engenharia')") curPublicacaoData = curPublicacao.fetchall() element = 0 qty = len(curPublicacaoData) print qty for linha in curPublicacaoData: element = element+1 FormatingDataSets.printProgressofEvents(element, qty, "Adding paper to new graph: ") idpublicacao = linha[0] curPublicacaoPalavras = con.cursor() curPublicacaoPalavras.execute("select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao =" + str(idpublicacao)) palavras = [] for palavra in curPublicacaoPalavras.fetchall(): palavras.append(palavra[0].strip()) curAutores = con.cursor() curAutores.execute("select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = "+ str(idpublicacao)) autores = [] for autor in curAutores.fetchall(): autores.append([autor[0], autor[1] + "," + autor[2]]) self.Publications.append([idpublicacao, linha[1], linha[2], palavras, autores ]) self.Graph = networkx.Graph() for item_article in self.Publications: self.Graph.add_node('P_' + str(item_article[0]), {'node_type' : 'E', 'title' : item_article[1].decode("latin_1"), 'time' : int(item_article[2]), 'keywords': str(item_article[3]) }) for item_autor in item_article[4]: self.Graph.add_node(int(item_autor[0]), {'node_type' : 'N', 'name' : item_autor[1].decode("latin_1") }) self.Graph.add_edge('P_' + str(item_article[0]), 
int(item_autor[0]) ) print "Reading Original Dataset finished", datetime.today() except psycopg2.DatabaseError, e: print 'Error %s' % e
def calculatingInputToFuzzy(graph, nodesnotLinked, params):
    """Build the fuzzy-system input features for every unlinked pair.

    For each pair, the edges to every common neighbor are inspected to
    accumulate collaboration intensity (edge counts), average keyword
    similarity (Jaccard over the edges' 'keywords'), and the "age" of the
    most recent interaction relative to params.t0_.

    Returns a list of dicts keyed no1/no2/intensityno1/intensityno2/
    similarity/ageno1/ageno2.
    """
    result = []
    element = 0
    qtyofNodesToProcess = len(nodesnotLinked)
    for pair in nodesnotLinked:
        element = element + 1
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        IntensityNode1 = 0
        IntensityNode2 = 0
        Similarities = 0
        Similarity = 0
        AgesNode1 = 0
        AgesNode2 = 0
        for cn in CommonNeigbors:
            # all edge-attribute dicts between each pair member and the common neighbor
            infoNode1 = list(edge for n1, n2, edge in graph.edges([pair[0], cn], data=True)
                             if ((n1 == pair[0] and n2 == cn) or (n1 == cn and n2 == pair[0])))
            infoNode2 = list(edge for n1, n2, edge in graph.edges([pair[1], cn], data=True)
                             if ((n1 == pair[1] and n2 == cn) or (n1 == cn and n2 == pair[1])))
            IntensityNode1 = IntensityNode1 + len(infoNode1)
            IntensityNode2 = IntensityNode2 + len(infoNode2)
            MaxTimeNode1 = max(info['time'] for info in infoNode1)
            MaxTimeNode2 = max(info['time'] for info in infoNode2)
            AgesNode1 = max(AgesNode1, MaxTimeNode1)
            # fix: the original used MaxTimeNode1 here (copy-paste bug), so
            # AgesNode2 tracked node1's timestamps instead of node2's.
            AgesNode2 = max(AgesNode2, MaxTimeNode2)
            bagofWordsNode1 = list(info['keywords'] for info in infoNode1)
            bagofWordsNode2 = list(info['keywords'] for info in infoNode2)
            Similarities = Similarities + get_jacard_domain(bagofWordsNode1, bagofWordsNode2)
        # distance from the reference time t0_; 0 means "no common neighbor seen"
        AgesNode1 = abs(params.t0_ - AgesNode1)
        AgesNode2 = abs(params.t0_ - AgesNode2)
        if len(CommonNeigbors) > 0:
            Similarity = (Similarities / len(CommonNeigbors)) * 100
        result.append({'no1': str(pair[0]), 'no2': str(pair[1]),
                       'intensityno1': IntensityNode1,
                       'intensityno2': IntensityNode2,
                       'similarity': Similarity,
                       'ageno1': AgesNode1,
                       'ageno2': AgesNode2})
    return result
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Accumulate the six cnWTS/aaWTS features for every unlinked pair.

    Each common neighbor's contribution (as returned by
    get_partOfWeightCalculating) is summed per feature and one row per
    pair is written to a pydblite database backed by *calculatingFile*.
    Returns the committed database handle.
    """
    # item keys of get_partOfWeightCalculating, in column order
    feature_keys = ('cnWts02', 'cnWts05', 'cnWts08',
                    'aaWts02', 'aaWts05', 'aaWts08')
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02', 'cnWTS05', 'cnWTS08',
               'aaWTS02', 'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')
    total = len(nodesnotLinked)
    done = 0
    for pair in nodesnotLinked:
        done += 1
        FormatingDataSets.printProgressofEvents(
            done, total, "Calculating features for nodes not liked: ")
        common = all_neighbors(graph, pair[0]).intersection(
            all_neighbors(graph, pair[1]))
        sums = dict((key, 0) for key in feature_keys)
        for cn in common:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for key in feature_keys:
                sums[key] += item[key]
        row = [str(pair[0]), str(pair[1])] + [sums[key] for key in feature_keys]
        pdb.insert(*row)
    pdb.commit()
    return pdb
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Sum the six cnWTS/aaWTS features over each pair's common neighbors.

    One row per unlinked pair is inserted into a pydblite database backed
    by *calculatingFile*; the committed handle is returned.  The feature
    semantics are defined by get_partOfWeightCalculating.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02', 'cnWTS05', 'cnWTS08',
               'aaWTS02', 'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')
    element = 0
    qtyofNodesToProcess = len(nodesnotLinked)
    for pair in nodesnotLinked:
        element = element + 1
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        # fix: removed the unused len(...) locals and the never-used
        # CNWJCFeature/AAWJCFeature accumulators the original declared.
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        CNWts02Feature = 0
        CNWts05Feature = 0
        CNWts08Feature = 0
        AAWts02Feature = 0
        AAWts05Feature = 0
        AAWts08Feature = 0
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            CNWts02Feature = CNWts02Feature + item['cnWts02']
            CNWts05Feature = CNWts05Feature + item['cnWts05']
            CNWts08Feature = CNWts08Feature + item['cnWts08']
            AAWts02Feature = AAWts02Feature + item['aaWts02']
            AAWts05Feature = AAWts05Feature + item['aaWts05']
            AAWts08Feature = AAWts08Feature + item['aaWts08']
        pdb.insert(str(pair[0]), str(pair[1]),
                   CNWts02Feature, CNWts05Feature, CNWts08Feature,
                   AAWts02Feature, AAWts05Feature, AAWts08Feature)
    pdb.commit()
    return pdb
def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank): print "Starting Analysing the results", datetime.today() absFilePath = filePathResults absfilePathAnalyseResult = filePathAnalyseResult #FormatingDataSets.get_abs_file_path(filePathAnalyseResult) fResult = open(absFilePath, 'r') with open(absfilePathAnalyseResult, 'w') as fnodes: self.success = 0 element = 0 for line in fResult: element = element + 1 FormatingDataSets.printProgressofEvents( element, topRank, "Analysing the results: ") cols = line.strip().replace('\n', '').split('\t') if len( list( networkx.common_neighbors( preparedParameters.testGraph, cols[len(cols) - 2], cols[len(cols) - 1]))) != 0: self.success = self.success + 1 fnodes.write(cols[len(cols) - 2] + '\t' + cols[len(cols) - 1] + '\t' + 'SUCCESS \r\n') else: fnodes.write(cols[len(cols) - 2] + '\t' + cols[len(cols) - 1] + '\t' + 'FAILED \r\n') if element == topRank: break result = float(self.success) / float(topRank) * 100 strResult = 'Final Result: \t' + str(result) + '%' fnodes.write(strResult) fnodes.write('\n#\t' + str(self.success)) fnodes.close() print "Analysing the results finished", datetime.today()
def get_pair_nodes_not_linked(self): print "Starting getting pair of nodes that is not liked", datetime.today() results = [] nodesinGraph =self.graph.nodes() nodesOrdered = sorted(nodesinGraph) totalnodesOrdered = len(nodesOrdered) element = 0 for node in nodesOrdered: element = element+1 FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ") publicacoes = self.graph.edges(node,data=False) qtdepublicacoes = len(publicacoes) #print "O autor e seus papers ",node,qtdepublicacoes ,publicacoes if (qtdepublicacoes >= self.min_papers): others = set(n for n in nodesOrdered if n > node) for otherNode in others: other_publicacoes = self.graph.edges(otherNode,data=False) other_qtdepublicacoes = len(other_publicacoes) if (other_qtdepublicacoes >= self.min_papers): if (not self.graph.has_edge(node, otherNode)): if self.USE_MAX_NUMBER_OF_PEOPLE_BETWEEN == True: if networkx.has_path(self.graph, node, otherNode): shortestPathResult = networkx.shortest_path(self.graph, node, otherNode) #print shortestPathResult tamanho_caminho = len(shortestPathResult) - 1 #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho) #print repr(networkx.shortest_path(graph, node1, other_node)); if ( tamanho_caminho > 0 ) and (tamanho_caminho <= self.MAX_NUMBER_OF_PEOPLE_BETWEEN ): # -2 porque inclui o inicio e fim #print "adicionando %s - %s" %(node, otherNode) results.append([node, otherNode]) else: results.append([node, otherNode]) print "getting pair of nodes that is not liked finished", datetime.today() return results
def get_pair_nodes_not_linked(self, graph, file, min_papers): print "Starting getting pair of nodes that is not liked", datetime.today() results = [] nodesinGraph =set(n for n,d in graph.nodes(data=True) if d['node_type'] == 'N') currentNodes = set() for n in nodesinGraph: papers = set(networkx.all_neighbors(graph, n)) print papers if (len(papers) >= min_papers): currentNodes.add(n) print 'qty of authors: ', len(currentNodes) nodesOrdered = sorted(currentNodes) element = 0 totalnodesOrdered = len(nodesOrdered) for node1 in nodesOrdered: element = element+1 FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ") others = set(n for n in nodesOrdered if n > node1) notLinked = set() for other_node in others: if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0: notLinked.add(other_node) results.append([node1, notLinked]) if element % 2000 == 0: for item in results: file.write(str(item[0]) + '\t' + repr(item[1]) + '\n') results = [] for item in results: file.write(str(item[0]) + '\t' + repr(item[1]) + '\n') results = [] print "getting pair of nodes that is not liked finished", datetime.today()
def readingOrginalDataset(self): print "Starting Reading Original Dataset", datetime.today() con = None try: con = psycopg2.connect(database='projetomestrado', user='******', password='******') curPublicacao = con.cursor() curPublicacao.execute( "select idpublicacao, titulo, ano from projetomestrado.publicacao where ano >= 1993 and ano <= 2000" ) curPublicacaoData = curPublicacao.fetchall() element = 0 for linha in curPublicacaoData: element = element + 1 FormatingDataSets.printProgressofEvents( element, len(curPublicacaoData), "Adding paper to new graph: ") idpublicacao = linha[0] curPublicacaoPalavras = con.cursor() curPublicacaoPalavras.execute( "select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao =" + str(idpublicacao)) palavras = [] for palavra in curPublicacaoPalavras.fetchall(): palavras.append(palavra[0].strip()) curAutores = con.cursor() curAutores.execute( "select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = " + str(idpublicacao)) autores = [] for autor in curAutores.fetchall(): autores.append([autor[0], autor[1] + "," + autor[2]]) self.Publications.append( [idpublicacao, linha[1], linha[2], palavras, autores]) self.Graph = networkx.Graph() for item_article in self.Publications: self.Graph.add_node( 'P_' + str(item_article[0]), { 'node_type': 'E', 'title': item_article[1].decode("latin_1"), 'time': int(item_article[2]), 'keywords': str(item_article[3]) }) for item_autor in item_article[4]: self.Graph.add_node(int(item_autor[0]), { 'node_type': 'N', 'name': item_autor[1].decode("latin_1") }) self.Graph.add_edge('P_' + str(item_article[0]), int(item_autor[0])) print "Reading Original Dataset finished", datetime.today() except psycopg2.DatabaseError, e: print 'Error %s' % e
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Sum the nine WCNFTI and nine WAAFTI features over common neighbors.

    get_partOfWeightCalculating returns 9-element 'WCN' and 'WAA'
    sequences per common neighbor; these are summed index-wise and one
    row per pair is inserted into a pydblite database backed by
    *calculatingFile*.  Returns the committed handle.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2',
               'WCNFTI01', 'WCNFTI02', 'WCNFTI03', 'WCNFTI04', 'WCNFTI05',
               'WCNFTI06', 'WCNFTI07', 'WCNFTI08', 'WCNFTI09',
               'WAAFTI01', 'WAAFTI02', 'WAAFTI03', 'WAAFTI04', 'WAAFTI05',
               'WAAFTI06', 'WAAFTI07', 'WAAFTI08', 'WAAFTI09')
    pdb.create_index('node1', 'node2')
    element = 0
    qtyofNodesToProcess = len(nodesnotLinked)
    for pair in nodesnotLinked:
        element = element + 1
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        wcn_sums = [0] * 9
        waa_sums = [0] * 9
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for i in range(9):
                wcn_sums[i] += item['WCN'][i]
                waa_sums[i] += item['WAA'][i]
        # fix: the original insert passed WCNFTI02 and WAAFTI02 twice each
        # (22 values for 20 columns), misaligning every later column; each
        # feature is now inserted exactly once, in column order.
        pdb.insert(*([str(pair[0]), str(pair[1])] + wcn_sums + waa_sums))
    pdb.commit()
    return pdb
def readingOrginalDataset(self):
    """Parse the '#*'/'#t'/'#@'/'#!'-tagged dataset file and build self.Graph
    with paper nodes ('E'), author nodes ('N') and authorship edges.
    """
    print "Starting Reading Original Dataset", datetime.today()
    with open(self.OriginalDataSet) as f:
        self.OrignalContent = f.readlines()
        f.close()
    articleid = 0
    articles = []
    authornames = []       # unique author names; index+1 doubles as the author id
    authorofArticles = []  # (articleid, authorid) links collected during the scan
    authors = []
    article = None
    element = 0
    for line in self.OrignalContent:
        element = element + 1
        FormatingDataSets.printProgressofEvents(
            element, len(self.OrignalContent),
            "Reading File Content to Generate Graph: ")
        line = line.strip()
        # '#*' starts a new record with the paper title
        if line.startswith('#*'):
            articleid = articleid + 1
            article = Article('p_' + str(articleid))
            article.articlename = line.replace('#*', '').replace('\r\n', '')
        # '#t' carries the publication year/time
        if line.startswith('#t'):
            article.time = line.replace('#t', '').replace('\r\n', '')
        # '#@' carries the comma-separated author list
        if line.startswith('#@'):
            authorsofArticle = line.replace('#@', '').replace('\r\n',
                                                              '').split(',')
            for author in authorsofArticle:
                author = author.strip()
                if not author in authornames:
                    authornames.append(author)
                articleauthor = AuthorInArticle(
                    article.articleid, authornames.index(author) + 1)
                authorofArticles.append(articleauthor)
        # '#!' is treated as the record terminator — presumably the abstract
        # marker closing each entry; records without it are dropped (confirm)
        if line.startswith('#!'):
            articles.append(article)
    for index in range(len(authornames)):
        author = Author(index + 1, authornames[index])
        authors.append(author)
    self.Graph = networkx.Graph()
    for item_article in articles:
        self.Graph.add_node(
            item_article.articleid, {
                'node_type': 'E',
                'title': item_article.articlename.decode("latin_1"),
                'time': int(item_article.time)
            })
    for item_author in authors:
        self.Graph.add_node(int(item_author.authorid), {
            'node_type': 'N',
            'name': item_author.name.decode("latin_1")
        })
    for item_edge in authorofArticles:
        self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))
    print "Reading Original Dataset finished", datetime.today()
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Sum the nine WCNFTI and nine WAAFTI features over common neighbors.

    get_partOfWeightCalculating returns 9-element 'WCN' and 'WAA'
    sequences per common neighbor; these are accumulated index-wise and
    one row per pair is written to a pydblite database backed by
    *calculatingFile*.  Returns the committed handle.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'WCNFTI01', 'WCNFTI02', 'WCNFTI03',
               'WCNFTI04', 'WCNFTI05', 'WCNFTI06', 'WCNFTI07', 'WCNFTI08',
               'WCNFTI09', 'WAAFTI01', 'WAAFTI02', 'WAAFTI03', 'WAAFTI04',
               'WAAFTI05', 'WAAFTI06', 'WAAFTI07', 'WAAFTI08', 'WAAFTI09')
    pdb.create_index('node1', 'node2')
    element = 0
    qtyofNodesToProcess = len(nodesnotLinked)
    for pair in nodesnotLinked:
        element = element + 1
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        wcn_sums = [0] * 9
        waa_sums = [0] * 9
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for i in range(9):
                wcn_sums[i] += item['WCN'][i]
                waa_sums[i] += item['WAA'][i]
        # fix: the original insert passed WCNFTI02 and WAAFTI02 twice each
        # (22 values for 20 columns), shifting every later column; each
        # feature is now inserted exactly once, in column order.
        pdb.insert(*([str(pair[0]), str(pair[1])] + wcn_sums + waa_sums))
    pdb.commit()
    return pdb