Esempio n. 1
0
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Store the summed WCN and WAA weights of every candidate pair.

    For each pair in ``nodesnotLinked`` the neighbours shared by both nodes are
    collected, and the 'WCN' and 'WAA' entries returned by
    ``get_partOfWeightCalculating`` for each shared neighbour are added up.
    One record per pair is inserted, even when the pair shares no neighbour
    (both sums are then 0).

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: argument used to open the ``Base`` table.

    Returns:
        The committed ``Base`` object with fields node1, node2, WCNFI, WAAFI.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'WCNFI', 'WAAFI')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        # NOTE(review): all_neighbors is assumed to return a set-like object
        # (it must offer .intersection) -- confirm against its definition.
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        WCNFI = 0
        WAAFI = 0
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            WCNFI = WCNFI + item['WCN']
            WAAFI = WAAFI + item['WAA']
        pdb.insert(str(pair[0]), str(pair[1]), WCNFI, WAAFI)
    pdb.commit()
    return pdb
 def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank):
     print "Starting Analysing the results", datetime.today()
     
     absFilePath = filePathResults
     absfilePathAnalyseResult = filePathAnalyseResult #FormatingDataSets.get_abs_file_path(filePathAnalyseResult)
     fResult = open(absFilePath, 'r')
     with open(absfilePathAnalyseResult, 'w') as fnodes:
         self.success = 0
         element = 0
         for line in fResult:
             element = element+1
             FormatingDataSets.printProgressofEvents(element, topRank, "Analysing the results: ")
             cols = line.strip().replace('\n','').split('\t')
             if len(list(networkx.common_neighbors(preparedParameters.testGraph, cols[len(cols)-2] ,  cols[len(cols)-1] ))) != 0:
                 self.success = self.success + 1
                 fnodes.write(cols[len(cols)-2]  + '\t' + cols[len(cols)-1] + '\t' +  'SUCCESS \r\n')
             else:
                 fnodes.write(cols[len(cols)-2]  + '\t' + cols[len(cols)-1] + '\t' +  'FAILED \r\n')
             
             
             
             if element == topRank:
                 break 
         
         result =  float(self.success) / float(topRank) *100
         strResult = 'Final Result: \t' + str(result) + '%'
         fnodes.write(strResult)
         fnodes.write('\n#\t'+str(self.success))
         fnodes.close()
     print "Analysing the results finished", datetime.today()
    def get_pair_nodes_not_linked(self, graph, file, min_papers):
        print "Starting getting pair of nodes that is not liked", datetime.today(
        )
        results = []
        nodesinGraph = set(n for n, d in graph.nodes(data=True)
                           if d['node_type'] == 'N')
        currentNodes = set()
        for n in nodesinGraph:

            papers = set(networkx.all_neighbors(graph, n))
            print papers
            if (len(papers) >= min_papers):
                currentNodes.add(n)

        print 'qty of authors: ', len(currentNodes)
        nodesOrdered = sorted(currentNodes)
        element = 0
        totalnodesOrdered = len(nodesOrdered)
        for node1 in nodesOrdered:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, totalnodesOrdered, "Checking Node not liked: ")

            others = set(n for n in nodesOrdered if n > node1)
            notLinked = set()
            for other_node in others:
                if len(set(networkx.common_neighbors(graph, node1,
                                                     other_node))) == 0:
                    #notLinked.add(other_node) # como estava antes
                    # esse if abaixo verifica se estao perto
                    if networkx.has_path(graph, node1, other_node):
                        tamanho_caminho = len(
                            networkx.shortest_path(graph, node1,
                                                   other_node)) - 2
                        #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho)
                        #print repr(networkx.shortest_path(graph, node1, other_node));
                        if (tamanho_caminho >
                                0) and (tamanho_caminho <=
                                        self.MAX_NUMBER_OF_PEOPLE_BETWEEN * 2 +
                                        1):  # -2 porque inclui o inicio e fim
                            print "adicionando %s - %s" % (node1, other_node)
                            notLinked.add(other_node)
            if len(notLinked) > 0:
                results.append([node1, notLinked])
            if element % 2000 == 0:
                for item in results:
                    file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
                results = []

        for item in results:
            file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
        results = []

        print "getting pair of nodes that is not liked finished", datetime.today(
        )
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        with open(self.OriginalDataSet) as f:
            self.OrignalContent = f.readlines()
            f.close()

        articleid = 0
        articles = []
        authornames = []
        authorofArticles = []
        authors = []
        article = None
        element = 0
        for line in self.OrignalContent:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, len(self.OrignalContent), "Reading File Content to Generate Graph: "
            )
            line = line.strip()
            if line.startswith("#*"):
                articleid = articleid + 1
                article = Article("p_" + str(articleid))
                article.articlename = line.replace("#*", "").replace("\r\n", "")
            if line.startswith("#t"):
                article.time = line.replace("#t", "").replace("\r\n", "")

            if line.startswith("#@"):
                authorsofArticle = line.replace("#@", "").replace("\r\n", "").split(",")
                for author in authorsofArticle:
                    author = author.strip()
                    if not author in authornames:
                        authornames.append(author)
                    articleauthor = AuthorInArticle(article.articleid, authornames.index(author) + 1)
                    authorofArticles.append(articleauthor)
            if line.startswith("#!"):
                articles.append(article)
        for index in range(len(authornames)):
            author = Author(index + 1, authornames[index])
            authors.append(author)
        self.Graph = networkx.Graph()
        for item_article in articles:
            self.Graph.add_node(
                item_article.articleid,
                {"node_type": "E", "title": item_article.articlename.decode("latin_1"), "time": int(item_article.time)},
            )
        for item_author in authors:
            self.Graph.add_node(
                int(item_author.authorid), {"node_type": "N", "name": item_author.name.decode("latin_1")}
            )
        for item_edge in authorofArticles:
            self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))

        print "Reading Original Dataset finished", datetime.today()
 def readingOrginalDataset(self):
     print "Starting Reading Original Dataset", datetime.today()
     con = None
     try:
         con = psycopg2.connect(database='projetomestrado', user='******', password='******')
         
         curPublicacao = con.cursor()
         curPublicacao.execute("select distinct p.idpublicacao, p.titulo, p.ano from projetomestrado.publicacao p inner join projetomestrado.autorpublicacao a on a.idpublicacao = p.idpublicacao where a.idautor in (select idautor from projetomestrado.autor where afiliacao = 'Instituto Militar de Engenharia')")
         curPublicacaoData = curPublicacao.fetchall()
         element = 0
         qty = len(curPublicacaoData)
         print qty
         for linha in curPublicacaoData:
             element = element+1
             FormatingDataSets.printProgressofEvents(element, qty, "Adding paper to new graph: ")
         
             idpublicacao = linha[0]
             curPublicacaoPalavras = con.cursor()
             curPublicacaoPalavras.execute("select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao =" + str(idpublicacao))
             palavras = []
             for palavra in curPublicacaoPalavras.fetchall():
                 palavras.append(palavra[0].strip())
             curAutores = con.cursor()
             curAutores.execute("select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = "+ str(idpublicacao))
             autores = []
             for autor in curAutores.fetchall():
                 autores.append([autor[0], autor[1] + "," + autor[2]])
         
                 
             self.Publications.append([idpublicacao, linha[1], linha[2], palavras, autores ])
         
         self.Graph = networkx.Graph()
         
         for item_article in self.Publications:
             self.Graph.add_node('P_' + str(item_article[0]), {'node_type' : 'E', 'title' : item_article[1].decode("latin_1"), 'time' : int(item_article[2]), 'keywords': str(item_article[3]) })
             for item_autor in item_article[4]:
                 self.Graph.add_node(int(item_autor[0]), {'node_type' : 'N', 'name' : item_autor[1].decode("latin_1") })
                 self.Graph.add_edge('P_' + str(item_article[0]), int(item_autor[0]) )
         
         print "Reading Original Dataset finished", datetime.today()
         
         
 
         
         
         
         
     except psycopg2.DatabaseError, e:
         print 'Error %s' % e
def calculatingInputToFuzzy(graph, nodesnotLinked,  params):
    """Compute intensity, similarity and age inputs for every candidate pair.

    For each pair the neighbours shared by both nodes are visited. Per shared
    neighbour the edge data between it and each node of the pair is collected
    and used to accumulate:

    * intensity: number of such edges, per node of the pair,
    * age: the largest 'time' seen on those edges, per node of the pair,
    * similarity: ``get_jacard_domain`` of the two lists of 'keywords'.

    Pairs without any shared neighbour produce no entry.

    Args:
        graph: graph whose edges carry 'time' and 'keywords' data.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        params: object exposing ``t0_``; ages are reported as
            ``abs(params.t0_ - latest_time)``.

    Returns:
        A list of dicts with keys no1, no2, intensityno1, intensityno2,
        similarity, ageno1 and ageno2.
    """

    def edge_data_between(node, neighbor):
        # Data dict of every edge joining ``node`` and ``neighbor``.
        return [edge
                for n1, n2, edge in graph.edges([node, neighbor], data=True)
                if (n1 == node and n2 == neighbor) or (n1 == neighbor and n2 == node)]

    result = []
    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        IntensityNode1 = 0
        IntensityNode2 = 0
        Similarities = 0
        AgesNode1 = 0
        AgesNode2 = 0

        for cn in CommonNeigbors:
            infoNode1 = edge_data_between(pair[0], cn)
            infoNode2 = edge_data_between(pair[1], cn)

            IntensityNode1 = IntensityNode1 + len(infoNode1)
            IntensityNode2 = IntensityNode2 + len(infoNode2)

            MaxTimeNode1 = max(info['time'] for info in infoNode1)
            MaxTimeNode2 = max(info['time'] for info in infoNode2)

            AgesNode1 = max(AgesNode1, MaxTimeNode1)
            # Fixed: node 2's age was previously updated from node 1's time.
            AgesNode2 = max(AgesNode2, MaxTimeNode2)

            bagofWordsNode1 = [info['keywords'] for info in infoNode1]
            bagofWordsNode2 = [info['keywords'] for info in infoNode2]

            Similarities = Similarities + get_jacard_domain(bagofWordsNode1, bagofWordsNode2)

        AgesNode1 = abs(params.t0_ - AgesNode1)
        AgesNode2 = abs(params.t0_ - AgesNode2)
        if len(CommonNeigbors) > 0:
            # NOTE(review): under Python 2 this is an integer division if
            # get_jacard_domain returns ints -- confirm it returns floats.
            Similarity = (Similarities / len(CommonNeigbors)) * 100
            result.append({
                'no1': str(pair[0]),
                'no2': str(pair[1]),
                'intensityno1': IntensityNode1,
                'intensityno2': IntensityNode2,
                'similarity': Similarity,
                'ageno1': AgesNode1,
                'ageno2': AgesNode2,
            })
    return result
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Store the summed cnWts*/aaWts* weights of every candidate pair.

    For each pair in ``nodesnotLinked`` the neighbours shared by both nodes are
    collected, and six entries of the dict returned by
    ``get_partOfWeightCalculating`` (cnWts02/05/08 and aaWts02/05/08) are
    summed over those neighbours. One record per pair is inserted, even when
    the pair shares no neighbour (all sums are then 0).

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: argument used to open the ``Base`` table.

    Returns:
        The committed ``Base`` object with fields node1, node2, cnWTS02,
        cnWTS05, cnWTS08, aaWTS02, aaWTS05 and aaWTS08.
    """
    # Keys read from each partial result, in the same order as the columns.
    item_keys = ('cnWts02', 'cnWts05', 'cnWts08',
                 'aaWts02', 'aaWts05', 'aaWts08')

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02', 'cnWTS05', 'cnWTS08', 'aaWTS02',
               'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        totals = [0] * len(item_keys)
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for position, key in enumerate(item_keys):
                totals[position] = totals[position] + item[key]

        pdb.insert(str(pair[0]), str(pair[1]), *totals)
    pdb.commit()
    return pdb
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Store the summed cnWts*/aaWts* weights of every candidate pair.

    For each pair in ``nodesnotLinked`` the neighbours shared by both nodes are
    collected, and six entries of the dict returned by
    ``get_partOfWeightCalculating`` (cnWts02/05/08 and aaWts02/05/08) are
    summed over those neighbours. One record per pair is inserted, even when
    the pair shares no neighbour (all sums are then 0).

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: argument used to open the ``Base`` table.

    Returns:
        The committed ``Base`` object with fields node1, node2, cnWTS02,
        cnWTS05, cnWTS08, aaWTS02, aaWTS05 and aaWTS08.
    """
    # Keys read from each partial result, in the same order as the columns.
    item_keys = ('cnWts02', 'cnWts05', 'cnWts08',
                 'aaWts02', 'aaWts05', 'aaWts08')

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02', 'cnWTS05', 'cnWTS08', 'aaWTS02', 'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        totals = [0] * len(item_keys)
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for position, key in enumerate(item_keys):
                totals[position] = totals[position] + item[key]

        pdb.insert(str(pair[0]), str(pair[1]), *totals)
    pdb.commit()
    return pdb
Esempio n. 9
0
    def __init__(self, preparedParameters, filePathResults,
                 filePathAnalyseResult, topRank):
        print "Starting Analysing the results", datetime.today()

        absFilePath = filePathResults
        absfilePathAnalyseResult = filePathAnalyseResult  #FormatingDataSets.get_abs_file_path(filePathAnalyseResult)
        fResult = open(absFilePath, 'r')
        with open(absfilePathAnalyseResult, 'w') as fnodes:
            self.success = 0
            element = 0
            for line in fResult:
                element = element + 1
                FormatingDataSets.printProgressofEvents(
                    element, topRank, "Analysing the results: ")
                cols = line.strip().replace('\n', '').split('\t')
                if len(
                        list(
                            networkx.common_neighbors(
                                preparedParameters.testGraph,
                                cols[len(cols) - 2],
                                cols[len(cols) - 1]))) != 0:
                    self.success = self.success + 1
                    fnodes.write(cols[len(cols) - 2] + '\t' +
                                 cols[len(cols) - 1] + '\t' + 'SUCCESS \r\n')
                else:
                    fnodes.write(cols[len(cols) - 2] + '\t' +
                                 cols[len(cols) - 1] + '\t' + 'FAILED \r\n')

                if element == topRank:
                    break

            result = float(self.success) / float(topRank) * 100
            strResult = 'Final Result: \t' + str(result) + '%'
            fnodes.write(strResult)
            fnodes.write('\n#\t' + str(self.success))
            fnodes.close()
        print "Analysing the results finished", datetime.today()
Esempio n. 10
0
 def get_pair_nodes_not_linked(self):
     print "Starting getting pair of nodes that is not liked", datetime.today()
     results = []
     nodesinGraph =self.graph.nodes()
     nodesOrdered = sorted(nodesinGraph)
     totalnodesOrdered = len(nodesOrdered)
     element = 0
     
     for node in nodesOrdered:
         element = element+1
         FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ")
         publicacoes = self.graph.edges(node,data=False)
         qtdepublicacoes = len(publicacoes)
         #print "O autor e seus papers ",node,qtdepublicacoes ,publicacoes 
         if (qtdepublicacoes >= self.min_papers):
             others =  set(n for n in nodesOrdered if n > node)
             for otherNode in others:
                 other_publicacoes = self.graph.edges(otherNode,data=False)
                 other_qtdepublicacoes = len(other_publicacoes)
                 if (other_qtdepublicacoes >= self.min_papers):
                     if (not self.graph.has_edge(node, otherNode)):
                         if self.USE_MAX_NUMBER_OF_PEOPLE_BETWEEN == True:
                             if networkx.has_path(self.graph, node, otherNode):
                                 shortestPathResult = networkx.shortest_path(self.graph, node, otherNode)
                                 #print shortestPathResult
                                 tamanho_caminho = len(shortestPathResult) - 1
                                 #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho)
                                 #print repr(networkx.shortest_path(graph, node1, other_node));
                                 if ( tamanho_caminho > 0 ) and (tamanho_caminho <= self.MAX_NUMBER_OF_PEOPLE_BETWEEN ): # -2 porque inclui o inicio e fim
                                     #print "adicionando %s - %s" %(node, otherNode)
                                     results.append([node, otherNode])
                         else:
                             results.append([node, otherNode])
             
     print "getting pair of nodes that is not liked finished", datetime.today()
     return results
 def get_pair_nodes_not_linked(self, graph, file, min_papers):
     print "Starting getting pair of nodes that is not liked", datetime.today()
     results = []
     nodesinGraph =set(n for n,d in graph.nodes(data=True) if d['node_type'] == 'N')
     currentNodes = set()
     for n in nodesinGraph:
         
         papers = set(networkx.all_neighbors(graph, n))
         print papers
         if (len(papers) >= min_papers):
             currentNodes.add(n)
     
     print 'qty of authors: ', len(currentNodes)
     nodesOrdered = sorted(currentNodes)
     element = 0
     totalnodesOrdered = len(nodesOrdered)
     for node1 in nodesOrdered:
         element = element+1
         FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ")
         
         others =  set(n for n in nodesOrdered if n > node1)
         notLinked = set()
         for other_node in others:
             if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0:
                 notLinked.add(other_node)
         results.append([node1, notLinked])
         if element % 2000 == 0:
             for item in results:
                 file.write(str(item[0]) + '\t' +  repr(item[1]) + '\n')
             results = []
             
     for item in results:
         file.write(str(item[0]) + '\t' +  repr(item[1]) + '\n')
     results = []
         
     print "getting pair of nodes that is not liked finished", datetime.today()
Esempio n. 12
0
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        con = None
        try:
            con = psycopg2.connect(database='projetomestrado',
                                   user='******',
                                   password='******')

            curPublicacao = con.cursor()
            curPublicacao.execute(
                "select idpublicacao, titulo, ano from projetomestrado.publicacao  where ano >= 1993 and ano <= 2000"
            )
            curPublicacaoData = curPublicacao.fetchall()
            element = 0
            for linha in curPublicacaoData:
                element = element + 1
                FormatingDataSets.printProgressofEvents(
                    element, len(curPublicacaoData),
                    "Adding paper to new graph: ")

                idpublicacao = linha[0]
                curPublicacaoPalavras = con.cursor()
                curPublicacaoPalavras.execute(
                    "select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao ="
                    + str(idpublicacao))
                palavras = []
                for palavra in curPublicacaoPalavras.fetchall():
                    palavras.append(palavra[0].strip())
                curAutores = con.cursor()
                curAutores.execute(
                    "select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = "
                    + str(idpublicacao))
                autores = []
                for autor in curAutores.fetchall():
                    autores.append([autor[0], autor[1] + "," + autor[2]])

                self.Publications.append(
                    [idpublicacao, linha[1], linha[2], palavras, autores])

            self.Graph = networkx.Graph()

            for item_article in self.Publications:
                self.Graph.add_node(
                    'P_' + str(item_article[0]), {
                        'node_type': 'E',
                        'title': item_article[1].decode("latin_1"),
                        'time': int(item_article[2]),
                        'keywords': str(item_article[3])
                    })
                for item_autor in item_article[4]:
                    self.Graph.add_node(int(item_autor[0]), {
                        'node_type': 'N',
                        'name': item_autor[1].decode("latin_1")
                    })
                    self.Graph.add_edge('P_' + str(item_article[0]),
                                        int(item_autor[0]))

            print "Reading Original Dataset finished", datetime.today()

        except psycopg2.DatabaseError, e:
            print 'Error %s' % e
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Store the nine summed WCN and nine summed WAA weights of every pair.

    For each pair in ``nodesnotLinked`` the neighbours shared by both nodes are
    collected. ``get_partOfWeightCalculating`` returns, per shared neighbour,
    a dict whose 'WCN' and 'WAA' entries are indexed 0..8; each index is
    summed separately over the shared neighbours. One record per pair is
    inserted, even when the pair shares no neighbour (all sums are then 0).

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: argument used to open the ``Base`` table.

    Returns:
        The committed ``Base`` object with fields node1, node2,
        WCNFTI01..WCNFTI09 and WAAFTI01..WAAFTI09.
    """
    wcn_fields = ('WCNFTI01', 'WCNFTI02', 'WCNFTI03', 'WCNFTI04', 'WCNFTI05',
                  'WCNFTI06', 'WCNFTI07', 'WCNFTI08', 'WCNFTI09')
    waa_fields = ('WAAFTI01', 'WAAFTI02', 'WAAFTI03', 'WAAFTI04', 'WAAFTI05',
                  'WAAFTI06', 'WAAFTI07', 'WAAFTI08', 'WAAFTI09')

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', *(wcn_fields + waa_fields))
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        wcn_totals = [0] * len(wcn_fields)
        waa_totals = [0] * len(waa_fields)
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            # Explicit indexing keeps the IndexError for short weight lists.
            for position in range(len(wcn_fields)):
                wcn_totals[position] = wcn_totals[position] + item['WCN'][position]
            for position in range(len(waa_fields)):
                waa_totals[position] = waa_totals[position] + item['WAA'][position]

        # Exactly one value per declared field. The previous call passed the
        # 02 totals twice in each group (22 values for 20 fields), so the
        # values no longer lined up with their columns.
        pdb.insert(str(pair[0]), str(pair[1]), *(wcn_totals + waa_totals))
    pdb.commit()
    return pdb
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        with open(self.OriginalDataSet) as f:
            self.OrignalContent = f.readlines()
            f.close()

        articleid = 0
        articles = []
        authornames = []
        authorofArticles = []
        authors = []
        article = None
        element = 0
        for line in self.OrignalContent:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, len(self.OrignalContent),
                "Reading File Content to Generate Graph: ")
            line = line.strip()
            if line.startswith('#*'):
                articleid = articleid + 1
                article = Article('p_' + str(articleid))
                article.articlename = line.replace('#*',
                                                   '').replace('\r\n', '')
            if line.startswith('#t'):
                article.time = line.replace('#t', '').replace('\r\n', '')

            if line.startswith('#@'):
                authorsofArticle = line.replace('#@',
                                                '').replace('\r\n',
                                                            '').split(',')
                for author in authorsofArticle:
                    author = author.strip()
                    if not author in authornames:
                        authornames.append(author)
                    articleauthor = AuthorInArticle(
                        article.articleid,
                        authornames.index(author) + 1)
                    authorofArticles.append(articleauthor)
            if line.startswith('#!'):
                articles.append(article)
        for index in range(len(authornames)):
            author = Author(index + 1, authornames[index])
            authors.append(author)
        self.Graph = networkx.Graph()
        for item_article in articles:
            self.Graph.add_node(
                item_article.articleid, {
                    'node_type': 'E',
                    'title': item_article.articlename.decode("latin_1"),
                    'time': int(item_article.time)
                })
        for item_author in authors:
            self.Graph.add_node(int(item_author.authorid), {
                'node_type': 'N',
                'name': item_author.name.decode("latin_1")
            })
        for item_edge in authorofArticles:
            self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))

        print "Reading Original Dataset finished", datetime.today()
Esempio n. 15
0
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Store the nine summed WCN and nine summed WAA weights of every pair.

    For each pair in ``nodesnotLinked`` the neighbours shared by both nodes are
    collected. ``get_partOfWeightCalculating`` returns, per shared neighbour,
    a dict whose 'WCN' and 'WAA' entries are indexed 0..8; each index is
    summed separately over the shared neighbours. One record per pair is
    inserted, even when the pair shares no neighbour (all sums are then 0).

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized collection of pairs, indexed as pair[0], pair[1].
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: argument used to open the ``Base`` table.

    Returns:
        The committed ``Base`` object with fields node1, node2,
        WCNFTI01..WCNFTI09 and WAAFTI01..WAAFTI09.
    """
    wcn_fields = ('WCNFTI01', 'WCNFTI02', 'WCNFTI03', 'WCNFTI04', 'WCNFTI05',
                  'WCNFTI06', 'WCNFTI07', 'WCNFTI08', 'WCNFTI09')
    waa_fields = ('WAAFTI01', 'WAAFTI02', 'WAAFTI03', 'WAAFTI04', 'WAAFTI05',
                  'WAAFTI06', 'WAAFTI07', 'WAAFTI08', 'WAAFTI09')

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', *(wcn_fields + waa_fields))
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        wcn_totals = [0] * len(wcn_fields)
        waa_totals = [0] * len(waa_fields)
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            # Explicit indexing keeps the IndexError for short weight lists.
            for position in range(len(wcn_fields)):
                wcn_totals[position] = (wcn_totals[position] +
                                        item['WCN'][position])
            for position in range(len(waa_fields)):
                waa_totals[position] = (waa_totals[position] +
                                        item['WAA'][position])

        # Exactly one value per declared field. The previous call passed the
        # 02 totals twice in each group (22 values for 20 fields), so the
        # values no longer lined up with their columns.
        pdb.insert(str(pair[0]), str(pair[1]), *(wcn_totals + waa_totals))
    pdb.commit()
    return pdb