Exemple #1
0
def step01(paramFile):
    """Read the original 'cond-mat' dataset (1994-1999) and save the graph.

    :param paramFile: path of the parameter file; its ``graph_file`` value
        is passed to ``Formating``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    formatter = Formating(settings.graph_file)
    formatter.subject = 'cond-mat'
    formatter.yearstoRescue = list(range(1994, 2000))
    formatter.readingOrginalDataset()
    # generating_graph() is currently not called in this step.
    formatter.saveGraph()
 def readingResultsFile(self, filepath):
     """Read comma-separated node pairs from a results file.

     :param filepath: path handed to ``Formating.get_abs_file_path``.
     :return: list of ``[first, second]`` string pairs, one per non-empty
         line; any additional comma-separated fields are ignored.
     :raises IndexError: if a non-empty line holds fewer than two fields.
     """
     results = []
     myfile = Formating.get_abs_file_path(filepath)
     # The with-statement closes the file; no explicit close() is needed.
     with open(myfile, 'r') as fileNodesNotLinked:
         for lineofFile in fileNodesNotLinked:
             fields = lineofFile.replace('\n', '').split(',')
             if fields == ['']:
                 # Blank line (for example a trailing newline at the end
                 # of the file): skip it instead of raising IndexError.
                 continue
             results.append([fields[0], fields[1]])
     return results
Exemple #3
0
    def saving_orderedResult(self, filepath, ordering):
        """Write each metric of every ranked entry to its own text file.

        One file per metric is created, named ``filepath + '.<metric>.txt'``
        and resolved through ``Formating.get_abs_file_path``.  Every element
        of ``ordering`` adds one ``node1;node2;score`` line (each value
        written with ``repr``) to each of the six files.

        :param filepath: common path prefix of the output files.
        :param ordering: iterable of dicts keyed by metric name ('cn', 'pa',
            'aas', 'jc', 'ts05', 'dts05'); each value is a dict holding
            'node1', 'node2' and the score stored under the metric's name.
        """
        metrics = ('cn', 'pa', 'aas', 'jc', 'ts05', 'dts05')
        outputs = {}
        try:
            for metric in metrics:
                outputs[metric] = open(
                    Formating.get_abs_file_path(
                        filepath + '.' + metric + '.txt'),
                    'w')

            for item_result in ordering:
                for metric in metrics:
                    entry = item_result[metric]
                    outputs[metric].write(
                        repr(entry['node1']) + ';' +
                        repr(entry['node2']) + ';' +
                        repr(entry[metric]) + '\n')
        finally:
            # Close whatever was opened, even if an open, lookup or write
            # failed part-way, so that no file handle is leaked.
            for handle in outputs.values():
                handle.close()
 def saving_orderedResult(self, filepath, ordering):
     """Write each metric of every ranked entry to its own text file.

     One file per metric is created, named ``filepath + '.<metric>.txt'``
     and resolved through ``Formating.get_abs_file_path``.  Every element
     of ``ordering`` adds one ``node1;node2;score`` line (each value
     written with ``repr``) to each of the six files.

     :param filepath: common path prefix of the output files.
     :param ordering: iterable of dicts keyed by metric name ('cn', 'pa',
         'aas', 'jc', 'ts05', 'dts05'); each value is a dict holding
         'node1', 'node2' and the score stored under the metric's name.
     """
     metrics = ('cn', 'pa', 'aas', 'jc', 'ts05', 'dts05')
     outputs = {}
     try:
         for metric in metrics:
             outputs[metric] = open(
                 Formating.get_abs_file_path(
                     filepath + '.' + metric + '.txt'),
                 'w')

         for item_result in ordering:
             for metric in metrics:
                 entry = item_result[metric]
                 outputs[metric].write(
                     repr(entry['node1']) + ';' +
                     repr(entry['node2']) + ';' +
                     repr(entry[metric]) + '\n')
     finally:
         # Close whatever was opened, even if an open, lookup or write
         # failed part-way, so that no file handle is leaked.
         for handle in outputs.values():
             handle.close()
 def __init__(self, myparams, nodesnotlinked):
     """Compute nine time/domain-weighted scores for each unlinked pair.

     For every pair in ``nodesnotlinked`` a dict with the keys 'node1',
     'node2' and 'TS09' ... 'TS01' is appended to ``self.results``.  Each
     score is the sum, over the pair's common neighbours, of
     ``(hm * decay) / (control * base ** jcDomain)`` with ``base`` going
     from 0.9 down to 0.1.  Pairs without common neighbours keep 0.0 for
     every score.

     :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are
         read from it.
     :param nodesnotlinked: sized iterable of node pairs (index 0 and 1).
     """
     self.myparams = myparams
     qtyofNodesToProcess = len(nodesnotlinked)
     element = 0
     self.results = []

     for pair in nodesnotlinked:
         element = element+1
         Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
         neighbors_node1 = self.all_neighbors(pair[0])
         neighbors_node2 = self.all_neighbors(pair[1])
         # NOTE(review): the two lengths below are never used in this variant.
         len_neihbors_node1 = len(neighbors_node1)
         len_neihbors_node2 = len(neighbors_node2)
         CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
         CommonNeigbors_Feature = len(CommonNeigbors)
         # One accumulator per domain-decay base (0.9 ... 0.1).
         TS_Feature09 = float(0)
         TS_Feature08 = float(0)
         TS_Feature07 = float(0)
         TS_Feature06 = float(0)
         TS_Feature05 = float(0)
         TS_Feature04 = float(0)
         TS_Feature03 = float(0)
         TS_Feature02 = float(0)
         TS_Feature01 = float(0)

         if CommonNeigbors_Feature > 0:
             print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
             # The Jaccard and Adamic/Adar-style features computed by sibling
             # variants of this constructor are disabled here.
             for pair_common_neighbor in CommonNeigbors:
                 # Link records between each node of the pair and the shared
                 # neighbour; each record is read through 'time' and 'keywords'.
                 objectsNode1 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[0], pair_common_neighbor)
                 objectsNode2 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[1], pair_common_neighbor)

                 # Harmonic mean of the two record counts.
                 hm = 2 / ( (1/float(len(objectsNode1))) + (1/float(len(objectsNode2))))

                 timesofLinksNode1 = []
                 timesofLinksNode2 = []
                 bagofWordsNode1 = set()
                 bagofWordsNode2 = set()

                 # NOTE(review): eval() executes whatever expression is stored
                 # under 'keywords'; if the graph data can come from an
                 # untrusted source this is a code-execution risk
                 # (ast.literal_eval may be a safer choice - confirm the format).
                 for t1 in objectsNode1:
                     timesofLinksNode1.append(t1['time'])
                     for b1 in eval(t1['keywords']):
                         bagofWordsNode1.add(b1)
                 for t2 in objectsNode2:
                     timesofLinksNode2.append(t2['time'])
                     for b2 in eval(t2['keywords']):
                         bagofWordsNode2.add(b2)

                 # The sorted order is not relied on below; only max() is taken.
                 timesofLinksNode1.sort(reverse=True)
                 timesofLinksNode2.sort(reverse=True)
                 timeofLinks = timesofLinksNode1 + timesofLinksNode2

                 # Distance between t0_ and the most recent time on either link.
                 k =  int(self.myparams.t0_)  - int(max(timeofLinks))

                 # NOTE(review): despite its name this uses base 0.9, not 0.4;
                 # the other per-base decay functions are disabled.
                 decayfunction04 = (0.9) ** k

                 # Gap between the latest times of the two links, plus one so
                 # the denominator is never zero.
                 control = ( abs( max(timesofLinksNode1) - max(timesofLinksNode2) ) + 1)

                 jcDomain = self.get_jacard_domain(bagofWordsNode1, bagofWordsNode2)

                 # Same numerator for every score; only the base raised to
                 # jcDomain in the denominator changes.
                 ts09 = (hm * decayfunction04) /  (control * ((0.9) ** jcDomain))
                 ts08 = (hm * decayfunction04) /  (control * ((0.8) ** jcDomain))
                 ts07 = (hm * decayfunction04) /  (control * ((0.7) ** jcDomain))
                 ts06 = (hm * decayfunction04) /  (control * ((0.6) ** jcDomain))
                 ts05 = (hm * decayfunction04) /  (control * ((0.5) ** jcDomain))
                 ts04 = (hm * decayfunction04) /  (control * ((0.4) ** jcDomain))
                 ts03 = (hm * decayfunction04) /  (control * ((0.3) ** jcDomain))
                 ts02 = (hm * decayfunction04) /  (control * ((0.2) ** jcDomain))
                 ts01 = (hm * decayfunction04) /  (control * ((0.1) ** jcDomain))

                 TS_Feature09 = TS_Feature09 + ts09
                 TS_Feature08 = TS_Feature08 + ts08
                 TS_Feature07 = TS_Feature07 + ts07
                 TS_Feature06 = TS_Feature06 + ts06
                 TS_Feature05 = TS_Feature05 + ts05
                 TS_Feature04 = TS_Feature04 + ts04
                 TS_Feature03 = TS_Feature03 + ts03
                 TS_Feature02 = TS_Feature02 + ts02
                 TS_Feature01 = TS_Feature01 + ts01

         # One result row per pair, including pairs with no common neighbour.
         self.results.append({'node1' : pair[0], 'node2': pair[1], 'TS09' : TS_Feature09, 'TS08' : TS_Feature08, 'TS07' : TS_Feature07, 'TS06' : TS_Feature06, 'TS05' : TS_Feature05,'TS04' : TS_Feature04,'TS03' : TS_Feature03, 'TS02'  : TS_Feature02, 'TS01'  : TS_Feature01 })
Exemple #6
0
'''
Created on Aug 22, 2015

@author: cptullio

The first step is generating the graph from the database information.
The parameter file is needed to indicate where the graph will be saved.

'''
from parametering.ParameterUtil import ParameterUtil

from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # Load the run parameters; only graph_file is used from them below.
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_2004_2010.txt')
    astroPh = Formating(util.graph_file)
    # Reading the original dataset is currently disabled in this script.
    #astroPh.readingOrginalDataset()
    # Generate the graph, then save it.
    astroPh.generating_graph()
    astroPh.saveGraph()
    def __init__(self, myparams, nodesnotlinked, weights, WillCombinate):
        self.myparams = myparams
        self.weights = weights
        self.WillCombinate = WillCombinate
        qtyofNodesToProcess = len(nodesnotlinked)
        element = 0
        calcutations = []
        self.results = []
        self.minCN = float(0)
        self.maxCN = float(0)
        self.minAAS = float(0)
        self.maxAAS = float(0)
        self.minJC = float(0)
        self.maxJC = float(0)
        self.minPA = float(0)
        self.maxPA = float(0)
        self.minTS08 = float(0)
        self.maxTS08 = float(0)
        self.minTS05 = float(0)
        self.maxTS05 = float(0)
        self.minTS02 = float(0)
        self.maxTS02 = float(0)

        for pair in nodesnotlinked:
            element = element + 1
            Formating.printProgressofEvents(
                element, qtyofNodesToProcess,
                "Calculating features for nodes not liked: ")
            neighbors_node1 = self.all_neighbors(pair[0])
            neighbors_node2 = self.all_neighbors(pair[1])
            CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
            CommonNeigbors_Feature = len(CommonNeigbors)
            AAS_Feature = 0
            JC_Feature = 0
            PA_Feature = len(neighbors_node1) * len(neighbors_node2)
            TS_Feature08 = float(0)
            TS_Feature05 = float(0)
            TS_Feature02 = float(0)

            DTS_Feature = float(0)

            if CommonNeigbors_Feature > 0:
                print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
                x = (float)(len(neighbors_node1.union(neighbors_node2)))
                if x > 0:
                    JC_Feature = CommonNeigbors_Feature / x
                for pair_common_neighbor in CommonNeigbors:
                    secondary_neighbors = self.all_neighbors(
                        pair_common_neighbor)
                    AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) +
                                        0.00001)

                    objectsNode1 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[0], pair_common_neighbor)
                    objectsNode2 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[1], pair_common_neighbor)
                    hm = 2 / ((1 / float(len(objectsNode1))) +
                              (1 / float(len(objectsNode2))))
                    timesofLinksNode1 = []
                    timesofLinksNode2 = []

                    for t1 in objectsNode1:
                        timesofLinksNode1.append(t1['time'])
                    for t2 in objectsNode2:
                        timesofLinksNode2.append(t2['time'])

                    timesofLinksNode1.sort(reverse=True)
                    timesofLinksNode2.sort(reverse=True)
                    timeofLinks = timesofLinksNode1 + timesofLinksNode2
                    k = int(self.myparams.t0_) - int(max(timeofLinks))
                    decayfunction08 = (1 - 0.8)**k
                    decayfunction05 = (1 - 0.5)**k
                    decayfunction02 = (1 - 0.2)**k

                    control = (
                        abs(max(timesofLinksNode1) - max(timesofLinksNode2)) +
                        1)
                    ts08 = (hm * decayfunction08) / control
                    ts05 = (hm * decayfunction05) / control
                    ts02 = (hm * decayfunction02) / control

                    TS_Feature08 = TS_Feature08 + ts08
                    TS_Feature05 = TS_Feature05 + ts05
                    TS_Feature02 = TS_Feature02 + ts02

            if CommonNeigbors_Feature < self.minCN:
                self.minCN = CommonNeigbors_Feature
            if CommonNeigbors_Feature > self.maxCN:
                self.maxCN = CommonNeigbors_Feature

            if AAS_Feature < self.minAAS:
                self.minAAS = AAS_Feature
            if AAS_Feature > self.maxAAS:
                self.maxAAS = CommonNeigbors_Feature

            if PA_Feature < self.minPA:
                self.minPA = PA_Feature
            if PA_Feature > self.maxPA:
                self.maxPA = PA_Feature

            if JC_Feature < self.minJC:
                self.minJC = JC_Feature
            if JC_Feature > self.maxJC:
                self.maxJC = JC_Feature

            if TS_Feature08 < self.minTS08:
                self.minTS08 = TS_Feature08
            if TS_Feature08 > self.maxTS08:
                self.maxTS08 = TS_Feature08

            if TS_Feature05 < self.minTS05:
                self.minTS05 = TS_Feature05
            if TS_Feature05 > self.maxTS05:
                self.maxTS05 = TS_Feature05

            if TS_Feature02 < self.minTS02:
                self.minTS02 = TS_Feature02
            if TS_Feature02 > self.maxTS02:
                self.maxTS02 = TS_Feature02

            calcutations.append({
                'node1': pair[0],
                'node2': pair[1],
                'cn': CommonNeigbors_Feature,
                'aas': AAS_Feature,
                'jc': JC_Feature,
                'pa': PA_Feature,
                'ts08': TS_Feature08,
                'ts05': TS_Feature05,
                'ts02': TS_Feature02,
                'dts': DTS_Feature
            })

        self.combination(calcutations)
def hepth(years):
    """Read the original 'hep-th' dataset with ``yearstoRescue`` set to *years*.

    :param years: value assigned to ``yearstoRescue`` before reading.
    """
    dataset = Formating('/home/cmuniz/execMen/grafos/hepth_data')
    dataset.yearstoRescue = years
    dataset.subject = 'hep-th'
    dataset.readingOrginalDataset()
 def __init__(self, myparams, nodesnotlinked):
     """Compute neighbourhood, time and domain features per unlinked pair.

     For every pair in ``nodesnotlinked`` a dict with the keys 'node1',
     'node2', 'cn', 'aas', 'jc', 'pa', 'ts08', 'ts05', 'ts02', 'dts08',
     'dts05' and 'dts02' is appended to ``self.results``.  Pairs without
     common neighbours keep 0 for everything except 'pa'.

     :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are
         read from it.
     :param nodesnotlinked: sized iterable of node pairs (index 0 and 1).
     """
     self.myparams = myparams
     qtyofNodesToProcess = len(nodesnotlinked)
     element = 0
     self.results = []

     for pair in nodesnotlinked:
         element = element+1
         Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
         neighbors_node1 = self.all_neighbors(pair[0])
         neighbors_node2 = self.all_neighbors(pair[1])
         len_neihbors_node1 = len(neighbors_node1)
         len_neihbors_node2 = len(neighbors_node2)
         CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
         CommonNeigbors_Feature = len(CommonNeigbors)
         AAS_Feature = 0
         JC_Feature = 0
         # Product of the two neighbourhood sizes.
         PA_Feature = len_neihbors_node1 * len_neihbors_node2
         TS_Feature08 = float(0)
         TS_Feature05 = float(0)
         TS_Feature02 = float(0)

         DTS_Feature02 = float(0)
         DTS_Feature05 = float(0)
         DTS_Feature08 = float(0)

         if CommonNeigbors_Feature > 0:
             print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
             # Common neighbours divided by the size of the union.
             x = (float)(len(neighbors_node1.union(neighbors_node2)))
             if x > 0:
                 JC_Feature = CommonNeigbors_Feature/x
             for pair_common_neighbor in CommonNeigbors:
                 secondary_neighbors = self.all_neighbors(pair_common_neighbor)
                 # The small constant keeps the denominator away from zero
                 # when log10 of the neighbour count is 0.
                 AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) + 0.00001)

                 # Link records between each node of the pair and the shared
                 # neighbour; each record is read through 'time' and 'keywords'.
                 objectsNode1 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[0], pair_common_neighbor)
                 objectsNode2 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[1], pair_common_neighbor)

                 # Harmonic mean of the two record counts.
                 hm = 2 / ( (1/float(len(objectsNode1))) + (1/float(len(objectsNode2))))

                 timesofLinksNode1 = []
                 timesofLinksNode2 = []
                 bagofWordsNode1 = set()
                 bagofWordsNode2 = set()

                 # NOTE(review): eval() executes whatever expression is stored
                 # under 'keywords'; if the graph data can come from an
                 # untrusted source this is a code-execution risk
                 # (ast.literal_eval may be a safer choice - confirm the format).
                 for t1 in objectsNode1:
                     timesofLinksNode1.append(t1['time'])
                     for b1 in eval(t1['keywords']):
                         bagofWordsNode1.add(b1)
                 for t2 in objectsNode2:
                     timesofLinksNode2.append(t2['time'])
                     for b2 in eval(t2['keywords']):
                         bagofWordsNode2.add(b2)

                 # The sorted order is not relied on below; only max() is taken.
                 timesofLinksNode1.sort(reverse=True)
                 timesofLinksNode2.sort(reverse=True)
                 timeofLinks = timesofLinksNode1 + timesofLinksNode2

                 # Distance between t0_ and the most recent time on either link.
                 k =  int(self.myparams.t0_)  - int(max(timeofLinks))

                 decayfunction08 = (0.8) ** k
                 decayfunction05 = (0.5) ** k
                 decayfunction02 = (0.2) ** k

                 # Gap between the latest times of the two links, plus one so
                 # the denominator is never zero.
                 control = ( abs( max(timesofLinksNode1) - max(timesofLinksNode2) ) + 1)

                 ts08 = (hm * decayfunction08) / control
                 ts05 = (hm * decayfunction05) / control
                 ts02 = (hm * decayfunction02) / control

                 TS_Feature08 = TS_Feature08 + ts08
                 TS_Feature05 = TS_Feature05 + ts05
                 TS_Feature02 = TS_Feature02 + ts02

                 jcDomain = self.get_jacard_domain(bagofWordsNode1, bagofWordsNode2)

                 # NOTE(review): all three domain scores use decayfunction02
                 # in the numerator; only the base raised to jcDomain differs.
                 # Confirm this is intended rather than a copy-paste slip.
                 dts02 = (hm * decayfunction02) /  (control * ((0.2) ** jcDomain))
                 dts05 = (hm * decayfunction02) /  (control * ((0.5) ** jcDomain))
                 dts08 = (hm * decayfunction02) /  (control * ((0.8) ** jcDomain))

                 DTS_Feature02 = DTS_Feature02 + dts02
                 DTS_Feature05 = DTS_Feature05 + dts05
                 DTS_Feature08 = DTS_Feature08 + dts08

         # One result row per pair, including pairs with no common neighbour.
         self.results.append({'node1' : pair[0], 'node2': pair[1], 'cn' : CommonNeigbors_Feature, 'aas' : AAS_Feature, 'jc' : JC_Feature, 'pa' : PA_Feature, 'ts08' : TS_Feature08,'ts05' : TS_Feature05,'ts02' : TS_Feature02, 'dts08'  : DTS_Feature08, 'dts05'  : DTS_Feature05, 'dts02'  : DTS_Feature02 })
def grqc(years):
    """Read the original 'gr-qc' dataset with ``yearstoRescue`` set to *years*.

    :param years: value assigned to ``yearstoRescue`` before reading.
    """
    dataset = Formating('/home/cmuniz/execMen/grafos/grqc_data')
    dataset.yearstoRescue = years
    dataset.subject = 'gr-qc'
    dataset.readingOrginalDataset()
 def __init__(self, myparams, nodesnotlinked):
     """Compute neighbourhood and time-decayed features per unlinked pair.

     For every pair in ``nodesnotlinked`` a dict with the keys 'node1',
     'node2', 'cn', 'aas', 'jc', 'pa', 'ts08', 'ts05', 'ts02' and 'dts' is
     appended to ``self.results``.  'dts' is always 0.0 in this variant.

     :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are
         read from it.
     :param nodesnotlinked: sized iterable of node pairs (index 0 and 1).
     """
     self.myparams = myparams
     qtyofNodesToProcess = len(nodesnotlinked)
     element = 0
     self.results = []

     for pair in nodesnotlinked:
         element = element+1
         Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
         neighbors_node1 = self.all_neighbors(pair[0])
         neighbors_node2 = self.all_neighbors(pair[1])
         CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
         CommonNeigbors_Feature = len(CommonNeigbors)
         AAS_Feature = 0
         JC_Feature = 0
         # Product of the two neighbourhood sizes.
         PA_Feature = len(neighbors_node1) * len(neighbors_node2)
         TS_Feature08 = float(0)
         TS_Feature05 = float(0)
         TS_Feature02 = float(0)

         DTS_Feature = float(0)

         if CommonNeigbors_Feature > 0:
             print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
             # Common neighbours divided by the size of the union.
             x = (float)(len(neighbors_node1.union(neighbors_node2)))
             if x > 0:
                 JC_Feature = CommonNeigbors_Feature/x
             for pair_common_neighbor in CommonNeigbors:
                 secondary_neighbors = self.all_neighbors(pair_common_neighbor)
                 # The small constant keeps the denominator away from zero
                 # when log10 of the neighbour count is 0.
                 AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) + 0.00001)

                 # Link records between each node of the pair and the shared
                 # neighbour; each record is read through 'time'.
                 objectsNode1 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[0], pair_common_neighbor)
                 objectsNode2 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[1], pair_common_neighbor)
                 # Harmonic mean of the two record counts.
                 hm = 2 / ((1/float(len(objectsNode1))) + (1/float(len(objectsNode2))))
                 timesofLinksNode1 = []
                 timesofLinksNode2 = []
                 # NOTE(review): keyword collection is disabled in this
                 # variant, so these two sets are created but never filled.
                 bagofWordsNode1 = set()
                 bagofWordsNode2 = set()

                 for t1 in objectsNode1:
                     timesofLinksNode1.append(t1['time'])
                 for t2 in objectsNode2:
                     timesofLinksNode2.append(t2['time'])

                 # The sorted order is not relied on below; only max() is taken.
                 timesofLinksNode1.sort(reverse=True)
                 timesofLinksNode2.sort(reverse=True)
                 timeofLinks = timesofLinksNode1 + timesofLinksNode2
                 # Distance between t0_ and the most recent time on either link.
                 k =  int(self.myparams.t0_)  - int(max(timeofLinks))
                 decayfunction08 = (1 - 0.8) ** k
                 decayfunction05 = (1 - 0.5) ** k
                 decayfunction02 = (1 - 0.2) ** k

                 # Gap between the latest times of the two links, plus one so
                 # the denominator is never zero.
                 control = (abs( max(timesofLinksNode1) - max(timesofLinksNode2) ) + 1)
                 ts08 = (hm * decayfunction08) / control
                 ts05 = (hm * decayfunction05) / control
                 ts02 = (hm * decayfunction02) / control

                 TS_Feature08 = TS_Feature08 + ts08
                 TS_Feature05 = TS_Feature05 + ts05
                 TS_Feature02 = TS_Feature02 + ts02

                 # The domain-weighted score is not computed here, so
                 # DTS_Feature keeps its initial 0.0.

         # One result row per pair, including pairs with no common neighbour.
         self.results.append({'node1' : pair[0], 'node2': pair[1], 'cn' : CommonNeigbors_Feature, 'aas' : AAS_Feature, 'jc' : JC_Feature, 'pa' : PA_Feature, 'ts08' : TS_Feature08,'ts05' : TS_Feature05,'ts02' : TS_Feature02, 'dts'  : DTS_Feature })
def hepph(years):
    """Read the original 'hep-ph' dataset with ``yearstoRescue`` set to *years*.

    :param years: value assigned to ``yearstoRescue`` before reading.
    """
    dataset = Formating('/grafos/hepph_data')
    dataset.yearstoRescue = years
    dataset.subject = 'hep-ph'
    dataset.readingOrginalDataset()
 def __init__(self, myparams, nodesnotlinked, weights, WillCombinate):
     self.myparams = myparams
     self.weights = weights
     self.WillCombinate = WillCombinate
     qtyofNodesToProcess = len(nodesnotlinked)
     element = 0
     calcutations = []
     self.results = []
     self.minCN = float(0)
     self.maxCN = float(0)
     self.minAAS = float(0)
     self.maxAAS = float(0)
     self.minJC = float(0)
     self.maxJC = float(0)
     self.minPA = float(0)
     self.maxPA = float(0)
     self.minTS08 = float(0)
     self.maxTS08 = float(0)
     self.minTS05 = float(0)
     self.maxTS05 = float(0)
     self.minTS02 = float(0)
     self.maxTS02 = float(0)
     
     
     
     for pair in nodesnotlinked:
         element = element+1
         Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
         neighbors_node1 = self.all_neighbors(pair[0])
         neighbors_node2 = self.all_neighbors(pair[1])
         CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
         CommonNeigbors_Feature = len(CommonNeigbors)
         AAS_Feature = 0
         JC_Feature = 0
         PA_Feature = len(neighbors_node1) * len(neighbors_node2)
         TS_Feature08 = float(0)
         TS_Feature05 = float(0)
         TS_Feature02 = float(0)
         
         DTS_Feature = float(0)
         
         if CommonNeigbors_Feature > 0:
             print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
             x = (float)(len(neighbors_node1.union(neighbors_node2)))
             if x > 0:
                 JC_Feature = CommonNeigbors_Feature/x
             for pair_common_neighbor in CommonNeigbors:
                 secondary_neighbors = self.all_neighbors(pair_common_neighbor)
                 AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) + 0.00001)
             
                 objectsNode1 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[0], pair_common_neighbor)
                 objectsNode2 = self.get_ObjectsofLinks(myparams.trainnigGraph, pair[1], pair_common_neighbor)
                 hm = 2 / ((1/float(len(objectsNode1))) + (1/float(len(objectsNode2))))
                 timesofLinksNode1 = []
                 timesofLinksNode2 = []
               
                 for t1 in objectsNode1:
                     timesofLinksNode1.append(t1['time'])
                 for t2 in objectsNode2:
                     timesofLinksNode2.append(t2['time'])
               
                 timesofLinksNode1.sort(reverse=True)
                 timesofLinksNode2.sort(reverse=True)
                 timeofLinks = timesofLinksNode1 + timesofLinksNode2
                 k =  int(self.myparams.t0_)  - int(max(timeofLinks))
                 decayfunction08 = (1 - 0.8) ** k
                 decayfunction05 = (1 - 0.5) ** k
                 decayfunction02 = (1 - 0.2) ** k
                 
                 control = (abs( max(timesofLinksNode1) - max(timesofLinksNode2) ) + 1)
                 ts08 = (hm * decayfunction08) / control
                 ts05 = (hm * decayfunction05) / control
                 ts02 = (hm * decayfunction02) / control
                 
                 TS_Feature08 = TS_Feature08 + ts08
                 TS_Feature05 = TS_Feature05 + ts05
                 TS_Feature02 = TS_Feature02 + ts02
                 
                 
                 
          
         if CommonNeigbors_Feature < self.minCN:
             self.minCN = CommonNeigbors_Feature
         if CommonNeigbors_Feature > self.maxCN:
             self.maxCN = CommonNeigbors_Feature
         
         if AAS_Feature < self.minAAS:
             self.minAAS = AAS_Feature
         if AAS_Feature > self.maxAAS:
             self.maxAAS = CommonNeigbors_Feature
         
         if PA_Feature < self.minPA:
             self.minPA = PA_Feature
         if PA_Feature > self.maxPA:
             self.maxPA = PA_Feature
         
         if JC_Feature < self.minJC:
             self.minJC = JC_Feature
         if JC_Feature > self.maxJC:
             self.maxJC = JC_Feature
         
         if TS_Feature08 < self.minTS08:
             self.minTS08 = TS_Feature08
         if TS_Feature08 > self.maxTS08:
             self.maxTS08 = TS_Feature08
         
         if TS_Feature05 < self.minTS05:
             self.minTS05 = TS_Feature05
         if TS_Feature05 > self.maxTS05:
             self.maxTS05 = TS_Feature05
         
         if TS_Feature02 < self.minTS02:
             self.minTS02 = TS_Feature02
         if TS_Feature02 > self.maxTS02:
             self.maxTS02 = TS_Feature02
                 
         calcutations.append({'node1' : pair[0], 'node2': pair[1], 'cn' : CommonNeigbors_Feature, 'aas' : AAS_Feature, 'jc' : JC_Feature, 'pa' : PA_Feature, 'ts08' : TS_Feature08,'ts05' : TS_Feature05,'ts02' : TS_Feature02, 'dts'  : DTS_Feature })
     
     self.combination(calcutations)    
             
     
 def saveResults(self, filepath, nodesNotLinked):
     """Write each node pair as one ``first,second`` line to a text file.

     :param filepath: path handed to ``Formating.get_abs_file_path``.
     :param nodesNotLinked: iterable of pairs whose items at index 0 and 1
         are concatenated with a comma.
     """
     target = Formating.get_abs_file_path(filepath)
     # The with-statement takes care of closing the file.
     with open(target, 'w') as out:
         for pair in nodesNotLinked:
             line = pair[0] + ',' + pair[1] + '\n'
             out.write(line)
'''
Created on Aug 22, 2015

@author: cptullio

The first step is generating the graph from the database information.
The parameter file is needed to indicate where the graph will be saved.

'''
from parametering.ParameterUtil import ParameterUtil

from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # Load the run parameters; only graph_file is used from them below.
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_condmat_2004_2010.txt')
    astroPh = Formating(util.graph_file)
    # Reading the original dataset is currently disabled in this script.
    #astroPh.readingOrginalDataset()
    # Generate the graph, then save it.
    astroPh.generating_graph()
    astroPh.saveGraph()
def astroph(years, graph_file='/home/cmuniz/execMen/grafos/astroph_data'):
    """Read the original dataset for the 'astro-ph' subject.

    Args:
        years: value assigned to ``yearstoRescue`` on the ``Formating``
            instance (other callers in this file pass a list of years).
        graph_file: path passed to ``Formating``.  Defaults to the location
            that used to be hard-coded, so existing callers are unaffected,
            while other machines can supply their own path.

    Nothing is returned; the ``Formating`` instance is local to this call.
    """
    formatter = Formating(graph_file)
    formatter.subject = 'astro-ph'
    formatter.yearstoRescue = years
    formatter.readingOrginalDataset()
    def __init__(self, myparams, nodesnotlinked):
        self.myparams = myparams
        qtyofNodesToProcess = len(nodesnotlinked)
        element = 0
        self.results = []

        for pair in nodesnotlinked:
            element = element + 1
            Formating.printProgressofEvents(
                element, qtyofNodesToProcess,
                "Calculating features for nodes not liked: ")
            neighbors_node1 = self.all_neighbors(pair[0])
            neighbors_node2 = self.all_neighbors(pair[1])
            len_neihbors_node1 = len(neighbors_node1)
            len_neihbors_node2 = len(neighbors_node2)
            CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
            CommonNeigbors_Feature = len(CommonNeigbors)
            TS_Feature09 = float(0)
            TS_Feature08 = float(0)
            TS_Feature07 = float(0)
            TS_Feature06 = float(0)
            TS_Feature05 = float(0)
            TS_Feature04 = float(0)
            TS_Feature03 = float(0)
            TS_Feature02 = float(0)
            TS_Feature01 = float(0)

            if CommonNeigbors_Feature > 0:
                print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
                #x = (float)(len(neighbors_node1.union(neighbors_node2)))
                #if x > 0:
                #    JC_Feature = CommonNeigbors_Feature/x
                for pair_common_neighbor in CommonNeigbors:
                    #secondary_neighbors = self.all_neighbors(pair_common_neighbor)
                    #AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) + 0.00001)

                    objectsNode1 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[0], pair_common_neighbor)
                    objectsNode2 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[1], pair_common_neighbor)

                    hm = 2 / ((1 / float(len(objectsNode1))) +
                              (1 / float(len(objectsNode2))))
                    #print pair[0], pair_common_neighbor, "Media Harmonica ", hm

                    timesofLinksNode1 = []
                    timesofLinksNode2 = []
                    bagofWordsNode1 = set()
                    bagofWordsNode2 = set()

                    for t1 in objectsNode1:
                        timesofLinksNode1.append(t1['time'])
                        for b1 in eval(t1['keywords']):
                            bagofWordsNode1.add(b1)
                    for t2 in objectsNode2:
                        timesofLinksNode2.append(t2['time'])
                        for b2 in eval(t2['keywords']):
                            bagofWordsNode2.add(b2)

                    timesofLinksNode1.sort(reverse=True)
                    timesofLinksNode2.sort(reverse=True)
                    timeofLinks = timesofLinksNode1 + timesofLinksNode2

                    #print  pair_common_neighbor, "publicacoes realizadas: ", timeofLinks

                    k = int(self.myparams.t0_) - int(max(timeofLinks))

                    #print  pair_common_neighbor, "K ", k
                    decayfunction09 = (0.9)**k
                    decayfunction08 = (0.8)**k
                    decayfunction07 = (0.7)**k
                    decayfunction06 = (0.6)**k
                    decayfunction05 = (0.5)**k
                    decayfunction04 = (0.4)**k
                    decayfunction03 = (0.3)**k
                    decayfunction02 = (0.2)**k
                    decayfunction01 = (0.1)**k

                    #print  pair_common_neighbor, "funcao de decaimento 0.5 ", decayfunction05

                    control = (
                        abs(max(timesofLinksNode1) - max(timesofLinksNode2)) +
                        1)

                    #print  pair_common_neighbor, "denominador inicial considerando apenas TS ", control
                    ts09 = (hm * decayfunction09) / control
                    ts08 = (hm * decayfunction08) / control
                    ts07 = (hm * decayfunction07) / control
                    ts06 = (hm * decayfunction06) / control
                    ts05 = (hm * decayfunction05) / control
                    ts04 = (hm * decayfunction04) / control
                    ts03 = (hm * decayfunction03) / control
                    ts02 = (hm * decayfunction02) / control
                    ts01 = (hm * decayfunction01) / control

                    #print  pair_common_neighbor, "TS ", ts05
                    TS_Feature09 = TS_Feature09 + ts09
                    TS_Feature08 = TS_Feature08 + ts08
                    TS_Feature07 = TS_Feature07 + ts07
                    TS_Feature06 = TS_Feature06 + ts06
                    TS_Feature05 = TS_Feature05 + ts05
                    TS_Feature04 = TS_Feature04 + ts04
                    TS_Feature03 = TS_Feature03 + ts03
                    TS_Feature02 = TS_Feature02 + ts02
                    TS_Feature01 = TS_Feature01 + ts01

                    #print  pair_common_neighbor, "conjuntos de palavras ", bagofWordsNode1, bagofWordsNode2

                    #jcDomain = self.get_jacard_domain(bagofWordsNode1, bagofWordsNode2)

                    #print  pair_common_neighbor, "JC ", jcDomain

                    #dts02 = (hm * decayfunction02) /  (control * ((0.2) ** jcDomain))
                    #dts05 = (hm * decayfunction02) /  (control * ((0.5) ** jcDomain))
                    #dts08 = (hm * decayfunction02) /  (control * ((0.8) ** jcDomain))

                    #print  pair_common_neighbor, "DTS ", dts05

                    #DTS_Feature02 = DTS_Feature02 + dts02
                    #DTS_Feature05 = DTS_Feature05 + dts05
                    #DTS_Feature08 = DTS_Feature08 + dts08

            self.results.append({
                'node1': pair[0],
                'node2': pair[1],
                'TS09': TS_Feature09,
                'TS08': TS_Feature08,
                'TS07': TS_Feature07,
                'TS06': TS_Feature06,
                'TS05': TS_Feature05,
                'TS04': TS_Feature04,
                'TS03': TS_Feature03,
                'TS02': TS_Feature02,
                'TS01': TS_Feature01
            })
def condmat(years, graph_file='/home/cmuniz/execMen/grafos/condmat_data'):
    """Read the original dataset for the 'cond-mat' subject.

    Args:
        years: value assigned to ``yearstoRescue`` on the ``Formating``
            instance (other callers in this file pass a list of years).
        graph_file: path passed to ``Formating``.  Defaults to the location
            that used to be hard-coded, so existing callers are unaffected,
            while other machines can supply their own path.

    Nothing is returned; the ``Formating`` instance is local to this call.
    """
    formatter = Formating(graph_file)
    formatter.subject = 'cond-mat'
    formatter.yearstoRescue = years
    formatter.readingOrginalDataset()
Exemple #19
0
    def __init__(self, myparams, nodesnotlinked):
        self.myparams = myparams
        qtyofNodesToProcess = len(nodesnotlinked)
        element = 0
        self.results = []

        for pair in nodesnotlinked:
            element = element + 1
            Formating.printProgressofEvents(
                element, qtyofNodesToProcess,
                "Calculating features for nodes not liked: ")
            neighbors_node1 = self.all_neighbors(pair[0])
            neighbors_node2 = self.all_neighbors(pair[1])
            len_neihbors_node1 = len(neighbors_node1)
            len_neihbors_node2 = len(neighbors_node2)
            CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
            CommonNeigbors_Feature = len(CommonNeigbors)
            AAS_Feature = 0
            JC_Feature = 0
            PA_Feature = len_neihbors_node1 * len_neihbors_node2
            TS_Feature08 = float(0)
            TS_Feature05 = float(0)
            TS_Feature02 = float(0)

            DTS_Feature02 = float(0)
            DTS_Feature05 = float(0)
            DTS_Feature08 = float(0)

            if CommonNeigbors_Feature > 0:
                print "Calculando ", pair[0], pair[1], CommonNeigbors_Feature
                x = (float)(len(neighbors_node1.union(neighbors_node2)))
                if x > 0:
                    JC_Feature = CommonNeigbors_Feature / x
                for pair_common_neighbor in CommonNeigbors:
                    secondary_neighbors = self.all_neighbors(
                        pair_common_neighbor)
                    AAS_Feature += 1 / (numpy.log10(len(secondary_neighbors)) +
                                        0.00001)

                    objectsNode1 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[0], pair_common_neighbor)
                    objectsNode2 = self.get_ObjectsofLinks(
                        myparams.trainnigGraph, pair[1], pair_common_neighbor)

                    hm = 2 / ((1 / float(len(objectsNode1))) +
                              (1 / float(len(objectsNode2))))
                    #print pair[0], pair_common_neighbor, "Media Harmonica ", hm

                    timesofLinksNode1 = []
                    timesofLinksNode2 = []
                    bagofWordsNode1 = set()
                    bagofWordsNode2 = set()

                    for t1 in objectsNode1:
                        timesofLinksNode1.append(t1['time'])
                        for b1 in eval(t1['keywords']):
                            bagofWordsNode1.add(b1)
                    for t2 in objectsNode2:
                        timesofLinksNode2.append(t2['time'])
                        for b2 in eval(t2['keywords']):
                            bagofWordsNode2.add(b2)

                    timesofLinksNode1.sort(reverse=True)
                    timesofLinksNode2.sort(reverse=True)
                    timeofLinks = timesofLinksNode1 + timesofLinksNode2

                    #print  pair_common_neighbor, "publicacoes realizadas: ", timeofLinks

                    k = int(self.myparams.t0_) - int(max(timeofLinks))

                    #print  pair_common_neighbor, "K ", k

                    decayfunction08 = (0.8)**k
                    decayfunction05 = (0.5)**k
                    decayfunction02 = (0.2)**k

                    #print  pair_common_neighbor, "funcao de decaimento 0.5 ", decayfunction05

                    control = (
                        abs(max(timesofLinksNode1) - max(timesofLinksNode2)) +
                        1)

                    #print  pair_common_neighbor, "denominador inicial considerando apenas TS ", control

                    ts08 = (hm * decayfunction08) / control
                    ts05 = (hm * decayfunction05) / control
                    ts02 = (hm * decayfunction02) / control

                    #print  pair_common_neighbor, "TS ", ts05

                    TS_Feature08 = TS_Feature08 + ts08
                    TS_Feature05 = TS_Feature05 + ts05
                    TS_Feature02 = TS_Feature02 + ts02

                    #print  pair_common_neighbor, "conjuntos de palavras ", bagofWordsNode1, bagofWordsNode2

                    jcDomain = self.get_jacard_domain(bagofWordsNode1,
                                                      bagofWordsNode2)

                    #print  pair_common_neighbor, "JC ", jcDomain

                    dts02 = (hm * decayfunction02) / (control *
                                                      ((0.2)**jcDomain))
                    dts05 = (hm * decayfunction02) / (control *
                                                      ((0.5)**jcDomain))
                    dts08 = (hm * decayfunction02) / (control *
                                                      ((0.8)**jcDomain))

                    #print  pair_common_neighbor, "DTS ", dts05

                    DTS_Feature02 = DTS_Feature02 + dts02
                    DTS_Feature05 = DTS_Feature05 + dts05
                    DTS_Feature08 = DTS_Feature08 + dts08

            self.results.append({
                'node1': pair[0],
                'node2': pair[1],
                'cn': CommonNeigbors_Feature,
                'aas': AAS_Feature,
                'jc': JC_Feature,
                'pa': PA_Feature,
                'ts08': TS_Feature08,
                'ts05': TS_Feature05,
                'ts02': TS_Feature02,
                'dts08': DTS_Feature08,
                'dts05': DTS_Feature05,
                'dts02': DTS_Feature02
            })
Exemple #20
0
'''
Created on Aug 22, 2015

@author: cptullio

The first step is generating the graph from the database information.
The parameter file is needed to indicate where the graph will be saved.

'''
from parametering.ParameterUtil import ParameterUtil

from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # The parameter file supplies the location of the graph file.
    parameter_path = 'data/configuration/arxiv/exemplo_1994_1999/CombinationLinear/configToAG.txt'
    util = ParameterUtil(parameter_file=parameter_path)

    # Configure the formatter for the 'cond-mat' subject, 1994 through 1999.
    astroPh = Formating(util.graph_file)
    astroPh.subject = 'cond-mat'
    astroPh.yearstoRescue = [1994, 1995, 1996, 1997, 1998, 1999]

    # Build the graph and save it.  The readingOrginalDataset() step is left
    # disabled, as it was in the original version of this script.
    astroPh.generating_graph()
    astroPh.saveGraph()