def step01(paramFile):
    """Read the 1994-1999 'cond-mat' dataset and save the resulting graph.

    Loads the parameter file, creates a ``Formating`` object on the
    ``graph_file`` it names, sets the subject and the years to rescue, then
    calls ``readingOrginalDataset()`` followed by ``saveGraph()``.

    :param paramFile: path passed to ``ParameterUtil(parameter_file=...)``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    formatter = Formating(settings.graph_file)
    formatter.subject = 'cond-mat'
    formatter.yearstoRescue = list(range(1994, 2000))
    formatter.readingOrginalDataset()
    # generating_graph() is not called in this step (it was commented out).
    formatter.saveGraph()
def readingResultsFile(self, filepath):
    """Load a comma-separated pair file as a list of two-item lists.

    The path is resolved through ``Formating.get_abs_file_path``.  Every
    line has its newline removed and is split on ','; the first two fields
    form one entry of the result.

    :param filepath: path handed to ``Formating.get_abs_file_path``.
    :returns: list of ``[first_field, second_field]`` lists, in file order.
    :raises IndexError: if a line has fewer than two comma-separated fields.
    """
    resolved = Formating.get_abs_file_path(filepath)
    with open(resolved, 'r') as handle:
        split_lines = (raw.replace('\n', '').split(',') for raw in handle)
        return [[fields[0], fields[1]] for fields in split_lines]
def saving_orderedResult(self, filepath, ordering):
    """Write one ';'-separated result file per metric.

    For each metric in cn, aas, jc, pa, ts05 and dts05 a file named
    ``filepath + '.<metric>.txt'`` (resolved through
    ``Formating.get_abs_file_path``) is created.  Every element of
    *ordering* contributes one line per file of the form
    ``repr(node1);repr(node2);repr(score)``.

    :param filepath: common prefix of the six output files.
    :param ordering: iterable of mappings; ``item[metric]`` must itself be
        a mapping with the keys ``'node1'``, ``'node2'`` and ``metric``.
    """
    metrics = ('cn', 'aas', 'jc', 'pa', 'ts05', 'dts05')
    handles = {}
    try:
        for metric in metrics:
            handles[metric] = open(
                Formating.get_abs_file_path(filepath + '.' + metric + '.txt'),
                'w')
        for item_result in ordering:
            for metric in metrics:
                entry = item_result[metric]
                handles[metric].write(
                    repr(entry['node1']) + ';' + repr(entry['node2']) + ';'
                    + repr(entry[metric]) + '\n')
    finally:
        # Close whatever was opened, even if an open() or write() failed;
        # the previous version leaked the handles in that case.
        for handle in handles.values():
            handle.close()
def saving_orderedResult(self, filepath, ordering):
    """Write one ';'-separated result file per metric.

    For each metric in cn, aas, jc, pa, ts05 and dts05 a file named
    ``filepath + '.<metric>.txt'`` (resolved through
    ``Formating.get_abs_file_path``) is created.  Every element of
    *ordering* contributes one line per file of the form
    ``repr(node1);repr(node2);repr(score)``.

    :param filepath: common prefix of the six output files.
    :param ordering: iterable of mappings; ``item[metric]`` must itself be
        a mapping with the keys ``'node1'``, ``'node2'`` and ``metric``.
    """
    metrics = ('cn', 'aas', 'jc', 'pa', 'ts05', 'dts05')
    handles = {}
    try:
        for metric in metrics:
            handles[metric] = open(
                Formating.get_abs_file_path(filepath + '.' + metric + '.txt'),
                'w')
        for item_result in ordering:
            for metric in metrics:
                entry = item_result[metric]
                handles[metric].write(
                    repr(entry['node1']) + ';' + repr(entry['node2']) + ';'
                    + repr(entry[metric]) + '\n')
    finally:
        # Close whatever was opened, even if an open() or write() failed;
        # the previous version leaked the handles in that case.
        for handle in handles.values():
            handle.close()
def __init__(self, myparams, nodesnotlinked):
    """Compute nine domain-weighted time scores for every candidate pair.

    For each pair the common neighbours of both nodes are determined.
    Every common neighbour adds
    ``(hm * 0.9 ** k) / (control * base ** jcDomain)`` to the score of each
    ``base`` in 0.9, 0.8, ..., 0.1, where ``hm`` is the harmonic mean of the
    two link counts, ``k`` is ``t0_`` minus the most recent link time,
    ``control`` is the absolute gap between the two most recent link times
    plus one, and ``jcDomain`` comes from ``self.get_jacard_domain`` applied
    to the keyword sets of both link groups.

    One dict per pair is appended to ``self.results`` with the keys
    ``node1``, ``node2`` and ``TS09`` .. ``TS01``; pairs without common
    neighbours keep every score at 0.0.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    """
    self.myparams = myparams
    self.results = []
    total_pairs = len(nodesnotlinked)
    # Result key -> base that is raised to the keyword Jaccard value.
    domain_bases = (
        ('TS09', 0.9), ('TS08', 0.8), ('TS07', 0.7),
        ('TS06', 0.6), ('TS05', 0.5), ('TS04', 0.4),
        ('TS03', 0.3), ('TS02', 0.2), ('TS01', 0.1),
    )
    for position, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            position, total_pairs,
            "Calculating features for nodes not liked: ")
        shared = self.all_neighbors(pair[0]).intersection(
            self.all_neighbors(pair[1]))
        shared_count = len(shared)
        totals = dict((label, float(0)) for label, _ in domain_bases)
        if shared_count > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], shared_count))
            for middle in shared:
                links_a = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], middle)
                links_b = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], middle)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(links_a)))
                          + (1 / float(len(links_b))))
                times_a, words_a = [], set()
                for link in links_a:
                    times_a.append(link['time'])
                    # NOTE(review): eval() runs whatever text is stored in
                    # 'keywords'; only safe if the graph data is trusted.
                    words_a.update(eval(link['keywords']))
                times_b, words_b = [], set()
                for link in links_b:
                    times_b.append(link['time'])
                    words_b.update(eval(link['keywords']))
                latest_a = max(times_a)
                latest_b = max(times_b)
                k = int(self.myparams.t0_) - int(max(latest_a, latest_b))
                # The time decay base is fixed at 0.9 for all nine scores;
                # only the domain base varies between them.
                time_decay = (0.9) ** k
                control = abs(latest_a - latest_b) + 1
                jc_domain = self.get_jacard_domain(words_a, words_b)
                for label, base in domain_bases:
                    totals[label] = totals[label] + (
                        (hm * time_decay) / (control * (base ** jc_domain)))
        row = {'node1': pair[0], 'node2': pair[1]}
        row.update(totals)
        self.results.append(row)
'''
Created on Aug 22, 2015

@author: cptullio

First step: generate the graph from the database information.
The parameter file indicates where the graph will be saved.
'''
from parametering.ParameterUtil import ParameterUtil
from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    settings = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_2004_2010.txt')
    formatter = Formating(settings.graph_file)
    # readingOrginalDataset() is not called in this script (it was
    # commented out); the graph is generated and saved directly.
    formatter.generating_graph()
    formatter.saveGraph()
def __init__(self, myparams, nodesnotlinked, weights, WillCombinate):
    """Compute per-pair features, track their ranges and combine them.

    For every pair in *nodesnotlinked* the following values are computed:

    * ``cn``  - number of common neighbours;
    * ``aas`` - sum over the common neighbours of
      ``1 / (log10(number of neighbours) + 0.00001)``;
    * ``jc``  - common neighbours divided by the size of the union of both
      neighbourhoods;
    * ``pa``  - product of the two neighbourhood sizes;
    * ``ts08`` / ``ts05`` / ``ts02`` - sum over the common neighbours of
      ``(hm * (1 - d) ** k) / control`` for d = 0.8, 0.5, 0.2;
    * ``dts`` - always 0.0 in this version.

    The running minimum and maximum of each feature are kept in
    ``self.min*`` / ``self.max*``.  After the last pair the list of
    per-pair dicts is passed to ``self.combination``.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    :param weights: stored on ``self.weights``; not used in this method.
    :param WillCombinate: stored on ``self.WillCombinate``; not used in
        this method.
    """
    self.myparams = myparams
    self.weights = weights
    self.WillCombinate = WillCombinate
    qtyofNodesToProcess = len(nodesnotlinked)
    calcutations = []
    self.results = []
    # NOTE(review): every minimum starts at 0 and none of the features
    # looks able to go negative, so the minima presumably stay 0 - confirm
    # that this is what self.combination expects.
    self.minCN = float(0)
    self.maxCN = float(0)
    self.minAAS = float(0)
    self.maxAAS = float(0)
    self.minJC = float(0)
    self.maxJC = float(0)
    self.minPA = float(0)
    self.maxPA = float(0)
    self.minTS08 = float(0)
    self.maxTS08 = float(0)
    self.minTS05 = float(0)
    self.maxTS05 = float(0)
    self.minTS02 = float(0)
    self.maxTS02 = float(0)
    for element, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = self.all_neighbors(pair[0])
        neighbors_node2 = self.all_neighbors(pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        CommonNeigbors_Feature = len(CommonNeigbors)
        AAS_Feature = 0
        JC_Feature = 0
        PA_Feature = len(neighbors_node1) * len(neighbors_node2)
        TS_Feature08 = float(0)
        TS_Feature05 = float(0)
        TS_Feature02 = float(0)
        DTS_Feature = float(0)
        if CommonNeigbors_Feature > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], CommonNeigbors_Feature))
            x = float(len(neighbors_node1.union(neighbors_node2)))
            if x > 0:
                JC_Feature = CommonNeigbors_Feature / x
            for pair_common_neighbor in CommonNeigbors:
                secondary_neighbors = self.all_neighbors(
                    pair_common_neighbor)
                # The small constant keeps the denominator non-zero when
                # the neighbour count is 1 (log10(1) == 0).
                AAS_Feature += 1 / (
                    numpy.log10(len(secondary_neighbors)) + 0.00001)
                objectsNode1 = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], pair_common_neighbor)
                objectsNode2 = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], pair_common_neighbor)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(objectsNode1)))
                          + (1 / float(len(objectsNode2))))
                latest1 = max(t1['time'] for t1 in objectsNode1)
                latest2 = max(t2['time'] for t2 in objectsNode2)
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest1, latest2))
                decayfunction08 = (1 - 0.8) ** k
                decayfunction05 = (1 - 0.5) ** k
                decayfunction02 = (1 - 0.2) ** k
                control = abs(latest1 - latest2) + 1
                TS_Feature08 = TS_Feature08 + (hm * decayfunction08) / control
                TS_Feature05 = TS_Feature05 + (hm * decayfunction05) / control
                TS_Feature02 = TS_Feature02 + (hm * decayfunction02) / control
        self.minCN = min(self.minCN, CommonNeigbors_Feature)
        self.maxCN = max(self.maxCN, CommonNeigbors_Feature)
        self.minAAS = min(self.minAAS, AAS_Feature)
        # Fixed: this used to store CommonNeigbors_Feature in maxAAS.
        self.maxAAS = max(self.maxAAS, AAS_Feature)
        self.minPA = min(self.minPA, PA_Feature)
        self.maxPA = max(self.maxPA, PA_Feature)
        self.minJC = min(self.minJC, JC_Feature)
        self.maxJC = max(self.maxJC, JC_Feature)
        self.minTS08 = min(self.minTS08, TS_Feature08)
        self.maxTS08 = max(self.maxTS08, TS_Feature08)
        self.minTS05 = min(self.minTS05, TS_Feature05)
        self.maxTS05 = max(self.maxTS05, TS_Feature05)
        self.minTS02 = min(self.minTS02, TS_Feature02)
        self.maxTS02 = max(self.maxTS02, TS_Feature02)
        calcutations.append({
            'node1': pair[0],
            'node2': pair[1],
            'cn': CommonNeigbors_Feature,
            'aas': AAS_Feature,
            'jc': JC_Feature,
            'pa': PA_Feature,
            'ts08': TS_Feature08,
            'ts05': TS_Feature05,
            'ts02': TS_Feature02,
            'dts': DTS_Feature,
        })
    self.combination(calcutations)
def hepth(years):
    """Run ``readingOrginalDataset()`` for the 'hep-th' subject.

    A ``Formating`` object is created on the fixed hepth_data path, given
    the subject and the requested years, and asked to read the dataset.

    :param years: value assigned to ``yearstoRescue``.
    """
    reader = Formating('/home/cmuniz/execMen/grafos/hepth_data')
    reader.subject = 'hep-th'
    reader.yearstoRescue = years
    reader.readingOrginalDataset()
def __init__(self, myparams, nodesnotlinked):
    """Compute topological, time and domain-time scores for each pair.

    One dict per pair is appended to ``self.results`` with these keys:

    * ``node1`` / ``node2`` - the two members of the pair;
    * ``cn``  - number of common neighbours;
    * ``aas`` - sum over the common neighbours of
      ``1 / (log10(number of neighbours) + 0.00001)``;
    * ``jc``  - common neighbours divided by the size of the union of both
      neighbourhoods;
    * ``pa``  - product of the two neighbourhood sizes;
    * ``ts08`` / ``ts05`` / ``ts02`` - sum of ``(hm * d ** k) / control``
      for d = 0.8, 0.5, 0.2;
    * ``dts08`` / ``dts05`` / ``dts02`` - sum of
      ``(hm * 0.2 ** k) / (control * b ** jcDomain)`` for b = 0.8, 0.5, 0.2.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    """
    self.myparams = myparams
    self.results = []
    total_pairs = len(nodesnotlinked)
    for position, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            position, total_pairs,
            "Calculating features for nodes not liked: ")
        around_a = self.all_neighbors(pair[0])
        around_b = self.all_neighbors(pair[1])
        shared = around_a.intersection(around_b)
        row = {
            'node1': pair[0],
            'node2': pair[1],
            'cn': len(shared),
            'aas': 0,
            'jc': 0,
            'pa': len(around_a) * len(around_b),
            'ts08': float(0),
            'ts05': float(0),
            'ts02': float(0),
            'dts08': float(0),
            'dts05': float(0),
            'dts02': float(0),
        }
        if row['cn'] > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], row['cn']))
            union_size = float(len(around_a.union(around_b)))
            if union_size > 0:
                row['jc'] = row['cn'] / union_size
            for middle in shared:
                row['aas'] += 1 / (
                    numpy.log10(len(self.all_neighbors(middle))) + 0.00001)
                links_a = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], middle)
                links_b = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], middle)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(links_a)))
                          + (1 / float(len(links_b))))
                times_a, words_a = [], set()
                for link in links_a:
                    times_a.append(link['time'])
                    # NOTE(review): eval() runs whatever text is stored in
                    # 'keywords'; only safe if the graph data is trusted.
                    words_a.update(eval(link['keywords']))
                times_b, words_b = [], set()
                for link in links_b:
                    times_b.append(link['time'])
                    words_b.update(eval(link['keywords']))
                latest_a = max(times_a)
                latest_b = max(times_b)
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest_a, latest_b))
                decay08 = (0.8) ** k
                decay05 = (0.5) ** k
                decay02 = (0.2) ** k
                control = abs(latest_a - latest_b) + 1
                row['ts08'] = row['ts08'] + (hm * decay08) / control
                row['ts05'] = row['ts05'] + (hm * decay05) / control
                row['ts02'] = row['ts02'] + (hm * decay02) / control
                jc_domain = self.get_jacard_domain(words_a, words_b)
                # NOTE(review): all three domain scores use the 0.2 time
                # decay; only the domain base differs.  Confirm that dts05
                # and dts08 are not meant to use decay05 / decay08.
                decayed = hm * decay02
                row['dts02'] = row['dts02'] + (
                    decayed / (control * ((0.2) ** jc_domain)))
                row['dts05'] = row['dts05'] + (
                    decayed / (control * ((0.5) ** jc_domain)))
                row['dts08'] = row['dts08'] + (
                    decayed / (control * ((0.8) ** jc_domain)))
        self.results.append(row)
def grqc(years):
    """Run ``readingOrginalDataset()`` for the 'gr-qc' subject.

    A ``Formating`` object is created on the fixed grqc_data path, given
    the subject and the requested years, and asked to read the dataset.

    :param years: value assigned to ``yearstoRescue``.
    """
    reader = Formating('/home/cmuniz/execMen/grafos/grqc_data')
    reader.subject = 'gr-qc'
    reader.yearstoRescue = years
    reader.readingOrginalDataset()
def __init__(self, myparams, nodesnotlinked):
    """Compute topological and time-score features for each pair.

    One dict per pair is appended to ``self.results`` with these keys:

    * ``node1`` / ``node2`` - the two members of the pair;
    * ``cn``  - number of common neighbours;
    * ``aas`` - sum over the common neighbours of
      ``1 / (log10(number of neighbours) + 0.00001)``;
    * ``jc``  - common neighbours divided by the size of the union of both
      neighbourhoods;
    * ``pa``  - product of the two neighbourhood sizes;
    * ``ts08`` / ``ts05`` / ``ts02`` - sum of ``(hm * (1 - d) ** k) / control``
      for d = 0.8, 0.5, 0.2;
    * ``dts`` - always 0.0 (the domain-aware score is disabled here).

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    """
    self.myparams = myparams
    self.results = []
    total_pairs = len(nodesnotlinked)
    for position, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            position, total_pairs,
            "Calculating features for nodes not liked: ")
        around_a = self.all_neighbors(pair[0])
        around_b = self.all_neighbors(pair[1])
        shared = around_a.intersection(around_b)
        row = {
            'node1': pair[0],
            'node2': pair[1],
            'cn': len(shared),
            'aas': 0,
            'jc': 0,
            'pa': len(around_a) * len(around_b),
            'ts08': float(0),
            'ts05': float(0),
            'ts02': float(0),
            'dts': float(0),
        }
        if row['cn'] > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], row['cn']))
            union_size = float(len(around_a.union(around_b)))
            if union_size > 0:
                row['jc'] = row['cn'] / union_size
            for middle in shared:
                row['aas'] += 1 / (
                    numpy.log10(len(self.all_neighbors(middle))) + 0.00001)
                links_a = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], middle)
                links_b = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], middle)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(links_a)))
                          + (1 / float(len(links_b))))
                latest_a = max([link['time'] for link in links_a])
                latest_b = max([link['time'] for link in links_b])
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest_a, latest_b))
                decay08 = (1 - 0.8) ** k
                decay05 = (1 - 0.5) ** k
                decay02 = (1 - 0.2) ** k
                control = abs(latest_a - latest_b) + 1
                row['ts08'] = row['ts08'] + (hm * decay08) / control
                row['ts05'] = row['ts05'] + (hm * decay05) / control
                row['ts02'] = row['ts02'] + (hm * decay02) / control
        self.results.append(row)
def hepph(years):
    """Run ``readingOrginalDataset()`` for the 'hep-ph' subject.

    A ``Formating`` object is created on the fixed hepph_data path, given
    the subject and the requested years, and asked to read the dataset.

    :param years: value assigned to ``yearstoRescue``.
    """
    reader = Formating('/grafos/hepph_data')
    reader.subject = 'hep-ph'
    reader.yearstoRescue = years
    reader.readingOrginalDataset()
def __init__(self, myparams, nodesnotlinked, weights, WillCombinate):
    """Compute per-pair features, track their ranges and combine them.

    For every pair in *nodesnotlinked* the following values are computed:

    * ``cn``  - number of common neighbours;
    * ``aas`` - sum over the common neighbours of
      ``1 / (log10(number of neighbours) + 0.00001)``;
    * ``jc``  - common neighbours divided by the size of the union of both
      neighbourhoods;
    * ``pa``  - product of the two neighbourhood sizes;
    * ``ts08`` / ``ts05`` / ``ts02`` - sum over the common neighbours of
      ``(hm * (1 - d) ** k) / control`` for d = 0.8, 0.5, 0.2;
    * ``dts`` - always 0.0 in this version.

    The running minimum and maximum of each feature are kept in
    ``self.min*`` / ``self.max*``.  After the last pair the list of
    per-pair dicts is passed to ``self.combination``.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    :param weights: stored on ``self.weights``; not used in this method.
    :param WillCombinate: stored on ``self.WillCombinate``; not used in
        this method.
    """
    self.myparams = myparams
    self.weights = weights
    self.WillCombinate = WillCombinate
    qtyofNodesToProcess = len(nodesnotlinked)
    calcutations = []
    self.results = []
    # NOTE(review): every minimum starts at 0 and none of the features
    # looks able to go negative, so the minima presumably stay 0 - confirm
    # that this is what self.combination expects.
    self.minCN = float(0)
    self.maxCN = float(0)
    self.minAAS = float(0)
    self.maxAAS = float(0)
    self.minJC = float(0)
    self.maxJC = float(0)
    self.minPA = float(0)
    self.maxPA = float(0)
    self.minTS08 = float(0)
    self.maxTS08 = float(0)
    self.minTS05 = float(0)
    self.maxTS05 = float(0)
    self.minTS02 = float(0)
    self.maxTS02 = float(0)
    for element, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = self.all_neighbors(pair[0])
        neighbors_node2 = self.all_neighbors(pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        CommonNeigbors_Feature = len(CommonNeigbors)
        AAS_Feature = 0
        JC_Feature = 0
        PA_Feature = len(neighbors_node1) * len(neighbors_node2)
        TS_Feature08 = float(0)
        TS_Feature05 = float(0)
        TS_Feature02 = float(0)
        DTS_Feature = float(0)
        if CommonNeigbors_Feature > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], CommonNeigbors_Feature))
            x = float(len(neighbors_node1.union(neighbors_node2)))
            if x > 0:
                JC_Feature = CommonNeigbors_Feature / x
            for pair_common_neighbor in CommonNeigbors:
                secondary_neighbors = self.all_neighbors(
                    pair_common_neighbor)
                # The small constant keeps the denominator non-zero when
                # the neighbour count is 1 (log10(1) == 0).
                AAS_Feature += 1 / (
                    numpy.log10(len(secondary_neighbors)) + 0.00001)
                objectsNode1 = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], pair_common_neighbor)
                objectsNode2 = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], pair_common_neighbor)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(objectsNode1)))
                          + (1 / float(len(objectsNode2))))
                latest1 = max(t1['time'] for t1 in objectsNode1)
                latest2 = max(t2['time'] for t2 in objectsNode2)
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest1, latest2))
                decayfunction08 = (1 - 0.8) ** k
                decayfunction05 = (1 - 0.5) ** k
                decayfunction02 = (1 - 0.2) ** k
                control = abs(latest1 - latest2) + 1
                TS_Feature08 = TS_Feature08 + (hm * decayfunction08) / control
                TS_Feature05 = TS_Feature05 + (hm * decayfunction05) / control
                TS_Feature02 = TS_Feature02 + (hm * decayfunction02) / control
        self.minCN = min(self.minCN, CommonNeigbors_Feature)
        self.maxCN = max(self.maxCN, CommonNeigbors_Feature)
        self.minAAS = min(self.minAAS, AAS_Feature)
        # Fixed: this used to store CommonNeigbors_Feature in maxAAS.
        self.maxAAS = max(self.maxAAS, AAS_Feature)
        self.minPA = min(self.minPA, PA_Feature)
        self.maxPA = max(self.maxPA, PA_Feature)
        self.minJC = min(self.minJC, JC_Feature)
        self.maxJC = max(self.maxJC, JC_Feature)
        self.minTS08 = min(self.minTS08, TS_Feature08)
        self.maxTS08 = max(self.maxTS08, TS_Feature08)
        self.minTS05 = min(self.minTS05, TS_Feature05)
        self.maxTS05 = max(self.maxTS05, TS_Feature05)
        self.minTS02 = min(self.minTS02, TS_Feature02)
        self.maxTS02 = max(self.maxTS02, TS_Feature02)
        calcutations.append({
            'node1': pair[0],
            'node2': pair[1],
            'cn': CommonNeigbors_Feature,
            'aas': AAS_Feature,
            'jc': JC_Feature,
            'pa': PA_Feature,
            'ts08': TS_Feature08,
            'ts05': TS_Feature05,
            'ts02': TS_Feature02,
            'dts': DTS_Feature,
        })
    self.combination(calcutations)
def saveResults(self, filepath, nodesNotLinked):
    """Write each pair as a ``first,second`` line to *filepath*.

    The path is resolved through ``Formating.get_abs_file_path`` and the
    file is opened in write mode, so existing content is replaced.

    :param filepath: path handed to ``Formating.get_abs_file_path``.
    :param nodesNotLinked: iterable of pairs; ``pair[0]`` and ``pair[1]``
        are concatenated with ',' and must therefore be strings.
    """
    target = Formating.get_abs_file_path(filepath)
    with open(target, 'w') as out:
        for pair in nodesNotLinked:
            out.write(pair[0] + ',' + pair[1] + '\n')
'''
Created on Aug 22, 2015

@author: cptullio

First step: generate the graph from the database information.
The parameter file indicates where the graph will be saved.
'''
from parametering.ParameterUtil import ParameterUtil
from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    settings = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_condmat_2004_2010.txt')
    formatter = Formating(settings.graph_file)
    # readingOrginalDataset() is not called in this script (it was
    # commented out); the graph is generated and saved directly.
    formatter.generating_graph()
    formatter.saveGraph()
def astroph(years):
    """Run ``readingOrginalDataset()`` for the 'astro-ph' subject.

    A ``Formating`` object is created on the fixed astroph_data path, given
    the subject and the requested years, and asked to read the dataset.

    :param years: value assigned to ``yearstoRescue``.
    """
    reader = Formating('/home/cmuniz/execMen/grafos/astroph_data')
    reader.subject = 'astro-ph'
    reader.yearstoRescue = years
    reader.readingOrginalDataset()
def __init__(self, myparams, nodesnotlinked):
    """Compute nine time scores (decay 0.9 .. 0.1) for every candidate pair.

    For each pair the common neighbours of both nodes are determined.
    Every common neighbour adds ``(hm * d ** k) / control`` to the score of
    each decay base ``d`` in 0.9, 0.8, ..., 0.1, where ``hm`` is the
    harmonic mean of the two link counts, ``k`` is ``t0_`` minus the most
    recent link time and ``control`` is the absolute gap between the two
    most recent link times plus one.

    One dict per pair is appended to ``self.results`` with the keys
    ``node1``, ``node2`` and ``TS09`` .. ``TS01``; pairs without common
    neighbours keep every score at 0.0.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    """
    self.myparams = myparams
    self.results = []
    total_pairs = len(nodesnotlinked)
    # Result key -> time decay base.
    decay_bases = (
        ('TS09', 0.9), ('TS08', 0.8), ('TS07', 0.7),
        ('TS06', 0.6), ('TS05', 0.5), ('TS04', 0.4),
        ('TS03', 0.3), ('TS02', 0.2), ('TS01', 0.1),
    )
    for position, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            position, total_pairs,
            "Calculating features for nodes not liked: ")
        shared = self.all_neighbors(pair[0]).intersection(
            self.all_neighbors(pair[1]))
        shared_count = len(shared)
        totals = dict((label, float(0)) for label, _ in decay_bases)
        if shared_count > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], shared_count))
            for middle in shared:
                links_a = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], middle)
                links_b = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], middle)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(links_a)))
                          + (1 / float(len(links_b))))
                # The keyword sets are still built (the domain-aware score
                # that consumed them is disabled), so malformed keywords
                # fail here exactly as before.
                # NOTE(review): eval() runs whatever text is stored in
                # 'keywords'; only safe if the graph data is trusted.
                times_a, words_a = [], set()
                for link in links_a:
                    times_a.append(link['time'])
                    words_a.update(eval(link['keywords']))
                times_b, words_b = [], set()
                for link in links_b:
                    times_b.append(link['time'])
                    words_b.update(eval(link['keywords']))
                latest_a = max(times_a)
                latest_b = max(times_b)
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest_a, latest_b))
                decays = [(label, base ** k) for label, base in decay_bases]
                control = abs(latest_a - latest_b) + 1
                for label, decay in decays:
                    totals[label] = totals[label] + (hm * decay) / control
        row = {'node1': pair[0], 'node2': pair[1]}
        row.update(totals)
        self.results.append(row)
def condmat(years):
    """Run ``readingOrginalDataset()`` for the 'cond-mat' subject.

    A ``Formating`` object is created on the fixed condmat_data path, given
    the subject and the requested years, and asked to read the dataset.

    :param years: value assigned to ``yearstoRescue``.
    """
    reader = Formating('/home/cmuniz/execMen/grafos/condmat_data')
    reader.subject = 'cond-mat'
    reader.yearstoRescue = years
    reader.readingOrginalDataset()
def __init__(self, myparams, nodesnotlinked):
    """Compute topological, time and domain-time scores for each pair.

    One dict per pair is appended to ``self.results`` with these keys:

    * ``node1`` / ``node2`` - the two members of the pair;
    * ``cn``  - number of common neighbours;
    * ``aas`` - sum over the common neighbours of
      ``1 / (log10(number of neighbours) + 0.00001)``;
    * ``jc``  - common neighbours divided by the size of the union of both
      neighbourhoods;
    * ``pa``  - product of the two neighbourhood sizes;
    * ``ts08`` / ``ts05`` / ``ts02`` - sum of ``(hm * d ** k) / control``
      for d = 0.8, 0.5, 0.2;
    * ``dts08`` / ``dts05`` / ``dts02`` - sum of
      ``(hm * 0.2 ** k) / (control * b ** jcDomain)`` for b = 0.8, 0.5, 0.2.

    :param myparams: parameter object; ``trainnigGraph`` and ``t0_`` are read.
    :param nodesnotlinked: sized iterable of pairs indexed as ``pair[0]``
        and ``pair[1]``.
    """
    self.myparams = myparams
    self.results = []
    total_pairs = len(nodesnotlinked)
    for position, pair in enumerate(nodesnotlinked, 1):
        Formating.printProgressofEvents(
            position, total_pairs,
            "Calculating features for nodes not liked: ")
        around_a = self.all_neighbors(pair[0])
        around_b = self.all_neighbors(pair[1])
        shared = around_a.intersection(around_b)
        row = {
            'node1': pair[0],
            'node2': pair[1],
            'cn': len(shared),
            'aas': 0,
            'jc': 0,
            'pa': len(around_a) * len(around_b),
            'ts08': float(0),
            'ts05': float(0),
            'ts02': float(0),
            'dts08': float(0),
            'dts05': float(0),
            'dts02': float(0),
        }
        if row['cn'] > 0:
            # Same text as the Python 2 statement
            # `print "Calculando ", a, b, n`, but valid on Python 3 as well.
            print("%s %s %s %s" % (
                "Calculando ", pair[0], pair[1], row['cn']))
            union_size = float(len(around_a.union(around_b)))
            if union_size > 0:
                row['jc'] = row['cn'] / union_size
            for middle in shared:
                row['aas'] += 1 / (
                    numpy.log10(len(self.all_neighbors(middle))) + 0.00001)
                links_a = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[0], middle)
                links_b = self.get_ObjectsofLinks(
                    myparams.trainnigGraph, pair[1], middle)
                # Harmonic mean of the number of links on each side.
                hm = 2 / ((1 / float(len(links_a)))
                          + (1 / float(len(links_b))))
                times_a, words_a = [], set()
                for link in links_a:
                    times_a.append(link['time'])
                    # NOTE(review): eval() runs whatever text is stored in
                    # 'keywords'; only safe if the graph data is trusted.
                    words_a.update(eval(link['keywords']))
                times_b, words_b = [], set()
                for link in links_b:
                    times_b.append(link['time'])
                    words_b.update(eval(link['keywords']))
                latest_a = max(times_a)
                latest_b = max(times_b)
                # Distance between t0_ and the most recent link time.
                k = int(self.myparams.t0_) - int(max(latest_a, latest_b))
                decay08 = (0.8) ** k
                decay05 = (0.5) ** k
                decay02 = (0.2) ** k
                control = abs(latest_a - latest_b) + 1
                row['ts08'] = row['ts08'] + (hm * decay08) / control
                row['ts05'] = row['ts05'] + (hm * decay05) / control
                row['ts02'] = row['ts02'] + (hm * decay02) / control
                jc_domain = self.get_jacard_domain(words_a, words_b)
                # NOTE(review): all three domain scores use the 0.2 time
                # decay; only the domain base differs.  Confirm that dts05
                # and dts08 are not meant to use decay05 / decay08.
                decayed = hm * decay02
                row['dts02'] = row['dts02'] + (
                    decayed / (control * ((0.2) ** jc_domain)))
                row['dts05'] = row['dts05'] + (
                    decayed / (control * ((0.5) ** jc_domain)))
                row['dts08'] = row['dts08'] + (
                    decayed / (control * ((0.8) ** jc_domain)))
        self.results.append(row)
'''
Created on Aug 22, 2015

@author: cptullio

First step: generate the graph from the database information.
The parameter file indicates where the graph will be saved.
'''
from parametering.ParameterUtil import ParameterUtil
from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # Alternative configuration that was left disabled:
    #   'data/configuration/arxiv/condmat_1994_1999/MetricaTemporal/config.txt'
    # A Parameterization(...) object built from the same settings was also
    # left disabled; only graph_file is used below.
    settings = ParameterUtil(
        parameter_file=(
            'data/configuration/arxiv/exemplo_1994_1999/'
            'CombinationLinear/configToAG.txt'))
    formatter = Formating(settings.graph_file)
    formatter.subject = 'cond-mat'
    # Other year selections that were left disabled: [1993] and 2004..2012.
    formatter.yearstoRescue = list(range(1994, 2000))
    # readingOrginalDataset() is not called in this script (it was
    # commented out); the graph is generated and saved directly.
    formatter.generating_graph()
    formatter.saveGraph()