def generating_Training_Graph(self): if not os.path.exists( Formating.get_abs_file_path(self.filePathTrainingGraph)): if self.graph == None: print "Reading Full graphs", datetime.today() self.graph = Formating.reading_graph(self.filePathGraph) print "Generating Trainnig graphs", datetime.today() self.trainnigGraph = Formating.get_graph_from_period( self.graph, self.t0, self.t0_) networkx.write_graphml( self.trainnigGraph, Formating.get_abs_file_path(self.filePathTrainingGraph)) else: print "Reading Trainnig graph", datetime.today() self.trainnigGraph = Formating.reading_graph( self.filePathTrainingGraph) for w_score in self.WeightedScoresChoiced: w_score[0].graph = self.trainnigGraph for score in self.ScoresChoiced: score[0].graph = self.trainnigGraph for w in self.WeightsChoiced: w[0].graph = self.trainnigGraph
def executingCalculate(self): dataInicial = datetime.today() print "Starting Calculating Nodes not linked", dataInicial element = 0 qtyofNodesToProcess = len(self.NodesNotLinked) qtyFeatures = len(self.preparedParameter.ScoresChoiced) results = [] #for each node for nodenotLinked in self.NodesNotLinked: element = element+1 Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ") item_result = [] for index_features in range(qtyFeatures): self.preparedParameter.ScoresChoiced[index_features][0].parameter = self.preparedParameter valueCalculated = self.preparedParameter.ScoresChoiced[index_features][0].execute(nodenotLinked[0],nodenotLinked[1] ) if valueCalculated < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = valueCalculated if valueCalculated > self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = valueCalculated item_result.append(valueCalculated) lineContent = [] #generating a vetor with the name of the feature and the result of the calculate for indice in range(qtyFeatures): lineContent.append(item_result[indice]) results.append([nodenotLinked[0], nodenotLinked[1], lineContent ]) print "Calculating Nodes not linked finished", dataInicial, datetime.today() self.qtyDataCalculated = len(results) return results
def generate_dblpGeneralData(self, parameter_file):
    """Prepare the graph file and the not-linked node file for a run.

    Reads the settings from ``parameter_file`` through ``ParameterUtil``,
    has ``Formating`` read the original dataset and save the graph, builds a
    ``Parameterization`` from the settings and finally constructs a
    ``VariableSelection`` on its training graph.
    """
    settings = ParameterUtil(parameter_file)

    formatter = Formating(settings.original_file, settings.graph_file)
    formatter.readingOrginalDataset()
    formatter.saveGraph()

    parameters = Parameterization(
        settings.top_rank,
        settings.distanceNeighbors,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay)

    # The instance is not used afterwards; only the work done by the
    # constructor matters here.
    VariableSelection(parameters.trainnigGraph, settings.nodes_notlinked_file)
def generating_Test_Graph(self ): if not os.path.exists(Formating.get_abs_file_path(self.filePathTestGraph)): if self.graph == None: print "Reading Full graphs", datetime.today() self.graph = Formating.reading_graph(self.filePathGraph) print "Generating Testing graphs", datetime.today() self.testGraph = Formating.get_graph_from_period(self.graph, self.t1, self.t1_) networkx.write_graphml(self.testGraph, Formating.get_abs_file_path(self.filePathTestGraph)) else: print "Reading testing graph", datetime.today() self.testGraph = Formating.reading_graph(self.filePathTestGraph)
def saving_orderedResult(self, filepath, results):
    """Write one ``;``-separated text file per entry of ``results``.

    The file for position ``i`` is named ``filepath`` followed by
    ``str(self.preparedParameter.ScoresChoiced[i][0])`` and ``'.txt'``,
    resolved through ``Formating.get_abs_file_path``. Every item of
    ``results[i]`` becomes one line holding ``repr(item[0])``,
    ``repr(item[1])`` and ``repr(item[2][i])``.

    Args:
        filepath: Prefix of the output file names.
        results: Sequence of item sequences, one per chosen score.
    """
    for index_score, ordered_items in enumerate(results):
        score = self.preparedParameter.ScoresChoiced[index_score][0]
        target = Formating.get_abs_file_path(filepath + str(score) + '.txt')
        # The context manager closes the file even if a write fails midway.
        with open(target, 'w') as f:
            for item_result in ordered_items:
                f.write(repr(item_result[0]) + ";" + repr(item_result[1]) + ";" + repr(item_result[2][index_score]) + '\n')
def generating_Test_Graph(self): if not os.path.exists( Formating.get_abs_file_path(self.filePathTestGraph)): if self.graph == None: print "Reading Full graphs", datetime.today() self.graph = Formating.reading_graph(self.filePathGraph) print "Generating Testing graphs", datetime.today() self.testGraph = Formating.get_graph_from_period( self.graph, self.t1, self.t1_) networkx.write_graphml( self.testGraph, Formating.get_abs_file_path(self.filePathTestGraph)) else: print "Reading testing graph", datetime.today() self.testGraph = Formating.reading_graph(self.filePathTestGraph)
def __init__(self, graph, filepathNodesToCalculate, min_papers = 1): myfile = Formating.get_abs_file_path(filepathNodesToCalculate) if not os.path.exists(myfile): with open(myfile, 'w') as fnodes: self.get_pair_nodes_not_linked(graph,fnodes, min_papers) fnodes.close() else: print "Nodes not linked file already generated. please delete if you want a new one.", datetime.today()
def generating_Training_Graph(self): if not os.path.exists(Formating.get_abs_file_path(self.filePathTrainingGraph)): if self.graph == None: print "Reading Full graphs", datetime.today() self.graph = Formating.reading_graph(self.filePathGraph) print "Generating Trainnig graphs", datetime.today() self.trainnigGraph = Formating.get_graph_from_period(self.graph, self.t0, self.t0_) networkx.write_graphml(self.trainnigGraph, Formating.get_abs_file_path(self.filePathTrainingGraph)) else: print "Reading Trainnig graph", datetime.today() self.trainnigGraph = Formating.reading_graph(self.filePathTrainingGraph) for feature in self.featuresChoice: feature[0].graph = self.trainnigGraph
def readingResultsFile(self, filepath):
    """Read node pairs from a comma-separated file.

    Args:
        filepath: Path resolved through ``Formating.get_abs_file_path``.

    Returns:
        A list of ``[first, second]`` lists built from the first two
        comma-separated fields of every line.
    """
    source = Formating.get_abs_file_path(filepath)
    with open(source, 'r') as handle:
        # Drop the newline, split on commas, keep the first two fields.
        split_lines = (raw.replace('\n', '').split(',') for raw in handle)
        return [[fields[0], fields[1]] for fields in split_lines]
def __init__(self, preparedParameter, fileAllNodes): print "Starting Generating Weights for all Nodes", datetime.today() self.preparedParameter = preparedParameter self.filepathAllNodes = Formating.get_abs_file_path(fileAllNodes) self.preparedParameter.open_connection() self.preparedParameter.clean_database() fcontentAllNodes = open(self.filepathAllNodes, 'r') self.minValueCalculated = list( 99999 for x in self.preparedParameter.WeightsChoiced) self.maxValueCalculated = list( 0 for x in self.preparedParameter.WeightsChoiced) qtyFeatures = len(self.preparedParameter.WeightsChoiced) qtyNodesCalculated = 0 for lineofFile in fcontentAllNodes: item = VariableSelection.getItemFromLine(lineofFile) item_result = [] #executing the calculation for each features chosen at parameter for index_features in range(qtyFeatures): self.preparedParameter.WeightsChoiced[index_features][ 0].parameter = preparedParameter valueCalculated = self.preparedParameter.WeightsChoiced[ index_features][0].execute( item[0], item[1]) * self.preparedParameter.WeightsChoiced[ index_features][1] if valueCalculated < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = valueCalculated if valueCalculated > self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = valueCalculated item_result.append(valueCalculated) self.preparedParameter.add_weight(item[0], item[1], item_result) self.preparedParameter.add_weight(-1, -1, qtyNodesCalculated) self.preparedParameter.add_weight(-2, -2, repr(self.minValueCalculated)) self.preparedParameter.add_weight(-3, -3, repr(self.maxValueCalculated)) self.preparedParameter.close_connection() print "Finishinig Generating Weights for all Nodes", datetime.today()
def saving_calculateResult(self, filepath, results):
    """Write the calculated scores as a comma-separated file with a header.

    The header is ``no1,no2`` followed by ``getName()`` of every chosen
    score. Each result row contributes ``row[0]``, ``row[1]`` and the
    ``repr`` of ``row[2][i]`` for every chosen score position ``i``.

    Args:
        filepath: Output path, resolved through
            ``Formating.get_abs_file_path``.
        results: Iterable of ``[first_node, second_node, values]`` rows; the
            two node fields are concatenated as strings.
    """
    scores = self.preparedParameter.ScoresChoiced
    score_count = len(scores)
    # Build the header before opening the file, so a failing getName() does
    # not leave a truncated output file behind.
    header = ','.join(['no1,no2'] + [score[0].getName() for score in scores])

    # The context manager closes the file even if a write fails midway.
    with open(Formating.get_abs_file_path(filepath), 'w') as f:
        f.write(header + '\n')
        for itemResult in results:
            value = ''.join(
                ',' + repr(itemResult[2][index_score])
                for index_score in range(score_count))
            f.write(itemResult[0] + ',' + itemResult[1] + value + '\n')
def __init__(self, preparedParameter, fileAllNodes): print "Starting Generating Weights for all Nodes", datetime.today() self.preparedParameter = preparedParameter self.filepathAllNodes = Formating.get_abs_file_path(fileAllNodes) self.preparedParameter.open_connection() self.preparedParameter.clean_database() fcontentAllNodes = open(self.filepathAllNodes, 'r') self.minValueCalculated = list(99999 for x in self.preparedParameter.WeightsChoiced) self.maxValueCalculated = list(0 for x in self.preparedParameter.WeightsChoiced) qtyFeatures = len(self.preparedParameter.WeightsChoiced) qtyNodesCalculated = 0 for lineofFile in fcontentAllNodes: item = VariableSelection.getItemFromLine(lineofFile) item_result = [] #executing the calculation for each features chosen at parameter for index_features in range(qtyFeatures): self.preparedParameter.WeightsChoiced[index_features][0].parameter = preparedParameter valueCalculated = self.preparedParameter.WeightsChoiced[index_features][0].execute(item[0],item[1]) * self.preparedParameter.WeightsChoiced[index_features][1] if valueCalculated < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = valueCalculated if valueCalculated > self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = valueCalculated item_result.append(valueCalculated) self.preparedParameter.add_weight(item[0], item[1], item_result) self.preparedParameter.add_weight(-1,-1, qtyNodesCalculated) self.preparedParameter.add_weight(-2,-2, repr(self.minValueCalculated)) self.preparedParameter.add_weight(-3,-3, repr(self.maxValueCalculated)) self.preparedParameter.close_connection() print "Finishinig Generating Weights for all Nodes", datetime.today()
def __init__(self, graph, filepathNodesToCalculate, min_papers=1, allNodes=False, MAX_NUMBER_OF_PEOPLE_BETWEEN=1000): self.MAX_NUMBER_OF_PEOPLE_BETWEEN = MAX_NUMBER_OF_PEOPLE_BETWEEN myfile = Formating.get_abs_file_path(filepathNodesToCalculate) if not os.path.exists(myfile): with open(myfile, 'w') as fnodes: if allNodes: self.get_all_pair_nodes(graph, fnodes) else: self.get_pair_nodes_not_linked(graph, fnodes, min_papers) fnodes.close() else: print "Nodes not linked file already generated. please delete if you want a new one.", datetime.today( )
def reading_calculateResult_normalized(self, filepath):
    """Read a comma-separated result file, skipping its first line.

    Args:
        filepath: Path resolved through ``Formating.get_abs_file_path``.

    Returns:
        A list of ``[col0, col1, scores]`` rows, where ``scores`` holds the
        evaluated values of every column after the second.
    """
    results = []
    # The context manager closes the file, which was never closed before.
    with open(Formating.get_abs_file_path(filepath), 'r') as f:
        # The first line is skipped unconditionally (presumably a header).
        next(f, None)
        for line in f:
            cols = line.strip().replace('\n', '').split(',')
            # NOTE(review): eval() runs whatever expression the file holds.
            # This is only safe while the file comes from this program's own
            # output; confirm before pointing it at external data.
            scores = [eval(col) for col in cols[2:]]
            results.append([cols[0], cols[1], scores])
    return results
def __init__(self, parameter_file):
    """Load settings from a tab-separated ``key<TAB>value`` parameter file.

    Recognized keys become attributes of the same name: file-path keys are
    stored as text, numeric keys are converted with ``int`` or ``float``.
    The ``features`` key holds ``index:weight`` entries separated by ``;``.
    Each entry adds ``[feature, int(weight)]`` to ``self.FeaturesChoiced``,
    the feature being looked up by index in the list built below. Lines
    whose first column is not a recognized key are ignored.

    Args:
        parameter_file: Path resolved through
            ``Formating.get_abs_file_path``.
    """
    parameterFile = Formating.get_abs_file_path(parameter_file)

    # Position in this list is the index used by the 'features' setting.
    available = [
        AASFeature(),
        CNFeature(),
        JCFeature(),
        PAFeature(),
        TimeScore(),
        DomainTimeScore(),
        DomainTimeScorevTwo(),
        DomainJC(),
    ]
    self.FeaturesChoiced = []

    text_keys = (
        'original_file', 'graph_file', 'maxmincalculated_file',
        'trainnig_graph_file', 'test_graph_file', 'nodes_notlinked_file',
        'calculated_file', 'ordered_file', 'analysed_file',
    )
    int_keys = ('min_edges', 'lengthVertex', 't0', 't0_', 't1', 't1_')
    float_keys = ('decay', 'keyword_decay')

    with open(parameterFile) as f:
        lines = f.readlines()

    for raw in lines:
        cols = raw.strip().replace('\n', '').split('\t')
        key = cols[0]
        if key in text_keys:
            setattr(self, key, cols[1])
        elif key in int_keys:
            setattr(self, key, int(cols[1]))
        elif key in float_keys:
            setattr(self, key, float(cols[1]))
        elif key == 'features':
            for entry in cols[1].split(';'):
                parts = entry.split(':')
                self.FeaturesChoiced.append(
                    [available[int(parts[0])], int(parts[1])])
def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated): print "Starting Calculating Nodes not linked", datetime.today() self.preparedParameter = preparedParameter self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered) self.filepathMaxMinCalculated = Formating.get_abs_file_path( filepathMaxMinCalculated) self.filepathResult = Formating.get_abs_file_path(filepathResult) self.filepathNodesNotLinked = Formating.get_abs_file_path( filepathNodesNotLinked) #for each links that is not linked all the calculates is done. element = 0 qtyofResults = FormatingDataSets.getTotalLineNumbers( self.filepathNodesNotLinked) fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r') if os.path.exists(self.filepathResult): print "Calculate already done for this file, please delete if you want a new one.", datetime.today( ) return fcontentCalcResult = open(self.filepathResult, 'w') self.minValueCalculated = list( 99999 for x in self.preparedParameter.featuresChoice) self.maxValueCalculated = list( 0 for x in self.preparedParameter.featuresChoice) qtyFeatures = len(self.preparedParameter.featuresChoice) self.qtyDataCalculated = 0 out_q = multiprocessing.Queue() procs = [] nprocs = 100 for lineofFile in fcontentNodesNotLinked: element = element + 1 p = multiprocessing.Process( target=self.calculating_features, args=(lineofFile, element, qtyofResults, preparedParameter, qtyFeatures, self.minValueCalculated, self.maxValueCalculated, out_q)) procs.append(p) p.start() if len(procs) >= nprocs: for i in range(len(procs)): result = out_q.get() result = result.split('|') mini = eval(result[0]) maxi = eval(result[1]) self.qtyDataCalculated = self.qtyDataCalculated + int( result[2]) fcontentCalcResult.write(result[3]) for index_features in range(qtyFeatures): if mini[index_features] < self.minValueCalculated[ index_features]: self.minValueCalculated[index_features] = mini[ index_features] if maxi[index_features] > 
self.maxValueCalculated[ index_features]: self.maxValueCalculated[index_features] = maxi[ index_features] for p in procs: p.join() procs = [] for i in range(len(procs)): result = out_q.get() result = result.split('|') mini = eval(result[0]) maxi = eval(result[1]) self.qtyDataCalculated = self.qtyDataCalculated + int(result[2]) fcontentCalcResult.write(result[3]) for index_features in range(qtyFeatures): if mini[index_features] < self.minValueCalculated[ index_features]: self.minValueCalculated[index_features] = mini[ index_features] if maxi[index_features] > self.maxValueCalculated[ index_features]: self.maxValueCalculated[index_features] = maxi[ index_features] for p in procs: p.join() fcontentCalcResult.flush() fcontentCalcResult.close() fcontentNodesNotLinked.close() fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w') fcontentMaxMin.write( str(self.qtyDataCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated)) fcontentMaxMin.close() print "Calculating Nodes not linked finished", datetime.today()
def __init__(self, parameter_file):
    """Load settings from a tab-separated ``key<TAB>value`` parameter file.

    Recognized scalar keys become attributes of the same name: file-path
    keys are stored as text, numeric keys are converted with ``int`` or
    ``float``, and ``linear_combination`` is evaluated with ``eval``.

    Three keys hold ``;``-separated entries that fill lists:

    * ``scores``: ``index:weight,ordering`` adds
      ``[feature, float(weight), int(ordering)]`` to ``self.ScoresChoiced``.
    * ``weights``: ``index:weight`` adds ``[feature, int(weight)]`` to
      ``self.WeightsChoiced``.
    * ``weighted_scores``: ``index:weight-weightfeatures,ordering`` adds
      ``[feature, weight, weightfeatures, int(ordering)]`` to
      ``self.WeightedScoresChoiced``, with ``weight`` and ``weightfeatures``
      kept as text.

    Lines whose first column is not a recognized key are ignored.

    Args:
        parameter_file: Path resolved through
            ``Formating.get_abs_file_path``.
    """
    parameterFile = Formating.get_abs_file_path(parameter_file)

    # Positions in these lists are the indexes used by the 'scores',
    # 'weights' and 'weighted_scores' settings respectively.
    score_catalog = [
        AASFeature(),
        CNFeature(),
        JCFeature(),
        PAFeature(),
        TimeScore(),
        DomainTimeScore(),
        DomainJC(),
    ]
    weight_catalog = [WeightTimeScore(), WeightDomainScore()]
    weighted_score_catalog = [
        WCNFeature(),
        WAAFeature(),
        CNWFeature(),
        AAWFeature(),
        PAWFeature(),
        WSPLFeature(),
    ]

    self.ScoresChoiced = []
    self.WeightsChoiced = []
    self.WeightedScoresChoiced = []

    text_keys = (
        'original_file', 'graph_file', 'maxmincalculated_file',
        'trainnig_graph_file', 'test_graph_file', 'nodes_notlinked_file',
        'nodes_file', 'calculated_file', 'ordered_file', 'analysed_file',
        'result_random_file',
    )
    int_keys = ('min_edges', 'lengthVertex', 't0', 't0_', 't1', 't1_')
    float_keys = ('decay', 'domain_decay')

    with open(parameterFile) as f:
        lines = f.readlines()

    for raw in lines:
        cols = raw.strip().replace('\n', '').split('\t')
        key = cols[0]
        if key == 'linear_combination':
            # NOTE(review): eval() runs whatever expression the parameter
            # file holds; only use parameter files from a trusted source.
            self.linear_combination = eval(cols[1])
        elif key in text_keys:
            setattr(self, key, cols[1])
        elif key in int_keys:
            setattr(self, key, int(cols[1]))
        elif key in float_keys:
            setattr(self, key, float(cols[1]))
        elif key == 'scores':
            for entry in cols[1].split(';'):
                parts = entry.split(':')
                detail = parts[1].split(',')
                weight = float(detail[0])
                orderingType = int(detail[1])
                self.ScoresChoiced.append(
                    [score_catalog[int(parts[0])], weight, orderingType])
        elif key == 'weights':
            for entry in cols[1].split(';'):
                parts = entry.split(':')
                self.WeightsChoiced.append(
                    [weight_catalog[int(parts[0])], int(parts[1])])
        elif key == 'weighted_scores':
            # Entry shape, e.g. "0:1-1,0".
            for entry in cols[1].split(';'):
                parts = entry.split(':')
                halves = parts[1].split('-')
                weight = halves[0]
                tail = halves[1].split(',')
                weightfeatures = tail[0]
                orderingType = int(tail[1])
                self.WeightedScoresChoiced.append(
                    [weighted_score_catalog[int(parts[0])], weight,
                     weightfeatures, orderingType])
def saveResults(self, filepath, nodesNotLinked):
    """Write node pairs as ``first,second`` lines.

    Args:
        filepath: Output path, resolved through
            ``Formating.get_abs_file_path``.
        nodesNotLinked: Iterable of pairs whose first two elements are
            concatenated as strings.
    """
    target = Formating.get_abs_file_path(filepath)
    # Leaving the with-block closes the file, so no explicit close is needed.
    with open(target, 'w') as out:
        for pair in nodesNotLinked:
            line = pair[0] + ',' + pair[1] + '\n'
            out.write(line)
def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated): print "Starting Calculating Nodes not linked", datetime.today() self.preparedParameter = preparedParameter self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered) self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated) self.filepathResult = Formating.get_abs_file_path(filepathResult) self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked) #for each links that is not linked all the calculates is done. element = 0 qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked) fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r') if os.path.exists(self.filepathResult): print "Calculate already done for this file, please delete if you want a new one.", datetime.today() return fcontentCalcResult = open(self.filepathResult, 'w') self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice) self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice) qtyFeatures = len(self.preparedParameter.featuresChoice) self.qtyDataCalculated = 0 out_q = multiprocessing.Queue() procs = [] nprocs = 100 for lineofFile in fcontentNodesNotLinked: element = element+1 p = multiprocessing.Process(target=self.calculating_features, args=(lineofFile,element,qtyofResults , preparedParameter, qtyFeatures , self.minValueCalculated, self.maxValueCalculated, out_q)) procs.append(p) p.start() if len(procs) >= nprocs: for i in range(len(procs)): result = out_q.get() result = result.split('|') mini = eval(result[0]) maxi = eval(result[1]) self.qtyDataCalculated = self.qtyDataCalculated + int(result[2]) fcontentCalcResult.write(result[3]) for index_features in range(qtyFeatures): if mini[index_features] < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = mini[index_features] if maxi[index_features] > 
self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = maxi[index_features] for p in procs: p.join() procs = [] for i in range(len(procs)): result = out_q.get() result = result.split('|') mini = eval(result[0]) maxi = eval(result[1]) self.qtyDataCalculated = self.qtyDataCalculated + int(result[2]) fcontentCalcResult.write(result[3]) for index_features in range(qtyFeatures): if mini[index_features] < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = mini[index_features] if maxi[index_features] > self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = maxi[index_features] for p in procs: p.join() fcontentCalcResult.flush() fcontentCalcResult.close() fcontentNodesNotLinked.close() fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w') fcontentMaxMin.write(str(self.qtyDataCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) ) fcontentMaxMin.close() print "Calculating Nodes not linked finished", datetime.today()
def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated): print "Starting Calculating Nodes not linked", datetime.today() self.preparedParameter = preparedParameter self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered) self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated) self.filepathResult = Formating.get_abs_file_path(filepathResult) self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked) #for each links that is not linked all the calculates is done. element = 0 qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked) fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r') if os.path.exists(self.filepathResult): print "Calculate already done for this file, please delete if you want a new one.", datetime.today() self.reading_Max_min_file() return fcontentCalcResult = open(self.filepathResult, 'w') self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice) self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice) qtyFeatures = len(self.preparedParameter.featuresChoice) qtyNodesCalculated = 0 partialResults = [] for lineofFile in fcontentNodesNotLinked: element = element+1 item = VariableSelection.getItemFromLine(lineofFile) qtyothernodenotlinked = len(item[1]) newelement = 0 for neighbor_node in item[1]: newelement = newelement +1 qtyNodesCalculated = qtyNodesCalculated + 1 self.printProgressofEvents(element, qtyofResults, "Calculating features for nodes not liked: ") self.printProgressofEventsWihoutPercent(newelement, qtyothernodenotlinked, "Calculating nodes: " + str(item[0]) + ":" + str(neighbor_node) ) item_result = [] #executing the calculation for each features chosen at parameter for index_features in range(qtyFeatures): self.preparedParameter.featuresChoice[index_features][0].parameter = preparedParameter valueCalculated = 
self.preparedParameter.featuresChoice[index_features][0].execute(item[0],neighbor_node) * self.preparedParameter.featuresChoice[index_features][1] if valueCalculated < self.minValueCalculated[index_features]: self.minValueCalculated[index_features] = valueCalculated if valueCalculated > self.maxValueCalculated[index_features]: self.maxValueCalculated[index_features] = valueCalculated item_result.append(valueCalculated) lineContent = [] #generating a vetor with the name of the feature and the result of the calculate for indice in range(qtyFeatures): lineContent.append(str({str(self.preparedParameter.featuresChoice[indice]):item_result[indice]}) ) partialResults.append([lineContent, item[0], neighbor_node]) if element % 10 == 0: for item in partialResults: for calc in item[0]: fcontentCalcResult.write(calc + '\t') fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2]) + '\n' ) partialResults = [] for item in partialResults: for calc in item[0]: fcontentCalcResult.write(calc + '\t') fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2]) + '\n' ) fcontentCalcResult.flush() fcontentCalcResult.close() fcontentNodesNotLinked.close() fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w') fcontentMaxMin.write(str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) ) fcontentMaxMin.close() print "Calculating Nodes not linked finished", datetime.today()
def save_Max_min_file(self, filepath, qtyCalculated, minValues, maxValues):
    """Write the calculated count and the min/max values to one line.

    The file receives ``str(qtyCalculated)``, ``repr(minValues)`` and
    ``repr(maxValues)`` separated by tabs, without a trailing newline.

    Args:
        filepath: Output path, resolved through
            ``Formating.get_abs_file_path``.
        qtyCalculated: Count written as the first field.
        minValues: Written as its ``repr`` in the second field.
        maxValues: Written as its ``repr`` in the third field.
    """
    fields = [str(qtyCalculated), repr(minValues), repr(maxValues)]
    # The context manager closes the file even if the write fails.
    with open(Formating.get_abs_file_path(filepath), 'w') as fcontentMaxMin:
        fcontentMaxMin.write('\t'.join(fields))
def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated): print "Starting Calculating Nodes not linked", datetime.today() self.preparedParameter = preparedParameter self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered) self.filepathMaxMinCalculated = Formating.get_abs_file_path( filepathMaxMinCalculated) self.filepathResult = Formating.get_abs_file_path(filepathResult) self.filepathNodesNotLinked = Formating.get_abs_file_path( filepathNodesNotLinked) #for each links that is not linked all the calculates is done. element = 0 qtyofResults = FormatingDataSets.getTotalLineNumbers( self.filepathNodesNotLinked) fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r') if os.path.exists(self.filepathResult): print "Calculate already done for this file, please delete if you want a new one.", datetime.today( ) self.reading_Max_min_file() return fcontentCalcResult = open(self.filepathResult, 'w') self.minValueCalculated = list( 99999 for x in self.preparedParameter.featuresChoice) self.maxValueCalculated = list( 0 for x in self.preparedParameter.featuresChoice) qtyFeatures = len(self.preparedParameter.featuresChoice) qtyNodesCalculated = 0 partialResults = [] for lineofFile in fcontentNodesNotLinked: element = element + 1 item = VariableSelection.getItemFromLine(lineofFile) qtyothernodenotlinked = len(item[1]) newelement = 0 for neighbor_node in item[1]: newelement = newelement + 1 qtyNodesCalculated = qtyNodesCalculated + 1 self.printProgressofEvents( element, qtyofResults, "Calculating features for nodes not liked: ") self.printProgressofEventsWihoutPercent( newelement, qtyothernodenotlinked, "Calculating nodes: " + str(item[0]) + ":" + str(neighbor_node)) item_result = [] #executing the calculation for each features chosen at parameter for index_features in range(qtyFeatures): self.preparedParameter.featuresChoice[index_features][ 0].parameter = preparedParameter valueCalculated = 
self.preparedParameter.featuresChoice[ index_features][0].execute( item[0], neighbor_node ) * self.preparedParameter.featuresChoice[ index_features][1] if valueCalculated < self.minValueCalculated[ index_features]: self.minValueCalculated[ index_features] = valueCalculated if valueCalculated > self.maxValueCalculated[ index_features]: self.maxValueCalculated[ index_features] = valueCalculated item_result.append(valueCalculated) lineContent = [] #generating a vetor with the name of the feature and the result of the calculate for indice in range(qtyFeatures): lineContent.append( str({ str(self.preparedParameter.featuresChoice[indice]): item_result[indice] })) partialResults.append([lineContent, item[0], neighbor_node]) if element % 10 == 0: for item in partialResults: for calc in item[0]: fcontentCalcResult.write(calc + '\t') fcontentCalcResult.write( str(item[1]) + '\t' + str(item[2]) + '\n') partialResults = [] for item in partialResults: for calc in item[0]: fcontentCalcResult.write(calc + '\t') fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2]) + '\n') fcontentCalcResult.flush() fcontentCalcResult.close() fcontentNodesNotLinked.close() fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w') fcontentMaxMin.write( str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated)) fcontentMaxMin.close() print "Calculating Nodes not linked finished", datetime.today()
def __init__(self, parameter_file):
    """Load settings from a tab-separated ``key<TAB>value`` parameter file.

    Recognized keys become attributes of the same name: file-path keys are
    stored as text, numeric keys are converted with ``int`` or ``float``.
    The ``features`` key holds ``index:weight`` entries separated by ``;``.
    Each entry adds ``[feature, int(weight)]`` to ``self.FeaturesChoiced``,
    the feature being looked up by index in the list built below. Lines
    whose first column is not a recognized key are ignored.

    Args:
        parameter_file: Path resolved through
            ``Formating.get_abs_file_path``.
    """
    parameterFile = Formating.get_abs_file_path(parameter_file)

    # Position in this list is the index used by the 'features' setting.
    available = [
        AASFeature(),
        CNFeature(),
        JCFeature(),
        PAFeature(),
        TimeScore(),
        DomainTimeScore(),
        DomainTimeScorevTwo(),
        DomainJC(),
        WeightTimeScore(),
        WeightDomainScore(),
    ]
    self.FeaturesChoiced = []

    text_keys = (
        'original_file', 'graph_file', 'maxmincalculated_file',
        'trainnig_graph_file', 'test_graph_file', 'nodes_notlinked_file',
        'nodes_file', 'calculated_file', 'ordered_file', 'analysed_file',
    )
    int_keys = ('min_edges', 'lengthVertex', 't0', 't0_', 't1', 't1_')
    float_keys = ('decay', 'keyword_decay')

    with open(parameterFile) as f:
        lines = f.readlines()

    for raw in lines:
        cols = raw.strip().replace('\n', '').split('\t')
        key = cols[0]
        if key in text_keys:
            setattr(self, key, cols[1])
        elif key in int_keys:
            setattr(self, key, int(cols[1]))
        elif key in float_keys:
            setattr(self, key, float(cols[1]))
        elif key == 'features':
            for entry in cols[1].split(';'):
                parts = entry.split(':')
                self.FeaturesChoiced.append(
                    [available[int(parts[0])], int(parts[1])])