def generating_Training_Graph(self):
        if not os.path.exists(
                Formating.get_abs_file_path(self.filePathTrainingGraph)):
            if self.graph == None:
                print "Reading Full graphs", datetime.today()
                self.graph = Formating.reading_graph(self.filePathGraph)

            print "Generating Trainnig graphs", datetime.today()

            self.trainnigGraph = Formating.get_graph_from_period(
                self.graph, self.t0, self.t0_)

            networkx.write_graphml(
                self.trainnigGraph,
                Formating.get_abs_file_path(self.filePathTrainingGraph))
        else:
            print "Reading Trainnig graph", datetime.today()
            self.trainnigGraph = Formating.reading_graph(
                self.filePathTrainingGraph)

        for w_score in self.WeightedScoresChoiced:
            w_score[0].graph = self.trainnigGraph
        for score in self.ScoresChoiced:
            score[0].graph = self.trainnigGraph
        for w in self.WeightsChoiced:
            w[0].graph = self.trainnigGraph
 def executingCalculate(self):
     dataInicial = datetime.today()
     
     print "Starting Calculating Nodes not linked", dataInicial
     element = 0
     qtyofNodesToProcess = len(self.NodesNotLinked)
     qtyFeatures = len(self.preparedParameter.ScoresChoiced)
     results = []
     #for each node
     for nodenotLinked in self.NodesNotLinked:
         element = element+1
         Formating.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
         item_result = []
         for index_features in range(qtyFeatures):
             self.preparedParameter.ScoresChoiced[index_features][0].parameter = self.preparedParameter
             valueCalculated = self.preparedParameter.ScoresChoiced[index_features][0].execute(nodenotLinked[0],nodenotLinked[1]  )
             if valueCalculated < self.minValueCalculated[index_features]:
                 self.minValueCalculated[index_features] = valueCalculated
             if valueCalculated > self.maxValueCalculated[index_features]:
                 self.maxValueCalculated[index_features] = valueCalculated
                     
             item_result.append(valueCalculated)
             
         lineContent = []    
             #generating a vetor with the name of the feature and the result of the calculate
         for indice in range(qtyFeatures):
             lineContent.append(item_result[indice])
                 
                 
         results.append([nodenotLinked[0], nodenotLinked[1], lineContent ])
         
     print "Calculating Nodes not linked finished", dataInicial, datetime.today()
     self.qtyDataCalculated = len(results)
     return results    
 def generate_dblpGeneralData(self, parameter_file):
     """Run the dataset preparation steps driven by ``parameter_file``.

     Builds a ``ParameterUtil`` from the file, has ``Formating`` read the
     original dataset and save the graph, then constructs
     ``Parameterization`` and ``VariableSelection`` from the parsed
     settings. Nothing is returned.
     """
     settings = ParameterUtil(parameter_file)

     datasetFormatter = Formating(settings.original_file, settings.graph_file)
     datasetFormatter.readingOrginalDataset()
     datasetFormatter.saveGraph()

     myparams = Parameterization(
         settings.top_rank,
         settings.distanceNeighbors,
         settings.lengthVertex,
         settings.t0,
         settings.t0_,
         settings.t1,
         settings.t1_,
         settings.FeaturesChoiced,
         settings.graph_file,
         settings.trainnig_graph_file,
         settings.test_graph_file,
         settings.decay)

     # The instance is not used afterwards; presumably the constructor
     # itself does the work (TODO confirm against VariableSelection).
     VariableSelection(myparams.trainnigGraph, settings.nodes_notlinked_file)
 def generating_Test_Graph(self ):
     if not os.path.exists(Formating.get_abs_file_path(self.filePathTestGraph)):
         if self.graph == None:
             print "Reading Full graphs", datetime.today()
             self.graph = Formating.reading_graph(self.filePathGraph)
         print "Generating Testing graphs", datetime.today()
     
         self.testGraph = Formating.get_graph_from_period(self.graph, self.t1, self.t1_)
         networkx.write_graphml(self.testGraph, Formating.get_abs_file_path(self.filePathTestGraph))
     else:
         print "Reading testing graph", datetime.today()
         self.testGraph = Formating.reading_graph(self.filePathTestGraph)
 def saving_orderedResult(self, filepath, results):
     """Write one ';'-separated text file per entry of ``results``.

     The file for index ``i`` is named ``filepath`` + ``str()`` of element 0
     of ``self.preparedParameter.ScoresChoiced[i]`` + ``'.txt'``. Each item
     of ``results[i]`` becomes one line holding ``repr(item[0])``,
     ``repr(item[1])`` and ``repr(item[2][i])``.
     """
     for index_score, ordered_items in enumerate(results):
         score = self.preparedParameter.ScoresChoiced[index_score][0]
         target = Formating.get_abs_file_path(filepath + str(score) + '.txt')
         # "with" closes the handle even when a write or repr() raises.
         with open(target, 'w') as f:
             for item_result in ordered_items:
                 f.write(repr(item_result[0]) + ";" + repr(item_result[1]) + ";" + repr(item_result[2][index_score]) + '\n')
Ejemplo n.º 6
0
    def generating_Test_Graph(self):
        if not os.path.exists(
                Formating.get_abs_file_path(self.filePathTestGraph)):
            if self.graph == None:
                print "Reading Full graphs", datetime.today()
                self.graph = Formating.reading_graph(self.filePathGraph)
            print "Generating Testing graphs", datetime.today()

            self.testGraph = Formating.get_graph_from_period(
                self.graph, self.t1, self.t1_)
            networkx.write_graphml(
                self.testGraph,
                Formating.get_abs_file_path(self.filePathTestGraph))
        else:
            print "Reading testing graph", datetime.today()
            self.testGraph = Formating.reading_graph(self.filePathTestGraph)
 def __init__(self, graph,  filepathNodesToCalculate, min_papers = 1):
     myfile = Formating.get_abs_file_path(filepathNodesToCalculate)
     if not os.path.exists(myfile):
         with open(myfile, 'w') as fnodes:
             self.get_pair_nodes_not_linked(graph,fnodes, min_papers)
             fnodes.close()
     else:
         print "Nodes not linked file already generated. please delete if you want a new one.", datetime.today()
 def generating_Training_Graph(self):
     if not os.path.exists(Formating.get_abs_file_path(self.filePathTrainingGraph)):
         if self.graph == None:
             print "Reading Full graphs", datetime.today()
             self.graph = Formating.reading_graph(self.filePathGraph)
         
         print "Generating Trainnig graphs", datetime.today()
        
         self.trainnigGraph = Formating.get_graph_from_period(self.graph, self.t0, self.t0_)
         
         networkx.write_graphml(self.trainnigGraph, Formating.get_abs_file_path(self.filePathTrainingGraph))
     else:
         print "Reading Trainnig graph", datetime.today()
         self.trainnigGraph = Formating.reading_graph(self.filePathTrainingGraph)
     
     for feature in self.featuresChoice:
         feature[0].graph = self.trainnigGraph
Ejemplo n.º 9
0
 def readingResultsFile(self, filepath):
     """Read a comma-separated pair file and return its first two columns.

     ``filepath`` is resolved through ``Formating.get_abs_file_path``.
     Each line has its newline removed and is split on ','. The result is
     a list of ``[first_column, second_column]`` lists of strings. A line
     with fewer than two columns raises ``IndexError``.
     """
     results = []
     myfile = Formating.get_abs_file_path(filepath)
     # The context manager closes the file; no explicit close() is needed.
     with open(myfile, 'r') as fileNodesNotLinked:
         for lineofFile in fileNodesNotLinked:
             fields = lineofFile.replace('\n', '').split(',')
             results.append([fields[0], fields[1]])
     return results
    def __init__(self, preparedParameter, fileAllNodes):

        print "Starting Generating Weights for all Nodes", datetime.today()

        self.preparedParameter = preparedParameter

        self.filepathAllNodes = Formating.get_abs_file_path(fileAllNodes)

        self.preparedParameter.open_connection()
        self.preparedParameter.clean_database()

        fcontentAllNodes = open(self.filepathAllNodes, 'r')

        self.minValueCalculated = list(
            99999 for x in self.preparedParameter.WeightsChoiced)
        self.maxValueCalculated = list(
            0 for x in self.preparedParameter.WeightsChoiced)

        qtyFeatures = len(self.preparedParameter.WeightsChoiced)
        qtyNodesCalculated = 0

        for lineofFile in fcontentAllNodes:
            item = VariableSelection.getItemFromLine(lineofFile)
            item_result = []
            #executing the calculation for each features chosen at parameter
            for index_features in range(qtyFeatures):
                self.preparedParameter.WeightsChoiced[index_features][
                    0].parameter = preparedParameter
                valueCalculated = self.preparedParameter.WeightsChoiced[
                    index_features][0].execute(
                        item[0],
                        item[1]) * self.preparedParameter.WeightsChoiced[
                            index_features][1]

                if valueCalculated < self.minValueCalculated[index_features]:
                    self.minValueCalculated[index_features] = valueCalculated
                if valueCalculated > self.maxValueCalculated[index_features]:
                    self.maxValueCalculated[index_features] = valueCalculated

                item_result.append(valueCalculated)

            self.preparedParameter.add_weight(item[0], item[1], item_result)

        self.preparedParameter.add_weight(-1, -1, qtyNodesCalculated)
        self.preparedParameter.add_weight(-2, -2,
                                          repr(self.minValueCalculated))
        self.preparedParameter.add_weight(-3, -3,
                                          repr(self.maxValueCalculated))
        self.preparedParameter.close_connection()
        print "Finishinig Generating Weights for all Nodes", datetime.today()
 def saving_calculateResult(self, filepath, results):
     """Write the calculated scores as a comma-separated file with a header.

     The header is ``no1,no2`` followed by ``getName()`` of element 0 of
     each entry in ``self.preparedParameter.ScoresChoiced``. Each item of
     ``results`` becomes one line: ``item[0]``, ``item[1]`` (concatenated
     as strings) and ``repr(item[2][i])`` for every score index ``i``.
     """
     chosenScores = self.preparedParameter.ScoresChoiced
     # "with" closes the handle even when building a line raises.
     with open(Formating.get_abs_file_path(filepath), 'w') as f:
         header = 'no1,no2' + ''.join(
             ',' + chosen[0].getName() for chosen in chosenScores)
         f.write(header + '\n')

         for itemResult in results:
             value = ''.join(
                 ',' + repr(itemResult[2][index_score])
                 for index_score in range(len(chosenScores)))
             f.write(itemResult[0] + ',' + itemResult[1] + value + '\n')
 def __init__(self, preparedParameter, fileAllNodes):
     
     print "Starting Generating Weights for all Nodes", datetime.today()
     
     self.preparedParameter = preparedParameter
    
     self.filepathAllNodes = Formating.get_abs_file_path(fileAllNodes)
     
     self.preparedParameter.open_connection()
     self.preparedParameter.clean_database()
     
     fcontentAllNodes = open(self.filepathAllNodes, 'r')
     
     self.minValueCalculated = list(99999 for x in self.preparedParameter.WeightsChoiced)
     self.maxValueCalculated = list(0 for x in self.preparedParameter.WeightsChoiced)
     
     qtyFeatures = len(self.preparedParameter.WeightsChoiced)
     qtyNodesCalculated = 0
     
     for lineofFile in fcontentAllNodes:
         item = VariableSelection.getItemFromLine(lineofFile)
         item_result = []
         #executing the calculation for each features chosen at parameter
         for index_features in range(qtyFeatures):
             self.preparedParameter.WeightsChoiced[index_features][0].parameter = preparedParameter
             valueCalculated = self.preparedParameter.WeightsChoiced[index_features][0].execute(item[0],item[1]) * self.preparedParameter.WeightsChoiced[index_features][1]
                 
             if valueCalculated < self.minValueCalculated[index_features]:
                 self.minValueCalculated[index_features] = valueCalculated
             if valueCalculated > self.maxValueCalculated[index_features]:
                 self.maxValueCalculated[index_features] = valueCalculated
                     
             item_result.append(valueCalculated)
             
         self.preparedParameter.add_weight(item[0], item[1], item_result)
             
             
     
     self.preparedParameter.add_weight(-1,-1, qtyNodesCalculated)
     self.preparedParameter.add_weight(-2,-2, repr(self.minValueCalculated))
     self.preparedParameter.add_weight(-3,-3, repr(self.maxValueCalculated))
     self.preparedParameter.close_connection()
     print "Finishinig Generating Weights for all Nodes", datetime.today()
     
     
Ejemplo n.º 13
0
 def __init__(self,
              graph,
              filepathNodesToCalculate,
              min_papers=1,
              allNodes=False,
              MAX_NUMBER_OF_PEOPLE_BETWEEN=1000):
     self.MAX_NUMBER_OF_PEOPLE_BETWEEN = MAX_NUMBER_OF_PEOPLE_BETWEEN
     myfile = Formating.get_abs_file_path(filepathNodesToCalculate)
     if not os.path.exists(myfile):
         with open(myfile, 'w') as fnodes:
             if allNodes:
                 self.get_all_pair_nodes(graph, fnodes)
             else:
                 self.get_pair_nodes_not_linked(graph, fnodes, min_papers)
             fnodes.close()
     else:
         print "Nodes not linked file already generated. please delete if you want a new one.", datetime.today(
         )
 def reading_calculateResult_normalized(self, filepath):
     """Read a comma-separated score file, skipping its first line.

     Returns a list of ``[col0, col1, [score, ...]]`` where ``col0`` and
     ``col1`` are the first two columns as strings and each remaining
     column is passed through ``eval``. A data line with fewer than two
     columns raises ``IndexError``.
     """
     results = []
     # "with" ensures the file is closed; it used to stay open.
     with open(Formating.get_abs_file_path(filepath), 'r') as f:
         next(f, None)  # the first line is a header, not data
         for line in f:
             cols = line.strip().split(',')
             # NOTE(review): eval() executes whatever text the file holds.
             # If this file can come from an untrusted source, switch to a
             # strict numeric parser.
             scores = [eval(col) for col in cols[2:]]
             results.append([cols[0], cols[1], scores])
     return results
 def __init__(self, parameter_file):
     """Parse a tab-separated parameter file into attributes of this object.

     Each line is stripped and split on tabs; column 0 is the option name
     and column 1 its value. Recognised names are stored as an attribute
     of the same name, as text, ``int`` or ``float`` depending on the
     option. The ``features`` option is a ';'-separated list of
     ``index:weight`` pairs; each one appends
     ``[AllFeatures[index], int(weight)]`` to ``self.FeaturesChoiced``.
     Unrecognised names are ignored.
     """
     parameterFile = Formating.get_abs_file_path(parameter_file)

     # The position in this list is the index used by the 'features' option.
     AllFeatures = [
         AASFeature(),
         CNFeature(),
         JCFeature(),
         PAFeature(),
         TimeScore(),
         DomainTimeScore(),
         DomainTimeScorevTwo(),
         DomainJC(),
     ]

     self.FeaturesChoiced = []

     textOptions = ('original_file', 'graph_file', 'maxmincalculated_file',
                    'trainnig_graph_file', 'test_graph_file',
                    'nodes_notlinked_file', 'calculated_file',
                    'ordered_file', 'analysed_file')
     intOptions = ('min_edges', 'lengthVertex', 't0', 't0_', 't1', 't1_')
     floatOptions = ('decay', 'keyword_decay')

     with open(parameterFile) as f:
         lines = f.readlines()

     for rawLine in lines:
         cols = rawLine.strip().split('\t')
         option = cols[0]
         if option in textOptions:
             setattr(self, option, cols[1])
         elif option in intOptions:
             setattr(self, option, int(cols[1]))
         elif option in floatOptions:
             setattr(self, option, float(cols[1]))
         elif option == 'features':
             for feature in cols[1].split(';'):
                 featureandweight = feature.split(':')
                 chosen = AllFeatures[int(featureandweight[0])]
                 self.FeaturesChoiced.append([chosen, int(featureandweight[1])])
Ejemplo n.º 16
0
    def __init__(self, preparedParameter, filepathNodesNotLinked,
                 filepathResult, filePathOrdered, filepathMaxMinCalculated):
        print "Starting Calculating Nodes not linked", datetime.today()

        self.preparedParameter = preparedParameter
        self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
        self.filepathMaxMinCalculated = Formating.get_abs_file_path(
            filepathMaxMinCalculated)
        self.filepathResult = Formating.get_abs_file_path(filepathResult)
        self.filepathNodesNotLinked = Formating.get_abs_file_path(
            filepathNodesNotLinked)
        #for each links that is not linked all the calculates is done.
        element = 0
        qtyofResults = FormatingDataSets.getTotalLineNumbers(
            self.filepathNodesNotLinked)
        fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
        if os.path.exists(self.filepathResult):
            print "Calculate already done for this file, please delete if you want a new one.", datetime.today(
            )
            return

        fcontentCalcResult = open(self.filepathResult, 'w')

        self.minValueCalculated = list(
            99999 for x in self.preparedParameter.featuresChoice)
        self.maxValueCalculated = list(
            0 for x in self.preparedParameter.featuresChoice)

        qtyFeatures = len(self.preparedParameter.featuresChoice)
        self.qtyDataCalculated = 0

        out_q = multiprocessing.Queue()
        procs = []
        nprocs = 100
        for lineofFile in fcontentNodesNotLinked:
            element = element + 1

            p = multiprocessing.Process(
                target=self.calculating_features,
                args=(lineofFile, element, qtyofResults, preparedParameter,
                      qtyFeatures, self.minValueCalculated,
                      self.maxValueCalculated, out_q))
            procs.append(p)
            p.start()

            if len(procs) >= nprocs:
                for i in range(len(procs)):
                    result = out_q.get()
                    result = result.split('|')

                    mini = eval(result[0])
                    maxi = eval(result[1])

                    self.qtyDataCalculated = self.qtyDataCalculated + int(
                        result[2])
                    fcontentCalcResult.write(result[3])
                    for index_features in range(qtyFeatures):
                        if mini[index_features] < self.minValueCalculated[
                                index_features]:
                            self.minValueCalculated[index_features] = mini[
                                index_features]
                        if maxi[index_features] > self.maxValueCalculated[
                                index_features]:
                            self.maxValueCalculated[index_features] = maxi[
                                index_features]

                for p in procs:
                    p.join()
                procs = []

        for i in range(len(procs)):
            result = out_q.get()
            result = result.split('|')

            mini = eval(result[0])
            maxi = eval(result[1])
            self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])

            fcontentCalcResult.write(result[3])

            for index_features in range(qtyFeatures):
                if mini[index_features] < self.minValueCalculated[
                        index_features]:
                    self.minValueCalculated[index_features] = mini[
                        index_features]
                if maxi[index_features] > self.maxValueCalculated[
                        index_features]:
                    self.maxValueCalculated[index_features] = maxi[
                        index_features]

        for p in procs:
            p.join()

        fcontentCalcResult.flush()
        fcontentCalcResult.close()
        fcontentNodesNotLinked.close()
        fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
        fcontentMaxMin.write(
            str(self.qtyDataCalculated) + '\t' +
            repr(self.minValueCalculated) + '\t' +
            repr(self.maxValueCalculated))
        fcontentMaxMin.close()
        print "Calculating Nodes not linked finished", datetime.today()
 def __init__(self, parameter_file):
     """Parse a tab-separated parameter file into attributes of this object.

     Each line is stripped and split on tabs; column 0 is the option name
     and column 1 its value. Recognised plain options are stored as an
     attribute of the same name (text, ``int`` or ``float`` depending on
     the option); ``linear_combination`` is passed through ``eval``.

     Three list options are ';'-separated:

     * ``scores``: ``index:weight,ordering`` appends
       ``[AllFeatures[index], float(weight), int(ordering)]`` to
       ``self.ScoresChoiced``.
     * ``weights``: ``index:weight`` appends
       ``[WeightedFeatures[index], int(weight)]`` to
       ``self.WeightsChoiced``.
     * ``weighted_scores``: ``index:weight-weightfeatures,ordering``
       appends ``[FeaturesForWeight[index], weight, weightfeatures,
       int(ordering)]`` to ``self.WeightedScoresChoiced``; ``weight`` and
       ``weightfeatures`` are kept as text.

     Unrecognised names are ignored.
     """
     parameterFile = Formating.get_abs_file_path(parameter_file)

     # The position in each list is the index used by the option that
     # refers to it. LSFeature is intentionally not part of AllFeatures.
     AllFeatures = [
         AASFeature(),
         CNFeature(),
         JCFeature(),
         PAFeature(),
         TimeScore(),
         DomainTimeScore(),
         DomainJC(),
     ]

     WeightedFeatures = [
         WeightTimeScore(),
         WeightDomainScore(),
     ]

     FeaturesForWeight = [
         WCNFeature(),
         WAAFeature(),
         CNWFeature(),
         AAWFeature(),
         PAWFeature(),
         WSPLFeature(),
     ]

     self.ScoresChoiced = []
     self.WeightsChoiced = []
     self.WeightedScoresChoiced = []

     textOptions = ('original_file', 'graph_file', 'maxmincalculated_file',
                    'trainnig_graph_file', 'test_graph_file',
                    'nodes_notlinked_file', 'nodes_file', 'calculated_file',
                    'ordered_file', 'analysed_file', 'result_random_file')
     intOptions = ('min_edges', 'lengthVertex', 't0', 't0_', 't1', 't1_')
     floatOptions = ('decay', 'domain_decay')

     with open(parameterFile) as f:
         lines = f.readlines()

     for rawLine in lines:
         cols = rawLine.strip().split('\t')
         option = cols[0]

         if option == 'linear_combination':
             # NOTE(review): eval() executes whatever the parameter file
             # contains; only use parameter files from a trusted source.
             self.linear_combination = eval(cols[1])
         elif option in textOptions:
             setattr(self, option, cols[1])
         elif option in intOptions:
             setattr(self, option, int(cols[1]))
         elif option in floatOptions:
             setattr(self, option, float(cols[1]))
         elif option == 'scores':
             for feature in cols[1].split(';'):
                 featureandweight = feature.split(':')
                 spec = featureandweight[1].split(',')
                 weight = float(spec[0])
                 orderingType = int(spec[1])
                 self.ScoresChoiced.append(
                     [AllFeatures[int(featureandweight[0])], weight, orderingType])
         elif option == 'weights':
             for feature in cols[1].split(';'):
                 featureandweight = feature.split(':')
                 self.WeightsChoiced.append(
                     [WeightedFeatures[int(featureandweight[0])], int(featureandweight[1])])
         elif option == 'weighted_scores':
             # Entry layout, e.g. "0:1-1,0".
             for feature in cols[1].split(';'):
                 featureandweight = feature.split(':')
                 dashParts = featureandweight[1].split('-')
                 weight = dashParts[0]
                 tail = dashParts[1].split(',')
                 weightfeatures = tail[0]
                 orderingType = int(tail[1])
                 self.WeightedScoresChoiced.append(
                     [FeaturesForWeight[int(featureandweight[0])],
                      weight, weightfeatures, orderingType])
Ejemplo n.º 18
0
 def saveResults(self, filepath, nodesNotLinked):
     """Write each pair of ``nodesNotLinked`` as one ``a,b`` line.

     ``filepath`` is resolved through ``Formating.get_abs_file_path`` and
     the file is overwritten. Elements 0 and 1 of every pair are
     concatenated as strings, so both must already be strings.
     """
     myfile = Formating.get_abs_file_path(filepath)
     # The context manager closes the file; no explicit close() is needed.
     with open(myfile, 'w') as fileNodesNotLinked:
         for nodeNotLinked in nodesNotLinked:
             fileNodesNotLinked.write(nodeNotLinked[0] + ',' + nodeNotLinked[1] + '\n')
	def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated):
		print "Starting Calculating Nodes not linked", datetime.today()
		
		self.preparedParameter = preparedParameter
		self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
		self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated)
		self.filepathResult = Formating.get_abs_file_path(filepathResult)
		self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked)
		#for each links that is not linked all the calculates is done.
		element = 0
		qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked)
		fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
		if os.path.exists(self.filepathResult):
			print "Calculate already done for this file, please delete if you want a new one.", datetime.today()
			return
		
		fcontentCalcResult = open(self.filepathResult, 'w')
		
		self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice)
		self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice)
		
		qtyFeatures = len(self.preparedParameter.featuresChoice)
		self.qtyDataCalculated = 0
		
		out_q = multiprocessing.Queue()
		procs = []
		nprocs = 100
		for lineofFile in fcontentNodesNotLinked:
			element = element+1
			
			p = multiprocessing.Process(target=self.calculating_features, args=(lineofFile,element,qtyofResults  , preparedParameter, qtyFeatures , self.minValueCalculated, self.maxValueCalculated,  out_q))
			procs.append(p)
			p.start()
			
			
			if len(procs) >= nprocs:
				for i in range(len(procs)):
					result  = out_q.get()
					result = result.split('|')
					
					mini = eval(result[0])
					maxi = eval(result[1])
					
					self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])
					fcontentCalcResult.write(result[3])
					for index_features in range(qtyFeatures):
						if   mini[index_features] < self.minValueCalculated[index_features]:
							self.minValueCalculated[index_features] = mini[index_features]
						if maxi[index_features] > self.maxValueCalculated[index_features]:
							self.maxValueCalculated[index_features] = maxi[index_features]
							
				for p in procs:
					p.join()
				procs = []
		
		for i in range(len(procs)):
			result  = out_q.get()
			result = result.split('|')
					
			mini = eval(result[0])
			maxi = eval(result[1])
			self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])
			
			fcontentCalcResult.write(result[3])
			
			for index_features in range(qtyFeatures):
				if   mini[index_features] < self.minValueCalculated[index_features]:
					self.minValueCalculated[index_features] = mini[index_features]
				if maxi[index_features] > self.maxValueCalculated[index_features]:
					self.maxValueCalculated[index_features] = maxi[index_features]
			
		for p in procs:
			p.join()
				
		fcontentCalcResult.flush()
		fcontentCalcResult.close()
		fcontentNodesNotLinked.close()
		fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
		fcontentMaxMin.write(str(self.qtyDataCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) )
		fcontentMaxMin.close()
		print "Calculating Nodes not linked finished", datetime.today()
		
		
Ejemplo n.º 20
0
 def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated):
     print "Starting Calculating Nodes not linked", datetime.today()
     
     self.preparedParameter = preparedParameter
     self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
     self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated)
     self.filepathResult = Formating.get_abs_file_path(filepathResult)
     self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked)
     #for each links that is not linked all the calculates is done.
     element = 0
     qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked)
     fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
     if os.path.exists(self.filepathResult):
         print "Calculate already done for this file, please delete if you want a new one.", datetime.today()
         self.reading_Max_min_file()
         return
     
     fcontentCalcResult = open(self.filepathResult, 'w')
     
     self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice)
     self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice)
     
     qtyFeatures = len(self.preparedParameter.featuresChoice)
     qtyNodesCalculated = 0
     partialResults = []
     for lineofFile in fcontentNodesNotLinked:
         element = element+1
         item = VariableSelection.getItemFromLine(lineofFile)
         qtyothernodenotlinked = len(item[1])
         newelement = 0
         for neighbor_node in item[1]:
             newelement = newelement +1
             qtyNodesCalculated = qtyNodesCalculated + 1
             self.printProgressofEvents(element, qtyofResults, "Calculating features for nodes not liked: ")
             self.printProgressofEventsWihoutPercent(newelement, qtyothernodenotlinked, "Calculating nodes: " + str(item[0])  + ":" +  str(neighbor_node) )
         
             item_result = []
             #executing the calculation for each features chosen at parameter
             for index_features in range(qtyFeatures):
                 self.preparedParameter.featuresChoice[index_features][0].parameter = preparedParameter
                 valueCalculated = self.preparedParameter.featuresChoice[index_features][0].execute(item[0],neighbor_node) * self.preparedParameter.featuresChoice[index_features][1]
                 if valueCalculated < self.minValueCalculated[index_features]:
                     self.minValueCalculated[index_features] = valueCalculated
                 if valueCalculated > self.maxValueCalculated[index_features]:
                     self.maxValueCalculated[index_features] = valueCalculated
                     
                 item_result.append(valueCalculated)
             
             lineContent = []    
             #generating a vetor with the name of the feature and the result of the calculate
             for indice in range(qtyFeatures):
                 lineContent.append(str({str(self.preparedParameter.featuresChoice[indice]):item_result[indice]}) )
             partialResults.append([lineContent, item[0], neighbor_node])
             
         if element % 10 == 0:
             for item in partialResults:
                 for calc in item[0]:
                     fcontentCalcResult.write(calc + '\t')
                 fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2])  + '\n'  )
             partialResults = []
     
     for item in partialResults:
         for calc in item[0]:
             fcontentCalcResult.write(calc + '\t')
         fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2])  + '\n'  )
             
     
     fcontentCalcResult.flush()
     fcontentCalcResult.close()
     fcontentNodesNotLinked.close()
     fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
     fcontentMaxMin.write(str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) )
     fcontentMaxMin.close()
     print "Calculating Nodes not linked finished", datetime.today()
     
     
 def save_Max_min_file(self, filepath, qtyCalculated, minValues, maxValues):
     """Write the calculation summary (count, minimums, maximums) to a file.

     The file receives a single line, without trailing newline, made of
     three tab-separated fields: ``str(qtyCalculated)``,
     ``repr(minValues)`` and ``repr(maxValues)``.

     :param filepath: path of the summary file; it is resolved through
         ``Formating.get_abs_file_path`` before being opened for writing.
     :param qtyCalculated: quantity of calculated items.
     :param minValues: minimum values found (written with ``repr``).
     :param maxValues: maximum values found (written with ``repr``).
     """
     # Build the line before opening the file so that a failure while
     # formatting the values does not truncate an existing summary.
     summary = '\t'.join(
         [str(qtyCalculated), repr(minValues), repr(maxValues)])

     # The context manager closes the handle even when write() raises.
     with open(Formating.get_abs_file_path(filepath), 'w') as fcontentMaxMin:
         fcontentMaxMin.write(summary)
Ejemplo n.º 22
0
    def __init__(self, preparedParameter, filepathNodesNotLinked,
                 filepathResult, filePathOrdered, filepathMaxMinCalculated):
        print "Starting Calculating Nodes not linked", datetime.today()

        self.preparedParameter = preparedParameter
        self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
        self.filepathMaxMinCalculated = Formating.get_abs_file_path(
            filepathMaxMinCalculated)
        self.filepathResult = Formating.get_abs_file_path(filepathResult)
        self.filepathNodesNotLinked = Formating.get_abs_file_path(
            filepathNodesNotLinked)
        #for each links that is not linked all the calculates is done.
        element = 0
        qtyofResults = FormatingDataSets.getTotalLineNumbers(
            self.filepathNodesNotLinked)
        fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
        if os.path.exists(self.filepathResult):
            print "Calculate already done for this file, please delete if you want a new one.", datetime.today(
            )
            self.reading_Max_min_file()
            return

        fcontentCalcResult = open(self.filepathResult, 'w')

        self.minValueCalculated = list(
            99999 for x in self.preparedParameter.featuresChoice)
        self.maxValueCalculated = list(
            0 for x in self.preparedParameter.featuresChoice)

        qtyFeatures = len(self.preparedParameter.featuresChoice)
        qtyNodesCalculated = 0
        partialResults = []
        for lineofFile in fcontentNodesNotLinked:
            element = element + 1
            item = VariableSelection.getItemFromLine(lineofFile)
            qtyothernodenotlinked = len(item[1])
            newelement = 0
            for neighbor_node in item[1]:
                newelement = newelement + 1
                qtyNodesCalculated = qtyNodesCalculated + 1
                self.printProgressofEvents(
                    element, qtyofResults,
                    "Calculating features for nodes not liked: ")
                self.printProgressofEventsWihoutPercent(
                    newelement, qtyothernodenotlinked, "Calculating nodes: " +
                    str(item[0]) + ":" + str(neighbor_node))

                item_result = []
                #executing the calculation for each features chosen at parameter
                for index_features in range(qtyFeatures):
                    self.preparedParameter.featuresChoice[index_features][
                        0].parameter = preparedParameter
                    valueCalculated = self.preparedParameter.featuresChoice[
                        index_features][0].execute(
                            item[0], neighbor_node
                        ) * self.preparedParameter.featuresChoice[
                            index_features][1]
                    if valueCalculated < self.minValueCalculated[
                            index_features]:
                        self.minValueCalculated[
                            index_features] = valueCalculated
                    if valueCalculated > self.maxValueCalculated[
                            index_features]:
                        self.maxValueCalculated[
                            index_features] = valueCalculated

                    item_result.append(valueCalculated)

                lineContent = []
                #generating a vetor with the name of the feature and the result of the calculate
                for indice in range(qtyFeatures):
                    lineContent.append(
                        str({
                            str(self.preparedParameter.featuresChoice[indice]):
                            item_result[indice]
                        }))
                partialResults.append([lineContent, item[0], neighbor_node])

            if element % 10 == 0:
                for item in partialResults:
                    for calc in item[0]:
                        fcontentCalcResult.write(calc + '\t')
                    fcontentCalcResult.write(
                        str(item[1]) + '\t' + str(item[2]) + '\n')
                partialResults = []

        for item in partialResults:
            for calc in item[0]:
                fcontentCalcResult.write(calc + '\t')
            fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2]) + '\n')

        fcontentCalcResult.flush()
        fcontentCalcResult.close()
        fcontentNodesNotLinked.close()
        fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
        fcontentMaxMin.write(
            str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) +
            '\t' + repr(self.maxValueCalculated))
        fcontentMaxMin.close()
        print "Calculating Nodes not linked finished", datetime.today()
Ejemplo n.º 23
0
 def __init__(self, parameter_file):
     parameterFile = Formating.get_abs_file_path(parameter_file)
     
     AllFeatures = []
     AllFeatures.append(AASFeature())
     AllFeatures.append(CNFeature())
     AllFeatures.append(JCFeature())
     AllFeatures.append(PAFeature())
     AllFeatures.append(TimeScore())
     AllFeatures.append(DomainTimeScore())
     AllFeatures.append(DomainTimeScorevTwo())
     AllFeatures.append(DomainJC())
     AllFeatures.append(WeightTimeScore())
     AllFeatures.append(WeightDomainScore())
     
     
     
     self.FeaturesChoiced = []
     
     with open(parameterFile) as f:
         lines = f.readlines()
         f.close()
     for line in lines:
         line = line.strip()
         line = line.replace('\n','')
         cols = line.split('\t')
         if cols[0] == 'original_file':
             self.original_file = cols[1]
         if cols[0] == 'graph_file':
             self.graph_file = cols[1]
         if cols[0] == 'maxmincalculated_file':
             self.maxmincalculated_file = cols[1]
         if cols[0] == 'trainnig_graph_file':
             self.trainnig_graph_file = cols[1]
         if cols[0] == 'test_graph_file':
             self.test_graph_file = cols[1]
         if cols[0] == 'nodes_notlinked_file':
             self.nodes_notlinked_file = cols[1]
         if cols[0] == 'nodes_file':
             self.nodes_file = cols[1]
         if cols[0] == 'calculated_file':
             self.calculated_file = cols[1]
         if cols[0] == 'ordered_file':
             self.ordered_file = cols[1]
         if cols[0] == 'analysed_file':
             self.analysed_file = cols[1]
         if cols[0] == 'min_edges':
             self.min_edges = int(cols[1])
         if cols[0] == 'lengthVertex':
             self.lengthVertex = int(cols[1])
         if cols[0] == 't0':
             self.t0 = int(cols[1])
         if cols[0] == 't0_':
             self.t0_ = int(cols[1])
         if cols[0] == 't1':
             self.t1 = int(cols[1])
         if cols[0] == 't1_':
             self.t1_ = int(cols[1])
         if cols[0] == 'decay':
             self.decay = float(cols[1])
         if cols[0] == 'keyword_decay':
             self.keyword_decay = float(cols[1])
         if cols[0] == 'features':
             features = cols[1].split(';')
             for feature in features:
                 featureandweight = feature.split(':')
                 self.FeaturesChoiced.append([AllFeatures[int(featureandweight[0])], int(featureandweight[1])])