def step08(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file=paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", Analyse.getLastInfosofResultsABSPathFiles( pathFile + '.analised.txt', topRank) print "---------------------------------"
def step08(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file = paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank) print "---------------------------------"
def execution(configFile):
    '''Run the in-memory experiment described by ``configFile`` and log a report.

    The report is written to the path returned by
    ``FormatingDataSets.get_abs_file_path(configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt')``.
    It holds a start timestamp, one "TOTAL OF SUCESSS" line per chosen
    score, a tab-separated summary row and an end timestamp.

    configFile -- configuration file path; passed to ``ParameterUtil`` and
                  also used as the prefix of the report file name.

    The report file is managed by a ``with`` block, so it is closed even if
    one of the steps raises (the exception still propagates to the caller).
    '''
    # Define the file that will keep the result data.
    report_path = FormatingDataSets.get_abs_file_path(configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt')
    with open(report_path, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file=configFile)

        # Create the parameterization object from the config file values.
        myparams = Parameterization(
            t0=util.t0,
            t0_=util.t0_,
            t1=util.t1,
            t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay,
            domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file,
        )

        # Generate the training graph based on config file t0 and t0_.
        myparams.generating_Training_Graph()
        # Generate the test graph based on config file t1 and t1_.
        myparams.generating_Test_Graph()

        nodesSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        # Authors that publish, in training and in test, the number of
        # papers defined by min_edges in the config file.
        nodes = nodesSelection.get_NowellAuthorsCore()
        # Pairs of authors that publish at least one article in training and
        # test. Only the count is used, in the summary row below.
        collaborations = nodesSelection.get_NowellColaboration()
        # First edges made by the combination of nodes in the training graph.
        eOld = nodesSelection.get_NowellE(nodes, myparams.trainnigGraph)
        # First edges made by the combination of nodes in the test graph
        # that do not have edges in training.
        eNew = nodesSelection.get_NowellE2(nodes, eOld, myparams.testGraph)
        # Pairs of nodes not linked over the combination of nodes.
        nodesNotLinked = nodesSelection.get_PairsofNodesNotinEold(nodes)

        # Calculate the scores, keep the top len(eNew) and save the ordering.
        calc = CalculateInMemory(myparams, nodesNotLinked)
        resultsofCalculation = calc.executingCalculate()
        orderingResults = calc.ordering(len(eNew), resultsofCalculation)
        calc.saving_orderedResult(util.ordered_file, orderingResults)

        # Analyse the ordered results against the test graph.
        ScoresResults = Analyse.AnalyseNodesWithScoresInFuture(orderingResults, myparams.testGraph)

        # Save one analysis file and write one report line per score.
        for index, scoreResult in enumerate(ScoresResults):
            metric = str(myparams.ScoresChoiced[index][0])
            Analyse.saving_analyseResult(scoreResult, util.analysed_file + metric + '.txt')
            resultFile.write("TOTAL OF SUCESSS USING METRIC " + metric + " = " + str(Analyse.get_TotalSucess(scoreResult)))
            resultFile.write("\n")

        resultFile.write("\n")
        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(collaborations) * 2) + "\t\t"
            + str(len(nodes)) + "\t"
            + str(len(eOld)) + "\t"
            + str(len(eNew)))

        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def step07(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file=paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank)
def step07(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file = paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
def step06(paramFile):
    '''Call ``Ordering_separating_File`` with the top rank of the random analysis.

    Loads the parameters from ``paramFile``, builds the ``Parameterization``
    and ``Calculate`` objects, reads the top rank from the
    '.random.analised.txt' file and hands it to
    ``Calculate.Ordering_separating_File``.

    paramFile -- passed unchanged to ``ParameterUtil(parameter_file=...)``.
    '''
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calculation = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )

    random_analysis_file = settings.analysed_file + '.random.analised.txt'
    top_rank = Analyse.getTopRank(random_analysis_file)
    calculation.Ordering_separating_File(top_rank)
def step05(paramFile):
    '''Construct the ``Analyse`` object that targets the '.random.analised.txt' file.

    Loads the parameters from ``paramFile``, builds the ``Parameterization``
    and ``Calculate`` objects, generates the test graph and constructs
    ``Analyse`` with the absolute path of the calculated file, the absolute
    analysed path plus '.random.analised.txt', and
    ``calc.qtyDataCalculated``.

    paramFile -- passed unchanged to ``ParameterUtil(parameter_file=...)``.
    '''
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calculation = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    params.generating_Test_Graph()

    calculated_path = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    random_analysis_path = FormatingDataSets.get_abs_file_path(settings.analysed_file) + '.random.analised.txt'
    # The instance is not used afterwards in this function; presumably the
    # constructor performs the analysis -- confirm in Analyse.
    analysis = Analyse(params, calculated_path, random_analysis_path, calculation.qtyDataCalculated)
'''
Created on Aug 22, 2015

@author: cptullio

Ordering Calculation
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse


if __name__ == '__main__':
    # Settings come from a hard-coded parameter file.
    settings = ParameterUtil(parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calculation = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )

    # The top rank read from the random analysis file drives the ordering step.
    random_analysis_file = settings.analysed_file + '.random.analised.txt'
    top_rank = Analyse.getTopRank(random_analysis_file)
    calculation.Ordering_separating_File(top_rank)
if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/exemplomenor/config/config.txt') myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None) myparams.generating_Training_Graph() myparams.generating_Test_Graph() selection = VariableSelection(myparams.trainnigGraph, util.min_edges) nodesNotLinked = selection.get_pair_nodes_not_linked() calc = CalculateInMemory(myparams, nodesNotLinked) resultsCalculate = calc.executingCalculate() calc.Separating_calculateFile() analise = Analyse(myparams, FormatingDataSets.get_abs_file_path(util.calculated_file), FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt', calc.qtyDataCalculated) topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') calc.Ordering_separating_File(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank ) print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file))
'''
Created on Aug 22, 2015

@author: cptullio

Generating TopRank
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from formating.FormatingDataSets import FormatingDataSets


if __name__ == '__main__':
    # Settings come from a hard-coded parameter file.
    settings = ParameterUtil(parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calculation = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    params.generating_Test_Graph()

    # Analyse the calculated file into the '.random.analised.txt' file.
    calculated_path = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    random_analysis_path = FormatingDataSets.get_abs_file_path(settings.analysed_file) + '.random.analised.txt'
    # The instance is not used afterwards; presumably the constructor does
    # the work -- confirm in Analyse.
    analysis = Analyse(params, calculated_path, random_analysis_path, calculation.qtyDataCalculated)
''' Created on Aug 22, 2015 @author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/duarte/nowell_duarte_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt') myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", 
Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank) print "---------------------------------"
'''
Created on Aug 22, 2015

@author: cptullio

Ordering Calculation
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse


if __name__ == '__main__':
    config = ParameterUtil(parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    experiment = Parameterization(
        config.keyword_decay, config.lengthVertex,
        config.t0, config.t0_,
        config.t1, config.t1_,
        config.FeaturesChoiced,
        config.graph_file, config.trainnig_graph_file, config.test_graph_file,
        config.decay)
    calculator = Calculate(
        experiment,
        config.nodes_notlinked_file, config.calculated_file,
        config.ordered_file, config.maxmincalculated_file)

    # Order the separated files using the top rank of the random analysis file.
    baseline_rank = Analyse.getTopRank(config.analysed_file + '.random.analised.txt')
    calculator.Ordering_separating_File(baseline_rank)
''' Created on Aug 22, 2015 @author: cptullio Generating TopRank ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from formating.FormatingDataSets import FormatingDataSets if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/duarte/1994_1999/config/configuration.txt') myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() analise = Analyse(myparams, FormatingDataSets.get_abs_file_path(util.calculated_file), FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt', calc.qtyDataCalculated) topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') calc.Ordering_separating_File(topRank) print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
''' Created on Aug 22, 2015 @author: cptullio Generating TopRank ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from formating.FormatingDataSets import FormatingDataSets from calculating.VariableSelection import VariableSelection if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/config.txt') myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, linear_combination=util.linear_combination, filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None, result_random_file=util.result_random_file) myparams.generating_Training_Graph() myparams.generating_Test_Graph() selection = VariableSelection(myparams.trainnigGraph) nodesNotLinked = selection.readingResultsFile(util.nodes_notlinked_file) resultsRank = Analyse.AnalyseNodesInFuture(nodesNotLinked, myparams.testGraph) Analyse.saving_analyseResult(resultsRank, util.result_random_file) print resultsRank print Analyse.reading_analyseResult(util.result_random_file)
from calculating.VariableSelection import VariableSelection


if __name__ == '__main__':
    settings = ParameterUtil(parameter_file='data/formatado/arxiv/exemplomenor/config/config.txt')
    params = Parameterization(
        t0=settings.t0,
        t0_=settings.t0_,
        t1=settings.t1,
        t1_=settings.t1_,
        linear_combination=settings.linear_combination,
        filePathGraph=settings.graph_file,
        filePathTrainingGraph=settings.trainnig_graph_file,
        filePathTestGraph=settings.test_graph_file,
        decay=settings.decay,
        domain_decay=settings.domain_decay,
        min_edges=settings.min_edges,
        scoreChoiced=settings.ScoresChoiced,
        weightsChoiced=settings.WeightsChoiced,
        weightedScoresChoiced=settings.WeightedScoresChoiced,
        FullGraph=None,
        result_random_file=settings.result_random_file,
    )
    params.generating_Training_Graph()
    params.generating_Test_Graph()

    selector = VariableSelection(params.trainnigGraph)
    # NOTE(review): the pairs read here are never used below, while other
    # call sites in this file pass the not-linked pairs (not the training
    # graph) as the second CalculateInMemory argument -- confirm which one
    # is intended.
    unlinked_pairs = selector.readingResultsFile(settings.nodes_notlinked_file)
    calculation = CalculateInMemory(params, params.trainnigGraph)

    normalized_results = calculation.reading_calculateResult_normalized(settings.calculated_file)
    random_analysis = Analyse.reading_analyseResult(settings.result_random_file)
    top_rank = Analyse.get_topRank(random_analysis)

    # Order either the linear combination of the results or the results themselves.
    if params.linear_combination:
        combined_results = calculation.combinate_linear(normalized_results)
        ordered_results = calculation.ordering_combinate_linear(top_rank, combined_results)
    else:
        ordered_results = calculation.ordering(top_rank, normalized_results)

    # Analyse every ordered entry against the test graph and save them together.
    final_results = [
        Analyse.AnalyseNodesInFuture(feature_order, params.testGraph)
        for feature_order in ordered_results
    ]
    Analyse.saving_analyseResult(final_results, settings.analysed_file)