예제 #1
0
def step08(paramFile):
    #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    util = ParameterUtil(parameter_file = paramFile)

    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()
    print "Trainning Period:", myparams.t0, " - ", myparams.t0_
    print "Test Period:", myparams.t1, " - ", myparams.t1_
    
    print "# Papers in Trainning: ",  myparams.get_edges(myparams.trainnigGraph)
    print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph)
    print "# Papers in Test: ",  myparams.get_edges(myparams.testGraph)
    print "# Authors in Test", myparams.get_nodes(myparams.testGraph)
    
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    calc.reading_Max_min_file()
    print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated  #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file))
    topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt')
    print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank
    print "Max values found in calculations: ", str(calc.maxValueCalculated)
    print "Min Values found in calculations: ", str(calc.minValueCalculated)
    for pathFile in calc.getfilePathOrdered_separeted():
        print "File Analised: ", pathFile +  '.analised.txt'
        number_connected =  Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt')
        print "# pair of Authors that is connected in Test Graph: ", number_connected
        print "%: ", Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank)
        print "---------------------------------"
예제 #2
0
def step08(paramFile):
    #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    util = ParameterUtil(parameter_file=paramFile)

    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()
    print "Trainning Period:", myparams.t0, " - ", myparams.t0_
    print "Test Period:", myparams.t1, " - ", myparams.t1_

    print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph)
    print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph)
    print "# Papers in Test: ", myparams.get_edges(myparams.testGraph)
    print "# Authors in Test", myparams.get_nodes(myparams.testGraph)

    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file,
                     util.ordered_file, util.maxmincalculated_file)
    calc.reading_Max_min_file()
    print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated  #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file))
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank
    print "Max values found in calculations: ", str(calc.maxValueCalculated)
    print "Min Values found in calculations: ", str(calc.minValueCalculated)
    for pathFile in calc.getfilePathOrdered_separeted():
        print "File Analised: ", pathFile + '.analised.txt'
        number_connected = Analyse.getTopRankABSPathFiles(pathFile +
                                                          '.analised.txt')
        print "# pair of Authors that is connected in Test Graph: ", number_connected
        print "%: ", Analyse.getLastInfosofResultsABSPathFiles(
            pathFile + '.analised.txt', topRank)
        print "---------------------------------"
예제 #3
0
def step04(paramFile):
    """Generate the training graph and run Calculate.Separating_calculateFile.

    paramFile -- path of the parameter file handed to ParameterUtil.
    """
    settings = ParameterUtil(parameter_file=paramFile)

    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    params.generating_Training_Graph()

    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    calculator.Separating_calculateFile()
예제 #4
0
def step07(paramFile):
    #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    util = ParameterUtil(parameter_file = paramFile)

    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    myparams.generating_Test_Graph()
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print 'Analising Files with TopRank', str(topRank)
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
예제 #5
0
def step06(paramFile):
    """Run Calculate.Ordering_separating_File with the stored top rank.

    paramFile -- path of the parameter file handed to ParameterUtil.
    """
    settings = ParameterUtil(parameter_file=paramFile)

    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)

    # The top rank comes from the random analysis file.
    random_analysed = settings.analysed_file + '.random.analised.txt'
    top_rank = Analyse.getTopRank(random_analysed)
    calculator.Ordering_separating_File(top_rank)
예제 #6
0
def step07(paramFile):
    #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    util = ParameterUtil(parameter_file=paramFile)

    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file,
                     util.ordered_file, util.maxmincalculated_file)
    myparams.generating_Test_Graph()
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print 'Analising Files with TopRank', str(topRank)
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams, OrderingFilePath,
                          OrderingFilePath + '.analised.txt', topRank)
예제 #7
0
def step05(paramFile):
    """Build the Analyse that targets the '.random.analised.txt' file.

    paramFile -- path of the parameter file handed to ParameterUtil.

    The Analyse is given the absolute path of the calculated file, the
    absolute analysed path plus '.random.analised.txt', and
    calc.qtyDataCalculated.
    """
    settings = ParameterUtil(parameter_file=paramFile)

    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    params.generating_Test_Graph()

    calculated_path = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    random_analysed_path = (
        FormatingDataSets.get_abs_file_path(settings.analysed_file)
        + '.random.analised.txt')
    # The instance is not used afterwards; only its construction matters here.
    analysis = Analyse(params, calculated_path, random_analysed_path,
                       calculator.qtyDataCalculated)
'''
Created on Aug 22, 2015

@author: cptullio
Analysing the results
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    selection = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges, True)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    wg = calc.adding_normalize_values_tograph(myparams.trainnigGraph)
    networkx.write_graphml(wg, FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.weighted.txt'))
    node993 =set(n for n,d in wg.edges(data=True) if n == 993 and d == 994)
    print node993
    
    
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx
import mysql.connector

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file,util.min_edges, True)
    calc = Calculate(myparams, util.nodes_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    print 'armazenando resultados'
    cnx = mysql.connector.connect(user='******', password='******',
                              host='127.0.0.1',
                              database='calculos')
    add_result = ("INSERT INTO resultadopesos "
               "(no1, no2, resultados) "
               "VALUES (%s, %s, %s)")
    cursor = cnx.cursor()
    calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        dado = Calculate.reading_calculateLine(linha)
        data_result = (dado[1], dado[2].replace('\n',''),str(dado[0]))
        cursor.execute(add_result, data_result)
    calculatedFile.close()
    cnx.commit()
    cursor.close()
    cnx.close()
    
    
예제 #10
0
'''
Created on Aug 22, 2015

@author: cptullio
Generating TopRank
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from formating.FormatingDataSets import FormatingDataSets

if __name__ == '__main__':
    # Build the Analyse that targets the '.random.analised.txt' file for the
    # duarte 1994-1999 configuration.
    settings = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    params.generating_Test_Graph()

    calculated_path = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    random_analysed_path = (
        FormatingDataSets.get_abs_file_path(settings.analysed_file)
        + '.random.analised.txt')
    # The instance is not used afterwards; only its construction matters here.
    analysis = Analyse(params, calculated_path, random_analysed_path,
                       calculator.qtyDataCalculated)
예제 #11
0
'''
Created on Aug 22, 2015

@author: cptullio
Analysing the results
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/duarte/nowell_duarte_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    myparams.generating_Test_Graph()
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print 'Analising Files with TopRank', str(topRank)
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None)

    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()
    selection = VariableSelection(myparams.trainnigGraph,
                                  util.nodes_notlinked_file, util.min_edges)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file,
                     util.ordered_file, util.maxmincalculated_file)
    calc.Separating_calculateFile()
    analise = Analyse(
        myparams, FormatingDataSets.get_abs_file_path(util.calculated_file),
        FormatingDataSets.get_abs_file_path(util.analysed_file) +
        '.random.analised.txt', calc.qtyDataCalculated)
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    calc.Ordering_separating_File(topRank)
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams, OrderingFilePath,
                          OrderingFilePath + '.analised.txt', topRank)

    print "Trainning Period:", myparams.t0, " - ", myparams.t0_
    print "Test Period:", myparams.t1, " - ", myparams.t1_

    print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph)
예제 #13
0
            result = line
            break
        elif textov2 in line:
            result = line
            break
    calculatedFile.seek(0)
    return result


if __name__ == '__main__':
    util = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    calculatedFile = open(
        FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        x.append(Calculate.reading_calculateLine(linha))
    calculatedFile.close()
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    myparams.generating_Training_Graph()
    Nodes_notLinked = VariableSelection(myparams.trainnigGraph,
                                        util.nodes_notlinked_file,
                                        util.min_edges)
    nodes_notlinkedFile = open(
        FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r')
    qtyLine = 0
    qtyCalculated = 0
    f = open(
예제 #14
0
if __name__ == '__main__':
   
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt')
    myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, 
                                filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None)

 
   
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()
    print "Trainning Period:", myparams.t0, " - ", myparams.t0_
    print "Test Period:", myparams.t1, " - ", myparams.t1_
    
    print "# Papers in Trainning: ",  myparams.get_edges(myparams.trainnigGraph)
    print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph)
    print "# Papers in Test: ",  myparams.get_edges(myparams.testGraph)
    print "# Authors in Test", myparams.get_nodes(myparams.testGraph)
    
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    calc.reading_Max_min_file()
    print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated  #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file))
    topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt')
    print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank
    print "Max values found in calculations: ", str(calc.maxValueCalculated)
    print "Min Values found in calculations: ", str(calc.minValueCalculated)
    for pathFile in calc.getfilePathOrdered_separeted():
        print "File Analised: ", pathFile +  '.analised.txt'
        number_connected =  Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt')
        print "# pair of Authors that is connected in Test Graph: ", number_connected
        print "%: ", Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank)
        print "---------------------------------"
예제 #15
0
'''
Created on Aug 22, 2015

@author: cptullio
Ordering Calculation
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse

if __name__ == '__main__':
    # Run Calculate.Ordering_separating_File for the duarte 1994-1999
    # configuration, using the top rank read from the random analysis file.
    settings = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)

    random_analysed = settings.analysed_file + '.random.analised.txt'
    top_rank = Analyse.getTopRank(random_analysed)
    calculator.Ordering_separating_File(top_rank)
예제 #16
0
from formating.FormatingDataSets import FormatingDataSets
import networkx
import mysql.connector

if __name__ == '__main__':
    # Copy every line of the calculated file into the MySQL table
    # `resultadopesos` (columns no1, no2, resultados).
    # NOTE(review): this excerpt stops after the insert loop -- no commit(),
    # cursor.close(), cnx.close() or calculatedFile.close() is visible here.
    # Confirm the remainder of the script performs them.
    util = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    myparams.generating_Training_Graph()
    # AllNodes is not read again in this excerpt; the object is built from the
    # training graph, the nodes file path, min_edges and a True flag.
    AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file,
                                 util.min_edges, True)
    calc = Calculate(myparams, util.nodes_file, util.calculated_file,
                     util.ordered_file, util.maxmincalculated_file)
    # Progress message (Portuguese for "storing results").
    print 'armazenando resultados'
    cnx = mysql.connector.connect(user='******',
                                  password='******',
                                  host='127.0.0.1',
                                  database='calculos')
    # Parameterised INSERT: the values are bound by the driver on execute().
    add_result = ("INSERT INTO resultadopesos "
                  "(no1, no2, resultados) "
                  "VALUES (%s, %s, %s)")
    cursor = cnx.cursor()
    calculatedFile = open(
        FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        dado = Calculate.reading_calculateLine(linha)
        # dado[1] and dado[2] fill no1/no2, dado[0] fills resultados; the
        # trailing newline is stripped from the last field.
        data_result = (dado[1], dado[2].replace('\n', ''), str(dado[0]))
        cursor.execute(add_result, data_result)
예제 #17
0
from matplotlib import pyplot

if __name__ == '__main__':
    # Generate the training graph for the duarte 1994-1999 configuration and
    # run Calculate.Separating_calculateFile on it.
    # Alternative configuration used previously:
    #   data/formatado/arxiv/exemplomenor/config/configuration_weights.txt
    settings = ParameterUtil(
        parameter_file='data/formatado/duarte/1994_1999/config/configuration.txt')
    params = Parameterization(
        t0=settings.t0, t0_=settings.t0_,
        t1=settings.t1, t1_=settings.t1_,
        filePathGraph=settings.graph_file,
        filePathTrainingGraph=settings.trainnig_graph_file,
        filePathTestGraph=settings.test_graph_file,
        decay=settings.decay,
        domain_decay=settings.domain_decay,
        min_edges=settings.min_edges,
        scoreChoiced=settings.ScoresChoiced,
        weightsChoiced=settings.WeightsChoiced,
        weightedScoresChoiced=settings.WeightedScoresChoiced,
        FullGraph=None)

    params.generating_Training_Graph()

    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    calculator.Separating_calculateFile()
    # Drawing the training graph with networkx/pyplot is intentionally left
    # disabled.
    for line in calculatedFile:
        if texto in line:
            result = line
            break
        elif textov2 in line:
            result = line
            break
    calculatedFile.seek(0)
    return result
    

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        x.append(Calculate.reading_calculateLine(linha))
    calculatedFile.close()
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    Nodes_notLinked = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges)
    nodes_notlinkedFile = open(FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r')
    qtyLine = 0
    qtyCalculated = 0
    f = open(FormatingDataSets.get_abs_file_path(util.calculated_file )+ '.weight.txt', 'w')
    minValueCalculated = list(99999 for x in myparams.featuresChoice)
    maxValueCalculated = list(0 for x in myparams.featuresChoice)
    qtyFeatures = len(myparams.featuresChoice)
    for line in nodes_notlinkedFile:
        qtyLine = qtyLine + 1
        item = VariableSelection.getItemFromLine(line)