def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Read the Eurostat input files and create the SPI table for one indicator.

    Every file listed in the module-level ``fichiersTXT`` is read, in reverse
    sorted order (the list is sorted in place).  The first record of a file
    gives the column names; the last column holds the geo code followed by
    the tab-separated yearly values.  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its unit equals ``G_Unit``, its sector is
    'S1' and its NACE code is a key of the dictionary returned by
    ``spiLib.defSelectdicNace``.

    Parameters:
        indicatorInput: SPI-side indicator identifier handed to
            ``createTable`` and ``createTableNE``.
        indicatorInputEurostat: value compared with the indicator column of
            each row.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.

    Returns:
        None.  Results are produced through ``spiLibCreateTable`` and
        diagnostics are written to the module-level ``fileLog``.

    NOTE: ``endYear`` comes from the header of the last file processed; if
    ``fichiersTXT`` is empty it is never bound and the final
    ``createTableNE`` call fails.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    unitWanted = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed, even when a row fails.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the column names (metadata).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as the end year, the last one
            # as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed in nama_nace64_e.tsv; it
                # disappeared with ESA2010 but the test is kept.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorInputEurostat
                            and unit == unitWanted
                            and sector == 'S1'
                            and nace in dicNace):
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: any failure on a
                    # row (typically an unknown geo code) is recorded against
                    # that geo code.  KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    # Log the year of the first existing value in the vectors; it can differ
    # from the first year of the longest vector because a value may be ':'.
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # Compare the start year derived from the vectors with the year of the
    # first real value (a value may be ':'); the latter wins.
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear) +
                      ' annee min. avec une valeur =' +
                      str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorInput,
                                  compteEurostat, G_tableName, G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
# Example #2
# 0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Read the Eurostat input files and create the SPI table and its total.

    Every file listed in the module-level ``fichiersTXT`` is read.  The first
    record of a file gives the column names; the last column holds the geo
    code followed by the tab-separated yearly values.  A row is kept when its
    indicator equals ``indicatorInputEurostat`` and either ``nomenclature``
    is 'nace1' and the NACE code is a key of the dictionary returned by
    ``spiLib.defSelectdicNace``, or ``nomenclature`` is 'nace2' and the NACE
    code is shorter than 4 characters.

    Parameters:
        indicatorInput: SPI-side indicator identifier handed to
            ``createTable`` and ``createTableNE``.
        indicatorInputEurostat: value compared with the indicator column of
            each row.
        nomenclature: 'nace1' or 'nace2'; selects the row filter and is
            passed to the table-creation calls.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.

    Returns:
        None.  Results are produced through ``spiLibCreateTable`` and
        diagnostics are written to the module-level ``fileLog``.

    NOTE: ``endYear`` comes from the header of the last file processed; if
    ``fichiersTXT`` is empty it is never bound and the ``createTableNE``
    calls fail.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpiTotal = G_IndicatorSPI_T
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed, even when a row fails.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the column names (metadata).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as the end year, the last one
            # as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037  6410  3544  3311  3336  3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceWanted = (
                        (nomenclature == 'nace1' and nace in dicNace) or
                        (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorInputEurostat and naceWanted:
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: any failure on a
                    # row (typically an unknown geo code) is recorded against
                    # that geo code.  KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The year of the first real value replaces the year derived from the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    # Create the SPI indicator (e.g. vabus); the call returns the dictionary
    # of aggregates.
    dicAgregatNace = spiLibCreateTable.createTable(nomenclature, dicIndicator,
                                                   fileLog, minStartYear,
                                                   dicNace, indicatorInput,
                                                   compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the SPI total indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(nomenclature, dicAgregatNace,
                                           dicIndicator, minStartYear, fileLog,
                                           dicNace, indicatorSpiTotal,
                                           compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear,
                                        fileLog, indicatorSpiTotal,
                                        compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Read the Eurostat input files and create the SPI growth table.

    Every file listed in the module-level ``fichiersTXT`` is read, in reverse
    sorted order (the list is sorted in place).  The first record of a file
    gives the column names; the last column holds the geo code followed by
    the tab-separated yearly values.  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its unit equals ``G_Unit``, its sector is
    'S1' and its NACE code is a key of the dictionary returned by
    ``spiLib.defSelectdicNace``.

    Parameters:
        indicatorInput: SPI-side indicator identifier handed to
            ``createTableGrowth`` and ``createTableNE``.
        indicatorInputEurostat: value compared with the indicator column of
            each row.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.

    Returns:
        None.  Results are produced through ``spiLibCreateTable`` and
        diagnostics are written to the module-level ``fileLog``.

    NOTE: ``endYear`` comes from the header of the last file processed; if
    ``fichiersTXT`` is empty it is never bound and the final
    ``createTableNE`` call fails.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    unitWanted = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed, even when a row fails.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the column names (metadata).
            lstrec = fichierTXT.readline().split(',')
            # Find the position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as the end year, the last one
            # as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layouts:
            #   nace 1 : unit,sector,nace_r1,indic_na,geo\time
            #   nace 2 : unit,nace_r2,indic_na,sector,geo\time
            #   MIO_EUR,A,B1G,AT :  :  :  3781.7  4375.5  4322.6  3827.4
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # No sector column (-1): the row is treated as sector 'S1'.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Original note on the selection rule: NACE codes are
                    # selected either from the list defined in the Oracle
                    # table or on their length only (4); nace1 uses the list,
                    # nace2 with nama uses the list, sbs tests the length.
                    # This variant always uses the list.
                    if (indicator == indicatorInputEurostat
                            and unit == unitWanted
                            and sector == 'S1'
                            and nace in dicNace):
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: any failure on a
                    # row (typically an unknown geo code) is recorded against
                    # that geo code.  KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The year of the first real value replaces the year derived from the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(nomenclature, dicIndicator, fileLog,
                                        minStartYear, dicNace, indicatorInput,
                                        compteEurostat, G_tableName, G_Growth,
                                        G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
# Example #4
# 0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Read the Eurostat input files and create the SPI table per size class.

    Every file listed in the module-level ``fichiersTXT`` is read.  The first
    record of a file gives the column names; the last column holds the geo
    code followed by the tab-separated yearly values.  A row is kept when its
    indicator equals ``indicatorInputEurostat``, its size is in
    ``G_LstSize`` and either ``nomenclature`` is 'nace1' and the NACE code is
    a key of the dictionary returned by ``spiLib.defSelectdicNace``, or
    ``nomenclature`` is 'nace2' and the NACE code is shorter than 4
    characters.  Each kept row is stored under the indicator key
    ``indicatorInput + '_' + G_DicSize[size]``.

    Parameters:
        indicatorInput: SPI-side indicator identifier; prefix of the
            per-size indicator key and argument of the table-creation calls.
        indicatorInputEurostat: value compared with the indicator column of
            each row.
        nomenclature: 'nace1' or 'nace2'; selects the row filter and is
            passed to the table-creation calls.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls.

    Returns:
        None.  Results are produced through ``spiLibCreateTable`` and
        diagnostics are written to the module-level ``fileLog``.

    NOTE: ``endYear`` comes from the header of the last file processed; if
    ``fichiersTXT`` is empty it is never bound and the final
    ``createTableNE`` call fails.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicSize = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # "with" guarantees the input file is closed, even when a row fails.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the column names (metadata).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as the end year, the last one
            # as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037  6410  3544  3311  3336  3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceWanted = (
                        (nomenclature == 'nace1' and nace in dicNace) or
                        (nomenclature == 'nace2' and len(nace) < 4))
                    if (indicator == indicatorInputEurostat
                            and size in G_LstSize and naceWanted):
                        # Keep track of the sizes found in the Eurostat input.
                        dicSize[size] = size
                        try:
                            # Look up the matching SPI indicator.
                            indicator_size = (indicatorInput + '_' +
                                              G_DicSize[size])
                        except Exception:
                            fileLog.write('pas de size ' + size +
                                          ' indicateur : ' + indicator +
                                          ' country : ' + country + '\n')
                            continue  # move on to the next record
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicatorSize(
                            country, nace, vector, indicator_size,
                            dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: any failure on a
                    # row (typically an unknown geo code) is recorded against
                    # that geo code.  KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # sorted() works on both Python 2 and 3, unlike keys().sort().
    for s in sorted(dicSize):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')
    # The year of the first real value replaces the year derived from the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(nomenclature, dicIndicator, fileLog,
                                      minStartYear, dicNace, indicatorInput,
                                      compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName, G_DicSize)
# Example #5
# 0
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Read the Eurostat input files and create the SPI competition table.

    The SPI indicator is looked up under ``indicatorInput`` in the
    dictionary returned by ``FileAccess.lectureIndicator`` for the
    'competition' domain.  Every file listed in the module-level
    ``fichiersTXT`` is read.  The first record of a file gives the column
    names; the last column holds the geo code followed by the tab-separated
    yearly values.  A row is kept when its indicator equals
    ``indicatorInput`` and its size equals ``G_Size``; for nace2 with the
    'sbs' account the size is forced to 'TOTAL'.  No filter on the NACE list
    is applied.

    Parameters:
        indicatorInput: Eurostat indicator code; compared with the indicator
            column of each row and used as key for the SPI indicator lookup.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls; 'nace2' takes part in the size override.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and to the
            table-creation calls; 'sbs' takes part in the size override.

    Returns:
        None.  Results are produced through ``spiLibCreateTable`` and
        diagnostics are written to the module-level ``fileLog``.

    Raises:
        KeyError: if ``indicatorInput`` is not a key of the indicator
            dictionary.

    NOTE: ``endYear`` comes from the header of the last file processed; if
    ``fichiersTXT`` is empty it is never bound and the final
    ``createTableNE`` call fails.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition', dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    # The size column does not exist for NACE2 in the sbs_na files, so the
    # size condition is satisfied by construction there.  The column is no
    # longer read at all in that case.
    forceTotalSize = nomenclature == 'nace2' and compteEurostat == 'sbs'
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed, even when a row fails.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the column names (metadata).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as the end year, the last one
            # as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037  6410  3544  3311  3336  3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                if forceTotalSize:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()

                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Original note on the selection rule: NACE codes are
                    # selected either from the list defined in the Oracle
                    # table or on their length only (4); nace1 uses the
                    # list, and for bd everything is taken.  Here every NACE
                    # code is taken.
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: any failure on a
                    # row (typically an unknown geo code) is recorded against
                    # that geo code.  KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The year of the first real value replaces the year derived from the
    # vectors.
    minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)