Exemple #1
0
def lectureFileWto(dirCSV):
    """Read ``wto.csv`` from ``dirCSV`` and index the export values.

    Only records whose flow code is ``'X'`` (exports) are kept.  The first
    line of the file (column headers) is skipped.  The columns used are:
    Country_code=4, Flow_code=6, Indicator_code=8 (sector), Year=14 and
    Value=15.

    Args:
        dirCSV: directory holding ``wto.csv``; the file name is appended
            with a backslash separator.

    Returns:
        A tuple ``(dicFile, minStartYear, maxEndYear)`` where ``dicFile`` is
        ``{country_code: {indicator_code: {year: value}}}`` (year and value
        are kept as strings) and the two years are whatever
        ``spiLib.defMinMaxYear`` accumulated, starting from 99999 and -1.

    Raises:
        IOError: if the file cannot be opened (a message is printed first).
    """
    minStartYear = 99999
    maxEndYear = -1
    dicFile = {}
    fichierLecture = dirCSV + '\\wto.csv'
    try:
        fichier = open(fichierLecture, 'r')
    except IOError:
        # Same text as before; re-raise instead of failing later on an
        # unbound file variable.
        print("le fichier  " + fichierLecture + "  est introuvable")
        raise
    with fichier:
        # First record holds the column names:
        # Topic_code,Topic_desc,DataSet_code,DataSet_desc,Country_code=4,
        # Country_desc,Flow_code=6,Flow_desc,Indicator_code=8,Indicator_desc,
        # Partner_Country_code=10,Partner_Country_desc,Unit_code=12,
        # Unit_desc,Year=14,Value=15,Value_Flag,Notes_Export
        fichier.readline()
        for recordIn in fichier:
            # Drops "<lowercase letter>," sequences so that commas embedded
            # in some description fields do not shift the columns.
            record = re.sub('[a-z],', '', recordIn)
            ligne = record.split(',')
            if len(ligne) < 16:
                # Incomplete record: skip it rather than reuse the fields
                # of the previous record.
                continue
            country_code = ligne[4].strip('"')
            flow_code = ligne[6].strip('"')
            indicator_code = ligne[8].strip('"')  # sector
            year = ligne[14].strip()
            value = ligne[15].strip()
            # Only exports are kept.
            if flow_code != 'X':
                continue
            minStartYear, maxEndYear = spiLib.defMinMaxYear(
                year, minStartYear, year, maxEndYear)
            dicFile.setdefault(country_code, {}).setdefault(
                indicator_code, {})[year] = value
    return dicFile, minStartYear, maxEndYear
Exemple #2
0
def traitementXML(fichiersXml, fileOutput):
    """Write per-country and world BEC yearly totals to ``fileOutput``.

    For every XML file and every BEC code except ``'TOTAL'`` a record
    ``country,codeBEC,startYear,values`` is written, followed by one
    ``WLD`` record per BEC code holding the sum over all files.  The first
    line of the output is a placeholder that is overwritten at the end with
    ``minStartYear,maxEndYear``.

    Relies on the module-level names ``dirCSV`` and ``fileLog``.

    Args:
        fichiersXml: iterable of XML file paths; the country code is the
            file base name without its extension.
        fileOutput: writable and seekable file object with a real file
            descriptor (``fileno()`` is called on it).
    """
    # First line, replaced at the end by the min and max years.
    fileOutput.write('0000,0000\n')
    dicCodeBEC = FileAccess.lectureBEC(dirCSV)  # file: BECCodeOnly.csv
    minStartYear = 99999
    maxEndYear = -1
    dicWorld = {}
    # The right countries were already selected at download time
    # (see gethttpUN.py).
    for fichierXml in fichiersXml:
        dicXml = XmlAccess.lectureXMLBec(fichierXml, dicCodeBEC, fileLog)
        country = os.path.splitext(os.path.basename(fichierXml))[0]
        for rgCode in dicXml:
            dicBEC = dicXml[rgCode]
            codeBECSort = list(dicBEC.keys())
            try:
                codeBECSort.remove('TOTAL')
            except ValueError:
                fileLog.write('no TOTAL for XML file ' + fichierXml + '\n')
            codeBECSort.sort()
            for codeBEC in codeBECSort:
                dicWorldBEC = dicWorld.setdefault(codeBEC, {})
                dicTotalYear = {}
                dicYear = dicBEC[codeBEC]
                yearSort = sorted(dicYear.keys())
                startYear = yearSort[0]
                endYear = yearSort[-1]
                minStartYear, maxEndYear = spiLib.defMinMaxYear(
                    startYear, minStartYear, endYear, maxEndYear)
                for year in yearSort:
                    valYear = int(dicYear[year])
                    dicTotalYear[year] = dicTotalYear.get(year, 0) + valYear
                    dicWorldBEC[year] = dicWorldBEC.get(year, 0) + valYear
                # Output record for this country and BEC code.
                lstValue = spiLibTrade.vectorYear(dicTotalYear)
                recordOut = country + ',' + codeBEC + ',' + str(
                    startYear) + ',' + lstValue + '\n'
                fileOutput.write(recordOut)
    # World totals.
    for codeBEC in sorted(dicWorld.keys()):
        keyYear = sorted(dicWorld[codeBEC].keys())
        startYear = keyYear[0]
        endYear = keyYear[-1]
        minStartYear, maxEndYear = spiLib.defMinMaxYear(
            startYear, minStartYear, endYear, maxEndYear)
        lstValue = spiLibTrade.vectorYear(dicWorld[codeBEC])
        # str() keeps this consistent with the per-country record and avoids
        # a TypeError when the year keys are not strings.
        recordOut = 'WLD,' + codeBEC + ',' + str(
            startYear) + ',' + lstValue + '\n'
        fileOutput.write(recordOut)
    # Overwrite the placeholder line with the min and max years.
    fileOutput.flush()  # empty the buffer
    os.fsync(fileOutput.fileno())  # force the write to disk
    fileOutput.seek(0, 0)  # back to the first character of the first line
    fileOutput.write(str(minStartYear) + ',' + str(maxEndYear) + '\n')
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Build the SPI indicator table from the Eurostat text extracts.

    Reads every file listed in the module-level ``fichiersTXT`` (processed
    in reverse sorted order; the list is reordered in place), keeps the
    lines matching ``indicatorInputEurostat``, the unit ``G_Unit``, sector
    ``'S1'`` and a NACE code present in the selected NACE dictionary, then
    delegates table creation to ``spiLibCreateTable``.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_Unit``, ``G_tableName`` and ``G_FileExt``.  ``fichiersTXT`` must
    hold at least one file: ``endYear`` is only set while reading a file.

    Args:
        indicatorInput: SPI indicator name passed to the table builders.
        indicatorInputEurostat: Eurostat indicator code to select.
        nomenclature: nomenclature identifier passed to the spiLib helpers.
        compteEurostat: Eurostat account identifier passed to the helpers.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # The context manager closes each input file, even on error.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Column positions of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            # The header's first time column is taken as the end year and
            # its last one as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed for nama_nace64_e.tsv; it
                # disappeared in esa2010 but the test is kept.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Filled to check that there are as many NACE codes
                        # as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on the line
                    # (typically an unknown geo code) is recorded here.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    # Year of the first existing value in the vectors; it can differ from
    # the first year of the longest vector because a value may be ':'.
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # Compare the start year from the vectors with the start year from the
    # first real value (a value may be ':').
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear) +
                      ' annee min. avec une valeur =' +
                      str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName, G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemple #4
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Build the SPI indicator table and, optionally, its total table.

    Reads every file listed in the module-level ``fichiersTXT`` and keeps
    the lines matching ``indicatorInputEurostat`` whose NACE code is either
    in the selected NACE dictionary (``nomenclature == 'nace1'``) or shorter
    than 4 characters (``nomenclature == 'nace2'``).  When
    ``G_IndicatorSPI_T`` is not ``'noTotal'`` a total table is created too.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_IndicatorSPI_T`` and ``G_tableName``.  ``fichiersTXT`` must hold at
    least one file: ``endYear`` is only set while reading a file.

    Args:
        indicatorInput: SPI indicator name passed to the table builders.
        indicatorInputEurostat: Eurostat indicator code to select.
        nomenclature: ``'nace1'`` or ``'nace2'``.
        compteEurostat: Eurostat account identifier passed to the helpers.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorSpiTotal = G_IndicatorSPI_T
    indicatorEurostat = indicatorInputEurostat
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    for txt in fichiersTXT:
        # The context manager closes each input file, even on error.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Column positions of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            # The header's first time column is taken as the end year and
            # its last one as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Header sample: nace_r1,indic_sb,size_emp,geo\time
            # Line sample:   E,V11110,TOTAL,ES<TAB>9037 <TAB>6410 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorEurostat and naceSelected:
                        # Filled to check that there are as many NACE codes
                        # as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on the line
                    # (typically an unknown geo code) is recorded here.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    # Create the SPI indicator (e.g. vabus); the aggregates dictionary is
    # returned for the total table.
    dicAgregatNace = spiLibCreateTable.createTable(nomenclature, dicIndicator,
                                                   fileLog, minStartYear,
                                                   dicNace, indicatorSpi,
                                                   compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the total SPI indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(nomenclature, dicAgregatNace,
                                           dicIndicator, minStartYear, fileLog,
                                           dicNace, indicatorSpiTotal,
                                           compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear,
                                        fileLog, indicatorSpiTotal,
                                        compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemple #5
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Build the SPI indicator tables broken down by size class.

    Reads every file listed in the module-level ``fichiersTXT`` and keeps
    the lines matching ``indicatorInputEurostat`` whose size class is in
    ``G_LstSize`` and whose NACE code is either in the selected NACE
    dictionary (``nomenclature == 'nace1'``) or shorter than 4 characters
    (``nomenclature == 'nace2'``).  Each kept line is stored under the SPI
    indicator name suffixed with ``'_'`` and ``G_DicSize[size]``.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_LstSize``, ``G_DicSize`` and ``G_tableName``.  ``fichiersTXT`` must
    hold at least one file: ``endYear`` is only set while reading a file.

    Args:
        indicatorInput: SPI indicator name (prefix of the sized indicators).
        indicatorInputEurostat: Eurostat indicator code to select.
        nomenclature: ``'nace1'`` or ``'nace2'``.
        compteEurostat: Eurostat account identifier passed to the helpers.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicSize = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # The context manager closes each input file, even on error.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Column positions of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            # The header's first time column is taken as the end year and
            # its last one as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Header sample: nace_r1,indic_sb,size_emp,geo\time
            # Line sample:   E,V11110,TOTAL,ES<TAB>9037 <TAB>6410 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if (indicator == indicatorEurostat and size in G_LstSize
                            and naceSelected):
                        dicSize[size] = size  # remember the Eurostat sizes
                        try:
                            # Look up the matching SPI indicator.
                            indicator_size = (indicatorSpi + '_' +
                                              G_DicSize[size])
                        except (KeyError, TypeError):
                            fileLog.write('pas de size ' + size +
                                          ' indicateur : ' + indicator +
                                          ' country : ' + country + '\n')
                            continue  # go on with the next record
                        # Filled to check that there are as many NACE codes
                        # as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicatorSize(
                            country, nace, vector, indicator_size,
                            dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on the line
                    # (typically an unknown geo code) is recorded here.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    for s in sorted(dicSize.keys()):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(nomenclature, dicIndicator, fileLog,
                                      minStartYear, dicNace, indicatorSpi,
                                      compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName,
                                    G_DicSize)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Build the SPI growth table from the Eurostat text extracts.

    Reads every file listed in the module-level ``fichiersTXT`` (processed
    in reverse sorted order; the list is reordered in place), keeps the
    lines matching ``indicatorInputEurostat``, the unit ``G_Unit``, sector
    ``'S1'`` and a NACE code present in the selected NACE dictionary, then
    delegates to ``spiLibCreateTable.createTableGrowth``.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_Unit``, ``G_tableName``, ``G_Growth`` and ``G_FileExt``.
    ``fichiersTXT`` must hold at least one file: ``endYear`` is only set
    while reading a file.

    Args:
        indicatorInput: SPI indicator name passed to the table builders.
        indicatorInputEurostat: Eurostat indicator code to select.
        nomenclature: nomenclature identifier passed to the spiLib helpers.
        compteEurostat: Eurostat account identifier passed to the helpers.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # The context manager closes each input file, even on error.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            # The header's first time column is taken as the end year and
            # its last one as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # nace 1 header: unit,sector,nace_r1,indic_na,geo\time
            # nace 2 header: unit,nace_r2,indic_na,sector,geo\time
            # Line sample:   MIO_EUR,A,B1G,AT<TAB>: <TAB>: <TAB>3781.7 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # No sector column (index -1): treat the line as 'S1'.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Here the NACE codes are selected from the list built
                    # by defSelectdicNace, not by code length.
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Filled to check that there are as many NACE codes
                        # as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Processing of the Eurostat series.
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on the line
                    # (typically an unknown geo code) is recorded here.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(nomenclature, dicIndicator, fileLog,
                                        minStartYear, dicNace, indicatorSpi,
                                        compteEurostat, G_tableName, G_Growth,
                                        G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemple #7
0
def traitementXML(fichiersXml, G_typeProduit, fileOutput):
    """Write per-country and world CPA yearly totals to ``fileOutput``.

    For every XML file and CPA code the values of all HS codes are summed
    per year and written as ``country,codeCPA,startYear,values``; one
    ``WLD`` record per CPA code follows with the sum over all files.  The
    first line of the output is a placeholder that is overwritten at the
    end with ``minStartYear,maxEndYear``.  Countries without any value and
    the (country, CPA, year) entries whose vector value is not an integer
    are reported in ``fileLog``.

    Relies on the module-level names ``dirCSV`` and ``fileLog``.

    Args:
        fichiersXml: iterable of XML file paths; the country code is the
            file base name without its extension.
        G_typeProduit: ``'cpa2002'`` selects the HS1992-to-CPA2002 mapping;
            any other value selects the CPA2008 mapping.
        fileOutput: writable and seekable file object with a real file
            descriptor (``fileno()`` is called on it).
    """
    typeProduit = G_typeProduit  # cpa2002 or cpa2008
    # First line, replaced at the end by the min and max years.
    fileOutput.write('0000,0000\n')
    if typeProduit == 'cpa2002':
        # file: hs1992tocpa2002ratio.csv
        dicCodeCPA = FileAccess.lectureHS1992toCPA2002(dirCSV)
    else:
        # file: hs1992tocpa2008un.csv
        dicCodeCPA = FileAccess.lectureHS1992toCPA2008(dirCSV, fileLog)
    minStartYear = 99999
    maxEndYear = -1
    dicWorld = {}
    dicCountryNoValue = {}
    # The right countries were already selected at download time
    # (see gethttpUN.py).
    for fichierXml in fichiersXml:
        dicXml = XmlAccess.lectureXML(fichierXml, dicCodeCPA, fileLog)
        country = os.path.splitext(os.path.basename(fichierXml))[0]
        paystraiter = False
        for rgCode in dicXml:
            dicCPA = dicXml[rgCode]
            for codeCPA in sorted(dicCPA.keys()):
                dicHS = dicCPA[codeCPA]
                codeHSSort = sorted(dicHS.keys())
                dicTotalYearHS = {}
                minStartYearHS = 99999
                maxEndYearHS = -1
                dicWorldCPA = dicWorld.setdefault(codeCPA, {})
                for codeHS in codeHSSort:
                    dicYear = dicHS[codeHS]
                    yearSort = sorted(dicYear.keys())
                    startYear = yearSort[0]
                    endYear = yearSort[-1]
                    minStartYear, maxEndYear = spiLib.defMinMaxYear(
                        startYear, minStartYear, endYear, maxEndYear)
                    minStartYearHS, maxEndYearHS = spiLib.defMinMaxYear(
                        startYear, minStartYearHS, endYear, maxEndYearHS)
                    for year in yearSort:
                        paystraiter = True
                        valYear = int(dicYear[year])  # always numeric
                        dicTotalYearHS[year] = (
                            dicTotalYearHS.get(year, 0) + valYear)
                        # World total, ignoring missing values.
                        dicWorldCPA[year] = dicWorldCPA.get(year, 0) + valYear

                # Output record for this country and CPA code.
                lstValue = spiLibTrade.vectorYear(dicTotalYearHS)
                recordOut = country + ',' + codeCPA + ',' + str(
                    minStartYearHS) + ',' + lstValue + '\n'
                fileOutput.write(recordOut)
                # Only list the entries of the '!'-separated vector that
                # are not integers (missing values); the world total is not
                # updated here.
                for i, valeur in enumerate(lstValue.split('!')):
                    year = minStartYearHS + i
                    try:
                        int(valeur)
                    except ValueError:
                        keyNoValue = country + ',' + codeCPA + ',' + str(year)
                        dicCountryNoValue[keyNoValue] = keyNoValue

        if not paystraiter:
            recordOut = 'unprocessed country :' + country + '\n'
            fileLog.write(recordOut)

    for countryNoValue in sorted(dicCountryNoValue.keys()):
        recordOut = 'no value for country, cpa, year :' + countryNoValue + '\n'
        fileLog.write(recordOut)
    # World totals.
    for codeCPA in sorted(dicWorld.keys()):
        keyYear = sorted(dicWorld[codeCPA].keys())
        startYear = keyYear[0]
        lstValue = spiLibTrade.vectorYear(dicWorld[codeCPA])
        recordOut = 'WLD,' + codeCPA + ',' + str(
            startYear) + ',' + lstValue + '\n'
        fileOutput.write(recordOut)
    # Overwrite the placeholder line with the min and max years.
    fileOutput.flush()  # empty the buffer
    os.fsync(fileOutput.fileno())  # force the write to disk
    fileOutput.seek(0, 0)  # back to the first character of the first line
    fileOutput.write(str(minStartYear) + ',' + str(maxEndYear) + '\n')
Exemple #8
0
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Build the SPI table for a 'competition' domain indicator.

    The SPI indicator name is looked up from ``indicatorInput`` in the
    dictionary returned by ``FileAccess.lectureIndicator`` for the
    ``'competition'`` domain.  Every file listed in the module-level
    ``fichiersTXT`` is read and the lines whose indicator equals
    ``indicatorInput`` and whose size class equals ``G_Size`` are kept;
    NACE codes are not filtered.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``dirUse``, ``G_Size`` and ``G_tableName``.  ``fichiersTXT`` must hold
    at least one file: ``endYear`` is only set while reading a file.

    Args:
        indicatorInput: Eurostat indicator code to select.
        nomenclature: nomenclature identifier (``'nace2'`` triggers the
            size special case together with ``compteEurostat == 'sbs'``).
        compteEurostat: Eurostat account identifier passed to the helpers.

    Raises:
        KeyError: if ``indicatorInput`` is not in the indicator dictionary.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition', dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    # The size column does not exist for NACE2 in the sbs_na files, so the
    # size condition is forced to match in that case.
    sizeForced = nomenclature == 'nace2' and compteEurostat == 'sbs'
    for txt in fichiersTXT:
        # The context manager closes each input file, even on error.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Column positions of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            # The header's first time column is taken as the end year and
            # its last one as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Header sample: nace_r1,indic_sb,size_emp,geo\time
            # Line sample:   E,V11110,TOTAL,ES<TAB>9037 <TAB>6410 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                # The size column is only read when it is expected to exist.
                if sizeForced:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()

                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # All NACE codes are taken here (no dicNace filter).
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Filled to check that there are as many NACE codes
                        # as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on the line
                    # (typically an unknown geo code) is recorded here.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)