def lectureFileWto(dirCSV):
    """Read the WTO extract ``wto.csv`` and index the export values.

    The first record of the file (metadata/header) is skipped.  The columns
    used, by zero-based position, are: Country_code=4, Flow_code=6,
    Indicator_code=8, Year=14 and Value=15.  Only records whose flow code is
    ``'X'`` (exports) are kept.

    Args:
        dirCSV: directory that holds ``wto.csv``.  The file name is appended
            with a backslash separator, exactly as the original code did.

    Returns:
        A tuple ``(dicFile, minStartYear, maxEndYear)``.  ``dicFile`` maps
        country code -> indicator code -> year -> value, all as the strings
        read from the file.  The two bounds are whatever
        ``spiLib.defMinMaxYear`` returned for the kept records; they stay at
        ``99999`` and ``-1`` when no export record was found.

    Raises:
        IOError: when the file cannot be opened.  The "introuvable" message
            is printed first, then the error is re-raised (the previous code
            printed the message and then crashed on an unbound variable).
    """
    minStartYear = 99999
    maxEndYear = -1
    dicFile = {}
    fichierLecture = dirCSV + '\\wto.csv'
    try:
        fichier = open(fichierLecture, 'r')
    except IOError:
        # Same text as the historical Python 2 print statement produced.
        print("le fichier  " + fichierLecture + "  est introuvable")
        raise

    with fichier:
        # First record holds the column names:
        # Topic_code,Topic_desc,DataSet_code,DataSet_desc,Country_code=4,
        # Country_desc,Flow_code=6,Flow_desc,Indicator_code=8,Indicator_desc,
        # Partner_Country_code=10,Partner_Country_desc,Unit_code=12,
        # Unit_desc,Year=14,Value=15,Value_Flag,Notes_Export
        fichier.readline()
        for recordIn in fichier:
            # Drops every "<lowercase letter>," pair (letter included) before
            # splitting; the original note says this is meant to get rid of
            # commas embedded in some descriptive fields.
            record = re.sub('[a-z],', '', recordIn)
            ligne = record.split(',')
            try:
                country_code = ligne[4].strip('"')
                flow_code = ligne[6].strip('"')
                indicator_code = ligne[8].strip('"')  # sector
                year = ligne[14].strip()
                value = ligne[15].strip()
            except IndexError:
                # Record too short: skip it instead of silently reusing the
                # fields parsed from the previous record.
                continue

            # Only exports are kept.
            if flow_code != 'X':
                continue

            minStartYear, maxEndYear = spiLib.defMinMaxYear(
                year, minStartYear, year, maxEndYear)
            dicFile.setdefault(country_code, {}).setdefault(
                indicator_code, {})[year] = value

    return dicFile, minStartYear, maxEndYear
def traitementXML(fichiersXml, fileOutput):
    """Aggregate the per-country XML files by BEC code and write the output.

    For every XML file, the values returned by ``XmlAccess.lectureXMLBec``
    are written as one record per BEC code
    (``country,codeBEC,startYear,<vector>``), where the country is the file
    name without directory and extension.  The ``'TOTAL'`` code is left out.
    The same values are summed over all files and written at the end as
    ``WLD`` records.  The first line of the output is finally overwritten
    with ``minStartYear,maxEndYear``.

    Relies on module-level names: ``dirCSV``, ``fileLog``, ``FileAccess``,
    ``XmlAccess``, ``spiLib``, ``spiLibTrade`` and ``os``.

    Args:
        fichiersXml: iterable of XML file paths, one per country.
        fileOutput: writable, seekable file object backed by a real file
            descriptor (``fileno()`` is passed to ``os.fsync``).

    Returns:
        None.
    """
    # Placeholder first line, replaced at the end by the min and max years.
    fileOutput.write('0000,0000\n')
    dicCodeBEC = FileAccess.lectureBEC(dirCSV)  # file: BECCodeOnly.csv
    minStartYear = 99999
    maxEndYear = -1
    dicWorld = {}

    # The right countries were already selected at download time
    # (cf. gethttpUN.py).
    for fichierXml in fichiersXml:
        dicXml = XmlAccess.lectureXMLBec(fichierXml, dicCodeBEC, fileLog)
        country = os.path.splitext(os.path.basename(fichierXml))[0]
        for rgCode in dicXml:
            dicCodes = dicXml[rgCode]
            if 'TOTAL' not in dicCodes:
                fileLog.write('no TOTAL for XML file ' + fichierXml + '\n')
            codeBECSort = sorted(
                code for code in dicCodes if code != 'TOTAL')
            for codeBEC in codeBECSort:
                dicWorldCode = dicWorld.setdefault(codeBEC, {})
                dicYears = dicCodes[codeBEC]
                yearSort = sorted(dicYears)
                startYear = yearSort[0]
                endYear = yearSort[-1]
                minStartYear, maxEndYear = spiLib.defMinMaxYear(
                    startYear, minStartYear, endYear, maxEndYear)

                dicTotalYear = {}
                for year in yearSort:
                    valYear = int(dicYears[year])
                    dicTotalYear[year] = dicTotalYear.get(year, 0) + valYear
                    dicWorldCode[year] = dicWorldCode.get(year, 0) + valYear

                # Output record for this country: yearly totals per BEC code.
                lstValue = spiLibTrade.vectorYear(dicTotalYear)
                fileOutput.write(country + ',' + codeBEC + ',' +
                                 str(startYear) + ',' + lstValue + '\n')

    # World totals.
    for codeBEC in sorted(dicWorld):
        keyYear = sorted(dicWorld[codeBEC])
        startYear = keyYear[0]
        endYear = keyYear[-1]
        minStartYear, maxEndYear = spiLib.defMinMaxYear(
            startYear, minStartYear, endYear, maxEndYear)
        lstValue = spiLibTrade.vectorYear(dicWorld[codeBEC])
        # str() keeps this record consistent with the per-country one and
        # avoids a TypeError should the year keys not be strings.
        fileOutput.write('WLD,' + codeBEC + ',' + str(startYear) + ',' +
                         lstValue + '\n')

    # Write the min and max years at the very beginning of the file.
    fileOutput.flush()  # empty the buffer
    os.fsync(fileOutput.fileno())  # force the write to disk
    fileOutput.seek(0, 0)  # back to the first character of the first line
    fileOutput.write(str(minStartYear) + ',' + str(maxEndYear) + '\n')
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator from the TSV inputs and create its table.

    Every file of the module-level list ``fichiersTXT`` is read (the list is
    sorted in place, then reversed).  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its unit equals ``G_Unit``, its sector is
    ``'S1'`` and its NACE code is a key of the dict returned by
    ``spiLib.defSelectdicNace``.  Kept rows are accumulated through the
    ``spiLib`` helpers and the result is handed to
    ``spiLibCreateTable.createTable`` / ``createTableNE``.

    Relies on module-level names: ``fichiersTXT``, ``fileLog``, ``G_Unit``,
    ``G_tableName``, ``G_FileExt``, ``spiLib``, ``DBAccess`` and
    ``spiLibCreateTable``.

    NOTE(review): with an empty ``fichiersTXT``, ``endYear`` is never bound
    and the final ``createTableNE`` call fails.

    Args:
        indicatorInput: indicator name passed to the table-creation helpers.
        indicatorInputEurostat: indicator code matched against the rows.
        nomenclature: forwarded to ``spiLib.defSelectdicNace`` and to the
            table-creation helpers.
        compteEurostat: forwarded to the same helpers.

    Returns:
        None.
    """
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    fichiersTXT.sort()
    fichiersTXT.reverse()
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed (it never was before).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names).
            lstrec = fichierTXT.readline().split(',')
            # Positions of the Eurostat columns in this file.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            # The geo/time header is tab separated: its second item is taken
            # as the end year and its last item as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed for nama_nace64_e.tsv; it is gone
                # in ESA2010 but the test is kept.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        # Processing of the Eurostat series.
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: an unknown country
                    # or any failure while processing the row only records
                    # the geo code for the log.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # Year of the first existing value in the vectors; it can differ from
    # the first year of the longest vector because a value may be ':'.
    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    # Compare the start year of the vectors with the year of the first real
    # value, and keep the latter.
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear) +
                      ' annee min. avec une valeur =' +
                      str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']

    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName, G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator and create its table and its total table.

    Every file of the module-level list ``fichiersTXT`` is read.  A row is
    kept when its indicator equals ``indicatorInputEurostat`` and either the
    nomenclature is ``'nace1'`` and the NACE code is a key of the dict
    returned by ``spiLib.defSelectdicNace``, or the nomenclature is
    ``'nace2'`` and the NACE code is shorter than 4 characters.  The
    aggregates returned by ``spiLibCreateTable.createTable`` feed
    ``createTableTotal`` unless ``G_IndicatorSPI_T`` is ``'noTotal'``.

    Relies on module-level names: ``fichiersTXT``, ``fileLog``,
    ``G_IndicatorSPI_T``, ``G_tableName``, ``spiLib``, ``DBAccess`` and
    ``spiLibCreateTable``.

    NOTE(review): with an empty ``fichiersTXT``, ``endYear`` is never bound
    and the ``createTableNE`` calls fail.

    Args:
        indicatorInput: indicator name passed to the table-creation helpers.
        indicatorInputEurostat: indicator code matched against the rows.
        nomenclature: ``'nace1'`` or ``'nace2'`` select rows; the value is
            also forwarded to the helpers.
        compteEurostat: forwarded to the helpers.

    Returns:
        None.
    """
    indicatorSpi = indicatorInput
    indicatorSpiTotal = G_IndicatorSPI_T
    indicatorEurostat = indicatorInputEurostat
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # "with" guarantees the input file is closed (it never was before).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names).
            lstrec = fichierTXT.readline().split(',')
            # Positions of the Eurostat columns in this file.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            # The geo/time header is tab separated: its second item is taken
            # as the end year and its last item as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            # Sample input:
            # nace_r1,indic_sb,size_emp,geo\time
            # E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorEurostat and naceSelected:
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        # Processing of the Eurostat series.
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: an unknown country
                    # or any failure while processing the row only records
                    # the geo code for the log.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    # Create the SPI indicator (e.g. vabus); the aggregates come back.
    dicAgregatNace = spiLibCreateTable.createTable(
        nomenclature, dicIndicator, fileLog, minStartYear, dicNace,
        indicatorSpi, compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the SPI total indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(
            nomenclature, dicAgregatNace, dicIndicator, minStartYear,
            fileLog, dicNace, indicatorSpiTotal, compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(
            nomenclature, dicNation, endYear, fileLog, indicatorSpiTotal,
            compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator split by size class and create its table.

    Every file of the module-level list ``fichiersTXT`` is read.  A row is
    kept when its indicator equals ``indicatorInputEurostat``, its size is
    listed in ``G_LstSize`` and either the nomenclature is ``'nace1'`` and
    the NACE code is a key of the dict returned by
    ``spiLib.defSelectdicNace``, or the nomenclature is ``'nace2'`` and the
    NACE code is shorter than 4 characters.  The SPI indicator name of a row
    is ``indicatorInput + '_' + G_DicSize[size]``; rows whose size has no
    entry in ``G_DicSize`` are logged and skipped.

    Relies on module-level names: ``fichiersTXT``, ``fileLog``,
    ``G_LstSize``, ``G_DicSize``, ``G_tableName``, ``spiLib``, ``DBAccess``
    and ``spiLibCreateTable``.

    NOTE(review): with an empty ``fichiersTXT``, ``endYear`` is never bound
    and the final ``createTableNE`` call fails.

    Args:
        indicatorInput: SPI indicator prefix, also passed to the
            table-creation helpers.
        indicatorInputEurostat: indicator code matched against the rows.
        nomenclature: ``'nace1'`` or ``'nace2'`` select rows; the value is
            also forwarded to the helpers.
        compteEurostat: forwarded to the helpers.

    Returns:
        None.
    """
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicSize = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # "with" guarantees the input file is closed (it never was before).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names).
            lstrec = fichierTXT.readline().split(',')
            # Positions of the Eurostat columns in this file.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            # The geo/time header is tab separated: its second item is taken
            # as the end year and its last item as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            # Sample input:
            # nace_r1,indic_sb,size_emp,geo\time
            # E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if not (indicator == indicatorEurostat
                            and size in G_LstSize and naceSelected):
                        continue
                    # Keep track of the sizes met in the Eurostat input.
                    dicSize[size] = size
                    # Look for the matching SPI indicator.
                    try:
                        sizeSuffix = G_DicSize[size]
                    except KeyError:
                        fileLog.write('pas de size ' + size +
                                      ' indicateur : ' + indicator +
                                      ' country : ' + country + '\n')
                        continue  # go on with the next record
                    indicator_size = indicatorSpi + '_' + sizeSuffix
                    # Remember the NACE codes seen, to check later that
                    # there are as many as in the SPI table.
                    dicNaceCheck[nace] = nace
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    dicStartValue = spiLib.defDicStartValue(
                        timeSerie, country, nace, indicator, dicStartValue,
                        endYear)
                    dicIndicator = spiLib.defDicIndicatorSize(
                        country, nace, vector, indicator_size, dicIndicator)
                    minStartYear, maxEndYear = spiLib.defMinMaxYear(
                        startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: an unknown country
                    # or any failure while processing the row only records
                    # the geo code for the log.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    for s in sorted(dicSize):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')

    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(nomenclature, dicIndicator, fileLog,
                                      minStartYear, dicNace, indicatorSpi,
                                      compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName,
                                    G_DicSize)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator and create its growth table.

    Every file of the module-level list ``fichiersTXT`` is read (the list is
    sorted in place, then reversed).  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its unit equals ``G_Unit``, its sector is
    ``'S1'`` and its NACE code is a key of the dict returned by
    ``spiLib.defSelectdicNace``.  Kept rows are accumulated through the
    ``spiLib`` helpers and the result is handed to
    ``spiLibCreateTable.createTableGrowth`` / ``createTableNE``.

    Relies on module-level names: ``fichiersTXT``, ``fileLog``, ``G_Unit``,
    ``G_tableName``, ``G_Growth``, ``G_FileExt``, ``spiLib``, ``DBAccess``
    and ``spiLibCreateTable``.

    NOTE(review): with an empty ``fichiersTXT``, ``endYear`` is never bound
    and the final ``createTableNE`` call fails.

    Args:
        indicatorInput: indicator name passed to the table-creation helpers.
        indicatorInputEurostat: indicator code matched against the rows.
        nomenclature: forwarded to ``spiLib.defSelectdicNace`` and to the
            table-creation helpers.
        compteEurostat: forwarded to the same helpers.

    Returns:
        None.
    """
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    fichiersTXT.sort()
    fichiersTXT.reverse()
    for txt in fichiersTXT:
        # "with" guarantees the input file is closed (it never was before).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names).
            lstrec = fichierTXT.readline().split(',')
            # Find the position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            # The geo/time header is tab separated: its second item is taken
            # as the end year and its last item as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            # Sample layouts:
            # nace 1 : unit,sector,nace_r1,indic_na,geo\time
            # nace 2 : unit,nace_r2,indic_na,sector,geo\time
            # MIO_EUR,A,B1G,AT : : : 3781.7 4375.5 4322.6 3827.4 3542.7
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # -1 marks a missing sector column; 'S1' is then assumed.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Original rule of thumb: with nace1 the NACE codes come
                    # from the list defined in the Oracle table; with nace2
                    # and nama the list is used too, whereas sbs tests on a
                    # maximum length of 4.  Here the list is always used.
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(
                            timeSerie, startYear, endYear)
                        # Processing of the Eurostat series.
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: an unknown country
                    # or any failure while processing the row only records
                    # the geo code for the log.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(nomenclature, dicIndicator, fileLog,
                                        minStartYear, dicNace, indicatorSpi,
                                        compteEurostat, G_tableName, G_Growth,
                                        G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementXML(fichiersXml, G_typeProduit, fileOutput):
    """Aggregate the per-country XML files by CPA code and write the output.

    For every XML file, the values returned by ``XmlAccess.lectureXML`` are
    summed over the HS codes of each CPA code and written as one record per
    CPA code (``country,codeCPA,startYear,<vector>``), where the country is
    the file name without directory and extension.  The same values are
    summed over all files and written at the end as ``WLD`` records.  The
    first line of the output is finally overwritten with
    ``minStartYear,maxEndYear``.  Countries without any value and
    (country, CPA, year) cells that are not integers are reported in
    ``fileLog``.

    Relies on module-level names: ``dirCSV``, ``fileLog``, ``FileAccess``,
    ``XmlAccess``, ``spiLib``, ``spiLibTrade`` and ``os``.

    Args:
        fichiersXml: iterable of XML file paths, one per country.
        G_typeProduit: ``'cpa2002'`` selects the HS1992->CPA2002 table; any
            other value selects the HS1992->CPA2008 table.
        fileOutput: writable, seekable file object backed by a real file
            descriptor (``fileno()`` is passed to ``os.fsync``).

    Returns:
        None.
    """
    typeProduit = G_typeProduit  # cpa2002 or cpa2008
    # Placeholder first line, replaced at the end by the min and max years.
    fileOutput.write('0000,0000\n')
    if typeProduit == 'cpa2002':
        # file: hs1992tocpa2002ratio.csv
        dicCodeCPA = FileAccess.lectureHS1992toCPA2002(dirCSV)
    else:
        # file: hs1992tocpa2008un.csv
        dicCodeCPA = FileAccess.lectureHS1992toCPA2008(dirCSV, fileLog)
    minStartYear = 99999
    maxEndYear = -1
    dicWorld = {}
    dicCountryNoValue = {}

    # The right countries were already selected at download time
    # (cf. gethttpUN.py).
    for fichierXml in fichiersXml:
        dicXml = XmlAccess.lectureXML(fichierXml, dicCodeCPA, fileLog)
        country = os.path.splitext(os.path.basename(fichierXml))[0]
        paystraiter = False
        for rgCode in dicXml:
            dicCodes = dicXml[rgCode]
            for codeCPA in sorted(dicCodes):
                dicHS = dicCodes[codeCPA]
                dicTotalYearHS = {}
                minStartYearHS = 99999
                maxEndYearHS = -1
                dicWorldCode = dicWorld.setdefault(codeCPA, {})
                for codeHS in sorted(dicHS):
                    dicYears = dicHS[codeHS]
                    yearSort = sorted(dicYears)
                    startYear = yearSort[0]
                    endYear = yearSort[-1]
                    minStartYear, maxEndYear = spiLib.defMinMaxYear(
                        startYear, minStartYear, endYear, maxEndYear)
                    minStartYearHS, maxEndYearHS = spiLib.defMinMaxYear(
                        startYear, minStartYearHS, endYear, maxEndYearHS)
                    for year in yearSort:
                        paystraiter = True
                        # The value is always numeric.
                        valYear = int(dicYears[year])
                        dicTotalYearHS[year] = (
                            dicTotalYearHS.get(year, 0) + valYear)
                        # WLD total: missing values are simply not counted.
                        dicWorldCode[year] = (
                            dicWorldCode.get(year, 0) + valYear)

                # Output record for this country: yearly totals per CPA code.
                lstValue = spiLibTrade.vectorYear(dicTotalYearHS)
                fileOutput.write(country + ',' + codeCPA + ',' +
                                 str(minStartYearHS) + ',' + lstValue + '\n')

                # List the missing cells of the normalised vector: every
                # item that is not an integer is reported per year.
                for i, cell in enumerate(lstValue.split('!')):
                    year = minStartYearHS + i
                    try:
                        int(cell)
                    except ValueError:
                        keyNoValue = (country + ',' + codeCPA + ',' +
                                      str(year))
                        dicCountryNoValue[keyNoValue] = keyNoValue
        if not paystraiter:
            fileLog.write('unprocessed country :' + country + '\n')

    for countryNoValue in sorted(dicCountryNoValue):
        fileLog.write('no value for country, cpa, year :' + countryNoValue +
                      '\n')

    # World totals.
    for codeCPA in sorted(dicWorld):
        keyYear = sorted(dicWorld[codeCPA])
        startYear = keyYear[0]
        lstValue = spiLibTrade.vectorYear(dicWorld[codeCPA])
        fileOutput.write('WLD,' + codeCPA + ',' + str(startYear) + ',' +
                         lstValue + '\n')

    # Write the min and max years at the very beginning of the file.
    fileOutput.flush()  # empty the buffer
    os.fsync(fileOutput.fileno())  # force the write to disk
    fileOutput.seek(0, 0)  # back to the first character of the first line
    fileOutput.write(str(minStartYear) + ',' + str(maxEndYear) + '\n')
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Load one Eurostat "competition" indicator and create its table.

    The SPI indicator name is looked up under ``indicatorInput`` in the dict
    returned by ``FileAccess.lectureIndicator`` for the ``'competition'``
    domain.  Every file of the module-level list ``fichiersTXT`` is read.  A
    row is kept when its indicator equals ``indicatorInput`` and its size
    equals ``G_Size``; for ``nomenclature == 'nace2'`` with
    ``compteEurostat == 'sbs'`` the size is forced to ``'TOTAL'``.  The NACE
    code is not filtered here.

    Relies on module-level names: ``fichiersTXT``, ``fileLog``, ``dirUse``,
    ``G_Size``, ``G_tableName``, ``spiLib``, ``DBAccess``, ``FileAccess`` and
    ``spiLibCreateTable``.

    NOTE(review): with an empty ``fichiersTXT``, ``endYear`` is never bound
    and the final ``createTableNE`` call fails.

    Args:
        indicatorInput: Eurostat indicator code; also the key used to find
            the SPI indicator name.
        nomenclature: forwarded to the helpers; ``'nace2'`` combined with
            ``'sbs'`` switches the size handling.
        compteEurostat: forwarded to the helpers.

    Returns:
        None.

    Raises:
        KeyError: when ``indicatorInput`` is not in the indicator dict.
    """
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition',
                                                     dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    # The size column does not exist for NACE2 in the sbs_na files, so the
    # size condition is made to hold for every row in that case.
    sizeForced = nomenclature == 'nace2' and compteEurostat == 'sbs'

    for txt in fichiersTXT:
        # "with" guarantees the input file is closed (it never was before).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names).
            lstrec = fichierTXT.readline().split(',')
            # Positions of the Eurostat columns in this file.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            # The geo/time header is tab separated: its second item is taken
            # as the end year and its last item as the start year.
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            # Sample input:
            # nace_r1,indic_sb,size_emp,geo\time
            # E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                # The size column is only read when it is really used.
                if sizeForced:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Every NACE code is taken here; the former filter on
                    # the NACE list from the Oracle table is disabled.
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Remember the NACE codes seen, to check later that
                        # there are as many as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort kept from the original: an unknown country
                    # or any failure while processing the row only records
                    # the geo code for the log.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)