def traitementFichierTXT(nomenclature, fileLog, tableName):
    """Create the external 'Crtbal' table from stored 'x' and 'm' data.

    Reads the 'x' and 'm' indicator data through
    DBAccess.lectureNaceIndicatorData, each returning a sequence whose
    first item is the data dictionary and whose second item is the start
    year. When the two start years differ, a message is written to
    fileLog and nothing else is done. Otherwise both dictionaries go
    through spiLibTotal.calcNaceAggregates with the 'manufacturing' option
    and are handed to spiLibCreateTable.createTableExternalCrtbal.

    Args:
        nomenclature: Nomenclature identifier forwarded to the helpers.
        fileLog: Object with a write() method used for log messages.
        tableName: Table name forwarded to the DB reader and table writer.
    """
    exportInfo = DBAccess.lectureNaceIndicatorData('x', nomenclature,
                                                   tableName)
    importInfo = DBAccess.lectureNaceIndicatorData('m', nomenclature,
                                                   tableName)
    exportDic, exportStartYear = exportInfo[0], exportInfo[1]
    importDic, importStartYear = importInfo[0], importInfo[1]

    if exportStartYear != importStartYear:
        fileLog.write('Start year for indicators x and m are different.\n')
    else:
        exportDic = spiLibTotal.calcNaceAggregates(exportDic, nomenclature,
                                                   'manufacturing')
        importDic = spiLibTotal.calcNaceAggregates(importDic, nomenclature,
                                                   'manufacturing')
        spiLibCreateTable.createTableExternalCrtbal(
            nomenclature, exportDic, importDic, importStartYear, fileLog,
            tableName)
def traitementFichierTxt(spiIndicator, eurostatIndicator, nomenclature,
                         fileData, fileLabel, tableName, fileLog):
    """Load a tab-separated Comext extract and create the X/M share table.

    The header row is turned into column indexes by spiLib.defDicComext.
    A data row is kept when its reporter is in the nation list, its
    product is in the CPA (or BEC) list and its flow equals
    eurostatIndicator; the value is stored as
    dicIndicator[reporter][product][partner][year]. Rejected reporters and
    products are reported through spiLib.defnoCountry and
    spiLib.defUselessCode. The collected data is then post-processed by
    the spiLibComext / spiLibTotal helpers and passed to
    spiLibCreateTable.createTableExternalXMShare.

    Args:
        spiIndicator: Indicator name forwarded to the table writer.
        eurostatIndicator: Flow value a row must carry to be kept.
        nomenclature: Nomenclature identifier; 'bec' selects the BEC code
            list and the BEC-specific formatting step.
        fileData: Path of the tab-separated input file.
        fileLabel: Not used by this function; kept for call compatibility.
        tableName: Table name forwarded to the table writer.
        fileLog: Log object forwarded to the reporting helpers.
    """
    listNation = DBAccess.defListNationIso2()
    if nomenclature == 'bec':
        listCpa = DBAccess.lectureBecEurostat()
    else:
        listCpa = DBAccess.lectureCpaSimple(nomenclature)

    uselessNation = {}
    uselessCpa = {}
    dicIndicator = {}
    minYear = 999999
    maxYear = 0

    # The context manager closes the file even if a row fails to parse.
    with open(fileData, 'r') as dataFile:
        header = dataFile.readline().split('\t')
        dicComext = spiLib.defDicComext(header)
        iReporter = dicComext['reporter']
        iPartner = dicComext['partner']
        iProduct = dicComext['product']
        iFlow = dicComext['flow']
        iPeriod = dicComext['period']
        iValue = dicComext['value']

        for line in dataFile:
            if not line.strip():
                # Blank rows (e.g. a trailing newline) carry no data.
                continue
            fields = line.split('\t')
            reporter = fields[iReporter].strip()
            partner = fields[iPartner].strip().strip('"')
            product = fields[iProduct].strip()
            # Drop one leading zero; startswith() is safe on an empty field.
            if product.startswith('0'):
                product = product[1:]
            flow = fields[iFlow].strip()
            # Only the first four characters of the period are used as year.
            period = int(fields[iPeriod].strip()[0:4])
            value = fields[iValue].strip()

            if reporter not in listNation:
                uselessNation[reporter] = reporter
                continue
            if product not in listCpa:
                uselessCpa[product] = product
                continue
            if flow != eurostatIndicator:
                continue

            byPeriod = dicIndicator.setdefault(reporter, {}).setdefault(
                product, {}).setdefault(partner, {})
            byPeriod[period] = value
            # NOTE(review): the year range is tracked for kept rows only;
            # the source layout was ambiguous on this point - confirm.
            if period > maxYear:
                maxYear = period
            if period < minYear:
                minYear = period

    spiLib.defnoCountry(uselessNation, fileLog)
    del uselessNation
    spiLib.defUselessCode(uselessCpa, fileLog)
    del uselessCpa

    dicIndicator = spiLibComext.convertSingleValueToVector(
        dicIndicator, minYear, maxYear)
    dicIndicator = spiLibComext.mergePartners(dicIndicator)
    if nomenclature == 'bec':
        dicIndicator = spiLibComext.formatBecEurostatDic(dicIndicator)
    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'manufacturing')
    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'comext')
    spiLibCreateTable.createTableExternalXMShare(
        spiIndicator, nomenclature, minYear, dicIndicator, tableName, fileLog)
def _sumNaceSeries(countryDic, codes):
    """Return the position-wise sum of the series stored under *codes*.

    An empty list is returned when any of the codes is missing from
    countryDic. At each position, a ':' in any input series produces ':';
    otherwise the values are added as floats and stored as a string.
    """
    try:
        series = [countryDic[code] for code in codes]
    except KeyError:
        return []
    summed = []
    for values in zip(*series):
        if ':' in values:
            summed.append(':')
        else:
            total = float(values[0])
            for value in values[1:]:
                total += float(value)
            summed.append(str(total))
    return summed


def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         fileNumerator, indicatorSpi, fileLog, tableName):
    """Build a NACE percentage table from one numerator file and several
    denominator files.

    Each input file has a comma-separated header that
    spiLib.defDicEurostat maps to column indexes; the 'geotime' column
    holds tab-separated values whose first item is the geo code. Rows are
    kept when the geo code is known to dicNation, the NACE code is in
    dicNace and the unit matches; denominator rows must also match
    indicatorEurostatDenominator when the file has an 'indic' column.
    The resulting dictionaries are normalised, aggregated and passed to
    spiLibCreateTable.createTableNacePercentage.

    Args:
        indicatorEurostatDenominator: Indicator value required on
            denominator rows (when the file has an indicator column).
        unitDenominator: Unit value required on denominator rows.
        unitNumerator: Unit value required on numerator rows.
        nomenclature: Nomenclature identifier forwarded to the helpers.
        filesDenominator: List of denominator file paths; it is sorted in
            place in descending order.
        fileNumerator: Path of the numerator file.
        indicatorSpi: Indicator name; 'patintens' and 'patintrd' with
            nomenclature 'nace2' enable extra aggregates.
        fileLog: Log object forwarded to the reporting helpers.
        tableName: Table name forwarded to the table writer.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1
    filesDenominator.sort()
    filesDenominator.reverse()

    specialAggregates = (indicatorSpi in ('patintens', 'patintrd')
                         and nomenclature == 'nace2')

    with open(fileNumerator, 'r') as numeratorFile:
        header = numeratorFile.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(header)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        years = header[iGeoTime].split('\t')
        # The first year column is read as the end year, the last as the
        # start year.
        endYear = years[1].strip()
        startYear = years[-1].strip()

        # This code is added to create special aggregates asked on
        # 30-09-2016.
        if specialAggregates:
            for extraCode in ('C10', 'C11', 'C12', 'C13', 'C14', 'C15',
                              'C31', 'C32'):
                dicNace[extraCode] = 'C'

        for line in numeratorFile:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()
            try:
                dicNaceCheck[nace] = nace
                country = dicNation[geo]
                timeSerie = geoTime[1:]
                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    # NOTE(review): assumed to belong to the matching-row
                    # branch; the source layout was ambiguous - confirm.
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                # Best effort: any failure on a row (typically an unknown
                # geo code) is recorded against that geo code.
                dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    # This code is added to create special aggregates asked on 30-09-2016.
    if specialAggregates:
        aggregates = (
            ('C10-C12', ('C10', 'C11', 'C12')),
            ('C13-C15', ('C13', 'C14', 'C15')),
            ('C31_C32', ('C31', 'C32')),
        )
        for country in dicIndicatorNumerator:
            countryDic = dicIndicatorNumerator[country]
            for label, codes in aggregates:
                summed = _sumNaceSeries(countryDic, codes)
                if summed:
                    countryDic[label] = summed

    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            header = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            # Fall back to -1 ("no indicator column") instead of leaving
            # iIndic unbound or carrying the previous file's index.
            try:
                iIndic = dicEurostat['indic']
            except KeyError:
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            years = header[iGeoTime].split('\t')
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    # The indicator is only checked when the row has one.
                    indicatorOk = (indicator == 'noindicator' or
                                   indicator == indicatorEurostatDenominator)
                    if (indicatorOk and nace in dicNace
                            and unit == unitDenominator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    # Same best-effort policy as for the numerator rows.
                    dicNoCountry[geo] = geo

    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)
def traitementFichierTXT(indicatorEurostatDenominator,
                         indicatorEurostatNumerator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         filesNumerator, baseYear, indicatorSpi, fileLog,
                         tableName):
    """Build a domestic index table from numerator and denominator files.

    Each input file has a comma-separated header that
    spiLib.defDicEurostat maps to column indexes; the 'geotime' column
    holds tab-separated values whose first item is the geo code. Rows are
    kept when the geo code is known to dicNation, the NACE code is in
    dicNace and indicator and unit match; denominator rows must also have
    sector 'S1' (rows from files without a sector column are treated as
    'S1'). The resulting dictionaries are normalised, aggregated and
    passed to spiLibCreateTable.createTableDomesticIndex.

    Args:
        indicatorEurostatDenominator: Indicator value required on
            denominator rows; 'EMP' with nomenclature 'nace2' triggers
            spiLibTotal.calcNace2EmpAggr.
        indicatorEurostatNumerator: Indicator value required on numerator
            rows.
        unitDenominator: Unit value required on denominator rows.
        unitNumerator: Unit value required on numerator rows.
        nomenclature: Nomenclature identifier forwarded to the helpers.
        filesDenominator: List of denominator file paths; sorted in place
            in descending order.
        filesNumerator: List of numerator file paths; sorted in place in
            descending order.
        baseYear: Base year forwarded to the table writer.
        indicatorSpi: Indicator name forwarded to the table writer.
        fileLog: Log object forwarded to the reporting helpers.
        tableName: Table name forwarded to the table writer.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1
    filesNumerator.sort()
    filesNumerator.reverse()
    filesDenominator.sort()
    filesDenominator.reverse()

    for txt in filesNumerator:
        # The context manager closes the file even if parsing fails.
        with open(txt, 'r') as numeratorFile:
            header = numeratorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            years = header[iGeoTime].split('\t')
            # The first year column is read as the end year, the last as
            # the start year.
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in numeratorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostatNumerator
                            and nace in dicNace and unit == unitNumerator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = (
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData))
                        dicIndicatorNumerator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorNumerator)
                        # NOTE(review): assumed to belong to the
                        # matching-row branch; the source layout was
                        # ambiguous - confirm.
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: any failure on a row (typically an
                    # unknown geo code) is recorded against that geo code.
                    dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            header = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            iSector = dicEurostat['sector']
            years = header[iGeoTime].split('\t')
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                # An index of -1 means the file has no sector column.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = lineList[iSector].strip()
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostatDenominator
                            and nace in dicNace
                            and unit == unitDenominator
                            and sector == 'S1'):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    # Same best-effort policy as for the numerator rows.
                    dicNoCountry[geo] = geo

    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    # The following lines are added on demand of B2 team to calculate
    # aggregates that cannot be extracted from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(
            dicIndicatorDenominator)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableDomesticIndex(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, baseYear, fileLog,
        tableName)
def traitementFichierTxt(spiIndicator, eurostatIndicator, nomenclature,
                         fileData, fileLabel, tableName, fileLog):
    """Load a tab-separated Comext extract, add intra- and extra-EU27
    values, and create the basic nomenclature table.

    The header row is turned into column indexes by spiLib.defDicComext.
    A data row is kept when its reporter is in the nation list, its
    product is in the CPA (or BEC) list and its flow equals
    eurostatIndicator. After spiLibComext.convertSingleValueToVector, the
    'EU27_INTRA' and 'EU27_EXTRA' series of every country/code pair are
    combined position by position: if one side is ':' the other side is
    used, otherwise the two integers are added. Pairs missing either
    series are logged to fileLog and left unmerged.

    Args:
        spiIndicator: Indicator name forwarded to the table writer.
        eurostatIndicator: Flow value a row must carry to be kept.
        nomenclature: Nomenclature identifier; 'bec' selects the BEC code
            list and the BEC-specific formatting step.
        fileData: Path of the tab-separated input file.
        fileLabel: Not used by this function; kept for call compatibility.
        tableName: Table name forwarded to the table writer.
        fileLog: Log object; receives rejected codes and missing-series
            messages.
    """
    listNation = DBAccess.defListNationIso2()
    if nomenclature == 'bec':
        listCpa = DBAccess.lectureBecEurostat()
    else:
        listCpa = DBAccess.lectureCpaSimple(nomenclature)

    uselessNation = {}
    uselessCpa = {}
    dicIndicator = {}
    minYear = 999999
    maxYear = 0

    # The context manager closes the file even if a row fails to parse.
    with open(fileData, 'r') as dataFile:
        header = dataFile.readline().split('\t')
        dicComext = spiLib.defDicComext(header)
        iReporter = dicComext['reporter']
        iPartner = dicComext['partner']
        iProduct = dicComext['product']
        iFlow = dicComext['flow']
        iPeriod = dicComext['period']
        iValue = dicComext['value']

        for line in dataFile:
            if not line.strip():
                # Blank rows (e.g. a trailing newline) carry no data.
                continue
            fields = line.split('\t')
            reporter = fields[iReporter].strip()
            partner = fields[iPartner].strip().strip('"')
            product = fields[iProduct].strip()
            # Drop one leading zero; startswith() is safe on an empty field.
            if product.startswith('0'):
                product = product[1:]
            flow = fields[iFlow].strip()
            # Only the first four characters of the period are used as year.
            period = int(fields[iPeriod].strip()[0:4])
            value = fields[iValue].strip()

            if reporter not in listNation:
                uselessNation[reporter] = reporter
                continue
            if product not in listCpa:
                uselessCpa[product] = product
                continue
            if flow != eurostatIndicator:
                continue

            byPeriod = dicIndicator.setdefault(reporter, {}).setdefault(
                product, {}).setdefault(partner, {})
            byPeriod[period] = value
            # NOTE(review): the year range is tracked for kept rows only;
            # the source layout was ambiguous on this point - confirm.
            if period > maxYear:
                maxYear = period
            if period < minYear:
                minYear = period

    spiLib.defnoCountry(uselessNation, fileLog)
    del uselessNation
    spiLib.defUselessCode(uselessCpa, fileLog)
    del uselessCpa

    dicIndicator = spiLibComext.convertSingleValueToVector(
        dicIndicator, minYear, maxYear)

    for country in dicIndicator:
        byCode = dicIndicator[country]
        # Values are replaced in place; no key is added or removed, so
        # iterating the dictionary while assigning is safe.
        for code in byCode:
            partners = byCode[code]
            try:
                refIntra = partners['EU27_INTRA']
            except KeyError:
                fileLog.write('Missing intra EU27 reference for country ' +
                              country + ' and code ' + code + '.\n')
                continue
            try:
                refExtra = partners['EU27_EXTRA']
            except KeyError:
                fileLog.write('Missing extra EU27 reference for country ' +
                              country + ' and code ' + code + '.\n')
                continue

            merged = []
            for intra, extra in zip(refIntra, refExtra):
                if intra == ':':
                    merged.append(extra)
                elif extra == ':':
                    merged.append(intra)
                else:
                    merged.append('{0:.0f}'.format(int(intra) + int(extra)))
            byCode[code] = merged

    if nomenclature == 'bec':
        dicIndicator = spiLibComext.formatBecEurostatDic(dicIndicator)
    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'comext')
    spiLibCreateTable.createTableNomenclatureBasic(
        dicIndicator, spiIndicator, nomenclature, minYear, tableName)
def traitementFichierTXT(nomenclature, indicNumerator, indicDenominator,
                         filePaths):
    """Read tab-separated SBS files and create the 'surpl' share table.

    Each file is read with csv.reader (tab delimiter). The first cell of
    every row is a comma-separated metadata field whose layout is given by
    spiLib.defDicEurostat applied to the header's first cell; the other
    cells are the yearly values. Rows are kept when the NACE code is in
    dicNace, the country is in dicNation, the size class is 'TOTAL'
    (always assumed for nomenclature 'nace2') and the indicator is the
    numerator or the denominator one. The two resulting dictionaries are
    normalised, aggregated and passed to
    spiLibCreateTable.createTableOverOtherShare.

    Args:
        nomenclature: Nomenclature identifier; 'nace2' skips the size
            filter.
        indicNumerator: Indicator code of the numerator rows.
        indicDenominator: Indicator code of the denominator rows.
        filePaths: Iterable of input file paths.
    """
    dicNumerator = {}
    dicDenominator = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, 'sbs')
    dicNation = DBAccess.lectureNationEurostat({})
    maxEndYear = -1
    minStartYear = 9999
    # Strips double quotes, spaces, commas and lowercase letters from the
    # value cells; compiled once instead of once per cell.
    cleaner = re.compile('[" ",a-z]')

    for filePath in filePaths:
        with open(filePath, 'r') as dataFile:
            csvFile = csv.reader(dataFile, delimiter='\t')
            # next() works on both Python 2.6+ and Python 3 readers.
            firstLine = next(csvFile)
            metaLabel = firstLine[0].split(',')
            dicEurostat = spiLib.defDicEurostat(metaLabel)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            iSize = dicEurostat['size']
            # The first year column is read as the end year, the last as
            # the start year.
            endYear = firstLine[1].strip()
            startYear = firstLine[-1].strip()
            if int(endYear) > maxEndYear:
                maxEndYear = int(endYear)
            if int(startYear) < minStartYear:
                minStartYear = int(startYear)

            for line in csvFile:
                if not line:
                    # csv.reader yields [] for blank lines.
                    continue
                meta = line[0].split(',')
                code = meta[iNace]
                country = meta[iGeoTime]
                indic = meta[iIndic]
                if nomenclature == 'nace2':
                    size = 'TOTAL'
                else:
                    size = meta[iSize]

                if not (code in dicNace and country in dicNation
                        and size == 'TOTAL'):
                    continue
                if indic != indicNumerator and indic != indicDenominator:
                    continue

                # The cleaned values are framed by the file's end and
                # start year; presumably the layout expected by
                # spiLib.reverseAndNormalizeDic - TODO confirm.
                vector = [endYear]
                vector.extend(cleaner.sub('', element)
                              for element in line[1:])
                vector.append(startYear)

                if indic == indicNumerator:
                    dicNumerator = spiLib.defDicIndicator(
                        country, code, vector, dicNumerator)
                else:
                    dicDenominator = spiLib.defDicIndicator(
                        country, code, vector, dicDenominator)

    dicNumerator = spiLib.reverseAndNormalizeDic(dicNumerator, minStartYear,
                                                 maxEndYear)
    dicDenominator = spiLib.reverseAndNormalizeDic(dicDenominator,
                                                   minStartYear, maxEndYear)
    dicNumerator = spiLibTotal.calcNaceAggregates(dicNumerator, nomenclature,
                                                  'sbs')
    dicDenominator = spiLibTotal.calcNaceAggregates(dicDenominator,
                                                    nomenclature, 'sbs')
    spiLibCreateTable.createTableOverOtherShare(
        dicNumerator, dicDenominator, minStartYear, 'surpl', nomenclature,
        'competition')
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa,
                         files, fileLog, tableName):
    """Build the competition import-penetration table.

    Stored 'x' and 'm' data are read through
    DBAccess.lectureCpaNaceIndicatorData (table 'external'); each result
    is a sequence whose first item is the data dictionary and whose second
    item is the start year. A third dictionary (refDicGO) is read from the
    given files: each has a comma-separated header that
    spiLib.defDicEurostat maps to column indexes, and rows are kept when
    the geo code is known to dicNation, the NACE code is in dicNace and
    indicator and unit match. All three dictionaries are normalised
    against the 'x' start year and the largest end year seen, then passed
    to spiLibCreateTable.createTableCompetitionImportpen.

    Args:
        indicatorEurostat: Indicator value required on input rows.
        unitEurostat: Unit value required on input rows.
        nomenclature: Nomenclature identifier forwarded to the helpers.
        cpa: Identifier forwarded to the DB reader.
        files: List of input file paths; sorted in place in descending
            order.
        fileLog: Log object forwarded to the reporting helpers.
        tableName: Table name forwarded to the table writer.
    """
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX = infoX[0]
    startYearX = infoX[1]
    refDicM = infoM[0]
    startYearM = infoM[1]
    refDicGO = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1
    files.sort()
    files.reverse()

    for txt in files:
        # Each input file is now closed as soon as it has been read;
        # previously the handles were never closed.
        with open(txt, 'r') as inputFile:
            header = inputFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            years = header[iGeoTime].split('\t')
            # The first year column is read as the end year, the last as
            # the start year.
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in inputFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        # NOTE(review): this value is not consumed later
                        # in this function (startYearX is used instead);
                        # the call is retained so per-row behaviour is
                        # unchanged.
                        minimumYearWithActualData = (
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData))
                        refDicGO = spiLib.defDicIndicator(
                            country, nace, vector, refDicGO)
                        # NOTE(review): assumed to belong to the
                        # matching-row branch; the source layout was
                        # ambiguous - confirm.
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: any failure on a row (typically an
                    # unknown geo code) is recorded against that geo code.
                    dicNoCountry[geo] = geo

    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')
    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX,
                                      maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX,
                                      maxEndYear)
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableCompetitionImportpen(
        nomenclature, refDicGO, refDicX, refDicM, startYearX, fileLog,
        tableName)