Exemple #1
0
def traitementFichierTXT(indicatorSpi, categoryEurostat, ageEurostat,
                         iscoEurostat, unitEurostat, nomFichierTXT,
                         nomFichierTXT2, naceEurostat, itemEurostat,
                         unitEurostat2):
    """Read two Eurostat text files into per-country series and pass them to
    ``spiLibCreateTable.createTableCountryLevelFdi``.

    Both files are parsed the same way: the header and every row are split on
    commas, and the 'geotime' field is split on tabs into a geo code followed
    by the yearly values.  In the header, the first year column is taken as
    the end year and the last one as the start year.

    Parameters:
        indicatorSpi: accepted but not used by this function (the
            module-level ``G_spiIndicator`` is passed on instead).
        categoryEurostat, ageEurostat, iscoEurostat, unitEurostat: values a
            row of the first file must match to be kept.
        nomFichierTXT: path of the first file.
        nomFichierTXT2: path of the second file.
        naceEurostat, itemEurostat, unitEurostat2: values a row of the second
            file must match (on 'nace', 'indic' and 'unit') to be kept.

    Relies on the module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.  Returns nothing.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- first file --------------------------------------------------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iCategory = dicEurostat['category']
        iUnit = dicEurostat['unit']
        iIsco = dicEurostat['isco']
        iAge = dicEurostat['age']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            category = ligne[iCategory].strip()
            isco = ligne[iIsco].strip()
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            # Best effort: any failure while handling the row (typically a
            # geo code missing from dicNation) is recorded under that code.
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat and age == ageEurostat
                        and isco == iscoEurostat
                        and category == categoryEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = \
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # --- second file -------------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iNace = dicEurostat['nace']
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = geotime[1].strip()
        startYear2 = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            nace = ligne[iNace].strip()
            unit = ligne[iUnit].strip()
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat2 and indic == itemEurostat
                        and nace == naceEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Reported only once both files have been read, so that geo codes
    # rejected while reading the second file are reported as well.
    spiLib.defnoCountry(dicNoCountry, fileLog)

    # Both series are normalized against the year range of the FIRST file.
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator,
                                                 minimumYearWithActualData,
                                                 int(endYear))
    dicIndicator2 = spiLib.reverseAndNormalizeDic(dicIndicator2,
                                                  minimumYearWithActualData,
                                                  int(endYear))

    spiLibCreateTable.createTableCountryLevelFdi(dicIndicator, dicIndicator2,
                                                 G_spiIndicator,
                                                 minimumYearWithActualData,
                                                 fileLog, G_tableName)
def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         fileNumerator, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator series per country and NACE code and
    pass them to ``spiLibCreateTable.createTableNacePercentage``.

    The numerator comes from a single file, the denominator from a list of
    files.  Headers and rows are split on commas; the 'geotime' field is split
    on tabs into a geo code followed by the yearly values.  In each header the
    first year column is taken as the end year and the last as the start year.

    Parameters:
        indicatorEurostatDenominator: 'indic' value a denominator row must
            have (only checked when the file has an indicator column).
        unitDenominator: 'unit' value a denominator row must have.
        unitNumerator: 'unit' value a numerator row must have.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and to the
            aggregate/table helpers; 'nace2' additionally enables the special
            aggregates for the 'patintens' and 'patintrd' indicators.
        filesDenominator: list of denominator file paths; sorted in place in
            descending order.
        fileNumerator: path of the numerator file.
        indicatorSpi: indicator name forwarded to the table writer.
        fileLog: log object forwarded to the spiLib reporting helpers.
        tableName: forwarded to the table writer.

    Returns nothing.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Special aggregates requested on 30-09-2016: label -> codes to sum.
    withSpecialAggregates = (indicatorSpi in ('patintens', 'patintrd')
                             and nomenclature == 'nace2')
    specialAggregates = (
        ('C10-C12', ('C10', 'C11', 'C12')),
        ('C13-C15', ('C13', 'C14', 'C15')),
        ('C31_C32', ('C31', 'C32')),
    )

    def sumNaceSeries(naceSeries, codes):
        """Return the element-wise sum of the series stored under `codes`.

        A position is ':' as soon as one input is ':' at that position.
        Returns [] when any of the codes is missing from `naceSeries`.
        """
        try:
            series = [naceSeries[code] for code in codes]
        except KeyError:
            return []
        summed = []
        for i in range(len(series[0])):
            values = [serie[i] for serie in series]
            if ':' in values:
                summed.append(':')
            else:
                total = float(values[0])
                for value in values[1:]:
                    total += float(value)
                summed.append(str(total))
        return summed

    # In place, so the caller's list ends up in the same descending order.
    filesDenominator.sort(reverse=True)

    # --- numerator ---------------------------------------------------------
    with open(fileNumerator, 'r') as numeratorFile:
        list1st = numeratorFile.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(list1st)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        geotime = list1st[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        if withSpecialAggregates:
            # Make sure the component codes of the aggregates are selected.
            for _label, codes in specialAggregates:
                for code in codes:
                    dicNace[code] = 'C'

        for line in numeratorFile:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()

            # Best effort: any failure while handling the row (typically a
            # geo code missing from dicNation) is recorded under that code.
            try:
                dicNaceCheck[nace] = nace
                country = dicNation[geo]
                timeSerie = geoTime[1:]

                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = \
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    if withSpecialAggregates:
        for country in dicIndicatorNumerator:
            naceSeries = dicIndicatorNumerator[country]
            for label, codes in specialAggregates:
                res = sumNaceSeries(naceSeries, codes)
                if res:
                    naceSeries[label] = res

    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # --- denominator -------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            list1st = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            # A header without an 'indic' entry is handled like one whose
            # indicator index is -1: the indicator filter is skipped.
            try:
                iIndic = dicEurostat['indic']
            except KeyError:
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    indicatorMatches = (
                        indicator == 'noindicator'
                        or indicator == indicatorEurostatDenominator)
                    if (indicatorMatches and nace in dicNace
                            and unit == unitDenominator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    # The denominator is normalized on the year range found in the numerator.
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)
def traitementFichierTXT(tableName, spiIndicator, indicEurostat, postEurostat,
                         unitEurostat, partnerEurostat, gdpIndicEurostat,
                         nomFichierTXT, nomFichierTXTGdp, nomFichierTXT2,
                         partnerEurostat2, itemEurostat, gdpUnit):
    """Chain two Eurostat series per country, read a GDP series, and pass both
    to ``spiLibCreateTable.createTableCountryLevelFdi``.

    The series of the first file is followed by the series of the second
    file; the join is padded with ':' or trimmed depending on the gap between
    the end year of the first file and the start year of the second.

    Headers and rows are split on commas; the time series sits in a
    tab-separated field ('post' in the first file, 'geotime' in the others).
    In each header the first year column is taken as the end year and the
    last as the start year.

    Parameters:
        tableName, spiIndicator: forwarded to the table writer.
        indicEurostat: 'indic' value rows of the first and second file must
            have.
        postEurostat, partnerEurostat: 'post' and 'partner' values rows of
            the first file must have.
        unitEurostat: accepted but not used by this function.
        gdpIndicEurostat, gdpUnit: 'indic' and 'unit' values rows of the GDP
            file must have.
        nomFichierTXT, nomFichierTXT2, nomFichierTXTGdp: paths of the first,
            second and GDP file.
        partnerEurostat2, itemEurostat: 'partner' and 'item' values rows of
            the second file must have.

    Relies on the module-level name ``fileLog``.  Returns nothing.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicGdp = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- first file: time series stored in the 'post' field -----------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPostTime = dicEurostat['post']
        iPartner = dicEurostat['partner']
        iGeo = dicEurostat['geotime']

        posttime = lstrec[iPostTime].split('\t')
        endYear = int(posttime[1].strip())
        startYear = int(posttime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geo = ligne[iGeo].strip()
            partner = ligne[iPartner].strip()
            postTime = ligne[iPostTime].split('\t')
            post = postTime[0].strip()

            # Best effort: any failure while handling the row (typically a
            # geo code missing from dicNation) is recorded under that code.
            try:
                country = dicNation[geo]
                timeSerie = postTime[1:]
                if (partner == partnerEurostat and post == postEurostat
                        and indic == indicEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = \
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, startYear, minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicator = spiLib.reverseAndNormalizeDic(
        dicIndicator, minimumYearWithActualData, endYear)

    # --- second file: time series stored in the 'geotime' field -------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPartner = dicEurostat['partner']
        iGeo = dicEurostat['geotime']
        iItem = dicEurostat['item']

        geotime = lstrec[iGeo].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            item = ligne[iItem].strip()
            partner = ligne[iPartner].strip()
            geotime = ligne[iGeo].split('\t')
            geo = geotime[0].strip()

            try:
                country = dicNation[geo]
                timeSerie = geotime[1:]
                if (partner == partnerEurostat2 and indic == indicEurostat
                        and item == itemEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicator2 = spiLib.reverseAndNormalizeDic(
        dicIndicator2, startYear2, endYear2)
    finalEndYear = endYear2

    # Give every country an entry in both dictionaries, using a vector of
    # ':' placeholders (one per year of the respective range) where a
    # country is missing.  A non-positive length yields an empty list.
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':'] * (endYear2 - startYear2 + 1)

    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = \
                [':'] * (endYear - minimumYearWithActualData + 1)

    # Join the two series: pad a gap between the ranges with ':', or drop the
    # overlapping tail of the first series, then append the second one.
    yearGap = startYear2 - endYear - 1
    for country in dicIndicator:
        serie = dicIndicator[country]
        if yearGap < 0:
            del serie[yearGap:]
        elif yearGap > 0:
            serie.extend([':'] * yearGap)
        serie.extend(dicIndicator2[country])

    # --- GDP file ------------------------------------------------------------
    with open(nomFichierTXTGdp, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        iUnit = dicEurostat['unit']

        geotime = lstrec[iGeoTime].split('\t')
        endYearGdp = int(geotime[1].strip())
        startYearGdp = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEurostat = geoTime[0].strip()

            try:
                country = dicNation[geoEurostat]
                timeSerie = geoTime[1:]
                if indic == gdpIndicEurostat and unit == gdpUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYearGdp,
                                                   endYearGdp)
                    dicGdp[country] = vector
            except Exception:
                dicNoCountry[geoEurostat] = geoEurostat

    dicGdp = spiLib.reverseAndNormalizeDic(
        dicGdp, minimumYearWithActualData, finalEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelFdi(
        dicIndicator, dicGdp, spiIndicator, minimumYearWithActualData,
        fileLog, tableName)
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomenclature, sbsFile = ''):
    """Collect one indicator per country and NACE code from the files listed
    in the module-level ``fichiersTXT`` and pass the result to
    ``spiLibCreateTable.createTableSkillTech``.

    Headers and rows are split on commas; the 'geotime' field is split on
    tabs into a geo code followed by the yearly values.  In each header the
    first year column is taken as the end year and the last as the start year.

    Parameters:
        indicatorInputEurostat: 'indic' value a row must have.
        unitEurostat: 'unit' value a row must have.
        nomenclature: passed to ``spiLib.defSelectdicNaceSkillTech`` and to
            the table writer; when it is 'nace2', `sbsFile` is read as well
            and its ratios are merged into the result.
        sbsFile: path of the SBS file; it is opened whenever `nomenclature`
            is 'nace2', so the default '' only works for other nomenclatures.

    Relies on the module-level names ``fichiersTXT``, ``fileLog`` and
    ``G_tableName``.  Returns nothing.
    """
    dicIndicator        = {}
    dicNoCountry        = {}
    dicNaceCheck        = {}
    dicNace             = spiLib.defSelectdicNaceSkillTech(nomenclature)
    dicNation           = DBAccess.lectureNationEurostat({})
    dicStartValue       = dict(startIndice=1,startCountry='',startNace='',startIndicator='',startValeur=0)

    minStartYear        = 99999
    maxEndYear          = -1

    for txt in fichiersTXT:
        with open(txt,'r') as fichierTXT:
            # the first record holds the metadata
            lstrec         = fichierTXT.readline().split(',')
            # select the columns of the Eurostat input
            dicEurostat    = spiLib.defDicEurostat(lstrec)
            iUnit          = dicEurostat['unit']
            iNace          = dicEurostat['nace']
            iIndic         = dicEurostat['indic']
            iGeoTime       = dicEurostat['geotime']
            geotime        = lstrec[iGeoTime].split('\t')
            endYear        = geotime[1].strip()
            startYear      = geotime[-1].strip()
            #nace_r1,indic_sb,size_emp,geo\time
            #E,V11110,TOTAL,ES    9037     6410     3544     3311     3336     3084
            for ligneTXT in fichierTXT:
                # reminder: strip only removes characters from both ends
                ligne         = ligneTXT.strip('\n').split(',')
                nace          = ligne[iNace].strip()
                indicator     = ligne[iIndic].strip()
                unit          = ligne[iUnit].strip()
                geoTime       = ligne[iGeoTime].split('\t')
                geoEuroStat   = geoTime[0].strip()
                # Best effort: any failure while handling the row (typically
                # a geo code missing from dicNation) is recorded under that
                # code.
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]

                    if indicator == indicatorInputEurostat and nace in dicNace and unit == unitEurostat:
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(timeSerie,country,nace,indicator,dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(country,nace,vector,dicIndicator)
                        if int(endYear) > maxEndYear :
                            maxEndYear = int(endYear)
                        if int(startYear) < minStartYear :
                            minStartYear = int(startYear)
                except Exception:
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice '+str(dicStartValue['startIndice'])+' highCountry '+dicStartValue['startCountry']+\
    ' Indicator '+dicStartValue['startIndicator']+' Nace '+dicStartValue['startNace']+' valeur '+str(dicStartValue['startValeur'])+'\n')
    spiLib.defnoCountry(dicNoCountry,fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck,dicNace,fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator, minStartYear, maxEndYear)

    if nomenclature == 'nace2' :
        dicSbs = {}
        with open(sbsFile,'r') as fichierTXT:
            lstrec         = fichierTXT.readline().split(',')
            dicEurostat    = spiLib.defDicEurostat(lstrec)
            iNace          = dicEurostat['nace']
            iIndic         = dicEurostat['indic']
            iGeoTime       = dicEurostat['geotime']
            geotime        = lstrec[iGeoTime].split('\t')
            endYear        = geotime[1].strip()
            startYear      = geotime[-1].strip()
            for ligneTXT in fichierTXT :
                ligne         = ligneTXT.strip('\n').split(',')
                nace          = ligne[iNace].strip()
                indicator     = ligne[iIndic].strip()
                geoTime       = ligne[iGeoTime].split('\t')
                geoEuroStat   = geoTime[0].strip()

                # Rows that cannot be processed are skipped without being
                # reported.
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if indicator == 'V12150' and nace in ('N80', 'N81', 'N82') :
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicSbs = spiLib.defDicIndicator(country,nace,vector,dicSbs)
                except Exception:
                    continue

        # The SBS data is normalized on the year range of the main files.
        dicSbs = spiLib.reverseAndNormalizeDic(dicSbs, minStartYear, maxEndYear)
        dicSbs = spiLibTotal.createSkillTechNace2SbsTotal(dicSbs)
        dicRatio = spiLibTotal.createSkillTechNace2Ratio(dicSbs)
        del dicSbs

        dicIndicator = spiLibTotal.addSkillTechNace2RemainingCodes(dicIndicator, dicRatio)

    spiLibCreateTable.createTableSkillTech(nomenclature,dicIndicator,minStartYear,fileLog,G_tableName)
def traitementFichierTXT(indicatorEurostatDenominator, indicatorEurostatNumerator, unitDenominator, unitNumerator, nomenclature, filesDenominator, filesNumerator, baseYear, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator series per country and NACE code and
    pass them to ``spiLibCreateTable.createTableDomesticIndex``.

    Headers and rows are split on commas; the 'geotime' field is split on
    tabs into a geo code followed by the yearly values.  In each header the
    first year column is taken as the end year and the last as the start year.

    Parameters:
        indicatorEurostatDenominator, unitDenominator: 'indic' and 'unit'
            values a denominator row must have.
        indicatorEurostatNumerator, unitNumerator: 'indic' and 'unit' values
            a numerator row must have.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and the
            aggregate/table helpers.
        filesDenominator, filesNumerator: lists of file paths; both are
            sorted in place in descending order.
        baseYear, indicatorSpi, tableName: forwarded to the table writer.
        fileLog: log object forwarded to the spiLib reporting helpers.

    Returns nothing.
    """
    dicIndicatorNumerator       = {}
    dicIndicatorDenominator     = {}
    dicNoCountry                = {}
    dicNaceCheck                = {}
    dicNation                   = DBAccess.lectureNationEurostat({})
    dicNace                     = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData   = 999999
    maxEndYear                  = -1

    # In place, so the caller's lists end up in the same descending order.
    filesNumerator.sort(reverse=True)
    filesDenominator.sort(reverse=True)

    # --- numerator ---------------------------------------------------------
    for txt in filesNumerator :
        with open(txt, 'r') as numeratorFile :
            list1st        = numeratorFile.readline().split(',')
            dicEurostat    = spiLib.defDicEurostat(list1st)
            iUnit          = dicEurostat['unit']
            iIndic         = dicEurostat['indic']
            iNace          = dicEurostat['nace']
            iGeoTime       = dicEurostat['geotime']
            geotime        = list1st[iGeoTime].split('\t')

            endYear        = geotime[1].strip()
            startYear      = geotime[-1].strip()

            for line in numeratorFile :
                lineList      = line.strip('\n').split(',')
                nace          = lineList[iNace].strip()
                indicator     = lineList[iIndic].strip()
                unit          = lineList[iUnit].strip()
                geoTime       = lineList[iGeoTime].split('\t')
                geo           = geoTime[0].strip()

                # Best effort: any failure while handling the row (typically
                # a geo code missing from dicNation) is recorded under that
                # code.
                try :
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if indicator == indicatorEurostatNumerator and nace in dicNace and unit == unitNumerator:
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(timeSerie,int(startYear),minimumYearWithActualData)
                        dicIndicatorNumerator = spiLib.defDicIndicator(country,nace,vector,dicIndicatorNumerator)
                        if int(endYear) > maxEndYear :
                            maxEndYear = int(endYear)
                except Exception :
                    dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(dicIndicatorNumerator, nomenclature, 'nama')

    # --- denominator -------------------------------------------------------
    for txt in filesDenominator :
        with open(txt, 'r') as denominatorFile :
            list1st        = denominatorFile.readline().split(',')
            dicEurostat    = spiLib.defDicEurostat(list1st)
            iUnit          = dicEurostat['unit']
            iIndic         = dicEurostat['indic']
            iNace          = dicEurostat['nace']
            iGeoTime       = dicEurostat['geotime']
            iSector        = dicEurostat['sector']
            geotime        = list1st[iGeoTime].split('\t')

            endYear        = geotime[1].strip()
            startYear      = geotime[-1].strip()

            for line in denominatorFile :
                lineList      = line.strip('\n').split(',')
                nace          = lineList[iNace].strip()
                indicator     = lineList[iIndic].strip()
                unit          = lineList[iUnit].strip()
                geoTime       = lineList[iGeoTime].split('\t')
                # A sector index of -1 means every row is treated as 'S1'.
                if iSector == -1 :
                    sector = 'S1'
                else :
                    sector = lineList[iSector].strip()
                geo           = geoTime[0].strip()

                try :
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if indicator == indicatorEurostatDenominator and nace in dicNace and unit == unitDenominator and sector == 'S1' :
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(country,nace,vector,dicIndicatorDenominator)
                except Exception :
                    dicNoCountry[geo] = geo

    # The denominator is normalized on the year range found in the numerator.
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(dicIndicatorDenominator, nomenclature, 'nama')

    # Added on demand of the B2 team to calculate aggregates that cannot be
    # extracted from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(dicIndicatorDenominator)

    spiLib.defnoCountry(dicNoCountry,fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck,dicNace,fileLog)

    spiLibCreateTable.createTableDomesticIndex(nomenclature,dicIndicatorNumerator, dicIndicatorDenominator, indicatorSpi, minimumYearWithActualData, baseYear,fileLog, tableName)
Exemple #6
0
def traitementFichierTXT(unitEurostat, flowEurostat, partnerEurostat,
                         gdpIndicEurostat, nomFichierTXT, nomFichierTXTGdp,
                         unitGdp):
    """Read a per-flow indicator and a GDP series per country and pass them
    to ``spiLibCreateTable.createTableCountryLevelBtechBtechgdp``.

    Headers and rows are split on commas; the 'geotime' field is split on
    tabs into a geo code followed by the yearly values.  In each header the
    first year column is taken as the end year and the last as the start year.

    Parameters:
        unitEurostat, partnerEurostat: 'unit' and 'partner' values a row of
            the indicator file must have.
        flowEurostat: container (or string) the row's 'flow' value is tested
            against with ``in``.
        gdpIndicEurostat, unitGdp: 'indic' and 'unit' values a row of the GDP
            file must have.
        nomFichierTXT: path of the indicator file.
        nomFichierTXTGdp: path of the GDP file.

    Relies on the module-level names ``fileLog`` and ``G_tableName``.
    Returns nothing.
    """
    dicIndicator = {}
    dicGdp = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- indicator file ----------------------------------------------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iFlow = dicEurostat['flow']
        iPartner = dicEurostat['partner']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            flow = ligne[iFlow].strip()
            unit = ligne[iUnit].strip()
            partner = ligne[iPartner].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            # Best effort: any failure while handling the row (typically a
            # geo code missing from dicNation) is recorded under that code.
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (flow in flowEurostat and partner == partnerEurostat
                        and unit == unitEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = \
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                    dicIndicator = spiLib.defDicFlow(country, flow, vector,
                                                     dicIndicator)
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Both series are later normalized up to the indicator file's end year.
    finalEndYear = endYear

    # --- GDP file ------------------------------------------------------------
    with open(nomFichierTXTGdp, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            unit = ligne[iUnit].strip()
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == gdpIndicEurostat and unit == unitGdp:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    dicGdp[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Unmatched geo codes from both files are reported once.
    spiLib.defnoCountry(dicNoCountry, fileLog)

    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator,
                                                 minimumYearWithActualData,
                                                 int(finalEndYear))
    dicGdp = spiLib.reverseAndNormalizeDic(dicGdp, minimumYearWithActualData,
                                           int(finalEndYear))

    spiLibCreateTable.createTableCountryLevelBtechBtechgdp(
        dicIndicator, dicGdp, minimumYearWithActualData, fileLog, G_tableName)
def traitementFichierTXT(nomenclature, indicNumerator, indicDenominator,
                         filePaths):
    """Build numerator and denominator series per country and NACE code from
    tab-separated files and pass them to
    ``spiLibCreateTable.createTableOverOtherShare``.

    Each file is read with ``csv.reader`` using a tab delimiter.  The first
    cell of every line holds comma-separated metadata; the remaining cells
    hold the yearly values.  In the header the first year cell is taken as
    the end year and the last as the start year.

    Parameters:
        nomenclature: passed to ``spiLib.defSelectdicNace`` and the
            aggregate/table helpers; for 'nace2' the size column is ignored
            and every row counts as 'TOTAL'.
        indicNumerator: 'indic' value of rows that feed the numerator.
        indicDenominator: 'indic' value of rows that feed the denominator.
        filePaths: iterable of file paths to read.

    Returns nothing.
    """
    dicNumerator = {}
    dicDenominator = {}

    dicNace = spiLib.defSelectdicNace(nomenclature, 'sbs')
    dicNation = DBAccess.lectureNationEurostat({})

    maxEndYear = -1
    minStartYear = 9999

    # Removes double quotes, spaces, commas and lowercase letters from a
    # value cell; compiled once because it is applied to every kept cell.
    cleanValue = re.compile('[" ",a-z]')

    for filePath in filePaths:
        with open(filePath, 'r') as tsvFile:
            csvFile = csv.reader(tsvFile, delimiter='\t')
            # next() works on both Python 2 and Python 3 readers.
            firstLine = next(csvFile)
            metaLabel = firstLine[0].split(',')
            dicEurostat = spiLib.defDicEurostat(metaLabel)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            iSize = dicEurostat['size']

            endYear = firstLine[1].strip()
            startYear = firstLine[-1].strip()

            maxEndYear = max(maxEndYear, int(endYear))
            minStartYear = min(minStartYear, int(startYear))

            for line in csvFile:
                meta = line[0].split(',')
                code = meta[iNace]
                country = meta[iGeoTime]
                indic = meta[iIndic]
                if nomenclature == 'nace2':
                    size = 'TOTAL'
                else:
                    size = meta[iSize]

                if not (code in dicNace and country in dicNation
                        and size == 'TOTAL'):
                    continue
                if indic != indicNumerator and indic != indicDenominator:
                    continue

                # The vector is framed by the end year (first) and the start
                # year (last), with the cleaned values in between.
                vector = [endYear]
                vector.extend(
                    cleanValue.sub('', element) for element in line[1:])
                vector.append(startYear)

                # A row matching both indicators counts as numerator.
                if indic == indicNumerator:
                    dicNumerator = spiLib.defDicIndicator(
                        country, code, vector, dicNumerator)
                else:
                    dicDenominator = spiLib.defDicIndicator(
                        country, code, vector, dicDenominator)

    dicNumerator = spiLib.reverseAndNormalizeDic(dicNumerator, minStartYear,
                                                 maxEndYear)
    dicDenominator = spiLib.reverseAndNormalizeDic(dicDenominator,
                                                   minStartYear, maxEndYear)

    dicNumerator = spiLibTotal.calcNaceAggregates(dicNumerator, nomenclature,
                                                  'sbs')
    dicDenominator = spiLibTotal.calcNaceAggregates(dicDenominator,
                                                    nomenclature, 'sbs')

    spiLibCreateTable.createTableOverOtherShare(dicNumerator, dicDenominator,
                                                minStartYear, 'surpl',
                                                nomenclature, 'competition')
Exemple #8
0
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa,
                         files, fileLog, tableName):
    """Build the reference dictionaries and create the import-penetration table.

    Reads every text file in ``files`` (processed in descending sorted order),
    keeps the rows whose indicator, unit and NACE code match the requested
    selection, and accumulates their year vectors per country/NACE code.
    The result is normalized, aggregated, and handed to
    ``spiLibCreateTable.createTableCompetitionImportpen`` together with the
    'x' and 'm' reference data loaded through ``DBAccess``.

    Args:
        indicatorEurostat: indicator code a row must carry to be kept.
        unitEurostat: unit code a row must carry to be kept.
        nomenclature: nomenclature name, used to select the NACE dictionary
            and forwarded to the aggregation and table-creation helpers.
        cpa: forwarded to ``DBAccess.lectureCpaNaceIndicatorData``.
        files: iterable of paths to the input text files. The caller's
            sequence is not modified.
        fileLog: log target forwarded to the ``spiLib`` reporting helpers and
            to the table-creation helper.
        tableName: forwarded to ``createTableCompetitionImportpen``.

    Returns:
        None. The outcome is whatever the table-creation helper produces.
    """
    # NOTE(review): 'x' / 'm' presumably stand for exports / imports -- confirm
    # against DBAccess.lectureCpaNaceIndicatorData.
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX = infoX[0]
    startYearX = infoX[1]
    refDicM = infoM[0]
    startYearM = infoM[1]
    refDicGO = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    # Tracked across all files but not read again inside this function.
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Work on a sorted copy so the caller's list is left untouched.
    for txt in sorted(files, reverse=True):
        # The context manager guarantees the handle is released even if a
        # row raises outside the per-row try block below.
        with open(txt, 'r') as txtFile:
            # The header line yields the column positions of each dimension.
            headerFields = txtFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(headerFields)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = headerFields[iGeoTime].split('\t')

            # The first year column is taken as the end year and the last
            # one as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in txtFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                # Every NACE code seen is recorded, including on rows that
                # end up being skipped.
                dicNaceCheck[nace] = nace

                try:
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = (
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData))
                        refDicGO = spiLib.defDicIndicator(country, nace,
                                                          vector, refDicGO)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: a row that cannot be processed (typically
                    # an unknown geo code) is logged and skipped. Catching
                    # Exception rather than everything lets Ctrl-C and
                    # SystemExit propagate.
                    dicNoCountry[geo] = geo

    # The series are aligned on the start year of the 'x' reference data.
    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')

    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX,
                                      maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX,
                                      maxEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableCompetitionImportpen(nomenclature, refDicGO,
                                                      refDicX, refDicM,
                                                      startYearX, fileLog,
                                                      tableName)