예제 #1
0
def traitementFichierTXT(indicatorSpi, unitEurostat, ageEurostat, sexEurostat,
                         nomFichierTXT):
    """Load one education indicator per country from a Eurostat text file.

    The first line of the file is the header: its comma-separated cells are
    handed to ``spiLib.defDicEurostat`` to obtain the column positions, and
    its tab-separated 'geotime' cell supplies the year labels (the first
    label is used as the end year, the last one as the start year).  Each
    following row is kept when its sex, age, unit and mapped ISCED indicator
    all equal the corresponding arguments.  The collected vectors are passed
    through ``spiLib.reverseAndNormalizeDicNoIndicator`` and then to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.

    Args:
        indicatorSpi: indicator code a row must map to (one of the values of
            the ISCED mapping below) to be kept.
        unitEurostat: value the row's 'unit' cell must equal.
        ageEurostat: value the row's 'age' cell must equal.
        sexEurostat: value the row's 'sex' cell must equal.
        nomFichierTXT: path of the text file to read.

    Returns:
        None.  Uses the module-level names ``fileLog``, ``G_spiIndicator`` and
        ``G_tableName``.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ISCED level code found in the file -> indicator code compared with
    # indicatorSpi (mapping added 2019-03-26).
    iscedTOindicator = {
        'ED0-2': 'edusl',
        'ED3_4': 'edusup',
        'ED5-8': 'edut',
        'ED3-8': 'edust'
    }

    # The context manager closes the file even when parsing fails; the
    # original never closed it.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iAge = dicEurostat['age']
        iSex = dicEurostat['sex']
        iIsced = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            sex = ligne[iSex].strip()
            # .get() yields None for an ISCED code missing from the mapping,
            # so such a row simply fails the filter below instead of aborting
            # the whole run with a KeyError.
            indicator = iscedTOindicator.get(ligne[iIsced].strip())
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (sex == sexEurostat and age == ageEurostat
                        and unit == unitEurostat
                        and indicator == indicatorSpi):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Any failure while handling the row (typically a geo code
                # absent from dicNation) is recorded under the geo code.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
def traitementFichierTXT(indicatorSpi, unitEurostat, flowEurostat, partnerEurostat, nomFichierTXT):
    """Load one flow/partner/unit series per country from a Eurostat text file.

    The header line gives the column positions (via ``spiLib.defDicEurostat``)
    and, in its tab-separated 'geotime' cell, the year labels: the first label
    is used as the end year and the last one as the start year.  Rows whose
    flow, partner and unit equal the arguments contribute one vector per
    country.  The result goes through
    ``spiLib.reverseAndNormalizeDicNoIndicator`` and is handed to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.

    Args:
        indicatorSpi: not read by this function (the module-level
            ``G_spiIndicator`` is used for the output instead).
        unitEurostat: value the row's 'unit' cell must equal.
        flowEurostat: value the row's 'flow' cell must equal.
        partnerEurostat: value the row's 'partner' cell must equal.
        nomFichierTXT: path of the text file to read.

    Returns:
        None.  Uses the module-level names ``fileLog``, ``G_spiIndicator`` and
        ``G_tableName``.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # The context manager closes the file even when parsing fails; the
    # original never closed it.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iFlow = dicEurostat['flow']
        iPartner = dicEurostat['partner']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            flow = ligne[iFlow].strip()
            unit = ligne[iUnit].strip()
            partner = ligne[iPartner].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (flow == flowEurostat and partner == partnerEurostat
                        and unit == unitEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Any failure while handling the row (typically a geo code
                # absent from dicNation) is recorded under the geo code.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
예제 #3
0
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomFichierTXT):
    """Load several indicators per country from a Eurostat text file.

    The header line gives the column positions (via ``spiLib.defDicEurostat``)
    and, in its tab-separated 'geotime' cell, the year labels: the first label
    is used as the end year and the last one as the start year.  A row is kept
    when its 'indic' cell satisfies ``indicator in indicatorInputEurostat``
    and its 'unit' cell equals ``unitEurostat``; kept rows are accumulated
    with ``spiLib.defDicIndicators``.  The result goes through
    ``spiLib.reverseAndNormalizeDicIndicators`` and is handed to
    ``spiLibCreateTable.createTableCountryLevelOpen``.

    Args:
        indicatorInputEurostat: object supporting ``in``; note that if a plain
            string is passed the test is a substring match.
        unitEurostat: value the row's 'unit' cell must equal.
        nomFichierTXT: path of the text file to read.

    Returns:
        None.  Uses the module-level names ``fileLog`` and ``G_tableName``.
    """
    dicIndicators = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # The context manager closes the file even when parsing fails; the
    # original never closed it.
    with open(nomFichierTXT, 'r') as fichierTXT:
        # First record carries the metadata (column names and year labels).
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            # strip() only removes characters at both ends of the line.
            ligne = ligneTXT.strip('\n').split(',')
            indicator = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indicator in indicatorInputEurostat and unit == unitEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicators = spiLib.defDicIndicators(
                        country, indicator, vector, dicIndicators)
            except Exception:
                # Any failure while handling the row (typically a geo code
                # absent from dicNation) is recorded under the geo code.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicators = spiLib.reverseAndNormalizeDicIndicators(
        dicIndicators, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelOpen(
        dicIndicators, minimumYearWithActualData, fileLog, G_tableName)
예제 #4
0
def traitementFichierTXT(indicatorSpi, categoryEurostat, ageEurostat,
                         iscoEurostat, unitEurostat, nomFichierTXT,
                         nomFichierTXT2, naceEurostat, itemEurostat,
                         unitEurostat2):
    """Load two per-country series from two Eurostat files and emit a table.

    First file: rows whose unit, age, isco and category equal the arguments
    feed ``dicIndicator``; this pass also determines
    ``minimumYearWithActualData``.  Second file: rows whose unit, 'indic' and
    nace equal ``unitEurostat2``, ``itemEurostat`` and ``naceEurostat`` feed
    ``dicIndicator2``.  For both files the header's tab-separated 'geotime'
    cell supplies the year labels (first label = end year, last = start year).
    Both dictionaries are normalised with ``spiLib.reverseAndNormalizeDic``
    using the minimum year and the end year of the FIRST file, then passed to
    ``spiLibCreateTable.createTableCountryLevelFdi``.

    Args:
        indicatorSpi: not read by this function (the module-level
            ``G_spiIndicator`` is used for the output instead).
        categoryEurostat, ageEurostat, iscoEurostat, unitEurostat: filter
            values for the first file.
        nomFichierTXT: path of the first file.
        nomFichierTXT2: path of the second file.
        naceEurostat, itemEurostat, unitEurostat2: filter values for the
            second file.

    Returns:
        None.  Uses the module-level names ``fileLog``, ``G_spiIndicator`` and
        ``G_tableName``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ---- first file -------------------------------------------------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iCategory = dicEurostat['category']
        iUnit = dicEurostat['unit']
        iIsco = dicEurostat['isco']
        iAge = dicEurostat['age']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            category = ligne[iCategory].strip()
            isco = ligne[iIsco].strip()
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat and age == ageEurostat
                        and isco == iscoEurostat
                        and category == categoryEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # ---- second file ------------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iNace = dicEurostat['nace']
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = geotime[1].strip()
        startYear2 = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            nace = ligne[iNace].strip()
            unit = ligne[iUnit].strip()
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat2 and indic == itemEurostat
                        and nace == naceEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Logged only after BOTH passes: the original logged after the first file,
    # so geo codes rejected while reading the second file were never reported.
    spiLib.defnoCountry(dicNoCountry, fileLog)

    # Both series are aligned on the first file's year range.
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator,
                                                 minimumYearWithActualData,
                                                 int(endYear))
    dicIndicator2 = spiLib.reverseAndNormalizeDic(dicIndicator2,
                                                  minimumYearWithActualData,
                                                  int(endYear))

    spiLibCreateTable.createTableCountryLevelFdi(dicIndicator, dicIndicator2,
                                                 G_spiIndicator,
                                                 minimumYearWithActualData,
                                                 fileLog, G_tableName)
def traitementFichierTXT(tableName, spiIndicator, indicEurostat, postEurostat, unitEurostat, partnerEurostat, gdpIndicEurostat, nomFichierTXT, nomFichierTXTGdp, nomFichierTXT2, partnerEurostat2, itemEurostat, gdpUnit):
    """Splice two per-country series end to end, load GDP, and emit a table.

    Steps, in order:

    1. ``nomFichierTXT``: the year labels come from the header's 'post' cell
       (first label = end year, last = start year).  Rows whose partner, post
       and indic equal ``partnerEurostat``, ``postEurostat`` and
       ``indicEurostat`` feed ``dicIndicator``; this pass also determines
       ``minimumYearWithActualData``.
    2. ``nomFichierTXT2``: the year labels come from the header's 'geotime'
       cell.  Rows whose partner, indic and item equal ``partnerEurostat2``,
       ``indicEurostat`` and ``itemEurostat`` feed ``dicIndicator2``.
    3. Countries present in only one series get a vector of ':' in the other,
       then the second series is appended to the first: a positive gap
       between the two year ranges is padded with ':', a negative gap
       (overlap) drops the tail of the first series.
    4. ``nomFichierTXTGdp``: rows whose indic and unit equal
       ``gdpIndicEurostat`` and ``gdpUnit`` feed ``dicGdp``.
    5. The spliced series and the GDP series are passed to
       ``spiLibCreateTable.createTableCountryLevelFdi``.

    Args:
        tableName: forwarded to the table-creation call.
        spiIndicator: forwarded to the table-creation call.
        unitEurostat: not read by this function.
        Other arguments: filter values and file paths as described above.

    Returns:
        None.  Uses the module-level name ``fileLog``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicGdp = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ---- 1. first series --------------------------------------------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPostTime = dicEurostat['post']
        iPartner = dicEurostat['partner']
        iGeo = dicEurostat['geotime']

        # In this file the time series hangs off the 'post' column.
        posttime = lstrec[iPostTime].split('\t')
        endYear = int(posttime[1].strip())
        startYear = int(posttime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geo = ligne[iGeo].strip()
            partner = ligne[iPartner].strip()
            postTime = ligne[iPostTime].split('\t')
            post = postTime[0].strip()

            try:
                country = dicNation[geo]
                timeSerie = postTime[1:]
                if (partner == partnerEurostat and post == postEurostat
                        and indic == indicEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, startYear, minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicator = spiLib.reverseAndNormalizeDic(
        dicIndicator, minimumYearWithActualData, endYear)

    # ---- 2. second series -------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPartnerTime = dicEurostat['partner']
        iGeo = dicEurostat['geotime']
        iItem = dicEurostat['item']

        geotime = lstrec[iGeo].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            item = ligne[iItem].strip()
            partner = ligne[iPartnerTime].strip()
            geotime = ligne[iGeo].split('\t')
            geo = geotime[0].strip()

            try:
                country = dicNation[geo]
                timeSerie = geotime[1:]
                if (partner == partnerEurostat2 and indic == indicEurostat
                        and item == itemEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2, endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicator2 = spiLib.reverseAndNormalizeDic(
        dicIndicator2, startYear2, endYear2)
    finalEndYear = endYear2

    # ---- 3. splice the two series ----------------------------------------
    # A country missing from one series gets one ':' per year of that series
    # (a negative count yields an empty list, as the original range() did).
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':'] * (endYear2 - startYear2 + 1)

    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [':'] * (
                endYear - minimumYearWithActualData + 1)

    # Years between the end of series 1 and the start of series 2:
    # > 0 -> hole to pad with ':', < 0 -> overlap to cut from series 1.
    yearGap = startYear2 - endYear - 1
    fillingVector = [':'] * max(yearGap, 0)

    for country in dicIndicator:
        serie = dicIndicator[country]
        if yearGap < 0:
            del serie[yearGap:]
        serie.extend(fillingVector)
        serie.extend(dicIndicator2[country])

    # ---- 4. GDP -----------------------------------------------------------
    with open(nomFichierTXTGdp, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        iUnit = dicEurostat['unit']

        geotime = lstrec[iGeoTime].split('\t')
        gdpEndYear = int(geotime[1].strip())
        gdpStartYear = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEurostat = geoTime[0].strip()

            try:
                country = dicNation[geoEurostat]
                timeSerie = geoTime[1:]
                if indic == gdpIndicEurostat and unit == gdpUnit:
                    vector = spiLib.defVectorYears(
                        timeSerie, gdpStartYear, gdpEndYear)
                    dicGdp[country] = vector
            except Exception:
                dicNoCountry[geoEurostat] = geoEurostat

    # GDP is aligned on the spliced range: first data year of series 1 up to
    # the end year of series 2.
    dicGdp = spiLib.reverseAndNormalizeDic(
        dicGdp, minimumYearWithActualData, finalEndYear)

    # ---- 5. output --------------------------------------------------------
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelFdi(
        dicIndicator, dicGdp, spiIndicator, minimumYearWithActualData,
        fileLog, tableName)
예제 #6
0
def _sumNaceVectors(dicNaceVectors, naceCodes):
    """Return the element-wise sum of the vectors stored under naceCodes.

    Returns an empty list when any of the codes cannot be looked up.  A
    position holding ':' in any of the vectors yields ':'; otherwise the
    cells are converted with float(), added in order, and stored as str.
    The length of the first vector drives the iteration.
    """
    try:
        vectors = [dicNaceVectors[code] for code in naceCodes]
    except Exception:
        return []

    res = []
    for i in range(len(vectors[0])):
        if any(vector[i] == ':' for vector in vectors):
            res.append(':')
        else:
            # Accumulate from the first cell (not from 0) so the float result
            # is exactly what a + b + c would give.
            total = float(vectors[0][i])
            for vector in vectors[1:]:
                total = total + float(vector[i])
            res.append(str(total))
    return res


def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         fileNumerator, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator NACE series and emit a percentage table.

    Numerator: rows of ``fileNumerator`` whose nace code is a key of the NACE
    dictionary returned by ``spiLib.defSelectdicNace(nomenclature, 'nama')``
    and whose unit equals ``unitNumerator``.  This pass also determines
    ``minimumYearWithActualData`` and ``maxEndYear``.

    Denominator: rows of every file in ``filesDenominator`` with a known nace
    code and unit equal to ``unitDenominator``; when the file has an 'indic'
    column the row's indicator must also equal
    ``indicatorEurostatDenominator``.

    For ``indicatorSpi`` in ('patintens', 'patintrd') with nomenclature
    'nace2', the codes C10-C15, C31 and C32 are additionally accepted and the
    numerator gets three computed aggregates: 'C10-C12', 'C13-C15' and
    'C31_C32' (special aggregates requested on 30-09-2016).

    Both series go through ``spiLib.reverseAndNormalizeDic`` and
    ``spiLibTotal.calcNaceAggregates`` before being passed to
    ``spiLibCreateTable.createTableNacePercentage``.

    Args:
        filesDenominator: list of file paths; it is sorted in descending
            order IN PLACE.
        fileNumerator: path of the numerator file.
        fileLog: log handle forwarded to the spiLib helpers.
        tableName: forwarded to the table-creation call.
        Other arguments: filter values as described above.

    Returns:
        None.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    filesDenominator.sort()
    filesDenominator.reverse()

    specialAggregates = (indicatorSpi in ('patintens', 'patintrd')
                         and nomenclature == 'nace2')

    # ---- numerator --------------------------------------------------------
    with open(fileNumerator, 'r') as fichier:
        list1st = fichier.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(list1st)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        geotime = list1st[iGeoTime].split('\t')

        # First year label is used as the end year, last one as the start.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        if specialAggregates:
            # Accept the detailed codes needed for the computed aggregates.
            for code in ('C10', 'C11', 'C12', 'C13', 'C14', 'C15',
                         'C31', 'C32'):
                dicNace[code] = 'C'

        for line in fichier:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()

            try:
                dicNaceCheck[nace] = nace
                country = dicNation[geo]
                timeSerie = geoTime[1:]

                # "in" replaces dict.has_key(), which Python 3 removed.
                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    if specialAggregates:
        aggregates = (
            ('C10-C12', ('C10', 'C11', 'C12')),
            ('C13-C15', ('C13', 'C14', 'C15')),
            ('C31_C32', ('C31', 'C32')),
        )
        for country in dicIndicatorNumerator:
            for aggregateCode, naceCodes in aggregates:
                res = _sumNaceVectors(dicIndicatorNumerator[country],
                                      naceCodes)
                # Nothing is stored when a component is missing or empty.
                if len(res) > 0:
                    dicIndicatorNumerator[country][aggregateCode] = res

    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # ---- denominator ------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            try:
                iIndic = dicEurostat['indic']
            except Exception:
                # The original used "pass" here, leaving iIndic unbound (first
                # file) or stale (later files); -1 is the value the row loop
                # already treats as "no indicator column".
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')

            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    # Without an indicator column every row passes the
                    # indicator test.
                    indicatorMatches = (
                        indicator == 'noindicator'
                        or indicator == indicatorEurostatDenominator)
                    if (indicatorMatches and nace in dicNace
                            and unit == unitDenominator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    # The denominator is aligned on the numerator's year range.
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)
def traitementFichierTXT(indicatorSpi, indicatorEurostat, nomFichierTXT, nomFichierTXT2, indicatorEurostat2, eurostatUnit):
    """Splice two per-country series end to end and emit a table.

    First file: rows whose 'indic' cell equals ``indicatorEurostat`` feed
    ``dicIndicator``; this pass also determines ``minimumYearWithActualData``.
    Second file: rows whose 'indic' cell equals ``indicatorEurostat2`` and
    whose unit equals ``eurostatUnit`` feed ``dicIndicator2``.  For both files
    the header's tab-separated 'geotime' cell supplies the year labels (first
    label = end year, last = start year).

    Countries present in only one series get a vector of ':' in the other,
    then the second series is appended to the first: a positive gap between
    the two year ranges is padded with ':', a negative gap (overlap) drops
    the tail of the first series.  The result is passed to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.

    Args:
        indicatorSpi: not read by this function (the module-level
            ``G_spiIndicator`` is used for the output instead).
        indicatorEurostat: filter value for the first file.
        nomFichierTXT: path of the first file.
        nomFichierTXT2: path of the second file.
        indicatorEurostat2, eurostatUnit: filter values for the second file.

    Returns:
        None.  Uses the module-level names ``fileLog``, ``G_spiIndicator`` and
        ``G_tableName``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ---- first series -----------------------------------------------------
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = int(geotime[1].strip())
        startYear = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, startYear, minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, endYear)

    # ---- second series ----------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iUnit = dicEurostat['unit']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat2 and unit == eurostatUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYear2, endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    dicIndicator2 = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator2, startYear2, endYear2)

    # ---- splice -----------------------------------------------------------
    # A country missing from one series gets one ':' per year of that series
    # (a negative count yields an empty list, as the original range() did).
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':'] * (endYear2 - startYear2 + 1)

    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [':'] * (
                endYear - minimumYearWithActualData + 1)

    # Years between the end of series 1 and the start of series 2:
    # > 0 -> hole to pad with ':', < 0 -> overlap to cut from series 1.
    yearGap = startYear2 - endYear - 1
    fillingVector = [':'] * max(yearGap, 0)

    for country in dicIndicator:
        serie = dicIndicator[country]
        if yearGap < 0:
            del serie[yearGap:]
        serie.extend(fillingVector)
        serie.extend(dicIndicator2[country])

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
예제 #8
0
def traitementFichierTXT(indicatorEurostatDenominator, indicatorEurostatNumerator, unitDenominator, unitNumerator, nomenclature, filesDenominator, filesNumerator, baseYear, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator NACE series and emit a domestic index.

    Numerator: rows of every file in ``filesNumerator`` whose indicator equals
    ``indicatorEurostatNumerator``, whose nace code is a key of the NACE
    dictionary returned by ``spiLib.defSelectdicNace(nomenclature, 'nama')``
    and whose unit equals ``unitNumerator``.  This pass also determines
    ``minimumYearWithActualData`` and ``maxEndYear``.

    Denominator: rows of every file in ``filesDenominator`` matching
    ``indicatorEurostatDenominator`` / ``unitDenominator`` with a known nace
    code and sector 'S1'; when the 'sector' position is -1 every row is
    treated as sector 'S1'.

    Both series go through ``spiLib.reverseAndNormalizeDic`` and
    ``spiLibTotal.calcNaceAggregates``; for nomenclature 'nace2' with
    denominator indicator 'EMP' the denominator additionally goes through
    ``spiLibTotal.calcNace2EmpAggr``.  The result is passed to
    ``spiLibCreateTable.createTableDomesticIndex``.

    Args:
        filesDenominator, filesNumerator: lists of file paths; both are
            sorted in descending order IN PLACE.
        baseYear: forwarded to the table-creation call.
        fileLog: log handle forwarded to the spiLib helpers.
        tableName: forwarded to the table-creation call.
        Other arguments: filter values as described above.

    Returns:
        None.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    filesNumerator.sort()
    filesNumerator.reverse()
    filesDenominator.sort()
    filesDenominator.reverse()

    # ---- numerator --------------------------------------------------------
    for txt in filesNumerator:
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')

            # First year label is used as the end year, last one as the start.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    # "in" replaces dict.has_key(), which Python 3 removed.
                    if (indicator == indicatorEurostatNumerator
                            and nace in dicNace and unit == unitNumerator):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear), minimumYearWithActualData)
                        dicIndicatorNumerator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorNumerator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # ---- denominator ------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            iSector = dicEurostat['sector']
            geotime = list1st[iGeoTime].split('\t')

            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                # A sector position of -1 means every row counts as 'S1'.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = lineList[iSector].strip()
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostatDenominator
                            and nace in dicNace and unit == unitDenominator
                            and sector == 'S1'):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    # The denominator is aligned on the numerator's year range.
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    # Added at the request of the B2 team: aggregates that cannot be
    # extracted from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(dicIndicatorDenominator)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableDomesticIndex(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, baseYear, fileLog, tableName)
예제 #9
0
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa,
                         files, fileLog, tableName):
    """Parse Eurostat TXT extracts and create the competition/import-penetration table.

    The 'x' and 'm' reference series are read through
    ``DBAccess.lectureCpaNaceIndicatorData``. Every file in ``files`` is then
    parsed: rows whose indicator, unit and NACE code match the request are
    accumulated per country and NACE code. The three dictionaries are aligned
    on a common year range and handed to
    ``spiLibCreateTable.createTableCompetitionImportpen``.

    Args:
        indicatorEurostat: indicator code a row must carry to be kept.
        unitEurostat: unit code a row must carry to be kept.
        nomenclature: nomenclature name, used to select the NACE codes and
            to compute the NACE aggregates.
        cpa: forwarded to ``DBAccess.lectureCpaNaceIndicatorData``.
        files: list of paths of the TXT files to read. The list is sorted
            in descending order IN PLACE.
        fileLog: log target forwarded to the spiLib reporting helpers.
        tableName: name of the table to create.
    """
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX = infoX[0]
    startYearX = infoX[1]
    refDicM = infoM[0]
    startYearM = infoM[1]
    refDicGO = {}
    dicNoCountry = {}
    dicNation = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    # NOTE(review): this value is updated while parsing but never read
    # afterwards in this function; the normalisation below uses startYearX.
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Same ordering as the former sort() + reverse(); still mutates the
    # caller's list.
    files.sort(reverse=True)

    for txt in files:
        # 'with' guarantees the handle is released even if parsing fails;
        # the previous code never closed the file.
        with open(txt, 'r') as txtFile:
            # Header: comma-separated dimension names; the geo/time column
            # additionally holds the tab-separated year labels.
            list1st = txtFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')

            # First year label is taken as the end year and the last one as
            # the start year (presumably the years are listed newest first
            # -- TODO confirm against the Eurostat extract format).
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in txtFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    # Every NACE code met is recorded, even for rows that
                    # are rejected below.
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    # 'in' replaces dict.has_key(), which no longer exists
                    # on Python 3 and whose AttributeError was silently
                    # swallowed by the handler below.
                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                        refDicGO = spiLib.defDicIndicator(country, nace,
                                                          vector, refDicGO)
                        maxEndYear = max(maxEndYear, int(endYear))
                except Exception:
                    # Best effort, as before: any failure on a row (typically
                    # a geo code missing from dicNation) is reported as an
                    # unknown country. Unlike the former bare 'except:',
                    # KeyboardInterrupt and SystemExit now propagate.
                    dicNoCountry[geo] = geo

    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')

    # Align both reference series on the range startYearX..maxEndYear.
    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX,
                                      maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX,
                                      maxEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableCompetitionImportpen(nomenclature, refDicGO,
                                                      refDicX, refDicM,
                                                      startYearX, fileLog,
                                                      tableName)