Example #1
0
def processFile(domainName, user, edbName, filePath):
    """Load one emission grid file pair into an edb.

    The target edb is created when it does not exist yet. A gzipped input
    file is unpacked first, after which the ``.asc`` and ``.data`` files
    sharing the same base path are converted from dos-format to linux and
    the grid is loaded into the edb.

    Args:
        domainName: Name of the domain that holds the edb.
        user: Edb user; must already exist in the domain.
        edbName: Name of the target edb.
        filePath: Path to the grid ``.data`` file, optionally gzipped
            (``.data.gz``).

    Raises:
        SystemExit: With status 1 if the user does not exist in the domain.
    """
    logger = logging.getLogger('loadRUSedb.processFile')

    dmn = domain(domainName)
    edb = Edb(domainName, user, edbName)

    if not dmn.edbUserExist(user):
        # The message must use the domain *name*; concatenating the
        # ``domain`` callable itself raises TypeError instead of logging.
        logger.error("Directory for user '" + user +
                     "' does not exist in edb directory for domain '" +
                     dmn.name + "'")
        sys.exit(1)

    if not edb.exists():
        edb.create()
        logger.info("EDB '" + edb.name + "' for user '" + user +
                    "' in domain '" + dmn.name + "' created")

    if filePath.endswith(".gz"):
        utilities.gunzip(filePath)
        filePath = filePath[:-len(".gz")]

    # Strip the ".data" extension to get the base path shared by the
    # .asc/.data pair. Only strip when the extension is really there, a
    # blind slice would corrupt any other file name.
    if filePath.endswith(".data"):
        filePath = filePath[:-len(".data")]

    # converting from dos-format to linux
    utilities.dos2linux(filePath + ".asc")
    utilities.dos2linux(filePath + ".data")

    # create grid object, named after the file's base name
    gridname = path.basename(filePath)
    grid = Egrid(dmn.name, user, edb.name, gridname)

    grid.load(filePath)  # load grid into edb
Example #2
0
def main():
    """Export emissions of an edb, per activity code, to EMEP grid files.

    Command line: one positional argument (the controlfile) plus the options
    defined below. With ``-t FILE`` a default controlfile is written and the
    program exits.

    For every activity code in the code table (except "all") and every
    substance listed in the controlfile, emissions from matching non-fuel
    grids and from matching sources in the source database are summed into
    one raster. Non-empty rasters are converted with
    ``emepgrid.sortToEmep`` and written to
    ``<outputDir>/<ac>/Emep50km_<substance>.asc``. In addition one
    tab-separated file ``<outputDir>/<ac>/CLRTAP_<ac>.txt`` is written with
    one row per cell that has data for any substance.

    Raises:
        SystemExit: On invalid arguments, a missing edb, or when the user
            declines the domain confirmation.
        KeyError: If the environment variable AVDBNAME is not set.
    """
    #-----------Setting up and using option parser-----------------------
    parser = OptionParser(usage=usage, version=version)

    parser.add_option("-l", "--loglevel",
                      action="store", dest="loglevel", default=2,
                      help="Sets the loglevel (0-3 where 3=full logging)")

    parser.add_option("-u", "--user",
                      action="store", dest="user", default=None,
                      help="Specify user manually")

    parser.add_option("-e", "--edb",
                      action="store", dest="edb", default=None,
                      help="Name of target edb")

    parser.add_option("-t", "--template",
                      action="store", dest="cf", default=None,
                      help="Generate default controlfile")

    parser.add_option("-f", "--force",
                      action="store_true", dest="force", default=False,
                      help="To start the process without confirming the domain")

    (options, args) = parser.parse_args()

    #------------Setting up logging capabilities -----------
    # NOTE: do not bind a local variable called "logger", "rsrc" or "datadb"
    # in this function. Any local assignment makes the name local for the
    # whole function and hides the module-level object of the same name
    # that is used below.
    rootLogger = logger.RootLogger(int(options.loglevel))
    log = rootLogger.getLogger(sys.argv[0])

    if options.cf is not None:
        generateCf(path.abspath(options.cf))
        log.info("Wrote default controlfile")
        sys.exit()

    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    domainName = os.environ["AVDBNAME"]
    dmn = domain.Domain(domainName)

    if options.edb is None:
        parser.error("Need to specify edb using flag -e")
    if options.user is None:
        parser.error("Need to specify user using flag -u")

    if not options.force:
        answer = raw_input("Chosen dbase is: " + domainName + ", continue(y/n)?")
        if answer == "y":
            dmn = domain.Domain()
        else:
            sys.exit("Interrupted by user")

    if not dmn.edbExistForUser(options.edb, options.user):
        log.error("Edb " + options.edb + " does not exist for user " +
                  options.user + " in domain " + domainName)
        sys.exit(1)

    edb = Edb(dmn.name, options.user, options.edb)
    edbRsrc = rsrc.Rsrc(edb.rsrcPath())

    # Opening controlfile
    cf = controlfile.ControlFile(fileName=path.abspath(args[0]))

    substances = cf.findStringList("substances:")
    outputDir = cf.findExistingPath("outputDir:")
    acIndex = cf.findInt("acIndex:")

    # Get activity code tree
    codes = codetable.CodeTable(edb.rsrcPath(), acIndex=acIndex)

    sourceDb = datadb.Datadb(dmn, options.user, edb.name)
    sourceDb.read()

    substDict = dmn.listSubstanceIndices()

    # List grid names
    gridNames = edb.listGrids()

    # Projection definition handed to emepgrid.sortToEmep for the summed
    # raster; loop invariant, so defined once.
    rt90Proj = "+proj=tmerc +lat_0=0 +lon_0=15d48\\'29.8\\\" +x_0=1500000 +k_0=1 +ellps=bessel +towgs84=414.1,41.3,603.1,-0.855,2.141,-7.023,0"

    for ac in codes.ac:
        if ac == "all":
            continue
        log.debug("Activity code: " + ac)
        dataMarker = emepgrid.emepRaster()
        rasterDict = {}
        substancesWithData = []

        # Rereads grid list for each category to not fill the memory with
        # emission grids. Grid data is only read for grids with current ac.
        gridList = []
        for gridName in gridNames:
            grd = Egrid(dmn.name, options.user, edb.name, gridName)
            grd.readAsc()
            if grd.hasFuel():
                log.warning("Only support for non-fuel grids implemented, no processing done for grid " + gridName)
                # Skip only this grid; stopping the loop here would silently
                # drop all remaining grids from the export.
                continue
            gridList.append(grd)

        for subst in substances:
            log.debug("Substance: " + subst)
            substInd = substDict[subst]
            totEmisRast = raster.Raster(Xll=1190000, Yll=6110000, Ncols=720,
                                        Nrows=1560, Cellsize=1000, init=0)

            for grd in gridList:
                if grd.par["ACTIVITYCODE"].val[0] == ac:
                    # Grid data is read lazily, the first time it is needed
                    if len(grd.substances) == 0:
                        grd.readData()
                    totEmisRast = totEmisRast + grd.substances.get(substInd, 0)

            for src in sourceDb.sources:
                if src.par["ACTIVITYCODE"].val[0] == ac:
                    row, col = totEmisRast.getIndex(src.par["X1"].val,
                                                    src.par["Y1"].val)
                    totEmisRast.data[row, col] += src.getEmis(substInd, edbRsrc,
                                                              "ton/year")

            if totEmisRast.sum() == 0:
                continue

            if subst not in substancesWithData:
                substancesWithData.append(subst)

            emepRast = emepgrid.sortToEmep(totEmisRast, rt90Proj, printInfo=True)

            # Mark every cell holding emissions of at least one substance
            dataMarker.data = numpy.where(emepRast.data > 0, 1, dataMarker.data)
            rasterDict[subst] = emepRast

            categoryDirPath = path.join(outputDir, ac)
            if not path.isdir(categoryDirPath):
                os.mkdir(categoryDirPath)
            fileName = path.join(categoryDirPath, "Emep50km_" + subst + ".asc")
            emepRast.write(fileName)
            log.info("Emissions in EMEP-projection for substance: " + subst +
                     " written to outputDir for category: " + ac)

        if len(rasterDict) > 0:
            # Substance header in the same order as the data columns
            header = "\t".join(["i", "j"] + substancesWithData) + "\n"

            # Creating file for EMEP-data
            fileName = "CLRTAP_" + ac + ".txt"
            categoryDirPath = path.join(outputDir, ac)
            if not path.isdir(categoryDirPath):
                os.mkdir(categoryDirPath)
            clrtapPath = path.join(categoryDirPath, fileName)

            with open(clrtapPath, 'w') as fid:
                fid.write(header)

                # Writing indexes and data for all non-zero elements
                for row in range(dataMarker.nrows):
                    for col in range(dataMarker.ncols):
                        if dataMarker.data[row, col] > 0:
                            (i, j) = dataMarker.getCentreCoords(row, col)
                            fields = [str(i), str(j)]
                            for substWithData in substancesWithData:
                                fields.append(
                                    str(rasterDict[substWithData].data[row, col]))
                            fid.write("\t".join(fields) + "\n")
            log.info("wrote emissions to clrtap-file: " + clrtapPath)
    log.info("Finished")
Example #3
0
def main():
    """Run the topdown processing described by a controlfile.

    Command line: positional CONTROLFILE, plus ``-t TEMPLATEFILE`` to write
    a default controlfile and exit.

    Outline:
      1. Read the topdown table, the national totals table and the
         remaining settings from the controlfile.
      2. Verify that the bottom-up, topdown and emission edbs exist, then
         copy company and facility data from the bottom-up edb to the
         emission edb and read the bottom-up point sources.
      3. For every active code in the topdown table: select the bottom-up
         point sources with that code, optionally regionalize them and the
         distribution key, distribute the national rest (national total
         minus point source total) over the key, and write point sources
         and the emission grid to the emission edb.
      4. Write the result table to the path given by "resTable:".

    Raises:
        SystemExit: With status 1 when an edb, the edb.rsrc, a substance,
            a key or a region definition is missing.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    utils.add_standard_command_options(parser)

    parser.add_argument("controlfile", metavar='CONTROLFILE',
                        action="store",
                        help="Controlfile for topdown processing")

    parser.add_argument("-t", "--template", metavar='TEMPLATEFILE',
                        action="store", dest="cf", default=None,
                        help="Generate default controlfile")

    args = parser.parse_args()

    if args.cf is not None:
        generateCf(args.cf)
        log.info("Wrote default controlfile")
        sys.exit(0)

    log.info("Starting topdown processing")
    # Opening controlfile
    cf = ControlFile(args.controlfile)
    dmn = Domain()

    log.info("Reading topdown table")
    tdTableName = cf.findExistingPath("topDownTable:")
    tdTable = DataTable()
    tdTable.keys.append("Code")
    tdTable.read(tdTableName, delimiter=";")

    log.info("Reading national totals table")
    natTotalTableName = cf.findExistingPath("nationalTotalTable:")
    natTable = DataTable(desc=[{"id": "Code", "type": unicode},
                               {"id": "description", "type": unicode}])
    natTable.keys.append("Code")
    natTable.read(natTotalTableName, units=True, defaultType=str)
    notationKeys = ["NE", "NO", "NA", "IE"]

    log.debug("Remove notation keys from national totals table")
    for row in natTable.data:
        for i, val in enumerate(row):
            if val in notationKeys:
                row[i] = None

    log.debug("Convert all emission columns in national totals to float")
    # Every column except the two text columns holds emissions
    emisColIds = [colId for colId in natTable.listIds()
                  if colId not in ["Code", "description"]]
    for colId in emisColIds:
        natTable.convertCol(colId, float)

    log.debug("Store units from national totals for each substance in dict")
    natUnits = {}
    for col in natTable.desc:
        if col.get("units", None) is not None:
            natUnits[col["id"]] = col["units"]

    log.debug("Read remaining data from control file")
    bottomupEdbName = cf.findString("bottomUpEdb:")
    topDownEdbName = cf.findString("topDownEdb:")
    emissionsEdbName = cf.findString("emissionsEdb:")
    userName = cf.findString("user:")
    # Read as int: it is matched against the regstat year column, which is
    # converted to int before the lookup further down.
    year = cf.findInt("year:")

    # initialize edb objects
    buEdb = Edb(dmn, userName, bottomupEdbName)
    tdEdb = Edb(dmn, userName, topDownEdbName)
    eEdb = Edb(dmn, userName, emissionsEdbName)

    # Verify that all edbs exist before anything is read from or written
    # to them.
    for edb in (buEdb, tdEdb, eEdb):
        if not edb.exists():
            log.error("Edb " + edb.name + " does not exist for user " +
                      userName + " in domain " + dmn.name)
            sys.exit(1)

    log.info("Reading/preparing EDB:s")

    log.info("Reading subdb")
    subdb = Subdb(eEdb)
    subdb.read()

    log.info("Reading subgrpdb")
    subgrpdb = SubgrpStream(buEdb)
    subgrpdb.read()

    log.info("Reading facilitydb")
    facilityIn = FacilityStream(buEdb)

    log.info("Reading companydb")
    companyIn = CompanyStream(buEdb)

    facilityOut = FacilityStream(eEdb, mode="w")
    companyOut = CompanyStream(eEdb, mode="w")

    log.info("Writing company db to result edb")
    companyOut.write(companyIn.read())

    log.info("Writing facility db to result edb")
    facilityOut.write(facilityIn.read())

    keys = tdEdb.listGrids()
    msg = "%i keys found in edb: %s" % (len(keys), tdEdb.name)
    log.info(msg)

    # sourcedb from bottom-up edb
    with SourceStream(buEdb, mode='rb') as source_instream:
        source_reader = ModelReader(source_instream)
        bu_sources = list(source_reader)

    log.info(
        "%i point sources found in edb: %s" % (
            len(bu_sources),
            buEdb.name)
    )

    # Empty sourcedb of the result edb
    # NOTE(review): e_sources is collected but never used below - confirm
    # whether existing sources of the result edb should be preserved.
    if cf.findBoolean("emptyEmissionSourcedb:"):
        eEdb.empty_sourcedb()
        e_sources = []
        log.info("Removed point sources from edb: %s" % (eEdb.name))
    else:
        # sourcedb from emission edb (result edb)
        with SourceStream(eEdb, mode='rb') as source_instream:
            source_reader = ModelReader(source_instream)
            e_sources = list(source_reader)

        msg = "%i point sources found in edb: %s" % (len(e_sources), eEdb.name)
        log.info(msg)

    if not path.exists(eEdb.rsrcPath()):
        log.error("No edb.rsrc exists for emission edb")
        sys.exit(1)
    rsrc = Rsrc(eEdb.rsrcPath())
    acIndex = cf.findInt("acIndex:")
    codeDepth = rsrc.ac[acIndex-1].depth

    substances = cf.findStringList("substances:")

    for subst in substances:
        if subst not in subdb.substIndices:
            log.error("Substance: " + subst + " not in Airviro substance list")
            sys.exit(1)

    # Initialize trace for debug and additional logging
    if cf.findBoolean("trace:") == True:
        log.info("Initializing trace for detailed logging")
        trace = TraceDef(
            active=True,
            substances=cf.findStringList("trace.substances:"),
            logfile=cf.findString("trace.logfile:"),
            regdefgc=cf.findIntList("trace.regdef.gc:",
                                    optional=True,
                                    default=None),
            gcDefRaster=cf.findExistingPath("trace.gcraster:")
        )
    else:
        trace = TraceDef(active=False)

    log.info("Initializing result table")
    resTablePath = cf.findString("resTable:")
    resTable = DataTable(desc=[{"id": "Code", "type": unicode}])
    resTable.keys.append("Code")
    for subst in substances:
        resTable.addCol({"id": subst, "type": float, "unit": "%"})

    # Create emission grid template (with geocodes)
    log.info("Reading emission grid template")
    eGridTemplatePath = cf.findExistingPath("emisGridTemplatePath:")
    eGridTemplate = Egrid(eEdb, "name")
    if eGridTemplatePath.endswith(".asc"):
        eGridTemplatePath = eGridTemplatePath[:-4]
    eGridTemplate.readData(eGridTemplatePath)
    eGridTemplate.substances = {}
    eGridTemplate.par["SUBSTANCE"].val = []

    # Shared state handed to regionalizePS / regionalizeKey
    dd = {"key": None,
          "regstat": None,
          "regdef": None,
          "bu_sources": bu_sources,
          "psIndices": [],
          "units": natUnits,
          "rsrc": rsrc,
          "subdb": subdb,
          "trace": trace,
          "subgrpdb": subgrpdb
          }

    # Process all rows in the topdown table
    for row in tdTable.data:
        code = row[tdTable.colIndex["Code"]]
        active = row[tdTable.colIndex["Active"]]
        statType = row[tdTable.colIndex["Stat_type"]]
        if active == "no":
            continue
        log.info("Code: "+code)

        distributed = False

        # Add '-' to the code to reach max length (fix for a GUI bug)
        airviroCode = code
#         while len(airviroCode.split(".")) < codeDepth:
#             airviroCode += ".-"

        nrow = natTable.data[natTable.rowIndex([code])]

        # Create a resTable row to fill with data
        resrow = [None] * resTable.ncols
        resrow[0] = code

        # Check if national totals are non-zero.
        # Only the emission columns are inspected: the row also holds the
        # "Code"/"description" strings, and in Python 2 any string compares
        # greater than 0, which would make this check always true.
        nonZero = False
        for colId in emisColIds:
            val = nrow[natTable.colIndex[colId]]
            if val is not None and val > 0:
                nonZero = True
                break

        # Filter out indices for pointsources with the current ac
        # Also including sources coded with sub-codes
        # This allows to estimate top-down emissions on a higher code-level
        psIndices = []
        for i, ps in enumerate(bu_sources):
            codeMatch = False

            for emis in ps.EMISSION:
                # It is assumed that the first code is used while processing topdown
                ac = emis.ACTCODE[0]
                if ac[-1] == ".":
                    ac = ac[:-1]
#                 if ac[:len(code)] == code:
                if ac == code:
                    codeMatch = True
                    break

            if not codeMatch:
                for emis in ps.SUBGRP:
                    # It is assumed that the first code is used while processing topdown
                    ac = emis.ACTCODE[0]
                    if ac[:len(code)] == code:
                        codeMatch = True
                        break

            if codeMatch:
                psIndices.append(i)

        dd["psIndices"] = psIndices

        keyName = row[tdTable.colIndex["Key"]]

        # If no distribution key specified and no ps in bottom-up edb - cont.
        if keyName is None and psIndices == []:
            log.debug("No key and no point sources found for code: %s, skipping..." % code)
            resTable.addRow(resrow)
            continue

        if psIndices != []:
            msg = "--Found %i pointsources" % len(psIndices)
            log.info(msg)

        if keyName is not None:
            if keyName not in keys:
                log.error("No such key: " + keyName)
                sys.exit(1)

            msg = "--Key: %s" % keyName
            log.info(msg)
            keyGrid = Egrid(tdEdb, keyName)
            keyGrid.readData()
            log.debug("Read key: " + keyName + " from topdownEdb")

            # create emission grid to store distributed emissions
            eGrid = deepcopy(eGridTemplate)
            eGrid.name = code.replace(".", "_")
            eGrid.par["NAME"].val = code
            eGrid.par["INFO2"].val = "Distribution key: " + keyGrid.par["NAME"].val
            eGrid.par["ACTIVITYCODE"].val = [airviroCode.split(".")]

        regstatName = row[tdTable.colIndex["Regstat"]]
        regdefName = row[tdTable.colIndex["Regdef"]]

        if regstatName is not None:
            if regdefName is None:
                log.error("No region definition given for regional statistics: " +
                          regstatName)
                sys.exit(1)
            regstatPath = path.join(dmn.domainPath(), "topdown", "regstat", regstatName)
            regstat = DataTable()
            log.info("regstatPath: "+regstatPath)
            regstat.read(regstatPath, units=True, defaultType=float, delimiter=";")
            if not "Geocode" in regstat.listIds():
                log.error("No Geocode column found in regstat")
                sys.exit(1)
            regstat.convertCol("Geocode", int)
            regstat.keys.append("Geocode")  # Making Geocode the primary key

            # create list of unique geo codes
            # (the loop variable must not be called "row": Python 2 list
            # comprehensions leak their variable into the enclosing scope,
            # which would overwrite the topdown table row)
            geocodes = [regrow[regstat.colIndex["Geocode"]]
                        for regrow in regstat.data]
            geocodes = unique(geocodes)

            for colId in regstat.listIds():
                if colId.lower() == "year":
                    rows = []
                    regstat.convertCol(colId, int)
                    # Make it possible to accumulate year
                    regstat.setKeys(regstat.keys + [colId])

                    # Calculates the total emission for each geocode
                    # in case there are multiple rows for different fuels etc
                    colsToSum = regstat.listIds()
                    colsToSum.remove(colId)
                    colsToSum.remove("Geocode")
                    for gc in geocodes:
                        # sums all numeric values in colsToSum for
                        # rows matching row id [gc,year]
                        # returns an accumulated row and appends it to rows
                        rowId = regstat.dict2RowId({"Geocode": gc, colId: year})
                        rows.append(regstat.accumulate(rowId, "sum", colsToSum))
                    regstat.data = rows  # replace original rows with accumulated rows
                    regstat.keys.remove(colId)
                    break

            regdef = Raster()
            regdefPath = path.join(dmn.domainPath(), "topdown", "regdef", regdefName)
            regdef.read(regdefPath)

            dd["regstat"] = regstat
            dd["regdef"] = regdef
        else:
            dd["regstat"] = None
            dd["regdef"] = None

        if dd["regstat"] is not None and len(bu_sources) > 0 and statType == "fixed":
            log.info("--Regionalizing pointsources")
            dd = regionalizePS(dd, code)

        if keyName is not None and nonZero:
            regionalizedDefault = False
            # Spatial distribution of emissions
            for subst in substances:

                sInd = subdb.substIndices[subst]
                toUnit = dd["units"][subst] + "/year"
                ntot = nrow[natTable.colIndex[subst]]
                pstot = 0
                for i in dd["psIndices"]:
                    source = dd["bu_sources"][i]
                    # TODO: should give reference to subgrps to include emis from them
                    pstot += source.get_emis(
                        sInd,
                        toUnit,
                        eEdb,
                        actcodes=[code]
                    )

                if ntot is None or ntot == 0:
                    if pstot > 0:
                        # 9999 is used as marker for no national total
                        resrow[resTable.colIndex[subst]] = 9999.0
                        log.warning(
                            "Nattot is 0 but ps tot is: %f %s" % (pstot, toUnit))
                    continue

                nrest = ntot - pstot

                resrow[resTable.colIndex[subst]] = 100.0

                if abs(nrest / ntot) < 0.0001:
                    # Nothing left to distribute for this substance
                    log.info(
                        "--Rest is < 0.01 % of national total, rounded to zero"
                    )
                    continue
                elif nrest < 0:
                    log.warning(
                        "--National rest is below zero, %4.2f proc for %s" % (
                            -1 * nrest / ntot * 100,
                             subst)
                    )
                    dd["trace"].write()
#                    continue
                log.info(
                    "---Substance: "+subst+
                    ", rest is: " + str(nrest) +
                    toUnit + " = " + str(nrest / ntot * 100.0) + "%"
                )

                # Fall back to the key for substance "all" when the key grid
                # has no substance specific data
                try:
                    keyRast = keyGrid.substances[sInd]
                except KeyError:
                    keyRast = keyGrid.substances[subdb.substIndices["all"]]

                dd["key"] = keyRast
                if dd["regstat"] is not None:
                    # NOTE(review): both branches regionalize the key, so the
                    # regionalizedDefault flag currently has no effect on
                    # control flow - confirm the intended behaviour.
                    if (subst not in regstat.colIndex and
                        sInd not in keyGrid.substances and not regionalizedDefault):
                        dd = regionalizeKey(dd, subst, code)
                        regionalizedDefault = True
                    else:
                        dd = regionalizeKey(dd, subst, code)

                emisRast = distribute(dd["key"], nrest)
                emisRast = emisRast * unitConvFac(toUnit, "ton/year")
                eGrid.addData(emisRast, dd["subdb"].substIndices[subst])
                distributed = True

        else:
            # resTable is filled
            # In case all national totals are zero but there are ps
            for subst in substances:
                sInd = dd["subdb"].substIndices[subst]
                toUnit = dd["units"][subst] + "/year"
                ntot = nrow[natTable.colIndex[subst]]
                pstot = 0
                for i in dd["psIndices"]:
                    source = dd["bu_sources"][i]
                    # subgrps are not used!
                    pstot += source.get_emis(sInd, toUnit, buEdb,
                                             actcodes=[code])

                if ntot != 0 and ntot is not None:
                    resrow[resTable.colIndex[subst]] = pstot / ntot * 100.0
                else:
                    resrow[resTable.colIndex[subst]] = -999.0

        if len(dd["psIndices"]) > 0:
            # NOTE(review): the stream is reopened with mode 'wb' for every
            # code - verify that this does not discard the sources written
            # for previous codes.
            tmp_sources = (bu_sources[i] for i in dd["psIndices"])
            with SourceStream(eEdb, mode='wb') as out_source_stream:
                source_writer = ModelWriter(out_source_stream)
                for source in tmp_sources:
                    source_writer.write(source)
            log.debug("Wrote ps to emission edb")

        if distributed:
            eGrid.load()
            log.debug("Wrote emission grid to emission edb")

        dd["trace"].write()
        resTable.addRow(resrow)

    # Context manager guarantees the result table is flushed and closed
    with open(resTablePath, "w") as resTableFile:
        resTable.write(resTableFile)

    log.info("Finished topdown process")