예제 #1
0
 def load_rc(self, rcfilename):
     """
     Load a DA System Info rc-file and store every key/value pair on this
     object (the class is used as a mapping, hence ``self[k] = v``).

     :param rcfilename: path to the rc-file with settings for this simulation
     """
     # .items() instead of the Python-2-only .iteritems() keeps this method
     # working under both Python 2 and Python 3.
     for k, v in rc.read(rcfilename).items():
         self[k] = v

     logging.debug("DA System Info rc-file (%s) loaded successfully" % rcfilename)
예제 #2
0
    def load_rc(self, rcfilename):
        """
        Load a DA Cycle rc-file and store every key/value pair on this
        object (the class is used as a mapping, hence ``self[k] = v``).

        :param rcfilename: path to the rc-file with settings for this simulation
        """
        rcdata = rc.read(rcfilename)
        # .items() works on both Python 2 and 3; .iteritems() is Python-2 only.
        for k, v in rcdata.items():
            self[k] = v

        logging.info('DA Cycle rc-file (%s) loaded successfully' % rcfilename)
예제 #3
0
    def add_model_data_mismatch(self, filename):
        """ 
            Get the model-data mismatch values for this cycle.

                (1) Open a sites_weights file
                (2) Parse the data
                (3) Compare site list against data
                (4) Take care of double sites, etc

            :param filename: path to the sites.rc file with per-site weights
            :raises IOError: if *filename* does not exist

            Side effects: sets mdm/flag/may_localize/may_reject on every obs
            in self.datalist, and stores the parsed site tables on self.
        """

        # The sites.rc file is mandatory input; fail loudly if it is absent.
        if not os.path.exists(filename):
            msg = 'Could not find  the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            raise IOError(msg)  # parenthesized raise works on Python 2 and 3
        else:
            self.sites_file = filename

        sites_weights = rc.read(self.sites_file)

        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])

        logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
        logging.warning('Model-data mismatch scaling factor     : %f ' % self.global_R_scaling)
        logging.debug('Model-data mismatch site categories    : %d ' % self.n_site_categories)

        # Per-category settings, encoded as "name; error; may_localize; may_reject"
        cats = [k for k in sites_weights.keys() if 'site.category' in k]

        site_categories = {}
        for key in cats:
            name, error, may_localize, may_reject = sites_weights[key].split(';')
            name = name.strip().lower()
            error = float(error)
            may_reject = ("TRUE" in may_reject.upper())
            may_localize = ("TRUE" in may_localize.upper())
            site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject}

        site_info = {}
        site_move = {}
        site_hourly = {}   # option added to include only certain hours of the day (for e.g. PAL) IvdL
        site_incalt = {}   # option to increase sampling altitude for sites specified in sites and weights file
        for key, value in sites_weights.items():  # .items() is Python-2/3 compatible
            if 'co2_' in key or 'sf6' in key:  # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP
                sitename, sitecategory = key, value
                sitename = sitename.strip()
                sitecategory = sitecategory.split()[0].strip().lower()
                site_info[sitename] = site_categories[sitecategory]
            if 'site.move' in key:
                identifier, latmove, lonmove = value.split(';')
                site_move[identifier.strip()] = (float(latmove), float(lonmove))
            if 'site.hourly' in key:
                identifier, hourfrom, hourto = value.split(';')
                site_hourly[identifier.strip()] = (int(hourfrom), int(hourto))
            if 'site.incalt' in key:
                identifier, incalt = value.split(';')
                site_incalt[identifier.strip()] = int(incalt)

        for obs in self.datalist:  # loop over all available data points

            obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set by script
            obs.flag = 99  # default is do-not-use , until explicitly set by script
            exclude_hourly = False  # default is that hourly values are not included

            identifier = obs.code
            species, site, method, lab, datasetnr = identifier.split('_')

            if identifier in site_info:  # 'x in d' replaces the Python-2-only has_key()
                if identifier in site_hourly:
                    obs.samplingstrategy = 2
                    hourf, hourt = site_hourly[identifier]
                    if int(obs.xdate.hour) >= hourf and int(obs.xdate.hour) <= hourt:
                        logging.warning("Observations in hourly dataset INCLUDED, while sampling time %s was between %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
                    else:
                        logging.warning("Observation in hourly dataset EXCLUDED, while sampling time %s was outside %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
                        exclude_hourly = True

                # Both branches below assign the identical mdm/localize/reject
                # values; hoist them so only the flag and log message differ.
                obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                obs.may_localize = site_info[identifier]['may_localize']
                obs.may_reject = site_info[identifier]['may_reject']

                if site_info[identifier]['category'] == 'do-not-use' or exclude_hourly:
                    logging.warning("Observation found (%s, %d), but not used in assimilation !!!" % (identifier, obs.id))
                    obs.flag = 99
                else:
                    logging.debug("Observation found (%s, %d)" % (identifier, obs.id))
                    obs.flag = 0

            else:
                logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s)  !!!" % (identifier, obs.id, self.sites_file))

            if identifier in site_move:
                movelat, movelon = site_move[identifier]
                obs.lat = obs.lat + movelat
                obs.lon = obs.lon + movelon

                logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon))

            if identifier in site_incalt:
                incalt = site_incalt[identifier]
                obs.height = obs.height + incalt

                logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt))

        # Add site_info dictionary to the Observations object for future use

        self.site_info = site_info
        self.site_move = site_move
        self.site_hourly = site_hourly
        self.site_incalt = site_incalt

        logging.debug("Added Model Data Mismatch to all samples ")
예제 #4
0
    def add_model_data_mismatch(self, filename):
        """ 
            Get the model-data mismatch values for this cycle.

                (1) Open a sites_weights file
                (2) Parse the data
                (3) Compare site list against data
                (4) Take care of double sites, etc

            :param filename: path to the sites.rc file with per-site weights
            :raises IOError: if *filename* does not exist

            Side effects: sets mdm/flag/may_localize/may_reject on every obs
            in self.datalist, and stores the parsed site table on self.
        """

        # The sites.rc file is mandatory input; fail loudly if it is absent.
        if not os.path.exists(filename):
            msg = 'Could not find  the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            raise IOError(msg)  # parenthesized raise works on Python 2 and 3
        else:
            self.sites_file = filename

        sites_weights = rc.read(self.sites_file)

        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])
        self.n_sites_active = int(sites_weights['n.sites.active'])
        self.n_sites_moved = int(sites_weights['n.sites.moved'])

        logging.debug('Model-data mismatch rejection threshold: %d ' % (self.rejection_threshold))
        logging.warning('Model-data mismatch scaling factor     : %f ' % (self.global_R_scaling))
        logging.debug('Model-data mismatch site categories    : %d ' % (self.n_site_categories))
        logging.debug('Model-data mismatch active sites       : %d ' % (self.n_sites_active))
        logging.debug('Model-data mismatch moved sites        : %d ' % (self.n_sites_moved))

        # Per-category settings, encoded as "name; error; may_localize; may_reject"
        cats = [k for k in sites_weights.keys() if 'site.category' in k]

        site_categories = {}
        for key in cats:
            name, error, may_localize, may_reject = sites_weights[key].split(';')
            name = name.strip().lower()
            error = float(error)
            may_localize = ("TRUE" in may_localize.upper())
            may_reject = ("TRUE" in may_reject.upper())
            site_categories[name] = {'category':name, 'error':error, 'may_localize':may_localize, 'may_reject':may_reject}

        # Active sites, encoded as "sitename; categoryname"
        active = [k for k in sites_weights.keys() if 'site.active' in k]

        site_info = {}
        for key in active:
            sitename, sitecategory = sites_weights[key].split(';')
            sitename = sitename.strip()
            sitecategory = sitecategory.strip().lower()
            site_info[sitename] = site_categories[sitecategory]

        for obs in self.datalist:

            obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set by script
            obs.flag = 99  # default is do-not-use , until explicitly set by script

            species, site, method, lab, nr = os.path.split(obs.fromfile)[-1].split('_')

            # Note: the original code first built a "%s_%02d_%s" identifier and
            # immediately overwrote it; only the name_convert() result is used.
            identifier = name_convert(name="%s_%s_%s" % (site.lower(), method.lower(), lab.lower(),), to='GV')

            if identifier in site_info:  # 'x in d' replaces the Python-2-only has_key()
                # The 'do-not-use' test must live inside this branch: indexing
                # site_info with an unknown identifier raised KeyError before.
                if site_info[identifier]['category'] == 'do-not-use':
                    logging.warning("Observation found (%s, %s), but not used in assimilation !!!" % (obs.code, identifier))
                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 99
                else:
                    logging.debug("Observation found (%s, %s)" % (obs.code, identifier))
                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 0
            else:
                logging.warning("Observation NOT found (%s, %s), please check sites.rc file  (%s)  !!!" % (obs.code, identifier, self.sites_file))

        # Add site_info dictionary to the Observations object for future use.
        # (Moved out of the loop: it only needs to happen once.)
        self.site_info = site_info
예제 #5
0
    def add_model_data_mismatch(self, filename):
        """
            Get the model-data mismatch values for this cycle.

                (1) Open a sites_weights file
                (2) Parse the data
                (3) Compare site list against data
                (4) Take care of double sites, etc

            This variant supports seasonal (summer/winter) mismatch errors,
            per-site footprint availability checks, a CO-based event filter,
            local-time hourly sampling windows and aircraft altitude
            screening.  Observations that do not pass the filters are dropped
            from self.datalist at the end of this method.
        """

        if not os.path.exists(filename):
            msg = 'Could not find  the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            # NOTE(review): comma-style raise is Python-2-only syntax.
            raise IOError, msg
        else:
            self.sites_file = filename

        sites_weights = rc.read(self.sites_file)

        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])

        logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
        logging.warning('Model-data mismatch scaling factor     : %f ' % self.global_R_scaling)
        logging.debug('Model-data mismatch site categories    : %d ' % self.n_site_categories)

        # Per-category settings, encoded as
        # "name; error_summer; error_winter; may_localize; may_reject".
        cats = [k for k in sites_weights.keys() if 'site.category' in k]

        site_categories = {}
        for key in cats:
            name, error_summer, error_winter , may_localize, may_reject = sites_weights[key].split(';')
            name = name.strip().lower()
            error_summer = float(error_summer)
            error_winter = float(error_winter)
            may_reject = ("TRUE" in may_reject.upper())
            may_localize = ("TRUE" in may_localize.upper())
            site_categories[name] = {'category': name, 'error_summer': error_summer, 'error_winter': error_winter ,'may_localize': may_localize, 'may_reject': may_reject}

        site_info = {}
        site_move = {}
        site_hourly = {}   # option added to include only certain hours of the day (for e.g. PAL) IvdL
        site_foot = {} # option added to check for available footprints per observation
        site_incalt = {} # option to increase sampling altitude for sites specified in sites and weights file
        co_filter = {}
        for key, value in sites_weights.iteritems():
            if 'co2_' in key or 'sf6' in key:  # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP
                sitename, sitecategory = key, value
                sitename = sitename.strip()
                sitecategory = sitecategory.split()[0].strip().lower()
                site_info[sitename] = site_categories[sitecategory]
            if 'site.move' in key:
                identifier, latmove, lonmove = value.split(';')
                site_move[identifier.strip()] = (float(latmove), float(lonmove))
            if 'site.hourly' in key:
                identifier, hourfrom, hourto = value.split(';')
                site_hourly[identifier.strip()] = (int(hourfrom), int(hourto))
            if 'site.foot' in key:
                identifier, foot = value.split(';')
                site_foot[identifier.strip()] = (str(foot))
            if 'site.incalt' in key:
                identifier, incalt = value.split(';')
                site_incalt[identifier.strip()] = (int(incalt))
            if 'co.filter' in key:
                identifier_cofilter, cofilter = value.split(';')
                co_filter[identifier_cofilter.strip()] = (str(cofilter))

        #for obs in self.datalist:
        # Indices into self.datalist that survive / fail the filters below.
        do_not_simulate=[]
        do_simulate=[]

        # Build the list of event ids to exclude from the CO-filter file.
        # NOTE(review): `cofilter` below is the loop variable leaked from the
        # parsing loop above — if sites_weights has no 'co.filter' key this
        # raises NameError, and with several keys only the LAST file is read
        # (co_filter dict is never consulted).  Confirm intended behaviour.
        eventids=[]
        for line in open(str(cofilter)):
            columns = line.split()
            if columns[0] == "eventid":
                # skip the header row
                pass
            else:
                eventids.append(columns[0])

        # NOTE(review): `array` is presumably numpy.array (import not visible
        # in this chunk) — confirm.
        eventids=array(eventids)
        #logging.info('eventids %s'%eventids)

        for i,obs in enumerate(self.datalist):
            obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set by script
            obs.flag = 99  # default is do-not-use , until explicitly set by script
            exclude_hourly = False # default is that hourly values are not included
            exclude_footprint = False
            exclude_aircraft = False
            identifier = obs.code
            species, site, method, lab, datasetnr = identifier.split('_')



            if site_info.has_key(identifier):
                # Footprint filter: keep the obs only if a footprint file for
                # this obs.id exists under <path>/<year>/<month>, and its id
                # is not on the CO-filter event list.
                if site_foot.has_key(identifier):
                    path_foot  = site_foot[identifier]
                    dir_foot = os.path.join(path_foot,'%s'%obs.xdate.year,'%02d'%obs.xdate.month)
                    files_foot = os.listdir(dir_foot)
                    str_id = '%s'%obs.id
                    if any(str_id in s for s in files_foot):
                        logging.info("id in footprint %s" %str_id)
                        if any(str_id in t for t in eventids):
                            exclude_footprint = True
                            logging.info("id in CO list, exclude this observation %s" %str_id)
                    else:
                        exclude_footprint = True
                        logging.info("id not in footprint %s" %str_id)
                # Hourly filter: window is in LOCAL time (utc2loc offset) and
                # uses strict bounds, unlike the inclusive UTC check elsewhere.
                if site_hourly.has_key(identifier):
                    obs.samplingstrategy = 2
                    hourf, hourt = site_hourly[identifier]
                    if int(obs.xdate.hour+obs.utc2loc) > hourf and int(obs.xdate.hour+obs.utc2loc) < hourt:
                        logging.warning("Observation UTC sampling time %s was between local %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
                    else:
                        logging.warning("Observation UTC sampling time %s was outside local %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
                        exclude_hourly = True
                # NOTE(review): aircraft samples at or below 3000 m are
                # excluded; only > 3000 m is kept — confirm this is intended.
                if 'aircraft' in method:
                    if obs.intakeheight> 3000.:
                        logging.warning("Intake height aircraft observation %s > 3000 meters" %(obs.intakeheight))
                    else:
                        exclude_aircraft = True
                if site_info[identifier]['category'] == 'do-not-use' or site_info[identifier]['category'] == 'do-not-simulate' or exclude_hourly or exclude_footprint or exclude_aircraft:
                    logging.warning("Site found (%s, %d), but data point not used in assimilation !!!" % (identifier, obs.id))
                    # Seasonal mismatch: April-September uses the summer error.
                    if int(obs.xdate.month) > 3 and int(obs.xdate.month) < 10:
                        obs.mdm = site_info[identifier]['error_summer'] * self.global_R_scaling
                    else:
                        obs.mdm = site_info[identifier]['error_winter'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 99
                    if site_info[identifier]['category'] == 'do-not-simulate':
                        do_not_simulate.append(i)
                else:
                    logging.debug("Site found (%s, %d)" % (identifier, obs.id))
                    # Seasonal mismatch: April-September uses the summer error.
                    if int(obs.xdate.month) > 3 and int(obs.xdate.month) < 10:
                        obs.mdm = site_info[identifier]['error_summer'] * self.global_R_scaling
                    else:
                        obs.mdm = site_info[identifier]['error_winter'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 0
                    do_simulate.append(i)
            else:
                logging.warning("Site NOT found (%s, %d), please check sites.rc file (%s)  !!!" % (identifier, obs.id, self.sites_file))

            if site_move.has_key(identifier):

                movelat, movelon = site_move[identifier]
                obs.lat = obs.lat + movelat
                obs.lon = obs.lon + movelon

                logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon))

            if site_incalt.has_key(identifier):

                incalt = site_incalt[identifier]
                obs.height = obs.height + incalt

                logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt))


        #if len(do_not_simulate) > 0 :
        #    logging.info("do-not-simulate flags located")
        #    self.datalist = [self.datalist[k] for k in do_simulate]
        #    logging.info("After do-not-simulate filter observations list now holds %d values" % len(self.datalist))
        #else:
        #    logging.info("No do-not-simulate flags, continues normally ")
        #    logging.info("Observations list now holds %d values" % len(self.datalist))

        # Keep only the observations that passed every filter above.  This
        # also drops obs whose site was NOT found in sites.rc (they never
        # reach do_simulate).
        self.datalist = [self.datalist[k] for k in do_simulate]
        logging.info("After multple filters observations list now holds %d values" % len(self.datalist))
        # Add site_info dictionary to the Observations object for future use


        self.site_info = site_info
        self.site_move = site_move
        self.site_hourly = site_hourly
        self.site_foot = site_foot
        self.site_incalt = site_incalt

        logging.debug("Added Model Data Mismatch to all samples ")