def proc_work(twx_cfg, start_ymd, end_ymd, params_ppca, rank):
    """Worker loop: infill Tmin/Tmax for every station id sent by the coordinator.

    For each station id received from ``RANK_COORD`` the worker calls
    ``infill_tair`` for every temperature variable whose broadcast station
    list contains the id, sends one result message per variable to
    ``RANK_WRITE`` and then reports back to the coordinator with its rank.

    Parameters
    ----------
    twx_cfg : object
        Configuration providing ``fpath_stndata_nc_tair_homog`` and
        ``path_reanalysis_namerica``.
    start_ymd, end_ymd
        Passed together as the date-range tuple to ``StationDataDb`` and
        ``NNRNghData``.
    params_ppca : dict
        Infill parameters forwarded unchanged to ``infill_tair``;
        ``'min_daily_nnghs'`` is echoed to stdout.
    rank : int
        Rank of this worker; used in log messages and sent to the
        coordinator after each station.

    Returns
    -------
    int
        0, once a ``TAG_STOPWORK`` message has been received.
    """
    status = MPI.Status()

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))
    ndays = stn_da.days.size

    # Placeholder values sent to the writer for a variable that could not
    # be infilled.
    empty_fill = np.ones(ndays, dtype=np.float32) * netCDF4.default_fillvals['f4']
    empty_flags = np.ones(ndays, dtype=np.int8) * netCDF4.default_fillvals['i1']
    empty_bias = netCDF4.default_fillvals['f4']
    empty_mae = netCDF4.default_fillvals['f4']
    empty_results = (empty_fill, empty_flags, empty_fill, empty_mae, empty_bias)

    ds_nnr = NNRNghData(twx_cfg.path_reanalysis_namerica,
                        (start_ymd, end_ymd))

    mths = np.arange(1, 13)
    mth_masks = [stn_da.days[MONTH] == mth for mth in mths]

    # Monthly mean / variance variable names, keyed by temperature variable.
    vnames = {}
    for tair_var in ('tmin', 'tmax'):
        vnames[tair_var] = ([get_mean_varname(tair_var, mth) for mth in mths],
                            [get_variance_varname(tair_var, mth) for mth in mths])

    stnids_tmin, stnids_tmax = MPI.COMM_WORLD.bcast(None, root=RANK_COORD)
    stnids_by_var = (('tmin', stnids_tmin), ('tmax', stnids_tmax))
    print("".join(["WORKER ", str(rank), ": Received broadcast msg"]))
    print("".join(["WORKER ", str(rank),
                   ": Minimum number of station neighbors for infilling: ",
                   str(params_ppca['min_daily_nnghs'])]))

    while True:

        stn_id = MPI.COMM_WORLD.recv(source=RANK_COORD, tag=MPI.ANY_TAG,
                                     status=status)

        if status.tag == TAG_STOPWORK:
            MPI.COMM_WORLD.send([None] * 7, dest=RANK_WRITE, tag=TAG_STOPWORK)
            print("".join(["WORKER ", str(rank), ": Finished"]))
            return 0

        # Decide what to run before entering the try block so the error
        # path never depends on names that might not have been assigned.
        tair_vars = [tair_var for tair_var, stnids in stnids_by_var
                     if stn_id in stnids]

        results = {}
        try:

            for tair_var in tair_vars:
                vnames_mean, vnames_vari = vnames[tair_var]
                fnl, fill_mask, infill, mae, bias = infill_tair(
                    stn_id, stn_da, tair_var, ds_nnr,
                    vnames_mean, vnames_vari, mth_masks, params_ppca)
                results[tair_var] = (fnl, fill_mask, infill, mae, bias)

        except Exception as e:
            # Best effort: log once per station and fall through. Variables
            # that finished before the failure keep their results; only the
            # failed / not-yet-run ones are replaced by fill values below.
            print("".join(["ERROR: Could not infill ", stn_id, "|", str(e)]))

        for tair_var in tair_vars:
            fnl, fill_mask, infill, mae, bias = results.get(tair_var,
                                                            empty_results)
            MPI.COMM_WORLD.send((stn_id, tair_var, fnl, fill_mask,
                                 infill, mae, bias),
                                dest=RANK_WRITE, tag=TAG_DOWORK)

        # Tell the coordinator this worker is free for the next station.
        MPI.COMM_WORLD.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def _get_incomplete_stnids(fpath_infill, stnids):
    """Return the entries of *stnids* that are still masked in an infill file.

    Reads ``LAST_VAR_WRITTEN`` from the netCDF file at *fpath_infill* and
    selects the station ids whose value is masked. ``np.ma.getmaskarray``
    yields a full boolean array even when the data come back as a plain
    ndarray or as a masked array without any masked entry, so a finished
    run gives an empty 1-D selection.
    """
    ds = Dataset(fpath_infill)
    try:
        last_written = ds.variables[LAST_VAR_WRITTEN][:]
    finally:
        # Values are already in memory; do not leak the file handle.
        ds.close()

    return stnids[np.ma.getmaskarray(last_written)]


def proc_coord(twx_cfg, ncdf_mode, stnids_to_infill_tmin, stnids_to_infill_tmax,
               start_ymd, end_ymd, nwrkers):
    """Coordinator process: build the station work list and hand it out.

    Broadcasts the Tmin and Tmax station id arrays from ``RANK_COORD``, then
    sends each unique station id to a worker with ``TAG_DOWORK`` and finally
    sends ``TAG_STOPWORK`` to every worker.

    Parameters
    ----------
    twx_cfg : object
        Configuration providing ``fpath_stndata_nc_tair_homog``,
        ``fpath_stndata_nc_infill_tmin`` and ``fpath_stndata_nc_infill_tmax``.
    ncdf_mode : str
        When equal to ``'r+'`` the run is treated as a restart and the
        station lists are narrowed (see below); any other value infills all
        stations that have a finite month-1 mean for the variable.
    stnids_to_infill_tmin, stnids_to_infill_tmax : ndarray or None
        Only consulted on restart. If given, used as the station list for
        that variable; if None, the list is reduced to the stations still
        masked in the corresponding infill file.
    start_ymd, end_ymd
        Passed together as the date-range tuple to ``StationDataDb``.
    nwrkers : int
        Number of worker processes; worker ranks are
        ``N_NON_WRKRS .. N_NON_WRKRS + nwrkers - 1``.
    """
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    mask_tmin = np.isfinite(stn_da.stns[get_mean_varname('tmin', 1)])
    mask_tmax = np.isfinite(stn_da.stns[get_mean_varname('tmax', 1)])

    stnids_tmin = stn_da.stn_ids[mask_tmin]
    stnids_tmax = stn_da.stn_ids[mask_tmax]

    # The station database is not needed past this point.
    stn_da.ds.close()

    # Check if we're restarting a run
    if ncdf_mode == 'r+':

        # If rerunning remove stn ids that have already been completed
        if stnids_to_infill_tmin is None:
            stnids_tmin = _get_incomplete_stnids(
                twx_cfg.fpath_stndata_nc_infill_tmin, stnids_tmin)
        else:
            stnids_tmin = stnids_to_infill_tmin

        if stnids_to_infill_tmax is None:
            stnids_tmax = _get_incomplete_stnids(
                twx_cfg.fpath_stndata_nc_infill_tmax, stnids_tmax)
        else:
            stnids_tmax = stnids_to_infill_tmax

    stnids_all = np.unique(np.concatenate((stnids_tmin, stnids_tmax)))

    # Send stn ids to all processes
    MPI.COMM_WORLD.bcast((stnids_tmin, stnids_tmax), root=RANK_COORD)

    print("COORD: Done initialization. Starting to send work.")

    for cnt, stn_id in enumerate(stnids_all):

        if cnt < nwrkers:
            # First round: every worker gets one station without asking.
            dest = cnt + N_NON_WRKRS
        else:
            # Afterwards a worker reports its own rank when it is free.
            dest = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)

        MPI.COMM_WORLD.send(stn_id, dest=dest, tag=TAG_DOWORK)

    for w in range(nwrkers):
        MPI.COMM_WORLD.send(None, dest=w + N_NON_WRKRS, tag=TAG_STOPWORK)

    print("COORD: done")
Esempio n. 3
0
    def __init__(self,
                 stnda,
                 var_tair,
                 infill_params,
                 xval_stnids=None,
                 ntrain_yrs=5):
        '''
        Load the cross-validation stations and build their hold-out masks.

        Parameters
        ----------
        stnda : twx.db.StationDataDb
            Station database supplying the observations of the target
            stations and of their neighbors.
        var_tair : str
            Temperature variable to work on ('tmin' or 'tmax').
        infill_params : XvalInfillParams
            Parameters for the infill model; stored unchanged.
        xval_stnids : ndarray, optional
            Ids of the stations to cross validate. When None, the ids
            returned by load_default_xval_stnids(stnda.stn_ids) are used.
        ntrain_yrs : int, optional
            Number of years kept for training. Per station, the last
            round(ntrain_yrs * 365.25) finite observations stay available
            for training; every earlier finite observation is flagged in
            that station's cross-validation mask.
        '''

        if xval_stnids is None:
            xval_stnids = load_default_xval_stnids(stnda.stn_ids)

        # Observations, one column per station
        obs = stnda.load_all_stn_obs_var(xval_stnids, var_tair)[0]

        if obs.ndim == 1:
            # A single station comes back 1-D; force a column layout
            obs.shape = (obs.shape[0], 1)

        # How many of the most recent finite observations stay untouched
        ntrain_obs = int(np.round(ntrain_yrs * 365.25))

        day_idxs = np.arange(stnda.days.size)

        def build_xval_mask(col):
            # True where the station has an observation that is NOT among
            # its last ntrain_obs finite observations
            has_obs = np.isfinite(obs[:, col])
            train_idxs = np.nonzero(has_obs)[0][-ntrain_obs:]
            not_train = ~np.in1d(day_idxs, train_idxs, assume_unique=True)
            return not_train & has_obs

        xval_masks = [build_xval_mask(col)
                      for col in range(xval_stnids.size)]

        mths = np.arange(1, 13)

        self.stnda = stnda
        self.var_tair = var_tair
        self.infill_params = infill_params
        self.stn_ids = xval_stnids
        self.stn_obs = obs
        self.stn_xval_masks = xval_masks

        self.mths = mths
        self.mth_masks = [stnda.days[MONTH] == mth for mth in mths]
        self.vnames_mean = [get_mean_varname(var_tair, mth) for mth in mths]
        self.vnames_vari = [get_variance_varname(var_tair, mth)
                            for mth in mths]

        # Neighbor station mask: stations with a finite month-1 mean
        self.ngh_stn_mask = np.isfinite(
            stnda.stns[get_mean_varname(var_tair, 1)])
 params_ppca['npcs'] = 0
 params_ppca['verbose'] = False
 
 if rank == RANK_COORD:
     
     # If previous log file specified, run infilling
     # only for stations that were suspect.
     if fpath_log is not None:
         
         print ("Initializing rerun of previous infill run. "
                "Process %d of %d"%(rank, nsize))
                             
         stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                                (start_ymd, end_ymd))
      
         mask_tmin = np.isfinite(stn_da.stns[get_mean_varname('tmin', 1)])
         mask_tmax = np.isfinite(stn_da.stns[get_mean_varname('tmax', 1)])
      
         stnids_tmin = stn_da.stn_ids[mask_tmin]
         stnids_tmax = stn_da.stn_ids[mask_tmax]
          
         stnids_bad = get_bad_infill_stnids(fpath_log)
                       
         stnids_to_infill_tmin = stnids_tmin[np.in1d(stnids_tmin, stnids_bad, True)]
         stnids_to_infill_tmax = stnids_tmax[np.in1d(stnids_tmax, stnids_bad, True)]
          
         stn_da.ds.close()
         stn_da = None
         mask_tmin = None
         mask_tmax = None
         stnids_tmin = None
def proc_write(twx_cfg, start_ymd, end_ymd, nwrkers):
    """Writer process: store monthly means/variances sent by the workers.

    Adds one mean and one variance station variable per month and per
    temperature variable ('tmin', 'tmax') to the station database, then
    receives ``(stn_id, tair_var, stn_mean, stn_vari)`` messages and writes
    the twelve monthly values for that station. The dataset is synced after
    every station and closed when the function exits.

    Parameters
    ----------
    twx_cfg : object
        Configuration providing ``fpath_stndata_nc_tair_homog``.
    start_ymd, end_ymd
        Passed together as the date-range tuple to ``StationDataDb``.
    nwrkers : int
        Number of ``TAG_STOPWORK`` messages to wait for before finishing.

    Returns
    -------
    int
        0, once ``nwrkers`` stop messages have been received.
    """
    status = MPI.Status()
    nwrkrs_done = 0
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd),
                           mode="r+")

    try:

        mths = np.arange(1, 13)

        for mth in mths:

            for varname in ['tmin', 'tmax']:

                varname_mean = get_mean_varname(varname, mth)
                varname_vari = get_variance_varname(varname, mth)

                stn_da.add_stn_variable(varname_mean, varname_mean, "C", 'f8')
                stn_da.add_stn_variable(varname_vari, varname_vari, "C**2", 'f8')

        stn_da.ds.sync()

        mask_por_tmin, mask_por_tmax = MPI.COMM_WORLD.bcast(None,
                                                            root=RANK_COORD)
        stn_ids_tmin = stn_da.stn_ids[mask_por_tmin]
        stn_ids_tmax = stn_da.stn_ids[mask_por_tmax]
        print("WRITER: Received broadcast msg")
        stn_ids_uniq = np.unique(np.concatenate([stn_ids_tmin, stn_ids_tmax]))

        # Station id -> index along the station dimension, limited to the
        # stations that will be written. One vectorized membership test
        # instead of a linear array scan per station.
        is_target = np.in1d(stn_da.stn_ids, stn_ids_uniq)
        stn_idxs = {stn_da.stn_ids[x]: x for x in np.nonzero(is_target)[0]}

        ttl_infills = stn_ids_tmin.size + stn_ids_tmax.size

        stat_chk = StatusCheck(ttl_infills, 30)

        while True:

            stn_id, tair_var, stn_mean, stn_vari = MPI.COMM_WORLD.recv(
                source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

            if status.tag == TAG_STOPWORK:

                nwrkrs_done += 1
                if nwrkrs_done == nwrkers:
                    print("WRITER: Finished")
                    return 0
                continue

            stnid_dim = stn_idxs[stn_id]

            for mth in mths:

                # Monthly values are stored at position mth - 1.
                vname_mean = get_mean_varname(tair_var, mth)
                stn_da.ds.variables[vname_mean][stnid_dim] = stn_mean[mth - 1]

                vname_vary = get_variance_varname(tair_var, mth)
                stn_da.ds.variables[vname_vary][stnid_dim] = stn_vari[mth - 1]

            stn_da.ds.sync()

            stat_chk.increment()

    finally:
        # Opened in "r+" mode: always release the file, on normal
        # completion as well as on error.
        stn_da.ds.close()