예제 #1
0
def average_main(filename,
                 maskname,
                 varname,
                 latname,
                 lonname,
                 depname=None,
                 timname=None,
                 gridval=None,
                 Nproc=1,
                 ind=None):
    """
    Average a variable over a mask and save the result to NetCDF.

    Works on a single file or on a set of split files. The per-file work is
    done by average_1file; its first three outputs are divided here by the
    weights it returns. In both cases the result is written to
    ``filename + '_mean.nc'``.

    Parameters
    ----------
    filename : str
        Base name of the file to be opened. For split files the suffix
        ``_k_j_i`` is appended for every index combination.
    maskname : str
        Base name of the mask file used to compute the average. Receives
        the same ``_k_j_i`` suffix for split files.
    varname : str
        Name of the variable to be averaged.
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    depname : str, optional
        Name of the depth variable. The default is None.
    timname : str, optional
        Name of the time variable. The default is None.
    gridval : optional
        Forwarded unchanged to average_1file. The default is None.
    Nproc : int, optional
        If bigger than 1 the split files are processed through
        dask.delayed in groups produced by split_iterable.
        The default is 1.
    ind : tuple, optional
        If the file(s) to be loaded come(s) from a partition from breakds
        a tuple with the index must be provided (Z, Y, X). Where X, Y, Z
        are the respective index in each dimension (floats or array like).
        The default is None.

    Returns
    -------
    None.
    """

    if ind is None:
        # Single file: normalise the outputs of average_1file directly.
        f_var, f_lat, f_lon, weight, weight0, tim, dep =\
            average_1file(filename, maskname, varname, latname,
                          lonname, depname, timname, gridval)
        f_var = f_var / weight
        f_lat = f_lat / weight0
        f_lon = f_lon / weight0

    else:

        def avg_kji(kji):
            # Process one split file and store its result in the list
            # ``outs`` of the enclosing scope (re-created for every k).
            k, j, i = kji
            fname = filename + '_' + str(k) + '_' + str(j) + '_' + str(i)
            mname = maskname + '_' + str(k) + '_' + str(j) + '_' + str(i)
            outs.append(
                average_1file(fname, mname, varname, latname, lonname, depname,
                              timname, gridval))
            return 1

        # Get iterables for the 3 index and call combinations
        indk = int2iterable(ind[0])
        indj = int2iterable(ind[1])
        indi = int2iterable(ind[2])

        dep = np.array([])
        f_var = np.array([])
        for k in indk:
            outs = []
            kji_com = [(k, j, i) for j in indj for i in indi]
            totl = len(kji_com)

            # NOTE(review): the progress value below is scaled by (k + 1),
            # so it is only a true percentage when k == 0.

            # Run in parallel
            if Nproc > 1:
                kji_com = split_iterable(kji_com, Nproc)
                print("Processing {} files with {} cores".format(totl, Nproc))
                totl = len(kji_com)
                for i, kji_ in enumerate(kji_com):
                    print("\t{:.2f}%".format(100. * i * (k + 1) / totl))
                    output = []
                    for kji in kji_:
                        run_paral = dask.delayed(avg_kji)(kji)
                        output.append(run_paral)
                    # avg_kji returns 1, so summing the delayed results
                    # simply forces every task of the group to run.
                    total = dask.delayed(sum)(output)
                    total.compute()

            # Run in series
            else:
                print("Processing {} files".format(totl))
                for i, kji_ in enumerate(kji_com):
                    print("\t{:.2f}%".format(100. * i * (k + 1) / totl))
                    avg_kji(kji_)

            # Accumulate the contributions of every (j, i) file of this k.
            tav_val = np.zeros_like(outs[0][0])
            tav_lon, tav_lat = 0., 0.
            tweight, tweight0 = np.zeros_like(outs[0][3]), 0.

            for av_val, av_lat, av_lon, weight, weight0, _, _ in outs:
                tav_val += av_val
                tav_lat += av_lat
                tav_lon += av_lon
                tweight0 += weight0
                tweight += weight
            dep = np.append(dep, outs[0][6])
            # Avoid dividing by zero where no weight was accumulated.
            tweight[tweight == 0.] = 1
            if len(f_var.shape) == 2:
                # 2-D result: stack this k along the last axis.
                f_var = np.append(f_var, tav_val / tweight, axis=-1)
            elif f_var.shape[0] != 0:
                f_var = np.append(f_var, tav_val / tweight)
            else:
                # First k: f_var is still the empty placeholder.
                f_var = tav_val / tweight
            f_lat = tav_lat / tweight0
            f_lon = tav_lon / tweight0

        tim = outs[0][5]

    #############
    # SAVE DATA #
    #############
    # Done for both branches: previously this section lived inside the
    # ``else`` above, so the single-file result was computed and discarded.

    ds = {lonname: ((), f_lon), latname: ((), f_lat)}

    if timname is not None:
        ds[timname] = (('t'), tim)
    if depname is not None:
        ds[depname] = (('z'), dep)

    if len(f_var.shape) == 2:
        ds[varname] = (('t', 'z'), f_var)
    else:
        ds[varname] = (('t'), f_var)

    ds = xr.Dataset(ds)
    ds.to_netcdf(filename + '_mean.nc')
예제 #2
0
def stra_main(file_den,
              file_str,
              bpnt,
              denname,
              latname,
              lonname,
              depname,
              strname,
              timname=None,
              Nproc=1,
              ind=None,
              H=5000):
    """
    Run stra_1file on a single file or on every file of a split set.

    For split files, once every piece has been processed the pieces of
    ``file_str`` are re-assembled with load_main and written to
    ``file_str + '.nc'``.

    Parameters
    ----------
    file_den : str
        Base name of the first input file given to stra_1file
        (presumably the density file -- confirm against stra_1file).
    file_str : str
        Base name of the second file given to stra_1file. In the split
        case it is also the base name read back by load_main and the stem
        of the merged output file.
    bpnt, denname, depname, H
        Forwarded unchanged to stra_1file. H defaults to 5000.
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    strname : str
        Name of the variable read back from the split files and stored in
        the merged dataset.
    timname : str, optional
        Name of the time variable. The default is None.
    Nproc : int, optional
        If bigger than 1 the split files are processed through
        dask.delayed. The default is 1.
    ind : tuple, optional
        Index (Z, Y, X) of the split files. If None a single file is
        processed. The default is None.

    Returns
    -------
    None.
    """
    # Single file: one call and nothing to merge afterwards.
    if ind is None:
        stra_1file(file_den, file_str, bpnt, denname, latname, lonname,
                   depname, strname, timname, H)
        return

    def process_piece(index):
        # Build the shared "_k_j_i" suffix once and process that piece.
        k, j, i = index
        suffix = '_' + '_'.join(str(n) for n in (k, j, i))
        stra_1file(file_den + suffix, file_str + suffix, bpnt, denname,
                   latname, lonname, depname, strname, timname, H)
        return 1

    # One iterable per dimension of the partition.
    indk = int2iterable(ind[0])
    indj = int2iterable(ind[1])
    indi = int2iterable(ind[2])

    for k in indk:
        pieces = [(k, j, i) for j in indj for i in indi]
        n_files = len(pieces)

        if Nproc > 1:
            # Parallel: each group from split_iterable is computed at once.
            groups = split_iterable(pieces, Nproc)
            print("Processing {} files with {} cores".format(n_files, Nproc))
            n_groups = len(groups)
            for step, group in enumerate(groups):
                print("\t{:.2f}%".format(100. * step * (k + 1) / n_groups))
                tasks = [dask.delayed(process_piece)(piece) for piece in group]
                dask.delayed(sum)(tasks).compute()
        else:
            # Serial: one piece after the other.
            print("Processing {} files".format(n_files))
            for step, piece in enumerate(pieces):
                print("\t{:.2f}%".format(100. * step * (k + 1) / n_files))
                process_piece(piece)

    # Re-assemble the split outputs (no depth variable is requested).
    [strval], lats, lons, tim, _ = load_main(file_str, [strname], latname,
                                             lonname, None, timname, ind)

    merged = {
        lonname: (('y', 'x'), lons),
        latname: (('y', 'x'), lats),
        strname: (('t', 'z', 'y', 'x'), strval),
    }
    if timname is not None:
        merged[timname] = ('t', tim)

    xr.Dataset(merged).to_netcdf(file_str + '.nc')
예제 #3
0
def load_split(filename, varnames, latname, lonname, depname, timname, ind):
    """
    Load split data and stitch the pieces back together.
    Check the documentation of load_main for more information.

    Files named ``filename_k_j_i.nc`` are opened for every combination of
    the indices in ``ind``. Pieces are joined along the last axis over i,
    along the second-to-last axis over j and along the third-to-last axis
    over k. A variable whose j-assembled array is 3-D is not joined over k:
    the array of the last k is kept instead.

    Returns
    -------
    tuple
        ``(vars, lat, lon, tim, dep)`` where ``vars`` is a list with one
        array per entry of ``varnames``; ``lat`` and ``lon`` are the
        arrays assembled for the last k; ``tim`` is the time variable of
        the last file read (None if ``timname`` is None) and ``dep`` is
        the depth variable joined over k (None if ``depname`` is None).
    """

    def grow(acc, piece, axis=None):
        # First piece starts the accumulator, later ones are appended.
        if acc is None:
            return piece
        return np.append(acc, piece, axis=axis)

    # Transform the floats to iterables
    indk = int2iterable(ind[0])
    indj = int2iterable(ind[1])
    indi = int2iterable(ind[2])

    nvars = len(varnames)
    tim_in = None
    dep_k = None
    vars_k = [None] * nvars

    for k in indk:
        vars_j = [None] * nvars
        lat_j = lon_j = None

        for j in indj:
            vars_i = [None] * nvars
            lat_i = lon_i = None

            for i in indi:
                path = filename + '_' + '_'.join(map(str, (k, j, i))) + '.nc'
                with xr.open_dataset(path) as data:
                    # Join this piece along the last axis.
                    for v, name in enumerate(varnames):
                        vars_i[v] = grow(vars_i[v], data[name].values,
                                         axis=-1)
                    lat_i = grow(lat_i, data[latname].values, axis=-1)
                    lon_i = grow(lon_i, data[lonname].values, axis=-1)
                    # Only the values of the most recent file are kept.
                    if depname is not None:
                        dep_in = data[depname].values
                    if timname is not None:
                        tim_in = data[timname].values

            # Join the finished row along the second-to-last axis.
            for v in range(nvars):
                vars_j[v] = grow(vars_j[v], vars_i[v], axis=-2)
            lat_j = grow(lat_j, lat_i, axis=-2)
            lon_j = grow(lon_j, lon_i, axis=-2)

        # Join the finished level along the third-to-last axis, except for
        # 3-D arrays, which simply replace what was stored before.
        for v in range(nvars):
            level = vars_j[v]
            if len(level.shape) != 3 and vars_k[v] is not None:
                vars_k[v] = np.append(vars_k[v], level, axis=-3)
            else:
                vars_k[v] = level
        if depname is not None:
            dep_k = grow(dep_k, dep_in)

    return vars_k, lat_j, lon_j, tim_in, dep_k
예제 #4
0
def den_main(filename_tem, filename_sal, savename,
             temname, salname, depname, denname, latname, lonname,
             pressure_lvl=False, mlat=None, Nproc=1,
             timname=None, ind=None):
    """
    Computes the density profile of a file or a set of split files.
    The work on each file is delegated to den_1file; for split files the
    suffix ``_k_j_i`` is appended to the three file names.

    Parameters
    ----------
    filename_tem : str
        Name of the temperature data file.
    filename_sal : str
        Name of the salinity data file.
    savename : str
        Name of the file to save.
    temname : str
        Name of the temperature variable.
    salname : str
        Name of the salinity variable.
    depname : str
        Name of the depth variable.
    denname : str
        Forwarded to den_1file (presumably the name given to the density
        variable -- confirm against den_1file).
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    pressure_lvl : Bool, optional
        True if the depth variable is the pressure level.
        The default is False.
    mlat : float, optional
        If provided, mean latitude is used to compute pressure levels.
        The default is None.
    Nproc : int, optional
        If bigger than 1 will run in parallel. The default is 1.
    timname : str, optional
        Name of the time variable. The default is None.
    ind : tuple, optional
        If the file or files to be loaded come from a partition from breakds
        a tuple with the index must be provided (Z, Y, X). Where X, Y, Z
        are the respective index in each dimension (floats or array like).
        The default is None.

    Returns
    -------
    None.

    """
    # Single file: nothing to iterate over.
    if ind is None:
        den_1file(filename_tem, filename_sal, savename,
                  temname, salname, depname,
                  denname, latname, lonname,
                  pressure_lvl, mlat, timname)
        return

    def process_piece(index):
        # All three file names share the same "_k_j_i" suffix.
        k, j, i = index
        suffix = '_' + '_'.join(str(n) for n in (k, j, i))
        den_1file(filename_tem + suffix, filename_sal + suffix,
                  savename + suffix,
                  temname, salname, depname, denname,
                  latname, lonname, pressure_lvl,
                  mlat, timname)
        return 1

    # One iterable per dimension, then every (k, j, i) combination.
    indk = int2iterable(ind[0])
    indj = int2iterable(ind[1])
    indi = int2iterable(ind[2])
    pieces = [(k, j, i) for k in indk for j in indj for i in indi]
    n_files = len(pieces)

    if Nproc > 1:
        # Parallel: each group from split_iterable is computed at once.
        groups = split_iterable(pieces, Nproc)
        print("Processing {} files with {} cores".format(n_files, Nproc))
        n_groups = len(groups)
        for step, group in enumerate(groups):
            print("\t{:.2f}%".format(100.*step/n_groups))
            tasks = [dask.delayed(process_piece)(piece) for piece in group]
            # process_piece returns 1; the sum only forces execution.
            dask.delayed(sum)(tasks).compute()
    else:
        # Serial: one piece after the other.
        print("Processing {} files".format(n_files))
        for step, piece in enumerate(pieces):
            print("\t{:.2f}%".format(100.*step/n_files))
            process_piece(piece)
예제 #5
0
def mask_main(filename,
              savename,
              latname,
              lonname,
              Nproc=1,
              ind=None,
              Nb=None):
    """
    Creates a mask of area values for a file or a set of split files.
    The work on each file is delegated to mask_1file; for split files the
    suffix ``_k_j_i`` is appended to both file names.

    Parameters
    ----------
    filename : str
        Name of the file to be opened.
    savename : str
        Name of the file to be saved.
    latname : str
        Name of the latitude variable.
    lonname : str
        Name of the longitude variable.
    Nproc : int, optional
        If bigger than 1 will run in parallel. The default is 1.
    ind : tuple, optional
        If the file(s) to be loaded come(s) from a partition from breakds
        a tuple with the index must be provided (Z, Y, X). Where X, Y, Z
        are the respective index in each dimension (floats or array like).
        The default is None.
    Nb : tuple of 3 int, optional
        Number of partitions to be made in the z, y and x axis.
        Only forwarded to mask_1file when ind is None; the split-file
        path passes None in its place. The default is None.

    Returns
    -------
    None.
    """
    # Single file: mask_1file receives ind (None here) and Nb as given.
    if ind is None:
        mask_1file(filename, savename, latname, lonname, ind, Nb)
        return

    def process_piece(index):
        # Input and output names share the same "_k_j_i" suffix.
        k, j, i = index
        suffix = '_' + '_'.join(str(n) for n in (k, j, i))
        mask_1file(filename + suffix, savename + suffix, latname, lonname,
                   ind, None)
        return 1

    # One iterable per dimension, then every (k, j, i) combination.
    indk = int2iterable(ind[0])
    indj = int2iterable(ind[1])
    indi = int2iterable(ind[2])
    pieces = [(k, j, i) for k in indk for j in indj for i in indi]
    n_files = len(pieces)

    if Nproc > 1:
        # Parallel: each group from split_iterable is computed at once.
        groups = split_iterable(pieces, Nproc)
        print("Processing {} files with {} cores".format(n_files, Nproc))
        n_groups = len(groups)
        for step, group in enumerate(groups):
            print("\t{:.2f}%".format(100. * step / n_groups))
            tasks = [dask.delayed(process_piece)(piece) for piece in group]
            # process_piece returns 1; the sum only forces execution.
            dask.delayed(sum)(tasks).compute()
    else:
        # Serial: one piece after the other.
        print("Processing {} files".format(n_files))
        for step, piece in enumerate(pieces):
            print("\t{:.2f}%".format(100. * step / n_files))
            process_piece(piece)