Example #1
    def __init__(self, inputdir, inputpattern=r".*\.cnv",
            cfg=None, saveauxiliary=False, timeout=60):
        """Quality control a collection of profiles found in a directory."""
        self.name = "ProfilesQCCollection"

        self.inputfiles = make_file_list(inputdir, inputpattern)

        self.profiles = process_profiles(self.inputfiles, cfg, saveauxiliary,
                timeout=timeout)
        # self.profiles = process_profiles_serial(self.inputfiles, cfg,
        #        saveauxiliary)

        self.data = {'id': [], 'profileid': [], 'profilename': []}
        self.flags = {}
        if saveauxiliary is True:
            self.auxiliary = {}

        offset = 0
        for p in self.profiles:
            N = p['timeS'].size

            # Be sure that all have the same length.
            for v in p.keys():
                assert p[v].size == N
            ids = offset + np.arange(N)
            self.data['id'] = np.append(self.data['id'],
                    ids).astype('i')
            profileid = [p.attributes['md5']] * N
            self.data['profileid'] = np.append(self.data['profileid'],
                    profileid)
            profilename = [p.attributes['filename']] * N
            self.data['profilename'] = np.append(self.data['profilename'],
                    profilename)
            for v in p.keys():
                if v not in self.data:
                    self.data[v] = ma.masked_all(offset)
                self.data[v] = ma.append(self.data[v], p[v])

            # ---- Dealing with the flags --------------------------------
            for v in p.flags.keys():
                if v not in self.flags:
                    self.flags[v] = {'id': [], 'profileid': []}
                self.flags[v]['id'] = np.append(self.flags[v]['id'],
                        ids).astype('i')
                self.flags[v]['profileid'] = np.append(
                        self.flags[v]['profileid'], profileid)
                for t in p.flags[v]:
                    if t not in self.flags[v]:
                        self.flags[v][t] = ma.masked_all(offset)
                    self.flags[v][t] = ma.append(self.flags[v][t],
                            p.flags[v][t])
            offset += N

        return
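The collector's key idiom is backfilling: when a variable first appears in some profile, it is seeded with ma.masked_all(offset) so all earlier rows stay masked, and ma.append keeps every column the same length. A reduced sketch of that idiom (hypothetical profiles):

import numpy.ma as ma

data, offset = {}, 0
profiles = [{'TEMP': ma.array([20.0, 21.0])},
            {'TEMP': ma.array([19.0]), 'PSAL': ma.array([35.1])}]
for p in profiles:
    n = len(next(iter(p.values())))
    for v in p:
        if v not in data:
            data[v] = ma.masked_all(offset)  # backfill earlier rows as masked
        data[v] = ma.append(data[v], p[v])
    offset += n
print(data['PSAL'])  # [-- -- 35.1]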
Example #2
def find_lines(peaks, fwhm, y=None, verbose=False):
    if y is None:
        y = np.arange(len(peaks))
    # Make all rows the same length (pad to the longest one)
    peaks = np.array(list(zip_longest(*peaks)), dtype='float')
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)
    for i in range(len(peaks)):
        tail = peaks[i:]
        line = tail[0]
        # Estimate the line position by the median of its points
        est = np.ones(len(y)) * ma.median(line)
        err = est - line
        # Points deviating by more than 5 median absolute errors belong to a
        # neighbouring line, so shift those columns one row over
        move_right = ma.filled((err > 5 * ma.median(ma.abs(err))), False)
        move_left = ma.filled((err < -5 * ma.median(ma.abs(err))), False)
        if np.sum(move_right) > 0:  # points below the median (to the left)
            n = np.sum(move_right.astype('int'))
            nonearray = ma.array([[None] * n], mask=[[True] * n])
            tail[:, move_right] = ma.append(tail[:, move_right][1:, :],
                                            nonearray, axis=0)
        if np.sum(move_left) > 0:
            n = np.sum(move_left.astype('int'))
            nonearray = ma.array([[None] * n], mask=[[True] * n])
            tail[:, move_left] = ma.append(nonearray,
                                           tail[:, move_left][:-1, :], axis=0)
        peaks[i:] = tail
    peaks = peaks.T
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)
    # Keep only lines present in more than a quarter of the rows
    good_lines = (np.sum(np.logical_not(msk), axis=0) > len(y) / 4.)
    peaks = peaks[:, good_lines]
    return peaks
Example #3
    def __reduce_masked(self, other, reduce_column, zs=None, galaxies=None):

        # use self.galaxies if an external list of galaxies is not provided
        if galaxies is None:
            galaxies = self.galaxies
        red_val = column_dict[reduce_column]
        other_indexes = ma.array(np.zeros_like(galaxies))
        for i, galaxy in enumerate(galaxies):
            if galaxy in other.galaxies:
                other_indexes[i] = np.where(other.galaxies == galaxy)[0][0]
            else:
                other_indexes[i] = ma.masked

        new_catalog = ma.array([])
        if self.reduced:
            columns = self.data.T
            new_catalog = ma.array(columns)
        else:
            column = ma.array(np.zeros_like(galaxies))
            for i, galaxy in enumerate(galaxies):
                if galaxy in self.galaxies:
                    index = np.where(self.galaxies == galaxy)[0][0]
                    column[i] = self.data[red_val][index]
                else:
                    column[i] = ma.masked

            column = column.reshape(column.shape[0], 1)
            new_catalog = ma.array(column)

        new_column = ma.array(np.zeros(galaxies.shape))
        for i, index in enumerate(other_indexes):
            # a masked index, or a missing value in the other catalog, is masked
            if ma.is_masked(index) or other.data[red_val][int(index)] is None:
                new_column[i] = ma.masked
            else:
                new_column[i] = other.data[red_val][int(index)]

        new_column = new_column.reshape(new_column.shape[0], 1)

        new_catalog = ma.append(new_catalog, new_column, axis=1)

        ncat = Catalog(path=self.path,
                       reduced=True,
                       external=new_catalog.T,
                       zs=zs,
                       galaxies=galaxies,
                       instrument=self.instrument,
                       classes=self.classes)

        ncat.param = reduce_column

        return ncat
Example #4
def _dict_append(data_dict, key, data):
    '''
    Append data to a masked array stored in a dictionary. If the key
    doesn't exist yet, create it with the given data.

    :param data_dict: The dictionary
    :param key: The key to append to
    :param data: The data to append
    '''
    import numpy.ma as ma

    if key in data_dict:
        data_dict[key] = ma.append(data_dict[key], data)
    else:
        data_dict[key] = data
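A minimal usage sketch (hypothetical data) showing how repeated calls accumulate both values and masks under one key:

import numpy.ma as ma

d = {}
_dict_append(d, 'T', ma.masked_values([20.1, -999.0], -999.0))
_dict_append(d, 'T', ma.masked_values([21.3], -999.0))
print(d['T'])       # [20.1 -- 21.3]
print(d['T'].mask)  # [False  True False]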
Example #5
    def param_selection(self, params):
        new_catalog = np.array([])
        size = np.size(self.data[0])

        for i, param in enumerate(params):
            red_val = column_dict[param]

            if not i:
                new_catalog = self.data[red_val].reshape(size, 1)
            else:
                new_catalog = ma.append(new_catalog,
                                        self.data[red_val].reshape(size, 1),
                                        axis=1)

        # note: np.array() drops the mask, so masked entries fall back to
        # their underlying data values
        return np.array(new_catalog).astype(float)
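The column-stacking idiom above, in isolation (toy masked data standing in for self.data):

import numpy as np
import numpy.ma as ma

data = ma.masked_invalid([[1.0, 2.0, np.nan], [4.0, 5.0, 6.0]])
size = np.size(data[0])
cols = None
for i, row in enumerate(data):
    col = row.reshape(size, 1)
    cols = col if i == 0 else ma.append(cols, col, axis=1)
print(cols.shape)  # (3, 2): one column per selected row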
Example #6
def speed(data):
    """
    """
    assert ('timeS' in data.keys()), \
            "Missing timeS in input data"
    assert ('LATITUDE' in data.keys()), \
            "Missing LATITUDE in input data"
    assert ('LONGITUDE' in data.keys()), \
            "Missing LONGITUDE in input data"

    dL = haversine(data['LATITUDE'][:-1], data['LONGITUDE'][:-1],
                   data['LATITUDE'][1:], data['LONGITUDE'][1:])
    dt = ma.diff(data['timeS'])

    speed = ma.append(ma.masked_array([0], [True]), dL / dt)

    return speed
Example #7
def speed(data):
    """
    """
    assert "timeS" in data.keys(), "Missing timeS in input data"
    assert "LATITUDE" in data.keys(), "Missing LATITUDE in input data"
    assert "LONGITUDE" in data.keys(), "Missing LONGITUDE in input data"

    dL = haversine(
        data["LATITUDE"][:-1],
        data["LONGITUDE"][:-1],
        data["LATITUDE"][1:],
        data["LONGITUDE"][1:],
    )
    dt = ma.diff(data["timeS"])

    speed = ma.append(ma.masked_array([0], [True]), dL / dt)

    return speed
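The idiom shared by both variants above: prepend a single masked element so the speed series keeps the same length as the input samples. In isolation:

import numpy.ma as ma

timeS = ma.array([0.0, 10.0, 20.0])
dL = ma.array([50.0, 55.0])  # toy distances between consecutive fixes
s = ma.append(ma.masked_array([0], [True]), dL / ma.diff(timeS))
print(s)                     # [-- 5.0 5.5]
print(s.size == timeS.size)  # True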
Example #8
def descentPrate(data):
    """

        It's probably a good idea to smooth it with a window of 2-5 seconds.
        After binned, the data will be probably groupped in bins of 1dbar,
          but the raw data might have more than one records per second, which
          might have plenty spikes. I'm looking here for inadequate casts
          lowered too fast, or maybe bad weather and a rolling vessel.

        Consider to create another test looking for excessive ups and downs.
    """
    assert ('timeS' in data.keys()), "timeS is not available"
    assert ('PRES' in data.keys()), "pressure is not available"
    assert data['timeS'].shape == data['PRES'].shape, \
            "t and p have different sizes"
    dt = ma.diff(data['timeS'])
    dp = ma.diff(data['PRES'])
    y = ma.append(ma.masked_all(1, dtype='i'), dp / dt)
    return y
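Stripped of the host checks, the core of descentPrate is one line (toy numbers):

import numpy.ma as ma

timeS = ma.array([0.0, 1.0, 2.0, 3.0])
PRES = ma.array([0.0, 2.0, 4.0, 5.0])  # dbar
y = ma.append(ma.masked_all(1, dtype='i'), ma.diff(PRES) / ma.diff(timeS))
print(y)  # [-- 2.0 2.0 1.0]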
Example #9
def _geo_globe(x, y, z, xmin=-180, modulo=False):
    """
    Ensure global coverage by fixing gaps over poles and across
    longitude seams. Increases the size of the arrays.
    """
    # Cover gaps over poles by appending polar data
    with np.errstate(all='ignore'):
        p1 = np.mean(z[0, :])  # do not ignore NaN if present
        p2 = np.mean(z[-1, :])
    ps = (-90, 90) if (y[0] < y[-1]) else (90, -90)
    z1 = np.repeat(p1, z.shape[1])
    z2 = np.repeat(p2, z.shape[1])
    y = ma.concatenate((ps[:1], y, ps[1:]))
    z = ma.concatenate((z1[None, :], z, z2[None, :]), axis=0)
    # Cover gaps over cartopy longitude seam
    # Ensure coordinates span 360 after modulus
    if modulo:
        if x[0] % 360 != (x[-1] + 360) % 360:
            x = ma.concatenate((x, (x[0] + 360,)))
            z = ma.concatenate((z, z[:, :1]), axis=1)
    # Cover gaps over basemap longitude seam
    # Ensure coordinates span exactly 360
    else:
        # Interpolate coordinate centers to seam. Size possibly augmented by 2
        if x.size == z.shape[1]:
            if x[0] + 360 != x[-1]:
                xi = np.array([x[-1], x[0] + 360])  # input coordinates
                xq = xmin + 360  # query coordinate
                zq = ma.concatenate((z[:, -1:], z[:, :1]), axis=1)
                zq = (zq[:, :1] * (xi[1] - xq) + zq[:, 1:] * (xq - xi[0])) / (xi[1] - xi[0])  # noqa: E501
                x = ma.concatenate(((xmin,), x, (xmin + 360,)))
                z = ma.concatenate((zq, z, zq), axis=1)
        # Extend coordinate edges to seam. Size possibly augmented by 1.
        elif x.size - 1 == z.shape[1]:
            if x[0] != xmin:
                x = ma.append(xmin, x)
                x[-1] = xmin + 360
                z = ma.concatenate((z[:, -1:], z), axis=1)
        else:
            raise ValueError('Unexpected shapes of coordinates or data arrays.')
    return x, y, z
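The modulo branch's seam fix, in isolation: repeat the first column at longitude x[0] + 360 so the plot has data on both sides of the seam.

import numpy as np
import numpy.ma as ma

x = np.array([0.0, 90.0, 180.0, 270.0])
z = ma.arange(8.0).reshape(2, 4)
x = ma.concatenate((x, (x[0] + 360,)))     # close the longitude seam
z = ma.concatenate((z, z[:, :1]), axis=1)  # repeat the first column
print(x[-1], z.shape)                      # 360.0 (2, 5)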
Example #10
    def rescuer(self):
        complete = False
        all_map = False
        pre_position = self.robot_position.copy()
        self.robot_position = self.frontier(self.op_map, self.map_size, self.t)
        self.op_map = self.inverse_sensor(self.robot_position,
                                          self.sensor_range, self.op_map,
                                          self.global_map)
        step_map = self.robot_model(self.robot_position, self.robot_size,
                                    self.t, self.op_map)
        map_local = self.local_map(self.robot_position, step_map,
                                   self.map_size,
                                   self.sensor_range + self.local_size)

        if self.plot:
            path = self.astar_path(self.op_map, pre_position.tolist(),
                                   self.robot_position.tolist())
            self.x2frontier = ma.append(self.x2frontier, ma.masked)
            self.y2frontier = ma.append(self.y2frontier, ma.masked)
            self.x2frontier = ma.append(self.x2frontier, path[1, :])
            self.y2frontier = ma.append(self.y2frontier, path[0, :])
            self.xPoint = ma.append(self.xPoint, ma.masked)
            self.yPoint = ma.append(self.yPoint, ma.masked)
            self.xPoint = ma.append(self.xPoint, self.robot_position[0])
            self.yPoint = ma.append(self.yPoint, self.robot_position[1])
            self.plot_env()

        if np.size(np.where(self.op_map == 255)) / np.size(
                np.where(self.global_map == 255)) > self.finish_percent:
            self.li_map += 1
            if self.li_map == self.map_number:
                self.li_map = 0
                all_map = True
            self.__init__(self.li_map, self.mode, self.plot)
            complete = True
        return map_local, complete, all_map
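Appending ma.masked between segments is a plotting trick: matplotlib breaks a line wherever the data are masked, so consecutive paths are drawn without a connecting stroke. A sketch:

import numpy.ma as ma

xs = ma.array([], dtype=float)
xs = ma.append(xs, [0.0, 1.0])  # first path
xs = ma.append(xs, ma.masked)   # break
xs = ma.append(xs, [5.0, 6.0])  # second path, not joined to the first
print(xs)  # [0.0 1.0 -- 5.0 6.0]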
Example #11
def estimate_cell_edges_2d(X, columns=True, rows=False):
    """Runs estimate_cell_edges on every column or row as specified"""

    both_axes = columns and rows
    transpose = rows and not columns

    # Transpose input if we want to work on rows
    X = X.T if transpose else X

    edges = ma.masked_invalid(np.full((X.shape[0] + 1, X.shape[1]), np.nan))
    for i, column in enumerate(X.T):
        edges[:, i] = estimate_cell_edges(column)

    if both_axes:
        edges = ma.append(edges, np.full((edges.shape[0], 1), np.nan), axis=1)
        for i, row in enumerate(edges):
            edges[i, :] = estimate_cell_edges(row[:-1])

    # Transpose output back to original shape if necessary
    edges = edges.T if transpose else edges

    return edges
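The axis=1 padding used above, in isolation: ma.append widens the array by one NaN column, which the row pass then overwrites.

import numpy as np
import numpy.ma as ma

edges = ma.masked_invalid(np.full((3, 2), np.nan))
edges = ma.append(edges, np.full((edges.shape[0], 1), np.nan), axis=1)
print(edges.shape)  # (3, 3)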
Example #12
import numpy.ma as ma

a = ma.masked_values([1, 2, 3], 2)
b = ma.masked_values([[4, 5, 6], [7, 8, 9]], 7)
print(ma.append(a, b))
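With the default axis=None, ma.append flattens both inputs and preserves their masks, so the call above prints:

[1 -- 3 4 5 6 -- 8 9]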
Example #13
                 time_differences_test[i - num_input_scenes - train_n -
                                       out_n - 1,
                                       j] = (dates_i[j] -
                                             dates_i_minus_1[j]).days
if count == 0:
    x_train = x_scenes_train
    t_train = time_differences_train
    y_train = y_scenes_train
    x_valid = x_scenes_valid
    t_valid = time_differences_valid
    y_valid = y_scenes_valid
    x_test = x_scenes_test
    t_test = time_differences_test
    y_test = y_scenes_test
else:
    x_train = ma.append(x_train, x_scenes_train, axis=0)
    t_train = np.append(t_train, time_differences_train, axis=0)
    y_train = ma.append(y_train, y_scenes_train, axis=0)
    x_valid = ma.append(x_valid, x_scenes_valid, axis=0)
    t_valid = np.append(t_valid, time_differences_valid, axis=0)
    y_valid = ma.append(y_valid, y_scenes_valid, axis=0)
    x_test = ma.append(x_test, x_scenes_test, axis=0)
    t_test = np.append(t_test, time_differences_test, axis=0)
    y_test = ma.append(y_test, y_scenes_test, axis=0)
count += 1
vol_cutoff_indices.append(y_train.shape[0])
vol_cutoff_indices_valid.append(y_valid.shape[0])
vol_cutoff_indices_test.append(y_test.shape[0])
vol_name_ls.append(vol)
print('\timported ' + str(x_scenes_train.shape[0]) +
      ' training scenes from ' + vol)
Example #14
    def _to_prices_values(self, initial_price):
        # `exp` and `append` are assumed to be numpy imports in the host module
        prices_values = self.values.cumsum()        # cumulative log-returns
        prices_values = exp(prices_values)          # back to price relatives
        prices_values = append([1], prices_values)  # prepend the base price

        return prices_values * initial_price
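The same idea in a standalone sketch (hypothetical log-returns): cumulative log-returns are exponentiated into price relatives, a base value of 1 is prepended, and everything is scaled by the initial price.

import numpy as np

log_returns = np.array([0.01, -0.02, 0.005])
prices = np.append([1.0], np.exp(log_returns.cumsum())) * 100.0
print(prices)  # ~ [100. 101.005 99.005 99.501]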
Example #15
    tStartnum = date2num(tStart.replace(tzinfo=None), units=ncTime[0].units)
    tEndnum = date2num(tEnd.replace(tzinfo=None), units=ncTime[0].units)

    maTime = ma.array(ncTime[0][:])
    msk = (maTime < tStartnum) | (maTime > tEndnum)
    maTime.mask = msk

    timeLen = 1
    if len(ncTime[0].shape) > 0:
        timeLen = ncTime[0].shape[0]

    if filen == 0:
        maTimeAll = maTime
        instrumentIndex = ma.ones(timeLen) * filen
    else:
        maTimeAll = ma.append(maTimeAll, maTime)
        instrumentIndex = ma.append(instrumentIndex, ma.ones(timeLen) * filen)

    nc.close()
    filen += 1

instrumentIndex.mask = maTimeAll.mask  # same mask for instrument index

idx = maTimeAll.argsort(0)  # sort by time dimension

#
# createTimeArray (1D, OBS) - from list of structures
#

dsTime = Dataset(files[0], mode="r")
Example #16
    def _add_row_block(self):
        """add a block of rows to the data array
        """
        block = ma.masked_all((self._row_block_size, ), dtype=self.dtype)

        self._set_data(ma.append(self._data, block))
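A self-contained sketch of the same grow-by-block idea: appending a masked block amortizes the cost of repeated row insertions.

import numpy.ma as ma

data = ma.masked_all((0,), dtype=float)
block = ma.masked_all((4,), dtype=float)  # hypothetical _row_block_size of 4
data = ma.append(data, block)
print(data.shape, bool(data.mask.all()))  # (4,) True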
Example #17
    def step(self, action_index):
        terminal = False
        complete = False
        new_location = False
        all_map = False
        self.old_position = self.robot_position.copy()
        self.old_op_map = self.op_map.copy()

        # take action
        self.take_action(action_index, self.robot_position)

        # collision check
        collision_points, collision_index = self.collision_check(
            self.old_position, self.robot_position, self.map_size,
            self.global_map)

        if collision_index:
            # move back to the nearest free cell before sensing
            self.robot_position = self.nearest_free(self.free_tree,
                                                    collision_points)
        self.op_map = self.inverse_sensor(self.robot_position,
                                          self.sensor_range, self.op_map,
                                          self.global_map)
        step_map = self.robot_model(self.robot_position, self.robot_size,
                                    self.t, self.op_map)

        map_local = self.local_map(self.robot_position, step_map,
                                   self.map_size,
                                   self.sensor_range + self.local_size)
        reward = self.reward_function.get_reward(self.robot_position,
                                                 self.old_op_map, self.op_map,
                                                 collision_index)

        if reward <= 0.02 and not collision_index:
            reward = -0.8
            new_location = True
            #terminal = True

        # during training, the robot is relocated if it has a collision
        # during testing, the robot will use collision check to avoid the collision
        if collision_index:
            if not self.mode:
                new_location = False
                terminal = False
            else:
                new_location = True
                terminal = True
            if self.plot and self.mode:
                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
                self.plot_env()
            self.robot_position = self.old_position.copy()
            self.op_map = self.old_op_map.copy()
            if self.plot and self.mode:
                self.xPoint[-1] = ma.masked
                self.yPoint[-1] = ma.masked
        else:
            if self.plot:
                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
                self.plot_env()

        # check if exploration is finished
        if np.size(np.where(self.op_map == 255)) / np.size(
                np.where(self.global_map == 255)) > self.finish_percent:
            self.li_map += 1
            if self.li_map == self.map_number:
                self.li_map = 0
                all_map = True
            self.__init__(self.li_map, self.mode, self.plot)
            complete = True
            new_location = False
            terminal = True

        return (
            self.op_map, self.robot_position
        ), reward, terminal, complete, new_location, collision_index, all_map
Example #18
def aggregate(files, varNames):
    # split this into   createCatalog - copy needed information into structure
    #                   createTimeArray (1D, OBS) - from list of structures
    #                   createNewFile
    #                   copyAttributes
    #                   updateAttributes
    #                   copyData

    #
    # createCatalog - copy needed information into structure
    #

    # look over all files, create a time array from all files
    # TODO: maybe delete files here that lack the variables we're interested in
    # TODO: Create set of variables in all files
    if not isinstance(varNames, list):

        varNames = [varNames]

    filen = 0
    for path_file in files:

        print("input file %s" % path_file)

        nc = Dataset(path_file, mode="r")

        ncTime = nc.get_variables_by_attributes(standard_name='time')

        time_deployment_start = nc.time_deployment_start
        time_deployment_end = nc.time_deployment_end

        tStart = parse(time_deployment_start)
        tEnd = parse(time_deployment_end)

        tStartnum = date2num(tStart.replace(tzinfo=None),
                             units=ncTime[0].units)
        tEndnum = date2num(tEnd.replace(tzinfo=None), units=ncTime[0].units)

        maTime = ma.array(ncTime[0][:])
        #msk = (maTime < tStartnum) | (maTime > tEndnum)
        #maTime.mask = msk
        maTime.mask = 0

        timeLen = 1
        if len(ncTime[0].shape) > 0:
            timeLen = ncTime[0].shape[0]

        if filen == 0:
            maTimeAll = maTime
            instrumentIndex = ma.ones(timeLen) * filen
        else:
            maTimeAll = ma.append(maTimeAll, maTime)
            instrumentIndex = ma.append(instrumentIndex,
                                        ma.ones(timeLen) * filen)

        nc.close()
        filen += 1

    instrumentIndex.mask = maTimeAll.mask  # same mask for instrument index

    idx = maTimeAll.argsort(0)  # sort by time dimension

    #
    # createTimeArray (1D, OBS) - from list of structures
    #

    dsTime = Dataset(files[0], mode="r")

    ncTime = dsTime.get_variables_by_attributes(standard_name='time')

    dates = num2date(maTimeAll[idx].compressed(),
                     units=ncTime[0].units,
                     calendar=ncTime[0].calendar)

    #
    # createNewFile
    #

    # create a new filename
    # IMOS_<Facility-Code>_<Data-Code>_<Start-date>_<Platform-Code>_FV<File-Version>_ <Product-Type>_END-<End-date>_C-<Creation_date>_<PARTX>.nc

    # TODO: what to do with <Data-Code> with a reduced number of variables

    splitPath = files[0].split("/")
    # get the last path item (the file name), split by _
    splitParts = splitPath[-1].split("_")

    tStartMasked = num2date(maTimeAll[idx].compressed()[0],
                            units=ncTime[0].units,
                            calendar=ncTime[0].calendar)
    tEndMasked = num2date(maTimeAll[idx].compressed()[-1],
                          units=ncTime[0].units,
                          calendar=ncTime[0].calendar)

    fileProductTypeSplit = splitParts[6].split("-")
    fileProductType = fileProductTypeSplit[0]

    # could use the global attribute site_code for the product type

    fileTimeFormat = "%Y%m%d"
    ncTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    outputName = splitParts[0] + "_" + splitParts[1] + "_" + splitParts[2] \
                 + "_" + tStartMasked.strftime(fileTimeFormat) \
                 + "_" + splitParts[4] \
                 + "_" + "FV02" \
                 + "_" + fileProductType + "-Aggregate-" + varNames[0] \
                 + "_END-" + tEndMasked.strftime(fileTimeFormat) \
                 + "_C-" + datetime.utcnow().strftime(fileTimeFormat) \
                 + ".nc"

    print("output file : %s" % outputName)

    ncOut = Dataset(outputName, 'w', format='NETCDF4')

    #
    # create additional dimensions needed
    #

    # for d in nc.dimensions:
    #     print("Dimension %s " % d)
    #     ncOut.createDimension(nc.dimensions[d].name, size=nc.dimensions[d].size)
    #

    tDim = ncOut.createDimension("OBS", len(maTimeAll.compressed()))
    iDim = ncOut.createDimension("instrument", len(files))
    # netCDF4 allows variable-length strings; fixed-length strings are used here
    strDim = ncOut.createDimension("strlen", 256)

    #
    # copyAttributes
    #

    # some of these need re-creating from the combined source data
    globalAttributeBlackList = [
        'time_coverage_end', 'time_coverage_start', 'time_deployment_end',
        'time_deployment_start', 'compliance_checks_passed',
        'compliance_checker_version', 'compliance_checker_imos_version',
        'date_created', 'deployment_code', 'geospatial_lat_max',
        'geospatial_lat_min', 'geospatial_lon_max', 'geospatial_lon_min',
        'geospatial_vertical_max', 'geospatial_vertical_min', 'instrument',
        'instrument_nominal_depth', 'instrument_sample_interval',
        'instrument_serial_number', 'quality_control_log', 'history',
        'netcdf_version'
    ]

    # global attributes
    # TODO: get list of variables, global attributes and dimensions from first pass above
    dsIn = Dataset(files[0], mode='r')
    for a in dsIn.ncattrs():
        if not (a in globalAttributeBlackList):
            #print("Attribute %s value %s" % (a, dsIn.getncattr(a)))
            ncOut.setncattr(a, dsIn.getncattr(a))

    for d in dsIn.dimensions:
        if d != 'TIME':
            ncOut.createDimension(d, dsIn.dimensions[d].size)

    varList = dsIn.variables

    # add the ancillary variables for the ones requested
    for v in dsIn.variables:
        if v in varNames:
            if hasattr(dsIn.variables[v], 'ancillary_variables'):
                varNames.extend(
                    dsIn.variables[v].ancillary_variables.split(" "))

    ncOut.setncattr("data_mode", "A")  # something to indicate its an aggregate

    # TIME variable
    # TODO: get TIME attributes from first pass above
    ncTimesOut = ncOut.createVariable("TIME", ncTime[0].dtype, ("OBS", ))

    #  copy TIME variable attributes
    for a in ncTime[0].ncattrs():
        if a not in ('comment', ):
            print("TIME Attribute %s value %s" % (a, ncTime[0].getncattr(a)))
            ncTimesOut.setncattr(a, ncTime[0].getncattr(a))

    ncTimesOut[:] = maTimeAll[idx].compressed()

    ncOut.setncattr("time_coverage_start", dates[0].strftime(ncTimeFormat))
    ncOut.setncattr("time_coverage_end", dates[-1].strftime(ncTimeFormat))
    ncOut.setncattr("date_created", datetime.utcnow().strftime(ncTimeFormat))
    ncOut.setncattr(
        "history",
        datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC : Create Aggregate"))

    # instrument index
    indexVarType = "i1"
    if len(files) > 128:
        indexVarType = "i2"
        if len(files) > 32767:  # if you're really keen
            indexVarType = "i4"

    #
    # create new variables needed
    #

    ncInstrumentIndexVar = ncOut.createVariable("instrument_index",
                                                indexVarType, ("OBS", ))
    ncInstrumentIndexVar.setncattr("long_name",
                                   "which instrument this obs is for")
    ncInstrumentIndexVar.setncattr("instance_dimension", "instrument")
    ncInstrumentIndexVar[:] = instrumentIndex[idx].compressed()

    # create a variable with the source file name
    ncFileNameVar = ncOut.createVariable("source_file", "S1",
                                         ("instrument", "strlen"))
    ncFileNameVar.setncattr("long_name", "source file for this instrument")

    ncInstrumentTypeVar = ncOut.createVariable("instrument_type", "S1",
                                               ("instrument", "strlen"))
    ncInstrumentTypeVar.setncattr(
        "long_name", "source instrument make, model, serial_number")

    filen = 0
    data = numpy.empty(len(files), dtype="S256")
    instrument = numpy.empty(len(files), dtype="S256")
    for path_file in files:
        data[filen] = path_file
        ncType = Dataset(path_file, mode='r')
        instrument[filen] = (ncType.instrument + '-' +
                             ncType.instrument_serial_number)
        ncType.close()
        filen += 1

    ncFileNameVar[:] = stringtochar(data)
    ncInstrumentTypeVar[:] = stringtochar(instrument)

    #
    # create a list of variables needed
    #

    filen = 0

    # variables we want regardless
    varNames.extend(['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH'])

    # remove any duplicates
    varNamesOut = set(varNames)

    #
    # copyData
    #

    # copy variable data from all files into output file

    # should we add uncertainty to variables here if they don't have one from a default set

    for v in varNamesOut:
        varOrder = -1
        filen = 0

        if v != 'TIME' and v in varList:

            # TODO: need to deal with files that don't have v variable in it
            for path_file in files:
                print("%d : %s file %s" % (filen, v, path_file))

                nc1 = Dataset(path_file, mode="r")

                maVariable = nc1.variables[v][:]
                varDims = varList[v].dimensions
                varOrder = len(varDims)

                if len(varDims) > 0:
                    # need to replace the TIME dimension with the now extended OBS dimension
                    # should we extend this to the CTD case where the variables have a DEPTH dimension and no TIME
                    if varList[v].dimensions[0] == 'TIME':
                        if filen == 0:
                            maVariableAll = maVariable

                            dim = ('OBS', ) + varDims[1:len(varDims)]
                            ncVariableOut = ncOut.createVariable(
                                v, varList[v].dtype, dim)
                        else:
                            maVariableAll = ma.append(
                                maVariableAll, maVariable,
                                axis=0)  # add new data to end along OBS axis
                    else:
                        if filen == 0:
                            maVariableAll = maVariable
                            maVariableAll.shape = (1, ) + maVariable.shape

                            dim = ('instrument', ) + varDims[0:len(varDims)]
                            varOrder += 1
                            ncVariableOut = ncOut.createVariable(
                                v, varList[v].dtype, dim)
                        else:
                            vdata = maVariable
                            vdata.shape = (1, ) + maVariable.shape
                            maVariableAll = ma.append(maVariableAll,
                                                      vdata,
                                                      axis=0)

                else:
                    if filen == 0:
                        maVariableAll = maVariable

                        dim = ('instrument', ) + varDims[0:len(varDims)]
                        ncVariableOut = ncOut.createVariable(
                            v, varList[v].dtype, dim)
                    else:
                        maVariableAll = ma.append(maVariableAll, maVariable)

                # copy the variable attributes
                # this ends up as the superset of attributes across all files
                for a in varList[v].ncattrs():
                    if a not in ('comment', '_FillValue') and not re.match(
                            r"calibration.*", a):
                        #print("%s Attribute %s value %s" % (v, a, varList[v].getncattr(a)))
                        ncVariableOut.setncattr(a, varList[v].getncattr(a))

                nc1.close()
                filen += 1

            # write the aggregated data to the output file
            if varOrder == 2:
                maVariableAll.mask = maTimeAll.mask  # apply the time mask
                ncVariableOut[:] = maVariableAll[idx][:].compressed()
            elif varOrder == 1:
                maVariableAll.mask = maTimeAll.mask  # apply the time mask
                ncVariableOut[:] = maVariableAll[idx].compressed()
            elif varOrder == 0:
                ncVariableOut[:] = maVariableAll

                # create the output global attributes
                if hasattr(ncVariableOut, 'standard_name'):
                    if ncVariableOut.standard_name == 'latitude':
                        laMax = maVariableAll.max(0)
                        laMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_lat_max", laMax)
                        ncOut.setncattr("geospatial_lat_min", laMin)
                    if ncVariableOut.standard_name == 'longitude':
                        loMax = maVariableAll.max(0)
                        loMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_lon_max", loMax)
                        ncOut.setncattr("geospatial_lon_min", loMin)
                    if ncVariableOut.standard_name == 'depth':
                        dMax = maVariableAll.max(0)
                        dMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_vertical_max", dMax)
                        ncOut.setncattr("geospatial_vertical_min", dMin)

    dsIn.close()  # we're done with the varList now

    ncOut.close()

    return outputName
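The aggregation core of this function, reduced to a sketch (toy arrays standing in for the per-file TIME variables):

import numpy.ma as ma

t1 = ma.array([3.0, 1.0])
t2 = ma.array([2.0, 4.0], mask=[False, True])
maTimeAll = ma.append(t1, t2)
idx = maTimeAll.argsort(0)          # masked values sort to the end
print(maTimeAll[idx].compressed())  # [1. 2. 3.]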
Example #19
        t_startnum = date2num(t_start.replace(tzinfo=None), units=nc_time[0].units)
        t_endnum = date2num(t_end.replace(tzinfo=None), units=nc_time[0].units)

        ma_time = ma.array(nc_time[0][:])
        msk = (ma_time < t_startnum) | (ma_time > t_endnum)
        ma_time.mask = msk

        time_len = 1
        if len(nc_time[0].shape) > 0:
            time_len = nc_time[0].shape[0]

        if filen == 0:
            ma_time_all = ma_time
            instrumentIndex = ma.ones(time_len) * filen
        else:
            ma_time_all = ma.append(ma_time_all, ma_time)
            instrumentIndex = ma.append(instrumentIndex, ma.ones(time_len) * filen)

    else:
        files.remove(path_file)
        print('%s not found in %s' % (var_to_agg[0], path_file))


    nc.close()
    filen += 1

print()

instrumentIndex.mask = ma_time_all.mask  # same mask for instrument index

idx = ma_time_all.argsort(0)  # sort by time dimension