def __init__(self, inputdir, inputpattern=r".*\.cnv", cfg=None,
             saveauxiliary=False, timeout=60):
    """Process every matching file and concatenate the resulting profiles.

    The file list comes from ``make_file_list`` and the profiles from
    ``process_profiles``. All profiles are then stacked end to end:
    ``self.data`` holds one array per variable plus the bookkeeping keys
    ``'id'``, ``'profileid'`` and ``'profilename'``; ``self.flags`` holds,
    per flagged variable, a dictionary with ``'id'``, ``'profileid'`` and
    one array per flag name.

    Parameters
    ----------
    inputdir :
        Handed to ``make_file_list`` as the place to look for files.
    inputpattern : str
        Pattern handed to ``make_file_list``. The default is written as a
        raw string so that ``\\.`` is not treated as a string escape.
    cfg :
        Forwarded unchanged to ``process_profiles``.
    saveauxiliary : bool
        Forwarded to ``process_profiles``. When it is exactly ``True`` an
        empty ``self.auxiliary`` dictionary is also created.
    timeout : int
        Forwarded to ``process_profiles``.

    Raises
    ------
    AssertionError
        If the variables of one profile do not all have the size of its
        ``'timeS'`` variable.
    """
    self.name = "ProfilesQCCollection"

    self.inputfiles = make_file_list(inputdir, inputpattern)
    self.profiles = process_profiles(self.inputfiles, cfg, saveauxiliary,
                                     timeout=timeout)

    self.data = {'id': [], 'profileid': [], 'profilename': []}
    self.flags = {}
    if saveauxiliary is True:
        self.auxiliary = {}

    # Number of records already stored; also the first id of the next profile.
    offset = 0
    for p in self.profiles:
        N = p['timeS'].size

        # Be sure that all variables have the same length.
        for v in p.keys():
            assert p[v].size == N

        ids = offset + np.arange(N)
        self.data['id'] = np.append(self.data['id'], ids).astype('i')

        # Repeat the per-profile identifiers once per record.
        profileid = [p.attributes['md5']] * N
        self.data['profileid'] = np.append(self.data['profileid'],
                                           profileid)
        profilename = [p.attributes['filename']] * N
        self.data['profilename'] = np.append(self.data['profilename'],
                                             profilename)

        for v in p.keys():
            if v not in self.data:
                # First time this variable shows up: pad the records of the
                # earlier profiles with masked values.
                self.data[v] = ma.masked_all(offset)
            self.data[v] = ma.append(self.data[v], p[v])
        # NOTE(review): a variable present in an earlier profile but missing
        # from this one is not padded here, so its array ends up shorter
        # than self.data['id'] -- confirm whether that can happen.

        # ---- Dealing with the flags --------------------------------
        for v in p.flags.keys():
            if v not in self.flags:
                self.flags[v] = {'id': [], 'profileid': []}
            self.flags[v]['id'] = np.append(self.flags[v]['id'],
                                            ids).astype('i')
            self.flags[v]['profileid'] = np.append(
                self.flags[v]['profileid'], profileid)
            for t in p.flags[v]:
                if t not in self.flags[v]:
                    self.flags[v][t] = ma.masked_all(offset)
                self.flags[v][t] = ma.append(self.flags[v][t],
                                             p.flags[v][t])

        offset += N
def find_lines(peaks, fwhm, y=None, verbose=False):
    """Align per-row peak positions into lines that run across the rows.

    ``peaks`` is transposed (padding short rows with NaN) so that entry
    ``i`` holds the i-th candidate peak of every row. For each candidate
    line, rows whose peak deviates from the line median by more than five
    times the median absolute deviation are shifted by one slot so that
    the outlier no longer belongs to this line.

    Parameters
    ----------
    peaks : sequence of sequences of float
        Peak positions, one inner sequence per row.
    fwhm :
        Not used by the current implementation.
    y : array-like, optional
        Row coordinates; only its length is used. Defaults to
        ``np.arange(len(peaks))``.
    verbose : bool
        Not used by the current implementation.

    Returns
    -------
    numpy.ma.MaskedArray
        Array with one row per input row and one column per retained line;
        a line is retained when it has valid entries in more than a quarter
        of the rows.
    """
    if y is None:
        y = np.arange(len(peaks))

    # Make all rows the same length (that of the longest one); the padding
    # becomes NaN and is masked.
    peaks = np.array(list(zip_longest(*peaks)), dtype='float')
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)

    for i in range(len(peaks)):
        # Candidate line i together with every candidate after it.
        remaining = peaks[i:]
        line = remaining[0]

        est = np.ones(len(y)) * ma.median(line)
        err = est - line
        move_right = ma.filled((err > 5 * ma.median(ma.abs(err))), False)
        move_left = ma.filled((err < -5 * ma.median(ma.abs(err))), False)

        if np.sum(move_right) > 0:
            # Peaks smaller than the median (to the left of the line): drop
            # them and pull the following peaks of that row up one slot.
            n_right = np.sum(move_right.astype('int'))
            filler = ma.array([[None] * n_right], mask=[[True] * n_right])
            remaining[:, move_right] = ma.append(
                remaining[:, move_right][1:, :], filler, axis=0)

        if np.sum(move_left) > 0:
            # Peaks larger than the median: leave a gap in this line and
            # push the peaks of that row down one slot.
            n_left = np.sum(move_left.astype('int'))
            filler = ma.array([[None] * n_left], mask=[[True] * n_left])
            remaining[:, move_left] = ma.append(
                filler, remaining[:, move_left][:-1, :], axis=0)

        peaks[i:] = remaining

    # Kept from the original implementation, which called it unconditionally.
    plt.show()

    peaks = peaks.T
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)

    # Keep only lines detected in more than a quarter of the rows.
    good_lines = (np.sum(np.logical_not(msk), axis=0) > len(y) / 4.)
    peaks = peaks[:, good_lines]
    return peaks
def __reduce_masked(self, other, reduce_column, zs=None, galaxies=None):
    """Build a reduced catalog joining one column of ``self`` and ``other``.

    The values of ``reduce_column`` are aligned on ``galaxies``: entries for
    galaxies missing from a catalog are masked. If ``self`` is already
    reduced, all of its existing columns are kept and the column taken from
    ``other`` is appended to them.

    :param other: Catalog providing the column to append.
    :param reduce_column: Key into ``column_dict`` selecting the column.
    :param zs: Forwarded unchanged to the new ``Catalog``.
    :param galaxies: Array of galaxies to align on. When ``None``, the
        galaxies of ``self`` are used.
    :return: A new ``Catalog`` created with ``reduced=True`` whose ``param``
        attribute is set to ``reduce_column``.
    """
    # use self galaxies if external list of galaxies is not provided
    if galaxies is None:
        galaxies = self.galaxies

    red_val = column_dict[reduce_column]

    # Position of every requested galaxy in `other` (masked when absent).
    other_indexes = ma.array(np.zeros_like(galaxies))
    for i, galaxy in enumerate(galaxies):
        if galaxy in other.galaxies:
            other_indexes[i] = np.where(other.galaxies == galaxy)[0][0]
        else:
            other_indexes[i] = ma.masked

    if self.reduced:
        # Already reduced: keep every existing column as it is.
        new_catalog = ma.array(self.data.T)
    else:
        column = ma.array(np.zeros_like(galaxies))
        for i, galaxy in enumerate(galaxies):
            if galaxy in self.galaxies:
                index = np.where(self.galaxies == galaxy)[0][0]
                column[i] = self.data[red_val][index]
            else:
                column[i] = ma.masked
        new_catalog = ma.array(column.reshape(column.shape[0], 1))

    # Column taken from `other`; absent galaxies and None values are masked.
    new_column = ma.array(np.zeros(galaxies.shape))
    for i, index in enumerate(other_indexes):
        if ma.is_masked(index):
            new_column[i] = ma.masked
            continue
        value = other.data[red_val][int(index)]
        if value is None:
            new_column[i] = ma.masked
        else:
            new_column[i] = value
    new_column = new_column.reshape(new_column.shape[0], 1)

    new_catalog = ma.append(new_catalog, new_column, axis=1)

    ncat = Catalog(path=self.path, reduced=True, external=new_catalog.T,
                   zs=zs, galaxies=galaxies, instrument=self.instrument,
                   classes=self.classes)
    ncat.param = reduce_column
    return ncat
def _dict_append(data_dict, key, data):
    '''
    Add data to the masked array stored under a dictionary key.

    A key that is not in the dictionary yet is created and bound to
    ``data`` itself; otherwise ``data`` is appended to the stored array.

    :param data_dict: The dictionary, updated in place
    :param key: The entry to extend or create
    :param data: The data to append
    '''
    import numpy.ma as ma

    if key not in data_dict:
        data_dict[key] = data
        return

    existing = data_dict[key]
    data_dict[key] = ma.append(existing, data)
def param_selection(self, params):
    """Return the selected parameters as the columns of a float array.

    Each name in ``params`` is translated to a row of ``self.data`` through
    ``column_dict``; the rows are reshaped to column vectors and joined side
    by side in the order given. An empty ``params`` yields an empty array.
    """
    size = np.size(self.data[0])

    columns = [self.data[column_dict[name]].reshape(size, 1)
               for name in params]
    if not columns:
        return np.array([]).astype(float)

    catalog = columns[0]
    for extra in columns[1:]:
        catalog = ma.append(catalog, extra, axis=1)

    # np.array drops any mask before the conversion to float.
    return np.array(catalog).astype(float)
def speed(data):
    """Speed between consecutive records of ``data``.

    ``haversine`` is evaluated between each position and the next one and
    the result is divided by the corresponding difference of ``timeS``.
    The output has one leading masked element, since the first record has
    no predecessor.
    """
    required = (
        ('timeS', "Missing timeS in input data"),
        ('LATITUDE', "Missing LATITUDE in input data"),
        ('LONGITUDE', "Missing LONGITUDE in input data"),
    )
    for key, message in required:
        assert key in data.keys(), message

    lat = data['LATITUDE']
    lon = data['LONGITUDE']
    distance = haversine(lat[:-1], lon[:-1], lat[1:], lon[1:])
    elapsed = ma.diff(data['timeS'])

    leading = ma.masked_array([0], [True])
    return ma.append(leading, distance / elapsed)
def speed(data):
    """Speed between consecutive records of ``data``.

    The output starts with one masked element (the first record has no
    predecessor) followed by the ``haversine`` result between successive
    positions divided by the successive differences of ``timeS``.
    """
    keys = data.keys()
    assert 'timeS' in keys, "Missing timeS in input data"
    assert 'LATITUDE' in keys, "Missing LATITUDE in input data"
    assert 'LONGITUDE' in keys, "Missing LONGITUDE in input data"

    start = slice(None, -1)   # every record except the last
    end = slice(1, None)      # every record except the first
    dL = haversine(data['LATITUDE'][start], data['LONGITUDE'][start],
                   data['LATITUDE'][end], data['LONGITUDE'][end])
    dt = ma.diff(data['timeS'])

    return ma.append(ma.masked_array([0], [True]), dL / dt)
def speed(data):
    """Speed between consecutive records of ``data``.

    Divides the ``haversine`` result between each position and the next by
    the matching difference of ``timeS``, and prepends one masked element
    because the first record has no predecessor.
    """
    for key, message in (
        ("timeS", "Missing timeS in input data"),
        ("LATITUDE", "Missing LATITUDE in input data"),
        ("LONGITUDE", "Missing LONGITUDE in input data"),
    ):
        assert key in data.keys(), message

    latitude, longitude = data["LATITUDE"], data["LONGITUDE"]
    step_length = haversine(
        latitude[:-1], longitude[:-1], latitude[1:], longitude[1:]
    )
    step_time = ma.diff(data["timeS"])

    placeholder = ma.masked_array([0], [True])
    return ma.append(placeholder, step_length / step_time)
def descentPrate(data):
    """Rate of change of pressure with time between consecutive records.

    The result has the same length as the inputs: one leading masked
    element followed by diff(PRES) / diff(timeS).

    Notes kept from the original implementation:

    It's probably a good idea to smooth it with a window of 2-5 seconds.
    After binning, the data will probably be grouped in bins of 1 dbar, but
    the raw data might have more than one record per second, which might
    have plenty of spikes.

    The aim is to find inadequate casts lowered too fast, or maybe bad
    weather and a rolling vessel.

    Consider creating another test looking for excessive ups and downs.
    """
    available = data.keys()
    assert 'timeS' in available, "timeS is not available"
    assert 'PRES' in available, "pressure is not available"

    elapsed = data['timeS']
    pressure = data['PRES']
    assert elapsed.shape == pressure.shape, "t and p have different sizes"

    dt = ma.diff(elapsed)
    dp = ma.diff(pressure)

    # The first record has no predecessor, so it gets a masked placeholder.
    leading = ma.masked_all(1, dtype='i')
    return ma.append(leading, dp / dt)
def descentPrate(data):
    """Pressure change per unit of time between consecutive records.

    Returns an array as long as the inputs whose first element is masked
    and whose remaining elements are diff(PRES) / diff(timeS).

    Notes kept from the original implementation:

    It's probably a good idea to smooth it with a window of 2-5 seconds.
    After binning, the data will probably be grouped in bins of 1 dbar, but
    the raw data might have more than one record per second, which might
    have plenty of spikes.

    The aim is to find inadequate casts lowered too fast, or maybe bad
    weather and a rolling vessel.

    Consider creating another test looking for excessive ups and downs.
    """
    for key, message in (('timeS', "timeS is not available"),
                         ('PRES', "pressure is not available")):
        assert key in data.keys(), message
    assert data['timeS'].shape == data['PRES'].shape, \
        "t and p have different sizes"

    dt, dp = ma.diff(data['timeS']), ma.diff(data['PRES'])
    rate = dp / dt

    # Prepend a masked placeholder for the first record.
    return ma.append(ma.masked_all(1, dtype='i'), rate)
def _geo_globe(x, y, z, xmin=-180, modulo=False):
    """
    Ensure global coverage by fixing gaps over poles and across longitude
    seams. Increases the size of the arrays.

    A row holding the mean of the first (last) data row is added at each
    pole. Along x, either one wrapped column is appended (``modulo=True``)
    or the data are extended to span ``xmin`` to ``xmin + 360``. Returns the
    updated ``(x, y, z)``; raises ``ValueError`` when, with
    ``modulo=False``, ``x`` is neither the same size as a row of ``z`` nor
    one element longer.
    """
    # Cover gaps over poles by appending polar data
    with np.errstate(all='ignore'):
        first_mean = np.mean(z[0, :])  # do not ignore NaN if present
        last_mean = np.mean(z[-1, :])
    if y[0] < y[-1]:
        poles = (-90, 90)
    else:
        poles = (90, -90)
    ncols = z.shape[1]
    first_row = np.repeat(first_mean, ncols)[None, :]
    last_row = np.repeat(last_mean, ncols)[None, :]
    y = ma.concatenate((poles[:1], y, poles[1:]))
    z = ma.concatenate((first_row, z, last_row), axis=0)

    # Cover gaps over cartopy longitude seam
    # Ensure coordinates span 360 after modulus
    if modulo:
        if x[0] % 360 != (x[-1] + 360) % 360:
            x = ma.concatenate((x, (x[0] + 360,)))
            z = ma.concatenate((z, z[:, :1]), axis=1)
        return x, y, z

    # Cover gaps over basemap longitude seam
    # Ensure coordinates span exactly 360
    if x.size == z.shape[1]:
        # Interpolate coordinate centers to seam. Size possibly augmented by 2
        if x[0] + 360 != x[-1]:
            seam = np.array([x[-1], x[0] + 360])  # input coordinates
            query = xmin + 360  # query coordinate
            pair = ma.concatenate((z[:, -1:], z[:, :1]), axis=1)
            weight_last = seam[1] - query
            weight_first = query - seam[0]
            zq = (
                pair[:, :1] * weight_last + pair[:, 1:] * weight_first
            ) / (seam[1] - seam[0])
            x = ma.concatenate(((xmin,), x, (xmin + 360,)))
            z = ma.concatenate((zq, z, zq), axis=1)
    elif x.size - 1 == z.shape[1]:
        # Extend coordinate edges to seam. Size possibly augmented by 1.
        if x[0] != xmin:
            x = ma.append(xmin, x)
            x[-1] = xmin + 360
            z = ma.concatenate((z[:, -1:], z), axis=1)
    else:
        raise ValueError('Unexpected shapes of coordinates or data arrays.')

    return x, y, z
def rescuer(self):
    """Move the robot to the position returned by ``self.frontier``.

    The observed map is refreshed at the new position, the plotting traces
    are extended when plotting is enabled, and the environment is
    re-initialised once the ratio of cells equal to 255 in ``op_map`` to
    those in ``global_map`` exceeds ``finish_percent``.

    Returns ``(map_local, complete, all_map)``: the local map around the
    new position, whether the finish threshold was exceeded, and whether
    the map counter wrapped around to 0.
    """
    previous_position = self.robot_position.copy()

    # Relocate, then update the observed map and the local view around it.
    self.robot_position = self.frontier(self.op_map, self.map_size, self.t)
    self.op_map = self.inverse_sensor(
        self.robot_position, self.sensor_range, self.op_map, self.global_map)
    step_map = self.robot_model(
        self.robot_position, self.robot_size, self.t, self.op_map)
    local_range = self.sensor_range + self.local_size
    map_local = self.local_map(
        self.robot_position, step_map, self.map_size, local_range)

    if self.plot:
        path = self.astar_path(self.op_map, previous_position.tolist(),
                               self.robot_position.tolist())
        # Each trace gets one masked entry before the new values.
        self.x2frontier = ma.append(
            ma.append(self.x2frontier, ma.masked), path[1, :])
        self.y2frontier = ma.append(
            ma.append(self.y2frontier, ma.masked), path[0, :])
        self.xPoint = ma.append(
            ma.append(self.xPoint, ma.masked), self.robot_position[0])
        self.yPoint = ma.append(
            ma.append(self.yPoint, ma.masked), self.robot_position[1])
        self.plot_env()

    complete = False
    all_map = False
    observed = np.size(np.where(self.op_map == 255))
    reference = np.size(np.where(self.global_map == 255))
    if observed / reference > self.finish_percent:
        self.li_map += 1
        if self.li_map == self.map_number:
            # Past the last map: start again from the first one.
            self.li_map = 0
            all_map = True
        self.__init__(self.li_map, self.mode, self.plot)
        complete = True

    return map_local, complete, all_map
def estimate_cell_edges_2d(X, columns=True, rows=False):
    """Apply ``estimate_cell_edges`` along the columns and/or rows of ``X``.

    With ``rows`` only, ``X`` is transposed on the way in and the result is
    transposed back on the way out. With both flags set, the columns are
    processed first and the rows of that intermediate result second.
    """
    both_axes = columns and rows
    transpose = rows and not columns

    # Work on the transpose when only rows were requested.
    if transpose:
        X = X.T

    n_rows, n_cols = X.shape[0], X.shape[1]
    edges = ma.masked_invalid(np.full((n_rows + 1, n_cols), np.nan))
    for j, column in enumerate(X.T):
        edges[:, j] = estimate_cell_edges(column)

    if both_axes:
        # Add one NaN column to receive the extra edge of every row.
        extra_column = np.full((edges.shape[0], 1), np.nan)
        edges = ma.append(edges, extra_column, axis=1)
        for i, row in enumerate(edges):
            edges[i, :] = estimate_cell_edges(row[:-1])

    # Undo the input transpose so the output matches the original layout.
    if transpose:
        return edges.T
    return edges
import numpy.ma as ma

# One-dimensional input with the value 2 masked out.
a = ma.masked_values([1, 2, 3], value=2)

# Two-dimensional input with the value 7 masked out.
b = ma.masked_values([[4, 5, 6], [7, 8, 9]], value=7)

# No axis is given, so both inputs are flattened before being joined.
joined = ma.append(a, b)
print(joined)
time_differences_test[i - num_input_scenes - train_n - out_n - 1, j] = (dates_i[j] - dates_i_minus_1[j]).days if count == 0: x_train = x_scenes_train t_train = time_differences_train y_train = y_scenes_train x_valid = x_scenes_valid t_valid = time_differences_valid y_valid = y_scenes_valid x_test = x_scenes_test t_test = time_differences_test y_test = y_scenes_test else: x_train = ma.append(x_train, x_scenes_train, axis=0) t_train = np.append(t_train, time_differences_train, axis=0) y_train = ma.append(y_train, y_scenes_train, axis=0) x_valid = ma.append(x_valid, x_scenes_valid, axis=0) t_valid = np.append(t_valid, time_differences_valid, axis=0) y_valid = ma.append(y_valid, y_scenes_valid, axis=0) x_test = ma.append(x_test, x_scenes_test, axis=0) t_test = np.append(t_test, time_differences_test, axis=0) y_test = ma.append(y_test, y_scenes_test, axis=0) count += 1 vol_cutoff_indices.append(y_train.shape[0]) vol_cutoff_indices_valid.append(y_valid.shape[0]) vol_cutoff_indices_test.append(y_test.shape[0]) vol_name_ls.append(vol) print('\timported ' + str(x_scenes_train.shape[0]) + ' training scenes from ' + vol)
def _to_prices_values(self, initial_price):
    """Turn the stored values into a price path starting at ``initial_price``.

    The exponential of the running sum of ``self.values`` is computed, a
    leading 1 is added for the starting point, and the whole path is scaled
    by ``initial_price``.
    """
    relative_path = exp(self.values.cumsum())
    return append([1], relative_path) * initial_price
tStartnum = date2num(tStart.replace(tzinfo=None), units=ncTime[0].units) tEndnum = date2num(tEnd.replace(tzinfo=None), units=ncTime[0].units) maTime = ma.array(ncTime[0][:]) msk = (maTime < tStartnum) | (maTime > tEndnum) maTime.mask = msk timeLen = 1 if len(ncTime[0].shape) > 0: timeLen = ncTime[0].shape[0] if filen == 0: maTimeAll = maTime instrumentIndex = ma.ones(timeLen) * filen else: maTimeAll = ma.append(maTimeAll, maTime) instrumentIndex = ma.append(instrumentIndex, ma.ones(timeLen) * filen) nc.close() filen += 1 instrumentIndex.mask = maTimeAll.mask # same mask for instrument index idx = maTimeAll.argsort(0) # sort by time dimension # # createTimeArray (1D, OBS) - from list of structures # dsTime = Dataset(files[0], mode="r")
def _add_row_block(self):
    """Extend the data array with one fully masked block of rows.

    The block has ``self._row_block_size`` elements of ``self.dtype``; the
    extended array is stored through ``self._set_data``.
    """
    shape = (self._row_block_size, )
    padding = ma.masked_all(shape, dtype=self.dtype)
    extended = ma.append(self._data, padding)
    self._set_data(extended)
def step(self, action_index):
    """Apply one action and report the resulting state and reward.

    The robot position is updated through ``take_action``, checked for a
    collision, the observed map is refreshed and a reward is obtained from
    ``self.reward_function``. On a collision with ``self.mode`` truthy, the
    position and observed map from before the action are restored.

    Returns a 7-tuple:
    ``((op_map, robot_position), reward, terminal, complete, new_location,
    collision_index, all_map)``.

    NOTE(review): the surrounding comments suggest ``self.mode`` is truthy
    during training and falsy during testing -- confirm against the class
    constructor.
    """
    terminal = False
    complete = False
    new_location = False
    all_map = False

    # Keep the pre-action state so it can be restored after a collision.
    self.old_position = self.robot_position.copy()
    self.old_op_map = self.op_map.copy()

    # take action
    self.take_action(action_index, self.robot_position)

    # collision check
    collision_points, collision_index = self.collision_check(
        self.old_position, self.robot_position, self.map_size,
        self.global_map)

    if collision_index:
        # Replace the colliding position by the one returned by nearest_free
        # before sensing.
        self.robot_position = self.nearest_free(self.free_tree,
                                                collision_points)
        self.op_map = self.inverse_sensor(self.robot_position,
                                          self.sensor_range, self.op_map,
                                          self.global_map)
        step_map = self.robot_model(self.robot_position, self.robot_size,
                                    self.t, self.op_map)
    else:
        self.op_map = self.inverse_sensor(self.robot_position,
                                          self.sensor_range, self.op_map,
                                          self.global_map)
        step_map = self.robot_model(self.robot_position, self.robot_size,
                                    self.t, self.op_map)

    # map_local is computed here but is not part of the returned tuple.
    map_local = self.local_map(self.robot_position, step_map,
                               self.map_size,
                               self.sensor_range + self.local_size)
    reward = self.reward_function.get_reward(self.robot_position,
                                             self.old_op_map, self.op_map,
                                             collision_index)

    # A collision-free step with a reward of at most 0.02 is penalised and
    # requests a new location.
    if reward <= 0.02 and not collision_index:
        reward = -0.8
        new_location = True
        #terminal = True

    # during training, the robot is relocated if it has a collision
    # during testing, the robot will use collision check to avoid the collision
    if collision_index:
        if not self.mode:
            new_location = False
            terminal = False
        else:
            new_location = True
            terminal = True
            # NOTE(review): the statements below are placed inside this
            # branch based on the comment above (only the relocating mode
            # rolls the state back); the original indentation was lost --
            # confirm against the upstream source.
            if self.plot and self.mode:
                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
                self.plot_env()
            # Roll back to the state from before the action.
            self.robot_position = self.old_position.copy()
            self.op_map = self.old_op_map.copy()
            if self.plot and self.mode:
                # Mask the point that was just appended to the trace.
                self.xPoint[self.xPoint.size - 1] = ma.masked
                self.yPoint[self.yPoint.size - 1] = ma.masked
    else:
        if self.plot:
            self.xPoint = ma.append(self.xPoint, self.robot_position[0])
            self.yPoint = ma.append(self.yPoint, self.robot_position[1])
            self.plot_env()

    # check if exploration is finished
    if np.size(np.where(self.op_map == 255)) / np.size(
            np.where(self.global_map == 255)) > self.finish_percent:
        self.li_map += 1
        if self.li_map == self.map_number:
            # Past the last map: wrap around to the first one.
            self.li_map = 0
            all_map = True
        # Re-initialise the environment on the next map.
        self.__init__(self.li_map, self.mode, self.plot)
        complete = True
        new_location = False
        terminal = True

    return (
        self.op_map, self.robot_position
    ), reward, terminal, complete, new_location, collision_index, all_map
def aggregate(files, varNames):
    """Aggregate variables from several netCDF files into one new file.

    The TIME values of all input files are concatenated and sorted into a
    single ``OBS`` dimension. Every requested variable (plus its ancillary
    variables and ``LATITUDE``, ``LONGITUDE``, ``NOMINAL_DEPTH``) is copied
    into the output: variables whose first dimension is ``TIME`` are
    re-ordered along ``OBS``, all others get a leading ``instrument``
    dimension with one entry per input file. An ``instrument_index``
    variable records which file each observation came from.

    The output file name is derived from the name of the first input file,
    which is split on ``_``, and the file is written to the current
    working directory.

    Parameters
    ----------
    files : list of str
        Paths of the input netCDF files. The first file provides the
        global attributes, dimensions and variable definitions.
    varNames : str or list of str
        Name(s) of the variables to aggregate. The caller's list is not
        modified.

    Returns
    -------
    str
        Name of the output file that was written.
    """
    # Work on a private copy: ancillary and always-included names are
    # appended below and must not leak into the caller's list.
    if isinstance(varNames, list):
        varNames = list(varNames)
    else:
        varNames = [varNames]

    #
    # createCatalog - copy needed information into structure
    #

    # look over all files, create a time array from all files
    # TODO: maybe delete files here without variables we're not interested in
    # TODO: Create set of variables in all files

    filen = 0
    for path_file in files:
        print("input file %s" % path_file)

        nc = Dataset(path_file, mode="r")

        ncTime = nc.get_variables_by_attributes(standard_name='time')

        time_deployment_start = nc.time_deployment_start
        time_deployment_end = nc.time_deployment_end

        tStart = parse(time_deployment_start)
        tEnd = parse(time_deployment_end)

        # Deployment window; only needed by the (currently disabled) mask
        # below, but still evaluated so that malformed attributes fail here.
        tStartnum = date2num(tStart.replace(tzinfo=None),
                             units=ncTime[0].units)
        tEndnum = date2num(tEnd.replace(tzinfo=None),
                           units=ncTime[0].units)

        maTime = ma.array(ncTime[0][:])
        # msk = (maTime < tStartnum) | (maTime > tEndnum)
        # maTime.mask = msk
        maTime.mask = 0

        timeLen = 1
        if len(ncTime[0].shape) > 0:
            timeLen = ncTime[0].shape[0]

        if filen == 0:
            maTimeAll = maTime
            instrumentIndex = ma.ones(timeLen) * filen
        else:
            maTimeAll = ma.append(maTimeAll, maTime)
            instrumentIndex = ma.append(instrumentIndex,
                                        ma.ones(timeLen) * filen)

        nc.close()
        filen += 1

    instrumentIndex.mask = maTimeAll.mask  # same mask for instrument index

    idx = maTimeAll.argsort(0)  # sort by time dimension

    #
    # createTimeArray (1D, OBS) - from list of structures
    #

    # Kept open until the end: ncTime (units, calendar, dtype, attributes)
    # is read from this dataset further down.
    dsTime = Dataset(files[0], mode="r")

    ncTime = dsTime.get_variables_by_attributes(standard_name='time')

    dates = num2date(maTimeAll[idx].compressed(), units=ncTime[0].units,
                     calendar=ncTime[0].calendar)

    #
    # createNewFile
    #

    # create a new filename
    # IMOS_<Facility-Code>_<Data-Code>_<Start-date>_<Platform-Code>_FV<File-Version>_<Product-Type>_END-<End-date>_C-<Creation_date>_<PARTX>.nc

    # TODO: what to do with <Data-Code> with a reduced number of variables

    splitPath = files[0].split("/")
    # get the last path item (the file name), split by _
    splitParts = splitPath[-1].split("_")

    tStartMaksed = num2date(maTimeAll[idx].compressed()[0],
                            units=ncTime[0].units,
                            calendar=ncTime[0].calendar)
    tEndMaksed = num2date(maTimeAll[idx].compressed()[-1],
                          units=ncTime[0].units,
                          calendar=ncTime[0].calendar)

    fileProductTypeSplit = splitParts[6].split("-")
    fileProductType = fileProductTypeSplit[0]

    # could use the global attribute site_code for the product type

    fileTimeFormat = "%Y%m%d"
    ncTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    outputName = splitParts[0] + "_" + splitParts[1] + "_" + splitParts[2] \
        + "_" + tStartMaksed.strftime(fileTimeFormat) \
        + "_" + splitParts[4] \
        + "_" + "FV02" \
        + "_" + fileProductType + "-Aggregate-" + varNames[0] \
        + "_END-" + tEndMaksed.strftime(fileTimeFormat) \
        + "_C-" + datetime.utcnow().strftime(fileTimeFormat) \
        + ".nc"

    print("output file : %s" % outputName)

    ncOut = Dataset(outputName, 'w', format='NETCDF4')

    #
    # create additional dimensions needed
    #

    ncOut.createDimension("OBS", len(maTimeAll.compressed()))
    ncOut.createDimension("instrument", len(files))
    # netcdf4 allow variable length strings, should we use them, probably not
    ncOut.createDimension("strlen", 256)

    #
    # copyAttributes
    #

    # some of these need re-creating from the combined source data
    globalAttributeBlackList = [
        'time_coverage_end', 'time_coverage_start', 'time_deployment_end',
        'time_deployment_start', 'compliance_checks_passed',
        'compliance_checker_version', 'compliance_checker_imos_version',
        'date_created', 'deployment_code', 'geospatial_lat_max',
        'geospatial_lat_min', 'geospatial_lon_max', 'geospatial_lon_min',
        'geospatial_vertical_max', 'geospatial_vertical_min', 'instrument',
        'instrument_nominal_depth', 'instrument_sample_interval',
        'instrument_serial_number', 'quality_control_log', 'history',
        'netcdf_version'
    ]

    # global attributes
    # TODO: get list of variables, global attributes and dimensions from first pass above
    dsIn = Dataset(files[0], mode='r')
    for a in dsIn.ncattrs():
        if a not in globalAttributeBlackList:
            ncOut.setncattr(a, dsIn.getncattr(a))

    for d in dsIn.dimensions:
        # TIME is replaced by OBS. Compare for equality: a substring test
        # would also drop dimensions named e.g. 'T' or 'IME'.
        if d != 'TIME':
            ncOut.createDimension(d, dsIn.dimensions[d].size)

    varList = dsIn.variables

    # add the ancillary variables for the ones requested
    for v in dsIn.variables:
        if v in varNames:
            if hasattr(dsIn.variables[v], 'ancillary_variables'):
                varNames.extend(
                    dsIn.variables[v].ancillary_variables.split(" "))

    ncOut.setncattr("data_mode", "A")  # something to indicate its an aggregate

    # TIME variable
    # TODO: get TIME attributes from first pass above
    ncTimesOut = ncOut.createVariable("TIME", ncTime[0].dtype, ("OBS", ))

    # copy TIME variable attributes
    for a in ncTime[0].ncattrs():
        if a not in ('comment', ):
            print("TIME Attribute %s value %s" % (a, ncTime[0].getncattr(a)))
            ncTimesOut.setncattr(a, ncTime[0].getncattr(a))

    ncTimesOut[:] = maTimeAll[idx].compressed()

    ncOut.setncattr("time_coverage_start", dates[0].strftime(ncTimeFormat))
    ncOut.setncattr("time_coverage_end", dates[-1].strftime(ncTimeFormat))
    ncOut.setncattr("date_created", datetime.utcnow().strftime(ncTimeFormat))
    ncOut.setncattr(
        "history",
        datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC : Create Aggregate"))

    # instrument index: pick the smallest integer type that fits the count
    indexVarType = "i1"
    if len(files) > 128:
        indexVarType = "i2"
    if len(files) > 32767:  # your really keen then
        indexVarType = "i4"

    #
    # create new variables needed
    #

    ncInstrumentIndexVar = ncOut.createVariable("instrument_index",
                                                indexVarType, ("OBS", ))
    ncInstrumentIndexVar.setncattr("long_name",
                                   "which instrument this obs is for")
    ncInstrumentIndexVar.setncattr("instance_dimension", "instrument")
    ncInstrumentIndexVar[:] = instrumentIndex[idx].compressed()

    # create a variable with the source file name
    ncFileNameVar = ncOut.createVariable("source_file", "S1",
                                         ("instrument", "strlen"))
    ncFileNameVar.setncattr("long_name", "source file for this instrument")

    ncInstrumentTypeVar = ncOut.createVariable("instrument_type", "S1",
                                               ("instrument", "strlen"))
    ncInstrumentTypeVar.setncattr(
        "long_name", "source instrument make, model, serial_number")

    filen = 0
    data = numpy.empty(len(files), dtype="S256")
    instrument = numpy.empty(len(files), dtype="S256")
    for path_file in files:
        data[filen] = path_file
        ncType = Dataset(path_file, mode='r')
        instrument[filen] = \
            ncType.instrument + '-' + ncType.instrument_serial_number
        ncType.close()  # only the two attributes above are needed
        filen += 1

    ncFileNameVar[:] = stringtochar(data)
    ncInstrumentTypeVar[:] = stringtochar(instrument)

    #
    # create a list of variables needed
    #

    filen = 0

    # variables we want regardless
    varNames.extend(['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH'])

    # remove any duplicates
    varNamesOut = set(varNames)

    #
    # copyData
    #

    # copy variable data from all files into output file

    # should we add uncertainty to variables here if they don't have one from a default set

    for v in varNamesOut:
        varOrder = -1
        filen = 0

        if v != 'TIME' and v in varList:

            # TODO: need to deal with files that don't have v variable in it
            for path_file in files:
                print("%d : %s file %s" % (filen, v, path_file))

                nc1 = Dataset(path_file, mode="r")

                maVariable = nc1.variables[v][:]
                varDims = varList[v].dimensions
                varOrder = len(varDims)

                if len(varDims) > 0:
                    # need to replace the TIME dimension with the now extended OBS dimension
                    # should we extend this to the CTD case where the variables have a DEPTH dimension and no TIME
                    if varList[v].dimensions[0] == 'TIME':
                        if filen == 0:
                            maVariableAll = maVariable

                            dim = ('OBS', ) + varDims[1:len(varDims)]
                            ncVariableOut = ncOut.createVariable(
                                v, varList[v].dtype, dim)
                        else:
                            # add new data to end along OBS axis
                            maVariableAll = ma.append(maVariableAll,
                                                      maVariable, axis=0)
                    else:
                        if filen == 0:
                            maVariableAll = maVariable
                            maVariableAll.shape = (1, ) + maVariable.shape

                            dim = ('instrument', ) + varDims[0:len(varDims)]
                            varOrder += 1
                            ncVariableOut = ncOut.createVariable(
                                v, varList[v].dtype, dim)
                        else:
                            vdata = maVariable
                            vdata.shape = (1, ) + maVariable.shape
                            maVariableAll = ma.append(maVariableAll, vdata,
                                                      axis=0)

                else:
                    if filen == 0:
                        maVariableAll = maVariable

                        dim = ('instrument', ) + varDims[0:len(varDims)]
                        ncVariableOut = ncOut.createVariable(
                            v, varList[v].dtype, dim)
                    else:
                        maVariableAll = ma.append(maVariableAll, maVariable)

                # copy the variable attributes
                # this is ends up as the super set of all files
                for a in varList[v].ncattrs():
                    if a not in ('comment', '_FillValue') and not re.match(
                            r"calibration.*", a):
                        ncVariableOut.setncattr(a, varList[v].getncattr(a))

                nc1.close()
                filen += 1

            # write the aggregated data to the output file
            if varOrder == 2:
                maVariableAll.mask = maTimeAll.mask  # apply the time mask
                ncVariableOut[:] = maVariableAll[idx][:].compressed()
            elif varOrder == 1:
                maVariableAll.mask = maTimeAll.mask  # apply the time mask
                ncVariableOut[:] = maVariableAll[idx].compressed()
            elif varOrder == 0:
                ncVariableOut[:] = maVariableAll

                # create the output global attributes
                # NOTE(review): this section is placed under the scalar
                # (varOrder == 0) branch; the original indentation was
                # lost -- confirm against the upstream source.
                if hasattr(ncVariableOut, 'standard_name'):
                    if ncVariableOut.standard_name == 'latitude':
                        laMax = maVariableAll.max(0)
                        laMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_lat_max", laMax)
                        ncOut.setncattr("geospatial_lat_min", laMin)
                    if ncVariableOut.standard_name == 'longitude':
                        loMax = maVariableAll.max(0)
                        loMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_lon_max", loMax)
                        ncOut.setncattr("geospatial_lon_min", loMin)
                    if ncVariableOut.standard_name == 'depth':
                        dMax = maVariableAll.max(0)
                        dMin = maVariableAll.min(0)
                        ncOut.setncattr("geospatial_vertical_max", dMax)
                        ncOut.setncattr("geospatial_vertical_min", dMin)

    dsIn.close()  # we're done with the varList now
    dsTime.close()  # and with the TIME metadata
    ncOut.close()

    return outputName
t_startnum = date2num(t_start.replace(tzinfo=None), units=nc_time[0].units) t_endnum = date2num(t_end.replace(tzinfo=None), units=nc_time[0].units) ma_time = ma.array(nc_time[0][:]) msk = (ma_time < t_startnum) | (ma_time > t_endnum) ma_time.mask = msk time_len = 1 if len(nc_time[0].shape) > 0: time_len = nc_time[0].shape[0] if filen == 0: ma_time_all = ma_time instrumentIndex = ma.ones(time_len) * filen else: ma_time_all = ma.append(ma_time_all, ma_time) instrumentIndex = ma.append(instrumentIndex, ma.ones(time_len) * filen) else: files.remove(path_file) print('%s not found in %s' % (var_to_agg[0], path_file)) nc.close() filen += 1 print() instrumentIndex.mask = ma_time_all.mask # same mask for instrument index idx = ma_time_all.argsort(0) # sort by time dimension