def npy_to_dat(npy_path, dat_path):
    """Convert a .npy file to an Octave-readable .dat file.

    npy_path -- path to an existing .npy file.
    dat_path -- either a full .dat file path, or a directory; in the
    directory case the output name is derived from npy_path's stem.
    """
    if not os.path.isfile(npy_path):
        # Mirror the original dispatch: directories are recognized but
        # unsupported, anything else is rejected outright.
        if os.path.isdir(npy_path):
            raise Exception('npy_to_dat for directories is not implemented')
        raise Exception(
            'npy_path should be either directory or regular file')
    iutils.guard(
        npy_path.split('.')[-1] == 'npy', 'npy_path should have .npy ext')
    data = np.load(npy_path)
    if dat_path.split('.')[-1] != 'dat':
        # dat_path is a directory: create it and name the output after
        # the source file's stem.
        os.makedirs(dat_path, exist_ok=True)
        dat_path = os.path.join(dat_path, f'{Path(npy_path).stem}.dat')
    octave.gwrite(dat_path, data)
    logger.info(
        f'{Path(dat_path).stem}.dat is created here: {dat_path}')
def touch_timeline(self):
    """Build the timeline tensor from filenames in interpolated/.

    Filenames are expected to embed their date as *YYYYDDD*; the DDD
    (day-of-year) part of every matching filename is collected into a
    1xN float array and saved via get_timeline_path().
    """
    int_data_dir_path = self.get_interpolated_path()
    iutils.guard(os.path.isdir(int_data_dir_path),
                 'Run touch_interpolated_data() before this.')
    # Skip the aggregate artifacts that live in the same directory.
    filenames = [
        f for f in iutils.ls(int_data_dir_path)
        if self.unified_tensor_stem not in f and self.timeline_stem not in f
    ]
    timeline = []
    for f in filenames:
        m = re.search(r'\d{4}(\d{3})', f)
        if m:
            timeline.append(m.group(1))
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented equivalent (float64).
    timeline = np.array([timeline], dtype=float)
    timeline_path = self.get_timeline_path(extension='npy')
    np.save(timeline_path, timeline)
    logger.info(f'timeline is created here: {timeline_path}')
def touch_unified_tensor(self, move_new_axis_to_end):
    """Stack every interpolated chunk into a single tensor.

    The result is written next to the chunks (via
    get_unified_tensor_path()). When move_new_axis_to_end is true the
    stacking axis is moved to the last position.
    """
    chunks_dir = self.get_interpolated_path()
    iutils.guard(os.path.isdir(chunks_dir),
                 'Run touch_interpolated_data() before this.')
    # Only the per-date chunks participate; aggregate files are excluded.
    chunk_files = [
        f for f in iutils.ls(chunks_dir)
        if self.unified_tensor_stem not in f and self.timeline_stem not in f
    ]
    iutils.guard(all([f.split('.')[-1] == 'npy' for f in chunk_files]),
                 'Files in dir_path should have .npy ext')
    unified_tensor = np.array([np.load(f) for f in chunk_files])
    if move_new_axis_to_end:
        unified_tensor = np.moveaxis(unified_tensor, 0, -1)
    unified_tensor_path = self.get_unified_tensor_path(extension='npy')
    np.save(unified_tensor_path, unified_tensor)
    logger.info(f'unified_tensor is created here: {unified_tensor_path}')
def read_raw_data_files(self):
    """Yield (lons, lats, inv_obj, inv_obj_mask, filename) per raw file.

    Every file in raw_data_dir must be NetCDF (.nc). Longitude/latitude
    are returned unmasked; the investigated variable is returned as a
    plain array with NaN in masked (land) points.
    """
    data_files = iutils.ls(self.raw_data_dir)
    iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                 'NetCDF format is only supported format')
    for raw_data_file in data_files:
        ds = nc.Dataset(raw_data_file, mode='r')
        nav_group = ds.groups['navigation_data']
        # Coordinates arrive as masked arrays; strip the mask so callers
        # get plain ndarrays.
        lons = np.ma.getdata(nav_group.variables['longitude'][:])
        lats = np.ma.getdata(nav_group.variables['latitude'][:])
        geo_group = ds.groups['geophysical_data']
        inv_obj = geo_group.variables[self.investigated_obj][:]
        # Source mask convention: False = lake, True = land.
        # Flip it so True marks lake points.
        inv_obj_mask = np.invert(inv_obj.mask)
        # Unmask, writing NaN into the land points.
        inv_obj.fill_value = np.nan
        inv_obj = inv_obj.filled()
        yield lons, lats, inv_obj, inv_obj_mask, raw_data_file
def preserve_day_range_only(self, day_range):
    """Delete raw .nc files whose day-of-year is outside day_range.

    day_range -- iterable of day-of-year ints; a file is kept when its
    name matches raw_data_dir/<letters>YYYY<DDD> for some day in range.
    """
    data_files = iutils.ls(self.raw_data_dir)
    iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                 'NetCDF format is only supported format')
    # Collect the keep-list as a set: the original appended to a list and
    # then ran `f not in final_data_files` per file, which is O(n*m).
    files_to_keep = set()
    for day in day_range:
        # Choose all files for this specific day.
        r_compiler = re.compile(
            f'^{self.raw_data_dir}/' + r'[a-z]*\d{4}' + f'{day:03d}', re.I)
        files_to_keep.update(f for f in data_files if r_compiler.match(f))
    for f in data_files:
        if f not in files_to_keep:
            os.remove(f)
    logger.info(
        f'Day range: {day_range} is only kept in {self.raw_data_dir}.')
def preserve_best_day_only(self):
    """Keep, per day, only the interpolated matrix with highest fullness.

    Filenames in interpolated/ must embed their date as *YYYYDDD* and
    have a .npy extension. For each day, the file whose data has the
    highest iutils.calculate_fullness() against the static-grid mask is
    kept; every other file for that day is removed from disk. Ties keep
    the first file encountered (same as the original `>` comparison).
    """
    static_grid_dir_path = self.get_static_grid_path()
    iutils.guard(os.path.isdir(static_grid_dir_path),
                 'Run touch_static_grid() before this.')
    int_data_dir_path = self.get_interpolated_path()
    iutils.guard(os.path.isdir(int_data_dir_path),
                 'Run touch_interpolated_data() before this.')
    geo_obj_mask = np.load(self.get_static_grid_mask_path())
    data_files = [
        f for f in iutils.ls(int_data_dir_path)
        if self.unified_tensor_stem not in f and self.timeline_stem not in f
    ]
    iutils.guard(
        all(f.split('.')[-1] == 'npy' for f in data_files),
        'Interpolated chunks in interpolated/ should have .npy ext')
    # Perf: the original compiled a regex and re-filtered the whole file
    # list for every day, and used O(n) list membership twice. One pass
    # with a hoisted pattern groups the files by day instead.
    day_pattern = re.compile(
        f'^{int_data_dir_path}/' + r'[a-z]*\d{4}(\d{3})', re.I)
    files_by_day = {}
    for f in data_files:
        day = int(day_pattern.search(f).group(1))
        files_by_day.setdefault(day, []).append(f)
    files_to_keep = set()
    for day_files in files_by_day.values():
        # max() keeps the first file on ties, matching the original logic.
        best_file = max(
            day_files,
            key=lambda f: iutils.calculate_fullness(np.load(f), geo_obj_mask))
        files_to_keep.add(best_file)
    for f in data_files:
        if f not in files_to_keep:
            os.remove(f)
    logger.info(f'Best day is only kept in {int_data_dir_path}.')