Example 1
    def npy_to_dat(npy_path, dat_path):
        """
            Transform data from .npy to .dat

            npy_path - can be a regular path.
            dat_path - can be a regular path or a directory.
        """
        if os.path.isfile(npy_path):
            iutils.guard(
                npy_path.split('.')[-1] == 'npy',
                'npy_path should have .npy ext')
            d = np.load(npy_path)

            if dat_path.split('.')[-1] == 'dat':
                # dat_path is a regular path

                octave.gwrite(dat_path, d)
            else:
                # dat_path is a directory
                os.makedirs(dat_path, exist_ok=True)
                dat_path = os.path.join(dat_path, f'{Path(npy_path).stem}.dat')
                octave.gwrite(dat_path, d)
            logger.info(
                f'{Path(dat_path).stem}.dat is created here: {dat_path}')
        elif os.path.isdir(npy_path):
            raise Exception('npy_to_dat for directories is not implemented')
        else:
            raise Exception(
                'npy_path should be either a directory or a regular file')
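
All of the examples in this section validate their inputs through iutils.guard, whose implementation is not part of the snippets. A minimal sketch of what such a helper presumably does (raise with the given message when the condition is false) could look like this:

    def guard(condition, message):
        # Hypothetical sketch of iutils.guard: raise when the condition does not hold.
        if not condition:
            raise Exception(message)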
Example 2
    def touch_timeline(self):
        """
            Touch timeline tensor in interpolated

            It is supposed that times are included in filenames
            of interpolated in format *YYYYDDD*
        """
        int_data_dir_path = self.get_interpolated_path()
        iutils.guard(os.path.isdir(int_data_dir_path),
                     'Run touch_interpolated_data() before this.')

        filenames = [
            f for f in iutils.ls(int_data_dir_path) if
            self.unified_tensor_stem not in f and self.timeline_stem not in f
        ]
        timeline = []
        for f in filenames:
            m = re.search(r'\d{4}(\d{3})', f)
            if m:
                timeline.append(m.group(1))

        timeline = np.array([timeline], dtype=np.float64)

        timeline_path = self.get_timeline_path(extension='npy')
        np.save(timeline_path, timeline)
        logger.info(f'timeline is created here: {timeline_path}')
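
As a quick illustration of the *YYYYDDD* convention mentioned in the docstring, the regex pulls the three-digit day of year out of a filename. The filename below is hypothetical:

    import re
    import numpy as np

    # Hypothetical filename following the *YYYYDDD* convention (year 2019, day 194).
    filename = 'A2019194_interpolated.npy'
    m = re.search(r'\d{4}(\d{3})', filename)
    print(m.group(1))                               # '194'
    print(np.array([[m.group(1)]], dtype=np.float64))  # [[194.]]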
Example 3
    def touch_unified_tensor(self, move_new_axis_to_end):
        """
            Unify all files from interpolated in 1 tensor and put it
            in the same directory as unified.npy
        """
        int_data_dir_path = self.get_interpolated_path()
        iutils.guard(os.path.isdir(int_data_dir_path),
                     'Run touch_interpolated_data() before this.')

        data_files = [
            f for f in iutils.ls(int_data_dir_path) if
            self.unified_tensor_stem not in f and self.timeline_stem not in f
        ]
        iutils.guard(all([f.split('.')[-1] == 'npy' for f in data_files]),
                     'Files in dir_path should have .npy ext')

        unified_tensor = []
        for f in data_files:
            d = np.load(f)
            unified_tensor.append(d)
        unified_tensor = np.array(unified_tensor)

        if move_new_axis_to_end:
            unified_tensor = np.moveaxis(unified_tensor, 0, -1)

        unified_tensor_path = self.get_unified_tensor_path(extension='npy')
        np.save(unified_tensor_path, unified_tensor)
        logger.info(f'unified_tensor is created here: {unified_tensor_path}')
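
For reference, stacking the per-file chunks adds a new leading axis, and move_new_axis_to_end only relocates it to the last position. A small shape check with synthetic 2x2 chunks:

    import numpy as np

    # Three synthetic 2x2 chunks stand in for the interpolated .npy files.
    chunks = [np.full((2, 2), i, dtype=float) for i in range(3)]
    unified = np.array(chunks)
    print(unified.shape)                       # (3, 2, 2) - new axis first
    print(np.moveaxis(unified, 0, -1).shape)   # (2, 2, 3) - new axis last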
Example 4
    def read_raw_data_files(self):
        data_files = iutils.ls(self.raw_data_dir)
        iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                     'NetCDF is the only supported format')

        for raw_data_file in data_files:
            ds = nc.Dataset(raw_data_file, mode='r')

            nav_group = ds.groups['navigation_data']
            # The navigation variables are returned as masked arrays
            lons = nav_group.variables['longitude'][:]
            # Unmask them for simpler downstream use
            lons = np.ma.getdata(lons)
            lats = nav_group.variables['latitude'][:]
            lats = np.ma.getdata(lats)

            geo_group = ds.groups['geophysical_data']
            inv_obj = geo_group.variables[self.investigated_obj][:]

            # The original mask is: False - lake, True - land
            # Invert it so that: True - lake, False - land
            inv_obj_mask = np.invert(inv_obj.mask)

            # Unmask and put NaN at the land points
            inv_obj.fill_value = np.nan
            inv_obj = inv_obj.filled()

            yield lons, lats, inv_obj, inv_obj_mask, raw_data_file
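
The masked-array handling is the core of this generator: the geophysical variable arrives masked (land points masked out), the mask is inverted so that True marks the lake, and the masked points are replaced by NaN. A synthetic 2x2 stand-in for inv_obj (not an actual netCDF variable) shows the effect:

    import numpy as np

    # Synthetic stand-in: masked entries (mask == True) are land, the rest is lake.
    inv_obj = np.ma.masked_array([[1.0, 2.0], [3.0, 4.0]],
                                 mask=[[False, True], [True, False]])

    inv_obj_mask = np.invert(inv_obj.mask)   # True - lake, False - land
    inv_obj.fill_value = np.nan
    inv_obj = inv_obj.filled()               # land points become nan

    print(inv_obj_mask)   # [[ True False]
                          #  [False  True]]
    print(inv_obj)        # [[ 1. nan]
                          #  [nan  4.]]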
Example 5
    def preserve_day_range_only(self, day_range):
        data_files = iutils.ls(self.raw_data_dir)
        iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                     'NetCDF is the only supported format')

        final_data_files = []
        for day in day_range:
            # Select all files for this specific day
            r_compiler = re.compile(
                f'^{self.raw_data_dir}/' + r'[a-z]*\d{4}' + f'{day:03d}', re.I)
            filtered_data_files = list(filter(r_compiler.match, data_files))

            final_data_files.extend(filtered_data_files)

        files_to_del = [f for f in data_files if f not in final_data_files]

        for f in files_to_del:
            os.remove(f)

        logger.info(
            f'Only day range {day_range} is kept in {self.raw_data_dir}.')
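
The day filter builds a regex anchored to the raw-data directory: an optional letter prefix ([a-z]*), four digits for the year, then the zero-padded day of year. With a hypothetical directory layout and filenames:

    import re

    raw_data_dir = 'data/raw'                 # hypothetical directory
    data_files = [
        'data/raw/A2019194120000.nc',         # day 194
        'data/raw/A2019195120000.nc',         # day 195
    ]

    day = 194
    r_compiler = re.compile(
        f'^{raw_data_dir}/' + r'[a-z]*\d{4}' + f'{day:03d}', re.I)
    print(list(filter(r_compiler.match, data_files)))
    # ['data/raw/A2019194120000.nc']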
Example 6
    def preserve_best_day_only(self):
        """
            Preserves the best matrix for one day.

            Filenames in interpolated should be in format *YYYYDDD*.
            .npy extension is only supported.
        """
        static_grid_dir_path = self.get_static_grid_path()
        iutils.guard(os.path.isdir(static_grid_dir_path),
                     'Run touch_static_grid() before this.')

        int_data_dir_path = self.get_interpolated_path()
        iutils.guard(os.path.isdir(int_data_dir_path),
                     'Run touch_interpolated_data() before this.')

        mask_path = self.get_static_grid_mask_path()
        geo_obj_mask = np.load(mask_path)

        data_files = [
            f for f in iutils.ls(int_data_dir_path) if
            self.unified_tensor_stem not in f and self.timeline_stem not in f
        ]
        iutils.guard(
            all([f.split('.')[-1] == 'npy' for f in data_files]),
            'Interpolated chunks in interpolated/ should have .npy ext')

        final_data_files = []
        already_analyzed_days = []
        for f in data_files:
            # Pull day from filename
            day = int(
                re.search(f'^{int_data_dir_path}/' + r'[a-z]*\d{4}(\d{3})', f,
                          re.I).group(1))
            if day in already_analyzed_days:
                continue
            # Select all files for this specific day
            r_compiler = re.compile(
                f'^{int_data_dir_path}/' + r'[a-z]*\d{4}' + f'{day:03d}', re.I)
            filtered_data_files = list(filter(r_compiler.match, data_files))

            datasets = [np.load(f) for f in filtered_data_files]

            fullness = iutils.calculate_fullness(datasets[0], geo_obj_mask)
            best_file = filtered_data_files[0]
            for i, d in enumerate(datasets[1:], 1):
                new_fullness = iutils.calculate_fullness(d, geo_obj_mask)
                if new_fullness > fullness:
                    fullness = new_fullness
                    best_file = filtered_data_files[i]

            final_data_files.append(best_file)
            already_analyzed_days.append(day)

        files_to_del = [f for f in data_files if f not in final_data_files]

        for f in files_to_del:
            os.remove(f)

        logger.info(f'Only the best file per day is kept in {int_data_dir_path}.')
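
iutils.calculate_fullness is not shown in these examples. Judging by how it is used here (higher is better, compared against the static lake mask), it presumably measures how many lake points actually carry data; a hypothetical sketch, which may differ from the real helper:

    import numpy as np

    # Hypothetical sketch: fraction of lake points (mask == True) that are not NaN.
    def calculate_fullness(data, geo_obj_mask):
        lake_points = data[geo_obj_mask]
        return np.count_nonzero(~np.isnan(lake_points)) / lake_points.size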