Esempio n. 1
0
def get_lats_lon(h5_data: h5py.File, h5_size: int):
    """Return the first latitude/longitude pair found in an HDF5 file.

    Sample indices 0 .. h5_size - 1 are probed in order; at each index both
    the "lat" and the "lon" samples are fetched, and the scan stops at the
    first index where neither is None.

    Arguments:
        h5_data {h5py.File} -- opened HDF5 file handed to
            utils.fetch_hdf5_sample
        h5_size {int} -- number of sample indices to probe

    Returns:
        tuple -- (lats, lons) from the last probed index; either element may
            be None when no complete pair was found, and both are None when
            h5_size is not positive.
    """
    lats = lons = None
    for sample_idx in range(h5_size):
        lats = utils.fetch_hdf5_sample("lat", h5_data, sample_idx)
        lons = utils.fetch_hdf5_sample("lon", h5_data, sample_idx)
        if lats is not None and lons is not None:
            break
    return lats, lons
def get_lats_longs_goes13_coords(catalog):
    """Return one latitude/longitude pair read from the catalog's HDF5 files.

    The distinct values of the 'hdf5_16bit_path' column are visited in the
    order produced by ``groupby``. For each file a random frame offset is
    drawn and its "lat" and "lon" samples are fetched; the search stops at
    the first file where both are available.

    Arguments:
        catalog {pandas.DataFrame} -- must contain an 'hdf5_16bit_path' column

    Returns:
        tuple -- (lats, lons) as returned by utils.fetch_hdf5_sample

    Raises:
        AssertionError -- if no file yielded both latitude and longitude
    """
    goes13_ds = 'hdf5_16bit_path'
    lats = None
    lons = None

    for path in catalog.groupby(goes13_ds).groups:
        with h5py.File(path, "r") as h5_data:
            n_frames = (h5_data.attrs["global_dataframe_end_idx"] -
                        h5_data.attrs["global_dataframe_start_idx"])
            if n_frames <= 0:
                # np.random.randint(0, 0) raises ValueError; an empty file
                # simply has nothing to offer, so move on to the next one.
                continue
            offset = np.random.randint(0, n_frames)
            lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
            lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
        # Keep searching while EITHER coordinate is missing. The previous
        # `and` condition stopped as soon as one of them was found.
        if lats is not None and lons is not None:
            break

    assert lats is not None and lons is not None, 'No latitude and longitude values were found'
    return lats, lons
    def get_image(self, timestamp: datetime, station_coords: tuple):
        """Return a normalized 5-channel crop centered on a station.

        Arguments:
            timestamp {datetime} -- passed to self.metadata.get_path to locate
                the HDF5 file and frame offset
            station_coords {tuple} -- station (latitude, longitude); only the
                first two elements are read

        Returns:
            np.ndarray -- channels-last array (image_size, image_size, 5) in
                the nominal case. An all-zero array of that shape is returned
                when the timestamp has no file or the file has no lat/lon for
                that offset. A missing channel contributes an all-zero plane.
        """
        empty_image_shape = (self.image_size, self.image_size, 5)

        path_offset = self.metadata.get_path(timestamp)
        if path_offset is None:
            return np.zeros(empty_image_shape)
        path, offset = path_offset

        # The context manager closes the file on every return path. The
        # previous code opened it and never closed it.
        with h5py.File(path, "r") as h5_data:
            # Get latitude & longitude stored in the file
            lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
            lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
            if lats is None or lons is None:
                return np.zeros(empty_image_shape)

            # Get data from the 5 channels; None marks an unavailable channel
            images = []
            for channel in ('ch1', 'ch2', 'ch3', 'ch4', 'ch6'):
                img = utils.fetch_hdf5_sample(channel, h5_data, offset)
                images.append(img if isinstance(img, np.ndarray) else None)

            # Locate the pixel closest to the station.
            # assumes lats/lons are 1-D coordinate axes -- TODO confirm
            row = np.argmin(np.abs(lats - station_coords[0]))
            col = np.argmin(np.abs(lons - station_coords[1]))
            pixels = self.image_size // 2
            adjustement = self.image_size % 2  # Adjustement if image_size is odd
            # NOTE(review): a station closer than `pixels` to the frame border
            # yields a negative slice start and therefore a truncated crop.
            rows = slice(row - pixels, row + pixels + adjustement)
            cols = slice(col - pixels, col + pixels + adjustement)

            cropped_images = []
            for img, mean, std in zip(images, data.images_mean.values(),
                                      data.images_std.values()):
                if img is None:
                    # Emit the zero plane directly at crop size. Slicing a
                    # small zero placeholder with full-frame pixel
                    # coordinates produced a ragged (usually empty) crop.
                    cropped_images.append(
                        np.zeros((self.image_size, self.image_size)))
                else:
                    normalized = (img - mean) / std  # Normalize image
                    cropped_images.append(normalized[rows, cols])
            return np.moveaxis(np.array(cropped_images), 0, -1)
def fetch_frames(datetimes, frames_df, channels, seqs):
    """Load the requested frames from their HDF5 files into one dense array.

    Arguments:
        datetimes -- sized collection; only its length is used (first axis)
        frames_df {pandas.DataFrame} -- one row per frame with columns
            'path' (HDF5 file), 'offset' (frame index passed to
            utils.fetch_hdf5_sample) and 'position' (indexable pair giving
            the first two output indices)
        channels -- sequence of channel names to fetch for every frame
        seqs {int} -- size of the second output axis

    Returns:
        np.ndarray -- shape (len(datetimes), seqs, len(channels),
            IMAGE_HEIGHT, IMAGE_WIDTH). Slots whose sample is missing or has
            an unexpected shape, and slots never referenced by frames_df,
            are zero.
    """
    # np.zeros instead of np.empty: positions that frames_df never mentions
    # must not leak uninitialised memory to the caller.
    output = np.zeros(
        (len(datetimes), seqs, len(channels), IMAGE_HEIGHT, IMAGE_WIDTH))
    expected_shape = (IMAGE_HEIGHT, IMAGE_WIDTH)

    # Group by file so each HDF5 file is opened exactly once.
    for path, group in frames_df.groupby('path', sort=False):
        with h5py.File(path, "r") as h5_data:
            for position, frame in zip(group['position'], group['offset']):
                for c, channel in enumerate(channels):
                    sample = utils.fetch_hdf5_sample(channel, h5_data, frame)
                    if sample is None or sample.shape != expected_shape:
                        output[position[0], position[1], c] = 0
                    else:
                        output[position[0], position[1],
                               c] = tf.keras.utils.normalize(sample)

    return output
Esempio n. 5
0
    def get_full_image(image_data):
        """Build the multi-channel crop around the station for one frame.

        Reads `channels`, `image_time_offset_idx`, `station_pixel_coords` and
        `cropped_img_size` from the enclosing scope.

        Arguments:
            image_data -- opened HDF5 handle passed to utils.fetch_hdf5_sample

        Returns:
            np.ndarray or None -- float64 array of shape (cropped_img_size,
                cropped_img_size, len(channels)), or None as soon as one
                channel is missing, is not 650x1500, or cannot be cropped.
        """
        all_channels = np.empty(
            [cropped_img_size, cropped_img_size,
             len(channels)])
        for ch_idx, channel in enumerate(channels):
            raw_img = utils.fetch_hdf5_sample(channel, image_data,
                                              image_time_offset_idx)
            if raw_img is None or raw_img.shape != (650, 1500):
                return None
            try:
                # Crop a copy so the fetched sample is never modified.
                array_cropped = utils.crop(copy.deepcopy(raw_img),
                                           station_pixel_coords,
                                           cropped_img_size)
            except Exception:
                # A failed crop invalidates the frame. `Exception` (not a
                # bare except) lets KeyboardInterrupt/SystemExit propagate.
                return None
            # raw_data[array_idx, station_idx, channel_idx, ...] = cv.flip(array_cropped, 0) # TODO why the flip??

            #array = (((array.astype(np.float32) - norm_min) / (norm_max - norm_min)) * 255).astype(np.uint8) # TODO norm?
            all_channels[:, :, ch_idx] = array_cropped.astype(
                np.float64)  # convert to image format

        return all_channels
Esempio n. 6
0
    def generator(self, data):
        """Yield per-station crops of every frame, one channel at a time.

        Arguments:
            data {tuple} -- two-element tuple whose second element is the
                path of the HDF5 file; the first element is unused here

        Yields:
            list -- one list per channel ('ch1', 'ch2', 'ch3', 'ch4', 'ch6').
                It holds one entry per frame offset (0..95), each entry being
                a list with one crop per station. Frames that are missing or
                not (IMAGE_HEIGHT, IMAGE_WIDTH) contribute zero crops of size
                config.crop_size. Nothing is yielded when `path` is not an
                existing file.

        Side effects:
            Fills self.stations_px_center (station -> (row, col) pixel) the
            first time a valid frame is seen while that mapping is empty.
        """
        channels = ('ch1', 'ch2', 'ch3', 'ch4', 'ch6')
        frames_per_file = 96
        _, path = data
        lats = None
        lons = None
        # Bound before the try block so the cleanup in `finally` is valid
        # even when the file is missing or fails to open.
        ch_images = None

        try:
            if not os.path.isfile(path):
                return
            with h5py.File(path, "r") as h5_data:
                for channel in channels:
                    ch_images = []
                    for offset in range(frames_per_file):
                        frame = utils.fetch_hdf5_sample(
                            channel, h5_data, offset)
                        if frame is None or frame.shape != (IMAGE_HEIGHT,
                                                            IMAGE_WIDTH):
                            # NOTE: the same zero array object is repeated
                            # for every station (as before).
                            ch_images.append([
                                np.zeros((self.config.crop_size,
                                          self.config.crop_size))
                            ] * len(self.stations))
                            continue

                        if not self.stations_px_center:
                            # Look for the first offset holding both lat and
                            # lon. The search is bounded by the number of
                            # frames so a file without coordinates cannot
                            # loop forever.
                            i = 0
                            while ((lats is None or lons is None)
                                   and i < frames_per_file):
                                lats = utils.fetch_hdf5_sample(
                                    "lat", h5_data, i)
                                lons = utils.fetch_hdf5_sample(
                                    "lon", h5_data, i)
                                i += 1

                            if lats is None or lons is None:
                                # No coordinates: this frame cannot be
                                # cropped and is skipped (no entry appended).
                                continue

                            # Linear interpolation of each station's lat/lon
                            # onto the pixel grid spanned by lats and lons.
                            centers = {}
                            for s in self.stations:
                                lat, lon, _ = self.stations[s]
                                px_lat = len(lats) * (
                                    (lat - lats[0]) / (lats[-1] - lats[0]))
                                px_lon = len(lons) * (
                                    (lon - lons[0]) / (lons[-1] - lons[0]))
                                centers[s] = (int(px_lat), int(px_lon))
                            self.stations_px_center = centers

                            # Drop the coordinate arrays. Rebinding to None
                            # (not `del`) keeps the names usable if this
                            # branch runs again.
                            lats = None
                            lons = None
                            gc.collect()

                        half = self.px_offset
                        ch_images.append([
                            frame[center[0] - half:center[0] + half,
                                  center[1] - half:center[1] + half].copy()
                            for center in self.stations_px_center.values()
                        ])

                    yield ch_images
        finally:
            ch_images = None
            gc.collect()
Esempio n. 7
0
def create_crops(args: tuple):
    """Function executed by multiple threads to create crops around stations
        and save them to disk as pickle.

    One pickle file named '<date>.pkl' is written to `dest` per calendar day.
    It holds a dict mapping each timestamp to a dict
    {station_name: channels-last array of normalized crops}. Timestamps
    without lat/lon or with fewer than 5 available channels are skipped with
    a warning. `paths` is expected to be ordered so that frames of the same
    file and of the same day are contiguous.

    Arguments:
        args {tuple} -- dest, paths, image_size, thread_nb
    """
    dest, paths, image_size, thread_nb = args
    pixels = image_size // 2
    adjustement = image_size % 2  # Adjustement if image_size is odd

    h5_data = None
    open_path = None
    curr_date = None
    cropped_images_day = {}
    try:
        # Iterate over all the existing timestamps
        for timestamp, path, offset in tqdm(paths,
                                            position=thread_nb,
                                            desc=f'Thread {thread_nb}',
                                            leave=False):
            # Open hdf5 file if it is not already opened
            if open_path != path:
                if h5_data is not None:
                    h5_data.close()
                    h5_data = None
                h5_data = h5py.File(path, "r")
                open_path = path

            # Save cropped images of the day that just ended
            if curr_date != timestamp.date():
                if curr_date is not None:
                    with open(os.path.join(dest,
                                           str(curr_date) + '.pkl'),
                              'wb') as f:
                        pickle.dump(cropped_images_day, f)
                cropped_images_day = {}
                curr_date = timestamp.date()

            # Get latitude & longitude stored in the file
            lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
            lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
            if lats is None or lons is None:
                logger.warning(
                    f'latlong of date {timestamp} is unavailable, skipping...')
                continue

            # Get data from the 5 channels
            images = []
            for channel in ('ch1', 'ch2', 'ch3', 'ch4', 'ch6'):
                img = utils.fetch_hdf5_sample(channel, h5_data, offset)
                if isinstance(img, np.ndarray):
                    images.append(img)
            if len(images) < 5:
                # The message used to reference an undefined name (`index`),
                # which raised NameError instead of logging.
                logger.warning(
                    f'{5-len(images)} channels are not available at date {timestamp}, skipping...'
                )
                continue

            # Crop stations
            cropped_images_stations = {}
            for station_name, station_coords in stations.items():
                pixel_coords = (np.argmin(np.abs(lats - station_coords[0])),
                                np.argmin(np.abs(lons - station_coords[1])))

                # Crop the images with the station centered
                cropped_images = []
                for img, mean, std in zip(images, images_mean.values(),
                                          images_std.values()):
                    # TODO : Check if the slice is out of bounds
                    img = (img - mean) / std  # Normalize image
                    cropped_images.append(
                        img[pixel_coords[0] - pixels:pixel_coords[0] +
                            pixels + adjustement,
                            pixel_coords[1] - pixels:pixel_coords[1] +
                            pixels + adjustement])
                cropped_images_stations[station_name] = np.moveaxis(
                    np.array(cropped_images), 0, -1)
            cropped_images_day[timestamp] = cropped_images_stations
    finally:
        # Always release the last opened file, even when an error occurred.
        if h5_data is not None:
            h5_data.close()

    # Save the last day (nothing to do when `paths` was empty or every frame
    # of the last day was skipped).
    if cropped_images_day:
        with open(os.path.join(dest, str(curr_date) + '.pkl'), 'wb') as f:
            pickle.dump(cropped_images_day, f)