Example #1
    def test_load_metadata_datetime(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(A_STATION, A_STATION_COORDINATE)

        expected_datetime = datetime(2010, 1, 1, 8, 0, 0, 0)
        self.assertEqual(expected_datetime, next(metadata).datetime)
Example #2
    def test_load_metadata_image_offset_with_no_compression(self):
        loader = MetadataLoader(CATALOG_PATH)
        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               compression=None,
                               night_time=False)
        actual = next(metadata).image_offsets[0]
        self.assertAlmostEqual(actual, 0)
Example #3
    def test_givenTargetDatetimes_whenLoad_shouldLoadMetadataInOrder(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(Station.BND,
                               A_STATION_COORDINATE,
                               target_datetimes=SOME_TARGET_DATETIMES)

        for md, expected_datetime in zip(metadata, SOME_TARGET_DATETIMES):
            self.assertEqual(md.datetime, expected_datetime)
Example #4
    def test_load_metadata_coordinates(self):
        loader = MetadataLoader(CATALOG_PATH)
        station = Station.BND
        coordinates = Coordinates(*STATION_COORDINATES[station])

        metadata = loader.load(station, coordinates, night_time=False)

        actual_coordinates = next(metadata).coordinates
        self.assertEqual(coordinates, actual_coordinates)
Example #5
    def test_load_metadata_target_cloudiness_6hour(self):
        loader = MetadataLoader(CATALOG_PATH)
        station_with_target = Station.BND
        target_6h = "variable"

        metadata = loader.load(station_with_target, A_STATION_COORDINATE)

        actual_target_6h = next(metadata).target_cloudiness_6h
        self.assertEqual(target_6h, actual_target_6h)
Example #6
    def test_load_metadata_target_cloudiness(self):
        loader = MetadataLoader(CATALOG_PATH)
        station_with_target = Station.BND
        target = "night"

        metadata = loader.load(station_with_target, A_STATION_COORDINATE)

        actual_target: Any = next(metadata).target_cloudiness
        self.assertEqual(target, actual_target)
Example #7
    def test_load_metadata_target_ghi_6hour(self):
        loader = MetadataLoader(CATALOG_PATH)
        station_with_target = Station.BND
        target_6h = 29.10666666666667

        metadata = loader.load(station_with_target, A_STATION_COORDINATE)

        actual_target_6h: Any = next(metadata).target_ghi_6h
        self.assertAlmostEqual(target_6h, actual_target_6h)
Example #8
    def test_load_metadata_target_ghi(self):
        loader = MetadataLoader(CATALOG_PATH)
        station_with_target = Station.BND
        target = -3.986666666666666

        metadata = loader.load(station_with_target, A_STATION_COORDINATE)

        actual_target: Any = next(metadata).target_ghi
        self.assertAlmostEqual(target, actual_target)
Example #9
    def test_load_metadata_image_path_with_16bit_compression(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               compression="16bit")

        first_image_path = next(metadata).image_paths[0]
        self.assertTrue("16bit" in first_image_path)
Example #10
    def test_load_metadata_with_specified_dataframe(self):
        # Use a context manager so the catalog file is closed after loading.
        with open("tests/data/samples/catalog-test.pkl", "rb") as catalog_file:
            dummy_catalog = pickle.load(catalog_file)
        loader = MetadataLoader(file_name=None, dataframe=dummy_catalog)
        station_with_target = Station.BND
        target_6h = 29.10666666666667
        metadata = loader.load(station_with_target, A_STATION_COORDINATE)
        actual_target_6h: Any = next(metadata).target_ghi_6h
        self.assertAlmostEqual(target_6h, actual_target_6h)
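Example #10 builds the loader from an in-memory dataframe instead of a catalog path. A minimal sketch of the same pattern using ``pandas.read_pickle``, assuming ``MetadataLoader`` accepts the ``dataframe`` keyword exactly as shown above:

import pandas as pd

# Sketch: pandas can unpickle the catalog dataframe directly, which
# replaces the manual open()/pickle.load() pair in the test above.
catalog = pd.read_pickle("tests/data/samples/catalog-test.pkl")
loader = MetadataLoader(file_name=None, dataframe=catalog)
metadata = loader.load(Station.BND, A_STATION_COORDINATE)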
Example #11
def prepare_dataloader(
    dataframe: pd.DataFrame,
    target_datetimes: typing.List[datetime.datetime],
    station: str,
    coordinates: typing.Tuple[float, float, float],
    target_time_offsets: typing.List[datetime.timedelta],
    config: dataloader.DataloaderConfig,
) -> tf.data.Dataset:
    """Output data.

    Note that you can use either the netCDF or HDF5 data. Each iteration over your data loader should return a
    2-element tuple containing the tensor that should be provided to the model as input, and the target values. In
    this specific case, you will not be able to provide the latter since the dataframe contains no GHI, and we are
    only interested in predictions, not training. Therefore, you must return a placeholder (or ``None``) as the second
    tuple element.
    Reminder: the dataframe contains imagery paths for every possible timestamp requested in ``target_datetimes``.
    However, we expect that you will use some of the "past" imagery (i.e. imagery at T<=0) for any T in
    ``target_datetimes``, but you should NEVER rely on "future" imagery to generate predictions (for T>0). We
    will be inspecting data loader implementations to ensure this is the case, and those who "cheat" will be
    dramatically penalized.
    See https://github.com/mila-iqia/ift6759/tree/master/projects/project1/evaluation.md for more information.
    Args:
        dataframe: a pandas dataframe that provides the netCDF file path (or HDF5 file path and offset) for all
            relevant timestamp values over the test period.
        target_datetimes: a list of timestamps that your data loader should use to provide imagery for your model.
            The ordering of this list is important, as each element corresponds to a sequence of GHI values
            to predict. By definition, the GHI values must be provided for the offsets given by ``target_time_offsets``
            which are added to each timestamp (T=0) in this datetimes list.
        station: station name of interest
        coordinates: the station's coordinates (latitude, longitude, elevation).
            At evaluation time, only one station will be used, to avoid confusion.
            See the comment on the for loop in the ``generate_all_predictions`` function.
        target_time_offsets: the list of timedeltas to predict GHIs for (by definition: [T=0, T+1h, T+3h, T+6h]).
        config: configuration for the dataloader.

    Returns:
        A ``tf.data.Dataset`` object that can be used to produce input tensors for your model. One tensor
        must correspond to one sequence of past imagery data. The tensors must be generated in the order given
        by ``target_datetimes``.

    """

    logger.info(f"Prepare dataloader for station {station} and config {config}")
    metadata_loader = MetadataLoader(dataframe=dataframe, training=False)
    metadata_generator = metadata_loader.load(
        Station(station),
        Coordinates(coordinates[0], coordinates[1], coordinates[2]),
        target_datetimes=target_datetimes,
        skip_missing=False,
        num_images=config.num_images,
        time_interval_min=config.time_interval_min,
    )

    return dataloader.create_dataset(
        lambda: metadata_generator, config=config, enable_image_cache=False,
    )
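A minimal usage sketch for ``prepare_dataloader``, assuming a catalog dataframe ``test_df``, a ``dataloader.DataloaderConfig`` instance ``config``, and illustrative station coordinates (all hypothetical values):

import datetime

# Offsets fixed by the problem definition: T=0, T+1h, T+3h, T+6h.
target_time_offsets = [datetime.timedelta(hours=h) for h in (0, 1, 3, 6)]

# test_df, config, and the coordinate values are hypothetical placeholders.
dataset = prepare_dataloader(
    dataframe=test_df,
    target_datetimes=[datetime.datetime(2015, 1, 1, 13, 0)],
    station="BND",
    coordinates=(40.05, -88.37, 230.0),  # illustrative (lat, lon, elevation)
    target_time_offsets=target_time_offsets,
    config=config,
)

# Each element is (input_tensor, placeholder_target); only the inputs are
# meaningful here, since the catalog contains no GHI targets.
for inputs, _ in dataset:
    pass  # feed `inputs` to a trained model to produce predictions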
Example #12
    def test_load_metadata_with_night_time(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               night_time=True)

        num_night_time = self._night_time(metadata)
        self.assertEqual(NUM_METADATA - NUM_METADATA_BND_DAY_TIME,
                         num_night_time)
Example #13
    def test_givenTargetDatetimes_whenLoad_shouldLoadSameAmountOfMetadata(
            self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(Station.BND,
                               A_STATION_COORDINATE,
                               target_datetimes=SOME_TARGET_DATETIMES)

        self.assertEqual(self._num_metadata(metadata),
                         len(SOME_TARGET_DATETIMES))
Example #14
    def __init__(self):
        loader = MetadataLoader(CATALOG_PATH)

        config = default_config()
        config.error_strategy = dataloader.ErrorStrategy.ignore
        config.features = [dataloader.Feature.target_ghi]

        self.dataset = dataloader.create_dataset(
            lambda: loader.load(STATION, COORDINATES, skip_missing=False),
            config=config)
Example #15
    def test_load_metadata_image_offset_with_16bit_compression(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               compression="16bit",
                               night_time=False)
        actual = next(metadata)
        while actual.night_time:
            actual = next(metadata)

        self.assertAlmostEqual(actual.image_offsets[0], 22)
Example #16
    def test_load_metadata_target_datetimes(self):
        loader = MetadataLoader(CATALOG_PATH)
        target_datetimes = [
            datetime(2010, 6, 19, 22, 15),  # Only test timestamps that have images.
            datetime(2012, 3, 24, 12),
            datetime(2015, 9, 21, 21, 15),
            datetime(2012, 7, 6, 18),
            datetime(2014, 7, 13),
            datetime(2010, 8, 31, 20, 45),
            datetime(2015, 4, 16, 12, 45),
            datetime(2013, 4, 17, 16),
            datetime(2012, 8, 15),
            datetime(2010, 11, 14, 19, 15),
            datetime(2014, 7, 21, 14, 30),
            datetime(2011, 11, 22, 17, 30),
            datetime(2010, 8, 15, 23),
            datetime(2010, 5, 11, 19),
            datetime(2013, 2, 15, 14, 15),
            datetime(2011, 2, 8, 17, 45),
        ]
        target_offsets = [
            57,
            16,
            53,
            40,
            64,
            51,
            19,
            32,
            64,
            45,
            26,
            38,
            60,
            44,
            25,
            39,
        ]

        metadata = loader.load(
            A_STATION,
            A_STATION_COORDINATE,
            night_time=True,
            target_datetimes=target_datetimes,
        )
        i = 0
        for datapoint in metadata:
            self.assertIsInstance(datapoint.image_offsets[0], int)
            self.assertEqual(datapoint.image_offsets[0], target_offsets[i])
            i += 1
        self.assertEqual(len(target_datetimes), i)
Example #17
    def test_givenNumImagesAndTimeInterval_whenLoad_shouldReturnCorrectClearskyValues(
            self):
        loader = MetadataLoader(CATALOG_PATH)
        num_images = 5
        num_clearsky = 4
        metadata = loader.load(
            Station.BND,
            A_STATION_COORDINATE,
            num_images=num_images,
        )

        for _ in range(num_images):
            mt = next(metadata)
            self.assertEqual(num_images, len(mt.clearsky_values))
            self.assertEqual(num_clearsky, len(mt.clearsky_values[0]))
Example #18
    def test_load_metadata_compression(self):
        loader = MetadataLoader(CATALOG_PATH)

        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               night_time=True,
                               compression="8bit")
        actual: Any = next(metadata).image_compression
        self.assertEqual(actual, "8bit")

        metadata = loader.load(A_STATION,
                               A_STATION_COORDINATE,
                               night_time=True,
                               compression="16bit")
        actual: Any = next(metadata).image_compression
        self.assertEqual(actual, "16bit")
Example #19
    def test_givenNumImagesAndTimeInterval_whenLoad_shouldReturnCorrectOffsets(
            self):
        loader = MetadataLoader(CATALOG_PATH)
        num_images = 5
        time_interval_min = 15

        metadata = loader.load(
            Station.BND,
            A_STATION_COORDINATE,
            num_images=num_images,
            time_interval_min=time_interval_min,
        )

        for i in range(1, num_images + 1):
            expected_offset = (num_images - i) * [0] + list(range(i))
            mt = next(metadata)
            self.assertEqual(expected_offset, mt.image_offsets)
Example #20
class MetadataPerf(object):
    def __init__(self):
        self.loader = MetadataLoader(CATALOG_PATH)

    def run(self):
        metadata = self.loader.load(STATION, COORDINATES, skip_missing=False)
        for i, m in enumerate(metadata):
            if i % 100 == 0:
                print(f"Loaded {i} metadata")
Example #21
    def test_givenNumImagesAndTimeInterval_whenLoad_shouldReturnCorrectPaths(
            self):
        loader = MetadataLoader(CATALOG_PATH)
        num_images = 5
        first_day_image_path = (
            "/project/cq-training-1/project1/data/hdf5v7_8bit/2010.01.01.0800.h5"
        )

        metadata = loader.load(
            Station.BND,
            A_STATION_COORDINATE,
            num_images=num_images,
        )

        for i in range(1, num_images + 1):
            expected_path = (num_images - i) * ["/unknow/path"
                                                ] + i * [first_day_image_path]
            mt = next(metadata)
            self.assertEqual(expected_path, mt.image_paths)
Example #22
    def test_load_metadata(self):
        metadata_loader = MetadataLoader(file_name=CATALOG_PATH)
        timestamps = metadata_loader.catalog.index.tolist()
        datetimes = [timestamp.to_pydatetime() for timestamp in timestamps]

        metadata = train.metadata_station(metadata_loader, datetimes, 1,
                                          IMAGE_INTERVAL_MIN)

        for m in metadata():
            self.assertTrue(isinstance(m, Metadata))
            break
Example #23
def load_data(
    file_name=None,
    batch_size=64,
    night_time=False,
    skip_missing=True,
    config=None,
    skip_non_cached=False,
) -> Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]:
    """Load train, valid and test datasets.

    Return: (train_dataset, valid_dataset, test_dataset)
    """
    if file_name is None:
        file_name = env.get_catalog_path()
    if env.run_local:
        config.local_path = env.get_local_data_path() + "/hdf5v7_8bit"

    # The two flags are equivalent: if we force caching, we must skip non-cached images.
    config.force_caching = skip_non_cached

    train_datetimes, valid_datetimes, test_datetimes = split.load()

    random.shuffle(train_datetimes)
    random.shuffle(valid_datetimes)
    random.shuffle(test_datetimes)

    ratio_train_datetimes = int(len(train_datetimes) * config.ratio)
    ratio_valid_datetimes = int(len(valid_datetimes) * config.ratio)
    ratio_test_datetimes = int(len(test_datetimes) * config.ratio)

    logger.info(f"Loading {config.ratio*100}% of the data")
    logger.info(f"Training dataset has {ratio_train_datetimes} datetimes")
    logger.info(f"Validation dataset has {ratio_valid_datetimes} datetimes")
    logger.info(f"Test dataset has {ratio_test_datetimes} datetimes")
    logger.info(f"Using {len(STATION_COORDINATES)} stations")

    train_datetimes = train_datetimes[:ratio_train_datetimes]
    valid_datetimes = valid_datetimes[:ratio_valid_datetimes]
    test_datetimes = test_datetimes[:ratio_test_datetimes]

    if dataloader.Feature.metadata in config.features:
        config.precompute_clearsky = True
        target_datetimes = train_datetimes + valid_datetimes + test_datetimes
        config.target_datetimes = target_datetimes
        config.stations = STATION_COORDINATES

    metadata_loader = MetadataLoader(file_name=file_name)
    metadata_train = metadata_station(
        metadata_loader,
        train_datetimes,
        config.num_images,
        config.time_interval_min,
        night_time=night_time,
        skip_missing=skip_missing,
    )
    metadata_valid = metadata_station(
        metadata_loader,
        valid_datetimes,
        config.num_images,
        config.time_interval_min,
        night_time=night_time,
        skip_missing=skip_missing,
    )
    metadata_test = metadata_station(
        metadata_loader,
        test_datetimes,
        config.num_images,
        config.time_interval_min,
        night_time=night_time,
        skip_missing=skip_missing,
    )

    dataset_train = dataloader.create_dataset(metadata_train, config,
                                              train_datetimes,
                                              STATION_COORDINATES)
    dataset_valid = dataloader.create_dataset(metadata_valid, config,
                                              valid_datetimes,
                                              STATION_COORDINATES)
    dataset_test = dataloader.create_dataset(metadata_test, config,
                                             test_datetimes,
                                             STATION_COORDINATES)

    logger.info("Loaded datasets.")
    return dataset_train, dataset_valid, dataset_test
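A minimal sketch of calling ``load_data`` and sanity-checking one training batch; the unpacking order matches the docstring, and ``take(1)`` is standard ``tf.data.Dataset`` API:

# Sketch: load the three splits and pull a single training batch to confirm
# the pipeline runs; the element structure depends on config.features.
train_set, valid_set, test_set = load_data(batch_size=32)
for batch in train_set.take(1):
    print(batch)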
Example #24
    def __init__(self):
        self.loader = MetadataLoader(CATALOG_PATH)