Example #1
    def create_generator():
        def batch_datetimes():
            # For every requested timestamp, yield one (timestamp, station position)
            # pair per configured station, where the position is the station's
            # index in the `station` dict.
            for index in target_datetimes:
                for code in ('BND', 'TBL', 'DRA', 'FPK', 'GWN', 'PSU', 'SXF'):
                    if code in station:
                        yield [(index, [*station].index(code))]

        for batch in batch_datetimes():
            images = get_raw_images(dataframe, batch, config)
            clearsky = get_column_from_dataframe(dataframe, batch, station,
                                                 target_time_offsets,
                                                 'CLEARSKY_GHI', config)
            targets = get_GHI_targets(dataframe, batch, station,
                                      target_time_offsets, config)

            yield {
                'images': images,
                'clearsky': clearsky,
            }, targets
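A minimal consumption sketch for the generator above (purely illustrative: it assumes the enclosing scope defines dataframe, target_datetimes, station, target_time_offsets and config, and that the helper functions return NumPy arrays):

    for inputs, targets in create_generator():
        # Each element pairs a dict of model inputs with a target array.
        print(inputs['images'].shape, inputs['clearsky'].shape, targets.shape)
        break  # inspect only the first element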
Example #2
    def create_generator():
        if DEBUGGING:
            # Attach to the remote debugger without suspending execution.
            pydevd.settrace(suspend=False)

        def batch_datetimes():
            # Sample random rows indefinitely, keep only daytime observations
            # for the requested stations, and yield batches of
            # (timestamp, station position) pairs of size config['batch_size'].
            filtered_df = []
            while True:
                sample = dataframe.sample()
                index = sample.index[0]
                row = sample.iloc[0]

                for code in ('BND', 'TBL', 'DRA', 'FPK', 'GWN', 'PSU', 'SXF'):
                    if row[f'{code}_DAYTIME'] == 1 and code in station:
                        filtered_df.append((index, [*station].index(code)))

                if len(filtered_df) > config['batch_size']:
                    batch = filtered_df[:config['batch_size']]
                    # Keep the remainder for the next batch.
                    filtered_df = filtered_df[config['batch_size']:]
                    yield batch

        for batch in batch_datetimes():
            images = get_preprocessed_images(dataframe, batch, config)
            clearsky = get_column_from_dataframe(dataframe, batch, station,
                                                 target_time_offsets,
                                                 'CLEARSKY_GHI', config)
            targets = get_GHI_targets(dataframe, batch, station,
                                      target_time_offsets, config)

            yield {
                'images': images,
                'clearsky': clearsky,
            }, targets
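These generators yield ({'images', 'clearsky'}, targets) pairs, so one plausible way to consume them is tf.data.Dataset.from_generator. The sketch below is an assumption, not shown in the examples themselves, and the float32 dtypes are placeholders:

    import tensorflow as tf

    # Hypothetical wrapping of the generator factory defined above.
    dataset = tf.data.Dataset.from_generator(
        create_generator,
        output_types=({'images': tf.float32, 'clearsky': tf.float32}, tf.float32))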
Example #3
def get_column_from_dataframe(
        df: pd.DataFrame, datetimes: typing.List[datetime],
        station: typing.Dict[typing.AnyStr, typing.Tuple[float, float, float]],
        offsets: typing.List[timedelta], column: typing.AnyStr,
        config: typing.Dict[typing.AnyStr, typing.Any]) -> np.ndarray:
    """
    Get a station's values from a given metadata column for a list of datetimes and offsets.

    Args:
        df: metadata dataframe.
        datetimes: list of timestamps (datetime objects) to provide targets for.
        station: 1-element dictionary mapping the station name to (latitude, longitude, elevation).
        offsets: list of target time offsets (timedelta objects) (by definition: [T=0, T+1h, T+3h, T+6h]).
        column: name of the dataframe column whose values are returned.
        config: configuration dictionary holding any extra parameters that might be required for tuning purposes.
    Returns:
        A 2D NumPy array of values from the requested column, of size [#datetimes, #offsets] (the past-sequence length is fixed to 1 here).
    """
    # Reuse get_GHI_targets to extract an arbitrary column: override the target
    # name and request a single past value per target offset.
    new_config = copy(config)
    new_config['target_name'] = column
    new_config['target_past_len'] = 1
    new_config['target_past_interval'] = config['input_past_interval']
    return get_GHI_targets(df, datetimes, station, offsets, new_config)
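A minimal usage sketch, assuming df is the metadata dataframe from the examples above; the station coordinates, timestamp and config value below are made-up placeholders, and config may need further keys consumed by get_GHI_targets:

    from datetime import datetime, timedelta

    station = {'BND': (40.05, -88.37, 230.0)}      # hypothetical coordinates
    datetimes = [datetime(2015, 6, 1, 12, 0)]      # hypothetical timestamp
    offsets = [timedelta(hours=h) for h in (0, 1, 3, 6)]
    config = {'input_past_interval': 15}           # placeholder value
    clearsky = get_column_from_dataframe(df, datetimes, station, offsets,
                                         'CLEARSKY_GHI', config)
    # Expected shape: (len(datetimes), len(offsets))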
Example #4
def test_target_type(df, datetimes, station, offsets, config):
    targets = get_GHI_targets(df, datetimes, station, offsets, config)
    assert isinstance(targets, np.ndarray)
    assert all([isinstance(x, np.float64) for x in targets.flatten()])
Example #5
def test_shape_sequence(df, datetimes, station):
    offsets = [timedelta()]
    config = {'target_name': 'CLOUDINESS', 'target_past_len': 10, 'target_past_interval': 30}
    targets = get_GHI_targets(df, datetimes, station, offsets, config)
    assert targets.shape == (len(datetimes), config['target_past_len'])
Example #6
def test_shape_offsets(df, datetimes, station, offsets, config):
    targets = get_GHI_targets(df, datetimes, station, offsets, config)
    assert targets.shape == (len(datetimes), len(offsets))