Esempio n. 1
0
def prepend_dataset_with_weather(samples,
                                 location='Fresno, CA',
                                 weather_columns=None,
                                 use_cache=True,
                                 verbosity=0):
    """ Prepend the specified weather values (e.g. Max TempF) to the samples[0..N]['input'] vectors

    samples[0..N]['target'] should have an index with the date timestamp

    If you use_cache for the current year, you may not get the most recent data.

    Arguments:
        samples (list of dict): {'input': np.array(), 'target': pandas.DataFrame}
        location (str): passed through to `weather.daily`
        weather_columns (list of str or list of int): labels of weather columns, or
            integer positions into the weather table's columns. When empty or None
            the samples are returned untouched.
        use_cache (bool): passed through to `weather.daily`
        verbosity (int): > 1 prints progress, > 0 warns about missing features,
            >= 0 warns about dates with no weather record

    Returns:
        list of dict: the same `samples` list; each sample['input'] is replaced
            in place by a list of the weather values followed by the original inputs.
            A weather value that cannot be found is prepended as None.
    """
    if verbosity > 1:
        print('Prepending weather data for {} to dataset samples'.format(
            weather_columns))
    if not weather_columns:
        return samples
    from traceback import print_exc

    timestamps = pd.DatetimeIndex([s['target'].index[0] for s in samples])
    years = range(timestamps.min().date().year,
                  timestamps.max().date().year + 1)
    weather_df = weather.daily(location=location,
                               years=years,
                               use_cache=use_cache)
    # FIXME: weather_df.resample('D') fails
    # Re-key the table by plain `date` objects so it can be looked up by sample date.
    weather_df.index = [d.date() for d in weather_df.index]
    if verbosity > 1:
        print('Retrieved weather for years {}:'.format(years))
        print(weather_df)
    # Anything that is not a known column label is treated as a column position.
    weather_columns = [
        label
        if label in weather_df.columns else weather_df.columns[int(label)]
        for label in weather_columns
    ]
    for sample, timestamp in zip(samples, timestamps):
        try:
            weather_day = weather_df.loc[timestamp.date()]
        except Exception:
            # Deliberately best-effort: a date without weather yields None features.
            # Unlike a bare `except:` this lets KeyboardInterrupt/SystemExit through.
            print_exc()
            weather_day = {}
            if verbosity >= 0:
                warnings.warn('Unable to find weather for the date {}'.format(
                    timestamp.date()))
        weather_values = [
            weather_day.get(label, None) for label in weather_columns
        ]
        sample['input'] = weather_values + list(sample['input'])
        if verbosity > 0:
            # NaN never compares equal to itself, so `== NaN` / `NaN in list` cannot
            # detect it; pd.isnull catches both NaN and the None default used above.
            missing = [
                label for label, value in zip(weather_columns, weather_values)
                if pd.isnull(value)
            ]
            if missing:
                warnings.warn(
                    'Unable to find weather features {} in the weather for date {}'
                    .format(missing, timestamp))
    return samples
Esempio n. 2
0
def oneday_weather_forecast(
    location="Portland, OR",
    inputs=(
        "Min Temperature",
        "Mean Temperature",
        "Max Temperature",
        "Max Humidity",
        "Mean Humidity",
        "Min Humidity",
        "Max Sea Level Pressure",
        "Mean Sea Level Pressure",
        "Min Sea Level Pressure",
        "Wind Direction",
    ),
    outputs=("Min Temperature", "Mean Temperature", "Max Temperature", "Max Humidity"),
    date=None,
    epochs=200,
    delays=(1, 2, 3, 4),
    num_years=4,
    use_cache=False,
    verbosity=1,
):
    """ Provide a weather forecast for tomorrow based on historical weather at that location

    Arguments:
      location (str): passed to `weather.daily` and `train_weather_predictor`
      inputs (sequence of str): weather columns fed to the predictor
      outputs (sequence of str): weather columns to predict; also the keys of each forecast
      date: reference date, converted with `make_date`; defaults to today's date
      epochs (int): training epochs passed to `train_weather_predictor`
      delays (sequence of int): sample delays passed to `train_weather_predictor`
      num_years (int): years of history before `date.year` to train on (10 if falsy)
      use_cache (bool): whether the initial history download may use the cache
      verbosity (int): passed to `weather.daily` and `train_weather_predictor`

    Returns:
      dict: {'trainer': trainer, 'yesterday': ..., 'today': ..., 'tomorrow': ...} where each
          day entry is built from `outputs` zipped with the network activations and
          combined with a 'date' key through `update_dict`
    """
    date = make_date(date or datetime.datetime.now().date())
    num_years = int(num_years or 10)
    years = range(date.year - num_years, date.year + 1)
    # This call is made only to refresh the cached history (honoring `use_cache`), so
    # the training call below can always read from the cache. Its result was never
    # used, and the DataFrame.sort() once chained here no longer exists in pandas.
    weather.daily(location, years=years, use_cache=use_cache, verbosity=verbosity)
    trainer, df = train_weather_predictor(
        location,
        years=years,
        delays=delays,
        inputs=inputs,
        outputs=outputs,
        epochs=epochs,
        verbosity=verbosity,
        use_cache=True,
    )
    nn = trainer.module
    forecast = {"trainer": trainer}

    # The last two training samples give the predictions dated df.index[-2] and df.index[-1].
    yesterday = dict(zip(outputs, nn.activate(trainer.ds["input"][-2])))
    forecast["yesterday"] = update_dict(yesterday, {"date": df.index[-2].date()})

    today = dict(zip(outputs, nn.activate(trainer.ds["input"][-1])))
    forecast["today"] = update_dict(today, {"date": df.index[-1].date()})

    # NOTE(review): `df` here is the table returned by train_weather_predictor (delayed
    # inputs, targets, predictions), not the raw weather table -- confirm that
    # input_dataset_from_dataframe can resolve `inputs` against it.
    ds = util.input_dataset_from_dataframe(
        df[-max(delays) :], delays=delays, inputs=inputs, normalize=False, verbosity=0
    )
    tomorrow = dict(zip(outputs, nn.activate(ds["input"][-1])))
    forecast["tomorrow"] = update_dict(tomorrow, {"date": (df.index[-1] + datetime.timedelta(1)).date()})

    return forecast
Esempio n. 3
0
def train_weather_predictor(
        location='Portland, OR',
        years=range(2013, 2016,),
        delays=(1, 2, 3),
        inputs=('Min Temperature', 'Max Temperature', 'Min Sea Level Pressure', u'Max Sea Level Pressure', 'WindDirDegrees',),
        outputs=(u'Max TemperatureF',),
        N_hidden=6,
        epochs=30,
        use_cache=False,
        verbosity=2,
        ):
    """Train a neural net to predict the weather for tomorrow based on past weather.

    Builds a linear single hidden layer neural net (multi-dimensional nonlinear regression).
    The dataset is a basic SupervisedDataSet rather than a SequentialDataSet, so the training set
    and the test set are sampled randomly. This means that historical data for one sample (the delayed
    input vector) will likely be used as the target for other samples.

    Uses CSVs scraped from wunderground (without an api key) to get daily weather for the years indicated.

    Arguments:
      location (str): City and state in standard US postal service format: "City, ST"
          alternatively an airport code like "PDX or LAX"
      delays (list of int): sample delays to use for the input tapped delay line.
          Positive and negative values are treated the same as sample counts into the past.
          default: [1, 2, 3], in z-transform notation: z^-1 + z^-2 + z^-3
      years (int or list of int): list of 4-digit years to download weather from wunderground
      inputs (list of int or list of str): column indices or labels for the inputs
      outputs (list of int or list of str): column indices or labels for the outputs
      N_hidden (int): passed to `util.ann_from_ds`
      epochs (int): number of epochs passed to `trainer.trainEpochs`
      use_cache (bool): passed to `weather.daily`
      verbosity (int): passed to `weather.daily` and the `util` helpers

    Returns:
      2-tuple: tuple(trainer, df)
          trainer is the object built by `util.build_trainer`, after training
          df is a DataFrame with one row per training sample: the delayed inputs, the
          targets, and a 'Predicted <output>' column for each output

    """
    # DataFrame.sort() was removed from pandas; sort_index() orders the rows by index.
    df = weather.daily(location, years=years, use_cache=use_cache, verbosity=verbosity).sort_index()
    ds = util.dataset_from_dataframe(df, normalize=False, delays=delays, inputs=inputs, outputs=outputs, verbosity=verbosity)
    nn = util.ann_from_ds(ds, N_hidden=N_hidden, verbosity=verbosity)
    trainer = util.build_trainer(nn, ds=ds, verbosity=verbosity)
    trainer.trainEpochs(epochs)

    # Label columns in the same order the table rows are assembled below:
    # delayed inputs (grouped by delay), then targets, then predictions.
    # format() instead of `+` so integer column indices in `inputs` do not raise TypeError.
    columns = []
    for delay in delays:
        columns += ['{}[-{}]'.format(inp, delay) for inp in inputs]
    columns += list(outputs)
    columns += ['Predicted {}'.format(outp) for outp in outputs]

    table = [list(i) + list(t) + list(trainer.module.activate(i)) for i, t in zip(trainer.ds['input'], trainer.ds['target'])]
    # The first max(delays) index entries are skipped so the index lines up with the samples.
    df = pd.DataFrame(table, columns=columns, index=df.index[max(delays):])

    return trainer, df
Esempio n. 4
0
def oneday_weather_forecast(
        location='Portland, OR',
        inputs=('Min Temperature', 'Mean Temperature', 'Max Temperature', 'Max Humidity', 'Mean Humidity', 'Min Humidity', 'Max Sea Level Pressure', 'Mean Sea Level Pressure', 'Min Sea Level Pressure', 'Wind Direction'),
        outputs=('Min Temperature', 'Mean Temperature', 'Max Temperature', 'Max Humidity'),
        date=None,
        epochs=200,
        delays=(1, 2, 3, 4),
        num_years=4,
        use_cache=False,
        verbosity=1,
        ):
    """ Provide a weather forecast for tomorrow based on historical weather at that location

    Arguments:
      location (str): passed to `weather.daily` and `train_weather_predictor`
      inputs (sequence of str): weather columns fed to the predictor
      outputs (sequence of str): weather columns to predict; also the keys of each forecast
      date: reference date, converted with `make_date`; defaults to today's date
      epochs (int): training epochs passed to `train_weather_predictor`
      delays (sequence of int): sample delays passed to `train_weather_predictor`
      num_years (int): years of history before `date.year` to train on (10 if falsy)
      use_cache (bool): whether the initial history download may use the cache
      verbosity (int): passed to `weather.daily` and `train_weather_predictor`

    Returns:
      dict: {'trainer': trainer, 'yesterday': ..., 'today': ..., 'tomorrow': ...} where each
          day entry is built from `outputs` zipped with the network activations and
          combined with a 'date' key through `update_dict`
    """
    date = make_date(date or datetime.datetime.now().date())
    num_years = int(num_years or 10)
    years = range(date.year - num_years, date.year + 1)
    # Only the caching side effect of this download is wanted: the returned table was
    # always overwritten below, and chaining the removed DataFrame.sort() onto it
    # raises AttributeError on current pandas.
    weather.daily(location, years=years, use_cache=use_cache, verbosity=verbosity)
    # because up-to-date weather history was cached above, can use that cache, regardless of use_cache kwarg
    trainer, df = train_weather_predictor(
        location,
        years=years,
        delays=delays,
        inputs=inputs,
        outputs=outputs,
        epochs=epochs,
        verbosity=verbosity,
        use_cache=True,
        )
    nn = trainer.module
    forecast = {'trainer': trainer}

    # Predictions from the last two training samples, dated df.index[-2] and df.index[-1].
    yesterday = dict(zip(outputs, nn.activate(trainer.ds['input'][-2])))
    forecast['yesterday'] = update_dict(yesterday, {'date': df.index[-2].date()})

    today = dict(zip(outputs, nn.activate(trainer.ds['input'][-1])))
    forecast['today'] = update_dict(today, {'date': df.index[-1].date()})

    # NOTE(review): `df` is the training table returned above, not the raw weather
    # history -- verify that `inputs` can be resolved against its columns.
    ds = util.input_dataset_from_dataframe(df[-max(delays):], delays=delays, inputs=inputs, normalize=False, verbosity=0)
    tomorrow = dict(zip(outputs, nn.activate(ds['input'][-1])))
    forecast['tomorrow'] = update_dict(tomorrow, {'date': (df.index[-1] + datetime.timedelta(1)).date()})

    return forecast
Esempio n. 5
0
def prepend_dataset_with_weather(samples, location='Fresno, CA', weather_columns=None, use_cache=True, verbosity=0):
    """ Prepend the specified weather values (e.g. Max TempF) to the samples[0..N]['input'] vectors

    samples[0..N]['target'] should have an index with the date timestamp

    If you use_cache for the current year, you may not get the most recent data.

    Arguments:
        samples (list of dict): {'input': np.array(), 'target': pandas.DataFrame}
        location (str): passed through to `weather.daily`
        weather_columns (list of str or list of int): weather column labels, or integer
            positions into the weather table's columns; empty/None returns samples unchanged
        use_cache (bool): passed through to `weather.daily`
        verbosity (int): > 1 prints progress, > 0 warns about missing features,
            >= 0 warns about dates that have no weather record

    Returns:
        list of dict: the same `samples` list, with every sample['input'] replaced in place
            by the weather values (None where unavailable) followed by the original inputs
    """
    if verbosity > 1:
        print('Prepending weather data for {} to dataset samples'.format(weather_columns))
    if not weather_columns:
        return samples
    from traceback import print_exc

    timestamps = pd.DatetimeIndex([s['target'].index[0] for s in samples])
    years = range(timestamps.min().date().year, timestamps.max().date().year + 1)
    weather_df = weather.daily(location=location, years=years, use_cache=use_cache)
    # FIXME: weather_df.resample('D') fails
    # Index by plain `date` objects so each sample's date can be looked up directly.
    weather_df.index = [d.date() for d in weather_df.index]
    if verbosity > 1:
        print('Retrieved weather for years {}:'.format(years))
        print(weather_df)
    # Labels that are not column names are interpreted as column positions.
    weather_columns = [label if label in weather_df.columns else weather_df.columns[int(label)]
                       for label in weather_columns]
    for sample, timestamp in zip(samples, timestamps):
        try:
            weather_day = weather_df.loc[timestamp.date()]
        except Exception:
            # Best-effort lookup: fall back to "no weather" for this date, but do not
            # swallow KeyboardInterrupt/SystemExit the way a bare `except:` would.
            print_exc()
            weather_day = {}
            if verbosity >= 0:
                warnings.warn('Unable to find weather for the date {}'.format(timestamp.date()))
        weather_values = [weather_day.get(label, None) for label in weather_columns]
        sample['input'] = weather_values + list(sample['input'])
        if verbosity > 0:
            # Comparing against float('NaN') is always False, so use pd.isnull, which
            # flags both NaN readings and the None placeholder for absent labels.
            missing = [label for label, value in zip(weather_columns, weather_values) if pd.isnull(value)]
            if missing:
                warnings.warn('Unable to find weather features {} in the weather for date {}'.format(
                    missing, timestamp))
    return samples
Esempio n. 6
0
def train_weather_predictor(
    location="Portland, OR",
    years=range(2013, 2016),
    delays=(1, 2, 3),
    inputs=(
        "Min Temperature",
        "Max Temperature",
        "Min Sea Level Pressure",
        u"Max Sea Level Pressure",
        "WindDirDegrees",
    ),
    outputs=(u"Max TemperatureF",),
    N_hidden=6,
    epochs=30,
    use_cache=False,
    verbosity=2,
):
    """Train a neural net to predict the weather for tomorrow based on past weather.

    Builds a linear single hidden layer neural net (multi-dimensional nonlinear regression).
    The dataset is a basic SupervisedDataSet rather than a SequentialDataSet, so the training set
    and the test set are sampled randomly. This means that historical data for one sample (the delayed
    input vector) will likely be used as the target for other samples.

    Uses CSVs scraped from wunderground (without an api key) to get daily weather for the years indicated.

    Arguments:
      location (str): City and state in standard US postal service format: "City, ST"
          alternatively an airport code like "PDX or LAX"
      delays (list of int): sample delays to use for the input tapped delay line.
          Positive and negative values are treated the same as sample counts into the past.
          default: [1, 2, 3], in z-transform notation: z^-1 + z^-2 + z^-3
      years (int or list of int): list of 4-digit years to download weather from wunderground
      inputs (list of int or list of str): column indices or labels for the inputs
      outputs (list of int or list of str): column indices or labels for the outputs
      N_hidden (int): passed to `util.ann_from_ds`
      epochs (int): number of epochs passed to `trainer.trainEpochs`
      use_cache (bool): passed to `weather.daily`
      verbosity (int): passed to `weather.daily` and the `util` helpers

    Returns:
      2-tuple: tuple(trainer, df)
          trainer is the trained object produced by `util.build_trainer`
          df is a DataFrame holding, for each training sample, the delayed inputs, the
          targets and a "Predicted <output>" column per output

    """
    # sort_index() replaces DataFrame.sort(), which has been removed from pandas.
    df = weather.daily(location, years=years, use_cache=use_cache, verbosity=verbosity).sort_index()
    ds = util.dataset_from_dataframe(
        df, normalize=False, delays=delays, inputs=inputs, outputs=outputs, verbosity=verbosity
    )
    nn = util.ann_from_ds(ds, N_hidden=N_hidden, verbosity=verbosity)
    trainer = util.build_trainer(nn, ds=ds, verbosity=verbosity)
    trainer.trainEpochs(epochs)

    # Column labels follow the row layout built below: delayed inputs (grouped by
    # delay), targets, predictions. format() keeps integer column indices working,
    # where string concatenation would raise TypeError.
    columns = []
    for delay in delays:
        columns += ["{}[-{}]".format(inp, delay) for inp in inputs]
    columns += list(outputs)
    columns += ["Predicted {}".format(outp) for outp in outputs]

    table = [
        list(i) + list(t) + list(trainer.module.activate(i)) for i, t in zip(trainer.ds["input"], trainer.ds["target"])
    ]
    # Drop the first max(delays) index entries so the index matches the sample rows.
    df = pd.DataFrame(table, columns=columns, index=df.index[max(delays) :])

    return trainer, df
Esempio n. 7
0
 def reset(self):
     """Run EpisodicTask.reset, then store a `weather.daily(date="random")` result in `self.day` and zero `self.t`."""
     EpisodicTask.reset(self)
     random_day = weather.daily(date="random")
     self.day = random_day
     self.t = 0
Esempio n. 8
0
 def reset(self):
     """Reset through EpisodicTask, then load `weather.daily(date='random')` into `self.day` and set `self.t` to 0."""
     EpisodicTask.reset(self)
     sampled_day = weather.daily(date='random')
     self.day = sampled_day
     self.t = 0