Exemple #1
0
    def genFeatures(self, t, df, **kwargs):
        """Return the feature column names used to predict target ``t``.

        Stations whose id starts with an uppercase character are handled as
        London stations: ``'weather_clu'`` is dropped from the base columns
        and ``'last_rain'`` is never added.

        Side effects: stores the London flag in ``self.ld`` and the target in
        ``self.target``. This is shared instance state, hence thread-unsafe.

        Args:
            t: Name of the target column.
            df: Object whose ``columns`` are scanned for lagged columns.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            list: Every column of ``df`` containing ``lag_format(c, '')`` for
            each base column ``c``, followed by ``'hour'``, the base columns
            and optionally ``'last_rain'``, with the first occurrence of
            ``t`` removed. When the target is ``'O3'``, names starting with
            ``'weather_clu'`` have that prefix replaced by ``'weather'``.
        """
        base_cols = self.lag_cols + [t]
        is_london = self.stationId[0].isupper()
        self.ld = is_london  # thread-unsafe
        self.target = t  # thread-unsafe
        if is_london:
            print('London city')
            base_cols.remove('weather_clu')

        # Lagged columns first, grouped by base column in base_cols order.
        selected = [
            name for base in base_cols for name in df.columns
            if lag_format(base, '') in name
        ]
        selected.append('hour')
        selected.extend(base_cols)
        if not is_london and self.target != 'O3':
            selected.append('last_rain')
        selected.remove(t)

        if self.target != 'O3':
            return selected

        # For O3 the 'weather_clu*' columns are swapped for 'weather*' ones.
        prefix = 'weather_clu'
        return [
            'weather' + name[len(prefix):] if name.startswith(prefix) else name
            for name in selected
        ]
Exemple #2
0
def _gen_from_dict(d):
    """Expand a ``{column: lags}`` mapping into lagged column names.

    Args:
        d: Mapping from a base column name to either an integral count ``n``
            (meaning lags ``0..n`` inclusive) or an iterable of lag indices.

    Returns:
        list: ``lag_format(column, lag)`` for every requested lag, following
        the iteration order of ``d``.
    """
    import numbers

    res = []
    for k, v in d.items():
        # Any integral count is accepted, not only exact ``int``: the former
        # ``type(v) == int`` test sent int subclasses and NumPy integer
        # scalars down the "iterable" path, where they failed.
        lags = range(v + 1) if isinstance(v, numbers.Integral) else v
        res.extend(lag_format(k, i) for i in lags)
    return res
Exemple #3
0
 def __init__(self, args, creator=dataset.create_dataset1):
     """Load the dataset and blank target lags from the prediction date on.

     Args:
         args: Options object; ``data_path``, ``lag`` and ``predict_date``
             are read from it.
         creator: Callable invoked as
             ``creator(args.data_path, MaxLagging=args.lag)`` to build the
             dataset.

     When ``args.predict_date`` is not None, the lag columns ``0..args.lag``
     of 'PM2.5', 'PM10' and 'O3' are set to NaN on every row whose
     ``utc_time`` is at or after that date.
     """
     self.args = args
     self.dataset = creator(args.data_path, MaxLagging=args.lag)
     cutoff = args.predict_date
     if cutoff is not None:
         lagged_targets = [
             lag_format(gas, i)
             for gas in ['PM2.5', 'PM10', 'O3']
             for i in range(args.lag + 1)
         ]
         for column in lagged_targets:
             from_cutoff = self.dataset.utc_time >= cutoff
             self.dataset.loc[from_cutoff, column] = np.nan
     print('Latest of dataset: ', self.dataset.utc_time.max())
Exemple #4
0
    def genNext(X_test, y_pred, features, ref):
        """Build the one-row input of the next step from the current one.

        The result starts as a re-indexed version of ``ref``; its lag columns
        listed in ``features`` are then overwritten with values shifted from
        ``X_test``. ``target`` and ``deploy`` come from the enclosing scope.

        Args:
            X_test: Frame with exactly one row holding the current inputs.
            y_pred: Prediction for the current step.
            features: Column names; only names starting with
                ``<name>_lag_<k>`` are processed.
            ref: Base frame for the next step; must not be None.

        Returns:
            The updated frame. For ``k > 1`` a lag column receives
            ``X_test['<name>_lag_<k-1>']`` when that column exists; otherwise
            it receives ``X_test[name]`` when that column exists and ``name``
            is not the target. The target's lag-1 column is set to ``y_pred``
            when ``deploy`` is falsy or the column contains a missing value.
        """
        assert len(X_test) == 1
        assert ref is not None

        # The next row comes from ``ref`` instead of being rebuilt by shifting
        # X_test's timestamp and looking the row up again, which was slow.
        # NOTE(review): assumes pandas frames, where reset_index returns a new
        # object so the caller's ``ref`` is left untouched -- confirm.
        new_X_test = ref.reset_index(drop=True)
        X_test = X_test.reset_index(drop=True)
        for c in features:
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence and triggers a warning on current Python versions.
            match = re.match(r'(.*?)_lag_(\d+)', c)
            if match is None:
                continue
            name = match.group(1)
            lag_idx = int(match.group(2))
            if lag_idx > 1:
                col = '{}_lag_{}'.format(name, lag_idx - 1)
                if col in X_test.columns:
                    new_X_test[c] = X_test[col]
            elif name in X_test.columns and name != target:
                new_X_test[c] = X_test[name]

        target_lag1 = lag_format(target, 1)
        if not deploy or new_X_test[target_lag1].isna().any():
            new_X_test[target_lag1] = y_pred
        return new_X_test
Exemple #5
0
 def genFeatures(self, t, df, **kwargs):
     """Return the feature column names used to predict target ``t``.

     ``Strategy.genFeatures`` is called first; its return value is discarded.
     When the station id starts with an uppercase character, ``'weather'``
     is dropped from the base columns.

     Args:
         t: Name of the target column.
         df: Object whose ``columns`` are scanned for lagged columns.
         **kwargs: Forwarded to ``Strategy.genFeatures``.

     Returns:
         list: Every column of ``df`` containing ``lag_format(c, '')`` for
         each base column ``c``, followed by ``'hour'`` and the base
         columns, with the first occurrence of ``t`` removed.
     """
     Strategy.genFeatures(self, t, df, **kwargs)
     base_cols = self.lag_cols + [t]
     if self.stationId[0].isupper():
         print('London city')
         base_cols.remove('weather')

     # Lagged columns first, grouped by base column in base_cols order.
     selected = [
         name for base in base_cols for name in df.columns
         if lag_format(base, '') in name
     ]
     selected.append('hour')
     selected.extend(base_cols)
     selected.remove(t)
     return selected
Exemple #6
0
 def genFeatures(self, t, df, **kwargs):
     """Return the important features for this station and gas ``t``.

     Rows of ``self.features`` are kept when their ``stationId`` equals
     ``self.stationId``, their ``gas`` equals ``t`` and their ``importance``
     exceeds ``self.thres``. Missing own-lag indices between 1 and the
     largest selected own-lag index (exclusive) are then appended so the
     target's lags form a run without holes.

     Args:
         t: Name of the target gas.
         df: Unused here; kept for signature compatibility.
         **kwargs: Unused here; kept for signature compatibility.

     Returns:
         list: Selected feature names followed by the gap-filling lag names.
         When no own-lag feature is selected nothing is appended.
     """
     s = self.stationId
     table = self.features
     keep = ((table.stationId == s)
             & (table.gas == t)
             & (table.importance > self.thres))
     features = list(table[keep].feature)

     # Escape the gas name: names such as 'PM2.5' contain regex
     # metacharacters that would otherwise match unrelated columns.
     own_lag = re.compile(r'{}_lag_(\d+)'.format(re.escape(t)))
     self_lag = set()
     for f in features:
         match = own_lag.match(f)
         if match is not None:
             self_lag.add(int(match.group(1)))

     # default=0 leaves the range empty when no own-lag feature was
     # selected; max() of an empty set would raise ValueError.
     for i in range(1, max(self_lag, default=0)):
         if i not in self_lag:
             features.append(lag_format(t, i))
     print(features)
     return features
Exemple #7
0
 def genFeatures(self, t, df, **kwargs):
     """Return the feature column names used to predict target ``t``.

     ``Strategy.genFeatures`` is called first; its return value is discarded.
     ``self.ld`` is read afterwards: when it is ``True``, ``'weather_clu'``
     is dropped from the base columns; when it is ``False``,
     ``'rain_hours'`` is added to the result.

     Args:
         t: Name of the target column.
         df: Object whose ``columns`` are scanned for lagged columns.
         **kwargs: Forwarded to ``Strategy.genFeatures``.

     Returns:
         list: Every column of ``df`` containing ``lag_format(c, '')`` for
         each base column ``c``, followed by ``'hour'``, the base columns
         and optionally ``'rain_hours'``, with the first occurrence of ``t``
         removed.
     """
     Strategy.genFeatures(self, t, df, **kwargs)
     base_cols = self.lag_cols + [t]
     london = self.ld
     # Identity checks are deliberate: the type of self.ld is not visible
     # here, so truthiness is not assumed to be equivalent.
     if london is True:
         print('London city')
         base_cols.remove('weather_clu')

     # Lagged columns first, grouped by base column in base_cols order.
     selected = [
         name for base in base_cols for name in df.columns
         if lag_format(base, '') in name
     ]
     selected.append('hour')
     selected.extend(base_cols)
     if london is False:
         selected.append('rain_hours')
     selected.remove(t)
     return selected