def genFeatures(self, t, df, **kwargs):
    """Assemble the list of input feature names used to predict gas ``t``.

    The result contains, in order: every column of ``df`` whose name
    contains the lag prefix ``lag_format(c, '')`` of a base column ``c``,
    then ``'hour'``, then the base columns themselves, and (for stations
    whose id does not start with an upper-case letter, unless the target is
    ``'O3'``) ``'last_rain'``.  The target ``t`` itself is removed.

    Args:
        t: Name of the target gas column.
        df: Object exposing ``columns`` (an iterable of column names).
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        list: Feature names.  When the target is ``'O3'`` every name
        starting with ``'weather_clu'`` has that prefix replaced by
        ``'weather'``.

    Side effects:
        Stores ``self.ld`` and ``self.target`` and prints ``'London city'``
        for stations whose id starts with an upper-case letter.
    """
    cols = self.lag_cols + [t]
    is_london = self.stationId[0].isupper()
    # Per-call state kept on the instance: not safe to share across threads.
    self.ld = is_london
    self.target = t

    if is_london:
        print('London city')
        cols.remove('weather_clu')

    selected = []
    for base in cols:
        selected.extend(
            column for column in df.columns
            if lag_format(base, '') in column
        )
    selected.append('hour')
    selected.extend(cols)

    is_ozone = self.target == 'O3'
    if not is_london and not is_ozone:
        selected.append('last_rain')
    selected.remove(t)

    if is_ozone:
        old_prefix = 'weather_clu'
        selected = [
            'weather' + name[len(old_prefix):]
            if name.startswith(old_prefix) else name
            for name in selected
        ]
    return selected
def _gen_from_dict(d): res = [] for k, v in d.items(): if type(v) == int: v = range(v + 1) for i in v: res.append(lag_format(k, i)) return res
def __init__(self, args, creator=dataset.create_dataset1):
    """Create the dataset and optionally blank pollutant lags from a date on.

    Args:
        args: Options object; ``data_path``, ``lag`` and ``predict_date``
            are read from it.
        creator: Callable invoked as
            ``creator(args.data_path, MaxLagging=args.lag)``; its result is
            stored as ``self.dataset``.

    When ``args.predict_date`` is not ``None``, every lag column
    ``lag_format(gas, i)`` for ``gas`` in PM2.5 / PM10 / O3 and ``i`` in
    ``0..args.lag`` is set to NaN on the rows whose ``utc_time`` is at or
    after that date.
    """
    self.args = args
    self.dataset = creator(args.data_path, MaxLagging=args.lag)

    cutoff = args.predict_date
    if cutoff is not None:
        gases = ('PM2.5', 'PM10', 'O3')
        masked_columns = (
            lag_format(gas, lag)
            for gas in gases
            for lag in range(args.lag + 1)
        )
        for column in masked_columns:
            # Hide everything observed at or after the prediction date.
            self.dataset.loc[self.dataset.utc_time >= cutoff, column] = np.nan

    # NOTE(review): assumed to run unconditionally (outside the `if`) -- confirm.
    print('Latest of dataset: ', self.dataset.utc_time.max())
def genNext(X_test, y_pred, features, ref):
    """Build the one-row feature frame for the next step from the current one.

    Starting from ``ref``, every feature named ``<name>_lag_<k>`` is shifted
    by one step: lag ``k > 1`` takes the current row's lag ``k - 1``, and
    lag ``k <= 1`` takes the current row's ``<name>`` column (never for the
    target).  The target's lag-1 column is then filled with ``y_pred`` when
    not deploying or when that column is NaN.

    Args:
        X_test: Single-row frame holding the current step's features.
        y_pred: Prediction for the current step.
        features: Iterable of feature column names to roll forward.
        ref: Frame used as the template for the next step; must not be
            ``None``.

    Returns:
        A re-indexed copy of ``ref`` with the lag columns updated.

    Relies on the names ``target``, ``deploy`` and ``lag_format`` from the
    enclosing scope.
    """
    assert len(X_test) == 1
    assert ref is not None

    # Both frames are re-indexed from 0 so the column assignments below,
    # which align on the index, line up row for row.
    next_row = ref.reset_index(drop=True)
    current = X_test.reset_index(drop=True)

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    lag_pattern = re.compile(r'(.*?)_lag_(\d+)')
    for c in features:
        match = lag_pattern.match(c)
        if match is None:
            continue
        name = match.group(1)
        lag_idx = int(match.group(2))
        if lag_idx > 1:
            previous = '{}_lag_{}'.format(name, lag_idx - 1)
            if previous in current.columns:
                next_row[c] = current[previous]
        # NOTE(review): nesting reconstructed -- this branch is assumed to
        # pair with `lag_idx > 1`; confirm against the original layout.
        elif name in current.columns and name != target:
            next_row[c] = current[name]

    target_lag1 = lag_format(target, 1)
    if not deploy or next_row[target_lag1].isna().any():
        next_row[target_lag1] = y_pred
    return next_row
def genFeatures(self, t, df, **kwargs):
    """Assemble the list of input feature names used to predict gas ``t``.

    ``Strategy.genFeatures`` is invoked first and its return value is
    discarded.  The result then contains every column of ``df`` whose name
    contains the lag prefix ``lag_format(c, '')`` of a base column ``c``,
    followed by ``'hour'`` and the base columns, with ``t`` removed.

    Args:
        t: Name of the target gas column.
        df: Object exposing ``columns`` (an iterable of column names).
        **kwargs: Forwarded to ``Strategy.genFeatures``.

    Returns:
        list: Feature names.

    For stations whose id starts with an upper-case letter,
    ``'London city'`` is printed and ``'weather'`` is dropped from the base
    columns.
    """
    Strategy.genFeatures(self, t, df, **kwargs)

    cols = self.lag_cols + [t]
    if self.stationId[0].isupper():
        print('London city')
        cols.remove('weather')

    selected = []
    for base in cols:
        selected.extend(
            column for column in df.columns
            if lag_format(base, '') in column
        )
    selected.append('hour')
    selected.extend(cols)
    selected.remove(t)
    return selected
def genFeatures(self, t, df, **kwargs):
    """Return the stored features for this station and gas, filling lag gaps.

    Rows of ``self.features`` are filtered by station id, gas ``t`` and
    ``importance > self.thres``; their ``feature`` values form the result.
    If the result contains lags of the target itself (``<t>_lag_<k>``),
    every missing lag between 1 and the largest selected lag (exclusive) is
    appended so the target's lag window has no holes.

    Args:
        t: Name of the target gas.
        df: Unused; accepted for interface compatibility.
        **kwargs: Unused; accepted for interface compatibility.

    Returns:
        list: Selected feature names followed by any gap-filling lag names.
        The list is also printed.
    """
    # The station is taken from the instance; the previous code referenced
    # a name `s` that was never bound inside this function.
    station = self.stationId
    table = self.features
    chosen = table[
        (table.stationId == station)
        & (table.gas == t)
        & (table.importance > self.thres)
    ]
    features = list(chosen.feature)

    # Escape the gas name: 'PM2.5' contains '.', which would otherwise match
    # any character.
    self_lag_pattern = re.compile(r'{}_lag_(\d+)'.format(re.escape(t)))
    self_lag = set()
    for f in features:
        match = self_lag_pattern.match(f)
        if match is not None:
            self_lag.add(int(match.group(1)))

    # No selected target lag means nothing to fill (and max() of an empty
    # set would raise).
    if self_lag:
        for i in range(1, max(self_lag)):
            if i not in self_lag:
                features.append(lag_format(t, i))
    print(features)
    return features
def genFeatures(self, t, df, **kwargs):
    """Assemble the list of input feature names used to predict gas ``t``.

    ``Strategy.genFeatures`` is invoked first and its return value is
    discarded; ``self.ld`` is read afterwards (presumably set by that call
    -- confirm).  The result contains every column of ``df`` whose name
    contains the lag prefix ``lag_format(c, '')`` of a base column ``c``,
    followed by ``'hour'`` and the base columns, plus ``'rain_hours'`` when
    ``self.ld`` is ``False``.  The target ``t`` is removed.

    Args:
        t: Name of the target gas column.
        df: Object exposing ``columns`` (an iterable of column names).
        **kwargs: Forwarded to ``Strategy.genFeatures``.

    Returns:
        list: Feature names.

    When ``self.ld`` is ``True``, ``'London city'`` is printed and
    ``'weather_clu'`` is dropped from the base columns.
    """
    Strategy.genFeatures(self, t, df, **kwargs)

    cols = self.lag_cols + [t]
    london_flag = self.ld
    # Identity checks are deliberate: a flag that is neither True nor False
    # triggers neither branch.
    if london_flag is True:
        print('London city')
        cols.remove('weather_clu')

    selected = []
    for base in cols:
        selected.extend(
            column for column in df.columns
            if lag_format(base, '') in column
        )
    selected.append('hour')
    selected.extend(cols)
    if london_flag is False:
        selected.append('rain_hours')
    selected.remove(t)
    return selected