/
regressor.py
38 lines (34 loc) · 1.4 KB
/
regressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from sklearn.ensemble import RandomForestRegressor
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_regression
# Value passed as ``n_jobs`` to the RandomForestRegressor built in
# ``Regressor.__init__``.
N_JOBS = 1
class Regressor(BaseEstimator):
    """Random-forest regressor on spatially agglomerated gridded inputs.

    Both ``fit`` and ``predict`` take a 4-D array ``X`` whose shape unpacks
    as ``(n_samples, n_lags, n_lats, n_lons)``.  Every ``(sample, lag)``
    grid is flattened to one row, standardized, and reduced to 500 features
    by a ``FeatureAgglomeration`` constrained to the lat/lon grid
    connectivity.  The reduced lags of each sample are then concatenated
    into a single feature row for the random forest.
    """

    def __init__(self):
        # Single-step pipeline wrapping the random forest.
        self.clf = Pipeline([
            ("RF", RandomForestRegressor(n_estimators=200, max_depth=15,
                                         n_jobs=N_JOBS)),
        ])
        self.scaler = StandardScaler()
        self.agglo = FeatureAgglomeration(n_clusters=500)

    def fit(self, X, y):
        """Fit the scaler, the feature agglomeration and the forest.

        Parameters
        ----------
        X : array with 4 dimensions ``(n_samples, n_lags, n_lats, n_lons)``
            Must support ``.shape``, ``.reshape`` and NumPy-style indexing.
        y : array exposing ``.ravel()``
            Targets; flattened to 1-D before fitting.

        Returns
        -------
        self : Regressor
            The fitted estimator, per the scikit-learn convention, so that
            calls can be chained (``Regressor().fit(X, y).predict(X)``).
        """
        y = y.ravel()
        n_samples, n_lags, n_lats, n_lons = X.shape

        # Scaling statistics are learned from the last lag only
        # (index -1 on axis 1), then applied to every lag below.
        self.scaler.fit(X[:, -1].reshape(n_samples, -1))

        # One row per (sample, lag) pair, one column per grid cell.
        X = X.reshape(n_lags * n_samples, -1)

        # Restrict cluster merges to neighbouring cells of the lat/lon grid.
        self.agglo.set_params(connectivity=grid_to_graph(n_lats, n_lons))

        X = self.scaler.transform(X)
        X = self.agglo.fit_transform(X)

        # Regroup the reduced lags so that each sample is a single row.
        X = X.reshape(n_samples, -1)
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        """Predict targets for ``X`` using the fitted transforms and forest.

        Parameters
        ----------
        X : array with 4 dimensions ``(n_samples, n_lags, n_lats, n_lons)``
            Transformed with the scaler and agglomeration fitted in ``fit``.

        Returns
        -------
        The output of the underlying pipeline's ``predict`` for the
        ``n_samples`` transformed rows.
        """
        n_samples, n_lags, n_lats, n_lons = X.shape

        # Same flatten -> scale -> agglomerate -> regroup sequence as fit,
        # but with transform-only calls.
        X = X.reshape(n_lags * n_samples, -1)
        X = self.scaler.transform(X)
        X = self.agglo.transform(X)
        X = X.reshape(n_samples, -1)
        return self.clf.predict(X)