class Data(object):
    """Prepare a feature matrix and target for model fitting.

    On construction the features are copied into a DataFrame, missing
    values are imputed, object-dtype columns are one-hot encoded, rows are
    optionally shuffled together with the target, and the features are
    standardized.

    Attributes:
        X: Output of ``StandardScaler().fit_transform`` on the imputed,
            encoded (and optionally shuffled) features.
        y: Target values, shuffled alongside ``X`` when ``shuffle`` is true.
        shape_before: Shape of the features before imputation and encoding.
        col_was_null: Columns holding at least one null before imputation.
        del_columns: Object-dtype columns replaced by one-hot columns.
    """

    def __init__(self, X, y, shuffle=True, random_state=None):
        """Build the preprocessed data set.

        Args:
            X: Features, either a ``numpy.ndarray`` or a
                ``pandas.DataFrame``. The input is not modified.
            y: Target. Wrapped in a ``pandas.Series`` when ``X`` is an
                array; otherwise it must provide ``copy(deep=True)``
                (e.g. a ``pandas.Series``).
            shuffle: Shuffle the rows of ``X`` and ``y`` together.
            random_state: Forwarded to the shuffle function.

        Raises:
            ValueError: If ``X`` is neither an ndarray nor a DataFrame.
        """
        if isinstance(X, np.ndarray):
            self.X = pd.DataFrame(X)
            self.y = pd.Series(y)
        elif isinstance(X, pd.DataFrame):
            self.X = X.copy(deep=True)
            self.y = y.copy(deep=True)
        else:
            # Validate before touching X so unsupported inputs get this
            # error rather than a failure from a missing/incompatible
            # ``copy`` method.
            raise ValueError('%s is not supported' % type(X))

        self.shape_before = self.X.shape
        self.__imputer()
        self.__encoder()
        if shuffle:
            self.X, self.y = sk_shuffle(self.X, self.y,
                                        random_state=random_state)
        self.X = StandardScaler().fit_transform(self.X)

    @staticmethod
    def _fill_value(column):
        """Return the replacement for nulls in a single column."""
        if column.dtype == np.dtype('O'):
            # value_counts() sorts by frequency, so the first index entry
            # is the most frequent value.
            return column.value_counts().index[0]
        if column.dtype == np.dtype('int'):
            return column.median()
        return column.mean()

    def __imputer(self):
        """Fill nulls in ``self.X`` and record which columns had any.

        Object columns are filled with their most frequent value, columns
        whose dtype equals ``np.dtype('int')`` with the median, and every
        other column with the mean.
        """
        fill = pd.Series(
            [self._fill_value(self.X[c]) for c in self.X],
            index=self.X.columns,
        )
        # Must be recorded before fillna removes the nulls.
        self.col_was_null = [
            c for c in self.X if pd.isnull(self.X[c]).sum() > 0
        ]
        self.X = self.X.fillna(fill)

    def __encoder(self):
        """Replace every object-dtype column by one-hot indicator columns.

        New columns are named ``<column>_<class>`` and appended after the
        existing columns; the source columns are then removed and listed
        in ``self.del_columns``.
        """
        self.del_columns = []
        # Snapshot names and dtypes up front and address columns strictly
        # by position, so integer column labels cannot be confused with
        # positions.
        names = list(self.X.columns)
        dtypes = list(self.X.dtypes)
        encoded = []
        for i, (name, dtype) in enumerate(zip(names, dtypes)):
            if dtype != np.dtype('O'):
                continue
            enc = LabelEncoder()
            col_enc = enc.fit_transform(self.X.iloc[:, i])
            col_onehot = OneHotEncoder().fit_transform(
                col_enc.reshape(-1, 1)).toarray()
            col_names = [str(name) + '_' + c for c in enc.classes_]
            encoded.append(pd.DataFrame(col_onehot, columns=col_names,
                                        index=self.X.index))
            self.del_columns.append(name)
        if encoded:
            # One concat instead of re-building the frame per column.
            self.X = pd.concat([self.X] + encoded, axis=1)
        for col in self.del_columns:
            del self.X[col]
# Compute the channel slopes of every currency over a single window,
# standardize them, and plot the result.
###############################################################################
if True:
    # Parameters
    currencies = ['aud', 'cad', 'eur', 'gbp', 'nzd', 'usd']
    windows = np.array([360])

    # Collect the indicator outputs, one column per currency.
    # NOTE(review): get_channel_mean_pos_std is defined elsewhere; its result
    # is indexed by 'pos', 'std' and 'slope' -- presumably a DataFrame.
    positions = pd.DataFrame()
    deviations = pd.DataFrame()
    slopes = pd.DataFrame()
    for currency in currencies:
        pos = get_channel_mean_pos_std(
            cu[currency].values.astype(float),
            windows,
        )
        positions[currency] = pos['pos'].values.ravel()
        deviations[currency] = pos['std'].values.ravel()
        slopes[currency] = pos['slope'].values.ravel()

    # Standardize: nulls become 0 before scaling, then the scaled values
    # are wrapped back into a frame with the currency column names.
    slopes = pd.DataFrame(
        StandardScaler().fit_transform(slopes.fillna(0)),
        columns=currencies,
    )

    # Plot the standardized slopes with a grey zero line for reference.
    slopes.plot()
    plt.plot(np.zeros(len(slopes)), color='grey')

    # Disabled: plot a single instrument.
    # plt.figure()
    # ratios.EUR_USD.plot()