コード例 #1
0
class Data(object):
    """Preprocess a feature table and its target for model fitting.

    On construction the features are copied, missing values are imputed,
    object-dtype columns are replaced by one-hot columns, the rows are
    optionally shuffled together with the target, and finally the features
    are passed through ``StandardScaler().fit_transform``.

    Attributes set by the constructor:
        X: the scaled features, as returned by
            ``StandardScaler().fit_transform``.
        y: the target values, shuffled together with ``X`` when requested.
        shape_before: shape of the feature frame before any preprocessing.
        col_was_null: labels of the columns that contained missing values.
        del_columns: labels of the object-dtype columns that were replaced
            by one-hot columns.
    """

    def __init__(self, X, y, shuffle=True, random_state=None):
        """Copy the inputs and run the whole preprocessing pipeline.

        Args:
            X: features, either a ``numpy.ndarray`` or a
                ``pandas.DataFrame``.
            y: target values; pandas objects are deep-copied, anything
                else is wrapped in a ``pandas.Series``.
            shuffle: when true, shuffle the rows of ``X`` and ``y`` together.
            random_state: forwarded to the shuffle helper.

        Raises:
            ValueError: if ``X`` is neither an ndarray nor a DataFrame.
        """
        # Validate the type up front: calling ``copy(deep=True)`` on an
        # arbitrary object would fail with an unrelated error before the
        # intended ValueError could be raised.
        if isinstance(X, np.ndarray):
            self.X = pd.DataFrame(X)
        elif isinstance(X, pd.DataFrame):
            self.X = X.copy(deep=True)
        else:
            raise ValueError('%s is not supported' % type(X))

        # Only pandas objects understand ``copy(deep=True)``; wrap any other
        # array-like so a DataFrame ``X`` can be paired with a plain ``y``.
        if isinstance(y, (pd.Series, pd.DataFrame)):
            self.y = y.copy(deep=True)
        else:
            self.y = pd.Series(y)

        self.shape_before = self.X.shape

        self.__imputer()

        self.__encoder()

        if shuffle:
            self.X, self.y = sk_shuffle(self.X,
                                        self.y,
                                        random_state=random_state)

        self.X = StandardScaler().fit_transform(self.X)

    def __imputer(self):
        """Fill missing values column by column and record affected columns.

        Object columns are filled with their most frequent value, columns
        whose dtype equals ``np.dtype('int')`` with their median, and all
        remaining columns with their mean.
        """
        object_dtype = np.dtype('O')
        int_dtype = np.dtype('int')

        fill_values = []
        for name in self.X:
            column = self.X[name]
            if column.dtype == object_dtype:
                # value_counts() sorts by frequency, so the first label is
                # the most common value.
                fill_values.append(column.value_counts().index[0])
            elif column.dtype == int_dtype:
                fill_values.append(column.median())
            else:
                fill_values.append(column.mean())
        fill = pd.Series(fill_values, index=self.X.columns)

        # Must be captured before filling, otherwise nothing is null anymore.
        self.col_was_null = [
            name for name in self.X if self.X[name].isnull().any()
        ]
        self.X = self.X.fillna(fill)

    def __encoder(self):
        """Replace every object-dtype column by one-hot indicator columns.

        The new columns are appended on the right and named
        ``<column>_<class>``; the original object columns are removed and
        their labels are stored in ``self.del_columns``.
        """
        self.del_columns = []
        object_dtype = np.dtype('O')
        onehot_frames = []

        # Pair labels with dtypes positionally instead of indexing
        # ``dtypes`` with an integer, which is ambiguous on a label index.
        columns_and_dtypes = zip(self.X.columns, self.X.dtypes)
        for position, (name, dtype) in enumerate(columns_and_dtypes):
            if dtype != object_dtype:
                continue
            enc = LabelEncoder()
            # ``iloc`` is the positional accessor that replaced ``icol``.
            col_enc = enc.fit_transform(self.X.iloc[:, position])
            col_onehot = np.array(OneHotEncoder().fit_transform(
                col_enc.reshape(-1, 1)).todense())
            # str() on the class as well: encoded values need not be strings.
            col_names = [str(name) + '_' + str(c) for c in enc.classes_]
            onehot_frames.append(pd.DataFrame(col_onehot,
                                              columns=col_names,
                                              index=self.X.index))
            self.del_columns.append(name)

        if onehot_frames:
            self.X = pd.concat([self.X] + onehot_frames, axis=1)
        for col in self.del_columns:
            del self.X[col]
コード例 #2
0
# Channel slopes for every currency over a single window: standardize, plot.
###############################################################################
if True:

    # Settings
    windows = np.array([360])
    currencies = ['aud', 'cad', 'eur', 'gbp', 'nzd', 'usd']

    # One column per currency for each value returned by the channel helper
    positions, deviations, slopes = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    for currency in currencies:
        pos = get_channel_mean_pos_std(
            cu[currency].values.astype(float), windows)
        positions[currency] = pos['pos'].values.ravel()
        deviations[currency] = pos['std'].values.ravel()
        slopes[currency] = pos['slope'].values.ravel()

    # Zero-fill missing slopes, standardize, and restore the currency labels
    slopes = pd.DataFrame(StandardScaler().fit_transform(slopes.fillna(0)),
                          columns=currencies)

    # Draw the slopes on top of a grey zero line of the same length
    slopes.plot()
    plt.plot(np.zeros(len(slopes)), color='grey')

#    # Plot an instrument
#    plt.figure()
#    ratios.EUR_USD.plot()