Example #1
import logging
from time import time

from sklearn.decomposition import PCA
from sklearn.manifold import MDS, TSNE

logger = logging.getLogger(__name__)  # module-level logger (assumed setup)


class Embedder(object):
    def __init__(self, method_name, *args, **kwargs):
        self.projector = None
        self.method_name = method_name
        if method_name == "tsne":
            self.projector = TSNE(*args, **kwargs)
        elif method_name == "pca":
            self.projector = PCA(*args, **kwargs)
        elif method_name == "mds":
            self.projector = MDS(n_jobs=-1, *args, **kwargs)
        else:
            logger.error("the projection method is not supported now!!")

    def fit(self, X, y):
        t = time()
        self.projector.fit(X, y)
        logger.info("{} fit function time cost: {}".format(self.method_name, time()-t))

    def transform(self, X, y=None):
        t = time()
        # scikit-learn transformers take only X in transform(); return the result
        res = self.projector.transform(X)
        logger.info("{} transform function time cost: {}".format(self.method_name, time()-t))
        return res

    def fit_transform(self, X, y):
        t = time()
        res = self.projector.fit_transform(X, y)
        logger.info("{} fit_transform function time cost: {}".format(self.method_name, time()-t))
        return res
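
A minimal usage sketch for the wrapper above, assuming the imports shown with the class; the data here is random and purely illustrative.

import logging
import numpy as np

logging.basicConfig(level=logging.INFO)  # so the timing messages are visible

X = np.random.rand(200, 10)
y = np.zeros(200)  # labels are ignored by PCA but kept for the common (X, y) interface

embedder = Embedder("pca", n_components=2)
X_2d = embedder.fit_transform(X, y)
print(X_2d.shape)  # (200, 2)
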
Example #2

import pandas as pd
from sklearn.manifold import MDS

# 1 MDS (Multidimensional Scaling) -----------------------------------------------------------------------------

# Set the parameters
n_components = 2
n_init = 12
max_iter = 1200
metric = True
n_jobs = 4
random_state = 2018

# Create the estimator
mds = MDS(n_components=n_components, n_init=n_init, max_iter=max_iter,
          metric=metric, n_jobs=n_jobs, random_state=random_state)

# Fit and apply the estimator on the first 1001 training rows
# (scikit-learn's MDS has no separate transform method, so fit_transform is used)
X_train_mds = mds.fit_transform(X_train.loc[0:1000, :])

# Convert to a DataFrame
X_train_mds = pd.DataFrame(data=X_train_mds, index=train_index[0:1001])

# Plot the embedding
scatterPlot(X_train_mds, y_train, "Multidimensional Scaling")
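
The snippet above assumes that X_train, y_train, train_index, and the scatterPlot helper are defined earlier in the surrounding notebook. A hypothetical minimal scatterPlot is sketched here only so the example can be run standalone; the column names and plotting style are assumptions.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def scatterPlot(xDF, yDF, algoName):
    # Keep the first two embedded dimensions and attach the labels.
    tempDF = pd.DataFrame(data=xDF.loc[:, 0:1], index=xDF.index)
    tempDF = pd.concat((tempDF, yDF), axis=1, join="inner")
    tempDF.columns = ["First Vector", "Second Vector", "Label"]
    # Scatter the points, colored by label.
    sns.lmplot(x="First Vector", y="Second Vector", hue="Label",
               data=tempDF, fit_reg=False)
    plt.title(algoName)
    plt.show()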



Example #3
import numpy as np
from scipy import sparse
from scipy.sparse import linalg as slinalg
from sklearn.decomposition import (PCA, FactorAnalysis, IncrementalPCA,
                                   KernelPCA, SparsePCA, TruncatedSVD)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import (BayesianRidge, ElasticNet, Lasso,
                                  LinearRegression, Ridge)
from sklearn.manifold import MDS
from sklearn.svm import LinearSVR, NuSVR

# WPCA, EMPCA, RobustPCA, tga, FastICA, KICA, EBLinearRegression and the NRMSE
# error function are project-specific / third-party helpers assumed to be importable.


class ESN(object):
    def __init__(self, n_internal_units = 100, spectral_radius = 0.9, connectivity = 0.5, input_scaling = 0.5, input_shift = 0.0,
                 teacher_scaling = 0.5, teacher_shift = 0.0, noise_level = 0.01):
        # Initialize attributes
        self._n_internal_units = n_internal_units
        self._spectral_radius = spectral_radius
        self._connectivity = connectivity

        self._input_scaling = input_scaling
        self._input_shift = input_shift
        self._teacher_scaling = teacher_scaling
        self._teacher_shift = teacher_shift
        self._noise_level = noise_level
        self._dim_output = None

        # The weights will be set later, when data is provided
        self._input_weights = None

        # Regression method and embedding method.
        # Initialized to None for now. Will be set during 'fit'.
        self._regression_method = None
        self._embedding_method = None

        # Generate internal weights
        self._internal_weights = self._initialize_internal_weights(n_internal_units, connectivity, spectral_radius)

    def fit(self, Xtr, Ytr, n_drop = 100, regression_method = 'linear', regression_parameters = None,
            embedding = 'identity', n_dim = 3, embedding_parameters = None):
        _, _ = self._fit_transform(Xtr = Xtr, Ytr = Ytr, n_drop = n_drop,
                                   regression_method = regression_method,
                                   regression_parameters = regression_parameters,
                                   embedding = embedding, n_dim = n_dim,
                                   embedding_parameters = embedding_parameters)

        return

    def _fit_transform(self, Xtr, Ytr, n_drop = 100, regression_method = 'linear', regression_parameters = None,
                       embedding = 'identity', n_dim = 3, embedding_parameters = None):
        n_data, dim_data = Xtr.shape
        _, dim_output = Ytr.shape

        self._dim_output = dim_output

        # If this is the first time the network is tuned, set the input weight.
        # The weights are dense and uniformly distributed in [-1.0, 1.0]
        if (self._input_weights is None):
            self._input_weights = 2.0*np.random.rand(self._n_internal_units, dim_data) - 1.0

        # Initialize regression method
        if (regression_method == 'nusvr'):
            # NuSVR, RBF kernel
            C, nu, gamma = regression_parameters
            self._regression_method = NuSVR(C = C, nu = nu, gamma = gamma)

        elif (regression_method == 'linsvr'):
            # Linear SVR
            C, epsilon = regression_parameters
            self._regression_method = LinearSVR(C = C, epsilon = epsilon)

        elif (regression_method == 'enet'):
            # Elastic net
            alpha, l1_ratio = regression_parameters
            self._regression_method = ElasticNet(alpha = alpha, l1_ratio = l1_ratio)

        elif (regression_method == 'ridge'):
            # Ridge regression
            self._regression_method = Ridge(alpha = regression_parameters)

        elif (regression_method == 'lasso'):
            # LASSO
            self._regression_method = Lasso(alpha = regression_parameters)

        elif (regression_method == 'bayeridge'):
            # Bayesian ridge regression
            lambda_1, lambda_2, alpha_1, alpha_2 = regression_parameters
            self._regression_method = BayesianRidge(lambda_1=lambda_1,lambda_2=lambda_2,alpha_1=alpha_1,alpha_2=alpha_2)

        elif (regression_method == 'gpr'):
            # Gaussian process regression
            self._regression_method = GaussianProcessRegressor()

        elif (regression_method == 'bayelinear'):
            # Bayesian linear regression (EBLinearRegression helper)
            self._regression_method = EBLinearRegression()

        else:
            # Use canonical linear regression
            self._regression_method = LinearRegression()

        # Initialize embedding method
        if (embedding == 'identity'):
            self._embedding_dimensions = self._n_internal_units
        else:
            self._embedding_dimensions = n_dim

            if (embedding == 'kpca'):
                # Kernel PCA with RBF kernel
                self._embedding_method = KernelPCA(n_components = n_dim, kernel = 'rbf', gamma = embedding_parameters)

            elif (embedding == 'pca'):
                # PCA
                self._embedding_method = PCA(n_components = n_dim)

            elif (embedding == 'fa'):
                # Factor analysis
                self._embedding_method = FactorAnalysis(n_components = n_dim)

            elif (embedding == 'spca'):
                # Sparse PCA
                self._embedding_method = SparsePCA(n_components = n_dim, alpha = embedding_parameters)

            elif (embedding == 'ipca'):
                # Incremental PCA
                self._embedding_method = IncrementalPCA(n_components = n_dim)

            elif (embedding == 'tsvd'):
                # Truncated SVD
                if n_dim >= self._n_internal_units:
                    self._embedding_method = TruncatedSVD(n_components = self._n_internal_units-1)
                else:
                    self._embedding_method = TruncatedSVD(n_components = n_dim)

            elif (embedding == 'wpca'):
                # Weighted PCA
                self._embedding_method = WPCA(n_components=n_dim)

            elif (embedding == 'rpca'):
                # Robust PCA
                self._embedding_method = RobustPCA.RobustPCA()

            elif (embedding == 'tga'):
                # Trimmed Grassmann Average (robust PCA)
                self._embedding_method = tga.TGA(n_components=n_dim, random_state=1)

            elif (embedding == 'empca'):
                # Expectation Maximization PCA
                self._embedding_method = EMPCA(n_components=n_dim)

            elif (embedding == 'mds'):
                # Multi-Dimensional Scaling (MDS)
                self._embedding_method = MDS(n_components=n_dim)

            elif (embedding == 'ica'):
                # FastICA (independent component analysis)
                alpha = embedding_parameters
                self._embedding_method = FastICA.FastICA(n_components=n_dim)
                #self._embedding_method = FastICA.FastICA(n_components=n_dim, fun_args={'alpha':alpha})
                #self._embedding_method = FastICA.FastICA(n_components = n_dim, algorithm = 'deflation')

            elif (embedding == 'kica'):
                # Kernel ICA
                self._embedding_method = KICA.KICA(n_components=n_dim)

            else:
                raise ValueError("Unknown embedding method")

        # Calculate states/embedded states.
        # Note: If the embedding is 'identity', embedded states will be equal to the states.
        states, embedded_states,_ = self._compute_state_matrix(X = Xtr, Y = Ytr, n_drop = n_drop)

        # Train output
        self._regression_method.fit(np.concatenate((embedded_states, self._scaleshift(Xtr[n_drop:, :], self._input_scaling, self._input_shift)), axis=1),
                                        self._scaleshift(Ytr[n_drop:, :], self._teacher_scaling,self._teacher_shift).flatten())

        return states, embedded_states

    def predict(self, X, Y = None, n_drop = 100, error_function = NRMSE, scaler = None):
        Yhat, error, _, _ = self._predict_transform(X = X, Y = Y, n_drop = n_drop, error_function = error_function, scaler = scaler)

        return Yhat, error

    def _predict_transform(self, X, Y = None, n_drop = 100, error_function = NRMSE, scaler = None):
        # Predict outputs
        states, embedded_states, Yhat = self._compute_state_matrix(X = X, n_drop = n_drop)

        # Revert scale and shift
        Yhat = self._uscaleshift(Yhat, self._teacher_scaling, self._teacher_shift)

        # Compute error only if ground truth is provided; otherwise report None
        error = None
        if (Y is not None):
            error = error_function(Y[n_drop:,:], Yhat, scaler)

        return Yhat, error, states, embedded_states

    def _compute_state_matrix(self, X, Y = None, n_drop = 100):
        n_data, _ = X.shape

        # Initial values
        previous_state = np.zeros((1, self._n_internal_units), dtype=float)
        previous_output = np.zeros((1, self._dim_output), dtype=float)

        # Storage
        state_matrix = np.empty((n_data - n_drop, self._n_internal_units), dtype=float)
        embedded_states = np.empty((n_data - n_drop, self._embedding_dimensions), dtype=float)
        outputs = np.empty((n_data - n_drop, self._dim_output), dtype=float)

        for i in range(n_data):
            # Process inputs
            previous_state = np.atleast_2d(previous_state)
            current_input = np.atleast_2d(self._scaleshift(X[i, :], self._input_scaling, self._input_shift))

            # Calculate state. Add noise and apply nonlinearity.
            state_before_tanh = self._internal_weights.dot(previous_state.T) + self._input_weights.dot(
                current_input.T)
            state_before_tanh += np.random.rand(self._n_internal_units, 1) * self._noise_level
            previous_state = np.tanh(state_before_tanh).T

            # Embed data and perform regression if applicable.
            if (Y is not None):
                # If we are training, the previous output should be a scaled and shifted version of the ground truth.
                previous_output = self._scaleshift(Y[i, :], self._teacher_scaling, self._teacher_shift)
            else:
                # Should the data be embedded?
                if (self._embedding_method is not None):
                    current_embedding = self._embedding_method.transform(previous_state)
                else:
                    current_embedding = previous_state

                # Perform regression
                previous_output = self._regression_method.predict(
                    np.concatenate((current_embedding, current_input), axis=1))

            # Store everything after the dropout period
            if (i > n_drop - 1):
                state_matrix[i - n_drop, :] = previous_state.flatten()

                # Only save embedding for test data.
                # In training, we do it after computing the whole state matrix.
                if (Y is None):
                    embedded_states[i - n_drop, :] = current_embedding.flatten()

                outputs[i - n_drop, :] = previous_output.flatten()

        # Now, embed the data if we are in training
        if (Y is not None):
            if (self._embedding_method is not None):
                embedded_states = self._embedding_method.fit_transform(state_matrix)
            else:
                embedded_states = state_matrix

        return state_matrix, embedded_states, outputs

    def _scaleshift(self, x, scale, shift):
        # Scales and shifts x by scale and shift
        return (x*scale + shift)

    def _uscaleshift(self, x, scale, shift):
        # Reverts the scale and shift applied by _scaleshift
        return ( (x - shift)/float(scale) )

    def _initialize_internal_weights(self, n_internal_units, connectivity, spectral_radius):
        # The eigs function might not converge. Attempt until it does.
        convergence = False
        while (not convergence):
            # Generate sparse, uniformly distributed weights.
            internal_weights = sparse.rand(n_internal_units, n_internal_units, density=connectivity).todense()

            # Ensure that the nonzero values are uniformly distributed in [-0.5, 0.5]
            internal_weights[np.where(internal_weights > 0)] -= 0.5

            try:
                # Get the largest eigenvalue
                w,_ = slinalg.eigs(internal_weights, k=1, which='LM')

                convergence = True

            except Exception:
                # eigs occasionally fails to converge; retry with new random weights
                continue

        # Adjust the spectral radius.
        internal_weights /= np.abs(w)/spectral_radius

        return internal_weights
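
A minimal usage sketch for the ESN above, assuming the scikit-learn/scipy imports shown with the class and that the project-specific helpers (e.g. NRMSE) are importable; the sine-wave data is synthetic and purely illustrative.

import numpy as np

np.random.seed(0)
t = np.linspace(0, 20 * np.pi, 1500)
X = np.sin(t).reshape(-1, 1)          # 1-D input series
Y = np.sin(t + 0.5).reshape(-1, 1)    # target: a phase-shifted copy of the input

Xtr, Ytr = X[:1000], Y[:1000]
Xte = X[1000:]

esn = ESN(n_internal_units=100, spectral_radius=0.9, connectivity=0.5)
esn.fit(Xtr, Ytr, n_drop=100, regression_method='ridge', regression_parameters=1.0)

# Without ground truth the returned error is None; only the predictions are used here.
Yhat, _ = esn.predict(Xte, n_drop=100)
print(Yhat.shape)   # (400, 1)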