Example #1
1
class EnsembleIOC(BaseEstimator, RegressorMixin):
    '''
    Handling state/state pairs as input
    '''
    def __init__(self,  n_estimators=20,
                        max_depth=5, min_samples_split=10, min_samples_leaf=10, clustering=0,
                        random_state=0,
                        em_itrs=5,
                        regularization=0.05,
                        passive_dyn_func=None,
                        passive_dyn_ctrl=None,
                        passive_dyn_noise=None,
                        verbose=False):
        '''
        n_estimators        - number of ensembled models
        ...                 - a batch of parameters used for RandomTreesEmbedding, see relevant documents
        clustering          - whether or not to force a fixed number of subsets. If non-zero, call a clustering scheme with the learned metric
        em_itrs             - maximum number of EM iterations to take if one would like to increase the likelihood of the MaxEnt approximation
        regularization      - small positive scalar to prevent singularity of matrix inversion. This is especially necessary when passive dynamics
                              is considered. Notably, the underactuated system will assume zero covariance for the uncontrolled state dimensions, but this might
                              not be the case in reality since the collected data could be corrupted by noise.
        passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
        passive_dyn_ctrl    - function to return the control matrix which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                                note this implies a dynamical system with constant input gain. It could be extended to a state-dependent
                                input gain, in which case we would need a covariance for each data point
        verbose             - output training information
        '''
        BaseEstimator.__init__(self)

        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.min_samples_split=min_samples_split
        self.min_samples_leaf=min_samples_leaf
        self.clustering=clustering
        self.random_state=random_state
        self.em_itrs=em_itrs
        self.reg=regularization
        self.passive_dyn_func=passive_dyn_func
        self.passive_dyn_ctrl=passive_dyn_ctrl
        self.passive_dyn_noise=passive_dyn_noise
        self.verbose=verbose
        return

    def predict(self, X):
        n_samples, n_dim = X.shape

        # use the approximated GMM to capture the correlation, which provides an initialization for iterating
        # the MAP estimation
        tmp_gmm = gmm.GMM(  n_components=len(self.gmm_estimators_full_['weights']),
                            priors=np.array(self.gmm_estimators_full_['weights']),
                            means=np.array(self.gmm_estimators_full_['means']),
                            covariances=self.gmm_estimators_full_['covars'])

        init_guess, init_covar = tmp_gmm.predict_with_covariance(indices=range(n_dim), X=X)

        def objfunc(x, *args):
            prior_mu, prior_inv_var = args
            vals, grads = self.value_eval_samples_helper(np.array([x]), average=False, const=True)
            prior_prob = .5*(x - prior_mu).dot(prior_inv_var).dot(x - prior_mu)
            prior_grad = prior_inv_var.dot(x-prior_mu)
            return vals[0] + prior_prob, grads[0] + prior_grad

        res = []
        for sample_idx in range(n_samples):
            opt_res = sciopt.minimize(  fun=objfunc,
                                        x0=init_guess[sample_idx, :],
                                        args=(init_guess[sample_idx, :], np.linalg.pinv(init_covar[sample_idx])),
                                        method='BFGS',
                                        jac=True,
                                        options={'gtol': 1e-8, 'disp': False})
            # print opt_res.message, opt_res.x,
            # print opt_res.fun, opt_res.jac
            # print init_guess[sample_idx, :], init_covar[sample_idx], opt_res.x
            res.append(opt_res.x)
        res = np.array(res)
        return res

    def _check_grads(self, X):
        n_samples, n_dim = X.shape

        #predict the next state x_{t+1} given x_{t}
        tmp_gmm = gmm.GMM(  n_components=len(self.gmm_estimators_full_['weights']),
                            priors=np.array(self.gmm_estimators_full_['weights']),
                            means=np.array(self.gmm_estimators_full_['means']),
                            covariances=self.gmm_estimators_full_['covars'])

        init_guess, init_covar = tmp_gmm.predict_with_covariance(indices=range(n_dim), X=X)

        def objfunc(x, *args):
            prior_mu, prior_var = args
            vals, grads = self.value_eval_samples_helper(np.array([x]), average=False, const=True)
            prior_prob = .5*(x - prior_mu).dot(prior_var).dot(x - prior_mu)
            prior_grad = prior_var.dot(x-prior_mu)
            return vals[0] + prior_prob, grads[0] + prior_grad

        res = []
        for sample_idx in range(n_samples):
            def check_grad_fun(x):
                return objfunc(x, init_guess[sample_idx, :], init_covar[sample_idx])[0]
            def check_grad_fun_jac(x):
                return objfunc(x, init_guess[sample_idx, :], init_covar[sample_idx])[1]

            res.append(sciopt.check_grad(check_grad_fun, check_grad_fun_jac, X[sample_idx, :]))

        return np.mean(res)

    def fit(self, X, y=None):
        '''
        X - an array of concatenated features X_i = (x_{t-1}, x_{t}) corresponding to the infinite horizon case
        '''
        #check parameters...
        assert(type(self.n_estimators)==int)
        assert(self.n_estimators > 0)
        assert(type(self.max_depth)==int)
        assert(self.max_depth > 0)
        assert(type(self.min_samples_split)==int)
        assert(self.min_samples_split > 0)
        assert(type(self.min_samples_leaf)==int)
        assert(self.min_samples_leaf > 0)
        assert(type(self.em_itrs)==int)

        n_samples, n_dims = X.shape

        #an initial partitioning of data with random forest embedding
        self.random_embedding_mdl_ = RandomTreesEmbedding(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state
            )

        #we probably do not need the data type to differentiate whether it is a demonstration
        #of a trajectory or a commanded state, do we?
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            # self.random_embedding_mdl_.fit(X[:, X.shape[1]/2:])
            # indices = self.random_embedding_mdl_.apply(X[:, X.shape[1]/2:])
            self.random_embedding_mdl_.fit(X[:, :X.shape[1]/2])
            indices = self.random_embedding_mdl_.apply(X[:, :X.shape[1]/2])
            # X_tmp = np.array(X)
            # X_tmp[:, X.shape[1]/2:] = X_tmp[:, X.shape[1]/2:] - X_tmp[:, :X.shape[1]/2]
            # self.random_embedding_mdl_.fit(X_tmp)

            # indices = self.random_embedding_mdl_.apply(X_tmp)
        else:
            self.random_embedding_mdl_.fit(X)
            #figure out indices
            indices = self.random_embedding_mdl_.apply(X)

        #prepare ensemble for prediction
        self.random_prediction_mdl_ = RandomForestRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state
            )

        self.random_prediction_mdl_.fit(X[:, :X.shape[1]/2], X[:, X.shape[1]/2:])

        if self.clustering > 0:
            #we need to force the data into the given number of clusters using the random embeddings
            #first construct affinity
            #use extracted indices as sparse features to construct an affinity matrix
            if self.n_estimators > 1:
                if self.verbose:
                    print 'Building {0} subsets of data based on their random embedding similarity...'.format(self.clustering)
                #it makes sense to use the random embedding to do the clustering if we have ensembled features
                aff_mat = _affinity_matrix_from_indices(indices, 'binary')
                #using spectral mapping (Laplacian eigenmap)
                self.cluster = SpectralClustering(n_clusters=self.clustering, affinity='precomputed')
                self.cluster.fit(aff_mat)
            else:
                if self.verbose:
                    print 'Building {0} subsets of data based on their Euclidean similarity...'.format(self.clustering)
                #otherwise, use Euclidean distance; this should be enough when the state space is low-dimensional
                self.cluster = KMeans(n_clusters=self.clustering, max_iter=200, n_init=5)
                self.cluster.fit(X)

            partitioned_data = defaultdict(list)
            leaf_idx = defaultdict(set)
            weight_idx = defaultdict(float)
            for d_idx, d, p_idx in zip(range(len(X)), X, self.cluster.labels_):
                partitioned_data[0, p_idx].append(d)
                leaf_idx[0] |= {p_idx}
            for p_idx in range(self.clustering):
                weight_idx[0, p_idx] = 1./self.clustering
            num_estimators = 1
        else:
            partitioned_data = defaultdict(list)
            leaf_idx = defaultdict(set)
            weight_idx = defaultdict(float)
            #group data that belong to the same partition and compute the weights...
            #are weights really necessary for the EM steps? Hmm, they seem to be for the initialization
            #d_idx: data index; p_idx: partition index (comprised of estimator index and leaf index)
            for d_idx, d, p_idx in zip(range(len(X)), X, indices):
                for e_idx, l_idx in enumerate(p_idx):
                    partitioned_data[e_idx, l_idx].append(d)
                    leaf_idx[e_idx] |= {l_idx}

                for e_idx, l_idx in enumerate(p_idx):
                    weight_idx[e_idx, l_idx] = float(len(partitioned_data[e_idx, l_idx])) / len(X)
                    # weight_idx[e_idx, l_idx] = 1. / len(p_idx)
            num_estimators = self.n_estimators

        #for each grouped data partition, solve an easy IOC problem by assuming a quadratic cost-to-go function
        #note that, if the passive dynamics needs to be learned, extra steps are needed to train a regressor with weighted data
        #otherwise, just a simple Gaussian for each conditional probability distribution model
        self.estimators_ = []
        #another copy to store the parameters all together, for EM/evaluation on all of the models
        self.estimators_full_ = defaultdict(list)

        #<hyin/Feb-6th-2016> an estimator and leaf indexed structure to record the passive likelihood of data...
        passive_likelihood_dict = defaultdict(list)
        for e_idx in range(num_estimators):
            #for each estimator
            estimator_parms = defaultdict(list)
            for l_idx in leaf_idx[e_idx]:
                if self.verbose:
                    print 'Processing {0}-th estimator and {1}-th leaf/partition...'.format(e_idx, l_idx)
                #and for each data partition
                data_partition=np.array(partitioned_data[e_idx, l_idx])

                estimator_parms['means'].append(np.mean(data_partition, axis=0))
                estimator_parms['covars'].append(np.cov(data_partition.T) + np.eye(data_partition.shape[1])*self.reg)

                #for MaxEnt, uniform passive likelihood
                passive_likelihood_dict[e_idx, l_idx] = np.ones(len(data_partition)) / float(len(data_partition))


                estimator_parms['weights'].append(weight_idx[e_idx, l_idx])

            self.estimators_.append(estimator_parms)

        #can stop here or go for expectation maximization for each estimator...
        if self.em_itrs > 0:
            #prepare em results for each estimator
            em_res = [self._em_steps(e_idx, X, y) for e_idx in range(num_estimators)]

            self.estimators_ = em_res

        #record the gmm approximation
        self.gmm_estimators_ = copy.deepcopy(self.estimators_)
        self.gmm_estimators_full_ = defaultdict(list)

        for est in self.estimators_:
            for comp_idx in range(len(est['weights'])):
                est['means'][comp_idx] = est['means'][comp_idx][(n_dims/2):]
                est['covars'][comp_idx] = est['covars'][comp_idx][(n_dims/2):, (n_dims/2):]
                self.estimators_full_['weights'].append(est['weights'][comp_idx]/float(num_estimators))
                #for full estimators
                self.estimators_full_['means'].append(est['means'][comp_idx])
                self.estimators_full_['covars'].append(est['covars'][comp_idx])

        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            X_new         = X[:, X.shape[1]/2:]
            X_old         = X[:, 0:X.shape[1]/2]

            #merge the model knowledge if a passive dynamics model is available; use the MaxEnt assumption otherwise
            X_new_passive = np.array([self.passive_dyn_func(X_old[sample_idx]) for sample_idx in range(X.shape[0])])
            passive_likelihood = _passive_dyn_likelihood(X_new, X_new_passive, self.passive_dyn_noise, self.passive_dyn_ctrl, self.reg)
            weights = passive_likelihood / (np.sum(passive_likelihood) + self.reg)

            if np.sum(weights) < 1e-10:
                weights = 1./len(weights) * np.ones(len(weights))
            #a GMM as a MaxEnt surrogate
            tmp_gmm = gmm.GMM(  n_components=len(self.estimators_[0]['weights']),
                                priors=self.estimators_[0]['weights'],
                                means=self.estimators_[0]['means'],
                                covariances=self.estimators_[0]['covars'])
            for e_idx in range(num_estimators):
                tmp_gmm.n_components = len(self.estimators_[e_idx]['weights'])
                tmp_gmm.priors = self.estimators_[e_idx]['weights']
                tmp_gmm.means = self.estimators_[e_idx]['means']
                tmp_gmm.covariances = self.estimators_[e_idx]['covars']

                responsibilities = tmp_gmm.to_responsibilities(X_new)
                responsibilities = responsibilities / (np.sum(responsibilities, axis=0) + 1e-10)
                new_weights = (weights * responsibilities.T).T

                new_weights = (new_weights + 1e-10) / (np.sum(new_weights +1e-10, axis=0))

                weighted_means = [np.sum((new_weight*X_new.T).T, axis=0) for new_weight in new_weights.T]

                weighted_covars =[ _frequency_weighted_covariance(X_new, weighted_mean, new_weight, spherical=False)
                                        for new_weight, weighted_mean in zip(new_weights.T, weighted_means)]

                self.estimators_[e_idx]['means'] = weighted_means
                self.estimators_[e_idx]['covars'] = weighted_covars


        self.prepare_inv_and_constants()
        return indices, leaf_idx, partitioned_data, passive_likelihood_dict

    def _em_steps(self, estimator_idx, X, y=None):
        #use current estimation as initialization to perform expectation-maximization
        #now reuse the procedure implemented by scikit-learn; actually a customized implementation
        #is required if the passive dynamics also needs to be learned.
        if self.verbose:
            if estimator_idx is not None:
                print 'EM steps for the estimator {0}'.format(estimator_idx)
            else:
                print 'EM steps...'

        if estimator_idx is not None:
            n_partitions=len(self.estimators_[estimator_idx]['weights'])
            if self.verbose:
                print 'num of partitions:', n_partitions
            #use our own initialization
            g = gmm.GMM(n_components=n_partitions, priors=np.array(self.estimators_[estimator_idx]['weights']),
                means=np.array(self.estimators_[estimator_idx]['means']),
                covariances=np.array(self.estimators_[estimator_idx]['covars']),
                n_iter=self.em_itrs,
                covariance_type='full')
        else:
            n_partitions=len(self.estimators_full_['weights'])
            #use our own initialization on the aggregated model
            g = gmm.GMM(n_components=n_partitions, priors=np.array(self.estimators_full_['weights']),
                means=np.array(self.estimators_full_['means']),
                covariances=np.array(self.estimators_full_['covars']),
                n_iter=self.em_itrs,
                covariance_type='full')

        # g.fit(X[:, (X.shape[1]/2):])
        g.fit(X)

        #prepare to return a defaultdict
        res=defaultdict(list)
        res['means']=list(g.means)
        res['covars']=list(g.covariances)
        res['weights']=list(g.priors)

        return res

    def sample(self, n_samples=1, random_state=None):
        '''
        return samples that are synthesized from the model
        '''
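        # A possible implementation sketch (not in the original code): one could draw a mixture
        # component index according to self.gmm_estimators_full_['weights'] and then sample from
        # np.random.multivariate_normal with that component's mean and covariance.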
        if not hasattr(self, 'estimators_'):
            print 'The model has not been trained yet...'
            return
        else:
            pass
        return

    def score(self, X, y=None):
        return self.value_eval_samples(X, y, False, True)

    def value_eval_samples(self, X, y=None, average=False, const=True):
        scores, grads = self.value_eval_samples_helper(X, y, average, const)
        return scores

    def value_eval_samples_helper(self, X, y=None, average=False, const=True):
        n_samples, n_dim = X.shape

        grads = np.zeros((n_samples, n_dim))

        if self.clustering > 0:
            num_estimators = 1
        else:
            num_estimators = self.n_estimators

        if not average:
            res = np.zeros(X.shape[0])
            res_mat = np.zeros((X.shape[0], len(self.estimators_full_['means'])))
            res_grad_tmp = []
            for i, (m, c_inv)   in enumerate(   zip(self.estimators_full_['means'],
                                            self.estimators_full_['inv_covars'])):
                diff_data = X - m
                res_mat[:, i] = np.array([e_prod.dot(e)*0.5 + self.estimators_full_['beta'][i]*const for e_prod, e in zip(diff_data.dot(c_inv), diff_data)])
                res_grad_tmp.append(c_inv.dot(diff_data.T).T)
            for d_idx, r in enumerate(res_mat):
                res[d_idx] = -logsumexp(-r, b=np.array(self.estimators_full_['weights']))
            resp = ((np.exp(-res_mat)*np.array(self.estimators_full_['weights'])).T / np.exp(-res)).T
            for e_idx in range(res_mat.shape[1]):
                grads += (res_grad_tmp[e_idx].T * resp[:, e_idx]).T
        else:
            def value_estimator_eval(d, est_idx):
                res = []
                for i, (m, c_inv) in enumerate(   zip(self.estimators_[est_idx]['means'],
                                            self.estimators_[est_idx]['inv_covars'])):
                    diff_data = d - m
                    #per-sample quadratic term plus the constant term, evaluated for all samples at once
                    res.append(.5*np.sum(diff_data.dot(c_inv)*diff_data, axis=1) + self.estimators_[est_idx]['beta'][i]*const)
                return np.array(res).T
            def value_estimator_grad(d, est_idx, val):
                res_grad = 0
                for i, (m, c_inv) in enumerate(   zip(self.estimators_[est_idx]['means'],
                                            self.estimators_[est_idx]['inv_covars'])):
                    diff_data = d - m
                    resp = np.exp(-(.5*np.sum(diff_data.dot(c_inv)*diff_data, axis=1) + self.estimators_[est_idx]['beta'][i]*const)) * self.estimators_[est_idx]['weights'][i]
                    grad_comp = c_inv.dot(diff_data.T).T
                    res_grad += (grad_comp.T * (resp / np.exp(-val))).T
                return res_grad
            res = np.array([-logsumexp(-value_estimator_eval(X, idx), axis=1, b=self.estimators_[idx]['weights']) for idx in range(num_estimators)]).T
            res_grad = [value_estimator_grad(X, idx, res[:, idx]) for idx in range(num_estimators)]
            res = np.mean(res, axis=1)
            grads = np.mean(res_grad, axis=0)
        return res, grads

    def prepare_inv_and_constants(self):
        '''
        supplementary steps to prepare the inverses of the covariance matrices and the constant terms
        '''
        regularization = self.reg

        if self.clustering > 0:
            num_estimators = 1
        else:
            num_estimators = self.n_estimators

        for idx in range(num_estimators):
            self.estimators_[idx]['inv_covars'] = [ np.linalg.pinv(covar + np.eye(covar.shape[0])*regularization) for covar in self.estimators_[idx]['covars']]
            self.estimators_[idx]['beta'] = [.5*np.log(pseudo_determinant(covar + np.eye(covar.shape[0])*regularization)) + .5*np.log(2*np.pi)*covar.shape[0] for covar in self.estimators_[idx]['covars']]

        self.estimators_full_['weights'] = []
        self.estimators_full_['means'] = []
        self.estimators_full_['covars'] = []

        self.gmm_estimators_full_['weights'] = []
        self.gmm_estimators_full_['means'] = []
        self.gmm_estimators_full_['covars'] = []
        for e_idx in range(num_estimators):
            for leaf_idx in range(len(self.estimators_[e_idx]['weights'])):
                self.estimators_full_['weights'].append(self.estimators_[e_idx]['weights'][leaf_idx]/float(num_estimators))
                self.estimators_full_['covars'].append(self.estimators_[e_idx]['covars'][leaf_idx])
                self.estimators_full_['means'].append(self.estimators_[e_idx]['means'][leaf_idx])

                self.estimators_full_['inv_covars'].append(self.estimators_[e_idx]['inv_covars'][leaf_idx])
                self.estimators_full_['beta'].append(self.estimators_[e_idx]['beta'][leaf_idx])

                self.gmm_estimators_full_['weights'].append(self.gmm_estimators_[e_idx]['weights'][leaf_idx]/float(num_estimators))
                self.gmm_estimators_full_['covars'].append(self.gmm_estimators_[e_idx]['covars'][leaf_idx])
                self.gmm_estimators_full_['means'].append(self.gmm_estimators_[e_idx]['means'][leaf_idx])
        return
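
# Usage sketch (added for illustration; not part of the original module). It assumes the helper
# objects referenced above (the gmm module, logsumexp, RandomTreesEmbedding, etc.) are importable
# next to this class, and it uses purely synthetic (x_{t-1}, x_t) pairs in the MaxEnt setting,
# i.e. with passive_dyn_func/passive_dyn_ctrl/passive_dyn_noise left as None.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    x_prev = rng.randn(200, 2)                   # a batch of states x_{t-1}
    x_next = x_prev + 0.1 * rng.randn(200, 2)    # the corresponding successor states x_t
    X_pairs = np.hstack([x_prev, x_next])        # each row is the concatenation (x_{t-1}, x_t)

    mdl = EnsembleIOC(n_estimators=5, max_depth=3, em_itrs=0, verbose=False)
    mdl.fit(X_pairs)
    # evaluate the learned cost-to-go surrogate on the successor states
    costs = mdl.value_eval_samples(x_next)
    print costs.shape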
Example #2
0
    def random_forest_embedding(self, data, n_estimators=30, random_state=0, max_depth=3, min_samples_leaf=1):
        """
        learn a density with random forest representation
        """
        """
        scikit-learn only supports axis-align sepration, let's first stick to this and see how it works
        """
        # n_estimators = 400
        # random_state = 0
        # max_depth = 5
        rf_mdl = RandomTreesEmbedding(
            n_estimators=n_estimators,
            random_state=random_state,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf)
        rf_mdl.fit(data)

        indices = rf_mdl.apply(data)
        samples_by_node = defaultdict(list)
        idx_by_node = defaultdict(list)
        #kde_by_node = defaultdict(KernelDensity)

        for idx, sample, est_data in zip(range(len(data)), data, indices):
            for est_ind, leaf in enumerate(est_data):
                samples_by_node[ est_ind, leaf ].append(sample)
                idx_by_node[ est_ind, leaf ].append(idx)

        res_mdl = dict()
        res_mdl['rf_mdl'] = rf_mdl
        res_mdl['samples_dict'] = samples_by_node
        res_mdl['idx_dict'] = idx_by_node
        # res_mdl['kde_dict'] = kde_by_node
        return res_mdl
Example #3
0
def random_forest_embedding(data,
                            n_estimators=400,
                            random_state=0,
                            max_depth=5,
                            min_samples_leaf=1):
    """
    learn a density with random forest representation
    """
    """
    scikit-learn only supports axis-align sepration, let's first stick to this and see how it works
    """
    # n_estimators = 400
    # random_state = 0
    # max_depth = 5
    rf_mdl = RandomTreesEmbedding(n_estimators=n_estimators,
                                  random_state=random_state,
                                  max_depth=max_depth,
                                  min_samples_leaf=min_samples_leaf)
    rf_mdl.fit(data)

    # forestClf.fit(trainingData, trainingLabels)
    # indices = forestClf.apply(trainingData)
    # samples_by_node = defaultdict(list)
    # for est_ind, est_data in enumerate(indices.T):
    # for sample_ind, leaf in enumerate(est_data):
    # samples_by_node[ est_ind, leaf ].append(sample_ind)
    # indexOfSamples = samples_by_node[0,10]
    # # samples_by_node[treeIndex, leafIndex within that tree]
    # leafNodeSamples = trainingAngles[indexOfSamples]
    # kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(leafNodeSamples)

    indices = rf_mdl.apply(data)
    samples_by_node = defaultdict(list)
    idx_by_node = defaultdict(list)
    kde_by_node = defaultdict(KernelDensity)

    for idx, sample, est_data in zip(range(len(data)), data, indices):
        for est_ind, leaf in enumerate(est_data):
            samples_by_node[est_ind, leaf].append(sample)
            idx_by_node[est_ind, leaf].append(idx)

    #Kernel Density Estimation for each leaf node
    # for k,v in samples_by_node.iteritems():
    #     est_ind, leaf = k
    # params = {'bandwidth': np.logspace(-1, 1, 20)}
    # grid = GridSearchCV(KernelDensity(), params)
    # grid.fit(v)

    #     kde_by_node[ est_ind, leaf ] = grid.best_estimator_

    res_mdl = dict()
    res_mdl['rf_mdl'] = rf_mdl
    res_mdl['samples_dict'] = samples_by_node
    res_mdl['idx_dict'] = idx_by_node
    # res_mdl['kde_dict'] = kde_by_node
    return res_mdl
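
# Usage sketch (illustrative; not part of the original snippet). It assumes numpy (as np) plus the
# imports the function itself relies on (RandomTreesEmbedding, defaultdict, KernelDensity) are
# available, and fits the forest-based density representation on small synthetic 2-D data.
if __name__ == '__main__':
    data = np.random.RandomState(0).randn(500, 2)
    mdl = random_forest_embedding(data, n_estimators=20, max_depth=4)
    tree_idx, leaf_id = next(iter(mdl['samples_dict']))
    print 'tree', tree_idx, 'leaf', leaf_id, 'holds', len(mdl['samples_dict'][tree_idx, leaf_id]), 'samples'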
Example #4
0
def random_forest_embedding(data, n_estimators=400, random_state=0, max_depth=5, min_samples_leaf=1):
    """
    learn a density with random forest representation
    """
    """
    scikit-learn only supports axis-align sepration, let's first stick to this and see how it works
    """
    # n_estimators = 400
    # random_state = 0
    # max_depth = 5
    rf_mdl = RandomTreesEmbedding(
        n_estimators=n_estimators, 
        random_state=random_state, 
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf)
    rf_mdl.fit(data)
    
    # forestClf.fit(trainingData, trainingLabels)
    # indices = forestClf.apply(trainingData)
    # samples_by_node = defaultdict(list)
    # for est_ind, est_data in enumerate(indices.T):
    # for sample_ind, leaf in enumerate(est_data):
    # samples_by_node[ est_ind, leaf ].append(sample_ind)
    # indexOfSamples = samples_by_node[0,10]
    # # samples_by_node[treeIndex, leafIndex within that tree]
    # leafNodeSamples = trainingAngles[indexOfSamples]
    # kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(leafNodeSamples)

    indices = rf_mdl.apply(data)
    samples_by_node = defaultdict(list)
    idx_by_node = defaultdict(list)
    kde_by_node = defaultdict(KernelDensity)

    for idx, sample, est_data in zip(range(len(data)), data, indices):
        for est_ind, leaf in enumerate(est_data):
            samples_by_node[ est_ind, leaf ].append(sample)
            idx_by_node[ est_ind, leaf ].append(idx)

        
    #Kernel Density Estimation for each leaf node
    # for k,v in samples_by_node.iteritems():
    #     est_ind, leaf = k
          # params = {'bandwidth': np.logspace(-1, 1, 20)}
          # grid = GridSearchCV(KernelDensity(), params)
          # grid.fit(v)

    #     kde_by_node[ est_ind, leaf ] = grid.best_estimator_

    res_mdl = dict()
    res_mdl['rf_mdl'] = rf_mdl
    res_mdl['samples_dict'] = samples_by_node
    res_mdl['idx_dict'] = idx_by_node
    # res_mdl['kde_dict'] = kde_by_node
    return res_mdl
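
# Sketch (added for illustration; not in the original snippet): the fitted embedding can also be
# turned into a binary co-occurrence affinity between samples, similar in spirit to the
# _affinity_matrix_from_indices helper referenced in Example #1 (that helper is not shown here,
# so this is only an assumed equivalent built on the standard one-hot transform).
def random_embedding_affinity(rf_mdl, data):
    Z = rf_mdl.transform(data)                   # sparse one-hot leaf membership, shape (n_samples, n_leaves)
    shared = (Z * Z.T).toarray()                 # number of trees in which each pair of samples shares a leaf
    return shared / float(rf_mdl.n_estimators)   # normalize to [0, 1]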
Example #5
0
# drop ids and get labels
labels = train.target.values
train = train.drop('id', axis=1)
train = train.drop('target', axis=1)
test = test.drop('id', axis=1)

# scale features
scaler = StandardScaler()
train = scaler.fit_transform(train.astype(float))
test = scaler.transform(test.astype(float))

# random trees embedding
rte = RandomTreesEmbedding(n_estimators = 50, verbose = 1)
rte.fit(train)
tran = rte.apply(train)

# encode labels 
lbl_enc = LabelEncoder()
labels = lbl_enc.fit_transform(labels)

# set up datasets for cross eval
x_train, x_test, y_train, y_test = train_test_split(train, labels)
#label_binary = LabelBinarizer()
#y_test = label_binary.fit_transform(y_test)

# train a logistic regression classifier
clf = LogisticRegression()
clf.fit(x_train, y_train)

# predict on test set
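
# A plausible continuation (not in the original snippet): score the held-out split with
# multi-class log loss and also compute probabilities for the unlabeled test matrix.
from sklearn.metrics import log_loss
val_preds = clf.predict_proba(x_test)
print 'validation log loss:', log_loss(y_test, val_preds)
test_preds = clf.predict_proba(test)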
Example #6
0
## labels for the data
dic = pickle.load(open('letterdict_normalized.pickle'))
mypath = '/home/asriva20/SrivastavaA/Data/3_AD_Normal/'
names = [name for name in sorted(listdir(mypath))]
Y = [1 if n[2:8] in dic['AD'] else \
     0 if n[2:8] in  dic['Normal'] else \
    -1 for n in names]
Y = np.asarray(Y)

mat = sio.loadmat('X.mat')
X = mat['Data']
print np.shape(X)

forest = RandomTreesEmbedding(n_estimators=50, max_depth=3)
forest.fit(X)
print(forest.apply(X))
sum = 0
for tree in forest.estimators_:
    n_nodes = tree.tree_.node_count
    children_left = tree.tree_.children_left
    children_right = tree.tree_.children_right
    feature = tree.tree_.feature
    threshold = tree.tree_.threshold

    node_depth = np.zeros(shape=n_nodes)
    is_leaves = np.zeros(shape=n_nodes, dtype=bool)
    parent_id = {}
    # seed the stack with the root node id and its parent depth
    stack = [(0, -1)]

    while len(stack) > 0:
        node_id, parent_depth = stack.pop()
        node_depth[node_id] = parent_depth + 1
        # an internal split node: remember the parent and push both children
        if children_left[node_id] != children_right[node_id]:
            parent_id[children_left[node_id]] = node_id
            parent_id[children_right[node_id]] = node_id
            stack.append((children_left[node_id], parent_depth + 1))
            stack.append((children_right[node_id], parent_depth + 1))
        else:
            is_leaves[node_id] = True
test_samples_num, feature_length = np.asarray(X_test).shape
columns = ["t{}".format(i + 1) for i in range(feature_length)]
test_labels = np.asarray(y_test).reshape(test_samples_num, 1)
test_data = np.asarray(X_test)
test_all = np.hstack([test_labels, test_data])
pd.DataFrame(data=test_all,
             columns=['label'] + columns).to_csv(args.test_original,
                                                 index_label='id')

rt = RandomTreesEmbedding(max_depth=3,
                          n_estimators=n_estimator,
                          random_state=0)

rt.fit(X_train, y_train)

X_test_embedding = rt.apply(X_test)
X_train_embedding = rt.apply(X_train)

field_num = X_train_embedding.shape[1]
field_id = ["t{}".format(i + 1) for i in range(field_num)]

leaves_num = sum([max(X_train_embedding[:, i]) for i in range(field_num)])
print("leaves num: {}".format(leaves_num))
with open('./parameters.conf', 'w') as file:
    file.write("leaves_num:{}".format(leaves_num))
train_data_path = args.train_embedding
test_data_path = args.test_embedding

print('saving RandomTreesEmbedding datasets...')
# save the Tree Embedding features of the training and test sets
X_train_embedding_df = pd.DataFrame(data=X_train_embedding, columns=field_id)
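
# A plausible continuation (not in the original snippet): persist the train/test embeddings to
# the paths prepared above, mirroring how the original test matrix was written out earlier.
X_train_embedding_df.to_csv(train_data_path, index_label='id')
X_test_embedding_df = pd.DataFrame(data=X_test_embedding, columns=field_id)
X_test_embedding_df.to_csv(test_data_path, index_label='id')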
class EnsembleIOC(BaseEstimator, RegressorMixin):
    def __init__(self,
                 n_estimators=20,
                 max_depth=5,
                 min_samples_split=10,
                 min_samples_leaf=10,
                 random_state=0,
                 em_itrs=5,
                 regularization=0.05,
                 passive_dyn_func=None,
                 passive_dyn_ctrl=None,
                 passive_dyn_noise=None,
                 verbose=False):
        '''
        n_estimators        - number of ensembled models
        ...                 - a batch of parameters used for RandomTreesEmbedding, see relevant documents
        em_itrs             - maximum number of EM iterations to take
        regularization      - small positive scalar to prevent singularity of matrix inversion
        passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
        passive_dyn_ctrl    - function to return the control matrix which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                                note this implies a dynamical system with constant input gain. It could be extended to a state-dependent
                                input gain, in which case we would need a covariance for each data point
        verbose             - output training information
        '''
        BaseEstimator.__init__(self)

        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.random_state = random_state
        self.em_itrs = em_itrs
        self.reg = regularization
        self.passive_dyn_func = passive_dyn_func
        self.passive_dyn_ctrl = passive_dyn_ctrl
        self.passive_dyn_noise = passive_dyn_noise
        self.verbose = verbose
        return

    def fit(self, X, y=None):
        '''
        y could be the array of starting state of the demonstrated trajectories/policies
        if it is None, it implicitly implies a MaxEnt model. Otherwise, it serves as the feature mapping
        of the starting state. This data might also be potentially used for learning the passive dynamics
        for a pure model-free learning with some regressors and regularization.
        '''
        #check parameters...
        assert (type(self.n_estimators) == int)
        assert (self.n_estimators > 0)
        assert (type(self.max_depth) == int)
        assert (self.max_depth > 0)
        assert (type(self.min_samples_split) == int)
        assert (self.min_samples_split > 0)
        assert (type(self.min_samples_leaf) == int)
        assert (self.min_samples_leaf > 0)
        assert (type(self.em_itrs) == int)

        #an initial partitioning of data with random forest embedding
        self.random_embedding_mdl_ = RandomTreesEmbedding(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state)

        #we probably do not need the data type to differentiate whether it is a demonstration
        #of a trajectory or a commanded state, do we?
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            self.random_embedding_mdl_.fit(X[:, X.shape[1] / 2:])
            indices = self.random_embedding_mdl_.apply(X[:, X.shape[1] / 2:])
            # X_tmp = np.array(X)
            # X_tmp[:, X.shape[1]/2:] = X_tmp[:, X.shape[1]/2:] - X_tmp[:, :X.shape[1]/2]
            # self.random_embedding_mdl_.fit(X_tmp)

            # indices = self.random_embedding_mdl_.apply(X_tmp)
        else:
            self.random_embedding_mdl_.fit(X)
            #figure out indices
            indices = self.random_embedding_mdl_.apply(X)

        partitioned_data = defaultdict(list)

        leaf_idx = defaultdict(set)
        weight_idx = defaultdict(float)
        #group data that belong to the same partition and compute the weights...
        #are weights really necessary for the EM steps? Hmm, they seem to be for the initialization
        #d_idx: data index; p_idx: partition index (comprised of estimator index and leaf index)
        for d_idx, d, p_idx in zip(range(len(X)), X, indices):
            for e_idx, l_idx in enumerate(p_idx):
                partitioned_data[e_idx, l_idx].append(d)
                leaf_idx[e_idx] |= {l_idx}

            for e_idx, l_idx in enumerate(p_idx):
                weight_idx[e_idx, l_idx] = float(
                    len(partitioned_data[e_idx, l_idx])) / len(X)
                # weight_idx[e_idx, l_idx] = 1. / len(p_idx)

        #for each grouped data partition, solve an easy IOC problem by assuming a quadratic cost-to-go function
        #note that, if the passive dynamics needs to be learned, extra steps are needed to train a regressor with weighted data
        #otherwise, just a simple Gaussian for each conditional probability distribution model
        self.estimators_ = []
        #another copy to store the parameters all together, for EM/evaluation on all of the models
        self.estimators_full_ = defaultdict(list)
        #<hyin/Feb-6th-2016> an estimator and leaf indexed structure to record the passive likelihood of data...
        passive_likelihood_dict = defaultdict(list)
        for e_idx in range(self.n_estimators):
            #for each estimator
            estimator_parms = defaultdict(list)
            for l_idx in leaf_idx[e_idx]:
                if self.verbose:
                    print 'Processing {0}-th estimator and {1}-th leaf...'.format(
                        e_idx, l_idx)
                #and for each data partition
                data_partition = np.array(partitioned_data[e_idx, l_idx])
                if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
                    X_new = data_partition[:, data_partition.shape[1] / 2:]
                    X_old = data_partition[:, 0:data_partition.shape[1] / 2]
                    X_new_passive = np.array([
                        self.passive_dyn_func(X_old[sample_idx])
                        for sample_idx in range(data_partition.shape[0])
                    ])
                    passive_likelihood = _passive_dyn_likelihood(
                        X_new, X_new_passive, self.passive_dyn_noise,
                        self.passive_dyn_ctrl, self.reg)

                    weights = passive_likelihood / np.sum(passive_likelihood)
                    weighted_mean = np.sum((weights * X_new.T).T, axis=0)

                    estimator_parms['means'].append(weighted_mean)
                    estimator_parms['covars'].append(
                        _frequency_weighted_covariance(X_new,
                                                       weighted_mean,
                                                       weights,
                                                       spherical=False))

                    #for full estimators
                    self.estimators_full_['means'].append(
                        estimator_parms['means'][-1])
                    self.estimators_full_['covars'].append(
                        estimator_parms['covars'][-1])

                    #<hyin/Feb-6th-2016> also remember the data weights according to the passive likelihood
                    #this could be useful if the weights according to the passive likelihood are desired for other applications
                    #to evaluate some statistics within the data partition
                    passive_likelihood_dict[e_idx, l_idx] = weights
                else:
                    estimator_parms['means'].append(
                        np.mean(data_partition, axis=0))
                    estimator_parms['covars'].append(np.cov(data_partition.T))

                    #for full estimators
                    self.estimators_full_['means'].append(
                        estimator_parms['means'][-1])
                    self.estimators_full_['covars'].append(
                        estimator_parms['covars'][-1])

                    #for MaxEnt, uniform passive likelihood
                    passive_likelihood_dict[e_idx, l_idx] = np.ones(
                        len(data_partition)) / float(len(data_partition))

                estimator_parms['weights'].append(weight_idx[e_idx, l_idx])
                self.estimators_full_['weights'].append(
                    weight_idx[e_idx, l_idx] / float(self.n_estimators))

            self.estimators_.append(estimator_parms)
        #can stop here or go for expectation maximization for each estimator...
        if self.em_itrs > 0:
            #prepare em results for each estimator
            em_res = [
                self._em_steps(e_idx, X, y)
                for e_idx in range(self.n_estimators)
            ]
            #or do EM on the full model?
            # <hyin/Dec-2nd-2015> no, doing this seems to harm the learning as the aggregated model is really
            # complex so optimizing that model tends to overfit...
            # em_res = self._em_steps(None, X, y)
            #then use them
            self.estimators_ = em_res

        self.prepare_inv_and_constants()
        return indices, leaf_idx, passive_likelihood_dict

    def _em_steps(self, estimator_idx, X, y=None):
        #use current estimation as initialization to perform expectation-maximization
        #now reuse the procedure implemented by scikit-learn; actually a customized implementation
        #is required if the passive dynamics also needs to be learned.
        if self.verbose:
            if estimator_idx is not None:
                print 'EM steps for the estimator {0}'.format(estimator_idx)
            else:
                print 'EM steps...'

        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            #extract X_old, X_new, X_new_passive
            X_old = X[:, 0:X.shape[1] / 2]
            X_new = X[:, X.shape[1] / 2:]
            X_new_passive = np.array([
                self.passive_dyn_func(X_old[sample_idx])
                for sample_idx in range(X.shape[0])
            ])

            # EM algorithms
            current_log_likelihood = None
            # reset self.converged_ to False
            converged = False
            # this line should be removed when 'thresh' is removed in v0.18
            tol = 1e-4
            #use the internal EM steps for non-uniform passive dynamics case
            for i in range(self.em_itrs):
                prev_log_likelihood = current_log_likelihood
                # Expectation step
                log_likelihoods, responsibilities = self._do_estep(
                    estimator_idx, X_new_passive, X_new, y)
                current_log_likelihood = log_likelihoods.mean()

                if self.verbose:
                    print 'current_log_likelihood:', current_log_likelihood
                if prev_log_likelihood is not None:
                    change = abs(current_log_likelihood - prev_log_likelihood)
                    if change < tol:
                        converged = True
                        break

                # Maximization step
                if estimator_idx is not None:
                    self._do_mstep(X_new_passive, X_new, responsibilities,
                                   self.estimators_[estimator_idx])
                else:
                    self._do_mstep(X_new_passive, X_new, responsibilities,
                                   self.estimators_full_)

            if estimator_idx is None:
                res = self.estimators_full_
            else:
                res = self.estimators_[estimator_idx]
        else:
            if estimator_idx is not None:
                n_partitions = len(self.estimators_[estimator_idx]['weights'])
                #use our own initialization
                g = mixture.GMM(n_components=n_partitions,
                                n_iter=self.em_itrs,
                                init_params='',
                                covariance_type='full')
                g.means_ = np.array(self.estimators_[estimator_idx]['means'])
                g.covars_ = np.array(self.estimators_[estimator_idx]['covars'])
                g.weights_ = np.array(
                    self.estimators_[estimator_idx]['weights'])
            else:
                n_partitions = len(self.estimators_full_['weights'])
                g = mixture.GMM(n_components=n_partitions,
                                n_iter=self.em_itrs,
                                init_params='',
                                covariance_type='full')
                g.means_ = np.array(self.estimators_full_['means'])
                g.covars_ = np.array(self.estimators_full_['covars'])
                g.weights_ = np.array(self.estimators_full_['weights'])

            g.fit(X)

            #prepare to return a defaultdict
            res = defaultdict(list)
            res['means'] = list(g.means_)
            res['covars'] = list(g.covars_)
            res['weights'] = list(g.weights_)

        return res

    def _do_estep(self, estimator_idx, X_new_passive, X_new, y):
        return self._score_sample_for_passive_mdl_helper(
            estimator_idx, X_new_passive, X_new, y)

    def _do_mstep(self,
                  X_new_passive,
                  X_new,
                  responsibilities,
                  parms,
                  min_covar=1e-7):
        """
        X_new_passive    -  An array of the old states propagated through the passive dynamics
        X_new            -  An array of the newly observed states
        responsibilities -  array_like, shape (n_samples, n_components)
                            Posterior probabilities of each mixture component for each data
        """
        n_samples, n_dim = X_new.shape
        weights = responsibilities.sum(axis=0)
        weighted_X_new_sum = np.dot(responsibilities.T, X_new)
        weighted_X_new_passive_sum = np.dot(responsibilities.T, X_new_passive)
        inverse_weights = 1.0 / (weights[:, np.newaxis] + 10 * EPS)
        weighted_X_new_mean = weighted_X_new_sum * inverse_weights
        weighted_X_new_passive_mean = weighted_X_new_passive_sum * inverse_weights

        if 'weights' in parms:
            parms['weights'] = (weights / (weights.sum() + 10 * EPS) + EPS)

        # delta_X_new                 = [None] * n_samples
        # delta_X_new_passive         = [None] * n_samples
        # delta_X_new_passive_Sigma_0 = [None] * n_samples
        # one_array = np.ones(n_dim)
        # for c in range(len(parms['weights'])):
        #     delta_X_new[c]                 = X_new - weighted_X_new_mean[c]
        #     delta_X_new_passive[c]         = X_new_passive - weighted_X_new_passive_mean[c]
        #     delta_X_new_passive_Sigma_0[c] = (1./self.passive_dyn_noise * np.eye(n_dim).dot(delta_X_new_passive[c].T)).T

        # if 'covars' in parms:
        #     #now only support diagonal covariance matrix
        #     for c, old_covar in enumerate(parms['covars']):
        #         constant=np.sum(delta_X_new[c]*delta_X_new[c]*responsibilities[:, c][:, np.newaxis], axis=0)#*inverse_weights[c, 0]
        #         so_coeff=np.sum(delta_X_new_passive_Sigma_0[c]*delta_X_new_passive_Sigma_0[c]*responsibilities[:, c][:, np.newaxis], axis=0)#*inverse_weights[c, 0]
        #         #take the roots for S matrix
        #         S_k=(np.sqrt(one_array+4*so_coeff*constant)-one_array)/(2*so_coeff)
        #         #get Sigma_k from S_k through S_k^(-1) = Sigma_k^(-1) + Sigma_0^(-1)
        #         Sigma_k = 1./(1./S_k -  1./self.passive_dyn_noise * np.ones(n_dim))
        #         print S_k, Sigma_k
        #         parms['covars'][c] = np.diag(Sigma_k)
        # if 'means' in parms:
        #     for c, old_mean in enumerate(parms['means']):
        #         Sigma_k_array = np.diag(parms['covars'][c])
        #         S_k=1./Sigma_k_array + 1./self.passive_dyn_noise * np.ones(n_dim)
        #         coeff_mat = np.diag(Sigma_k_array*(1./S_k))
        #         #difference between X_new and X_new_passive
        #         delta_X_new_X_new_passive = X_new - (np.diag(S_k).dot(X_new_passive.T)).T
        #         parms['means'][c] = coeff_mat.dot(np.sum(delta_X_new_X_new_passive*responsibilities[:, c][:, np.newaxis]*inverse_weights[c, 0], axis=0))
        #<hyin/Oct-23rd-2015> Try the formulation from the Bellman equation; this seems to lead to a weighted linear regression problem...
        # c = (X_new - X_new_passive)
        #<hyin/OCt-27th-2015> Try the closed-form solutions for a relaxed lower-bound
        # if 'means' in parms:
        #     parms['means'] = weighted_X_new_mean
        # if 'covars' in parms:
        #     for c, old_covar in enumerate(parms['covars']):
        #         data_weights = responsibilities[:, c]
        #         parms['covars'][c] = _frequency_weighted_covariance(X_new, parms['means'][c], data_weights)

        #<hyin/Nov-20th-2015> As far as I can tell, the above closed-form solution actually optimizes a value lower than the actual objective;
        #however, this approximation is not tight, so unfortunately we cannot guarantee that the optimum of the actual objective is also attained...
        #another idea is to simplify the model by only learning the mean, i.e., the center of the RBF function;
        #the width of the RBF basis can then be adapted by solving a one-dimensional numerical optimization, which should lead to
        #a generalized EM algorithm
        #<hyin/Jan-22nd-2016> note that without the adaptation of the covariance, shifting the mean
        #is not that great an option, so let's only keep the weights adaptation. We need numerical optimization for the covariance adaptation
        #to see if it would help the mean shift
        if 'means' in parms:
            for c, old_mean in enumerate(parms['means']):
                Sigma_k_array = parms['covars'][c]
                # S_k = self.passive_dyn_noise * self.passive_dyn_ctrl + Sigma_k_array + 1e-5*np.eye(X_new.shape[1])
                # # coeff_mat = np.diag(Sigma_k_array*(1./S_k))
                # inv_Sigma_k_array = np.linalg.pinv(Sigma_k_array)
                # inv_Sigma_sum = np.linalg.pinv(S_k + Sigma_k_array)
                # #could use woodbury here...
                # coeff_mat = np.linalg.pinv(inv_Sigma_k_array - inv_Sigma_sum)
                # #difference between X_new and X_new_passive
                # delta_X_new_X_new_passive = (inv_Sigma_k_array.dot(X_new.T) - inv_Sigma_sum.dot(X_new_passive.T)).T

                # parms['means'][c] = coeff_mat.dot(np.sum(delta_X_new_X_new_passive*responsibilities[:, c][:, np.newaxis]*inverse_weights[c, 0], axis=0))

                # #another formulation? which one is correct?
                # <hyin/Dec-2nd-2015> this seems more straightforward and at least gives a steadily increasing likelihood;
                # need to check the original formulation to see what the problem is
                inv_Sigma_k_array = np.linalg.pinv(Sigma_k_array)
                inv_Sigma_0 = np.linalg.pinv(self.passive_dyn_noise *
                                             self.passive_dyn_ctrl +
                                             self.reg * np.eye(X_new.shape[1]))
                coeff_mat = Sigma_k_array
                inv_Sigma_sum = inv_Sigma_k_array + inv_Sigma_0
                delta_X_new_X_new_passive = (
                    inv_Sigma_sum.dot(X_new.T) -
                    inv_Sigma_0.dot(X_new_passive.T)).T
                parms['means'][c] = coeff_mat.dot(
                    np.sum(delta_X_new_X_new_passive *
                           responsibilities[:, c][:, np.newaxis] *
                           inverse_weights[c, 0],
                           axis=0))
        # return

    def sample(self, n_samples=1, random_state=None):
        '''
        return samples that are synthesized from the model
        '''
        if not hasattr(self, 'estimators_'):
            print 'The model has not been trained yet...'
            return
        else:
            pass
        return

    def score(self, X, y=None):
        #take log likelihood for each estimator for a given trajectory/state
        #without considering the passive dynamics: MaxEnt model
        estimator_scores = [
            _log_multivariate_normal_density_full(
                X, np.array(self.estimators_[e_idx]['means']),
                np.array(self.estimators_[e_idx]['covars'])) +
            np.log(self.estimators_[e_idx]['weights'])
            for e_idx in range(self.n_estimators)
        ]

        # concatenate different models...
        # estimator_scores=np.concatenate(estimator_scores,axis=1)
        # res=[logsumexp(x)-np.log(1./self.n_estimators) for x in np.array(estimator_scores)]
        # another way: mean of evaluated cost functions
        # helper to evaluate a single model
        mdl_eval = lambda scores: [logsumexp(x_score) for x_score in scores]
        estimator_scores = np.array(
            [mdl_eval(scores) for scores in estimator_scores])

        responsibilities = [
            np.exp(estimator_scores[e_idx] -
                   estimator_scores[e_idx][:, np.newaxis])
            for e_idx in range(self.n_estimators)
        ]
        #average seems to be more reasonable...
        res = np.mean(estimator_scores, axis=0)
        res_responsibilities = np.mean(np.array(responsibilities), axis=0)
        return -np.array(res), res_responsibilities

    def score_samples(self, X, y=None, min_covar=1.e-7):
        #a different version to evaluate the quality/likelihood of state pairs
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            X_old = X[:, 0:X.shape[1] / 2]
            X_new = X[:, X.shape[1] / 2:]
            X_new_passive = np.array([
                self.passive_dyn_func(X_old[sample_idx])
                for sample_idx in range(X.shape[0])
            ])

            log_prob_lst = [None] * self.n_estimators
            respon_lst = [None] * self.n_estimators
            for e_idx in range(self.n_estimators):
                log_prob_lst[e_idx], respon_lst[
                    e_idx] = self._score_sample_for_passive_mdl_helper(
                        e_idx, X_new_passive, X_new, y, min_covar)
            res = -np.mean(np.array(log_prob_lst), axis=0)
            res_responsibilities = np.mean(np.array(respon_lst), axis=0)
        else:
            #this should be a trajectory/maximum ent model, use score...
            res, res_responsibilities = self.score(X, y)
        return res, res_responsibilities

    def value_eval_samples(self,
                           X,
                           y=None,
                           average=False,
                           full=True,
                           const=True):
        #switching off the constant term seems to smooth the value function
        #I don't quite understand why; my current guess is that the axis-aligned partition results in
        #oversized covariance matrices, making the constant terms extremely large for some partitions
        #this can be shown by adding a fixed term to the covariance matrices to mitigate the singularity,
        #which could be cast as a kind of regularization

        #the new switch is actually equivalent to average=True, but since the training parameters are separated
        #let's keep this ugly solution...
        n_samples, n_dim = X.shape

        if not average:
            if not full:
                weights = []
                for idx in range(self.n_estimators):
                    weights = weights + (
                        np.array(self.estimators_[idx]['weights']) /
                        self.n_estimators).tolist()
                #the real function to evaluate the value functions, which are actually un-normalized Gaussians
                def value_estimator_eval(d):
                    res = []
                    for idx in range(self.n_estimators):
                        for i, (m, c_inv) in enumerate(
                                zip(self.estimators_[idx]['means'],
                                    self.estimators_[idx]['inv_covars'])):
                            diff_data = d - m
                            res.append(
                                .5 * diff_data.dot(c_inv).dot(diff_data) +
                                self.estimators_[idx]['beta'][i] * const)
                    return np.array(res)

                res = np.array([
                    -logsumexp(-value_estimator_eval(d), b=np.array(weights))
                    for d in X
                ])
            else:
                res = np.zeros(X.shape[0])
                res_mat = np.zeros(
                    (X.shape[0], len(self.estimators_full_['means'])))
                for i, (m, c_inv) in enumerate(
                        zip(self.estimators_full_['means'],
                            self.estimators_full_['inv_covars'])):
                    diff_data = X - m
                    res_mat[:, i] = np.array([
                        e_prod.dot(e) * 0.5 +
                        self.estimators_full_['beta'][i] * const
                        for e_prod, e in zip(diff_data.dot(c_inv), diff_data)
                    ])
                for d_idx, r in enumerate(res_mat):
                    res[d_idx] = -logsumexp(-r,
                                            b=self.estimators_full_['weights'])
        else:
            #the real function to evaluate the value functions, which are actually un-normalized Gaussians
            def value_estimator_eval(idx):
                res = np.zeros(
                    (X.shape[0], len(self.estimators_[idx]['means'])))
                logsumexp_res = np.zeros(len(res))
                for i, (m, c_inv) in enumerate(
                        zip(self.estimators_[idx]['means'],
                            self.estimators_[idx]['inv_covars'])):
                    diff_data = X - m
                    res[:, i] = np.array([
                        e_prod.dot(e) * 0.5 +
                        self.estimators_[idx]['beta'][i] * const
                        for e_prod, e in zip(diff_data.dot(c_inv), diff_data)
                    ])
                for d_idx, r in enumerate(res):
                    logsumexp_res[d_idx] = -logsumexp(
                        -r, b=self.estimators_[idx]['weights'])

                return logsumexp_res

            estimator_scores = [
                value_estimator_eval(e_idx)
                for e_idx in range(self.n_estimators)
            ]
            #take average
            res = np.mean(np.array(estimator_scores), axis=0)
        return res

    def _score_sample_for_passive_mdl_helper(self,
                                             estimator_idx,
                                             X_new_passive,
                                             X_new,
                                             y,
                                             min_covar=1.e-7):
        #for the specified estimator with a passive dynamics model,
        #evaluate the likelihood for given state pairs
        #to call this, ensure passive dynamics and noise are available
        n_samples, n_dim = X_new.shape

        #incorporate the likelihood of passive dynamics - a Gaussian
        """
                        P_0(x'|x) exp^(V(x'))
        P(x'|x) = --------------------------------- = N(x', m(x), S)
                    int_x'' P_0(x''|x) exp^(V(x''))
        """
        """
        for sake of maximization step and simplicity, evaluate a lower-bound instead
        log(P(x'|x)) > -0.5 * D * log(2*pi) + 0.5*log(det(S^{-1})) -0.5*log2 + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
                     > -0.5 * D * log(2*pi) + 0.5*log(det(S^{-1})/2) + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k)
                     > -0.5 * D * log(2*pi) + 0.5*log((det(Sigma_k)^{-1}+det(Sigma_0)^{-1})/2) + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
                     > -0.5 * D * log(2*pi) + 0.5*log(det(Sigma_k)^{-1})/2 + 0.5*log(det(Sigma_0))/2 + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
        Any way to bound the last term to also make it independent from matrix other than Sigma_k?
        """

        # regularize to prevent numerical instability
        Sigma_0 = self.passive_dyn_noise * self.passive_dyn_ctrl + self.reg * np.eye(
            X_new.shape[1])
        # + 1e-2 * np.eye(X_new.shape[1])
        Sigma_0_inv = np.linalg.pinv(Sigma_0)
        if estimator_idx is not None:
            Sigma = self.estimators_[estimator_idx]['covars']
            mu = self.estimators_[estimator_idx]['means']
            w = self.estimators_[estimator_idx]['weights']
        else:
            Sigma = self.estimators_full_['covars']
            mu = self.estimators_full_['means']
            w = self.estimators_full_['weights']
        nmix = len(mu)

        log_prob = np.empty((n_samples, nmix))
        for c, (mu_k, Sigma_k) in enumerate(zip(mu, Sigma)):
            #obviously, this fraction can be optimized by exploiting the structure of the covariance matrix,
            #e.g. via Cholesky decomposition
            Sigma_k_inv = np.linalg.pinv(Sigma_k)
            S_inv = Sigma_k_inv + Sigma_0_inv
            S = np.linalg.pinv(S_inv)
            try:
                S_chol = linalg.cholesky(S, lower=True)
            except linalg.LinAlgError:
                # The model is most probably stuck in a component with too
                # few observations; we need to reinitialize this component
                S_chol = linalg.cholesky(S + min_covar * np.eye(n_dim),
                                         lower=True)
            m = S.dot((Sigma_k_inv.dot(mu_k) +
                       Sigma_0_inv.dot(X_new_passive.T).T).T).T
            #fraction part of above equation
            # scale_log_det = -.5 * (np.log(2*np.pi) + np.sum(np.log(S_inv)) +
            #     2*np.sum(np.log(np.diag(Sigma_k_chol))) + np.sum(np.log(np.diag(Sigma_0))))
            # #exp() part of the above equation
            # S_sol = linalg.solve_triangular(M_chol, (X_new - X_old).T, lower=True).T

            # scale_log_rbf = -.5 * (np.sum(M_sol**2), axis=1)
            S_log_det = 2 * np.sum(np.log(np.diag(S_chol)))
            # print 'S_log_det:', S_log_det
            S_sol = linalg.solve_triangular(S_chol, (X_new - m).T,
                                            lower=True).T
            log_prob[:, c] = -.5 * (np.sum(S_sol**2, axis=1) +
                                    n_dim * np.log(2 * np.pi) + S_log_det)
        lpr = log_prob + np.log(w)
        # print 'log_prob:', log_prob
        # print 'w:', w
        # print 'lpr:', lpr
        logprob = logsumexp(lpr, axis=1)
        responsibilities = np.exp(lpr - logprob[:, np.newaxis])
        return logprob, responsibilities

    def prepare_inv_and_constants(self):
        '''
        supplementary steps to prepare the inverses of the covariance matrices and the constant terms
        '''
        regularization = self.reg
        for idx in range(self.n_estimators):
            self.estimators_[idx]['inv_covars'] = [
                np.linalg.pinv(covar + np.eye(covar.shape[0]) * regularization)
                for covar in self.estimators_[idx]['covars']
            ]
            self.estimators_[idx]['beta'] = [
                .5 * np.log(
                    pseudo_determinant(covar + np.eye(covar.shape[0]) *
                                       regularization)) +
                .5 * np.log(2 * np.pi) * covar.shape[0]
                for covar in self.estimators_[idx]['covars']
            ]

        self.estimators_full_['weights'] = []
        self.estimators_full_['means'] = []
        self.estimators_full_['covars'] = []
        for e_idx in range(self.n_estimators):
            for leaf_idx in range(len(self.estimators_[e_idx]['weights'])):
                self.estimators_full_['weights'].append(
                    self.estimators_[e_idx]['weights'][leaf_idx] /
                    float(self.n_estimators))
                self.estimators_full_['covars'].append(
                    self.estimators_[e_idx]['covars'][leaf_idx])
                self.estimators_full_['means'].append(
                    self.estimators_[e_idx]['means'][leaf_idx])
                # self.estimators_full_['inv_covars'] = [ np.linalg.pinv(covar) for covar in self.estimators_full_['covars']]
                # self.estimators_full_['beta'] = [.5*np.log(pseudo_determinant(covar)) + .5*np.log(2*np.pi)*covar.shape[0] for covar in self.estimators_full_['covars']]
                self.estimators_full_['inv_covars'].append(
                    self.estimators_[e_idx]['inv_covars'][leaf_idx])
                self.estimators_full_['beta'].append(
                    self.estimators_[e_idx]['beta'][leaf_idx])
        return
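
# A minimal usage sketch (not part of the original example): assuming this module's imports and
# helpers (e.g. pseudo_determinant and the logsumexp used above) are available, the MaxEnt variant
# (no passive dynamics) can be fitted on raw samples and the learned value function evaluated.
# The data below is synthetic and purely illustrative.
if __name__ == '__main__':
    import numpy as np
    X_demo = np.random.randn(300, 4)                          # synthetic 4-D samples
    ioc = EnsembleIOC(n_estimators=5, max_depth=3, em_itrs=0)
    ioc.fit(X_demo)                                           # partition the data and fit local Gaussians
    values = ioc.value_eval_samples(X_demo, average=True)     # per-sample value estimates
    print(values.shape)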
Example #9
0
                random_state=0, verbose = 0).fit(data)

codewords = kmeans.cluster_centers_
codewords.shape

"""## Random Trees Embedding"""

rtree = RandomTreesEmbedding(n_estimators=1000, max_depth=70, 
                             min_samples_leaf=1, min_samples_split=2,
                             verbose=1, random_state=0)

rtree.fit(data)

# For each datapoint x in X and for each tree in the forest, 
# return the index of the leaf x ends up in.
leafs = rtree.apply(data)

leafs.shape

"""# Histogram of visual words"""

# count how many SIFT descriptors there are per image, knowing there are 150 images

def count_sifts_per_image(x):
  sift = cv.xfeatures2d.SIFT_create()
  n_sift = []
  for label_img in x:
    for l in label_img:
      img = cv.cvtColor(np.array(l), cv.COLOR_RGB2GRAY) if l.mode == 'RGB' else np.array(l)
      kp, des = sift.detectAndCompute(img, None)
      n_sift.append(des.shape[0])
  return n_sift
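
# A possible continuation (not in the original notebook): build the per-image bag-of-visual-words
# histograms from the KMeans codewords. `labels_per_descriptor` (codeword index of every SIFT
# descriptor, e.g. kmeans.predict(data)) and `sifts_per_image` (descriptor count per image, e.g.
# the list collected by count_sifts_per_image) are hypothetical inputs for illustration.
def bag_of_words_histograms(labels_per_descriptor, sifts_per_image, n_words):
  histograms = []
  start = 0
  for n in sifts_per_image:
    # count how often each codeword occurs among this image's descriptors
    histograms.append(np.bincount(labels_per_descriptor[start:start + n], minlength=n_words))
    start += n
  return np.array(histograms)  # shape: (n_images, n_words)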
class EnsembleIOC(BaseEstimator, RegressorMixin):

    def __init__(self,  n_estimators=20, 
                        max_depth=5, min_samples_split=10, min_samples_leaf=10,
                        random_state=0,
                        em_itrs=5,
                        regularization=0.05,
                        passive_dyn_func=None,
                        passive_dyn_ctrl=None,
                        passive_dyn_noise=None,
                        verbose=False):
        '''
        n_estimators        - number of ensembled models
        ...                 - a batch of parameters used for RandomTreesEmbedding, see relevant documents
        em_itrs             - maximum number of EM iterations to take
        regularization      - small positive scalar to prevent singularity of matrix inversion
        passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
        passive_dyn_ctrl    - function to return the control matrix which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                                note this implies a dynamical system with constant input gain. It is extendable to have state dependent
                                input gain then we need covariance for each data point
        verbose             - output training information
        '''
        BaseEstimator.__init__(self)

        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.min_samples_split=min_samples_split
        self.min_samples_leaf=min_samples_leaf
        self.random_state=random_state
        self.em_itrs=em_itrs
        self.reg=regularization
        self.passive_dyn_func=passive_dyn_func
        self.passive_dyn_ctrl=passive_dyn_ctrl
        self.passive_dyn_noise=passive_dyn_noise
        self.verbose=verbose
        return

    def fit(self, X, y=None):
        '''
        y could be the array of starting states of the demonstrated trajectories/policies.
        If it is None, it implicitly implies a MaxEnt model. Otherwise, it serves as the feature mapping
        of the starting state. This data might also potentially be used for learning the passive dynamics
        for pure model-free learning with some regressors and regularization.
        '''
        #check parameters...
        assert(type(self.n_estimators)==int)
        assert(self.n_estimators > 0)
        assert(type(self.max_depth)==int)
        assert(self.max_depth > 0)
        assert(type(self.min_samples_split)==int)
        assert(self.min_samples_split > 0)
        assert(type(self.min_samples_leaf)==int)
        assert(self.min_samples_leaf > 0)
        assert(type(self.em_itrs)==int)

        #an initial partitioning of data with random forest embedding
        self.random_embedding_mdl_ = RandomTreesEmbedding(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state
            )

        #we probably do not need the data type to differentiate whether it is a demonstration
        #of a trajectory or a commanded state, do we?
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            self.random_embedding_mdl_.fit(X[:, X.shape[1]/2:])
            indices = self.random_embedding_mdl_.apply(X[:, X.shape[1]/2:])
            # X_tmp = np.array(X)
            # X_tmp[:, X.shape[1]/2:] = X_tmp[:, X.shape[1]/2:] - X_tmp[:, :X.shape[1]/2]
            # self.random_embedding_mdl_.fit(X_tmp)

            # indices = self.random_embedding_mdl_.apply(X_tmp)
        else:
            self.random_embedding_mdl_.fit(X)
            #figure out indices
            indices = self.random_embedding_mdl_.apply(X)

        partitioned_data = defaultdict(list)

        leaf_idx = defaultdict(set)
        weight_idx = defaultdict(float)
        #group data belonging to the same partition and compute the weights...
        #is the weight really necessary for the EM steps? Hmm, it seems to be for the initialization
        #d_idx: data index; p_idx: partition index (comprised of estimator index and leaf index)
        for d_idx, d, p_idx in zip(range(len(X)), X, indices):
            for e_idx, l_idx in enumerate(p_idx):
                partitioned_data[e_idx, l_idx].append(d)
                leaf_idx[e_idx] |= {l_idx}

            for e_idx, l_idx in enumerate(p_idx):
                weight_idx[e_idx, l_idx] = float(len(partitioned_data[e_idx, l_idx])) / len(X)
                # weight_idx[e_idx, l_idx] = 1. / len(p_idx)

        #for each group of data, solve an easy IOC problem by assuming a quadratic cost-to-go function
        #note that, if the passive dynamics need to be learned, extra steps are needed to train a regressor with weighted data
        #otherwise, just a simple Gaussian for each conditional probability distribution model
        self.estimators_ = []
        #another copy to store the parameters all together, for EM/evaluation on all of the models
        self.estimators_full_ = defaultdict(list)
        #<hyin/Feb-6th-2016> an estimator and leaf indexed structure to record the passive likelihood of data...
        passive_likelihood_dict = defaultdict(list)
        for e_idx in range(self.n_estimators):
            #for each estimator
            estimator_parms = defaultdict(list)
            for l_idx in leaf_idx[e_idx]:
                if self.verbose:
                    print 'Processing {0}-th estimator and {1}-th leaf...'.format(e_idx, l_idx)
                #and for each data partition
                data_partition=np.array(partitioned_data[e_idx, l_idx])
                if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
                    X_new         = data_partition[:, data_partition.shape[1]/2:]
                    X_old         = data_partition[:, 0:data_partition.shape[1]/2]
                    X_new_passive = np.array([self.passive_dyn_func(X_old[sample_idx]) for sample_idx in range(data_partition.shape[0])])
                    passive_likelihood = _passive_dyn_likelihood(X_new, X_new_passive, self.passive_dyn_noise, self.passive_dyn_ctrl, self.reg)

                    weights = passive_likelihood / np.sum(passive_likelihood)
                    weighted_mean = np.sum((weights*X_new.T).T, axis=0)

                    estimator_parms['means'].append(weighted_mean)
                    estimator_parms['covars'].append(_frequency_weighted_covariance(X_new, weighted_mean, weights, spherical=False))

                    #for full estimators
                    self.estimators_full_['means'].append(estimator_parms['means'][-1])
                    self.estimators_full_['covars'].append(estimator_parms['covars'][-1])

                    #<hyin/Feb-6th-2016> also remember the data weights according to the passive likelihood
                    #this could be useful if the weights according to the passive likelihood are desired for other applications
                    #to evaluate some statistics within the data partition
                    passive_likelihood_dict[e_idx, l_idx] = weights
                else:
                    estimator_parms['means'].append(np.mean(data_partition, axis=0))
                    estimator_parms['covars'].append(np.cov(data_partition.T))

                    #for full estimators
                    self.estimators_full_['means'].append(estimator_parms['means'][-1])
                    self.estimators_full_['covars'].append(estimator_parms['covars'][-1])

                    #for MaxEnt, uniform passive likelihood
                    passive_likelihood_dict[e_idx, l_idx] = np.ones(len(data_partition)) / float(len(data_partition))


                estimator_parms['weights'].append(weight_idx[e_idx, l_idx])
                self.estimators_full_['weights'].append(weight_idx[e_idx, l_idx]/float(self.n_estimators))

            self.estimators_.append(estimator_parms)
        #can stop here or go for expectation maximization for each estimator...
        if self.em_itrs > 0:
            #prepare em results for each estimator
            em_res = [self._em_steps(e_idx, X, y) for e_idx in range(self.n_estimators)]
            #or do EM on the full model?
            # <hyin/Dec-2nd-2015> no, doing this seems to harm the learning as the aggregated model is really
            # complex so optimizing that model tends to overfit...
            # em_res = self._em_steps(None, X, y)
            #then use them
            self.estimators_=em_res

        self.prepare_inv_and_constants()
        return indices, leaf_idx, passive_likelihood_dict

    def _em_steps(self, estimator_idx, X, y=None):
        #use the current estimation as initialization to perform expectation-maximization
        #for now reuse the procedure implemented by scikit-learn; actually a customized implementation
        #is required if the passive dynamics also needs to be learned.
        if self.verbose:
            if estimator_idx is not None:
                print 'EM steps for the estimator {0}'.format(estimator_idx)
            else:
                print 'EM steps...'

        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            #extract X_old, X_new, X_new_passive
            X_old = X[:, 0:X.shape[1]/2]
            X_new = X[:, X.shape[1]/2:]
            X_new_passive = np.array([self.passive_dyn_func(X_old[sample_idx]) for sample_idx in range(X.shape[0])])


            # EM algorithms
            current_log_likelihood = None
            # reset self.converged_ to False
            converged = False
            # this line should be removed when 'thresh' is removed in v0.18
            tol = 1e-4
            #use the internal EM steps for non-uniform passive dynamics case
            for i in range(self.em_itrs):
                prev_log_likelihood = current_log_likelihood
                # Expectation step
                log_likelihoods, responsibilities = self._do_estep(
                    estimator_idx, X_new_passive, X_new, y)
                current_log_likelihood = log_likelihoods.mean()

                if self.verbose:
                    print 'current_log_likelihood:', current_log_likelihood
                if prev_log_likelihood is not None:
                    change = abs(current_log_likelihood - prev_log_likelihood)
                    if change < tol:
                        converged = True
                        break

                # Maximization step
                if estimator_idx is not None:
                    self._do_mstep(X_new_passive, X_new, responsibilities, self.estimators_[estimator_idx])
                else:
                    self._do_mstep(X_new_passive, X_new, responsibilities, self.estimators_full_)

            if estimator_idx is None:
                res=self.estimators_full_
            else:
                res=self.estimators_[estimator_idx]
        else:
            if estimator_idx is not None:
                n_partitions=len(self.estimators_[estimator_idx]['weights'])
                #use our own initialization
                g = mixture.GMM(n_components=n_partitions, n_iter=self.em_itrs, init_params='',
                    covariance_type='full')
                g.means_=np.array(self.estimators_[estimator_idx]['means'])
                g.covars_=np.array(self.estimators_[estimator_idx]['covars'])
                g.weights_=np.array(self.estimators_[estimator_idx]['weights'])
            else:
                n_partitions=len(self.estimators_full_['weights'])
                g = mixture.GMM(n_components=n_partitions, n_iter=self.em_itrs, init_params='',
                    covariance_type='full')
                g.means_=np.array(self.estimators_full_['means'])
                g.covars_=np.array(self.estimators_full_['covars'])
                g.weights_=np.array(self.estimators_full_['weights'])

            g.fit(X)

            #prepare to return a defaultdict
            res=defaultdict(list)
            res['means']=list(g.means_)
            res['covars']=list(g.covars_)
            res['weights']=list(g.weights_)

        return res

    def _do_estep(self, estimator_idx, X_new_passive, X_new, y):
        return self._score_sample_for_passive_mdl_helper(
                    estimator_idx, X_new_passive, X_new, y)

    def _do_mstep(self, X_new_passive, X_new, responsibilities, parms, min_covar=1e-7):
        """
        X_new_passive    -  An array of the propagation of the old state through the passiv edynamics
        X_new            -  An array of the new states that observed  
        responsibilities -  array_like, shape (n_samples, n_components)
                            Posterior probabilities of each mixture component for each data
        """
        n_samples, n_dim = X_new.shape
        weights = responsibilities.sum(axis=0)
        weighted_X_new_sum = np.dot(responsibilities.T, X_new)
        weighted_X_new_passive_sum = np.dot(responsibilities.T, X_new_passive)
        inverse_weights = 1.0 / (weights[:, np.newaxis] + 10 * EPS)
        weighted_X_new_mean = weighted_X_new_sum * inverse_weights
        weighted_X_new_passive_mean = weighted_X_new_passive_sum * inverse_weights

        if 'weights' in parms:
            parms['weights'] = (weights / (weights.sum() + 10 * EPS) + EPS)

        # delta_X_new                 = [None] * n_samples
        # delta_X_new_passive         = [None] * n_samples
        # delta_X_new_passive_Sigma_0 = [None] * n_samples
        # one_array = np.ones(n_dim)
        # for c in range(len(parms['weights'])):
        #     delta_X_new[c]                 = X_new - weighted_X_new_mean[c]
        #     delta_X_new_passive[c]         = X_new_passive - weighted_X_new_passive_mean[c]
        #     delta_X_new_passive_Sigma_0[c] = (1./self.passive_dyn_noise * np.eye(n_dim).dot(delta_X_new_passive[c].T)).T

        # if 'covars' in parms:
        #     #now only support diagonal covariance matrix
        #     for c, old_covar in enumerate(parms['covars']):
        #         constant=np.sum(delta_X_new[c]*delta_X_new[c]*responsibilities[:, c][:, np.newaxis], axis=0)#*inverse_weights[c, 0]
        #         so_coeff=np.sum(delta_X_new_passive_Sigma_0[c]*delta_X_new_passive_Sigma_0[c]*responsibilities[:, c][:, np.newaxis], axis=0)#*inverse_weights[c, 0]
        #         #take the roots for S matrix
        #         S_k=(np.sqrt(one_array+4*so_coeff*constant)-one_array)/(2*so_coeff)
        #         #get Sigma_k from S_k through S_k^(-1) = Sigma_k^(-1) + Sigma_0^(-1)
        #         Sigma_k = 1./(1./S_k -  1./self.passive_dyn_noise * np.ones(n_dim))
        #         print S_k, Sigma_k
        #         parms['covars'][c] = np.diag(Sigma_k)
        # if 'means' in parms:
        #     for c, old_mean in enumerate(parms['means']):
        #         Sigma_k_array = np.diag(parms['covars'][c])
        #         S_k=1./Sigma_k_array + 1./self.passive_dyn_noise * np.ones(n_dim)
        #         coeff_mat = np.diag(Sigma_k_array*(1./S_k))
        #         #difference betwen X_new and X_new_passive
        #         delta_X_new_X_new_passive = X_new - (np.diag(S_k).dot(X_new_passive.T)).T
        #         parms['means'][c] = coeff_mat.dot(np.sum(delta_X_new_X_new_passive*responsibilities[:, c][:, np.newaxis]*inverse_weights[c, 0], axis=0))
        #<hyin/Oct-23rd-2015> Try the formulation from the Bellman equation; this seems to lead to a weighted linear regression problem...
        # c = (X_new - X_new_passive)
        #<hyin/Oct-27th-2015> Try the closed-form solutions for a relaxed lower bound
        # if 'means' in parms:
        #     parms['means'] = weighted_X_new_mean
        # if 'covars' in parms:
        #     for c, old_covar in enumerate(parms['covars']):
        #         data_weights = responsibilities[:, c]
        #         parms['covars'][c] = _frequency_weighted_covariance(X_new, parms['means'][c], data_weights)

        #<hyin/Nov-20th-2015> As far as I can tell, the above closed-form solution actually optimizes a value lower than the actual objective;
        #however, this approximation is not tight, thus unfortunately we cannot guarantee the optimum is also attained for the actual objective...
        #another idea is to simplify the model by only learning the mean, i.e. the center of the RBF function
        #the width of the RBF basis can be adapted by solving a one-dimensional numerical optimization, which should lead to
        #a generalized EM algorithm
        #<hyin/Jan-22nd-2016> note that without the adaptation of the covariance, shifting the mean
        #is not that great an option, so let's only keep the weights adaptation. We need numerical optimization for the covariance adaptation
        #to see if it would help the mean shift
        if 'means' in parms:
            for c, old_mean in enumerate(parms['means']):
                Sigma_k_array = parms['covars'][c]
                # S_k = self.passive_dyn_noise * self.passive_dyn_ctrl + Sigma_k_array + 1e-5*np.eye(X_new.shape[1])
                # # coeff_mat = np.diag(Sigma_k_array*(1./S_k))
                # inv_Sigma_k_array = np.linalg.pinv(Sigma_k_array)
                # inv_Sigma_sum = np.linalg.pinv(S_k + Sigma_k_array)
                # #could use woodbury here...
                # coeff_mat = np.linalg.pinv(inv_Sigma_k_array - inv_Sigma_sum)
                # #difference betwen X_new and X_new_passive
                # delta_X_new_X_new_passive = (inv_Sigma_k_array.dot(X_new.T) - inv_Sigma_sum.dot(X_new_passive.T)).T

                # parms['means'][c] = coeff_mat.dot(np.sum(delta_X_new_X_new_passive*responsibilities[:, c][:, np.newaxis]*inverse_weights[c, 0], axis=0))

                # #another formulation? which one is correct?
                # <hyin/Dec-2nd-2015> this seems more straightforward and at least gives a monotonically increasing likelihood
                # need to check the original formulation to see what's the problem
                inv_Sigma_k_array = np.linalg.pinv(Sigma_k_array)
                inv_Sigma_0 = np.linalg.pinv(self.passive_dyn_noise * self.passive_dyn_ctrl + self.reg*np.eye(X_new.shape[1]))
                coeff_mat = Sigma_k_array
                inv_Sigma_sum = inv_Sigma_k_array + inv_Sigma_0
                delta_X_new_X_new_passive = (inv_Sigma_sum.dot(X_new.T) - inv_Sigma_0.dot(X_new_passive.T)).T
                parms['means'][c] = coeff_mat.dot(np.sum(delta_X_new_X_new_passive*responsibilities[:, c][:, np.newaxis]*inverse_weights[c, 0], axis=0))
        # return

    def sample(self, n_samples=1, random_state=None):
        '''
        return samples that are synthesized from the model
        '''
        if not hasattr(self, 'estimators_'):
            print 'The model has not been trained yet...'
            return
        else:
            pass
        return

    def score(self, X, y=None):
        #take log likelihood for each estimator for a given trajectory/state
        #without considering the passive dynamics: MaxEnt model
        estimator_scores=[_log_multivariate_normal_density_full(
                            X,
                            np.array(self.estimators_[e_idx]['means']),
                            np.array(self.estimators_[e_idx]['covars']))
                            +np.log(self.estimators_[e_idx]['weights']) for e_idx in range(self.n_estimators)]

        # concatenate different models...
        # estimator_scores=np.concatenate(estimator_scores,axis=1)
        # res=[logsumexp(x)-np.log(1./self.n_estimators) for x in np.array(estimator_scores)]
        # another way: mean of evaluated cost functions
        # helper to evaluate a single model
        mdl_eval = lambda scores: [logsumexp(x_score) for x_score in scores]
        estimator_scores = np.array([mdl_eval(scores) for scores in estimator_scores])

        responsibilities = [np.exp(estimator_scores[e_idx] - estimator_scores[e_idx][:, np.newaxis]) for e_idx in range(self.n_estimators)]
        #average seems to be more reasonable...
        res=np.mean(estimator_scores,axis=0)
        res_responsibilities = np.mean(np.array(responsibilities), axis=0)
        return -np.array(res), res_responsibilities

    def score_samples(self, X, y=None, min_covar=1.e-7):
        #a different version to evaluate the quality/likelihood of state pairs
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            X_old = X[:, 0:X.shape[1]/2]
            X_new = X[:, X.shape[1]/2:]
            X_new_passive = np.array([self.passive_dyn_func(X_old[sample_idx]) for sample_idx in range(X.shape[0])])

            log_prob_lst = [None] * self.n_estimators
            respon_lst = [None] * self.n_estimators
            for e_idx in range(self.n_estimators):
                log_prob_lst[e_idx], respon_lst[e_idx] = self._score_sample_for_passive_mdl_helper(
                    e_idx, X_new_passive, X_new, y, min_covar)
            res = -np.mean(np.array(log_prob_lst),axis=0)
            res_responsibilities = np.mean(np.array(respon_lst), axis=0)
        else:
            #this should be a trajectory/maximum ent model, use score...
            res, res_responsibilities = self.score(X, y)
        return res, res_responsibilities 


    def value_eval_samples(self, X, y=None, average=False, full=True, const=True):
        #switching off the constant term seems to smooth the value function
        #I don't quite understand why; my current guess is that the axis-aligned partition results in 
        #oversized covariance matrices, making the constant terms extremely large for some partitions
        #this can be shown by adding a fixed term to the covariance matrices to mitigate the singularity
        #this could be cast as a kind of regularization

        #the new switch is actually equivalent to average=True, but since the training parameters are separated
        #let's keep this ugly solution...
        n_samples, n_dim = X.shape

        if not average:
            if not full:
                weights = []
                for idx in range(self.n_estimators):
                    weights = weights + (np.array(self.estimators_[idx]['weights'])/self.n_estimators).tolist()
                #the real function to evaluate the value functions, which are actually un-normalized Gaussians
                def value_estimator_eval(d):
                    res = []
                    for idx in range(self.n_estimators):
                        for i, (m, c_inv) in enumerate(   zip(self.estimators_[idx]['means'], 
                                                    self.estimators_[idx]['inv_covars'])):
                            diff_data = d - m
                            res.append(.5*diff_data.dot(c_inv).dot(diff_data) + self.estimators_[idx]['beta'][i]*const)
                    return np.array(res)

                res = np.array([ -logsumexp(-value_estimator_eval(d), b=np.array(weights)) for d in X])
            else:
                res = np.zeros(X.shape[0])
                res_mat = np.zeros((X.shape[0], len(self.estimators_full_['means'])))
                for i, (m, c_inv)   in enumerate(   zip(self.estimators_full_['means'], 
                                                self.estimators_full_['inv_covars'])):
                    diff_data = X - m
                    res_mat[:, i] = np.array([e_prod.dot(e)*0.5 + self.estimators_full_['beta'][i]*const for e_prod, e in zip(diff_data.dot(c_inv), diff_data)])
                for d_idx, r in enumerate(res_mat):
                    res[d_idx] = -logsumexp(-r, b=self.estimators_full_['weights'])
        else:
            #the real function to evaluate the value functions, which are actually un-normalized Gaussians
            def value_estimator_eval(idx):
                res = np.zeros((X.shape[0], len(self.estimators_[idx]['means'])))
                logsumexp_res=np.zeros(len(res))
                for i, (m, c_inv) in enumerate(   zip(self.estimators_[idx]['means'], 
                                            self.estimators_[idx]['inv_covars'])):
                    diff_data = X - m
                    res[:, i] = np.array([e_prod.dot(e)*0.5 + self.estimators_[idx]['beta'][i]*const for e_prod, e in zip(diff_data.dot(c_inv), diff_data)])
                for d_idx, r in enumerate(res):
                    logsumexp_res[d_idx] = -logsumexp(-r, b=self.estimators_[idx]['weights'])

                return logsumexp_res
                
            estimator_scores = [ value_estimator_eval(e_idx) for e_idx in range(self.n_estimators) ]
            #take average
            res = np.mean(np.array(estimator_scores), axis=0)
        return res
 
    def _score_sample_for_passive_mdl_helper(self, estimator_idx, X_new_passive, X_new, y, min_covar=1.e-7):
        #for the specified estimator with a passive dynamics model,
        #evaluate the likelihood for given state pairs
        #to call this, ensure passive dynamics and noise are available
        n_samples, n_dim = X_new.shape

        #incorporate the likelihood of passive dynamics - a Gaussian
        """
                        P_0(x'|x) exp^(V(x'))
        P(x'|x) = --------------------------------- = N(x', m(x), S)
                    int_x'' P_0(x''|x) exp^(V(x''))
        """
        """
        for sake of maximization step and simplicity, evaluate a lower-bound instead
        log(P(x'|x)) > -0.5 * D * log(2*pi) + 0.5*log(det(S^{-1})) -0.5*log2 + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
                     > -0.5 * D * log(2*pi) + 0.5*log(det(S^{-1})/2) + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k)
                     > -0.5 * D * log(2*pi) + 0.5*log((det(Sigma_k)^{-1}+det(Sigma_0)^{-1})/2) + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
                     > -0.5 * D * log(2*pi) + 0.5*log(det(Sigma_k)^{-1})/2 + 0.5*log(det(Sigma_0))/2 + 0.5*log2 - 0.5*(x'-f(x))^TSigma^{-1}(x'-f(x)) - 0.5*(x'-mu_k)^TSimga_k^{-1}(x'-mu_k) + 0.5*(mu_k-f(x))^TM^{-1}(mu_k-f(x))
        Any way to bound the last term to also make it independent from matrix other than Sigma_k?
        """

        # regularize to prevent numerical instability
        Sigma_0 = self.passive_dyn_noise * self.passive_dyn_ctrl + self.reg*np.eye(X_new.shape[1])
        # + 1e-2 * np.eye(X_new.shape[1])
        Sigma_0_inv = np.linalg.pinv(Sigma_0)
        if estimator_idx is not None:
            Sigma   = self.estimators_[estimator_idx]['covars']
            mu      = self.estimators_[estimator_idx]['means']
            w       = self.estimators_[estimator_idx]['weights']
        else:
            Sigma   = self.estimators_full_['covars']
            mu      = self.estimators_full_['means']
            w       = self.estimators_full_['weights']
        nmix    = len(mu)

        log_prob  = np.empty((n_samples, nmix))
        for c, (mu_k, Sigma_k) in enumerate(zip(mu, Sigma)):
            #obviously, this fraction can be optimized by exploiting the structure of the covariance matrix,
            #e.g. via Cholesky decomposition
            Sigma_k_inv = np.linalg.pinv(Sigma_k)
            S_inv       = Sigma_k_inv + Sigma_0_inv
            S           = np.linalg.pinv(S_inv)
            try:
                S_chol = linalg.cholesky(S, lower=True)
            except linalg.LinAlgError:
                # The model is most probably stuck in a component with too
                # few observations; we need to reinitialize this component
                S_chol = linalg.cholesky(S + min_covar * np.eye(n_dim),
                                          lower=True)
            m = S.dot((Sigma_k_inv.dot(mu_k)+Sigma_0_inv.dot(X_new_passive.T).T).T).T
            #fraction part of above equation
            # scale_log_det = -.5 * (np.log(2*np.pi) + np.sum(np.log(S_inv)) + 
            #     2*np.sum(np.log(np.diag(Sigma_k_chol))) + np.sum(np.log(np.diag(Sigma_0))))
            # #exp() part of the above equation
            # S_sol = linalg.solve_triangular(M_chol, (X_new - X_old).T, lower=True).T

            # scale_log_rbf = -.5 * (np.sum(M_sol**2), axis=1)
            S_log_det = 2 * np.sum(np.log(np.diag(S_chol)))
            # print 'S_log_det:', S_log_det
            S_sol = linalg.solve_triangular(S_chol, (X_new - m).T, lower=True).T
            log_prob[:, c] = -.5 * (np.sum(S_sol**2, axis=1) + n_dim * np.log(2 * np.pi) + S_log_det)
        lpr = log_prob + np.log(w)
        # print 'log_prob:', log_prob
        # print 'w:', w
        # print 'lpr:', lpr
        logprob = logsumexp(lpr, axis=1)
        responsibilities = np.exp(lpr - logprob[:, np.newaxis])
        return logprob, responsibilities

    def prepare_inv_and_constants(self):
        '''
        supplementary steps to prepare the inverses of the covariance matrices and the constant terms
        ''' 
        regularization = self.reg
        for idx in range(self.n_estimators):
            self.estimators_[idx]['inv_covars'] = [ np.linalg.pinv(covar + np.eye(covar.shape[0])*regularization) for covar in self.estimators_[idx]['covars']]
            self.estimators_[idx]['beta'] = [.5*np.log(pseudo_determinant(covar + np.eye(covar.shape[0])*regularization)) + .5*np.log(2*np.pi)*covar.shape[0] for covar in self.estimators_[idx]['covars']]

        self.estimators_full_['weights'] = []
        self.estimators_full_['means'] = []
        self.estimators_full_['covars'] = []
        for e_idx in range(self.n_estimators):
            for leaf_idx in range(len(self.estimators_[e_idx]['weights'])):
                self.estimators_full_['weights'].append(self.estimators_[e_idx]['weights'][leaf_idx]/float(self.n_estimators))
                self.estimators_full_['covars'].append(self.estimators_[e_idx]['covars'][leaf_idx])
                self.estimators_full_['means'].append(self.estimators_[e_idx]['means'][leaf_idx])
        # self.estimators_full_['inv_covars'] = [ np.linalg.pinv(covar) for covar in self.estimators_full_['covars']]
        # self.estimators_full_['beta'] = [.5*np.log(pseudo_determinant(covar)) + .5*np.log(2*np.pi)*covar.shape[0] for covar in self.estimators_full_['covars']]
                self.estimators_full_['inv_covars'].append(self.estimators_[e_idx]['inv_covars'][leaf_idx])
                self.estimators_full_['beta'].append(self.estimators_[e_idx]['beta'][leaf_idx])
        return
            for nb in range(nb_comp):
                new_col = '{}_{:03d}'.format(k, nb + 1)
                X_train[new_col] = trans_train[:, nb]
                X_valid[new_col] = trans_valid[:, nb]
                X_test[new_col] = trans_test[:, nb]

        #known cluster
        f = 'f_clu_{:03d}'.format(n_clust)
        f_y_enc.append(f)
        X_train[f] = clust.fit_predict(X_train)
        X_valid[f] = clust.predict(X_valid)
        X_test[f] = clust.predict(X_test)

        #embed
        embed.fit(X_train)
        trans_train = embed.apply(X_train)
        trans_valid = embed.apply(X_valid)
        trans_test = embed.apply(X_test)

        for tree in range(trans_train.shape[1]):
            f = 'f_embed_{:04d}'.format(tree)
            f_y_enc.append(f)
            leaf_lbl = LabelEncoder()
            leaf_train = trans_train[:, tree].tolist()
            leaf_valid = trans_valid[:, tree].tolist()
            leaf_test = trans_test[:, tree].tolist()

            leaf_lbl.fit(leaf_train + leaf_valid + leaf_test)
            X_train[f] = leaf_lbl.transform(leaf_train)
            X_valid[f] = leaf_lbl.transform(leaf_valid)
            X_test[f] = leaf_lbl.transform(leaf_test)
Example #12
0
class RecForest(object):
    """
    Implementation of RecForest for Anomaly Detection.

    Parameters
    ----------
    n_estimators : int, default=100
        The number of decision trees in the forest.
    max_depth : int, default=None
        The maximum depth of decision trees in the forest. ``None`` means no
        limitation on the maximum tree depth.
    n_jobs : int, default=None
        The number of jobs to run in parallel for both `fit` and `transform`.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
        context. ``-1`` means using all processors.
    random_state : int or None, default=None
        - If ``int``, ``random_state`` is the seed used by the internal random
          number generator;
        - If ``None``, the random number generator is the RandomState
          instance used by `np.random`.

    Attributes
    ----------
    estimator_ : RandomTreesEmbedding
        The backbone model of RecForest.
    """
    def __init__(self,
                 n_estimators=100,
                 max_depth=None,
                 n_jobs=None,
                 random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.estimator_ = RandomTreesEmbedding(n_estimators=self.n_estimators,
                                               max_depth=self.max_depth,
                                               n_jobs=self.n_jobs,
                                               random_state=random_state)

    def _rec_error(self, X, X_rec):
        """
        Compute the reconstruction error given the original sample and the
        reconstructed sample.
        """
        assert X.shape == X_rec.shape
        rec_error = np.sum(np.square(X - X_rec), axis=1)

        return rec_error

    def _init_bound(self, X):
        """Initialize the bounding boxes."""
        n_samples, _ = X.shape
        lower_bound = np.repeat(self.amin, n_samples, axis=0)
        upper_bound = np.repeat(self.amax, n_samples, axis=0)

        return lower_bound, upper_bound

    def _transform(self, X):
        """Generate reconstructed samples from the bounding boxes."""
        rets = Parallel(n_jobs=self.n_jobs)(
            delayed(_parallel_transform)(X, tree, idx, self.amin, self.amax)
            for idx, tree in enumerate(self.estimator_.estimators_))

        # Merge results from workers
        lower_bound, upper_bound = self._init_bound(X)
        for tree_lower, tree_upper in rets:
            lower_bound = np.maximum(lower_bound, tree_lower)
            upper_bound = np.minimum(upper_bound, tree_upper)

        X_rec = .5 * lower_bound + .5 * upper_bound

        return X_rec

    def fit(self, X):
        """
        Build the RecForest from the training set X.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input training data.
        """

        # C-aligned
        if not X.flags["C_CONTIGUOUS"]:
            X = np.ascontiguousarray(X)

        self.amax = np.amax(X, axis=0).reshape(1, -1)
        self.amin = np.amin(X, axis=0).reshape(1, -1)
        self.estimator_.fit(X)

        return self

    def apply(self, X):
        """
        Return the leaf node ID for each sample in each decision tree of the
        RecForest.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            The leaf node ID mat, with the i-th row corresponding to the
            leaf node of i-th sample across all decision trees.
        """
        X_leaves = self.estimator_.apply(X)
        return X_leaves

    def predict(self, X):
        """
        Predict raw anomaly scores of X using the fitted RecForest.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        scores : ndarray of shape (n_samples,)
            The anomaly scores of each sample in X.
        """

        # C-aligned
        if not X.flags["C_CONTIGUOUS"]:
            X = np.ascontiguousarray(X)

        n_samples, _ = X.shape
        scores = np.zeros((n_samples, ))
        X_rec = self._transform(X)
        scores = self._rec_error(X, X_rec)

        return scores
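
# A minimal usage sketch (not part of the original example): assuming the module-level helpers this
# class relies on (e.g. _parallel_transform, Parallel/delayed and RandomTreesEmbedding) are imported,
# RecForest can be fitted on normal data and queried for anomaly scores. The data and the outliers
# below are synthetic and purely illustrative.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    X_train = rng.randn(256, 8)                               # "normal" training data
    X_test = np.vstack([rng.randn(16, 8),
                        10.0 + rng.randn(4, 8)])              # last 4 rows are obvious outliers
    forest = RecForest(n_estimators=50, max_depth=8, n_jobs=1, random_state=0)
    forest.fit(X_train)
    scores = forest.predict(X_test)                           # larger reconstruction error -> more anomalous
    leaves = forest.apply(X_test)                             # (n_samples, n_estimators) leaf IDs
    print(scores[-4:].mean() > scores[:16].mean())            # outliers should score higher on average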
Example #13
0
# 10. Other special APIs
print("List of sub-models:\n{}".format(algo.estimators_))

from sklearn import tree
import pydotplus

k = 0
for algo1 in algo.estimators_:
    dot_data = tree.export_graphviz(decision_tree=algo1, out_file=None,
                                    feature_names=['A', 'B', 'C', 'D'],
                                    class_names=['1', '2', '3'],
                                    filled=True, rounded=True,
                                    special_characters=True
                                    )

    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_png('trte_{}.png'.format(k))
    k += 1
    if k > 3:
        break

# Do a dimensionality expansion
print("*" * 100)
x_test2 = x_test.iloc[:2, :]
print(x_test2)
# The apply method returns the leaf node indices
print(algo.apply(x_test2))
# transform converts the data (essentially the apply method + one-hot encoding)
print(algo.transform(x_test2))
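
# A small side check (not part of the original example): for a RandomTreesEmbedding, transform() is
# indeed the one-hot encoding of the leaf indices returned by apply(). The small forest and random
# data below are illustrative only.
import numpy as np
from sklearn.ensemble import RandomTreesEmbedding

X_demo = np.random.randn(20, 4)
emb = RandomTreesEmbedding(n_estimators=3, max_depth=2, random_state=0).fit(X_demo)
leaf_ids = emb.apply(X_demo)        # shape (20, 3): one leaf index per tree
one_hot = emb.transform(X_demo)     # sparse matrix, one indicator column per leaf
print(leaf_ids.shape, one_hot.shape)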
Example #14
0
class EnsembleIOCTraj(BaseEstimator, RegressorMixin):
    '''
    Handling the entire trajectories as the input
    '''
    def __init__(self,  traj_clusters=3, ti=True,
                        n_estimators=20,
                        max_depth=5, min_samples_split=10, min_samples_leaf=10, state_n_estimators=100, state_n_clusters=0,
                        random_state=0,
                        em_itrs=5,
                        regularization=0.05,
                        passive_dyn_func=None,
                        passive_dyn_ctrl=None,
                        passive_dyn_noise=None,
                        verbose=False):
        '''
        traj_clusters       - number of clusters of trajectories
        ti                  - whether or not to extract time invariant states

        ***The remaining parameters are for the state IOC estimators***
        n_estimators        - number of ensembled models
        ...                 - a batch of parameters used for RandomTreesEmbedding, see relevant documents

        state_n_estimators  - number of state estimators
        state_n_clusters    - number of clusters for states for each trajectory group
        em_itrs             - maximum number of EM iterations to take
        regularization      - small positive scalar to prevent singularity of matrix inversion
        passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
        passive_dyn_ctrl    - function to return the control matrix which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                                note this implies a dynamical system with constant input gain. It is extendable to have state dependent
                                input gain then we need covariance for each data point
        verbose             - output training information
        '''
        self.n_traj_clusters = traj_clusters
        if isinstance(state_n_clusters, int):
            state_clusters_lst = [state_n_clusters] * self.n_traj_clusters
        else:
            state_clusters_lst = state_n_clusters

        self.eioc_mdls = [ EnsembleIOC( n_estimators=state_n_estimators,
                                        max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, clustering=state_n_clusters,  #let random embedding decide how many clusters we should have
                                        random_state=random_state,
                                        em_itrs=em_itrs,
                                        regularization=regularization,
                                        passive_dyn_func=passive_dyn_func,
                                        passive_dyn_ctrl=passive_dyn_ctrl,
                                        passive_dyn_noise=passive_dyn_noise,
                                        verbose=verbose) for i in range(self.n_traj_clusters) ]
        self.ti = ti
        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.min_samples_split=min_samples_split
        self.min_samples_leaf=min_samples_leaf
        self.random_state=random_state
        self.state_n_estimators = state_n_estimators
        self.state_n_clusters = state_n_clusters
        self.em_itrs=em_itrs
        self.reg=regularization
        self.passive_dyn_func=passive_dyn_func
        self.passive_dyn_ctrl=passive_dyn_ctrl
        self.passive_dyn_noise=passive_dyn_noise
        self.verbose=verbose

        self.clustered_trajs = None
        return

    def cluster_trajectories(self, trajs):
        #cluster the trajectories according to the random embedding parameters and the number of clusters
        #flatten each trajectory
        flattened_trajs = np.array([np.array(traj).T.flatten() for traj in trajs])

        #an initial partitioning of data with random forest embedding
        self.random_embedding_mdl_ = RandomTreesEmbedding(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state
            )

        self.random_embedding_mdl_.fit(flattened_trajs)
        #figure out indices
        indices = self.random_embedding_mdl_.apply(flattened_trajs)

        #we need to force the data into the given number of clusters using the random embeddings
        #first construct the affinity
        #use the extracted indices as sparse features to construct an affinity matrix
        if self.verbose:
            print 'Building {0} subset of trajectories depending on their random embedding similarity...'.format(self.n_traj_clusters)
        aff_mat = _affinity_matrix_from_indices(indices, 'binary')
        #using spectral mapping (Laplacian eigenmap)
        self.cluster = SpectralClustering(n_clusters=self.n_traj_clusters, affinity='precomputed')
        self.cluster.fit(aff_mat)

        clustered_trajs = [[] for i in range(self.n_traj_clusters)]

        for d_idx, d, p_idx in zip(range(len(trajs)), trajs, self.cluster.labels_):
            clustered_trajs[p_idx].append(d)

        #let's see how DBSCAN works
        #here it means at least how many trajectories we need to form a cluster
        #don't know why it always assigns all of the data as noise...
        # self.cluster = DBSCAN(eps=0.5, min_samples=self.n_traj_clusters, metric='euclidean', algorithm='auto')
        # flatten_trajs = [traj.T.flatten() for traj in trajs]
        # self.cluster.fit(flatten_trajs)
        # labels = self.cluster.labels_
        # print labels
        # # Number of clusters in labels, ignoring noise if present.
        # n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        #
        # clustered_trajs = [[] for i in range(n_clusters_)]
        #
        # for d_idx, d, p_idx in zip(range(len(trajs)), trajs, labels):
        #     clustered_trajs[p_idx].append(d)

        return np.array(clustered_trajs)

    def fit(self, X, y=None):
        '''
        X is an array of trajectories
        '''
        #first cluster these trajectories into locally similar data sets (here 'locally' does not necessarily refer to Euclidean distance)
        clustered_trajs = self.cluster_trajectories(X)

        for i in range(len(clustered_trajs)):
            #for each clustered trajectories train the sub eioc model
            #reform the trajectories if necessary
            if not self.ti:
                #time-varying system, just flatten them
                flattened_trajs = [np.array(traj).T.flatten() for traj in clustered_trajs[i]]
                self.eioc_mdls[i].clustering=1
                self.eioc_mdls[i].fit(flattened_trajs)
                #note the fitted model retains the mean and covariance of the flattened trajectories
            else:
                #time invariant
                aug_states = []
                for traj in clustered_trajs[i]:
                    for t_idx in range(len(traj)-1):
                        aug_states.append(np.array(traj)[t_idx:t_idx+2, :].flatten())

                self.eioc_mdls[i].fit(np.array(aug_states))

        self.clustered_trajs = clustered_trajs
        return

    def score(self, X, gamma=1.0, average=False):
        #score a query state
        if self.clustered_trajs is not None:
            #the model ensemble has been trained
            # score_ensemble = [np.array(model.score(X)[0]) for model in self.eioc_mdls]
            score_ensemble = [np.array(model.value_eval_samples(X,average=average)) for model in self.eioc_mdls]
            #average (maximum likelihood) or logsumexp (softmaximum -> maximum posterior)
            if gamma is None:
                res = np.mean(score_ensemble, axis=0)
            else:
                # mdl_eval = lambda scores: [logsumexp(x_score) for x_score in scores]
                res = np.array([-logsumexp(-gamma*np.array([score[sample_idx] for score in score_ensemble])) for sample_idx, sample in enumerate(X)])

        return res
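
# A rough usage sketch (not part of the original example), under the assumption that this module's
# dependencies (EnsembleIOC, SpectralClustering, _affinity_matrix_from_indices, logsumexp) are
# importable: fit the trajectory-level model on a few synthetic, equal-length 2-D trajectories and
# score query state pairs. Shapes and hyper-parameters are illustrative only.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    trajs = [np.cumsum(rng.randn(50, 2), axis=0) for _ in range(30)]   # 30 random-walk trajectories
    traj_ioc = EnsembleIOCTraj(traj_clusters=2, ti=True,
                               n_estimators=10, max_depth=3,
                               state_n_estimators=5, em_itrs=0)
    traj_ioc.fit(trajs)
    query = rng.randn(5, 4)                   # time-invariant mode scores [x_t, x_{t+1}] pairs
    costs = traj_ioc.score(query, gamma=1.0)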