Example #1
    def __init__(self, kernel_width, seed, nRFF, n_feat):
        rbf_sampler = RBFSampler(gamma=kernel_width, random_state=seed, n_components=nRFF)
        rbf_sampler.fit(np.zeros((1, n_feat)))

        self.W = rbf_sampler.random_weights_
        self.b = rbf_sampler.random_offset_
        self.nRFF = nRFF
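
The weights and offsets extracted above are exactly what RBFSampler.transform applies internally. A minimal sketch of a companion method that reproduces that map from the stored W, b and nRFF (this method is an illustration, not part of the original class):

    def rff_features(self, X):
        # Random Fourier features: z(x) = sqrt(2 / nRFF) * cos(x W + b)
        projection = X @ self.W + self.b
        return np.cos(projection) * np.sqrt(2.0 / self.nRFF)
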
Example #2
class KCCA():
    def __init__(self, n_components=256):
        self.CCA = CCA(n_components)

    # A, B: arrays of shape (N, d_A) and (N, d_B)
    def fit(self, A, B):
        A = deepcopy(A)
        B = deepcopy(B)
        self.rbf_feature_A = RBFSampler(gamma=1, n_components=len(A))
        self.rbf_feature_B = RBFSampler(gamma=1, n_components=len(B))
        self.rbf_feature_A.fit(A)
        self.rbf_feature_B.fit(B)
        A = self.rbf_feature_A.transform(A)
        B = self.rbf_feature_B.transform(B)
        self.CCA.fit(A, B)

    def transform_a(self, A):
        A = deepcopy(A)
        A = self.rbf_feature_A.transform(A)
        return self.CCA.transform_a(A)

    def transform_b(self, B):
        B = deepcopy(B)
        B = self.rbf_feature_B.transform(B)
        return self.CCA.transform_b(B)
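
A minimal usage sketch for the class above; the data shapes are illustrative, and the transform_a/transform_b methods of the wrapped CCA object are assumptions carried over from the snippet rather than the scikit-learn CCA API:

import numpy as np

A = np.random.randn(500, 32)    # view A, shape (N, d_A)
B = np.random.randn(500, 48)    # view B, shape (N, d_B)

kcca = KCCA(n_components=16)
kcca.fit(A, B)                  # lift both views with random Fourier features, then fit CCA
A_proj = kcca.transform_a(A)    # (N, 16) projection of view A
B_proj = kcca.transform_b(B)    # (N, 16) projection of view B
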
Example #3
    def computeKernelMatrix(self, Graphs):
        print("Computing gram matrix")
        # Preprocessing step: approximate the RBF kernel with explicit random
        # Fourier features and add a "veclabel_rbf" field to every node.
        labels = set()
        for g in Graphs:
            for _, d in g.nodes(data=True):
                labels.add(tuple(d['veclabel']))
        labels_list = [list(l) for l in labels]
        print("Size of labels matrix:", len(labels_list), len(labels_list[0]))
        feature_map_fourier = RBFSampler(gamma=(1.0 / len(labels_list[0])),
                                         random_state=1,
                                         n_components=self.n_comp)
        # Alternative: feature_map_fourier = Nystroem(gamma=(1.0 / len(labels_list[0])), random_state=1, n_components=250)

        feature_map_fourier.fit(labels_list)
        for g in Graphs:
            for n, d in g.nodes(data=True):
                g.nodes[n]['veclabel_rbf'] = feature_map_fourier.transform(
                    [d['veclabel']])[0]
        print("RBF approximation finished.")

        Gram = np.empty(shape=(len(Graphs), len(Graphs)))
        progress = 0
        FeatureMaps = []
        for i in range(len(Graphs)):
            FeatureMaps.append(
                self.generateGraphFeatureMap(Graphs[i], self.max_radius))

        print("FeatureVectors calculated")
        for i in range(len(Graphs)):
            for j in range(i, len(Graphs)):
                progress += 1
                Gram[i][j] = self._kernelFunctionFeatureVectors(
                    FeatureMaps[i], FeatureMaps[j])
                Gram[j][i] = Gram[i][j]
                if progress % 1000 == 0:
                    print("k", end=" ")
                    sys.stdout.flush()
                elif progress % 100 == 0:
                    print(".", end=" ")
                    sys.stdout.flush()

        return Gram
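
The Gram matrix returned above is a precomputed kernel over graphs, so it can be fed directly to estimators that accept precomputed kernels. A minimal sketch, assuming one class label per graph is available in y:

from sklearn.svm import SVC

# Gram is the (n_graphs, n_graphs) kernel matrix computed above;
# y holds one label per graph (assumed available).
clf = SVC(kernel='precomputed')
clf.fit(Gram, y)

# Prediction needs the kernel between test graphs and the training graphs:
# y_pred = clf.predict(Gram_test_vs_train)
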
Example #4
class RKHSfunction():
    # easier to use with random features
    def __init__(self, kernel_gamma, seed=1, n_feat=96):
        self.kernel_gamma = kernel_gamma
        self.n_feat = n_feat
        self.model = nn.Sequential(
            Flatten(), nn.Linear(n_feat, 1, bias=True)
        )  # random feature. the param of this models are the weights, i.e., decision var
        self.seed = seed
        self.rbf_feature = RBFSampler(
            gamma=kernel_gamma, n_components=n_feat,
            random_state=seed)  # only support Gaussian RKHS for now
        # self.rbf_feature.fit(X_example.view(X_example.shape[0], -1))

    def eval(self, X, fit=False):
        x_reshaped = (X.view(X.shape[0], -1))

        if fit:
            # refit the random feature map on this batch if requested
            self.rbf_feature.fit(x_reshaped)

        if not x_reshaped.requires_grad:
            # x_feat = self.rbf_feature.fit_transform(x_reshaped)
            x_feat = self.rbf_feature.transform(
                x_reshaped)  # only transform during evaluation
            rkhsF = self.model(torch.from_numpy(x_feat).float())
        else:
            # differentiable path: fit the sampler on detached data, then apply
            # a torch reimplementation of transform so gradients flow through X
            x_detach = x_reshaped.detach()
            x_fitted = self.rbf_feature.fit(x_detach, y=None)
            # x_fitted.transform(x_reshaped)
            x_feat = pth_transform(x_fitted, x_reshaped)
            # assert torch.max(x_feat.detach() - self.rbf_feature.fit_transform(x_detach)) == 0 # there's randomness of course
            rkhsF = self.model(x_feat)[:, 0]

        return rkhsF

    def norm(self):
        return computeRKHSNorm(self.model)

    def set_seed(self, seed):
        # set the seed of RF. such as in doubly SGD
        self.seed = seed
        self.rbf_feature = RBFSampler(
            gamma=self.kernel_gamma,
            n_components=self.n_feat,
            random_state=seed)  # only support Gaussian RKHS for now

    def __call__(self, X, fit=False, random_state=False):
        if random_state is True:
            self.set_seed(
                seed=np.random)  # do a random seed reset for doubly stochastic
        # else:
        #     self.set_seed(seed=1)
        return self.eval(X, fit)
Example #5
    def fit(self, X, y=None):
        RBFSampler.fit(self, X=X, y=y)
        for i_pass in range(self.n_pass):
            IntLoss = numpy.zeros((self.n_components, 1))
            EnLoss = numpy.zeros((self.n_components, 1))
            for comp in range(self.n_components):
                if self.verbose:
                    print("COMPONENT %d, " % comp, end="")
                indices_minibatch = numpy.random.choice(
                    X.shape[0], self.minibatch_size)
                minibatch = X[indices_minibatch]
                gram_minibatch = rbf_kernel(minibatch, gamma=self.gamma)
                phi = self.transform(minibatch)
                diff_mat = gram_minibatch - numpy.dot(phi, phi.T)
                n_iter = 0
                err = numpy.inf
                IntLoss[comp] = self.loss_function(minibatch, gram_minibatch)
                if self.verbose:
                    print('Initial Loss', IntLoss[comp])

                while err > self.tol and n_iter < self.max_iter:
                    w_old = self.random_weights_[:, comp].copy()
                    #                    phi = self.transform(minibatch)
                    #                    diff_mat = gram_minibatch - numpy.dot(phi, phi.T)

                    wx_b = numpy.dot(minibatch, self.random_weights_[:, comp]
                                     ) + self.random_offset_[comp]
                    sin_wx = numpy.sin(wx_b).reshape((-1, 1))
                    cos_wx = numpy.cos(wx_b).reshape((-1, 1))

                    sin_cos = numpy.dot(sin_wx, cos_wx.T) * 2 / (
                        self.n_components * self.minibatch_size**2)
                    diff_sin_cos = numpy.diag(
                        numpy.dot(diff_mat, 2. * sin_cos.T)).reshape((-1, 1))
                    dl_dw = numpy.sum(diff_sin_cos * minibatch, axis=0)

                    self.random_weights_[:, comp] -= (self.alpha) * (
                        self.lbda * self.random_weights_[:, comp] + dl_dw)

                    if self.update_b:
                        dl_db = numpy.sum(diff_sin_cos)
                        self.random_offset_[comp] -= self.alpha * dl_db
                    err = numpy.linalg.norm(w_old -
                                            self.random_weights_[:, comp])
                    n_iter += 1
                EnLoss[comp] = self.loss_function(minibatch, gram_minibatch)

                if self.verbose:
                    print("%d iterations" % n_iter)
                    print('End Loss', EnLoss[comp])
                    time.sleep(2)
                self.intial_loss = IntLoss
                self.end_loss = EnLoss
        return self
Example #6
def test_classifier_regularization(normalize, loss):
    rng = np.random.RandomState(0)
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    transformer.fit(X)
    X_trans = transformer.transform(X)
    if normalize:
        X_trans = StandardScaler().fit_transform(X_trans)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    y_train = np.sign(y_train)
    y_test = np.sign(y_test)
    # overfitting
    clf = SGDClassifier(transformer,
                        max_iter=500,
                        warm_start=True,
                        verbose=False,
                        fit_intercept=True,
                        loss=loss,
                        alpha=0.00001,
                        intercept_decay=1e-10,
                        random_state=0,
                        tol=0,
                        normalize=normalize)
    clf.fit(X_train[:100], y_train[:100])
    train_acc = clf.score(X_train[:100], y_train[:100])
    assert train_acc >= 0.95

    # underfitting
    clf_under = SGDClassifier(transformer,
                              max_iter=100,
                              warm_start=True,
                              verbose=False,
                              fit_intercept=True,
                              loss=loss,
                              alpha=10000,
                              random_state=0,
                              normalize=normalize)
    clf_under.fit(X_train, y_train)
    assert np.sum(clf_under.coef_**2) < np.sum(clf.coef_**2)

    # l1 regularization
    clf_l1 = SGDClassifier(transformer,
                           max_iter=100,
                           warm_start=True,
                           verbose=False,
                           fit_intercept=True,
                           loss=loss,
                           alpha=1000,
                           l1_ratio=0.9,
                           random_state=0,
                           normalize=normalize)
    clf_l1.fit(X_train, y_train)
    assert_almost_equal(np.sum(np.abs(clf_l1.coef_)), 0)
Example #7
    def prepare_feature_vector(self, n, H, n_samples):
        rbfs = []
        samples = [self.env.observation_space.sample() for _ in range(n_samples)]
        scaler = StandardScaler()
        samples = scaler.fit_transform(samples)
        for i in range(n):
            r = RBFSampler(n_components=H, gamma=0.8 * (1 + n))
            r.fit(samples)
            rbf = Pipeline(steps=[["scale", scaler], ["rbf", r]])
            rbfs.append(rbf)

        self.feature_generator = FeatureUnion([["rbf-{}".format(i), rbf] for (i, rbf) in enumerate(rbfs)])
Example #8
class GPSamplePath(Function):
    def __init__(self, seed=1):
        self.dim = 1
        self.bounds = [[-3, 3]]
        self.y_bounds = [-2, 2]
        super().__init__(self.dim, self.bounds, seed)
        self.fit()
        self.min, self.max = self.get_min_max()
        res = minimize(self.value_std,
                       self.bounds,
                       maxf=self.dim * 1000,
                       algmethod=1)
        self.x_opt = res['x'][0]
        self.y_opt = -self.value_std(self.x_opt)

    def base_function(self, x):
        res = (6 * x - 2)**2 * np.sin(12 * x - 4)

        return res

    def fit(self):
        X = np.linspace(self.bounds[0][0], self.bounds[0][1], 3)
        Y = np.random.uniform(self.y_bounds[0], self.y_bounds[1], 3)
        X = X.reshape(-1, 1)
        self.rbf_feature = RBFSampler(gamma=1, n_components=30)
        self.rbf_feature.fit(np.atleast_2d(X[0]))
        phi_X = self.rbf_feature.transform(X)
        self.w = np.linalg.inv(phi_X.T.dot(phi_X)).dot(phi_X.T).dot(Y)

    def get_min_max(self):
        X = np.linspace(self.bounds[0][0], self.bounds[0][1], 10000)
        Y = self.value(X)
        return np.min(Y), np.max(Y)

    def value(self, x):
        x = x.reshape(-1, 1)
        res = self.rbf_feature.transform(x).dot(self.w)
        return res

    def value_std(self, x):
        res = self.value(x)
        res = (res - self.min) / (self.max - self.min)

        return res

    def get_pool(self, K):
        return np.linspace(self.bounds[0][0], self.bounds[0][1], K)

    def plot(self):
        x_range = np.linspace(self.bounds[0][0], self.bounds[0][1], 100)
        y = self.value_std(x_range)
        plt.plot(x_range, y)
        plt.show()
Example #9
    def build(self, input_shape):
        rbf_sampler = RBFSampler(
            gamma=self.gamma,
            n_components=self.dim,
            random_state=self.random_state)
        x = np.zeros(shape=(1, self.input_dim))
        rbf_sampler.fit(x)
        self.rff_weights = tf.Variable(
            initial_value=rbf_sampler.random_weights_,
            dtype=tf.float32,
            trainable=True,
            name="rff_weights")
        self.built = True
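
The build method above only captures the sampled weights; a call method is not shown. A minimal sketch of how the corresponding feature map could be applied, assuming the random offsets are also extracted (e.g. from rbf_sampler.random_offset_) and passed in as rff_offset:

import tensorflow as tf

def random_fourier_features(x, rff_weights, rff_offset, dim):
    # Same map as RBFSampler.transform: z(x) = sqrt(2 / dim) * cos(x W + b)
    offset = tf.cast(rff_offset, x.dtype)
    projection = tf.matmul(x, rff_weights) + offset
    return tf.cos(projection) * ((2.0 / dim) ** 0.5)
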
Example #10
class _RBFSamplerImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
Example #11
def test_regressor_regularization(normalize, loss):
    rng = np.random.RandomState(0)
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    transformer.fit(X)
    X_trans = transformer.transform(X)
    if normalize:
        X_trans = StandardScaler().fit_transform(X_trans)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    # overfitting
    clf = SAGARegressor(transformer, max_iter=300, warm_start=True,
                        verbose=False, fit_intercept=True, loss=loss,
                        alpha=0.0001, intercept_decay=1e-6,
                        random_state=0, tol=0, normalize=normalize)
    clf.fit(X_train[:100], y_train[:100])
    l2 = np.mean((y_train[:100] - clf.predict(X_train[:100]))**2)
    assert l2 < 0.01

    # underfitting
    clf_under = SAGARegressor(transformer, max_iter=100, warm_start=True,
                              verbose=False, fit_intercept=True, loss=loss,
                              alpha=100000, random_state=0,
                              normalize=normalize)
    clf_under.fit(X_train, y_train)
    assert np.sum(clf_under.coef_ ** 2) < np.sum(clf.coef_ ** 2)


    # l1 regularization
    clf_l1 = SAGARegressor(transformer, max_iter=100, warm_start=True,
                           verbose=False, fit_intercept=True,
                           loss=loss, alpha=1000, l1_ratio=0.9,
                           random_state=0, normalize=normalize)
    clf_l1.fit(X_train, y_train)
    assert_almost_equal(np.sum(np.abs(clf_l1.coef_)), 0)

    # comparison with sgd
    sgd = SGDRegressor(alpha=0.01, max_iter=100, eta0=1,
                       learning_rate='constant', fit_intercept=True,
                       random_state=0)
    sgd.fit(X_trans[:n_train], y_train)
    test_l2_sgd = np.mean((y_test - sgd.predict(X_trans[n_train:]))**2)
    clf = SAGARegressor(transformer, max_iter=100, warm_start=True,
                        verbose=False, fit_intercept=True, loss=loss,
                        alpha=0.01, random_state=0, normalize=normalize,
                        )

    clf.fit(X_train, y_train)
    test_l2 = np.mean((y_test - clf.predict(X_test))**2)
    assert test_l2 < test_l2_sgd
Example #12
    def fit(self, X, y=None):
        RBFSampler.fit(self, X=X, y=y)
        Xdim = numpy.shape(X)[1]
        sigma = numpy.sqrt(1 / (2 * self.gamma))

        # Replace the i.i.d. Gaussian weights with orthogonal random features.
        or_rbf = OrthogonalRBF(Xdim=Xdim,
                               nbases=self.n_components,
                               lenscale=sigma,
                               random_state=self.random_state)
        self.weights = or_rbf.W
        self.offset = numpy.random.rand(self.n_components) * (2 * numpy.pi)
        self.random_weights_ = or_rbf.W / sigma

        return self
Example #13
class RBFSamplerImpl():
    def __init__(self, gamma=1.0, n_components=100, random_state=None):
        self._hyperparams = {
            'gamma': gamma,
            'n_components': n_components,
            'random_state': random_state
        }

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if (y is not None):
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def transform(self, X):
        return self._sklearn_model.transform(X)
Example #14
class ValueFunction(object):
    """
    Value Function approximator.
    """
    def __init__(self):
        # sample environment states in order to fit the featurizers
        state_samples = np.array(
            [env.observation_space.sample() for x in range(10000)])

        # Standardize features by removing the mean and scaling to unit variance
        self.scaler = StandardScaler()
        self.scaler.fit(state_samples)
        scaler_samples = self.scaler.transform(state_samples)

        # Approximates feature map of an RBF kernel
        # by Monte Carlo approximation of its Fourier transform.
        self.featurizer_state = RBFSampler(gamma=0.5, n_components=100)
        self.featurizer_state.fit(scaler_samples)

        # action model for SGD regressor
        self.action_models = []
        nA = env.action_space.n
        for na in range(nA):
            # Linear regression model trained with SGD.
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit([self.__featurize_state(env.reset())], [0])
            self.action_models.append(model)

        # print(self.action_models)

    def __featurize_state(self, state):
        scaler_state = self.scaler.transform([state])
        return self.featurizer_state.transform(scaler_state)[0]

    def predict(self, state):
        curr_features = self.__featurize_state(state)
        action_probs = np.array(
            [m.predict([curr_features])[0] for m in self.action_models])
        # print(action_probs)
        return action_probs

    def update(self, state, action, y):
        curr_features = self.__featurize_state(state)
        self.action_models[action].partial_fit([curr_features], [y])
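
A minimal sketch of how this approximator is typically driven from a control loop; the epsilon-greedy policy, discount factor, and the classic Gym step API (4-tuple return) are assumptions, not part of the class above:

def q_learning_episode(vf, env, gamma=0.99, epsilon=0.1):
    # One episode of Q-learning against the ValueFunction above (sketch).
    state = env.reset()
    done = False
    while not done:
        # epsilon-greedy action selection over the per-action models
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(vf.predict(state))
        next_state, reward, done, _ = env.step(action)
        # TD(0) target using the current estimate of the next state's value
        target = reward + gamma * np.max(vf.predict(next_state)) * (not done)
        vf.update(state, action, target)
        state = next_state
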
Example #15
class Model:
    def __init__(self, grid):
        # fit the featurizer to data
        samples = gather_samples(grid)
        # self.featurizer = Nystroem()
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s):
        x = self.featurizer.transform([s])[0]
        return x @ self.w

    def grad(self, s):
        x = self.featurizer.transform([s])[0]
        return x
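
The predict/grad pair above supports a standard semi-gradient update. A minimal sketch, assuming a target value g (for example a TD or Monte Carlo return) and a step size alpha:

def semi_gradient_update(model, s, g, alpha=0.1):
    # For a linear model x @ w, the gradient with respect to w is just x = model.grad(s),
    # so the weights move toward the target along the feature vector.
    error = g - model.predict(s)
    model.w = model.w + alpha * error * model.grad(s)
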
Example #16
    def fit(self, X, y=None):
        RBFSampler.fit(self, X=X, y=y)
        Xdim = numpy.shape(X)[1]

        # Orthogonal random features: QR-orthogonalize the Gaussian weight
        # matrix and rescale with chi-distributed row norms.
        WRD = self.random_weights_
        Q = numpy.linalg.qr(WRD)[0]
        S = numpy.sqrt(numpy.random.chisquare(Xdim, Xdim))
        weights = numpy.diag(S).dot(Q)
        sigma = numpy.sqrt(2 * self.gamma)
        self.random_weights_ = numpy.sqrt(2 * sigma) * weights

        return self
Example #17
class LinearRBF(Policy):
    '''
    RBF features
    '''
    def __init__(self, state_dim, action_dim, number_of_features):
        Policy.__init__(self, state_dim, action_dim)
        self.rbf_feature = RBFSampler(gamma=25.,
                                      n_components=number_of_features)
        self.rbf_feature.fit(np.random.randn(action_dim, state_dim))

    def set_theta(self, theta):
        self.theta = theta

    def get_action(self, state):
        features = self.rbf_feature.transform(state.reshape(1, -1))
        action = features @ self.theta[:-self.action_dim].reshape(
            -1, self.action_dim)
        action = action + self.theta[-self.action_dim:]
        return action.reshape(-1)

    def get_number_of_parameters(self):
        return self.rbf_feature.get_params().get(
            "n_components") * self.action_dim + self.action_dim
Example #18
def test_feature_map_equals_scikit_learn():
    sigma = 2.
    gamma = sigma**2

    N = 10
    D = 20
    m = 3
    X = np.random.randn(N, D)
    np.random.seed(1)
    omega = sigma * np.random.randn(D, m)
    u = np.random.uniform(0, 2 * np.pi, m)

    # make sure basis is the same
    np.random.seed(1)
    rbf_sampler = RBFSampler(gamma, m, random_state=1)
    rbf_sampler.fit(X)
    assert_allclose(rbf_sampler.random_weights_, omega)
    assert_allclose(rbf_sampler.random_offset_, u)

    phi_scikit = rbf_sampler.transform(X)
    phi_mine = feature_map(X, omega, u)

    assert_allclose(phi_scikit, phi_mine)
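
The feature_map helper compared against above is not shown. A minimal sketch of the map that RBFSampler.transform computes, which is what a matching implementation would have to reproduce (treat this as a reconstruction, not the original helper):

import numpy as np

def feature_map(X, omega, u):
    # Random Fourier features: phi(x) = sqrt(2 / m) * cos(x @ omega + u)
    m = omega.shape[1]
    return np.cos(X.dot(omega) + u) * np.sqrt(2.0 / m)
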
Example #19
class Model:
    def __init__(self, grid):
        # fit the featurizer to data
        samples = gather_samples(grid)
        # self.featurizer = Nystroem()
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s, a):
        sa = merge_state_action(s, a)
        x = self.featurizer.transform([sa])[0]
        return x @ self.w

    def predict_all_actions(self, s):
        return [self.predict(s, a) for a in ALL_POSSIBLE_ACTIONS]

    def grad(self, s, a):
        sa = merge_state_action(s, a)
        x = self.featurizer.transform([sa])[0]
        return x
Example #20
class Model:
    def __init__(self, env):
        # fit the featurizer to data
        self.env = env
        samples = gather_samples(env)
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s, a):
        sa = np.concatenate((s, [a]))
        x = self.featurizer.transform([sa])[0]
        return x @ self.w

    def predict_all_actions(self, s):
        return [self.predict(s, a) for a in range(self.env.action_space.n)]

    def grad(self, s, a):
        sa = np.concatenate((s, [a]))
        x = self.featurizer.transform([sa])[0]
        return x
Example #21
def _test_learning_kernel_with_random_feature(divergence, trans=None, rho=1):
    if trans is None:
        trans = RBFSampler(gamma=1,  random_state=0)
    trans.set_params(n_components=128)
    X_trans = trans.fit_transform(X)
    score = kernel_alignment(np.dot(X_trans, X_trans.T), y, False)
    lkrf = LearningKernelwithRandomFeature(trans, warm_start=False,
                                           divergence=divergence,
                                           eps_abs=1e-6, eps_rel=1e-6,
                                           max_iter=100, rho=rho)
    X_trans = lkrf.fit_transform(X, y)
    score_lkrf = kernel_alignment(np.dot(X_trans, X_trans.T), y, False)
    assert score_lkrf >= score
    assert_almost_equal(np.sum(lkrf.importance_weights_), 1)
    assert np.min(lkrf.importance_weights_) >= 0
 
    # weaker constraint: use a larger rho
    trans.fit(X)
    lkrf = LearningKernelwithRandomFeature(trans, warm_start=False,
                                           divergence=divergence, 
                                           eps_abs=1e-6, eps_rel=1e-6,
                                           max_iter=100, rho=rho*20)
    X_trans = lkrf.fit_transform(X, y)
    score_lkrf_weak = kernel_alignment(np.dot(X_trans, X_trans.T), y, False)
    print(score_lkrf_weak, score_lkrf, score)
    assert score_lkrf_weak >= score_lkrf

    # remove bases
    n_nz = np.sum(lkrf.importance_weights_ != 0)
    print(n_nz)

    if lkrf.remove_bases():
        X_trans_removed = lkrf.transform(X)
        assert_almost_equal(X_trans_removed.shape[1], n_nz)
        indices = np.nonzero(lkrf.importance_weights_)[0]
        assert_almost_equal(X_trans_removed, X_trans[:, indices])
Example #22
class PCPGAgent(BaseAgent):
    def __init__(self, config):
        BaseAgent.__init__(self, config)
        self.config = config
        self.task = config.task_fn()
        self.network, self.optimizer, self.replay_buffer, self.density_model = dict(), dict(), dict(), dict()
        self.replay_buffer_actions = dict()
        self.replay_buffer_infos = dict()

        # create policy networks for explore, exploit and rollin phases
        for mode in ['explore', 'exploit', 'rollin']:
            self.network[mode] = config.network_fn()
            self.replay_buffer[mode] = []
            self.replay_buffer_actions[mode] = []
            self.replay_buffer_infos[mode] = []
            
        self.optimizer['explore'] = config.optimizer_fn(self.network['explore'].parameters())
        self.optimizer['exploit'] = config.optimizer_fn(self.network['exploit'].parameters())
        self.total_steps = 0
        self.states = self.task.reset()
        self.states = config.state_normalizer(self.states)

        # list to store policies in the policy cover
        self.policy_mixture = [copy.deepcopy(self.network['explore'].state_dict())]

        # each policy will have its own optimizer
        self.policy_mixture_optimizers = [copy.deepcopy(self.optimizer['explore'].state_dict())]

        # weights among the policies in the cover, which are used to sample from it
        self.policy_mixture_weights = torch.tensor([1.0])
        
        self.policy_mixture_returns = []
        self.timestamp = None

        # define exploration reward bonus
        if self.config.bonus == 'rnd':
            # RND bonus
            self.rnd_network = FCBody(self.config.state_dim).to(Config.DEVICE)
            self.rnd_pred_network = FCBody(self.config.state_dim).to(Config.DEVICE)
            self.rnd_optimizer = torch.optim.RMSprop(self.rnd_pred_network.parameters(), 0.001)
        elif self.config.bonus == 'randnet-kernel-s':
            # random network kernel mapping states to features
            if self.config.game == 'maze':
                self.kernel = ConvFCBodyMaze(size=config.maze_size, in_channels = 3, phi_dim = self.config.phi_dim).to(Config.DEVICE)
            else:
                self.kernel = FCBody(self.config.state_dim, hidden_units=(self.config.phi_dim, self.config.phi_dim)).to(Config.DEVICE)
        elif self.config.bonus == 'rbf-kernel':
            # RBF kernel
            self.rbf_feature = RBFSampler(gamma=1, random_state=1, n_components=self.config.phi_dim)
            if isinstance(self.task.action_space, Box):
                self.rbf_feature.fit(X = np.random.randn(5, self.config.state_dim + self.config.action_dim))
            else:
                self.rbf_feature.fit(X = np.random.randn(5, self.config.state_dim + 1))

        if isinstance(self.task.action_space, Box):
            self.uniform_prob = self.continous_uniform_prob()
        else:
            self.uniform_prob = 1./self.config.action_dim

    # takes as input a minibatch of states (and possibly actions), returns exploration reward for each
    def compute_reward_bonus(self, states, actions = None):
        if self.config.bonus == 'rnd':
            states = torch.from_numpy(states).float().to(Config.DEVICE)
            rnd_target = self.rnd_network(states).detach()
            rnd_pred = self.rnd_pred_network(states).detach()
            rnd_loss = F.mse_loss(rnd_pred, rnd_target, reduction='none').mean(1)
            reward_bonus = rnd_loss.cpu().numpy()

        elif 'randnet-kernel' in self.config.bonus:
            phi = self.compute_kernel(tensor(states), actions)
            reward_bonus = torch.sqrt((torch.mm(phi, self.density_model) * phi).sum(1)).detach()
            
        elif 'rbf-kernel' in self.config.bonus:
            assert actions is not None
            phi = self.compute_kernel(tensor(states), tensor(actions))
            reward_bonus = torch.sqrt((torch.mm(phi, self.density_model) * phi).sum(1)).detach()
            
        elif 'id-kernel' in self.config.bonus:
            phi = self.compute_kernel(tensor(states), actions)
            reward_bonus = torch.sqrt((torch.mm(phi, self.density_model) * phi).sum(1)).detach()

            
        elif 'counts' in self.config.bonus: # can use ground truth counts in combolock for debugging
            reward_bonus = []
            for s in self.config.state_normalizer(states):
                s = tuple(s)
                if not s in self.density_model['explore'].keys():
                    cnts = 0
                else:
                    cnts = self.density_model['explore'][s]
                if self.config.bonus == 'counts':
                    reward_bonus.append(1.0/(1.0 + cnts))
                elif self.config.bonus == 'counts-sqrt':
                    reward_bonus.append(1.0/math.sqrt(1.0 + cnts))
                    
            reward_bonus = np.array(reward_bonus)
            
        return reward_bonus



    def time(self, tag=''):
        if self.timestamp is None or tag == 'reset':
            self.timestamp = time()
        else:
            t = time()
            print(f'{tag} took {t - self.timestamp:.4f}s')
            self.timestamp = t
            
    # gather trajectories following a policy and return them in a buffer.
    # explore mode uses exploration bonus as reward, exploit uses environment reward
    # can specify whether to roll in using policy mixture, or instead use the latest policy
    def gather_trajectories(self, roll_in=True, add_bonus_reward=True, debug=False, mode=None, record_return=False):
        config = self.config
        states = self.states
        network = self.network[mode]

        roll_in_length = 0 if (debug or not roll_in) else random.randint(0, config.horizon - 1)
        roll_out_length = config.horizon - roll_in_length
        storage = Storage(roll_out_length)

        if roll_in_length > 0:
            assert roll_in
            # Sample previous policy to roll in
            i = torch.multinomial(self.policy_mixture_weights.cpu(), num_samples=1)
            self.network['rollin'].load_state_dict(self.policy_mixture[i])

            # Roll in
            for _ in range(roll_in_length):
                prediction = self.network['rollin'](states)
                next_states, rewards, terminals, info = self.task.step(to_np(prediction['a']))
                if self.config.game == 'maze':
                    for i in info:
                        self.unique_pos.add(tuple(i['agent_pos']))            
                
                next_states = config.state_normalizer(next_states)
                states = next_states
                self.total_steps += config.num_workers

        # Roll out
        for i in range(roll_out_length):
            if i == 0 and roll_in: #if roll-in is false, then we ignore epsilon greedy and simply roll-out the current policy
                # we are using \hat{\pi}
                sample_eps_greedy = random.random() < self.config.eps
                if sample_eps_greedy:
                    if isinstance(self.task.action_space, Discrete):
                        actions = torch.randint(self.config.action_dim, (states.shape[0],)).to(Config.DEVICE)
                    elif isinstance(self.task.action_space, Box):
                        actions = self.uniform_sample_cont_random_acts(states.shape[0])
                    prediction = network(states, tensor(actions))
                else:
                    prediction = network(states)
                #update the log_prob_a by including the epsilon_greed
                prediction['log_pi_a'] = (prediction['log_pi_a'].exp() * (1.-self.config.eps) + self.config.eps*self.uniform_prob).log()
            else:
                # we are using \pi
                prediction = network(states)

            next_states, rewards, terminals, info = self.task.step(to_np(prediction['a']))

            if self.config.game == 'maze':
                for i in info:
                    self.unique_pos.add(tuple(i['agent_pos']))            

            if add_bonus_reward:
                s = config.state_normalizer(states)
                reward_bonus = self.config.reward_bonus_normalizer(self.compute_reward_bonus(s,to_np(prediction['a'])))
                rewards = self.config.bonus_coeff*self.config.horizon*reward_bonus
                assert(all(rewards >= 0))

            if record_return:
                self.record_online_return(info)

            rewards = config.reward_normalizer(rewards)
            next_states = config.state_normalizer(next_states)
            storage.add(prediction)
            storage.add({'r': tensor(rewards).unsqueeze(-1),
                         'm': tensor(1 - terminals).unsqueeze(-1),
                         'i': list(info), 
                         's': tensor(states)})
            states = next_states
            self.total_steps += config.num_workers


#        assert(np.array(terminals).all()) # debug
        self.states = states
        prediction = network(states)
        storage.add(prediction)
        storage.placeholder()

        advantages = tensor(np.zeros((config.num_workers, 1)))
        returns = prediction['v'].detach()
        for i in reversed(range(roll_out_length)):
            returns = storage.r[i] + config.discount * storage.m[i] * returns
            if not config.use_gae:
                advantages = returns - storage.v[i].detach()
            else:
                td_error = storage.r[i] + config.discount * storage.m[i] * storage.v[i + 1] - storage.v[i]
                advantages = advantages * config.gae_tau * config.discount * storage.m[i] + td_error
            storage.adv[i] = advantages.detach()
            storage.ret[i] = returns.detach()

        return storage

    def log(self, s):
        logtxt(self.logger.log_dir + '.txt', s, show=True, date=False)


    # compute the mapping from states (and possibly actions) to features
    def compute_kernel(self, states, actions = None):
        actions_one_hot = tensor(np.eye(self.config.action_dim)[actions])
#        state_actions = torch.cat((tensor(states).to(Config.DEVICE), actions_one_hot), dim=1)
        
        if self.config.bonus == 'randnet-kernel-s':
            phi = F.normalize(self.kernel(tensor(states).to(Config.DEVICE)), p=2, dim=1)
        elif self.config.bonus == 'randnet-kernel-sa':
            phi = F.normalize(self.kernel(state_actions), p=2, dim=1)
        elif self.config.bonus == 'id-kernel-s':
            phi = states.to(Config.DEVICE)
        elif self.config.bonus == 'id-kernel-sa':
            phi = state_actions
        elif self.config.bonus == 'rbf-kernel':
            assert actions is not None
            if actions is None:
                phi = self.rbf_feature.transform(states.cpu().numpy())
                phi = torch.tensor(phi).to(Config.DEVICE)
            else:
                #concatenate state and action features
                np_states = states.cpu().numpy()
                np_actions = actions.cpu().numpy()
                if isinstance(self.task.action_space, Discrete):
                    np_actions = np.expand_dims(np_actions, axis = 1)
                assert np_actions.ndim == 2 and np_actions.shape[0] == np_states.shape[0] 
                states_acts_cat = np.concatenate((np_states, self.clip_actions(np_actions)), axis = 1)
                phi = self.rbf_feature.transform(states_acts_cat)
                phi = torch.tensor(phi).to(Config.DEVICE)
        else:
            raise NotImplementedError
        return phi


    # for visualizing visitations in combolock
    def log_visitations(self, visitations):
        self.log('lock1')
        self.log(np.around(visitations[0], 3))
        self.log('lock2')
        self.log(np.around(visitations[1], 3))

    # turn count-based density model into visitation table
    def compute_state_visitations(self, density_model, use_one_hot=False):
        locks = [np.zeros((3, self.config.horizon-1)), np.zeros((3, self.config.horizon-1))]
        N = sum(list(density_model.values()))
        for state in density_model.keys():
            if use_one_hot:
                k = np.argmax(state)
                (s, l, h) = np.unravel_index(k , (3, 3, self.config.horizon))
                if l in [0, 1]:
                    locks[l][s][h] += float(density_model[state]) / N
            else:
                if not all(np.array(state)==0.0):
                    s = np.argmax(state[:3])
                    l = int(state[-1])
                    h = np.argmax(state[3:-1])
                    locks[l][s][h] += float(density_model[state]) / N
        return locks
    
        
    # update the density model using data from replay buffer.
    # also computes covariance matrices for kernel case. 
    def update_density_model(self, mode=None):
        replay_buffer = self.replay_buffer[mode]
        replay_buffer_act = self.replay_buffer_actions[mode]
        states = torch.cat(sum(replay_buffer, []))
        actions = torch.cat(sum(replay_buffer_act,[]))
        
        if self.config.bonus == 'rnd':
            states = states.to(Config.DEVICE)
            targets = self.rnd_network(states).detach()
            data = DataLoader(TensorDataset(states, targets), batch_size = 100, shuffle=True)

            for i in range(1):
                total_loss = 0
                losses = []
                for j, batch in enumerate(data):
                    self.rnd_optimizer.zero_grad()
                    pred = self.rnd_pred_network(batch[0])
                    loss = F.mse_loss(pred, batch[1], reduction='none')
                    (loss.mean()).backward()
                    self.rnd_optimizer.step()
                    total_loss += loss.mean().item()
                    losses.append(loss)
                print(f'[RND loss: {total_loss / j:.5f}]')
            bonuses = torch.cat(losses).view(-1)
        
        elif self.config.bonus == 'rbf-kernel':
            N = states.shape[0]
            ind = np.random.choice(N, min(2000, N), replace=False)
            pdists = scipy.spatial.distance.pdist((states.cpu().numpy())[ind])
            self.rbf_feature.gamma = 1./(np.median(pdists)**2)
            phi = self.compute_kernel(states, actions = actions)
            n, d = phi.shape
            sigma = torch.mm(phi.t(), phi) + self.config.ridge*torch.eye(d).to(Config.DEVICE)
            self.density_model = torch.inverse(sigma).detach()

            covariance_matrices = []
            assert len(replay_buffer) == len(replay_buffer_act)
            for i in range(len(replay_buffer)):
                states = torch.cat(replay_buffer[i])
                actions = torch.cat(replay_buffer_act[i])
                phi = self.compute_kernel(states,actions)
                n, d = phi.shape
                sigma = torch.mm(phi.t(), phi) + self.config.ridge*torch.eye(d).to(Config.DEVICE)
                covariance_matrices.append(sigma.detach())
            m = 0
            for matrix in covariance_matrices:
                m = max(m, matrix.max())
            covariance_matrices = [matrix / m for matrix in covariance_matrices]

        elif 'kernel' in self.config.bonus:
            N = states.shape[0]
            phi = self.compute_kernel(states, actions)
            n, d = phi.shape
            sigma = torch.mm(phi.t(), phi) + self.config.ridge*torch.eye(d).to(Config.DEVICE)
            self.density_model = torch.inverse(sigma).detach()

            covariance_matrices = []
            assert len(replay_buffer) == len(replay_buffer_act)
            for i in range(len(replay_buffer)):
                states = torch.cat(replay_buffer[i])
                actions = torch.cat(replay_buffer_act[i])
                phi = self.compute_kernel(states, actions)
                n, d = phi.shape
                sigma = torch.mm(phi.t(), phi) + self.config.ridge*torch.eye(d).to(Config.DEVICE)
                covariance_matrices.append(sigma.detach().cpu())
            m = 0
            for matrix in covariance_matrices:
                m = max(m, matrix.max())
            covariance_matrices = [matrix / m for matrix in covariance_matrices]

            
        
        elif 'counts' in self.config.bonus:
            states = [tuple(s) for s in states.numpy()]
            unique_states = list(set(states))
            self.density_model[mode] = dict(zip(unique_states, [0] * len(unique_states)))
            for s in states: self.density_model[mode][s] += 1
            bonuses = torch.tensor([1.0/self.density_model[mode][s] for s in states])
            covariance_matrices, visitations = [], []
            for i, states in enumerate(replay_buffer):
                states = [tuple(s) for s in torch.cat(states).numpy()]
                density_model = dict(zip(unique_states, [0] * len(unique_states)))
                for s in states: density_model[s] += 1
                sums=torch.tensor([density_model[s] for s in unique_states]).float()
                covariance_matrices.append(torch.diag(sums) + torch.eye(len(unique_states)))
                visitations.append(self.compute_state_visitations(density_model))

            m = 0
            for matrix in covariance_matrices:
                m = max(m, matrix.max())
            covariance_matrices = [matrix / m for matrix in covariance_matrices]

        if mode == 'explore': self.optimize_policy_mixture_weights(covariance_matrices)

        # for combolock, compute the visitations for each policy
        if 'combolock' in self.config.game:

            visitations = []
            states = torch.cat(sum(replay_buffer, []))
            states = [tuple(s) for s in states.numpy()]
            unique_states = list(set(states))
            for i, states in enumerate(self.replay_buffer[mode]):
                states = [tuple(s) for s in torch.cat(states).numpy()]
                density_model = dict(zip(unique_states, [0] * len(unique_states)))
                for s in states: density_model[s] += 1
#                visitations.append(self.compute_state_visitations(self.replay_buffer_infos[mode][i]))
                visitations.append(self.compute_state_visitations(density_model))
                
            if mode == 'explore':
                weighted_visitations = [np.zeros((3, self.config.horizon - 1)), np.zeros((3, self.config.horizon - 1))]
                for i in range(len(visitations)):
                    weighted_visitations[0] += self.policy_mixture_weights[i].item()*visitations[i][0]
                    weighted_visitations[1] += self.policy_mixture_weights[i].item()*visitations[i][1]

                for i in range(len(visitations)):
                    self.log(f'\nstate visitations for policy {i}:')
                    self.log_visitations(visitations[i])

                self.log(f'\nstate visitations for weighted policy mixture:')
                self.log_visitations(weighted_visitations)
                
            elif mode == 'exploit':
                self.log(f'\nstate visitations for exploit policy:')
                self.log_visitations(visitations[-1])

        self.reward_bonus_normalizer= RescaleNormalizer()



    # optimize policy mixture weights using log-determinant loss
    def optimize_policy_mixture_weights(self, covariance_matrices):
        d = covariance_matrices[0].shape[0]
        N = len(covariance_matrices)
        if N == 1:
            self.policy_mixture_weights = torch.tensor([1.0])
        else:
            self.log_alphas = nn.Parameter(torch.randn(N))
            opt = torch.optim.Adam([self.log_alphas], lr=0.001)
            for i in range(5000):
                opt.zero_grad()
                sigma_weighted_sum = torch.zeros(d, d)
                for n in range(N):
                    sigma_weighted_sum += F.softmax(self.log_alphas, dim=0)[n]*covariance_matrices[n]
                loss = -torch.logdet(sigma_weighted_sum)
                if math.isnan(loss.item()):
                    pdb.set_trace()
                if not i % 500:
                    print(f'optimizing log det, loss={loss.item()}')
                loss.backward()
                opt.step()
            with torch.no_grad():
                self.policy_mixture_weights = F.softmax(self.log_alphas, dim=0)
        self.log(f'\npolicy mixture weights: {self.policy_mixture_weights.numpy()}')


    # roll out using explore/exploit policies and store data in replay buffer
    def update_replay_buffer(self):
        print('[gathering trajectories for replay buffer]')
        for mode in ['explore', 'exploit']:
            states, actions, returns, infos = [], [], [], []
            for _ in range(self.config.n_rollouts_for_density_est):
                new_traj = self.gather_trajectories(roll_in=False, add_bonus_reward=False, mode=mode,
                                                    record_return=(mode=='exploit'))            
                states += new_traj.cat(['s'])
                returns += new_traj.cat(['r'])
                actions += new_traj.cat(['a']) #append actions as well
                infos += new_traj.i

            mean_return = torch.cat(returns).cpu().mean()*self.config.horizon
            if mode == 'explore':
                self.policy_mixture_returns.append(mean_return.item())
                self.log(f'[policy mixture returns: {np.around(self.policy_mixture_returns, 3)}]')
            states = [s.cpu() for s in states]
            print(f'return ({mode}): {mean_return}')
            self.replay_buffer[mode].append(states)
 
            actions = [a.cpu() for a in actions]
            self.replay_buffer_actions[mode].append(actions)
            self.replay_buffer_infos[mode].append(sum(infos, []))
        
    # optimize explore and/or exploit policies
    def optimize_policy(self):            
        for mode in ['explore', 'exploit']:
            if mode == 'exploit' and self.epoch < self.config.start_exploit:
                continue
            for i in range(self.config.n_policy_loops):
                rewards = self.step_optimize_policy(mode=mode)
                if not i % 5: print(f'[optimizing policy ({mode}), step {i}, mean return: {rewards.mean():.5f}]')

        self.policy_mixture.append(copy.deepcopy(self.network['explore'].state_dict()))
        self.policy_mixture_optimizers.append(copy.deepcopy(self.optimizer['explore'].state_dict()))
        print(f'{len(self.policy_mixture)} policies in mixture')

        
    def initialize_new_policy(self, mode):
        self.network[mode] = self.config.network_fn()
        self.optimizer[mode] = self.config.optimizer_fn(self.network[mode].parameters())
        

    # gather a batch of data and perform some policy optimization steps
    def step_optimize_policy(self, mode=None):
        config = self.config
        network = self.network[mode]
        optimizer = self.optimizer[mode]

        states, actions, rewards, log_probs_old, returns, advantages = [], [], [], [], [], []
        self.time('reset')

        # gather the trajectories
        for i in range(self.config.n_traj_per_loop):

            # with probability 1 - proll, roll in with the current policy itself (so no data is wasted); otherwise roll in from the mixture
            coin = np.random.rand()
            if coin <= (1.0-self.config.proll): #simply roll-in with the policy itself, not from mixture:
                traj = self.gather_trajectories(add_bonus_reward=(mode=='explore'), mode=mode, roll_in = False)
            else: #from mixture
                traj = self.gather_trajectories(add_bonus_reward=(mode=='explore'), mode=mode, roll_in = True)

            states += traj.cat(['s'])
            actions += traj.cat(['a'])
            log_probs_old += traj.cat(['log_pi_a'])
            returns += traj.cat(['ret'])
            rewards += traj.cat(['r'])
            advantages += traj.cat(['adv'])
#        self.time('gathering trajectories')
        states = torch.cat(states, 0)
        actions = torch.cat(actions, 0)
        log_probs_old = torch.cat(log_probs_old, 0)
        returns = torch.cat(returns, 0)
        rewards = torch.cat(rewards, 0)
        advantages = torch.cat(advantages, 0)
        assert states.shape[0] == actions.shape[0] == rewards.shape[0] == advantages.shape[0] == returns.shape[0]
        
        actions = actions.detach()
        log_probs_old = log_probs_old.detach()
        advantages = (advantages - advantages.mean()) / advantages.std()

        self.time('reset')

        # optimize the policy using the gathered trajectories using PPO objective
        for _ in range(config.optimization_epochs):
            sampler = random_sample(np.arange(states.size(0)), config.mini_batch_size)
            for batch_indices in sampler:
                batch_indices = tensor(batch_indices).long()
                sampled_states = states[batch_indices]
                sampled_actions = actions[batch_indices]
                sampled_log_probs_old = log_probs_old[batch_indices]
                sampled_returns = returns[batch_indices]
                sampled_advantages = advantages[batch_indices]

                prediction = network(sampled_states, sampled_actions)

                ratio = (prediction['log_pi_a'] - sampled_log_probs_old).exp()
                obj = ratio * sampled_advantages
                obj_clipped = ratio.clamp(1.0 - self.config.ppo_ratio_clip,
                                          1.0 + self.config.ppo_ratio_clip) * sampled_advantages
                policy_loss = -torch.min(obj, obj_clipped).mean() - config.entropy_weight * prediction['ent'].mean()

                value_loss = 0.5 * (sampled_returns - prediction['v']).pow(2).mean()

                optimizer.zero_grad()
                (policy_loss + value_loss).backward()
                nn.utils.clip_grad_norm_(network.parameters(), config.gradient_clip)
                optimizer.step()
#        self.time('optimizing policy')

        return rewards.mean()


    # we clip the actions since the policy uses Gaussian distribution to sample actions
    # in the continuous case. This avoids the policy generating large actions to maximize
    # the negative log-det.
    def clip_actions(self, actions): 
        #action: numpy
        if isinstance(self.task.action_space, Box):
                # only clip in the continuous setting
            for i in range(self.config.action_dim):
                actions[:, i] = np.clip(actions[:,i], self.task.action_space.low[i], 
                    self.task.action_space.high[i])
        return actions
        
    
    def eval_step(self, state):
        network = self.network['exploit']
        prediction = network(state)
        action = to_np(prediction['a'])
        return action


    #test function for policy: 
    def test_exploit_policy_performance(self):
        network = self.network['exploit']
        roll_in_length = self.config.horizon
        storage = Storage(roll_in_length)
        num_trajs = 0
        total_rews = 0
        states = self.task.reset()  # reset the environment so we start from the beginning
        for i in range(roll_in_length):
            prediction = network(states)
            next_states, rewards, terminals, info = self.task.step(to_np(prediction['a']))
            num_trajs += terminals.sum()
            total_rews += rewards.sum()
        
        assert num_trajs > 0
        return total_rews / num_trajs  # this may overestimate rewards, but it is fair across all baselines
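
For the kernel bonuses used by this agent, the exploration reward is the elliptical potential sqrt(phi^T Sigma^{-1} phi), where Sigma is the regularized feature covariance stored as the density model. A minimal standalone NumPy sketch of the same computation (the feature matrices here are assumed inputs):

import numpy as np

def elliptical_bonus(Phi, phi_query, ridge=1e-2):
    # Phi: (n, d) features of visited state-actions; phi_query: (m, d) query features.
    d = Phi.shape[1]
    sigma = Phi.T @ Phi + ridge * np.eye(d)     # regularized covariance, as in update_density_model
    sigma_inv = np.linalg.inv(sigma)            # the stored "density model"
    # per-row bonus sqrt(phi_i^T sigma_inv phi_i)
    return np.sqrt(np.einsum('ij,jk,ik->i', phi_query, sigma_inv, phi_query))
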
Example #23
X = mat[colstolearn]
X = mat[colstolearn2]
X = mat.drop(['y'], axis=1)
Y = mat['y'] / med
Y = mat['y']
Y = mat[((mat['y'] > 10000) | (mat['y'] < 0)) == False]['y'] / med
Y = np.log1p(Y)

scaler = StandardScaler()
scaler.fit(X)
joblib.dump(scaler, 'sklean_scaler1.pkl', compress=True)

X = scaler.transform(X)

rbf = RBFSampler(gamma=0.05, n_components=100)
rbf.fit(X)
X = rbf.transform(X)

X, Y, med = shuffle(X, Y, med)

offset = int(X.shape[0] * 0.2)
X_train, y_train = X[:offset], Y[:offset]
X_test, y_test = X[offset:], Y[offset:]

X_test, y_test, med_test = X[offset:], Y[offset:], med[offset:]

n_est = 80
params = {
    'loss': 'lad',
    'n_estimators': n_est,
    'max_depth': 8,
Example #24
class LSTDQ_Kernel():
    def __init__(self,
                 dataset,
                 obs_dim,
                 act_dim,
                 gamma,
                 horizon,
                 value_reg,
                 default_length_scale=0.2,
                 random_feature_per_obs_dim=250,
                 norm=None,
                 scale_length_adjustment='median',
                 dtype=np.float32,
                 policy_net=None,
                 separate_action_indexing=False,
                 action_encoding_scheme='continuous'):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        self.value_reg = value_reg
        self.dtype = dtype
        self.separate_action_indexing = separate_action_indexing
        self.action_encoding_scheme = action_encoding_scheme

        self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]

        self.non_terminal_idx = (dataset['info'] == False)[:, 0]
        self.n_samples_non_terminal = self.non_terminal_idx.sum()
        self.data_acts = dataset['acts'][self.non_terminal_idx]

        if self.policy_net is not None:
            self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
            self.pi_next = self.policy_net.get_probabilities(
                dataset['next_obs'])
            self.pi_init = self.policy_net.get_probabilities(
                dataset['init_obs'])
            self.pi_term = self.policy_net.get_probabilities(
                dataset['term_obs'])
        else:
            self.pi_current = dataset['target_prob_obs'][self.non_terminal_idx]
            self.pi_next = dataset['target_prob_next_obs'][
                self.non_terminal_idx]
            self.pi_init = dataset['target_prob_init_obs']
            self.pi_term = dataset['target_prob_term_obs']
        if self.norm is None:
            self.obs = dataset['obs'][self.non_terminal_idx]
            self.next_obs = dataset['next_obs'][self.non_terminal_idx]
            self.init_obs = dataset['init_obs']
            self.term_obs = dataset['term_obs']
        elif self.norm == 'std':
            self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
            self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
            self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
            self.next_obs = (dataset['next_obs'] -
                             self.obs_mean) / self.obs_std
            self.init_obs = (dataset['init_obs'] -
                             self.obs_mean) / self.obs_std
            self.term_obs = (dataset['term_obs'] -
                             self.obs_mean) / self.obs_std
        else:
            raise NotImplementedError
        # pdb.set_trace()
        #* what if we only whiten over the non-terminal tuples
        non_terminal_idx = (dataset['info'] == False)[:, 0]
        obs_mean = np.mean(dataset['obs'][non_terminal_idx],
                           axis=0,
                           keepdims=True)
        obs_std = np.std(dataset['obs'][non_terminal_idx],
                         axis=0,
                         keepdims=True)
        # #* re-whiten the observations:
        self.obs = (self.obs - obs_mean) / obs_std
        self.next_obs = (self.next_obs - obs_mean) / obs_std
        self.init_obs = (self.init_obs - obs_mean) / obs_std
        self.term_obs = (self.term_obs - obs_mean) / obs_std

        #* if not separate action indexing, we are concatenating (s,a) as input
        if not self.separate_action_indexing:
            if self.action_encoding_scheme == 'continuous':
                encoded_actions = np.linspace(-1, 1, self.act_dim)
                # mean_action = np.mean(encoded_actions[self.data_acts[non_terminal_idx]])
                # std_action = np.std(encoded_actions[self.data_acts[non_terminal_idx]])
                mean_action = np.mean(encoded_actions[self.data_acts])
                std_action = np.std(encoded_actions[self.data_acts])

                self.encoded_actions = (encoded_actions -
                                        mean_action) / std_action

                # self.act = (self.data_acts / (self.act_dim-1)) * 2 -1
                # self.act = (self.act - np.mean(self.act, axis=0, keepdims=True))/np.std(self.act, axis=0, keepdims=True)
                self.act = self.encoded_actions[self.data_acts]

                self.input = np.concatenate((self.obs, self.act), axis=1)
                self.input_dim = self.input.shape[1]
            else:
                raise NotImplementedError
        else:
            self.input = self.obs
            self.input_dim = self.obs.shape[1]

        if scale_length_adjustment == 'median':
            sample_num = 5000
            # idx1 = np.random.choice(self.n_samples, sample_num); idx2 = np.random.choice(self.n_samples, sample_num)
            # idx1 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num); idx2 = np.random.choice(np.arange(self.n_samples)[non_terminal_idx], sample_num)
            idx1 = np.random.choice(self.n_samples_non_terminal, sample_num)
            idx2 = np.random.choice(self.n_samples_non_terminal, sample_num)
            # med_dist = np.median(np.square(self.obs[None, idx1, :] - self.obs[idx2, None, :]), axis = (0,1))
            med_dist = np.median(np.square(self.input[None, idx1, :] -
                                           self.input[idx2, None, :]),
                                 axis=(0, 1))
            # enforce an upper bound on the scale length of the action component
            med_dist[med_dist < 0.01] = 0.01
            self.scale_length_vector = 1.0 / med_dist
        else:
            # scale_length_vector = np.ones(self.obs_dim)
            self.scale_length_vector = np.ones(self.input_dim)

        # self.scale_length_vector = np.linspace(1,2,5)
        # NOTE: the next line overrides the median-based scale lengths computed above.
        self.scale_length_vector = np.ones(self.input_dim)
        self.z_dim = random_feature_per_obs_dim * self.input_dim
        self.rff = RBFSampler(n_components=self.z_dim,
                              gamma=default_length_scale)
        self.rff.fit([self.input[0]])
        # #* set the fourier feature
        # transformer_list = []
        # # self.z_dim = random_feature_per_obs_dim * self.obs_dim
        # self.z_dim = random_feature_per_obs_dim * self.input_dim
        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = FeatureUnion(transformer_list)

        # models = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale*dist) for dist in self.scale_length_vector]
        # for model in models:
        #     # model.fit([self.obs[0]])
        #     model.fit([self.input[0]])
        #     transformer_list.append((str(model), model))
        # self.rff = [RBFSampler(n_components = random_feature_per_obs_dim, gamma = default_length_scale)]
        # self.rff.fit([self.input[0]])

        #* Some commonly used variables
        # self.I_sa = np.eye(self.act_dim*self.z_dim)
        self.rews = dataset['rews'][self.non_terminal_idx]
        # self.init_idx = np.arange(0, self.n_samples, self.horizon)
        # self.end_idx = np.arange(self.horizon-1, self.n_samples, self.horizon)

        #* make sure that the importance weights are already calculated
        self.rho = dataset['ratio'][self.non_terminal_idx]
        # pdb.set_trace()

    def estimate(self):
        if self.separate_action_indexing:
            value_est = self.estimate_LSTDQ_separate_action_indexing()
        else:
            value_est = self.estimate_LSTDQ_concat_sa_input()
        return value_est

    def estimate_LSTDQ_concat_sa_input(self):
        # transformed_action = np.linspace(-1,1, self.act_dim)
        # n_samples = self.non_terminal_idx.sum()
        a_prime = np.tile(self.encoded_actions,
                          self.n_samples_non_terminal)[:, np.newaxis]
        # a_prime = np.tile(self.encoded_actions, self.n_samples)[:,np.newaxis]
        x_prime = np.concatenate(
            (np.repeat(self.next_obs, self.act_dim, axis=0), a_prime), axis=1)
        # a0_expanded = np.tile(transformed_action,self.n_episode)[:,np.newaxis]
        a0_expanded = np.tile(self.encoded_actions, self.n_episode)[:,
                                                                    np.newaxis]
        x0 = np.concatenate(
            (np.repeat(self.init_obs, self.act_dim, axis=0), a0_expanded),
            axis=1)
        # aterm_expanded = np.tile(transformed_action, self.n_episode)[:,np.newaxis]
        aterm_expanded = np.tile(self.encoded_actions,
                                 self.n_episode)[:, np.newaxis]
        xterm = np.concatenate(
            (np.repeat(self.term_obs, self.act_dim, axis=0), aterm_expanded),
            axis=1)

        Z = self.rff.transform(self.input).astype(self.dtype)
        Z_prime = self.rff.transform(x_prime).astype(self.dtype)
        aprime_probs = self.pi_next.flatten()[:, np.newaxis]
        Z_prime = Z_prime * aprime_probs
        Z_prime = Z_prime.reshape((self.n_samples_non_terminal, self.act_dim,
                                   self.z_dim)).sum(axis=1)

        reg = self.value_reg

        regularized_inverse = np.linalg.inv(
            np.matmul(Z.T, Z - self.gamma * Z_prime) +
            reg * np.eye(self.z_dim))
        featurized_reward = np.matmul(Z.T, self.rews)
        value_coef = np.matmul(regularized_inverse, featurized_reward)

        Z0 = self.rff.transform(x0)
        Q0 = np.matmul(Z0, value_coef)

        Z_term = self.rff.transform(xterm)
        Q_term = np.matmul(Z_term, value_coef)

        V_init = (Q0 * self.pi_init.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_term = (Q_term * self.pi_term.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_traj = V_init - V_term * self.gamma**self.horizon
        value_est = np.mean(V_traj)
        # pdb.set_trace()
        return value_est

    def estimate_LSTDQ_separate_action_indexing(self):
        #* separate action set indexing
        act_idx = []
        for i in range(self.act_dim):
            act_idx.append(np.where(self.data_acts == i)[0])
        #* apply transformation
        Z = self.rff.transform(self.obs).astype(self.dtype)
        Z_prime = self.rff.transform(self.next_obs).astype(self.dtype)
        Z_init = self.rff.transform(self.init_obs).astype(self.dtype)
        Z_term = self.rff.transform(self.term_obs).astype(self.dtype)
        # import pdb; pdb.set_trace()
        assert self.z_dim == Z.shape[1]
        Phi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim),
                       dtype=self.dtype)
        Phi_pi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim),
                          dtype=self.dtype)
        Phi_prime_pi = np.zeros(
            (Z_prime.shape[0], Z_prime.shape[1] * self.act_dim),
            dtype=self.dtype)
        Phi_init_pi = np.zeros(
            (Z_init.shape[0], Z_init.shape[1] * self.act_dim),
            dtype=self.dtype)
        Phi_term_pi = np.zeros(
            (Z_term.shape[0], Z_term.shape[1] * self.act_dim),
            dtype=self.dtype)
        for i in range(self.act_dim):
            Phi[act_idx[i],
                i * self.z_dim:(i + 1) * self.z_dim] = Z[act_idx[i]]
            Phi_pi[:, i * self.z_dim:(i + 1) *
                   self.z_dim] = self.pi_current[:, i][:, None] * Z
            Phi_prime_pi[:, i * self.z_dim:(i + 1) *
                         self.z_dim] = self.pi_next[:, i][:, None] * Z_prime
            Phi_init_pi[:, i * self.z_dim:(i + 1) *
                        self.z_dim] = self.pi_init[:, i][:, None] * Z_init
            Phi_term_pi[:, i * self.z_dim:(i + 1) *
                        self.z_dim] = self.pi_term[:, i][:, None] * Z_term

        I_sa = np.eye(self.act_dim * self.z_dim, dtype=self.dtype)

        regularized_inverse = np.linalg.inv(
            np.matmul(Phi.T, Phi - self.gamma * Phi_prime_pi) +
            self.value_reg * I_sa)
        featurized_reward = np.matmul(Phi.T, self.rews)
        reward_coef = np.matmul(regularized_inverse, featurized_reward)
        V_init = Phi_init_pi @ reward_coef
        V_term = Phi_term_pi @ reward_coef
        V_traj = V_init - V_term * self.gamma**self.horizon
        value_est = np.mean(V_traj)
        # import pdb; pdb.set_trace()
        return value_est
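
# --- Usage sketch (added for illustration; not part of the original example) ---
# A minimal, hedged sketch of driving LSTDQ_Kernel on synthetic transitions.
# The dataset keys mirror the ones referenced in __init__ above; the helper
# name, all sizes and the random data are assumptions made for illustration.
def _synthetic_lstdq_dataset(n_episodes=10, horizon=20, obs_dim=4, act_dim=3, seed=0):
    rng = np.random.RandomState(seed)
    n = n_episodes * horizon
    probs = rng.dirichlet(np.ones(act_dim), size=n)  # target-policy action probabilities
    return {
        'obs': rng.randn(n, obs_dim).astype(np.float32),
        'next_obs': rng.randn(n, obs_dim).astype(np.float32),
        'init_obs': rng.randn(n_episodes, obs_dim).astype(np.float32),
        'term_obs': rng.randn(n_episodes, obs_dim).astype(np.float32),
        'acts': rng.randint(act_dim, size=(n, 1)),
        'rews': rng.randn(n, 1).astype(np.float32),
        'info': np.zeros((n, 1), dtype=bool),        # no terminal flags in this toy batch
        'ratio': np.ones((n, 1), dtype=np.float32),  # importance weights, assumed precomputed
        'target_prob_obs': probs,
        'target_prob_next_obs': probs,
        'target_prob_init_obs': rng.dirichlet(np.ones(act_dim), size=n_episodes),
        'target_prob_term_obs': rng.dirichlet(np.ones(act_dim), size=n_episodes),
    }

_estimator = LSTDQ_Kernel(_synthetic_lstdq_dataset(), obs_dim=4, act_dim=3,
                          gamma=0.99, horizon=20, value_reg=1e-3,
                          scale_length_adjustment=None)
print('LSTD-Q value estimate on synthetic data:', _estimator.estimate())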
Example #25
0
import sys
import os
import numpy as np
import itertools
from sklearn.svm import LinearSVC
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler

pycharm_mode = True
N_FEATURES = 400  # Dimension of the original data.
BATCH_SIZE = 30000

chi = AdditiveChi2Sampler()
chi.fit(np.zeros((1, N_FEATURES)))
rbf = RBFSampler(gamma=1, random_state=1337, n_components=5500)
rbf.fit(np.zeros((1, 1200)))  # the chi-squared map expands 400 features to 3 * 400 = 1200

def transform(x_original):
    return rbf.transform(chi.transform(x_original.reshape(1, -1))).ravel()

def lines(source):
    for line in source:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
        x_original = np.fromstring(x_string, sep=' ')
        yield label, transform(x_original)
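
# --- Hedged sketch (not part of the original example) ---
# One plausible way to consume the (label, features) pairs produced by lines():
# accumulate BATCH_SIZE rows and feed them to an out-of-core linear classifier
# via partial_fit. SGDClassifier, the hinge loss and the {-1, +1} label set are
# assumptions for illustration only.
from sklearn.linear_model import SGDClassifier

def train_stream(source, classes=(-1, 1)):
    clf = SGDClassifier(loss='hinge', alpha=1e-4)
    batch_x, batch_y = [], []
    for label, x in lines(source):
        batch_x.append(x)
        batch_y.append(label)
        if len(batch_x) == BATCH_SIZE:
            clf.partial_fit(np.array(batch_x), np.array(batch_y), classes=list(classes))
            batch_x, batch_y = [], []
    if batch_x:  # flush the final, possibly smaller batch
        clf.partial_fit(np.array(batch_x), np.array(batch_y), classes=list(classes))
    return clf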

def main():
    if pycharm_mode:
        import argparse
Example #26
0
class SarsaLambdaAgent:
    def __init__(self, environment=gym.make('MountainCar-v0')):
        self.env = environment
        self.state = self.env.reset()
        self.state_low_bound = self.env.observation_space.low
        self.state_high_bound = self.env.observation_space.high
        self.n_action = self.env.action_space.n

        self.action_space = gym.spaces.Discrete(self.n_action)

        self.d = 100
        self.w = np.random.rand(self.d)

        self.feature = RBFSampler(gamma=1, random_state=1)
        X = []
        for _ in range(100000):
            s = self.env.observation_space.sample()
            sa = np.append(s, np.random.randint(self.n_action))
            X.append(sa)
        self.feature.fit(X)

    def feature_x(self, s, a):
        # print('state = ', s, ' & action = ', a)
        feature_sa = self.feature.transform([[s[0], s[1], a]])[0]  # 1-D feature vector of length self.d
        # print(feature_sa)
        return feature_sa

    def is_state_valid(self, s):
        valid = True
        for i in range(s.shape[0]):
            if (s[i] < self.state_low_bound[i]) or (s[i] > self.state_high_bound[i]):
                valid = False
        return valid

    def Q_hat(self, s, a):
        if self.is_state_valid(s):
            return np.dot(self.feature_x(s, a), np.transpose(self.w))
        return 0.0  # treat out-of-bounds states as having zero value instead of returning None

    def reset(self):
        self.state = self.env.reset()

    def A_max(self, state, epsilon):
        if np.random.rand() < epsilon:
            # Exploration
            return np.random.randint(self.n_action)
        else:
            # Exploitation
            max_a = []
            maxQ = -np.inf
            for a in range(0, self.n_action):
                if self.Q_hat(state, a) > maxQ:
                    max_a = [a]
                    maxQ = self.Q_hat(state, a)
                elif self.Q_hat(state, a) == maxQ:
                    max_a.append(a)
            if max_a != []:
                return max_a[np.random.randint(0, len(max_a))]
            else:
                return np.random.randint(self.n_action)

    def train(self, n_episode=5000, learning_rate=0.01, gamma=0.99, epsilon=0.01, lamda=0.9):
        num_steps_of_episode = []
        for i_episode in range(n_episode):
            self.reset()
            n_trajectory = 0
            a = self.A_max(state=self.state, epsilon=epsilon)
            z = np.zeros(self.d)
            Q_old = 0

            while True:
                s = np.copy(self.state)
                while True:
                    try:
                        s_, r_, done, _ = self.env.step(a)
                        a_ = self.A_max(state=s_, epsilon=epsilon)
                        # env.render()
                        break
                    except (RuntimeError, TypeError, NameError):
                        print("Action {} at state {} is invalid!".format(a, self.state))

                Q = self.Q_hat(s, a)
                Q_ = self.Q_hat(s_, a_)
                delta = r_ + gamma*Q_ - Q
                z = gamma * lamda * z + (1 - learning_rate * gamma * lamda * np.dot(self.feature_x(s, a), np.transpose(z))) * self.feature_x(s, a)

                self.w = self.w + learning_rate * (delta + Q - Q_old) * z - learning_rate * (Q - Q_old) * self.feature_x(s, a)
                Q_old = Q_
                self.state = s_
                a = a_
                n_trajectory += 1

                if done:
                    num_steps_of_episode.append(n_trajectory)
                    if n_trajectory % DISPLAY_STEP == 0:
                        print("Episode = {}, took {} to go to the goal.".format(i_episode, n_trajectory))
                    break

        return num_steps_of_episode

    def get_w(self):
        return self.w
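
# --- Hedged usage sketch (not part of the original example) ---
# Assumes the classic gym (<0.26) reset/step API used by the class above and a
# module-level DISPLAY_STEP constant, which train() references when printing.
# DISPLAY_STEP = 100
# agent = SarsaLambdaAgent(gym.make('MountainCar-v0'))
# episode_lengths = agent.train(n_episode=500, epsilon=0.05)
# print('mean steps over the last 50 episodes:', np.mean(episode_lengths[-50:]))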
Example #27
0
class CCNNLayer:

    def __init__(self, name: str, input_size: int, filter_size: int,
                 gamma: float, m: int, R: float, r: int, lr: float):

        self.name = name
        self.input_size = input_size    
        self.filter_size = filter_size
        self.patch_size = filter_size ** 2
        self.output_size = self.input_size - self.filter_size + 1
        self.n_patchs = self.output_size ** 2        
        self.m = m
        self.R = R
        self.lr = lr
        
        self.rbf_feature = RBFSampler(gamma=gamma, n_components=m, random_state=1)
        self.svd = TruncatedSVD(n_components=r)


    def initPars(self, n_classes: int, batch_size: int):

        self.n_classes = n_classes        
        self.batch_size = batch_size
        self.lr /= batch_size
        
        self.A = np.random.normal(0, 0.1, size=(n_classes, self.n_patchs, self.m))
        
        
    def getZMatrix(self, X):
        """
        Input: (n_instances, n_channels, input_size, input_size)
        
        Output: (n_instances, n_patchs, m)
        """
        
        Z = view_as_windows(X, (1, X.shape[1], self.filter_size, self.filter_size))
        Z = Z.reshape(np.prod(Z.shape[:4]), np.prod(Z.shape[4:]))
        Q = self.rbf_feature.transform(Z).astype(np.float16)
        
        return Q.reshape(X.shape[0], self.n_patchs, -1)


    def predict(self, X, transform: bool=False):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        
        Transformed input: (batch_size, n_patchs, m)
        
        Output: (batch_size, n_classes)
        """

        Z = self.getZMatrix(X) if transform else X
        p = np.exp(np.tensordot(Z, self.A, axes=[(1, 2), (1, 2)]))

        return (p.T / np.sum(p, axis=1)).T


    def fit(self, X, ylabel, n_epoch: int):

        assert X.shape[2] == X.shape[3] == self.input_size
        
        n = X.shape[0]
        self.rbf_feature.fit(np.zeros((1, X.shape[1] * self.filter_size ** 2)))
        
        print("Preparing patches...")
        
        Z_batches = [self.getZMatrix(X[i: i + self.batch_size]) 
                     for i in range(0, n, self.batch_size)]
        y_batches = ylabel.reshape(-1, self.batch_size)
        
        print("Starting PSGD...")
        
        loss = np.inf
        rhat = self.m

        for epoch in range(n_epoch):
            print("{0}: Epoch {1}: loss = {2}, r_hat = {3}".format(self.name, epoch + 1, loss / n, rhat))
            loss = 0
            for i, (Z_batch, y_batch) in enumerate(zip(Z_batches, y_batches)):
                p_batch = self.predict(Z_batch)
                loss += np.sum(-np.log(p_batch[np.arange(self.batch_size), y_batch]))
                dL_batch = -p_batch
                dL_batch[np.arange(self.batch_size), y_batch] += 1

                self.A += self.lr * np.tensordot(dL_batch, Z_batch, axes=[0, 0])
  
            A_unfold = self.A.reshape(-1, self.A.shape[2]).T
            U = self.svd.fit_transform(A_unfold)
            self.U = U.copy()
            d = np.linalg.norm(U, axis=0)
            U *= 1 / d            
            d_cum = np.cumsum(d) 
            rhat = np.searchsorted(d_cum - self.R > np.append(d[1:] * np.arange(1, d.size), 0), True) + 1

            if rhat >= d.size:
                print("Warning: Hard-thresholding applied")
                                
            if rhat <= d.size:                
                scale = np.maximum(0, d - (d_cum[rhat - 1] - self.R) / rhat)
                U = U[:, :rhat]
                d = d[:rhat]                  
                self.U = U * scale[:rhat] 

            self.A = ((self.U * (1 / d)) @ (U.T @ A_unfold)).T.reshape(*self.A.shape)
        
        Z_batches = None
        y_batches = None
        
        
            
    def transform(self, X):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        
        Output: (batch_size, n_output_channels, output_size, output_size)
        """
        
        Z = np.rollaxis(np.tensordot(self.U, self.getZMatrix(X), axes=[0, 2]), 0, 2)

        return Z.reshape(Z.shape[0], Z.shape[1], self.output_size, self.output_size)
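
# --- Hedged usage sketch (not part of the original example) ---
# Fitting a single CCNN layer on tiny random data; every size below is an
# illustrative assumption. The number of samples must be divisible by
# batch_size because fit() reshapes the labels into fixed-size batches.
# rng = np.random.RandomState(0)
# X_toy = rng.rand(64, 1, 8, 8)              # (n_instances, n_channels, H, W)
# y_toy = rng.randint(0, 10, size=64)
# layer = CCNNLayer('conv1', input_size=8, filter_size=3,
#                   gamma=1.0, m=64, R=10.0, r=16, lr=0.1)
# layer.initPars(n_classes=10, batch_size=32)
# layer.fit(X_toy, y_toy, n_epoch=3)
# features = layer.transform(X_toy)          # (n_instances, r_hat, 6, 6)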
Example #28
0
state_samples = np.array(
    [env.observation_space.sample() for x in range(10000)])
# Num	Observation 	Min 	Max
# 0 	position	-1.2	0.6
# 1 	velocity	-0.07	0.07
position_max = np.amax(state_samples[:, 0])
position_min = np.amin(state_samples[:, 0])
velocity_max = np.amax(state_samples[:, 1])
velocity_min = np.amin(state_samples[:, 1])

scaler = StandardScaler()
scaler.fit(state_samples)
scaler_samples = scaler.transform(state_samples)

featurizer_state = RBFSampler(gamma=0.5, n_components=100)
featurizer_state.fit(scaler_samples)
print(featurizer_state)

state = env.reset()
print(state_samples[20])
featurized = featurizer_state.transform([state_samples[10]])

# In[75]:


class ValueFunction(object):
    """
    Value Function approximator.
    """
    def __init__(self):
        # sampling environment states in order to featurize them.
     XtrainT = kpls.transform(ktrain)
     XtestT = kpls.transform(ktest)
     
     if n==573:
         kplsScoresNys[:,0] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1073:
         kplsScoresNys[:,1] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1573:
         kplsScoresNys[:,2] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
 
 # RBF sampler method
 elapTimeRBFS = np.zeros(np.shape(nComponents))
 kplsScoresRBFS = np.zeros((2,3))
 for i,n in enumerate(nComponents):
     rbfs = RBFSampler(n_components=n,gamma=gamma)
     rbfs.fit(Xtrain)
     ktrain = rbfs.transform(Xtrain)
     ktest = rbfs.transform(Xtest)
     startTime = timeit.default_timer()
     kpls.fit(ktrain,Ytrain)
     elapTimeRBFS[i] = timeit.default_timer() - startTime
     XtrainT = kpls.transform(ktrain)
     XtestT = kpls.transform(ktest)
     
     if n==573:
         kplsScoresRBFS[:,0] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1073:
         kplsScoresRBFS[:,1] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
     elif n==1573:
         kplsScoresRBFS[:,2] = util.classify(XtrainT,XtestT,labelsTrain,labelsTest)
         
                                       ngram_range=(2, 4))

# Build the similarity-encoded matrix with a column transformer; the rbf_sampler is fitted on it below.
column_transformer = make_column_transformer(
    (similarity_encoder, ['NONPROPRIETARYNAME']),
    (OneHotEncoder(handle_unknown='ignore'), ['DOSAGEFORMNAME', 'ROUTENAME']),
    sparse_threshold=1)

transformed_categories = column_transformer.fit_transform(X_encoder)

# gamma is a parameter of the rbf function, that sets how fast the similarity
# between two points should decrease as the distance between them rises. It
# is data-specific, and needs to be chosen carefully, for example using
# cross-validation.
rbf_sampler = RBFSampler(gamma=0.5, n_components=n_out_rbf, random_state=42)
rbf_sampler.fit(transformed_categories)

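# --- Hedged sketch (not part of the original example) ---
# One generic way to choose gamma by cross-validation, as the comment above
# suggests: wrap the RBF feature map and a linear model in a Pipeline and
# grid-search gamma. The toy data, LogisticRegression and the gamma grid are
# illustrative assumptions, not part of the original pipeline.
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification

X_toy, y_toy = make_classification(n_samples=500, n_features=20, random_state=0)
gamma_search = GridSearchCV(
    Pipeline([('rbf', RBFSampler(n_components=100, random_state=42)),
              ('clf', LogisticRegression(max_iter=1000))]),
    param_grid={'rbf__gamma': [0.01, 0.1, 0.5, 1.0]},
    cv=3)
gamma_search.fit(X_toy, y_toy)
# print('best gamma:', gamma_search.best_params_['rbf__gamma'])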

def encode(X, y_int, one_hot_encoder, column_transformer, rbf_sampler):
    X_sim_encoded = column_transformer.transform(X)

    X_highdim = rbf_sampler.transform(X_sim_encoded.toarray())

    y_onehot = one_hot_encoder.transform(y_int.reshape(-1, 1))

    return X_highdim, y_onehot


# The inputs and labels of the val and test sets have to be pre-processed the
# same way the training set was processed:
X_test_kernel_approx, y_true_test_onehot = encode(X_test, y_test,
Example #31
0
class DecomposableKernel(object):
    r"""
    Decomposable Operator-Valued Kernel of the form:

    .. math::
        X, Y \mapsto K(X, Y) = k_s(X, Y) A

    where A is a symmetric positive semidefinite operator acting on the
    outputs.

    Attributes
    ----------
    A : {array, LinearOperator}, shape = [n_targets, n_targets]
        Linear operator acting on the outputs

    scalar_kernel : {callable}
        Callable which associate to the training points X the Gram matrix.

    scalar_kernel_params : {mapping of string to any}
        Additional parameters (keyword arguments) for kernel function passed as
        callable object.

    References
    ----------

    See also
    --------

    DecomposableKernelMap
        Decomposable Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 10)
    >>> K = ovk.DecomposableKernel(np.eye(2))
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, A, scalar_kernel=rbf_kernel, scalar_kernel_params=None):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------

        A : {array, LinearOperator}, shape = [n_targets, n_targets]
            Linear operator acting on the outputs

        scalar_kernel : {callable}
            Callable which associate to the training points X the Gram matrix.

        scalar_kernel_params : {mapping of string to any}, optional
            Additional parameters (keyword arguments) for kernel function
            passed as callable object.
        """
        self.A = A
        self.scalar_kernel = scalar_kernel
        self.scalar_kernel_params = scalar_kernel_params
        self.p = A.shape[0]

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import DecomposableKernelMap
        return DecomposableKernelMap(X, self.A,
                                     self.scalar_kernel,
                                     self.scalar_kernel_params)

    def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
        self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
        self.r = self.B_.shape[0]

        if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                gamma = 1.
            else:
                gamma = self.scalar_kernel_params['gamma']
            self.phi_ = RBFSampler(gamma=gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self,
                                                                   'Xb_'):
            if self.scalar_kernel_params is None:
                skew = 1.
            else:
                skew = self.scalar_kernel_params['skew']
            self.phi_ = SkewedChi2Sampler(skewedness=skew,
                                          n_components=D,
                                          random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif not hasattr(self, 'Xb_'):
            raise NotImplementedError('ORFF map for kernel is not '
                                      'implemented yet')

        D = self.phi_.n_components
        if X is self.Xb_:
            cshape = (D, self.r)
            rshape = (self.Xb_.shape[0], self.p)
            oshape = (self.Xb_.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(self.Xb_,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(self.Xb_.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))
        else:
            Xb = self.phi_.transform(X)
            cshape = (D, self.r)
            rshape = (X.shape[0], self.p)
            oshape = (Xb.shape[0] * self.p, D * self.r)
            return LinearOperator(oshape,
                                  dtype=self.Xb_.dtype,
                                  matvec=lambda b: dot(dot(Xb,
                                                           b.reshape(cshape)),
                                                       self.B_),
                                  rmatvec=lambda r: dot(Xb.T,
                                                        dot(r.reshape(rshape),
                                                            self.B_.T)))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise.}
               \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
                                          default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator

            .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise}
               \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
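
# --- Hedged usage sketch (not part of the original example) ---
# get_orff_map() returns a scipy LinearOperator whose matvec maps D*r
# feature-space coefficients to the n_samples*p stacked outputs. The sizes
# below are illustrative assumptions.
# X_demo = np.random.randn(50, 10)
# K = DecomposableKernel(np.eye(2))            # p = 2 outputs
# Phi = K.get_orff_map(X_demo, D=100)          # LinearOperator of shape (50 * 2, 100 * 2)
# y_stacked = Phi.matvec(np.random.randn(Phi.shape[1]))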
Example #32
0
class ApproximateTDAgent:
    def __init__(self, env, num_episodes=10000):
        """
        Constructor for Temporal Difference Agent using function approximation.
        The function approximator used is a linear regression with RBF kernel: y = np.dot(W, phi(x))

        :param env: OpenAI Gym environment to interface with
        :param num_episodes: number of episodes to play to bootstrap phi(x) and W
        """

        # Interface with the environment
        self.env = env

        # Initialize featurizer function phi(x)
        self.featurizer = RBFSampler()
        samples = []
        for n in range(num_episodes):
            print("Running initial exploration episode: {}".format(n))
            s = self.env.reset()
            done = False  # reset each episode; otherwise only the first episode collects samples
            while not done:
                # Play the game randomly
                a = self.env.action_space.sample()
                x = self._vectorize(s, a)
                samples.append(x)
                s, _, done, _ = self.env.step(a)

        self.featurizer.fit(samples)
        self.W = np.zeros(self.featurizer.n_components)

    def _vectorize(self, s, a):
        """ 
        Helper function to vectorize state s and action a.
        
        :param s: state
        :type s: tuple
        :param a: action
        :type a: int
        """
        s = np.array(s)
        # One-hot encoding of actions
        a_vector = np.zeros(self.env.action_space.n)
        a_vector[a] = 1
        return np.concatenate((s, a_vector))

    def iterate_policy(self,
                       alpha=0.1,
                       gamma=0.9,
                       epsilon=0.3,
                       num_episodes=1000):
        """ Implementation of Q learning on the environment """

        deltas = []
        for n in range(num_episodes):
            print("Iterating episode {}".format(n))
            s = self.env.reset()
            done = False
            max_diff = float("-inf")
            while not done:
                a = self._select_action(s, epsilon)
                s_prime, r, done, _ = self.env.step(a)

                if done:
                    y = r
                else:
                    y = r + gamma * np.max(self.predict(s_prime))

                phi_x = self.featurizer.transform([self._vectorize(s, a)])[0]
                diff = y - np.dot(self.W, phi_x)
                self.W = self.W + alpha * diff * phi_x
                max_diff = max(max_diff, diff)
                s = s_prime

            deltas.append(max_diff)

        return deltas

    def _select_action(self, state, epsilon):
        """ 
        Helper function to choose between the explore-exploit dilemma 
        This is actually the pi(a|s) function
        """
        p = np.random.random()
        if p <= epsilon:
            selected_action = self.env.action_space.sample()
        else:
            Q_values = self.predict(state)
            selected_action = np.argmax(Q_values)

        return selected_action

    def predict(self, state):
        """
        Predict the Q values for all actions of input state

        :param state: state for which Q is predicted
        """
        # Calculate estimate of Q from dot(W, phi(x))
        # This is a linear regression model
        Q_values = []
        for action in range(self.env.action_space.n):
            x = self._vectorize(state, action)
            x = self.featurizer.transform([x])[0]
            Q_values.append(np.dot(self.W, x))

        return Q_values

    def play(self):
        """
        Play the agent according to current policy
        """
        done = False
        s = self.env.reset()
        total_rewards = 0
        while not done:
            # Always play according to policy
            a = self._select_action(s, epsilon=0.0)
            s, r, done, info = self.env.step(a)
            self.env.render()
            total_rewards += r

        return total_rewards
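
# --- Hedged usage sketch (not part of the original example) ---
# Q-learning with RBF features on a classic-control task; assumes the
# pre-0.26 gym reset/step API that the class itself relies on.
# env = gym.make('MountainCar-v0')
# agent = ApproximateTDAgent(env, num_episodes=100)   # bootstrap the featurizer
# deltas = agent.iterate_policy(alpha=0.1, gamma=0.99, epsilon=0.3, num_episodes=200)
# total_reward = agent.play()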
Example #33
0
class RBFDivFreeKernel(object):
    r"""
    Divergence-free Operator-Valued Kernel of the form:

    .. math::
        X \mapsto K_X(Y) = exp(-\gamma||X-Y||^2)A_{X,Y},

    where,

    .. math::
        A_{X,Y} = 2\gamma(X-Y)(X-Y)^T + ((d-1) - 2\gamma||X-Y||^2)I.

    Attributes
    ----------
    gamma : {float}
        RBF kernel parameter.

    References
    ----------

    See also
    --------

    RBFDivFreeKernelMap
        Divergence-free Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 2)
    >>> K = ovk.RBFDivFreeKernel(1.)
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, gamma):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------
        gamma : {float}, shape = [n_targets, n_targets]
            RBF kernel parameter.
        """
        self.gamma = gamma

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import RBFDivFreeKernelMap
        return RBFDivFreeKernelMap(X, self.gamma)

    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0],
                                          1, self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape((1, -1,
                                                           self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO:
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.eye()w np.dot(w.T, w) / wn)
            # NOTE: RBFSampler.transform returns a 2-D array, so the 3-D reshape
            # that assumed the commented-out weighting above is skipped here.
            # Xb = Xb.reshape((-1, Xb.shape[2]))
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: \begin{cases}
               Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
               K(X, Y) \enskip\text{otherwise.}
               \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
                                          default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator

        .. math::
            K_x: \begin{cases}
            Y \mapsto K(X, Y) \enskip\text{if } Y \text{is None,} \\
            K(X, Y) \enskip\text{otherwise}
            \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
Example #34
0
class ApproximateTDAgent(TemporalDifferenceAgent):
    """ An agent that implements the function approximation Q-learning algorithm """
    def __init__(self,
                 env,
                 start_state=(0, 0),
                 initial_policy=None,
                 action_space=None):

        if initial_policy:
            self.policy = initial_policy
        else:
            # Define a random policy if policy is not given
            self.policy = {
                (0, 0): "down",
                (0, 1): "left",
                (0, 2): "right",
                (0, 3): "left",
                (1, 0): "down",
                (1, 2): "up",
                (2, 0): "right",
                (2, 1): "right",
                (2, 2): "right",
            }

        # Initialize action_space
        if action_space:
            self.action_space = action_space
        else:
            self.action_space = ["up", "down", "left", "right"]

        # Initialize state of agent
        self.start_state = start_state

        # Initialize featurizer function phi(x)
        self.featurizer = RBFSampler()

        # Placeholder of weights for linear regression model
        # Use explore() method to populate W with the right dimensions of a fitted featurizer
        # This helps to build a linear regression model of dot(W, phi(x))
        self.W = None

        # Initialize V[s]
        self.V = {}
        self.num_rows = 0
        self.num_columns = 0
        for s in env.get_states():
            self.num_rows = max(self.num_rows, s[0] + 1)
            self.num_columns = max(self.num_columns, s[1] + 1)
            self.V[s] = 0

    def explore(self, env, num_episodes=10000):
        """ 
        Function for agent to randomly explore the gridworld and collect samples for estimating Q(s,a).
        The more num_episodes, the more data is collected for better estimates.
        """
        samples = []
        for n in range(num_episodes):
            print("Running initial exploration episode: {}".format(n))
            s = self.start_state
            while not env.is_terminal(s):
                # Play the game randomly
                a = np.random.choice(self.action_space)
                x = self._vectorize(s, a)
                samples.append(x)
                _, s_prime = env.move(s, a)
                s = s_prime

        # Fit the RBF featurizer and initialize weights
        self.featurizer.fit(samples)
        self.W = np.zeros(self.featurizer.n_components)

    def _vectorize(self, s, a):
        """ Helper function to vectorize state s and action a """
        s = np.array(s)
        # One-hot encoding of actions
        a_idx = self.action_space.index(a)
        a = np.zeros(len(self.action_space))
        a[a_idx] = 1
        return np.concatenate((s, a))

    def iterate_policy(self,
                       env,
                       alpha=0.1,
                       gamma=0.9,
                       epsilon=0.3,
                       num_episodes=1000):

        deltas = []
        for n in range(num_episodes):
            print("Running policy iteration episode: {}".format(n))
            max_diff = float("-inf")
            s = self.start_state
            while not env.is_terminal(s):
                a = self._select_action(s, epsilon)
                r, s_prime = env.move(s, a)
                if env.is_terminal(s_prime):
                    y = r
                else:
                    y = r + gamma * np.max(self.predict(s_prime))
                phi_x = self.featurizer.transform([self._vectorize(s, a)])[0]
                diff = y - np.dot(self.W, phi_x)
                self.W = self.W + alpha * diff * phi_x
                max_diff = max(max_diff, diff)
                s = s_prime

            deltas.append(max_diff)

        # Update optimal policy and value function
        for s in env.get_states():
            if not env.is_terminal(s):
                Q_values = self.predict(s)
                self.policy[s] = self.action_space[np.argmax(Q_values)]
                self.V[s] = np.max(Q_values)

        return deltas

    def _select_action(self, state, epsilon):
        """ 
        Helper function to choose between the explore-exploit dilemma 
        This is actually the pi(a|s) function
        """
        p = np.random.random()
        if p <= epsilon:
            selected_action = np.random.choice(self.action_space)
        else:
            Q_values = self.predict(state)
            selected_action = self.action_space[np.argmax(Q_values)]

        return selected_action

    def predict(self, state):
        """
        Predict the Q values for all actions of input state

        :param state: state for which Q is predicted
        """
        # Calculate estimate of Q from dot(W, phi(x))
        # This is a linear regression model
        Q_values = []
        for action in self.action_space:
            x = self._vectorize(state, action)
            x = self.featurizer.transform([x])[0]
            Q_values.append(np.dot(self.W, x))

        return Q_values
Example #35
0
import sys
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn import cross_validation
from sklearn import svm
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.grid_search import GridSearchCV


DIMENSION = 400  # Dimension of the original data.
CLASSES = (-1, +1)   # The classes that we are trying to predict.

chi_feature = AdditiveChi2Sampler(sample_steps=1)
chi_feature.fit(np.zeros([1,400]))
rbf = RBFSampler(n_components = 15*DIMENSION, random_state = 1)
rbf.fit(np.zeros([1,400]))

def transform(x_original):
    out = np.concatenate(([1], rbf.transform(chi_feature.transform(x_original)[0])[0]))
    return out

if __name__ == "__main__":
    X = []
    Y = []
    # initialize stochastic gradiant descent    
    cls = SGDClassifier(alpha=0.0001, fit_intercept=False, n_iter=15, penalty="l2", warm_start=True)
    for line in sys.stdin:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
        x_original = np.fromstring(x_string, sep=' ')