def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.01  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
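
The test above assumes X and Y already exist in scope; a minimal self-contained version of the same check (X and Y here are made-up random matrices) looks like this:

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.random_sample((300, 50))
Y = rng.random_sample((300, 50))

gamma = 10.
exact = rbf_kernel(X, Y, gamma=gamma)
sampler = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
approx = sampler.fit_transform(X) @ sampler.transform(Y).T
print(np.abs(exact - approx).max())  # shrinks as n_components grows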
Example #2
def computeAccuracyScore_HDIn(idx, idy, indiv_train, tidx, tidy, indiv_test, hideNeurons, classiType):
    #distrib_group = np.sort(np.unique(idg))
    #global nbcallin
    indivuniq = np.unique(indiv_train)

    resChunk = np.zeros(max(indivuniq) + 1)

    if classiType == 'MLP':
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=hideNeurons, random_state=1,
                            learning_rate='adaptive', activation='logistic')
    elif classiType == 'ADA':
        clf = AdaBoostClassifier()
    elif classiType == 'SDG':  # key kept as spelled by the callers ('SDG')
        clf = linear_model.SGDClassifier(class_weight='balanced')
    elif classiType == 'SVC':
        clf = svm.SVC(gamma=.4, class_weight='balanced')  # ,tol=hideNeurons
    elif classiType == 'linSVC':
        clf = svm.LinearSVC(class_weight='balanced')
    elif classiType == 'KNEIG':
        clf = KNeighborsClassifier(n_neighbors=5, algorithm='auto')
    elif classiType == 'SVMfourier':
        feature_map_fourier = RBFSampler(gamma=.2, n_components=hideNeurons)  # , random_state=1
        clf = pipeline.Pipeline([("feature_map", feature_map_fourier),
                                 ("svm", svm.LinearSVC(class_weight='balanced'))])
    elif classiType == 'SVMnystro':
        feature_map_nystroem = Nystroem(gamma=.2, n_components=hideNeurons)  # , random_state=1
        clf = pipeline.Pipeline([("feature_map", feature_map_nystroem),
                                 ("svm", svm.LinearSVC(class_weight='balanced'))])
    else:
        raise ValueError('unknown classiType: {}'.format(classiType))  # avoid an unbound clf below


    for i in indivuniq:
        normtrain = normalize(idx.iloc[indiv_train==i, :], axis=1, copy=False)
        normtest = normalize(tidx.iloc[indiv_test==i,:], axis=1, copy=False)


        clf.fit(normtrain, idy[indiv_train==i])
        data_y_predict = clf.predict(normtest)

        resChunk[i] = accuracy_score(tidy[indiv_test==i], data_y_predict, normalize=True)
        #print("hdin",nbcallin," i ",i)
        #resChunk[i] =i+(100* nbcallin)
    #print(resChunk)
    return resChunk
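
A hedged usage sketch with toy inputs (all data below is made up, and it assumes the imports the function above relies on, e.g. pandas, AdaBoostClassifier, normalize, accuracy_score):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
idx = pd.DataFrame(rng.rand(60, 8))      # training features
idy = rng.randint(0, 2, 60)              # training labels
indiv_train = rng.randint(0, 3, 60)      # individual id per training row
tidx = pd.DataFrame(rng.rand(30, 8))     # test features
tidy = rng.randint(0, 2, 30)             # test labels
indiv_test = rng.randint(0, 3, 30)

acc = computeAccuracyScore_HDIn(idx, idy, indiv_train, tidx, tidy,
                                indiv_test, hideNeurons=50, classiType='ADA')
print(acc)  # one accuracy per individual id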
Example #3
    def __init__(self, config):
        BaseAgent.__init__(self, config)
        self.config = config
        self.task = config.task_fn()
        self.network, self.optimizer, self.replay_buffer, self.density_model = dict(), dict(), dict(), dict()
        self.replay_buffer_actions = dict()
        self.replay_buffer_infos = dict()
        self.traces = []
        for mode in ['explore-exploit', 'rollin']:
            self.network[mode] = config.network_fn()
            self.replay_buffer[mode] = []
            self.replay_buffer_actions[mode] = []
            self.replay_buffer_infos[mode] = []
            
        self.optimizer['explore-exploit'] = config.optimizer_fn(self.network['explore-exploit'].parameters())

        self.network['exploit'] = self.network['explore-exploit']
        self.total_steps = 0
        self.states = self.task.reset()
        self.states = config.state_normalizer(self.states)
        self.policy_mixture = [copy.deepcopy(self.network['explore-exploit'].state_dict())]
        self.policy_mixture_optimizers = [copy.deepcopy(self.optimizer['explore-exploit'].state_dict())]
        self.policy_mixture_weights = torch.tensor([1.0])
        self.policy_mixture_returns = []
        self.timestamp = None

        if self.config.bonus == 'rnd':
            self.rnd_network = FCBody(self.config.state_dim).to(Config.DEVICE)
            self.rnd_pred_network = FCBody(self.config.state_dim).to(Config.DEVICE)
            self.rnd_optimizer = torch.optim.RMSprop(self.rnd_pred_network.parameters(), 0.001)
        elif self.config.bonus == 'randnet-kernel-s':
            self.kernel = FCBody(self.config.state_dim, hidden_units=(self.config.phi_dim, self.config.phi_dim)).to(Config.DEVICE)
        elif self.config.bonus == 'randnet-kernel-sa':
            self.kernel = FCBody(self.config.state_dim + self.config.action_dim, hidden_units=(self.config.phi_dim, self.config.phi_dim)).to(Config.DEVICE)
        elif self.config.bonus == 'rbf-kernel':
            self.rbf_feature = RBFSampler(gamma=1, random_state=1, n_components=self.config.phi_dim)
            if isinstance(self.task.action_space, Box):
                self.rbf_feature.fit(X = np.random.randn(5, self.config.state_dim + self.config.action_dim))
            else:
                self.rbf_feature.fit(X = np.random.randn(5, self.config.state_dim + 1))

        if isinstance(self.task.action_space, Box):
            self.uniform_prob = self.continous_uniform_prob()
        else:
            self.uniform_prob = 1./self.config.action_dim
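
A side note on the 'rbf-kernel' branch above: RBFSampler.fit only uses the number of columns of X to draw its random weights, which is why fitting on a handful of random rows is enough. A minimal sketch (the dimension 7 is illustrative):

import numpy as np
from sklearn.kernel_approximation import RBFSampler

phi = RBFSampler(gamma=1, random_state=1, n_components=64)
phi.fit(np.random.randn(5, 7))             # 7 = state_dim + action_dim, say
z = phi.transform(np.random.randn(10, 7))
print(z.shape)                             # (10, 64)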
Example #4
    def do(self, n_pts):
        """
        Extract the model using a linear classifier
        over an approximate feature map of an RBF-kernel.

        with n pairs of points on the decision boundary
        of the ATTACKED MODEL.
        :param n_pts:
        :return:
        """
        # Collect n pairs of points on the decision boundary of the oracle
        # WTF ?! We expected the contrary.
        X, y = self.collect_pts(n_pts)

        print('done collecting points')

        rbf_map = RBFSampler(n_components=n_pts, random_state=1)
        solver = HyperSolver(p=self.POS, n=self.NEG)
        rbf_solver = pipeline.Pipeline([("mapper", rbf_map),
                                        ("solver", solver)])

        gamma_range = np.logspace(-15, 6, 22, base=2)
        param_grid = dict(mapper__gamma=gamma_range)
        cv = StratifiedShuffleSplit(y, n_iter=5, test_size=0.2, random_state=1)
        grid = GridSearchCV(rbf_solver, param_grid=param_grid, cv=cv, n_jobs=8)
        grid.fit(X, y)

        scores = [x[1] for x in grid.grid_scores_]
        scores = np.array(scores).reshape(len(gamma_range))
        plt.figure(figsize=(8, 6))
        plt.plot(gamma_range, scores)

        plt.xlabel('gamma')
        plt.ylabel('score')
        plt.title('Validation accuracy (RTiX, %s)' %
                  os.path.basename(self.name))
        plt.savefig(self.name + '-SLViF-grid-npts=%d.pdf' % n_pts)

        # final train
        g = grid.best_params_['mapper__gamma']
        print('best parameters are g=%f' % g)
        rbf_svc2 = grid.best_estimator_
        y_pred = rbf_svc2.predict(self.Xt)
        print('SCORE: %f' % sm.accuracy_score(self.Yt, y_pred))
        return grid.best_score_, sm.accuracy_score(self.Yt, y_pred)
Example #5
    def estimate_LSTDQ_separate_action_indexing(self):
        #* set the fourier feature
        # random_feature_per_obs_dim = 250
        # default_length_scale = 0.1
        transformer_list = []
        self.z_dim = self.random_feature_per_obs_dim * self.obs_dim
        # self.z_dim = random_feature_per_obs_dim * self.input_dim
        models = [RBFSampler(n_components = self.random_feature_per_obs_dim, gamma = self.default_length_scale*dist) for dist in self.scale_length_vector[:-1]]
        for model in models:
            model.fit([self.obs[0]])
            # model.fit([self.input[0]])
            transformer_list.append((str(model), model))
        self.rff = FeatureUnion(transformer_list)

        #* separate action set indexing
        act_idx = []
        for i in range(self.act_dim):
            act_idx.append(np.where(self.data_acts==i)[0])
        #* apply transformation
        Z = self.rff.transform(self.obs).astype(self.dtype)
        Z_prime = self.rff.transform(self.next_obs).astype(self.dtype)
        Z_init = self.rff.transform(self.init_obs).astype(self.dtype)
        Z_term = self.rff.transform(self.term_obs).astype(self.dtype)
        assert self.z_dim == Z.shape[1]
        Phi = np.zeros((Z.shape[0], Z.shape[1]* self.act_dim), dtype=self.dtype)
        Phi_pi = np.zeros((Z.shape[0], Z.shape[1]* self.act_dim),dtype=self.dtype)
        Phi_prime_pi = np.zeros((Z_prime.shape[0], Z_prime.shape[1]* self.act_dim),dtype=self.dtype)
        Phi_init_pi = np.zeros((Z_init.shape[0], Z_init.shape[1]*self.act_dim), dtype=self.dtype)
        Phi_term_pi = np.zeros((Z_term.shape[0], Z_term.shape[1]*self.act_dim),dtype=self.dtype)
        for i in range(self.act_dim):
            Phi[act_idx[i], i*self.z_dim:(i+1)*self.z_dim] = Z[act_idx[i]]
            Phi_pi[:, i*self.z_dim:(i+1)*self.z_dim] = self.pi_current[:,i][:,None] * Z        
            Phi_prime_pi[:,i*self.z_dim:(i+1)*self.z_dim] = self.pi_next[:,i][:,None] * Z_prime
            Phi_init_pi[:,i*self.z_dim:(i+1)*self.z_dim] = self.pi_init[:,i][:,None]*Z_init
            Phi_term_pi[:,i*self.z_dim:(i+1)*self.z_dim] = self.pi_term[:,i][:,None]*Z_term
        
        I_sa = np.eye(self.act_dim*self.z_dim)

        regularized_inverse = np.linalg.inv( np.matmul(Phi.T, Phi-self.gamma*Phi_prime_pi) + self.value_reg*I_sa)
        featurized_reward = np.matmul(Phi.T, self.rews)
        reward_coef = np.matmul(regularized_inverse, featurized_reward)
        V_init = Phi_init_pi @ reward_coef
        V_term = Phi_term_pi @ reward_coef
        V_traj = V_init - V_term*self.gamma**self.horizon
        value_est = np.mean(V_traj)
        # import pdb; pdb.set_trace()
        return value_est
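
A small design note: forming the explicit inverse above is the numerically fragile step. An equivalent sketch using a direct linear solve (same local names as in the method) would be:

# equivalent to regularized_inverse @ featurized_reward, but avoids inv()
reward_coef = np.linalg.solve(
    np.matmul(Phi.T, Phi - self.gamma * Phi_prime_pi) + self.value_reg * I_sa,
    np.matmul(Phi.T, self.rews))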
Example #6
    def build(self, input_shape):
        rbf_sampler = RBFSampler(
            gamma=self.gamma,
            n_components=self.dim,
            random_state=self.random_state)
        x = np.zeros(shape=(1, self.input_dim))
        rbf_sampler.fit(x)
        self.rff_weights = tf.Variable(
            initial_value=rbf_sampler.random_weights_,
            dtype=tf.float32,
            trainable=True,
            name="rff_weights")
        self.offset = tf.Variable(
            initial_value=rbf_sampler.random_offset_,
            dtype=tf.float32,
            trainable=True,
            name="offset")
        self.built = True
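
For reference (and why random_weights_ and random_offset_ are the right initial values here), the feature map RBFSampler computes is z(x) = sqrt(2 / n_components) * cos(x @ W + b), with W = random_weights_ and b = random_offset_. A NumPy sketch against the fitted sampler above:

import numpy as np
x_batch = np.random.randn(4, rbf_sampler.random_weights_.shape[0])
z = np.sqrt(2.0 / rbf_sampler.n_components) * np.cos(
    x_batch @ rbf_sampler.random_weights_ + rbf_sampler.random_offset_)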
Example #7
def logistic(data, log_C, log_gamma):
    lb = data.lb
    train_y = lb.inverse_transform(data.train_y)
    test_y = lb.inverse_transform(data.test_y)
    print('Running Logistic Regression')
    C = np.exp(log_C)
    gamma = np.exp(log_gamma)
    print('Training with C:{}, gamma:{}'.format(C, gamma))
    rbf_feature = RBFSampler(gamma=gamma, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = LogisticRegression(random_state=0,
                             solver='lbfgs',
                             multi_class='multinomial',
                             C=C)
    clf.fit(trans_tr_x, train_y)
    te_predict = clf.predict_proba(trans_test_x)
    return roc_auc_score(data.test_y, te_predict)
Example #8
    def __init__(self, env):
        observation_examples = np.random.random((20000, 4)) * 2 - 1
        scaler = StandardScaler()
        scaler.fit(observation_examples)
        l = []
        for i in range(4):
            l.append(
                (str(i), RBFSampler(gamma=np.random.rand(),
                                    n_components=1000)))
        # Used to convert a state to a featurized representation.
        # We use RBF kernels with different variances to cover different parts of the space.
        featurizer = FeatureUnion(l)
        feature_examples = featurizer.fit_transform(
            scaler.transform(observation_examples))

        self.dimensions = feature_examples.shape[1]
        self.scaler = scaler
        self.featurizer = featurizer
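
A hypothetical follow-up, with agent standing for an instance of this class:

import numpy as np
s = np.random.random((1, 4)) * 2 - 1   # one fake 4-dimensional state
phi = agent.featurizer.transform(agent.scaler.transform(s))
print(phi.shape)                       # (1, 4000): 4 samplers x 1000 components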
Example #9
def dim_ridge(data, log_alpha, log_bw1, log_bw2, log_bw3, log_bw4, log_bw5,
              log_bw6):
    alpha = np.exp(log_alpha)
    bw1 = np.exp(log_bw1)
    bw2 = np.exp(log_bw2)
    bw3 = np.exp(log_bw3)
    bw4 = np.exp(log_bw4)
    bw5 = np.exp(log_bw5)
    bw6 = np.exp(log_bw6)
    bw = np.array([bw1, bw2, bw3, bw4, bw5, bw6])
    print('Training with alpha:{}, bw:{}'.format(alpha, bw))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(np.divide(data.train_x, bw))
    trans_test_x = rbf_feature.transform(np.divide(data.test_x, bw))
    clf = Ridge(alpha=alpha)
    clf.fit(trans_tr_x, data.train_y)
    score = clf.score(trans_test_x, data.test_y)
    return max(score, -1.0)
Example #10
    def __init__(self, num_params, observation_examples, gamma_list, lr=1e-3):
        super().__init__(num_params, lr=lr)
        # define & normalize scaler
        self.scaler = sklearn.preprocessing.StandardScaler()
        self.scaler.fit(observation_examples)
        # define featurizer
        num_featurizers = 4  # featurizers per 1 action dimension
        assert num_params % num_featurizers == 0
        assert len(gamma_list) == num_featurizers
        num_components = num_params // num_featurizers  # components per 1 action dimension

        self.featurizer = sklearn.pipeline.FeatureUnion([
            (str(i),
             RBFSampler(gamma=gamma_list[i], n_components=num_components))
            for i in range(num_featurizers)
        ])

        self.featurizer.fit(self.scaler.transform(observation_examples))
Example #11
    def kernel_transform(self, X1, X2 = None, kernel_type = 'linear_primal', n_components = 100, gamma = 1.0):
        """
        Forms the kernel matrix using the samples X1
        Parameters:
        ----------
        X1: np.ndarray
            data (n_samples1,n_features) to form a kernel of shape (n_samples1,n_samples1)
        X2: np.ndarray
            data (n_samples2,n_features) to form a kernel of shape (n_samples1,n_samples2)
        kernel_type : str
            type of kernel to be used
        gamma: float
            kernel parameter
        Returns:
        -------
        X: np.ndarray
            the kernel of shape (n_samples,n_samples)
        """
        if(kernel_type == 'linear'):
            X = linear_kernel(X1,X2)
        elif(kernel_type == 'rbf'):  
            X = rbf_kernel(X1,X2,gamma) 
        elif(kernel_type == 'tanh'):
            X = sigmoid_kernel(X1,X2,-gamma) 
        elif(kernel_type == 'sin'):
#            X = np.sin(gamma*manhattan_distances(X1,X2))
            X = np.sin(gamma*pairwise_distances(X1,X2)**2)
        elif(kernel_type =='TL1'):                
            X = np.maximum(0,gamma - manhattan_distances(X1,X2)) 
        elif(kernel_type == 'rff_primal'):
            rbf_feature = RBFSampler(gamma=gamma, random_state=1, n_components = n_components)
            X = rbf_feature.fit_transform(X1)
        elif(kernel_type == 'nystrom_primal'):
            #cannot have n_components more than n_samples1
            if(n_components > X1.shape[0]):
                raise ValueError('n_samples should be greater than n_components')
            rbf_feature = Nystroem(gamma=gamma, random_state=1, n_components = n_components)
            X = rbf_feature.fit_transform(X1)
        elif(kernel_type == 'linear_primal'):                
            X = X1
        else:
            print('Unknown kernel_type: using linear primal solver')
            X = X1
        return X
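
A hedged usage sketch; solver stands for a hypothetical instance of the class this method belongs to:

import numpy as np
X1 = np.random.randn(100, 5)
K = solver.kernel_transform(X1, X1, kernel_type='rbf', gamma=0.5)   # (100, 100)
Z = solver.kernel_transform(X1, kernel_type='rff_primal',
                            n_components=50, gamma=0.5)             # (100, 50)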
Example #12
def EfficientDecomposableGaussianORFF(X,
                                      A,
                                      gamma=1.,
                                      D=100,
                                      eps=1e-5,
                                      random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    # Decompose A=BB^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    cshape = (D, B.shape[0])
    rshape = (X.shape[0], B.shape[1])
    return LinearOperator(
        (phiX.shape[0] * B.shape[1], D * B.shape[0]),
        matvec=lambda b: dot(phiX, dot(b.reshape(cshape), B)),
        rmatvec=lambda r: dot(phiX.T, dot(r.reshape(rshape), B.T)),
        dtype=float)
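
A hypothetical use of the function above on toy data (X, A and theta below are made up); the returned scipy LinearOperator flattens its (n_samples, n_targets) output into a stacked vector:

import numpy as np
X = np.random.randn(20, 3)      # 20 samples, 3 features
A = np.eye(2)                   # decomposable operator over 2 targets
Phi = EfficientDecomposableGaussianORFF(X, A, gamma=1., D=50)
theta = np.random.randn(Phi.shape[1])
y_flat = Phi.matvec(theta)      # flattened (20, 2) output, shape (40,)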
Example #13
    def __init__(self, environment=gym.make('MountainCar-v0')):
        self.env = environment
        self.state = self.env.reset()
        self.state_low_bound = self.env.observation_space.low
        self.state_high_bound = self.env.observation_space.high
        self.n_action = self.env.action_space.n

        self.action_space = gym.spaces.Discrete(self.n_action)

        self.d = 100
        self.w = np.random.rand(self.d)

        self.feature = RBFSampler(gamma=1, random_state=1)
        X = []
        for _ in range(100000):
            s = self.env.observation_space.sample()
            sa = np.append(s, np.random.randint(self.n_action))
            X.append(sa)
        self.feature.fit(X)
Example #14
def blindSVM(pickle_file,text,y_blind):
    y_svm_blind = deepcopy(y_blind)
    cleaned = tokenize(text)
    with open("./data/svm/words/integer_index_tokens.pickle","rb") as f:
        word_dict = pickle.load(f)
    X_blind_words = bagging(cleaned,word_dict)
    full_name = glob("./pickles/"+pickle_file+"/best*")[0]
    with open(full_name,"rb") as f:
        model = pickle.load(f)
    y_svm_blind[np.where(y_svm_blind==0)[0]] = -1
    if "rbf" in pickle_file:
        number = int(re.sub(".pickle","",re.sub(r".*best_model_","",full_name)))
        df = pd.read_csv(glob("./pickles/"+pickle_file+"/log*")[0])
        g = float(df[df["model"] == number]["gamma"])
        n = int(df[df["model"] == number]["n_components"])
        rbf_feature = RBFSampler(gamma=g,n_components=n)
        X_blind_words = rbf_feature.fit_transform(X_blind_words)
    out = model.decision_function(X_blind_words)
    np.save("./pickles/"+pickle_file+"/prob_map_blind.npy", out)
Example #15
def letter():
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.03, n_components=500),
        forget_score=10,
    )

    _data = pd.read_csv(f'{DATA_DIR}/letter-recognition.data')
    train_data = _data.values[:16000, 1:]
    train_label = _data.values[:16000, 0]
    test_data = _data.values[16000:, 1:]
    test_label = _data.values[16000:, 0]

    for i, lbl in enumerate(train_label):
        train_label[i] = ord(lbl) - 65  # ord('A') == 65, so 'A' -> 0

    for i, lbl in enumerate(test_label):
        test_label[i] = ord(lbl) - 65  # ord('A') == 65, so 'A' -> 0

    test_label = test_label.reshape(test_label.shape[0], 1).astype(int)  # np.int is removed in modern NumPy

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data,
                             y=test_label.reshape(test_label.shape[0], 1))
    print(
        f'Letter: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #16
    def fit(self, X, y):
        # fit RFF Model
        self.pipeline = Pipeline(
            [
                (
                    "rff",
                    RBFSampler(
                        n_components=self.n_components,
                        gamma=self.gamma,
                        random_state=self.random_state,
                    ),
                ),
                ("sgd", SGDRegressor(**self.sgd_kwargs)),
            ]
        )

        # fit the pipeline (RFF map followed by the SGD regressor)
        self.pipeline.fit(X, y)
        return self
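
Hypothetical usage, with est assumed to be an instance of the surrounding class (n_components, gamma, random_state and sgd_kwargs set elsewhere):

est.fit(X_train, y_train)             # builds and fits the RFF + SGD pipeline
y_hat = est.pipeline.predict(X_test)  # predict through the fitted pipeline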
Example #17
    def grid_retrain_in_f(self, n_dim=500):
        rbf_map = RBFSampler(n_components=n_dim, random_state=1)  # n_dim is the feature count, not gamma
        fourier_approx_svm = pipeline.Pipeline([("mapper", rbf_map),
                                                ("svm", LinearSVC())])

        # C_range = np.logspace(-5, 15, 21, base=2)
        # gamma_range = np.logspace(-15, 3, 19, base=2)
        # param_grid = dict(mapper__gamma=gamma_range, svm__C=C_range)
        # cv = StratifiedShuffleSplit(Y, n_iter=5, test_size=0.2, random_state=42)
        # grid = GridSearchCV(fourier_approx_svm, param_grid=param_grid, cv=cv)
        # grid.fit(X, Y)
        #
        # rbf_svc2 = grid.best_estimator_

        rbf_svc2 = fourier_approx_svm
        rbf_svc2.fit(self.X_ex, self.y_ex)

        self.set_clf2(rbf_svc2)
        return self.benchmark()
Example #18
    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma,
                                   n_components=D, random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0],
                                          1, self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape((1, -1,
                                                           self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO:
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.eye()w np.dot(w.T, w) / wn)
            # Xb is 2-D here (the 3-D expansion is commented out above), so no
            # reshape is needed before building the operator.
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))
Example #19
    def generate_data_transformers(self):
        # Data Transformation (Scaling, Normalization)
        if self.data_transform:
            if self.data_transform == 'EXP':
                # unfinished branch: setting attributes on a str will fail,
                # so this is effectively a placeholder
                transformer = ''
                transformer.name = ''

            elif self.data_transform == 'NORM':
                pass

            transformer.params = utils.get_params_string(
                self.data_transform_params)
            self.transformer = transformer

        # Feature Selection (Var, Chi^2)
        if self.feature_selection:
            if self.feature_selection == 'VAR':
                selector = VarianceThreshold(**self.feature_selection_params)
                selector.name = 'VarianceThreshold'

            elif self.feature_selection == 'CHI2':
                pass

            selector.params = utils.get_params_string(
                self.feature_selection_params)
            self.selector = selector

        # Kernel Approximation (RBF, Chi^2)
        if self.approximation_kernel:
            if self.approximation_kernel == 'RBF':
                approx_kernel_map = RBFSampler(
                    **self.kernel_approximation_params)
                approx_kernel_map.name = 'RBFSampler'

            elif self.approximation_kernel == 'CHI2':
                approx_kernel_map = AdditiveChi2Sampler(
                    **self.kernel_approximation_params)
                approx_kernel_map.name = 'AdditiveChi2Sampler'

            approx_kernel_map.params = utils.get_params_string(
                self.kernel_approximation_params)
            self.approx_kernel_map = approx_kernel_map
Example #20
    def trainKernelApproxSvgOnVoting(self, X_predicted: numpy.ndarray,
                                     y: numpy.ndarray):
        """Train kernel for classifier

        Args:
            X_predicted (numpy.ndarray): The prediction of the other classifiers.
            y (numpy.ndarray): The real labels.
        """
        if not X_predicted.size:
            raise ValueError("No X_predicted data was provided")
        if not y.size:
            raise ValueError("No y data was provided")
        logging.info("training stacking classifier")
        self.rbfKernel = RBFSampler(gamma=1, random_state=1)
        X_features = self.rbfKernel.fit_transform(X_predicted)
        self.stackingEstimator = SGDClassifier(
            max_iter=config.getValueFromConfig("SGDClassifierIterations"))
        self.stackingEstimator.fit(X_features, y)
        logging.info("stacking-classifier: " +
                     str(self.stackingEstimator.score(X_features, y)))
Example #21
    def __init__(self):
        """Init a new agent.
        """
        self.gamma = 1.0
        self.scaler = StandardScaler()
        self.feature_generation = RBFSampler()

        init_samples = np.array(
            [[np.random.uniform(-1.2, 0.6),
              np.random.uniform(-0.07, 0.07)] for _ in range(10000)])
        init_samples_scaled = self.scaler.fit_transform(init_samples)
        self.feature_generation.fit(init_samples_scaled)

        self.models = []
        for _ in range(3):
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit(
                [self.preprocessing([np.random.uniform(-0.6, -0.4), 0])],
                [-1])  # dirty !
            self.models.append(model)
Example #22
def rbf_map(X_train=X_train_red,
            X_test=X_test_red,
            gamma=0.2,
            rbfsampler=True,
            n_components=100,
            scale=False):
    if rbfsampler:
        feature_map = RBFSampler(gamma=gamma,
                                 random_state=8,
                                 n_components=n_components)
    else:
        feature_map = Nystroem(gamma=gamma,
                               random_state=8,
                               n_components=n_components)
    X_train_mapped = feature_map.fit_transform(X_train)
    X_test_mapped = feature_map.transform(X_test)
    if scale:
        X_train_mapped, X_test_mapped = scale_data(X_train_mapped,
                                                   X_test_mapped)
    return X_train_mapped, X_test_mapped
Example #23
    def __init__(self, sklearn_kernel, sklearn_kernel_gamma,
                 sklearn_kernel_ncomponents, sklearn_loss,
                 sklearn_loss_penalty, sklearn_learning_rate_alpha,
                 sklearn_learning_rate):
        super().__init__("sklearn")

        if sklearn_kernel == "rbf":
            ker = RBFSampler(gamma=sklearn_kernel_gamma,
                             n_components=sklearn_kernel_ncomponents)
            self.kernel = lambda x: ker.fit_transform(x)
        elif sklearn_kernel == "":
            self.kernel = lambda x: x
        else:
            print("Invalid kernel {}".format(sklearn_kernel))
            assert False

        self.sklearn_loss = sklearn_loss
        self.sklearn_loss_penalty = sklearn_loss_penalty
        self.sklearn_learning_rate_alpha = sklearn_learning_rate_alpha
        self.sklearn_learning_rate = sklearn_learning_rate
Example #24
    def kernel_transformation_using_nystroem_rbf(self,df,cat):
        df=df.fillna(0)
        df=df.replace([np.inf, -np.inf], 0)
        datecol=[x for x in df.columns if df[x].dtypes=='datetime64[ns]']
        X1=[x for x in df.columns if df[x].dtypes != 'object' and x not in datecol and x not in self.target]     
        X=[x for x in X1 if x not in cat]
        y=self.target
        j = np.linspace((10**-2),(10**2),50)
        g=0
        max1=0
        df=df.fillna(0)
        df=df.replace(np.inf, 0)
        df=df.replace(-np.inf, 0)
        for i in j:
            rbf_feature = Nystroem(kernel = 'rbf', gamma=i, random_state=2,n_components=10)
            rbf_feature.fit(df[X])
            X_features = rbf_feature.transform(df[X])
            X_features=np.nan_to_num(X_features)
#            SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, shuffle=True, verbose=0, epsilon=0.1, n_jobs=1, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, class_weight=None, warm_start=False, average=False, n_iter=None)
            clf = SGDClassifier()   
            clf.fit(X_features, df[y])
            y_pred = clf.predict(X_features)
            score=f1_score(df[y], y_pred, average='micro') 
            if(score>max1):
                max1=score
                g=i
        rbf_feature = RBFSampler(gamma=g, random_state=2,n_components=10)
        rbf_feature.fit(df[X])
        X_features = rbf_feature.transform(df[X])
        l=[]
        for r in range(10):
            l.append('k_'+str(r))
        X_features=pd.DataFrame(data=X_features,columns=l)
#        SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True,shuffle=True, verbose=0, epsilon=0.1, n_jobs=1, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, class_weight=None, warm_start=False, average=False, n_iter=None)
        clf = SGDClassifier()   
        clf.fit(X_features, df[y])
        y_pred = clf.predict(X_features)  # recompute predictions for the final model
        score=f1_score(df[y], y_pred, average='micro')
        print("Score is")
        print(score)
        print(g)
        return X_features
Example #25
    def ESTIM(self):
        ESTIMATORS = {
            "dummy":
            DummyClassifier(),
            'CART':
            DecisionTreeClassifier(),
            'ExtraTrees':
            ExtraTreesClassifier(n_estimators=100),
            'RandomForest':
            RandomForestClassifier(n_estimators=100),
            'Nystroem-SVM':
            make_pipeline(Nystroem(gamma=0.015, n_components=1000),
                          SVC(C=100, kernel='linear', probability=True)),
            'SampledRBF-SVM':
            make_pipeline(RBFSampler(gamma=0.015, n_components=1000),
                          LinearSVC(C=100)),
            'LogisticRegression-SAG':
            LogisticRegression(solver='sag', tol=1e-1, C=1e4),
            'LogisticRegression-SAGA':
            LogisticRegression(solver='saga', tol=1e-1, C=1e4),
            'MultilayerPerceptron':
            MLPClassifier(hidden_layer_sizes=(100, 100),
                          max_iter=400,
                          alpha=1e-4,
                          solver='sgd',
                          learning_rate_init=0.2,
                          momentum=0.9,
                          verbose=1,
                          tol=1e-4,
                          random_state=1),
            'MLP-adam':
            MLPClassifier(hidden_layer_sizes=(100, 100),
                          max_iter=400,
                          alpha=1e-4,
                          solver='adam',
                          learning_rate_init=0.001,
                          verbose=1,
                          tol=1e-4,
                          random_state=1)
        }
        return ESTIMATORS
Example #26
    def __init__(self,
                 rank,
                 delta,
                 lbd=0,
                 gamma_range=[1.0],
                 random_state=None,
                 normalize=True):
        """
        :param rank:
            Rank of approximation.
        :param delta:
            Number of lookahead columns.
            With delta=0, random selection of features is recovered.
        :param gamma_range:
            Hyperparameter to RBF kernels to be modeled
        :param random_state:
            Random state.
        :param normalize:
            Normalize the implicit feature space.
        """
        assert isinstance(gamma_range, list) or isinstance(
            gamma_range, ndarray)

        self.G = None
        self.active_set = None
        self.beta = None
        self.bias = None
        self.gmeans = None
        self.gnorms = None

        self.gamma_range = gamma_range
        self.random_state = random_state
        self.rank = rank
        self.delta = delta
        self.lbd = lbd
        self.normalize = normalize
        self.samplers = [
            RBFSampler(gamma=g,
                       random_state=random_state,
                       n_components=delta + rank) for g in gamma_range
        ]
Example #27
def main():
    #print(pegasos(x_train, y_train, 1, 1000))
    #pegasos_kernel(x_train, y_train, 1, 1000, rbf_func)
    margins = []
    #print(x_train.shape)
    for n in range(1, 5):  # xrange is Python 2 only
        train = np.loadtxt('data/data' + str(n) + '_train.csv')
        test = np.loadtxt('data/data' + str(n) + '_validate.csv')
        x_train = train[:, 0:2].copy()
        y_train = train[:, 2:3].copy().flatten()
        x_test = test[:, 0:2].copy()
        y_test = test[:, 2:3].copy().flatten()
        print(y_test.shape)
        rbf_func = RBFSampler(gamma=1, random_state=1).fit_transform
        alpha = pegasos_kernel(x_train, y_train, 0.02, 1000,
                               rbf_func)[:y_test.size]
        x_data = rbf_func(x_test)  # note: x_data is unused below; K is built from the raw test points
        K = x_test.dot(x_test.T)
        results = y_test * np.sign(alpha.dot(K))
        print(sum(results[results > 0]))
Example #28
def dim_logistic(data, log_C, log_bw1, log_bw2, log_bw3, log_bw4, log_bw5,
                 log_bw6):
    lb = data.lb
    train_y = lb.inverse_transform(data.train_y)
    test_y = lb.inverse_transform(data.test_y)
    C = np.exp(log_C)
    bw1 = np.exp(log_bw1)
    bw2 = np.exp(log_bw2)
    bw3 = np.exp(log_bw3)
    bw4 = np.exp(log_bw4)
    bw5 = np.exp(log_bw5)
    bw6 = np.exp(log_bw6)
    bw = np.array([bw1, bw2, bw3, bw4, bw5, bw6])
    print('Training with C:{}, bw:{}'.format(C, bw))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(np.divide(data.train_x, bw))
    trans_test_x = rbf_feature.transform(np.divide(data.test_x, bw))
    clf = LogisticRegression(random_state=0, solver='lbfgs', C=C)
    clf.fit(trans_tr_x, train_y)
    te_predict = clf.predict_proba(trans_test_x)
    return roc_auc_score(data.test_y, te_predict)
Example #29
    def create_Kernel(self, W):
        db = self.db
        sigma = db['sigma']

        X = db['data'].dot(W)
        gamma_V = 1.0 / (2 * np.power(sigma, 2))

        if self.N < 30000:  # if rbf kernel too large use RFF instead
            db['Kernel_matrix'] = sklearn.metrics.pairwise.rbf_kernel(
                X, gamma=gamma_V)
        else:
            rbf_feature = RBFSampler(gamma=gamma_V,
                                     random_state=1,
                                     n_components=2000)
            Z = rbf_feature.fit_transform(X)
            db['Kernel_matrix'] = Z.dot(Z.T)

        db['D_matrix'] = np.diag(
            1 / np.sqrt(np.sum(db['Kernel_matrix'], axis=1)))  # 1/sqrt(D)

        return db['Kernel_matrix']
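
The else-branch above relies on Z @ Z.T converging to the exact RBF kernel as n_components grows; a minimal self-contained check (toy sizes, made-up gamma):

import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.randn(200, 10)
K_exact = rbf_kernel(X, gamma=0.5)
Z = RBFSampler(gamma=0.5, n_components=2000, random_state=1).fit_transform(X)
print(np.abs(K_exact - Z @ Z.T).max())  # shrinks as n_components grows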
Example #30
    def initialize(self, gamma=None, fourierFeatures=None, decay=None):
        """Initializes the RBFSampler for the detector."""

        if gamma is None:
            self.gamma = 2.0
        else:
            self.gamma = gamma
        if fourierFeatures is None:
            self.fourierFeatures = 6000
        else:
            self.fourierFeatures = fourierFeatures
        if decay is None:
            self.decay = 0.2
        else:
            self.decay = decay

        print('parameters -- gamma={} fourierFeatures={} decay={}'.format(
            self.gamma, self.fourierFeatures, self.decay))

        self.kernel = RBFSampler(gamma=self.gamma,
                                 n_components=self.fourierFeatures,
                                 random_state=290)