def compute_vector_lr(syn0_proxy, bins_proxy, neg_words_mult, lbda, word_idxs):
    syn0 = syn0_proxy.get()
    bins = bins_proxy.get()

    counts = defaultdict(lambda: np.zeros(2).astype(np.uint32))

    for widx in word_idxs:
        counts[widx][0] += 1

    neg_words_idxs = sample(bins, int(neg_words_mult * len(word_idxs)))
    for neg_widx in neg_words_idxs:
        counts[neg_widx][1] += 1

    vectors = syn0[list(counts.keys())]
    count_pairs = np.vstack(list(counts.values()))

    f = lambda w, params=(vectors, count_pairs[:, 0], count_pairs[:, 1], lbda): log_l(w, *params)

    x0 = np.zeros(syn0.shape[1] + 1)
    opt = lbfgsb.fmin_l_bfgs_b(f, x0)

    if opt[2]["warnflag"]:
        logging.debug("Error in optimization: %s", opt[2])

    lr_vec = opt[0].astype(np.float32)
    if not np.all(np.isfinite(lr_vec)):
        logging.info("Error computing lr vector")
        lr_vec[:] = 0

    return lr_vec
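A note on the return convention that most snippets on this page unpack: fmin_l_bfgs_b returns a tuple (x, f, d) holding the minimiser, the final objective value, and an information dict whose 'warnflag' entry is 0 on convergence, 1 when the evaluation budget is exhausted, and 2 for any other stop reason. A minimal self-contained sketch of that pattern on a toy least-squares objective (the names and data below are illustrative only, not taken from the snippet above):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f(w, A, b):
    # 0.5 * ||A w - b||^2
    r = A.dot(w) - b
    return 0.5 * r.dot(r)

def g(w, A, b):
    # gradient of f with respect to w
    return A.T.dot(A.dot(w) - b)

A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 3.0])

x, fval, d = fmin_l_bfgs_b(f, np.zeros(2), fprime=g, args=(A, b))
if d['warnflag'] != 0:
    print('did not converge:', d['task'])
print(x, fval)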
Example #2
    def __solver__(self, p):
        #WholeRepr2LinConst(p)#TODO: remove me

        bounds = []

        # doesn't work in Python versions < 2.5
        # BOUND = lambda x: x if isfinite(x) else None

        def BOUND(x):
            if isfinite(x): return x
            else: return None

        for i in range(p.n): bounds.append((BOUND(p.lb[i]), BOUND(p.ub[i])))

        xf, ff, d = fmin_l_bfgs_b(p.f, p.x0, fprime=p.df,
                  approx_grad=0,  bounds=bounds,
                  iprint=p.iprint, maxfun=p.maxFunEvals)

        if d['warnflag'] in (0, 2):
            # if 2 - some problems can be present, but final check from RunProbSolver will set negative istop if solution is unfeasible
            istop = SOLVED_WITH_UNIMPLEMENTED_OR_UNKNOWN_REASON
            if d['warnflag'] == 0: msg = 'converged'
        elif d['warnflag'] == 1:  istop = IS_MAX_FUN_EVALS_REACHED

        p.xk = p.xf = xf
        p.fk = p.ff = ff
        p.istop = istop
        p.iterfcn()
Example #3
 def smoothData(self,x,y,weight,nMiss=0):
   '''
   smooth data
   '''
   import scipy.optimize.lbfgsb as lbfgsb
   from scipy.fftpack.realtransforms import dct,idct
   n0 = len(x)
   #x = np.array([x,x,x]).flatten()
   #y = np.array([y,y,y]).flatten()
   #weight = np.array([weight,weight,weight]).flatten()
   n = len(x)
   weight = 1./weight
   # scale 0 to 1
   weight = weight/np.max(weight)
   i = np.arange(1,n+1)
   eigenvalues = -2. + 2.*np.cos((i-1)*np.pi/n)
   DCTy = dct(y,norm='ortho',type=2)
   dcty2 = DCTy**2
   eigenvalues2 = eigenvalues**2
   x0 = np.atleast_1d(1.)
   y_hat = np.zeros_like(y)
   xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,x0,fprime=None,factr=10.,\
          approx_grad=True,args=(y,weight,eigenvalues2,n,nMiss,y_hat))
   solvedGamma = np.exp(xpost)[0]
   return y_hat,solvedGamma 
Example #4
    def __solver__(self, p):
        #WholeRepr2LinConst(p)#TODO: remove me

        bounds = []

        # doesn't work in Python versions < 2.5
        # BOUND = lambda x: x if isfinite(x) else None

        def BOUND(x):
            if isfinite(x): return x
            else: return None

        for i in range(p.n):
            bounds.append((BOUND(p.lb[i]), BOUND(p.ub[i])))

        xf, ff, d = fmin_l_bfgs_b(p.f,
                                  p.x0,
                                  fprime=p.df,
                                  approx_grad=0,
                                  bounds=bounds,
                                  iprint=p.iprint,
                                  maxfun=p.maxFunEvals)

        if d['warnflag'] in (0, 2):
            # if 2 - some problems can be present, but final check from RunProbSolver will set negative istop if solution is unfeasible
            istop = SOLVED_WITH_UNIMPLEMENTED_OR_UNKNOWN_REASON
            if d['warnflag'] == 0: msg = 'converged'
        elif d['warnflag'] == 1: istop = IS_MAX_FUN_EVALS_REACHED

        p.xk = p.xf = xf
        p.fk = p.ff = ff
        p.istop = istop
        p.iterfcn()
Example #5
 def minimize_lbfgs(self, parameters, x, y):
     parameters2 = parameters.reshape([self.M], order="F")
     # minimizador L-BFGS-B
     result, _, _ = opt2.fmin_l_bfgs_b(self.get_objective,
                                       parameters2,
                                       args=[x, y],
                                       maxiter=50)
     return result.reshape([-1, 1], order="F")
Example #6
 def minimize_lbfgs(self, parameters, x, y, sigma, emp_counts, classes_idx,
                    nr_x, nr_f, nr_c):
     parameters2 = parameters.reshape([nr_f * nr_c], order="F")
     result, _, d = opt2.fmin_l_bfgs_b(
         self.get_objective,
         parameters2,
         args=[x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c])
     return result.reshape([nr_f, nr_c], order="F")
Example #7
def nls_lbfgs_b(S,
                D,
                C_init=None,
                l1_reg=0.1,
                max_iter=1000,
                tol=1e-4,
                callback=None):
    """Non-negative least squares solver using L-BFGS-B.
    
    """
    S = ss.csr_matrix(check_array(S, accept_sparse='csr'))
    D = ss.csr_matrix(check_array(D, accept_sparse='csr'))
    n_features = S.shape[0]
    n_components = D.shape[1]

    DtD = safe_sparse_dot(D.T, D)
    DtSD = safe_sparse_dot(D.T, safe_sparse_dot(S, D))

    def f(C, *args):
        C = ss.diags(C)
        tonorm = S - safe_sparse_dot(D, safe_sparse_dot(C, D.T))
        reg = l1_reg * C.diagonal().sum()
        return (0.5 * (ss.linalg.norm(tonorm)**2)) + reg

    def fprime(C, *args):
        C = ss.diags(C)
        DtDCDtD = safe_sparse_dot(DtD, safe_sparse_dot(C, DtD))
        reg = l1_reg * ss.eye(C.shape[0])
        full = DtDCDtD - DtSD + reg
        return full.diagonal()

    if C_init is None:
        C = np.zeros(n_components, dtype=np.float64)
    elif C_init.shape == (n_features, n_features):
        C = np.diag(C_init)
    else:
        C = C_init

    C, residual, d = fmin_l_bfgs_b(
        f,
        x0=C,
        fprime=fprime,
        pgtol=tol,
        bounds=[(0, None)] * n_components,
        maxiter=max_iter,
        callback=callback,
    )

    # testing reveals that sometimes, very small negative values occur
    C[C < 0] = 0

    if l1_reg:
        residual -= l1_reg * C.sum()
    residual = np.sqrt(2 * residual)
    if d['warnflag'] > 0:
        print("L-BFGS-B failed to converge")

    return C, residual
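In nls_lbfgs_b above the non-negativity constraint comes entirely from the bounds argument ([(0, None)] per component). A self-contained sketch of the same bound-constrained idea on a small dense least-squares problem (toy data, not the sparse setup used above):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

rng = np.random.RandomState(0)
A = rng.rand(20, 5)
b = A.dot(np.array([0.5, 0.0, 1.2, 0.0, 0.3])) + 0.01 * rng.randn(20)

def f(x):
    r = A.dot(x) - b
    return 0.5 * r.dot(r)

def fprime(x):
    return A.T.dot(A.dot(x) - b)

# one (lower, upper) pair per variable; None means unbounded on that side
x, resid, d = fmin_l_bfgs_b(f, np.zeros(5), fprime=fprime,
                            bounds=[(0, None)] * 5, pgtol=1e-6)
print(x, d['warnflag'])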
Example #8
 def train(self, w0, debug=False):
     if debug:
         iprint = 0
     else:
         iprint = -1
     x, f, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.grad, pgtol=1e-09, iprint=iprint)
     if d["warnflag"] != 0:
         raise OptimisationException(d["task"])
     return x
Example #9
 def train(self,w0,debug=False):
   if debug:
     iprint = 0
   else:
     iprint = -1
   x,f,d = fmin_l_bfgs_b(self.objective, w0, fprime=self.grad, pgtol=1e-09, iprint=iprint)
   if d['warnflag'] != 0:
     raise OptimisationException(d['task'])
   return x
Example #10
def solve_l1l1_approx(X, y, lbda):
    make_l1l1_approx()
    f = lambda w, params=(X, y, lbda): l1l1_approx(w, *params)

    x0 = np.zeros(X.shape[1] + 1)
    opt = lbfgsb.fmin_l_bfgs_b(f, x0, bounds=[(0, None)] * x0.shape[0])

    logging.debug(opt[2])
    return opt[0].astype(np.float32)
Example #11
    def find2(self, POIMobj, motif_len, motif_start, base, path2pwm=None,solver="NLP"):
        self.motif_start = motif_start
        self.motif_len = motif_len
        x0 = tools.ini_pwm(motif_len, 1, len(base))[0]

        x0 = x0.flatten()

        lb = np.ones(x0.shape) * 0.001
        ub = np.ones(x0.shape) * 0.999
        iprint = 0
        maxIter = 1000
        ftol = 1e-04
        gradtol = 1e-03
        diffInt = 1e-05
        contol = 1e-02
        maxFunEvals = 1e04
        maxTime = 100

        lenA = len(x0)
        lenk = len(x0) // len(base)
        Aeq = np.zeros((lenk, lenA))
        beq = np.ones(lenk)
        for i in range(lenk):
            for pk in range(i, lenA, lenk):
                Aeq[i, pk] = 1

                # ,Aeq=Aeq,beq=beq,
        cons = {'type': 'eq', 'fun': lambda x: np.dot(Aeq, x) - beq}
        bnds = []
        for i in range(len(x0)):
            bnds.append((lb[i], ub[i]))
        # bnds = np.vstack((lb,ub))


        if solver == "ralg":
            from openopt import NLP
            p = NLP(self.f_L2, x0,lb=lb, ub=ub, Aeq=Aeq,beq=beq, args=(POIMobj.gPOIM,POIMobj.L,motif_start,POIMobj.small_k,motif_len),  diffInt=diffInt, ftol=ftol, plot=0, iprint=iprint,maxIter = maxIter, maxFunEvals = maxFunEvals, show=False, contol=contol)
            result = p._solve(solver)
            x = result.xf
            f = result.ff
        elif solver == "LBFGSB":
            x, f, d = fmin_l_bfgs_b(self.f_L2, x0,
                                    args=(POIMobj.gPOIM, POIMobj.L, motif_start, POIMobj.small_k, motif_len),
                                    approx_grad=True)#constraints=cons)#
        elif solver == "SLSQP":
            result = minimize(self.f_L2, x0,args=(POIMobj.gPOIM, POIMobj.L, motif_start, POIMobj.small_k, motif_len),method='SLSQP',bounds=bnds,constraints=cons)
            x = result.x
            f = result.fun
        self.motif_pwm = np.reshape(x, (4, motif_len))
        fopt = f
        self.normalize()
        if not(path2pwm is None):
            np.savetxt(path2pwm, self.poim_norm)

        return self.motif_pwm
Example #12
File: LBFGSB.py Project: MSTU/grid
	def Run (self):
				
		self.iteration = 0
		self.ma.initHistory()
		
#		print 'point1'
		
		(self.xopt, f, d) = fmin_l_bfgs_b (self.Objective, self.vl0, approx_grad=1, bounds = self.bounds, m = self.m, factr=self.factr, pgtol=self.pgtol, epsilon=self.epsilon, maxfun=self.maxfun)
#		print 'point2'
		
		self.Objective(self.xopt)	
Example #13
    def train_lmbfgs(self):
        """
        Train the model by maximising posterior with LM-BFGS.

        The training data should have been set at this stage:
            >> h = hcrf(H, maxw, maxf)
            >> h.X = X
            >> h.Y = Y
            >> h.lamb = lamb
            >> final_params = h.train_lmbfgs()
        Return the final parameter vector.
        """
        initial = self.param[self.param_non_inf_indexes]
        fparam, _, _ = fmin_l_bfgs_b(self.get_obj, initial)
        return fparam
Example #14
	def Run (self):
				
		self.iteration = 0
		self.ma.initHistory()
		
#		print 'point1'

		vl0 = self.NormX (self.vl0)
#		print 'vl0=',self.vl0

		if self.method == 'Opt_1D':
			(xa, xb) = self.bounds[0]
#			print 'xa=',xa,' xb=',xb
			xa = self.NormX ([xa])
			xb = self.NormX ([xb])
			self.xopt = fminbound (self.Objective1D, xa[0], xb[0], xtol = self.xtol, maxfun = self.maxfun)
			self.Objective1D(self.xopt)	
			return

		
		elif self.method == 'NelderMead':
			self.xopt = fmin (self.Objective, vl0, xtol=self.xtol, ftol=self.ftol, maxfun=self.maxfun)
		elif self.method == 'Powell':
			self.xopt = fmin_powell (self.Objective, vl0, xtol=self.xtol, ftol=self.ftol, maxfun=self.maxfun)
		elif self.method == 'LBFGSB':
			bounds = self.NormBounds (self.bounds)
			(self.xopt, f, d) = fmin_l_bfgs_b (self.Objective, vl0, approx_grad=1, bounds = bounds, epsilon=self.epsilon, maxfun=self.maxfun)
		elif self.method == 'TNC':
			bounds = self.NormBounds (self.bounds)
			(self.xopt, f, d) = fmin_tnc (self.Objective, vl0, approx_grad=1, bounds = bounds, ftol=self.ftol, xtol=self.xtol, epsilon=self.epsilon, maxfun=self.maxfun)
		elif self.method == 'Anneal':
			(lower, upper) = self.NormBoundsAnneal(self.bounds)
			(self.xopt, r) = anneal (self.Objective, vl0, schedule = self.schedule, 
														maxeval=self.maxfun, feps=self.ftol, lower=lower, upper=upper)
		elif self.method == 'Cobyla':
			self.isCobyla = 1
			self.ce = self.CreateBounds(self.vl, self.fce)
			self.xopt = self.fmin_cobyla (self.Objective, vl0, self.ce, rhobeg=self.rhobeg, rhoend=self.rhoend, maxfun=self.maxfun)

		else:
			print()
			print('Optimization Error: method ', self.method, 'is absent')
			print()
			return
#		print 'point2'
		
		self.Objective(self.xopt)	
Example #15
def optimization_layer(result, iprint=-1):
    """
    Implementation of the Optimization layer. It uses L-BFGS [1] as special case of L-BFGS-B [2] in scipy.optimize.
    The result object is modified to yield the optimal BEModel.
    A sub-dictionary with additional information is added under the key result.additional['Opt'].

    [1] D.C. Liu and J. Nocedal. ``On the Limited Memory Method for Large Scale Optimization'',
        Math. Prog. B 45 (3), pp.~503--528, 1989. DOI 10.1007/BF01589116

    [2] C. Zhu, R.H. Byrd and J. Nocedal, ``Algorithm 778: L-BFGS-B: Fortran subroutines for large-scale
        bound-constrained optimization'', ACM Trans. Math. Software 23 (4), pp.~550--560, 1997.
        DOI 10.1145/279232.279236

    Parameters
    ----------
    result : object
        A valid :py:class:`cobea.model.Result` object.
        The object is modified during processing; the model variables are set to their optimal values.
    iprint : int
        (Optional) verbosity of fmin_l_bfgs_b. Default: -1

    Returns
    -------
    result : object
        Identical to input object.
    """
    x = result._to_statevec()
    print('Optimization layer: running with %i model parameters...' %
          result.ndim)
    xopt, fval, optimizer_dict = fmin_l_bfgs_b(result._gradient,
                                               x,
                                               args=(result.input_matrix, ),
                                               iprint=iprint,
                                               maxiter=int(2e4),
                                               factr=100)
    print('    ...finished with %i gradient (L-BFGS) iterations.' %
          optimizer_dict['nit'])
    print('    chi^2 = %.3e (%s)^2' % (fval, result.unit))
    result._from_statevec(xopt)
    result.additional['Opt'] = optimizer_dict
    return result
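As the docstring notes, plain (unconstrained) L-BFGS falls out of the same routine simply by leaving bounds at its default. A standalone illustration of that point, independent of the cobea Result object, using SciPy's built-in Rosenbrock helpers:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b, rosen, rosen_der

x0 = np.array([-1.2, 1.0])
# no bounds argument, so this is ordinary L-BFGS
xopt, fval, info = fmin_l_bfgs_b(rosen, x0, fprime=rosen_der,
                                 maxiter=int(2e4), factr=100)
print(xopt, fval, info['nit'])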
Example #16
    def _fit_inner(self, X, y, activations, deltas, coef_grads,
                   intercept_grads, layer_units):

        # Store meta information for the parameters
        self._coef_indptr = []
        self._intercept_indptr = []
        start = 0

        # Save sizes and indices of coefficients for faster unpacking
        for i in range(self.n_layers_ - 1):
            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]

            end = start + (n_fan_in * n_fan_out)
            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))
            start = end

        # Save sizes and indices of intercepts for faster unpacking
        for i in range(self.n_layers_ - 1):
            end = start + layer_units[i + 1]
            self._intercept_indptr.append((start, end))
            start = end

        # Run LBFGS
        packed_coef_inter = self._pack(self.coefs_, self.intercepts_)

        if self.verbose is True or self.verbose >= 1:
            iprint = 1
        else:
            iprint = -1

        optimal_parameters, self.loss_, d = fmin_l_bfgs_b(
            x0=packed_coef_inter,
            func=self._loss_grad_lbfgs,
            maxfun=self.max_iter,
            iprint=iprint,
            pgtol=self.tol,
            args=(X, y, activations, deltas, coef_grads, intercept_grads))

        self._unpack(optimal_parameters)
Example #17
 def train(self,debug=False):
   """Train the mixture model."""
   if debug:
     iprint = 0
   else:
     iprint = -1
   # Initialise weights to zero, except interpolation
   num_phrase_features = self.phrase_index[1] - self.phrase_index[0]
   num_models = ((self.interp_index[1] - self.interp_index[0])//num_phrase_features)+1
   w0 = [0.0] * self.interp_index[0]
   w0 += [1.0/num_models] * (self.interp_index[1]-self.interp_index[0])
   bounds = [(None,None)] * len(w0)
   bounds[self.interp_index[0]:self.interp_index[1]] = \
     [(self.interp_floor,1)] * (self.interp_index[1] - self.interp_index[0])
   w0 = np.array(w0)
   x,f,d = fmin_l_bfgs_b(self.objective, w0, fprime=self.gradient, bounds=bounds,  pgtol=1e-09, iprint=iprint)
   if d['warnflag'] != 0:
     raise OptimisationException(d['task'])
   weights = x[:self.interp_index[0]]
   mix_weights = x[self.interp_index[0]:]
   mix_weights = mix_weights.reshape((num_models-1,num_phrase_features))
   mix_weights = np.vstack((mix_weights, 1-np.sum(mix_weights,axis=0)))
   return weights,mix_weights
Example #18
 def train(self, debug=False):
     """Train the mixture model."""
     if debug:
         iprint = 0
     else:
         iprint = -1
     # Initialise weights to zero, except interpolation
     num_phrase_features = self.phrase_index[1] - self.phrase_index[0]
     num_models = ((self.interp_index[1] - self.interp_index[0]) / num_phrase_features) + 1
     w0 = [0.0] * self.interp_index[0]
     w0 += [1.0 / num_models] * (self.interp_index[1] - self.interp_index[0])
     bounds = [(None, None)] * len(w0)
     bounds[self.interp_index[0] : self.interp_index[1]] = [(self.interp_floor, 1)] * (
         self.interp_index[1] - self.interp_index[0]
     )
     w0 = np.array(w0)
     x, f, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.gradient, bounds=bounds, pgtol=1e-09, iprint=iprint)
     if d["warnflag"] != 0:
         raise OptimisationException(d["task"])
     weights = x[: self.interp_index[0]]
     mix_weights = x[self.interp_index[0] :]
     mix_weights = mix_weights.reshape((num_models - 1, num_phrase_features))
     mix_weights = np.vstack((mix_weights, 1 - np.sum(mix_weights, axis=0)))
     return weights, mix_weights
Example #19
File: hcrf.py Project: dirko/pyhcrf
    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : List of list of ints. Each list of ints represent a training example. Each int in that list
            is the index of a one-hot encoded feature.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        Returns
        -------
        self : object
            Returns self.
        """
        classes = list(set(y))
        num_classes = len(classes)
        self.classes_ = classes
        if self.transitions is None:
            self.transitions = self._create_default_transitions(num_classes, self.num_states)

        # Initialise the parameters
        _, num_features = X[0].shape
        num_transitions, _ = self.transitions.shape
        numpy.random.seed(self._random_seed)
        if self.state_parameters is None:
            self.state_parameters = numpy.random.standard_normal((num_features,
                                                                  self.num_states,
                                                                  num_classes)) * self.state_parameter_noise
        if self.transition_parameters is None:
            self.transition_parameters = numpy.random.standard_normal((num_transitions)) * self.transition_parameter_noise

        initial_parameter_vector = self._stack_parameters(self.state_parameters, self.transition_parameters)
        function_evaluations = [0]

        def objective_function(parameter_vector, batch_start_index=0, batch_end_index=-1):
            ll = 0.0
            gradient = numpy.zeros_like(parameter_vector)
            state_parameters, transition_parameters = self._unstack_parameters(parameter_vector)
            for x, ty in zip(X, y)[batch_start_index: batch_end_index]:
                y_index = classes.index(ty)
                dll, dgradient_state, dgradient_transition = log_likelihood(x,
                                                                            y_index,
                                                                            state_parameters,
                                                                            transition_parameters,
                                                                            self.transitions)
                dgradient = self._stack_parameters(dgradient_state, dgradient_transition)
                ll += dll
                gradient += dgradient

            parameters_without_bias = numpy.array(parameter_vector)  # exclude the bias parameters from being regularized
            parameters_without_bias[0] = 0
            ll -= self.l2_regularization * numpy.dot(parameters_without_bias.T, parameters_without_bias)
            gradient = gradient.flatten() - 2.0 * self.l2_regularization * parameters_without_bias

            if batch_start_index == 0:
                function_evaluations[0] += 1
                if self._verbosity > 0 and function_evaluations[0] % self._verbosity == 0:
                    print '{:10} {:10.2f} {:10.2f}'.format(function_evaluations[0], ll, sum(abs(gradient)))
            return -ll, -gradient

        # If the stochastic gradient stepsize is defined, do 1 epoch of SGD to initialize the parameters.
        if self._sgd_stepsize:
            total_nll = 0.0
            for i in range(len(y)):
                nll, ngradient = objective_function(initial_parameter_vector, i, i + 1)
                total_nll += nll
                initial_parameter_vector -= ngradient * self._sgd_stepsize
                if self._sgd_verbosity > 0:
                    if i % self._sgd_verbosity == 0:
                        print '{:10} {:10.2f} {:10.2f}'.format(i, -total_nll / (i + 1) * len(y), sum(abs(ngradient)))

        self._optimizer_result = fmin_l_bfgs_b(objective_function, initial_parameter_vector, **self.optimizer_kwargs)
        self.state_parameters, self.transition_parameters = self._unstack_parameters(self._optimizer_result[0])
        return self
Example #20
def learn(data,
          p_init,
          p_bounds,
          parametrisation,
          A,
          reg_func,
          loss,
          penalty,
          params,
          track=False):
    if track:
        tracker = ObjectiveTracker(params,
                                   parametrisation,
                                   print_to_stdout=True)

        @tracker
        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data, f_pen, g
    else:
        counter = 0

        def callback(p):
            nonlocal counter
            counter += 1
            S, alpha, eps = parametrisation(torch.tensor(p), params)
            S = S.reshape(-1).cpu().numpy()
            alpha = alpha.cpu().numpy()
            print(
                '\nIteration #{}: Current sampling rate {:.1f}%, alpha {:.2e}, eps {:.2e}'
                .format(counter,
                        np.mean(S > 0) * 100, alpha.item(), eps.item()))

        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data + f_pen, g

    start_time = datetime.datetime.now()
    if 'pgtol' in params['alg_params']['LBFGSB']:
        pgtol = params['alg_params']['LBFGSB']['pgtol']
    else:
        pgtol = 1e-10
    if 'maxit' in params['alg_params']['LBFGSB']:
        maxiter = params['alg_params']['LBFGSB']['maxit']
    else:
        maxiter = 1000
    print('Learning sampling pattern:')
    p, _, info = fmin_l_bfgs_b(
        lambda p: obj(p, data, params),
        p_init,
        bounds=p_bounds,
        pgtol=pgtol,
        factr=0,
        maxiter=maxiter,
        callback=tracker.callback if track else callback)
    end_time = datetime.datetime.now()
    elapsed_time = end_time - start_time
    results = {'elapsed_time': elapsed_time, 'p': p, 'info': info}
    if track:
        results['tracker'] = tracker
    return results
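The callback keyword used in the call above is invoked with the current parameter vector once per L-BFGS-B iteration; that is how both the ObjectiveTracker and the plain progress printer hook in. A minimal sketch of the mechanism on a toy objective (all names here are illustrative):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

history = []

def record(xk):
    # called by fmin_l_bfgs_b after each iteration with the current iterate
    history.append(xk.copy())

def f_and_g(x):
    # objective and gradient returned together
    return np.sum((x - 3.0) ** 2), 2.0 * (x - 3.0)

x, fval, d = fmin_l_bfgs_b(f_and_g, np.zeros(4), callback=record)
print(len(history), 'iterations recorded, warnflag =', d['warnflag'])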
Example #21
File: smoothn.py Project: UCL/kaska
def smoothn(y,
            nS0=10,
            axis=None,
            smoothOrder=2.0,
            sd=None,
            verbose=False,
            s0=None,
            z0=None,
            isrobust=False,
            w=None,
            s=None,
            max_iter=100,
            tol_z=1e-3,
            weightstr='bisquare'):
    """
    Robust spline smoothing for 1-D to n-D data.

    SMOOTHN provides a fast, automated and robust discretized smoothing
    spline for data of any dimension.

    Parameters
    ----------
    y : numpy array or numpy masked array
        The data to be smoothed.

    nS0 : int, optional
        The number of samples to use when estimating the smoothing parameter.
        Default value is 10.

    smoothOrder : float, optional
        The polynomial order to smooth the function to.
        Default value is 2.0.

    sd : numpy array, optional
        Weighting of the data points in standard deviation format.
        Default is to not weight by standard deviation.

    verbose : { True, False }, optional
        Create extra logging during operation.

    s0 : float, optional
        Initial value of the smoothing parameter.
        Defaults to no value, being instead derived from calculation.

    z0 : float, optional
        Initial estimate of the smoothed data.

    isrobust : { False, True }
        Whether the smoothing applies the robust smoothing algorithm. This
        allows the smoothing to ignore outlier data without creating large
        spikes to fit the data.

    w : numpy array, optional
        Linear weighting to apply to the data.
        Default is to assume no linear weighting.

    s : float
        Initial smoothing parameter.
        Default is to calculate a value.

    max_iter : int, optional
        The maximum number of iterations to attempt the smoothing.
        Default is 100 iterations.

    tol_z: float, optional
        Tolerance at which the smoothing will be considered converged.
        Default value is 1e-3

    weightstr : { 'bisquare', 'cauchy', 'talworth'}, optional
        The type of weighting applied to the data when performing robust smoothing.

    Returns
    -------

    (z, s, exitflag)
        A tuple of the returned results.
    z : numpy array
        The smoothed data.
    s : float
        The value of the smoothing parameter used to perform this smoothing.
    exitflag : {0, -1}
        A return flag of 0 indicates successful execution, -1 an error
        (see the log).

    Notes
    -----

    Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
    can be any n-D noisy array (time series, images, 3D data,...). Non
    finite data (NaN or Inf) are treated as missing values.

    Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
    S must be a real positive scalar. The larger S is, the smoother the
    output will be. If the smoothing parameter S is omitted (see previous
    option) or empty (i.e. S = []), it is automatically determined using
    the generalized cross-validation (GCV) method.

    Z = SMOOTHN(Y,w) or Z = SMOOTHN(Y,w,S) specifies a weighting array w of
    real positive values, that must have the same size as Y. Note that a
    nil weight corresponds to a missing value.

    Robust smoothing
    ----------------
    Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
    the influence of outlying data.

    [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
    you can fine-tune the smoothing subsequently if needed.

    An iteration process is used in the presence of weighted and/or missing
    values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
    termination parameters specified by OPTION_NAME and OPTION_VALUE. They
    can contain the following criteria:
        -----------------
        tol_z:       Termination tolerance on Z (default = 1e-3)
                    tol_z must be in ]0,1[
        max_iter:    Maximum number of iterations allowed (default = 100)
        Initial:    Initial value for the iterative process (default =
                    original data)
        -----------------
    Syntax: [Z,...] = SMOOTHN(...,'max_iter',500,'tol_z',1e-4,'Initial',Z0);

    [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
    describes the exit condition of SMOOTHN:
        1       SMOOTHN converged.
        0       Maximum number of iterations was reached.

    Class Support
    -------------
    Input array can be numeric or logical. The returned array is of class
    double.

    Notes
    -----
    The n-D (inverse) discrete cosine transform functions <a
    href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')"
    >DCTN</a> and <a
    href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')"
    >IDCTN</a> are required.

    To be made
    ----------
    Estimate the confidence bands (see Wahba 1983, Nychka 1988).

    Reference
    ---------
    Garcia D, Robust smoothing of gridded data in one and higher dimensions
    with missing values. Computational Statistics & Data Analysis, 2010
    <a
    href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')"
    >PDF download</a>

    Examples:
    --------
    # 1-D example
    x = linspace(0,100,2**8);
    y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
    y[[70, 75, 80]] = [5.5, 5, 6];
    z = smoothn(y); # Regular smoothing
    zr = smoothn(y,'robust'); # Robust smoothing
    subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
    axis square, title('Regular smoothing')
    subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
    axis square, title('Robust smoothing')

    # 2-D example
    xp = 0:.02:1;
    [x,y] = meshgrid(xp);
    f = exp(x+y) + sin((x-2*y)*3);
    fn = f + randn(size(f))*0.5;
    fs = smoothn(fn);
    subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
    subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

    # 2-D example with missing data
    n = 256;
    y0 = peaks(n);
    y = y0 + rand(size(y0))*2;
    I = randperm(n^2);
    y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
    y(40:90,140:190) = NaN; # create a hole
    z = smoothn(y); # smooth data
    subplot(2,2,1:2), imagesc(y), axis equal off
    title('Noisy corrupt data')
    subplot(223), imagesc(z), axis equal off
    title('Recovered data ...')
    subplot(224), imagesc(y0), axis equal off
    title('... compared with original data')

    # 3-D example
    [x,y,z] = meshgrid(-2:.2:2);
    xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
    vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
    subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
    title('Noisy data')
    v = smoothn(vn);
    subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
    title('Smoothed data')

    # Cardioid
    t = linspace(0,2*pi,1000);
    x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
    y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
    z = smoothn(complex(x,y));
    plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
    axis equal tight

    # Cellular vortical flow
    [x,y] = meshgrid(linspace(0,1,24));
    Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
    Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
    Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
    Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
    I = randperm(numel(Vx));
    Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
    Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
    Vx(I(31:60)) = NaN; # missing values
    Vy(I(31:60)) = NaN; # missing values
    Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
    subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
    title('Noisy velocity field')
    subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
    title('Smoothed velocity field')

    See also SMOOTH, SMOOTH3, DCTN, IDCTN.

    -- Damien Garcia -- 2009/03, revised 2010/11
    Visit my <a
    href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')
    >website</a> for more details about SMOOTHN

    """

    (y, w) = preprocessing(y, w, sd)

    sizy = y.shape

    # sort axis
    if axis is None:
        axis = tuple(np.arange(y.ndim))

    noe = y.size  # number of elements
    if noe < 2:
        return y, s, EXIT_SUCCESS, W_TOT_DEFAULT

    # ---
    # "Weighting function" criterion
    weightstr = weightstr.lower()
    # ---
    # Weights. Zero weights are assigned to not finite values (Inf or NaN),
    # (Inf/NaN values = missing data).
    is_finite = np.isfinite(y)
    nof = np.sum(is_finite)  # number of finite elements
    # ---
    # Weighted or missing data?
    isweighted = np.any(w != 1)
    # ---
    # Automatic smoothing?
    isauto = not s

    # Creation of the Lambda tensor
    lambda_ = define_lambda(y, axis)

    #  Upper and lower bound for the smoothness parameter
    s_min_bnd, s_max_bnd = smoothness_bounds(y)

    #  Initialize before iterating
    y_tensor_rank = np.sum(np.array(sizy) != 1)  # tensor rank of the y-array
    # ---
    w_tot = w
    # --- Initial conditions for z
    z = initial_z(y, z0, isweighted)
    # ---
    z0 = z
    y[~is_finite] = 0  # arbitrary values for missing y-data
    # ---
    tol = 1.0
    robust_iterative_process = True
    robust_step = 1
    nit = 0
    # --- Error on p. Smoothness parameter s = 10^p
    errp = 0.1
    # opt = optimset('TolX',errp);
    # --- Relaxation factor relaxation_factor: to speedup convergence
    relaxation_factor = 1 + 0.75 * isweighted
    # ??
    #  Main iterative process
    # ---
    xpost = init_xpost(s, s_min_bnd, s_max_bnd, isauto)

    while robust_iterative_process:
        # --- "amount" of weights (see the function GCVscore)
        aow = np.sum(w_tot) / noe  # 0 < aow <= 1
        # ---
        while tol > tol_z and nit < max_iter:
            if verbose:
                LOG.info(f"tol {tol:g} nit {nit:d}")
            nit = nit + 1
            dct_y = dctND(w_tot * (y - z) + z, f=dct)
            if isauto and not np.remainder(np.log2(nit), 1):
                # ---
                # The generalized cross-validation (GCV) method is used.
                # We seek the smoothing parameter s that minimizes the GCV
                # score i.e. s = Argmin(GCVscore).
                # Because this process is time-consuming, it is performed from
                # time to time (when nit is a power of 2)
                # ---
                # errp in here somewhere

                # bounds = [(log10(s_min_bnd),log10(s_max_bnd))]
                # args = (lambda_, aow,dct_y,is_finite,w_tot,y,nof,noe)
                # xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,
                # factr=10., approx_grad=True,bounds=,bounds\
                #   args=args)

                # if we have no clue what value of s to use, better span the
                # possible range to get a reasonable starting point ...
                # only need to do it once though. nS0 is the number of samples
                # used
                if not s0:
                    ss = np.arange(nS0) * (1.0 / (nS0 - 1.0)) * (np.log10(
                        s_max_bnd) - np.log10(s_min_bnd)) + np.log10(s_min_bnd)
                    g = np.zeros_like(ss)
                    for i, p in enumerate(ss):
                        g[i] = gcv(p, lambda_, aow, dct_y, is_finite, w_tot, y,
                                   nof, noe, smoothOrder)
                    xpost = [ss[g == g.min()]]
                else:
                    xpost = [s0]
                bounds = [(np.log10(s_min_bnd), np.log10(s_max_bnd))]
                args = (lambda_, aow, dct_y, is_finite, w_tot, y, nof, noe,
                        smoothOrder)
                xpost, _, _ = lbfgsb.fmin_l_bfgs_b(gcv,
                                                   xpost,
                                                   fprime=None,
                                                   factr=1e7,
                                                   approx_grad=True,
                                                   bounds=bounds,
                                                   args=args)
            s = 10**xpost[0]
            # update the value we use for the initial s estimate
            s0 = xpost[0]

            gamma = gamma_from_lambda(lambda_, s, smoothOrder)

            z = relaxation_factor * dctND(gamma*dct_y, f=idct) +\
                (1 - relaxation_factor) * z
            # if no weighted/missing data => tol=0 (no iteration)
            tol = isweighted * norm(z0 - z) / norm(z)

            z0 = z  # re-initialization
        exitflag = nit < max_iter

        if isrobust:  # -- Robust Smoothing: iteratively re-weighted process
            # --- average leverage
            h = np.sqrt(1 + 16.0 * s)
            h = np.sqrt(1 + h) / np.sqrt(2) / h
            h = h**y_tensor_rank
            # --- take robust weights into account
            w_tot = w * robust_weights(y - z, is_finite, h, weightstr)
            # --- re-initialize for another iterative weighted process
            isweighted = True
            tol = 1
            nit = 0
            # ---
            robust_step = robust_step + 1
            # 3 robust steps are enough.
            robust_iterative_process = robust_step < 3
        else:
            robust_iterative_process = False  # stop the whole process

    #  Warning messages
    # ---
    if isauto:
        limit = ""
        if np.abs(np.log10(s) - np.log10(s_min_bnd)) < errp:
            limit = "lower"
        elif np.abs(np.log10(s) - np.log10(s_max_bnd)) < errp:
            limit = "upper"
        warning(f"smoothn:S{limit.capitalize()}Bound", [
            f"s = {s:.3f}: the {limit} bound for s has been reached. " +
            "Put s as an input variable if required."
        ])

    return z, s, exitflag, w_tot
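The GCV search inside smoothn relies on approx_grad=True, i.e. the gradient of the score is estimated by finite differences rather than supplied analytically, and the single log10(s) variable is kept inside bounds. A self-contained sketch of that calling pattern (the objective below is a stand-in for the real GCV score):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def score(p):
    # stand-in for a GCV-style score of the log10 smoothing parameter
    return float((p[0] - 1.5) ** 2)

xpost, fval, d = fmin_l_bfgs_b(score, np.atleast_1d(0.0),
                               approx_grad=True,
                               bounds=[(-3.0, 3.0)],
                               factr=1e7)
print(10 ** xpost[0], fval, d['warnflag'])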
Example #22
'''

from scipy.optimize.lbfgsb import fmin_l_bfgs_b

bou = np.array([[-1, 1], [0, 1]])


def f(x):
    x_1 = x[0]
    x_2 = x[1]
    return np.sin(x_1 + x_2) * x_1


print(len(bou))
x0 = [0.2, 0.3]
x_1, f_1, d_1 = fmin_l_bfgs_b(f, x0, bounds=bou, maxfun=1500, approx_grad=True)
print(x_1)
print(f_1)
print(d_1)
'''

def f_d(x_1, x_2):
    return pdist(np.vstack([x_1, x_2]))



dis = [f_d(x[i], y) for i in range(x.shape[0])]

print(dis)

Example #23
    def optimize(self, parallel=False, parallel_verbose=0, **kwargs):
        # -- Getting parameters --
        # ------------------------
        #Upper level inputs
        mask_type = kwargs.get("mask_type", "")
        learn_mask = kwargs.get("learn_mask", True)
        learn_alpha = kwargs.get("learn_alpha", True)
        l0 = kwargs.get("l0", None)
        p0 = kwargs.get("p0", None)
        shots = False

        # -- Checking inputs --
        if mask_type in ["cartesian", "radial_CO"]:
            if l0 is None:
                raise ValueError(
                    "an initial mask parametrisation l0 must be given")
            shots = True
        else:
            if p0 is None: raise ValueError("an initial mask p0 must be given")

        t1 = time.time()
        self.niter = 0

        # -- Initializing --
        # ------------------
        print("Multithread:", parallel)

        if shots:
            n = len(l0) - 1
            self.alphas = [l0[-1]]
        else:
            n = len(p0) - 1
            self.alphas = [p0[-1]]

        self.energy_upper = [
            E(lk=l0,
              pk=p0,
              mask_type=mask_type,
              images=self.images,
              kspace_data=self.kspace_data,
              samples=self.samples,
              wavelet_name=self.wavelet_name,
              wavelet_scale=self.wavelet_scale,
              param=self.param,
              verbose=self.verbose,
              const=self.const,
              n_rad=self.n_rad,
              parallel=parallel)
        ]

        # -- Using L-BFGS-B --
        # --------------------
        if shots:
            #Optimize l
            lf, _, _ = fmin_l_bfgs_b(
                lambda x: E(lk=x,
                            mask_type=mask_type,
                            images=self.images,
                            kspace_data=self.kspace_data,
                            samples=self.samples,
                            wavelet_name=self.wavelet_name,
                            wavelet_scale=self.wavelet_scale,
                            param=self.param,
                            verbose=self.verbose,
                            const=self.const,
                            n_rad=self.n_rad,
                            parallel=parallel,
                            parallel_verbose=parallel_verbose),
                l0,
                lambda x: grad_E(lk=x,
                                 mask_type=mask_type,
                                 images=self.images,
                                 kspace_data=self.kspace_data,
                                 samples=self.samples,
                                 wavelet_name=self.wavelet_name,
                                 wavelet_scale=self.wavelet_scale,
                                 param=self.param,
                                 verbose=self.verbose,
                                 const=self.const,
                                 n_rad=self.n_rad,
                                 learn_mask=learn_mask,
                                 learn_alpha=learn_alpha,
                                 parallel=parallel,
                                 parallel_verbose=parallel_verbose),
                bounds=[(0, 1)] * n + [(1e-10, np.inf)],
                pgtol=self.pgtol,
                maxfun=self.maxfun,
                maxiter=self.maxiter,
                maxls=2,
                callback=lambda x: self.fcall(x, mask_type))

        else:
            #Optimize p directly
            pf, _, _ = fmin_l_bfgs_b(
                lambda x: E(pk=x,
                            mask_type=mask_type,
                            images=self.images,
                            kspace_data=self.kspace_data,
                            samples=self.samples,
                            wavelet_name=self.wavelet_name,
                            wavelet_scale=self.wavelet_scale,
                            param=self.param,
                            verbose=self.verbose,
                            const=self.const,
                            n_rad=self.n_rad,
                            parallel=parallel,
                            parallel_verbose=parallel_verbose),
                p0,
                lambda x: grad_E(pk=x,
                                 mask_type=mask_type,
                                 images=self.images,
                                 kspace_data=self.kspace_data,
                                 samples=self.samples,
                                 wavelet_name=self.wavelet_name,
                                 wavelet_scale=self.wavelet_scale,
                                 param=self.param,
                                 verbose=self.verbose,
                                 const=self.const,
                                 n_rad=self.n_rad,
                                 learn_mask=learn_mask,
                                 learn_alpha=learn_alpha,
                                 parallel=parallel,
                                 parallel_verbose=parallel_verbose),
                bounds=[(0, 1)] * n + [(1e-10, np.inf)],
                pgtol=self.pgtol,
                maxfun=self.maxfun,
                maxiter=self.maxiter,
                maxls=2,
                callback=lambda x: self.fcall(x, mask_type))

        # -- Returning output --
        # ----------------------
        print("\033[1m" + f"\nFINISHED IN {time.time()-t1} SECONDS\n" +
              "\033[0m")
        if shots: return lf, self.energy_upper, self.alphas
        else: return pf, self.energy_upper, self.alphas
Example #24
def train():

    np.random.seed(131742)
    #get sentences, trees and labels
    nExamples = -1
    print "loading data.."
    rnnData = RNNDataCorpus()
    rnnData.load_data(load_file=config.train_data, nExamples=nExamples)

    #initialize params
    print "initializing params"
    params = Params(data=rnnData, wordSize=50, rankWo=2)

    #define theta
    #one vector for all the parameters of mvrnn model:  W, Wm, Wlabel, L, Lm
    n = params.wordSize
    fanIn = params.fanIn
    nWords = params.nWords
    nLabels = params.categories
    rank = params.rankWo
    Wo = 0.01 * np.random.randn(n + 2 * n * rank, nWords)  #Lm, as in paper
    Wo[:n, :] = np.ones((n, Wo.shape[1]))  #Lm, as in paper
    Wcat = 0.005 * np.random.randn(nLabels, fanIn)  #Wlabel, as in paper
    #    Wv = 0.01*np.random.randn(n, nWords)
    #    WO = 0.01*np.random.randn(n, 2*n)
    #    W = 0.01*np.random.randn(n, 2*n+1)

    #load pre-trained weights here
    mats = sio.loadmat(config.pre_trained_weights)
    Wv = mats.get('Wv')  #L, as in paper
    W = mats.get('W')  #W, as in paper
    WO = mats.get('WO')  #Wm, as in paper

    sentencesIdx = np.arange(rnnData.ndoc())
    np.random.shuffle(sentencesIdx)
    nTrain = 4 * len(sentencesIdx) / 5
    trainSentIdx = sentencesIdx[0:nTrain]
    testSentIdx = sentencesIdx[nTrain:]
    batchSize = 5
    nBatches = len(trainSentIdx) / batchSize
    evalFreq = 5  #evaluate after every 5 minibatches
    nTestSentEval = 50  #number of test sentences to be evaluated

    rnnData_train = RNNDataCorpus()
    rnnData.copy_into_minibatch(rnnData_train, trainSentIdx)

    rnnData_test = RNNDataCorpus()
    if (len(testSentIdx) > nTestSentEval):
        #        np.random.shuffle(testSentIdx)  #choose random test examples
        thisTestSentIdx = testSentIdx[:nTestSentEval]
    else:
        thisTestSentIdx = testSentIdx
    rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx)

    #    [Wv_test, Wo_test, _] = getRelevantWords(rnnData_test, Wv,Wo,params)
    [Wv_trainTest, Wo_trainTest, all_train_idx
     ] = getRelevantWords(rnnData, Wv, Wo,
                          params)  #sets nWords_reduced, returns new arrays
    theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(),
                            Wv_trainTest.flatten(), Wo_trainTest.flatten()))

    #optimize
    print "starting training..."
    nIter = 100
    rnnData_minibatch = RNNDataCorpus()
    for i in range(nIter):
        #train in minibatches
        #        ftrain = np.zeros(nBatches)
        #        for ibatch in range(nBatches):
        #            set_minibatch(rnnData, rnnData_minibatch, ibatch, nBatches, trainSentIdx)

        #            print 'Iteration: ', i, ' minibatch: ', ibatch
        tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b(
            func=costFn,
            x0=theta,
            fprime=None,
            args=(rnnData_train, params),
            approx_grad=0,
            bounds=None,
            m=5,
            factr=1000000000000000.0,
            pgtol=1.0000000000000001e-5,
            epsilon=1e-08,
            iprint=3,
            maxfun=1,
            disp=0)

        #map parameters back
        W[:, :], WO[:, :], Wcat[:, :], Wv_trainTest, Wo_trainTest = unroll_theta(
            tunedTheta, params)
        Wv[:, all_train_idx] = Wv_trainTest
        Wo[:, all_train_idx] = Wo_trainTest

        #        ftrain[ibatch] = fbatch_train
        theta = tunedTheta  #for next iteration

        print "========================================"
        print "XXXXXXIteration ", i,
        print "Average cost: ", np.average(fbatch_train)
        evaluate(Wv, Wo, W, WO, Wcat, params, rnnData_test)
        print "========================================"

        #save weights
        save_dict = {'Wv': Wv, 'Wo': Wo, 'Wcat': Wcat, 'W': W, 'WO': WO}
        sio.savemat(config.saved_params_file + '_lbfgs_iter' + str(i),
                    mdict=save_dict)
        print "saved tuned theta. "
Example #25
    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : List of list of ints. Each list of ints represent a training example. Each int in that list
            is the index of a one-hot encoded feature.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        Returns
        -------
        self : object
            Returns self.
        """
        classes = list(set(y))
        num_classes = len(classes)
        self.classes_ = classes
        if self.transitions is None:
            self.transitions = self._create_default_transitions(
                num_classes, self.num_states)

        # Initialise the parameters
        _, num_features = X[0].shape
        num_transitions, _ = self.transitions.shape
        numpy.random.seed(self._random_seed)
        if self.state_parameters is None:
            self.state_parameters = numpy.random.standard_normal(
                (num_features, self.num_states,
                 num_classes)) * self.state_parameter_noise
        if self.transition_parameters is None:
            self.transition_parameters = numpy.random.standard_normal(
                (num_transitions)) * self.transition_parameter_noise

        initial_parameter_vector = self._stack_parameters(
            self.state_parameters, self.transition_parameters)
        function_evaluations = [0]

        def objective_function(parameter_vector,
                               batch_start_index=0,
                               batch_end_index=-1):
            ll = 0.0
            gradient = numpy.zeros_like(parameter_vector)
            state_parameters, transition_parameters = self._unstack_parameters(
                parameter_vector)
            for x, ty in zip(X, y)[batch_start_index:batch_end_index]:
                y_index = classes.index(ty)
                dll, dgradient_state, dgradient_transition = log_likelihood(
                    x, y_index, state_parameters, transition_parameters,
                    self.transitions)
                dgradient = self._stack_parameters(dgradient_state,
                                                   dgradient_transition)
                ll += dll
                gradient += dgradient

            parameters_without_bias = numpy.array(
                parameter_vector
            )  # exclude the bias parameters from being regularized
            parameters_without_bias[0] = 0
            ll -= self.l2_regularization * numpy.dot(parameters_without_bias.T,
                                                     parameters_without_bias)
            gradient = gradient.flatten(
            ) - 2.0 * self.l2_regularization * parameters_without_bias

            if batch_start_index == 0:
                function_evaluations[0] += 1
                if self._verbosity > 0 and function_evaluations[
                        0] % self._verbosity == 0:
                    print '{:10} {:10.2f} {:10.2f}'.format(
                        function_evaluations[0], ll, sum(abs(gradient)))
            return -ll, -gradient

        # If the stochastic gradient stepsize is defined, do 1 epoch of SGD to initialize the parameters.
        if self._sgd_stepsize:
            total_nll = 0.0
            for i in range(len(y)):
                nll, ngradient = objective_function(initial_parameter_vector,
                                                    i, i + 1)
                total_nll += nll
                initial_parameter_vector -= ngradient * self._sgd_stepsize
                if self._sgd_verbosity > 0:
                    if i % self._sgd_verbosity == 0:
                        print '{:10} {:10.2f} {:10.2f}'.format(
                            i, -total_nll / (i + 1) * len(y),
                            sum(abs(ngradient)))

        self._optimizer_result = fmin_l_bfgs_b(objective_function,
                                               initial_parameter_vector,
                                               **self.optimizer_kwargs)
        self.state_parameters, self.transition_parameters = self._unstack_parameters(
            self._optimizer_result[0])
        return self
Example #26
    def learn(self, X, y):
        """
        Learn the model from the given data.

        :param X: the attribute data
        :type X: numpy.array

        :param y: the class variable data
        :type y: numpy.array

        """
        def rand(eps):
            """Return random number in interval [-eps, eps]."""
            return rnd.random() * 2 * eps - eps

        def g_func(z):
            """The sigmoid (logistic) function."""
            return 1. / (1. + np.exp(-z))

        def h_func(thetas, x):
            """The model function."""
            a = np.array([[1.] + list(x)]).T # Initialize a
            for l in range(1, len(thetas) + 1): # Forward propagation
                a = np.vstack((np.array([[1.]]), g_func(thetas[l - 1].T.dot(a))))
            return a[1:]

        def llog(val):
            "The limited logarithm."
            e = 1e-10
            return np.log(np.clip(val, e, 1. - e))
            
        def unroll(thetas):
            """Unrolls a list of thetas into vector."""
            sd = [m.shape for m in thetas] # Keep the shape data
            thetas = np.concatenate([theta.reshape(np.prod(theta.shape))for theta in thetas])
            return thetas, sd

        def roll(thetas, sd):
            """Rolls a vector of thetas back into list."""
            thetas = np.split(thetas, [sum(np.prod(s) for s in sd[:i]) + np.prod(sd[i]) for i in range(len(sd) - 1)])
            return [np.reshape(theta, sd[i]) for i, theta in enumerate(thetas)]

        def cost(thetas, X, y, sd, S, lambda_):
            """The cost function of the neural network."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            reg_factor = (lambda_ / float(2 * m)) * sum(sum(sum(thetas[l][1:, :]**2)) for l in range(L - 1))
            cost = (-1. / float(m)) * sum(sum(self._classes_map[y[s]] * llog(h_func(thetas, X[s])) + (1. - self._classes_map[y[s]]) * llog(1. - h_func(thetas, X[s]))) for s in range(m)) + reg_factor
            if self._verbose: print("Current value of cost func.: " + str(cost[0]))
            return cost[0]
        
        def grad(thetas, X, y, sd, S, lambda_):
            """The gradient (derivate) function which includes the back
            propagation algorithm."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            d = [np.array([[0. for j in range(S[l + 1])] for i in range(S[l] + 1)]) for l in range(L - 1)] # Initialize the delta matrix
            
            for s in range(m):
                a = [np.array([[1.] + list(X[s])]).T] # Initialize a (only a, d & theta matrices have 1 more element in columns, biases)

                for l in range(1, L): # Forward propagation
                    a.append(np.vstack((np.array([[1.]]), g_func(thetas[l - 1].T.dot(a[l - 1])))))
                # TODO
                # Softmax: treat last a column differently
                #ez = np.exp(thetas[L - 2].T.dot(a[L - 2]))
                #sez = sum(ez)
                #a.append(np.vstack((np.array([[1]]), ez / sez)))
                
                deltas = [None for l in range(L - 1)] + [a[-1][1:] - self._classes_map[y[s]]]
                for l in range(L - 2, 0, -1): # Backward propagation
                    deltas[l] = (thetas[l].dot(deltas[l + 1]) * (a[l] * (1. - a[l])))[1:]

                for l in range(L - 1):
                    d[l] = d[l] + a[l].dot(deltas[l + 1].T)
            
            D = [(1. / float(m)) * d[l] + lambda_ * thetas[l] for l in range(L - 1)]
            D = [Di - lambda_ * np.vstack((thetas[l][0], np.zeros((Di.shape[0] - 1, Di.shape[1])))) for l, Di in enumerate(D)] # Where i = 0, don't use regularization
            D, _ = unroll(D)
            return D

        def gradApprox(thetas, X, y, sd, S, lambda_):
            """Approximate the gradient of the cost function
            (only used for debugging, not in final version)."""
            eps = 1e-14
            return (grad(thetas + eps, X, y, sd, S, lambda_) - grad(thetas - eps, X, y, sd, S, lambda_)) / float(2 * eps)
        
        # Set the random seed
        rnd.seed(self._seed)
        
        # Initialize the final layer of neural net (outputs)
        self._classes = list(set(y))
        for i, cl in enumerate(self._classes):
            self._classes_map[cl] = np.zeros((len(self._classes), 1))
            self._classes_map[cl][i] = 1.
            
        S = [len(X[0])] + self._hl + [len(self._classes)] # Complete information about levels
        L = len(S)
        thetas0 = [np.array([[rand(self._eps) for j in range(S[l + 1])] for i in range(S[l] + 1)]) for l in range(L - 1)] # Initialize the thetas matrix
        thetas0, sd = unroll(thetas0)
        #return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda) # For testing

        # The L-BFGS-B bounds parameter is redefined: input is (lower_bound, upper_bound) instead of array of bounds for each theta parameter
        if self._opt_args != None and "bounds" in self._opt_args:
            bounds = [self._opt_args["bounds"] for i in range(len(thetas0))]
            self._opt_args["bounds"] = bounds
            
        self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(cost, thetas0, grad, args = (X, y, sd, S, self._lambda), **self._opt_args)
        self._thetas = roll(self._thetas, sd)
        self._cost = float(self._cost)
        self._can_classify = True
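
The block above broadcasts a single (lower_bound, upper_bound) pair to every entry of the unrolled theta vector because fmin_l_bfgs_b expects one (min, max) pair per element of x0. A minimal, self-contained sketch of that convention on a toy quadratic (the objective and bounds below are illustrative, not part of the model above):

import numpy as np
from scipy.optimize.lbfgsb import fmin_l_bfgs_b

def f(x):
    # toy cost and its gradient; the unconstrained minimum is at x = 2
    return np.sum((x - 2.) ** 2), 2. * (x - 2.)

x0 = np.zeros(3)
single_bound = (-1., 1.)                             # one (lower, upper) pair
bounds = [single_bound for _ in range(len(x0))]      # expanded to one pair per parameter
x, fval, info = fmin_l_bfgs_b(f, x0, bounds=bounds)  # each x[i] ends up clipped at 1.0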
Example #27
0
def binary_debug(svm, data,
        l2_regularization=1e-3,
        dtype='float64',
        cost_fn='L2Huber',
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
        bfgs_maxfun=1000,
        decisions=None
        ):
    n_features, = svm.weights.shape
    X, y = data

    assert set(y) == set([-1, 1])
    _X = theano.shared(X.astype(dtype), allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype), allow_downcast=True, borrow=True)

    sgd_params = tensor.vector(dtype=dtype)

    sgd_weights = sgd_params[:n_features]
    sgd_bias = sgd_params[n_features]

    margin = _yvecs * (tensor.dot(_X, sgd_weights)
            #+ sgd_bias
            )

    # XXX REFACTOR
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            sgd_weights, sgd_weights)

    cost = losses.mean() + l2_cost  + sgd_bias ** 2
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    def flatten_svm(obj):
        # Note this is different from multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    def f(p):
        c, d = _f_df(p.astype(dtype))
        return c.astype('float64'), d.astype('float64')

    params = np.zeros(n_features + 1)
    params[:n_features] = svm.weights
    params[n_features] = svm.bias

    best, bestval, info_dct = fmin_l_bfgs_b(f,
            params,
            iprint=1,
            factr=1e-5,
            maxfun=bfgs_maxfun,
            m=50,
            pgtol=1e-5,
            )
    best_svm = copy.deepcopy(svm)
    best_svm.weights = np.array(best[:n_features], dtype=dtype)
    best_svm.bias = float(best[n_features])

    # why ???
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return best_svm
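
A note on the 'L2Huber' branch above: it joins two pieces at margin = -1. For margin > -1 the loss is the squared hinge max(0, 1 - margin)**2; for margin <= -1 it continues as the straight line -4*margin. At the joint both pieces take the value 4 and both have slope -4, so the loss stays once continuously differentiable (convenient for a quasi-Newton solver like L-BFGS-B) while growing only linearly, rather than quadratically, for badly misclassified examples.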
Example #28
0
import openbabel
import numpy

from scipy.optimize.lbfgsb import fmin_l_bfgs_b

def f(x):
    return x[0]**2 + x[1]**2

def g(x):
    return numpy.array([2*x[0], 2*x[1]])

x0 = numpy.array([3,1])

opt, energy, dict = fmin_l_bfgs_b(f, x0, fprime=g)

print opt
Example #29
0
 def optimise_lbfgs(self, start):
     print
     print "***** LBFGS OPTIMISATION  *****"
     x, f, d = fmin_l_bfgs_b(self.objective, start, fprime=self.grad, pgtol=1e-09, iprint=0)
     return x
Example #30
0
def smoothn(y,nS0=10,axis=None,smoothOrder=2.0,sd=None,verbose=False,\
	s0=None,z0=None,isrobust=False,W=None,s=None,MaxIter=100,TolZ=1e-3,weightstr='bisquare'):
  '''
   function [z,s,exitflag,Wtot] = smoothn(varargin)

   SMOOTHN Robust spline smoothing for 1-D to N-D data.
   SMOOTHN provides a fast, automatized and robust discretized smoothing
   spline for data of any dimension.

   Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
   can be any N-D noisy array (time series, images, 3D data,...). Non
   finite data (NaN or Inf) are treated as missing values.

   Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
   S must be a real positive scalar. The larger S is, the smoother the
   output will be. If the smoothing parameter S is omitted (see previous
   option) or empty (i.e. S = []), it is automatically determined using
   the generalized cross-validation (GCV) method.

   Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
   real positive values, that must have the same size as Y. Note that a
   nil weight corresponds to a missing value.

   Robust smoothing
   ----------------
   Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
   the influence of outlying data.

   [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
   you can fine-tune the smoothing subsequently if needed.

   An iteration process is used in the presence of weighted and/or missing
   values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
   termination parameters specified by OPTION_NAME and OPTION_VALUE. They
   can contain the following criteria:
       -----------------
       TolZ:       Termination tolerance on Z (default = 1e-3)
                   TolZ must be in ]0,1[
       MaxIter:    Maximum number of iterations allowed (default = 100)
       Initial:    Initial value for the iterative process (default =
                   original data)
       -----------------
   Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);

   [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
   describes the exit condition of SMOOTHN:
       1       SMOOTHN converged.
       0       Maximum number of iterations was reached.

   Class Support
   -------------
   Input array can be numeric or logical. The returned array is of class
   double.

   Notes
   -----
   The N-D (inverse) discrete cosine transform functions <a
   href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')"
   >DCTN</a> and <a
   href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')"
   >IDCTN</a> are required.

   To be made
   ----------
   Estimate the confidence bands (see Wahba 1983, Nychka 1988).

   Reference
   --------- 
   Garcia D, Robust smoothing of gridded data in one and higher dimensions
   with missing values. Computational Statistics & Data Analysis, 2010. 
   <a
   href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>

   Examples:
   --------
   # 1-D example
   x = linspace(0,100,2**8);
   y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
   y[[70, 75, 80]] = [5.5, 5, 6];
   z = smoothn(y); # Regular smoothing
   zr = smoothn(y,'robust'); # Robust smoothing
   subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
   axis square, title('Regular smoothing')
   subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
   axis square, title('Robust smoothing')

   # 2-D example
   xp = 0:.02:1;
   [x,y] = meshgrid(xp);
   f = exp(x+y) + sin((x-2*y)*3);
   fn = f + randn(size(f))*0.5;
   fs = smoothn(fn);
   subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
   subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

   # 2-D example with missing data
   n = 256;
   y0 = peaks(n);
   y = y0 + rand(size(y0))*2;
   I = randperm(n^2);
   y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
   y(40:90,140:190) = NaN; # create a hole
   z = smoothn(y); # smooth data
   subplot(2,2,1:2), imagesc(y), axis equal off
   title('Noisy corrupt data')
   subplot(223), imagesc(z), axis equal off
   title('Recovered data ...')
   subplot(224), imagesc(y0), axis equal off
   title('... compared with original data')

   # 3-D example
   [x,y,z] = meshgrid(-2:.2:2);
   xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
   vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
   subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
   title('Noisy data')
   v = smoothn(vn);
   subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
   title('Smoothed data')

   # Cardioid
   t = linspace(0,2*pi,1000);
   x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
   y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
   z = smoothn(complex(x,y));
   plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
   axis equal tight

   # Cellular vortical flow
   [x,y] = meshgrid(linspace(0,1,24));
   Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
   Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
   Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   I = randperm(numel(Vx));
   Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vx(I(31:60)) = NaN; # missing values
   Vy(I(31:60)) = NaN; # missing values
   Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
   subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
   title('Noisy velocity field')
   subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
   title('Smoothed velocity field')

   See also SMOOTH, SMOOTH3, DCTN, IDCTN.

   -- Damien Garcia -- 2009/03, revised 2010/11
   Visit my <a
   href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN 

  # Check input arguments
  error(nargchk(1,12,nargin));

  z0=None,W=None,s=None,MaxIter=100,TolZ=1e-3
  '''
  if type(y) == ma.core.MaskedArray:  # masked array
    is_masked = True
    mask = y.mask
    y = np.array(y)
    y[mask] = 0.
    if W != None:
      W  = np.array(W)
      W[mask] = 0.
    if sd != None:
      W = np.array(1./sd**2)
      W[mask] = 0.
      sd = None
    y[mask] = np.nan
    
  if sd != None:
    sd_ = np.array(sd)
    mask = (sd > 0.)
    W = np.zeros_like(sd_)
    W[mask] = 1./sd_[mask]**2
    sd = None

  if W != None:
    W = W/W.max()

  sizy = y.shape;

  # sort axis
  if axis == None:
    axis = tuple(np.arange(y.ndim))

  noe = y.size # number of elements
  if noe<2:
    z = y
    exitflag = 0;Wtot=0
    return z,s,exitflag,Wtot
  #---
  # Smoothness parameter and weights
  #if s != None:
  #  s = []
  if W == None:
    W = ones(sizy);

  #if z0 == None:
  #  z0 = y.copy()

  #---
  # "Weighting function" criterion
  weightstr = weightstr.lower()
  #---
  # Weights. Zero weights are assigned to not finite values (Inf or NaN),
  # (Inf/NaN values = missing data).
  IsFinite = np.array(isfinite(y)).astype(bool);
  nof = IsFinite.sum() # number of finite elements
  W = W*IsFinite;
  if any(W<0):
    error('smoothn:NegativeWeights',\
        'Weights must all be >=0')
  else:
      #W = W/np.max(W)
      pass
  #---
  # Weighted or missing data?
  isweighted = any(W != 1);
  #---
  # Robust smoothing?
  #isrobust
  #---
  # Automatic smoothing?
  isauto = not s;
  #---
  # DCTN and IDCTN are required
  try:
    from scipy.fftpack.realtransforms import dct,idct
  except:
    z = y
    exitflag = -1;Wtot=0
    return z,s,exitflag,Wtot

  ## Creation of the Lambda tensor
  #---
  # Lambda contains the eigenvalues of the difference matrix used in this
  # penalized least squares process.
  axis = tuple(np.array(axis).flatten())
  d =  y.ndim;
  Lambda = zeros(sizy);
  for i in axis:
    # create a 1 x d array (so e.g. [1,1] for a 2D case)
    siz0 = ones((1,y.ndim))[0];
    siz0[i] = sizy[i];
    # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)))
    # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i]
    Lambda = Lambda + (cos(pi*(arange(1,sizy[i]+1) - 1.)/sizy[i]).reshape(siz0))
    #else:
    #  Lambda = Lambda + siz0
  Lambda = -2.*(len(axis)-Lambda);
  if not isauto:
    Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

  ## Upper and lower bound for the smoothness parameter
  # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
  # if h is close to 1, while over-smoothing appears when h is near 0. Upper
  # and lower bounds for h are given to avoid under- or over-smoothing. See
  # equation relating h to the smoothness parameter (Equation #12 in the
  # referenced CSDA paper).
  N = sum(array(sizy) != 1); # tensor rank of the y-array
  hMin = 1e-6; hMax = 0.99;
  # (h/n)**2 = (1 + a)/( 2 a)
  # a = 1/(2 (h/n)**2 -1) 
  # where a = sqrt(1 + 16 s)
  # (a**2 -1)/16
  try:
    sMinBnd = np.sqrt((((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16.);
    sMaxBnd = np.sqrt((((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16.);
  except:
    sMinBnd = None
    sMaxBnd = None
  ## Initialize before iterating
  #---
  Wtot = W;
  #--- Initial conditions for z
  if isweighted:
    #--- With weighted/missing data
    # An initial guess is provided to ensure faster convergence. For that
    # purpose, a nearest neighbor interpolation followed by a coarse
    # smoothing are performed.
    #---
    if z0 != None: # an initial guess (z0) has been provided
        z = z0;
    else:
        z = y #InitialGuess(y,IsFinite);
        z[~IsFinite] = 0.
  else:
    z = zeros(sizy);
  #---
  z0 = z;
  y[~IsFinite] = 0; # arbitrary values for missing y-data
  #---
  tol = 1.;
  RobustIterativeProcess = True;
  RobustStep = 1;
  nit = 0;
  #--- Error on p. Smoothness parameter s = 10^p
  errp = 0.1;
  #opt = optimset('TolX',errp);
  #--- Relaxation factor RF: to speedup convergence
  RF = 1 + 0.75*isweighted;
  # ??
  ## Main iterative process
  #---
  if isauto:
    try:
      xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)])
    except:
      xpost = array([100.])
  else:
    xpost = array([log10(s)])
  while RobustIterativeProcess:
    #--- "amount" of weights (see the function GCVscore)
    aow = sum(Wtot)/noe; # 0 < aow <= 1
    #---
    while tol>TolZ and nit<MaxIter:
        if verbose:
          print 'tol',tol,'nit',nit
        nit = nit+1;
        DCTy = dctND(Wtot*(y-z)+z,f=dct,axis=axis);
        if isauto and not remainder(log2(nit),1):
            #---
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score i.e. s = Argmin(GCVscore).
            # Because this process is time-consuming, it is performed from
            # time to time (when nit is a power of 2)
            #---
            # errp in here somewhere
            
            #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
            #   approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
            #   args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe))

            # if we have no clue what value of s to use, better span the
            # possible range to get a reasonable starting point ...
            # only need to do it once though. nS0 is the number of samples used
            if not s0:
              ss = np.arange(nS0)*(1./(nS0-1.))*(log10(sMaxBnd)-log10(sMinBnd))+ log10(sMinBnd)
              g = np.zeros_like(ss)
              for i,p in enumerate(ss):
                g[i] = gcv(p,Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder,axis)
                #print 10**p,g[i]
              xpost = [np.median(ss[g==g.min()])]
              #print '==============='
              #print nit,tol,g.min(),xpost[0],s
              #print '==============='
            else:
              xpost = [s0]
            xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
               approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
               args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder,axis))
        s = 10**xpost[0];
        # update the value we use for the initial s estimate
        s0 = xpost[0]

        Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

        z = RF*dctND(Gamma*DCTy,f=idct,axis=axis) + (1-RF)*z;
        # if no weighted/missing data => tol=0 (no iteration)
        tol = isweighted*norm(z0-z)/norm(z);
       
        z0 = z; # re-initialization
    exitflag = nit<MaxIter;

    if isrobust: #-- Robust Smoothing: iteratively re-weighted process
        #--- average leverage
        h = sqrt(1+16.*s); 
        h = sqrt(1+h)/sqrt(2)/h; 
        h = h**N;
        #--- take robust weights into account
        Wtot = W*RobustWeights(y-z,IsFinite,h,weightstr);
        #--- re-initialize for another iterative weighted process
        isweighted = True; tol = 1; nit = 0; 
        #---
        RobustStep = RobustStep+1;
        RobustIterativeProcess = RobustStep<3; # 3 robust steps are enough.
    else:
        RobustIterativeProcess = False; # stop the whole process

  ## Warning messages
  #---
  if isauto:
    if abs(log10(s)-log10(sMinBnd))<errp:
        warning('MATLAB:smoothn:SLowerBound',\
            ['s = %.3f '%(s) + ': the lower bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    elif abs(log10(s)-log10(sMaxBnd))<errp:
        warning('MATLAB:smoothn:SUpperBound',\
            ['s = %.3f '%(s) + ': the upper bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    #warning('MATLAB:smoothn:MaxIter',\
    #    ['Maximum number of iterations (%d'%(MaxIter) + ') has '\
    #    + 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
  return z,s,exitflag,Wtot
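
When s is determined automatically, the code above first evaluates the GCV score on a coarse grid of nS0 values of log10(s) and then refines the best grid point with a bounded, derivative-free fmin_l_bfgs_b call (approx_grad=True). A minimal sketch of that scan-then-refine pattern on a stand-in objective (the function and bounds here are illustrative, not smoothn's GCV score):

import numpy as np
from scipy.optimize.lbfgsb import fmin_l_bfgs_b

def score(p):
    # stand-in for the GCV score as a function of p = log10(s)
    p = np.atleast_1d(p)
    return float((p[0] - 0.3) ** 2 + 1.)

lo, hi = -3., 3.
grid = np.linspace(lo, hi, 10)                        # coarse scan, like the nS0 samples
p0 = np.atleast_1d(grid[np.argmin([score(p) for p in grid])])
popt, fval, info = fmin_l_bfgs_b(score, p0, approx_grad=True,
                                 bounds=[(lo, hi)], factr=10.)
s = 10 ** popt[0]                                     # back to the smoothing parameter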
Example #31
0
 def optimise_lbfgs(self,start):
   print
   print "***** LBFGS OPTIMISATION  *****"
   x,f,d = fmin_l_bfgs_b(self.objective, start, fprime=self.grad, pgtol=1e-09, iprint=0)
   return x
Example #32
0
 def minimize_lbfgs(self, parameters, x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c):
     parameters2 = parameters.reshape([nr_f*nr_c], order="F")
     result, _, d = opt2.fmin_l_bfgs_b(self.get_objective, parameters2, args=[x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c])
     return result.reshape([nr_f, nr_c], order="F")
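
fmin_l_bfgs_b operates on a flat 1-D parameter vector, which is why the (nr_f, nr_c) parameter matrix above is flattened column-major (order="F") before the call and reshaped with the same ordering afterwards; using the same order on both sides keeps each coefficient mapped back to its original position.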
Example #33
0
def train():
    
    np.random.seed(131742)
    #get sentences, trees and labels
    nExamples = -1
    print "loading data.."
    rnnData = RNNDataCorpus()
    rnnData.load_data(load_file=config.train_data, nExamples=nExamples)  
    
    #initialize params
    print "initializing params"
    params = Params(data=rnnData, wordSize=50, rankWo=2)

    #define theta
    #one vector for all the parameters of mvrnn model:  W, Wm, Wlabel, L, Lm
    n = params.wordSize; fanIn = params.fanIn; nWords = params.nWords; nLabels = params.categories; rank=params.rankWo
    Wo = 0.01*np.random.randn(n + 2*n*rank, nWords) #Lm, as in paper
    Wo[:n,:] = np.ones((n,Wo.shape[1])) #Lm, as in paper
    Wcat = 0.005*np.random.randn(nLabels, fanIn) #Wlabel, as in paper
#    Wv = 0.01*np.random.randn(n, nWords)
#    WO = 0.01*np.random.randn(n, 2*n)
#    W = 0.01*np.random.randn(n, 2*n+1)
    
    
    #load pre-trained weights here
    mats = sio.loadmat(config.pre_trained_weights)
    Wv = mats.get('Wv')  #L, as in paper
    W = mats.get('W') #W, as in paper
    WO = mats.get('WO') #Wm, as in paper
    
    
    sentencesIdx = np.arange(rnnData.ndoc())
    np.random.shuffle(sentencesIdx)
    nTrain = 4*len(sentencesIdx)/5
    trainSentIdx = sentencesIdx[0:nTrain]
    testSentIdx = sentencesIdx[nTrain:]
    batchSize = 5 
    nBatches = len(trainSentIdx)/batchSize
    evalFreq = 5  #evaluate after every 5 minibatches
    nTestSentEval = 50 #number of test sentences to be evaluated
    
   
    rnnData_train = RNNDataCorpus()
    rnnData.copy_into_minibatch(rnnData_train, trainSentIdx)
    
    rnnData_test = RNNDataCorpus()
    if(len(testSentIdx) > nTestSentEval):
#        np.random.shuffle(testSentIdx)  #choose random test examples
        thisTestSentIdx = testSentIdx[:nTestSentEval]
    else:
        thisTestSentIdx = testSentIdx
    rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx)
    
    
#    [Wv_test, Wo_test, _] = getRelevantWords(rnnData_test, Wv,Wo,params) 
    [Wv_trainTest, Wo_trainTest, all_train_idx] = getRelevantWords(rnnData, Wv,Wo,params) #sets nWords_reduced, returns new arrays    
    theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(), Wv_trainTest.flatten(), Wo_trainTest.flatten()))
    
    #optimize    
    print "starting training..."
    nIter = 100
    rnnData_minibatch = RNNDataCorpus()
    for i in range(nIter):        
        #train in minibatches
#        ftrain = np.zeros(nBatches)
#        for ibatch in range(nBatches):            
#            set_minibatch(rnnData, rnnData_minibatch, ibatch, nBatches, trainSentIdx)
            
#            print 'Iteration: ', i, ' minibatch: ', ibatch
        tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b(func=costFn, x0=theta, fprime=None, args=(rnnData_train, params), approx_grad=0, bounds=None, m=5,
                                        factr=1000000000000000.0, pgtol=1.0000000000000001e-5, epsilon=1e-08,
                                        iprint=3, maxfun=1, disp=0)
          
        #map parameters back
        W[:,:], WO[:,:], Wcat[:,:], Wv_trainTest, Wo_trainTest = unroll_theta(tunedTheta, params)
        Wv[:,all_train_idx] = Wv_trainTest
        Wo[:,all_train_idx] = Wo_trainTest
        
#        ftrain[ibatch] = fbatch_train  
        theta = tunedTheta  #for next iteration         
        
        print "========================================"
        print "XXXXXXIteration ", i, 
        print "Average cost: ", np.average(fbatch_train)
        evaluate(Wv,Wo,W,WO,Wcat,params, rnnData_test)
        print "========================================"                  
  
        #save weights
        save_dict = {'Wv':Wv, 'Wo':Wo, 'Wcat':Wcat, 'W':W, 'WO':WO}
        sio.savemat(config.saved_params_file+'_lbfgs_iter'+str(i), mdict=save_dict)
        print "saved tuned theta. "
Example #34
0
def BlockedTheanoOVA(svm, data,
        l2_regularization=1e-3,
        dtype='float64',
        GPU_blocksize=1000 * (1024 ** 2), # bytes
        verbose=False,
        ):
    n_features, n_classes  = svm.weights.shape

    _X = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True)
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True)

    sgd_params = tensor.vector(dtype=dtype)

    flat_sgd_weights = sgd_params[:n_features * n_classes]
    sgd_weights = flat_sgd_weights.reshape((n_features, n_classes))
    sgd_bias = sgd_params[n_features * n_classes:]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias)
    losses = tensor.maximum(0, 1 - margin) ** 2
    l2_cost = .5 * l2_regularization * tensor.dot(
            flat_sgd_weights, flat_sgd_weights)

    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    assert dtype == 'float32'
    sizeof_dtype = 4
    X, y = data
    yvecs = np.asarray(
            (y[:, None] == np.arange(n_classes)) * 2 - 1,
            dtype=dtype)

    X_blocks = np.ceil(X.size * sizeof_dtype / float(GPU_blocksize))

    examples_per_block = len(X) // X_blocks

    if verbose:
        print 'dividing into', X_blocks, 'blocks of', examples_per_block

    # -- create a dummy class because a nested function cannot modify
    #    params_mean in enclosing scope
    class Dummy(object):
        def __init__(self, collect_estimates):
            params = np.zeros(n_features * n_classes + n_classes)
            params[:n_features * n_classes] = svm.weights.flatten()
            params[n_features * n_classes:] = svm.bias

            self.params = params
            self.params_mean = params.copy().astype('float64')
            self.params_mean_i = 0
            self.collect_estimates = collect_estimates

        def update_mean(self, p):
            self.params_mean_i += 1
            alpha = 1.0 / self.params_mean_i
            self.params_mean *= 1 - alpha
            self.params_mean += alpha * p

        def __call__(self, p):
            if self.collect_estimates:
                self.update_mean(p)
            c, d = _f_df(p.astype(dtype))
            return c.astype('float64'), d.astype('float64')
    dummy = Dummy(X_blocks > 2)

    i = 0
    while i + examples_per_block <= len(X):
        if verbose:
            print 'training on examples', i, 'to', i + examples_per_block
        _X.set_value(
                X[i:i + examples_per_block],
                borrow=True)
        _yvecs.set_value(
                yvecs[i:i + examples_per_block],
                borrow=True)

        best, bestval, info_dct  = fmin_l_bfgs_b(dummy,
                dummy.params_mean.copy(),
                iprint=1 if verbose else -1,
                factr=1e11,  # -- 1e12 for low acc, 1e7 for moderate
                maxfun=1000,
                )
        dummy.update_mean(best)

        i += examples_per_block

    params = dummy.params_mean

    rval = classifier_from_weights(
            weights=params[:n_classes * n_features].reshape(
                (n_features, n_classes)),
            bias=params[n_classes * n_features:])

    return rval
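
The Dummy instance is passed directly as the objective because fmin_l_bfgs_b only requires its first argument to be callable and (when approx_grad is not set) to return a (cost, gradient) pair; wrapping the call in an object is what lets it also accumulate a running mean of the iterates, since a plain nested function cannot rebind params_mean in Python 2. A minimal sketch of the same pattern on a toy quadratic (the names below are illustrative):

import numpy as np
from scipy.optimize.lbfgsb import fmin_l_bfgs_b

class MeanTrackingQuadratic(object):
    """Callable objective f(p) = 0.5 * ||p||^2 that records the mean iterate seen."""
    def __init__(self, p0):
        self.mean = np.asarray(p0, dtype='float64').copy()
        self.n_seen = 1

    def __call__(self, p):
        self.n_seen += 1
        self.mean += (p - self.mean) / self.n_seen   # running mean of visited points
        return 0.5 * np.dot(p, p), p                 # cost and its gradient

obj = MeanTrackingQuadratic(np.ones(3))
best, bestval, info = fmin_l_bfgs_b(obj, np.ones(3))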
Example #35
0
    def learn(self, X, y):
        """
        Learn the model from the given data.

        :param X: the attribute data
        :type X: numpy.array

        :param y: the class variable data
        :type y: numpy.array

        """
        def rand(eps):
            """Return random number in interval [-eps, eps]."""
            return rnd.random() * 2 * eps - eps

        def g_func(z):
            """The sigmoid (logistic) function."""
            return 1. / (1. + np.exp(-z))

        def h_func(thetas, x):
            """The model function."""
            a = np.array([[1.] + list(x)]).T  # Initialize a
            for l in range(1, len(thetas) + 1):  # Forward propagation
                a = np.vstack(
                    (np.array([[1.]]), g_func(thetas[l - 1].T.dot(a))))
            return a[1:]

        def llog(val):
            "The limited logarithm."
            e = 1e-10
            return np.log(np.clip(val, e, 1. - e))

        def unroll(thetas):
            """Unrolls a list of thetas into vector."""
            sd = [m.shape for m in thetas]  # Keep the shape data
            thetas = np.concatenate(
                [theta.reshape(np.prod(theta.shape)) for theta in thetas])
            return thetas, sd

        def roll(thetas, sd):
            """Rolls a vector of thetas back into list."""
            thetas = np.split(thetas, [
                sum(np.prod(s) for s in sd[:i]) + np.prod(sd[i])
                for i in range(len(sd) - 1)
            ])
            return [np.reshape(theta, sd[i]) for i, theta in enumerate(thetas)]

        def cost(thetas, X, y, sd, S, lambda_):
            """The cost function of the neural network."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            reg_factor = (lambda_ / float(2 * m)) * sum(
                sum(sum(thetas[l][1:, :]**2)) for l in range(L - 1))
            cost = (-1. / float(m)) * sum(
                sum(self._classes_map[y[s]] * llog(h_func(thetas, X[s])) +
                    (1. - self._classes_map[y[s]]) *
                    llog(1. - h_func(thetas, X[s])))
                for s in range(m)) + reg_factor
            if self._verbose:
                print "Current value of cost func.: " + str(cost[0])
            return cost[0]

        def grad(thetas, X, y, sd, S, lambda_):
            """The gradient (derivate) function which includes the back
            propagation algorithm."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            d = [
                np.array([[0. for j in range(S[l + 1])]
                          for i in range(S[l] + 1)]) for l in range(L - 1)
            ]  # Initialize the delta matrix

            for s in range(m):
                a = [
                    np.array([[1.] + list(X[s])]).T
                ]  # Initialize a (only a, d & theta matrices have 1 more element in columns, biases)

                for l in range(1, L):  # Forward propagation
                    a.append(
                        np.vstack((np.array([[1.]]),
                                   g_func(thetas[l - 1].T.dot(a[l - 1])))))
                # TODO
                # Softmax: treat last a column differently
                #ez = np.exp(thetas[L - 2].T.dot(a[L - 2]))
                #sez = sum(ez)
                #a.append(np.vstack((np.array([[1]]), ez / sez)))

                deltas = [None for l in range(L - 1)
                          ] + [a[-1][1:] - self._classes_map[y[s]]]
                for l in range(L - 2, 0, -1):  # Backward propagation
                    deltas[l] = (thetas[l].dot(deltas[l + 1]) *
                                 (a[l] * (1. - a[l])))[1:]

                for l in range(L - 1):
                    d[l] = d[l] + a[l].dot(deltas[l + 1].T)

            D = [(1. / float(m)) * d[l] + lambda_ * thetas[l]
                 for l in range(L - 1)]
            D = [
                Di - lambda_ * np.vstack(
                    (thetas[l][0], np.zeros((Di.shape[0] - 1, Di.shape[1]))))
                for l, Di in enumerate(D)
            ]  # Where i = 0, don't use regularization
            D, _ = unroll(D)
            return D

        def gradApprox(thetas, X, y, sd, S, lambda_):
            """Numerically approximate the gradient of the cost function by
            central differences (only used for debugging, not in final version)."""
            eps = 1e-4
            approx = np.zeros_like(thetas)
            for i in range(len(thetas)):
                step = np.zeros_like(thetas)
                step[i] = eps
                approx[i] = (cost(thetas + step, X, y, sd, S, lambda_) -
                             cost(thetas - step, X, y, sd, S, lambda_)) / (2. * eps)
            return approx

        # Set the random seed
        rnd.seed(self._seed)

        # Initialize the final layer of neural net (outputs)
        self._classes = list(set(y))
        for i, cl in enumerate(self._classes):
            self._classes_map[cl] = np.zeros((len(self._classes), 1))
            self._classes_map[cl][i] = 1.

        S = [len(X[0])] + self._hl + [len(self._classes)
                                      ]  # Complete information about levels
        L = len(S)
        thetas0 = [
            np.array([[rand(self._eps) for j in range(S[l + 1])]
                      for i in range(S[l] + 1)]) for l in range(L - 1)
        ]  # Initialize the thetas matrix
        thetas0, sd = unroll(thetas0)
        #return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda) # For testing

        # The L-BFGS-B bounds parameter is redefined: input is (lower_bound, upper_bound) instead of array of bounds for each theta parameter
        if self._opt_args != None and "bounds" in self._opt_args:
            bounds = [self._opt_args["bounds"] for i in range(len(thetas0))]
            self._opt_args["bounds"] = bounds

        self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(cost,
                                                        thetas0,
                                                        grad,
                                                        args=(X, y, sd, S,
                                                              self._lambda),
                                                        **self._opt_args)
        self._thetas = roll(self._thetas, sd)
        self._cost = float(self._cost)
        self._can_classify = True
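
The commented-out return statement above ('# For testing') hints at how grad was validated against gradApprox. A minimal sketch of that check, assuming thetas0, X, y, sd, S and self._lambda are set up exactly as in learn (these names come from the code above); a relative error of roughly 1e-6 or smaller is the usual sign that the back propagation gradient is correct:

analytic = grad(thetas0, X, y, sd, S, self._lambda)
numeric = gradApprox(thetas0, X, y, sd, S, self._lambda)
rel_err = np.linalg.norm(analytic - numeric) / (np.linalg.norm(analytic) + np.linalg.norm(numeric))
print(rel_err)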
Example #36
0
def SubsampledTheanoOVA(svm, data,
        l2_regularization=1e-3,
        dtype='float64',
        feature_bytes=1000 * (1024 ** 2), # bytes
        verbose=False,
        rng=None,
        n_runs=None,  # None -> smallest int that uses all data
        n_keep=None,  # None -> X.shape[1] / n_runs
        cost_fn='L2Huber',
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
        bfgs_maxfun=1000,
        decisions=None,
        decision_hack=None,
        ):
    # I tried to change the problem to work with reduced regularization
    # or a smaller minimal margin (e.g. < 1) to compensate for the missing
    # features, but nothing really worked.
    #
    # I think the better thing would be to do boosting, in just the way we
    # did in the eccv12 project (see e.g. MarginASGD)
    n_features, n_classes = svm.weights.shape
    X, y = data
    if verbose:
        print 'Training svm on design matrix of size', X.shape
        print '   with', n_classes, 'classes'
    if n_keep is None:
        if n_runs is None:
            sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
            Xbytes = X.size * sizeof_dtype
            keep_ratio = float(feature_bytes) / Xbytes
            n_runs = int(np.ceil(1. / keep_ratio))
        n_keep = int(np.ceil(X.shape[1] / float(n_runs)))
    else:
        if n_runs is None:
            n_runs = int(np.ceil(X.shape[1] / float(n_keep)))

    _X = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True)
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True)

    if decisions is None:
        _decisions = theano.shared(
                np.zeros((len(y), n_classes), dtype=dtype),
                allow_downcast=True)
    else:
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        # matrix
        if decisions.shape != (len(y), n_classes):
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions

    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    flat_sgd_weights = sgd_params[:s_n_use * n_classes]
    sgd_weights = flat_sgd_weights.reshape((s_n_use, n_classes))
    sgd_bias = sgd_params[s_n_use * n_classes:]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)

    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            flat_sgd_weights, flat_sgd_weights)

    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])

    yvecs = np.asarray(
            (y[:, None] == np.arange(n_classes)) * 2 - 1,
            dtype=dtype)

    # TODO: reconsider how to use this function when doing partial fitting
    #_f_update_decisions = theano.function([sgd_params, s_n_use], [],
    #        updates={
    #            _decisions: (_decisions
    #                + tensor.dot(_X, sgd_weights) + sgd_bias),
    #            })

    def flatten_svm(obj):
        return np.concatenate([obj.weights.flatten(), obj.bias])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features'

    if rng is None:
        rng = np.random.RandomState(123)

    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        params = np.zeros(n_use * n_classes + n_classes)
        params[:n_use * n_classes] = svm.weights[use_features].flatten()
        params[n_use * n_classes:] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)
        _yvecs.set_value(yvecs, borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(f,
                params,
                iprint=1 if verbose else -1,
                factr=bfgs_factr,  # -- 1e12 for low acc, 1e7 for moderate
                maxfun=bfgs_maxfun,
                )
        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = best[:n_classes * n_use].reshape(
                    (n_use, n_classes))
        best_svm.bias = best[n_classes * n_use:]
        bests.append(flatten_svm(best_svm))

    # sum instead of mean here, because each loop iter trains only a subset of
    # features. XXX: This assumes that those subsets are mutually exclusive
    best_params = np.sum(bests, axis=0)
    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_classes * n_features].reshape(
                (n_features, n_classes))
    rval.bias = best_params[n_classes * n_features:]

    # XXX: figure out why Theano may be not freeing this memory, why does
    # writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones((2, 2), dtype=dtype))
    _decisions.set_value(np.ones((2, 2), dtype=dtype))
    return rval
Example #37
0
def BinarySubsampledTheanoOVA(svm, data,
        l2_regularization=1e-3,
        dtype='float64',
        feature_bytes=1000 * (1024 ** 2), # bytes
        verbose=False,
        rng=None,
        n_runs=None,  # None -> smallest int that uses all data
        cost_fn='L2Huber',
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
        bfgs_maxfun=1000,
        decisions=None
        ):
    n_features, = svm.weights.shape
    X, y = data

    # XXX REFACTOR
    if n_runs is None:
        sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
        Xbytes = X.size * sizeof_dtype
        keep_ratio = float(feature_bytes) / Xbytes
        n_runs = int(np.ceil(1. / keep_ratio))
        print 'BinarySubsampledTheanoOVA using n_runs =', n_runs
    n_keep = int(np.ceil(X.shape[1] / float(n_runs)))

    assert set(y) == set([-1, 1])
    _X = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype),
            allow_downcast=True, borrow=True)
    if decisions is not None:
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        # matrix
        if decisions.shape != y.shape:
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions
    else:
        _decisions = theano.shared(y.astype(dtype) * 0, allow_downcast=True)

    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    sgd_weights = sgd_params[:s_n_use]
    sgd_bias = sgd_params[s_n_use]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)

    # XXX REFACTOR
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            sgd_weights, sgd_weights)

    cost = losses.mean() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])

    _f_update_decisions = theano.function([sgd_params, s_n_use], [],
            updates={
                _decisions: (
                    tensor.dot(_X, sgd_weights) + sgd_bias + _decisions),
                })

    def flatten_svm(obj):
        # Note this is different from multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features, per round'
        print 'running for ', n_runs, 'rounds'

    if rng is None:
        rng = np.random.RandomState(123)

    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        params = np.zeros(n_use + 1)
        params[:n_use] = svm.weights[use_features].flatten()
        params[n_use] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(f,
                params,
                iprint=int(verbose) - 1,
                factr=bfgs_factr,
                maxfun=bfgs_maxfun,
                )
        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = np.array(best[:n_use], dtype=dtype)
        best_svm.bias = float(best[n_use])
        bests.append(flatten_svm(best_svm))

        _f_update_decisions(best.astype(dtype), n_use)
        margin_ii = _decisions.get_value() * _yvecs.get_value()
        print 'run %i: margin min:%f mean:%f max:%f' % (
                ii, np.min(margin_ii), np.mean(margin_ii), np.max(margin_ii))
        if 0:
            # XXX This is a hack that helps but it's basically wrong. The
            # correct thing to do would be to add two scalars to the
            # optimization: one scalar represents the total l2 norm of the
            # weight vector fit so far.  The second scalar represents how much
            # to down-weight the total vector fit so far in response to the
            # utility of the current feature set.  So this second scalar would
            # scale the vector of previous decisions, and the l2-cost would
            # always be the l2-cost of the entire vector so far.
            _decisions.set_value(
                    _decisions.get_value() - np.min(margin_ii) * y)
        elif (ii < (n_runs - 1)) and (np.min(margin_ii) > .95):
            print 'Margin has been maximized after', ii, 'of', n_runs
            break

    # N.B. we might have used fewer than n_runs
    best_params = np.sum(bests, axis=0)
    best_params[n_features] /= len(bests)  # bias is estimated on each run
    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_features].astype(dtype)
    rval.bias = float(best_params[n_features])

    # XXX: figure out why Theano may be not freeing this memory, why does
    # writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return rval
def smoothn(y,nS0=10,axis=None,smoothOrder=2.0,sd=None,verbose=False,\
	s0=None,z0=None,isrobust=False,W=None,s=None,MaxIter=100,TolZ=1e-3,weightstr='bisquare'):
  '''
   function [z,s,exitflag,Wtot] = smoothn(varargin)

   SMOOTHN Robust spline smoothing for 1-D to N-D data.
   SMOOTHN provides a fast, automatized and robust discretized smoothing
   spline for data of any dimension.

   Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
   can be any N-D noisy array (time series, images, 3D data,...). Non
   finite data (NaN or Inf) are treated as missing values.

   Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
   S must be a real positive scalar. The larger S is, the smoother the
   output will be. If the smoothing parameter S is omitted (see previous
   option) or empty (i.e. S = []), it is automatically determined using
   the generalized cross-validation (GCV) method.

   Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
   real positive values, that must have the same size as Y. Note that a
   nil weight corresponds to a missing value.

   Robust smoothing
   ----------------
   Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
   the influence of outlying data.

   [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
   you can fine-tune the smoothing subsequently if needed.

   An iteration process is used in the presence of weighted and/or missing
   values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
   termination parameters specified by OPTION_NAME and OPTION_VALUE. They
   can contain the following criteria:
       -----------------
       TolZ:       Termination tolerance on Z (default = 1e-3)
                   TolZ must be in ]0,1[
       MaxIter:    Maximum number of iterations allowed (default = 100)
       Initial:    Initial value for the iterative process (default =
                   original data)
       -----------------
   Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);

   [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
   describes the exit condition of SMOOTHN:
       1       SMOOTHN converged.
       0       Maximum number of iterations was reached.

   Class Support
   -------------
   Input array can be numeric or logical. The returned array is of class
   double.

   Notes
   -----
   The N-D (inverse) discrete cosine transform functions <a
   href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')"
   >DCTN</a> and <a
   href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')"
   >IDCTN</a> are required.

   To be made
   ----------
   Estimate the confidence bands (see Wahba 1983, Nychka 1988).

   Reference
   --------- 
   Garcia D, Robust smoothing of gridded data in one and higher dimensions
   with missing values. Computational Statistics & Data Analysis, 2010. 
   <a
   href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>

   Examples:
   --------
   # 1-D example
   x = linspace(0,100,2**8);
   y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
   y[[70, 75, 80]] = [5.5, 5, 6];
   z = smoothn(y); # Regular smoothing
   zr = smoothn(y,'robust'); # Robust smoothing
   subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
   axis square, title('Regular smoothing')
   subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
   axis square, title('Robust smoothing')

   # 2-D example
   xp = 0:.02:1;
   [x,y] = meshgrid(xp);
   f = exp(x+y) + sin((x-2*y)*3);
   fn = f + randn(size(f))*0.5;
   fs = smoothn(fn);
   subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
   subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

   # 2-D example with missing data
   n = 256;
   y0 = peaks(n);
   y = y0 + rand(size(y0))*2;
   I = randperm(n^2);
   y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
   y(40:90,140:190) = NaN; # create a hole
   z = smoothn(y); # smooth data
   subplot(2,2,1:2), imagesc(y), axis equal off
   title('Noisy corrupt data')
   subplot(223), imagesc(z), axis equal off
   title('Recovered data ...')
   subplot(224), imagesc(y0), axis equal off
   title('... compared with original data')

   # 3-D example
   [x,y,z] = meshgrid(-2:.2:2);
   xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
   vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
   subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
   title('Noisy data')
   v = smoothn(vn);
   subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
   title('Smoothed data')

   # Cardioid
   t = linspace(0,2*pi,1000);
   x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
   y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
   z = smoothn(complex(x,y));
   plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
   axis equal tight

   # Cellular vortical flow
   [x,y] = meshgrid(linspace(0,1,24));
   Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
   Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
   Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   I = randperm(numel(Vx));
   Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vx(I(31:60)) = NaN; # missing values
   Vy(I(31:60)) = NaN; # missing values
   Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
   subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
   title('Noisy velocity field')
   subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
   title('Smoothed velocity field')

   See also SMOOTH, SMOOTH3, DCTN, IDCTN.

   -- Damien Garcia -- 2009/03, revised 2010/11
   Visit my <a
   href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN 

  # Check input arguments
  error(nargchk(1,12,nargin));

  z0=None,W=None,s=None,MaxIter=100,TolZ=1e-3
  '''
  if type(y) == ma.core.MaskedArray:  # masked array
    is_masked = True
    mask = y.mask
    y = np.array(y)
    y[mask] = 0.
    if W != None:
      W  = np.array(W)
      W[mask] = 0.
    if sd != None:
      W = np.array(1./sd**2)
      W[mask] = 0.
      sd = None
    y[mask] = np.nan
    
  if sd != None:
    sd_ = np.array(sd)
    mask = (sd > 0.)
    W = np.zeros_like(sd_)
    W[mask] = 1./sd_[mask]**2
    sd = None

  if W != None:
    W = W/W.max()

  sizy = y.shape;

  # sort axis
  if axis == None:
    axis = tuple(np.arange(y.ndim))

  noe = y.size # number of elements
  if noe<2:
    z = y
    exitflag = 0;Wtot=0
    return z,s,exitflag,Wtot
  #---
  # Smoothness parameter and weights
  #if s != None:
  #  s = []
  if W == None:
    W = ones(sizy);

  #if z0 == None:
  #  z0 = y.copy()

  #---
  # "Weighting function" criterion
  weightstr = weightstr.lower()
  #---
  # Weights. Zero weights are assigned to not finite values (Inf or NaN),
  # (Inf/NaN values = missing data).
  IsFinite = np.array(isfinite(y)).astype(bool);
  nof = IsFinite.sum() # number of finite elements
  W = W*IsFinite;
  if any(W<0):
    error('smoothn:NegativeWeights',\
        'Weights must all be >=0')
  else:
      #W = W/np.max(W)
      pass
  #---
  # Weighted or missing data?
  isweighted = any(W != 1);
  #---
  # Robust smoothing?
  #isrobust
  #---
  # Automatic smoothing?
  isauto = not s;
  #---
  # DCTN and IDCTN are required
  try:
    from scipy.fftpack.realtransforms import dct,idct
  except:
    z = y
    exitflag = -1;Wtot=0
    return z,s,exitflag,Wtot

  ## Creation of the Lambda tensor
  #---
  # Lambda contains the eigenvalues of the difference matrix used in this
  # penalized least squares process.
  axis = tuple(np.array(axis).flatten())
  d =  y.ndim;
  Lambda = zeros(sizy);
  for i in axis:
    # create a 1 x d array (so e.g. [1,1] for a 2D case)
    siz0 = ones((1,y.ndim))[0];
    siz0[i] = sizy[i];
    # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)))
    # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i]
    Lambda = Lambda + (cos(pi*(arange(1,sizy[i]+1) - 1.)/sizy[i]).reshape(siz0))
    #else:
    #  Lambda = Lambda + siz0
  Lambda = -2.*(len(axis)-Lambda);
  if not isauto:
    Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

  ## Upper and lower bound for the smoothness parameter
  # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
  # if h is close to 1, while over-smoothing appears when h is near 0. Upper
  # and lower bounds for h are given to avoid under- or over-smoothing. See
  # equation relating h to the smoothness parameter (Equation #12 in the
  # referenced CSDA paper).
  N = sum(array(sizy) != 1); # tensor rank of the y-array
  hMin = 1e-6; hMax = 0.99;
  # (h/n)**2 = (1 + a)/( 2 a)
  # a = 1/(2 (h/n)**2 -1) 
  # where a = sqrt(1 + 16 s)
  # (a**2 -1)/16
  try:
    sMinBnd = np.sqrt((((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16.);
    sMaxBnd = np.sqrt((((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16.);
  except:
    sMinBnd = None
    sMaxBnd = None
  ## Initialize before iterating
  #---
  Wtot = W;
  #--- Initial conditions for z
  if isweighted:
    #--- With weighted/missing data
    # An initial guess is provided to ensure faster convergence. For that
    # purpose, a nearest neighbor interpolation followed by a coarse
    # smoothing are performed.
    #---
    if z0 != None: # an initial guess (z0) has been provided
        z = z0;
    else:
        z = y #InitialGuess(y,IsFinite);
        z[~IsFinite] = 0.
  else:
    z = zeros(sizy);
  #---
  z0 = z;
  y[~IsFinite] = 0; # arbitrary values for missing y-data
  #---
  tol = 1.;
  RobustIterativeProcess = True;
  RobustStep = 1;
  nit = 0;
  #--- Error on p. Smoothness parameter s = 10^p
  errp = 0.1;
  #opt = optimset('TolX',errp);
  #--- Relaxation factor RF: to speedup convergence
  RF = 1 + 0.75*isweighted;
  # ??
  ## Main iterative process
  #---
  if isauto:
    try:
      xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)])
    except:
      xpost = array([100.])
  else:
    xpost = array([log10(s)])
  while RobustIterativeProcess:
    #--- "amount" of weights (see the function GCVscore)
    aow = sum(Wtot)/noe; # 0 < aow <= 1
    #---
    while tol>TolZ and nit<MaxIter:
        if verbose:
          print('tol',tol,'nit',nit)
          
        nit = nit+1;
        DCTy = dctND(Wtot*(y-z)+z,f=dct);
        if isauto and not remainder(log2(nit),1):
            #---
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score i.e. s = Argmin(GCVscore).
            # Because this process is time-consuming, it is performed from
            # time to time (when nit is a power of 2)
            #---
            # errp in here somewhere
            
            #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
            #   approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
            #   args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe))

            # if we have no clue what value of s to use, better span the
            # possible range to get a reasonable starting point ...
            # only need to do it once though. nS0 is the number of samples used
            if not s0:
              ss = np.arange(nS0)*(1./(nS0-1.))*(log10(sMaxBnd)-log10(sMinBnd))+ log10(sMinBnd)
              g = np.zeros_like(ss)
              for i,p in enumerate(ss):
                g[i] = gcv(p,Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder)
                #print 10**p,g[i]
              xpost = [np.median(ss[g==g.min()])]
              #print '==============='
              #print nit,tol,g.min(),xpost[0],s
              #print '==============='
            else:
              xpost = [s0]
            xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
               approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
               args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder))
        s = 10**xpost[0];
        # update the value we use for the initial s estimate
        s0 = xpost[0]

        Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

        z = RF*dctND(Gamma*DCTy,f=idct) + (1-RF)*z;
        # if no weighted/missing data => tol=0 (no iteration)
        tol = isweighted*norm(z0-z)/norm(z);
       
        z0 = z; # re-initialization
    exitflag = nit<MaxIter;

    if isrobust: #-- Robust Smoothing: iteratively re-weighted process
        #--- average leverage
        h = sqrt(1+16.*s); 
        h = sqrt(1+h)/sqrt(2)/h; 
        h = h**N;
        #--- take robust weights into account
        Wtot = W*RobustWeights(y-z,IsFinite,h,weightstr);
        #--- re-initialize for another iterative weighted process
        isweighted = True; tol = 1; nit = 0; 
        #---
        RobustStep = RobustStep+1;
        RobustIterativeProcess = RobustStep<3; # 3 robust steps are enough.
    else:
        RobustIterativeProcess = False; # stop the whole process

  ## Warning messages
  #---
  if isauto:
    if abs(log10(s)-log10(sMinBnd))<errp:
        warning('MATLAB:smoothn:SLowerBound',\
            ['s = %.3f '%(s) + ': the lower bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    elif abs(log10(s)-log10(sMaxBnd))<errp:
        warning('MATLAB:smoothn:SUpperBound',\
            ['s = %.3f '%(s) + ': the upper bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    #warning('MATLAB:smoothn:MaxIter',\
    #    ['Maximum number of iterations (%d'%(MaxIter) + ') has '\
    #    + 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
  return z,s,exitflag,Wtot
Example #39
0
import openbabel
import numpy

from scipy.optimize.lbfgsb import fmin_l_bfgs_b


def f(x):
    return x[0]**2 + x[1]**2


def g(x):
    return numpy.array([2 * x[0], 2 * x[1]])


x0 = numpy.array([3, 1])

opt, energy, dict = fmin_l_bfgs_b(f, x0, fprime=g)

print opt