Example no. 1
    def train(self, features, labels, normalisedlabels=False, names=None, **kwargs):
        def error(bs):
            response = bs[0] + np.dot(features, bs[1:])
            response = _sigmoidal(response)
            diff = response - labels
            log_like = np.dot(diff, diff)
            L2_penalty = self.alpha * np.dot(bs, bs)
            return log_like + L2_penalty
        def error_prime(bs):
            fB = np.dot(features, bs[1:])
            response = _sigmoidal(bs[0] + fB)
            sprime = response * (1-response)
            ds = (response - labels) * sprime
            b0p = np.sum(ds)
            b1p = np.dot(features.T, ds)
            bp = np.concatenate( ([b0p], b1p) )
            return 2.*(bp + self.alpha*bs)

        features = np.asanyarray(features)
        if not normalisedlabels:
            labels, _ = normaliselabels(labels)
        N,f = features.shape
        bs = np.zeros(f+1)
        try:
            from scipy import optimize
            # Some testing revealed that this was a good combination
            # call fmin_cg twice first and then fmin
            # I do not fully understand why, but there it is
            bs = optimize.fmin_cg(error, bs, error_prime, disp=False)
            bs = optimize.fmin_cg(error, bs, error_prime, disp=False)
            bs = optimize.fmin(error, bs, disp=False)
        except ImportError:
            import warnings
            warnings.warn('''\
Could not import scipy.optimize.
Fall back to very simple gradient descent (which is slow).''')
            bs = np.zeros(f+1)
            cur = 1.e-6
            ebs = error(bs)
            for i in xrange(1000000):
                dir = error_prime(bs)
                step = (lambda e : bs - e *dir)
                enbs = ebs + 1
                while enbs > ebs:
                    cur /= 2.
                    if cur == 0.:
                        break
                    nbs = step(cur)
                    enbs = error(nbs)
                while cur < 10.:
                    cur *= 2
                    nnbs = step(cur)
                    ennbs = error(nnbs)
                    if ennbs < enbs:
                        nbs = nnbs
                        enbs = ennbs
                    else:
                        break
                bs = nbs
                ebs = enbs
        return logistic_model(bs)
def conjugate_gradient(x0, f, f_prime, hessian=None):
    all_x_i = [x0[0]]
    all_y_i = [x0[1]]
    all_f_i = [f(x0)]
    def store(X):
        x, y = X
        all_x_i.append(x)
        all_y_i.append(y)
        all_f_i.append(f(X))
    optimize.fmin_cg(f, x0, f_prime, callback=store, gtol=1e-12)
    return all_x_i, all_y_i, all_f_i
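A minimal sketch of driving the conjugate_gradient helper above on a simple quadratic, assuming numpy and scipy.optimize are imported in the surrounding module; the quad objective, its gradient and the start point are made up for illustration.

import numpy as np
from scipy import optimize

def quad(X):
    # separable quadratic with minimum at (1, -2)
    return (X[0] - 1.0)**2 + 10.0 * (X[1] + 2.0)**2

def quad_prime(X):
    return np.array([2.0 * (X[0] - 1.0), 20.0 * (X[1] + 2.0)])

all_x, all_y, all_f = conjugate_gradient(np.array([0.0, 0.0]), quad, quad_prime)
print(all_x[-1], all_y[-1], all_f[-1])  # iterates should approach (1, -2) with f near 0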
def logRegression():
    '''This data is simulated'''
    data = np.loadtxt('logregression.txt')
    y = data[:,0]
    x = np.ones(data.shape)
    x[:,1:] = data[:,1:]
    def objective(b):
        '''Return -1*l(b), where l is the log likelihood.'''
        return np.log(1+np.exp(np.dot(x,b))).sum() - (y*(np.dot(x,b))).sum()
    guess = np.array([1., 1., 1., 1.])
    fmin_cg(objective, guess)
    def fit(self, X, Y):
        # save data and labels for plotting methods
        self.data = X
        self.labels = Y

        # get number of observations, features from data
        self.n_obs, self.n_features = X.shape

        # now make the weights attribute
        self.weights = np.random.rand(self.n_features)
        self.weights_history.append(self.weights)

        # use the scipy optimize Conjugate Gradient method
        optimize.fmin_cg(self.cost, self.weights, fprime=self.gradient, args=(X, Y))
Example no. 5
 def train(self, inputArr2D, targets, costFunc, costFuncGrad, maxIter=100):
     '''
     This method will fit the weights of the neural network to the targets.
     :param inputArr2D: 1 input per row.
     :param targets: ground truth class label for each input
     :param costFunc: callable *f(paramToOptimize, \*arg)* that will be used as cost function.
     :param costFuncGrad: callable *f'(paramToOptimize, \*arg)* that will be used to compute partial derivative of cost function over each parameter in paramToOptimize.
     '''
     self.forwardPropogateAllInput(inputArr2D)  # perform forward propagation to set self.outputs
     avgEx = 1.0 / targets.shape[0]
     flatWeights = asarray(self.layersExOutputLy[0].forwardWeight)
     for ly in self.layersExOutputLy[1:]:
         ly.avgActvArrAllEx = avgEx * npsum(ly.self2D[:, :-1], 0)
         flatWeights = append(flatWeights, asarray(ly.forwardWeight))
     fmin_cg(costFunc, flatWeights, costFuncGrad, (inputArr2D, targets, self.__weightDecayParam, self.__sparsity, self.__sparseParam, self), maxiter=maxIter, full_output=True)  # fmin_cg calls grad before cost func
def train_network(X, Y, layers, regularization = 0, max_iters = 200):
    """ Train a neural network and return the model.
    
    Args:
        X (array): data consisting of rows of features. 
        Y (array): array of labels corresponding to each row in X.
            Must consist of integers from 0 to n for some integer n.
        layers (list): the number of features in each layer. The first
            entry must be the number of features (columns) in X, the 
            last must be the number of classes, and those in between
            determine the size of each hidden layer.
        regularization (int): penalty factor for having larger weights.
            (default: 0).
        max_iters (int): the max number of iterations used by the algorithm
            when searching for optimal weights. A higher number will produce
            a better fit but extends run time (default: 200).
    """
    check_input_validity(X, Y, layers)
    num_classes = layers[-1]
    network = NeuralNetwork(layers)
    initial_weights = flatten_weights(network.initialize_weights()) 
    Y = process_labels(Y, num_classes)
    optimal = fmin_cg(compute_cost, 
                      initial_weights, 
                      back_propogate, 
                      args = (X, Y, network, regularization), 
                      maxiter = max_iters)
    forward_propogate(network.reshape_weights(optimal), X, network)
    network.weights = network.reshape_weights(optimal)
    return NeuralNetModel(network)
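A hedged usage sketch for train_network above, assuming the helpers it references (check_input_validity, NeuralNetwork, flatten_weights, process_labels, compute_cost, back_propogate, forward_propogate) are importable from the same module; the data and layer sizes below are illustrative only.

import numpy as np

X = np.random.rand(150, 4)              # 150 samples, 4 features
Y = np.random.randint(0, 3, size=150)   # 3 classes labelled 0..2
layers = [4, 10, 3]                     # input width, one hidden layer, number of classes
model = train_network(X, Y, layers, regularization=1.0, max_iters=100)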
Example no. 7
def read_new_file():
    global Xtest, Ytest, Ysig2, target, X, Y, thetaArgs, theta, EI, yoffset, hyperprior

    filename = filestem + str(rng.randint(10)) + '.txt'
    print 'This is %s' % (filename)
    data = np.loadtxt(filename)
    Xtest = data[:,0:-1] # everything except the last column
    target = data[:,-1]   # just the last column

    D = 1 # dimensionality of search space
    yoffset = 0.0 # to keep track of shifts up and down to y-axis
    initNumSamples = 2 # number of initial inputs
    #xlo,xhi =0.01,4.0 # just the plotting boundary, not a real constraint :(
    #Xtest = np.arange(xlo,xhi,(xhi-xlo)/100.0) # the inputs we'll keep track of.
    d = len(np.ravel(Xtest))
    Xtest = Xtest.reshape((d,1))


    # take the initial samples
    X = np.zeros((initNumSamples)) 
    Y = np.zeros((initNumSamples,))
    for s in range(initNumSamples):
        i = rng.randint(0,d)
        X[s] = Xtest[i]
        Y[s] = target[i]
    initX,initY = X,Y

    # Here we initialise hyperparameters, and make initial predictions.
    (init_theta, hyperprior) = gp.setAndSampleHyperprior(D) # initial hyperparams
    thetaArgs = (X.reshape(len(X),1),Y,hyperprior) # fmin_cg needs these all in one box.
    theta = fmin_cg(gp.calcNegLogPosterior,init_theta, gp.calcNegGradLogPosterior, [thetaArgs], gtol=1e-2,disp=0)
    Ytest,Ysig2 = gp.calcGPPrediction(theta,thetaArgs,Xtest)
    EI = calcEI(Ytest,Ysig2,np.max(Y))
Example no. 8
  def train(self, X, y, lmda):
    '''
    Train the neural network's parameters. Call scipy's
    fmin_cg function for optimization.

    Arguments:
      X (m x n float matrix): Training examples.
      y (m 1d int array): Outputs of training examples.
      lmda (float): Lambda value for regularization.
    '''
    theta1 = self._rand_init_weights(self.input_layer_size,
                                     self.hidden_layer_size)
    theta2 = self._rand_init_weights(self.hidden_layer_size, 
                                     self.output_layer_size)
    theta = self.unroll(theta1, theta2)
    args = (X, y, lmda)

    res = opt.fmin_cg(self._cost, 
                      x0=theta, 
                      fprime=self._gradient_backpropagation,
                      args=args, 
                      maxiter=50, 
                      disp=False, 
                      full_output=True)
    # Save min cost and thetas.
    self.min_theta1, self.min_theta2 = self.roll(res[0])
    self.min_cost = res[1]
    print('cost {}'.format(self.min_cost))
def train_basis(basis, S, R, S_test, R_test, Mphi, Mrew, patience, 
                    max_iter, weighting, min_imp = 1e-5):
    try:
        n_test_inc = 0
        best_test_loss = numpy.inf
        while (n_test_inc < patience):
            
            basis.set_params( fmin_cg(basis.loss, basis.flat_params, basis.grad,
                                args = (S, R, Mphi, Mrew), 
                                full_output = False,
                                maxiter = max_iter, 
                                gtol = 1e-8) )
            
            err = basis.loss(basis.flat_params, S_test, R_test, Mphi, Mrew) 
            
            if err < (best_test_loss - min_imp):
                n_test_inc = 0
                print 'new best %s loss: ' % basis.loss_type, err
            else:
                n_test_inc += 1
                logger.info('iters without better %s loss: %d', basis.loss_type, n_test_inc)
            if err < best_test_loss:
                best_test_loss = err
                best_theta = copy.deepcopy(basis.theta)

        basis.theta = best_theta

    except KeyboardInterrupt:
        print '\n user stopped current training loop'

    return basis
Example no. 10
    def test_cg(self):
        # conjugate gradient optimization routine
        if self.use_wrapper:
            opts = {'maxiter': self.maxiter, 'disp': self.disp,
                    'return_all': False}
            res = optimize.minimize(self.func, self.startparams, args=(),
                                    method='CG', jac=self.grad,
                                    options=opts)
            params, fopt, func_calls, grad_calls, warnflag = \
                res['x'], res['fun'], res['nfev'], res['njev'], res['status']
        else:
            retval = optimize.fmin_cg(self.func, self.startparams,
                                      self.grad, (), maxiter=self.maxiter,
                                      full_output=True, disp=self.disp,
                                      retall=False)
            (params, fopt, func_calls, grad_calls, warnflag) = retval

        assert_allclose(self.func(params), self.func(self.solution),
                        atol=1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 9, self.funccalls)
        assert_(self.gradcalls == 7, self.gradcalls)

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_allclose(self.trace[2:4],
                        [[0, -0.5, 0.5],
                         [0, -5.05700028e-01, 4.95985862e-01]],
                        atol=1e-14, rtol=1e-7)
Example no. 11
def _fit_cg(f, score, start_params, fargs, kwargs, disp=True,
                maxiter=100, callback=None, retall=False,
                full_output=True, hess=None):
    gtol = kwargs.setdefault('gtol', 1.0000000000000001e-05)
    norm = kwargs.setdefault('norm', np.Inf)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_cg(f, start_params, score, gtol=gtol, norm=norm,
                               epsilon=epsilon, maxiter=maxiter,
                               full_output=full_output, disp=disp,
                               retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, fcalls, gcalls, warnflag = retvals
        else:
            xopt, fopt, fcalls, gcalls, warnflag, allvecs = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'fcalls': fcalls, 'gcalls': gcalls,
                   'warnflag': warnflag, 'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})

    else:
        xopt = None

    return xopt, retvals
Example no. 12
def run_once(x0,x1):
    sol = fmin_cg(rosen, [x0, x1], retall = True, full_output=1)
    xy = numpy.asarray(sol[-1])
    sam.putarray('xy',xy)
    sam.eval("plot(xy(:,1),xy(:,2),'w-','LineWidth',2)")
    sam.eval("plot(xy(:,1),xy(:,2),'wo','MarkerSize',6)")
    return sol
Example no. 13
def nnTrain(initial_theta, input_layer_size, hidden_layer_size, num_labels, X, y, lamda=0):
    """
    Trains a neural network, returns theta1, theta2
    
    Given:
    initial_theta: unrolled randomized theta that breaks symmetry
    nn layer sizes
    num_labels: number of output classes (equals K)
    X: m,n (DON'T include bias term in input X)
    y: m,K (vectorized representation for each y)
    lamda
    
    Returns:
    theta1, theta2
    """
    
    results = optimize.fmin_cg( nnCostFunction, 
                                fprime=nnGradientFunction, 
                                x0=initial_theta, 
                                args=(input_layer_size, hidden_layer_size, num_labels, X, y, lamda), 
                                maxiter=50, disp=False, full_output=True )
    theta_optimized = results[0]
    min_cost = results[1]
    rolled_theta_optimized = rolltheta(theta_optimized, input_layer_size, hidden_layer_size, num_labels)
    return rolled_theta_optimized[0], rolled_theta_optimized[1]
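A usage sketch for nnTrain, assuming nnCostFunction, nnGradientFunction and rolltheta exist in the same module; the layer sizes, one-hot labels and random initial_theta are illustrative stand-ins.

import numpy as np

input_layer_size, hidden_layer_size, num_labels = 400, 25, 10
X = np.random.rand(500, input_layer_size)                            # m x n, no bias column
y_vec = np.eye(num_labels)[np.random.randint(0, num_labels, 500)]    # m x K one-hot labels
n_params = hidden_layer_size * (input_layer_size + 1) + num_labels * (hidden_layer_size + 1)
initial_theta = np.random.uniform(-0.12, 0.12, n_params)
theta1, theta2 = nnTrain(initial_theta, input_layer_size, hidden_layer_size,
                         num_labels, X, y_vec, lamda=1.0)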
Example no. 14
  def train(self, X, Theta, Y, R, lmda):
    '''
    Train for the collaborative filtering and keep
    learned parameters in instance variable.

    Arguments:
      X (num_movies  x num_features float): Matrix of movies features 
         samples where each row of X corresponds to the feature
         vector x[i] for the i-th movie.
      Theta (num_users  x num_features float): Matrix of user features
        the j-th row of Theta corresponds to one parameter vector theta[j], 
        for the jth user.
      Y (num_movies x num_users float): Stores ratings (from 1 to 5).
      R (num_movies x num_users int): A binary-valued indicator matrix where
        R[i, j]==1 if user j gave a rating to movie i, and
        R[i, j]==0 if user j didn't give a rating to movie i.
      lmda (float): Regularization parameter lambda.
    '''
    self.min_X = None
    self.min_Theta = None
    self.min_cost = None

    # Initialize theta and args for fmin_cg.
    params = self.unroll(X, Theta)
    args = (Y, R, lmda)

    res = opt.fmin_cg(self.cost, 
                      x0=params, 
                      fprime=self.gradient,
                      args=args, 
                      maxiter=100,
                      disp=False, 
                      full_output=True)
    self.min_X, self.min_Theta = self.roll(res[0])
    self.min_cost = res[1]
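A minimal call sketch for the collaborative-filtering train method above; cf is a hypothetical instance of the class that defines it (and its cost, gradient, unroll and roll helpers), and the random matrices only match the documented shapes.

import numpy as np

num_movies, num_users, num_features = 50, 20, 5
X0 = np.random.randn(num_movies, num_features)
Theta0 = np.random.randn(num_users, num_features)
Y = np.random.randint(1, 6, size=(num_movies, num_users)).astype(float)
R = (np.random.rand(num_movies, num_users) > 0.5).astype(float)

cf.train(X0, Theta0, Y, R, lmda=10.0)   # cf: instance of the class defining train()
print(cf.min_cost)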
Example no. 15
    def oneVsAll(self, X, y, num_labels, lmd):
        '''
        Trains multiple logistic regression classifiers, on the
        same data.  Training data will have positive data sets
        (matching digit being trained for) and negative data sets
        (other digits).  Each training session uses all data to
        compute one row of theta matrix
        '''
        m = y.shape[0]

        ones = np.ones(m)

        X = np.column_stack((ones, X))

        n = X.shape[1]

        all_theta = np.zeros((num_labels, n))

        for label in xrange(1, num_labels+1):
            match = (y == label)

            init_theta = np.zeros(n)

            # all_theta[label-1,:] = optimize.fmin_cg(self.lrCostFunction, fprime=self.lrGradFunction, x0=init_theta, args=(X, match, lmd), maxiter=200)
            all_theta[label-1,:] = optimize.fmin_cg(self.lrCostFunction, x0=init_theta, args=(X, match, lmd), maxiter=200)

        self.all_theta = all_theta

        return all_theta
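A usage sketch for oneVsAll, assuming clf is a hypothetical instance of the classifier class that also defines lrCostFunction (and optionally lrGradFunction); the digit-style data and labels 1..10 are illustrative.

import numpy as np

X = np.random.rand(500, 400)               # 500 images, 400 pixel features, no bias column
y = np.random.randint(1, 11, size=500)     # labels 1..10
all_theta = clf.oneVsAll(X, y, num_labels=10, lmd=0.1)
print(all_theta.shape)                     # (10, 401) after the bias column is prepended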
Example no. 16
    def test_cg(self, use_wrapper=False):
        """ conjugate gradient optimization routine """
        if use_wrapper:
            opts = {'maxit': self.maxiter, 'disp': False}
            params, info = optimize.minimize(self.func, self.startparams,
                                             args=(), method='CG',
                                             jac=self.grad, options=opts,
                                             full_output=True,
                                             retall=False)

            fopt, func_calls, grad_calls, warnflag = \
                    info['fun'], info['nfev'], info['njev'], info['status']
        else:
            retval = optimize.fmin_cg(self.func, self.startparams, self.grad, (),
                                      maxiter=self.maxiter,
                                      full_output=True, disp=False, retall=False)

            (params, fopt, func_calls, grad_calls, warnflag) = retval

        err = abs(self.func(params) - self.func(self.solution))
        #print "CG: Difference is: " + str(err)
        assert_(err < 1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 9, self.funccalls)
        assert_(self.gradcalls == 7, self.gradcalls)

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_(np.allclose(self.trace[2:4],
                           [[0, -0.5, 0.5],
                            [0, -5.05700028e-01, 4.95985862e-01]],
                           atol=1e-14, rtol=1e-7), self.trace[2:4])
Example no. 17
def linear_optimize(X, y, lamda):
    lamda = float(lamda)
    num_samples = float(len(y))
    def cost_function(theta):
        regularization_term = (lamda / (2.0 * num_samples)) * \
        np.sum(np.power(theta[1::], 2))
        cost = (1.0 / (2.0 * num_samples)) * np.sum(
            np.power(np.subtract(np.dot(X, theta), y), 2)) + regularization_term
        return cost
    def gradient_function(theta):
        grad = np.zeros_like(theta)
        grad[0] = (1 / num_samples) * np.sum(
            np.subtract(np.dot(X, theta), y) * X[0::,0])
        for i in range(1, len(grad)):
            reg_term = (lamda / num_samples) * theta[i]
            grad[i] = (1 / num_samples) * np.sum(
                np.subtract(np.dot(X, theta), y) * X[0::,i]) + reg_term
        return grad
    initial_theta = np.array(np.zeros((X.shape[1], 1)), dtype=np.float64)
    return fmin_cg(
        f=cost_function,
        x0=initial_theta,
        fprime=gradient_function,
        maxiter=200,
        disp=0)
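linear_optimize above only needs numpy, fmin_cg and a design matrix whose first column is ones, so a self-contained sketch with synthetic data (intercept 2, slope 3) is enough to show the call; the data is made up.

import numpy as np

m = 100
x_raw = np.random.rand(m)
X = np.column_stack((np.ones(m), x_raw))          # bias column + one feature
y = 2.0 + 3.0 * x_raw + 0.1 * np.random.randn(m)
theta = linear_optimize(X, y, lamda=1.0)
print(theta)                                      # roughly [2, 3], shrunk a little by the penalty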
Example no. 18
    def fit(self, x_n3, y_n3):        

        trans_init = (y_n3.mean(axis=0) - x_n3.mean(axis=0))[:2]

        self_copy = deepcopy(self)
        def f(params):
            self_copy.set_params(params)
            xmapped_n3 = self_copy.transform_points(x_n3)
            return fit_score(xmapped_n3, y_n3,.5)


        # vals_params = []
        # for rot_init in rot_inits:
        #     opt_params, opt_val, _, _, _ = opt.fmin_cg(f, np.r_[trans_init],full_output=True)
        #     vals_params.append((opt_val, opt_params))
        # 
        # 
        # best_val, best_params = min(vals_params, key = lambda x:x[0])

        best_params, best_val, _,_,_ = opt.fmin_cg(f, np.r_[trans_init], full_output=True)

        print "best_params:", best_params
        self.set_params(best_params)
        self.objective = best_val

        Globals.setup()
        draw_orig_new_warped_pcs(x_n3, y_n3, self.transform_points(x_n3))
Example no. 19
def test():
    data = np.loadtxt("data.txt")
    X = data[:,0:-1] # everything except the last column
    y = data[:,-1]   # just the last column

    args = (X,y)

    #theta = np.array([ 1.7657065779589087, -1.3841332550882446, -10.162222605402242])
    #theta = np.array([ 1.7999382115210827, -14.001391904643032 , -5.577578503745549])
    theta = np.zeros(3)
    theta[0] = np.random.normal(0,5)
    theta[1] = np.random.normal(0,5)
    theta[2] = np.random.normal(0,5)
    print theta
    print np.exp(theta)
    print logPosterior(theta,args)
    print gradLogPosterior(theta,args)
    print so.check_grad(logPosterior, gradLogPosterior, theta, args)

    newTheta = so.fmin_cg(logPosterior, theta, fprime=gradLogPosterior, args=[args], gtol=1e-4,maxiter=100,disp=1)
    print newTheta, logPosterior(newTheta,args)

    K = kernel2(X,X,newTheta,wantderiv=False)
    L = np.linalg.cholesky(K)
    beta = np.linalg.solve(L.transpose(), np.linalg.solve(L,y))
    test = X
    #pred = [predict(i,input,K,target,newTheta,L,beta) for i in input]
    #pred = np.squeeze([predict(i,input,K,target,newTheta,L,beta) for i in input])
    demoplot(theta,args)
    demoplot(newTheta,args)
Example no. 20
def unblur(y, msk, sigma, maxiter=20):
    """
    y should be zero in the mask
    """
    cache = {'xf': None, 'res': None}
    nvoxels = np.sum(1 - msk)
    print nvoxels
    x = np.array(y)
    dom = np.logical_not(msk)  # complement of the mask: the voxels being optimized

    def residual(xf):
        if xf is not cache['xf']:
            x[dom] = xf
            cache['res'] = blur(x, msk, sigma)[dom] - y[dom]
            cache['xf'] = xf
        return cache['res']

    def callback(xf):
        print error(xf)

    def error(xf):
        return .5 * np.sum(residual(xf) ** 2)

    xf = op.fmin_cg(error, x[dom], fprime=residual, 
                    maxiter=maxiter, callback=callback)
    x[dom] = xf
    return x
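A sketch of calling unblur on synthetic data, assuming the blur function it uses internally is available from the same module and takes (x, msk, sigma); msk marks the voxels held fixed, and the shapes below are arbitrary.

import numpy as np

shape = (16, 16, 16)
msk = np.zeros(shape, dtype=bool)
msk[:2] = True                        # freeze a slab of voxels
sigma = 1.5
truth = np.random.rand(*shape)
y = blur(truth, msk, sigma)           # blur() is assumed to exist alongside unblur()
y[msk] = 0.0                          # "y should be zero in the mask"
x_hat = unblur(y, msk, sigma, maxiter=10)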
Example no. 21
    def fit(self, x_n3, y_n3, prev_params=None):

        if prev_params is None:
            trans_init = y_n3.mean(axis=0) - x_n3.mean(axis=0)
            rot_inits = [(0, 0, 0)]
        else:
            trans_init = prev_params[:3]
            rot_inits = [prev_params[3:]]

        self_copy = deepcopy(self)
        def f(params):
            self_copy.set_params(params)
            xmapped_n3 = self_copy.transform_points(x_n3)
            return fit_score(xmapped_n3, y_n3,.5)


        vals_params = []
        for rot_init in rot_inits:
            opt_params, opt_val, _, _, _ = opt.fmin_cg(f, np.r_[trans_init, rot_init],full_output=True)
            vals_params.append((opt_val, opt_params))


        best_val, best_params = min(vals_params, key = lambda x:x[0])
        print "best_params:", best_params
        self.set_params(best_params)
        self.objective = best_val

        Globals.setup()
        draw_orig_new_warped_pcs(x_n3, y_n3, self.transform_points(x_n3))
def fit(X,y,maxiter = 50,method = 'TNC',lam = 0.1):
    no_of_rows = X.shape[0]
    no_of_features = X.shape[1]
    no_of_labels = len(set(y))
    fit_theta = np.zeros((no_of_labels,no_of_features+1))
    #adding a vector of ones to the X matrix (as the first column) - bias terms for each training example
    X = np.insert(X,0,1,axis=1)
    
    initial_theta = np.zeros((no_of_features+1,1))

    for i in range(no_of_labels):
        temp_y = (y == (i)) + 0 # here labels are 0,1,2,3.. if they are 1,2,3,4... use: temp_y = (y == (i+1))+0
        #temp_y is a vector of size no_of_training_examples
        #since each iteration corresponds to finding theta for a single class (one-vs-all)
        #each time, we only take the prediction of class 'i' on all training examples

        _res = optimize.fmin_cg(returnJ, fprime=returnThetaGrad, x0=initial_theta, args=(X, temp_y, lam), maxiter=maxiter, disp=False, full_output=True)
        fit_theta[i,:] = _res[0]  
        """
        different minimization functions (above and below)
        """
        #options = {'maxiter': maxiter}
        #_res = optimize.minimize(returnJ, initial_theta, jac=returnThetaGrad, method=method,args=(X, temp_y,lam), options=options)        
        #fit_theta[i,:] = _res.x

    return fit_theta
Example no. 23
	def train(self, myiter = 200):
		print "training..."
		init_theta = self.get_theta()
		theta = fmin_cg(self.cost, init_theta, args = (self.X, self.y, self.LAMBDA), 
			fprime = self.grad, maxiter = myiter)

		return theta
 def optimize(self, theta, maxfun, revert=False):
     '''real executing function'''
     self.size = len(theta)
     self.revert = revert
     initials = []
     index = 0
     self.feature2index, self.index2feature = {}, {}
     for feature, value in theta.items():
         self.feature2index[feature] = index
         self.index2feature[index] = feature
         initials.append(float(value))
         index += 1
     if self.method == "LBFGS":
         from scipy.optimize import fmin_l_bfgs_b
         # http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_l_bfgs_b.html
         (xopt, fopt, return_status) = fmin_l_bfgs_b(self.value_translator, initials, self.gradient_translator,
                                                     pgtol=0.1, maxfun=maxfun)
         # print "============Optimization by LBFGS returns: ", return_status['task']
     elif self.method == "CG":
         from scipy.optimize import fmin_cg
         # http://www.scipy.org/doc/api_docs/SciPy.optimize.optimize.html#fmin_cg
         (xopt, fopt, _, _, return_status) = fmin_cg(self.value_translator, initials, self.gradient_translator,
                                                     full_output=1, disp=0)
         # print "============CG: ", return_status
     else:
         raise Exception("No optimization method defined!")
     self.size = None
     for i, x in enumerate(xopt):
         theta[self.index2feature[i]] = x
     return (fopt, theta, return_status)
Example no. 25
def run_once(x0,x1):
    sol = fmin_cg(rosen, [x0, x1], retall = True, full_output=1)
    xy = numpy.asarray(sol[-1])
    pylab.plot(xy[:,0],xy[:,1],'w-',linewidth=2)
    pylab.plot(xy[:,0],xy[:,1],'wo',markersize=6)
    show()
    return sol
Example no. 26
    def fit(self, data, targets):
        if type(data) is not np.ndarray:
            data = np.array(data)
        if type(targets) is not np.ndarray:
            targets = np.array(targets)
        assert data.ndim == 2
        assert targets.ndim == 1
        assert len(data) == len(targets)

        # Turn 1D targets array into a 2D array
        targets_2D = np.zeros((self._layer_sizes[-1], len(targets)))
        for obs_number, label in enumerate(targets):
            # watch out! observations go down columns here
            targets_2D[label, obs_number] = 1

        data = data.transpose()
        def obj(Thetas_vec):
            return calc_cost(data, targets_2D, self._lmbda,
                             unflatten(Thetas_vec, self._layer_sizes))

        def obj_grad(Thetas_vec):
            curr_Thetas = unflatten(Thetas_vec, self._layer_sizes)
            targets_est, As, Zs = feedforward_full(curr_Thetas, data)
            deltas = backprop(curr_Thetas, Zs, targets_est, targets_2D)
            grads = calc_grads(deltas, As, self._lmbda, curr_Thetas)

            return flatten(grads)

        # use the gradient defined above; init_thetas and max_iter are assumed
        # to be set up elsewhere in the class (e.g. in __init__)
        min_thetas = fmin_cg(obj, flatten(init_thetas), obj_grad, maxiter=max_iter)
        self._Thetas = min_thetas
        return self
Example no. 27
def fit(Y, R, alpha, n):
    """
    Fits the parameters of the collaborative filtering model

    Arguments
    ----------
    Y: mxu rating matrix
    R: mxu i has been rated by j boolean matrix
    n: Number of features.
    alpha: regularization parameter controls model complexity.

    Return
    ----------
    (X,Theta)
    X: mxn feature matrix
    Theta: uxn weight matrix
    """
    m, u = Y.shape
    p = np.random.random((m + u) * n)

    # minimize cost function
    costf = lambda x: cost(x, Y, R, alpha)
    gradf = lambda x: grad(x, Y, R, alpha)
    p = fmin_cg(costf, p, fprime=gradf, maxiter=100, disp=False)

    # unroll parameters
    X = np.resize(p[:m * n], (m, n))
    Theta = np.resize(p[m * n:], (u, n))

    return (X, Theta)
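A usage sketch for fit above, assuming cost and grad are the module-level cost and gradient it wraps; Y and R are random stand-ins with the documented m x u shapes.

import numpy as np

m, u, n = 40, 15, 6                        # movies, users, features
Y = np.random.randint(0, 6, size=(m, u)).astype(float)
R = (Y > 0).astype(float)                  # 1 where a rating exists
X, Theta = fit(Y, R, alpha=1.0, n=n)
predictions = X.dot(Theta.T)               # m x u matrix of predicted ratings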
Example no. 28
    def train_cg(self, input, target, **kwargs):
        """
        Train network with conjugate gradient algorithm.

        :Parameters:
            input : 2-D array
                Array of input patterns
            target : 2-D array
                Array of network targets
            maxiter : integer, optional
                Maximum number of iterations (default is 10000)
            disp : bool
                If True convergence method is displayed (default)

        .. seealso::
            `scipy.optimize.fmin_cg` optimizer is used in this method. Look
            at its documentation for possible other useful parameters.
        """
        if 'maxiter' not in kwargs: kwargs['maxiter'] = 10000
        input, target = self._setnorm(input, target)
        func = netprop.func
        fprime = netprop.grad
        extra_args = (self.conec, self.bconecno, self.units, \
                           self.inno, self.outno, input, target)
        self.weights = optimize.fmin_cg(func, self.weights, fprime=fprime, \
                                        args=extra_args, **kwargs)
        self.trained = 'cg'
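A hedged call sketch for train_cg, assuming net is an already-constructed instance of the feed-forward network class this method belongs to; the toy regression data is illustrative only.

import numpy as np

inputs = np.random.rand(200, 2)                        # 2-D array of input patterns
targets = np.sin(inputs.sum(axis=1)).reshape(-1, 1)    # 2-D array of network targets
net.train_cg(inputs, targets, maxiter=5000, disp=1)    # extra kwargs are passed through to fmin_cg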
Example no. 29
 def test_logreg_grad(self):
     ex1 = ml.logreg_grad(np.zeros(3), self.ex2data1[0], self.ex2data1[1])
     ex2 = optimize.fmin_cg(ml.logreg_cost, np.ones(3),
                            args=(self.ex2data1[0], self.ex2data1[1]),
                            fprime = ml.logreg_grad)
     self.assertEqual(round(np.sum(ex1),1), -23.4)
     self.assertEqual(round(np.sum(ex2), 2), -24.75)
  def train(self, lmda):
    '''
    Train using training examples, call scipy's fmin_cg
    function for optimization.

    Arguments:
      lmda (float): Lambda value for regularization.

    Return:
      (float): Cost.
      (1d float array): Minimum thetas.
    '''
    # Reset before training.
    self._min_theta = None
    self._costs = None

    # Initialize theta and args for fmin_cg.
    theta = np.zeros(self._X.shape[0])
    args = (self._X, self._y, lmda)

    res = opt.fmin_cg(LinearRegressionRegularized._cost, 
                      x0=theta, 
                      fprime=LinearRegressionRegularized._gradient,
                      args=args, 
                      maxiter=200, 
                      disp=False, 
                      full_output=True)
    self._min_theta = res[0]
    self._costs = res[1]
Example no. 31
            gradient[i][j] = derivative
            j += 1
        i += 1
    val = gradient.flatten()
    print(val.shape)
    return val


Y, W, X_prim = generateData()
W0 = np.ones([10, 2])
W0 = W0.flatten().T
W0 = np.random.randn(20)
W0 = W0.reshape((10, 2)).flatten().T
args = Y

W_star = opt.fmin_cg(f, W0, fprime=dfx, args=(Y, ))  # x_star = A_star

W_star = W_star.reshape((2, 10)).T

inner = np.linalg.inv(np.dot(W_star.T, W_star))
X_approx = np.dot(Y, np.dot(W_star, inner))
print(X_approx[50:60])
print(X_prim[50:60])
line1, = plt.plot(X_approx[:, 0],
                  X_approx[:, 1],
                  label="Learned $X'$",
                  color='b')
line2, = plt.plot(X_prim[:, 0], X_prim[:, 1], label="True $X'$", color='r')
fl = plt.legend(handles=[line1], loc=1)
ax = plt.gca().add_artist(fl)
plt.legend(handles=[line2], loc=4)
Example no. 32
m, n = X_pf.shape
theta_pf = np.zeros((n + 1, 1))
#print theta_pf.shape
result = op.minimize(computeCost,
                     theta_pf,
                     method='TNC',
                     jac=Gradient,
                     args=(X_pf, Y),
                     options={'maxiter': 200})
print result
r = op.fmin_bfgs(computeCost2, theta_pf, args=(X_pf, Y), maxiter=40)
print r
print r[0]
theta_pf = r

print op.fmin_cg(computeCost2, theta_pf, maxiter=200, args=(X_pf, Y))
#theta_pf=result.x

theta_pf = theta_pf.reshape((n + 1, 1))
#print theta_pf
''' ### Plotting the Linear Regulization line ### '''

scatter(X, Y, marker='x')
plt.xlabel('Change in water levels(X)')
plt.ylabel('Water flowing out of the dam(Y)')
theta_pfR = np.zeros((1, 1))
theta_pfR = np.append(theta_pfR, theta_pf, axis=0)
X_pfR = np.append(np.ones((X_pf.shape[0], 1)), X_pf, axis=1)
x = np.arange(X.min() - 15, X.max() + 25, 0.05)
x = x.reshape((x.size, 1))
x_poly = polymap(x, p)
Example no. 33
def logistic_reg(x, y, theta, l=0, verbose=0, method='g'):
    """
  Determines theta vector for a given polynomial degree and lambda
  x is a pandas DataFrame
  y is a pandas DataFrame
  l = 0: regularization coefficient / default is no regularization
  Methods for cost function minimization (default is gradient descent):
    'g': gradient descent
    'cg': conjugate gradient
    'bfgs': BFGS (Broyden Fletcher Goldfarb Shanno)
  """
    # Number of features
    n = x.shape[1]
    # Number of training set examples
    m = x.shape[0]
    # Number of classes
    K = y.shape[1]

    if len(theta[1]) != n + 1:
        print "In logistic_reg.py:\nproblem of dimension between number of features and number of parameters !!"
        print "Number of features:", n
        print "Length of theta vector:", len(theta[1])
        sys.exit()

    for k in range(1, K + 1):
        theta[k] = np.array(theta[k], dtype=float)
        CF = CostFunction(x, y.values[:, k - 1], l)

        if verbose:
            if n == 1:
                from PdF_log_reg import hypothesis_function
                syn, hyp = hypothesis_function(x.min(), x.max(), theta[k])
                plot_hyp_func(x, y[k], syn, hyp)
            if n == 2:
                plot_db(x,
                        y[k],
                        theta[k],
                        lim=3,
                        title='Initial decision boundary')
            if n == 3:
                plot_db_3d(x,
                           y[k],
                           theta[k],
                           lim=3,
                           title='Initial decision boundary')

        stop = 10**-3
        if method == 'cg':
            # Conjugate gradient
            from scipy.optimize import fmin_cg
            theta[k], allvecs = fmin_cg(CF.compute_cost,
                                        theta[k],
                                        fprime=CF.compute_gradient,
                                        gtol=stop,
                                        disp=verbose,
                                        retall=True)
        elif method == 'bfgs':
            # BFGS (Broyden Fletcher Goldfarb Shanno)
            from scipy.optimize import fmin_bfgs
            theta[k], allvecs = fmin_bfgs(CF.compute_cost,
                                          theta[k],
                                          fprime=CF.compute_gradient,
                                          gtol=stop,
                                          disp=verbose,
                                          retall=True)
        elif method == 'g':
            # Gradient descent
            theta[k], min_cost = gradient_descent(CF, theta[k], opt=0)
            allvecs = None

        if verbose:
            if allvecs:
                min_cost = []
                for vec in allvecs:
                    min_cost.append(CF.compute_cost(vec))
            nb_iter = len(min_cost)
            plot_cost_function(nb_iter, min_cost)
            plt.show()

    if verbose:
        if n == 1 and K == 1:
            from PdF_log_reg import hypothesis_function
            syn, hyp = hypothesis_function(x.min(), x.max(), theta[1])
            plot_hyp_func(x, y[1], syn, hyp)
        if n == 2:
            if K != 1:
                plot_multiclass_2d(x, theta)
            else:
                plot_db(x, y, theta[1], title='Decision boundary')
        if n == 3:
            if K != 1:
                plot_multiclass_3d(x, theta)
            else:
                plot_db_3d(x, y, theta[1], title='Decision boundary')
        plt.show()

    return theta
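A call sketch for logistic_reg, assuming CostFunction, gradient_descent and the plotting helpers it imports are available; per the docstring, x and y are pandas DataFrames and theta is a dict keyed by class index starting at 1, with n + 1 parameters per class.

import numpy as np
import pandas as pd

x = pd.DataFrame(np.random.rand(100, 2), columns=['f1', 'f2'])
y = pd.DataFrame({1: np.random.randint(0, 2, 100)})    # single class column, so K = 1
theta = {1: np.zeros(x.shape[1] + 1)}                  # n + 1 parameters
theta = logistic_reg(x, y, theta, l=0.1, verbose=0, method='cg')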
Example no. 34
costFunc = lambda p: nnCostFunctionVec(p,
                                       input_layer_size,
                                       hidden_layer_size,
                                       num_labels,
                                       Xtraining,
                                       ytraining,
                                       lam,
                                       returnType='J')
gradFunc = lambda p: nnCostFunctionVec(p,
                                       input_layer_size,
                                       hidden_layer_size,
                                       num_labels,
                                       Xtraining,
                                       ytraining,
                                       lam,
                                       returnType='grad')

nn_params = optimize.fmin_cg(costFunc, nn_params, fprime=gradFunc, maxiter=500)

Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
    hidden_layer_size, input_layer_size + 1, order='F')
Theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
    num_labels, (hidden_layer_size + 1), order='F')

displayData(Theta1[:, 1:])

pred = predict(Theta1, Theta2, Xtraining)
pred = pred.reshape(pred.size, 1)

trainingAccuracy = np.mean(pred == ytraining) * 100

print("Training accuracy: ", trainingAccuracy)
Example no. 35
    nIter = 100
    nCall = 1e4
    comsci = True  # compare vs SciPy
    plotting = False  # plot the result

    ### Solver call
    xs,fs,ct,Xs,it = cg(fcn,x0,nCall,\
                        lin=0,nIter=nIter)

    print "f(xs)=%f" % fs
    print "calls=%d" % ct
    print "iter=%d" % it

    # Scipy call
    if comsci == True:
        res = fmin_cg(fcn, x0, retall=True)

    ### Plotting
    if plotting == True:
        # Define meshgrid
        delta = 0.025
        x = np.arange(min(x0[0],xstar[0])-0.5, \
                      max(x0[0],xstar[0])+0.5, delta)
        y = np.arange(min(x0[1],xstar[1])-0.5, \
                      max(x0[1],xstar[1])+0.5, delta)
        X, Y = np.meshgrid(x, y)
        dim = np.shape(X)
        # Compute function values
        Xv = X.flatten()
        Yv = Y.flatten()
        Input = zip(Xv, Yv)
    
    return J

def gradf(params,*args):
    X_train,y_train = args
    m,n = X_train.shape
    theta = params.reshape(-1,1)
    h = out(X_train,theta)
    grad = np.zeros((X_train.shape[1],1))
    grad = X_train.T.dot((h-y_train)) / m
    g = grad.ravel()
    return g

#res = optimize.minimize(f,x0=init_theta,args=args,method='BFGS',jac=gradf,\
#                        options={'gtol': 1e-6, 'disp': True})
res = optimize.fmin_cg(f,x0=params,fprime=gradf,args=args,maxiter=500)
print(res)

#visualize the linear decision boundary
label = np.array(y)
index_0 = np.where(label.ravel()==0)
plt.scatter(X[index_0,1],X[index_0,2],marker='x'\
            ,color = 'b',label = 'Not admitted',s = 15)
index_1 =np.where(label.ravel()==1)
plt.scatter(X[index_1,1],X[index_1,2],marker='o',\
            color = 'r',label = 'Admitted',s = 15)

#show the decision boundary
x1 = np.arange(20,100,0.5)
x2 = (- res[0] - res[1]*x1) / res[2]
plt.plot(x1,x2,color = 'black')
Example no. 37
grid_size = int((x_max - x_min) * resolution)
grid = np.zeros((grid_size + 1, 2))
grid[0, 0] = np.rint(np.log(x_min / (a * (z**(-1.0 / 3.0)))) / b + 1)
for i in np.arange(1, grid_size + 1):  #Set box bounds
    grid[i, 0] = i * (x_max - x_min) / grid_size  #UNITS: a.u.
    grid[i,
         0] = np.log(grid[i, 0] /
                     (a * (z**(-1.0 / 3.0)))) / b + 1  #Converts to g.p units
    grid[i, 0] = np.rint(grid[i, 0])  #Ensures integer values of g.p

from scipy import optimize

terms = len(coeff_vector)
for jj in range(new_terms + 1):
    print 'entering jj'
    opt_coeffs = optimize.fmin_cg(err_check,
                                  coeff_vector,
                                  fprime=slope_find,
                                  epsilon=0.00001)
    coeff_vector = np.append(opt_coeffs, np.array([0.0]))
    print coeff_vector

print 'final coeff_vector'
print coeff_vector[0:-1]
print 'final error = %s' % err_check(coeff_vector[0:-1])
print 'Total inconsistent slopes: %s' % inconsistent_slope_count[0]
x = np.linspace(x_min, x_max, grid_size + 1)
w = (x_max - x_min) / (grid_size + 2)
plt.bar(x, grid[:, 1], width=w)
#plt.title('Atomic #: ' + str(z) +'   Er_stat=' + str(Er_stat))
plt.show()
Example no. 38
for N_hdn in (1, 3, 10, 50):
    print("START:", N_hdn)

    w = np.random.randn((N_in + 1) * N_hdn + (N_hdn + 1) * N_out) * np.sqrt(10)
    # stochastic gradient descent
    #    Err_bk=0
    #    Err=J(w,*(x_t,y_t))
    #    i=0
    #    while(np.fabs(Err-Err_bk)>1e-7 and i < 50000):
    #        w = w - 0.1*gradient(w,*(x_t,y_t))/(N_hdn)
    #        Err_bk=Err
    #        Err=J(w,*(x_t,y_t))
    #        if(i%500==0):print(i,Err)
    #        i+=1
    # conjugate gradient method
    w = optimize.fmin_cg(J, w, fprime=gradient, args=(x_t, y_t))
    #w = optimize.fmin_bfgs(J, w, fprime=gradient, args=(x_t,y_t))
    #w = optimize.fmin_cg(J, w, args=args,gtol=0)

    w_l1 = w[:(N_in + 1) * N_hdn].reshape(N_in + 1, N_hdn)
    w_l2 = w[(N_in + 1) * N_hdn:].reshape(N_hdn + 1, N_out)

    # plot
    xd = np.arange(-0., 1.001, 0.001)
    yd = np.zeros(xd.shape)
    zd = np.zeros([xd.shape[0], N_hdn + 1])

    plt.subplot(2, 2, grpNo)
    grpNo += 1
    plt.plot(x_t, y_t, 'o')
    for i in range(xd.shape[0]):
Example no. 39
def train(X, y, reg):
    args = (X, y, reg)
    initial_theta = np.zeros((X.shape[1], 1))
    params = initial_theta.ravel()
    res = optimize.fmin_cg(f, x0=params, fprime=gradf, args=args, maxiter=500)
    return res
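train above just wraps fmin_cg around module-level f and gradf; a sketch assuming those are a (regularized) cost and gradient for a design matrix that already carries a bias column.

import numpy as np

X = np.column_stack((np.ones(100), np.random.rand(100, 2)))   # bias + 2 features
y = np.random.randint(0, 2, size=(100, 1))
theta_opt = train(X, y, reg=1.0)
print(theta_opt)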
Example no. 40
from models.optimizers import ConjugateGradientAlgorithm
from functions import functionObj, exercise61
from scipy.optimize import fmin_cg
import autograd.numpy as np

x_0 = np.zeros(16)
f_x = exercise61

print('-----------Non-linear Conjugate from Scipy-----------')

f_x_obj = functionObj(f_x)

x_min, f_min, _, _, _, = fmin_cg(f_x_obj, x_0, full_output=True)
x_min = f_x_obj.best_x
f_min = f_x_obj.best_f
print('X: ', x_min)
print('F: ', f_min)
print('Function evals: %d' % (f_x_obj.fevals))

print('-----------ConjugateDescentAlgorithm-----------')
f_x_obj = functionObj(f_x)

opt = ConjugateGradientAlgorithm(f_x_obj, x_0, 1e3, xtol=1e-6)
opt.find_min()
x_min = f_x_obj.best_x._value
f_min = f_x_obj.best_f._value
print('X: ', x_min)
print('F: ', f_min)
print('Function evals: %d' % (f_x_obj.fevals))
Example no. 41
# Train NN
_lambda = 1

# Debug
c = 0
def debug(_):
  global c
  c = c + 1
  print("Iteration #{c}".format(**globals()))

print('Training...')
nn_params = fmin_cg(
              nn_cost_function(input_layer_size, hidden_layer_size, num_labels, X, Y, _lambda),
              init_nn_params,
              fprime=nn_gradients(input_layer_size, hidden_layer_size, num_labels, X, Y, _lambda),
              maxiter=50,
              callback=debug
            )

# Reshape
theta_1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))]. \
            reshape([hidden_layer_size, input_layer_size+1])
theta_2 = nn_params[(hidden_layer_size*(input_layer_size+1)):]. \
            reshape([num_labels, hidden_layer_size+1])

# Display data
display_data(theta_1[:, 1:], save=True, file_name='2.png')

# Estimate performance
a = forward_prop(X)(theta_1, theta_2)['a'][-1]
Example no. 42
    point = [1234., -500., 10., 0.001]  # both cg and nm does fine
    point = [1000, -100, 0, 1]  # cg will do badly on this one
    # this will try nelder-mead from an unconverged DE solution
    #point = dstepmon.x[-150]
    #
    simplex, esow = Monitor(), Monitor()
    solver = fmin(len(point))
    solver.SetInitialPoints(point)
    solver.SetEvaluationMonitor(esow)
    solver.SetGenerationMonitor(simplex)
    solver.Solve(cost_function, CRT())
    sol = solver.Solution()

    print "\nsimplex solution: ", sol
    #
    solcg = fmin_cg(cost_function, point)
    print "\nConjugate-Gradient (Polak Rubiere) : ", solcg
    #
    if leastsq:
        sollsq = leastsq(vec_cost_function, point)
        sollsq = sollsq[0]
        print "\nLeast Squares (Levenberg Marquardt) : ", sollsq
    #
    legend = [
        'Noisy data', 'Differential Evolution', 'Nelder Mead', 'Polak Ribiere'
    ]
    plot_noisy_data()
    plot_sol(desol, 'r-')
    plot_sol(sol, 'k--')
    plot_sol(solcg, 'b-.')
    if leastsq:
Example no. 43
def optimizeTheta(mytheta,myX,myy,mylambda=0.):
    result = optimize.fmin_cg(computeCost, fprime=costGradient, x0=mytheta, \
                              args=(myX, myy, mylambda), maxiter=50, disp=False,\
                              full_output=True)
    return result[0], result[1]
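optimizeTheta is a thin wrapper around fmin_cg; a usage sketch assuming computeCost and costGradient are the regularized logistic-regression cost and gradient defined elsewhere in that module, with made-up data shapes.

import numpy as np

m, n = 100, 3
myX = np.column_stack((np.ones(m), np.random.rand(m, n - 1)))  # bias column included
myy = np.random.randint(0, 2, size=(m, 1))
init_theta = np.zeros((n, 1))
theta_opt, min_cost = optimizeTheta(init_theta, myX, myy, mylambda=0.1)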
def recommender():
    cur = mysql.connection.cursor()
    num_places = 48
    reg_param = 30  #regularisation parameter

    #contains the ratings given by the users to different places
    cur.execute("SELECT * FROM ratings")
    rate = cur.fetchall()
    num = len(rate)
    ratings = [[0 for i in range(num)] for j in range(48)]

    #transpose the matrix
    for i in range(num):
        for j in range(48):
            ratings[j][i] = rate[i][j]

    num_users = num

    #did_rate checks if a user has rated a place
    did_rate = [[0 for i in range(num_users)] for j in range(num_places)]
    for i in range(num_places):
        for j in range(num_users):
            if ratings[i][j] != 0:
                did_rate[i][j] = 1

    ratings = np.array(ratings)
    #print(did_rate)
    #print()
    #return str(ratings)
    #Normalize our data
    #Normalization makes the average of the data as a 0
    ratings, ratings_mean = normalize_rat(ratings, did_rate)
    #return str(ratings_mean)
    #print(ratings)

    #update the number of users
    num_users = ratings.shape[1]
    num_features = 13

    #how much of each feature is present
    cur.execute("SELECT * FROM place_features")
    place_features = cur.fetchall()

    place_features = np.array(place_features)
    place_features, place_mean = normalize(place_features)
    #print('place_features')
    #print(place_features)
    #print()
    #return str(place_features)

    #what kind of place a user would prefer
    cur.execute("SELECT * FROM user_prefs")
    user_prefs = cur.fetchall()
    user_prefs = np.array(user_prefs)
    user_prefs, user_mean = normalize(user_prefs)
    #print("user_prefs")
    #print(user_prefs)
    #return str(user_prefs)

    #X=place features and theta=user pref y=X*theta
    initial_X_and_theta = r_[place_features.T.flatten(),
                             user_prefs.T.flatten()]

    #return str(initial_X_and_theta)
    #we are going to use gradient descent
    #performing gradient descent
    minimized_cost_and_optimal_params = optimize.fmin_cg(
        calculate_cost,
        fprime=calculate_gradient,
        x0=initial_X_and_theta,
        args=(ratings, did_rate, num_users, num_places, num_features,
              reg_param),
        maxiter=100,
        disp=True,
        full_output=True)
    #return str(minimized_cost_and_optimal_params)
    cost, optimal_place_features_and_user_prefs = minimized_cost_and_optimal_params[
        1], minimized_cost_and_optimal_params[0]
    #return str(optimal_place_features_and_user_prefs)
    place_features, user_prefs = unroll_params(
        optimal_place_features_and_user_prefs, num_users, num_places,
        num_features)
    #print(place_features)
    #return str(place_features)

    all_predictions = place_features.dot(user_prefs.T)
    #return str(ratings_mean)

    predictions_for_user = all_predictions[:, 0:1] + ratings_mean
    #print("Final ratings I would give to the Places:")
    #return str(all_predictions)
    final_output = []
    for i in range(num_places):
        final_output.append([predictions_for_user[i], i + 1])
    final_output.sort(reverse=True)

    #return str(final_output[1][0])
    cur.execute("SELECT * FROM place_id")
    place_id = cur.fetchall()

    cur.execute("DROP TABLE results")
    cur.execute("CREATE TABLE results (place VARCHAR(32))")
    #return str(final_output)
    for i in range(10):
        cur.execute("SELECT place from place_id WHERE id =" +
                    str(final_output[i][1]))
        #return str(final_output[i][1])
        ans = cur.fetchone()
        #return str(ans)
        m = str(ans[0])
        #return str(m)
        cur.execute("INSERT INTO results (place) VALUES(%s)", [str(m)])
        #return str(pl)
        #print(final_output[i][0]," ",final_output[i][1])
    #print(predictions_for_user)
    #print(len(predictions_for_user))

    cur.execute("SELECT * FROM results")
    userDetails = cur.fetchall()
    mysql.connection.commit()
    cur.close()
    return render_template('users.html', userDetails=userDetails)
Example no. 45
def minimize(func,
             x0,
             gradient=None,
             hessian=None,
             algorithm="default",
             verbose=False,
             **args):
    r"""
    This function is an interface to a variety of algorithms for computing
    the minimum of a function of several variables.

    INPUT:

    - ``func`` -- Either a symbolic function or a Python function whose
      argument is a tuple with `n` components

    - ``x0`` -- Initial point for finding minimum.

    - ``gradient`` -- Optional gradient function. This will be computed
      automatically for symbolic functions.  For Python functions, it allows
      the use of algorithms requiring derivatives.  It should accept a
      tuple of arguments and return a NumPy array containing the partial
      derivatives at that point.

    - ``hessian`` --  Optional hessian function. This will be computed
      automatically for symbolic functions. For Python functions, it allows
      the use of algorithms requiring derivatives. It should accept a tuple
      of arguments and return a NumPy array containing the second partial
      derivatives of the function.

    - ``algorithm`` -- String specifying algorithm to use. Options are
      ``'default'`` (for Python functions the simplex method is the default;
      for symbolic functions bfgs is the default):

       - ``'simplex'`` -- using the downhill simplex algorithm

       - ``'powell'`` -- use the modified Powell algorithm

       - ``'bfgs'`` -- (Broyden-Fletcher-Goldfarb-Shanno) requires gradient

       - ``'cg'`` -- (conjugate-gradient) requires gradient

       - ``'ncg'`` -- (newton-conjugate gradient) requires gradient and hessian

    - ``verbose`` -- (optional, default: False) print convergence message

    .. NOTE::

        For additional information on the algorithms implemented in this function,
        consult SciPy's `documentation on optimization and root
        finding <https://docs.scipy.org/doc/scipy/reference/optimize.html>`_

    EXAMPLES:

    Minimize a fourth order polynomial in three variables (see the
    :wikipedia:`Rosenbrock_function`)::

        sage: vars = var('x y z')
        sage: f = 100*(y-x^2)^2+(1-x)^2+100*(z-y^2)^2+(1-y)^2
        sage: minimize(f, [.1,.3,.4]) # abs tol 1e-6
        (1.0, 1.0, 1.0)

    Try the newton-conjugate gradient method; the gradient and hessian are
    computed automatically::

        sage: minimize(f, [.1, .3, .4], algorithm="ncg") # abs tol 1e-6
        (1.0, 1.0, 1.0)

    We get additional convergence information with the `verbose` option::

        sage: minimize(f, [.1, .3, .4], algorithm="ncg", verbose=True)
        Optimization terminated successfully.
        ...
        (0.9999999..., 0.999999..., 0.999999...)

    Same example with just Python functions::

        sage: def rosen(x): # The Rosenbrock function
        ....:    return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: minimize(rosen, [.1,.3,.4]) # abs tol 3e-5
        (1.0, 1.0, 1.0)

    Same example with a pure Python function and a Python function to
    compute the gradient::

        sage: def rosen(x): # The Rosenbrock function
        ....:    return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: import numpy
        sage: from numpy import zeros
        sage: def rosen_der(x):
        ....:    xm = x[1r:-1r]
        ....:    xm_m1 = x[:-2r]
        ....:    xm_p1 = x[2r:]
        ....:    der = zeros(x.shape, dtype=float)
        ....:    der[1r:-1r] = 200r*(xm-xm_m1**2r) - 400r*(xm_p1 - xm**2r)*xm - 2r*(1r-xm)
        ....:    der[0] = -400r*x[0r]*(x[1r]-x[0r]**2r) - 2r*(1r-x[0])
        ....:    der[-1] = 200r*(x[-1r]-x[-2r]**2r)
        ....:    return der
        sage: minimize(rosen, [.1,.3,.4], gradient=rosen_der, algorithm="bfgs") # abs tol 1e-6
        (1.0, 1.0, 1.0)
    """
    from sage.structure.element import Expression
    from sage.ext.fast_callable import fast_callable
    import numpy
    from scipy import optimize
    if isinstance(func, Expression):
        var_list = func.variables()
        var_names = [str(_) for _ in var_list]
        fast_f = fast_callable(func, vars=var_names, domain=float)
        f = lambda p: fast_f(*p)
        gradient_list = func.gradient()
        fast_gradient_functions = [
            fast_callable(gradient_list[i], vars=var_names, domain=float)
            for i in range(len(gradient_list))
        ]
        gradient = lambda p: numpy.array(
            [a(*p) for a in fast_gradient_functions])
    else:
        f = func

    if algorithm == "default":
        if gradient is None:
            min = optimize.fmin(f, [float(_) for _ in x0],
                                disp=verbose,
                                **args)
        else:
            min = optimize.fmin_bfgs(f, [float(_) for _ in x0],
                                     fprime=gradient,
                                     disp=verbose,
                                     **args)
    else:
        if algorithm == "simplex":
            min = optimize.fmin(f, [float(_) for _ in x0],
                                disp=verbose,
                                **args)
        elif algorithm == "bfgs":
            min = optimize.fmin_bfgs(f, [float(_) for _ in x0],
                                     fprime=gradient,
                                     disp=verbose,
                                     **args)
        elif algorithm == "cg":
            min = optimize.fmin_cg(f, [float(_) for _ in x0],
                                   fprime=gradient,
                                   disp=verbose,
                                   **args)
        elif algorithm == "powell":
            min = optimize.fmin_powell(f, [float(_) for _ in x0],
                                       disp=verbose,
                                       **args)
        elif algorithm == "ncg":
            if isinstance(func, Expression):
                hess = func.hessian()
                hess_fast = [[
                    fast_callable(a, vars=var_names, domain=float) for a in row
                ] for row in hess]
                hessian = lambda p: [[a(*p) for a in row] for row in hess_fast]
                from scipy import dot
                hessian_p = lambda p, v: dot(numpy.array(hessian(p)), v)
                min = optimize.fmin_ncg(f, [float(_) for _ in x0], fprime=gradient, \
                      fhess=hessian, fhess_p=hessian_p, disp=verbose, **args)
    return vector(RDF, min)
Example no. 46
    #val = N*0.5*(np.trace(np.dot(inv,delta)) + np.trace(np.dot(np.dot(np.dot(-inv,delta),inv),S)))
    val = np.reshape(val, (D * L, ))
    #print(val)
    return val


#main
D = 10
L = 2
N = 100
x, Y_pure = generateData()
#print(Y_pure.shape)
sig = [0.1, 0.4, 1]
W0 = np.reshape(random.rand(20), (D, L))
print(W0)
for i in range(len(sig)):
    sigma = sig[i]
    Y = Y_pure
    #Y = Y_pure + random.multivariate_normal(np.zeros((D)), sigma*np.eye(D), N)
    Wstar = opt.fmin_cg(f, W0, fprime=dfx)
    #print(Wstar)
    Wstar = np.reshape(Wstar, (D, L))

    square = np.dot(np.transpose(Wstar), Wstar)
    t_recover = np.dot(np.dot(Y, Wstar), np.linalg.inv(square))
    #print(t_recover.shape)
    plt.figure()
    dot = plt.plot(t_recover[:, 0], t_recover[:, 1], 'r.')
    plt.title('the retrieve latent X with sigma: ' + str(sigma))
    plt.legend([dot], ["Recovered X"])
    plt.show()
Example no. 47
    else:
        ergs,frcs= gather_smd_data(maindir)

    if fmethod in ('test','TEST') and potential in ('NN'):
        NN.init(maindir,params,sample_dirs,samples,nprcs,fmatch \
                ,ergrefs,frcrefs,fmethod,parfile,runmode \
                ,rcut,pranges,vranges)

    output_energy_relation(ergs,ergrefs,samples,sample_dirs,fname='out.erg.pmd-vs-dft.ini')
    output_force_relation(frcs,frcrefs,samples,sample_dirs,fname='out.frc.pmd-vs-dft.ini')

    if fmethod in ('cg','CG','conjugate-gradient'):
        print '>>>>> conjugate-gradient was selected.'
        if gradient in ('numerical'):
            solution= opt.fmin_cg(func,vars,args=(maindir,)
                                  ,maxiter=niter,disp=True
                                  ,epsilon=eps,gtol=gtol)
        else:
            if potential in ('linreg',):
                solution= opt.fmin_cg(func,vars,args=(maindir,)
                                      ,fprime=grad_linreg
                                      ,maxiter=niter,disp=True
                                      ,gtol=gtol)
            elif potential in ('NN',):
                solution= opt.fmin_cg(func,vars \
                                      ,args=(maindir,) \
                                      ,fprime=NN.grad \
                                      ,maxiter=niter,disp=True \
                                      ,gtol=gtol)
        print ' CG solution:',solution
Example no. 48
0
def fprime(xk, f, epsilon, *args):
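    # Forward-difference approximation of the gradient of f at xk: perturb the
    # last-column entry of each row by epsilon[i, -1] and difference against f0,
    # so only grad[:, -1] is filled in.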
    f0 = f(*((xk, ) + args))
    grad = np.zeros((xk.shape), float)
    ei = np.zeros((xk.shape), float)
    for i in range(xk.shape[0]):
        ei[i, -1] = 1.0
        d = np.multiply(epsilon, ei)
        grad[i, -1] = (f(*((xk + d, ) + args)) - f0) / d[i, -1]
        ei[i, -1] = 0.0
    return grad


if __name__ == "__main__":
    fname = "../datas/DLBCL-Stanford/DLBCL-Stanford.mat"
    fname = "../datas/ColonTumor/colonTumor.mat"
    print fname
    X, Y = read_data(fname)
    X_index = np.array((1, 2, 4, 5))
    X_model = X[:, X_index]
    d, c = X_model.shape[1], Y.shape[1]
    W = np.ones((d, c))
    from scipy import optimize as opt
    threshold = 0.01
    W = opt.fmin_cg(F, W, args=(X_model, Y, threshold)).reshape((d, c))
    W = np.matrix(W)
    print "W = %s" % str(W)
    eps = np.sqrt(np.finfo(float).eps)
    eps_mat = eps * W
    w_prime = fprime(W, F, eps_mat, X_model, Y, threshold)
    print "w_prime = %s" % str(w_prime)
Example no. 49
0
def iresolve(images,
             tf_matrices,
             scale=1.3,
             initial_guess=initial_guess_avg,
             initial_guess_args={},
             camera=None,
             camera_args={},
             cost_measure=None,
             cost_args={}):
    """Super-resolve a set of low-resolution images.

    Parameters
    ----------
    images : list of ndarrays
        Low-resolution input frames.
    tf_matrices : list of (3, 3) ndarrays
        List of transformation matrices to transform each
        low-resolution frame to a reference image (typically,
        ``images[0]``).
    scale : float
        Resolution improvement required.
    initial_guess : callable, f(imgs, Hs, scale, oshape, **initial_guess_args)
        Function that calculates an initial estimate of the high-resolution
        image for initialising the iterative process.  If not specified,
        ``initial_guess_avg`` is used.  See ``initial_guess_avg`` for
        more information.
    initial_guess_args : dict, optional
        Optional keyword arguments for `initial_guess`.
    camera : callable, f(nr, img, H, scale, oshape, **camera_args), optional
        Function that emulates the effect of the camera on a
        high-resolution frame.  See the docstring of ``default_camera``
        for more detail.  If not specified, ``default_camera`` is used.
    camera_args : dict, optional
        Optional keyword arguments for `camera`.
    cost_measure : callable, f(nr, x, y, HR, HR_guess, **cost_args)
        Function that calculates the difference between two
        low-resolution frames, given the current high-resolution estimate
        and the initial guess.  If not specified, ``cost_squared_error``
        is used.
    cost_args : dict, optional
        Optional keyword arguments for `cost_measure`.

    Returns
    -------
    out : ndarray
        Super-resolved image.

    """
    if camera is None:
        camera = default_camera

    if cost_measure is None:
        cost_measure = cost_squared_error

    oshape = [int(i) for i in np.array(images[0].shape) * float(scale)]

    HR = initial_guess(images,
                       tf_matrices,
                       scale=scale,
                       oshape=oshape,
                       **initial_guess_args)

    HR_guess = HR.copy()

    def sr_func(HR, it=[0]):
        if it[0] % 100 == 0:
            log.info('Saving output for function call %d' % it[0])
            np.save('HR', HR.reshape(oshape))

        it[0] += 1
        err = 0
        save_shape = HR.shape
        HR.shape = oshape
        for i, (H, LR) in enumerate(zip(tf_matrices, images)):
            LR_est = camera(i, HR, H, scale, images[0].shape, **camera_args)

            err += cost_measure(i, LR, LR_est, HR, HR_guess, **cost_args)

        HR.shape = save_shape

        return err

    def callback(x, it=[1]):
        it[0] += 1

        log.info('Iteration #%d' % it[0])

    tic = time.time()
    log.info('Starting optimisation. This may take a long time (hours).')
    HR = opt.fmin_cg(sr_func, HR, callback=callback, maxiter=5)
    toc = time.time()

    log.info('Operation took %.2f seconds' % (toc - tic))

    return HR.reshape(oshape)
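A toy sketch of the iterative pattern used above: flatten the high-resolution estimate, define a scalar cost over all low-resolution frames, and hand it to fmin_cg. The 2x2-averaging toy_camera and the random frames below are illustrative stand-ins, not part of the snippet above.

import numpy as np
from scipy import optimize

def toy_camera(hr):
    # Crude "camera": average 2x2 blocks of a 16x16 image down to 8x8.
    return hr.reshape(8, 2, 8, 2).mean(axis=(1, 3))

rng = np.random.default_rng(0)
truth = rng.random((16, 16))
frames = [toy_camera(truth) + 0.01 * rng.standard_normal((8, 8)) for _ in range(3)]

def cost(hr_flat):
    # Sum of squared differences over all low-resolution frames.
    hr = hr_flat.reshape(16, 16)
    return sum(np.sum((toy_camera(hr) - lr) ** 2) for lr in frames)

hr0 = np.repeat(np.repeat(frames[0], 2, axis=0), 2, axis=1)  # crude initial guess
hr_est = optimize.fmin_cg(cost, hr0.ravel(), maxiter=20, disp=False)
print(hr_est.reshape(16, 16).shape)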
Example no. 50
0
    x_t = data[idx:idx + N_td]
    y_t = num2cls(label[idx:idx + N_td])

    # cross-entropy error function before training
    J_classification(w, *(x_t, y_t))

    # stochastic gradient descent
    #    eta=0.001
    #    for k in range(100):
    #        w = w - eta * gradient(w,*(x_t,y_t))
    #        print(k)

    # conjugate gradient method
    w = optimize.fmin_cg(J_classification,
                         w,
                         fprime=gradient,
                         args=(x_t, y_t),
                         gtol=1)

    # cross-entropy error function after training
    J_classification(w, *(x_t, y_t))

# check on test samples
print("TEST")
w_l1 = w[:(N_in + 1) * N_hdn].reshape(N_in + 1, N_hdn)
w_l2 = w[(N_in + 1) * N_hdn:].reshape(N_hdn + 1, N_out)
okn = 0
testN = 5000
for i in range(testN):
    #    idx=np.random.randint(60000)
    idx = i
Example no. 51
0
def yaw_solve(yaw_image,
              yaw,
              scale,
              tol=1e-10,
              iter_lim=None,
              damp=1e-1,
              method='CG',
              norm=2):
    """Super-resolve a nonzero yaw image by solving
    a large, sparse set of linear equations.

    This method approximates the camera with a downsampling operator,
    using polygon interpolation.  Depending on `method`, conjugate
    gradients, LSQR, gradient descent or L-BFGS-B is used to solve the
    equation :math:`A\mathbf{x} = b`, where :math:`A` is the downsampling
    operator, :math:`\mathbf{x}` is the high-resolution estimate
    (flattened in raster scan/lexicographic order), and :math:`\mathbf{b}`
    is a vector of all the yaw_image pixels.

    Parameters
    ----------
    yaw_image : ndarray
        Nonzero yaw input frame.
    yaw :
        Yaw parameter passed to the polygon interpolation operator, which
        relates all yaw_image pixels to the high-resolution frame.
    scale : float
        The resolution of the output image is `scale` times the resolution
        of the input images.
    damp : float, optional
        Weight given to the internally constructed prior estimate `x0`.
        A larger value of `damp` results in a solution closer to `x0`,
        whereas a smaller value of `damp` yields a solution closer to the
        one obtained without any prior estimate.
    method : {'CG', 'LSQR', 'descent', 'L-BFGS-B'}
        Whether to use conjugate gradients, least-squares, gradient descent
        or L-BFGS-B to determine the solution.
    norm : {1, 2}
        Whether to use the L1 or L2 norm to measure errors between images.

    Returns
    -------
    HR : ndarray
        High-resolution estimate.
    HR0 : ndarray
        Prior (initial) estimate used to regularise the solution.

    """

    ishape = yaw_image.shape
    oshape = yaw_image.shape

    print "Constructing camera operator..."
    op = poly_interp_op_yaw(oshape[0],
                            oshape[1],
                            ishape[0],
                            ishape[1],
                            yaw,
                            scale,
                            search_win=round(scale) * 2 + 1)

    #dop = op.todense()
    #dsDebug = gdal.GetDriverByName("GTIFF").Create('/tmp/debug.tif', opd.shape[1], opd.shape[0], 1, gdal.GDT_Float32)
    #dsDebug.GetRasterBand(1).WriteArray(opd)
    #dsDebug.FlushCache()

    M = np.prod(ishape)
    b = yaw_image.flat

    atol = btol = conlim = tol
    show = True

    # Construct the prior
    opT = op.T
    opT_sum1 = opT.sum(
        axis=1).flatten() + 0.00001  # add small bias to avoid division by zero
    opTb = opT.dot(b)
    x0 = opTb / opT_sum1

    #return x0.reshape(oshape)

    # Error and gradient functions, used in conjugate gradient optimisation
    def sr_func(x, norm=norm):
        return (np.linalg.norm(op * x - b, norm) ** 2 + \
                damp * np.linalg.norm(x - x0.flat, norm) ** 2)

    def sr_gradient(x, norm=norm):
        # Careful! Mixture of sparse and dense operators.
        #Axb = op * x - b
        #nrm_sq = np.dot(Axb, Axb) # Dense
        #Axbop = (op.T * Axb).T # Sparse
        #return nrm_sq * Axbop
        Axb = op * x - b
        L = len(x)
        if norm == 1:
            xmx0 = x - x0.flat
            term1 = np.linalg.norm(Axb, 1) * np.sign(Axb.T) * op
            term2 = damp * np.linalg.norm(xmx0, 1) * np.sign(xmx0.flat)
        elif norm == 2:
            term1 = (Axb.T * op)
            term2 = damp * (x - x0.flat)
        else:
            raise ValueError('Invalid norm for error measure (%s).' % norm)

        return 2 * (term1 + term2)

    print "Super resolving..."

    ## Conjugate Gradient Optimisation
    if method == 'CG':

        x, fopt, f_calls, gcalls, warnflag = \
           opt.fmin_cg(sr_func, x0, fprime=sr_gradient, gtol=0,
                       disp=True, maxiter=iter_lim, full_output=True)

    elif method == 'LSQR':

        ## LSQR Optimisation
        ##
        x0 = x0.flat
        b = b - op * x0
        x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var = \
          lsqr(op, b, atol=atol, btol=btol,
               conlim=conlim, damp=damp, show=show, iter_lim=iter_lim)
        x = x0 + x

    elif method == 'descent':

        ## Steepest Descent Optimisation
        ##
        x = np.array(x0, copy=True).reshape(np.prod(x0.shape))
        for i in range(50):
            print(op.T * ((op * x) - b)).shape
            print "Gradient descent step %d" % i
            x += damp * -1 * (op.T * ((op * x) - b))
            # Could add prior: + lam * (x - x0.flat))


## L-BFGS-B
    elif method == 'L-BFGS-B':
        x, f, d = opt.fmin_l_bfgs_b(sr_func, x0.flat, fprime=sr_gradient)
        print "L-BFGS-B converged after %d function calls." % d['funcalls']
        print "Final function value:", f
        print "Reason for termination:", d['task']

    elif method == 'direct':
        x = sparse.linalg.spsolve(op, b)

    else:
        raise ValueError('Invalid method (%s) specified.' % method)

    return x.reshape(oshape), x0.reshape(oshape)
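For norm=2, sr_func and sr_gradient above amount to a damped least-squares problem, minimising ||Ax - b||^2 + damp * ||x - x0||^2. A self-contained toy sketch of that objective and its gradient handed to fmin_cg follows; the small random operator is illustrative only.

import numpy as np
from scipy import optimize, sparse

A = sparse.random(30, 10, density=0.3, format='csr', random_state=0)
b = np.random.RandomState(1).rand(30)
x0 = np.zeros(10)
damp = 1e-1

# Damped least-squares objective and its analytic gradient.
f = lambda x: np.linalg.norm(A @ x - b) ** 2 + damp * np.linalg.norm(x - x0) ** 2
fprime = lambda x: 2 * (A.T @ (A @ x - b) + damp * (x - x0))

x_hat = optimize.fmin_cg(f, x0, fprime=fprime, gtol=1e-8, disp=False)
print(np.round(x_hat, 3))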
Example no. 52
0
def solve(images,
          tf_matrices,
          scale,
          x0=None,
          tol=1e-10,
          iter_lim=None,
          damp=1e-1,
          method='CG',
          operator='bilinear',
          norm=1,
          standard_form=False):
    """Super-resolve a set of low-resolution images by solving
    a large, sparse set of linear equations.

    This method approximates the camera with a downsampling operator,
    using bilinear or polygon interpolation.  Depending on `method`,
    conjugate gradients, LSQR, gradient descent or L-BFGS-B is used to
    solve the equation :math:`A\mathbf{x} = b`, where :math:`A` is the
    downsampling operator, :math:`\mathbf{x}` is the high-resolution
    estimate (flattened in raster scan/lexicographic order), and
    :math:`\mathbf{b}` is a stacked vector of all the low-resolution
    images.

    Parameters
    ----------
    images : list of ndarrays
        Low-resolution input frames.
    tf_matrices : list of (3, 3) ndarrays
        Transformation matrices that relate all low-resolution frames
        to a reference low-resolution frame (usually ``images[0]``).
    scale : float
        The resolution of the output image is `scale` times the resolution
        of the input images.
    x0 : ndarray, optional
        Initial guess of HR image.
    damp : float, optional
        If an initial guess is provided, `damp` specifies how much that
        estimate is weighed in the entire process.  A larger value of
        `damp` results in a solution closer to `x0`, whereas a smaller
        value of `damp` yields a solution closer to the solution
        obtained without any initial estimate.
    method : {'CG', 'LSQR', 'descent', 'L-BFGS-B'}
        Whether to use conjugate gradients, least-squares, gradient descent
        or L-BFGS-B to determine the solution.
    operator : {'bilinear', 'polygon'}
        The camera model is approximated as an interpolation process.  The
        bilinear interpolation operator only works well for zoom ratios < 2.
    norm : {1, 2}
        Whether to use the L1 or L2 norm to measure errors between images.
    standard_form : bool
        Whether to convert the matrix operator to standard form before
        processing.

    Returns
    -------
    HR : ndarray
        High-resolution estimate.

    """
    assert len(images) == len(tf_matrices)

    HH = [H.copy() for H in tf_matrices]
    HH_scaled = []
    scale = float(scale)
    for H in HH:
        HS = np.array([[scale, 0, 0], [0, scale, 0], [0, 0, 1]])

        HH_scaled.append(np.linalg.inv(np.dot(HS, H)))

    HH = HH_scaled
    oshape = np.floor(np.array(images[0].shape) * scale).astype(int)
    LR_shape = images[0].shape

    print "Constructing camera operator (%s)..." % operator
    if operator == 'bilinear':
        op = bilinear(oshape[0], oshape[1], HH, *LR_shape, boundary=0)
    elif operator == 'polygon':
        sub_ops = []
        for H in HH:
            sub_ops.append(
                poly_interp_op(oshape[0],
                               oshape[1],
                               H,
                               *LR_shape,
                               search_win=round(scale) * 2 + 1))
        op = sparse.vstack(sub_ops, format='csr')
    else:
        raise ValueError('Invalid operator requested (%s).' % operator)

##  Visualise mapping of frames
##
##     import matplotlib.pyplot as plt
##     P = np.prod(LR_shape)
##     img = (op * x0.flat).reshape(LR_shape)
##     plt.subplot(1, 4, 1)
##     plt.imshow(x0, cmap=plt.cm.gray)
##     plt.title('x0')
##     plt.subplot(1, 4, 2)
##     plt.imshow(images[0], cmap=plt.cm.gray)
##     plt.title('LR frame')
##     plt.subplot(1, 4, 3)
##     plt.imshow(img, cmap=plt.cm.gray)
##     plt.title('LR image Ax0')
##     plt.subplot(1, 4, 4)
##     plt.imshow(images[0] - img, cmap=plt.cm.gray)
##     plt.title('diff images[0] - Ax')
##     plt.show()

    if standard_form:
        print "Bringing matrix to standard form..."
        P = ordering.standard_form(op)
        op = P * op

    k = len(images)
    M = np.prod(LR_shape)
    b = np.empty(k * M)
    for i in range(k):
        b[i * M:(i + 1) * M] = images[i].flat

    if standard_form:
        b = P * b

    atol = btol = conlim = tol
    show = True

    # Construct the prior
    opT = op.T
    opT_sum1 = opT.sum(
        axis=1).flatten() + 0.00001  # add small bias to avoid division by zero
    opTb = opT.dot(b)
    x0 = opTb / opT_sum1

    #return x0.reshape(oshape)

    # Error and gradient functions, used in conjugate gradient optimisation
    def sr_func(x, norm=norm):
        return (np.linalg.norm(op * x - b, norm) ** 2 + \
                damp * np.linalg.norm(x - x0.flat, norm) ** 2)

    def sr_gradient(x, norm=norm):
        # Careful! Mixture of sparse and dense operators.
        #Axb = op * x - b
        #nrm_sq = np.dot(Axb, Axb) # Dense
        #Axbop = (op.T * Axb).T # Sparse
        #return nrm_sq * Axbop
        Axb = op * x - b
        L = len(x)
        if norm == 1:
            xmx0 = x - x0.flat
            term1 = np.linalg.norm(Axb, 1) * np.sign(Axb.T) * op
            term2 = damp * np.linalg.norm(xmx0, 1) * np.sign(xmx0.flat)
        elif norm == 2:
            term1 = (Axb.T * op)
            term2 = damp * (x - x0.flat)
        else:
            raise ValueError('Invalid norm for error measure (%s).' % norm)

        return 2 * (term1 + term2)

    print "Super resolving..."

    ## Conjugate Gradient Optimisation
    if method == 'CG':

        x, fopt, f_calls, gcalls, warnflag = \
           opt.fmin_cg(sr_func, x0, fprime=sr_gradient, gtol=0,
                       disp=True, maxiter=iter_lim, full_output=True)

    elif method == 'LSQR':

        ## LSQR Optimisation
        ##
        x0 = x0.flat
        b = b - op * x0
        x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var = \
          lsqr(op, b, atol=atol, btol=btol,
               conlim=conlim, damp=damp, show=show, iter_lim=iter_lim)
        x = x0 + x

    elif method == 'descent':

        ## Steepest Descent Optimisation
        ##
        x = np.array(x0, copy=True).reshape(np.prod(x0.shape))
        for i in range(50):
            print(op.T * ((op * x) - b)).shape
            print "Gradient descent step %d" % i
            x += damp * -1 * (op.T * ((op * x) - b))
            # Could add prior: + lam * (x - x0.flat))


## L-BFGS-B
    elif method == 'L-BFGS-B':
        x, f, d = opt.fmin_l_bfgs_b(sr_func, x0.flat, fprime=sr_gradient)
        print "L-BFGS-B converged after %d function calls." % d['funcalls']
        print "Final function value:", f
        print "Reason for termination:", d['task']

    elif method == 'direct':
        x = sparse.linalg.spsolve(op, b)

    else:
        raise ValueError('Invalid method (%s) specified.' % method)

    return x.reshape(oshape)
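The LSQR branch above solves the same stacked system A x = b directly. A toy sketch with a tiny sparse averaging operator and two identical "frames" (all values illustrative):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import lsqr

A_frame = sparse.kron(sparse.eye(4), np.full((1, 2), 0.5))  # pairwise averaging, 4x8
A = sparse.vstack([A_frame, A_frame], format='csr')         # two stacked frames
x_true = np.arange(8, dtype=float)
b = A @ x_true

x, istop, itn = lsqr(A, b, damp=1e-1, atol=1e-10, btol=1e-10)[:3]
print(istop, itn, np.round(x, 2))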
    print("Training error | Cross validation error | Accuracy on test set")
    for i in range(25):
        iterative(la)
        la = la * 2
elif task == 2:
    # Learning curves
    print("\n=================================================")
    print("Learning curves\n")
    print("lambda=", la)
    print("# of training samples | Training error | Cross validation error")
    for i in range(Xtrain.shape[0] // 50):
        Xtrain = Xt[0:i * 50 + 1, :]
        ytrain = yt[0:i * 50 + 1, :]
        answer = sc.fmin_cg(calculateJ,
                            rndInit,
                            calculateGrad,
                            maxiter=90,
                            disp=False)
        J1 = J(answer, 900, 25, 10, Xtrain, ytrain, 0)
        J2 = J(answer, 900, 25, 10, Xcv, ycv, 0)
        print(i * 50, J1, J2)
elif task == 3:
    # Gradient checking
    print("\n=================================================")
    print("Gradient checking\n")
    print(
        "Calculating theta values with backpropagation (this will take some time)")
    num_input = 400
    answer = sc.fmin_cg(calculateJ,
                        rndInit,
                        calculateGrad,
Example no. 54
0
def scipy_cg(project,x0=None,xb=None,its=100,accu=1e-10,grads=True):
    """ result = scipy_cg(project,x0=[],xb=[],its=100,accu=1e-10)

        Runs the Scipy implementation of CG with
        an SU2 project

        Inputs:
            project - an SU2 project
            x0      - optional, initial guess
            xb      - optional, design variable bounds
            its     - max outer iterations, default 100
            accu    - accuracy, default 1e-10

        Outputs:
           result - the outputs from scipy.optimize.fmin_cg
    """

    # import scipy optimizer
    from scipy.optimize import fmin_cg

    # handle input cases
    if x0 is None: x0 = []
    if xb is None: xb = []

    # function handles
    func           = obj_f

    # gradient handles
    if project.config.get('GRADIENT_METHOD','NONE') == 'NONE':
        fprime         = None
    else:
        fprime         = obj_df

    # number of design variables
    n_dv = len( project.config['DEFINITION_DV']['KIND'] )
    project.n_dv = n_dv

    # Initial guess
    if not x0: x0 = [0.0]*n_dv

    # prescale x0
    dv_scales = project.config['DEFINITION_DV']['SCALE']
    x0 = [ x0[i]/dv_scl for i,dv_scl in enumerate(dv_scales) ]

    # scale accuracy
    obj = project.config['OPT_OBJECTIVE']
    obj_scale = obj[obj.keys()[0]]['SCALE']
    accu = accu*obj_scale

    # finite-difference step for the gradient approximation
    eps = 1.0e-04

    # optimizer summary
    sys.stdout.write('Conjugate gradient (CG) parameters:\n')
    sys.stdout.write('Number of design variables: ' + str(n_dv) + '\n')
    sys.stdout.write('Objective function scaling factor: ' + str(obj_scale) + '\n')
    sys.stdout.write('Maximum number of iterations: ' + str(its) + '\n')
    sys.stdout.write('Requested accuracy: ' + str(accu) + '\n')
    sys.stdout.write('Initial guess for the independent variable(s): ' + str(x0) + '\n')
    sys.stdout.write('Lower and upper bound for each independent variable: ' + str(xb) + '\n\n')

    # Evaluate the objective function (only 1st iteration)
    obj_f(x0,project)

    # Run Optimizer
    outputs = fmin_cg( x0             = x0             ,
                       f              = func           ,
                       fprime         = fprime         ,
                       args           = (project,)     ,
                       gtol           = accu           ,
                       epsilon        = eps            ,
                       maxiter        = its            ,
                       full_output    = True           ,
                       disp           = True           ,
                       retall         = True           )


    # Done
    return outputs
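With full_output=True and retall=True, fmin_cg returns the tuple (xopt, fopt, func_calls, grad_calls, warnflag, allvecs), which is what `outputs` above contains. A minimal illustration with a toy quadratic standing in for obj_f and obj_df (no SU2 project involved):

import numpy as np
from scipy.optimize import fmin_cg

f = lambda x, shift: float(np.sum((x - shift) ** 2))  # stand-in for obj_f(x, project)
df = lambda x, shift: 2.0 * (x - shift)               # stand-in for obj_df(x, project)

outputs = fmin_cg(f=f, x0=[0.0, 0.0], fprime=df,
                  args=(np.array([1.0, -2.0]),),
                  gtol=1e-10, maxiter=100,
                  full_output=True, disp=False, retall=True)
xopt, fopt, func_calls, grad_calls, warnflag, allvecs = outputs
print(xopt, fopt, warnflag, len(allvecs))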
Example no. 55
0
X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)

initial_params = np.concatenate([X.flatten(), theta.flatten()])

lmd = 10


def cost_func(p):
    return ccf.cofi_cost_function(p, Ynorm, R, num_users, num_movies, num_features, lmd)[0]


def grad_func(p):
    return ccf.cofi_cost_function(p, Ynorm, R, num_users, num_movies, num_features, lmd)[1]

theta, *unused = opt.fmin_cg(cost_func, fprime=grad_func, x0=initial_params, maxiter=100, disp=False, full_output=True)

# Unfold the returned parameter vector back into X and theta
X = theta[0:num_movies * num_features].reshape((num_movies, num_features))
theta = theta[num_movies * num_features:].reshape((num_users, num_features))

print('Recommender system learning completed')
print(theta)

input('Program paused. Press ENTER to continue')

# ===================== Part 8: Recommendation for you =====================
# After training the model, you can now make recommendations by computing
# the predictions matrix.
#
p = np.dot(X, theta.T)
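A hedged continuation of the comment above: one common way to turn the prediction matrix into recommendations is to undo the mean normalisation and sort. The Ymean vector below is an assumption (the per-movie mean removed when Ynorm was built) and is not shown in this snippet.

my_predictions = p[:, 0] + Ymean.flatten()   # Ymean is hypothetical here
top = np.argsort(my_predictions)[::-1][:10]  # ten highest predicted ratings for user 0
for i in top:
    print('Predicting rating %.1f for movie %d' % (my_predictions[i], i))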
Example no. 56
0
    def max_sase_bump(self,
                      bump,
                      alpha,
                      method='simplex',
                      params={},
                      opt_pointing=False):
        '''
        Direct SASE optimization (simplex by default), using a corrector bump
        as a single multiknob.
        '''
        if self.debug:
            print('starting multiknob optimization, correctors = ', bump)

        if opt_pointing:
            weight_gmd_bpm_1 = 10.0
            weight_gmd_bpm_2 = 10.0
        else:
            weight_gmd_bpm_1 = 0.0
            weight_gmd_bpm_2 = 0.0

        def error_func(x):

            print(self.dp)

            pen_max = 100.0

            if abs(x) > 1:
                return pen_max

            dI = bump["dI"]
            currents = bump["currents"]
            #print 'error_func: ', bpm_names, '->',  planes
            correctors_ = bump["correctors"]
            for i in range(len(correctors_)):
                print("alpha = ", x)
                print('{0} x[{1}]={2}'.format(correctors_[i], i,
                                              currents[i] + dI[i] * x))
                limits = self.dp.get_limits(correctors_[i])
                print('limits=[{0}, {1}]'.format(limits[0], limits[1]))
                if (currents[i] + dI[i] * x < limits[0]
                        or currents[i] + dI[i] * x > limits[1]):
                    print('limits exceeded')
                    return pen_max

            for i in range(len(correctors_)):
                print('setting', correctors_[i], '->', currents[i] + dI[i] * x)
                self.mi.set_value(correctors_[i], currents[i] + dI[i] * x)

            sleep(self.timeout)

            sase = self.mi.get_sase(detector=self.detector)
            alarm = np.max(self.mi.get_alarms())
            #z1, z2 = get_sase_pos()

            if self.debug: print('alarm:', alarm)
            if self.debug: print('sase:', sase)
            #print 'pointing', z1, z2, 'weights', weight_gmd_bpm_1, weight_gmd_bpm_2

            pen = 0.0

            if alarm > 1.0:
                return pen_max
            if alarm > 0.7:
                return alarm * 50.0
            pen += alarm

            pen -= sase

            if self.debug: print('penalty:', pen)

            return pen

        sase_ref = self.mi.get_sase(detector=self.detector)

        x = alpha
        x_init = x

        if self.logging:
            f = open(self.log_file, 'a')
            f.write('\n*** optimization step ***\n')
            f.write(str(bump["correctors"]) + '\n')
            f.write(method + '\n')
            f.write('x0=' + str(x_init) + '\n')
            f.write('sase0=' + str(sase_ref) + '\n')

        if method == 'cg':
            print('using CG optimizer, params:', params)

            try:
                max_iter = params['maxiter']
            except KeyError:
                max_iter = 10 * len(x)

            try:
                epsilon = params['epsilon']
            except KeyError:
                epsilon = 0.1

            try:
                gtol = params['gtol']
            except KeyError:
                gtol = 1.e-3

            opt.fmin_cg(error_func,
                        x,
                        gtol=gtol,
                        epsilon=epsilon,
                        maxiter=max_iter)

        if method == 'simplex':
            print('using simplex optimizer, params:', params)

            try:
                max_iter = params['maxiter']
            except KeyError:
                max_iter = 10 * len(bump["correctors"])

            try:
                xtol = params['xtol']
            except KeyError:
                xtol = 1.e-3

            opt.fmin(error_func, x, xtol=xtol, maxiter=max_iter)

        if method == 'powell':
            print('using powell optimizer, params:', params)

            try:
                max_iter = params['maxiter']
            except KeyError:
                max_iter = 10 * len(x)

            try:
                xtol = params['xtol']
            except KeyError:
                xtol = 1.e-3

            opt.fmin_powell(error_func, x, xtol=xtol, maxiter=max_iter)

        if method == 'fancy_stuff_from':
            print('using fancy optimizer, params:', params)
            pass

        sase_new = self.mi.get_sase(detector=self.detector)

        print('step ended changing sase from/to', sase_ref, sase_new)
        if sase_new <= sase_ref:
            for i in range(len(bump["correctors"])):
                print('reverting', bump["correctors"][i], '->',
                      bump["currents"][i])
                self.mi.set_value(bump["correctors"][i], bump["currents"][i])

        if self.logging:
            f.write('sase_new=' + str(sase_new) + '\n')
            f.close()
Example no. 57
0
    Xmat = np.ones(m)
    for x in x_data:
        Xmat = np.column_stack((Xmat, x))
    return Xmat


# cost function
def Jcost(theta, X, y):
    m = len(y)
    h = expit(X @ theta)
    cost = -(y * np.log(h)) - ((1 - y) * np.log(1 - h))
    J = (1 / m) * np.sum(cost)
    return J


# gradient
def Jcost_prime(theta, X, y):
    m = len(y)
    h = expit(X @ theta)  # use the logistic hypothesis, matching Jcost above
    return (1 / m) * (X.T @ (h - y))


# conjugate gradient descent
drat, am = choose_data(mtdata, 'drat', 'am')  # extract data
drat_nor, = feat_scale(drat)  # scaling
X = make_X(drat_nor)  # construct the data matrix
m, k = X.shape
y = am  # construct the y vector
theta0 = np.array([0, 10])  # initial guess of theta

res = op.fmin_cg(Jcost, theta0, fprime=Jcost_prime, args=(X, y))
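A quick sanity check of the analytic gradient above against finite differences; the tiny design matrix is illustrative only and not part of the original data.

from scipy.optimize import check_grad

Xc = np.column_stack([np.ones(5), np.linspace(-1, 1, 5)])  # toy design matrix
yc = np.array([0., 0., 1., 1., 1.])
print(check_grad(Jcost, Jcost_prime, np.zeros(2), Xc, yc))  # should be roughly 1e-6 or smaller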
Example no. 58
0
    def max_sase(self,
                 devices,
                 method='simplex',
                 params={},
                 opt_pointing=False):
        '''
        Direct SASE optimization (simplex by default), using the given devices
        as a multiknob.
        '''

        if self.debug:
            print('starting multiknob optimization, correctors = ', devices)

        def error_func(x, x_init, tols):
            if self.debug: print("X_relative = ", x, x_init, tols)
            x = x_init + (x - 1) * tols / (10. * 0.05)  # relative to absolute

            if self.debug: print("X_absolute = ", x, x_init)
            if self.debug: print("isRunning:", self.isRunning)
            self.niter += 1

            print("number of func eval: ", self.niter)

            if not self.isRunning:
                print("save machine parameters and kill optimizer")
                if self.sop != None:
                    self.save_action([devices, method, params],
                                     flag="force stop")
                # give time to save parameters before the next evaluation of error_func
                sleep(1)

            pen_max = 100.0

            for i in range(len(x)):
                if self.debug:
                    print('{0} x[{1}]={2}'.format(devices[i], i, x[i]))
                limits = self.dp.get_limits(devices[i])
                if self.debug:
                    print('limits=[{0}, {1}]'.format(limits[0], limits[1]))
                if x[i] < limits[0] or x[i] > limits[1]:
                    print('limits exceeded')
                    return pen_max

            for i in range(len(devices)):
                print('setting', devices[i], '->', x[i])
                self.mi.set_value(devices[i], x[i])

            sleep(self.timeout)

            sase = self.mi.get_sase(detector=self.detector)
            alarm = np.max(self.mi.get_alarms())

            if self.debug: print('alarm:', alarm)
            if self.debug: print('sase:', sase)

            pen = 0.0
            if alarm > 1.0:
                return pen_max
            if alarm > 0.7:
                return alarm * 50.0

            pen += alarm
            pen -= sase

            if self.debug: print('penalty:', pen)
            self.penalty = pen
            return pen

        sase_ref = self.mi.get_sase(detector=self.detector)

        x = np.array([self.mi.get_value(dev) for dev in devices])
        tols = np.zeros(len(devices))
        for i, dev in enumerate(devices):
            limits = self.dp.get_limits(dev)
            tols[i] = (limits[1] - limits[0]) / 2.

        x_init = x

        if self.logging:
            f = open(self.log_file, 'a')
            f.write('\n*** optimization step ***\n')
            f.write(str(devices) + '\n')
            f.write(method + '\n')
            f.write('x0=' + str(x_init) + '\n')
            f.write('sase0=' + str(sase_ref) + '\n')

        try:
            max_iter = params['maxiter']
        except KeyError:
            max_iter = 10 * len(x)

        if max_iter == None:
            max_iter = 10 * len(x)

        try:
            xtol = params['xtol']
        except KeyError:
            xtol = 1.e-3

        self.maxiter = max_iter
        if method == 'simplex':
            print('using simplex optimizer, params:', params)
            # opt.fmin(error_func,x,xtol=xtol, maxiter=max_iter)
            opt.fmin(error_func,
                     np.ones(len(x)),
                     args=(x_init, tols),
                     xtol=xtol,
                     maxfun=max_iter)
            # opt.fmin(error_func, x, args=(x_init,tols), xtol=xtol, maxfun=max_iter)

        if method == 'cg':
            print('using CG optimizer, params:', params)

            try:
                epsilon = params['epsilon']
            except KeyError:
                epsilon = 0.1

            opt.fmin_cg(error_func,
                        x / x_init,
                        args=(x_init, tols),
                        gtol=xtol,
                        epsilon=epsilon,
                        maxiter=max_iter)

        if method == 'powell':
            print('using powell optimizer, params:', params)
            opt.fmin_powell(error_func,
                            x / x_init,
                            args=(x_init, tols),
                            xtol=xtol,
                            maxiter=max_iter)

        if method == 'fancy_stuff_from':
            print('using fancy optimizer, params:', params)
            pass

        sase_new = self.mi.get_sase(detector=self.detector)
        #self.save_machine_set()

        #print ('step ended changing sase from/to', sase_ref, sase_new)
        #if sase_new <= sase_ref:
        #    for i in range(len(devices)):
        #        print ('reverting', devices[i], '->',x_init[i])
        #        self.mi.set_value(devices[i], x_init[i])

        if self.logging:
            f.write('sase_new=' + str(sase_new) + '\n')
            f.close()
Example no. 59
0

# Test ForwardProp

theta = generateNeuralNetwork(layerSizes=[400, 25, 10], eps=chooseEps(400, 10))
y_vect = vectorize_labels(y, numLabels=theta[1].shape[0])
lam = 1
sampleSize = y.shape[0]

cF = nnCostFunction_reg(theta, x, y_vect, sampleSize, lam)
print(cF)

# Test Gradient
Delta = backPropagate(theta, x, y_vect, sampleSize, lam)
checkGradient(theta, Delta, x, y_vect, sampleSize, lam)

# Run Optimizer
theta_unrolled = unrollData(theta)
x_unrolled = unrollDataX(x)

opt = op.fmin_cg(f=nnCostFunction_reg_unrolled,
                 x0=theta_unrolled,
                 fprime=backPropagate_unrolled,
                 args=(x_unrolled, y, sampleSize, lam),
                 maxiter=50,
                 disp=1,
                 full_output=1)
optTheta = rollData(opt[0])
pred = makePrediction(x, y, optTheta)
displayHiddenUnits(optTheta, 5)
Example no. 60
0
# network. To train your neural network, we will now use 'opt.fmin_cg'.
#

print('Training Neural Network ... ')

lmd = 1


def cost_func(p):
    return ncf.nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmd)[0]


def grad_func(p):
    return ncf.nn_cost_function(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmd)[1]

nn_params, *unused = opt.fmin_cg(cost_func, fprime=grad_func, x0=nn_params, maxiter=400, disp=True, full_output=True)

# Obtain theta1 and theta2 back from nn_params
theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(hidden_layer_size, input_layer_size + 1)
theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(num_labels, hidden_layer_size + 1)

input('Program paused. Press ENTER to continue')

# ===================== Part 10: Visualize Weights =====================
# You can now 'visualize' what the neural network is learning by
# displaying the hidden units to see what features they are capturing in
# the data

print('Visualizing Neural Network...')

dd.display_data(theta1[:, 1:])
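The helper dd.display_data is not shown here. Purely as an illustration (not its actual source), hidden-unit weights can be tiled into image patches with matplotlib along these lines:

import numpy as np
import matplotlib.pyplot as plt

fig, axes = plt.subplots(5, 5, figsize=(6, 6))  # assumes 25 hidden units
for ax, row in zip(axes.ravel(), theta1[:, 1:]):
    side = int(np.sqrt(row.size))               # e.g. 400 inputs -> 20x20 patch
    ax.imshow(row.reshape(side, side).T, cmap='gray')
    ax.axis('off')
plt.show()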