Example #1
    def fit(self, X, y, ns, ufc, ignore_sensitive=False, **params):

        # fix ns to 1 in current version
        ns = 1

        # compute weights
        Xw = np.array([[0.0], [1.0]])
        self.w_ = ufc.predict_proba(Xw)[:, 1]

        # add a constant term
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]

        # check optimization parameters
        if 'disp' not in params:
            params['disp'] = False
        if 'maxiter' not in params:
            params['maxiter'] = 100

        self.coef_ = np.zeros(X.shape[1])
        self.coef_ = fmin_cg(self.loss,
                             self.coef_,
                             fprime=self.grad_loss,
                             args=(X, y, ns),
                             **params)

        # clear the weights for sensitive features
        if ignore_sensitive:
            self.coef_[-ns:] = 0.0
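
The loss and grad_loss callables used above are not shown in this snippet; fmin_cg only requires that they accept the parameter vector first, followed by the extra args tuple. A minimal, hypothetical pair with the matching (coef, X, y, ns) signature, assuming a plain unweighted logistic loss rather than this project's fairness-aware one, could look like this:

import numpy as np

def loss(coef, X, y, ns):
    # negative log-likelihood of a logistic model; ns is accepted only to match args
    p = 1.0 / (1.0 + np.exp(-X.dot(coef)))
    return -np.sum(y * np.log(p + 1e-12) + (1.0 - y) * np.log(1.0 - p + 1e-12))

def grad_loss(coef, X, y, ns):
    # gradient of the negative log-likelihood with respect to coef
    p = 1.0 / (1.0 + np.exp(-X.dot(coef)))
    return X.T.dot(p - y)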
Example #2
    def train_alt(self, alpha=0):
        """ Define the gradient and hand it off to a scipy gradient-based
        optimizer. """

        # Set alpha so it can be referred to later if needed
        self.alpha = alpha

        x_total = np.concatenate((self.x_train, self.x_test), axis=0)
        #similarityMatrix = np.ones((self.x_train.shape[0],x_total.shape[0]))
        similarityMatrix = similarity_calculator.get_similarities_alt(
            x_total, self.x_train)
        # Define the derivative of the likelihood with respect to beta_k.
        # Need to multiply by -1 because we will be minimizing.
        # The following has a dimension of [1 x k] where k = |W|
        dl_by_dWk = lambda W, k: (k > 0) * self.sfRegStep(
            W, k, similarityMatrix, alpha, x_total)

        # The full gradient is just an array of componentwise derivatives
        gradient = lambda W: np.array([dl_by_dWk(W, k) \
                                       for k in range(self.x_train.shape[1])]).transpose()

        # The function to be minimized
        # Use the negative log likelihood for the objective function.
        objectiveFunction = lambda W: -self.likelihood_alt(
            similarityMatrix, betas=W, alpha=self.alpha)

        # Optimize
        print('Optimizing for alpha = {}'.format(alpha))
        #self.betas = fmin_bfgs(objectiveFunction, self.betas, fprime=gradient)
        self.betas = fmin_cg(objectiveFunction,
                             self.betas,
                             fprime=gradient,
                             maxiter=10)
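
Note that the fprime callable handed to fmin_cg must return an array with the same shape as the parameter vector. A self-contained illustration of that contract on a toy quadratic (not taken from the project above):

import numpy as np
from scipy.optimize import fmin_cg

f = lambda w: np.sum((w - 1.0) ** 2)   # objective: squared distance to the all-ones vector
fprime = lambda w: 2.0 * (w - 1.0)     # gradient, same shape as w

w_opt = fmin_cg(f, np.zeros(3), fprime=fprime, disp=False)
# w_opt is approximately [1. 1. 1.]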
Example #3
def Train(parameters):
    optimalParameters = optimize.fmin_cg(
        f=nnCostFunction,
        x0=parameters,
        fprime=nnGradient,
        args=(input_layer_size, hidden_layer_size, num_labels, X, yVectors, lam))
    return optimalParameters
def optimize_theta(th, X, Y, m, n, myLambda):
    result = optimize.fmin_cg(cost_function,
                              x0=th,
                              fprime=gradient,
                              args=(X, Y, m, n, myLambda),
                              maxiter=100,
                              disp=True,
                              full_output=True)
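    # with full_output=True, result is (xopt, fopt, func_calls, grad_calls, warnflag)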
    return result[0], result[1]
    def train(self, update=True, **params):
        """
        Train the model and generate the theta values for this classifier
        
        Parameters
        ----------
        update : boolean
            when set to True, the classifier's theta values are updated
        **params : dict
            inputLayerSize : int
                Number of input features
            hiddenLayerSize : int
                Number of nodes in the hidden layer
            numLabels : int
                Number of unique labels (i.e. classes)
            X : ndarray (2D)
                Contains the training set, with each row as one record
            y : ndarray (1D)
                Contains the corresponding label for each row in X
            lambdaVal : float
                Regularization parameter
            maxIter : int
                Maximum number of iterations for the optimization algorithm
                
        Returns:
        --------
        xopt : ndarray (1D)
            optimized theta value
        cost : float
            cost associated with xopt
        """    
        inputLayerSize = params["inputLayerSize"]
        hiddenLayerSize = params["hiddenLayerSize"]
        numLabels = params["numLabels"]
        X = params["X"]
        y = params["y"]
        lambdaVal = params["lambdaVal"]
        maxIter = params["maxIter"]

        theta1 = self.randomInitWeights(inputLayerSize, hiddenLayerSize)
        theta2 = self.randomInitWeights(hiddenLayerSize, numLabels)
        nnParams = np.append(theta1, theta2)

        shortCostFunction = lambda nnParams : self.computeCost(inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, nnParams)
        shortGradFunction = lambda nnParams : self.computeGradient(inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, nnParams)
        
        retVal = fmin_cg(shortCostFunction, x0=nnParams, fprime=shortGradFunction, maxiter=maxIter, full_output=True)
        nnParams = retVal[0]
        
        if update:
            self.theta1 = np.reshape(nnParams[0:hiddenLayerSize*(inputLayerSize+1)], (hiddenLayerSize, inputLayerSize+1))
            self.theta2 = np.reshape(nnParams[hiddenLayerSize*(inputLayerSize+1):], (numLabels, hiddenLayerSize+1))
        
        retVal = (retVal[0], retVal[1])
        return retVal
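
A hypothetical call to this method (the class name, array names, and layer sizes below are purely illustrative):

clf = NeuralNetworkClassifier()   # assumed class name; the class itself is not shown here
nn_params, cost = clf.train(update=True,
                            inputLayerSize=400, hiddenLayerSize=25, numLabels=10,
                            X=X_train, y=y_train, lambdaVal=1.0, maxIter=50)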
Example #6
File: Q21.py Project: nuffe/ml2
def run():
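    # assumed imports for this snippet: numpy as np, matplotlib.pyplot as plt,
    # from numpy import dot, eye, log, trace; from numpy.linalg import det, inv;
    # from scipy.optimize import fmin_cg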
    ## Our data
    sigma = 0.1
    N = 100

    latent = np.linspace(1, 4 * np.pi, N)  # True latent variable
    A = np.random.normal(0, 1, (10, 2))
    fnonlin = np.column_stack((latent * np.sin(latent), latent * np.cos(latent)))
    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))

    # Center Y
    Y = Y - Y.mean(axis=1)[:, None]

    ## The cov matrix we need for our goal function
    S = np.cov(Y, bias=1)

    # fmin_cg expects a 1-D parameter vector, not a matrix
    def loglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma ** 2 * eye(10)
        return N * (log(det(C)) + trace(dot(inv(C), S)))

    def dloglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma ** 2 * eye(10)

        t1 = dot(inv(C), S)
        t2 = dot(inv(C), W)
        left = dot(t1, t2)
        right = dot(inv(C), W)
        grad = N * (-left + right)  # Sanity check: check if dloglik(W_star) ~= 0, i.e. I correctly specified
        # the gradients and we are at a stationary point...
        return grad.reshape(20)

    # Low noise (data generated with sigma = 0.1 above)
    Winit = np.random.normal(0, 1, 20)
    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    ## Recover our latent factors based on the estimated W
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)), dot(W_star.transpose(), Y[:, n]))

    X_hat1 = np.copy(X_hat)

    ## Run some experiments ##

    # Some noise
    sigma = 1

    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)

    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)), dot(W_star.transpose(), Y[:, n]))

    X_hatNoise = np.copy(X_hat)

    # Plenty of noise
    sigma = 10

    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)

    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)), dot(W_star.transpose(), Y[:, n]))

    X_hatNoiseP = np.copy(X_hat)

    ## Plot it
    plt.subplot(2, 2, 1)
    plt.title("The true lower dimensional representation")
    plt.plot(latent, label="True latent variable")
    plt.plot(fnonlin[:, 0], fnonlin[:, 1], label="Non linear transform")
    plt.legend()
    plt.xlabel("$Xi$")

    plt.subplot(2, 2, 2)
    plt.title("Recovered latent variables, no noise")
    plt.plot(X_hat1[:, 0], X_hat1[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")

    plt.subplot(2, 2, 3)
    plt.title("sigma=1")
    plt.plot(X_hatNoise[:, 0], X_hatNoise[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")

    plt.subplot(2, 2, 4)
    plt.title("sigma=10")
    plt.plot(X_hatNoiseP[:, 0], X_hatNoiseP[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")
Example #7
def func(p, *args):
    a, b = p

    x, y = args
    cost = y - (a * x + b)
    return cost

x = np.arange(1, 10, 1)
y_true = 3 * x + 4
y_mean = y_true + 10 * np.random.rand(len(x))

p0 = np.array([1, 2])
print(p0)
rs1= fmin_bfgs(func1,[1,2],args=(x,y_mean))

rs2= fmin_cg(func1,[1,2],args=(x,y_mean))
rs = leastsq(func,p0,args=(x,y_mean));
# 
# rs1=fmin_bfgs(func,p0,args=(x,y_mean))

print "rs=",rs
# 
print "rs1=",rs1
print "rs2=",rs2
y1= rs[0][0]*x + rs[0][1]
y2 = rs1[0]*x + rs1[1]
pl.plot(x,y1,'r',label="y1");
pl.plot(x,y2,'b',label="y2");
pl.plot(x,y_mean,'og',label='y_mean');
pl.legend()
pl.show()
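
func1 is never defined in this snippet: fmin_bfgs and fmin_cg need a scalar objective, whereas leastsq minimizes the residual vector returned by func. A hypothetical func1 consistent with the calls above would sum the squared residuals:

def func1(p, x, y):
    a, b = p
    return np.sum((y - (a * x + b)) ** 2)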
    def train(self, update=True, **params):
        """
        Train the model and generate the theta values for this classifier
        
        Parameters
        ----------
        update : boolean
            when set to True, the classifier's theta value is updated
        **params : dict
            X : ndarray (2D)
                Contains the training set, with each row as one record
            y : ndarray (1D)
                Contains the corresponding label for each row in X
            lambdaVal : float
                Regularization parameter
            maxIter : int
                Maximum number of iterations that the optimization algorithm will run for each label
            numOfLabels : int
                Number of unique labels
                
        Returns:
        --------
        xopt : ndarray (2D)
            optimized theta values, one row per label
        cost : float
            average cost across the one-vs-all problems
        """
        X = params["X"]
        y = params["y"]
        lambdaVal = params["lambdaVal"]
        maxIter = params["maxIter"]
        numOfLabels = params["numOfLabels"]

        thetaSize = X.shape[1]
        retTheta = np.zeros((numOfLabels, thetaSize + 1))
        X = np.c_[np.ones(X.shape[0]), X]
        theta = np.zeros(thetaSize + 1)
        cost = 0

        for i in range(0, numOfLabels):
            tmpY = (y == i).astype(int)
            shortCostFunction = lambda theta: self.computeCost(
                X, tmpY, lambdaVal, theta)
            shortGradFunction = lambda theta: self.computeGradient(
                X, tmpY, lambdaVal, theta)
            retVal = fmin_cg(shortCostFunction,
                             x0=theta,
                             fprime=shortGradFunction,
                             maxiter=maxIter,
                             full_output=True)

            retTheta[i, :] = retVal[0]
            cost += retVal[1]

        cost /= numOfLabels
        retVal = (retTheta, cost)

        if update:
            self.theta = retTheta

        return retVal
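
For reference, this train returns a (numOfLabels, thetaSize + 1) matrix of stacked one-vs-all parameters together with the average cost; a hypothetical call (class and array names are illustrative) would be:

clf = OneVsAllClassifier()   # assumed class name; the class itself is not shown here
theta, avg_cost = clf.train(update=True, X=X_train, y=y_train,
                            lambdaVal=1.0, maxIter=100, numOfLabels=10)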
Example #9
File: Q21.py Project: erikcs/ml2
def run():
    ## Our data
    sigma = .1
    N = 100

    latent = np.linspace(1, 4 * np.pi, N)  # True latent variable
    A = np.random.normal(0, 1, (10, 2))
    fnonlin = np.column_stack(
        (latent * np.sin(latent), latent * np.cos(latent)))
    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))

    # Center Y
    Y = Y - Y.mean(axis=1)[:, None]

    ## The cov matrix we need for our goal function
    S = np.cov(Y, bias=1)

    # fmin_cg expects a 1-D parameter vector, not a matrix
    def loglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma**2 * eye(10)
        return N * (log(det(C)) + trace(dot(inv(C), S)))

    def dloglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma**2 * eye(10)

        t1 = dot(inv(C), S)
        t2 = dot(inv(C), W)
        left = dot(t1, t2)
        right = dot(inv(C), W)
        grad = N * (
            -left + right
        )  # Sanity check: check if dloglik(W_star) ~= 0, i.e. I correctly specified
        # the gradients and we are at a stationary point...
        return grad.reshape(20)

    # Low noise (data generated with sigma = .1 above)
    Winit = np.random.normal(0, 1, 20)
    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    ## Recover our latent factors based on the estimated W
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))

    X_hat1 = np.copy(X_hat)

    ## Run some experiments ##

    # Some noise
    sigma = 1

    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)

    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))

    X_hatNoise = np.copy(X_hat)

    # Plenty of noise
    sigma = 10

    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)

    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))

    X_hatNoiseP = np.copy(X_hat)

    ## Plot it
    plt.subplot(2, 2, 1)
    plt.title('The true lower dimensional representation')
    plt.plot(latent, label='True latent variable')
    plt.plot(fnonlin[:, 0], fnonlin[:, 1], label='Non linear transform')
    plt.legend()
    plt.xlabel('$Xi$')

    plt.subplot(2, 2, 2)
    plt.title('Recovered latent variables, no noise')
    plt.plot(X_hat1[:, 0], X_hat1[:, 1])
    plt.xlabel('$X1$')
    plt.ylabel('$X2$')

    plt.subplot(2, 2, 3)
    plt.title('sigma=1')
    plt.plot(X_hatNoise[:, 0], X_hatNoise[:, 1])
    plt.xlabel('$X1$')
    plt.ylabel('$X2$')

    plt.subplot(2, 2, 4)
    plt.title('sigma=10')
    plt.plot(X_hatNoiseP[:, 0], X_hatNoiseP[:, 1])
    plt.xlabel('$X1$')
    plt.ylabel('$X2$')
Example #10
def func(p, *args):
    a, b = p

    x, y = args
    cost = y - (a * x + b)
    return cost


x = np.arange(1, 10, 1)
y_true = 3 * x + 4
y_mean = y_true + 10 * np.random.rand(len(x))

p0 = np.array([1, 2])
print(p0)
rs1 = fmin_bfgs(func1, [1, 2], args=(x, y_mean))

rs2 = fmin_cg(func1, [1, 2], args=(x, y_mean))
rs = leastsq(func, p0, args=(x, y_mean))
#
# rs1=fmin_bfgs(func,p0,args=(x,y_mean))

print "rs=", rs
#
print "rs1=", rs1
print "rs2=", rs2
y1 = rs[0][0] * x + rs[0][1]
y2 = rs1[0] * x + rs1[1]
pl.plot(x, y1, 'r', label="y1")
pl.plot(x, y2, 'b', label="y2")
pl.plot(x, y_mean, 'og', label='y_mean')
pl.legend()
pl.show()