def test(X, y, learned_params):
    
    N = np.shape(X)[0]  # number of instances
    X = np.append(np.ones((N,1)), X, 1)  # prepend a column of ones for the bias term, matching the learned weight vectors
    F = np.shape(X)[1]  # number of features + 1 (bias)
    
    
    # One-vs-rest prediction: get P(class | x) from each class's learned weights.
    # Iterate classes in sorted key order so that row k of class_prob lines up
    # with output_label[k] below.
    class_prob = []
    for w in sorted(learned_params.keys()):
        prob = Utils.logistic_transformation(learned_params[w], X)
        class_prob.append(prob)

    class_prob = np.array(class_prob)       # shape: (num_classes, N)
    best_class = np.argmax(class_prob, 0)   # index of the most probable class per instance

    # min_class_label and max_class_label are module-level globals
    output_label = range(min_class_label, max_class_label+1)
    predicted_y = [output_label[k] for k in best_class]
    
    print "predicted y :", predicted_y
    print "Actual y:", y
    accuracy = Utils.calculate_accuracy(np.array(y), np.array(predicted_y))
    print "accuracy for test data :", accuracy
    f_score_mean, f_score_std = Utils.calculate_average_F1score(np.array(y), np.array(predicted_y), min_class_label, max_class_label)
    print "Average f score for test data :", f_score_mean
    
    error_rate = Utils.calculate_error_rate(np.array(y), np.array(predicted_y))
    return (accuracy, f_score_mean, f_score_std, error_rate)
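
# The Utils helpers used above are defined elsewhere in this repo. As a rough
# sketch (an assumption about the interface, not the repo's actual code),
# Utils.logistic_transformation(w, X) is taken to return the sigmoid of the
# linear score X.dot(w) for each row of X, where X already carries the bias column:
import numpy as np  # already imported at module level; repeated so the sketch stands alone

def _sketch_logistic_transformation(w, X):
    """Hypothetical stand-in for Utils.logistic_transformation: sigmoid(X.dot(w))."""
    z = np.dot(X, w)
    return 1.0 / (1.0 + np.exp(-z))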
def Estep(x, w, a, b):
    # E-step: responsibility ycap = p*a / (p*a + (1-p)*b), where p is the
    # sigmoid prediction for each row of x. The ratio is formed in log space
    # before exponentiating (see the numeric check below).
    p = Utils.logistic_transformation(w, x)
    log_p_a = np.log(p) + np.log(a)
    log_p_ab = np.log(p*a + (1-p)*b)
    log_ycap = log_p_a - log_p_ab
    ycap = np.exp(log_ycap)
    return ycap
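
# Quick numeric check of the E-step ratio ycap = p*a / (p*a + (1-p)*b) with
# made-up values (p is the sigmoid prediction; a and b are the two likelihood
# terms supplied by the caller). The log-space form above computes the same
# quantity while keeping the numerator p*a from underflowing to zero.
def _sketch_estep_check():
    p, a, b = 0.8, 0.9, 0.2                # hypothetical values
    direct = p*a / (p*a + (1-p)*b)         # 0.72 / 0.76 ~= 0.947
    logspace = np.exp(np.log(p) + np.log(a) - np.log(p*a + (1-p)*b))
    return direct, logspace                # both ~= 0.947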
def logistic_regression(x,y,beta_start=None,verbose=False,CONV_THRESH=1.e-3,
                        MAXIT=500):
    """
 Uses the Newton-Raphson algorithm to calculate maximum
 likliehood estimates of a logistic regression.

 Can handle multivariate case (more than one predictor).

 x - 2-d array of predictors. Number of predictors = x.shape[0]=N
 y - binary outcomes (len(y) = x.shape[1])
 beta_start - initial beta vector (default zeros(N+1,x.dtype)
 if verbose=True, diagnostics printed for each iteration.
 MAXIT - max number of iterations (default 500)
 CONV_THRESH - convergence threshold (sum of absolute differences
  of beta-beta_old)

 returns beta (the logistic regression coefficients, a N+1 element vector),
 J_bar (the (N+1)x(N=1) information matrix), and l (the log-likeliehood).
 J_bar can be used to estimate the covariance matrix and the standard
 error beta.
 l can be used for a chi-squared significance test.

 covmat = inverse(J_bar)     --> covariance matrix
 stderr = sqrt(diag(covmat)) --> standard errors for beta
 deviance = -2l              --> scaled deviance statistic
 chi-squared value for -2l is the model chi-squared test.
    """
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and len(y) must be equal!"
    try:
        N, npreds = x.shape[1], x.shape[0]
    except IndexError: # single predictor, use simple logistic regression routine.
        N, npreds = x.shape[-1], 1
        return simple_logistic_regression(x,y,beta_start=beta_start,
               CONV_THRESH=CONV_THRESH,MAXIT=MAXIT,verbose=verbose)
    if beta_start is None:
        beta_start = np.zeros(npreds+1,x.dtype)
    X = np.ones((npreds+1,N), x.dtype)
    X[1:, :] = x
    Xt = np.transpose(X)

    iter = 0; diff = 1.; beta = beta_start  # initial values
    l = np.sum( y * -np.logaddexp(0, -1 * np.dot(beta, X)) + (1-y) * -np.logaddexp(0, 1 * np.dot(beta, X)))

    if verbose:
        print 'Logistic Regression : '
        print 'iteration  beta log-likelihood |l - l_old|'
    try:
        while iter < MAXIT:
            beta_old = beta
            l_old = l
            p = Utils.logistic_transformation(beta.T, X.T)  # p = sigmoid(beta . X)
            p = p.T
            s = np.dot(X, y-p)                            # score vector (gradient)
            J_bar = np.dot(X*np.multiply(p,1.-p),Xt)      # information matrix
            beta = beta_old + invertAdotB(J_bar, s)       # Newton step: beta + J_bar^-1 s
            # log-likelihood, written with logaddexp for numerical stability
            l = np.sum( y * -np.logaddexp(0, -1 * np.dot(beta, X)) + (1-y) * -np.logaddexp(0, 1 * np.dot(beta, X)))
            diff = np.fabs(l - l_old)                     # change in log-likelihood
            if verbose:
                print iter+1, beta, l, diff
            if diff <= CONV_THRESH and l > l_old: break
            iter = iter + 1
        if iter == MAXIT and diff > CONV_THRESH:
            print 'warning: convergence not achieved with threshold of %s in %s iterations' % (CONV_THRESH,MAXIT)
        return beta #, J_bar, l
    except Exception:
        # Re-raise any failure from the Newton update (e.g. a singular
        # information matrix) to the caller.
        raise
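
# A small usage sketch for logistic_regression with synthetic data. Shapes and
# names here are assumptions taken from the docstring (x is (npreds, nsamples),
# y holds 0/1 outcomes), and the call relies on the repo's Utils and invertAdotB
# helpers being available. As written, only beta is returned; J_bar and l are
# computed internally but commented out of the return statement.
def _sketch_logistic_regression_usage():
    np.random.seed(0)
    npreds, nsamples = 2, 200
    x = np.random.randn(npreds, nsamples)
    true_beta = np.array([0.5, 2.0, -1.0])               # bias + one weight per predictor
    logits = true_beta[0] + np.dot(true_beta[1:], x)
    y = (np.random.rand(nsamples) < 1.0/(1.0 + np.exp(-logits))).astype(x.dtype)
    beta = logistic_regression(x, y, verbose=False)
    return beta                                          # should land near true_beta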