import numpy as np

import Utils


def test(X, y, learned_params):
    N = np.shape(X)[0]                    # number of instances
    X = np.append(np.ones((N, 1)), X, 1)  # prepend a bias column of ones (matches the weight layout used in training)
    F = np.shape(X)[1]                    # number of features + 1 (bias); unused below
    class_prob = []
    for w in learned_params.keys():
        prob = Utils.logistic_transformation(learned_params[w], X)
        class_prob.append(prob)
    class_prob = np.array(class_prob)     # shape: (num_classes, N)
    # For each instance, pick the class whose one-vs-rest model assigns the
    # highest probability. (The original np.where lookup compared the whole
    # list against a scalar, which never matches; argmax does this correctly.)
    best_class = np.argmax(class_prob, 0)
    # min_class_label / max_class_label are assumed to be module-level globals.
    output_label = range(min_class_label, max_class_label + 1)
    predicted_y = [output_label[k] for k in best_class]
    print "predicted y :", predicted_y
    print "Actual y:", y
    accuracy = Utils.calculate_accuracy(np.array(y), np.array(predicted_y))
    print "accuracy for test data :", accuracy
    f_score_mean, f_score_std = Utils.calculate_average_F1score(
        np.array(y), np.array(predicted_y), min_class_label, max_class_label)
    print "Average f score for test data :", f_score_mean
    error_rate = Utils.calculate_error_rate(np.array(y), np.array(predicted_y))
    return (accuracy, f_score_mean, f_score_std, error_rate)
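
# Utils.logistic_transformation is called throughout this file but is not
# defined in this section. A minimal sketch of what it is assumed to compute
# (the real helper lives in Utils and may differ): the sigmoid of each
# instance's linear score, written with logaddexp for numerical stability in
# the same style as logistic_regression below.
def _logistic_transformation_sketch(w, X):
    # p_i = 1 / (1 + exp(-X_i . w)) = exp(-log(1 + exp(-X_i . w)))
    return np.exp(-np.logaddexp(0, -np.dot(X, w)))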
def Estep(x, w, a, b):
    """E-step: compute the responsibilities ycap = p*a / (p*a + (1-p)*b),
    where p is the logistic model's probability for each instance."""
    p = Utils.logistic_transformation(w, x)
    # Work in log space for numerical stability.
    log_p_a = np.log(p) + np.log(a)
    log_p_ab = np.log(p * a + (1 - p) * b)
    log_ycap = log_p_a - log_p_ab
    ycap = np.exp(log_ycap)
    return ycap
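
# A minimal sanity check for Estep (hedged: assumes Utils.logistic_transformation
# is the sigmoid sketched above). With w = 0 every instance gets p = 0.5, so the
# responsibility reduces to a / (a + b); with a = 0.9, b = 0.1 that is 0.9.
def _estep_example():
    x = np.zeros((1, 3))         # one instance; sigmoid(0) = 0.5
    w = np.zeros(3)
    print Estep(x, w, 0.9, 0.1)  # expected: [ 0.9]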
def logistic_regression(x, y, beta_start=None, verbose=False,
                        CONV_THRESH=1.e-3, MAXIT=500):
    """
    Uses the Newton-Raphson algorithm to calculate maximum likelihood
    estimates of a logistic regression. Can handle the multivariate case
    (more than one predictor).

    x - 2-d array of predictors; number of predictors = x.shape[0] = npreds
    y - binary outcomes (len(y) = x.shape[1] = N)
    beta_start - initial beta vector (default zeros(npreds+1, x.dtype))
    if verbose=True, diagnostics are printed for each iteration.
    MAXIT - max number of iterations (default 500)
    CONV_THRESH - convergence threshold (on the absolute change in
    log-likelihood between iterations)

    Returns beta, the logistic regression coefficients (an npreds+1 element
    vector). The (npreds+1)x(npreds+1) information matrix J_bar and the
    log-likelihood l are computed internally; J_bar can be used to estimate
    the covariance matrix and the standard errors of beta, and l can be used
    for a chi-squared significance test:
      covmat = inverse(J_bar)     --> covariance matrix
      stderr = sqrt(diag(covmat)) --> standard errors for beta
      deviance = -2*l             --> scaled deviance statistic
    The chi-squared value for -2*l is the model chi-squared test.
    """
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and y should be the same length!"
    try:
        N, npreds = x.shape[1], x.shape[0]
    except IndexError:
        # single predictor: use the simple logistic regression routine.
        N, npreds = x.shape[-1], 1
        return simple_logistic_regression(x, y, beta_start=beta_start,
                CONV_THRESH=CONV_THRESH, MAXIT=MAXIT, verbose=verbose)
    if beta_start is None:
        beta_start = np.zeros(npreds + 1, x.dtype)
    X = np.ones((npreds + 1, N), x.dtype)  # design matrix with a leading row of ones (intercept)
    X[1:, :] = x
    Xt = np.transpose(X)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    # Log-likelihood sum(y*log(p) + (1-y)*log(1-p)) with p = sigmoid(beta.X),
    # written with logaddexp for numerical stability.
    l = np.sum(y * -np.logaddexp(0, -np.dot(beta, X)) +
               (1 - y) * -np.logaddexp(0, np.dot(beta, X)))
    if verbose:
        print 'Logistic Regression : '
        print 'iteration  beta  log-likelihood  |l - l_old|'
    while iter < MAXIT:
        beta_old = beta
        l_old = l
        # predicted probabilities under the current beta
        p = Utils.logistic_transformation(beta.T, X.T)
        p = p.T
        s = np.dot(X, y - p)                             # scoring function (gradient)
        J_bar = np.dot(X * np.multiply(p, 1. - p), Xt)   # information matrix
        # Newton-Raphson update: beta = beta_old + inverse(J_bar).s
        beta = beta_old + invertAdotB(J_bar, s)
        l = np.sum(y * -np.logaddexp(0, -np.dot(beta, X)) +
                   (1 - y) * -np.logaddexp(0, np.dot(beta, X)))
        diff = np.fabs(l - l_old)  # absolute change in log-likelihood
        if verbose:
            print iter + 1, beta, l, diff
        if diff <= CONV_THRESH and l > l_old:
            break
        iter = iter + 1
    if iter == MAXIT and diff > CONV_THRESH:
        print 'warning: convergence not achieved with threshold of %s in %s iterations' % (CONV_THRESH, MAXIT)
    return beta
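
# A minimal usage sketch for logistic_regression on synthetic data. Hedged
# assumptions: invertAdotB(A, b) solves A.z = b (e.g. via np.linalg.solve),
# and Utils.logistic_transformation is the sigmoid sketched near the top of
# this file; neither helper is defined in this section.
if __name__ == '__main__':
    np.random.seed(0)
    n = 200
    x_demo = np.random.randn(2, n)           # (npreds, N) layout expected by logistic_regression
    true_beta = np.array([0.5, 2.0, -1.0])   # intercept + two slopes
    logits = true_beta[0] + np.dot(true_beta[1:], x_demo)
    y_demo = (np.random.rand(n) < 1.0 / (1.0 + np.exp(-logits))).astype(x_demo.dtype)
    beta_hat = logistic_regression(x_demo, y_demo, verbose=False)
    print 'estimated beta :', beta_hat       # should be near [0.5, 2.0, -1.0]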