def logistic_gradient(X, Y, gd_lambda, descent=True, epsilon_accepted=1e-6, max_iterations=10000000):
    """Fit logistic-regression weights with full-batch gradient updates.

    Starting from all-zero weights, every pass applies

        w <- w + gd_lambda * sum_i (Y[i] - sigmoid(w . X[i])) * X[i]

    and the loop stops as soon as the mean absolute change of the weights
    falls below ``epsilon_accepted`` or ``max_iterations`` passes were made.
    Progress is printed on every pass.

    Args:
        X: pandas DataFrame of numeric features, one row per sample.
            NOTE: it is modified in place -- a constant column ``'b'`` filled
            with ones is added (or overwritten); the weight of that column
            plays the role of the bias term.
        Y: target values, one per row of ``X``.  May be a pandas Series, a
            NumPy array, a list, or a single-column frame/array.
        gd_lambda: step size applied to the summed gradient.
        descent: unused; kept so existing callers keep working.
        epsilon_accepted: convergence threshold on the mean absolute weight
            change between two consecutive passes.
        max_iterations: maximum number of passes; with a value <= 0 the
            initial all-zero weights are returned.

    Returns:
        1-D NumPy array holding one weight per column of ``X`` (``'b'``
        included).

    Raises:
        ValueError: if ``Y`` is not one-dimensional after squeezing, has
            fewer entries than ``X`` has rows, or if a weight becomes NaN.
    """
    X['b'] = np.ones(len(X))
    n_features = X.shape[1]  # number of cols, bias column included
    print('sh0: {} len(X): {}'.format(n_features, len(X)))

    # Pull the raw arrays out once: DataFrame.values may build a new array on
    # every access, which the per-element loops used to pay for repeatedly.
    features = np.asarray(X.values, dtype=float)
    # Accept anything array-like for Y, not only objects exposing ``.values``.
    targets = np.atleast_1d(np.squeeze(np.asarray(Y, dtype=float)))
    if targets.ndim != 1 or len(targets) < len(features):
        raise ValueError('Y must provide one target value per row of X')
    # Surplus trailing targets are ignored, as the row-indexed loop always did.
    targets = targets[:len(features)]

    weights = np.zeros(n_features)
    iterations = 0
    while iterations < max_iterations:
        # One sigmoid evaluation per row; the residual is shared by every
        # column instead of being recomputed once per weight.
        probabilities = np.array(
            [sigmoid(np.dot(weights, row)) for row in features], dtype=float)
        residuals = targets - probabilities
        updated = weights + gd_lambda * np.dot(residuals, features)

        if np.any(np.isnan(updated)):
            raise ValueError('NAN is found on iteration {}'.format(iterations))
        epsilon = np.mean(np.abs(updated - weights))
        print('epsilon: {}'.format(epsilon))
        print('w:')
        print('{} iterations, w: {}'.format(iterations, updated))

        weights = updated
        iterations += 1
        if epsilon < epsilon_accepted:
            break
    return weights
def predict(df, model, binary=False, logistic=False):
    """Score the rows of ``df`` with the weight vector ``model``.

    A constant column ``'b'`` (value 1) is added to ``df`` in place when it is
    missing, then the scores are ``np.dot(df, model)``.

    Args:
        df: pandas DataFrame of features; gains a ``'b'`` column if absent.
        model: weights, one per column of ``df`` (``'b'`` included).
        binary: when true, every score is replaced by 0 (below the 0.5
            cutoff) or 1 (otherwise).
        logistic: only consulted together with ``binary``; the scores are
            passed through ``sigmoid`` before the cutoff is applied.

    Returns:
        The raw dot-product result when ``binary`` is false; otherwise the
        thresholded values -- a list when ``logistic`` is set, else the
        dot-product array modified in place.
    """
    if 'b' not in df.columns:
        df['b'] = 1

    scores = np.dot(df, model)
    if not binary:
        return scores

    cutoff = .5
    # The logistic variant works on a fresh list of probabilities, the plain
    # variant thresholds the score array itself.
    outcome = [sigmoid(score) for score in scores] if logistic else scores
    for position, value in enumerate(outcome):
        outcome[position] = 0 if value < cutoff else 1
    return outcome
def testLogGradient2():
    """Smoke-run ``gd.logistic_gradient`` on a tiny noise-free data set.

    Ten random two-feature rows are labelled with
    ``sigmoid(0.5 * x0 + 2 * x1 + 3)``; the fitted weights are printed and
    nothing is asserted.
    """
    X = np.random.random(size=[10, 2])
    # Hand the targets over as a pandas Series: a trainer that reads
    # ``Y.values`` fails on a bare ndarray, while a Series also works
    # wherever a plain array-like is accepted.
    # NOTE(review): assumes utils.sigmoid returns a 1-D array here -- confirm.
    y = pd.Series(utils.sigmoid(X[:, 0] * .5 + 2 * X[:, 1] + 3))
    df = pd.DataFrame(data=X)
    w = gd.logistic_gradient(df, y, .05)
    print(w)