def testBinary():
    not_binary = [5,6,7]
    binary = [1,0,1]
    df = pd.DataFrame({'not_binary': not_binary, 'binary': binary})
    print df['not_binary']
    nb_result = utils.check_binary(df['not_binary'])
    b_result = utils.check_binary(df['binary'])
    print 'not binary: {} binary: {}'.format(nb_result, b_result)
def dec_or_reg_tree(df_train, df_test, Y):
    binary = utils.check_binary(df_train[Y])
    if binary:
        newtree = treeHW4.TreeOptimal(max_depth=1)
        y = list(df_train[Y])
        nondf_train = utils.pandas_to_data(df_train)
        nondf_test = utils.pandas_to_data(df_test)
        newtree.fit(nondf_train, y)
        predict = newtree.predict(nondf_train)
        error_train = mystats.get_error(predict, y, binary)

        y = utils.pandas_to_data(df_test[Y])
        predict = newtree.predict(nondf_test)
        error_test = mystats.get_error(predict, y)
    else:

        node = mytree.Node(np.ones(len(df_train)))
        hw1.branch_node(node, df_train, 5, Y)
        model = mytree.Tree(node)
        predict = model.predict_obj()
        error_train = mystats.get_error(predict, df_train[Y], binary)

        node.presence = np.ones(len(df_test))
        hw1.test_node(node, df_test, Y)
        test_tree = mytree.Tree(node)
        predict = test_tree.predict_obj()
        error_test = mystats.get_error(predict, df_test[Y], binary)
    return [error_train, error_test]
def linear_gd_error(df, Y):
    binary = utils.check_binary(df[Y])
    model = gd.gradient(df, df[Y], 0.00001, max_iterations=50)
    print model
    predict = gd.predict(df, model, binary)
    print predict
    error = mystats.get_error(predict, df_train[Y], binary)
    return error
def logistic_gd(df_train, df_test, Y):
    """ logistic gradient descent """
    binary = utils.check_binary(df_train[Y])
    model = gd.logistic_gradient(df_train, df_train[Y], 0.1, max_iterations=5)
    print model
    predict = gd.predict(df_train, model, binary, True)
    print predict
    error_train = mystats.get_error(predict, df_train[Y], binary)
    predict = gd.predict(df_test, model, binary, True)
    print predict
    error_test = mystats.get_error(predict, df_test[Y], binary)
    return [error_train, error_test]
def linear_gd(df_train, df_test, Y):
    """ linear gradient descent """
    binary = utils.check_binary(df_train[Y])
    model = gd.gradient(df_train, df_train[Y], 0.00001, max_iterations=50)
    print model
    predict = gd.predict(df_train, model, binary)
    print predict
    error_train = mystats.get_error(predict, df_train[Y], binary)
    predict = gd.predict(df_test, model, binary)
    print predict
    error_test = mystats.get_error(predict, df_test[Y], binary)
    return [error_train, error_test]
def testLogisticGradient():
    """ logistic gradient descent """
    df_test, df_train = utils.split_test_and_train(utils.load_and_normalize_spam_data())
    Y = 'is_spam'
    binary = utils.check_binary(df_train[Y])
    model = gd.logistic_gradient(df_train, df_train[Y], .1, max_iterations=5)
    #print model
    #raw_input()
    predict = gd.predict(df_train, model, binary, True)
    print predict
    error_train = mystats.get_error(predict, df_train[Y], binary)
    #raw_input()
    predict = gd.predict(df_test, model, binary, True)
    print predict
    error_test = mystats.get_error(predict, df_test[Y], binary)
    print 'error train {} error_test {}'.format(error_train, error_test)
    return [error_train, error_test]
def linear_reg_errors(df_train, df_test, Y, ridge=False, sigmoid=False):
    binary = utils.check_binary(df_train[Y])
    error_train = linear_reg(df_train, Y, binary, ridge, sigmoid)
    error_test = linear_reg(df_test, Y, binary, ridge, sigmoid)
    return [error_train, error_test]