def testBinary():
    """Exercise utils.check_binary on a non-binary and a binary column.

    Builds a two-column DataFrame, prints the 'not_binary' column, runs
    utils.check_binary on each column in turn and prints both results.
    Returns nothing.
    """
    frame = pd.DataFrame({
        'not_binary': [5, 6, 7],
        'binary': [1, 0, 1],
    })
    print(frame['not_binary'])

    # Check the non-binary column first, then the binary one.
    results = [utils.check_binary(frame[column])
               for column in ('not_binary', 'binary')]
    print('not binary: {} binary: {}'.format(*results))
def dec_or_reg_tree(df_train, df_test, Y):
    """Fit a tree on df_train and report its error on both data sets.

    The model is chosen from the target column: when utils.check_binary
    reports df_train[Y] as binary, a depth-1 treeHW4.TreeOptimal is fitted;
    otherwise a mytree.Tree is grown with hw1.branch_node and re-evaluated
    on the test rows with hw1.test_node.

    Args:
        df_train: training DataFrame; must contain column Y.
        df_test: test DataFrame; must contain column Y.
        Y: name of the target column.

    Returns:
        [error_train, error_test] as computed by mystats.get_error.
    """
    binary = utils.check_binary(df_train[Y])

    if binary:
        newtree = treeHW4.TreeOptimal(max_depth=1)
        y_train = list(df_train[Y])
        nondf_train = utils.pandas_to_data(df_train)
        nondf_test = utils.pandas_to_data(df_test)
        newtree.fit(nondf_train, y_train)

        predict = newtree.predict(nondf_train)
        error_train = mystats.get_error(predict, y_train, binary)

        y_test = utils.pandas_to_data(df_test[Y])
        predict = newtree.predict(nondf_test)
        # Pass `binary` here too, so the test error is computed the same way
        # as the train error (the flag was previously omitted).
        error_test = mystats.get_error(predict, y_test, binary)
    else:
        node = mytree.Node(np.ones(len(df_train)))
        # NOTE(review): the literal 5 is presumably a depth/size limit for
        # hw1.branch_node -- confirm against its definition.
        hw1.branch_node(node, df_train, 5, Y)
        model = mytree.Tree(node)
        predict = model.predict_obj()
        error_train = mystats.get_error(predict, df_train[Y], binary)

        # Reuse the grown root for the test rows: reset its presence vector
        # to the test-set length before hw1.test_node processes it.
        node.presence = np.ones(len(df_test))
        hw1.test_node(node, df_test, Y)
        test_tree = mytree.Tree(node)
        predict = test_tree.predict_obj()
        error_test = mystats.get_error(predict, df_test[Y], binary)

    return [error_train, error_test]
def linear_gd_error(df, Y):
    """Fit linear gradient descent on df and return its error on df itself.

    Args:
        df: DataFrame used both for fitting and for evaluation; must
            contain column Y.
        Y: name of the target column.

    Returns:
        The value of mystats.get_error for the predictions against df[Y].
    """
    binary = utils.check_binary(df[Y])
    model = gd.gradient(df, df[Y], 0.00001, max_iterations=50)
    print(model)

    predict = gd.predict(df, model, binary)
    print(predict)

    # Compare against this function's own frame; the previous code referenced
    # `df_train`, which is not defined in this scope.
    error = mystats.get_error(predict, df[Y], binary)
    return error
def logistic_gd(df_train, df_test, Y):
    """Logistic gradient descent: fit on df_train, score train and test.

    The model comes from gd.logistic_gradient (step 0.1, 5 iterations) and is
    printed. Predictions for each data set are printed as well.

    Returns:
        [error_train, error_test] as computed by mystats.get_error.
    """
    is_binary = utils.check_binary(df_train[Y])
    weights = gd.logistic_gradient(df_train, df_train[Y], 0.1,
                                   max_iterations=5)
    print(weights)

    errors = []
    # Train set first, then test set -- same order as the returned list.
    for frame in (df_train, df_test):
        guesses = gd.predict(frame, weights, is_binary, True)
        print(guesses)
        errors.append(mystats.get_error(guesses, frame[Y], is_binary))
    return errors
def linear_gd(df_train, df_test, Y):
    """Linear gradient descent: fit on df_train, score train and test.

    The model comes from gd.gradient (step 0.00001, 50 iterations) and is
    printed. Predictions for each data set are printed as well.

    Returns:
        [error_train, error_test] as computed by mystats.get_error.
    """
    is_binary = utils.check_binary(df_train[Y])
    weights = gd.gradient(df_train, df_train[Y], 0.00001, max_iterations=50)
    print(weights)

    def score(frame):
        # Predict on one frame, echo the predictions, return the error.
        guesses = gd.predict(frame, weights, is_binary)
        print(guesses)
        return mystats.get_error(guesses, frame[Y], is_binary)

    train_error = score(df_train)
    test_error = score(df_test)
    return [train_error, test_error]
def testLogisticGradient():
    """Run logistic gradient descent end to end on the spam data.

    Loads the data through utils.load_and_normalize_spam_data, splits it with
    utils.split_test_and_train, fits gd.logistic_gradient on the training
    part with target column 'is_spam', and prints the predictions and the
    errors for both parts.

    Returns:
        [error_train, error_test] as computed by mystats.get_error.
    """
    target = 'is_spam'
    spam = utils.load_and_normalize_spam_data()
    # split_test_and_train's result is unpacked as (test, train), in that order.
    df_test, df_train = utils.split_test_and_train(spam)

    is_binary = utils.check_binary(df_train[target])
    weights = gd.logistic_gradient(df_train, df_train[target], 0.1,
                                   max_iterations=5)

    errors = []
    # Evaluate the train split first, then the test split.
    for frame in (df_train, df_test):
        guesses = gd.predict(frame, weights, is_binary, True)
        print(guesses)
        errors.append(mystats.get_error(guesses, frame[target], is_binary))

    print('error train {} error_test {}'.format(errors[0], errors[1]))
    return errors
def linear_reg_errors(df_train, df_test, Y, ridge=False, sigmoid=False):
    """Call linear_reg on the train and test frames and collect both results.

    Whether the target is binary is decided once, from df_train[Y], and the
    same flag is passed to both calls together with ridge and sigmoid.

    Returns:
        [result for df_train, result for df_test], each being whatever
        linear_reg returns.
    """
    is_binary = utils.check_binary(df_train[Y])
    return [linear_reg(frame, Y, is_binary, ridge, sigmoid)
            for frame in (df_train, df_test)]