Example #1
def test_big_tree():
    # load data
    X_train, y_train, X_test, y_test = data.load_decision_tree_data()

    # set classifier
    dTree = decision_tree.DecisionTree()

    # training
    dTree.train(X_train.tolist(), y_train.tolist())

    # print
    # Utils.print_tree(dTree)

    # testing
    y_est_test = dTree.predict(X_test)
    test_accu = accuracy_score(y_est_test, y_test)
    print('test_accu', test_accu)

    Utils.reduced_error_prunning(dTree, X_test, y_test)

    y_est_test = dTree.predict(X_test)
    test_accu = accuracy_score(y_est_test, y_test)
    print('test_accu', test_accu)

    # print
    Utils.print_tree(dTree)

def pruning_decision_tree_test():
    # load data
    X_train, y_train, X_test, y_test = data.sample_decision_tree_pruning()

    # build the tree
    dTree = decision_tree.DecisionTree()
    dTree.train(X_train, y_train)

    # print
    print('Your decision tree:')
    Utils.print_tree(dTree)
    print('My decision tree:')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: 1'
        '\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tsplit by dim 2\n\t\tbranch 0->2->0{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 3 \n\t\t\t'
        'class:1\n\t\t}\n\t\tbranch 0->2->1{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 2 \n\t\t\tclass:0'
        '\n\t\t}\n\t}\n}')

    Utils.reduced_error_prunning(dTree, X_test, y_test)

    print('Your decision tree after pruning:')
    Utils.print_tree(dTree)
    print('My decision tree after pruning:')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: '
        '1\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tclass:1\n\t}\n}')

def decision_tree_test():
    features, labels = data.sample_decision_tree_data()

    # build the tree
    dTree = decision_tree.DecisionTree()

    dTree.train(features, labels)

    # print
    print('Your decision tree: ')
    Utils.print_tree(dTree)
    print('My decision tree: ')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 2 : 2 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: '
        '1\n\t\tnum of samples for each class: 1 \n\t\tclass:0\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of '
        'samples for each class: 1 : 1 \n\t\tsplit by dim 0\n\t\tbranch 0->1->0{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 1 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->1->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 1 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of '
        'samples for each class: 1 \n\t\tclass:1\n\t}\n}')

    # data
    X_test, y_test = data.sample_decision_tree_test()

    # testing
    y_est_test = dTree.predict(X_test)
    print('Your estimate test: ', y_est_test)
    print('My estimate test: ', [0, 0, 1])
Example #4
def check_game_tree():
    d = 3
    bf = 2
    data_size = 1
    data_sets = GameTree(probability=[0.8, 0.6],
                         d=d,
                         bf=bf,
                         data_size=data_size,
                         tree_name='kocsis')
    utils.print_tree(tree=data_sets.tree[-1], d=d, bf=bf, data_name='value')
Example #5
def t2():
    # Load the car data set; the first column holds the labels.
    data = np.loadtxt('car.data', delimiter=',')
    x_train = pd.DataFrame(data)
    y_train = x_train[0].tolist()
    x_train = x_train.drop([0], axis=1)
    x_train = np.array(x_train).tolist()
    # Hold out everything after the first 1500 rows for testing.
    x_test = x_train[1500:]
    y_test = y_train[1500:]
    x_train = x_train[:1500]
    y_train = y_train[:1500]

    tree = DecisionTree()
    tree.train(x_train, y_train)
    p = tree.predict(x_train)
    U.print_tree(decisionTree=tree)
    # Prune against the held-out split, then print the pruned tree.
    U.reduced_error_prunning(decisionTree=tree, X_test=x_test, y_test=y_test)
    print('---------------------------')
    U.print_tree(decisionTree=tree)
Example #6
def get_minimax_path(tree, bf, d, draw=False):
    minimax_tree = minimax_algo_nx(tree, bf, d)

    if draw:
        utils.print_tree(tree=minimax_tree, d=d, bf=bf)

    children = list(minimax_tree.successors(0))
    root_val = minimax_tree.nodes[0]['value']

    path = [0 for i in range(d)]
    for i in range(d):
        b_idx = np.argmax([minimax_tree.nodes[children[c]]['value'] for c in range(len(children))])
        #b_idx = [minimax_tree.nodes[children[c]]['value']for c in range(len(children))].index(root_val)
        path[i] = children[b_idx]

        children = list(minimax_tree.successors(children[b_idx]))

    return path
Example #7
def main():

    d = 5
    bf = 2
    data_size = 1
    rollout_num = 50

    data_set = GameTree(probability=[0.8, 0.6],
                        d=d,
                        bf=bf,
                        data_size=data_size,
                        tree_name='kocsis')

    results = [[] for i in range(data_size)]
    accuracy = [[] for i in range(data_size)]
    for i in range(data_size):
        ans_path = mini_max.get_minimax_path(tree=data_set.tree[i],
                                             d=d,
                                             bf=bf,
                                             draw=True)
        results[i], accuracy[i] = mcts(tree=data_set.tree[i],
                                       n=rollout_num,
                                       ans_path=ans_path,
                                       algo_name='UCT')

        print("{}% done, ans={}, results={}".format(
            (float(i) / data_size) * 100, ans_path, results[i]))

    utils.print_tree(tree=data_set.tree[-1], d=d, bf=bf, data_name='ucb')

    #print("ans={},minimax_ans={}".format(ans, minimax_ans))

    #correct_rate = accuracy(ans, minimax_ans)
    #print(accuracy)

    means = np.zeros(rollout_num)
    accuracy = np.array(accuracy)
    for i in range(rollout_num):
        means[i] = np.mean(accuracy[:, i])

    print("result = {}".format(results[-1]))
    print("means = {}".format(means))
Example #8
def test_tree():
    features, labels = data.sample_decision_tree_data()
    # build the tree
    dTree = decision_tree.DecisionTree()
    dTree.train(features, labels)
    # print
    Utils.print_tree(dTree)

    # data
    X_test, y_test = data.sample_decision_tree_test()
    # testing
    y_est_test = dTree.predict(X_test)
    test_accu = accuracy_score(y_est_test, y_test)
    print('test_accu', test_accu)

    Utils.reduced_error_prunning(dTree, X_test, y_test)

    y_est_test = dTree.predict(X_test)
    test_accu = accuracy_score(y_est_test, y_test)
    print('test_accu', test_accu)
Example #9
    arg_parser.add_argument('-t', '--tree', help='prints the abstract syntax tree', action='store_true')
    arg_parser.add_argument('-o', '--optimize', help='optimizes the emitted code', action='store_true')
    arg_parser.add_argument('-r', '--recompile', help='recompiles the standard library', action='store_true')
    arg_parser.add_argument('src', help='source file')
    arg_parser.add_argument('dest', help='destination file', nargs='?', default=None)
    args = arg_parser.parse_args()
    try:
        with open(args.src) as cmm_file:
            code = cmm_file.read()
    except OSError as e:
        print(e, file=sys.stderr)
        sys.exit(arg_parser.format_usage())
    try:
        stdlib = load_stdlib()
        tokens = Lexer().tokenize(code)
        tree = Parser(tokens).parse(os.path.basename(args.src))
        code = CodeGenerator(tree, stdlib).generate(args.optimize)
        if args.tree:
            print_tree(tree)
            if args.dest is None:
                print()
        if args.dest is not None:
            with open(args.dest, 'w') as out_file:
                out_file.write(code)
        else:
            print(code)
    except CompilerError as e:
        if args.debug:
            raise e
        sys.exit(e)
Example #10
training_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 3, 'Apple'],
    ['Red', 1, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

# Column labels.
# These are used only to print the tree.
header = ["color", "diameter", "label"]

my_tree = build_tree(training_data)


print_tree(my_tree)

# Evaluate
testing_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 4, 'Apple'],
    ['Red', 2, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

for row in testing_data:
    print("Actual: %s. Predicted: %s" %
          (row[-1], print_results(my_tree.classify(row))))

Example #11
    # (fragment) iterative attempt: rebuild a binary tree from its preorder
    # (pre_list) and inorder (mid_list) sequences using a queue
    q = queue.Queue()
    root = pre_list[0]
    q.put(root)
    while not q.empty():
        root = q.get()
        left_index = mid_list.index(root.val)
        pre_list = pre_list[1:left_index + 1]
        mid_list = mid_list[left_index:]


'''
Construct a binary tree from its postorder and inorder traversal sequences.
Solved with a recursive approach.
'''
from utils import TreeNode, print_tree


def buildTree(postorder, inorder):
    if not postorder or not inorder:
        return None
    tree_val = postorder[-1]
    root = TreeNode(tree_val)
    left_index = inorder.index(tree_val)
    root.left = buildTree(postorder[:left_index], inorder[:left_index])
    root.right = buildTree(postorder[left_index:-1], inorder[left_index + 1:])
    return root


root = buildTree([4, 5, 2, 6, 7, 3, 1], [4, 2, 5, 1, 6, 3, 7])
print_tree(root)
Example #12
        :type head: ListNode
        :rtype: TreeNode
        """
        def sortedArrayToBST(nums):
            if not nums:
                return None
            n = len(nums)
            if n == 1:  # small speed-up: skip the split for single elements
                return TreeNode(nums[0])
            i = n // 2
            node = TreeNode(nums[i])
            node.left = sortedArrayToBST(nums[:i])
            node.right = sortedArrayToBST(nums[i + 1:])
            return node
        # to use the ascending order property, transform into index-based array
        _array = []
        while head:
            _array.append(head.val)
            head = head.next
        return sortedArrayToBST(_array)

s = Solution()
for _ in range(5):
    st = random.randint(1, 100)
    gap = random.randint(1, 1000)
    lst = sorted(random.sample(range(st, st + gap), min(random.randint(1, 20), gap)))
    print lst
    head = listToLinkedlist(lst)
    res = s.sortedListToBST(head)
    print_tree(res)
Example #13
def main():
    global args
    args = parse_args(type=1)
    print(args.name)
    print(args.model_name)

    args.input_dim = 300

    if args.mem_dim == 0:
        if args.model_name == 'dependency':
            args.mem_dim = 168
        elif args.model_name == 'constituency':
            args.mem_dim = 150
        elif args.model_name == 'lstm':
            args.mem_dim = 168
        elif args.model_name == 'bilstm':
            args.mem_dim = 168

    if args.num_classes == 0:
        if args.fine_grain:
            args.num_classes = 5  # 0 1 2 3 4
        else:
            args.num_classes = 3  # 0 1 2 (1 neutral)
    elif args.num_classes == 2:
        # assert False # this will not work
        assert not args.fine_grain

    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    # torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file) NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits

    is_preprocessing_data = False  # flag so the program can exit after preprocessing

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes,
                                 args.fine_grain, args.model_name)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes,
                                  args.fine_grain, args.model_name)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    criterion = nn.NLLLoss()
    # initialize model, criterion/loss_function, optimizer

    model = DMNWraper(args.cuda, args.input_dim, args.mem_dim, criterion,
                      args.train_subtrees, args.num_classes, args.embdrop)

    embedding_model = nn.Embedding(vocab.size(), args.input_dim)

    if args.cuda:
        embedding_model = embedding_model.cuda()

    if args.cuda:
        model.cuda(), criterion.cuda()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    if args.embedding == 'glove':
        emb_torch = 'sst_embed.pth'
        emb_vector = 'glove.840B.300d'
        emb_vector_path = os.path.join(args.glove, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram':
        emb_torch = 'sst_embed_paragram.pth'
        emb_vector = 'paragram_300_sl999'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram_xxl':
        emb_torch = 'sst_embed_paragram_xxl.pth'
        emb_vector = 'paragram-phrase-XXL'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    else:
        assert False

    emb_file = os.path.join(args.data, emb_torch)
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:

        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(emb_vector_path)
        print('==> Embedding vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))

        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('quit program')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    embedding_model.state_dict()['weight'].copy_(emb)

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer   = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'adam_combine':
        optimizer = optim.Adam([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adagrad_combine':
        optimizer = optim.Adagrad([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adam_combine_v2':
        model.embedding_model = embedding_model
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
        args.manually_emb = 0
    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    trainer = SentimentTrainer(args, model, embedding_model, criterion,
                               optimizer)

    trainer.set_initial_emb(emb)
    question_idx = vocab.labelToIdx['sentiment']
    question_idx = torch.Tensor([question_idx])
    trainer.set_question(question_idx)

    # trainer = SentimentTrainer(args, model, embedding_model ,criterion, optimizer)

    mode = args.mode
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            # print a tree
            tree, sent, label = dev_dataset[3]
            utils.print_span(tree, sent, vocab)
            quit()

            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Dev loss   : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == 'EVALUATE':
        filename = args.name + '.pth'
        epoch = args.epochs
        model_name = str(epoch) + '_model_' + filename
        embedding_name = str(epoch) + '_embedding_' + filename
        model = torch.load(os.path.join(args.saved, model_name))
        embedding_model = torch.load(os.path.join(args.saved, embedding_name))

        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, subtree_metrics = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(epoch) + ' |test percentage ' +
              str(test_acc))
        print('____________________' + str(args.name) + '___________________')
        print_list = subtree_metrics.print_list
        torch.save(print_list,
                   os.path.join(args.saved, args.name + 'printlist.pth'))
        utils.print_trees_file(args,
                               vocab,
                               test_dataset,
                               print_list,
                               name='tree')
    elif mode == "EXPERIMENT":
        # dev_loss, dev_pred = trainer.test(dev_dataset)
        # dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels, num_classes=args.num_classes)
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            # train_loss, train_pred, _ = trainer.test(train_dataset)
            train_loss_while_training = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            dev_acc = metrics.sentiment_accuracy_score(
                dev_pred, dev_dataset.labels, num_classes=args.num_classes)
            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels, num_classes=args.num_classes)
            print('==> Train loss   : %f \t' % train_loss_while_training,
                  end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch %d dev percentage %f ' % (epoch, dev_acc))
            print('Train acc %f ' % (train_acc))
            if dev_acc > max_dev:
                print('update best dev acc %f ' % (dev_acc))
                max_dev = dev_acc
                max_dev_epoch = epoch
                utils.mkdir_p(args.saved)
                torch.save(
                    model,
                    os.path.join(args.saved,
                                 str(epoch) + '_model_' + filename))
                torch.save(
                    embedding_model,
                    os.path.join(args.saved,
                                 str(epoch) + '_embedding_' + filename))
            gc.collect()
        print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        print('eva on test set ')
        model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_model_' + filename))
        embedding_model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_embedding_' + filename))
        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, _ = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(max_dev_epoch) +
              ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)
Example #14
    def __init__(self, x):
        self.val = x
        self.left = None
        self.right = None


class Solution:
    def sortedArrayToBST(self, nums: List[int]) -> TreeNode:
        if not nums:
            return None

        mid = len(nums) // 2

        # mid is element of the root
        root = TreeNode(nums[mid])

        # left subtree of root has all
        # values <arr[mid]
        root.left = self.sortedArrayToBST(nums[:mid])

        # right subtree of root has all
        # values >arr[mid]
        root.right = self.sortedArrayToBST(nums[mid + 1:])

        return root


solution = Solution()
res = solution.sortedArrayToBST([-10, -3, 0, 5, 9])
print_tree(res)
Example #15
validation_data_file = sys.argv[2]
test_data_file = sys.argv[3]
prune_factor = float(sys.argv[4])

print('Use training data from %s' % training_data_file)
print('Use validation data from %s' % validation_data_file)
print('Use test data from %s' % test_data_file)
print('Use prune factor: %s' % prune_factor)

print('')
training_data = utils.read_data(training_data_file)
validation_data = utils.read_data(validation_data_file)
test_data = utils.read_data(test_data_file)

root, node_num, leaf_num = id3.train(training_data)
utils.print_tree(root, training_data)

print('')
print('Pre-Pruned Accuracy')
print('- - - - - - - - - - - - -')
train_accuracy = id3.test(root, training_data) * 100
print('Number of training instances = %s' % len(training_data))
print('Number of training attributes = %s' % len(training_data[0].feature_map))
print('Total number of nodes in the tree = %s' % node_num)
print('Number of leaf nodes in the tree = %s' % leaf_num)
print('Accuracy of the model on the training dataset = %.1f%%' %
      train_accuracy)

validation_accuracy = id3.test(root, validation_data) * 100
print('')
print('Number of validation instances = %s' % len(validation_data))
Example #16
                    layer = node_iter.right
                    break
                node_iter = node_iter.next
        return


s = Solution()
head = TreeLinkNode(0)
root = TreeLinkNode(1)
head.right = root
root.left = TreeLinkNode(2)
root.right = TreeLinkNode(3)
root.left.left = TreeLinkNode(4)
root.left.right = TreeLinkNode(5)
root.right.right = TreeLinkNode(7)
root.left.left.right = TreeLinkNode(8)
root.right.right.left = TreeLinkNode(9)
root.left.left.right.right = TreeLinkNode(6)
print_tree(head)
s.connect(head)
inspect = head
layer, nxt = [head], []
while layer:
    for node in layer:
        print node.val, node.next.val if node.next else 'None'
        if node.left:
            nxt.append(node.left)
        if node.right:
            nxt.append(node.right)
    nxt, layer = [], nxt
Example #17
#best_model, best_k, best_function, best_scaler = model_selection_with_transformation(distance_funcs, scaling_classes, Xtrain, ytrain, Xval, yval)

import data
import hw1_dt as decision_tree
import utils as Utils
from sklearn.metrics import accuracy_score

features, labels = data.sample_decision_tree_data()

# build the tree
dTree = decision_tree.DecisionTree()
dTree.train(features, labels)

# print
Utils.print_tree(dTree)

# data
X_test, y_test = data.sample_decision_tree_test()

# testing
y_est_test = dTree.predict(X_test)

test_accu = accuracy_score(y_est_test, y_test)
print('test_accu', test_accu)
"""

"""
#load data
X_train, y_train, X_test, y_test = data.load_decision_tree_data()
Example #18
def main(write_to):

    startTime = time.time()

    global args
    args = parse_args(type=1)
    args.input_dim = 300
    if args.model_name == 'dependency':
        args.mem_dim = 168
    elif args.model_name == 'constituency':
        args.mem_dim = 150
    if args.fine_grain:
        args.num_classes = 5  # 0 1 2 3 4
    else:
        args.num_classes = 3  # 0 1 2 (1 neutral)
    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    # torch.cuda.manual_seed(args.seed)

    #    train_dir = os.path.join(args.data,'train/')
    train_dir = os.path.join(
        args.data, 'dev/')  # Fei: wants to train on a smaller data set
    #    dev_dir = os.path.join(args.data,'dev/')
    #    test_dir = os.path.join(args.data,'test/')

    # write unique words from all token files
    token_files = [os.path.join(split, 'sents.toks') for split in [train_dir]]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file) NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    #    vocab_file = os.path.join(args.data, 'vocab-cased-dev.txt')
    #    build_vocab(token_files, vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits

    is_preprocessing_data = False  # flag so the program can exit after preprocessing

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev


#    dev_file = os.path.join(args.data,'sst_dev.pth')
#    if os.path.isfile(dev_file):
#        dev_dataset = torch.load(dev_file)
#    else:
#        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes, args.fine_grain, args.model_name)
#        torch.save(dev_dataset, dev_file)
#        is_preprocessing_data = True

# test
#    test_file = os.path.join(args.data,'sst_test.pth')
#    if os.path.isfile(test_file):
#        test_dataset = torch.load(test_file)
#    else:
#        test_dataset = SSTDataset(test_dir, vocab, args.num_classes, args.fine_grain, args.model_name)
#        torch.save(test_dataset, test_file)
#        is_preprocessing_data = True

    criterion = nn.NLLLoss()
    # initialize model, criterion/loss_function, optimizer
    model = TreeLSTMSentiment(args.cuda, vocab.size(), args.input_dim,
                              args.mem_dim, args.num_classes, args.model_name,
                              criterion)

    embedding_model = nn.Embedding(vocab.size(), args.input_dim)
    # Fei: don't optimize embedding
    embedding_model.weight.requires_grad = False

    if args.cuda:
        embedding_model = embedding_model.cuda()

    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer   = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(
            [{
                'params': filter(lambda p: p.requires_grad,
                                 model.parameters()),
                'lr': args.lr
            }  # Fei: filter non_trainable
             ],
            lr=args.lr,
            weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    utils.count_param(model)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sst_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:

        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))

        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('done preprocessing data, quit program to prevent memory leak')
        print('please run again')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()

    # model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)
    embedding_model.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, embedding_model, criterion,
                               optimizer)

    loopStart = time.time()
    #print('prepare time is %s ' % (loopStart - startTime))
    loss_save = []

    mode = 'EXPERIMENT'
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)

            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Dev loss   : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == "EXPERIMENT":
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            #dev_loss, dev_pred = trainer.test(dev_dataset)
            #dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            loss_save.append(train_loss)
            #print('Epoch ', epoch, 'dev percentage ', dev_acc)
            #torch.save(model, args.saved + str(epoch) + '_model_' + filename)
            #torch.save(embedding_model, args.saved + str(epoch) + '_embedding_' + filename)
            #if dev_acc > max_dev:
            #    max_dev = dev_acc
            #    max_dev_epoch = epoch
            #gc.collect()

        print("done")
        #print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        #print('eva on test set ')
        #model = torch.load(args.saved + str(max_dev_epoch) + '_model_' + filename)
        #embedding_model = torch.load(args.saved + str(max_dev_epoch) + '_embedding_' + filename)
        #trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)
        #test_loss, test_pred = trainer.test(test_dataset)
        #test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
        #print('Epoch with max dev:' + str(max_dev_epoch) + ' |test percentage ' + str(test_acc))
        #print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred = trainer.test(train_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)

    loopEnd = time.time()
    print('looptime is %s ' % (loopEnd - loopStart))

    prepareTime = loopStart - startTime
    loopTime = loopEnd - loopStart
    timePerEpoch = loopTime / args.epochs

    with open(write_to, "w") as f:
        f.write("unit: " + "1 epoch\n")
        for loss in loss_save:
            f.write(str(loss) + "\n")
        f.write("run time: " + str(prepareTime) + " " + str(timePerEpoch) +
                "\n")
Example #19
        # level by level: not constant extra space, but it handles imperfect trees
        # if not root:
        #     return
        # layer, nextlayer = [root], []
        # while layer:
        #     n = len(layer)
        #     for i in xrange(n - 1):
        #         layer[i].next = layer[i + 1]
        #         if layer[i].left:
        #             nextlayer.append(layer[i].left)
        #         if layer[i].right:
        #             nextlayer.append(layer[i].right)
        #     if layer[-1].left:
        #         nextlayer.append(layer[-1].left)
        #     if layer[-1].right:
        #         nextlayer.append(layer[-1].right)
        #     layer, nextlayer = nextlayer, []

s = Solution()
root = TreeLinkNode(1)
root.left = TreeLinkNode(2)
root.right = TreeLinkNode(3)
root.left.left = TreeLinkNode(4)
root.left.right = TreeLinkNode(7)
root.right.left = TreeLinkNode(5)
root.right.right = TreeLinkNode(6)
print_tree(root)
s.connect(root)
print(root.left.right.next.val)
Example #20
# S_information_gain = utils.information_gain(S, 'Dedicacion', 'Salva', False)
# print('Information gain of the Dedicacion attribute: ', S_information_gain)
# S_information_gain = utils.information_gain(S, 'Humor Docente', 'Salva', False)
# print('Information gain of the Humor Docente attribute: ', S_information_gain)
# S_information_gain = utils.information_gain(S, 'Horario', 'Salva', False)
# print('Information gain of the Horario attribute: ', S_information_gain)
# S_information_gain = utils.information_gain(S, 'Dificultad', 'Salva', False)
# print('Information gain of the Dificultad attribute: ', S_information_gain)
# S_information_gain = utils.information_gain(S, 'Humedad', 'Salva', False)
# print('Information gain of the Humedad attribute: ', S_information_gain)

tree = utils.ID3_algorithm(
    S, ['Dedicacion', 'Dificultad', 'Horario', 'Humedad', 'Humor Docente'],
    'Salva', True, False)

utils.print_tree(tree, tree['data'], None, True, '')

print()
print()
print('Applying ID3 to a second training set')
print()

# Algorithm applied to the second test set
tree2 = utils.ID3_algorithm(
    S2, ['Dedicacion', 'Dificultad', 'Horario', 'Humedad', 'Humor Docente'],
    'Salva', True, False)

utils.print_tree(tree2, tree2['data'], None, True, '')

#############################################
# Exercise with the lab data set #
Example #21
def main():
    if (len(sys.argv) != 5):
        sys.exit("invalid command-line arguments format")

    # handling command-line arguments
    data = DataReader2(sys.argv[1])
    data.init_examples()
    training_set_size = int(sys.argv[2])
    num_trials = int(sys.argv[3])
    verbose = int(sys.argv[4])
    if (verbose != 1 and verbose != 0):
        sys.exit("invalid command-line argument")
    if (num_trials < 1):
        sys.exit("invalid command-line argument")

    # extract examples and attributes
    # an example = a feature vector + a label (represented by a tuple)
    examples = data.get_examples()  # a list of examples
    attributes = data.get_attributes()  # a list of attribute names
    if (training_set_size >= len(examples)):
        sys.exit("invalid command-line argument")

    # lists of classification performances (correct rates)
    # e.g.: [1.0, 0.95, 0.83, ...]
    correct_rates_id3 = []
    correct_rates_prior = []

    for i in range(0, num_trials):  # a single trial
        print 'TRIAL NUMBER:', i + 1
        print '-' * 30

        # randomly pick a training set of size *training_set_size*
        random.shuffle(examples)
        training_examples = examples[0:training_set_size]
        testing_examples = examples[training_set_size:]

        # a list of actual labels of testing examples
        actuals = utils.extract_labels(testing_examples)

        # build a decision tree based on these training examples
        tree = id3.DTL(training_examples, range(0, len(attributes)), True)
        # print the structure of the decision tree built from the training set
        print 'DECISION TREE STRUCTURE'
        utils.print_tree(tree, attributes)

        # list of predicted labels using id3
        output_id3_1 = utils.trial_id3(tree, testing_examples)
        # list of predicted labels using prior probability
        output_prior_1 = utils.trial_priorprob(training_examples,
                                               testing_examples)

        # computes and prints correct rate of this trial
        correct_rate_id3 = utils.correct_rate(output_id3_1, actuals)
        correct_rates_id3.append(correct_rate_id3)
        correct_rate_prior = utils.correct_rate(output_prior_1, actuals)
        correct_rates_prior.append(correct_rate_prior)
        print '\n'
        print 'proportion of correct classification'
        print 'decision tree:', correct_rate_id3
        print 'prior probability:', correct_rate_prior
        print '\n'

        if (verbose == 1):
            output_id3_2 = list(testing_examples)
            output_prior_2 = list(testing_examples)
            for j in range(0, len(output_id3_2)):
                output_id3_2[j][-1] = output_id3_1[j]
                output_prior_2[j][-1] = output_prior_1[j]

            print '*' * 10, 'examples in the training set: ', '*' * 10
            utils.print_dataset(training_examples, attributes)

            print '*' * 10, 'examples in the testing set: ', '*' * 10
            utils.print_dataset(testing_examples, attributes)

            print '*' * 10, 'classification by the decision tree: ', '*' * 10
            utils.print_dataset(output_id3_2, attributes)

            print '*' * 10, 'classification by prior probability: ', '*' * 10
            utils.print_dataset(output_prior_2, attributes)

    # other outputs
    print '*' * 5, 'information', '*' * 5
    print 'file:' + sys.argv[1]
    print 'training set size:', sys.argv[2]
    print 'testing set size:', len(examples) - int(sys.argv[2])
    print 'number of trials:', num_trials
    mean_tree = utils.mean(correct_rates_id3)
    mean_prior = utils.mean(correct_rates_prior)
    print 'mean classification performance (decision tree):', mean_tree
    print 'mean classification performance (prior probability):', mean_prior
Example #22
class Solution(object):
    def generateTrees(self, n):
        """
        :type n: int
        :rtype: List[TreeNode]
        Given an integer n, generate all structurally unique BST's (binary search trees) that store values 1...n.
        """

        def g(st, ed):  # surprisingly, this recursive code was accepted without any debugging
            if st == ed:
                return [None]
            res = []
            for i in xrange(st, ed):
                for l in g(st, i):
                    for r in g(i + 1, ed):
                        root = TreeNode(i)
                        root.left = l
                        root.right = r
                        res.append(root)
            return res

        if n < 1:
            return []
        return g(1, n + 1)


s = Solution()
for tn in s.generateTrees(3):
    print_tree(tn)
Example #23
from bplustree import Bplustree
from utils import print_tree

tree = Bplustree(4)

# print(tree.root.keys)
# tree.insert(5)
# Insert keys one at a time, printing the tree after each insert
# to watch the B+ tree split its nodes.
for key in [7, 8, 9, 10, 13]:
    tree.insert(key)
    print("\n")
    print_tree(tree.root, '   ', 0)