コード例 #1
0
ファイル: parse_complete.py プロジェクト: itrowa/arsenal
def parse(line):
    """Print every parse tree of ``line`` rooted at the start symbol 'S'.

    ``line`` is split on whitespace. Trees are assembled from the ``lexicon``
    and ``grammar`` names visible from this scope, and each finished tree is
    passed to ``print_tree``.
    """
    words = line.split()

    def expand(start, end, tag):
        """Generate each tree labelled ``tag`` that spans words[start:end]."""
        # A single-word span can be covered directly by a matching leaf.
        if end - start == 1:
            token = words[start]
            for entry in lexicon:
                if entry.tag == tag:
                    if entry.word == token:
                        yield entry
        # No rule for this tag means nothing further can be built.
        if tag not in grammar:
            return
        for expansion in grammar[tag]:
            for children in expand_all(start, end, expansion):
                yield Tree(tag, children)

    def expand_all(start, end, tags):
        """Generate each list of sibling trees for ``tags`` over words[start:end]."""
        # Last tag of the sequence: it has to cover the entire remaining span.
        if len(tags) == 1:
            for only_child in expand(start, end, tags[0]):
                yield [only_child]
            return
        head, tail = tags[0], tags[1:]
        # Try every split point that leaves at least one word per later tag.
        last_split = end - len(tail)
        for split in range(start + 1, last_split + 1):
            for head_tree in expand(start, split, head):
                for tail_trees in expand_all(split, end, tail):
                    yield [head_tree] + tail_trees

    for parsed in expand(0, len(words), 'S'):
        print_tree(parsed)
コード例 #2
0
ファイル: max_parse.py プロジェクト: itrowa/arsenal
def max_parses(line, n=1):
    """Print the 'S'-rooted parse trees of ``line`` produced by ``expand``.

    ``line`` is split on whitespace. ``n`` is handed to the ``max_trees``
    decorator applied to the inner ``expand``.
    NOTE(review): presumably ``max_trees(n)`` caps how many trees are kept
    per span and ``memoize`` caches results per (start, end, tag) -- confirm
    against their definitions.
    """
    words = line.split()

    @memoize
    @max_trees(n)
    def expand(start, end, tag):
        """Generate each tree labelled ``tag`` that spans words[start:end]."""
        # A single-word span becomes a leaf when the word may carry this tag.
        if end - start == 1:
            token = words[start]
            if tag in tags_for_word(token):
                yield Leaf(tag, token)
        # No rule for this tag means nothing further can be built.
        if tag not in grammar:
            return
        for expansion in grammar[tag]:
            for children in expand_all(start, end, expansion):
                yield Tree(tag, children)

    def expand_all(start, end, tags):
        """Generate each list of sibling trees for ``tags`` over words[start:end]."""
        # Last tag of the sequence: it has to cover the entire remaining span.
        if len(tags) == 1:
            for only_child in expand(start, end, tags[0]):
                yield [only_child]
            return
        head, tail = tags[0], tags[1:]
        # Try every split point that leaves at least one word per later tag.
        last_split = end - len(tail)
        for split in range(start + 1, last_split + 1):
            for head_tree in expand(start, split, head):
                for tail_trees in expand_all(split, end, tail):
                    yield [head_tree] + tail_trees

    for parsed in expand(0, len(words), 'S'):
        print_tree(parsed)
コード例 #3
0
ファイル: max_parse.py プロジェクト: itrowa/arsenal
def max_parses(line, n=1):
    """Parse ``line`` from the start symbol 'S' and print each resulting tree.

    The whitespace-separated words of ``line`` are parsed with the ``grammar``
    visible from this scope; ``n`` is the argument given to the ``max_trees``
    decorator on the inner ``expand``.
    NOTE(review): the exact effect of ``memoize`` / ``max_trees`` is defined
    elsewhere -- confirm there before relying on tree counts or ordering.
    """
    words = line.split()

    @memoize
    @max_trees(n)
    def expand(start, end, tag):
        """Produce the trees with root ``tag`` covering words[start:end]."""
        span_is_single_word = (end - start == 1)
        if span_is_single_word:
            current = words[start]
            if tag in tags_for_word(current):
                yield Leaf(tag, current)
        if tag in grammar:
            # Each rule body is a sequence of tags that must tile the span.
            for rule_body in grammar[tag]:
                for subtrees in expand_all(start, end, rule_body):
                    yield Tree(tag, subtrees)

    def expand_all(start, end, tags):
        """Produce every list of subtrees matching ``tags`` over words[start:end]."""
        if len(tags) != 1:
            leading, following = tags[0], tags[1:]
            # Leave at least one word for each of the following tags.
            stop = end + 1 - len(following)
            for boundary in range(start + 1, stop):
                for leading_tree in expand(start, boundary, leading):
                    for following_trees in expand_all(boundary, end, following):
                        yield [leading_tree] + following_trees
        else:
            # A lone tag has to account for the whole span by itself.
            for subtree in expand(start, end, tags[0]):
                yield [subtree]

    for result in expand(0, len(words), 'S'):
        print_tree(result)
コード例 #4
0
def parse(line):
    """Parse ``line`` from the start symbol 'S' and print each resulting tree.

    The whitespace-separated words of ``line`` are matched against the
    ``lexicon`` and ``grammar`` visible from this scope; every complete tree
    is passed to ``print_tree``.
    """
    words = line.split()

    def expand(start, end, tag):
        """Produce the trees with root ``tag`` covering words[start:end]."""
        span_is_single_word = (end - start == 1)
        if span_is_single_word:
            current = words[start]
            # Any lexicon entry with this tag and this exact word is a parse.
            for candidate in lexicon:
                if candidate.tag == tag and candidate.word == current:
                    yield candidate
        if tag in grammar:
            # Each rule body is a sequence of tags that must tile the span.
            for rule_body in grammar[tag]:
                for subtrees in expand_all(start, end, rule_body):
                    yield Tree(tag, subtrees)

    def expand_all(start, end, tags):
        """Produce every list of subtrees matching ``tags`` over words[start:end]."""
        if len(tags) != 1:
            leading, following = tags[0], tags[1:]
            # Leave at least one word for each of the following tags.
            stop = end + 1 - len(following)
            for boundary in range(start + 1, stop):
                for leading_tree in expand(start, boundary, leading):
                    for following_trees in expand_all(boundary, end, following):
                        yield [leading_tree] + following_trees
        else:
            # A lone tag has to account for the whole span by itself.
            for subtree in expand(start, end, tags[0]):
                yield [subtree]

    for result in expand(0, len(words), 'S'):
        print_tree(result)
コード例 #5
0
            if show_hidden_file and len(entry.name) > 1 and entry.name.startswith("."):
                continue
            if entry.is_dir():
                dir_attr["__children"].extend(make_dir_tree(entry.path))
            else:
                dir_attr["__children"].append({"__id":entry.name})
        L.append(dir_attr)
    return L

if __name__ == "__main__":
    # Command-line entry point: print a tree for each directory argument.
    import sys
    from print_tree import print_tree
    from sys import argv
    from argparse import ArgumentParser
    from argparse import ArgumentDefaultsHelpFormatter
    ap = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    ap.add_argument("dir", nargs="*", help="1st item.")
    ap.add_argument("-a", action="store_true", dest="show_hidden_file",
                    help="show hidden files.")
    opt = ap.parse_args()
    # NOTE(review): opt.show_hidden_file is parsed but never forwarded
    # anywhere in this block -- confirm how make_dir_tree is meant to see it.

    # With no positional argument, fall back to the current directory.
    top_dir = opt.dir or ["."]
    for d in top_dir:
        try:
            tree = make_dir_tree(d)
        except NotADirectoryError:
            # Name the path that actually failed (not the whole argument
            # list) and signal the failure through a non-zero exit status.
            print(f"ERROR: {d} is not a directory.", file=sys.stderr)
            sys.exit(1)
        print_tree(tree, style=[" ", "|--", "'--", False])
コード例 #6
0
ファイル: find_nodes_with_sum.py プロジェクト: dyno/tree
        if v < _sum:
            lo = lo_iter.next()
        else:
            hi = hi_iter.next()

#======================================================================
if __name__ == "__main__":
    # Demo driver: run every module-level callable whose name contains
    # "with_sum" against the test tree for a range of target sums.
    from test import test_root
    from print_tree import print_tree

    root = test_root()
    # Work on a snapshot of the module namespace so that names bound while
    # scanning cannot change the dict being iterated.
    namespace = dict(globals())
    # Order by binding name: comparing function objects directly is
    # address-dependent on Python 2 and a TypeError on Python 3.
    names = sorted(name for name in namespace
                   if callable(namespace[name]) and "with_sum" in name)
    finders = [namespace[name] for name in names]

    # Single-argument parenthesised prints behave the same whether print is
    # a statement (Python 2) or a function (Python 3).
    for f in finders:
        print("\n------------------------------------------------------")
        print(f.__doc__)
        print_tree(root)
        for _sum in [-1, 1, 2, 3, 4, 4.5, 5, 6, 7, 8, 10, 11, 12, 13, 14, 20]:
            r = f(root, _sum)
            if r is not None:
                x, y = r
                print("%d+%d=%d" % (x, y, _sum))
            else:
                print("no 2 nodes sum to %s" % repr(_sum))


コード例 #7
0
        if v == _sum:
            return lo.value, hi.value

        if v < _sum:
            lo = lo_iter.next()
        else:
            hi = hi_iter.next()


#======================================================================
if __name__ == "__main__":
    # Demo driver: exercise each "*with_sum*" callable defined in this module
    # on the test tree, once per target sum.
    from test import test_root
    from print_tree import print_tree

    root = test_root()
    # Copy the module namespace first; the scan below must not iterate a dict
    # that gains new names while it runs.
    snapshot = dict(globals())
    matching = [name for name in snapshot
                if callable(snapshot[name]) and "with_sum" in name]
    # Sort the names rather than the function objects: ordering functions is
    # arbitrary (address-based) on Python 2 and unsupported on Python 3.
    matching.sort()

    # print(...) with one argument is valid and equivalent on Python 2 and 3.
    for name in matching:
        f = snapshot[name]
        print("\n------------------------------------------------------")
        print(f.__doc__)
        print_tree(root)
        for _sum in [-1, 1, 2, 3, 4, 4.5, 5, 6, 7, 8, 10, 11, 12, 13, 14, 20]:
            r = f(root, _sum)
            if r is None:
                print("no 2 nodes sum to %s" % repr(_sum))
            else:
                x, y = r
                print("%d+%d=%d" % (x, y, _sum))
コード例 #8
0
ファイル: heapsort-v1.0.py プロジェクト: Yinkai0619/skills
        3. 再次重复第1、2步直至剩余一个元素
    :param total:
    :param array:
    :return:
    '''
    while total > 1:
        array[1], array[total] = array[total], array[1]  # 堆顶和最后一个结点互换
        total -= 1
        if total == 2 and array[total] >= array[
                total - 1]:  # 当剩余2个元素,如果最后一个结点比堆顶大,则不再调整
            break
        heap_adjust(total, 1, array)
    return array


if __name__ == '__main__':
    # Elements to sort. A shuffled input could be built instead with:
    #   origin = [x * 10 for x in range(1, 10)]
    #   random.shuffle(origin)
    #   origin.insert(0, 0)
    # Index 0 holds an unused placeholder 0 so that list positions line up
    # with the 1-based numbering of binary-tree nodes.
    origin = [0] + [20, 10, 40, 70, 50, 60, 90, 30, 80]
    print(origin)
    print_tree.print_tree(origin, True)
    separator = '=' * 50
    print(separator)

    # Initial number of elements to sort (n); the placeholder is not counted.
    total = len(origin) - 1

    # Build the max-heap first, then sort it and display the result as a tree.
    heap = max_heap(total, origin)
    ordered = sort(total, heap)
    print_tree.print_tree(ordered)
    print(origin)
コード例 #9
0
def xgboost_train(file, num_class, num_rounds, early_stopping_rounds):
    """Train a multi-class XGBoost model from a CSV file and export results.

    The CSV is split 70/30 into training and validation sets, a ``gbtree``
    model with the ``multi:softmax`` objective is trained, and the model is
    then used to predict on the full input data.

    Args:
        file: passed to ``pd.read_csv``; the data must contain ``Id`` and
            ``Label`` columns (both are dropped from the feature matrix and
            ``Label`` is used as the target).
        num_class: number of classes, forwarded as the ``num_class`` parameter.
        num_rounds: number of boosting rounds passed to ``xgb.train``.
        early_stopping_rounds: forwarded to ``xgb.train``.

    Returns:
        The four values produced by ``get_xgb_feat_importances(model)``:
        ``(ft_importance, feat_importances_sorted, machine_importance,
        machine_importances_sorted)``.

    Side effects:
        Writes ``./model/xgb.model`` and ``./model/xgb_submission.csv``,
        three CSV files under ``./result/``, and prints progress to stdout.
    """
    import os  # local import: only needed to prepare the output directories

    # Record the start time so the total run time can be reported.
    start_time = time.time()

    # Load the data.
    train = pd.read_csv(file)

    # Split with sklearn.model_selection: 70% training, 30% validation
    # (adjust test_size as needed).
    train_xy, val = train_test_split(train, test_size=0.3, random_state=1)

    y = train_xy.Label
    x = train_xy.drop(['Id', 'Label'], axis=1)
    val_y = val.Label
    val_x = val.drop(['Id', 'Label'], axis=1)

    # Build the xgb matrices.
    xgb_val = xgb.DMatrix(val_x, label=val_y)
    xgb_train = xgb.DMatrix(x, label=y)
    # No separate test set yet: predict on the original samples for now.
    xgb_test = xgb.DMatrix(train.drop(['Id', 'Label'], axis=1))
    ceate_feature_map(x.columns)

    params = {
        'booster': 'gbtree',
        'objective': 'multi:softmax',  # multi-class classification
        'num_class': num_class,  # number of classes, used with multi:softmax
        # Controls post-pruning; larger is more conservative (typically
        # around 0.1 or 0.2).
        'gamma': 0.1,
        'max_depth': 12,  # tree depth; larger overfits more easily
        # L2 regularisation weight on model complexity; larger makes the
        # model less prone to overfitting. (Key was misspelled 'lambda:'.)
        'lambda': 2,
        'subsample': 0.7,  # random subsampling of training rows
        'colsample_bytree': 0.7,  # column subsampling when building a tree
        # Defaults to 1: the minimum sum of h (second derivatives) required
        # in a leaf. For imbalanced 0-1 classification, if h is around 0.01,
        # a value of 1 means a leaf needs at least 100 samples. This
        # parameter strongly affects the result; the smaller it is, the
        # easier it is to overfit. (Key was misspelled 'mid_child_weight'.)
        'min_child_weight': 3,
        # 0 = print running messages (older XGBoost releases; newer ones use
        # 'verbosity' instead).
        'silent': 0,
        'eta': 0.007,  # acts like a learning rate
        'seed': 1000,
        'nthread': 0,  # number of CPU threads
        # 'eval_metric': 'auc'
    }

    plst = list(params.items())
    watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]

    # Make sure the output directories exist up front, so a long training
    # run is not lost to a missing folder when the results are saved.
    for out_dir in ('./model', './result'):
        os.makedirs(out_dir, exist_ok=True)

    # Train and save the model.
    # With a large number of rounds, early_stopping_rounds stops training
    # when the validation metric has not improved for that many rounds.
    save_location = './model/xgb.model'
    model = xgb.train(plst,
                      xgb_train,
                      num_rounds,
                      watchlist,
                      early_stopping_rounds=early_stopping_rounds)
    model.save_model(save_location)  # persist the trained model
    print('best best_ntree_limit', model.best_ntree_limit)
    pt.print_tree(model)
    (ft_importance, feat_importances_sorted, machine_importance,
     machine_importances_sorted) = get_xgb_feat_importances(model)

    ft_importance.to_csv('./result/feature_imporatance.csv', index=False)
    machine_importance.to_csv('./result/process_machine_imporatance.csv',
                              index=False)
    machine_importances_sorted.to_csv(
        './result/process_machine_importances_sorted.csv', index=False)

    preds = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)
    # One row per input sample: 1-based row number, predicted label.
    np.savetxt('./model/xgb_submission.csv',
               np.c_[range(1,
                           len(train) + 1), preds],
               delimiter=',',
               header='ImageId, Label',
               comments='',
               fmt='%d')

    # Report the elapsed time.
    cost_time = time.time() - start_time
    print('xgboost success!', '\n', 'cost time: ', cost_time, '(s)......')

    # To plot feature importance and the trees:
    # xgb.plot_importance(model)
    # xgb.plot_tree(model, fmap='xgb.fmap')

    return ft_importance, feat_importances_sorted, machine_importance, machine_importances_sorted
    """
コード例 #10
0
        },
        {
            "ID": "I",
            "PID": "H"
        },
        {
            "ID": "B",
            "PID": "C"
        },
        {
            "ID": "E",
            "PID": "A"
        },
        {
            "ID": "F",
            "PID": None
        },
        {
            "ID": "K",
            "PID": "F"
        },
        {
            "ID": "D",
            "PID": "B"
        },
    ]
    shuffle(L)
    make_from_bottom_up_tree(L, keys=("ID", "PID"))
    #
    print_tree(L, keys=("ID", "__children"))
コード例 #11
0
ファイル: parse.py プロジェクト: chizhangucb/Python_Material
    'VP': [['V', 'NP']],
    'RP': [['R', 'NP', 'V']],
}


def expand(tag):
    """Generate every tree whose root is labelled ``tag``.

    Lexicon leaves carrying the tag are produced first, followed by one
    ``Tree`` per way of expanding each grammar rule for the tag.
    """
    for entry in lexicon:
        if tag == entry.tag:
            yield entry
    # A tag without grammar rules can only ever be a leaf.
    if tag not in grammar:
        return
    for expansion in grammar[tag]:
        for children in expand_all(expansion):
            yield Tree(tag, children)


def expand_all(tags):
    """Generate every list of sibling trees matching the tag sequence ``tags``.

    Each yielded list holds one tree per tag, in the same order as ``tags``.
    """
    # Base case: a single tag contributes one-element lists.
    if len(tags) == 1:
        for only_child in expand(tags[0]):
            yield [only_child]
        return
    head, tail = tags[0], tags[1:]
    # Pair each tree for the first tag with each expansion of the remainder.
    for head_tree in expand(head):
        for tail_trees in expand_all(tail):
            yield [head_tree] + tail_trees


# Print each tree generated from the start symbol 'S', in the order expand()
# yields them.
# NOTE(review): if the grammar is recursive this generator may never be
# exhausted -- confirm against the full grammar definition.
for tree in expand('S'):
    print_tree(tree)
コード例 #12
0
    else:
        new_node.left = build_bracket_tree(bracket_depth - 1, players)
        new_node.right = build_bracket_tree(bracket_depth - 1, players)

    return new_node


def bracket_builder(tournament_name, tournament_event, top_players):
    """Build the winners and losers bracket trees for a tournament event.

    The first half of ``top_players`` seeds the winners bracket and the
    second half the losers bracket; each half is reversed before being handed
    to ``build_bracket_tree``.

    Args:
        tournament_name: passed to ``init_sim_bracket``.
        tournament_event: passed to ``init_sim_bracket``.
        top_players: list of players; it is sliced, not modified.

    Returns:
        A ``(winners_bracket, losers_bracket)`` tuple of the two trees
        returned by ``build_bracket_tree``.
    """
    num_top_players = len(top_players)
    bracket_size = init_sim_bracket(tournament_name, tournament_event)

    # Floor division keeps the split point an int, so the slices also work on
    # Python 3 (where "/" yields a float, which is not a valid index).
    half = num_top_players // 2
    winners_players = top_players[:half]
    winners_players.reverse()
    losers_players = top_players[half:]
    losers_players.reverse()

    # Both brackets share the same depth: log2 of half the bracket size.
    bracket_depth = log(bracket_size / 2, 2)
    winners_bracket = build_bracket_tree(bracket_depth, winners_players)
    losers_bracket = build_bracket_tree(bracket_depth, losers_players)
    return (winners_bracket, losers_bracket)


if __name__ == "__main__":
    # Build and print both brackets for one hard-coded tournament event.
    tournament, event = "get-on-my-level-2016", "melee-singles"
    top_players = get_top_players(tournament, event)
    winners_bracket, losers_bracket = bracket_builder(tournament, event,
                                                      top_players)

    # Winners bracket first, then losers bracket.
    for bracket in (winners_bracket, losers_bracket):
        print_tree(bracket)