Example #1
0
    print 'Longest sentence by character is %d. ' % max_step_c
    print 'Longest sentence by word is %d. ' % max_step_w

    print 'Longest word is %d. ' % max_w_len

    # b_train_x: shape=(4, number of buckets)
    b_train_x, b_train_y = toolbox.buckets(train_x,
                                           train_y,
                                           size=args.bucket_size)
    b_dev_x, b_dev_y = toolbox.buckets(dev_x, dev_y, size=args.bucket_size)

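    # Pad zeros at the end of each sentence so that all sentences within a
    # bucket have the same length.
    # b_train_x: shape=(4, number of buckets, sentences per bucket,
    #            sentence length)
    # b_train_y: shape=(1, number of buckets, sentences per bucket,
    #            sentence length); the 1 means each character has one tag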
    b_train_x, b_train_y, b_buckets, b_counts = toolbox.pad_bucket(
        b_train_x, b_train_y)
    # b_buckets: sentence length of each bucket (already aligned, so every
    #            sentence in a bucket has the same length)
    # b_counts: number of sentences in each bucket
    b_dev_x, b_dev_y, b_buckets, _ = toolbox.pad_bucket(b_dev_x,
                                                        b_dev_y,
                                                        bucket_len_c=b_buckets)

    print 'Training set: %d instances; Dev set: %d instances.' % (len(
        train_x[0]), len(dev_x[0]))

    nums_tags = toolbox.get_nums_tags(tag2idx, args.tag_scheme)
    # Session configuration: allow_soft_placement lets TF automatically pick
    # a device when the one you specified does not exist.
    config = tf.ConfigProto(allow_soft_placement=True)
    gpu_config = "/gpu:" + str(args.gpu)
    print 'Initialization....'
    t = time()
Example #2
0
                                        limit=args.sent_limit,
                                        sent_seg=args.sent_seg,
                                        is_space=is_space,
                                        ignore_space=args.ignore_space)
        train_x += train_gram
        dev_x += dev_gram
        nums_grams = []
        for dic in gram2idx:
            nums_grams.append(len(dic.keys()))

    max_len = max(max_len_train, max_len_dev)

    b_train_x, b_train_y = toolbox.buckets(train_x,
                                           train_y,
                                           size=args.bucket_size)
    b_train_x, b_train_y, b_lens, b_count = toolbox.pad_bucket(
        b_train_x, b_train_y, max_len)

    b_dev_x = [toolbox.pad_zeros(dev_x_i, max_len) for dev_x_i in dev_x]

    b_dev_y_gold = [
        line.strip() for line in codecs.open(
            path + '/tag_dev_gold.txt', 'r', encoding='utf-8')
    ]

    nums_tag = len(tag2idx)

    config = tf.ConfigProto(allow_soft_placement=True)
    gpu_config = "/gpu:" + str(args.gpu)

    transducer = None
    transducer_graph = None