Example #1
def train(sess, model, batch_size, config, lr, lrv, data, dr=None, drv=None, pixels=None, pt_h=None, verbose=False):
    assert len(data) == len(model)
    num_items = len(data)
    samples = zip(*data)
    random.shuffle(samples)
    start_idx = 0
    n_samples = len(samples)
    model.append(lr)
    if dr is not None:
        model.append(dr)
    if pixels is not None:
        model.append(pt_h)
    while start_idx < len(samples):
        if verbose:
            print '%d%%' % (start_idx * 100 / n_samples)
        next_batch_samples = samples[start_idx:start_idx + batch_size]
        real_batch_size = len(next_batch_samples)
        if real_batch_size < batch_size:
            next_batch_samples.extend(samples[:batch_size - real_batch_size])
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_samples])
        holders.append(lrv)
        if dr is not None:
            holders.append(drv)
        if pixels is not None:
            pt_ids = [s[0] for s in next_batch_samples]
            holders.append(toolbox.get_batch_pixels(pt_ids, pixels))
        sess.run(config, feed_dict={m: h for m, h in zip(model, holders)})
        start_idx += batch_size
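
Below is a minimal calling sketch for the train() in Example #1 (TensorFlow 1.x, Python 2). The toy graph, placeholder shapes, and sizes are illustrative only and not taken from the original project; the listed functions also assume module-level imports such as random, numpy as np, tensorflow as tf, datetime, and the project's toolbox module.

import numpy as np
import tensorflow as tf

tokens = tf.placeholder(tf.int32, [None, 10])   # padded character ids
labels = tf.placeholder(tf.int32, [None, 10])   # padded tag ids
lr = tf.placeholder(tf.float32, [])             # learning-rate placeholder

emb = tf.get_variable('emb', [100, 16])
logits = tf.layers.dense(tf.nn.embedding_lookup(emb, tokens), 5)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
train_op = tf.train.GradientDescentOptimizer(lr).minimize(loss)

data = [np.zeros((32, 10), dtype=np.int32).tolist(),   # 32 toy sentences of character ids
        np.zeros((32, 10), dtype=np.int32).tolist()]   # matching tag ids
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train(sess, [tokens, labels], batch_size=8, config=train_op,
          lr=lr, lrv=0.05, data=data, verbose=True)
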
Example #2
def train(sess, placeholders, batch_size, train_step, loss, lr, lrv, data, debug_variable=None, dr=None, drv=None,
          pixels=None, pt_h=None, verbose=False, merged_summary=None, log_writer=None, single_summary=None,
          epoch_index=0):
    """
    训练单个 bucket 的模型
    :param epoch_index:
    :param single_summary:
    :param merged_summary:
    :param log_writer:
    :param debug_variable:
    :param loss:
    :param sess: tf.Session
    :param placeholders: [tf.placeholder],总共5个,表示一个句子的字符本身、偏旁部首、2gram、3gram、对应标签
    :param batch_size:
    :param train_step: 目标 bucket 的 train_step, optimizer.apply_gradients()
    :param lr: 初始学习率
    :param lrv: 衰减后的学习率
    :param data: 当前 bucket 中的所有句子,shape=(5, bucket 中句子数量,句子长度)
    :param dr: drop_out
    :param drv: =drop_out
    :param pixels:
    :param pt_h:
    :param verbose:
    """
    if debug_variable is not None:
        session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
        np.set_printoptions(threshold=10000)
    # len(data) == 5: a sentence's characters, radicals, 2-grams, 3-grams, and labels.
    # Language-model targets: each sentence shifted left by one position, with a trailing 0 (padding).
    lm_targets = []
    for sentence in data[0]:
        lm_target = np.append(sentence[1:], 0)
        lm_targets.append(lm_target)
    data.append(lm_targets)
    assert len(data) == len(placeholders)
    num_items = len(data)
    samples = zip(*data)
    random.shuffle(samples)
    start_idx = 0
    n_samples = len(samples)
    placeholders.append(lr)
    if dr is not None:
        placeholders.append(dr)
    if pixels is not None:
        placeholders.append(pt_h)
    while start_idx < len(samples):
        if verbose:
            print '%s : %d of %d' % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), start_idx, n_samples)
        next_batch_samples = samples[start_idx:start_idx + batch_size]
        real_batch_size = len(next_batch_samples)
        if real_batch_size < batch_size:
            next_batch_samples.extend(samples[:batch_size - real_batch_size])
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_samples])
        holders.append(lrv)
        if dr is not None:
            holders.append(drv)
        if pixels is not None:
            pt_ids = [s[0] for s in next_batch_samples]
            holders.append(toolbox.get_batch_pixels(pt_ids, pixels))
        feed_dict = {m: h for m, h in zip(placeholders, holders)}
        if debug_variable is not None:
            debug_info = sess.run([train_step, loss] + debug_variable, feed_dict=feed_dict)
            # masks = tf.cast(tf.sign(data[0]), dtype=tf.float32)
            print "data and loss"
            masks = tf.cast(tf.sign(data[0][0]), dtype=tf.float32)
            lengths = tf.reduce_sum(tf.sign(data[0][0]), axis=0)
            print "masks", tf.Tensor.eval(masks, session=session)
            print "lengths", tf.Tensor.eval(lengths, session=session)
            for i in range(0, np.min([len(data[0]), len(debug_info[2][0]), len(debug_info[3][0])])):
                for (y, y_) in zip(debug_info[2][0][i], debug_info[3][0][i]):
                    lm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=y_)
                    lm_loss_value = tf.Tensor.eval(lm_loss, session=session)
                    if np.isnan(lm_loss_value) or True:  # "or True" forces printing for every pair while debugging; drop it to report NaNs only
                        print "lm_loss:", lm_loss_value
                        print "y:", np.array2string(y, precision=2, separator=',', suppress_small=True)
                        print "y_:", y_

            print "total loss:", debug_info[1]

        else:
            if single_summary is not None and log_writer is not None:
                loss_value, summary, _ = sess.run([loss, single_summary, train_step], feed_dict=feed_dict)
                log_step = start_idx + len(samples)*epoch_index
                if verbose:
                    print "log step: %d" % log_step
                log_writer.add_summary(summary[0], log_step)
                log_writer.add_summary(summary[1], log_step)
            else:
                _, loss_value = sess.run([train_step, loss], feed_dict=feed_dict)
        start_idx += batch_size
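
The lm_targets built at the top of Example #2 are simply each input sentence's character ids shifted left by one position with a trailing 0 (the padding id), so every position is trained to predict the next character. A tiny standalone check of that step; the ids below are made up:

import numpy as np

sentence = np.array([11, 42, 7, 0, 0])   # padded character ids of one sentence
lm_target = np.append(sentence[1:], 0)   # next-character targets
print lm_target                          # [42  7  0  0  0]
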
Example #3
def predict(sess, model, data, dr=None, transitions=None, crf=True, decode_sess=None, scores=None, decode_holders=None,
            argmax=True, batch_size=100, pixels=None, pt_h=None, ensemble=False, verbose=False):
    en_num = None
    if ensemble:
        en_num = len(sess)
    # Four input tensors: characters, radicals, 2-grams, 3-grams.
    num_items = len(data)
    input_v = model[:num_items]
    if dr is not None:
        input_v.append(dr)
    if pixels is not None:
        input_v.append(pt_h)
    # One prediction tensor per task.
    predictions = model[num_items:]
    # output = [[]]
    output = [[] for _ in range(len(predictions))]
    samples = zip(*data)
    start_idx = 0
    n_samples = len(samples)
    if crf:
        trans = []
        for i in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[i])
                trans.append(en_trans/en_num)
            else:
                trans.append(sess.run(transitions[i]))
    while start_idx < n_samples:
        if verbose:
            print '%d%%' % (start_idx*100/n_samples)
        next_batch_input = samples[start_idx:start_idx + batch_size]
        batch_size = len(next_batch_input)
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_input])
        if dr is not None:
            holders.append(0.0)
        if pixels is not None:
            pt_ids = [s[0] for s in next_batch_input]
            holders.append(toolbox.get_batch_pixels(pt_ids, pixels))
        # length_holder = tf.cast(tf.pack(holders[0]), dtype=tf.int32)
        # length = tf.reduce_sum(tf.sign(length_holder), reduction_indices=1)
        length = np.sum(np.sign(holders[0]), axis=1)
        length = length.astype(int)
        if crf:
            assert transitions is not None and len(transitions) == len(predictions) and len(scores) == len(decode_holders)
            for i in range(len(predictions)):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(predictions[i], feed_dict={p: h for p, h in zip(input_v, holders)})
                    ob = en_obs/en_num
                else:
                    ob = sess.run(predictions[i], feed_dict={p: h for p, h in zip(input_v, holders)})
                # trans = sess.run(transitions[i])
                pre_values = [ob, trans[i], length, batch_size]
                assert len(pre_values) == len(decode_holders[i])
                max_scores, max_scores_pre = decode_sess.run(scores[i], feed_dict={p: v for p, v in zip(decode_holders[i], pre_values)})
                output[i].extend(toolbox.viterbi(max_scores, max_scores_pre, length, batch_size))
        elif argmax:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict={p: h for p, h in zip(input_v, holders)})
                pre = np.argmax(pre, axis=2)
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        else:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict={p: h for p, h in zip(input_v, holders)})
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        start_idx += batch_size
    return output
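
In the non-CRF argmax branch of predict() above, sequence lengths are recovered from the zero padding of the first input, per-position tag scores are reduced with argmax, and each sequence is then cut back to its true length. A numpy-only illustration of those steps with made-up shapes, trimming inline instead of calling toolbox.trim_output:

import numpy as np

batch = np.array([[3, 8, 5, 0, 0],
                  [2, 9, 0, 0, 0]])                     # padded character ids
lengths = np.sum(np.sign(batch), axis=1).astype(int)    # true lengths: [3, 2]

scores = np.random.rand(2, 5, 4)                        # (batch, time, n_tags) model output
tags = np.argmax(scores, axis=2).tolist()               # best tag at every position
trimmed = [seq[:l] for seq, l in zip(tags, lengths)]    # drop the padded positions
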