Ejemplo n.º 1
0
def predict(sess,
            model,
            data,
            dr=None,
            transitions=None,
            crf=True,
            decode_sess=None,
            scores=None,
            decode_holders=None,
            argmax=True,
            batch_size=100,
            pixels=None,
            pt_h=None,
            ensemble=False,
            verbose=False):
    """Run the prediction ops over `data` in mini-batches and decode them.

    Works on both Python 2 and Python 3.

    Args:
        sess: Object exposing ``run(fetch, feed_dict=...)`` (presumably a
            TensorFlow session), or a sequence of such objects when
            `ensemble` is true.
        model: List whose first ``len(data)`` entries are used as feed keys
            and whose remaining entries are the prediction ops to fetch.
        data: Sequence of parallel columns; sample ``k`` is
            ``(data[0][k], data[1][k], ...)``.
        dr: Optional extra feed key (looks like a dropout placeholder --
            TODO confirm); it is always fed ``0.0``.
        transitions: One fetchable per prediction op; evaluated once up
            front when `crf` is true.
        crf: When true, decode through `decode_sess` and
            ``toolbox.viterbi``.
        decode_sess: Session-like object used to run ``scores[i]``.
        scores: One fetchable per prediction op for the decoding step.
        decode_holders: Per prediction op, four feed keys that receive
            ``[observations, transitions, lengths, batch size]``.
        argmax: Without CRF, take ``np.argmax(..., axis=2)`` of the raw
            output before trimming; otherwise the raw output is trimmed.
        batch_size: Maximum number of samples per run.
        pixels: When not ``None``, ``toolbox.get_batch_pixels`` is called
            with the first field of every sample in the batch and
            `pixels`; the result is fed to `pt_h`.
        pt_h: Feed key for the pixel batch (only used with `pixels`).
        ensemble: When true, CRF observations and transitions are averaged
            over all sessions in `sess`. The non-CRF branches always call
            ``sess.run`` directly.
        verbose: Print the integer percentage of processed samples before
            each batch.

    Returns:
        A list with one list per prediction op, holding the decoded or
        trimmed results of all batches in input order.

    Raises:
        AssertionError: In CRF mode, on the first batch, if `transitions`,
            `scores` and `decode_holders` are inconsistent.
    """
    en_num = len(sess) if ensemble else None
    num_items = len(data)
    # Copy so that appending extra feed keys never mutates the caller's list.
    input_v = list(model[:num_items])
    if dr is not None:
        input_v.append(dr)
    if pixels is not None:
        input_v.append(pt_h)
    predictions = model[num_items:]
    output = [[] for _ in predictions]
    # list(...) keeps len() and slicing working where zip() is lazy (Python 3).
    samples = list(zip(*data))
    n_samples = len(samples)

    trans = []
    if crf:
        # Transition values do not depend on the batch: evaluate them once.
        for idx in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[idx])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[idx]))

    start_idx = 0
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        batch = samples[start_idx:start_idx + batch_size]
        # Only the final batch can be shorter than `batch_size`.
        cur_size = len(batch)
        holders = [[sample[item] for sample in batch]
                   for item in range(num_items)]
        if dr is not None:
            holders.append(0.0)
        if pixels is not None:
            pt_ids = [sample[0] for sample in batch]
            holders.append(toolbox.get_batch_pixels(pt_ids, pixels))
        # Sum of signs per row; equals the unpadded length assuming
        # zero-padded, non-negative ids -- TODO confirm with the caller.
        length = np.sum(np.sign(holders[0]), axis=1).astype(int)
        # The same feed is shared by every prediction op and every session.
        feed = dict(zip(input_v, holders))

        if crf:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for idx, prediction in enumerate(predictions):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(prediction, feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(prediction, feed_dict=feed)
                pre_values = [ob, trans[idx], length, cur_size]
                assert len(pre_values) == len(decode_holders[idx])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[idx],
                    feed_dict=dict(zip(decode_holders[idx], pre_values)))
                output[idx].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    cur_size))
        else:
            for idx, prediction in enumerate(predictions):
                pre = sess.run(prediction, feed_dict=feed)
                if argmax:
                    pre = np.argmax(pre, axis=2)
                pre = toolbox.trim_output(pre.tolist(), length)
                output[idx].extend(pre)
        start_idx += cur_size
    return output
Ejemplo n.º 2
0
def predict(sess,
            placeholders,
            data,
            dr=None,
            transitions=None,
            crf=True,
            decode_sess=None,
            scores=None,
            decode_holders=None,
            argmax=True,
            batch_size=100,
            ensemble=False,
            verbose=False):
    """Run the prediction ops over `data` in mini-batches and decode them.

    Works on both Python 2 and Python 3.

    Args:
        sess: Object exposing ``run(fetch, feed_dict=...)`` (presumably a
            TensorFlow session), or a sequence of such objects when
            `ensemble` is true.
        placeholders: List whose first ``len(data)`` entries are used as
            feed keys and whose remaining entries are the prediction ops
            to fetch.
        data: Sequence of parallel columns; sample ``k`` is
            ``(data[0][k], data[1][k], ...)``.
        dr: Optional extra feed key (looks like a dropout placeholder --
            TODO confirm); it is always fed ``0.0``.
        transitions: One fetchable per prediction op; evaluated once up
            front when `crf` is true.
        crf: When true, decode through `decode_sess` and
            ``toolbox.viterbi``.
        decode_sess: Session-like object used to run ``scores[i]``.
        scores: One fetchable per prediction op for the decoding step.
        decode_holders: Per prediction op, four feed keys that receive
            ``[observations, transitions, lengths, batch size]``.
        argmax: Without CRF, take ``np.argmax(..., axis=2)`` of the raw
            output before trimming; otherwise the raw output is trimmed.
        batch_size: Maximum number of samples per run.
        ensemble: When true, CRF observations and transitions are averaged
            over all sessions in `sess`. The non-CRF branches always call
            ``sess.run`` directly.
        verbose: Print the integer percentage of processed samples before
            each batch.

    Returns:
        A list with one list per prediction op, holding the decoded or
        trimmed results of all batches in input order.

    Raises:
        AssertionError: In CRF mode, on the first batch, if `transitions`,
            `scores` and `decode_holders` are inconsistent.
    """
    en_num = len(sess) if ensemble else None
    # Per the original author's note there are four input vectors:
    # characters, radicals, 2-grams and 3-grams (not enforced here).
    num_items = len(data)
    # Copy so that appending `dr` never mutates the caller's list.
    input_v = list(placeholders[:num_items])
    if dr is not None:
        input_v.append(dr)
    # Per the original author's note there is a single prediction vector;
    # the code nevertheless handles any number of them.
    predictions = placeholders[num_items:]
    output = [[] for _ in predictions]
    # list(...) keeps len() and slicing working where zip() is lazy (Python 3).
    samples = list(zip(*data))
    n_samples = len(samples)

    trans = []
    if crf:
        # Transition values do not depend on the batch: evaluate them once.
        for idx in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[idx])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[idx]))

    start_idx = 0
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        batch = samples[start_idx:start_idx + batch_size]
        # Only the final batch can be shorter than `batch_size`.
        cur_size = len(batch)
        holders = [[sample[item] for sample in batch]
                   for item in range(num_items)]
        if dr is not None:
            holders.append(0.0)
        # Sum of signs per row; equals the unpadded length assuming
        # zero-padded, non-negative ids -- TODO confirm with the caller.
        length = np.sum(np.sign(holders[0]), axis=1).astype(int)
        # The same feed is shared by every prediction op and every session.
        feed = dict(zip(input_v, holders))

        if crf:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for idx, prediction in enumerate(predictions):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(prediction, feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(prediction, feed_dict=feed)
                pre_values = [ob, trans[idx], length, cur_size]
                assert len(pre_values) == len(decode_holders[idx])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[idx],
                    feed_dict=dict(zip(decode_holders[idx], pre_values)))
                output[idx].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    cur_size))
        else:
            for idx, prediction in enumerate(predictions):
                pre = sess.run(prediction, feed_dict=feed)
                if argmax:
                    pre = np.argmax(pre, axis=2)
                pre = toolbox.trim_output(pre.tolist(), length)
                output[idx].extend(pre)
        start_idx += cur_size
    return output
Ejemplo n.º 3
0
def predict(sess,
            model,
            data,
            dr=None,
            transitions=None,
            crf=True,
            decode_sess=None,
            scores=None,
            decode_holders=None,
            argmax=True,
            batch_size=100,
            ensemble=False,
            verbose=False):
    """Run the prediction ops over `data` in mini-batches and decode them.

    Args:
        sess: Object exposing ``run(fetch, feed_dict=...)`` (presumably a
            TensorFlow session), or a sequence of such objects when
            `ensemble` is true.
        model: List whose first ``len(data)`` entries are used as feed keys
            and whose remaining entries are the prediction ops to fetch.
        data: Sequence of parallel columns; sample ``k`` is
            ``(data[0][k], data[1][k], ...)``.
        dr: Optional extra feed key (looks like a dropout placeholder --
            TODO confirm); it is always fed ``0.0``.
        transitions: One fetchable per prediction op; evaluated once up
            front when ``crf > 0``.
        crf: When ``> 0``, decode through `decode_sess` and
            ``toolbox.viterbi``.
        decode_sess: Session-like object used to run ``scores[i]``.
        scores: One fetchable per prediction op for the decoding step.
        decode_holders: Per prediction op, four feed keys that receive
            ``[observations, transitions, lengths, batch size]``.
        argmax: Selects the non-CRF decoding mode:
            ``True`` takes the arg-max over the last axis; any other truthy
            value is used as a threshold: after ``softmax``, entry 0 of the
            last axis is set to 1 where it exceeds the threshold and to 0
            otherwise, then the arg-max is taken; a falsy value returns the
            raw outputs.
        batch_size: Maximum number of samples per run (passed through
            ``int()``).
        ensemble: When true, CRF observations and transitions are averaged
            over all sessions in `sess`. The non-CRF branches always call
            ``sess.run`` directly.
        verbose: Print the integer percentage of processed samples before
            each batch.

    Returns:
        A list with one list per prediction op, holding the results of all
        batches in input order. Outputs with more than two dimensions are
        passed through ``toolbox.trim_output`` with the per-sample lengths.

    Raises:
        AssertionError: In CRF mode, on the first batch, if `transitions`,
            `scores` and `decode_holders` are inconsistent.
    """
    en_num = len(sess) if ensemble else None
    num_items = len(data)
    # Copy so that appending `dr` never mutates the caller's list.
    input_v = list(model[:num_items])
    if dr is not None:
        input_v.append(dr)
    predictions = model[num_items:]
    output = [[] for _ in predictions]
    samples = list(zip(*data))
    n_samples = len(samples)

    trans = []
    if crf > 0:
        # Transition values do not depend on the batch: evaluate them once.
        for idx in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[idx])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[idx]))

    start_idx = 0
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        batch = samples[start_idx:start_idx + int(batch_size)]
        # Only the final batch can be shorter than `batch_size`.
        cur_size = len(batch)
        holders = [[sample[item] for sample in batch]
                   for item in range(num_items)]
        if dr is not None:
            holders.append(0.0)
        # Sum of signs per row; equals the unpadded length assuming
        # zero-padded, non-negative ids -- TODO confirm with the caller.
        length = np.sum(np.sign(holders[0]), axis=1)
        # The same feed is shared by every prediction op and every session.
        feed = dict(zip(input_v, holders))

        if crf > 0:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for idx, prediction in enumerate(predictions):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(prediction, feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(prediction, feed_dict=feed)
                pre_values = [ob, trans[idx], length, cur_size]
                assert len(pre_values) == len(decode_holders[idx])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[idx],
                    feed_dict=dict(zip(decode_holders[idx], pre_values)))
                output[idx].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    cur_size))
        elif argmax:
            for idx, prediction in enumerate(predictions):
                pre = sess.run(prediction, feed_dict=feed)
                dim_axis = len(pre.shape) - 1
                if argmax is not True:
                    # `argmax` is a threshold on entry 0 of the last axis.
                    pre = softmax(pre)
                    # Ellipsis indexing returns a view, so the writes below
                    # land in `pre`; unlike `[:, :, 0]` it also works for
                    # outputs that are not 3-D.
                    first = pre[..., 0]
                    first[first > argmax] = 1
                    first[first <= argmax] = 0
                pre = np.argmax(pre, axis=dim_axis).tolist()
                if dim_axis > 1:
                    pre = toolbox.trim_output(pre, length)
                output[idx].extend(pre)
        else:
            for idx, prediction in enumerate(predictions):
                pre = sess.run(prediction, feed_dict=feed)
                if len(pre.shape) - 1 > 1:
                    pre = toolbox.trim_output(pre, length)
                output[idx].extend(pre)
        start_idx += cur_size
    return output