def predict(sess, model, data, dr=None, transitions=None, crf=True, decode_sess=None, scores=None, decode_holders=None, argmax=True, batch_size=100, pixels=None, pt_h=None, ensemble=False, verbose=False):
    """Run batched inference and return decoded label sequences.

    Args:
        sess: a TF session, or a list of sessions when ``ensemble`` is True.
        model: input placeholders followed by prediction tensors; the first
            ``len(data)`` entries are the input placeholders.
        data: one sequence-list per input feature; zipped into samples.
        dr: optional dropout placeholder; fed 0.0 at inference time.
        transitions: CRF transition tensors (required when ``crf`` is true).
        crf: decode with Viterbi over CRF scores when true.
        decode_sess: session used to run the decoding graph.
        scores: decoding-score ops, one per prediction tensor.
        decode_holders: placeholders feeding the decoding graph.
        argmax: when not using the CRF, take a per-step argmax.
        batch_size: number of samples per inference batch.
        pixels: optional pixel features; looked up per batch via ``toolbox``.
        pt_h: placeholder for the pixel features.
        ensemble: average predictions over the list of sessions.
        verbose: print progress percentage.

    Returns:
        A list with one list of decoded outputs per prediction tensor.
    """
    en_num = len(sess) if ensemble else None
    num_items = len(data)
    input_v = model[:num_items]
    if dr is not None:
        input_v.append(dr)
    if pixels is not None:
        input_v.append(pt_h)
    predictions = model[num_items:]
    output = [[] for _ in range(len(predictions))]
    # list() so the zipped samples support len() and slicing on Python 3.
    samples = list(zip(*data))
    start_idx = 0
    n_samples = len(samples)
    if crf:
        # Fetch (and, for ensembles, average) each CRF transition matrix once,
        # outside the batch loop.
        trans = []
        for i in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[i])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[i]))
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        next_batch_input = samples[start_idx:start_idx + batch_size]
        batch_size = len(next_batch_input)  # the final batch may be short
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_input])
        if dr is not None:
            holders.append(0.0)  # disable dropout at inference time
        if pixels is not None:
            pt_ids = [s[0] for s in next_batch_input]
            holders.append(toolbox.get_batch_pixels(pt_ids, pixels))
        # Build the feed dict once per batch; every run below reuses it.
        feed = dict(zip(input_v, holders))
        # Non-padding positions are non-zero, so summing signs yields lengths.
        length = np.sum(np.sign(holders[0]), axis=1)
        length = length.astype(int)
        if crf:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for i in range(len(predictions)):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(predictions[i], feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(predictions[i], feed_dict=feed)
                pre_values = [ob, trans[i], length, batch_size]
                assert len(pre_values) == len(decode_holders[i])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[i],
                    feed_dict=dict(zip(decode_holders[i], pre_values)))
                output[i].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    batch_size))
        elif argmax:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                pre = np.argmax(pre, axis=2)
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        else:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        start_idx += batch_size
    return output
def predict(sess, placeholders, data, dr=None, transitions=None, crf=True, decode_sess=None, scores=None, decode_holders=None, argmax=True, batch_size=100, ensemble=False, verbose=False):
    """Run batched inference over ``placeholders`` and return decoded outputs.

    Args:
        sess: a TF session, or a list of sessions when ``ensemble`` is True.
        placeholders: input placeholders followed by prediction tensors; the
            first ``len(data)`` entries are the input placeholders.
        data: one sequence-list per input feature; zipped into samples.
        dr: optional dropout placeholder; fed 0.0 at inference time.
        transitions: CRF transition tensors (required when ``crf`` is true).
        crf: decode with Viterbi over CRF scores when true.
        decode_sess: session used to run the decoding graph.
        scores: decoding-score ops, one per prediction tensor.
        decode_holders: placeholders feeding the decoding graph.
        argmax: when not using the CRF, take a per-step argmax.
        batch_size: number of samples per inference batch.
        ensemble: average predictions over the list of sessions.
        verbose: print progress percentage.

    Returns:
        A list with one list of decoded outputs per prediction tensor.
    """
    en_num = len(sess) if ensemble else None
    # Input vector: the feature placeholders (e.g. characters, radicals,
    # 2-grams, 3-grams — one per entry of ``data``).
    num_items = len(data)
    input_v = placeholders[:num_items]
    if dr is not None:
        input_v.append(dr)
    # The remaining entries are the prediction tensors.
    predictions = placeholders[num_items:]
    output = [[] for _ in range(len(predictions))]
    # list() so the zipped samples support len() and slicing on Python 3.
    samples = list(zip(*data))
    start_idx = 0
    n_samples = len(samples)
    if crf:
        # Fetch (and, for ensembles, average) each CRF transition matrix once,
        # outside the batch loop.
        trans = []
        for i in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[i])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[i]))
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        next_batch_input = samples[start_idx:start_idx + batch_size]
        batch_size = len(next_batch_input)  # the final batch may be short
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_input])
        if dr is not None:
            holders.append(0.0)  # disable dropout at inference time
        # Build the feed dict once per batch; every run below reuses it.
        feed = dict(zip(input_v, holders))
        # Non-padding positions are non-zero, so summing signs yields lengths.
        length = np.sum(np.sign(holders[0]), axis=1)
        length = length.astype(int)
        if crf:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for i in range(len(predictions)):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(predictions[i], feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(predictions[i], feed_dict=feed)
                pre_values = [ob, trans[i], length, batch_size]
                assert len(pre_values) == len(decode_holders[i])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[i],
                    feed_dict=dict(zip(decode_holders[i], pre_values)))
                output[i].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    batch_size))
        elif argmax:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                pre = np.argmax(pre, axis=2)
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        else:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                pre = pre.tolist()
                pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        start_idx += batch_size
    return output
def predict(sess, model, data, dr=None, transitions=None, crf=True, decode_sess=None, scores=None, decode_holders=None, argmax=True, batch_size=100, ensemble=False, verbose=False):
    """Run batched inference and return decoded label sequences.

    Args:
        sess: a TF session, or a list of sessions when ``ensemble`` is True.
        model: input placeholders followed by prediction tensors; the first
            ``len(data)`` entries are the input placeholders.
        data: one sequence-list per input feature; zipped into samples.
        dr: optional dropout placeholder; fed 0.0 at inference time.
        transitions: CRF transition tensors (required when ``crf`` > 0).
        crf: decode with Viterbi over CRF scores when > 0.
        decode_sess: session used to run the decoding graph.
        scores: decoding-score ops, one per prediction tensor.
        decode_holders: placeholders feeding the decoding graph.
        argmax: when not using the CRF: True takes a plain per-step argmax;
            a numeric value is used as a probability threshold on class 0
            after a softmax.
        batch_size: number of samples per inference batch.
        ensemble: average predictions over the list of sessions.
        verbose: print progress percentage.

    Returns:
        A list with one list of decoded outputs per prediction tensor.
    """
    en_num = len(sess) if ensemble else None
    num_items = len(data)
    input_v = model[:num_items]
    if dr is not None:
        input_v.append(dr)
    predictions = model[num_items:]
    output = [[] for _ in range(len(predictions))]
    samples = list(zip(*data))
    start_idx = 0
    n_samples = len(samples)
    if crf > 0:
        # Fetch (and, for ensembles, average) each CRF transition matrix once,
        # outside the batch loop.
        trans = []
        for i in range(len(predictions)):
            if ensemble:
                en_trans = 0
                for en_sess in sess:
                    en_trans += en_sess.run(transitions[i])
                trans.append(en_trans / en_num)
            else:
                trans.append(sess.run(transitions[i]))
    while start_idx < n_samples:
        if verbose:
            print('%d' % (start_idx * 100 / n_samples) + '%')
        next_batch_input = samples[start_idx:start_idx + int(batch_size)]
        batch_size = len(next_batch_input)  # the final batch may be short
        holders = []
        for item in range(num_items):
            holders.append([s[item] for s in next_batch_input])
        if dr is not None:
            holders.append(0.0)  # disable dropout at inference time
        # Build the feed dict once per batch; every run below reuses it.
        feed = dict(zip(input_v, holders))
        # Non-padding positions are non-zero, so summing signs yields lengths.
        # NOTE(review): unlike the sibling predict() variants this one does
        # not cast ``length`` to int — confirm downstream callees tolerate
        # float lengths before unifying.
        length = np.sum(np.sign(holders[0]), axis=1)
        if crf > 0:
            assert transitions is not None and len(transitions) == len(
                predictions) and len(scores) == len(decode_holders)
            for i in range(len(predictions)):
                if ensemble:
                    en_obs = 0
                    for en_sess in sess:
                        en_obs += en_sess.run(predictions[i], feed_dict=feed)
                    ob = en_obs / en_num
                else:
                    ob = sess.run(predictions[i], feed_dict=feed)
                pre_values = [ob, trans[i], length, batch_size]
                assert len(pre_values) == len(decode_holders[i])
                max_scores, max_scores_pre = decode_sess.run(
                    scores[i],
                    feed_dict=dict(zip(decode_holders[i], pre_values)))
                output[i].extend(
                    toolbox.viterbi(max_scores, max_scores_pre, length,
                                    batch_size))
        elif argmax:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                # sess.run returns an ndarray, so ndim gives the last axis.
                dim_axis = pre.ndim - 1
                if argmax is True:
                    pre = np.argmax(pre, axis=dim_axis)
                else:
                    # Numeric ``argmax`` acts as a probability threshold:
                    # binarize class 0 before taking the argmax.
                    # NOTE(review): this indexing assumes a 3-D
                    # (batch, seq, classes) output — confirm for 2-D heads.
                    pre = softmax(pre)
                    pre[:, :, 0][pre[:, :, 0] > argmax] = 1
                    pre[:, :, 0][pre[:, :, 0] <= argmax] = 0
                    pre = np.argmax(pre, axis=dim_axis)
                pre = pre.tolist()
                if dim_axis > 1:
                    pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        else:
            for i in range(len(predictions)):
                pre = sess.run(predictions[i], feed_dict=feed)
                dim_axis = pre.ndim - 1
                pre = pre.tolist()
                if dim_axis > 1:
                    pre = toolbox.trim_output(pre, length)
                output[i].extend(pre)
        start_idx += batch_size
    return output