def generate_gan(sess, model, loss_type="pair", negative_size=3):
    """Sample negative answers for GAN training.

    For every (question, answer) pair in the module-level ``raw`` list, score
    a random pool of candidate negatives with ``model.gan_score`` and draw
    ``negative_size`` negatives from the softmax distribution over the scores.

    Args:
        sess: TensorFlow session used to evaluate the generator scores.
        model: model exposing ``input_x_1/2/3`` placeholders and ``gan_score``.
        loss_type: unused here; kept for interface compatibility with callers.
        negative_size: number of negatives to sample per (q, a) pair.

    Returns:
        A list of [q, a, neg] triples, each element encoded via
        ``encode_sent(vocab, ..., FLAGS.max_sequence_length)``.
    """
    samples = []
    for _index, pair in enumerate(raw):
        if _index % 100 == 0:
            print("have sampled %d pairs" % _index)
        q = pair[2]
        a = pair[3]
        # Candidate negatives: every entry of alist except the positive at _index.
        # NOTE(review): assumes raw and alist are index-aligned — confirm.
        neg_alist_index = [i for i in range(len(alist))]
        neg_alist_index.remove(_index)  # remove the positive index
        sampled_index = np.random.choice(
            neg_alist_index, size=[FLAGS.pools_size], replace=False)
        pools = np.array(alist)[sampled_index]
        canditates = insurance_qa_data_helpers.loadCandidateSamples(
            q, a, pools, vocab)
        predicteds = []
        for batch in insurance_qa_data_helpers.batch_iter(
                canditates, batch_size=FLAGS.batch_size):
            feed_dict = {
                model.input_x_1: batch[:, 0],
                model.input_x_2: batch[:, 1],
                model.input_x_3: batch[:, 2],
            }
            predicted = sess.run(model.gan_score, feed_dict)
            predicteds.extend(predicted)
        # Softmax over the scaled scores.  Subtract the max before exp() so
        # large scores cannot overflow; the shift cancels in the ratio.
        # NOTE(review): the extra 1.5 temperature factor looks ad hoc — confirm.
        scaled = np.array(predicteds) * FLAGS.sampled_temperature * 1.5
        exp_rating = np.exp(scaled - np.max(scaled))
        prob = exp_rating / np.sum(exp_rating)
        # Sample row *indices* rather than rows: np.random.choice only accepts
        # 1-D candidate arrays, and ``pools`` may be 2-D (one row per answer).
        neg_indices = np.random.choice(
            len(pools), size=negative_size, p=prob, replace=False)
        for neg in pools[neg_indices]:
            samples.append([
                encode_sent(vocab, item, FLAGS.max_sequence_length)
                for item in [q, a, neg]
            ])
    return samples
def generate_dns_pair(sess, model):
    """Dynamic negative sampling: keep the hardest negative per (q, a) pair.

    For each pair in the module-level ``raw`` list, score a random pool of
    candidate negatives with ``model.score13`` and retain only the single
    highest-scoring one.

    Args:
        sess: TensorFlow session used to evaluate ``model.score13``.
        model: model exposing ``input_x_1/2/3`` placeholders and ``score13``.

    Returns:
        A list of encoded [q, a, hardest_neg] triples.
    """
    results = []
    for idx, qa_pair in enumerate(raw):
        if idx % 100 == 0:
            print("have sampled %d pairs" % idx)
        question, answer = qa_pair[2], qa_pair[3]
        pool = np.random.choice(alist, size=[FLAGS.pools_size])
        candidates = insurance_qa_data_helpers.loadCandidateSamples(
            question, answer, pool, vocab)
        scores = []
        for chunk in insurance_qa_data_helpers.batch_iter(
                candidates, batch_size=FLAGS.batch_size):
            chunk_scores = sess.run(model.score13, {
                model.input_x_1: chunk[:, 0],
                model.input_x_2: chunk[:, 1],
                model.input_x_3: chunk[:, 2],
            })
            scores.extend(chunk_scores)
        hardest = pool[np.argmax(scores)]
        results.append([
            encode_sent(vocab, text, FLAGS.max_sequence_length)
            for text in [question, answer, hardest]
        ])
    return results
def generate_dns_pair(sess, model):
    """Dynamic negative sampling; each loop iteration handles one q/a pair.

    NOTE(review): this re-defines ``generate_dns_pair`` and therefore shadows
    the earlier definition of the same name in this file — confirm which of
    the two duplicates is the intended one.
    """
    samples = []
    for _index, pair in enumerate(raw):  # each iteration handles one q/a pair
        if _index % 100 == 0:
            print("have sampled %d pairs" % _index)
        q = pair[2]
        a = pair[3]
        # Draw pools_size (100) negatives for this single q/a pair.
        pools = np.random.choice(alist, size=[FLAGS.pools_size])
        canditates = insurance_qa_data_helpers.loadCandidateSamples(
            q, a, pools, vocab)
        predicteds = []
        for batch in insurance_qa_data_helpers.batch_iter(
                canditates, batch_size=FLAGS.batch_size):
            # One batch holds batch_size (100) triples:
            # (q, a, neg_1), ..., (q, a, neg_batch_size).
            # batch[:, 0] holds the questions, batch[:, 1] the answers,
            # batch[:, 2] the negatives.
            feed_dict = {
                model.input_x_1: batch[:, 0],
                model.input_x_2: batch[:, 1],
                model.input_x_3: batch[:, 2]
            }
            predicted = sess.run(model.score13, feed_dict)  # score13 is cos(q, neg)
            predicteds.extend(predicted)
        index = np.argmax(predicteds)
        samples.append([
            encode_sent(vocab, item, FLAGS.max_sequence_length)
            for item in [q, a, pools[index]]
        ])
    return samples
def generate_uniform_pair():
    """Build training triples with one uniformly random negative per pair.

    Returns:
        A list of encoded [q, a, neg] triples, one per entry in the
        module-level ``raw`` list; the negative is drawn uniformly at random
        from ``alist``.
    """
    triples = []
    for qa in raw:
        question, answer = qa[2], qa[3]
        pick = random.randint(0, len(alist) - 1)
        negative = alist[pick]
        encoded = [
            encode_sent(vocab, text, FLAGS.max_sequence_length)
            for text in [question, answer, negative]
        ]
        triples.append(encoded)
    return triples