コード例 #1
0
def get_label_scores(params, replay):
  """Score candidate documents with the (frozen) label network.

  Embeds the full candidate collection and the displayed SERP with the
  label network, runs the label GRU-scorer over the SERP prefix states,
  and evaluates every candidate against each prefix state.  The result
  is wrapped in tf.stop_gradient so the label network is never updated
  through this path.
  """
  batch_size = tf.shape(replay['serp'])[0]
  state_size = params['hidden_state_size']
  serp_len = params['serp_len']  # kept for parity with sibling models

  # First call creates the label embedding variables; the second reuses them.
  collection_emb = mu._shared_doc_embeddings(
      replay['docs'], params, '/label/doc_emb',
      inference=True, label_network=True, reuse_variable_scope=False)
  serp_doc_emb = mu._shared_doc_embeddings(
      replay['serp'], params, '/label/doc_emb',
      inference=True, label_network=True, reuse_variable_scope=True)

  # tf.scan iterates the leading axis, so go time-major: [serp_len, batch, emb].
  time_major = tf.transpose(serp_doc_emb, [1, 0, 2])

  gru = ru.get_gru_score_layer(
      params, '/label/gru',
      label_network=True, inference=True, reuse_variable_scope=False)

  zero_state = tf.zeros([batch_size, state_size])
  zero_scores = tf.zeros([batch_size, 1])
  states, _ = tf.scan(gru, time_major, (zero_state, zero_scores))

  # Back to batch-major, drop the final state, add a candidate axis.
  prefix_states = tf.transpose(states, [1, 0, 2])[:, :-1, None, :]

  candidates = tf.expand_dims(collection_emb, axis=1)
  label_scores = tf.squeeze(gru((prefix_states, None), candidates)[1], axis=3)

  return tf.stop_gradient(label_scores)
コード例 #2
0
def pbow_model(params, examples, labels, epsilon):
  """Build the PBOW (position bag-of-words) SERP construction graph.

  For each of the serp_len positions: score all documents with the
  '/main/score' subnetwork, pick one epsilon-greedily, and add the chosen
  document's position-tagged embedding to a running bag-of-words that is
  fed into the next position's scorer.

  Args:
    params: hyperparameter dict ('serp_len', 'doc_emb',
      'hidden_state_size', ...).
    examples: features consumed by mu._get_doc_tensors.
    labels: per-document labels; gathered for each selected document.
    epsilon: exploration probability for epsilon-greedy selection.

  Returns:
    dict with 'docs_per_query', per-position 'doc_%d'/'label_%d' and,
    for positions > 0, the greedy choice 'max_doc_%d'.
  """
  serp_len = params['serp_len']
  doc_emb_size = params['doc_emb'][-1]  # NOTE(review): unused in this function
  hidden_state_size = params['hidden_state_size']
  docs = mu._get_doc_tensors(examples, params, 'main')
  result = {}

  n_docs = tf.shape(docs)[0]
  result['docs_per_query'] = n_docs

  # Additive mask over scores; already-chosen docs are later set to -inf.
  # NOTE(review): np.NINF was removed in NumPy 2.0; prefer -np.inf.
  score_filter = tf.zeros([n_docs, 1], dtype=tf.float32)
  neginf = tf.add(score_filter,np.NINF)
  ind_range = tf.cast(tf.range(n_docs), dtype=tf.int64)

  # NOTE(review): this embedding is overwritten below (by the
  # position-tagged embedding) before its value is ever read — dead unless
  # the call is being kept for its variable-creation side effect; confirm.
  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)

  pbow = tf.zeros([1, hidden_state_size])
  doc_i = tf.zeros([n_docs,1])  # current position index, prepended to each doc
  doc_pos = tf.concat([doc_i, docs], axis=1)
  doc_emb = mu._shared_doc_embeddings(doc_pos, params,
                                      '/main/doc_emb',
                                      inference=True)
  doc_input = doc_emb
  for i in range(serp_len):
    scores = mu._create_subnetwork(doc_input, params, '/main/score',
                                   reuse_variable_scope=i>0,
                                   inference=True)
    tf.summary.scalar('policy/scores/pos_%d' % i, tf.reduce_mean(scores))
    action, max_ind = mu.select_eps_greedy_action(scores,
                                                  epsilon,
                                                  score_filter)

    select_doc = tf.gather(docs, action)
    select_doc_input = tf.gather(doc_emb, action)
    # Positions past the end of the candidate list get label 0.
    result['label_%d' % i] = tf.cond(
      tf.less(i, n_docs),
      lambda: tf.gather(labels, action, axis=0),
      lambda: tf.constant([[0]], dtype=tf.int64),
      )
    result['doc_%d' % i] = select_doc

    if i > 0:
      # Greedy (max) choice, stored for use as a bootstrap target action.
      result['max_doc_%d' % i] = tf.gather(docs, max_ind)

    if i < serp_len-1:
      # Block the selected document from being picked again.
      score_filter = tf.where(tf.equal(ind_range, action),
                              neginf,
                              score_filter)

      pbow += select_doc_input
      doc_i += 1
      # Re-embed all docs with the next position index prepended.
      doc_pos = tf.concat([doc_i, docs], axis=1)
      doc_emb = mu._shared_doc_embeddings(doc_pos, params,
        '/main/doc_emb', reuse_variable_scope=True,
        inference=True)
      doc_input = pbow + doc_emb
  return result
コード例 #3
0
def get_label_scores(params, replay):
    """Compute label-network scores for every candidate at each SERP prefix.

    The label GRU consumes the shown documents (all but the last) together
    with their display positions; each resulting hidden state is paired
    with every document in the (padded) collection and fed through the
    label scoring subnetwork.
    """
    serp_len = params['serp_len']
    max_n_docs = params['max_docs']
    state_size = params['hidden_state_size']
    batch = replay['serp'].shape[0]

    # Create the label embedding variables on the collection, then reuse
    # them for the SERP prefix.
    collection_emb = mu._shared_doc_embeddings(
        replay['docs'], params, '/label/doc_emb',
        label_network=True, inference=True, reuse_variable_scope=False)
    prefix_emb = mu._shared_doc_embeddings(
        replay['serp'][:, :-1, :], params, '/label/doc_emb',
        label_network=True, inference=True, reuse_variable_scope=True)

    gru = ru.get_gru_layer(params,
                           '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)

    # Time-major GRU input: document embedding concatenated with position.
    time_major_emb = tf.transpose(prefix_emb, [1, 0, 2])
    positions = tf.transpose(
        tf.cast(replay['pos_order'], tf.float32)[:, :-1, None], [1, 0, 2])
    rnn_states = tf.scan(gru,
                         tf.concat([time_major_emb, positions], axis=2),
                         tf.zeros([batch, state_size]))
    rnn_states = tf.transpose(rnn_states, [1, 0, 2])

    # Pair every prefix state with every candidate document.
    state_grid = tf.tile(rnn_states[:, :, None, :], [1, 1, max_n_docs, 1])
    doc_grid = tf.tile(collection_emb[:, None, :, :], [1, serp_len - 1, 1, 1])

    return mu._create_subnetwork(tf.concat([state_grid, doc_grid], axis=3),
                                 params,
                                 subnetwork_name='/label/scoring',
                                 label_network=True,
                                 inference=True,
                                 reuse_variable_scope=False)
コード例 #4
0
def naive_model(params, examples, labels, epsilon):
  """Construct a SERP with an independent scoring subnetwork per position.

  Each position i has its own '/main/score/pos_%d' subnetwork.  A document
  is chosen epsilon-greedily, masked out of subsequent positions, and its
  embedding is concatenated onto the scorer's input for the next slot.

  Args:
    params: hyperparameter dict ('serp_len', 'doc_emb', ...).
    examples: features consumed by mu._get_doc_tensors.
    labels: per-document labels, gathered for the chosen documents.
    epsilon: exploration probability for epsilon-greedy selection.

  Returns:
    dict with 'docs_per_query', the chosen documents ('serp'), their
    labels, and for positions > 0 the greedy choice 'max_doc_%d'.
  """
  serp_len = params['serp_len']
  docs = mu._get_doc_tensors(examples, params, 'main')
  result = {}

  n_docs = tf.shape(docs)[0]
  ind_range = tf.cast(tf.range(n_docs), dtype=tf.int64)
  result['docs_per_query'] = n_docs

  # Additive score mask; picked documents are later set to -inf.
  # (-np.inf replaces np.NINF, which was removed in NumPy 2.0.)
  score_filter = tf.zeros([n_docs, 1], dtype=tf.float32)
  neginf = tf.add(score_filter, -np.inf)

  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)

  serp_docs = []
  serp_labels = []
  doc_input = doc_emb
  for i in range(serp_len):
    scores = mu._create_subnetwork(doc_input, params,
                                   '/main/score/pos_%d' % i,
                                   reuse_variable_scope=False,
                                   inference=True)
    tf.summary.scalar('policy/scores/pos_%d' % i, tf.reduce_mean(scores))
    action, max_ind = mu.select_eps_greedy_action(scores,
                                                  epsilon,
                                                  score_filter)

    select_doc = tf.gather(docs, action)
    # Positions past the end of the candidate list get label 0.
    serp_labels.append(
      tf.cond(
        tf.less(i, n_docs),
        lambda: tf.gather(labels, action, axis=0),
        lambda: tf.constant([[0]], dtype=tf.int64),
      )
    )
    serp_docs.append(select_doc)

    if i > 0:
      # Greedy (max) document, stored for use as a bootstrap target action.
      result['max_doc_%d' % i] = tf.gather(docs, max_ind)

    if i < serp_len-1:
      # Block the chosen document from later positions and extend the next
      # scorer's input with the chosen document's embedding.
      score_filter = tf.where(tf.equal(ind_range, action),
                              neginf,
                              score_filter)
      select_doc_input = tf.gather(doc_emb, action)
      tiled = tf.tile(select_doc_input, [n_docs, 1])
      doc_input = tf.concat([tiled, doc_input], axis=1)
  result['serp'] = tf.stack(serp_docs, axis=1)
  result['labels'] = tf.concat(serp_labels, axis=1)
  tf.summary.histogram("label/output", result['labels'])
  return result
コード例 #5
0
def calculate_naive_loss(params, replay, rewards):
  """Monte-Carlo regression loss for the naive per-position model.

  Rebuilds per-position scores from replayed SERPs (with input dropout)
  and regresses them onto the observed rewards; positions beyond the
  query's document count are masked out.

  Returns:
    A 3-tuple repeating the Monte-Carlo loss, matching the loss-tuple
    shape used by the sibling loss functions.
  """
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  docs_in_query = replay['docs_per_query']
  batch_docs = replay['serp']
  n_docs = tf.shape(batch_docs)[0]  # NOTE(review): unused

  drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

  doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                      '/main/doc_emb',
                                      inference=True,
                                      reuse_variable_scope=True)

  # NOTE(review): the constant index 1 gathers the same SERP slot both here
  # and when extending main_serp below — it likely should track the loop
  # position (e.g. i or i+1) to mirror naive_model, which appends the
  # *selected* document's embedding at each step; confirm before relying on
  # this loss.
  main_serp = tf.gather(doc_emb, 1, axis=1)
  main_scores = []
  for i in range(serp_len):
    main_scores.append(mu._create_subnetwork(main_serp, params,
                                   '/main/score/pos_%d' % i,
                                   reuse_variable_scope=True,
                                   label_network=False,
                                   inference=False))
    if i < serp_len - 1:
      main_serp = tf.concat([
                    main_serp,
                    tf.gather(doc_emb, 1, axis=1),
                    ], axis=1)

  scores = tf.concat(main_scores, axis=1)
  unfiltered_mc_loss = (rewards-scores)**2

  # Mask out SERP positions past the query's document count.
  ind = tf.expand_dims(tf.range(serp_len),0)
  mask = tf.less(ind, docs_in_query)
  filtered_mc_loss = tf.where(mask,
                           unfiltered_mc_loss,
                           tf.zeros_like(unfiltered_mc_loss))
  mc_loss = tf.reduce_mean(filtered_mc_loss)

  # tf.summary.scalar('q_loss/loss', mean_losses[1])
  tf.summary.scalar('monte_carlo/loss', mc_loss)

  tf.summary.scalar('loss', mc_loss)

  return (mc_loss, mc_loss, mc_loss)
コード例 #6
0
def model(params, examples, labels):
  """Sampling-based SERP construction over a batched document tensor.

  At each of serp_len positions the scoring subnetwork scores all
  documents given the current recurrent state; a document is sampled
  (argmax when params['evaluation']), removed from the pool via a -inf
  additive filter, and its embedding is fed back into the recurrent
  state.  Log-probabilities of the sampled documents are collected for
  policy-gradient-style training.

  Returns:
    dict with 'docs_per_query', 'labels' [batch, serp_len], 'probs'
    (log-probabilities, zeroed past the query length) and 'serp_ind'.
  """
  serp_len = params['serp_len']
  doc_emb_size = params['doc_emb'][-1]  # NOTE(review): unused
  hidden_state_size = params['hidden_state_size']
  docs = examples['doc_tensors']
  batch_size = docs.shape[0].value
  batch_max_docs = tf.shape(docs)[1]
  docs_per_query = examples['n_docs']

  result = {
    'docs_per_query': docs_per_query,
    }

  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb')


  hidden_init = tf.zeros([batch_size, hidden_state_size])


  # NOTE(review): get_sigmoid_layer is called without a module prefix,
  # unlike ru.get_gru_layer elsewhere — confirm it is imported directly.
  rnn_fn = get_sigmoid_layer(params, '/main/gru',
                                  label_network=False,
                                  inference=False,
                                  reuse_variable_scope=False)

  batch_ind = tf.range(batch_size, dtype=tf.int64)[:, None]

  hidden_state = hidden_init
  # Additive -inf filter masks padded slots (and, later, chosen docs).
  # NOTE(review): np.NINF was removed in NumPy 2.0; prefer -np.inf.
  n_doc_filter = tf.sequence_mask(docs_per_query[:, 0], batch_max_docs)
  doc_filter = tf.where(n_doc_filter,
        tf.zeros([batch_size, batch_max_docs]),
        tf.fill([batch_size, batch_max_docs], np.NINF))
   #tf.zeros([n_docs, hidden_state_size])
  serp_labels = []
  serp_ind = []
  probs = []
  for i in range(serp_len):
    # Pair the current state with every candidate document.
    hidden_states = tf.tile(hidden_state[:, None, :], [1, batch_max_docs, 1])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=i>0,
                                   inference=False)[:, :, 0]
    seq_mask = tf.less(i, docs_per_query[:,0])
    if params['evaluation']:
      # Deterministic: pick the best unmasked document.
      sampled = tf.argmax(scores + doc_filter, axis=1)[:, None]
    else:
      # Stochastic: sample in proportion to exp(score) over unmasked docs.
      # NOTE(review): tf.multinomial is deprecated in favour of
      # tf.random.categorical in later TF releases.
      sampled = tf.multinomial((scores + doc_filter), 1)
      sampled = tf.where(seq_mask,
                         sampled,
                         tf.zeros_like(sampled))
    serp_ind.append(sampled[:, 0])
      # sampled = tf.Print(sampled, [tf.shape(scores + doc_filter)], 'shape: ')
      # sampled = tf.Print(sampled, [tf.reduce_sum(scores, axis=1)], 'scores: ')
      # sampled = tf.Print(sampled, [tf.reduce_sum(doc_filter, axis=1)], 'filter: ')
      # sampled = tf.Print(sampled, [tf.reduce_sum(scores + doc_filter, axis=1)], 'sum: ')
      # sampled = tf.Print(sampled, [docs_per_query[:,0]], 'docs_per_query: ')
      # sampled = tf.Print(sampled, [sampled[:,0]], 'sampled: ')

    gather_ind = tf.concat([batch_ind, sampled], axis=1)
    sampled_scores = tf.gather_nd(scores, gather_ind)
    # Softmax denominator; exp(-inf) = 0 for masked documents.  The floor
    # below presumably guards against log(0) later — confirm.
    exp_scores = tf.exp(scores + doc_filter)
    exp_scores = tf.where(tf.less(exp_scores, 0.001),
                          exp_scores + 0.001,
                          exp_scores)
    denom = tf.reduce_sum(exp_scores, axis=1)

    # Mask the chosen document out of later positions.
    doc_filter += tf.one_hot(sampled[:, 0], batch_max_docs,
                             on_value=np.NINF, off_value=0.)
    select_doc = tf.gather_nd(doc_emb, gather_ind)
    hidden_state = rnn_fn(hidden_state, select_doc)

    select_labels = tf.gather_nd(labels, gather_ind)
    # Positions past the query's document count get label 0 / prob 0.
    serp_labels.append(tf.where(
        seq_mask,
        select_labels,
        tf.zeros([batch_size], dtype=tf.int32),
      ))
    probs.append(tf.where(
        seq_mask,
        sampled_scores - tf.log(denom),
        tf.zeros([batch_size]),
      ))
    # probs[-1] = tf.Print(probs[-1], [denom], 'denom %d:' % i)
    # probs[-1] = tf.Print(probs[-1], [tf.exp(np.NINF)], 'test %d:' % i)

    # probs[-1] = tf.Print(probs[-1], [sampled], 'sampled %d:' % i)
    # probs[-1] = tf.Print(probs[-1], [serp_labels[-1]], 'labels %d:' % i)
    # probs[-1] = tf.Print(probs[-1], [probs[-1]], 'probs %d:' % i)


  result['labels'] = tf.stack(serp_labels, axis=1)
  result['probs'] = tf.stack(probs, axis=1)
  result['serp_ind'] = tf.stack(serp_ind, axis=1)


  # result['probs'] = tf.Print(result['probs'], [result['serp_ind']], 'serp_ind: ')
  # result['probs'] = tf.Print(result['probs'], [tf.exp(result['probs'])], 'prob: ')

  return result
コード例 #7
0
def model(params, examples, labels, epsilon, stats_ops):
    """Epsilon-greedy SERP plus display-position construction for one query.

    At each SERP slot the scoring subnetwork scores every document against
    the current GRU state; a PositionEpsilonGreedy policy picks both a
    document and a display position, and the (doc, position) pair is fed
    back into the GRU.  Selected labels are re-sorted into display order
    before returning.

    Returns:
      dict with the selected docs ('serp'), their indices, labels in both
      display and selection order, the position order, and the candidate
      set padded to params['max_docs'] ('docs').
    """
    serp_len = params['serp_len']
    doc_emb_size = params['doc_emb'][-1]  # NOTE(review): unused
    hidden_state_size = params['hidden_state_size']
    docs = mu._get_doc_tensors(examples, params, 'main')

    result = {}

    n_docs = tf.shape(docs)[0]
    result['docs_per_query'] = n_docs

    doc_emb = mu._shared_doc_embeddings(docs,
                                        params,
                                        '/main/doc_emb',
                                        inference=True)

    hidden_init = tf.zeros([1, hidden_state_size])

    gru_fn = ru.get_gru_layer(params,
                              '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)

    policy = PositionEpsilonGreedy(serp_len, epsilon, n_docs)

    hidden_state = hidden_init  #tf.zeros([n_docs, hidden_state_size])
    serp = []
    serp_pos = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        # Pair the single current state with every candidate document.
        hidden_states = tf.tile(hidden_state, [n_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=1)
        scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=i > 0,
                                       inference=True)
        for j in range(serp_len):
            mean_summary(params, 'policy_%d/pos_%d' % (i, j), scores[:, j],
                         stats_ops)

        # The policy returns which document to show and at which position.
        action_ind, action_pos = policy.choose(scores)
        select_doc = tf.gather(docs, action_ind)

        serp.append(select_doc)
        serp_ind.append(action_ind)

        # Past the end of the candidate list: label 0 and a sentinel
        # position (serp_len) that sorts after all real positions.
        in_doc = tf.less(i, n_docs)
        serp_labels.append(
            tf.cond(
                in_doc,
                lambda: tf.gather(labels, action_ind, axis=0),
                lambda: tf.constant([[0]], dtype=tf.int64),
            ))
        serp_labels[-1].set_shape([1, 1])
        serp_pos.append(
            tf.cond(
                in_doc,
                lambda: tf.expand_dims(action_pos, axis=1),
                lambda: tf.constant([[serp_len]], dtype=tf.int32),
            ))
        serp_pos[-1].set_shape([1, 1])

        if i < serp_len - 1:
            a_pos = tf.expand_dims(tf.cast(action_pos, tf.float32), axis=1)
            a_doc = tf.gather(doc_emb, action_ind)
            gru_input = tf.concat([a_doc, a_pos], axis=1)
            hidden_state = gru_fn(hidden_state, gru_input)

    pos_order = tf.concat(serp_pos, axis=1)

    # Sort selection-order labels into display (position) order.
    order_ind = tf.nn.top_k(-pos_order, serp_len)[1]
    # order_ind.set_shape()
    unordered_labels = tf.squeeze(tf.concat(serp_labels, axis=1), axis=0)
    ordered_labels = tf.gather(unordered_labels, order_ind)

    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    # NOTE(review): 'serp_doc' duplicates 'serp_ind' (both stack serp_ind);
    # confirm whether it was meant to stack the selected documents instead.
    result['serp_doc'] = tf.stack(serp_ind, axis=1)
    result['labels'] = ordered_labels
    result['select_order_labels'] = unordered_labels[None, :]
    # pos_order = tf.Print(pos_order, [unordered_labels[i] for i in range(10)], 'unordered: ')
    # pos_order = tf.Print(pos_order, [pos_order[0, i] for i in range(10)], 'reranking: ')
    # pos_order = tf.Print(pos_order, [result['labels'][0, i] for i in range(10)], 'ordered: ')
    # pos_order = tf.Print(pos_order, [n_docs], '                        ')
    result['pos_order'] = pos_order

    # tf.summary.histogram("label/output", result['labels'])

    # if params['context_input']:
    # Pad the candidate set to a fixed size for replay storage.
    max_docs = params['max_docs']
    padding = tf.convert_to_tensor([[0, max_docs - n_docs], [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs, [1, max_docs, docs.shape[1].value])
    result['docs'] = padded_docs
    return result
コード例 #8
0
def loss(params, replay, rewards):
    """Monte-Carlo and DQN regression losses for the position-aware model.

    Recomputes the replayed episode's per-position scores (with input
    dropout on both the candidate collection and the shown SERP), then:
      * MC loss: squared error between scores and observed rewards.
      * DQN loss: squared error against bootstrapped Q-targets built from
        the label network (argmax filter from max_train_filter, values
        from get_label_scores).
    Positions beyond the query's document count are masked out of both.

    Returns:
      (mc_loss, dqn_loss) scalar tensors.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_in_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    n_docs = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']

    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

    doc_col = mu._shared_doc_embeddings(drop_col,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    init_hidden = tf.zeros([n_docs, hidden_state_size])

    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

    doc_emb = mu._shared_doc_embeddings(drop_docs,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

    # tf.scan iterates the leading axis: [serp_len, batch, emb].
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params,
                           '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)

    pos = tf.expand_dims(tf.cast(batch_pos, tf.float32), axis=2)
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    # State *before* each position: initial state plus all but the last.
    score_states = tf.concat(
        [init_hidden[None, :, :], hidden_states[:-1, :, :]], axis=0)
    score_input = tf.concat([score_states, serp_emb], axis=2)
    pos_scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=True,
                                       inference=False)
    pos_scores = tf.transpose(pos_scores, [1, 0, 2])
    # Select, per step, the score of the position actually displayed.
    pos_filter = tf.one_hot(batch_pos, serp_len)

    scores = tf.reduce_sum(pos_scores * pos_filter, axis=2)
    unfiltered_mc_loss = (rewards - scores)**2

    max_filter = max_train_filter(params, hidden_states, serp_len, doc_col,
                                  replay['serp_ind'], batch_pos, docs_in_query,
                                  max_n_docs)

    label_scores = get_label_scores(params, replay)

    # Label-network value of the main-network argmax (double-Q style).
    double_max_scores = tf.reduce_sum(max_filter * label_scores, axis=[2, 3])
    q_values = tf.concat([double_max_scores, rewards], axis=1)

    # The last filled position bootstraps from the reward itself.
    end_mask = tf.equal(docs_in_query - 1, tf.range(serp_len)[None, :])
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_values = tf.where(end_mask, reward_tile, q_values)

    unfiltered_dqn_loss = (scores - q_values)**2

    doc_denom = tf.cast(tf.reduce_sum(docs_in_query), tf.float32)
    mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)

    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_sum(filtered_mc_loss) / doc_denom

    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_sum(filtered_dqn_loss) / doc_denom

    # dqn_loss = tf.Print(dqn_loss, [scores[0,i] for i in range(10)], 'Scores:')
    # dqn_loss = tf.Print(dqn_loss, [q_values[0,i] for i in range(10)], 'Q_values:')
    # dqn_loss = tf.Print(dqn_loss, [unfiltered_dqn_loss[0,i] for i in range(10)], 'Loss:')
    # dqn_loss = tf.Print(dqn_loss, [n_docs], 'DQN:')
    # mc_loss = tf.Print(mc_loss, [n_docs], 'MC:')

    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)

    tf.summary.scalar('DQN/double_max_scores',
                      tf.reduce_mean(double_max_scores))

    return mc_loss, dqn_loss
コード例 #9
0
def calculate_gru_loss(params, replay, rewards):
  """Monte-Carlo and DQN regression losses for the GRU ranking model.

  Recomputes the replayed episode's per-position scores (with input
  dropout on both the candidate collection and the shown SERP), then
  regresses them onto observed rewards (MC loss) and onto bootstrapped
  Q-targets built from the label network (DQN loss).  Positions beyond
  the query's document count are masked out of both losses.

  Returns:
    (mc_loss, dqn_loss) scalar tensors.
  """
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  docs_in_query = replay['docs_per_query']
  batch_docs = replay['serp']
  max_n_docs = params['max_docs']
  n_docs = tf.shape(batch_docs)[0]
  hidden_state_size = params['hidden_state_size']

  # Dropout on the full candidate collection, embedded with the shared
  # (already created) main embedding variables.
  drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

  doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

  init_hidden = tf.zeros([n_docs, hidden_state_size])

  drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

  doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                      '/main/doc_emb',
                                      label_network=False,
                                      inference=False,
                                      reuse_variable_scope=True)

  # tf.scan iterates the leading axis: [serp_len, batch, emb].
  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/main/gru',
                               label_network=False,
                               inference=False,
                               reuse_variable_scope=True)

  # Hidden state *before* each position: the initial state followed by the
  # states after the first serp_len-1 documents.
  hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
  hidden_states = tf.concat([init_hidden[None, :, :], hidden_states], axis=0)

  # BUG FIX: the original referenced the undefined names `score_input` and
  # `i` (NameError at graph-construction time).  Build the scoring input
  # from the per-position states and document embeddings, reusing the
  # existing '/main/scoring' variables, mirroring the sibling loss().
  score_input = tf.concat([hidden_states, serp_emb], axis=2)
  scores = mu._create_subnetwork(score_input,
                                 params,
                                 subnetwork_name='/main/scoring',
                                 label_network=False,
                                 reuse_variable_scope=True,
                                 inference=False)

  # [serp_len, batch, 1] -> [batch, serp_len]
  scores = tf.squeeze(tf.transpose(scores, [1,0,2]), axis=2)
  unfiltered_mc_loss = (rewards-scores)**2.

  max_filter = max_train_filter(gru, hidden_states, serp_len,
                                doc_col, replay['serp_ind'],
                                docs_in_query, max_n_docs)

  label_scores = get_label_scores(params, replay)

  # Label-network value of the main-network argmax (double-Q style).
  double_max_scores = tf.reduce_sum(max_filter*label_scores, axis=2)
  q_values = tf.concat([double_max_scores, rewards], axis=1)

  # The last filled position bootstraps from the reward itself.
  end_mask = tf.equal(docs_in_query-1,
                      tf.expand_dims(tf.range(serp_len), axis=0))
  reward_tile = tf.tile(rewards, [1, serp_len])
  q_values = tf.where(end_mask, reward_tile, q_values)

  unfiltered_dqn_loss = (scores - q_values)**2.

  doc_denom = tf.cast(tf.reduce_sum(tf.minimum(docs_in_query, serp_len)), tf.float32)
  mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)

  filtered_mc_loss = tf.where(mask,
                           unfiltered_mc_loss,
                           tf.zeros_like(unfiltered_mc_loss))
  mc_loss = tf.reduce_sum(filtered_mc_loss)/doc_denom

  filtered_dqn_loss = tf.where(mask,
                               unfiltered_dqn_loss,
                               tf.zeros_like(unfiltered_dqn_loss))

  dqn_loss = tf.reduce_sum(filtered_dqn_loss)/doc_denom

  tf.summary.scalar('monte_carlo/loss', mc_loss)
  tf.summary.scalar('DQN/loss', dqn_loss)

  # Average bootstrap value over real (non-terminal, non-padded) positions.
  filtered_double_max = tf.where(mask[:,:-1],
                                 double_max_scores,
                                 tf.zeros_like(double_max_scores))
  double_max_denom = doc_denom - tf.cast(n_docs, tf.float32)
  double_max_mean = tf.reduce_sum(filtered_double_max)/double_max_denom
  tf.summary.scalar('DQN/double_max_scores', double_max_mean)

  return mc_loss, dqn_loss
コード例 #10
0
def calculate_pbow_loss(params, replay, rewards):
  """Per-position losses for the PBOW model from replayed transitions.

  Re-embeds the replayed per-position documents (with dropout for the
  main network, without for the label network), rebuilds the running
  bag-of-words for both networks, and accumulates per-position losses
  via get_pos_loss; the final position's target is the reward itself.

  Returns:
    list of mean losses as produced by get_pos_loss (indexed as
    total / q-loss / monte-carlo, judging by the summaries below).
  """
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  batch_size = tf.shape(replay['doc_0'])[0]
  docs_in_query = replay['docs_per_query']

  # Gather documents from replay.
  docs = [replay['doc_%d' % i] for i in range(serp_len)]
  drop_docs = [tf.nn.dropout(x, visible_dropout) for x in docs]
  max_docs = [replay['max_doc_%d' % i] for i in range(1, serp_len)]

  # Create embeddings from documents.
  doc_i = tf.zeros([batch_size, 1])  # position column prepended to each doc
  main_emb = []
  label_emb = []
  max_emb = []
  for i in range(serp_len):
    main_pos_doc = tf.concat([doc_i, drop_docs[i]], axis=1)
    main_emb.append(mu._shared_doc_embeddings(
                    main_pos_doc,
                    params, '/main/doc_emb',
                    reuse_variable_scope=True,
                    inference=False))

    label_pos_doc = tf.concat([doc_i, docs[i]], axis=1)
    label_emb.append(mu._shared_doc_embeddings(label_pos_doc,
                      params, '/label/doc_emb',
                      reuse_variable_scope=i>0,
                      label_network=True,
                      inference=True))
    doc_i += 1
    # Documents for max choices, index is one behind.
    if i < serp_len - 1:
      max_pos_doc = tf.concat([doc_i, max_docs[i]], axis=1)
      max_emb.append(mu._shared_doc_embeddings(max_pos_doc,
                        params, '/label/doc_emb',
                        reuse_variable_scope=True,
                        label_network=True,
                        inference=True))

  # Running bags-of-words for the main and label networks.
  serp_pbow = main_emb[0]
  label_pbow = label_emb[0] 
  losses = None
  for i in range(serp_len-1):
    scores = mu._create_subnetwork(serp_pbow, params, '/main/score',
                                   reuse_variable_scope=True,
                                   label_network=False,
                                   inference=False)

    # Label-network score of the greedy (max) document at the next slot,
    # used as the bootstrap target for this position.
    max_scores = mu._create_subnetwork(label_pbow + max_emb[i],
                                       params,
                                       '/label/score',
                                       reuse_variable_scope=i>0,
                                       label_network=True,
                                       inference=False)

    losses = get_pos_loss(params, losses, i, docs_in_query,
                          scores, max_scores, rewards)

    serp_pbow += main_emb[i+1]
    if i < serp_len - 2:
      label_pbow += label_emb[i+1]

  # Final position: no bootstrap target, regress directly on the reward.
  scores = mu._create_subnetwork(serp_pbow, params,
                                 '/main/score',
                                 reuse_variable_scope=True,
                                 label_network=False,
                                 inference=False)
  losses = get_pos_loss(params, losses,
                        serp_len-1, docs_in_query,
                        scores, rewards, rewards)

  mean_losses = [tf.reduce_mean(loss) for loss in losses]

  tf.summary.scalar('q_loss/loss', mean_losses[1])
  tf.summary.scalar('monte_carlo/loss', mean_losses[2])

  tf.summary.scalar('loss', mean_losses[0])
  return mean_losses
コード例 #11
0
def gru_model(params, examples, labels, epsilon):
  """Epsilon-greedy SERP construction driven by a GRU over chosen docs.

  At each SERP slot the scoring subnetwork scores every document in the
  batch against the current GRU state; an epsilon-greedy policy picks one
  document per query, whose embedding is fed back into the GRU for the
  next slot.

  Args:
    params: hyperparameter dict ('serp_len', 'hidden_state_size',
      'max_docs', ...).
    examples: dict with 'doc_tensors' [batch, max_docs_in_batch, feat]
      and per-query document counts 'n_docs'.
    labels: per-document labels, gathered for the chosen documents.
    epsilon: exploration probability for the epsilon-greedy policy.

  Returns:
    dict with 'docs_per_query', the chosen docs ('serp'), their indices
    ('serp_ind'), their labels, and the document set padded to
    params['max_docs'] ('docs') for replay storage.
  """
  serp_len = params['serp_len']
  hidden_state_size = params['hidden_state_size']
  docs = examples['doc_tensors']
  batch_size = docs.shape[0].value
  batch_max_docs = tf.shape(docs)[1]
  docs_per_query = examples['n_docs']

  assert not params['context_input'], 'Context not supported for GRU.'

  result = {
    'docs_per_query': docs_per_query,
    }

  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)

  hidden_init = tf.zeros([batch_size, hidden_state_size])

  gru_fn = ru.get_gru_layer(params, '/main/gru',
                                  label_network=False,
                                  inference=True,
                                  reuse_variable_scope=False)

  policy = mu.EpsilonGreedy(epsilon, batch_size, batch_max_docs, docs_per_query)
  hidden_state = hidden_init
  serp = []
  serp_labels = []
  serp_ind = []
  for i in range(serp_len):
    # Pair the current state with every candidate document.
    hidden_states = tf.tile(hidden_state[:, None, :], [1, batch_max_docs, 1])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=i>0,
                                   inference=True)

    tf.summary.scalar('policy/scores/pos_%d' % i, tf.reduce_mean(scores))
    action = policy.choose(scores)
    serp_ind.append(action)

    nd_ind = tf.stack([tf.range(batch_size, dtype=tf.int64), action], axis=1)
    select_doc = tf.gather_nd(docs, nd_ind)
    select_labels = tf.gather_nd(labels, nd_ind)[:, None]

    # Positions past the query's document count get label 0.
    serp_labels.append(tf.where(
      tf.less(i, docs_per_query),
      select_labels,
      tf.zeros([batch_size, 1], dtype=tf.int32),
      ))
    serp.append(select_doc)

    if i < serp_len-1:
      select_emb = tf.gather_nd(doc_emb, nd_ind)
      hidden_state = gru_fn(hidden_state, select_emb)

  result['serp'] = tf.stack(serp, axis=1)
  # BUG FIX: removed the leftover debug wrapper
  # `result['serp_ind'] = tf.Print(result['serp_ind'], serp_ind, 'serp_ind: ')`
  # which logged every step to stderr (tf.Print is deprecated anyway).
  result['serp_ind'] = tf.stack(serp_ind, axis=1)
  result['labels'] = tf.concat(serp_labels, axis=1)
  tf.summary.histogram("label/output", result['labels'])

  # Pad the document set to a fixed width so replay tensors have a static
  # shape.
  max_docs = params['max_docs']
  padding = tf.convert_to_tensor([[0, 0], [0, max_docs-batch_max_docs], [0, 0]])
  padded_docs = tf.pad(docs, padding, "CONSTANT")
  padded_docs = tf.reshape(padded_docs, [batch_size, max_docs, docs.shape[2].value])
  result['docs'] = padded_docs

  return result
コード例 #12
0
def model(params, examples, labels, epsilon, stats_ops):
  """Build the behavior policy that assembles a SERP document by document.

  At each of the serp_len steps the network scores every candidate document
  conditioned on a GRU hidden state, epsilon-greedily selects one, and then
  separately scores and selects a display *position* for the selected
  document.  The selection-order labels are finally re-ordered by the chosen
  positions so downstream consumers see them in display order.

  Args:
    params: hyperparameter dict; reads 'serp_len', 'doc_emb',
      'hidden_state_size' and 'max_docs'.
    examples: dict with 'doc_tensors' (batch x docs x features) and
      'n_docs' (number of real documents per query).
    labels: per-document relevance labels, gathered as labels[batch, doc].
    epsilon: exploration rate for the PositionEpsilonGreedy policy.
    stats_ops: statistics accumulators passed through to mean_summary.

  Returns:
    dict with 'serp', 'serp_ind', position-ordered 'labels',
    'select_order_labels', 'pos_order', 'docs' (zero-padded to
    params['max_docs']) and 'docs_per_query'.
  """
  serp_len = params['serp_len']
  doc_emb_size = params['doc_emb'][-1]
  hidden_state_size = params['hidden_state_size']
  docs = examples['doc_tensors']
  batch_size = docs.shape[0].value  # static batch size (TF1 Dimension)
  batch_max_docs = tf.shape(docs)[1]  # dynamic: widest doc list in batch
  docs_per_query = examples['n_docs']

  result = {
    'docs_per_query': docs_per_query,
    }

  # Shared document embedding (inference mode); scorers below consume it.
  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)

  hidden_init = tf.zeros([batch_size, hidden_state_size])

  gru_fn = ru.get_gru_layer(params, '/main/gru',
                            label_network=False,
                            inference=True,
                            reuse_variable_scope=False)

  policy = PositionEpsilonGreedy(serp_len, epsilon, batch_size,
                                 batch_max_docs, docs_per_query)

  hidden_state = hidden_init
  serp = []         # selected document feature vectors, in selection order
  serp_pos = []     # chosen display position at each selection step
  serp_labels = []  # labels of the selected documents, selection order
  serp_ind = []     # candidate indices of the selected documents
  for i in range(serp_len):
    # Broadcast the current hidden state next to every candidate document.
    hidden_states = tf.tile(hidden_state[:, None, :], [1, batch_max_docs, 1])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    # Variables are created on the first step and reused afterwards.
    doc_scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring/doc',
                                       label_network=False,
                                       reuse_variable_scope=i>0,
                                       inference=True,
                                       n_output=1)

    action_ind = policy.choose_doc(doc_scores)

    # (batch, doc) index pairs for gathering the chosen documents.
    ind_nd = tf.stack([tf.range(batch_size, dtype=tf.int64), action_ind],
                      axis=1)

    select_doc = tf.gather_nd(docs, ind_nd)

    serp.append(select_doc) 
    serp_ind.append(action_ind)

    # Score a display position for the document just selected.
    select_emb = tf.gather_nd(doc_emb, ind_nd)
    pos_input = tf.concat([hidden_state, select_emb], axis=1)
    # NOTE(review): n_output is hard-coded to 10 here (the loss-side
    # counterpart uses n_output=serp_len) — presumably serp_len <= 10;
    # confirm against params['serp_len'].
    pos_scores = mu._create_subnetwork(pos_input,
                                       params,
                                       subnetwork_name='/main/scoring/pos',
                                       label_network=False,
                                       reuse_variable_scope=i>0,
                                       inference=True,
                                       n_output=10)
    # pos_scores = tf.Print(pos_scores, [pos_scores[0,x] for x in range(10)], 'scores %d: ' % i)

    mean_summary(params, 'policy_%d/doc' % i,
                 tf.gather_nd(doc_scores, ind_nd), stats_ops)
    for j in range(serp_len):
      mean_summary(params, 'policy_%d/pos_%d' % (i, j),
                   pos_scores[:, j], stats_ops)

    action_pos = policy.choose_pos(pos_scores)
    # if i == 0:
    #   action_pos = tf.Print(action_pos, [pos_scores[0,x] for x in range(10)], 'pos_scores: ')
    #   action_pos = tf.Print(action_pos, [action_pos], 'pos: ')

    # Steps beyond the query's real document count get label 0 and the
    # sentinel position serp_len, which sorts after every real position.
    in_doc = tf.less(i, docs_per_query[:, 0])
    serp_labels.append(tf.where(
      in_doc,
      tf.gather_nd(labels, ind_nd),
      tf.zeros([batch_size], dtype=tf.int32),
      ))
    serp_pos.append(tf.where(
      in_doc,
      action_pos,
      tf.fill([batch_size], tf.cast(serp_len, dtype=tf.int64)),
      ))

    if i < serp_len-1:
      # Feed the selection (embedding + chosen position) back into the GRU.
      a_pos = tf.cast(action_pos, tf.float32)[:, None]
      gru_input = tf.concat([select_emb, a_pos], axis=1)
      hidden_state = gru_fn(hidden_state, gru_input)

  # Re-order the selection-order labels by chosen display position:
  # top_k over the negated positions yields indices sorted by ascending
  # position (smallest position first).
  pos_order = tf.stack(serp_pos, axis=1)
  _, order_ind = tf.nn.top_k(-pos_order, serp_len)
  unordered_labels = tf.stack(serp_labels, axis=1)
  batch_ind_nd = tf.tile(tf.range(batch_size)[:, None], [1, serp_len])
  order_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                           tf.reshape(order_ind, [-1])],
                           axis=1)
  ordered_labels = tf.gather_nd(unordered_labels, order_ind_nd)
  ordered_labels = tf.reshape(ordered_labels, [batch_size, serp_len])
  
  result['serp'] = tf.stack(serp, axis=1)
  result['serp_ind'] = tf.stack(serp_ind, axis=1)
  result['labels'] = ordered_labels
  result['select_order_labels'] = unordered_labels
  result['pos_order'] = pos_order

  # Zero-pad the document tensor to a fixed width so the replay buffer
  # sees a static shape.
  max_docs = params['max_docs']
  padding = tf.convert_to_tensor([[0, 0], [0, max_docs-batch_max_docs], [0, 0]])
  padded_docs = tf.pad(docs, padding, "CONSTANT")
  padded_docs = tf.reshape(padded_docs, [batch_size, max_docs, docs.shape[2].value])
  result['docs'] = padded_docs
  return result
コード例 #13
0
def loss(params, replay, rewards, doc_rewards):
  """Monte-Carlo and double-DQN losses for the joint doc/position model.

  Re-runs the scoring networks (training mode, with input dropout) over a
  replayed SERP, then regresses both the document scores and the position
  scores towards either the episode return (monte carlo) or bootstrapped
  Q-targets produced by the frozen label network.

  Args:
    params: hyperparameter dict; reads 'serp_len', 'visible_dropout',
      'max_docs', 'hidden_state_size' and 'doc_rewards'.
    replay: dict with 'serp', 'serp_ind', 'pos_order', 'docs' and
      'docs_per_query' as produced by model().
    rewards: per-query episode reward, shape (batch, 1).
    doc_rewards: per-document rewards; only used when
      params['doc_rewards'] is set.

  Returns:
    (mc_loss, dqn_loss) scalar tensors.
  """
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  docs_per_query = replay['docs_per_query']
  batch_docs = replay['serp']
  batch_pos = replay['pos_order']
  max_n_docs = params['max_docs']
  batch_size = batch_docs.shape[0]
  hidden_state_size = params['hidden_state_size']
  doc_level_rewards = params['doc_rewards']

  # True for SERP slots backed by a real document of the query.
  mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)

  init_hidden = tf.zeros([batch_size, hidden_state_size])

  # Dropout on the raw features acts as input noise during training.
  drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

  doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

  drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

  doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                      '/main/doc_emb',
                                      inference=False,
                                      reuse_variable_scope=True)

  # Replay the GRU over the SERP (time-major), feeding each document
  # embedding together with its chosen position, mirroring model().
  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/main/gru',
                               label_network=False,
                               inference=False,
                               reuse_variable_scope=True)

  pos = tf.cast(batch_pos, tf.float32)[:, :, None]
  pos = tf.transpose(pos, [1, 0, 2])
  gru_input = tf.concat([serp_emb, pos], axis=2)
  hidden_states = tf.scan(gru, gru_input, init_hidden)
  # Shift by one so hidden_states[i] is the state *before* document i.
  hidden_states = tf.concat([init_hidden[None, :, :],
                            hidden_states[:-1, :, :]], axis=0)
  hidden_states = tf.transpose(hidden_states, [1, 0, 2])

  score_input = tf.concat([hidden_states, doc_emb], axis=2)
  doc_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/main/scoring/doc',
                                     label_network=False,
                                     reuse_variable_scope=True,
                                     inference=False)[:, :, 0]
  pos_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/main/scoring/pos',
                                     label_network=False,
                                     reuse_variable_scope=True,
                                     inference=False,
                                     n_output=serp_len)

  # Select, per step, the score of the position that was actually chosen.
  # Masked (padding) slots index position 0; their loss is filtered later.
  batch_pos_filtered = tf.where(mask,
                                batch_pos,
                                tf.zeros_like(batch_pos))
  # Flattened (batch, step, position) index triples: the batch index
  # repeats serp_len times while the step index cycles 0..serp_len-1.
  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len])
  serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[:, None], [batch_size, 1])
  pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(serp_ind_nd, [-1]),
                         tf.reshape(batch_pos_filtered, [-1]),
                        ], axis=1)
  pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
  pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])

  # Monte-carlo regression target: episode reward, or reward-to-go when
  # doc-level rewards are available.
  if not doc_level_rewards:
    unfiltered_mc_loss = (rewards-pos_scores)**2 + (rewards-doc_scores)**2
  else:
    cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
    unfiltered_mc_loss = (cum_rewards-pos_scores)**2 + (cum_rewards-doc_scores)**2

  # Greedy (argmax) actions under the training network, scored by the
  # frozen label network: double-Q-learning targets.
  max_doc_ind, max_pos = max_train_doc_pos(params, hidden_states,
                                        doc_emb, serp_len, doc_col,
                                        replay['serp_ind'], batch_pos,
                                        docs_per_query, max_n_docs)

  label_doc_scores, q_pos_values = get_label_scores(params, replay, max_doc_ind, max_pos)

  if not doc_level_rewards:
    # Bootstrap: target at step i is the label score of step i+1; the
    # terminal step's target is the episode reward itself.
    q_doc_values = tf.concat([label_doc_scores, rewards], axis=1)
    end_mask = tf.equal(docs_per_query-1,
                        tf.range(serp_len)[None, :])
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_doc_values = tf.where(end_mask, reward_tile, q_doc_values)
  else:
    # With doc-level rewards the terminal bootstrap value is zero and the
    # immediate reward is added on top.
    zero_end = tf.zeros([batch_size, 1])
    q_doc_values = tf.concat([label_doc_scores, zero_end], axis=1)
    end_mask = tf.equal(docs_per_query-1,
                        tf.range(serp_len)[None, :])
    q_doc_values = tf.where(end_mask, tf.zeros_like(q_doc_values), q_doc_values)
    q_doc_values += doc_rewards


    # q_doc_values = tf.Print(q_doc_values, [batch_pos[0,x] for x in range(10)], 'pos: ')
    # q_doc_values = tf.Print(q_doc_values, [pos_scores[0,x] for x in range(10)], 'pos_scores: ')
    # q_doc_values = tf.Print(q_doc_values, [q_doc_values[0,x] for x in range(10)], 'q-values: ')
    # q_doc_values = tf.Print(q_doc_values, [doc_rewards[0,x] for x in range(10)], 'doc_rewards: ')


  unfiltered_doc_loss = (doc_scores - q_pos_values)**2
  unfiltered_pos_loss = (pos_scores - q_doc_values)**2
  unfiltered_dqn_loss = unfiltered_doc_loss + unfiltered_pos_loss

  # Per-query normalizer: number of real docs, clamped to [1, serp_len].
  query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
  query_denom = tf.minimum(query_denom, serp_len)
  query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))

  # Zero out padding slots before averaging.
  filtered_mc_loss = tf.where(mask,
                              unfiltered_mc_loss,
                              tf.zeros_like(unfiltered_mc_loss))
  mc_loss = tf.reduce_mean(tf.reduce_sum(filtered_mc_loss, axis=1)/query_denom)

  filtered_dqn_loss = tf.where(mask,
                               unfiltered_dqn_loss,
                               tf.zeros_like(unfiltered_dqn_loss))
  dqn_loss = tf.reduce_mean(tf.reduce_sum(filtered_dqn_loss, axis=1)/query_denom)

  tf.summary.scalar('monte_carlo/loss', mc_loss)

  tf.summary.scalar('DQN/loss', dqn_loss)

  tf.summary.scalar('DQN/max_doc_scores', tf.reduce_mean(label_doc_scores))
  tf.summary.scalar('DQN/max_pos_scores', tf.reduce_mean(q_pos_values))

  return mc_loss, dqn_loss
コード例 #14
0
def get_label_scores(params, replay, max_doc_ind, max_pos):
  """Score the greedy (argmax) doc/pos actions under the frozen label network.

  Replays the label network's GRU over the stored SERP and evaluates
  (a) the documents selected as greedy by the training network and
  (b) the position scores at the greedy positions — the double-DQN targets
  consumed by loss().

  Args:
    params: hyperparameter dict ('serp_len', 'max_docs',
      'hidden_state_size').
    replay: dict with 'docs', 'serp' and 'pos_order'.
    max_doc_ind: greedy document indices for steps 1..serp_len-1,
      shape (batch, serp_len-1).
    max_pos: greedy position indices per step, shape (batch, serp_len).

  Returns:
    (doc_scores, pos_scores):
      doc_scores: (batch, serp_len-1) label-network scores of the greedy
        documents.
      pos_scores: (batch, serp_len) label-network scores at the greedy
        positions.
  """
  serp_len = params['serp_len']
  all_docs = replay['docs']
  batch_docs = replay['serp']
  batch_pos = replay['pos_order']
  max_n_docs = params['max_docs']
  batch_size = all_docs.shape[0]
  hidden_state_size = params['hidden_state_size']

  init_hidden = tf.zeros([batch_size, hidden_state_size])

  # Label-network ('/label/...') embedding of the replayed SERP; variables
  # are created here and reused for max_docs below.
  doc_emb = mu._shared_doc_embeddings(batch_docs, params,
                                      '/label/doc_emb',
                                      label_network=True,
                                      inference=True,
                                      reuse_variable_scope=False)

  # Gather the greedy documents from the full candidate collection.
  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len-1])
  doc_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(max_doc_ind, [-1]),
                        ], axis=1)
  max_docs = tf.gather_nd(all_docs, doc_ind_nd)
  max_docs = tf.reshape(max_docs, [batch_size, serp_len-1, all_docs.shape[2]])

  max_emb = mu._shared_doc_embeddings(max_docs, params,
                                      '/label/doc_emb',
                                      label_network=True,
                                      inference=True,
                                      reuse_variable_scope=True)

  # Replay the label GRU (time-major) over the first serp_len-1 steps,
  # feeding each document embedding plus its chosen position, so
  # hidden_states[i] is the state *after* document i.
  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/label/gru',
                               label_network=True,
                               inference=True,
                               reuse_variable_scope=False)

  pos = tf.cast(batch_pos, tf.float32)[:, :-1, None]
  pos = tf.transpose(pos, [1, 0, 2])
  gru_input = tf.concat([serp_emb[:-1, :, :], pos], axis=2)
  hidden_states = tf.scan(gru, gru_input, init_hidden)
  hidden_states = tf.transpose(hidden_states, [1, 0, 2])

  # Greedy document at step i+1 scored against the state after step i.
  score_input = tf.concat([hidden_states, max_emb], axis=2)
  doc_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/label/scoring/doc',
                                     label_network=True,
                                     inference=True,
                                     reuse_variable_scope=False)[:,:,0]

  # Position scores use the state *before* each document: prepend the
  # initial state and keep serp_len states.
  pos_states = tf.concat([init_hidden[:, None, :], hidden_states], axis=1)
  pos_input = tf.concat([pos_states, doc_emb], axis=2)
  # NOTE(review): n_output is hard-coded to 10 (elsewhere n_output=serp_len
  # is used); presumably serp_len <= 10 — confirm against params.
  pos_scores = mu._create_subnetwork(pos_input,
                                     params,
                                     subnetwork_name='/label/scoring/pos',
                                     label_network=True,
                                     inference=True,
                                     reuse_variable_scope=False,
                                     n_output=10)

  # Pick, per step, the score at the greedy position max_pos.
  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len])
  serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[None, :], [batch_size, 1])
  pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(serp_ind_nd, [-1]),
                         tf.reshape(max_pos, [-1]),
                        ],axis=1)
  pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
  pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])

  return doc_scores, pos_scores
コード例 #15
0
def loss(params, replay, rewards, doc_rewards):
    """Monte-Carlo and double-DQN losses for the single-scorer SERP model.

    Re-embeds the replayed documents with input dropout, replays the GRU
    over the selected SERP, and regresses the resulting scores towards
    (a) monte-carlo returns and (b) bootstrapped Q-targets from the frozen
    label network (double Q-learning via max_train_docs/get_label_scores).

    Args:
        params: hyperparameter dict; reads 'serp_len', 'visible_dropout',
            'replay_batch', 'hidden_state_size', 'context_input' and
            'doc_rewards'.
        replay: dict with 'serp', 'docs' and 'docs_per_query'.
        rewards: per-query episode reward, shape (batch, 1).
        doc_rewards: per-document rewards, shape (batch, serp_len); only
            used when params['doc_rewards'] is set.

    Returns:
        (mc_loss, dqn_loss) scalar tensors.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_per_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_size = params['replay_batch']
    hidden_state_size = params['hidden_state_size']
    doc_level_rewards = params['doc_rewards']

    # Dropout on the raw document features acts as input noise.
    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

    doc_col = mu._shared_doc_embeddings(drop_col,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        # Summarize the full candidate set with a context GRU; the state
        # after the last real document initializes the scoring GRU.
        context_gru_fn = ru.get_gru_layer(params,
                                          '/main/gru/context',
                                          label_network=False,
                                          inference=False,
                                          reuse_variable_scope=True)
        scan_input = tf.transpose(doc_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)

        # scan output is time-major, hence (doc_index, batch_index) pairs.
        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]],
                           axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)

    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

    doc_emb = mu._shared_doc_embeddings(drop_docs,
                                        params,
                                        '/main/doc_emb',
                                        label_network=False,
                                        inference=False,
                                        reuse_variable_scope=True)

    # Replay the GRU over the SERP (time-major).  Prepending init_hidden
    # and scanning only serp_len-1 steps yields, per step, the hidden
    # state *before* that document was placed.
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params,
                           '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)

    hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
    hidden_states = tf.concat([init_hidden[None, :, :], hidden_states], axis=0)
    score_input = tf.concat([hidden_states, serp_emb], axis=2)

    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=True,
                                   inference=False)

    scores = tf.transpose(scores, [1, 0, 2])[:, :, 0]

    # Monte-carlo target: episode reward, or the reward-to-go from each
    # step when per-document rewards are available.
    if not doc_level_rewards:
        unfiltered_mc_loss = (rewards - scores)**2.
    else:
        cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
        # Bug fix: this branch previously regressed against `rewards`,
        # leaving cum_rewards dead and ignoring doc-level rewards; the
        # parallel doc/pos loss uses cum_rewards as the MC target.
        unfiltered_mc_loss = (cum_rewards - scores)**2.

    # Double Q-learning: greedy docs under the training network, scored by
    # the frozen label network.
    max_train_ind = max_train_docs(params, replay, hidden_states, doc_col)
    label_scores = get_label_scores(params, replay, max_train_ind)
    if not doc_level_rewards:
        # Bootstrap: target at step i is the label score of step i+1; the
        # terminal step's target is the episode reward itself.
        q_values = tf.concat([label_scores, rewards], axis=1)

        end_mask = tf.equal(docs_per_query - 1, tf.range(serp_len)[None, :])
        reward_tile = tf.tile(rewards, [1, serp_len])
        q_values = tf.where(end_mask, reward_tile, q_values)

        unfiltered_dqn_loss = (scores - q_values)**2.
    else:
        # With doc-level rewards the terminal bootstrap value is zero and
        # the immediate reward is added on top.
        zero_end = tf.zeros([batch_size, 1])
        q_values = tf.concat([label_scores, zero_end], axis=1)
        end_mask = tf.equal(docs_per_query - 1, tf.range(serp_len)[None, :])
        q_values = tf.where(end_mask, tf.zeros_like(q_values), q_values)
        q_values += doc_rewards

        unfiltered_dqn_loss = (scores - q_values)**2.

    # Zero out padding slots, then average per query over the number of
    # real documents (clamped to [1, serp_len]).
    mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)
    query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
    query_denom = tf.minimum(query_denom, serp_len)
    query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))

    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_mc_loss, axis=1) / query_denom)

    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_dqn_loss, axis=1) / query_denom)

    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)

    tf.summary.scalar('DQN/double_max_scores', tf.reduce_mean(label_scores))

    return mc_loss, dqn_loss
コード例 #16
0
def get_label_scores(params, replay, max_train_ind):
    """Score the greedy documents under the frozen label (target) network.

    Replays the label network's GRU over the stored SERP and evaluates the
    documents that the training network considers greedy — the double-DQN
    bootstrap targets for loss().

    Args:
        params: hyperparameter dict ('serp_len', 'hidden_state_size',
            'context_input').
        replay: dict with 'serp', 'docs' and 'docs_per_query'.
        max_train_ind: greedy document indices for steps 1..serp_len-1,
            shape (batch, serp_len-1).

    Returns:
        (batch, serp_len-1) tensor of label-network scores, wrapped in
        tf.stop_gradient so the target network receives no updates.
    """
    serp_len = params['serp_len']
    batch_size = replay['serp'].shape[0]
    hidden_state_size = params['hidden_state_size']
    docs_per_query = replay['docs_per_query']
    doc_col = replay['docs']
    # Gather the greedy documents from the full candidate collection.
    batch_ind = tf.tile(
        tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len - 1])
    max_ind = tf.stack(
        [tf.reshape(batch_ind, [-1]),
         tf.reshape(max_train_ind, [-1])], axis=1)
    max_docs = tf.gather_nd(doc_col, max_ind)
    max_docs = tf.reshape(max_docs, [batch_size, serp_len - 1, -1])

    # Label-network ('/label/...') embeddings; variables created here are
    # reused for the SERP (and context) embeddings below.
    max_emb = mu._shared_doc_embeddings(max_docs,
                                        params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=False)

    # Only the first serp_len-1 placed documents are needed: the greedy doc
    # at step i+1 is scored against the state after step i.
    doc_emb = mu._shared_doc_embeddings(replay['serp'][:, :-1],
                                        params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=True)

    gru = ru.get_gru_layer(params,
                           '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)

    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        # Mirror the training graph: a context GRU summarizes the full
        # candidate set and its state after the last real document
        # initializes the scoring GRU.
        emb_col = mu._shared_doc_embeddings(doc_col,
                                            params,
                                            '/label/doc_emb',
                                            inference=True,
                                            label_network=True,
                                            reuse_variable_scope=True)
        context_gru_fn = ru.get_gru_layer(params,
                                          '/label/gru/context',
                                          label_network=True,
                                          inference=True,
                                          reuse_variable_scope=False)
        scan_input = tf.transpose(emb_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)

        # scan output is time-major: (doc_index, batch_index) pairs.
        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]],
                           axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)

    # Replay the label GRU (time-major); hidden_states[:, i] is the state
    # after placing document i.
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    hidden_states = tf.scan(gru, serp_emb, init_hidden)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])

    score_input = tf.concat([hidden_states, max_emb], axis=2)
    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/label/scoring',
                                   label_network=True,
                                   reuse_variable_scope=False,
                                   inference=True)
    # stop_gradient: the label network is a frozen target network.
    return tf.stop_gradient(scores)[:, :, 0]