Example #1
def get_minibatch_input(minibatch, tokenized_paras, tokenized_paras_chars,
                        ques_to_para):
  # Variable length question, answer and paragraph sequences for batch.
  ques_lens_in = [ len(example[0]) for example in minibatch ]
  paras_in = [ tokenized_paras[ques_to_para[example[2]]] \
                 for example in minibatch ]
  paras_chars_in = [ tokenized_paras_chars[ques_to_para[example[2]]] \
                       for example in minibatch ]
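  # Reverse each word's characters to build input for the backward character LSTM.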
  paras_chars_in_b = [ [ c[::-1] for c in f ] for f in paras_chars_in ]
  paras_lens_in = [ len(para) for para in paras_in ]

  max_ques_len = max(ques_lens_in)
  max_para_len = max(paras_lens_in)

  ques_chars_forward = [ example[3] for example in minibatch ]
  ques_chars_backward = [ [ c[::-1] for c in f ] for f in ques_chars_forward ]
  ques_chars_lens_in = [ [ len(x) for x in ques_word_chars ] \
                           for ques_word_chars in ques_chars_forward ]
  paras_chars_lens_in = [ [ len(x) for x in para_word_chars ] \
                           for para_word_chars in paras_chars_in ]

  max_ques_wordlen = max([ max(x) for x in ques_chars_lens_in ])
  max_paras_wordlen = max([ max(x) for x in paras_chars_lens_in ])

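  # Pad the per-word character-length lists out to the sequence length, using 1
  # (presumably so padded positions still report a nonzero word length).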
  ques_chars_lens_in = [ pad(x, 1, max_ques_len) for x in ques_chars_lens_in ]
  paras_chars_lens_in = [ pad(x, 1, max_para_len) for x in paras_chars_lens_in ]

  # Question forward character LSTM input.
  ques_chars_forward_in = []
  zero_padded_word = [0] * max_ques_wordlen
  for ques in ques_chars_forward:
    ques_words = []
    for word in ques:
      ques_words.append(pad(word, 0, max_ques_wordlen))
    ques_chars_forward_in.append(pad(ques_words, zero_padded_word, max_ques_len))
  ques_chars_forward_in = np.array(ques_chars_forward_in)

  # Question backward character LSTM input.
  ques_chars_backward_in = []
  for ques in ques_chars_backward:
    ques_words = []
    for word in ques:
      ques_words.append(pad(word, 0, max_ques_wordlen))
    ques_chars_backward_in.append(pad(ques_words, zero_padded_word, max_ques_len))
  ques_chars_backward_in = np.array(ques_chars_backward_in)

  # Passage forward character LSTM input.
  paras_chars_forward_in = []
  zero_padded_word = [0] * max_paras_wordlen
  for para in paras_chars_in:
    para_words = []
    for word in para:
      para_words.append(pad(word, 0, max_paras_wordlen))
    paras_chars_forward_in.append(pad(para_words, zero_padded_word, max_para_len))
  paras_chars_forward_in = np.array(paras_chars_forward_in)

  # Passage backward character LSTM input.
  paras_chars_backward_in = []
  for para in paras_chars_in_b:
    para_words = []
    for word in para:
      para_words.append(pad(word, 0, max_paras_wordlen))
    paras_chars_backward_in.append(pad(para_words, zero_padded_word, max_para_len))
  paras_chars_backward_in = np.array(paras_chars_backward_in)

  # ans_in.shape = (2, batch)
  ans_in = np.array([ example[1] for example in minibatch ]).T

  # Fixed-length (padded) input sequences with shape=(seq_len, batch).
  ques_in_f = np.array([ pad(example[0], 0, max_ques_len)\
                         for example in minibatch ]).T
  paras_in_f = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T
  ques_in_b = np.array([ pad(example[0][::-1], 0, max_ques_len)\
                           for example in minibatch ]).T
  paras_in_b = np.array([ pad(para[::-1], 0, max_para_len) for para in paras_in ]).T

  passage_input_f = paras_in_f
  passage_input_b = paras_in_b
  question_input_f = ques_in_f
  question_input_b = ques_in_b
  passage_input_lens = paras_lens_in
  question_input_lens = ques_lens_in
  passage_input_chars_f = np.transpose(paras_chars_forward_in, (1, 0, 2))
  passage_input_chars_b = np.transpose(paras_chars_backward_in, (1, 0, 2))
  question_input_chars_f = np.transpose(ques_chars_forward_in, (1, 0, 2))
  question_input_chars_b = np.transpose(ques_chars_backward_in, (1, 0, 2))
  passage_input_chars_lens = np.transpose(np.array(paras_chars_lens_in))
  question_input_chars_lens = np.transpose(np.array(ques_chars_lens_in))
  answer_input = ans_in

  return passage_input_f, passage_input_b, question_input_f, question_input_b,\
         passage_input_lens, question_input_lens, passage_input_chars_f,\
         passage_input_chars_b, question_input_chars_f, question_input_chars_b,\
         passage_input_chars_lens, question_input_chars_lens, answer_input
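These examples all lean on a pad helper that the listing never shows. Below is a minimal sketch consistent with every call site here (pad(seq, pad_element, target_len), where target_len is always computed as a batch maximum); the repo's actual implementation may differ:

def pad(seq, pad_element, target_len):
  # Extend seq with copies of pad_element until it reaches target_len.
  # Callers compute target_len as a batch maximum, so seq is never longer.
  return seq + [pad_element] * (target_len - len(seq))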
Example #2
File: Main.py  Project: sheetalsh456/q-net
def test_model(args):
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
  dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
  dev_tokenized_paras, test_tokenized_paras, train_1_examples, dev_1_examples,\
  test_1_examples, train_order, dev_order, test_order, train_data, dev_data,\
  test_data = read_and_process_data(args)

  # Build model
  print "Building model."
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word)
  print "Done."

  #------------------------- Reload and test model ----------------------------#
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True

  with tf.Session(config=tf_config) as sess:
    print "Initializing variables."
    tf.global_variables_initializer().run()
    print "Done."
    assert args.ckpt != 0
    assert args.predictions_output_json is not None

    saver = tf.train.Saver(max_to_keep=args.epochs)
    print "Loading model from checkpoint."
    saver.restore(sess, args.model_dir + 'model' + str(args.ckpt) + '.ckpt')
    print "Done."

    # Run pass over test data to compute stats
    test_start_t = time.time()
    test_loss_sum = 0.0
    test_error_sum = 0
    test_error0_sum = 0
    test_error1_sum = 0
    all_predictions = {}
    for i, num in enumerate(test_order):
      print "\rTest: %.2f s (Done %d of %d) " %\
        ((time.time()-test_start_t)*(len(test_order)-i-1)/(i+1), i+1, len(test_order)),
      sys.stdout.flush()

      # Prepare test batch by computing lengths and padding
      test_batch = test[num:num+test_batch_size]
      ans_lens_in = [ len(example[1]) for example in test_batch ]
      ques_lens_in = [ len(example[0]) for example in test_batch ]
      paras_in = [ test_tokenized_paras[test_ques_to_para[example[3]]] \
                     for example in test_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ans_len = max(ans_lens_in)
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)

      ans_in = [ pad(example[1], 0, max_ans_len) for example in test_batch ]
      ques_in = [ pad(example[0], 0, max_ques_len) for example in test_batch ]
      paras_in = [ pad(para_in, 0, max_para_len) for para_in in paras_in ]
      labels = [ example[2] for example in test_batch ]

      # Add all batch qids to predictions dict, if they don't already exist
      qids = [ example[3] for example in test_batch ]
      answers = [ " ".join([ test_data.dictionary.get_word(idx) for idx in example[1] ]) \
                    for example in test_batch ]

      for qid in qids:
        if qid not in all_predictions:
          all_predictions[qid] = []

      test_loss, predictions =\
        sess.run([model.loss, model.predictions],
                 feed_dict = { model.ans_input: ans_in,
                               model.ans_lens: ans_lens_in,
                               model.ques_input: ques_in,
                               model.ques_lens: ques_lens_in,
                               model.passage_input: paras_in,
                               model.passage_lens: paras_lens_in,
                               model.labels: labels,
                               model.keep_prob: 1.0 })

      test_loss_sum += test_loss
      print "[Average loss : %.5f]" % (test_loss_sum/(i+1)),

      for qid, answer, prob in zip(qids, answers, predictions):
        all_predictions[qid].append([answer, prob])

      # Compute overall prediction-error, error for 0s, and error for 1s
      predictions = np.round(predictions)
      test_errors = np.abs(predictions-labels)
      test_error_sum += np.sum(test_errors)
      test_error0_sum += np.sum((1-np.array(labels)) * test_errors)
      test_error1_sum += np.sum(np.array(labels) * test_errors)

    # Print test stats for epoch
    print "\nTest Loss: %.4f (in time: %.2f s)" %\
    (test_loss_sum/len(test_order), (time.time() - test_start_t))
    print ("Total error: %d/%d (%.2f%%), 1 errors: %d/%d (%.2f%%), " +\
           "0 errors: %d/%d (%.2f%%)") %\
           (test_error_sum, len(test), 100 * float(test_error_sum)/len(test),
            test_error1_sum, test_1_examples, 100 * float(test_error1_sum)/test_1_examples,
            test_error0_sum, len(test)-test_1_examples,
            100 * float(test_error0_sum)/(len(test)-test_1_examples))

    # Select the best answer for each question (highest probability)
    print "Getting best answers."
    for qid in all_predictions:
      all_predictions[qid] = max(all_predictions[qid], key=itemgetter(1))[0]
    print "Done."

    # Dump the results json in the required format
    print "Dumping prediction results."
    with open(args.predictions_output_json, "w") as predictions_out:
      json.dump(all_predictions, predictions_out)
    print "Done."
Example #3
File: Main.py  Project: sheetalsh456/q-net
def train_model(args):
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
  dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
  dev_tokenized_paras, test_tokenized_paras, train_1_examples, dev_1_examples,\
  test_1_examples, train_order, dev_order, test_order, train_data, dev_data,\
  test_data = read_and_process_data(args)
    
  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word)

  #------------------------------ Train System ----------------------------------#
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True

  with tf.Session(config=tf_config) as sess:
    tf.global_variables_initializer().run()

    # Keep model parameters for all epochs
    saver = tf.train.Saver(max_to_keep=args.epochs)

    # Should we resume running from an existing checkpoint?
    last_done_epoch = config['cont']
    if last_done_epoch > 0:
      print "Continue from ckpt", last_done_epoch + 1
      saver.restore(sess, args.model_dir + 'model' + str(last_done_epoch) + '.ckpt')

    start_time = time.time()
    print "Starting training."

    for EPOCH in range(last_done_epoch+1, args.epochs):
      start_t = time.time()
      random.shuffle(train_order)
      train_loss_sum = 0.0
      for i, num in enumerate(train_order):
        print "\rTrain epoch %d, %.2f s - (Done %d of %d) " %\
              (EPOCH, (time.time()-start_t)*(len(train_order)-i-1)/(i+1), i+1,
               len(train_order)),

        # Create next batch by getting lengths and padding
        train_batch = train[train_order[i][0]:train_order[i][1]]
        random.shuffle(train_batch)
        ans_lens_in = [ len(example[1]) for example in train_batch ]
        ques_lens_in = [ len(example[0]) for example in train_batch ]
        paras_in = [ train_tokenized_paras[train_ques_to_para[example[3]]] \
                       for example in train_batch ]
        paras_lens_in = [ len(para) for para in paras_in ]
        max_ans_len = max(ans_lens_in)
        max_ques_len = max(ques_lens_in)
        max_para_len = max(paras_lens_in)

        ans_in = [ pad(example[1], 0, max_ans_len) for example in train_batch ]
        ques_in = [ pad(example[0], 0, max_ques_len) for example in train_batch ]
        paras_in = [ pad(para, 0, max_para_len) for para in paras_in ]
        labels = [ example[2] for example in train_batch ]

        train_loss, predictions, _ =\
          sess.run([model.loss, model.predictions, model.optimizer],
                   feed_dict = { model.ans_input: ans_in,
                                 model.ans_lens: ans_lens_in,
                                 model.ques_input: ques_in,
                                 model.ques_lens: ques_lens_in,
                                 model.passage_input: paras_in,
                                 model.passage_lens: paras_lens_in,
                                 model.labels: labels, model.keep_prob:
                                 config['drop_emb'] })

        predictions = np.round(predictions)
        train_errors = np.abs(predictions-labels)
        train_error_sum = np.sum(train_errors)
        train_error0_sum = np.sum((1-np.array(labels)) * train_errors)
        train_error1_sum = np.sum(np.array(labels) * train_errors)
        train_1_examples_batch = sum([ example[2] for example in train_batch ])
        print ("Total error: %.2f%%, 1 errors: %.2f%%, " +\
              "0 errors: %.2f%%, ") %\
              (100 * float(train_error_sum)/len(train_batch),
               100 * float(train_error1_sum)/(1+train_1_examples_batch),
               100 * float(train_error0_sum)/(len(train_batch)-train_1_examples_batch)),
        sys.stdout.flush()
        train_loss_sum += train_loss
        print "Loss, %.5f" % (train_loss_sum/(i+1)),

      # Print train stats for the epoch (after the batch loop finishes).
      print "\nEpoch %d: Train Loss: %.4f (in time: %.2f s)" %\
            (EPOCH, train_loss_sum/len(train_order), (time.time() - start_t))

      # Run pass over dev data to compute stats
      dev_start_t = time.time()
      dev_loss_sum = 0.0
      dev_error_sum = 0
      dev_error0_sum = 0
      dev_error1_sum = 0
      for i, num in enumerate(dev_order):
        print "\rDev: %.2f s (Done %d of %d) " %\
              ((time.time()-dev_start_t)*(len(dev_order)-i-1)/(i+1), i+1,
              len(dev_order)),
        sys.stdout.flush()

        # Prepare dev batch by computing lengths and padding
        dev_batch = dev[num:num+test_batch_size]
        ans_lens_in = [ len(example[1]) for example in dev_batch ]
        ques_lens_in = [ len(example[0]) for example in dev_batch ]
        paras_in = [ dev_tokenized_paras[dev_ques_to_para[example[3]]] \
                       for example in dev_batch ]
        paras_lens_in = [ len(para) for para in paras_in ]
        max_ans_len = max(ans_lens_in)
        max_ques_len = max(ques_lens_in)
        max_para_len = max(paras_lens_in)

        ans_in = [ pad(example[1], 0, max_ans_len) for example in dev_batch ]
        ques_in = [ pad(example[0], 0, max_ques_len) for example in dev_batch ]
        paras_in = [ pad(para_in, 0, max_para_len) for para_in in paras_in ]
        labels = [ example[2] for example in dev_batch ]

        dev_loss, predictions =\
          sess.run([model.loss, model.predictions],
                   feed_dict = { model.ans_input: ans_in,
                                 model.ans_lens: ans_lens_in,
                                 model.ques_input: ques_in,
                                 model.ques_lens: ques_lens_in,
                                 model.passage_input: paras_in,
                                 model.passage_lens: paras_lens_in,
                                 model.labels: labels,
                                 model.keep_prob: 1.0 })

        dev_loss_sum += dev_loss
        print "[Average loss : %.5f]" % (dev_loss_sum/(i+1)),

        # Compute overall prediction-error, error for 0s, and error for 1s
        predictions = np.round(predictions)
        dev_errors = np.abs(predictions-labels)
        dev_error_sum += np.sum(dev_errors)
        dev_error0_sum += np.sum((1-np.array(labels)) * dev_errors)
        dev_error1_sum += np.sum(np.array(labels) * dev_errors)

      # Print dev stats for epoch
      print "\nDev Loss: %.4f (in time: %.2f s)" %\
            (dev_loss_sum/len(dev_order), (time.time() - dev_start_t))
      print ("Total error: %d/%d (%.2f%%), 1 errors: %d/%d (%.2f%%), " +\
             "0 errors: %d/%d (%.2f%%)") %\
             (dev_error_sum, len(dev), 100 * float(dev_error_sum)/len(dev),
              dev_error1_sum, dev_1_examples, 100 * float(dev_error1_sum)/dev_1_examples,
              dev_error0_sum, len(dev)-dev_1_examples,
              100 * float(dev_error0_sum)/(len(dev)-dev_1_examples))

      # Save model parameters from this epoch.
      save_path = saver.save(sess, args.model_dir + 'model' + str(EPOCH) + '.ckpt')
      print "Model saved."
Example #4
def test_model(args):
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
  dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
  dev_tokenized_paras, test_tokenized_paras, train_order, dev_order, test_order,\
  train_data, dev_data, test_data = read_and_process_data(args)
    
  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word,
                              train_data.dictionary.word_to_index)
  print(model)

  #------------------------- Reload and test model ----------------------------#
  if args.model_file is not None:
    model = model.load_from_file(args.model_file)
    print "Loaded model from %s." % args.model_file
  else:
    last_done_epoch = config['ckpt']
    model = model.load(args.model_dir, last_done_epoch)
    print "Loaded model."
    if not args.disable_glove:
      print "Embedding shape:", model.embedding.shape

  test_start_t = time.time()
  test_loss_sum = 0.0
  all_predictions = {}
  attention_starts = {}
  attention_ends = {}
  model.eval()

  for i, num in enumerate(test_order):
    print "\rTest: %.2f s (Done %d of %d) " %\
          ((time.time()-test_start_t)*(len(test_order)-i-1)/(i+1), i+1,
          len(test_order)),

    test_batch = test[num:num+test_batch_size]
    batch_size = len(test_batch)

    # Variable length question, answer and paragraph sequences for batch.
    ques_lens_in = [ len(example[0]) for example in test_batch ]
    paras_in = [ test_tokenized_paras[test_ques_to_para[example[2]]] \
                   for example in test_batch ]
    paras_lens_in = [ len(para) for para in paras_in ]
    max_ques_len = max(ques_lens_in)
    max_para_len = max(paras_lens_in)

    # ans_in.shape = (2, batch)
    ans_in = np.array([ example[1] for example in test_batch ]).T

    # Fixed-length (padded) input sequences with shape=(seq_len, batch).
    ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                           for example in test_batch ]).T
    paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T

    passage_input = (paras_in, paras_lens_in)
    question_input = (ques_in, ques_lens_in)
    answer_input = ans_in

    # distributions[{0,1}].shape = (batch, max_passage_len)
    distributions = model(passage_input, question_input, answer_input)
    distributions[0] = distributions[0].data.cpu().numpy()
    distributions[1] = distributions[1].data.cpu().numpy()

    # Add all batch qids to predictions dict, if they don't already exist.
    qids = [ example[2] for example in test_batch ]
    for qid in qids:
      if qid not in all_predictions:
        all_predictions[qid] = []
    
    # Search, or be greedy?
    if not args.use_greedy:
      best_idxs = []
      for idx in range(len(test_batch)):
        best_prob = -1
        best = [0, 0]
        max_end = paras_lens_in[idx]
        for j, start_prob in enumerate(distributions[0][idx][:max_end]):
          cur_end_idx = max_end if args.max_answer_span == -1 \
                                else min(j + args.max_answer_span, max_end)
          end_idx = np.argmax(distributions[1][idx][j:cur_end_idx])
          prob = distributions[1][idx][j+end_idx] * start_prob
          if prob > best_prob:
            best_prob = prob
            best = [j, j+end_idx]
        best_idxs.append(best)
    else:
      best_idxs = []
      for idx in range(len(test_batch)):
        start = np.argmax(distributions[0][idx])
        end_idx = paras_lens_in[idx] if args.max_answer_span == -1 \
                  else min(start + args.max_answer_span, paras_lens_in[idx])
        end = np.argmax(distributions[1][idx][start:end_idx])
        best_idxs.append([start, start+end])

    tokenized_paras = test_data.tokenized_paras
    answers = [ tokenized_paras[test_ques_to_para[qids[idx]]][start:end+1] \
                  for idx, (start, end) in enumerate(best_idxs) ]
    answers = [ " ".join([ test_data.dictionary.get_word(idx) for idx in ans ]) \
                  for ans in answers ]

    for qid, answer in zip(qids, answers):
      all_predictions[qid] = answer

    # Dump start and end attention distributions.
    for idx in range(batch_size):
      if qids[idx] in attention_starts:
        attention_starts[qids[idx]][1].append(ans_in[0][idx])
      else:
        attention_starts[qids[idx]] = (distributions[0][idx], [ans_in[0][idx]])
      if qids[idx] in attention_ends:
        attention_ends[qids[idx]][1].append(ans_in[1][idx])
      else:
        attention_ends[qids[idx]] = (distributions[1][idx], [ans_in[1][idx]])

    test_loss_sum += model.loss.data[0]
    print "[Average loss : %.5f]" % (test_loss_sum/(i+1)),
    sys.stdout.flush()

  # Print stats
  print "\nTest Loss: %.4f (in time: %.2f s)" %\
        (test_loss_sum/len(test_order), (time.time() - test_start_t))

  # Dump the results json in the required format
  print "Dumping prediction results."
  with open(args.predictions_output_json, "w") as predictions_out:
    json.dump(all_predictions, predictions_out)

  # Dump attention start and end distributions.
  with open(args.predictions_output_json + "_starts.p", "wb") as starts_out:
    pickle.dump(attention_starts, starts_out)
  with open(args.predictions_output_json + "_ends.p", "wb") as ends_out:
    pickle.dump(attention_ends, ends_out)
  print "Done."
Example #5
def train_model(args):
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
  dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
  dev_tokenized_paras, test_tokenized_paras, train_order, dev_order, test_order,\
  train_data, dev_data, test_data = read_and_process_data(args)
    
  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word,
                              train_data.dictionary.word_to_index)

  if not os.path.exists(args.model_dir):
    os.mkdir(args.model_dir)

  #------------------------------ Train System ----------------------------------#
  # Should we resume running from an existing checkpoint?
  last_done_epoch = config['ckpt']
  if last_done_epoch > 0:
    model = model.load(args.model_dir, last_done_epoch)
    print "Loaded model."
    if not args.disable_glove:
      print "Embedding shape:", model.embedding.shape

  if args.model_file is not None:
    model = model.load_from_file(args.model_file)
    print "Loaded model from %s." % args.model_file

  start_time = time.time()
  print "Starting training."

  if args.optimizer == "SGD":
    print "Using SGD optimizer."
    optimizer = SGD(model.parameters(), lr = args.learning_rate)
  elif args.optimizer == "Adamax":
    print "Using Adamax optimizer."
    optimizer = Adamax(model.parameters(), lr = args.learning_rate)
    if last_done_epoch > 0:
      if os.path.exists(args.model_dir + "/optim_%d.pt" % last_done_epoch):
        optimizer = torch.load(args.model_dir + "/optim_%d.pt" % last_done_epoch)
      else:
        print "Optimizer saved state not found. Not loading optimizer."
  else:
    assert False, "Unrecognized optimizer."
  print(model)

  for EPOCH in range(last_done_epoch+1, args.epochs):
    start_t = time.time()
    train_loss_sum = 0.0
    model.train()
    for i, num in enumerate(train_order):
      print "\rTrain epoch %d, %.2f s - (Done %d of %d)" %\
            (EPOCH, (time.time()-start_t)*(len(train_order)-i-1)/(i+1), i+1,
             len(train_order)),

      # Create next batch by getting lengths and padding
      train_batch = train[num:num+batch_size]

      # Variable length question, answer and paragraph sequences for batch.
      ques_lens_in = [ len(example[0]) for example in train_batch ]
      paras_in = [ train_tokenized_paras[train_ques_to_para[example[2]]] \
                     for example in train_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)

      # ans_in.shape = (2, batch)
      ans_in = np.array([ example[1] for example in train_batch ]).T
      # Fixed-length (padded) input sequences with shape=(seq_len, batch).
      ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                             for example in train_batch ]).T
      paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T

      passage_input = (paras_in, paras_lens_in)
      question_input = (ques_in, ques_lens_in)
      answer_input = ans_in

      # Zero previous gradient.
      model.zero_grad()
      model(passage_input, question_input, answer_input)
      model.loss.backward()
      optimizer.step()
      train_loss_sum += model.loss.data[0]

      print "Loss: %.5f (in time %.2fs)" % \
            (train_loss_sum/(i+1), time.time() - start_t),
      sys.stdout.flush()

    print "\nLoss: %.5f (in time %.2fs)" % \
          (train_loss_sum/len(train_order), time.time() - start_t)

    # End of epoch.
    random.shuffle(train_order)
    model.zero_grad()
    model.save(args.model_dir, EPOCH)

    # Decay the learning rate once per epoch.
    for param_group in optimizer.param_groups:
      param_group['lr'] *= config['decay_rate']
    if args.optimizer == "Adamax":
      torch.save(optimizer, args.model_dir + "/optim_%d.pt" % EPOCH)

    # Run pass over dev data.
    dev_start_t = time.time()
    dev_loss_sum = 0.0
    all_predictions = {}
    print "\nRunning on Dev."

    model.eval()
    for i, num in enumerate(dev_order):
      print "\rDev: %.2f s (Done %d of %d)" %\
            ((time.time()-dev_start_t)*(len(dev_order)-i-1)/(i+1), i+1,
            len(dev_order)),

      dev_batch = dev[num:num+test_batch_size]

      # Variable length question, answer and paragraph sequences for batch.
      ques_lens_in = [ len(example[0]) for example in dev_batch ]
      paras_in = [ dev_tokenized_paras[dev_ques_to_para[example[2]]] \
                     for example in dev_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)

      # ans_in.shape = (2, batch)
      ans_in = np.array([ example[1] for example in dev_batch ]).T

      # Fixed-length (padded) input sequences with shape=(seq_len, batch).
      ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                             for example in dev_batch ]).T
      paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T

      passage_input = (paras_in, paras_lens_in)
      question_input = (ques_in, ques_lens_in)
      answer_input = ans_in

      # distributions[{0,1}].shape = (batch, max_passage_len)
      distributions = model(passage_input, question_input, answer_input)
      distributions[0] = distributions[0].data.cpu().numpy()
      distributions[1] = distributions[1].data.cpu().numpy()

      # Add all batch qids to predictions dict, if they don't already exist.
      qids = [ example[2] for example in dev_batch ]
      for qid in qids:
        if qid not in all_predictions:
          all_predictions[qid] = []
      
      best_idxs = []
      for idx in range(len(dev_batch)):
        best_prob = -1
        best = [0, 0]
        max_end = paras_lens_in[idx]
        for j, start_prob in enumerate(distributions[0][idx][:max_end]):
          cur_end_idx = min(j + args.max_answer_span, max_end)
          end_idx = np.argmax(distributions[1][idx][j:cur_end_idx])
          prob = distributions[1][idx][j+end_idx] * start_prob
          if prob > best_prob:
            best_prob = prob
            best = [j, j+end_idx]
        best_idxs.append(best)

      tokenized_paras = dev_data.tokenized_paras
      answers = [ tokenized_paras[dev_ques_to_para[qids[idx]]][start:end+1] \
                    for idx, (start, end) in enumerate(best_idxs) ]
      answers = [ " ".join([ dev_data.dictionary.get_word(idx) for idx in ans ]) \
                    for ans in answers ]

      for qid, answer in zip(qids, answers):
        all_predictions[qid] = answer

      dev_loss_sum += model.loss.data[0]
      print "[Average loss : %.5f]" % (dev_loss_sum/(i+1)),
      sys.stdout.flush()

    # Print dev stats for epoch
    print "\nDev Loss: %.4f (in time: %.2f s)" %\
          (dev_loss_sum/len(dev_order), (time.time() - dev_start_t))

    # Dump the results json in the required format
    print "Dumping prediction results."
    with open(args.model_dir + "/dev_predictions_" + str(EPOCH) + ".json",
              "w") as predictions_out:
      json.dump(all_predictions, predictions_out)
    print "Done."
Example #6
File: Main.py  Project: sheetalsh456/q-net
def get_batch(batch, ques_to_para, tokenized_paras, paras_pos_tags,
              paras_ner_tags, question_pos_tags, question_ner_tags,
              num_pos_tags, num_ner_tags):
    # Variable length question, answer and paragraph sequences for batch.
    ques_lens_in = [len(example[0]) for example in batch]
    paras_in = [ tokenized_paras[ques_to_para[example[2]]] \
                   for example in batch ]
    paras_pos_tags_in = [ paras_pos_tags[ques_to_para[example[2]]] \
                            for example in batch ]
    paras_ner_tags_in = [ paras_ner_tags[ques_to_para[example[2]]] \
                            for example in batch ]
    ques_pos_tags_in = [ question_pos_tags[example[2]] \
                            for example in batch ]
    ques_ner_tags_in = [ question_ner_tags[example[2]] \
                            for example in batch ]
    paras_lens_in = [len(para) for para in paras_in]
    max_ques_len = max(ques_lens_in)
    max_para_len = max(paras_lens_in)

    # ans_in.shape = (2, batch)
    ans_in = np.array([example[1] for example in batch]).T
    sent_in = np.array([example[4] for example in batch]).T

    # f1_mat_in.shape = (batch, seq_len, seq_len)
    f1_mat_in = np.array([ create2d(example[3], 0, max_para_len, example[1][0]) \
                             for example in batch])
    # Fixed-length (padded) input sequences with shape=(seq_len, batch).
    ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                           for example in batch ]).T
    paras_in = np.array([pad(para, 0, max_para_len) for para in paras_in]).T

    # Fixed-length (padded) pos-tag and ner-tag inputs.
    question_pos_tags = \
      np.array([ pad([ one_hot(postag, num_pos_tags) for postag in ques_pos_tags ],
                     one_hot(-1, num_pos_tags),
                     max_ques_len) \
                   for ques_pos_tags in ques_pos_tags_in ])
    question_ner_tags = \
      np.array([ pad([ one_hot(nertag, num_ner_tags) for nertag in ques_ner_tags ],
                     one_hot(-1, num_ner_tags),
                     max_ques_len) \
                   for ques_ner_tags in ques_ner_tags_in ])
    paragraph_pos_tags = \
      np.array([ pad([ one_hot(postag, num_pos_tags) for postag in paras_pos_tags ],
                     one_hot(-1, num_pos_tags),
                     max_para_len) \
                   for paras_pos_tags in paras_pos_tags_in ])
    paragraph_ner_tags = \
      np.array([ pad([ one_hot(nertag, num_ner_tags) for nertag in paras_ner_tags ],
                     one_hot(-1, num_ner_tags),
                     max_para_len) \
                   for paras_ner_tags in paras_ner_tags_in ])
    question_pos_tags = np.transpose(question_pos_tags, (1, 0, 2))
    question_ner_tags = np.transpose(question_ner_tags, (1, 0, 2))
    paragraph_pos_tags = np.transpose(paragraph_pos_tags, (1, 0, 2))
    paragraph_ner_tags = np.transpose(paragraph_ner_tags, (1, 0, 2))

    passage_input = (paras_in, paras_lens_in)
    question_input = (ques_in, ques_lens_in)
    answer_input = ans_in
    answer_sentence_input = sent_in

    return passage_input, question_input, answer_input, f1_mat_in, question_pos_tags,\
           question_ner_tags, paragraph_pos_tags, paragraph_ner_tags,\
           answer_sentence_input
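get_batch also assumes a one_hot helper not shown in this listing. A plausible reconstruction follows; the padding calls pass -1 (one_hot(-1, n)), which suggests the real helper maps negative tags to an all-zero vector rather than wrapping around, though that is an assumption here. The create2d helper is left alone, since its semantics can't be recovered from this listing.

def one_hot(index, size):
  # Hypothetical sketch: 1-of-size encoding, with negative indices (the
  # padding case above) producing all zeros. The repo's helper may differ.
  vec = [0] * size
  if index >= 0:
    vec[index] = 1
  return vec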