Ejemplo n.º 1
0
def run_task(data_dir, task_id):
    """
    Run the whole pipeline for one task: parse, build, train, then test.

    Args:
        data_dir: Directory searched for the ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt`` files.
        task_id: Integer task number; selects the data files and is echoed
            in the progress message.
    """
    print("Train and test for task %d ..." % task_id)

    # Both splits share the same file-name prefix for this task.
    task_prefix = '%s/qa%d_*_' % (data_dir, task_id)
    train_files = glob.glob(task_prefix + 'train.txt')
    test_files = glob.glob(task_prefix + 'test.txt')

    # One vocabulary is handed to both parse calls, training split first.
    vocab = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, vocab, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, vocab, False)

    config = BabiConfig(train_story, train_questions, vocab)
    memory, model, loss = build_model(config)

    # The configuration decides which training routine is used; both take
    # the same arguments.
    fit = train_linear_start if config.linear_start else train
    fit(train_story, train_questions, train_qstory, memory, model, loss,
        config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         config)
Ejemplo n.º 2
0
def run_task(data_dir, task_id):
    """
    Train and test for each task

    Args:
        data_dir: Directory containing the ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt`` data files.
        task_id: Integer task number; selects the data files and is echoed
            in the progress message.
    """
    print("Train and test for task %d ..." % task_id)

    print("We are going to use this")

    # Parse data. The pattern must contain one placeholder per element of
    # the argument tuple, otherwise the % operator raises TypeError.
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    # memory, model and loss have to exist before test() is called below.
    memory, model, loss = build_model(general_config)

    # The configuration selects the training routine.
    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)
Ejemplo n.º 3
0
    def train(self):
        """
        Train the MemN2N model on the qa8 training files and save the result.

        Reads ``self.data_dir`` (must not be None) and sets
        ``self.reversed_dict``, ``self.general_config``, ``self.memory``,
        ``self.model`` and ``self.loss`` before calling ``self.save_model()``.

        Raises:
            AssertionError: if ``self.data_dir`` is None.
        """
        np.random.seed(42)  # fixed seed so that runs can be reproduced
        assert self.data_dir is not None, "data_dir is not specified."
        print("Reading data from %s ..." % self.data_dir)

        vocab = {"nil": 0}

        # Training split: these parsed stories and questions feed the model.
        train_paths = glob.glob('%s/qa8_*_train.txt' % self.data_dir)
        story, questions, qstory = parse_babi_task(train_paths, vocab, False)

        # Test split: parsed only so that the vocabulary also covers the
        # words appearing in the test data; the parsed output is discarded.
        test_paths = glob.glob('%s/qa8_*_test.txt' % self.data_dir)
        parse_babi_task(test_paths, vocab, False)

        # Index -> word mapping, the inverse of the vocabulary.
        self.reversed_dict = {index: word for word, index in vocab.items()}

        # Build the model from the configuration.
        self.general_config = BabiConfig(story, questions, vocab)
        self.memory, self.model, self.loss = build_model(self.general_config)

        # The configuration selects the training routine; both take the same
        # arguments. `train` here is the module-level function.
        if self.general_config.linear_start:
            fit = train_linear_start
        else:
            fit = train
        fit(story, questions, qstory, self.memory, self.model, self.loss,
            self.general_config)

        # Persist the trained model.
        self.save_model()
Ejemplo n.º 4
0
def run_tableQA(data_path, model_file):
    """
    Train and test for table QA, then save the trained model.

    Args:
        data_path: Glob pattern containing one ``str.format`` placeholder
            that is filled with ``'train'`` and ``'test'`` to locate the
            data files of each split.
        model_file: Path of the gzip-compressed pickle file the model is
            written to.
    """

    # Parse data
    train_files = glob.glob(data_path.format('train'))
    test_files = glob.glob(data_path.format('test'))
    # SV: init dict with pre-trained vectors, e.g. from fastText
    # dictionary = fasttext.load_model(EMBEDDINGS_MODEL_PATH)
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)
    # Single-argument print(...) produces the same output under Python 2
    # and Python 3; the former print statement was Python 2 only.
    print('Dictionary: %d' % len(dictionary))
    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    # The configuration selects the training routine.
    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)

    # Save the model together with the index -> word mapping.
    with gzip.open(model_file, "wb") as f:
        print("Saving model to file %s ..." % model_file)
        reversed_dict = dict((ix, w) for w, ix in dictionary.items())
        pickle.dump((reversed_dict, memory, model, loss, general_config), f)
Ejemplo n.º 5
0
def run_task(data_dir, task_id, model_file, log_path):
    """
    Train and test for each task, saving the model before testing.

    Args:
        data_dir: Directory containing the ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt`` data files.
        task_id: Integer task number; selects the data files and is echoed
            in the progress message.
        model_file: File name, joined onto ``log_path``, of the
            gzip-compressed pickle the model is written to.
        log_path: Directory that receives ``train.log`` / ``val.log`` and
            the saved model; also forwarded to ``train_linear_start``.
    """
    print("Train and test for task %d ..." % task_id)

    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = \
        parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = \
        parse_babi_task(test_files, dictionary, False)

    # Get reversed dictionary mapping index to word
    # NOTE: this is needed for real-time testing
    reversed_dict = dict((ix, w) for w, ix in dictionary.items())

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss_func = build_model(general_config)

    if general_config.linear_start:
        print('We will use LS training')
        # This is a plain function, so the log directory comes from the
        # log_path parameter (there is no `self` in scope).
        # NOTE(review): the returned best_model / best_memory are not used
        # below -- the objects passed in are what get saved and tested.
        # Confirm that train_linear_start updates them in place.
        best_model, best_memory = \
            train_linear_start(train_story,
                               train_questions,
                               train_qstory,
                               memory,
                               model,
                               loss_func,
                               general_config,
                               log_path)
    else:
        # try/finally guarantees the log files are closed even when
        # training raises.
        train_logger = open(os.path.join(log_path, 'train.log'), 'w')
        try:
            train_logger.write('epoch batch_iter lr loss err\n')
            train_logger.flush()
            val_logger = open(os.path.join(log_path, 'val.log'), 'w')
            try:
                val_logger.write('epoch batch_iter lr loss err\n')
                val_logger.flush()
                global_batch_iter = 0
                train_logger, val_logger, _, _, _ = \
                    train(train_story,
                          train_questions,
                          train_qstory,
                          memory,
                          model,
                          loss_func,
                          general_config,
                          train_logger,
                          val_logger,
                          global_batch_iter)
            finally:
                val_logger.close()
        finally:
            train_logger.close()

    model_file = os.path.join(log_path, model_file)
    with gzip.open(model_file, 'wb') as f:
        print('Saving model to file %s ...' % model_file)
        pickle.dump((reversed_dict, memory, model, loss_func, general_config),
                    f)

    print('Start to testing')
    test(test_story, test_questions, test_qstory, memory, model, loss_func,
         general_config)
Ejemplo n.º 6
0
def run_task(data_dir, task_id):
    """
    Train and test one task, then repeat the test with a model loaded from disk.

    The dictionary of the freshly trained configuration is appended to
    ``R_trained.txt`` and the dictionary of the loaded configuration to
    ``R_loaded.txt``, both as JSON, and each is also printed.

    Args:
        data_dir: Directory searched for the ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt`` files.
        task_id: Integer task number; selects the data files and is echoed
            in the progress message.
    """
    print("Train and test for task %d ..." % task_id)

    # Locate the data files; both splits share the same prefix.
    task_prefix = '%s/qa%d_*_' % (data_dir, task_id)
    train_files = glob.glob(task_prefix + 'train.txt')
    test_files = glob.glob(task_prefix + 'test.txt')

    # ---- Part 1: train a fresh model and test it ----
    vocab = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, vocab, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, vocab, False)

    config = BabiConfig(train_story, train_questions, vocab)
    memory, model, loss = build_model(config)

    # The configuration selects the training routine; both take the same
    # arguments.
    fit = train_linear_start if config.linear_start else train
    fit(train_story, train_questions, train_qstory, memory, model, loss,
        config)

    # Append (mode 'a') the trained dictionary to the dump file.
    with open('R_trained.txt', 'a') as trained_dump:
        json.dump(config.dictionary, trained_dump, indent=2)

    print("######## trained dictionary")
    print(config.dictionary)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         config)

    # ---- Part 2: load a saved model and test it on the same files ----
    # NOTE(review): `args` is not a parameter of this function; presumably a
    # module-level namespace of parsed command-line options -- confirm.
    loaded = MemN2N(args.data_dir, args.model_file)
    loaded.load_model()

    # Both splits are parsed again, this time against the loaded model's
    # dictionary; only the test split's output is used afterwards.
    parse_babi_task(train_files, loaded.general_config.dictionary, False)
    loaded_story, loaded_questions, loaded_qstory = parse_babi_task(
        test_files, loaded.general_config.dictionary, False)

    # Append (mode 'a') the loaded dictionary to its own dump file.
    with open('R_loaded.txt', 'a') as loaded_dump:
        json.dump(loaded.general_config.dictionary, loaded_dump, indent=2)

    print("???????? loaded dictionary")
    print(loaded.general_config.dictionary)

    test(loaded_story, loaded_questions, loaded_qstory, loaded.memory,
         loaded.model, loaded.loss, loaded.general_config)