Example #1
    def train(self):
        """
        Train the MemN2N model using the training data for all tasks.
        """
        np.random.seed(42)  # for reproducibility
        assert self.data_dir is not None, "data_dir is not specified."
        print("Reading data from %s ..." % self.data_dir)

        # Parse training data
        train_data_path = glob.glob('%s/qa*_*_train.txt' % self.data_dir)
        dictionary = {"nil": 0}
        train_story, train_questions, train_qstory = parse_babi_task(train_data_path, dictionary, False)

        # Parse test data just to expand the dictionary so that it covers all words in the test data too
        test_data_path = glob.glob('%s/qa*_*_test.txt' % self.data_dir)
        parse_babi_task(test_data_path, dictionary, False)

        # Get reversed dictionary mapping index to word
        self.reversed_dict = dict((ix, w) for w, ix in dictionary.items())

        # Construct model
        self.general_config = BabiConfigJoint(train_story, train_questions, dictionary)
        self.memory, self.model, self.loss = build_model(self.general_config)

        # Train model
        if self.general_config.linear_start:
            train_linear_start(train_story, train_questions, train_qstory,
                               self.memory, self.model, self.loss, self.general_config)
        else:
            train(train_story, train_questions, train_qstory,
                  self.memory, self.model, self.loss, self.general_config)

        # Save model
        self.save_model()
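
A minimal usage sketch for the method above, assuming it belongs to a wrapper class named MemN2N whose constructor takes the data directory and an output model path (the class name, constructor signature, and paths are assumptions, not confirmed by this example):

# Hypothetical driver: train a joint model over all bAbI tasks and persist it.
memn2n = MemN2N(data_dir="data/tasks_1-20_v1-2/en", model_file="memn2n_model.pklz")
memn2n.train()  # parses data, builds the model, trains, then calls save_model()
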
Example #2
def run_task(data_dir, task_id):
    """
    Train and test a single task.
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)
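
run_task trains and evaluates one model per task, so a driver can simply sweep all 20 bAbI tasks. A sketch, assuming the data layout used by the globs above:

if __name__ == "__main__":
    data_dir = "data/tasks_1-20_v1-2/en"  # assumed location of the bAbI English split
    for task_id in range(1, 21):          # bAbI task ids are 1-based
        run_task(data_dir, task_id)
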
Example #3
    def train(self):
        """
        Train the MemN2N model using the training data for all tasks.
        """
        np.random.seed(42)  # for reproducibility
        assert self.data_dir is not None, "data_dir is not specified."
        print("Reading data from %s ..." % self.data_dir)

        # Parse training data
        train_data_path = glob.glob('%s/qa*_*_train.txt' % self.data_dir)
        dictionary = {"nil": 0}
        train_story, train_questions, train_qstory = parse_babi_task(train_data_path, dictionary, False)

        # Parse test data just to expand the dictionary so that it covers all words in the test data too
        test_data_path = glob.glob('%s/qa*_*_test.txt' % self.data_dir)
        parse_babi_task(test_data_path, dictionary, False)

        # Get reversed dictionary mapping index to word
        self.reversed_dict = dict((ix, w) for w, ix in dictionary.items())

        # Construct model
        self.general_config = BabiConfigJoint(train_story, train_questions, dictionary)
        self.memory, self.model, self.loss = build_model(self.general_config)

        # Train model
        if self.general_config.linear_start:
            train_linear_start(train_story, train_questions, train_qstory,
                               self.memory, self.model, self.loss, self.general_config)
        else:
            train(train_story, train_questions, train_qstory,
                  self.memory, self.model, self.loss, self.general_config)

        # Save model
        self.save_model()
Example #4
def run_joint_tasks(data_dir):
    """
    Train and test on all tasks; the model is trained jointly on the training data from all tasks.
    """
    print("Jointly train and test for all tasks ...")
    tasks = range(20)

    # Parse training data
    train_data_path = []
    for t in tasks:
        train_data_path += glob.glob('%s/qa%d_*_train.txt' % (data_dir, t + 1))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_data_path, dictionary, False)

    # Parse test data for each task so that the dictionary covers all words before training
    for t in tasks:
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        parse_babi_task(test_data_path, dictionary, False)  # ignore output for now

    general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    # Test on each task
    for t in tasks:
        print("Testing for task %d ..." % (t + 1))
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        dc = len(dictionary)
        test_story, test_questions, test_qstory = parse_babi_task(
            test_data_path, dictionary, False)
        assert dc == len(dictionary)  # make sure the dictionary already covers all words

        test(test_story, test_questions, test_qstory, memory, model, loss,
             general_config)
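
build_model sizes the model's embedding matrices from len(dictionary), so every test word must already be in the dictionary before training; the assert in the test loop above checks exactly this. A reusable guard sketch, assuming (as the examples rely on) that parse_babi_task mutates the dictionary in place:

def assert_vocab_closed(dictionary, data_files):
    # Re-parsing must not grow the dictionary; otherwise word indices from
    # these files would fall outside the embedding matrices sized at build time.
    size_before = len(dictionary)
    parse_babi_task(data_files, dictionary, False)
    assert len(dictionary) == size_before, "data files contain unseen words"
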
Example #5
def run_joint_tasks(data_dir):
    """
    Train and test on all tasks; the model is trained jointly on the training data from all tasks.
    """
    print("Jointly train and test for all tasks ...")
    tasks = range(20)

    # Parse training data
    train_data_path = []
    for t in tasks:
        train_data_path += glob.glob('%s/qa%d_*_train.txt' % (data_dir, t + 1))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_data_path, dictionary, False)

    # Parse test data for each task so that the dictionary covers all words before training
    for t in tasks:
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        parse_babi_task(test_data_path, dictionary, False)  # ignore output for now

    general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    # Test on each task
    for t in tasks:
        print("Testing for task %d ..." % (t + 1))
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        dc = len(dictionary)
        test_story, test_questions, test_qstory = parse_babi_task(test_data_path, dictionary, False)
        assert dc == len(dictionary)  # make sure that the dictionary already covers all words

        test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
Example #6
def run_task(data_dir, task_id):
    """
    Train and test a single task.
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
Example #7
def run_tableQA(data_path, model_file):
    """
    Train and test for table QA
    """

    # Parse data
    train_files = glob.glob(data_path.format('train'))
    test_files = glob.glob(data_path.format('test'))
    # SV: init dict with pre-trained vectors, e.g. from fastText
    # dictionary = fasttext.load_model(EMBEDDINGS_MODEL_PATH)
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)
    # print test_questions
    print('Dictionary size:', len(dictionary))
    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)

    # Save model
    with gzip.open(model_file, "wb") as f:
        print("Saving model to file %s ..." % model_file)
        reversed_dict = dict((ix, w) for w, ix in dictionary.items())
        pickle.dump((reversed_dict, memory, model, loss, general_config), f)
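
The tuple pickled above can be restored symmetrically. A sketch of the matching load step, mirroring the save code exactly:

import gzip
import pickle

with gzip.open(model_file, "rb") as f:
    reversed_dict, memory, model, loss, general_config = pickle.load(f)
# reversed_dict maps word indices back to words when decoding predicted answers
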
Example #8
  def train(self):
    """
    Train the MemN2N model using the training data for all tasks.
    """
    #np.random.seed(42)  # for reproducing
    np.random.seed(self.rnd_seed)  # for reproducibility
    print("np.random.seed: %d" % self.rnd_seed)
    assert self.data_dir is not None, "data_dir is not specified."
    print("Reading data from %s ..." % self.data_dir)

    # Parse training data
    train_data_path = glob.glob('%s/qa*_*_train.txt' % self.data_dir)
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_data_path, dictionary, False)

    # Parse test data just to expand the dictionary so that it covers all words in the test data too
    test_data_path = glob.glob('%s/qa*_*_test.txt' % self.data_dir)
    parse_babi_task(test_data_path, dictionary, False)

    # Get reversed dictionary mapping index to word
    self.reversed_dict = dict((ix, w) for w, ix in dictionary.items())

    # Construct model
    #self.general_config = Babi10kConfigJoint(train_story, train_questions, dictionary)
    self.general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    self.memory, self.model, self.loss = build_model(self.general_config)

    # Train model
    if self.general_config.linear_start:
      print('We will use LS training')
      self.best_model, self.best_memory = \
        train_linear_start(train_story, 
                           train_questions, 
                           train_qstory, 
                           self.memory, 
                           self.model, 
                           self.loss, 
                           self.general_config, 
                           self.log_path)
    else:
      train_logger = open(os.path.join(self.log_path, 'train.log'), 'w')
      train_logger.write('epoch batch_iter lr loss err\n')
      train_logger.flush()
      val_logger = open(os.path.join(self.log_path, 'val.log'), 'w')
      val_logger.write('epoch batch_iter lr loss err\n')
      val_logger.flush()
      global_batch_iter = 0
      train_logger, val_logger, self.best_model, self.best_memory, _ = \
        train(train_story, 
              train_questions, 
              train_qstory,
              self.memory, 
              self.model, 
              self.loss, 
              self.general_config,
              train_logger, 
              val_logger,
              global_batch_iter)

      train_logger.close()
      val_logger.close()

    # Save model
    self.save_model()
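
Both logger branches in this example write whitespace-separated columns under the header 'epoch batch_iter lr loss err'. A small reader sketch for that format (only the column layout comes from the example; the helper itself is hypothetical):

def read_log(path):
    # Parse a train.log / val.log file into a list of {column: value} dicts.
    with open(path) as f:
        header = f.readline().split()
        return [dict(zip(header, map(float, line.split())))
                for line in f if line.strip()]

rows = read_log('train.log')
if rows:
    print(rows[0]['err'])  # e.g. the first logged error rate
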
Example #9
    def train_and_test(self, seed=None):
        """
        Train the MemN2N model on the training data for all tasks, then evaluate it on the test data.
        """
        if seed is None:
            np.random.seed(42)  # for reproducibility
        else:
            np.random.seed(seed)
        train_data_arg = None
        test_data_arg = None
        if type(self.data_dir) is tuple:
            assert self.data_dir[
                0] is not None, "training data_dir is not specified."
            assert self.data_dir[
                1] is not None, "test data_dir is not specified."
            print("Reading training data from %s ..." % self.data_dir[0])
            print("Reading test data from %s ..." % self.data_dir[1])
            train_data_arg = '%s/qa*_train.txt' % self.data_dir[0]
            test_data_arg = '%s/qa*_valid.txt' % self.data_dir[1]
        else:
            assert self.data_dir is not None, "data_dir is not specified."
            print("Reading data from %s ..." % self.data_dir)
            train_data_arg = '%s/qa*_*_train.txt' % self.data_dir
            test_data_arg = '%s/qa*_*_test.txt' % self.data_dir
        assert train_data_arg is not None and test_data_arg is not None
        # Parse training data
        train_data_path = glob.glob(train_data_arg)
        dictionary = {"nil": 0}
        train_story, train_questions, train_qstory = parse_babi_task(
            train_data_path, dictionary, False)

        # Parse test data just to expand the dictionary so that it covers all words in the test data too
        test_data_path = glob.glob(test_data_arg)
        test_story, test_questions, test_qstory = parse_babi_task(
            test_data_path, dictionary, False)

        # Get reversed dictionary mapping index to word
        self.reversed_dict = dict((ix, w) for w, ix in dictionary.items())

        # Construct model
        self.general_config = BabiConfigJoint(train_story, train_questions,
                                              dictionary)

        # Check config_switches; expected format: [initial learning rate, linear start option, hops]
        if self.config_switches is not None:
            self.general_config.train_config[
                "init_lrate"] = self.config_switches[0]

            # The linear start option is passed to the BabiConfig constructor, so it need not be set here

            # Use the same total number of epochs for linear-start and non-linear-start runs
            if self.general_config.linear_start is True:
                self.general_config.nepochs = 40
                self.general_config.ls_nepochs = 20
            else:
                self.general_config.nepochs = 60

            self.general_config.nhops = self.config_switches[2]

        self.memory, self.model, self.loss = build_model(self.general_config)

        # Train model
        train_val_results = []
        if self.general_config.linear_start:
            train_val_results += train_linear_start(train_story,
                                                    train_questions,
                                                    train_qstory, self.memory,
                                                    self.model, self.loss,
                                                    self.general_config)
        else:
            train_val_results += train(train_story, train_questions,
                                       train_qstory, self.memory, self.model,
                                       self.loss, self.general_config)

        test_error = test(test_story, test_questions, test_qstory, self.memory,
                          self.model, self.loss, self.general_config)

        model_test_accuracy = (1.0 - test_error) * 100.0

        train_val_file = self.model_file + 'train_val_accuracy.csv'
        with open(train_val_file, 'w') as f:
            f.write('epoch, TrainAccuracy, ValAccuracy\n')
            epoch = 1
            for item in train_val_results:
                line = '{}, {:.3f}, {:.3f}\n'.format(epoch, item[0], item[1])
                f.write(line)
                epoch += 1

        self.model_file += '_TestAcc{:.1f}percent_.pickle'.format(
            model_test_accuracy)
        # Save model
        self.save_model()
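
The comment in this example documents config_switches as [initial learning rate, linear start option, hops]. A hedged sweep sketch built on that layout; the wrapper class name, its constructor signature, and the paths are assumptions:

# Hypothetical grid search over the documented config_switches layout.
data_dir = "data/tasks_1-20_v1-2/en"  # assumed path
for lr in (0.01, 0.005):
    for linear_start in (True, False):
        for hops in (1, 2, 3):
            runner = MemN2N(data_dir, "model_sweep",
                            config_switches=[lr, linear_start, hops])  # assumed signature
            runner.train_and_test(seed=42)
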
Example #10
def run_joint_tasks(data_dir, model_file, log_path):
    """
  Train and test on all tasks; the model is trained jointly on the training data from all tasks.
  """
    print("Jointly train and test for all tasks ...")
    tasks = range(20)

    # Parse training data
    train_data_path = []
    for t in tasks:
        train_data_path += glob.glob('%s/qa%d_*_train.txt' % (data_dir, t + 1))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = \
      parse_babi_task(train_data_path,
                      dictionary,
                      False)

    # Parse test data for each task so that the dictionary covers all words before training
    for t in tasks:
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        parse_babi_task(test_data_path, dictionary, False)  # ignore output for now

    # Get reversed dictionary mapping index to word
    # NOTE: this is needed for real-time testing
    reversed_dict = dict((ix, w) for w, ix in dictionary.items())

    general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    memory, model, loss_func = build_model(general_config)

    if general_config.linear_start:
        print('We will use LS training')
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss_func, general_config, log_path)
    else:
        train_logger = open(os.path.join(log_path, 'train.log'), 'w')
        train_logger.write('epoch batch_iter lr loss err\n')
        train_logger.flush()
        val_logger = open(os.path.join(log_path, 'val.log'), 'w')
        val_logger.write('epoch batch_iter lr loss err\n')
        val_logger.flush()
        train_logger, val_logger, best_model, best_memory = \
          train(train_story,
                train_questions,
                train_qstory,
                memory,
                model,
                loss_func,
                general_config,
                train_logger,
                val_logger)

        train_logger.close()
        val_logger.close()

    model_file = os.path.join(log_path, model_file)
    with gzip.open(model_file, 'wb') as f:
        print('Saving model to file %s ...' % model_file)
        pickle.dump((reversed_dict, memory, model, loss_func, general_config),
                    f)

    # Test on each task
    print('Starting testing ...')
    for t in tasks:
        print("Testing for task %d ..." % (t + 1))
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        dc = len(dictionary)
        test_story, test_questions, test_qstory = \
          parse_babi_task(test_data_path,
                          dictionary,
                          False)
        assert dc == len(dictionary)  # make sure the dictionary already covers all words

        test(test_story, test_questions, test_qstory, memory, model, loss_func,
             general_config)
Example #11
def run_task(data_dir, task_id, model_file, log_path):
    """
  Train and test a single task.
  """
    print("Train and test for task %d ..." % task_id)

    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = \
      parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = \
      parse_babi_task(test_files, dictionary, False)

    # Get reversed dictionary mapping index to word
    # NOTE: this is needed for real-time testing
    reversed_dict = dict((ix, w) for w, ix in dictionary.items())

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss_func = build_model(general_config)

    if general_config.linear_start:
        print('We will use LS training')
        best_model, best_memory = \
          train_linear_start(train_story,
                             train_questions,
                             train_qstory,
                             memory,
                             model,
                             loss_func,
                             general_config,
                             log_path)
    else:
        train_logger = open(os.path.join(log_path, 'train.log'), 'w')
        train_logger.write('epoch batch_iter lr loss err\n')
        train_logger.flush()
        val_logger = open(os.path.join(log_path, 'val.log'), 'w')
        val_logger.write('epoch batch_iter lr loss err\n')
        val_logger.flush()
        global_batch_iter = 0
        train_logger, val_logger, _, _, _ = \
          train(train_story,
                train_questions,
                train_qstory,
                memory,
                model,
                loss_func,
                general_config,
                train_logger,
                val_logger,
                global_batch_iter)
        train_logger.close()
        val_logger.close()

    model_file = os.path.join(log_path, model_file)
    with gzip.open(model_file, 'wb') as f:
        print('Saving model to file %s ...' % model_file)
        pickle.dump((reversed_dict, memory, model, loss_func, general_config),
                    f)

    print('Starting testing ...')
    test(test_story, test_questions, test_qstory, memory, model, loss_func,
         general_config)
Example #12
    def train(self):
        """
    Train the MemN2N model using the training data for all tasks.
    """
        #np.random.seed(42)  # for reproducing
        np.random.seed(self.rnd_seed)  # for reproducibility
        print("np.random.seed: %d" % self.rnd_seed)
        assert self.data_dir is not None, "data_dir is not specified."
        print("Reading data from %s ..." % self.data_dir)

        # Parse training data
        train_data_path = glob.glob('%s/qa*_*_train.txt' % self.data_dir)
        dictionary = {"nil": 0}
        train_story, train_questions, train_qstory = parse_babi_task(
            train_data_path, dictionary, False)

        # Parse test data just to expand the dictionary so that it covers all words in the test data too
        test_data_path = glob.glob('%s/qa*_*_test.txt' % self.data_dir)
        parse_babi_task(test_data_path, dictionary, False)

        # Get reversed dictionary mapping index to word
        self.reversed_dict = dict((ix, w) for w, ix in dictionary.items())

        # Construct model
        #self.general_config = Babi10kConfigJoint(train_story, train_questions, dictionary)
        self.general_config = BabiConfigJoint(train_story, train_questions,
                                              dictionary)
        self.memory, self.model, self.loss = build_model(self.general_config)

        # Train model
        if self.general_config.linear_start:
            print('We will use LS training')
            self.best_model, self.best_memory = \
              train_linear_start(train_story,
                                 train_questions,
                                 train_qstory,
                                 self.memory,
                                 self.model,
                                 self.loss,
                                 self.general_config,
                                 self.log_path)
        else:
            train_logger = open(os.path.join(self.log_path, 'train.log'), 'w')
            train_logger.write('epoch batch_iter lr loss err\n')
            train_logger.flush()
            val_logger = open(os.path.join(self.log_path, 'val.log'), 'w')
            val_logger.write('epoch batch_iter lr loss err\n')
            val_logger.flush()
            global_batch_iter = 0
            train_logger, val_logger, self.best_model, self.best_memory, _ = \
              train(train_story,
                    train_questions,
                    train_qstory,
                    self.memory,
                    self.model,
                    self.loss,
                    self.general_config,
                    train_logger,
                    val_logger,
                    global_batch_iter)

            train_logger.close()
            val_logger.close()

        # Save model
        self.save_model()
Example #13
def run_task(data_dir, task_id):
    """
    Train and test a single task, then reload the saved model and test it again.
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files  = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))
    #train_files = glob.glob('%s/qa%d_*train.txt' % (data_dir, task_id))
    #test_files  = glob.glob('%s/qa%d_*test.txt' % (data_dir, task_id))

    # Start from an empty dictionary
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory    = parse_babi_task(test_files, dictionary, False)
    

    general_config = BabiConfig(train_story, train_questions, dictionary)


    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    
    with open('R_trained.txt', 'a') as outfile:
        json.dump(general_config.dictionary, outfile, indent=2)

    print("######## trained dictionary")
    print(general_config.dictionary)


    ans_index = test(test_story, test_questions, test_qstory, memory, model, loss, general_config)

    # Reload the model that was just saved; `args` is assumed to be a
    # module-level argparse namespace providing data_dir and model_file
    memn2n = MemN2N(args.data_dir, args.model_file)
    memn2n.load_model()

    # Re-parse the data with the loaded model's dictionary so that word indices
    # match the trained embeddings
    train_story2, train_questions2, train_qstory2 = parse_babi_task(train_files, memn2n.general_config.dictionary, False)
    test_story2, test_questions2, test_qstory2 = parse_babi_task(test_files, memn2n.general_config.dictionary, False)

    #print(len(test_questions2))
    #general_config2 = BabiConfig(train_story2, train_questions2,memn2n.general_config.dictionary)



    with open('R_loaded.txt', 'a') as outfile2:
        json.dump(memn2n.general_config.dictionary, outfile2, indent=2)

    print("???????? loaded dictionary")
    print(memn2n.general_config.dictionary)

    ans_index2 = test(test_story2, test_questions2, test_qstory2, memn2n.memory, memn2n.model, memn2n.loss, memn2n.general_config)
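
If test() returns the predicted answer indices (the name ans_index suggests so, but the example does not confirm it), the save/load round trip can be verified directly. A sketch under that assumption:

import numpy as np

# The reloaded model should reproduce the freshly trained model's predictions
assert np.array_equal(ans_index, ans_index2), "loaded model disagrees with trained model"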