Exemple #1
0
 def prepare_text_vocab(self, all_texts):
     """Create the text vocab file unless an existing one should be reused.

     The file at ``self.text_vocab_file_path`` is kept as-is when it already
     exists and ``self.use_custom_vocab`` is truthy. In every other case
     ``prepare_vocab`` is called with that path, ``all_texts`` and
     ``min_frequency=self.vocab_min_frequency``.

     Args:
         all_texts: Text data handed to ``prepare_vocab`` unchanged.
     """
     vocab_path = self.text_vocab_file_path

     # Existence is checked before the flag, so the path is always probed.
     keep_existing = os.path.exists(vocab_path) and self.use_custom_vocab
     if keep_existing:
         logging.info("Reuse text vocab file: {}".format(vocab_path))
         return

     prepare_vocab(vocab_path,
                   all_texts,
                   min_frequency=self.vocab_min_frequency)
     logging.info("Generate text vocab file: {}".format(vocab_path))
Exemple #2
0
 def prepare_label_vocab(self, all_labels):
     """Create one label vocab file per output, reusing existing ones.

     For each index below ``self.output_num`` the file at
     ``self.label_vocab_file_paths[index]`` is kept when it already exists
     and ``self.use_custom_vocab`` is truthy. Otherwise ``prepare_vocab`` is
     called with that path, ``all_labels[index]``,
     ``min_frequency=self.vocab_min_frequency`` and ``use_default_dict=True``.

     Args:
         all_labels: Indexable collection; element ``index`` is passed to
             ``prepare_vocab`` for the output with the same index.
     """
     for output_idx in range(self.output_num):
         vocab_path = self.label_vocab_file_paths[output_idx]

         if os.path.exists(vocab_path) and self.use_custom_vocab:
             logging.info("Reuse label vocab file: {}".format(vocab_path))
             continue

         # Labels are only looked up when a vocab actually has to be built.
         prepare_vocab(vocab_path,
                       all_labels[output_idx],
                       min_frequency=self.vocab_min_frequency,
                       use_default_dict=True)
         logging.info("Generate label vocab file: {}".format(vocab_path))
Exemple #3
0
    def _prepare_vocabs(self, all_texts, all_labels):
        """Prepare the vocab for x (texts) and then for y (labels).

        The text vocab is always produced by ``prepare_vocab`` from
        ``all_texts`` with ``min_frequency=self.vocab_min_frequency``.

        The label vocab comes from ``prepare_vocab_from_config`` when the
        ``"vocab"`` key is present in
        ``self.config["data"]["task"]["classes"]``; otherwise it is produced
        by ``prepare_vocab`` from ``all_labels`` with ``min_frequency=1`` and
        ``use_default_dict=False``.

        Args:
            all_texts: Text data handed to ``prepare_vocab`` unchanged.
            all_labels: Label data handed to ``prepare_vocab`` unchanged
                (unused when the vocab is taken from the config).
        """
        logging.info("Preparing vocab for x ...")
        prepare_vocab(
            self.text_vocab_file_path,
            all_texts,
            min_frequency=self.vocab_min_frequency,
        )

        logging.info("Preparing vocab for y ...")
        classes_config = self.config["data"]["task"]["classes"]
        if "vocab" not in classes_config:
            prepare_vocab(
                self.label_vocab_file_path,
                all_labels,
                min_frequency=1,
                use_default_dict=False,
            )
        else:
            prepare_vocab_from_config(self.label_vocab_file_path, self.config)
Exemple #4
0
 def prepare_label_vocab(self, all_labels):
     """Create one label vocab file per output, reusing existing ones.

     For each index below ``self.output_num`` the file at
     ``self.label_vocab_file_paths[index]`` is kept when it already exists
     and ``self.use_custom_vocab`` is truthy. Otherwise the file is
     generated in one of two ways:

     * when ``"vocab"`` is a key of
       ``self.config["data"]["task"]["classes"]``, through
       ``prepare_vocab_from_config`` with ``output_index`` set to the index
       if ``self.multi_output`` is truthy and to ``None`` if not;
     * in all other cases, through ``prepare_vocab`` with
       ``all_labels[index]``, ``min_frequency=1`` and
       ``use_default_dict=False``.

     Args:
         all_labels: Indexable collection; element ``index`` is passed to
             ``prepare_vocab`` for the output with the same index.
     """
     for output_idx in range(self.output_num):
         vocab_path = self.label_vocab_file_paths[output_idx]

         if os.path.exists(vocab_path) and self.use_custom_vocab:
             logging.info("Reuse label vocab file: {}".format(vocab_path))
             continue

         if "vocab" not in self.config["data"]["task"]["classes"]:
             prepare_vocab(vocab_path,
                           all_labels[output_idx],
                           min_frequency=1,
                           use_default_dict=False)
         else:
             # Single-output setups pass no index to the config reader.
             if self.multi_output:
                 output_index = output_idx
             else:
                 output_index = None
             prepare_vocab_from_config(vocab_path,
                                       self.config,
                                       output_index=output_index)
         logging.info("Generate label vocab file: {}".format(vocab_path))