Exemple #1
0
 def __init__(self,
              debug=False,
              label=None,
              excluded_token_types=(Tokenizer.TYPES["WHITESPACE"],
                                    Tokenizer.TYPES["NEWLINE"]),
              case_sensitive=True,
              untagged_rule_name=None,
              no_rules_rule_name=None,
              excluded_rule_name=None,
              return_untagged_tags=False,
              return_no_rules_tags=False,
              return_excluded_tags=False,
              return_included_tags=False,
              text_name=None,
              model_path=None):
     """Set up a tagger backed by topic model data.

     All arguments except ``text_name`` and ``model_path`` are forwarded
     unchanged to the base-class constructor.

     Args:
         debug: Forwarded to the base class.
         label: Forwarded to the base class.
         excluded_token_types: Forwarded to the base class; defaults to
             the tokenizer's whitespace and newline types.
         case_sensitive: Forwarded to the base class.
         untagged_rule_name: Forwarded to the base class and also stored
             on this instance.
         no_rules_rule_name: Forwarded to the base class.
         excluded_rule_name: Forwarded to the base class.
         return_untagged_tags: Forwarded to the base class.
         return_no_rules_tags: Forwarded to the base class.
         return_excluded_tags: Forwarded to the base class.
         return_included_tags: Forwarded to the base class.
         text_name: Required. Stored on the instance and used as the
             last component of the suffix appended to the full label.
         model_path: Required. Path to the topic model data; handed to
             ``TopicModelDictionary`` and its basename is used in the
             suffix appended to the full label.

     Raises:
         ValueError: If ``text_name`` or ``model_path`` is ``None``.
     """
     super(TopicModelTagger,
           self).__init__(debug=debug,
                          label=label,
                          excluded_token_types=excluded_token_types,
                          case_sensitive=case_sensitive,
                          untagged_rule_name=untagged_rule_name,
                          no_rules_rule_name=no_rules_rule_name,
                          excluded_rule_name=excluded_rule_name,
                          return_untagged_tags=return_untagged_tags,
                          return_no_rules_tags=return_no_rules_tags,
                          return_excluded_tags=return_excluded_tags,
                          return_included_tags=return_included_tags)
     if text_name is None:
         raise ValueError("No text_name given.")
     # Keep the validated name: it is needed below to build the label
     # suffix, so it must not be reset to None before that point.
     self.text_name = text_name
     if model_path is None:
         raise ValueError("No path to topic model data given.")
     self.model_path = model_path
     # Instantiate a TopicModelDictionary to retrieve information about
     # the topic model. NOTE(review): self.debug is presumably set by the
     # base-class constructor -- confirm.
     self.model = TopicModelDictionary(debug=self.debug,
                                       model_path=model_path)
     self.untagged_rule_name = untagged_rule_name
     # Ramp setup: placeholder values only; presumably configured later
     # by other code -- confirm against the rest of the class.
     self.num_ramp_steps = None
     self.min_ramped_value = -1
     self.max_ramped_value = -1
     # Topic "rules" start out empty; insertion order is preserved.
     self._rules = OrderedDict()
     # Append "<model basename>.<text name>" to self._full_label.
     self._full_label += ".".join(
         [os.path.basename(self.model_path), self.text_name])