 def __init__(
     self, stemming_language=None, remove_stopwords=False, remove_html_markup=False, remove_latex_markup=False
 ):
     # Fix the verbosity level, then delegate the remaining setup to the base class.
     self.verbose = 3
     BibIndexDefaultTokenizer.__init__(
         self, stemming_language, remove_stopwords, remove_html_markup, remove_latex_markup
     )
 def __init__(self,
              stemming_language=None,
              remove_stopwords=False,
              remove_html_markup=False,
              remove_latex_markup=False):
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords, remove_html_markup,
                                       remove_latex_markup)
 def __init__(self, stemming_language=None, remove_stopwords=False, remove_html_markup=False, remove_latex_markup=False):
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords,
                                       remove_html_markup,
                                       remove_latex_markup)
     # Raw strings avoid invalid-escape warnings for \w and \. in newer Python.
     self.single_initial_re = re.compile(r'^\w\.$')
     self.split_on_re = re.compile(r'[.\s-]')
     # lastname_stopwords describes terms which should not be used for indexing,
     # in multiple-word last names.  These are purely conjunctions, serving the
     # same function as the American hyphen, but using linguistic constructs.
     self.lastname_stopwords = set(['y', 'of', 'and', 'de'])
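The comment above explains what the attributes are for; here is a minimal, runnable sketch of how they could be combined to tokenize a multiple-word last name. The splitting-and-filtering logic below is an assumption for illustration, not the tokenizer's actual method:

 import re

 single_initial_re = re.compile(r'^\w\.$')    # matches a lone initial such as 'J.'
 split_on_re = re.compile(r'[.\s-]')          # splits on dots, whitespace, hyphens
 lastname_stopwords = set(['y', 'of', 'and', 'de'])

 # Hypothetical usage: split a compound surname and drop the conjunctions.
 parts = [p for p in split_on_re.split('Ortega y Gasset') if p]
 tokens = [p for p in parts if p.lower() not in lastname_stopwords]
 print(tokens)  # ['Ortega', 'Gasset']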
Example #4
 def __init__(self,
              stemming_language=None,
              remove_stopwords=False,
              remove_html_markup=False,
              remove_latex_markup=False):
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords, remove_html_markup,
                                       remove_latex_markup)
     # Raw strings avoid invalid-escape warnings for \w and \. in newer Python.
     self.single_initial_re = re.compile(r'^\w\.$')
     self.split_on_re = re.compile(r'[.\s-]')
     # lastname_stopwords describes terms which should not be used for indexing,
     # in multiple-word last names.  These are purely conjunctions, serving the
     # same function as the American hyphen, but using linguistic constructs.
     self.lastname_stopwords = set(['y', 'of', 'and', 'de'])
Example #5
 def __init__(self, stemming_language=None, remove_stopwords=False, remove_html_markup=False, remove_latex_markup=False):
     """Initialisation"""
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords,
                                       remove_html_markup,
                                       remove_latex_markup)
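Every variant above simply forwards its arguments to BibIndexDefaultTokenizer and then adds its own attributes, so the pattern is easy to exercise in isolation. A minimal self-contained sketch, using a hypothetical stand-in for the base class (the real Invenio class does considerably more):

 import re

 class BibIndexDefaultTokenizer(object):
     # Stand-in for the real base class (assumption): it only records
     # the four configuration flags seen in the examples above.
     def __init__(self, stemming_language=None, remove_stopwords=False,
                  remove_html_markup=False, remove_latex_markup=False):
         self.stemming_language = stemming_language
         self.remove_stopwords = remove_stopwords
         self.remove_html_markup = remove_html_markup
         self.remove_latex_markup = remove_latex_markup

 class AuthorTokenizer(BibIndexDefaultTokenizer):
     # Mirrors the author-name __init__ shown in the examples above;
     # the class name here is hypothetical.
     def __init__(self, stemming_language=None, remove_stopwords=False,
                  remove_html_markup=False, remove_latex_markup=False):
         BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                           remove_stopwords,
                                           remove_html_markup,
                                           remove_latex_markup)
         self.single_initial_re = re.compile(r'^\w\.$')
         self.split_on_re = re.compile(r'[.\s-]')
         self.lastname_stopwords = set(['y', 'of', 'and', 'de'])

 t = AuthorTokenizer(remove_stopwords=True)
 print(bool(t.single_initial_re.match('J.')))   # True: a single initial
 print(t.split_on_re.split('Ortega y Gasset'))  # ['Ortega', 'y', 'Gasset']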