Example #1
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer and cache its settings as attributes.

     After the parent initializer runs, the 'useStem', 'pos', 'justPos'
     and 'xml' settings (each defaulting to 0) are stored as ``stem``,
     ``pos``, ``onlyPos`` and ``xml``. ``puncRe`` is a compiled pattern
     matching a space followed by one punctuation character from
     ``.,;:?!`` and then a space or newline.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     setting = self.get_setting
     self.stem = setting(session, 'useStem', 0)
     self.pos = setting(session, 'pos', 0)
     self.onlyPos = setting(session, 'justPos', 0)
     # Group 1 captures the punctuation plus the whitespace that follows it
     self.puncRe = re.compile('[ ]([.,;:?!][ \n])')
     self.xml = setting(session, 'xml', 0)
Example #2
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer and cache its settings as attributes.

     After the parent initializer runs, the 'useStem', 'pos', 'justPos'
     and 'xml' settings (each defaulting to 0) are stored as ``stem``,
     ``pos``, ``onlyPos`` and ``xml``. ``puncRe`` is a compiled pattern
     matching a space followed by one punctuation character from
     ``.,;:?!`` and then a space or newline.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     setting = self.get_setting
     self.stem = setting(session, 'useStem', 0)
     self.pos = setting(session, 'pos', 0)
     self.onlyPos = setting(session, 'justPos', 0)
     # Group 1 captures the punctuation plus the whitespace that follows it
     self.puncRe = re.compile('[ ]([.,;:?!][ \n])')
     self.xml = setting(session, 'xml', 0)
Example #3
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with the POS types it works on.

     ``types`` is the whitespace-split value of the 'posTypes' setting,
     or the noun tags ``['NN', 'NNP', 'NNS']`` when that setting is
     empty or unset. ``keepPos`` holds the 'pos' setting (default 0).
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     configured = self.get_setting(session, 'posTypes')
     # Types come from config; fall back to nouns when nothing is given
     self.types = configured.split() if configured else ['NN', 'NNP', 'NNS']
     # Whether the /POS tag should be kept rather than stripped
     self.keepPos = self.get_setting(session, 'pos', 0)
Example #4
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with the POS types it works on.

     ``types`` is the whitespace-split value of the 'posTypes' setting,
     or the noun tags ``['NN', 'NNP', 'NNS']`` when that setting is
     empty or unset. ``keepPos`` holds the 'pos' setting (default 0).
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     configured = self.get_setting(session, 'posTypes')
     # Types come from config; fall back to nouns when nothing is given
     self.types = configured.split() if configured else ['NN', 'NNP', 'NNS']
     # Whether the /POS tag should be kept rather than stripped
     self.keepPos = self.get_setting(session, 'pos', 0)
Example #5
0
     def __init__(self, session, config, parent):
         """Resolve the configured lucene filter and its optional argument.

         The 'filter' setting must name an attribute of the ``lucene``
         module whose name ends in 'Filter'; that attribute is stored
         (uninstantiated) as ``self.filter``. The optional 'argument'
         setting is stored as ``self.argument``, or None when it is empty
         or unset.

         Raises:
             ConfigFileException: if the 'filter' setting is missing, does
                 not end in 'Filter', or is not an attribute of ``lucene``.
         """
         SimpleNormalizer.__init__(self, session, config, parent)
         fltr = self.get_setting(session, 'filter')
         # 'filter' is read without a default; if it is unset the value may
         # be a non-string (presumably None - confirm), and slicing that
         # would raise TypeError instead of the intended config error.
         if not (fltr and fltr[-6:] == 'Filter' and hasattr(lucene, fltr)):
             raise ConfigFileException("Unknown Filter")
         self.filter = getattr(lucene, fltr)

         # eg SnowballFilter(strm, 'English')
         # For more complex filter constructors, just subclass
         # FilterNormalizer as required

         # An empty argument is normalised to None
         self.argument = self.get_setting(session, 'argument', '') or None
Example #6
0
    def __init__(self, session, config, parent):
        """Resolve the configured lucene filter and its optional argument.

        The 'filter' setting must name an attribute of the ``lucene``
        module whose name ends in 'Filter'; that attribute is stored
        (uninstantiated) as ``self.filter``. The optional 'argument'
        setting is stored as ``self.argument``, or None when it is empty
        or unset.

        Raises:
            ConfigFileException: if the 'filter' setting is missing, does
                not end in 'Filter', or is not an attribute of ``lucene``.
        """
        SimpleNormalizer.__init__(self, session, config, parent)
        fltr = self.get_setting(session, 'filter')
        # 'filter' is read without a default; if it is unset the value may
        # be a non-string (presumably None - confirm), and slicing that
        # would raise TypeError instead of the intended config error.
        if not (fltr and fltr[-6:] == 'Filter' and hasattr(lucene, fltr)):
            raise ConfigFileException("Unknown Filter")
        self.filter = getattr(lucene, fltr)

        # eg SnowballFilter(strm, 'English')
        # For more complex filter constructors, just subclass
        # FilterNormalizer as required

        # An empty argument is normalised to None
        self.argument = self.get_setting(session, 'argument', '') or None
Example #7
0
 def __init__(self, session, config, parent):
     """Compile the phrase-matching expression and read phrase settings.

     The expression is taken from the 'regexp' setting when it is set.
     Otherwise the 'pattern' setting is expanded: each ``*``, ``+`` and
     ``?`` gets a closing parenthesis appended, and each ``JJ`` / ``NN``
     is replaced by an opening group matching space-prefixed tokens
     carrying that tag. With neither setting, a default is used that
     matches zero or more /JJ-tagged tokens followed by one or more
     /NN-tagged tokens.

     Also sets ``strip`` (a pattern matching the /JJ.. and /NN.. tags in
     upper or lower case), ``minimum`` ('minimumWords', default 0) and
     ``subPhrases`` ('subPhrases', default 0).
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     expression = self.get_setting(session, 'regexp', '')
     if not expression:
         shorthand = self.get_setting(session, 'pattern')
         if shorthand:
             # Order matters: quantifiers are closed first, because the
             # tag expansions themselves contain '+', '?' and '*'.
             expansions = (
                 ('*', '*)'),
                 ('+', '+)'),
                 ('?', '?)'),
                 ('JJ', '((?:[ ][^\\s]+/JJ[SR]?)'),
                 ('NN', '((?:[ ][^\\s]+/NN[SP]*)'),
             )
             for token, replacement in expansions:
                 shorthand = shorthand.replace(token, replacement)
             expression = shorthand
         else:
             expression = "((?:[ ][^\\s]+/JJ[SR]?)*)((?:[ ][^\\s]+/NN[SP]?)+)"
     self.pattern = re.compile(expression)
     self.strip = re.compile('/(JJ[SR]?|NN[SP]*)|/(jj[sr]?|nn[sp]*)')
     self.minimum = self.get_setting(session, 'minimumWords', 0)
     self.subPhrases = self.get_setting(session, 'subPhrases', 0)
Example #8
0
 def __init__(self, session, config, parent):
     """Compile the phrase-matching expression and read phrase settings.

     The expression is taken from the 'regexp' setting when it is set.
     Otherwise the 'pattern' setting is expanded: each ``*``, ``+`` and
     ``?`` gets a closing parenthesis appended, and each ``JJ`` / ``NN``
     is replaced by an opening group matching space-prefixed tokens
     carrying that tag. With neither setting, a default is used that
     matches zero or more /JJ-tagged tokens followed by one or more
     /NN-tagged tokens.

     Also sets ``strip`` (a pattern matching the /JJ.. and /NN.. tags in
     upper or lower case), ``minimum`` ('minimumWords', default 0) and
     ``subPhrases`` ('subPhrases', default 0).
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     expression = self.get_setting(session, 'regexp', '')
     if not expression:
         shorthand = self.get_setting(session, 'pattern')
         if shorthand:
             # Order matters: quantifiers are closed first, because the
             # tag expansions themselves contain '+', '?' and '*'.
             expansions = (
                 ('*', '*)'),
                 ('+', '+)'),
                 ('?', '?)'),
                 ('JJ', '((?:[ ][^\\s]+/JJ[SR]?)'),
                 ('NN', '((?:[ ][^\\s]+/NN[SP]*)'),
             )
             for token, replacement in expansions:
                 shorthand = shorthand.replace(token, replacement)
             expression = shorthand
         else:
             expression = "((?:[ ][^\\s]+/JJ[SR]?)*)((?:[ ][^\\s]+/NN[SP]?)+)"
     self.pattern = re.compile(expression)
     self.strip = re.compile('/(JJ[SR]?|NN[SP]*)|/(jj[sr]?|nn[sp]*)')
     self.minimum = self.get_setting(session, 'minimumWords', 0)
     self.subPhrases = self.get_setting(session, 'subPhrases', 0)
Example #9
0
 def __init__(self, session, config, parent):
     """Always fail, reporting that the "lucene" dependency is missing.

     The parent initializer runs first; then a
     MissingDependencyException built from ``self.objectType`` and
     "lucene" is raised.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     missing = MissingDependencyException(self.objectType, "lucene")
     raise missing
Example #10
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with ``lucene.StopFilter`` as its filter.

     The attribute is stored as-is on ``self.filter``; it is not called
     here.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     stop_filter = lucene.StopFilter
     self.filter = stop_filter
Example #11
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with ``lucene.StopFilter`` as its filter.

     The attribute is stored as-is on ``self.filter``; it is not called
     here.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     stop_filter = lucene.StopFilter
     self.filter = stop_filter
Example #12
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with its fixed list of novel codes.

     ``self.novels`` is a list of fifteen short upper-case codes, kept
     in the order given here.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     codes = [
         'BH', 'BR', 'DC', 'DS', 'ED',
         'GE', 'HT', 'LD', 'MC', 'NN',
         'OCS', 'OMF', 'OT', 'PP', 'TTC',
     ]
     self.novels = codes
Example #13
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with its fixed list of text codes.

     ``self.dickens`` is a list of twenty-three short upper-case codes,
     kept in the order given here (note that 'CHI' precedes 'CH').
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     codes = [
         'AN', 'BH', 'BL', 'BR', 'CC', 'CHI',
         'CH', 'DC', 'DS', 'ED', 'GE', 'HM',
         'HT', 'LD', 'MC', 'NN', 'OCS', 'OMF',
         'OT', 'PP', 'SB', 'TTC', 'UT',
     ]
     self.dickens = codes
Example #14
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with its fixed list of text codes.

     ``self.dickens`` is a list of twenty-three short upper-case codes,
     kept in the order given here (note that 'CHI' precedes 'CH').
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     codes = [
         'AN', 'BH', 'BL', 'BR', 'CC', 'CHI',
         'CH', 'DC', 'DS', 'ED', 'GE', 'HM',
         'HT', 'LD', 'MC', 'NN', 'OCS', 'OMF',
         'OT', 'PP', 'SB', 'TTC', 'UT',
     ]
     self.dickens = codes
Example #15
0
 def __init__(self, session, config, parent):
     """Initialise the normalizer with its fixed list of novel codes.

     ``self.novels`` is a list of fifteen short upper-case codes, kept
     in the order given here.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     codes = [
         'BH', 'BR', 'DC', 'DS', 'ED',
         'GE', 'HT', 'LD', 'MC', 'NN',
         'OCS', 'OMF', 'OT', 'PP', 'TTC',
     ]
     self.novels = codes
Example #16
0
 def __init__(self, session, config, parent):
     """Always fail, reporting that the "lucene" dependency is missing.

     The parent initializer runs first; then a
     MissingDependencyException built from ``self.objectType`` and
     "lucene" is raised.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     missing = MissingDependencyException(self.objectType, "lucene")
     raise missing