def updateIndexes(self):
    """Set up the audit catalog's lexicon/indexes on first install, then
    mirror every portal_catalog index that this catalog does not have yet.
    """
    if not getattr(self, 'audit_lexicon', None):
        # First install: create the audit-specific indexes, metadata
        # columns and the lexicon used by the ZCTextIndexes below.
        self.addIndex('last_audited_date', 'DateIndex')
        self.addIndex('audited_action', 'KeywordIndex')
        for column in ('Title', 'id', 'UID',
                       'last_audited_date', 'audited_action'):
            self.addColumn(column)
        lexicon = PLexicon('audit_lexicon', '', HTMLWordSplitter(),
                           CaseNormalizer(), StopWordRemover())
        self._setObject('audit_lexicon', lexicon)
    portal_catalog = portal_api.get_tool('portal_catalog')
    for index_name, index in portal_catalog._catalog.indexes.items():
        if index_name in self._catalog.indexes.keys():
            # Already present here; nothing to do.
            continue
        if index.meta_type == 'DateRecurringIndex':
            # Recurrence indexes are not mirrored.
            continue
        if index.meta_type == 'ZCTextIndex':
            # ZCTextIndex construction needs an "extra" record that
            # points at the audit lexicon created above.
            extras = Empty()
            extras.doc_attr = index_name
            extras.index_type = 'Okapi BM25 Rank'
            extras.lexicon_id = 'audit_lexicon'
            self.addIndex(index_name, index.meta_type, extras)
        else:
            self.addIndex(index_name, index.meta_type)
class MySplitter:
    """Callable splitter that tokenizes text and filters out stop words
    via the caller-supplied stop-word dictionary."""

    def __init__(self):
        # _v_ prefix: volatile attribute, not persisted by ZODB.
        self._v_splitter = HTMLWordSplitter()

    def __call__(self, text, stopdict, *args, **kwargs):
        tokens = self._v_splitter._split(text)
        # Words found in stopdict map to their (falsy) replacement and are
        # dropped by filter(None, ...); the filter/map form is kept so the
        # return type matches the original exactly.
        return filter(None, map(lambda word: stopdict.get(word, word), tokens))
def make_zc_index():
    """Construct a ZCTextIndex over the "read" attribute.

    ZCTextIndex's constructor wants both an "extra" record (doc_attr,
    lexicon_id) and a caller object carrying the lexicon, so we fabricate
    both from a throwaway attribute-bag class.
    """
    class _Record:
        pass

    extra = _Record()
    extra.doc_attr = "read"
    extra.lexicon_id = "lexicon"

    caller = _Record()
    caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())

    return ZCTextIndex("read", extra, caller)
def enumerateLexicons(self):
    """Return (lexicon_id, *pipeline_elements) tuples for the lexicons
    this object provides: one for plain text, one for HTML."""
    plaintext = ('plaintext_lexicon',
                 Splitter(), CaseNormalizer(), StopWordRemover())
    htmltext = ('htmltext_lexicon',
                HTMLWordSplitter(), CaseNormalizer(), StopWordRemover())
    return (plaintext, htmltext)
def testSplitterLocaleAwareness(self):
    """Verify the splitters honour the active locale when classifying
    word characters (German umlauts in Latin-1 bytes).

    Skips silently (returns) when no German locale is installed.
    """
    from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
    import locale
    loc = locale.setlocale(locale.LC_ALL)  # remember current locale
    # Switch to a German locale so umlauts count as word characters.
    try:
        if sys.platform != 'win32':
            locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')
        else:
            locale.setlocale(locale.LC_ALL, 'German_Germany.1252')
    except locale.Error:
        return  # German locale unavailable; test cannot run here
    try:
        expected = ['m\xfclltonne', 'waschb\xe4r',
                    'beh\xf6rde', '\xfcberflieger']
        words = [" ".join(expected)]
        words = Splitter().process(words)
        self.assertEqual(words, expected)
        # Re-splitting already-split words must be a no-op.
        words = HTMLWordSplitter().process(words)
        self.assertEqual(words, expected)
    finally:
        # BUGFIX: restore the saved locale even when an assertion fails
        # or process() raises; the original restored only on success,
        # leaving the German locale to poison later locale-sensitive
        # tests in the same process.
        locale.setlocale(locale.LC_ALL, loc)
def __init__(self, id='Help', title=''):
    """Initialize the help system: build its private ZCatalog with a
    lexicon, a SearchableText ZCTextIndex, keyword indexes and the
    metadata columns used when rendering results."""
    self.id = id
    self.title = title

    catalog = self.catalog = ZCatalog('catalog')
    lexicon = PLexicon('lexicon', '', HTMLWordSplitter(),
                       CaseNormalizer(), StopWordRemover())
    catalog._setObject('lexicon', lexicon)

    text_index = ZCTextIndex('SearchableText',
                             caller=catalog,
                             index_factory=OkapiIndex,
                             lexicon_id=lexicon.id)
    # Go through _catalog.addIndex directly: catalog.addIndex depends on
    # Product initialization, which has not happened yet.
    catalog._catalog.addIndex('SearchableText', text_index)
    catalog._catalog.addIndex('categories', KeywordIndex('categories'))
    catalog._catalog.addIndex('permissions', KeywordIndex('permissions'))

    for column in ('categories', 'permissions', 'title_or_id', 'url', 'id'):
        catalog.addColumn(column)
def __init__(self):
    # _v_ prefix marks a volatile attribute (never persisted by ZODB);
    # holds the HTML-aware word splitter this object delegates to.
    self._v_splitter = HTMLWordSplitter()