Python get_regexp_from_termsの例

プログラミング言語: Python

名前空間/パッケージ名: digipal.utils

メソッド/関数: get_regexp_from_terms

hotexamples.comのコード掲載数: 4

Python get_regexp_from_terms - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdigipal.utils.get_regexp_from_termsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: search_manuscripts.py プロジェクト: MCadeStewart/digipal

    def _get_best_description_location(self, descriptions):
        '''
            Returns the first description that matches one term in the query
            and the location of the first match in the string.

            TODO: prefer a description and location which contains multiple terms
                next to each other.
        '''
        desc = None
        location = 0
        #terms = [t.lower() for t in self._get_query_terms() if t not in [u'or', u'and', u'not']]
        from digipal.utils import get_regexp_from_terms
        #terms = get_tokens_from_phrase(descriptions)
        terms = self._get_query_terms(True)
        re_terms = get_regexp_from_terms(terms)

        if re_terms:
            # search the descriptions
            for adesc in descriptions:
                m = re.search(ur'(?ui)' + re_terms, adesc.get_description_plain_text())
                if m:
                    location = m.start()
                    desc = adesc
                    break
        return desc, location

コード例 #2

ファイルを表示

ファイル: search_manuscripts.py プロジェクト: suzypiat/digipal

    def _get_best_description_location(self, descriptions):
        '''
            Returns the first description that matches one term in the query
            and the location of the first match in the string.

            TODO: prefer a description and location which contains multiple terms
                next to each other.
        '''
        desc = None
        location = 0
        #terms = [t.lower() for t in self._get_query_terms() if t not in [u'or', u'and', u'not']]
        from digipal.utils import get_regexp_from_terms
        #terms = get_tokens_from_phrase(descriptions)
        terms = self._get_query_terms(True)
        re_terms = get_regexp_from_terms(terms)

        if re_terms:
            # search the descriptions
            for adesc in descriptions:
                m = re.search(ur'(?ui)' + re_terms, adesc.get_description_plain_text())
                if m:
                    location = m.start()
                    desc = adesc
                    break
        return desc, location

コード例 #3

ファイルを表示

ファイル: html_escape.py プロジェクト: suzypiat/digipal

def tag_terms(value, terms=None):
    '''Wrap all occurrences of the terms found in value with a <span class="found-term">.
        Highlight terms in html.
        Terms is an array of words.
    '''
    from digipal.utils import get_regexp_from_terms, get_tokens_from_phrase, remove_combining_marks, remove_accents

    # not nice but we have to do this for the matching below to work
    # we loose *some* the accents, e.g. u'r\u0305'
    value = remove_combining_marks(value)
    value_no_accent = remove_accents(value)

    if terms:
        # Surround the occurrences of those terms in the value with a span (class="found-term")
        # TODO: this should really be done by Whoosh instead of manually here to deal properly with special cases.
        # e.g. 'G*' should highlight g and the rest of the word
        # Here we have a simple implementation that look for exact and complete matches only.
        # TODO: other issue is highlight of non field values, e.g. (G.) added at the end each description
        #         or headings.
        for re_term in get_regexp_from_terms(terms, True):
            # value = re.sub(ur'(?iu)(>[^<]*)('+re_term+ur')', ur'\1<span
            # class="found-term">\2</span>', u'>'+value)[1:]
            pos = 1
            pattern = re.compile(ur'(?iu)(>[^<]*?)(' + re_term + ur')')
            # print re_term
            while True:
                # print value_no_accent, pos
                # pos-1 because we want to include the last > we've inserted in the previous loop.
                # without this we might miss occurrences
                m = pattern.search(value_no_accent, pos - 1)
                # print m
                if m:
                    replacement = u'%s<span class="found-term">%s</span>' % (
                        value[m.start(1):m.end(1)], value[m.start(2):m.end(2)])

                    value = value[:m.start(0)] + replacement + value[m.end(0):]

                    replacement = u'%s<span class="found-term">%s</span>' % (
                        value_no_accent[m.start(1):m.end(1)],
                        value_no_accent[m.start(2):m.end(2)])
                    value_no_accent = value_no_accent[:m.start(
                        0)] + replacement + value_no_accent[m.end(0):]

                    pos = m.start(0) + len(replacement)
                else:
                    break

    return value

コード例 #4

ファイルを表示

ファイル: html_escape.py プロジェクト: kcl-ddh/digipal

def tag_terms(value, terms=None):
    '''Wrap all occurrences of the terms found in value with a <span class="found-term">.
        Highlight terms in html.
        Terms is an array of words.
    '''
    from digipal.utils import get_regexp_from_terms, get_tokens_from_phrase, remove_combining_marks, remove_accents

    # not nice but we have to do this for the matching below to work
    # we loose *some* the accents, e.g. u'r\u0305'
    value = remove_combining_marks(value)
    value_no_accent = remove_accents(value)

    if terms:
        # Surround the occurrences of those terms in the value with a span (class="found-term")
        # TODO: this should really be done by Whoosh instead of manually here to deal properly with special cases.
        # e.g. 'G*' should highlight g and the rest of the word
        # Here we have a simple implementation that look for exact and complete matches only.
        # TODO: other issue is highlight of non field values, e.g. (G.) added at the end each description
        #         or headings.
        for re_term in get_regexp_from_terms(terms, True):
            #value = re.sub(ur'(?iu)(>[^<]*)('+re_term+ur')', ur'\1<span class="found-term">\2</span>', u'>'+value)[1:]
            pos = 1
            pattern = re.compile(ur'(?iu)(>[^<]*?)(' + re_term + ur')')
            # print re_term
            while True:
                # print value_no_accent, pos
                # pos-1 because we want to include the last > we've inserted in the previous loop.
                # without this we might miss occurrences
                m = pattern.search(value_no_accent, pos - 1)
                # print m
                if m:
                    replacement = u'%s<span class="found-term">%s</span>' % (
                        value[m.start(1):m.end(1)], value[m.start(2):m.end(2)])

                    value = value[:m.start(0)] + replacement + value[m.end(0):]

                    replacement = u'%s<span class="found-term">%s</span>' % (
                        value_no_accent[m.start(1):m.end(1)], value_no_accent[m.start(2):m.end(2)])
                    value_no_accent = value_no_accent[:m.start(
                        0)] + replacement + value_no_accent[m.end(0):]

                    pos = m.start(0) + len(replacement)
                else:
                    break

    return value