Python normalize_text Examples

Programming Language: Python

Namespace/Package Name: yatiri.text

Method/Function: normalize_text

Examples at hotexamples.com: 6

Python normalize_text - 6 examples found. These are the top-rated real-world Python examples of yatiri.text.normalize_text extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: search.py Project: rolando-archive/yatiri

def index(items, doc_type, create=False):
    """Add one document per item to the index behind ``IndexerContext``.

    Every ``(key, data)`` pair taken from ``items`` is turned into an
    ``xappy.UnprocessedDocument`` and added to the connection yielded by
    ``IndexerContext(settings.XAPIAN_DB)``.

    Args:
        items: iterable of ``(key, data)`` pairs; ``data`` must offer a
            ``get(field[, default])`` method.
        doc_type: value appended to each document under the ``'type'`` field.
        create: when true, ``create_index(conn)`` runs in its own ``with``
            block before any item is added.

    Returns:
        The number of items consumed from ``items`` (``0`` if it was empty).
    """
    indexer = IndexerContext(settings.XAPIAN_DB)
    if create:
        with indexer as conn:
            create_index(conn)

    def preprocess_text(text):
        # Text fields are indexed normalized and lower-cased.
        return normalize_text(text).lower()

    with indexer as conn:
        count = 0
        for count, (key, data) in enumerate(items, 1):
            doc = xappy.UnprocessedDocument(key)
            doc.append('type', doc_type)

            # Free-text fields: skip falsy values, preprocess the rest.
            for field in TEXT_FIELDS:
                value = data.get(field, '')
                if value:
                    doc.append(field, preprocess_text(value))

            # Exact-match fields: stored untouched, except 'date'.
            for field in EXACT_FIELDS:
                value = data.get(field, '')
                if value and field == 'date':
                    # Keep only the part before the first space and drop it
                    # unless it holds exactly two dashes
                    # (presumably YYYY-MM-DD -- confirm against the data).
                    day = value.partition(' ')[0]
                    value = day if day.count('-') == 2 else None
                if value:
                    doc.append(field, value)

            # Sortable fields carry extra keyword arguments for append().
            for field, kwargs in SORTABLE_FIELDS:
                value = data.get(field)
                if value:
                    doc.append(field, value, **kwargs)

            for field in FACET_FIELDS:
                value = data.get(field)
                if value:
                    doc.append(field, value)

            for field in COLLAPSE_FIELDS:
                value = data.get(field)
                if value:
                    doc.append(field, value)

            conn.add(doc)
        return count

Example #2

Show file

def index(items, doc_type, create=False):
    """Build a document for each ``(key, data)`` item and add it to the index.

    The index connection comes from ``IndexerContext(settings.XAPIAN_DB)``.
    Each document gets ``doc_type`` under ``'type'`` plus every non-falsy
    value found in ``data`` for the names listed in ``TEXT_FIELDS``,
    ``EXACT_FIELDS``, ``SORTABLE_FIELDS``, ``FACET_FIELDS`` and
    ``COLLAPSE_FIELDS``, in that order.

    Args:
        items: iterable yielding ``(key, data)`` pairs, where ``data``
            supports ``get(field[, default])``.
        doc_type: value appended under the ``'type'`` field.
        create: if true, ``create_index(conn)`` is called first, inside a
            separate ``with indexer`` block.

    Returns:
        How many items were read from ``items``; ``0`` when there were none.
    """
    indexer = IndexerContext(settings.XAPIAN_DB)
    if create:
        with indexer as conn:
            create_index(conn)

    def preprocess_text(raw):
        # Normalize first, then lower-case.
        return normalize_text(raw).lower()

    def clean_exact(field, value):
        # Only a non-falsy 'date' value is altered; all else passes through.
        if field != 'date' or not value:
            return value
        # Keep what precedes the first space; reject it unless it contains
        # exactly two '-' characters.
        date_part = value.partition(' ')[0]
        if date_part.count('-') != 2:
            return None
        return date_part

    with indexer as conn:
        total = 0
        for total, (key, data) in enumerate(items, 1):
            doc = xappy.UnprocessedDocument(key)
            doc.append('type', doc_type)

            for name in TEXT_FIELDS:
                text = data.get(name, '')
                if text:
                    doc.append(name, preprocess_text(text))

            for name in EXACT_FIELDS:
                exact = clean_exact(name, data.get(name, ''))
                if exact:
                    doc.append(name, exact)

            for name, options in SORTABLE_FIELDS:
                sortable = data.get(name)
                if sortable:
                    doc.append(name, sortable, **options)

            for name in FACET_FIELDS:
                facet = data.get(name)
                if facet:
                    doc.append(name, facet)

            for name in COLLAPSE_FIELDS:
                collapse = data.get(name)
                if collapse:
                    doc.append(name, collapse)

            conn.add(doc)
        return total

Example #3

Show file

File: features.py Project: rmax/yatiri

def preprocessor(doc, fields):
    """Join the selected fields of ``doc`` with newlines and normalize them.

    Args:
        doc: mapping-like object providing ``get(name, default)``.
        fields: iterable of field names to pull from ``doc``; a missing
            field contributes an empty string.

    Returns:
        Whatever ``normalize_text`` returns for the newline-joined text.
    """
    parts = [doc.get(name, '') for name in fields]
    joined = '\n'.join(parts)
    return normalize_text(joined)

Example #4

Show file

File: features.py Project: rmax/yatiri

def only_camelcase(doc):
    """Return the ``RE_CAMEL`` matches from a document as one string.

    The headline and body of ``doc`` are concatenated with a single space,
    passed through ``normalize_text``, and scanned with
    ``RE_CAMEL.findall``; the matches are joined with single spaces.

    Args:
        doc: mapping with ``'headline'`` and ``'body'`` entries.

    Returns:
        A space-separated string of the matches (empty if none are found).
    """
    # NOTE(review): RE_CAMEL is defined outside this block; presumably it
    # matches CamelCase tokens -- confirm against its definition.
    text = normalize_text(doc['headline'] + ' ' + doc['body'])
    matches = RE_CAMEL.findall(text)
    return ' '.join(matches)

Example #5

Show file

File: features.py Project: rolando-archive/yatiri

def preprocessor(doc, fields):
    """Normalize the newline-joined values of ``fields`` taken from ``doc``.

    Args:
        doc: object exposing ``get(name, default)``.
        fields: iterable of field names; names absent from ``doc`` yield
            an empty string in the joined text.

    Returns:
        The result of ``normalize_text`` applied to the joined text.
    """
    values = []
    for field in fields:
        values.append(doc.get(field, ''))
    return normalize_text('\n'.join(values))

Example #6

Show file

File: features.py Project: rolando-archive/yatiri

def only_camelcase(doc):
    """Collect every ``RE_CAMEL`` match in the normalized headline and body.

    Args:
        doc: mapping that contains ``'headline'`` and ``'body'``.

    Returns:
        The matches from ``RE_CAMEL.findall`` joined by single spaces.
    """
    headline, body = doc['headline'], doc['body']
    # Headline and body are separated by one space before normalization.
    normalized = normalize_text(headline + ' ' + body)
    return ' '.join(RE_CAMEL.findall(normalized))