Example #1
0
def index(items, doc_type, create=False):
    """Add each ``(key, data)`` pair from *items* to the index.

    The index is opened through ``IndexerContext(settings.XAPIAN_DB)``.
    Every pair becomes one ``xappy.UnprocessedDocument`` keyed by ``key``
    and tagged with a ``'type'`` field set to *doc_type*.  Empty or
    missing values in ``data`` are skipped for every field group.

    :param items: iterable of ``(key, data)`` pairs; ``data`` must offer
        a ``get`` method.
    :param doc_type: value stored in each document's ``'type'`` field.
    :param create: when true, ``create_index`` is run on a connection
        before any document is added.
    :returns: the number of items processed (``0`` for empty *items*).
    """
    indexer = IndexerContext(settings.XAPIAN_DB)
    if create:
        with indexer as conn:
            create_index(conn)

    with indexer as conn:
        count = 0
        for count, (key, data) in enumerate(items, 1):
            doc = xappy.UnprocessedDocument(key)
            doc.append('type', doc_type)

            # Free-text fields are normalised and lower-cased first.
            for name in TEXT_FIELDS:
                text = data.get(name, '')
                if text:
                    doc.append(name, normalize_text(text).lower())

            for name in EXACT_FIELDS:
                value = data.get(name, '')
                if name == 'date' and value:
                    # Keep only what precedes the first space, and drop
                    # the value unless it has exactly two hyphens.
                    day = value.partition(' ')[0]
                    value = day if day.count('-') == 2 else None
                if value:
                    doc.append(name, value)

            # Sortable fields carry extra keyword options for append().
            for name, options in SORTABLE_FIELDS:
                value = data.get(name)
                if value:
                    doc.append(name, value, **options)

            # Facet and collapse fields are appended the same way.
            for group in (FACET_FIELDS, COLLAPSE_FIELDS):
                for name in group:
                    value = data.get(name)
                    if value:
                        doc.append(name, value)

            conn.add(doc)
        return count
Example #2
0
def index(items, doc_type, create=False):
    """Add each ``(key, data)`` pair from *items* to the index.

    The index is opened through ``IndexerContext(settings.XAPIAN_DB)``.
    Every pair becomes one ``xappy.UnprocessedDocument`` keyed by ``key``
    and tagged with a ``'type'`` field set to *doc_type*.  Empty or
    missing values in ``data`` are skipped for every field group.

    :param items: iterable of ``(key, data)`` pairs; ``data`` must offer
        a ``get`` method.
    :param doc_type: value stored in each document's ``'type'`` field.
    :param create: when true, ``create_index`` is run on a connection
        before any document is added.
    :returns: the number of items processed (``0`` for empty *items*).
    """
    indexer = IndexerContext(settings.XAPIAN_DB)
    if create:
        with indexer as conn:
            create_index(conn)

    with indexer as conn:
        count = 0
        for count, (key, data) in enumerate(items, 1):
            doc = xappy.UnprocessedDocument(key)
            doc.append('type', doc_type)

            # Free-text fields are normalised and lower-cased first.
            for name in TEXT_FIELDS:
                text = data.get(name, '')
                if text:
                    doc.append(name, normalize_text(text).lower())

            for name in EXACT_FIELDS:
                value = data.get(name, '')
                if name == 'date' and value:
                    # Keep only what precedes the first space, and drop
                    # the value unless it has exactly two hyphens.
                    day = value.partition(' ')[0]
                    value = day if day.count('-') == 2 else None
                if value:
                    doc.append(name, value)

            # Sortable fields carry extra keyword options for append().
            for name, options in SORTABLE_FIELDS:
                value = data.get(name)
                if value:
                    doc.append(name, value, **options)

            # Facet and collapse fields are appended the same way.
            for group in (FACET_FIELDS, COLLAPSE_FIELDS):
                for name in group:
                    value = data.get(name)
                    if value:
                        doc.append(name, value)

            conn.add(doc)
        return count
Example #3
0
def preprocessor(doc, fields):
    """Join the requested *fields* of *doc* with newlines and normalise them.

    Fields absent from *doc* contribute an empty string.  The joined text
    is passed through ``normalize_text`` and that result is returned.
    """
    parts = [doc.get(name, '') for name in fields]
    combined = '\n'.join(parts)
    return normalize_text(combined)
Example #4
0
def only_camelcase(doc):
    """Return the ``RE_CAMEL`` matches in *doc*, separated by spaces.

    The ``'headline'`` and ``'body'`` entries of *doc* are concatenated
    with a single space, run through ``normalize_text``, and searched
    with ``RE_CAMEL.findall``.
    """
    raw_text = doc['headline'] + ' ' + doc['body']
    cleaned = normalize_text(raw_text)
    matches = RE_CAMEL.findall(cleaned)
    return ' '.join(matches)
Example #5
0
def preprocessor(doc, fields):
    """Join the requested *fields* of *doc* with newlines and normalise them.

    Fields absent from *doc* contribute an empty string.  The joined text
    is passed through ``normalize_text`` and that result is returned.
    """
    parts = [doc.get(name, '') for name in fields]
    combined = '\n'.join(parts)
    return normalize_text(combined)
Example #6
0
def only_camelcase(doc):
    """Return the ``RE_CAMEL`` matches in *doc*, separated by spaces.

    The ``'headline'`` and ``'body'`` entries of *doc* are concatenated
    with a single space, run through ``normalize_text``, and searched
    with ``RE_CAMEL.findall``.
    """
    raw_text = doc['headline'] + ' ' + doc['body']
    cleaned = normalize_text(raw_text)
    matches = RE_CAMEL.findall(cleaned)
    return ' '.join(matches)