def index(items, doc_type, create=False):
    """Add one search document per item to the index at ``settings.XAPIAN_DB``.

    ``items`` is iterated as ``(key, data)`` pairs; ``data`` is read with
    ``.get`` and so is expected to be dict-like.  Every document receives a
    ``'type'`` field set to ``doc_type``; the remaining fields are copied from
    ``data`` according to the module-level field lists, and empty/falsy
    values are skipped.

    When ``create`` is true, ``create_index`` is run on a connection before
    any document is added.

    Returns the number of items processed (0 when ``items`` is empty).
    """
    indexer = IndexerContext(settings.XAPIAN_DB)

    if create:
        with indexer as conn:
            create_index(conn)

    with indexer as conn:
        count = 0
        for count, (key, data) in enumerate(items, 1):
            document = xappy.UnprocessedDocument(key)
            document.append('type', doc_type)

            # Free-text fields are normalized and lower-cased before indexing.
            for name in TEXT_FIELDS:
                text = data.get(name, '')
                if text:
                    document.append(name, normalize_text(text).lower())

            # Exact-match fields are stored as-is, except 'date': only the
            # part before the first space is kept, and it is dropped unless
            # it contains exactly two dashes.
            for name in EXACT_FIELDS:
                value = data.get(name, '')
                if name == 'date' and value:
                    day = value.partition(' ')[0]
                    value = day if day.count('-') == 2 else None
                if value:
                    document.append(name, value)

            # Sortable fields carry extra keyword arguments for append().
            for name, options in SORTABLE_FIELDS:
                value = data.get(name)
                if value:
                    document.append(name, value, **options)

            # Facet and collapse fields are handled identically, facets first.
            for group in (FACET_FIELDS, COLLAPSE_FIELDS):
                for name in group:
                    value = data.get(name)
                    if value:
                        document.append(name, value)

            conn.add(document)

    return count
def preprocessor(doc, fields):
    """Join the values of ``fields`` taken from ``doc`` and normalize them.

    Each field is looked up with ``doc.get``; a missing field contributes an
    empty string.  The values are joined with newlines in the order given by
    ``fields`` and the resulting text is passed through ``normalize_text``.
    """
    parts = [doc.get(name, '') for name in fields]
    joined = '\n'.join(parts)
    return normalize_text(joined)
def only_camelcase(doc):
    """Return the ``RE_CAMEL`` matches found in a document, space-separated.

    The document's ``'headline'`` and ``'body'`` entries are concatenated with
    a single space, run through ``normalize_text``, and scanned with
    ``RE_CAMEL.findall``.  The pattern itself is defined elsewhere; judging by
    its name it presumably matches CamelCase words.
    """
    text = normalize_text(doc['headline'] + ' ' + doc['body'])
    matches = RE_CAMEL.findall(text)
    return ' '.join(matches)
def preprocessor(doc, fields):
    """Build one normalized text blob from selected fields of ``doc``.

    For every name in ``fields`` the value ``doc.get(name, '')`` is collected
    (so absent fields yield an empty string).  The collected values are
    newline-joined in order and the result of ``normalize_text`` on that
    string is returned.
    """
    values = []
    for field_name in fields:
        values.append(doc.get(field_name, ''))
    return normalize_text('\n'.join(values))
def only_camelcase(doc):
    """Extract every ``RE_CAMEL`` match from the headline and body of ``doc``.

    ``doc['headline']`` and ``doc['body']`` are joined by one space and
    normalized with ``normalize_text``; all ``RE_CAMEL.findall`` results on
    that text are returned as a single space-separated string.
    """
    normalized = normalize_text(doc['headline'] + ' ' + doc['body'])
    found = RE_CAMEL.findall(normalized)
    return ' '.join(found)