def get_schema():
    """Return the schema used for the index.

    Fields declared:

    * ``id`` -- stored ``ID`` field, flagged as unique.
    * ``title`` / ``content`` -- stored ``TEXT`` fields.
    * ``title_row`` / ``content_row`` -- ``STORED`` fields.
    """
    return wfields.Schema(
        id=wfields.ID(stored=True, unique=True),
        title=wfields.TEXT(stored=True),
        title_row=wfields.STORED(),
        content=wfields.TEXT(stored=True),
        content_row=wfields.STORED(),
    )
def _create_data_index():
    """Create a brand-new index inside ``DATA_DIR`` and return it.

    The schema keys documents by a unique, stored ``url``; ``type``,
    ``title``, ``description``, ``org`` and ``subtype`` are ``STORED``
    fields, and ``content`` is a ``TEXT`` field declared without
    ``stored=True``.

    :raises AssertionError: if an index already exists in ``DATA_DIR``
        (the exception argument is ``DATA_DIR``).
    :return: the index object returned by ``whoosh_index.create_in``.
    """
    schema = whoosh_fields.Schema(
        url=whoosh_fields.ID(stored=True, unique=True),
        type=whoosh_fields.STORED(),
        title=whoosh_fields.STORED(),
        description=whoosh_fields.STORED(),
        org=whoosh_fields.STORED(),
        subtype=whoosh_fields.STORED(),
        content=whoosh_fields.TEXT(),
    )
    _ensure_dir(DATA_DIR)
    # Explicit raise instead of an ``assert`` statement: asserts are removed
    # when Python runs with -O, which would silently drop this guard and let
    # create_in() run against a directory that already holds an index.
    # AssertionError is kept so existing callers see the same exception type.
    if whoosh_index.exists_in(DATA_DIR):
        raise AssertionError(DATA_DIR)
    return whoosh_index.create_in(DATA_DIR, schema)
def project_schema(self):
    """Return the schema describing an indexed project document.

    ``path`` is the unique, stored identifier. ``name``, ``user``,
    ``index`` and ``type`` are stored ``ID`` fields; ``classifiers`` and
    ``keywords`` are scorable ``KEYWORD`` fields (only ``keywords`` is
    stored, only ``classifiers`` uses comma separation); ``version``,
    ``doc_version``, ``text_path`` and ``text_title`` are ``STORED``
    fields; ``text`` is an unstored ``TEXT`` field analyzed with
    ``NgramWordAnalyzer`` and with ``phrase`` disabled.
    """
    schema_fields = {
        "path": fields.ID(stored=True, unique=True),
        "name": fields.ID(stored=True),
        "user": fields.ID(stored=True),
        "index": fields.ID(stored=True),
        "classifiers": fields.KEYWORD(commas=True, scorable=True),
        "keywords": fields.KEYWORD(stored=True, commas=False, scorable=True),
        "version": fields.STORED(),
        "doc_version": fields.STORED(),
        "type": fields.ID(stored=True),
        "text_path": fields.STORED(),
        "text_title": fields.STORED(),
        "text": fields.TEXT(
            analyzer=NgramWordAnalyzer(),
            stored=False,
            phrase=False,
        ),
    }
    return fields.Schema(**schema_fields)
def create_whoosh_schema(self):
    """
    Build a schema instance from the column settings in ``self.columns``.

    A ``fields.SchemaClass`` subclass named ``WhooshSchema`` is created on
    the fly with one attribute per column setting (keyed by the setting's
    ``name``), then instantiated. The field chosen for each setting is the
    first match of:

    - ``type_is_ngram``: stored ``NGRAM`` using the setting's
      ``ngram_minsize`` / ``ngram_maxsize``
    - ``type_is_phrase``: stored ``TEXT``
    - ``type_is_keyword``: stored ``KEYWORD`` using the setting's
      ``keyword_lowercase`` / ``keyword_commas``
    - otherwise: ``STORED``

    :rtype: SchemaClass
    """
    def _field_for(setting):
        # Checks run in the same priority order: ngram, phrase, keyword.
        if setting.type_is_ngram:
            return fields.NGRAM(
                minsize=setting.ngram_minsize,
                maxsize=setting.ngram_maxsize,
                stored=True,
            )
        if setting.type_is_phrase:
            return fields.TEXT(stored=True)
        if setting.type_is_keyword:
            return fields.KEYWORD(
                lowercase=setting.keyword_lowercase,
                commas=setting.keyword_commas,
                stored=True,
            )
        return fields.STORED()

    class_attrs = OrderedDict()
    for setting in self.columns:
        class_attrs[setting.name] = _field_for(setting)

    # str() is kept so the name passed to type() is a native str.
    class_name = str("WhooshSchema")
    schema_cls = type(class_name, (fields.SchemaClass,), class_attrs)
    return schema_cls()
def create_whoosh_schema(self):
    """
    Build a schema instance with one field per entry of ``self.columns``.

    A ``fields.SchemaClass`` subclass named ``WhooshSchema`` is created on
    the fly and instantiated. For each column the first matching rule
    decides the field:

    - column in ``self.ngram_columns``: stored ``NGRAM`` using
      ``self.ngram_minsize`` / ``self.ngram_maxsize``
    - column in ``self.phrase_columns``: stored ``TEXT``
    - column in ``self.keyword_columns``: stored ``KEYWORD`` using
      ``self.keyword_lowercase`` / ``self.keyword_commas``
    - otherwise: ``STORED``
    """
    class_attrs = OrderedDict()
    for column in self.columns:
        if column in self.ngram_columns:
            class_attrs[column] = fields.NGRAM(
                minsize=self.ngram_minsize,
                maxsize=self.ngram_maxsize,
                stored=True,
            )
            continue
        if column in self.phrase_columns:
            class_attrs[column] = fields.TEXT(stored=True)
            continue
        if column in self.keyword_columns:
            class_attrs[column] = fields.KEYWORD(
                lowercase=self.keyword_lowercase,
                commas=self.keyword_commas,
                stored=True,
            )
            continue
        # No special handling requested for this column.
        class_attrs[column] = fields.STORED()

    # str() is kept so the name passed to type() is a native str.
    class_name = str("WhooshSchema")
    schema_cls = type(class_name, (fields.SchemaClass,), class_attrs)
    return schema_cls()