class _DefaultSearchSchema(SchemaClass):
    """General-purpose search schema."""

    # Identification fields.
    object_key = ID(stored=True, unique=True)
    id = NUMERIC(
        numtype=int,
        bits=64,
        signed=False,
        stored=True,
        unique=False,
    )
    object_type = ID(stored=True, unique=False)
    creator = ID(stored=True)
    owner = ID(stored=True)

    #: Security index: lists the roles and user/group ids that are allowed
    #: to *see* this content.
    allowed_roles_and_users = KEYWORD(stored=True)

    #: Tag indexing.
    tag_ids = KEYWORD(stored=True)
    tag_text = TEXT(stored=False, analyzer=accent_folder)

    # Hierarchical index of the path of ids ('/' is the separator).
    parent_ids = FieldType(
        format=Existence(),
        analyzer=PathTokenizer(),
        stored=True,
        unique=False,
    )

    # Textual fields; all are run through the accent_folder analyzer
    # except slug, which is an ID field.
    name = TEXT(stored=True, analyzer=accent_folder)
    slug = ID(stored=True)
    description = TEXT(stored=True, analyzer=accent_folder)
    text = TEXT(stored=False, analyzer=accent_folder)
def _schema(self):
    """Create a schema from this object's mingram and maxgram attributes.

    The schema always contains the stored fields ``word`` and ``score``.
    For every gram size from ``self.mingram`` through ``self.maxgram``
    (inclusive) it additionally contains:

    * ``start<size>`` and ``end<size>`` -- ``ID`` fields;
    * ``gram<size>`` -- a ``FieldType`` built from the ``Frequency``
      format and ``SimpleAnalyzer``.

    :returns: a ``whoosh.fields.Schema`` holding the fields listed above.
    """
    # Imports are kept local to the method, in their original order.
    from whoosh.fields import Schema, FieldType, ID, STORED
    from whoosh.formats import Frequency
    from whoosh.analysis import SimpleAnalyzer

    # One instance of each field type is shared by every gram size.
    id_field = ID()
    gram_field = FieldType(Frequency(), SimpleAnalyzer())

    fields = {"word": STORED, "score": STORED}
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, while ``range`` yields the same sizes on both 2 and 3.
    for size in range(self.mingram, self.maxgram + 1):
        fields["start%s" % size] = id_field
        fields["end%s" % size] = id_field
        fields["gram%s" % size] = gram_field
    return Schema(**fields)
FieldType, DATETIME) # CUSTOM ANALYZER wordsplit + lowercase filter for case insensitive search ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() # FILE INDEX SCHEMA DEFINITION FILE_INDEX_NAME = 'FILE_INDEX' FILE_SCHEMA = Schema( fileid=ID(unique=True), # Path repository=ID(stored=True), repository_id=NUMERIC(unique=True, stored=True), # Numeric id of repo repo_name=TEXT(stored=True), owner=TEXT(), path=TEXT(stored=True), content=FieldType(format=Characters(), analyzer=ANALYZER, scorable=True, stored=True), modtime=STORED(), md5=STORED(), extension=ID(stored=True), commit_id=TEXT(stored=True), size=NUMERIC(stored=True), mimetype=TEXT(stored=True), lines=NUMERIC(stored=True), ) # COMMIT INDEX SCHEMA COMMIT_INDEX_NAME = 'COMMIT_INDEX' COMMIT_SCHEMA = Schema( commit_id=ID(unique=True, stored=True), repository=ID(unique=True, stored=True),