Example #1
0
def get_schema():
    """Return the Whoosh schema describing one indexed document.

    Fields:
        id: stored, unique ``ID`` field.
        title / content: stored ``TEXT`` fields.
        title_row / content_row: ``STORED``-only fields.
    """
    return wfields.Schema(
        id=wfields.ID(stored=True, unique=True),
        title=wfields.TEXT(stored=True),
        title_row=wfields.STORED(),
        content=wfields.TEXT(stored=True),
        content_row=wfields.STORED(),
    )
Example #2
0
def _create_data_index():
    """Create a brand-new Whoosh index in ``DATA_DIR`` and return it.

    The schema keys each document by a stored, unique ``url`` and keeps
    ``type``, ``title``, ``description``, ``org`` and ``subtype`` as
    ``STORED``-only values. ``content`` is a ``TEXT`` field declared with
    default options.

    Returns:
        The index object returned by ``whoosh_index.create_in``.

    Raises:
        AssertionError: if ``DATA_DIR`` already contains an index. The
            exception argument is ``DATA_DIR``.
    """
    schema = whoosh_fields.Schema(
        url=whoosh_fields.ID(stored=True, unique=True),
        type=whoosh_fields.STORED(),
        title=whoosh_fields.STORED(),
        description=whoosh_fields.STORED(),
        org=whoosh_fields.STORED(),
        subtype=whoosh_fields.STORED(),
        content=whoosh_fields.TEXT(),
    )
    _ensure_dir(DATA_DIR)
    # Raise explicitly instead of using ``assert``: assert statements are
    # removed under ``python -O``, which would let ``create_in`` run against
    # a directory that already holds an index. The exception type and payload
    # match what the former ``assert`` produced, so callers are unaffected.
    if whoosh_index.exists_in(DATA_DIR):
        raise AssertionError(DATA_DIR)
    return whoosh_index.create_in(DATA_DIR, schema)
Example #3
0
 def project_schema(self):
     """Return the Whoosh schema used for project documents.

     ``path`` is the unique key. ``name``, ``user``, ``index`` and ``type``
     are stored ``ID`` fields. ``classifiers`` and ``keywords`` are scorable
     ``KEYWORD`` fields (comma-separated and not comma-separated
     respectively; only ``keywords`` is stored). ``version``,
     ``doc_version``, ``text_path`` and ``text_title`` are ``STORED``-only.
     ``text`` is analysed with ``NgramWordAnalyzer`` and is neither stored
     nor phrase-searchable.
     """
     schema_fields = {
         "path": fields.ID(stored=True, unique=True),
         "name": fields.ID(stored=True),
         "user": fields.ID(stored=True),
         "index": fields.ID(stored=True),
         "classifiers": fields.KEYWORD(commas=True, scorable=True),
         "keywords": fields.KEYWORD(stored=True, commas=False, scorable=True),
         "version": fields.STORED(),
         "doc_version": fields.STORED(),
         "type": fields.ID(stored=True),
         "text_path": fields.STORED(),
         "text_title": fields.STORED(),
         "text": fields.TEXT(
             analyzer=NgramWordAnalyzer(),
             stored=False,
             phrase=False,
         ),
     }
     return fields.Schema(**schema_fields)
Example #4
0
    def create_whoosh_schema(self):
        """
        Build a ``whoosh.fields.SchemaClass`` subclass at runtime and
        return an instance of it.

        Each entry of ``self.columns`` becomes one schema attribute named
        after the column setting. The field type is chosen from the
        setting's flags, checked in this order: n-gram, phrase, keyword.
        A setting matching none of them becomes a ``STORED``-only field.

        :rtype: SchemaClass
        """
        def field_for(setting):
            # First matching flag wins; precedence is ngram > phrase > keyword.
            if setting.type_is_ngram:
                return fields.NGRAM(
                    minsize=setting.ngram_minsize,
                    maxsize=setting.ngram_maxsize,
                    stored=True,
                )
            if setting.type_is_phrase:
                return fields.TEXT(stored=True)
            if setting.type_is_keyword:
                return fields.KEYWORD(
                    lowercase=setting.keyword_lowercase,
                    commas=setting.keyword_commas,
                    stored=True,
                )
            return fields.STORED()

        # OrderedDict keeps the class attributes in column order.
        class_attrs = OrderedDict()
        for setting in self.columns:
            whoosh_field = field_for(setting)
            class_attrs[setting.name] = whoosh_field

        # str() keeps the class name a native string for type().
        schema_cls = type(str("WhooshSchema"), (fields.SchemaClass,), class_attrs)
        return schema_cls()
Example #5
0
 def create_whoosh_schema(self):
     """Build and instantiate a Whoosh schema class from ``self.columns``.

     Every column becomes one schema attribute. Membership is checked in
     this order: ``self.ngram_columns`` (``NGRAM``), ``self.phrase_columns``
     (``TEXT``), ``self.keyword_columns`` (``KEYWORD``). A column in none of
     them becomes a ``STORED``-only field. The n-gram and keyword options
     come from instance-level settings shared by all columns of that kind.
     """
     field_by_column = OrderedDict()
     for column in self.columns:
         if column in self.ngram_columns:
             whoosh_field = fields.NGRAM(
                 minsize=self.ngram_minsize,
                 maxsize=self.ngram_maxsize,
                 stored=True,
             )
         elif column in self.phrase_columns:
             whoosh_field = fields.TEXT(stored=True)
         elif column in self.keyword_columns:
             whoosh_field = fields.KEYWORD(
                 lowercase=self.keyword_lowercase,
                 commas=self.keyword_commas,
                 stored=True,
             )
         else:
             whoosh_field = fields.STORED()
         field_by_column[column] = whoosh_field

     # str() keeps the class name a native string for type().
     schema_cls = type(
         str("WhooshSchema"),
         (fields.SchemaClass,),
         field_by_column,
     )
     return schema_cls()