Beispiel #1
0
class WorkspaceSchema(fields.SchemaClass):
    """Declarative whoosh schema describing one workspace document.

    Each class attribute declares a field; the attribute name is the field
    name used when adding documents and when querying.
    """

    # Unique document key.
    id = fields.ID(unique=True, stored=True)
    # Stored text fields declared with the spelling flag enabled.
    owner = fields.TEXT(spelling=True, stored=True)
    name = fields.TEXT(spelling=True, stored=True)
    # Stored n-gram field; grams start at one character, phrase flag enabled.
    description = fields.NGRAM(minsize=1, phrase=True, stored=True)
    lastmodified = fields.DATETIME(stored=True)
    # Same n-gram configuration as ``description``.
    longdescription = fields.NGRAM(minsize=1, phrase=True, stored=True)
    public = fields.BOOLEAN(stored=True)
    # Comma-separated keyword lists; no ``stored`` flag is passed for these.
    users = fields.KEYWORD(commas=True)
    groups = fields.KEYWORD(commas=True)
    shared = fields.BOOLEAN(stored=True)
Beispiel #2
0
    def create_whoosh_schema(self):
        """
        Build the whoosh schema instance that describes how the dataset is
        indexed.

        One field is declared per entry of ``self.columns``, keyed by the
        entry's ``name``. The field type is chosen from the entry's flags,
        tested in this order: ``type_is_ngram``, ``type_is_phrase``,
        ``type_is_keyword``. An entry matching none of them becomes a
        ``STORED`` field.

        :rtype: SchemaClass
        """
        field_map = OrderedDict()
        for column in self.columns:
            if column.type_is_ngram:
                whoosh_field = fields.NGRAM(
                    stored=True,
                    minsize=column.ngram_minsize,
                    maxsize=column.ngram_maxsize,
                )
            elif column.type_is_phrase:
                whoosh_field = fields.TEXT(stored=True)
            elif column.type_is_keyword:
                whoosh_field = fields.KEYWORD(
                    stored=True,
                    lowercase=column.keyword_lowercase,
                    commas=column.keyword_commas,
                )
            else:
                whoosh_field = fields.STORED()
            field_map[column.name] = whoosh_field

        # The class name is passed through str(); presumably this keeps it a
        # native string for type() on Python 2 -- confirm before removing.
        schema_cls = type(str("WhooshSchema"), (fields.SchemaClass,), field_map)
        return schema_cls()
Beispiel #3
0
def test_suggest_prefix():
    """Check ``Searcher.suggest`` results for two different ``prefix`` values.

    Every title is indexed into both the ``content`` and ``quick`` fields;
    only ``content`` is queried here.
    """
    titles = (
        "Shoot To Kill",
        "Bloom, Split and Deviate",
        "Rankle the Seas and the Skies",
        "Lightning Flash Flame Shell",
        "Flower Wind Rage and Flower God Roar, Heavenly Wind Rage and "
        "Heavenly Demon Sneer",
        "All Waves, Rise now and Become my Shield, Lightning, Strike "
        "now and Become my Blade",
        "Cry, Raise Your Head, Rain Without end",
        "Sting All Enemies To Death",
        "Reduce All Creation to Ash",
        "Sit Upon the Frozen Heavens",
        "Call forth the Twilight",
    )

    schema = fields.Schema(
        content=fields.TEXT(stored=True, spelling=True),
        quick=fields.NGRAM(maxsize=10, stored=True),
    )

    with TempIndex(schema, "sugprefix") as index:
        with index.writer() as writer:
            for title in titles:
                text = u(title)
                writer.add_document(content=text, quick=text)

        with index.searcher() as searcher:
            # With prefix=2 only the two "ra..." words are expected.
            assert searcher.suggest(
                "content", u("ra"), maxdist=2, prefix=2
            ) == ["rage", "rain"]

            # With prefix=1 the expected list additionally contains "roar".
            assert searcher.suggest(
                "content", "ra", maxdist=2, prefix=1
            ) == ["rage", "rain", "roar"]
Beispiel #4
0
 def create_whoosh_schema(self):
     """
     Build a whoosh schema instance with one field per entry of
     ``self.columns``.

     Membership is tested in this order: ``self.ngram_columns`` (NGRAM
     field), ``self.phrase_columns`` (TEXT field), ``self.keyword_columns``
     (KEYWORD field). A column found in none of them becomes a ``STORED``
     field.

     :return: an instance of a dynamically created ``SchemaClass`` subclass.
     """
     field_map = OrderedDict()
     for column in self.columns:
         if column in self.ngram_columns:
             whoosh_field = fields.NGRAM(
                 stored=True,
                 minsize=self.ngram_minsize,
                 maxsize=self.ngram_maxsize,
             )
         elif column in self.phrase_columns:
             whoosh_field = fields.TEXT(stored=True)
         elif column in self.keyword_columns:
             whoosh_field = fields.KEYWORD(
                 stored=True,
                 lowercase=self.keyword_lowercase,
                 commas=self.keyword_commas,
             )
         else:
             whoosh_field = fields.STORED()
         field_map[column] = whoosh_field

     # The class name is passed through str(); presumably this keeps it a
     # native string for type() on Python 2 -- confirm before removing.
     class_name = str("WhooshSchema")
     return type(class_name, (fields.SchemaClass,), field_map)()
Beispiel #5
0
class UserSchema(fields.SchemaClass):
    """Declarative whoosh schema describing one user document.

    Each class attribute declares a field; the attribute name is the field
    name.
    """

    # Unique document key.
    pk = fields.ID(unique=True, stored=True)
    # Stored text fields declared with the spelling flag enabled.
    full_name = fields.TEXT(spelling=True, stored=True)
    username = fields.TEXT(spelling=True, stored=True)
    email = fields.TEXT(spelling=True, stored=True)
    # N-gram field with the phrase flag enabled; no ``stored`` flag is passed.
    content = fields.NGRAM(phrase=True)
Beispiel #6
0
def test_ngram_phrase():
    """Check phrase queries against an NGRAM field built from bigrams.

    A single document is indexed into an in-memory index. The assertions
    expect one hit for the unquoted four-character query, none when the same
    text is quoted, and one hit for a quoted string that appears contiguously
    in the document.
    """
    schema = fields.Schema(
        text=fields.NGRAM(minsize=2, maxsize=2, phrase=True),
        path=fields.ID(stored=True),
    )
    index = RamStorage().create_index(schema)

    index_writer = index.writer()
    index_writer.add_document(
        text=u('\u9AD8\u6821\u307E\u3067\u306F\u6771\u4EAC\u3067\u3001\u5927\u5B66\u304B\u3089\u306F\u4EAC\u5927\u3067\u3059\u3002'),
        path=u('sample'),
    )
    index_writer.commit()

    with index.searcher() as searcher:
        parser = qparser.QueryParser("text", schema)

        def hit_count(query_text):
            # Parse the query string and return the number of matching docs.
            return len(searcher.search(parser.parse(query_text)))

        # Unquoted query.
        assert_equal(hit_count(u('\u6771\u4EAC\u5927\u5B66')), 1)

        # The same characters as a quoted phrase.
        assert_equal(hit_count(u('"\u6771\u4EAC\u5927\u5B66"')), 0)

        # A quoted phrase taken verbatim from the document text.
        assert_equal(hit_count(u('"\u306F\u6771\u4EAC\u3067"')), 1)
Beispiel #7
0
from whoosh.qparser import QueryParser
from whoosh.qparser import MultifieldParser
from whoosh import analysis
from indexing.models import Job
import sys
import csv


# Whoosh index schema. Every field is a stored KEYWORD field except "JD",
# which is a stored NGRAM field (presumably the job description -- confirm).
# The field names match the entries of ``columns`` below.
WHOOSH_SCHEMA = fields.Schema(
    jobtitle=fields.KEYWORD(stored=True),
    company=fields.KEYWORD(stored=True),
    city=fields.KEYWORD(stored=True),
    state=fields.KEYWORD(stored=True),
    country=fields.KEYWORD(stored=True),
    source=fields.KEYWORD(stored=True),
    date=fields.KEYWORD(stored=True),
    JD=fields.NGRAM(stored=True),
    url=fields.KEYWORD(stored=True),
    latitude=fields.KEYWORD(stored=True),
    longitude=fields.KEYWORD(stored=True),
    relative_time=fields.KEYWORD(stored=True),
    job_id=fields.KEYWORD(stored=True),
    category=fields.KEYWORD(stored=True),
)

# ana = analysis.StemmingAnalyzer()

# Names of the schema fields, in the same order as they are declared above.
columns = [
    "jobtitle",
    "company",
    "city",
    "state",
    "country",
    "source",
    "date",
    "JD",
    "url",
    "latitude",
    "longitude",
    "relative_time",
    "job_id",
    "category",
]

Beispiel #8
0
class BookmarkSchema(fields.SchemaClass):
    """Declarative whoosh schema for a bookmark document.

    Both fields are stored n-gram fields using grams of 2 to 10 characters.
    """

    url = fields.NGRAM(stored=True, minsize=2, maxsize=10)
    title = fields.NGRAM(stored=True, minsize=2, maxsize=10)
Beispiel #9
0
class GPSSchema(fields.SchemaClass):
    """Declarative whoosh schema with ``url``, ``title`` and ``tags`` fields.

    ``url`` and ``title`` are stored n-gram fields using grams of 2 to 10
    characters.
    """

    url = fields.NGRAM(stored=True, minsize=2, maxsize=10)
    title = fields.NGRAM(stored=True, minsize=2, maxsize=10)
    # Keyword field created with the library's default options.
    tags = fields.KEYWORD()