コード例 #1
0
class WorkspaceSchema(fields.SchemaClass):
    """Declarative Whoosh schema for workspace documents.

    Every class attribute declares one field of the index; the attribute
    name is the field name used when adding and querying documents.
    """

    # Document key: an ID field that is stored and flagged as unique.
    id = fields.ID(stored=True, unique=True)
    # Stored text fields that also opt into spelling support (spelling=True).
    owner = fields.TEXT(stored=True, spelling=True)
    name = fields.TEXT(stored=True, spelling=True)
    # N-gram field with grams starting at a single character (minsize=1);
    # phrase=True is requested so phrase queries can be run against it.
    description = fields.NGRAM(stored=True, minsize=1, phrase=True)
    lastmodified = fields.DATETIME(stored=True)
    # Same n-gram configuration as ``description``.
    longdescription = fields.NGRAM(stored=True, minsize=1, phrase=True)
    public = fields.BOOLEAN(stored=True)
    # Comma-separated keyword lists; ``stored`` is not requested for these.
    # NOTE(review): presumably the users/groups that may access the
    # workspace -- confirm against the code that fills the index.
    users = fields.KEYWORD(commas=True)
    groups = fields.KEYWORD(commas=True)
    shared = fields.BOOLEAN(stored=True)
コード例 #2
0
    def create_whoosh_schema(self):
        """
        Build a whoosh schema instance from the configured columns.

        Each entry of ``self.columns`` contributes one field, keyed by the
        entry's ``name``. The field type follows the first flag that is set,
        checked in this order: n-gram, phrase, keyword. A column with none
        of these flags becomes a ``STORED`` field.

        :rtype: SchemaClass
        """
        def pick_field(column):
            # Guard clauses keep the same precedence as an if/elif chain.
            if column.type_is_ngram:
                return fields.NGRAM(
                    minsize=column.ngram_minsize,
                    maxsize=column.ngram_maxsize,
                    stored=True,
                )
            if column.type_is_phrase:
                return fields.TEXT(stored=True)
            if column.type_is_keyword:
                return fields.KEYWORD(
                    lowercase=column.keyword_lowercase,
                    commas=column.keyword_commas,
                    stored=True,
                )
            return fields.STORED()

        # The class name is passed through ``str`` before reaching ``type``.
        class_name = str("WhooshSchema")

        # Ordered so the class namespace follows the column order.
        namespace = OrderedDict()
        for column in self.columns:
            namespace[column.name] = pick_field(column)

        schema_cls = type(class_name, (fields.SchemaClass,), namespace)
        return schema_cls()
コード例 #3
0
ファイル: test_spelling.py プロジェクト: CuteCha/dssm-theano
def test_suggest_prefix():
    """Spelling suggestions for "ra" must honour the ``prefix`` argument.

    With ``prefix=2`` only words starting with "ra" are expected; relaxing
    it to ``prefix=1`` additionally lets "roar" through.
    """
    phrases = (
        "Shoot To Kill",
        "Bloom, Split and Deviate",
        "Rankle the Seas and the Skies",
        "Lightning Flash Flame Shell",
        "Flower Wind Rage and Flower God Roar, Heavenly Wind Rage and "
        "Heavenly Demon Sneer",
        "All Waves, Rise now and Become my Shield, Lightning, Strike "
        "now and Become my Blade",
        "Cry, Raise Your Head, Rain Without end",
        "Sting All Enemies To Death",
        "Reduce All Creation to Ash",
        "Sit Upon the Frozen Heavens",
        "Call forth the Twilight",
    )

    content_field = fields.TEXT(stored=True, spelling=True)
    quick_field = fields.NGRAM(maxsize=10, stored=True)
    schema = fields.Schema(content=content_field, quick=quick_field)

    with TempIndex(schema, "sugprefix") as ix:
        # Index every phrase under both the text and the n-gram field.
        with ix.writer() as writer:
            for phrase in phrases:
                text = u(phrase)
                writer.add_document(content=text, quick=text)

        with ix.searcher() as searcher:
            strict = searcher.suggest("content", u("ra"), maxdist=2, prefix=2)
            assert strict == ['rage', 'rain']

            relaxed = searcher.suggest("content", "ra", maxdist=2, prefix=1)
            assert relaxed == ["rage", "rain", "roar"]
コード例 #4
0
 def create_whoosh_schema(self):
     """
     Build a whoosh schema instance with one field per entry of
     ``self.columns``.

     A column listed in ``self.ngram_columns`` becomes an NGRAM field, one
     in ``self.phrase_columns`` a TEXT field, one in
     ``self.keyword_columns`` a KEYWORD field; membership is tested in that
     order and the first match wins. Any other column becomes a STORED
     field.
     """
     def pick_field(column):
         # Early returns mirror the precedence of an if/elif chain.
         if column in self.ngram_columns:
             return fields.NGRAM(
                 minsize=self.ngram_minsize,
                 maxsize=self.ngram_maxsize,
                 stored=True,
             )
         if column in self.phrase_columns:
             return fields.TEXT(stored=True)
         if column in self.keyword_columns:
             return fields.KEYWORD(
                 lowercase=self.keyword_lowercase,
                 commas=self.keyword_commas,
                 stored=True,
             )
         return fields.STORED()

     # The class name is passed through ``str`` before reaching ``type``.
     class_name = str("WhooshSchema")

     # Ordered so the class namespace follows the column order.
     namespace = OrderedDict()
     for column in self.columns:
         namespace[column] = pick_field(column)

     schema_cls = type(class_name, (fields.SchemaClass,), namespace)
     return schema_cls()
コード例 #5
0
ファイル: searchers.py プロジェクト: future-analytics/wstore
class UserSchema(fields.SchemaClass):
    """Declarative Whoosh schema for user documents.

    Every class attribute declares one field of the index; the attribute
    name is the field name used when adding and querying documents.
    """

    # Document key: an ID field that is stored and flagged as unique.
    pk = fields.ID(stored=True, unique=True)
    # Stored text fields that also opt into spelling support (spelling=True).
    full_name = fields.TEXT(stored=True, spelling=True)
    username = fields.TEXT(stored=True, spelling=True)
    email = fields.TEXT(stored=True, spelling=True)
    # N-gram field with phrase=True; ``stored`` is not requested here.
    # NOTE(review): what text is fed into ``content`` is decided by the
    # indexing code, which is not visible here -- confirm there.
    content = fields.NGRAM(phrase=True)
コード例 #6
0
ファイル: test_searching.py プロジェクト: ws-os/oh-mainline
def test_ngram_phrase():
    """Phrase queries on a bigram field must require adjacent grams.

    One document is indexed, then three queries are run against it: an
    unquoted query expected to match, a quoted phrase expected to find
    nothing, and a quoted phrase expected to match.
    """
    text_field = fields.NGRAM(minsize=2, maxsize=2, phrase=True)
    schema = fields.Schema(text=text_field, path=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    writer.add_document(
        text=u('\u9AD8\u6821\u307E\u3067\u306F\u6771\u4EAC\u3067\u3001\u5927\u5B66\u304B\u3089\u306F\u4EAC\u5927\u3067\u3059\u3002'),
        path=u('sample'),
    )
    writer.commit()

    # (query string, expected number of hits), checked in this order.
    cases = (
        (u('\u6771\u4EAC\u5927\u5B66'), 1),
        (u('"\u6771\u4EAC\u5927\u5B66"'), 0),
        (u('"\u306F\u6771\u4EAC\u3067"'), 1),
    )

    with ix.searcher() as s:
        parser = qparser.QueryParser("text", schema)
        for query_text, expected_hits in cases:
            query = parser.parse(query_text)
            assert_equal(len(s.search(query)), expected_hits)
コード例 #7
0
from whoosh.qparser import QueryParser
from whoosh.qparser import MultifieldParser
from whoosh import analysis
from indexing.models import Job
import sys
import csv


# Names of all schema fields, in declaration order.
columns = [
    "jobtitle", "company", "city", "state", "country",
    "source", "date", "JD", "url", "latitude", "longitude",
    "relative_time", "job_id", "category",
]

# Index schema with one stored field per column: "JD" is an NGRAM field,
# every other column is a KEYWORD field.
WHOOSH_SCHEMA = fields.Schema(**{
    name: (
        fields.NGRAM(stored=True) if name == "JD"
        else fields.KEYWORD(stored=True)
    )
    for name in columns
})

# ana = analysis.StemmingAnalyzer()

コード例 #8
0
class BookmarkSchema(fields.SchemaClass):
    """Declarative Whoosh schema for bookmark documents.

    Both fields are stored NGRAM fields configured for grams of 2 to 10
    characters.
    """

    url = fields.NGRAM(minsize=2, maxsize=10, stored=True)
    title = fields.NGRAM(minsize=2, maxsize=10, stored=True)
コード例 #9
0
class GPSSchema(fields.SchemaClass):
    """Declarative Whoosh schema with ``url``, ``title`` and ``tags`` fields.

    ``url`` and ``title`` are stored NGRAM fields configured for grams of
    2 to 10 characters; ``tags`` is a KEYWORD field with default settings.
    """

    url = fields.NGRAM(minsize=2, maxsize=10, stored=True)
    title = fields.NGRAM(minsize=2, maxsize=10, stored=True)
    # Created with no arguments, so ``stored`` is not requested for tags.
    tags = fields.KEYWORD()