Python IDAnalyzer Examples

Programming Language: Python

Namespace/Package Name: whoosh.analysis

Class/Type: IDAnalyzer

Examples at hotexamples.com: 5

Python IDAnalyzer - 5 examples found. These are the top rated real world Python examples of whoosh.analysis.IDAnalyzer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

IDAnalyzer(5)

Frequently Used Methods

IDAnalyzer (5)

Example #1

Show file

 def __init__(self, stored=False, unique=False, field_boost=1.0):
     """Set up the field's format and record its storage flags.

     :param stored: Whether the value of this field is stored with the document.
     :param unique: Kept unchanged on the instance as ``self.unique``.
     :param field_boost: Forwarded to the ``Existence`` format as its
         ``field_boost`` argument.
     """
     id_analyzer = IDAnalyzer()
     self.format = Existence(analyzer=id_analyzer, field_boost=field_boost)
     self.stored = stored
     self.unique = unique

Example #2

Show file

File: fields.py Project: ra2003/whoosh-2

 def __init__(self, stored=False, unique=False):
     """Set up the field's format and record its storage flags.

     :param stored: Whether the value of this field is stored with the document.
     :param unique: Kept unchanged on the instance as ``self.unique``.
     """
     # NOTE(review): ``Existance`` is the spelling this revision refers to;
     # it is an external name, so it is left exactly as written.
     id_analyzer = IDAnalyzer()
     self.format = Existance(analyzer=id_analyzer)
     self.stored = stored
     self.unique = unique

Example #3

Show file

    def index(self, index_name='unified'):
        types = self.get_requested_content_types()

        from whoosh.fields import TEXT, ID, NGRAM, NUMERIC
        from whoosh.analysis import StemmingAnalyzer, SimpleAnalyzer, IDAnalyzer
        from whoosh.analysis.filters import LowercaseFilter
        simp_ana = SimpleAnalyzer()
        print 'Building %s index...' % index_name

        # build a single schema from the fields exposed by the different search
        # types
        print '\tSchema:'
        fields = {}
        for type in types:
            for info in type.get_fields_info().values():
                if info['whoosh']['name'] not in fields and not info[
                        'whoosh'].get('ignore', False):
                    print '\t\t%s' % info
                    field_type = info['whoosh']['type']

                    if index_name == 'autocomplete':
                        # break the long text fields into terms, leave the
                        # others as single expression
                        if not (field_type.__class__ == NUMERIC):
                            if info.get('long_text', False):
                                field_type = TEXT(analyzer=simp_ana)
                            else:
                                field_type = ID(stored=True,
                                                analyzer=IDAnalyzer()
                                                | LowercaseFilter())
                    print '\t\t%s' % field_type
                    fields[info['whoosh']['name']] = field_type

                    # JIRA 508 - Add an ID counterpart to allow exact phrase search
#                     if info.get('long_text', False):
#                         fields[info['whoosh']['name']+'_iexact'] = ID(analyzer=IDAnalyzer(lowercase=True))

        from whoosh.fields import Schema
        schema = Schema(**fields)

        # Create the index schema
        index = self.recreate_index(index_name, schema)

        # Add documents to the index
        print '\tWrite indexes:'
        writer = index.writer()
        aci = {}
        for type in types:
            count = type.write_index(writer, self.is_verbose(), aci)
            print '\t\t%s %s records indexed' % (count,
                                                 type.get_model().__name__)

        # autocomplete
        if index_name == 'unified':
            f = open(types[0].get_autocomplete_path(True), 'w')
            f.write((ur'|'.join(aci.keys())).encode('utf8'))
            f.close()

        writer.commit()

Example #4

Show file

    def __init__(self,
                 type=int,
                 stored=False,
                 unique=False,
                 field_boost=1.0,
                 decimal_places=0,
                 shift_step=4,
                 signed=True):
        """Configure a numeric field for the given Python number type.

        :param type: the type of numbers that can be stored in this field:
            ``int``, ``long`` or ``float``. Passing ``Decimal`` raises
            ``TypeError``; to store Decimal instances use ``int`` or
            ``float`` together with ``decimal_places``.
        :param stored: Whether the value of this field is stored with the
            document.
        :param unique: Whether the value of this field is unique per-document.
        :param field_boost: passed to the ``Existence`` format as its
            ``field_boost`` argument.
        :param decimal_places: specifies the number of decimal places to save
            when storing Decimal instances as ``int`` or ``float``.
        :param shift_step: The number of bits of precision to shift away at
            each tiered indexing level. Values should generally be 1-8. Lower
            values yield faster searches but take up more space. A value
            of `0` means no tiered indexing.
        :param signed: Whether the numbers stored in this field may be
            negative.
        :raises TypeError: if ``type`` is ``Decimal`` or any other type this
            field cannot store.
        """

        self.type = type

        # Choose the text converters and the sortable typecode that match
        # the requested number type.
        if type is long_type:
            # This will catch the Python 3 int type
            converters = (long_to_text, text_to_long)
            typecode = "q" if signed else "Q"
        elif type is int:
            converters = (int_to_text, text_to_int)
            typecode = "i" if signed else "I"
        elif type is float:
            converters = (float_to_text, text_to_float)
            typecode = "f"
        elif type is Decimal:
            raise TypeError("To store Decimal instances, set type to int or "
                            "float and use the decimal_places argument")
        else:
            raise TypeError("%s field type can't store %r" %
                            (self.__class__, self.type))

        self._to_text, self._from_text = converters
        self.sortable_typecode = typecode

        self.stored = stored
        self.unique = unique
        self.decimal_places = decimal_places
        self.shift_step = shift_step
        self.signed = signed
        self.analyzer = IDAnalyzer()
        self.format = formats.Existence(field_boost=field_boost)

Example #5

Show file

 def __init__(self, type=int, stored=False, unique=False, field_boost=1.0):
     """Record the field's number type and flags and build its format.

     :param type: Kept unchanged on the instance as ``self.type``.
     :param stored: Kept unchanged on the instance as ``self.stored``.
     :param unique: Kept unchanged on the instance as ``self.unique``.
     :param field_boost: Forwarded to the ``Existence`` format as its
         ``field_boost`` argument.
     """
     self.type = type
     self.stored = stored
     self.unique = unique
     id_analyzer = IDAnalyzer()
     self.format = Existence(analyzer=id_analyzer, field_boost=field_boost)