Esempio n. 1
0
 def __init__(self, stored=False, unique=False, field_boost=1.0):
     """
     Set up the field's format and record its storage flags.

     :param stored: Whether the value of this field is stored with the document.
     :param unique: Kept on the instance as ``self.unique``.
     :param field_boost: Forwarded to the ``Existence`` format as its
         ``field_boost`` argument.
     """
     id_analyzer = IDAnalyzer()
     self.format = Existence(analyzer=id_analyzer, field_boost=field_boost)
     self.stored, self.unique = stored, unique
Esempio n. 2
0
 def __init__(self, stored=False, unique=False):
     """
     Set up the field's format and record its storage flags.

     :param stored: Whether the value of this field is stored with the document.
     :param unique: Kept on the instance as ``self.unique``.
     """
     id_analyzer = IDAnalyzer()
     self.format = Existance(analyzer=id_analyzer)
     self.stored, self.unique = stored, unique
Esempio n. 3
0
    def index(self, index_name='unified'):
        types = self.get_requested_content_types()

        from whoosh.fields import TEXT, ID, NGRAM, NUMERIC
        from whoosh.analysis import StemmingAnalyzer, SimpleAnalyzer, IDAnalyzer
        from whoosh.analysis.filters import LowercaseFilter
        simp_ana = SimpleAnalyzer()
        print 'Building %s index...' % index_name

        # build a single schema from the fields exposed by the different search
        # types
        print '\tSchema:'
        fields = {}
        for type in types:
            for info in type.get_fields_info().values():
                if info['whoosh']['name'] not in fields and not info[
                        'whoosh'].get('ignore', False):
                    print '\t\t%s' % info
                    field_type = info['whoosh']['type']

                    if index_name == 'autocomplete':
                        # break the long text fields into terms, leave the
                        # others as single expression
                        if not (field_type.__class__ == NUMERIC):
                            if info.get('long_text', False):
                                field_type = TEXT(analyzer=simp_ana)
                            else:
                                field_type = ID(stored=True,
                                                analyzer=IDAnalyzer()
                                                | LowercaseFilter())
                    print '\t\t%s' % field_type
                    fields[info['whoosh']['name']] = field_type

                    # JIRA 508 - Add an ID counterpart to allow exact phrase search
#                     if info.get('long_text', False):
#                         fields[info['whoosh']['name']+'_iexact'] = ID(analyzer=IDAnalyzer(lowercase=True))

        from whoosh.fields import Schema
        schema = Schema(**fields)

        # Create the index schema
        index = self.recreate_index(index_name, schema)

        # Add documents to the index
        print '\tWrite indexes:'
        writer = index.writer()
        aci = {}
        for type in types:
            count = type.write_index(writer, self.is_verbose(), aci)
            print '\t\t%s %s records indexed' % (count,
                                                 type.get_model().__name__)

        # autocomplete
        if index_name == 'unified':
            f = open(types[0].get_autocomplete_path(True), 'w')
            f.write((ur'|'.join(aci.keys())).encode('utf8'))
            f.close()

        writer.commit()
Esempio n. 4
0
    def __init__(self,
                 type=int,
                 stored=False,
                 unique=False,
                 field_boost=1.0,
                 decimal_places=0,
                 shift_step=4,
                 signed=True):
        """
        Configure a numeric field for the requested number type.

        :param type: the type of numbers that can be stored in this field:
            ``int``, ``long`` or ``float``. Passing ``Decimal`` raises
            ``TypeError``; to store Decimal instances use ``int`` or
            ``float`` together with ``decimal_places``.
        :param stored: Whether the value of this field is stored with the
            document.
        :param unique: Whether the value of this field is unique per-document.
        :param field_boost: forwarded to the ``Existence`` format.
        :param decimal_places: specifies the number of decimal places to save
            when storing Decimal instances as ``int`` or ``float``.
        :param shift_step: The number of bits of precision to shift away at
            each tiered indexing level. Values should generally be 1-8. Lower
            values yield faster searches but take up more space. A value
            of `0` means no tiered indexing.
        :param signed: Whether the numbers stored in this field may be
            negative. Selects the signed or unsigned sortable typecode for
            integer types.
        :raises TypeError: if ``type`` is ``Decimal`` or any other
            unsupported type.
        """

        self.type = type
        requested = self.type

        # Candidates are matched by identity, in this order. ``long_type``
        # must stay first: it will also catch the Python 3 int type.
        conversions = (
            (long_type, long_to_text, text_to_long, "q" if signed else "Q"),
            (int, int_to_text, text_to_int, "i" if signed else "I"),
            (float, float_to_text, text_to_float, "f"),
        )
        for number_type, to_text, from_text, typecode in conversions:
            if requested is number_type:
                self._to_text = to_text
                self._from_text = from_text
                self.sortable_typecode = typecode
                break
        else:
            # No supported type matched; Decimal gets a dedicated hint.
            if requested is Decimal:
                raise TypeError("To store Decimal instances, set type to int or "
                                "float and use the decimal_places argument")
            raise TypeError("%s field type can't store %r" %
                            (self.__class__, self.type))

        self.stored, self.unique = stored, unique
        self.decimal_places = decimal_places
        self.shift_step = shift_step
        self.signed = signed
        self.analyzer = IDAnalyzer()
        self.format = formats.Existence(field_boost=field_boost)
Esempio n. 5
0
 def __init__(self, type=int, stored=False, unique=False, field_boost=1.0):
     """
     Record the number type and storage flags, and set up the field's format.

     :param type: Kept on the instance as ``self.type``.
     :param stored: Kept on the instance as ``self.stored``.
     :param unique: Kept on the instance as ``self.unique``.
     :param field_boost: Forwarded to the ``Existence`` format as its
         ``field_boost`` argument.
     """
     self.type, self.stored, self.unique = type, stored, unique
     id_analyzer = IDAnalyzer()
     self.format = Existence(analyzer=id_analyzer, field_boost=field_boost)