Example #1
0
    def vector_as(self, docnum, fieldnum, astype):
        """Yield ``(text, value)`` pairs for one document's term vector.

        Each raw entry produced by ``self.vector(docnum, fieldnum)`` is passed
        through the interpreter that the field's vector format supplies for
        ``astype``.

        Raises FieldConfigurationError if ``self.vector_format(fieldnum)``
        returns None, or if the format does not support ``astype``. This is a
        generator, so those checks run when iteration starts rather than when
        the method is called.
        """
        vformat = self.vector_format(fieldnum)

        # Guard clauses: reject unusable fields before touching the vector.
        if vformat is None:
            raise FieldConfigurationError(
                "Field %r is not vectored" %
                self.schema.number_to_name(fieldnum))
        if not vformat.supports(astype):
            raise FieldConfigurationError(
                "Field %r does not support %r" %
                (self.schema.number_to_name(fieldnum), astype))

        decode = vformat.interpreter(astype)
        for term, raw in self.vector(docnum, fieldnum):
            yield (term, decode(raw))
Example #2
0
    def postings_as(self, fieldnum, text, astype, exclude_docs=None):
        """Yields interpreted data for each document containing
        the given term. The current field must have stored positions
        for this to work.
        
        :astype:
            how to interpret the posting data, for example
            "positions". The field must support the interpretation.
        :exclude_docs:
            a set of document numbers to ignore. This
            is used by queries to skip documents that have already been
            eliminated from consideration.
        :boost: a factor by which to multiply each weight.
        """

        format = self.schema.field_by_number(fieldnum).format

        if not format.supports(astype):
            raise FieldConfigurationError(
                "Field %r format does not support %r" %
                (self.schema.name_to_number(fieldnum), astype))

        interp = format.interpreter(astype)
        for docnum, data in self.postings(fieldnum,
                                          text,
                                          exclude_docs=exclude_docs):
            yield (docnum, interp(data))
Example #3
0
    def doc_field_length(self, docnum, fieldid):
        """Return the stored length of a field in a document.

        ``fieldid`` is normalised with ``self.schema.to_number()``. The
        resulting number must be present in ``self._scorable_fields``,
        otherwise FieldConfigurationError is raised.
        """
        fieldnum = self.schema.to_number(fieldid)
        if fieldnum in self._scorable_fields:
            # Translate the field number to its position in the lengths store.
            column = self._fieldnum_to_scorable_pos[fieldnum]
            return self.doclengths.at(docnum, column)

        raise FieldConfigurationError("Field %r does not store lengths" %
                                      fieldnum)
Example #4
0
    def doc_field_length(self, docnum, fieldid):
        """Look up the recorded length of one field in one document.

        The original documentation describes this as the number of terms in
        the field, used by some scoring algorithms. ``fieldid`` is first
        converted with ``self.schema.to_number()``; if the resulting number is
        not in ``self._scorable_fields`` a FieldConfigurationError is raised.
        """
        fieldnum = self.schema.to_number(fieldid)
        if fieldnum in self._scorable_fields:
            # Map the field number to its position in the length table.
            column = self._fieldnum_to_pos[fieldnum]
            return self.doclength_table.get(docnum, column)

        raise FieldConfigurationError("Field %r does not store lengths" %
                                      fieldnum)
Example #5
0
    def add(self, name, fieldtype, glob=False):
        """Register a field, and every subfield it reports, under ``name``.

        ``fieldtype`` may be a FieldType instance or a plain class; a plain
        class is instantiated with no arguments. Each ``(suffix, subfield)``
        pair from ``fieldtype.subfields()`` is stored under ``name.suffix``
        (or just ``name`` when the suffix is empty).

        With ``glob`` true, the entry goes into ``self._dyn_fields`` together
        with a regular expression compiled from ``name`` as an fnmatch
        pattern. Otherwise ``subfield.on_add()`` is called and the subfield is
        stored in ``self._fields``.

        Raises FieldConfigurationError when the class cannot be instantiated,
        when the object is not a FieldType, or when a resulting field name
        starts with "_", contains a space, or is already registered.
        """
        # A bare class (rather than an instance) is instantiated on the
        # caller's behalf.
        if type(fieldtype) is type:
            try:
                fieldtype = fieldtype()
            except Exception:
                err = sys.exc_info()[1]
                template = "Error: %s instantiating field %r: %r"
                raise FieldConfigurationError(
                    template % (err, name, fieldtype))

        if not isinstance(fieldtype, FieldType):
            raise FieldConfigurationError("%r is not a FieldType object" %
                                          fieldtype)

        registered = []
        self._subfields[name] = registered
        for suffix, subfield in fieldtype.subfields():
            if suffix:
                fname = name + "." + suffix
            else:
                fname = name
            registered.append(fname)

            # Validate the full field name before storing anything under it.
            if fname.startswith("_"):
                raise FieldConfigurationError("Names cannot start with _")
            if " " in fname:
                raise FieldConfigurationError("Names cannot contain spaces")
            if fname in self._fields or (glob and fname in self._dyn_fields):
                raise FieldConfigurationError("%r already in schema" % fname)

            if isinstance(subfield, DICT):
                # DICT subfields are added through a recursive call (note
                # that ``glob`` is not forwarded).
                self.add(fname, subfield)
            elif glob:
                pattern = re.compile(fnmatch.translate(name))
                self._dyn_fields[fname] = (pattern, subfield)
            else:
                subfield.on_add(self, fname)
                self._fields[fname] = subfield
Example #6
0
    def vector_as(self, docnum, fieldnum, astype):
        """Yields interpreted (text, data) tuples for the term vector of
        the given document and field.

        The field's vector format is asked whether it supports ``astype``
        (for example "positions"); if so, its interpreter for that type is
        applied to the data of every entry returned by
        ``self.vector(docnum, fieldnum)``.

        Raises FieldConfigurationError when the field has no vector format
        or when the format does not support ``astype``. As this is a
        generator, the error surfaces on first iteration.
        """

        vector_format = self.vector_format(fieldnum)

        if vector_format is None:
            raise FieldConfigurationError(
                "Field %r is not vectored" %
                self.schema.number_to_name(fieldnum))

        if not vector_format.supports(astype):
            raise FieldConfigurationError(
                "Field %r does not support %r" %
                (self.schema.number_to_name(fieldnum), astype))

        interpret = vector_format.interpreter(astype)
        for entry in self.vector(docnum, fieldnum):
            term, raw_data = entry
            yield (term, interpret(raw_data))