Ejemplo n.º 1
0
    def invertDocument(self, doc):
        """Feed the terms of every indexed field of ``doc`` to ``self.addPosition``.

        For each field the running position is read from
        ``self.fieldLengths`` (indexed by the field's number as reported by
        ``self.fieldInfos``), advanced by one for every term added, and
        stored back once the field has been processed.  Untokenized fields
        contribute their whole string value as a single term; tokenized
        fields are passed through ``self.analyzer`` and every resulting
        token is added in order.

        Raises:
            Exception: if an indexed, tokenized field has neither a reader
                value nor a string value.
        """
        for field in doc.fields():
            fieldName = field.name()
            fieldNumber = self.fieldInfos.fieldNumber(fieldName)

            # Resume from the length already recorded for this field number.
            position = self.fieldLengths[fieldNumber]

            if field.isIndexed:
                if not field.isTokenized:
                    # Untokenized: the whole value is one term.
                    self.addPosition(fieldName, field.stringValue(), position)
                    position += 1
                else:
                    # Prefer the reader value, fall back to the string value.
                    if field.readerValue() is not None:
                        val = field.readerValue().read()
                    elif field.stringValue() is not None:
                        val = field.stringValue()
                    else:
                        # Call form of raise: valid on both Python 2 and 3.
                        raise Exception(
                            'Field must have either a String or Reader value')

                    for tok in self.analyzer(val):
                        self.addPosition(fieldName, tok, position)
                        position += 1

                        # A falsy maxFieldLength disables the cap.  The check
                        # runs after the increment, so the token that pushes
                        # the position past the cap has already been added.
                        if self.maxFieldLength and (position > self.maxFieldLength):
                            break

            self.fieldLengths[fieldNumber] = position
Ejemplo n.º 2
0
    def invertDocument(self, doc):
        """Add the terms of each indexed field in ``doc`` via ``self.addPosition``.

        The position counter for a field starts at the value held in
        ``self.fieldLengths`` for the field's number (looked up through
        ``self.fieldInfos``), grows by one per added term, and is written
        back to ``self.fieldLengths`` after the field is handled.  An
        untokenized field is added as one term (its string value); a
        tokenized field is split by ``self.analyzer`` and each token is
        added separately.

        Raises:
            Exception: if an indexed, tokenized field provides neither a
                reader value nor a string value.
        """
        for field in doc.fields():
            fieldName = field.name()
            fieldNumber = self.fieldInfos.fieldNumber(fieldName)

            # Start from the length already stored for this field number.
            position = self.fieldLengths[fieldNumber]

            if field.isIndexed:
                if not field.isTokenized:
                    # Untokenized: index the full string value as a single term.
                    self.addPosition(fieldName, field.stringValue(), position)
                    position += 1
                else:
                    # Take the text from the reader if present, else the string.
                    if field.readerValue() is not None:
                        val = field.readerValue().read()
                    elif field.stringValue() is not None:
                        val = field.stringValue()
                    else:
                        # Function-call raise works on Python 2 and Python 3.
                        raise Exception(
                            'Field must have either a String or Reader value')

                    for tok in self.analyzer(val):
                        self.addPosition(fieldName, tok, position)
                        position += 1

                        # maxFieldLength of 0/None means "no limit".  The
                        # comparison happens after the increment, so the
                        # token crossing the limit is still added.
                        if self.maxFieldLength and (position >
                                                    self.maxFieldLength):
                            break

            self.fieldLengths[fieldNumber] = position
Ejemplo n.º 3
0
 def writeNorms(self, doc, segment):
     """Write one byte per indexed field of ``doc`` to its own file.

     For every field whose ``isIndexed`` flag is set, a file named
     ``segment + '.f' + <field number>`` is created through
     ``self.directory`` and a single byte is written to it:
     ``similarity.normInt`` applied to the length stored for that field
     number in ``self.fieldLengths``.  The file is closed even when the
     write raises.
     """
     for field in doc.fields():
         # Fields that are not indexed get no file.
         if not field.isIndexed:
             continue

         number = self.fieldInfos.fieldNumber(field.name())
         out = self.directory.createFile(segment + '.f' + str(number))
         try:
             out.writeByte(similarity.normInt(self.fieldLengths[number]))
         finally:
             out.close()
Ejemplo n.º 4
0
 def writeNorms(self, doc, segment):
     """Emit a single-byte file for each indexed field of ``doc``.

     The file for a field is created via ``self.directory.createFile``
     under the name ``segment + '.f' + <field number>``, where the number
     comes from ``self.fieldInfos``.  Its only content is the byte
     ``similarity.normInt(length)``, with ``length`` taken from
     ``self.fieldLengths``.  Each file is closed in a ``finally`` clause.
     """
     for field in doc.fields():
         if not field.isIndexed:
             # Nothing is written for fields that are not indexed.
             continue

         fieldNo = self.fieldInfos.fieldNumber(field.name())
         fileName = segment + '.f' + str(fieldNo)
         stream = self.directory.createFile(fileName)
         try:
             stream.writeByte(
                 similarity.normInt(self.fieldLengths[fieldNo]))
         finally:
             stream.close()