def invertDocument(self, doc):
    """Record a position for every term of every indexed field in ``doc``.

    For each field the running position counter is read from
    ``self.fieldLengths`` (keyed by the field's number), advanced once per
    term handed to ``self.addPosition``, and written back afterwards, so
    several fields that map to the same field number continue one shared
    position sequence.

    * Indexed, untokenized fields contribute their string value as a
      single term.
    * Indexed, tokenized fields are run through ``self.analyzer``; the
      text comes from the field's reader value if present, otherwise from
      its string value.
    * Fields that are not indexed contribute nothing.

    Raises:
        Exception: if an indexed, tokenized field has neither a reader
            value nor a string value.
    """
    for field in doc.fields():
        fieldName = field.name()
        fieldNumber = self.fieldInfos.fieldNumber(fieldName)

        # Resume from wherever an earlier field with this number stopped.
        position = self.fieldLengths[fieldNumber]

        if field.isIndexed:
            if not field.isTokenized:
                # Untokenized: the whole string value is one term.
                self.addPosition(fieldName, field.stringValue(), position)
                position += 1
            else:
                # Prefer the reader value; fall back to the string value.
                # Each accessor is fetched once and reused.
                reader = field.readerValue()
                if reader is not None:
                    val = reader.read()
                else:
                    val = field.stringValue()
                    if val is None:
                        # Call form of raise: valid on Python 2 and 3.
                        raise Exception(
                            'Field must have either a String or Reader value')

                for tok in self.analyzer(val):
                    self.addPosition(fieldName, tok, position)
                    position += 1

                    # A falsy maxFieldLength disables the cap.  The check
                    # runs after the token was added, so the loop stops
                    # once the counter has moved past the limit.
                    if self.maxFieldLength and (position > self.maxFieldLength):
                        break

        self.fieldLengths[fieldNumber] = position
def writeNorms(self, doc, segment):
    """Write one norm file per indexed field of ``doc``.

    For every indexed field a file named ``segment + '.f' + <field number>``
    is created through ``self.directory`` and receives a single byte: the
    result of ``similarity.normInt`` applied to the value stored for that
    field number in ``self.fieldLengths``.  The file is closed even if
    writing fails.  Fields that are not indexed are skipped.
    """
    # NOTE(review): a second, identical writeNorms definition follows this
    # one in the file; if both live in the same scope the later definition
    # is the one that stays bound.  Consider removing one of them.
    for fld in doc.fields():
        if not fld.isIndexed:
            continue

        number = self.fieldInfos.fieldNumber(fld.name())
        normFile = self.directory.createFile(segment + '.f' + str(number))
        try:
            normFile.writeByte(similarity.normInt(self.fieldLengths[number]))
        finally:
            normFile.close()
def writeNorms(self, doc, segment):
    """Emit the norm byte for each indexed field in ``doc``.

    Each indexed field gets its own file, created via
    ``self.directory.createFile`` under the name
    ``segment + '.f' + str(fieldNumber)``.  The single byte written is
    ``similarity.normInt`` of that field number's entry in
    ``self.fieldLengths``.  Every created file is closed, whether or not
    the write succeeds.
    """
    # NOTE(review): this repeats the writeNorms definition found directly
    # before it in the file; if both are in the same scope this later one
    # replaces the earlier.  Consider dropping the duplicate.
    for field in doc.fields():
        if field.isIndexed:
            fieldNo = self.fieldInfos.fieldNumber(field.name())
            fileName = segment + '.f' + str(fieldNo)
            out = self.directory.createFile(fileName)
            try:
                length = self.fieldLengths[fieldNo]
                out.writeByte(similarity.normInt(length))
            finally:
                out.close()