def __init__(self, input):
    """Wrap *input* and set up the payload attribute and its byte buffer.

    Registers a ``PayloadAttribute`` on this stream, allocates a Java byte
    array via ``JArray('byte')(1)`` and builds a ``BytesRef`` over that
    array with offset 0 and length 1.
    """
    super(PayloadSetter, self).__init__(input)
    self.input = input
    self.payloadAtt = self.addAttribute(PayloadAttribute.class_)

    # self.p is constructed from the very same array object kept in
    # self.data.
    payloadBuffer = JArray('byte')(1)
    self.data = payloadBuffer
    self.p = BytesRef(payloadBuffer, 0, 1)
def testCompressionTools(self):
    """Round-trip compressed binary and string stored fields via an index.

    Stores ``self.binaryValCompressed`` twice -- once compressed from a Java
    byte array, once compressed with ``compressString`` -- indexes the
    document, reads document 0 back and checks that both stored values
    decompress to the original value.
    """
    rawBytes = JArray('byte')(self.binaryValCompressed)
    binaryFldCompressed = StoredField(
        "binaryCompressed", CompressionTools.compress(rawBytes))
    stringFldCompressed = StoredField(
        "stringCompressed",
        CompressionTools.compressString(self.binaryValCompressed))

    doc = Document()
    for fld in (binaryFldCompressed, stringFldCompressed):
        doc.add(fld)

    # Index the single document.
    writer = self.getWriter(analyzer=StandardAnalyzer())
    writer.addDocument(doc)
    writer.close()

    # Read the document back from the index.
    reader = self.getReader()
    docFromReader = reader.document(0)
    self.assertTrue(docFromReader is not None)

    # The binary field must decompress to the original value.
    storedBinary = docFromReader.getBinaryValue("binaryCompressed")
    decompressed = CompressionTools.decompress(storedBinary)
    binaryFldCompressedTest = decompressed.string_
    self.assertEqual(binaryFldCompressedTest, self.binaryValCompressed)

    # The string field must decompress to the original value as well.
    storedString = docFromReader.getBinaryValue("stringCompressed")
    self.assertEqual(CompressionTools.decompressString(storedString),
                     self.binaryValCompressed)

    reader.close()
def next(self):
    """Return one Token on the first call and None on every later call.

    The token is built from a 1024-element Java char array filled by a
    single ``reader.read`` call; the value returned by that call is used as
    both the token length and the end offset.

    ``reader`` is neither a parameter nor a local of this method: it is
    resolved from an enclosing scope.
    """
    if self.done:
        return None

    self.done = True
    buf = JArray('char')(1024)
    count = reader.read(buf, 0, 1024)
    return Token(buf, 0, count, 0, count)
def getDocIdSet(self, reader):
    """Return an OpenBitSet marking documents matched by the accessor's ISBNs.

    The bit set is sized to ``reader.maxDoc()``.  For every non-None ISBN
    from ``self.accessor.isbns()`` the "isbn" term's docs are read into
    one-element buffers; when exactly one entry was read, the bit of that
    document is set.

    Uses the Python 2 ``long`` builtin for the values passed to OpenBitSet.
    """
    matches = OpenBitSet(long(reader.maxDoc()))
    candidates = self.accessor.isbns()

    # One-element buffers, reused for every lookup.
    docBuf = JArray(int)(1)
    freqBuf = JArray(int)(1)

    for isbn in candidates:
        if isbn is None:
            continue
        hits = reader.termDocs(Term("isbn", isbn)).read(docBuf, freqBuf)
        if hits == 1:
            matches.set(long(docBuf[0]))

    return matches
def incrementToken(self):
    """Advance the wrapped stream and decorate the current token.

    When the wrapped stream yields a token, a payload built from the text
    "pos: <self.pos>" is attached and the position increment is set to 1 if
    ``self.pos`` is 0 or ``self.i`` is odd, otherwise to 0.  ``self.pos`` is
    then advanced by that increment and ``self.i`` by one.

    Returns True if the wrapped stream produced a token, False otherwise.
    """
    if not self.input.incrementToken():
        return False

    payload = JArray('byte')("pos: %d" % self.pos)
    self.payloadAttr.setPayload(BytesRef(payload))

    posIncr = 1 if (self.pos == 0 or self.i % 2 == 1) else 0
    self.posIncrAttr.setPositionIncrement(posIncr)

    self.pos += posIncr
    self.i += 1
    return True
def _updateOaiRecord(self, identifier, setSpecs, metadataPrefixes,
                     delete=False, oldDoc=None, deleteInSets=None,
                     deleteInPrefixes=None, _overrideStamp=None):
    """Write a freshly stamped document for *identifier* and signal the update.

    Steps, in order:
      1. Use *oldDoc* if truthy, otherwise fetch it with ``_getDocument``.
      2. Derive the new document and the previously deleted sets/prefixes
         with ``_getNewDocument``.
      3. Pick the stamp: *_overrideStamp* when ``self._importMode`` is
         truthy, otherwise ``self._newStamp()``; add it to the document as
         a LongPoint, a StoredField and a NumericDocValuesField.
      4. Apply metadata prefixes and sets through ``_setMetadataPrefixes``
         and ``_setSets``.
      5. Add the tombstone field when *delete* is truthy, when the deleted
         sets are non-empty and equal to all sets, or when all metadata
         prefixes equal the deleted prefixes.
      6. Call ``self._writer.updateDocument`` keyed on IDENTIFIER_FIELD,
         record the identifier in ``self._latestModifications`` and call
         ``self.do.signalOaiUpdate``.
    """
    if not oldDoc:
        oldDoc = self._getDocument(identifier)
    doc, oldDeletedSets, oldDeletedPrefixes = self._getNewDocument(
        identifier, oldDoc=oldDoc)

    if self._importMode:
        newStamp = _overrideStamp
    else:
        newStamp = self._newStamp()

    # The stamp goes in as a LongPoint, a StoredField and a
    # NumericDocValuesField.
    stampNumber = int(newStamp)
    doc.add(LongPoint(STAMP_FIELD, stampNumber))
    stampBytes = BytesRef(JArray('byte')(int_to_bytes(newStamp)))
    doc.add(StoredField(STAMP_FIELD, stampBytes))
    doc.add(NumericDocValuesField(NUMERIC_STAMP_FIELD, stampNumber))

    prefixSet = asSet(metadataPrefixes)
    prefixesToDelete = asSet(deleteInPrefixes)
    allMetadataPrefixes, allDeletedPrefixes = self._setMetadataPrefixes(
        doc=doc,
        metadataPrefixes=prefixSet,
        delete=delete,
        deleteInPrefixes=prefixesToDelete,
        oldDeletedPrefixes=oldDeletedPrefixes)

    allSets, allDeletedSets = self._setSets(
        doc=doc,
        setSpecs=setSpecs or [],
        delete=delete,
        deleteInSets=deleteInSets,
        oldDeletedSets=oldDeletedSets)

    markAsDeleted = (
        delete
        or (allDeletedSets and allSets == allDeletedSets)
        or allMetadataPrefixes == allDeletedPrefixes
    )
    if markAsDeleted:
        tombstone = StringField(
            TOMBSTONE_FIELD, TOMBSTONE_VALUE, Field.Store.YES)
        doc.add(tombstone)

    identifierTerm = Term(IDENTIFIER_FIELD, identifier)
    self._writer.updateDocument(identifierTerm, doc)
    self._latestModifications.add(str(identifier))
    self.do.signalOaiUpdate(
        metadataPrefixes=allMetadataPrefixes,
        sets=allSets,
        stamp=newStamp)
def testBinaryFieldInIndex(self):
    """Check that a binary and a string stored field survive indexing.

    Builds a document with ``self.binaryValStored`` stored once as a binary
    ``StoredField`` and once as a stored string ``Field``, indexes it, reads
    document 0 back and compares both stored values with the original.
    """
    ft = FieldType()
    ft.setStored(True)

    # Distinct local names for the input array and the value read back;
    # neither shadows the ``bytes`` builtin.
    inputBytes = JArray('byte')(self.binaryValStored)
    binaryFldStored = StoredField("binaryStored", inputBytes)
    stringFldStored = Field("stringStored", self.binaryValStored, ft)

    doc = Document()
    doc.add(binaryFldStored)
    doc.add(stringFldStored)

    # test for field count
    self.assertEqual(2, doc.fields.size())

    # add the doc to a ram index
    writer = self.getWriter(
        analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
    writer.addDocument(doc)
    writer.close()

    # open a reader and fetch the document
    reader = self.getReader()
    docFromReader = reader.document(0)
    # assertTrue instead of the deprecated assert_ alias, which was removed
    # from unittest in Python 3.12.
    self.assertTrue(docFromReader is not None)

    # fetch the binary stored field and compare its content with the
    # original one
    storedValue = docFromReader.getBinaryValue("binaryStored")
    binaryFldStoredTest = storedValue.bytes.string_
    self.assertEqual(binaryFldStoredTest, self.binaryValStored)

    # fetch the string field and compare its content with the original one
    stringFldStoredTest = docFromReader.get("stringStored")
    self.assertEqual(stringFldStoredTest, self.binaryValStored)

    reader.close()
def readInternal(self, length, pos):
    """Seek ``self.fh`` to *pos*, read *length* from it and return the data
    wrapped in a Java byte array (``JArray('byte')``)."""
    handle = self.fh
    handle.seek(pos)
    chunk = handle.read(length)
    return JArray('byte')(chunk)
def binary(self, b):
    """Assert that *b* round-trips through a binary ``StoredField``.

    *b* is converted to a Java byte array and stored in a field named
    "bin"; the ``bytes`` of the field's ``binaryValue()`` must compare equal
    to that array and, element by element, to *b*.
    """
    javaBytes = JArray('byte')(b)
    stored = StoredField("bin", javaBytes).binaryValue().bytes
    assert javaBytes == stored
    assert b == list(stored)
def _checkOneTermReuse(self, a, input, expected):
    """Delegate to ``_assertAnalyzesToReuse`` with *expected* converted to a
    Java string array via ``JArray('string')``."""
    expectedTerms = JArray('string')(expected)
    self._assertAnalyzesToReuse(a, input, expectedTerms)
def testPayloadsPos0(self):
    """Check positions, spans and payloads produced by TestPayloadAnalyzer.

    Indexes one document, then verifies:
      * the positions reported for the term "a" (0, 1, 3, 6),
      * the number of payloads and spans found by a SpanNearQuery over the
        terms "a" and "k", and that a span starting at position 0 is seen,
      * that PayloadSpanUtil returns 5 payloads, one of them "pos: 0".
    """
    writer = self.getWriter(analyzer=TestPayloadAnalyzer())

    doc = Document()
    doc.add(Field("content", "a a b c d e a f g h i j a b k k",
                  TextField.TYPE_STORED))
    writer.addDocument(doc)

    reader = writer.getReader()
    writer.close()

    tp = MultiFields.getTermPositionsEnum(reader,
                                          MultiFields.getLiveDocs(reader),
                                          "content", BytesRef("a"))

    # assertTrue replaces the deprecated assert_ alias (removed from
    # unittest in Python 3.12) throughout this test.
    self.assertTrue(tp.nextDoc() != tp.NO_MORE_DOCS)
    # "a" occurs 4 times
    self.assertEqual(4, tp.freq())
    self.assertEqual(0, tp.nextPosition())
    self.assertEqual(1, tp.nextPosition())
    self.assertEqual(3, tp.nextPosition())
    self.assertEqual(6, tp.nextPosition())

    # only one doc has "a"
    self.assertTrue(tp.nextDoc() == tp.NO_MORE_DOCS)

    searcher = self.getSearcher(reader=reader)

    stq1 = SpanTermQuery(Term("content", "a"))
    stq2 = SpanTermQuery(Term("content", "k"))
    sqs = [stq1, stq2]
    snq = SpanNearQuery(sqs, 30, False)

    # Count every payload reported across all matching spans.
    count = 0
    sawZero = False
    pspans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
    while pspans.next():
        payloads = pspans.getPayload()
        sawZero |= pspans.start() == 0

        it = payloads.iterator()
        while it.hasNext():
            count += 1
            it.next()

    self.assertEqual(5, count)
    self.assertTrue(sawZero)

    # Count the spans themselves.
    spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
    count = 0
    sawZero = False
    while spans.next():
        count += 1
        sawZero |= spans.start() == 0

    self.assertEqual(4, count)
    self.assertTrue(sawZero)

    # Finally inspect the payload contents via PayloadSpanUtil.
    sawZero = False
    psu = PayloadSpanUtil(searcher.getTopReaderContext())
    pls = psu.getPayloadsForQuery(snq)
    count = pls.size()
    it = pls.iterator()
    while it.hasNext():
        payloadBytes = JArray('byte').cast_(it.next())
        s = payloadBytes.string_
        sawZero |= s == "pos: 0"

    self.assertEqual(5, count)
    self.assertTrue(sawZero)
def binary(self, b):
    """Assert that *b* round-trips through a stored binary ``Field``.

    *b* is converted to a Java byte array and stored in a field named "bin"
    created with ``Field.Store.YES``; the field's ``binaryValue`` must
    compare equal to that array and, element by element, to *b*.
    """
    javaBytes = JArray('byte')(b)
    stored = Field("bin", javaBytes, Field.Store.YES).binaryValue
    assert javaBytes == stored
    assert b == list(stored)