Ejemplo n.º 1
0
    def __init__(self, input):
        """Wrap ``input`` and prepare the payload attribute and byte buffer.

        :param input: object handed to the base-class constructor and also
            kept on ``self.input`` (presumably the wrapped token stream --
            confirm against the class definition).
        """
        super(PayloadSetter, self).__init__(input)
        self.input = input

        # Attribute registered on this stream for PayloadAttribute.
        self.payloadAtt = self.addAttribute(PayloadAttribute.class_)

        # Java byte array created with size argument 1, plus a BytesRef
        # built over it with offset 0 and length 1.
        byteArrayOf = JArray('byte')
        self.data = byteArrayOf(1)
        self.p = BytesRef(self.data, 0, 1)
Ejemplo n.º 2
0
    def testCompressionTools(self):
        """Store compressed binary and string fields, then read them back.

        Both fields are built from ``self.binaryValCompressed``; after a
        round trip through the index each one must decompress to that same
        value.
        """
        rawBytes = JArray('byte')(self.binaryValCompressed)
        compressedBinaryField = StoredField(
            "binaryCompressed", CompressionTools.compress(rawBytes))
        compressedStringField = StoredField(
            "stringCompressed",
            CompressionTools.compressString(self.binaryValCompressed))

        doc = Document()
        for field in (compressedBinaryField, compressedStringField):
            doc.add(field)

        # Index the single document.
        writer = self.getWriter(analyzer=StandardAnalyzer())
        writer.addDocument(doc)
        writer.close()

        # Re-open the index and load the document back.
        reader = self.getReader()
        docFromReader = reader.document(0)
        self.assertTrue(docFromReader is not None)

        # The binary field must decompress to its original content.
        storedBinary = docFromReader.getBinaryValue("binaryCompressed")
        decompressed = CompressionTools.decompress(storedBinary)
        self.assertEqual(decompressed.string_, self.binaryValCompressed)

        # The string field must decompress to its original content too.
        storedString = docFromReader.getBinaryValue("stringCompressed")
        self.assertEqual(CompressionTools.decompressString(storedString),
                         self.binaryValCompressed)

        reader.close()
Ejemplo n.º 3
0
 def next(self):
     """Return one Token on the first call and None on every later call.

     The first call reads up to 1024 chars into a fresh Java char array
     and wraps the chars that were read in a single Token.
     """
     if self.done:
         return None

     self.done = True
     # NOTE: 'reader' is not a local or an attribute; it has to be supplied
     # by an enclosing scope that is not part of this method.
     buffer = JArray('char')(1024)
     length = reader.read(buffer, 0, 1024)
     return Token(buffer, 0, length, 0, length)
Ejemplo n.º 4
0
    def getDocIdSet(self, reader):
        """Build a bit set marking the documents matched by the accessor's ISBNs.

        For every non-None ISBN returned by ``self.accessor.isbns()`` the
        ``"isbn"`` term is looked up in ``reader``; when the lookup fills
        exactly one slot, that document's bit is set.

        :param reader: index reader providing ``maxDoc()`` and ``termDocs()``.
        :returns: an ``OpenBitSet`` sized to ``reader.maxDoc()``.
        """
        bits = OpenBitSet(long(reader.maxDoc()))
        isbns = self.accessor.isbns()

        # One-slot output buffers: at most one posting is read per ISBN.
        docs = JArray(int)(1)
        freqs = JArray(int)(1)

        for isbn in isbns:
            if isbn is None:
                continue

            termDocs = reader.termDocs(Term("isbn", isbn))
            try:
                if termDocs.read(docs, freqs) == 1:
                    bits.set(long(docs[0]))
            finally:
                # Release the enumeration even if read() raises; previously
                # it was never closed.
                termDocs.close()

        return bits
Ejemplo n.º 5
0
    def incrementToken(self):
        """Advance the wrapped stream, tagging the token with a payload.

        The payload is the text ``"pos: <self.pos>"``. The position
        increment is 1 when ``self.pos`` is 0 or ``self.i`` is odd, and 0
        otherwise; ``self.pos`` and ``self.i`` are then updated.

        :returns: True if a token was produced, False once the wrapped
            stream is exhausted.
        """
        if not self.input.incrementToken():
            return False

        payload = JArray('byte')("pos: %d" %(self.pos))
        self.payloadAttr.setPayload(BytesRef(payload))

        # First position and odd-numbered tokens advance; the rest are
        # stacked on the previous position.
        posIncr = 1 if (self.pos == 0 or self.i % 2 == 1) else 0

        self.posIncrAttr.setPositionIncrement(posIncr)
        self.pos += posIncr
        self.i += 1
        return True
Ejemplo n.º 6
0
    def _updateOaiRecord(self,
                         identifier,
                         setSpecs,
                         metadataPrefixes,
                         delete=False,
                         oldDoc=None,
                         deleteInSets=None,
                         deleteInPrefixes=None,
                         _overrideStamp=None):
        """Rewrite the index document for ``identifier`` with a new stamp.

        Builds a replacement document from the existing one, stores the new
        stamp in three fields, applies the metadata-prefix and set changes,
        adds a tombstone field when the record counts as deleted, writes the
        document and signals the update.

        :param identifier: record identifier; also the update term value.
        :param setSpecs: set specs to apply (``None`` is treated as ``[]``).
        :param metadataPrefixes: metadata prefixes to apply.
        :param delete: when true the record is always tombstoned.
        :param oldDoc: existing document; looked up when falsy.
        :param deleteInSets: passed through to ``_setSets``.
        :param deleteInPrefixes: passed (as a set) to ``_setMetadataPrefixes``.
        :param _overrideStamp: stamp to use instead of a fresh one; only
            honoured when ``self._importMode`` is true.
        """
        if not oldDoc:
            oldDoc = self._getDocument(identifier)
        doc, oldDeletedSets, oldDeletedPrefixes = self._getNewDocument(
            identifier, oldDoc=oldDoc)

        if self._importMode:
            newStamp = _overrideStamp
        else:
            newStamp = self._newStamp()

        # The stamp is stored three ways: as a point, as stored bytes and as
        # a numeric doc value.
        stampNumber = int(newStamp)
        doc.add(LongPoint(STAMP_FIELD, stampNumber))
        storedStamp = BytesRef(JArray('byte')(int_to_bytes(newStamp)))
        doc.add(StoredField(STAMP_FIELD, storedStamp))
        doc.add(NumericDocValuesField(NUMERIC_STAMP_FIELD, stampNumber))

        allMetadataPrefixes, allDeletedPrefixes = self._setMetadataPrefixes(
            doc=doc,
            metadataPrefixes=asSet(metadataPrefixes),
            delete=delete,
            deleteInPrefixes=asSet(deleteInPrefixes),
            oldDeletedPrefixes=oldDeletedPrefixes)

        allSets, allDeletedSets = self._setSets(
            doc=doc,
            setSpecs=setSpecs or [],
            delete=delete,
            deleteInSets=deleteInSets,
            oldDeletedSets=oldDeletedSets)

        # Tombstone on explicit delete, when every set is deleted (and there
        # is at least one deleted set), or when every prefix is deleted.
        needsTombstone = (
            delete
            or (allDeletedSets and allSets == allDeletedSets)
            or allMetadataPrefixes == allDeletedPrefixes)
        if needsTombstone:
            doc.add(
                StringField(TOMBSTONE_FIELD, TOMBSTONE_VALUE, Field.Store.YES))

        self._writer.updateDocument(Term(IDENTIFIER_FIELD, identifier), doc)
        self._latestModifications.add(str(identifier))
        self.do.signalOaiUpdate(metadataPrefixes=allMetadataPrefixes,
                                sets=allSets,
                                stamp=newStamp)
Ejemplo n.º 7
0
    def testBinaryFieldInIndex(self):
        """Check that stored binary and string fields survive an index round trip.

        A document holding a binary stored field and a stored string field,
        both built from ``self.binaryValStored``, is indexed and read back;
        both values must equal the original.
        """
        ft = FieldType()
        ft.setStored(True)

        rawBytes = JArray('byte')(self.binaryValStored)
        binaryFldStored = StoredField("binaryStored", rawBytes)
        stringFldStored = Field("stringStored", self.binaryValStored, ft)

        doc = Document()
        doc.add(binaryFldStored)
        doc.add(stringFldStored)

        # test for field count
        self.assertEqual(2, doc.fields.size())

        # add the doc to a ram index
        writer = self.getWriter(
            analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
        writer.addDocument(doc)
        writer.close()

        # open a reader and fetch the document
        reader = self.getReader()
        docFromReader = reader.document(0)
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(docFromReader is not None)

        # fetch the binary stored field and compare its content with the
        # original one
        storedValue = docFromReader.getBinaryValue("binaryStored")
        binaryFldStoredTest = storedValue.bytes.string_
        self.assertEqual(binaryFldStoredTest, self.binaryValStored)

        # fetch the string field and compare its content with the original
        # one
        stringFldStoredTest = docFromReader.get("stringStored")
        self.assertEqual(stringFldStoredTest, self.binaryValStored)

        reader.close()
Ejemplo n.º 8
0
 def readInternal(self, length, pos):
     """Read ``length`` bytes at offset ``pos`` of ``self.fh``.

     :param length: number of bytes requested from the file handle.
     :param pos: offset the file handle is moved to before reading.
     :returns: the data that was read, wrapped in a Java byte array.
     """
     handle = self.fh
     handle.seek(pos)
     byteArrayOf = JArray('byte')
     return byteArrayOf(handle.read(length))
Ejemplo n.º 9
0
    def binary(self, b):
        """Assert that a StoredField hands back the bytes it was given.

        :param b: values used to build the Java byte array; compared
            element by element with the field's bytes.
        """
        javaBytes = JArray('byte')(b)
        storedBytes = StoredField("bin", javaBytes).binaryValue().bytes

        # The field must expose an equal array with the same elements.
        assert javaBytes == storedBytes
        assert b == [value for value in storedBytes]
Ejemplo n.º 10
0
 def _checkOneTermReuse(self, a, input, expected):
     """Run ``_assertAnalyzesToReuse`` with ``expected`` as a Java string array.

     NOTE(review): ``expected`` is handed to ``JArray('string')`` unchanged;
     confirm whether callers pass a sequence of terms or a single string.
     """
     expectedTerms = JArray('string')(expected)
     self._assertAnalyzesToReuse(a, input, expectedTerms)
Ejemplo n.º 11
0
    def testPayloadsPos0(self):
        """Check positions, spans and payloads for tokens stacked at position 0.

        One document is indexed with ``TestPayloadAnalyzer``. The test then
        verifies the positions of term "a", the number of spans and payloads
        of a near query over "a" and "k", and that position 0 / payload
        ``"pos: 0"`` is seen in each pass.
        """
        writer = self.getWriter(analyzer=TestPayloadAnalyzer())

        doc = Document()
        doc.add(
            Field("content", "a a b c d e a f g h i j a b k k",
                  TextField.TYPE_STORED))
        writer.addDocument(doc)
        reader = writer.getReader()
        writer.close()

        tp = MultiFields.getTermPositionsEnum(reader,
                                              MultiFields.getLiveDocs(reader),
                                              "content", BytesRef("a"))

        self.assertTrue(tp.nextDoc() != tp.NO_MORE_DOCS)
        # "a" occurs 4 times
        self.assertEqual(4, tp.freq())

        expected = 0
        self.assertEqual(expected, tp.nextPosition())
        self.assertEqual(1, tp.nextPosition())
        self.assertEqual(3, tp.nextPosition())
        self.assertEqual(6, tp.nextPosition())

        # only one doc has "a"
        self.assertTrue(tp.nextDoc() == tp.NO_MORE_DOCS)

        searcher = self.getSearcher(reader=reader)

        stq1 = SpanTermQuery(Term("content", "a"))
        stq2 = SpanTermQuery(Term("content", "k"))
        sqs = [stq1, stq2]
        snq = SpanNearQuery(sqs, 30, False)

        # First pass: count every payload attached to the matching spans.
        count = 0
        sawZero = False
        pspans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
        while pspans.next():
            payloads = pspans.getPayload()
            sawZero |= pspans.start() == 0

            it = payloads.iterator()
            while it.hasNext():
                count += 1
                it.next()

        self.assertEqual(5, count)
        self.assertTrue(sawZero)

        # Second pass: count the spans themselves.
        spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
        count = 0
        sawZero = False
        while spans.next():
            count += 1
            sawZero |= spans.start() == 0

        self.assertEqual(4, count)
        self.assertTrue(sawZero)

        # Third pass: fetch the payloads through PayloadSpanUtil and look
        # for the one written at position 0.
        sawZero = False
        psu = PayloadSpanUtil(searcher.getTopReaderContext())
        pls = psu.getPayloadsForQuery(snq)
        count = pls.size()
        it = pls.iterator()
        while it.hasNext():
            payloadBytes = JArray('byte').cast_(it.next())
            s = payloadBytes.string_
            sawZero |= s == "pos: 0"

        self.assertEqual(5, count)
        self.assertTrue(sawZero)
Ejemplo n.º 12
0
    def binary(self, b):
        """Assert that a stored Field hands back the bytes it was given.

        :param b: values used to build the Java byte array; compared
            element by element with the field's binary value.
        """
        javaBytes = JArray('byte')(b)
        storedValue = Field("bin", javaBytes, Field.Store.YES).binaryValue

        # The field must expose an equal array with the same elements.
        assert javaBytes == storedValue
        assert b == [value for value in storedValue]