def _getFullStrings(self):
    """Index NUM_STRINGS generated-string documents and return a searcher.

    Each document carries four values produced by self.getRandomCharString:
    "string" and "string2" take a fresh self.getRandomNumber result as
    their first argument for every document, while "string_fixed" and
    "string2_fixed" reuse two numbers drawn once before the loop
    (presumably string lengths, going by the original fixedLen names).

    Every value is added as an un-stored StringField and as a stored-only
    tracer field; when self.supportsDocValues is true it is also added as
    a sorted or binary doc-values field, chosen by self.dvStringSorted.

    Returns whatever self.getSearcher(directory=...) yields for the
    RAMDirectory that was written (also appended to self.dirs).
    """
    policy = LogDocMergePolicy()
    policy.setMergeFactor(97)
    ramDir = RAMDirectory()
    self.dirs.append(ramDir)

    indexWriter = self.getWriter(
        directory=ramDir,
        analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
        maxBufferedDocs=4,
        mergePolicy=policy)

    storedOnly = FieldType()
    storedOnly.setStored(True)

    # Drawn once, ahead of the loop, so every document shares them.
    sharedLen = self.getRandomNumber(2, 8)
    sharedLen2 = self.getRandomNumber(1, 4)

    def addTracer(document, name, value):
        # Stored-only copy of the value.
        document.add(Field(name, value, storedOnly))

    def addString(document, name, dvName, value):
        # Un-stored StringField plus, if enabled, its doc-values twin.
        document.add(StringField(name, value, Field.Store.NO))
        if not self.supportsDocValues:
            return
        if self.dvStringSorted:
            dvField = SortedDocValuesField(dvName, BytesRef(value))
        else:
            dvField = BinaryDocValuesField(dvName, BytesRef(value))
        document.add(dvField)

    def boostNormedFields(document):
        # Only indexed fields that keep norms are given the 2.0 boost.
        for field in document.getFields():
            if not field.fieldType().indexed():
                continue
            if field.fieldType().omitNorms():
                continue
            Field.cast_(field).setBoost(2.0)

    for _ in xrange(NUM_STRINGS):
        document = Document()

        value = self.getRandomCharString(self.getRandomNumber(2, 8), 48, 52)
        addTracer(document, "tracer", value)
        addString(document, "string", "string_dv", value)

        value2 = self.getRandomCharString(self.getRandomNumber(1, 4), 48, 50)
        addString(document, "string2", "string2_dv", value2)
        addTracer(document, "tracer2", value2)

        boostNormedFields(document)

        fixedValue = self.getRandomCharString(sharedLen, 48, 52)
        addTracer(document, "fixed_tracer", fixedValue)
        addString(document, "string_fixed", "string_fixed_dv", fixedValue)

        fixedValue2 = self.getRandomCharString(sharedLen2, 48, 52)
        addString(document, "string2_fixed", "string2_fixed_dv", fixedValue2)
        addTracer(document, "tracer2_fixed", fixedValue2)

        # Second pass, now that the fixed-length fields are present too.
        boostNormedFields(document)
        indexWriter.addDocument(document)

    indexWriter.close()
    return self.getSearcher(directory=ramDir)
def _getIndex(self, even, odd):
    """Index selected rows of self.data in memory and return a searcher.

    even -- when true, rows at even positions of self.data are indexed.
    odd  -- when true, rows at odd positions of self.data are indexed.

    Column 0 of a row becomes the stored-only "tracer" field and column 1
    the "contents" TextField.  Columns 2-11 are each added as an un-stored
    StringField when they are not None; the "int", "float", "string" and
    "double" columns additionally get a doc-values field when
    self.supportsDocValues is true.

    Returns whatever self.getSearcher(reader=...) yields for a reader
    obtained from the writer just before it is closed.  The RAMDirectory
    is appended to self.dirs.

    Raises ValueError if the doc-values type chosen for the "string"
    column is neither SORTED nor BINARY (not expected to happen).
    """
    mergePolicy = LogDocMergePolicy()
    mergePolicy.setMergeFactor(1000)
    directory = RAMDirectory()
    self.dirs.append(directory)

    writer = self.getWriter(directory=directory,
                            analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
                            maxBufferedDocs=2, mergePolicy=mergePolicy)

    # BINARY only when sorted doc values are off and notSorted is set;
    # every other combination indexes SORTED.
    if not self.dvStringSorted and self.notSorted:
        stringDVType = FieldInfo.DocValuesType.BINARY
    else:
        stringDVType = FieldInfo.DocValuesType.SORTED

    storedOnly = FieldType()
    storedOnly.setStored(True)

    def stringDocValues(value):
        # Doc-values field for the "string" column, per stringDVType.
        if stringDVType == FieldInfo.DocValuesType.SORTED:
            return SortedDocValuesField("string_dv", BytesRef(value))
        if stringDVType == FieldInfo.DocValuesType.BINARY:
            return BinaryDocValuesField("string_dv", BytesRef(value))
        # %-formatting, because str + <non-str object> raises TypeError.
        raise ValueError("unknown type %s" % (stringDVType,))

    # (row column, field name, doc-values factory or None), listed in the
    # order the fields must be added to the document.
    columns = (
        (2, "int",
         lambda v: NumericDocValuesField("int_dv", Long.parseLong(v))),
        (3, "float",
         lambda v: FloatDocValuesField("float_dv", Float.parseFloat(v))),
        (4, "string", stringDocValues),
        (5, "custom", None),
        (6, "i18n", None),
        (7, "long", None),
        (8, "double",
         lambda v: NumericDocValuesField(
             "double_dv",
             Double.doubleToRawLongBits(Double.parseDouble(v)))),
        (9, "short", None),
        (10, "byte", None),
        (11, "parser", None),
    )

    for i, row in enumerate(self.data):
        # Even positions are governed by `even`, odd positions by `odd`.
        if not (even if i % 2 == 0 else odd):
            continue

        doc = Document()
        doc.add(Field("tracer", row[0], storedOnly))
        doc.add(TextField("contents", row[1], Field.Store.NO))

        for column, name, makeDocValues in columns:
            value = row[column]
            if value is None:
                continue
            doc.add(StringField(name, value, Field.Store.NO))
            if makeDocValues is not None and self.supportsDocValues:
                doc.add(makeDocValues(value))

        # Only indexed fields that keep norms are given the 2.0 boost.
        for f in doc.getFields():
            if f.fieldType().indexed() and not f.fieldType().omitNorms():
                Field.cast_(f).setBoost(2.0)

        writer.addDocument(doc)

    # The reader must be taken from the writer before the writer closes.
    reader = writer.getReader()
    writer.close()

    return self.getSearcher(reader=reader)