def get_coll_termvector(self, field):
    """Generate the collection-level terms of ``field``.

    Opens the reader via ``self.open_reader()``, then walks the terms that
    ``MultiFields`` reports for ``field`` on ``self.reader``.

    Yields:
        ``(term_text, terms_enum)`` tuples. ``term_text`` is the term decoded
        with ``utf8ToString()``; ``terms_enum`` is the same enum object on
        every iteration (the one being advanced by the loop).

    Nothing is yielded when the reader exposes no fields or ``field`` has no
    terms.
    """
    self.open_reader()

    all_fields = MultiFields.getFields(self.reader)
    if all_fields is None:
        return

    field_terms = all_fields.terms(field)
    if not field_terms:
        return

    terms_enum = field_terms.iterator(None)
    # The enum is viewed as a BytesRefIterator so it can drive a Python
    # for-loop; each step advances ``terms_enum`` itself.
    for term_bytes in BytesRefIterator.cast_(terms_enum):
        yield term_bytes.utf8ToString(), terms_enum
def index_scan():
    """Print each field of the index stored in ``indexOut/`` with its terms.

    Opens the directory ``indexOut/`` (relative to the current working
    directory), opens a reader on it, and for every field reported by
    ``MultiFields.getFields`` prints ``<field> -> <terms object>``.

    The reader and the directory are always closed before returning, even
    if scanning fails part-way through.
    """
    print("Scanning the index")
    indexPath = File("indexOut/").toPath()
    indexDir = FSDirectory.open(indexPath)
    try:
        reader = DirectoryReader.open(indexDir)
        try:
            fields = MultiFields.getFields(reader)
            if fields is None:
                # No postings in the index: nothing to list.
                return
            for field in fields:
                terms = MultiFields.getTerms(reader, field)
                print(field, "->", terms)
        finally:
            reader.close()
    finally:
        indexDir.close()
def fieldnames(self):
    """Return a list of the field names exposed by the index reader.

    The reader is obtained from ``self._indexAndTaxonomy.searcher``. An empty
    list is returned when ``MultiFields.getFields`` reports no fields (None).
    """
    reader = self._indexAndTaxonomy.searcher.getIndexReader()
    all_fields = MultiFields.getFields(reader)

    names = []
    if all_fields is not None:
        # Drain the Java-style iterator explicitly via hasNext()/next().
        field_iter = all_fields.iterator()
        while field_iter.hasNext():
            names.append(field_iter.next())
    return names
def getTFForField(self, field):
    """Map every term of ``field`` to its total term frequency.

    Args:
        field: Name of the indexed field to enumerate.

    Returns:
        dict: ``{term_text: freq}`` where ``freq`` is
        ``self.reader.totalTermFreq(Term(field, term_text))``. The dict is
        empty when the reader has no fields or ``field`` has no terms.

    Enumeration is best-effort: if an error occurs part-way through, it is
    logged and the frequencies collected so far are returned.
    """
    import logging

    tfs = {}

    fields = MultiFields.getFields(self.reader)
    if fields is None:
        return tfs

    terms = fields.terms(field)
    if terms is None:
        return tfs

    terms_enum = terms.iterator(None)
    # View the enum as a BytesRefIterator so next() returns the next term,
    # or None once the enumeration is exhausted.
    term_iter = BytesRefIterator.cast_(terms_enum)
    try:
        while term_iter.next() is not None:
            # term() is the term the enum is currently positioned on.
            term_string = terms_enum.term().utf8ToString()
            tfs[term_string] = self.reader.totalTermFreq(Term(field, term_string))
    except Exception:
        # Keep the original best-effort contract (return what we have), but
        # do not hide the failure and do not swallow KeyboardInterrupt or
        # SystemExit as the former bare ``except`` did.
        logging.getLogger(__name__).exception(
            "Failed while collecting term frequencies for field %r", field
        )
    return tfs