예제 #1
0
 def get_coll_termvector(self, field):
     """Yield ``(term_text, terms_enum)`` for every term stored in ``field``.

     The reader is opened via ``self.open_reader()`` before the lookup.
     The same enum object is yielded alongside each decoded term, so it
     is positioned on the term that was just produced.  Nothing is
     yielded when the index exposes no fields or ``field`` has no terms.
     """
     self.open_reader()
     index_fields = MultiFields.getFields(self.reader)
     if index_fields is None:
         return

     field_terms = index_fields.terms(field)
     if not field_terms:
         return

     terms_enum = field_terms.iterator(None)
     for term_bytes in BytesRefIterator.cast_(terms_enum):
         yield term_bytes.utf8ToString(), terms_enum
예제 #2
0
파일: TwIndexer.py 프로젝트: skopp002/cs242
def index_scan(index_path="indexOut/"):
    """Print every field of a Lucene index together with its terms object.

    Args:
        index_path: Directory holding the index.  Defaults to
            ``"indexOut/"``, the location that used to be hard-coded.

    The directory and the reader are closed before returning, including
    when an error occurs while scanning.
    """
    print("Scanning the index")
    index_dir = FSDirectory.open(File(index_path).toPath())
    try:
        reader = DirectoryReader.open(index_dir)
        try:
            fields = MultiFields.getFields(reader)
            # Iterating None would raise TypeError; treat it as
            # "nothing to report".
            if fields is None:
                return
            for field in fields:
                term = MultiFields.getTerms(reader, field)
                print(field, "->" , term)
        finally:
            reader.close()
    finally:
        index_dir.close()
 def get_coll_termvector(self, field):
     """Generate the collection term vector of ``field``.

     Each item is a ``(term_string, terms_enum)`` tuple, where
     ``terms_enum`` is the single enum being advanced by the iteration.
     The generator is empty if no fields are available from the reader
     or if ``field`` has no terms.
     """
     self.open_reader()
     all_fields = MultiFields.getFields(self.reader)
     if all_fields is None:
         return

     terms_of_field = all_fields.terms(field)
     if not terms_of_field:
         return

     enum = terms_of_field.iterator(None)
     for raw_term in BytesRefIterator.cast_(enum):
         yield raw_term.utf8ToString(), enum
예제 #4
0
 def fieldnames(self):
     """Return a list with the name of every field in the index.

     The names come from the reader behind
     ``self._indexAndTaxonomy.searcher``.  An empty list is returned when
     the reader exposes no fields.
     """
     index_reader = self._indexAndTaxonomy.searcher.getIndexReader()
     index_fields = MultiFields.getFields(index_reader)

     names = []
     if index_fields is not None:
         # Drive the Java-style iterator by hand, collecting each name.
         field_iter = index_fields.iterator()
         while field_iter.hasNext():
             names.append(field_iter.next())
     return names
예제 #5
0
 def getTFForField(self, field):
     """Return ``{term_string: total_term_frequency}`` for ``field``.

     Frequencies are obtained from ``self.reader.totalTermFreq``.  An
     empty dict is returned when the reader exposes no fields or
     ``field`` has no terms.  Enumeration is best-effort: if it fails
     part-way, the entries collected so far are returned.
     """
     tfs = {}
     fields = MultiFields.getFields(self.reader)
     if fields is None:
         return tfs
     terms = fields.terms(field)
     if terms is None:
         return tfs

     enum = BytesRefIterator.cast_(terms.iterator(None))
     try:
         # Same underlying object viewed as a TermsEnum; the cast does
         # not depend on the loop, so do it once.
         termval = TermsEnum.cast_(enum)
         while enum.next():
             termString = termval.term().utf8ToString()
             tfs[termString] = self.reader.totalTermFreq(
                 Term(field, termString))
     except Exception:
         # NOTE(review): the original swallowed everything here,
         # presumably because some PyLucene builds signal exhaustion by
         # raising (e.g. StopIteration) instead of returning None --
         # confirm.  Catching Exception keeps that best-effort behaviour
         # while letting KeyboardInterrupt/SystemExit propagate.
         pass
     return tfs