def SearchExactAll(self, keyword):
    """Perform exact matching against the blog contents and the blogger ID.

    ``keyword`` is decoded from cp949 and re-encoded as UTF-8 before the
    terms are built.  One term query is created for the "bloger" field and
    one for the "contents" field; both are added to a boolean query as
    SHOULD clauses.

    Returns whatever ``self.__MakeResultFormat`` produces for the hits and
    the searcher that produced them.
    """
    index_searcher = lucene.IndexSearcher(self.store)
    print("Searching for ", keyword)
    utf8_keyword = keyword.decode('cp949').encode('utf-8')

    # Same term text looked up in each field, combined as optional clauses.
    either_field = lucene.BooleanQuery()
    for field in ("bloger", "contents"):
        field_query = lucene.TermQuery(lucene.Term(field, utf8_keyword))
        either_field.add(field_query, lucene.BooleanClause.Occur.SHOULD)

    hits = index_searcher.search(either_field)
    print("%s matching documents" % hits.length())
    return self.__MakeResultFormat(hits, index_searcher)
def SearchPrefixContents(self, keyword):
    """Perform prefix matching against the blog contents.

    ``keyword`` is decoded from cp949 and re-encoded as UTF-8, then used as
    the prefix of a ``PrefixQuery`` on the "contents" field.

    Returns whatever ``self.__MakeResultFormat`` produces for the hits and
    the searcher that produced them.
    """
    index_searcher = lucene.IndexSearcher(self.store)
    print("Searching for ", keyword)

    utf8_keyword = keyword.decode('cp949').encode('utf-8')
    prefix_term = lucene.Term("contents", utf8_keyword)
    hits = index_searcher.search(lucene.PrefixQuery(prefix_term))

    print("%s matching documents" % hits.length())
    return self.__MakeResultFormat(hits, index_searcher)
def getHitCount(self, fieldName, searchString):
    """Count the documents whose ``fieldName`` term equals ``searchString``.

    Prints the total number of documents in the index, runs a term query
    and prints the number of matches.  The search requests at most 50
    results, so the returned count never exceeds 50.

    The reader and the searcher are both opened read-only on ``self.dir``
    and are closed even when a Lucene call raises.

    Returns:
        The number of score docs returned by the search.
    """
    reader = lucene.IndexReader.open(self.dir, True)  # readOnly = True
    try:
        print('%s total docs in index' % reader.numDocs())
    finally:
        reader.close()

    searcher = lucene.IndexSearcher(self.dir, True)  # readOnly = True
    try:
        query = lucene.TermQuery(lucene.Term(fieldName, searchString))
        # scoreDocs holds at most the requested top 50 results.
        hitCount = len(searcher.search(query, 50).scoreDocs)
    finally:
        searcher.close()

    print("%s total matching documents for %s\n---------------"
          % (hitCount, searchString))
    return hitCount
def get_word_list(text, is_list=False, field_name = 'fieldname'):
    """Analyze ``text`` with the Korean analyzer and list the resulting terms.

    The text is indexed as a single document in an in-memory directory and
    the terms stored under ``field_name`` are then enumerated.

    Args:
        text: The text to analyze, or an iterable of strings when
            ``is_list`` is true.
        is_list: When true, the items of ``text`` are concatenated, each
            followed by a newline, before being analyzed.
        field_name: Name of the index field that holds the text.

    Returns:
        A list of ``{'text': <term text>, 'freq': <term frequency>}``
        dicts, in the order the term enumeration yields them.
    """
    if is_list:
        text = "".join(item + "\n" for item in text)

    # NOTE(review): the VM is initialised on every call; confirm that the
    # installed JCC tolerates repeated initVM() calls.
    lucene.initVM(lucene.CLASSPATH)
    analyzer = lucene.KoreanAnalyzer()
    directory = lucene.RAMDirectory()
    try:
        # Index the text as one document.
        writer = lucene.IndexWriter(directory, analyzer)
        try:
            doc = lucene.Document()
            doc.add(lucene.Field(field_name, text,
                                 lucene.Field.Store.YES,
                                 lucene.Field.Index.ANALYZED))
            writer.addDocument(doc)
        finally:
            writer.close()

        # Walk every term of the field, starting from the empty term.
        reader = lucene.IndexReader.open(directory, False)
        try:
            term_enum = reader.terms(lucene.Term(field_name, ''))
            term = term_enum.term()
            word_list = []
            while term and term.field() == field_name:
                term_docs = reader.termDocs(term)
                # Advance to the first posting before reading freq().
                term_docs.next()
                word_list.append({'text': term.text(),
                                  'freq': term_docs.freq()})
                if not term_enum.next():
                    break
                term = term_enum.term()
            return word_list
        finally:
            reader.close()
    finally:
        directory.close()
def handle_noargs(self, **options):
    """Build or incrementally update the Lucene index of review requests.

    Exits with status 1 when search is disabled in the site configuration
    or when PyLucene is not available.

    Review requests with status 'P' or 'S' are indexed through
    ``self.index_review_request``.  A failure on one review request is
    reported on stderr and does not stop the run.

    Options:
        incremental: When true (the default), only review requests updated
            after the stored timestamp are re-indexed.  If the timestamp
            file cannot be read or parsed, the index is rebuilt from
            scratch instead.
    """
    siteconfig = SiteConfiguration.objects.get_current()

    # Refuse to do anything if they haven't turned on search.
    if not siteconfig.get("search_enable"):
        sys.stderr.write('Search is currently disabled. It must be '
                         'enabled in the Review Board administration '
                         'settings to run this command.\n')
        sys.exit(1)

    if not have_lucene:
        sys.stderr.write('PyLucene is required to build the search index.\n')
        sys.exit(1)

    incremental = options.get('incremental', True)

    store_dir = siteconfig.get("search_index_file")
    if not os.path.exists(store_dir):
        os.mkdir(store_dir)
    timestamp_file = os.path.join(store_dir, 'timestamp')

    timestamp = 0
    if incremental:
        try:
            f = open(timestamp_file, 'r')
            try:
                timestamp = datetime.utcfromtimestamp(int(f.read()))
            finally:
                f.close()
        except (IOError, ValueError):
            # Missing, unreadable or corrupt timestamp: do a full rebuild.
            incremental = False

    # Record the start time before querying, so that changes made while
    # this run is in progress are picked up by the next incremental run.
    f = open(timestamp_file, 'w')
    try:
        f.write('%d' % time.time())
    finally:
        f.close()

    # The final constructor flag is "create": a non-incremental run
    # starts a fresh index.
    if lucene_is_2x:
        store = lucene.FSDirectory.getDirectory(store_dir, False)
        writer = lucene.IndexWriter(store, False,
                                    lucene.StandardAnalyzer(),
                                    not incremental)
    elif lucene_is_3x:
        store = lucene.FSDirectory.open(lucene.File(store_dir))
        writer = lucene.IndexWriter(
            store,
            lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT),
            not incremental,
            lucene.IndexWriter.MaxFieldLength.LIMITED)
    else:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('Unsupported PyLucene version')

    try:
        status = Q(status='P') | Q(status='S')
        objects = ReviewRequest.objects.filter(status)
        if incremental:
            query = Q(last_updated__gt=timestamp)
            # FIXME: re-index based on reviews once reviews are indexed.  I
            # tried ORing this in, but it doesn't seem to work.
            #         Q(review__timestamp__gt=timestamp)
            objects = objects.filter(query)

        if sys.stdout.isatty():
            print('Creating Review Request Index')
        totalobjs = objects.count()
        i = 0
        prev_pct = -1

        for request in objects:
            try:
                # Remove the old documents from the index
                if incremental:
                    writer.deleteDocuments(lucene.Term('id', str(request.id)))

                self.index_review_request(writer, request)

                if sys.stdout.isatty():
                    i += 1
                    pct = i * 100 // totalobjs
                    # Only redraw when the integer percentage changes.
                    if pct != prev_pct:
                        sys.stdout.write(" [%s%%]\r" % pct)
                        sys.stdout.flush()
                        prev_pct = pct

            except Exception:
                # sys.exc_info() keeps this handler valid on every
                # Python 2 release (no "except ... as" required).
                e = sys.exc_info()[1]
                sys.stderr.write('Error indexing ReviewRequest #%d: %s\n' %
                                 (request.id, e))
    finally:
        # Closing the writer commits pending changes and releases the
        # index write lock.
        writer.close()
def testDelete(self, fieldName, searchString):
    """Delete the documents matching the term ``fieldName:searchString``.

    Opens a writable reader on ``self.dir``, issues the deletion and closes
    the reader.  The reader is closed even when the deletion raises.
    """
    reader = lucene.IndexReader.open(self.dir, False)  # readOnly = False
    try:
        reader.deleteDocuments(lucene.Term(fieldName, searchString))
    finally:
        reader.close()