def indexing(self, root = settings.ADMINS_ENGINE.mDocumentDirectory, parent = None, docID = 1, parentID = 0, id = 0):
    """Recursively index the files under ``root`` and return the next unused document ID.

    Each regular file in ``root`` becomes one document with the fields
    ``name`` (ancestor names followed by the file name), ``parent`` (the
    space-joined ancestor names), ``id``, ``parentID`` and, when the file is
    not empty, ``content``. If a directory named ``<file>.sub`` exists next
    to a file, it is indexed recursively with that file as the parent.

    Parameters:
        root: directory to scan.
        parent: names of the ancestor entries, outermost first. Defaults to
            a fresh empty list; a list passed by the caller is not modified.
        docID: ID assigned to the first document indexed by this call.
        parentID: ID stored in the ``parentID`` field of every document
            indexed directly by this call.
        id: recursion depth; 0 marks the outermost call, which commits and
            closes the index writer once the whole tree has been processed.

    Returns:
        The document ID following the last one that was assigned.
    """
    # Use a None sentinel instead of a shared mutable default: a default list
    # would keep stale entries for all later calls if a run aborted mid-recursion.
    if parent is None:
        parent = []

    realPath = os.path.abspath(root)
    # The ancestor path is the same for every entry of this directory.
    parentNames = ' '.join(parent)

    for i in os.listdir(realPath):
        path = os.path.join(realPath, i)
        if not os.path.isfile(path):
            continue

        # index this file
        fullName = ("%s %s" % (parentNames, i)).strip()
        doc = Document()
        doc.add(Field('name', fullName, self.mFieldTypes['name']))
        doc.add(Field('parent', parentNames, self.mFieldTypes['parent']))
        doc.add(Field('id', str(docID), self.mFieldTypes['id']))
        # parentID is stored with the same field type as id.
        doc.add(Field('parentID', str(parentID), self.mFieldTypes['id']))

        # The context manager closes the file even if reading fails.
        with open(path, 'r') as fd:
            content = fd.read()
        if content:
            doc.add(Field('content', content, self.mFieldTypes['content']))
        self.mIndexWriter.addDocument(doc)

        ##################### Logging ##############################
        if IS_DEBUG:
            nameDebug = AnalyzerDebug.debug(self.mAnalyzers['name'], fullName)
            parentDebug = AnalyzerDebug.debug(self.mAnalyzers['parent'], parentNames)
            contentDebug = AnalyzerDebug.debug(self.mAnalyzers['default'], content)
            self.mLog = self.mLog + (
                "File %s\n {name - %s}: %s\n {parent - %s}: %s\n {content}: %s\n\n"
                % (path, docID, nameDebug, parentID, parentDebug, contentDebug)
            )

        docID = docID + 1

        ################### index sub commands
        if os.path.isdir(path + ".sub"):
            # docID was already advanced, so docID - 1 is the ID of the file
            # just indexed; it becomes the parentID of everything in ".sub".
            # A new list is passed so the caller's list is never mutated.
            docID = self.indexing(path + ".sub", parent + [i], docID, docID - 1, id + 1)

    if id == 0:
        # Only the outermost call finalizes the index.
        self.mIndexWriter.commit()
        self.mIndexWriter.close()
        if IS_DEBUG:
            loggingBot = LoggingBot(self.mLog, settings.ADMINS_ENGINE.getIndexingLogQueue())
            loggingBot.start()
            self.mLog = ""

    return docID
def searching(self, strQuery):
    """Search the index for ``strQuery`` and return the top hits.

    The query string is parsed with ``self.mQueryParser``, wrapped in a
    ``CommandScoreQuery`` together with the raw query text, and run against
    ``self.mIndexSearcher`` for at most 10 results.

    Parameters:
        strQuery: the raw query string.

    Returns:
        A list of dicts, one per hit in the order the searcher returned
        them, with the keys ``name``, ``parent``, ``content`` (stored field
        values of the matching document) and ``score``.
    """
    query = self.mQueryParser.parse(strQuery)

    # Scorer currently under test; it receives the raw query text as well
    # as the parsed query.
    testScorer = CommandScoreQuery(query, strQuery)

    topDocs = self.mIndexSearcher.search(testScorer, 10)

    def toResult(hit):
        # Load the stored document for this hit and pick out the result fields.
        stored = self.mIndexSearcher.doc(hit.doc)
        return {
            'name': stored.get('name'),
            'parent': stored.get('parent'),
            'content': stored.get('content'),
            'score': hit.score,
        }

    ret = [toResult(hit) for hit in topDocs.scoreDocs]

    ################# Debug Query Analyzer ############
    if IS_DEBUG:
        entry = "".join([
            "%s - %s\n" % (datetime.datetime.now(), strQuery),
            " Parsed Query: <%s>\n" % query.toString(),
            " Hits: %s\n MaxScore: %s\n" % (topDocs.totalHits, topDocs.getMaxScore()),
            " Scorer: %s\n\n" % testScorer,
        ])
        self.mLog = self.mLog + entry
        # Hand the accumulated log to a LoggingBot, then start a fresh buffer.
        loggingBot = LoggingBot(self.mLog, settings.ADMINS_ENGINE.getSearchingLogQueue())
        loggingBot.start()
        self.mLog = ""

    return ret