def main():
    """Drive the Lucene search pipeline.

    Reads the '=' separated config file and, per entry:
      - DOCS:     parse the XML document collection and index every document;
      - QUERIES:  run each ';' separated query and collect its results,
                  keyed by the whitespace-stripped query id;
      - EXPECTED: load the expected results for evaluation.
    Finally writes the evaluation report via executeEvaluator().

    Side effects: rebinds the module-global `luceneSearcher` logger and
    writes PATH + '/luceneSearcher.log' and '/REPORT.txt'.
    """
    results = {}
    global luceneSearcher

    # Logging instantiation.
    logPath = PATH + '/luceneSearcher.log'
    log('luceneSearcher', logPath)
    luceneSearcher = logging.getLogger('luceneSearcher')
    luceneSearcher.info('Processing Lucene...')

    configFile = '/config.cfg'
    pathVector = readData(configFile, '=')
    expectedResultsString = ''
    # Iterate entries directly (no index arithmetic) and hoist the
    # str(...).strip() conversion that every branch repeated.
    for entry in pathVector:
        key = entry[0]
        path = str(entry[1]).strip()
        if key == "DOCS":
            dictionary = readXML(PATH + path)
            for doc_id, doc_body in dictionary.items():
                indexer(doc_id, doc_body)
        elif key == "QUERIES":
            queries = readData(path, ';')
            for q in queries:
                # Compute the whitespace-free query id once, not twice.
                query_id = q[0].replace(' ', '')
                results[query_id] = retriever(query_id, q[1])
        elif key == "EXPECTED":
            expectedResultsString = readData(path, ';')
    executeEvaluator(PATH, '/REPORT.txt', expectedResultsString, results)
def __init__(self, urllist=None):
    """Set up the crawler's collaborators.

    :param urllist: optional list of seed URLs; defaults to an empty list.

    BUG FIX: the original used a mutable default (`urllist=[]`), so every
    instance created without an argument shared one list object — URLs
    appended on one instance leaked into all the others. A `None` sentinel
    gives each instance a fresh list while keeping callers unchanged.
    """
    self.spider = spider()
    self.indexer = indexer()
    self.parser = parser()
    self.urllist = urllist if urllist is not None else []
self.spider = spider() self.indexer = indexer() self.parser = parser() self.urllist = urllist def start(self): if len(self.urllist) == 0: return False self.spider.addurllist(self.urllist) self.spider.setparser(self.parser) self.spider.setindexer(self.indexer) spider.run() return True def cleanup(self): self.indexer.closedb() if __name__ == "__main__": spider = spider() #spider.addurl('http://localhost:9080/setest/test.php') spider.addurl('http://hq.booksarefun.com/') parserobj = parser() indexobj = indexer() spider.setparser(parserobj) spider.setindexer(indexobj) spider.run() indexobj.closedb() print 'done!'
def __init__(self, urllist=None):
    """Set up the crawler's collaborators.

    :param urllist: optional list of seed URLs; defaults to an empty list.

    BUG FIX: the original used a mutable default (`urllist=[]`), so every
    instance created without an argument shared one list object — URLs
    appended on one instance leaked into all the others. A `None` sentinel
    gives each instance a fresh list while keeping callers unchanged.
    """
    self.spider = spider()
    self.indexer = indexer()
    self.parser = parser()
    self.urllist = urllist if urllist is not None else []