import matplotlib.pyplot as plt

from search_engine.index.api import IndexManager
import lib


def WebDimension():
    # Plot the dimension of the web graph at each crawl depth,
    # as recorded in the shared index.
    index = IndexManager.get_proxy()
    x, y = lib.split(lib.dimensions(index.get_depth_stat()))
    plt.title('web dimensions')
    plt.plot(x, y, 'b', x, y, 'bo')  # solid line plus point markers
    plt.xlabel('depth')
    plt.ylabel('dimension')
    plt.show()
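# The lib.split helper used above is not shown in this section. A minimal
# sketch consistent with its call sites, assuming the input is an iterable
# of (x, y) pairs -- the real implementation may differ:
def split(pairs):
    # Transpose [(x1, y1), (x2, y2), ...] into two parallel lists
    # suitable for plt.plot.
    xs, ys = zip(*pairs)
    return list(xs), list(ys)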
def __init__(self, config):
    # Connect to the shared index and start from a clean slate.
    self._index = IndexManager.get_proxy()
    self._index.clear()
    self._parser = Parser()
    # Frontier of links still to crawl, primed with the seed URLs
    # (-1 presumably marks the seed level, so children land at depth 0).
    self._tocrawl = LinkQueue(self._index)
    self._tocrawl.extend(config.seed, -1)
    self._thread_count = config.thread_count
    self._delay = config.delay
    self._index_pages = config.index_pages
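# The config object passed to __init__ is defined elsewhere in the project.
# A minimal sketch covering only the attributes read above; the class name
# and defaults here are assumptions, not the project's actual code:
class Config(object):
    def __init__(self, seed, thread_count=4, delay=1.0, index_pages=1000):
        self.seed = seed                  # list of start URLs
        self.thread_count = thread_count  # number of crawler threads
        self.delay = delay                # politeness delay between requests
        self.index_pages = index_pages    # stop after indexing this many pages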
def TestCrawler():
    index = IndexManager.get_proxy()
    # Dimensions of the synthetic test web, one value per depth level.
    dimension = [20, 10, 2]
    # Expected frontier size per depth for a web with these dimensions.
    x, y = lib.split(lib.get_pages(dimension))
    # Per-depth statistics actually recorded by the crawler.
    crawler_x, crawler_y = lib.split(index.get_depth_stat())
    plt.title('crawler verification, dimensions: %s' % dimension)
    plt.plot(x, y, 'b')                   # expected curve
    plt.plot(crawler_x, crawler_y, 'ro')  # measured points
    plt.xlabel('depth')
    plt.ylabel('crawl frontier')
    plt.show()
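# lib.get_pages is likewise not shown. One plausible sketch, assuming
# `dimension` lists the branching factor at each depth of the synthetic
# web, so the expected page count at depth d is the product of the first
# d factors (this reading is an assumption):
def get_pages(dimension):
    pages, count = [(0, 1)], 1
    for depth, factor in enumerate(dimension, 1):
        count *= factor
        pages.append((depth, count))
    return pages  # e.g. [20, 10, 2] -> [(0, 1), (1, 20), (2, 200), (3, 400)]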
import cgi
import socket

import html  # project-local page helpers: header(), a(), div(), footer()
from search_engine.index.api import IndexManager

form = cgi.FieldStorage()

html.header()
query = form.getvalue('q')
if not query:
    query = ''
# Navigation bar and the search form; the submitted query is echoed
# back into the input field.
print """
<div/>
<a href="index.py"><b>search</b></a>
<a href="statistic.py">statistic</a>
<a href="web.py">web model</a>
<div><br/>
<form method="post">
    <input name="q" value="%s">
    <input name="s" type="submit" value="search">
</form>
""" % query
try:
    index = IndexManager.get_proxy()
    if query:
        # One result line per matching URL.
        for url in index.lookup(query):
            print '<div>'
            html.a(url, url)
            print '</div>'
except socket.error:
    html.div("can't connect to DataStorage")
html.footer()
import logging
from multiprocessing import freeze_support

from search_engine.index.api import IndexManager

if __name__ == '__main__':
    freeze_support()  # needed when frozen into a Windows executable
    # Configure the root logger, otherwise the INFO message below is dropped.
    logging.basicConfig(level=logging.INFO)
    logging.info('starting DataStorage')
    manager = IndexManager.default()
    server = manager.get_server()
    server.serve_forever()