def test_same_logger(self):
    """Singleton test.

    Builds ``MSLogger`` twice and asserts that the two results compare
    equal.
    """
    first, second = mslogger.MSLogger(), mslogger.MSLogger()
    self.assertEqual(first, second)
def __init__(self, configfile):
    """Load the spider settings stored in ``configfile``.

    Every setting is first given a placeholder default. When ``configfile``
    does not exist, an error is logged and the instance keeps those
    defaults. Otherwise the file is read as UTF-8 and each setting is
    fetched through ``self.getSpiderConfig``.

    Args:
        configfile: Path of the configuration file. The caller should make
            sure the file exists.
    """
    self.__logger = mslogger.MSLogger().getLogger()
    self.__config = None
    self.__spiderSection = "spider"

    # Placeholder values kept when the config file cannot be found.
    self.__urlListFile = ""
    self.__outputDirectory = ""
    self.__maxDepth = ""
    self.__crawlInterval = ""
    self.__crawlTimeout = ""
    self.__targetUrl = ""
    self.__threadCount = 0

    if not os.path.exists(configfile):
        # format() instead of "+" so a path-like argument is reported
        # rather than raising TypeError while building the message.
        self.__logger.error("file {} not exist!".format(configfile))
        return

    # A one-element tuple needs the trailing comma; "(';')" is just a str.
    self.__config = configparser.ConfigParser(inline_comment_prefixes=(";",))
    self.__logger.info("configfile:{}".format(configfile))
    self.__config.read(configfile, "utf-8")

    self.__urlListFile = self.getSpiderConfig("url_list_file")
    self.__outputDirectory = self.getSpiderConfig("output_directory")
    self.__maxDepth = self.getSpiderConfig("max_depth")
    self.__crawlInterval = self.getSpiderConfig("crawl_interval")
    self.__crawlTimeout = self.getSpiderConfig("crawl_timeout")
    self.__targetUrl = self.getSpiderConfig("target_url")
    self.__threadCount = self.getSpiderConfig("thread_count")
def __init__(self, seedfile):
    """Prepare the URL bookkeeping and then call ``_read_seed_file()``.

    The caller is responsible for making sure ``seedfile`` exists.
    """
    # File that holds the seed sites.
    self.__seedfile = seedfile

    # Sites not visited yet; the queue is created without a size limit.
    pending = queue.Queue()
    self.__unvistedUrls = pending

    # Sites that have already been visited.
    self.__visitedUrls = list()

    log_source = mslogger.MSLogger()
    self.__logger = log_source.getLogger()
    self.lock = threading.Lock()

    # Everything above is assigned before the seed file is read.
    self._read_seed_file()

    parser_cls = msconfigparser.MSConfigParser
    self.config = parser_cls(msconfigparser.default_cf)
def __init__(self, argv):
    """Parse the command-line arguments given in ``argv``.

    Options:
        -c / --config   config file, stored as ``configfile``
                        (default ``./spider.conf``)
        -v / --version  argparse "version" action
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-c",
        "--config",
        default="./spider.conf",
        dest="configfile",
        help="indicate the config file",
    )
    cli.add_argument(
        "-v",
        "--version",
        action="version",
        version="version 0.0.0",
        help="show the version",
    )

    parsed = cli.parse_args(argv)
    self.__args = parsed
    self.__logger = mslogger.MSLogger().getLogger()
def __init__(self, urlstore):
    """Initialise the thread base class and keep a reference to ``urlstore``.

    Also obtains the shared logger and builds a config parser from
    ``msconfigparser.default_cf``.
    """
    # Base-class setup comes first, before any attribute is assigned.
    threading.Thread.__init__(self)

    self.__urlstore = urlstore

    log_source = mslogger.MSLogger()
    self.__logger = log_source.getLogger()

    parser_cls = msconfigparser.MSConfigParser
    self.config = parser_cls(msconfigparser.default_cf)
def __init__(self, url, depth):
    """Store ``url`` and ``depth`` and start with an empty list of extra URLs.

    The config parser is built with an empty string as its config file
    argument.
    """
    self.__url, self.__depth = url, depth

    log_source = mslogger.MSLogger()
    self.__logger = log_source.getLogger()

    # Further URLs are collected here; nothing has been gathered yet.
    self.__more_urls = list()

    self.config = msconfigparser.MSConfigParser("")