def __init__(self, conf):
    # Read the connection settings from the [db] section of the config
    # and keep handles to the mining_job, mining_task and mining_page collections.
    host = conf.get('db', 'host')
    port = conf.getint('db', 'port')
    basename = conf.get('db', 'database')
    self.conn = get_conn(host, port, basename)
    self.job_co = self.conn['mining_job']
    self.task_co = self.conn['mining_task']
    self.page_co = self.conn['mining_page']
def __init__(self, conf):
    # Hard-coded connection; the conf argument is currently unused here.
    self.logger = logging.getLogger("")
    self.db = get_conn('localhost', 10010, 'news_crawler')
def get_all_seed(self):
    # Yield every record from the seed collection, one at a time.
    db = get_conn(self.conf['address'], self.conf['port'], self.conf['db_name'])
    for seed in db.seed.find():
        yield seed
def __init__(self, mining_server, conf):
    self.logger = logging.getLogger("root")
    self.conn = get_conn(conf.get("db", "host"),
                         conf.getint("db", "port"),
                         conf.get("db", "database"))
    self.mining_server = mining_server
    # self.init_seed()
    self.init_job()
def __init__(self, conf):
    self.conn = get_conn(conf.get("db", "host"),
                         conf.getint("db", "port"),
                         conf.get("db", "database"))
    self.parser = HTMLParser(encoding="utf-8", remove_comments=True, remove_blank_text=True)
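# A minimal usage sketch for the parser above. It assumes `HTMLParser` is
# lxml.etree.HTMLParser (whose keyword arguments match encoding=,
# remove_comments= and remove_blank_text=); the sample markup and XPath
# below are illustrative only, not taken from the project.
from lxml import etree

parser = etree.HTMLParser(encoding="utf-8", remove_comments=True, remove_blank_text=True)
root = etree.fromstring(b"<html><body><a href='/item/1'>item</a></body></html>", parser)
print(root.xpath("//a/@href"))  # ['/item/1']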
def __init__(self, conf):
    # conf is only passed through to URLDedup; the connection itself is hard-coded.
    self.url_dedup = URLDedup(conf)
    self.logger = logging.getLogger("")
    self.db = get_conn("localhost", 10010, "news_crawler")
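# The snippets above all call a `get_conn(host, port, database)` helper that is
# not shown in this section. Dict-style lookups such as self.conn['mining_job']
# and cursor calls such as db.seed.find() suggest it returns a pymongo Database
# handle; the sketch below is one plausible implementation under that
# assumption, not the project's actual code.
from pymongo import MongoClient

def get_conn(host, port, database):
    # Connect to the server at host:port and return the named database,
    # whose collections can then be accessed with dict-style lookups.
    client = MongoClient(host, port)
    return client[database]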