コード例 #1
0
ファイル: db_helper.py プロジェクト: shenxiangq/news_crawler
 def __init__(self, conf):
     """Connect to the configured database and bind the mining entries.

     Args:
         conf: Configuration object providing ``get(section, option)``
             and ``getint(section, option)``. The ``db`` section must
             define ``host``, ``port`` and ``database``.
             (Presumably a ConfigParser instance -- confirm with caller.)
     """
     db_host = conf.get('db', 'host')
     db_port = conf.getint('db', 'port')
     db_name = conf.get('db', 'database')

     connection = get_conn(db_host, db_port, db_name)
     self.conn = connection

     # Name-indexed lookups on the connection; presumably these are
     # collections for jobs, tasks and pages -- verify against get_conn.
     self.job_co = connection['mining_job']
     self.task_co = connection['mining_task']
     self.page_co = connection['mining_page']
コード例 #2
0
 def __init__(self, conf):
     """Set up the logger and the database handle.

     Args:
         conf: Accepted but not read anywhere in this constructor.
     """
     self.logger = logging.getLogger("")

     # NOTE(review): connection settings are hard-coded here rather than
     # taken from ``conf`` -- confirm whether that is intended.
     db_host, db_port, db_name = 'localhost', 10010, 'news_crawler'
     self.db = get_conn(db_host, db_port, db_name)
コード例 #3
0
 def get_all_seed(self):
     """Yield each item returned by ``find()`` on the ``seed`` attribute
     of the database handle.

     The connection is opened from ``self.conf`` (keys ``address``,
     ``port`` and ``db_name``) when iteration starts, because this is a
     generator function.
     """
     settings = self.conf
     database = get_conn(settings['address'], settings['port'], settings['db_name'])
     cursor = database.seed.find()
     for document in cursor:
         yield document
コード例 #4
0
 def __init__(self, mining_server, conf):
     """Store the mining server, connect to the database and run ``init_job``.

     Args:
         mining_server: Object kept on ``self.mining_server`` for later use.
         conf: Configuration object providing ``get(section, option)``
             and ``getint(section, option)``. The ``db`` section must
             define ``host``, ``port`` and ``database``.
     """
     self.logger = logging.getLogger("root")

     db_host = conf.get("db", "host")
     db_port = conf.getint("db", "port")
     db_name = conf.get("db", "database")
     self.conn = get_conn(db_host, db_port, db_name)

     self.mining_server = mining_server

     # NOTE: the init_seed() call is disabled here; only init_job() runs.
     # self.init_seed()
     self.init_job()
コード例 #5
0
 def __init__(self, conf):
     """Connect to the configured database and build the HTML parser.

     Args:
         conf: Configuration object providing ``get(section, option)``
             and ``getint(section, option)``. The ``db`` section must
             define ``host``, ``port`` and ``database``.
     """
     db_host = conf.get("db", "host")
     db_port = conf.getint("db", "port")
     db_name = conf.get("db", "database")
     self.conn = get_conn(db_host, db_port, db_name)

     # One parser instance is created here and kept on the object.
     self.parser = HTMLParser(
         encoding="utf-8",
         remove_comments=True,
         remove_blank_text=True,
     )
コード例 #6
0
 def __init__(self, conf):
     """Create the URL de-duplicator, the logger and the database handle.

     Args:
         conf: Passed straight to ``URLDedup``; not otherwise read here.
     """
     self.url_dedup = URLDedup(conf)
     self.logger = logging.getLogger("")

     # NOTE(review): connection settings are hard-coded here rather than
     # taken from ``conf`` -- confirm whether that is intended.
     db_host, db_port, db_name = "localhost", 10010, "news_crawler"
     self.db = get_conn(db_host, db_port, db_name)