Ejemplo n.º 1
0
 def __init__(self, conf):
     """Connect using the ``db`` settings in ``conf`` and bind named handles.

     ``conf`` must offer ``get(section, option)`` and
     ``getint(section, option)``. The ``host``, ``port`` and ``database``
     options of the ``db`` section are read in that order and handed to
     ``get_conn``.
     """
     db_settings = (
         conf.get('db', 'host'),
         conf.getint('db', 'port'),
         conf.get('db', 'database'),
     )
     connection = get_conn(*db_settings)
     self.conn = connection
     # Items looked up by name on the connection object
     # (presumably collections -- confirm against get_conn).
     self.job_co = connection['mining_job']
     self.task_co = connection['mining_task']
     self.page_co = connection['mining_page']
Ejemplo n.º 2
0
 def __init__(self, conf):
     """Attach a logger and a database handle to the instance.

     ``conf`` is accepted but not consulted here: the arguments passed to
     ``get_conn`` are fixed literals.
     """
     db_host, db_port, db_name = 'localhost', 10010, 'news_crawler'
     # Logger requested with an empty name.
     self.logger = logging.getLogger("")
     self.db = get_conn(db_host, db_port, db_name)
Ejemplo n.º 3
0
 def get_all_seed(self):
     """Yield each item produced by ``seed.find()`` on a fresh connection.

     This is a generator: the connection is created from the ``address``,
     ``port`` and ``db_name`` entries of ``self.conf`` only once iteration
     begins.
     """
     settings = self.conf
     connection = get_conn(
         settings['address'], settings['port'], settings['db_name']
     )
     seeds = connection.seed.find()
     for record in seeds:
         yield record
Ejemplo n.º 4
0
 def __init__(self, mining_server, conf):
     """Store ``mining_server``, open the database connection, run ``init_job``.

     ``conf`` must offer ``get(section, option)`` and
     ``getint(section, option)``. The ``host``, ``port`` and ``database``
     options of the ``db`` section are read in that order and handed to
     ``get_conn``.
     """
     self.logger = logging.getLogger("root")

     db_host = conf.get("db", "host")
     db_port = conf.getint("db", "port")
     db_name = conf.get("db", "database")
     self.conn = get_conn(db_host, db_port, db_name)

     self.mining_server = mining_server
     # init_seed() is not invoked here; only init_job() runs, after the
     # attributes above have been assigned.
     self.init_job()
Ejemplo n.º 5
0
 def __init__(self, conf):
     """Open the database connection and build the HTML parser.

     ``conf`` must offer ``get(section, option)`` and
     ``getint(section, option)``. The ``host``, ``port`` and ``database``
     options of the ``db`` section are read in that order and handed to
     ``get_conn``.
     """
     db_host = conf.get("db", "host")
     db_port = conf.getint("db", "port")
     db_name = conf.get("db", "database")
     self.conn = get_conn(db_host, db_port, db_name)

     # Keyword options forwarded unchanged to HTMLParser.
     parser_options = {
         "encoding": "utf-8",
         "remove_comments": True,
         "remove_blank_text": True,
     }
     self.parser = HTMLParser(**parser_options)
Ejemplo n.º 6
0
 def __init__(self, conf):
     """Create the URL de-duplicator, the logger and the database handle.

     ``conf`` is passed to ``URLDedup`` only; the arguments given to
     ``get_conn`` are fixed literals.
     """
     self.url_dedup = URLDedup(conf)
     # Logger requested with an empty name.
     self.logger = logging.getLogger("")
     connection_args = ("localhost", 10010, "news_crawler")
     self.db = get_conn(*connection_args)