    def anhuiTester(self):
        pinyin = "anhui"
        db_inst = DBManager.getInstance("ssdb", 'jyyc_%s' % pinyin, host="spider5", port=57888)
        handler = AnhuiJyycHandler(pinyin)
        self.testFromSSDB(db_inst, handler)

    def heilongjiangTester(self):
        pinyin = "heilongjiang"
        db_inst = DBManager.getInstance("ssdb", 'jyyc_%s' % pinyin, host="spider5", port=57888)
        handler = HeilongjiangJyycHandler(pinyin)
        self.testFromSSDB(db_inst, handler)

    def jiangsuTester(self):
        pinyin = "jiangsu"
        db_inst = DBManager.getInstance("ssdb", 'jyyc_%s' % pinyin, host="spider5", port=57888)
        handler = JiangsuJyycHandler(pinyin)
        self.testFromSSDB(db_inst, handler)
    def beijingNbTest(self):
        pinyin = "beijing"
        db_inst = DBManager.getInstance("ssdb", "%s_nbxx" % pinyin, host="spider5", port=57888)
        row_key = "473dff8aacd4ab651b932bc8a3bbfda3|_|北京崇尚兴业商贸有限公司|_|110108010048185|_|2016-06-23|_|beijing|_|2015"
        handler = BeijingNbHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)

    def shanghaiNbTest(self):
        pinyin = "shanghai"
        db_inst = DBManager.getInstance("ssdb", "%s_nbxx" % pinyin, host="spider5", port=57888)
        row_key = "70198bb285bc3e74898ed926a54aa5fa|_|上海佳吉快运有限公司|_|913101186074971991|_|2016-06-12|_|shanghai|_|2015"
        handler = ShanghaiNbHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)

    def guizhouNbTest(self):
        pinyin = "guizhou"
        db_inst = DBManager.getInstance("ssdb", "new_%s_nbxx" % pinyin, host="spider5", port=57888)
        row_key = "0ad1548ffe8ba00864126cc2c2a22619|_|锦屏县锦顺出租汽车有限公司|_|522628000053658|_|2016-06-10|_|guizhou|_|2015"
        handler = GuizhouNbHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
def testBySeed(crawler, pinyin, seed):
    CrawlerTester.pinyin = pinyin
    CrawlerTester.seed_dict = seed
    CrawlerTester.db_inst = DBManager.getInstance("ssdb", "jyyc_" + CrawlerTester.pinyin,
                                                  host="spider5", port=57888)
    return crawler.crawl(CrawlerTester.seed_dict['page'])


def testByKeyword(crawler, pinyin, keyword):
    CrawlerTester.pinyin = pinyin
    CrawlerTester.seed_dict = {"name": keyword}
    CrawlerTester.db_inst = DBManager.getInstance("ssdb", "new_" + CrawlerTester.pinyin,
                                                  host="spider5", port=57888)
    return crawler.crawl(CrawlerTester.seed_dict['name'])
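# Usage sketch for the two helpers above (hedged, illustrative only). It builds a
# crawler through ClassFactory the same way the work() functions below do;
# "CrawlerBeijing", the no-op callback and the keyword are example values and not
# part of the original test module.
if __name__ == "__main__":
    crawler_inst = ClassFactory.getClassInst("CrawlerBeijing", package_name="qyxx_all",
                                             pinyin="beijing",
                                             callbackFromOuterControl=lambda d, c=None: None)
    print testByKeyword(crawler_inst, "beijing", u"中国光大银行股份有限公司")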
    def __init__(self, queue_name):
        self.__db = DBManager.getInstance(f(seed_cf, 'seed_db', 'type'),
                                          queue_name,
                                          port=f(seed_cf, 'seed_db', 'port'),
                                          host=f(seed_cf, 'seed_db', 'host'))
        # self.__solr = DBManager.getInstance('solr', 'seed', server=["spider7:8983", "spider7:8984", "spider7:8985"])
        self.queue_name = queue_name
        from CommonLib.Logging import Logging
        self.log = Logging(name=queue_name)
        self.__data_dic = {}
    def jiangxiTest(self):
        pinyin = "jiangxi"
        db_inst = DBManager.getInstance("ssdb", "new_" + pinyin, host="spider5", port=57888)
        row_key = "5994e3d1afbf82a9e526efae797d02db|_|乐平市新睦水稻种植专业合作社|_|jiangxi|_|2016-05-25"
        handler = JiangxiHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
    def guangdongNbTest(self):
        pinyin = "guangdong"
        db_inst = DBManager.getInstance("ssdb", "%s_nbxx" % pinyin, host="spider5", port=57888)
        # row_key = "23b441b3d17a26ef2e06ba79a4ed676a|_|广州广之旅国际旅行社股份有限公司|_|914401011904322413|_|2016-06-13|_|guangdong|_|2015"
        row_key = "b64ffc239d74dc4ad0a59cd4f6218e27|_|佛山市南湖国际旅行社股份有限公司|_|91440604776910212C|_|2016-06-13|_|guangdong|_|2014"
        handler = GuangdongNbHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
    def chongqingTest(self):
        pinyin = "chongqing"
        db_inst = DBManager.getInstance("ssdb", "new_" + pinyin, host="spider5", port=57888)
        row_key = "fc46237ff1f403b39ad199502cd338a3|_|武隆县仁武酒业有限公司|_|chongqing|_|2016-05-30"
        handler = ChongqingHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
    def guangdongTest(self):
        pinyin = "guangdong"
        db_inst = DBManager.getInstance("ssdb", "new_%s" % pinyin, host="spider5", port=57888)
        # row_key = "8effa8ebede5e87faa8157661c8d6555|_|广州顺丰速运有限公司|_|914401017248329968|_|2016-06-13|_|guangdong"
        # row_key = "23b441b3d17a26ef2e06ba79a4ed676a|_|广州广之旅国际旅行社股份有限公司|_|914401011904322413|_|2016-06-13|_|guangdong"
        row_key = "abb92dbbfaf77adafd1e98ddb100d076|_|佛山市南湖国际旅行社股份有限公司|_|91440604776910212C|_|2016-06-14|_|guangdong"
        handler = GuangdongHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
    def beijingTest(self):
        pinyin = "beijing"
        db_inst = DBManager.getInstance("ssdb", pinyin, host="spider5", port=57888)
        # row_key = "6fbb174d364fdf67fdb96cab6048db11|_|北京艺海佳景广告有限公司|_|beijing|_|2016-05-21"
        # row_key = "8d32a8b1d67d1f1d6a165c5577ac3efb|_|北京盛德东兴投资管理公司|_|beijing|_|2016-05-25"
        # row_key = "dc3094f66fa56ffed50955b3b149cfa7|_|中国光大银行股份有限公司|_|beijing|_|2016-05-25"
        row_key = "6203925f878305c2f1f5be5a80434e0d|_|北京伊美尔长岛医学美容门诊部有限公司|_|91110108797596955A|_|2016-06-16|_|beijing"
        handler = BeijingHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
    def jilinTest(self):
        pinyin = "jilin"
        db_inst = DBManager.getInstance("ssdb", "new_" + pinyin, host="spider5", port=57888)
        # row_key = "66be9a1cbec45fd17e281324fca7f2fc|_|延边爱丽思鞋业有限公司|_|jilin|_|2016-05-25"
        # row_key = "4129cb0108048b80faf34503cad6ecc9|_|延边华侨旅游侨汇服务公司|_|jilin|_|2016-05-25"
        row_key = "f1c17231377bb8c56591ba774c2aca56|_|中国旅游服务公司吉林省公司|_|jilin|_|2016-05-25"
        handler = JilinHandler(pinyin)
        self.testFromSSDB(db_inst, row_key, handler)
def rename():
    """
    Rename a hash in ssdb: copy the entries of the old hash into the new one.
    :return:
    """
    # name_list = ["beijing"]
    q_name = "new_beijing"
    db_inst = DBManager.getInstance("ssdb", q_name, host="spider5", port=57888)
    # rowkey = 4
    rk = u"4ab61b0438638de25f6a68ba9b2834a5|_|北京梅牡易贷科技服务有限公司|_|beijing|_|2016-05-25"
    src_dic = db_inst.hget(rk)
    print src_dic
    with open("ttt.txt", "w") as f:
        f.write(src_dic)
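# A hedged sketch of the copy step the docstring above describes: walk every entry
# of the old hash and write it into the new one. hget()/hset() are used elsewhere
# in this repo; hgetall() is an assumed wrapper method on the DBManager instance,
# not a confirmed API, and copy_hash itself is illustrative.
def copy_hash(old_name, new_name, host="spider5", port=57888):
    src_inst = DBManager.getInstance("ssdb", old_name, host=host, port=port)
    dst_inst = DBManager.getInstance("ssdb", new_name, host=host, port=port)
    for rk, value in src_inst.hgetall().items():  # hgetall: assumed, see note above
        dst_inst.hset(rk, value)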
class QYXX(Resource):
    __db = DBManager.getInstance(_type, 'name', host=_host, port=_port)
    threading_proxy = th()
    threading_proxy.start()

    def getChild(self, path, request):
        if path == "":
            return self
        else:
            return NotFount()

    def _render_write(self, request, res):
        request.write(str(res))
        request.finish()

    def getip(self, request):
        # url: http://spider7:9876/qyxx?area=jiangsu&last=127.0.0.1:8080
        # url: http://127.0.0.1:9876/qyxx?area=jiangsu&last=127.0.0.1:8080
        argss = request.args
        area = argss["area"][0] if "area" in argss else "common"
        last = argss["last"][0] if "last" in argss else None
        last = networkSegment(area, last)

        def _get_proxy():
            if last:
                self.__db.keyDel(area + '_' + last)
            self.__db.changeTable('%s_bbd_white_proxy' % area)
            bbd_proxy = self.__db.get()
            if bbd_proxy:
                name = area + '_' + networkSegment(area, bbd_proxy)
                self.__db.keySetx(name, 300, ttl=300)
                self.__db.hincrHash(name, 'now_num')
                self.__db.multi_hsetHash(name, uptime=int(float(time.time())))
                return bbd_proxy
            self.__db.changeTable('%s_white_proxy' % area)
            buy_proxy = self.__db.get()
            if buy_proxy:
                return buy_proxy
            return self.threading_proxy.get_proxy()

        deferToThread(_get_proxy).addCallback(
            lambda x: self._render_write(request, x))

    def render_GET(self, request):
        self.getip(request)
        return NOT_DONE_YET
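# Minimal wiring sketch for serving the QYXX resource over HTTP (a hedged example,
# not part of the original module). Port 9876 is taken from the example URLs in
# getip(); Site and reactor come from twisted, which the class already relies on
# via Resource, deferToThread and NOT_DONE_YET.
if __name__ == "__main__":
    from twisted.web.server import Site
    from twisted.internet import reactor

    reactor.listenTCP(9876, Site(QYXX()))
    reactor.run()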
    def __init__(self, queue_name, sub_type, get_db_dict=None, save_db_dict=None):
        # config = __import__("FetchConfig")
        # get_db_dict = config.QYXX_GET_DB
        # save_db_dict = config.QYXX_PUT_DB
        self.logger = Logging(__name__)
        # queue_name = queue_name  # for debug
        self.__get_db = DBManager.getInstance(get_db_dict["type"],
                                              queue_name,
                                              port=get_db_dict["port"],
                                              host=get_db_dict["host"])
        # self.__save_db = DBManager.getInstance(get_db_dict["type"],
        #                                        queue_name,
        #                                        port=get_db_dict["port"],
        #                                        host=get_db_dict["host"])
        self.queue_name = queue_name
        self.__data_dic = {}
    def __init__(self):
        self.__db = DBManager.getInstance(_type, 'buy_tba_proxy_white_proxy', host=_host, port=_port)
    def __init__(self):
        self.__ssdb = DBManager.getInstance('ssdb', 'test', port=port, host=host)
                n += 1
                text = text.replace(u':', ':')
                texts = text.split(':')
                if texts[0] == text:
                    if n == 1:
                        dict_[u'top_企业名称'] = text.strip()
                else:
                    if len(texts) >= 2:
                        dict_[u'top_' + texts[0].strip()] = texts[1].strip()
        except:
            self.log.info(u"Exception while extracting the top info")
        return dict_


def testFromSSDB(db_inst, row_key):
    html_dict_str = db_inst.hget(row_key)
    if not html_dict_str:
        print(u"Failed to fetch data from SSDB!")
        return
    handler = HeilongjiangHandler("heilongjiang")
    html_dict = json.loads(html_dict_str)
    handler.parse(html_dict)


if __name__ == "__main__":
    db_inst = DBManager.getInstance("ssdb", "heilongjiang", host="spider5", port=57888)
    row_key = "d833488218278803eadb28d469cd6257|_|黑龙江柏杉林木业有限公司|_|91230184690719556X|_|2016-06-21|_|heilongjiang"
    testFromSSDB(db_inst, row_key)
    pass
            titles = h2_text.split()
            if len(titles) >= 2:
                dict_[u'top_公司名称'] = titles[0]
                top2s = titles[1].split(u":")
                if len(top2s) == 2:
                    dict_[u'top_' + top2s[0].strip()] = top2s[1].strip()
        except:
            self.log.info(u"Exception while extracting the top info")
            dict_ = dict()
        return dict_


def testFromSSDB(db_inst, row_key):
    html_dict_str = db_inst.hget(row_key)
    if not html_dict_str:
        print(u"Failed to fetch data from SSDB!")
        return
    handler = QinghaiHandler("qinghai")
    html_dict = json.loads(html_dict_str)
    handler.parse(html_dict)


if __name__ == "__main__":
    db_inst = DBManager.getInstance("ssdb", "qinghai", host="spider5", port=57888)
    row_key = "62f434b12b8f3287f948e048da718882|_|西部矿业集团有限公司|_|9163000071040638XJ|_|2016-06-21|_|qinghai"
    testFromSSDB(db_inst, row_key)
    pass
def work(bbd_type, value_list=None):
    conf_file = "DBConfig.ini"
    db_conf_dict = \
        {
            'type': confGetterFunc(conf_file, 'html_db', 'type').lower(),
            'host': confGetterFunc(conf_file, 'html_db', 'host').lower(),
            'port': int(confGetterFunc(conf_file, 'html_db', 'port'))
        }

    def getNbxxDict(src_dict):
        nbxx_key_list = filter(lambda x: x.startswith("qynb_"), src_dict.keys())
        nbxx_list = map(lambda x: {x: src_dict.pop(x)}, nbxx_key_list)
        return nbxx_list

    def getYear(nb_dict):
        key = nb_dict.keys()[0]
        year = key.split("_")[1]
        return year

    def storeResult(src_dict, company_dict=None):
        """
        Callback invoked by the crawler to store the result into ssdb.
        :param src_dict:
        :param company_dict:
        :return:
        """
        try:
            if src_dict["status"] == 0:
                src_dict = UniField.unifyRequestResult(src_dict, bbd_type)
                if src_dict.has_key("rowkey"):
                    rowkey = src_dict["rowkey"]
                    nbxx_list = getNbxxDict(src_dict)
                    nb_year_list = []  # used to notify the solr interface
                    for nb_item in nbxx_list:
                        # split the annual reports into separate entries, each with its own rowkey, and put them into the hash
                        year = getYear(nb_item)
                        nb_year_list.append(year)
                        nbxx_dict = UniField.cloneNeedColumns(src_dict)
                        nbxx_dict.update({"bbd_seed": bbd_seed_dict})
                        nbxx_dict.update(nb_item)
                        db_inst.changeTable(bbd_type + "_nbxx")
                        nb_rk = rowkey + "|_|" + year
                        nbxx_dict["rowkey"] = nb_rk
                        nbxx_dict["year"] = year
                        db_inst.hset(nb_rk, nbxx_dict)
                        log.info(u"Stored annual report for year %s, rowkey [ %s ]", year, nb_rk)
                    zch = src_dict["rowkey_dict"]["company_zch"]
                    company_name = src_dict["rowkey_dict"]["company_name"]
                    log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ING, bbd_seed_dict)
                    log.info(log_info)
                    src_dict.update({"bbd_seed": bbd_seed_dict})
                    db_inst.changeTable(bbd_type)
                    db_inst.save(src_dict)
                    log.info(u"Saved, rowkey [ %s ]", rowkey)
                    NbxxApiControler().nbUpdate(company_name=company_name,
                                                pinyin=bbd_type,
                                                zch=zch,
                                                years_list=nb_year_list)
                else:
                    raise Exception("No rowkey")
            else:
                db_inst.changeTable(bbd_type + "_error")
                db_inst.save(src_dict)
        except Exception as e:
            log.info(str(e))
            db_inst.changeTable(bbd_type + "_error")
            db_inst.save(src_dict)
            log.info(u"Failed to store the crawled page source, rowkey [ %s ]", rowkey)

    def crawlerKeyWordList(keyword_list):
        """
        Crawl the keywords one at a time: if the first fails, try the next;
        if the last one still fails, record the seed info back to ssdb.
        :param keyword_list:
        :return:
        """
        try:
            keyword_num = len(keyword_list)
            for keyword in keyword_list:
                keyword_num -= 1
                seed_status = inst.crawl(keyword)
                if seed_status.access_type == SeedAccessType.OK:
                    # success, log it
                    # log.info("End seed with keyword %s", keyword)
                    log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_SUC, bbd_seed_dict)
                    log.info(log_info)
                    log.info(u"Seed crawled successfully :)")
                    break
                elif seed_status.access_type != SeedAccessType.OK and keyword_num > 0:
                    # log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ERO, bbd_seed_dict)
                    log.info(u"Seed crawl failed for keyword [%s]", keyword)
                    continue
                else:
                    seed.update(status=seed_status.access_type)
                    log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ERO, bbd_seed_dict)
                    log.info(log_info)
                    log.info(u"Seed crawl failed, saved back to the queue, seed status: %s", str(seed_status))
                    seed.save()
        except Exception as e:
            log.info(str(e))
            raise Exception(u"Exception encountered while crawling the seed")

    ##################################################################################################################
    try:
        from CommonLib.Logging import Logging
        log = Logging(name=bbd_type)
        log.info("Process begin for %s, logger=%s", bbd_type, str(log))
        module_name = "Crawler" + bbd_type.capitalize()
        bbd_type = bbd_type.lower()
        inst = ClassFactory.getClassInst(module_name, package_name="qyxx_all",
                                         pinyin=bbd_type,
                                         callbackFromOuterControl=storeResult)
        db_inst = DBManager.getInstance(db_conf_dict["type"], bbd_type,
                                        host=db_conf_dict["host"],
                                        port=db_conf_dict["port"])
        bbd_seed_dict = {}
        if value_list:
            for keywd_list in value_list:
                crawlerKeyWordList(keywd_list)
        else:
            seed = Seed(bbd_type)
            while True:
                seed.get()
                bbd_seed_dict = seed.getDict()
                log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ING, bbd_seed_dict)
                log.info("starting a new seed %s", log_info)
                if seed.url_status:
                    seed_status = inst.crawlUrl(seed.url, seed.name)
                    if seed_status.access_type == SeedAccessType.OK:
                        # success, log it
                        log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_SUC, bbd_seed_dict)
                        log.info(log_info)
                    else:
                        # the url did not work, fall back to the keyword list
                        log.info("Url get company info failed [%s]", bbd_type)
                        keyword_list = seed.values
                        crawlerKeyWordList(keyword_list)
                else:
                    keyword_list = seed.values
                    crawlerKeyWordList(keyword_list)
    except Exception as e:
        log.info(str(e))
        seed.save()
        raise Exception(e)
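# Usage sketch for work() above (hedged, illustrative only): the nested-list shape
# for value_list mirrors the "for keywd_list in value_list" loop, and the company
# name is just an example keyword taken from the test row keys in this repo.
if __name__ == "__main__":
    work("beijing")                                    # consume seeds from the queue
    # work("beijing", [[u"中国光大银行股份有限公司"]])  # or crawl an explicit keyword list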
    def __init__(self):
        self.dict_ = {}
        self.__db = DBManager.getInstance(_type, 'name', host=_host, port=_port)
                n += 1
                text = text.replace(u':', ':')
                texts = text.split(':')
                if texts[0] == text:
                    if n == 1:
                        dict_[u'top_企业名称'] = text.strip()
                else:
                    if len(texts) == 2:
                        dict_[u'top_' + texts[0].strip()] = texts[1].strip()
        except:
            self.log.info(u"Exception while extracting the top info")
        return dict_


def testFromSSDB(db_inst, row_key):
    html_dict_str = db_inst.hget(row_key)
    if not html_dict_str:
        print(u"Failed to fetch data from SSDB!")
        return
    handler = ShanghaiHandler("shanghai")
    html_dict = json.loads(html_dict_str)
    handler.parse(html_dict)


if __name__ == "__main__":
    db_inst = DBManager.getInstance("ssdb", "new_shanghai_data", host="spider5", port=57888)
    row_key = "c97ec20e493f366be44508f44001a583|_|上海乾辉工贸有限公司分公司|_|shanghai|_|2016-05-22"
    testFromSSDB(db_inst, row_key)
    pass
    def __init__(self):
        self.__db = DBManager.getInstance(f('type'), f('table'), server=f('server'))
def work(pro_type, seed=None):
    def storeResult(src_dict, company_dict=None):
        # if company_dict.has_key(u"名称"):
        #     src_dict.update({"company_name": company_dict[u"名称"]})
        # src_dict.update({"values": company_dict})
        src_dict = UniField.unifyRequestResult(src_dict, pro_type)
        if src_dict.has_key("rowkey"):
            rowkey = src_dict["rowkey"]
            print "rowkey after unifying fields =", rowkey
            src_dict.update({"BBD_SEED": seed.getDict()})
            if src_dict["status"] == 0:
                db_inst.changeTable("new_" + pro_type)
                db_inst.hset(rowkey, src_dict)
                db_inst.save(src_dict)
            else:
                db_inst.changeTable("new_" + pro_type + "_error")
                db_inst.hset(rowkey, src_dict)
                db_inst.save(src_dict)
            print "rowkey=", rowkey
        else:
            print "No rowkey, crawl result:", src_dict

    def crawlerKeyWordList(keyword_list):
        """
        Crawl the keywords one at a time: if the first fails, try the next;
        if the last one still fails, record the seed info back to ssdb.
        :param keyword_list:
        :return:
        """
        keyword_num = len(keyword_list)
        for keyword in keyword_list:
            keyword_num -= 1
            seed_status = inst.crawl(keyword)
            if seed_status.access_type == SeedAccessType.OK:
                # success, log it
                # log.info("End seed with keyword %s", keyword)
                log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_SUC, seed.getDict())
                log.info(log_info)
                break
            elif seed_status.access_type != SeedAccessType.OK and keyword_num > 0:
                # log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ERO, seed.getDict())
                log.info("Use Key word [%s] get company failed", keyword)
                continue
            else:
                seed.update(status=seed_status.access_type)
                log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ERO, seed.getDict())
                log.info(log_info)
                seed.save()

    try:
        from CommonLib.Logging import Logging
        log = Logging(name=pro_type)
        log.info("Process begin for %s", pro_type)
        module_name = "Crawler" + pro_type.capitalize()
        pro_type = pro_type.lower()
        inst = ClassFactory.getClassInst(module_name, package_name="qyxx_all",
                                         pinyin=pro_type,
                                         callbackFromOuterControl=storeResult)
        db_inst = DBManager.getInstance("ssdb", "new_" + pro_type, host="spider5", port=57888)
        if seed is None:
            seed = Seed(pro_type)
            seed.get()
        # log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_ING, seed.getDict())
        # log.info("start to a new seed %s", log_info)
        # if seed.url_status:
        #     seed_status = inst.crawlUrl(seed.url, seed.name)
        #     if seed_status.access_type == SeedAccessType.OK:  # success, log it
        #         log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_SUC, seed.getDict())
        #         log.info(log_info)
        #     else:  # the url did not work, fall back to the keyword list
        #         log.info(" Url get company info failed [%s]", pro_type)
        #         keyword_list = seed.values
        #         crawlerKeyWordList(keyword_list)
        # else:
        keyword_list = seed.values
        crawlerKeyWordList(keyword_list)
    except Exception as e:
        print str(e)
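# Usage sketch for this work() variant (hedged, illustrative only): either let it
# pull the next seed from the queue, or hand it a Seed built by the caller
# ("my_seed" below is a hypothetical name, not defined in the original module).
if __name__ == "__main__":
    work("chongqing")                 # pull the next seed from the chongqing queue
    # work("chongqing", seed=my_seed)  # or replay a Seed instance prepared elsewhere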
def work(bbd_type):
    conf_file = "DBConfig.ini"
    src_db_dict = \
        {
            'type': confGetterFunc(conf_file, 'html_db', 'type').lower(),
            'host': confGetterFunc(conf_file, 'html_db', 'host').lower(),
            'port': int(confGetterFunc(conf_file, 'html_db', 'port'))
        }
    des_db_dict = \
        {
            'type': confGetterFunc(conf_file, 'data_db', 'type').lower(),
            'host': confGetterFunc(conf_file, 'data_db', 'host').lower(),
            'port': int(confGetterFunc(conf_file, 'data_db', 'port'))
        }

    from CommonLib.Logging import Logging
    log = Logging(name=bbd_type)
    log.info("Process begin")

    bbd_type = bbd_type.lower()
    queue_name = bbd_type
    nb_module_name = bbd_type.capitalize() + "Nb" + "Handler"
    nb_handler = ClassFactory.getClassInst(nb_module_name, package_name="Parser",
                                           pinyin=bbd_type.lower())

    bbd_table = "qyxx_data_nb"
    bbd_src_table = "qyxx_html_nb"
    normal_table = bbd_type + "_data" + "_nb"
    err_table = normal_table + "_error"
    # html_normal_table = bbd_type + "_src" + "_nb"

    des_db_inst = DBManager.getInstance(des_db_dict["type"], bbd_table,
                                        host=des_db_dict["host"],
                                        port=des_db_dict["port"])  # stores the parsed data
    err_db_inst = DBManager.getInstance(src_db_dict["type"], err_table,
                                        host=src_db_dict["host"],
                                        port=src_db_dict["port"])
    fetch = Fetcher(queue_name + "_nbxx", "qyxx", get_db_dict=src_db_dict,
                    save_db_dict=des_db_dict)  # debug

    while True:
        try:
            source_dict = fetch.hget()
            if source_dict:
                res_dict = UniField.cloneNeedColumns(source_dict)
                if res_dict.has_key("year"):
                    res_dict["_id"] = UniField.updateId(res_dict['_id'], res_dict['year'])
                # log.info("start to a new seed %s", seed_dict)
                res_dict = nb_handler.parse(source_dict, res_dict)
                if res_dict["status"] == 0:
                    res_dict = UniField.unifyParseResult(res_dict, bbd_table=bbd_table)
                    des_db_inst.changeTable(bbd_table)
                    des_db_inst.save(res_dict)
                    log.info(u"Inserted data into [%s], queue size: %s", bbd_table, str(des_db_inst.size()))
                    des_db_inst.changeTable(bbd_src_table)
                    des_db_inst.save(source_dict)
                    log.info(u"Inserted data into [%s], queue size: %s", bbd_src_table, str(des_db_inst.size()))
                    # log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_PARSE_SUC, seed_dict)
                    # log.info(log_info)
                else:
                    source_dict["data"] = res_dict
                    err_db_inst.save(source_dict)
                    # log_info = get_logs(STATE.BBD_SEED_IS_CRAWL_PARSE_ERO, seed_dict)
                    # log.info(log_info)
            else:
                log.info(u"The %s parse queue is empty, waiting 10 seconds before retrying", bbd_type)
                time.sleep(10)
        except Exception as e:
            log.info(str(e))
            source_dict["data"] = res_dict
            err_db_inst.save(source_dict)
            raise Exception(e)
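# work() above reads its source/destination connections from DBConfig.ini via
# confGetterFunc. A hedged example of the expected layout (section and key names
# come from the calls above; the values are illustrative, reusing the
# spider5:57888 ssdb endpoint seen throughout this repo):
#
#   [html_db]
#   type = ssdb
#   host = spider5
#   port = 57888
#
#   [data_db]
#   type = ssdb
#   host = spider5
#   port = 57888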
"").replace(u'\xa0', '').replace(u':', u':').split(key) if len(infos) == 2: dict_[u'top_企业名称'] = infos[0].strip() if u':' in infos[1]: temp = (key + infos[1]).split(u':') dict_['top_' + temp[0].strip()] = temp[1].strip() except: self.log.info(u"获取top信息异常") dict_ = dict() return dict_ def testFromSSDB(db_inst, row_key): html_dict_str = db_inst.hget(row_key) if not html_dict_str: print(u"从SSDB获取数据失败!") return handler = NeimengguHandler("neimenggu") html_dict = json.loads(html_dict_str) handler.parse(html_dict) if __name__ == "__main__": db_inst = DBManager.getInstance("ssdb", "neimenggu", host="spider5", port=57888) row_key = "1a7630b3a30addefcae6c3d092630a11|_|内蒙古蒙牛乳业包头有限责任公司|_|91150200701240234X|_|2016-06-22|_|neimenggu" testFromSSDB(db_inst, row_key) pass
    def __init__(self):
        self.__db = DBManager.getInstance(_type, 'buy_proxy_test_results', host=_host, port=_port)