def download_page_info(self, json_data):
    """Persist every POI entry that carries a phone number.

    Iterates over the list stored under ``json_data["poi"]``. For each
    entry with a non-empty ``"tel"`` value the method prints the entry,
    appends ``<tel>*<name>`` to ``temp_ts.dat``, looks the number up via
    ``self.get_phone_info`` and inserts one row into table ``ts_1``.
    Nothing happens when the ``"poi"`` key is absent.

    Args:
        json_data: Dict-like object; entries under ``"poi"`` must provide
            ``"name"`` and ``"address"`` and may provide ``"tel"``.

    Returns:
        None.

    Raises:
        KeyError: If an entry lacks ``"name"`` or ``"address"``.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if "poi" not in json_data:
        return

    sql = "insert into ts_1 values (?,?,?,?,?,?,?)"
    for item in json_data["poi"]:
        company_name = item["name"]
        addr = item["address"]

        # Skip entries without a usable phone number. Reading it with
        # .get() guarantees a missing "tel" key can never reuse the
        # number left over from the previous iteration.
        phone_num = item.get("tel")
        if not phone_num:
            continue

        print(company_name, phone_num, addr)

        # Best-effort side log: a failed write must not abort the run,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            with open("temp_ts.dat", "a") as f:
                f.write(phone_num + "*" + company_name + "\n")
        except Exception:
            pass

        res = self.get_phone_info(phone_num)
        # NOTE(review): presumably throttles the lookup service - confirm.
        time.sleep(0.5)

        if res:
            # res is indexed as (province, city, yys); yys is presumably
            # the carrier name - verify against get_phone_info.
            province, city, yys = res[0], res[1], res[2]
        else:
            # "未知" means "unknown".
            province = city = yys = "未知"

        db = SqlLiteHandle()
        try:
            # The company name is stored twice: once in the second
            # (contact) position and once as the company name.
            db.insert(sql, [(phone_num, company_name, company_name, addr,
                             province, city, yys)])
        finally:
            # Release the handle even when the insert raises.
            db.close()
def download_page_info_nm(self, json_data):
    """Persist every entry of ``json_data["data"]["poi_list"]`` with a phone.

    For each entry with a truthy ``"tel"`` value the method prints the
    entry, appends ``<tel>*<disp_name>`` to ``temp_ts.dat``, looks the
    number up via ``self.get_phone_info`` and inserts one row into table
    ``ts_1``. Nothing happens when ``"data"`` or ``"poi_list"`` is absent.

    Args:
        json_data: Dict-like object; entries of ``data.poi_list`` must
            provide ``"disp_name"`` and ``"address"`` and may provide
            ``"tel"``.

    Returns:
        None.

    Raises:
        KeyError: If an entry lacks ``"disp_name"`` or ``"address"``.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if "data" not in json_data:
        return
    data = json_data["data"]
    if "poi_list" not in data:
        return

    sql = "insert into ts_1 values (?,?,?,?,?,?,?)"
    for poi in data["poi_list"]:
        company_name = poi["disp_name"]
        addr = poi["address"]

        # An entry without a "tel" key is treated like one with an empty
        # phone number instead of aborting the whole batch.
        phone_num = poi.get("tel")
        if not phone_num:
            continue

        print(company_name, phone_num, addr)

        # Best-effort side log: a failed write must not abort the run,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            with open("temp_ts.dat", "a") as f:
                f.write(phone_num + "*" + company_name + "\n")
        except Exception:
            pass

        res = self.get_phone_info(phone_num)
        # NOTE(review): presumably throttles the lookup service - confirm.
        time.sleep(5)

        if res:
            # res is indexed as (province, city, yys); yys is presumably
            # the carrier name - verify against get_phone_info.
            province, city, yys = res[0], res[1], res[2]
        else:
            # "未知" means "unknown".
            province = city = yys = "未知"

        db = SqlLiteHandle()
        try:
            # The company name is stored twice: once in the second
            # (contact) position and once as the company name.
            db.insert(sql, [(phone_num, company_name, company_name, addr,
                             province, city, yys)])
        finally:
            # Release the handle even when the insert raises.
            db.close()
def create_table(self):
    """Drop table ``tt_1`` if present and create it again, empty.

    Each statement runs on its own ``SqlLiteHandle``, as before, and the
    handle is closed even when the statement fails.

    Returns:
        None.
    """
    statements = (
        # IF EXISTS keeps the very first run (no table yet) from issuing
        # a failing DROP.
        "drop table if exists tt_1",
        # NOTE: the column name "provice" (sic) is kept as-is because it
        # is part of the existing schema.
        """
        CREATE TABLE tt_1 (
            "phoneNum" TEXT(20),
            "contact" TEXT(200),
            "companyName" TEXT(400),
            "address" TEXT(600),
            "provice" TEXT(40),
            "city" TEXT(80),
            "yys" TEXT(40));
        """,
    )
    for sql in statements:
        db = SqlLiteHandle()
        try:
            db.excute(sql)
        finally:
            db.close()
def download_company_page(self, url):
    """Download one company page and store its contact data in ``tt_1``.

    The page is fetched with ``PageDownload.simple_download`` and scanned
    with the ``self.hq_*_filter`` regular expressions. The first match of
    each pattern is used; contact, company name and address fall back to
    "未知" ("unknown") when nothing matches. The phone number is
    mandatory. On success the method appends ``<phone>*<contact>`` to
    ``temp_tt.dat``, looks the number up via ``self.get_phone_info`` and
    inserts one row into table ``tt_1``.

    Args:
        url: Address of the page to download.

    Returns:
        True when a row was inserted; False when the download produced
        nothing or the page holds no phone number.
    """
    downloader = PageDownload()
    page = downloader.simple_download(url)
    if not page:
        return False

    # The phone number decides whether the page is usable at all, so it
    # is checked before the remaining regex passes are run.
    phone_matches = re.findall(self.hq_phoneNum_filter, page)
    if not phone_matches:
        return False
    phone_num = phone_matches[0]

    contact_matches = re.findall(self.hq_contact_filter, page)
    name_matches = re.findall(self.hq_companyName_filter, page)
    address_matches = re.findall(self.hq_address_filter, page)
    contact = contact_matches[0] if contact_matches else "未知"
    company_name = name_matches[0] if name_matches else "未知"
    address = address_matches[0] if address_matches else "未知"

    # Best-effort side log: a failed write must not abort the run, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        with open("temp_tt.dat", "a") as f:
            f.write(phone_num + "*" + contact + "\n")
    except Exception:
        pass

    res = self.get_phone_info(phone_num)
    if res:
        # res is indexed as (province, city, yys); yys is presumably the
        # carrier name - verify against get_phone_info.
        province, city, yys = res[0], res[1], res[2]
    else:
        province = city = yys = "未知"

    # Call form instead of the Python 2 print statement, which is a
    # syntax error on Python 3; matches the other download methods.
    print(url, phone_num, contact, company_name, address)

    db = SqlLiteHandle()
    try:
        db.insert(
            "insert into tt_1 values (?,?,?,?,?,?,?)",
            [(phone_num, contact, company_name, address,
              province, city, yys)])
    finally:
        # Release the handle even when the insert raises.
        db.close()

    # NOTE(review): presumably a politeness delay between pages - confirm.
    time.sleep(0.2)
    return True
def download_page_info(self, json_data):
    """Persist every ``searchResult`` entry whose detail page has a phone.

    For each entry under ``json_data["searchResult"]`` a detail URL is
    built from ``meituan_cls[showType]`` and the entry id, and
    ``self.get_phone_num`` is asked for the phone number on that page.
    When one is found the method prints the entry, appends
    ``<phone>*<title>`` to ``temp_ts.dat``, looks the number up via
    ``self.get_phone_info`` and inserts one row into table ``ts_1``.
    Nothing happens when the ``"searchResult"`` key is absent.

    Args:
        json_data: Dict-like object; entries under ``"searchResult"``
            must provide ``"id"``, ``"title"``, ``"address"`` and
            ``"showType"``.

    Returns:
        None.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if "searchResult" not in json_data:
        return

    sql = "insert into ts_1 values (?,?,?,?,?,?,?)"
    for item in json_data["searchResult"]:
        item_id = item["id"]  # renamed from ``id`` to stop shadowing the builtin
        company_name = item["title"]
        addr = item["address"]
        show_type = item["showType"]

        # Entries whose showType has no URL segment in meituan_cls are
        # skipped. Only lookup failures are caught, so a genuine bug
        # (e.g. a NameError) is no longer hidden.
        try:
            category = meituan_cls[show_type]
        except (LookupError, TypeError):
            continue
        url = "http://www.meituan.com/%s/%s/" % (category, item_id)

        phone_num = self.get_phone_num(url)
        # NOTE(review): presumably throttles page downloads - confirm.
        time.sleep(5)
        if not phone_num:
            continue

        print(company_name, phone_num, addr)

        # Best-effort side log: a failed write must not abort the run,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            with open("temp_ts.dat", "a") as f:
                f.write(phone_num + "*" + company_name + "\n")
        except Exception:
            pass

        res = self.get_phone_info(phone_num)
        # NOTE(review): presumably throttles the lookup service - confirm.
        time.sleep(0.5)

        if res:
            # res is indexed as (province, city, yys); yys is presumably
            # the carrier name - verify against get_phone_info.
            province, city, yys = res[0], res[1], res[2]
        else:
            # "未知" means "unknown".
            province = city = yys = "未知"

        db = SqlLiteHandle()
        try:
            # The company name is stored twice: once in the second
            # (contact) position and once as the company name.
            db.insert(sql, [(phone_num, company_name, company_name, addr,
                             province, city, yys)])
        finally:
            # Release the handle even when the insert raises.
            db.close()