def pharseHYMeta(self, text, id):
    """Parse the HTML of a WTHuoYuan detail page into a field dict.

    The page is expected to contain at least two elements with CSS class
    ``mt10``. Their ``<td>`` cells are read in document order: a cell whose
    text ends with ``":"`` is a label, and the next non-label cell is its
    value. Labels are translated to result keys through
    ``WTHuoYuan.get_k_v_dic()``.

    Args:
        text: HTML markup of the page.
        id: Site identifier of the page; stored under ``"webSiteId"``.

    Returns:
        dict: ``{"webSiteId": id}`` plus one entry per recognised label.
        Labels missing from the mapping are ignored.

    Raises:
        IndexError: If fewer than two ``mt10`` elements are found.
    """
    kv_dic = WTHuoYuan.get_k_v_dic()
    result = {"webSiteId": id}

    soup = BeautifulSoup(text)
    tables = soup.findAll(attrs={"class": "mt10"})
    # Both tables are required; indexing deliberately raises IndexError on
    # pages that lack them so the caller can treat the page as unparsable.
    tds = tables[0].findAll("td")
    tds.extend(tables[1].findAll("td"))

    # First pass: pair each label cell with the value cell that follows it.
    all_info = {}
    key = None
    for td in tds:
        cell_text = "".join(td.fetchText(True)).strip()
        if cell_text.endswith(":"):
            key = cell_text.replace(":", "")
            continue
        # A value without a preceding (non-empty) label has nowhere to go.
        if key:
            all_info[key] = cell_text
        key = None

    # Second pass: translate page labels into result keys.
    for label, value in all_info.items():
        field = kv_dic.get(label)
        if field is None:
            # Unknown label: skip it rather than filing it under a None key,
            # where unrelated values would overwrite each other.
            continue
        result[field] = value
    return result
def getAndSaveHYFromId(self, id): url = "http://www.chinawutong.com/203/%s.html" % id result = {} try: WTHuoYuan.objects.get(webSiteId=id) logger.info("%s already exsists" % id) return except: pass text = self.httpClient.geturlcon(url) try: result = self.pharseHYMeta(text, id) except Exception, e: logger.info(traceback.format_exc()) if (result.get("startPlace")) and (result.get("destPlace")): huoyuan = WTHuoYuan() try: huoyuan.save_from_dic(result) except Exception, e: logger.error(traceback.format_exc()) else: logger.info("saved %s" % (id)) else: logger.info(url + " is a null page") def getAndSaveZhuanXianFromId(self, id): url = "http://www.chinawutong.com/201/%s.html" % id result = {} try: WTZhuanXian.objects.get(webSiteId=id)