def fetchBankListByUrl(self, url):
    # Scrape CIB credit-card promotions from one listing page.
    banks = []
    f = self.openUrl(url)
    if f is None:
        return
    soup = BeautifulSoup(f)
    lis = soup.find_all("li", class_="link")
    for l in lis:
        b = Bank()
        a = l.find("a")
        href = a["href"].encode("utf-8")
        if href.startswith("http:"):
            b.url = href
        else:
            b.url = "http://creditcard.cib.com.cn" + href
        title = a.string.strip().encode("utf-8")
        b.city = city_parser.parseBracketStyle(title)
        # Two site sections masquerade as bracketed cities; drop them.
        if b.city in ["兴悦会", "机票随兴订"]:
            b.city = None
        b.title = re.sub(r"\[.*?\](.*)", r"\1", title)
        banks.append(b)
    return banks
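# --- Sketch (assumption, not the project's actual city_parser module): the call
# above expects parseBracketStyle(title) to return the "[city]" prefix of a
# promotion title, or None when there is none. A minimal version could be:
import re

def parseBracketStyle(title):
    # "[上海]周五半价" -> "上海"; titles without a bracketed prefix yield None.
    m = re.match(r"\[(.*?)\]", title)
    if m:
        return m.group(1)
    return None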
def fetchBankList(self):
    # Scrape Bank of Ningbo promotion pages; page 1 has no "_N" suffix.
    banks = []
    baseUrl = "http://www.nbcb.com.cn/xyk/thtghd/index%s.shtml"
    for page in range(1, self.getPageRange()):
        if page == 1:
            url = baseUrl % ""
        else:
            # Parenthesized: the original 'baseUrl % "_" + str(page)' appended
            # the page number after the formatted URL instead of inside it.
            url = baseUrl % ("_" + str(page))
        soup = self.getSoup(url)
        if not soup:
            break
        for a in soup.find("div", class_="newslist").find_all("a", class_=""):
            b = Bank()
            b.url = "http://www.nbcb.com.cn" + a["href"].encode("utf-8")
            title = soup_util.getStrings(a)
            m = re.match(r"\[(.*)\]", title)
            if m:
                b.city = m.group(1)
            b.title = re.sub(r"\[(.*)\]|【(.*)】", "", title)
            banks.append(b)
    return banks
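# --- Sketch (assumption): getSoup is used by every fetcher in this file but is
# defined elsewhere. Based on the call sites (returns a parse tree or a falsy
# value, takes an optional page encoding), it plausibly looks like this:
import urllib2
from bs4 import BeautifulSoup

def getSoup(self, url, encoding=None):
    # Fetch the page; treat network errors as "no soup" so callers can break out.
    try:
        content = urllib2.urlopen(url).read()
    except urllib2.URLError:
        return None
    if encoding:
        # Non-UTF-8 listings (e.g. the GBK pages) are decoded before parsing.
        content = content.decode(encoding, "ignore")
    return BeautifulSoup(content)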
def getBankList(whereDict, city="all"):
    # Query banks joined with their city name; whereDict supplies the
    # parameterized filters via buildWhereClause().
    conn = getConnection()
    c = conn.cursor()
    where = buildWhereClause(whereDict)
    if city and city != "all":
        # NOTE: interpolating city directly is open to SQL injection; kept
        # because callers pass trusted city names.
        where += " and ct_name = '%s'" % city
    c.execute(
        "SELECT * FROM " + BankTable.TABLE_NAME +
        " LEFT OUTER JOIN " +
        " (SELECT _id AS ct_id, name AS ct_name FROM city) " +
        " ON ct_id == " + BankTable.COL_CITY_ID + " " +
        where + " ORDER BY _id DESC",  # leading space keeps the SQL valid
        list(whereDict.viewvalues()))
    banks = []
    for row in c.fetchall():
        bank = Bank()
        bank.name = row[BankTable.COL_NAME]
        bank.title = row[BankTable.COL_TITLE]
        bank.fetchTime = row[BankTable.COL_FETCH_TIME]
        bank.accepted = row[BankTable.COL_ACCEPTED]
        bank.url = row[BankTable.COL_URL]
        bank.id = row[BankTable.COL_ID]
        cityName = row["ct_name"]  # renamed: the original shadowed the city parameter
        if cityName:
            bank.city = cityName
        banks.append(bank)
    return banks
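# Usage sketch (hypothetical filter; buildWhereClause is assumed to turn each
# whereDict entry into a placeholder clause whose values are bound positionally,
# matching the execute() call above):
#
#   accepted_sh = getBankList({"accepted = ?": 1}, city="上海")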
def fetchBankList(self):
    # Scrape PSBC special offers; the first page has no "_N" suffix.
    banks = []
    baseUrl = "http://www.psbc.com/portal/zh_CN/CreditCard/SpecialOffers/index%s.html"
    for page in range(0, self.getPageRange()):
        if page == 0:
            url = baseUrl % ("",)
        else:
            url = baseUrl % ("_" + str(page),)
        soup = self.getSoup(url)
        if not soup:
            break
        for a in soup.find("ul", class_="artic_list clearfix").find_all("a"):
            b = Bank()
            href = a["href"].encode("utf-8")  # renamed from "url" to avoid shadowing
            if re.match(r"http", href):
                b.url = href
            else:
                b.url = "http://www.psbc.com" + href
            title = a.string.encode("utf-8")
            # Titles look like "城市:活动标题"; split the city off when present.
            m = re.match("(.*):(.*)", title)
            if not m:
                b.title = title
            else:
                b.title = m.group(2)
                b.city = m.group(1)
            banks.append(b)
    return banks
def fetchBankList(self):
    # Scrape CITIC card promotions from the single listing page.
    f = self.openUrl("http://cards.ecitic.com/youhui/shuakahuodong.shtml")
    if f is None:
        return
    soup = BeautifulSoup(f)
    lis = soup.find_all("li", class_="emb4 item-n")
    banks = []
    for li in lis:
        b = Bank()
        h2 = li.find_all("h2")[0]
        title = h2.string.encode("utf-8")
        b.title = re.sub(r"\[.*\]", "", title)
        m = re.match(r".*\[(.*)\].*", title)
        if m:
            b.city = m.group(1)
        b.name = self.getName()
        b.url = "http://cards.ecitic.com/youhui/" + li.find("a", class_="a-h")["href"].encode("utf-8")
        ds = li.find("span", class_="date")
        if ds and ds.string:
            ds = ds.string.encode("utf-8")
            # The span holds "start - end"; keep only the end date.
            m = re.match(r".*-(.*)", ds)
            if m:
                b.endDate = date_parser.parseSlashStyle(m.group(1).strip())
        banks.append(b)
    return banks
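# --- Sketch (assumption, not the project's actual date_parser module): the end
# dates scraped above are slash-separated (e.g. "2014/6/30"), so parseSlashStyle
# plausibly maps such a string to a datetime:
from datetime import datetime

def parseSlashStyle(s):
    try:
        return datetime.strptime(s, "%Y/%m/%d")
    except ValueError:
        return None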
def fetchBankList(self):
    # Scrape Ping An promotions; the listing is paginated via currentPage.
    banks = []
    # "&curren" in the query string had been mangled into "¤" (the HTML
    # entity for the currency sign); restored to "&currentPage" here.
    baseUrl = ("http://creditcard.pingan.com/cms-tmplt/creditcard/"
               "searchPreferentialActivity.do?type=&city=shenzhen&currentPage=%d")
    for page in range(1, self.getPageRange()):
        url = baseUrl % page
        soup = self.getSoup(url)
        if not soup:
            break
        lis = soup.find_all("tr", class_="item")
        if len(lis) == 0:
            break
        for l in lis:
            b = Bank()
            a = l.find("a")
            title = a["title"].encode("utf-8")
            m = re.match(r"\[(.*)\]", title)
            if m:
                b.city = m.group(1)
            b.title = re.sub(r"【.*】|\[.*\]", "", title)
            b.url = "http://creditcard.pingan.com" + a["href"].encode("utf-8")
            ds = l.contents[-2].string.encode("utf-8")
            b.endDate = date_parser.parseDashLineStyle(ds)
            banks.append(b)
    return banks
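# --- Sketch (assumption): parseDashLineStyle, used above, presumably does the
# same as parseSlashStyle for dash-separated dates such as "2014-06-30":
from datetime import datetime

def parseDashLineStyle(s):
    try:
        return datetime.strptime(s, "%Y-%m-%d")
    except ValueError:
        return None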
def fetchBankList(self):
    # Scrape CEB card activities from the regional and life-cycle categories.
    banks = []
    baseUrls = [
        "http://xyk.cebbank.com/home/activities/category/a_region_dd/list%d.htm",
        "http://xyk.cebbank.com/home/activities/category/a_life_cycle/list%d.htm",
    ]
    for bu in baseUrls:
        for page in range(1, self.getPageRange()):
            url = bu % page
            soup = self.getSoup(url)
            if not self.isValidSoup(soup):
                break
            lis = soup.find("ul", class_="th_list_ul").find_all("div", class_="floatleft")
            for l in lis:
                b = Bank()
                a = l.find("a")
                b.url = "http://xyk.cebbank.com" + a["href"].encode("utf-8")
                title = a.string.encode("utf-8").strip()
                # Titles end with a full-width parenthesized city, e.g. "活动（上海）".
                # The extracted source had nested ASCII parens, which made group(1)
                # always empty; full-width parentheses are assumed here.
                m = re.match(r"(.*?)（(.*)）", title)
                if m:
                    b.title = m.group(1)
                    b.city = m.group(2)
                else:
                    b.title = title
                banks.append(b)
    return banks
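# --- Sketch (assumption): isValidSoup, used above, is not shown in this section.
# Judging from its call site it screens out fetch failures and pages that lack
# the expected activity list:
def isValidSoup(self, soup):
    return soup is not None and soup.find("ul", class_="th_list_ul") is not None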
def getBanksByUrl(self, url):
    banks = []
    soup = self.getSoup(url, encoding="gbk")
    if not soup:
        return banks
    lis = soup.find_all("a", href=re.compile(r"index\.html"))
    for l in lis:
        b = Bank()
        b.url = "http://www.spdbccc.com.cn" + l["href"].encode("utf-8")
        title = l.string.encode("utf-8")
        b.title = re.sub(r"\[.*\](.*)", r"\1", title)
        m = re.match(r"\[(.*)地区\]", title)
        if m:
            b.city = m.group(1)
        banks.append(b)
    return banks
def fetchBankList(self):
    # Scrape CGB promotions across three channel listings.
    # Requires "from datetime import datetime" at module level.
    banks = []
    baseUrls = [
        "http://card.cgbchina.com.cn/Channel/11820301?currentChannelPage=%d",
        "http://card.cgbchina.com.cn/Channel/11820220?currentChannelPage=%d",
        "http://card.cgbchina.com.cn/Channel/11820139?currentChannelPage=%d",
    ]
    for baseUrl in baseUrls:
        for page in range(1, self.getPageRange()):
            url = baseUrl % page
            soup = self.getSoup(url)
            if not soup:
                break
            youhuiContent = soup.find("div", class_="youhui_content")
            if len(youhuiContent.contents) <= 1:
                break
            for a in youhuiContent.find_all("a"):
                bank = Bank()
                title = a.string.encode("utf-8")
                m = re.match(r"【(.*)】(.*)", title)
                if not m:
                    bank.title = title
                else:
                    bank.city = m.group(1)
                    bank.title = m.group(2)
                href = a["href"].encode("utf-8")  # renamed from "url" to avoid shadowing
                if href.find("http") != -1:
                    bank.url = href
                else:
                    bank.url = "http://card.cgbchina.com.cn" + href
                dateStr = a.parent.find_next_sibling("p").string
                if dateStr:
                    # The sibling <p> holds "start-end"; keep the text after the last dash.
                    dateStr = dateStr.encode("utf-8").split("-")[-1].strip()
                    try:
                        bank.endDate = datetime.strptime(dateStr, "%Y.%m.%d")
                    except ValueError:
                        pass
                banks.append(bank)
    return banks
def fetchBankList(self):
    # Scrape CMBC promotions; stop when a page repeats the previous one,
    # which is how the site signals the end of pagination.
    banks = []
    baseUrl = "http://creditcard.cmbc.com.cn/promotioninfo/PromotionInfoList.aspx?page=%d"
    for page in range(1, self.getPageRange()):
        url = baseUrl % page
        soup = self.getSoup(url, encoding="gbk")
        if not soup or (self.prevSoup and soup.get_text() == self.prevSoup.get_text()):
            break
        self.prevSoup = soup
        for l in soup.find_all("li", class_="lb_white"):
            a = l.find("a")
            b = Bank()
            b.title = a["title"].encode("utf-8").strip()
            b.url = "http://creditcard.cmbc.com.cn/promotioninfo/" + a["href"].encode("utf-8").strip()
            b.city = a.next_sibling.string.encode("utf-8").strip()
            banks.append(b)
    return banks
def getBankListByUrl(self, url):
    banks = []
    soup = self.getSoup(url)
    if not soup:
        return
    lis = soup.find("table", width="550").find_all("a")
    for a in lis:
        b = Bank()
        b.url = url + a["href"].encode("utf-8")
        title = a.string.encode("utf-8")
        b.title = re.sub(r"[\[\(](.*)[\]\)]", "", title)
        m = re.match(r"[\[\(](.*?)[\]\)]", title)
        if m:
            s = m.group(1)
            if s:
                if s == "已结束":  # "ended": skip expired promotions
                    continue
                else:
                    b.city = s
        banks.append(b)
    return banks
def parseOuter(self, url):
    banks = []
    soup = self.getSoup(url)
    if soup is not None:
        trs = soup.find_all("tr", style="height:25px;")
        for tr in trs:
            a = tr.find("a")
            u = "http://www.icbc.com.cn" + a["href"].encode("utf-8")
            text = a.string.encode("utf-8")
            if text.find("“精彩活动在这里") == -1:
                b = Bank()
                b.url = u
                # Remove the leading "city--" prefix from the title.
                m = re.match(r"(.*)--", text)
                if m:
                    b.city = m.group(1)
                text = self.removeCity(text)
                b.title = text
                banks.append(b)
            else:
                # Category rows ("精彩活动在这里…") link to inner lists;
                # parseInner is currently disabled.
                # banks += self.parseInner(u)
                pass
    return banks
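# --- Sketch (assumption): removeCity, used above, presumably strips the same
# "city--" prefix that the preceding regex captures:
def removeCity(self, text):
    return re.sub(r"^.*?--", "", text)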