Example #1
    def fetchBankListByUrl(self, url):
        banks = []

        f = self.openUrl(url)
        if f is None:
            return

        soup = BeautifulSoup(f)
        lis = soup.find_all("li", class_="link")
        for l in lis:
            b = Bank()
            a = l.find("a")
            href = a["href"].encode("utf-8")
            if href.startswith("http:"):
                b.url = href
            else:
                b.url = "http://creditcard.cib.com.cn" + href

            title = a.string.strip().encode("utf-8")
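            # parseBracketStyle pulls the leading "[...]" tag out of the
            # title as the city; a couple of tags are campaign names rather
            # than cities and are cleared just below.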
            b.city = city_parser.parseBracketStyle(title)
            if b.city in [
                    "兴悦会",
                    "机票随兴订",
            ]:
                b.city = None
            b.title = re.sub(r"\[.*?\](.*)", r"\1", title)
            banks.append(b)
        return banks
Example #2
def getBankList(whereDict, city="all"):
    conn = getConnection()
    c = conn.cursor()
    where = buildWhereClause(whereDict)
    params = list(whereDict.viewvalues())
    if city and city != "all":
        # Bind the city as a query parameter instead of interpolating it
        # into the SQL, which was an injection hole.
        where += " and ct_name = ?"
        params.append(city)

    c.execute(
        "SELECT * FROM " + BankTable.TABLE_NAME + " LEFT OUTER JOIN " +
        " (SELECT _id AS ct_id, name AS ct_name FROM city) " +
        " ON ct_id == " + BankTable.COL_CITY_ID + " " + where +
        " ORDER BY _id DESC", params)

    conn.commit()

    banks = []
    for row in c.fetchall():
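        # Indexing rows by column name assumes the connection uses a row
        # factory such as sqlite3.Row.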
        bank = Bank()
        bank.name = row[BankTable.COL_NAME]
        bank.title = row[BankTable.COL_TITLE]
        bank.fetchTime = row[BankTable.COL_FETCH_TIME]
        bank.accepted = row[BankTable.COL_ACCEPTED]
        bank.url = row[BankTable.COL_URL]
        bank.id = row[BankTable.COL_ID]
        city = row["ct_name"]
        if city:
            bank.city = city
        banks.append(bank)
    return banks
Example #3
    def fetchBankList(self):
        banks = []
        baseUrl = "http://www.psbc.com/portal/zh_CN/CreditCard/SpecialOffers/index%s.html"
        for page in range(0, self.getPageRange()):
            if page == 0:
                url = baseUrl % ("", )
            else:
                url = baseUrl % ("_" + str(page), )
            soup = self.getSoup(url)
            if not soup:
                break

            for a in soup.find("ul",
                               class_="artic_list clearfix").find_all("a"):
                b = Bank()
                url = a["href"].encode("utf-8")
                if re.match(r"http", url):
                    b.url = url
                else:
                    b.url = "http://www.psbc.com" + url

                title = a.string.encode("utf-8")
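                # Titles take the form "city:promotion"; when the colon is
                # present, split the city prefix off the title.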
                m = re.match("(.*):(.*)", title)
                if not m:
                    b.title = title
                else:
                    b.title = m.group(2)
                    b.city = m.group(1)

                banks.append(b)

        return banks
Example #4
    def fetchBankList(self):
        banks = []
        baseUrl = "http://creditcard.pingan.com/cms-tmplt/creditcard/searchPreferentialActivity.do?type=&city=shenzhen&currentPage=%d"
        for page in range(1, self.getPageRange()):
            url = baseUrl % page
            soup = self.getSoup(url)
            if not soup:
                break

            lis = soup.find_all("tr", class_="item")
            if len(lis) == 0:
                break

            for l in lis:
                b = Bank()
                a = l.find("a")
                title = a["title"].encode("utf-8")
                m = re.match(r"\[(.*)\]", title)
                if m:
                    b.city = m.group(1)
                b.title = re.sub(r"【.*】|\[.*\]", "", title)
                b.url = "http://creditcard.pingan.com" + a["href"].encode(
                    "utf-8")
                ds = l.contents[-2].string.encode("utf-8")
                b.endDate = date_parser.parseDashLineStyle(ds)
                banks.append(b)

        return banks
Example #5
    def fetchBankList(self):
        f = self.openUrl("http://cards.ecitic.com/youhui/shuakahuodong.shtml")
        if f is None:
            return

        soup = BeautifulSoup(f)
        lis = soup.find_all("li", class_="emb4 item-n")
        banks = []
        for li in lis:
            b = Bank()
            h2 = li.find_all("h2")[0]
            title = h2.string.encode("utf-8")
            b.title = re.sub(r"\[.*\]", "", title)
            m = re.match(r".*\[(.*)\].*", title)
            if m:
                b.city = m.group(1)
            b.name = self.getName()
            b.url = "http://cards.ecitic.com/youhui/" + li.find(
                "a", class_="a-h")["href"].encode("utf-8")
            ds = li.find("span", class_="date")
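            # The date cell holds a range; keep only the end date after the
            # dash.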
            if ds and ds.string:
                ds = ds.string.encode("utf-8")
                m = re.match(r".*-(.*)", ds)
                if m:
                    b.endDate = date_parser.parseSlashStyle(
                        m.group(1).strip())
            banks.append(b)
        return banks
Example #6
    def fetchBankList(self):
        banks = []
        baseUrl = "http://www.nbcb.com.cn/xyk/thtghd/index%s.shtml"
        for page in range(1, self.getPageRange()):
            url = None
            if page == 1:
                url = baseUrl % ""
            else:
                url = baseUrl % "_" + str(page)

            soup = self.getSoup(url)
            if not soup:
                break

            for a in soup.find("div", class_="newslist").find_all("a",
                                                                  class_=""):
                b = Bank()
                b.url = "http://www.nbcb.com.cn" + a["href"].encode("utf-8")
                title = soup_util.getStrings(a)
                m = re.match(r"\[(.*)\]", title)
                if m:
                    b.city = m.group(1)
                b.title = re.sub(r"\[(.*)\]|【(.*)】", "", title)
                banks.append(b)

        return banks
Example #7
    def fetchBankList(self):
        banks = []
        baseUrls = [
            "http://xyk.cebbank.com/home/activities/category/a_region_dd/list%d.htm",
            "http://xyk.cebbank.com/home/activities/category/a_life_cycle/list%d.htm",
        ]
        for bu in baseUrls:
            for page in range(1, self.getPageRange()):
                url = bu % page
                soup = self.getSoup(url)
                if not self.isValidSoup(soup):
                    break

                lis = soup.find("ul", class_="th_list_ul").find_all(
                    "div", class_="floatleft")
                for l in lis:
                    b = Bank()
                    a = l.find("a")
                    b.url = "http://xyk.cebbank.com" + a["href"].encode(
                        "utf-8")
                    title = a.string.encode("utf-8").strip()
                    m = re.match(r"(.*?)((.*))", title)
                    if m:
                        b.title = m.group(1)
                        b.city = m.group(2)
                    else:
                        b.title = title
                    banks.append(b)
        return banks
Ejemplo n.º 15
0
    def getBanksByUrl(self, url):
        banks = [];
        soup = self.getSoup(url, encoding="gbk");
        if not soup:
            return banks;

        lis = soup.find_all("a", href=re.compile(r"index\.html"));
        for l in lis:
            b = Bank();
            b.url = "http://www.spdbccc.com.cn" + l["href"].encode("utf-8"); 
            title = l.string.encode("utf-8");
	    b.title = re.sub(r"\[.*\](.*)", r"\1", title);
	    m = re.match(r"\[(.*)地区\]", title);
	    if m:
		b.city = m.group(1);
            banks.append(b);

        return banks;
Example #8
    def getBanksByUrl(self, url):
        banks = []
        soup = self.getSoup(url, encoding="gbk")
        if not soup:
            return banks

        lis = soup.find_all("a", href=re.compile(r"index\.html"))
        for l in lis:
            b = Bank()
            b.url = "http://www.spdbccc.com.cn" + l["href"].encode("utf-8")
            title = l.string.encode("utf-8")
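            # Titles look like "[XX地区]活动名"; strip the bracketed region
            # prefix from the title and keep it as the city.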
            b.title = re.sub(r"\[.*\](.*)", r"\1", title)
            m = re.match(r"\[(.*)地区\]", title)
            if m:
                b.city = m.group(1)
            banks.append(b)

        return banks
Example #9
    def fetchBankList(self):
        banks = []
        baseUrls = [
            "http://card.cgbchina.com.cn/Channel/11820301?currentChannelPage=%d",
            "http://card.cgbchina.com.cn/Channel/11820220?currentChannelPage=%d",
            "http://card.cgbchina.com.cn/Channel/11820139?currentChannelPage=%d",
        ]
        for baseUrl in baseUrls:
            for page in range(1, self.getPageRange()):
                url = baseUrl % page
                soup = self.getSoup(url)
                if not soup:
                    break

                # An almost empty container means we have paged past the last
                # page of this channel.
                youhuiContent = soup.find("div", class_="youhui_content")
                if len(youhuiContent.contents) <= 1:
                    break

                for a in youhuiContent.find_all("a"):
                    bank = Bank()
                    title = a.string.encode("utf-8")
                    m = re.match(r"【(.*)】(.*)", title)
                    if not m:
                        bank.title = title
                    else:
                        bank.city = m.group(1)
                        bank.title = m.group(2)

                    url = a["href"].encode("utf-8")
                    if url.find("http") != -1:
                        bank.url = url
                    else:
                        bank.url = "http://card.cgbchina.com.cn" + url

                    # The sibling <p> carries a date range; the part after
                    # the last "-" is the end date.
                    dateStr = a.parent.find_next_sibling("p").string
                    if dateStr:
                        dateStr = dateStr.encode("utf-8").split("-")[-1].strip()
                        try:
                            bank.endDate = datetime.strptime(dateStr, "%Y.%m.%d")
                        except ValueError:
                            pass

                    banks.append(bank)

        return banks
Example #10
    def fetchBankList(self):
        banks = []
        baseUrl = "http://creditcard.cmbc.com.cn/promotioninfo/PromotionInfoList.aspx?page=%d"
        for page in range(1, self.getPageRange()):
            url = baseUrl % page
            soup = self.getSoup(url, encoding="gbk")
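            # Out-of-range page numbers just get the previous page served
            # again, so stop once a page repeats the one before it.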

            if not soup or (self.prevSoup
                            and soup.get_text() == self.prevSoup.get_text()):
                break

            self.prevSoup = soup
            for l in soup.find_all("li", class_="lb_white"):
                a = l.find("a")
                b = Bank()
                b.title = a["title"].encode("utf-8").strip()
                b.url = "http://creditcard.cmbc.com.cn/promotioninfo/" + a[
                    "href"].encode("utf-8").strip()
                b.city = a.next_sibling.string.encode("utf-8").strip()
                banks.append(b)

        return banks
Example #11
    def getBankListByUrl(self, url):
        banks = []

        soup = self.getSoup(url)
        if not soup:
            return
        lis = soup.find("table", width="550").find_all("a")
        for a in lis:
            b = Bank()
            b.url = url + a["href"].encode("utf-8")
            title = a.string.encode("utf-8")
            b.title = re.sub(r"[\[\(](.*)[\]\)]", "", title)
            m = re.match(r"[\[\(](.*?)[\]\)]", title)
            if m:
                s = m.group(1)
                if s:
                    # "已结束" ("ended") marks an expired promotion; skip it.
                    if s == "已结束":
                        continue
                    else:
                        b.city = s
            banks.append(b)
        return banks
Example #12
    def parseOuter(self, url):
        banks = []
        soup = self.getSoup(url)
        if soup is not None:
            trs = soup.find_all("tr", style="height:25px;")
            for tr in trs:
                a = tr.find("a")
                u = "http://www.icbc.com.cn" + a["href"].encode("utf-8")
                text = a.string.encode("utf-8")
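                # Rows titled "精彩活动在这里…" link to index pages rather than
                # individual promotions; parsing them is disabled below.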
                if text.find("“精彩活动在这里") == -1:
                    b = Bank()
                    b.url = u

                    #remove city info.
                    m = re.match(r"(.*)--", text)
                    if m:
                        b.city = m.group(1)
                    text = self.removeCity(text)
                    b.title = text
                    banks.append(b)
                else:
                    #banks += self.parseInner(u);
                    pass
        return banks