def requestAPIForURL(amount):
    """Collect article URLs from the laohucaijing.com news-list AJAX endpoint.

    Pages are requested one after another, starting at page 1. The number of
    pages is ``amount / 10`` rounded up (presumably the endpoint serves 10
    entries per page -- confirm against the site).

    Args:
        amount: Number of articles wanted; anything ``float()`` accepts.
            Zero or negative values return an empty list without issuing
            any request.

    Returns:
        A list of absolute article URLs. Links containing ``author_detail``
        are skipped, and duplicates within one page are dropped while the
        first-seen order is kept.
    """
    import math

    # Round up so a partially filled last page is still requested; clamp at
    # zero so a negative amount can never trigger a request.
    needPages = max(0, int(math.ceil(float(amount) / 10)))

    result = []
    for page in range(1, needPages + 1):
        APIURL = "http://www.laohucaijing.com/laohu_index1/ajax_news_list/?page=%s" % page
        html = crawlUtils.requestJsonWithProxy(APIURL)["html"]
        # Turn escaped slashes (backslash + slash) back into plain slashes
        # before running the link regex.
        links = laohuCrawlMethod.GET_LINK_REGEX.findall(html.replace("\\/", "/"))
        seen = set()
        for link in links:
            if link in seen:
                continue
            seen.add(link)
            if "author_detail" not in link:
                result.append("http://www.laohucaijing.com%s" % link)
    return result
def requestAPIForURL(amount):
    """Collect article URLs from the inewsweek.chinanews.com paging endpoint.

    Pages are requested one after another, starting at page 1. The number of
    pages is ``amount / 20`` rounded up (presumably the endpoint serves 20
    entries per page -- confirm against the site).

    Fetching is best effort: a page that fails to download or does not have
    the expected ``docs`` / ``url`` structure is logged and skipped.

    Args:
        amount: Number of articles wanted; anything ``float()`` accepts.
            Zero or negative values return an empty list without issuing
            any request.

    Returns:
        A list with the ``url`` value of every entry under ``docs`` on each
        page that could be processed.
    """
    import logging
    import math

    # Round up so a partially filled last page is still requested; clamp at
    # zero so a negative amount can never trigger a request.
    needPages = max(0, int(math.ceil(float(amount) / 20)))

    result = []
    for page in range(1, needPages + 1):
        APIURL = "http://channel.inewsweek.chinanews.com/u/zk.shtml?pager=%s" % page
        try:
            jsonData = crawlUtils.requestJsonWithProxy(APIURL, needCut=True)
            result += [x["url"] for x in jsonData["docs"]]
        except Exception:
            # Keep the best-effort behaviour, but leave a trace and let
            # KeyboardInterrupt / SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Skipping page %s", APIURL, exc_info=True)
    return result
def requestAPIForURL(amount):
    """Build article-detail API URLs from the chinaipo.com article listing.

    Listing pages are requested one after another, starting at page 1. The
    number of pages is ``amount / 10`` rounded up (presumably the API serves
    10 entries per page -- confirm against the API).

    Fetching is best effort: when a page fails to download or an entry lacks
    the expected structure, the problem is logged and the rest of that page
    is skipped; URLs already collected are kept.

    Args:
        amount: Number of articles wanted; anything ``float()`` accepts.
            Zero or negative values return an empty list without issuing
            any request.

    Returns:
        A list of detail URLs, one per ``originalId`` found under
        ``results`` on each page.
    """
    import logging
    import math

    # Round up so a partially filled last page is still requested; clamp at
    # zero so a negative amount can never trigger a request.
    needPages = max(0, int(math.ceil(float(amount) / 10)))

    result = []
    for page in range(1, needPages + 1):
        APIURL = "http://api.chinaipo.com/zh-hans/api/articles/?page=%s" % page
        try:
            jsonData = crawlUtils.requestJsonWithProxy(APIURL)
            for entry in jsonData["results"]:
                originalId = entry["originalId"]
                result.append(
                    "http://api.chinaipo.com/zh-hans/api/article/?originalId=%s" % originalId)
        except Exception:
            # Keep the best-effort behaviour, but leave a trace and let
            # KeyboardInterrupt / SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Skipping page %s", APIURL, exc_info=True)
    return result
def requestAPIForURL(amount):
    """Collect article URLs from the app.eeo.com.cn "getMoreArticle" endpoint.

    Pages are requested one after another, starting at page 1. The number of
    pages is ``amount / 10`` rounded up (presumably the endpoint serves 10
    entries per page -- confirm against the site).

    Fetching is best effort: a page that fails to download or does not have
    the expected ``article`` / ``url`` structure is logged and skipped.

    Args:
        amount: Number of articles wanted; anything ``float()`` accepts.
            Zero or negative values return an empty list without issuing
            any request.

    Returns:
        A list with the ``url`` value of every entry under ``article`` on
        each page that could be processed.
    """
    import logging
    import math

    # Round up so a partially filled last page is still requested; clamp at
    # zero so a negative amount can never trigger a request.
    needPages = max(0, int(math.ceil(float(amount) / 10)))

    # Only the trailing page number varies, so build the template once.
    urlTemplate = (
        "http://app.eeo.com.cn/?app=wxmember&controller=index&action=getMoreArticle&catid=3572"
        "&allcid=358818,358815,358809,358808,358799,358795,358777,358775,358767,358763,358761,358740,"
        "358732,358730,358718,358712&page=%s"
    )

    result = []
    for page in range(1, needPages + 1):
        APIURL = urlTemplate % page
        try:
            jsonData = crawlUtils.requestJsonWithProxy(APIURL, needCut=True)
            result += [x["url"] for x in jsonData["article"]]
        except Exception:
            # Keep the best-effort behaviour, but leave a trace and let
            # KeyboardInterrupt / SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Skipping page %s", APIURL, exc_info=True)
    return result
def requestAPIForURL(amount):
    """Collect article URLs from the weiyangx.com "load more news" AJAX action.

    The site's home page is fetched first to extract the ``_ajax_nonce``
    value, which is then sent with every POST to ``admin-ajax.php``. The
    number of POST requests is ``amount / 7`` rounded up.

    Args:
        amount: Number of articles wanted; anything ``float()`` accepts.
            Zero or negative values return an empty list without issuing
            any request (not even the home-page fetch).

    Returns:
        A list with the ``url`` value of every entry under ``data`` in each
        response.

    Raises:
        IndexError: If no nonce can be found in the home page.
    """
    import math

    # Round up so a partially filled last page is still requested; clamp at
    # zero so a negative amount can never trigger a request.
    needPages = max(0, int(math.ceil(float(amount) / 7)))
    if needPages == 0:
        # Nothing to fetch: skip the home-page request and nonce lookup.
        return []

    homePage = crawlUtils.requestWithProxy("https://www.weiyangx.com")[0]
    nonce = weiyangCrawlMethod.REGEX_FINDING_NONCE.findall(homePage)[0]

    APIURL = "https://www.weiyangx.com/wp-admin/admin-ajax.php"
    result = []
    for page in range(1, needPages + 1):
        # NOTE(review): the page count assumes 7 items per request while the
        # offset advances by 8 -- confirm which one matches the site.
        jsonData = crawlUtils.requestJsonWithProxy(
            APIURL,
            needCut=True,
            method="post",
            payload={
                "action": "home_load_more_news",
                "postOffset": page * 8,
                "tagId": 0,
                "_ajax_nonce": nonce,
            })
        result += [x["url"] for x in jsonData["data"]]
    return result
def requestAPIForURL(amount):
    """Fetch article URLs from the app.ikanchai.com roll endpoint in one request.

    ``amount`` is substituted into the ``pagesize`` query parameter and page 0
    is requested.

    Args:
        amount: Value placed into the ``pagesize`` query parameter.

    Returns:
        A list with the ``url`` value of every entry under ``data`` in the
        response.
    """
    endpoint = "http://app.ikanchai.com/roll.php?do=more&status=1&sort=0&pagesize=%s&page=0"
    payload = crawlUtils.requestJsonWithProxy(endpoint % amount, needCut=True)

    urls = []
    for entry in payload["data"]:
        urls.append(entry["url"])
    return urls