r = await self.async_crawl(url)
if not r:
    return False
dom = pq(r.content.decode())
now_pt = int(dom('span.price').text())
open_pt = int(dom('.idx-data-pri > li > b.i').eq(0).text())
high_pt = int(dom('.idx-data-pri > li > b.i').eq(1).text())
low_pt = int(dom('.idx-data-pri > li > b.i').eq(2).text())
# Warn only when both limits are configured and the current price
# breaks out of the [low_limit, high_limit] band.
if self.high_limit and self.low_limit and \
        (now_pt >= self.high_limit or now_pt <= self.low_limit):
    logger.warning('open: %s high: %s low: %s close: %s !!!!!!!!',
                   open_pt, high_pt, low_pt, now_pt)
else:
    logger.info('open: %s high: %s low: %s close: %s',
                open_pt, high_pt, low_pt, now_pt)
return True


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Defaults carry over the previously hardcoded alert band.
    parser.add_argument("-l", "--low", help="low limit", type=int, default=10430)
    parser.add_argument("-g", "--high", help="high limit", type=int, default=10450)
    args = parser.parse_args()

    log.initlog('DEMO', level=logging.INFO, debug=True)
    c = Crawler()
    c.sleep_time = 30
    # Wire the parsed arguments into the crawler (the original parsed
    # them but then ignored them in favor of hardcoded values).
    c.low_limit = args.low
    c.high_limit = args.high
    c.run()
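# --- Illustration, not part of the original source ---
# A minimal sketch of the markup shape the selectors above assume: a
# 'span.price' element holding the latest price, and an '.idx-data-pri'
# list whose first three 'b.i' cells hold open/high/low. The sample HTML
# and its numbers are invented for this example; the real page may differ.
from pyquery import PyQuery as pq

sample = """
<div>
  <span class="price">10441</span>
  <ul class="idx-data-pri">
    <li>open <b class="i">10435</b></li>
    <li>high <b class="i">10452</b></li>
    <li>low <b class="i">10428</b></li>
  </ul>
</div>
"""
dom = pq(sample)
assert int(dom('span.price').text()) == 10441
assert int(dom('.idx-data-pri > li > b.i').eq(1).text()) == 10452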
            content=content,
        )
        # articles.insert(ret)

    async def login(self):
        # POST the login form; fields 'a' and 'b' carry the account
        # name and password expected by the endpoint.
        url = 'http://www.huntcoco.com/member/account_login1.php'
        r = await self.async_crawl(url, method='POST', headers="""
Accept: */*
Accept-Encoding: gzip, deflate
Accept-Language: zh-TW,zh;q=0.9,en;q=0.8,zh-CN;q=0.7,en-US;q=0.6
Cache-Control: no-cache
Connection: keep-alive
Content-Length: 46
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Host: www.huntcoco.com
Origin: http://www.huntcoco.com
Pragma: no-cache
Referer: http://www.huntcoco.com/index.php?goto=41
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36
X-Requested-With: XMLHttpRequest
""", data=headers_raw_to_dict("""
a: [email protected]
b: ji394su3
"""))


if __name__ == '__main__':
    log.initlog('DEMO', level=logging.DEBUG, debug=True)
    c = Crawler()
    c.run()
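# --- Illustration, not part of the original source ---
# headers_raw_to_dict is used above but not shown in the excerpt; it may
# be a local helper (w3lib.http ships a bytes-based function of the same
# name). A minimal str-based sketch matching the usage above:
def headers_raw_to_dict(raw):
    """Parse 'Key: value' lines into a dict, skipping blank lines."""
    result = {}
    for line in raw.strip().splitlines():
        if ':' not in line:
            continue
        key, _, value = line.partition(':')
        result[key.strip()] = value.strip()
    return result

# e.g. headers_raw_to_dict("a: x\nb: y") == {'a': 'x', 'b': 'y'}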
                content=content,
                url=item["alternate"][0]["href"],
                category=category,
            )
            self.datas.append(dct)
            print(item["title"], dct["url"])

    def get_api(self, stream_id, continuation=None):
        api = "https://feedly.com/v3/streams/contents"
        # Millisecond-precision timestamp used as a cache-busting token.
        ts = str(time.time()).replace(".", "")[:13]
        params = headers_raw_to_dict("""
streamId: {}
count: 40
unreadOnly: true
ranked: newest
similar: true
ck: {}
ct: feedly.desktop
cv: 31.0.269
continuation: {}
""".format(stream_id, ts, continuation))
        # Drop the continuation parameter on the first page; checking the
        # argument avoids matching the formatted string "None", which is
        # truthy and would never be removed.
        if not continuation:
            del params["continuation"]
        return url_add_params(api, **params)


if __name__ == "__main__":
    log.initlog("DEMO", level=logging.DEBUG, debug=True)
    c = Crawler()
    c.run()
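# --- Illustration, not part of the original source ---
# url_add_params is also not shown in the excerpt; it presumably merges
# keyword arguments into a URL's query string. A standard-library sketch:
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

def url_add_params(url, **params):
    """Return url with params merged into its query string."""
    parts = urlparse(url)
    query = dict(parse_qsl(parts.query))
    query.update({k: str(v) for k, v in params.items()})
    return urlunparse(parts._replace(query=urlencode(query)))

# e.g. url_add_params("https://feedly.com/v3/streams/contents", count=40)
# -> "https://feedly.com/v3/streams/contents?count=40"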