def getCrawlNoRssRequestLength():
    """Fetch the pending no-RSS crawl request count from the remote service.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports the literal string "null" or when the request or
        JSON parsing fails.
    """
    try:
        http = HttpRequest()
        url = requst_norss_length_url
        response = http.setUrl(url).setBody({}).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original fell through without returning, so the
        # fetched value was always discarded and the caller got None.
        # Return it, matching the sibling helpers that do `return res`.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def getCrawlNoRssRequestLength():
    """Fetch the pending no-RSS crawl request count from the remote service.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports the literal string "null" or when the request or
        JSON parsing fails.
    """
    try:
        http = HttpRequest()
        url = requst_norss_length_url
        response = http.setUrl(url).setBody({}).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original computed `res` but never returned it,
        # so callers always received None; return the fetched value.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def syncLastMd5(params):
    """Push the latest MD5 checksum state to the sync endpoint.

    Args:
        params: dict payload sent as the request body.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports "null" or when the request/parse fails.
    """
    try:
        http = HttpRequest()
        url = sync_last_md5_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original never returned `res` on success, discarding
        # the service's response; return it like the sibling helpers.
        # Also dropped the redundant py2 `print e` — logging already records it.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def getCrawlRssRequest(params=None):
    """Fetch an RSS crawl request from the remote service.

    Args:
        params: optional dict payload for the request body; defaults to an
            empty dict.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports "null" or when the request/parse fails.
    """
    # BUG FIX: `params={}` was a mutable default argument shared across
    # calls; use the None sentinel instead (backward compatible).
    if params is None:
        params = {}
    try:
        http = HttpRequest()
        url = request_rss_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original never returned `res` on success; return it.
        # Also dropped the redundant py2 `print e` — logging records it.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def getCrawlRssRequest(params=None):
    """Fetch an RSS crawl request from the remote service.

    Args:
        params: optional dict payload for the request body; defaults to an
            empty dict.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports "null" or when the request/parse fails.
    """
    # BUG FIX: `params={}` was a mutable default argument shared across
    # calls; use the None sentinel instead (backward compatible).
    if params is None:
        params = {}
    try:
        http = HttpRequest()
        url = request_rss_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original never returned `res` on success; return it.
        # Also dropped the redundant py2 `print e` — logging records it.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def syncLastMd5(params):
    """Push the latest MD5 checksum state to the sync endpoint.

    Args:
        params: dict payload sent as the request body.

    Returns:
        The decoded ``data`` field of the JSON response, or None when the
        service reports "null" or when the request/parse fails.
    """
    try:
        http = HttpRequest()
        url = sync_last_md5_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the literal string "null".
            res = None
        # BUG FIX: the original never returned `res` on success, discarding
        # the service's response; return it like the sibling helpers.
        # Also dropped the redundant py2 `print e` — logging records it.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
def requstDistinct(hashCode):
    """Ask the dedup service which of the given hash codes are distinct.

    Args:
        hashCode: iterable of hash-code strings; sent as one comma-joined
            ``field`` value.

    Returns:
        The service's ``data`` list, or [] when it is empty or on any error.
    """
    try:
        payload = {'field': ",".join(hashCode)}
        raw = HttpRequest().setUrl(requst_distinct_url).setBody(payload).encrypt([]).post()
        data = json.loads(raw)['data']
        return data if data else []
    except Exception as e:
        logging.info('-----------%s-------' % e)
        return []
def requstDistinct(hashCode):
    """Query the distinct-filter endpoint with the supplied hash codes.

    Args:
        hashCode: iterable of hash-code strings, transmitted as a single
            comma-joined ``field`` value.

    Returns:
        The non-empty ``data`` list from the service, otherwise [].
    """
    result = []
    try:
        joined = ",".join(hashCode)
        client = HttpRequest().setUrl(requst_distinct_url)
        response = client.setBody({"field": joined}).encrypt([]).post()
        parsed = json.loads(response)["data"]
        if parsed:
            result = parsed
    except Exception as e:
        logging.info("-----------%s-------" % e)
    return result
def retrieve_data(self, key, code):
    """Fetch daily price records for one stock code and cache them.

    Stores the parsed ``record`` array into ``self.data[code]``.

    Args:
        key: market prefix (presumably "sh"/"sz" — confirm against callers).
        code: six-digit stock code string.
    """
    # BUG FIX: the original signature omitted ``self`` even though the body
    # assigns to ``self.data`` — every call raised NameError, which the bare
    # ``except: pass`` silently swallowed, so no data was ever cached.
    try:
        logging.info("get daily data of %s" % (key + code))
        raw = HttpRequest(DAILY_PRICE_URL % (key + code)).get()
        # Strip commas that precede digits (thousands separators) so the
        # payload becomes valid JSON numbers.
        raw = re.compile(r",(\d+)").sub(r"\1", raw)
        # Collapse underscore-separated number triples, e.g. "2020_01_02"
        # -> "20200102" — presumably date fields; verify against the feed.
        raw = re.compile(r"(\d+)\_(\d+)\_(\d+)").sub(r"\1\2\3", raw)
        self.data[code] = json.loads(raw)["record"]
    except Exception as e:
        # Best-effort: skip bad or missing data, but log instead of
        # silently swallowing every error as the original did.
        logging.info("retrieve_data failed for %s: %s" % (key + code, e))
def syncCrawlInfos(dataList):
    """Upload a batch of crawl-info SQL statements to the sync service.

    The list is JSON-serialized, protected with an MD5 checksum, and sent
    with gzip content negotiation and a long (900 s) timeout.

    Returns:
        The service's ``data`` payload, or [] when empty or on any error.
    """
    try:
        serialized = json.dumps(dataList)
        payload = {"sql": serialized, "checksum": toMd5(serialized)}
        gzip_headers = {"Content-Encoding": "gzip", "Accept-Encoding": "gzip"}
        client = HttpRequest()
        client.setTimeout(900)  # large batches can take a long time server-side
        raw = client.setUrl(sync_crawl_infos_url).setBody(payload).setHeader(gzip_headers).encrypt([]).post()
        parsed = json.loads(raw)["data"]
        return parsed if parsed else []
    except Exception as e:
        # NOTE(review): the extra True argument looks like it targets a
        # project-specific logging wrapper — confirm; stdlib logging would
        # treat it as a stray format arg.
        logging.info("-----------%s-------" % e, True)
        return []
def syncCrawlInfos(dataList):
    """Send a batch of crawl-info SQL statements to the sync endpoint.

    Serializes the batch to JSON, attaches an MD5 checksum, and posts it
    gzip-encoded with an extended 900-second timeout.

    Returns:
        The decoded ``data`` payload, or [] when it is empty or on error.
    """
    result = []
    try:
        sqlList = json.dumps(dataList)
        request = HttpRequest()
        request.setTimeout(900)  # batch uploads may be slow to process
        request.setUrl(sync_crawl_infos_url)
        request.setBody({'sql': sqlList, 'checksum': toMd5(sqlList)})
        request.setHeader({'Content-Encoding': 'gzip', 'Accept-Encoding': "gzip"})
        response = request.encrypt([]).post()
        decoded = json.loads(response)['data']
        if decoded:
            result = decoded
    except Exception as e:
        # NOTE(review): the trailing True looks aimed at a project logging
        # wrapper — confirm; stdlib logging would treat it as a format arg.
        logging.info('-----------%s-------' % e, True)
    return result
def __init__(self):
    """Scrape the Eastmoney stock list and fill ``self.code``.

    ``self.code`` maps an exchange prefix ("sh"/"sz") to the set of
    six-digit codes found on the page; anchors whose href does not match
    are skipped. Logs a per-exchange count when done.
    """
    self.code.clear()
    page = HttpRequest("http://quote.eastmoney.com/stocklist.html").get()
    soup = BeautifulSoup(page, "lxml")
    pattern = re.compile(".*(sh|sz)([0-9]{6})\.html")
    for anchor in soup.find('div', id='quotesearch').find_all('a'):
        try:
            exchange, number = pattern.match(anchor['href']).groups()
            self.code[exchange].add(number)
        except:
            # Anchors without a usable/matching href are ignored.
            pass
    for key in self.code:
        logging.info("%s: %d" % (key, len(self.code[key])))