def perform(self):
    """Fetch ``self.url`` and store the parsed result on the instance.

    The URL is requested through ``Curl``. When the HTTP status is 200 the
    response body is handed to ``self.__dump`` and the two values it returns
    are stored as ``self.current_focus`` and ``self.monthly_focus``.

    Returns:
        True when the request came back with status 200 and the attributes
        were updated, False for any other status (attributes are untouched).
    """
    request = Curl(str(self.url))
    # NOTE(review): the meaning of the ``False`` flag is defined by Curl,
    # which is not visible here -- confirm against its implementation.
    request.perform(False)

    if request.getHttpReturnCode() != 200:
        return False

    body = request.getHttpContent()
    self.current_focus, self.monthly_focus = self.__dump(body)
    return True
def get_stock_info(id):
    """Fetch the SSE company page for one stock and extract its details.

    The page is requested through ``Curl`` and scanned for key/value table
    cell pairs. Every pair whose key cell contains one of the known labels
    contributes an entry to the result, after the value cell has been passed
    through ``filter_html``.

    Args:
        id: Stock identifier; converted with ``str`` and used for both the
            PRODUCTID and COMPANY_CODE query parameters.

    Returns:
        A dict that may contain the keys ``date``, ``name``, ``en_name``,
        ``address``, ``web``, ``state`` and ``category`` (only labels found
        on the page produce keys), or None when the HTTP status is not 200.
    """
    id = str(id)
    res = {}
    url = (
        "http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryListCmpAct?"
        "reportName=QueryListCmpRpt&REPORTTYPE=GSZC&PRODUCTID=%s&COMPANY_CODE=%s"
    ) % (id, id)
    logger.info(url)
    logger.info("fetch [%s] info %s" % (id, url))
    # Parenthesised single-argument form prints the same text whether
    # ``print`` is the Python 2 statement or the print function.
    print("fetch [%s] info %s" % (id, url))

    a = Curl(url)
    # NOTE(review): the meaning of the ``False`` flag is defined by Curl,
    # which is not visible here -- confirm against its implementation.
    a.perform(False)
    logger.info(a.getHttpHeader())
    if a.getHttpReturnCode() != 200:
        return None

    # Labels whose value is stored as-is (after filter_html) under one key.
    simple_fields = (
        (u"上市日", 'date'),
        (u"注册地址", 'address'),
        (u"网址", 'web'),
        (u"所属省/直辖市", 'state'),
    )

    re_obj = re.compile(
        r"<td class=\"content_b\".+?>(?P<key>.+?)</td>.+?<td.+?>(?P<value>.+?)</td>",
        re.S,
    )
    # With two groups in the pattern, findall yields (key, value) tuples.
    for key, raw_value in re_obj.findall(a.getHttpContent()):
        for label, field in simple_fields:
            if label in key:
                res[field] = filter_html(raw_value)

        if u"公司全称" in key:
            # The cell is split on CRLF: first line -> name, second line ->
            # en_name. A cell without a second line used to raise IndexError
            # and lose the whole result; fall back to an empty string.
            names = re.split(r"\r\n", filter_html(raw_value))
            res['name'] = names[0]
            res['en_name'] = names[1] if len(names) > 1 else ''

        if u"CSRC行业" in key:
            # Only the first whitespace-delimited token is kept.
            res['category'] = re.split(r"\s+", filter_html(raw_value))[0]

    return res
def get_stock_list(offset):
    """Fetch one page of the SSE stock listing and extract its rows.

    The listing page is requested through ``Curl``. Inside the first table
    tagged ``bgcolor="#337fb2"`` every ``<tr>`` is examined; rows containing
    a linked cell followed by another cell contribute one entry.

    Args:
        offset: Number interpolated with ``%d`` into the CURSOR query
            parameter.

    Returns:
        A list of ``[link_text, next_cell_content]`` pairs, one per matching
        row (possibly empty), or None when the HTTP status is not 200 or the
        expected table is not found in the response.
    """
    url = (
        "http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryStockInfoAct"
        "?reportName=BizCompStockInfoRpt&PRODUCTID=&PRODUCTJP=&PRODUCTNAME=&keyword=&tab_flg=&CURSOR=%d"
    ) % offset
    logger.info(url)
    logger.info("fetch URL %s" % url)
    # Parenthesised single-argument form prints the same text whether
    # ``print`` is the Python 2 statement or the print function.
    print("fetch URL %s" % url)

    a = Curl(url)
    # NOTE(review): the meaning of the ``False`` flag is defined by Curl,
    # which is not visible here -- confirm against its implementation.
    a.perform(False)
    logger.info(a.getHttpHeader())
    if a.getHttpReturnCode() != 200:
        return None

    table_re = re.compile(r"<table.+?bgcolor=\"#337fb2\">(.+?)</table>", re.S)
    table_match = table_re.search(a.getHttpContent())
    if table_match is None:
        return None

    # Compiled once, outside the row loop.
    row_re = re.compile(r"<tr>.+?</tr>", re.S)
    stock_re = re.compile(
        r"<td.+?><a href=.+?>(.+?)</a></td>.+?<td.+?>(.+?)</td>", re.S
    )

    res = []
    for row in row_re.findall(table_match.group(1)):
        stock_match = stock_re.search(row)
        # Rows that do not have the expected shape are skipped explicitly
        # instead of relying on a broad ``except Exception`` to swallow the
        # AttributeError raised by calling ``.groups()`` on None.
        if stock_match is None:
            continue
        res.append([stock_match.group(1), stock_match.group(2)])
    return res