def get_hudong(stockcode, fromday, today):
    """Count the investor questions listed for a stock in a date range.

    Only the first and the last result pages are requested.  Every page
    before the last one is assumed to hold as many questions as the first
    page, so the result is
    ``first_page_count * (page_count - 1) + last_page_count``.

    Args:
        stockcode: Value sent as ``condition.stockcode``.
        fromday: Value sent as ``condition.dateFrom``.
        today: Value sent as ``condition.dateTo``.

    Returns:
        The computed number of questions as an int.
    """
    query_url = 'http://irm.cninfo.com.cn/ircs/interaction/queryQuestionByGszz.do?'

    def _get_page_num(doc):
        """Return the page number embedded in the "last page" link, or None."""
        for ele_page in doc.findall('.//dl[@class="yms_box"]//a[@href]'):
            # ``text`` is None for an anchor without leading text (for
            # example one that only wraps another element), so guard it
            # before stripping instead of crashing on such a link.
            text = (ele_page.text or '').strip()
            if text != u'尾页':
                continue
            href = ele_page.get('href')
            try:
                # The page number is the argument between the parentheses
                # of the href.
                return int(href.split('(')[1].split(')')[0])
            except (IndexError, ValueError):
                # No parenthesised argument, or it is not a number.
                return None
        return None

    def _get_ask_count(doc):
        """Return how many question tables the page contains."""
        return len(doc.findall('.//table[@class="req_box2"]'))

    def _fetch_page(page_no):
        """Request one result page and return its parsed document."""
        data = {
            'condition.dateFrom': fromday,
            'condition.dateTo': today,
            'condition.stockcode': stockcode,
            'pageNo': page_no
        }
        html = utils.url_request(query_url, data, cache_timeout=utils.HOUR)
        return etree.HTML(html)

    doc = _fetch_page(1)
    pagenum = _get_page_num(doc) or 0
    first_page_count = _get_ask_count(doc)
    if pagenum <= 1:
        # Single page (or no pager found): the first page is everything.
        return first_page_count

    last_page_count = _get_ask_count(_fetch_page(pagenum))
    return first_page_count * (pagenum - 1) + last_page_count
def get_financial_report(year, quarter):
    """Request the eastmoney report listing for one quarter and print it.

    The report date is ``year`` joined with the ``QUARTER_DATE_MAP`` entry
    for ``str(quarter)``.  The date and the raw response are printed;
    nothing is parsed or returned.

    Args:
        year: Year component of the report date.
        quarter: Quarter key; looked up in ``QUARTER_DATE_MAP`` as a string.

    Returns:
        None.
    """
    report_date = '%s-%s' % (year, QUARTER_DATE_MAP[str(quarter)])
    endpoint = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx'
    query = {
        'type': 'SR',
        'sty': 'YJBB',
        'fd': report_date,
        'sr': -1,
        'p': 1,
        'ps': 100,
        'js': '{pages:(pc),data:[(x)]}',
        'stat': 0,
    }
    data = utils.url_request(endpoint + '?' + utils.urlencode(query))
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement.
    print(report_date)
    print(data)
    return