Example #1
0
    def return_stock_in_bankuai(self, bankuai):
        # Download and parse the list of stocks that belong to one bottom-level bankuai.
        #
        # Parameters:
        #   bankuai - list naming the path from the top bankuai down to the bottom
        #             bankuai, e.g. ['板块','概念板块','AB股票']
        #
        # Returns:
        #   a shallow copy of bankuai with one extra trailing element holding the stocks:
        #   ['板块','概念板块','AB股票',[[code,name],[code,name],...]]
        #
        # Raises:
        #   RuntimeError - when bankuai[2] is not a child of bankuai[0] -> bankuai[1]
        #                  in the cached bankuai tree
        bankuai_tree = self.__bankuai_tree

        if bankuai[2] not in bankuai_tree[bankuai[0]]["children"][bankuai[1]]["children"]:
            raise RuntimeError("The url of [" + ",".join(bankuai) + "] is not correct.", "in Eastmoney.py")

        bankuai_detail_url = self.return_url_for_bankuai_stock(bankuai)
        while True: # Retry forever until the download succeeds
            try:
                bankuai_detail_page = read_url(bankuai_detail_url)
                break
            except Exception:
                # Exception (not a bare except) so that Ctrl-C / SystemExit can
                # still break out of this otherwise endless retry loop.
                warn_log('Connection lost, retry in 10 seconds ...')
                time.sleep(10)

        # The payload of interest is everything between the first '[' and the last ']'.
        # NOTE(review): re.search returns None when the page has no such section, in
        # which case the .group() call below raises AttributeError.
        r_return_code_detail_grp = r'\[(?P<code_detail_grp>.*)\]'
        code_detail_grp = re.search(r_return_code_detail_grp, bankuai_detail_page).group("code_detail_grp")

        # Each stock is one double-quoted, comma-separated record inside that section.
        r_code_detail = re.compile(r'"(?P<code_detail>[^"]*)"')

        stocks = []
        for record in r_code_detail.finditer(code_detail_grp):
            fields = record.group("code_detail").split(",")
            # Field 1 is the stock code, field 2 the stock name (decoded from utf-8).
            stocks.append([fields[1], fields[2].decode("utf-8")])

        out_list = copy.copy(bankuai)
        out_list.append(stocks)
        return out_list
Example #2
0
        def return_bankuai_tree(bankuai_url = self.__base_url + "/" + self.__bankuai_ext):
            # Fetch the bankuai index page and turn its menu into a nested dict:
            #
            #   {name: {"url": ...},
            #    name: {"url": ...,
            #           "children": {name: {"url": ...},
            #                        name: {"url": ...,
            #                               "children": {name: {"url": ...}}}}}}
            #
            # The menu in the html comes in three shapes:
            #   AAA              - entry without sub categories  (leaf_only_re)
            #   BBB->CCC         - entry with one level below it (top_with_children_re + leaf_link_re)
            #   DDD->EEE->FFF    - entry with two levels below   (top_with_children_re,
            #                      mid_with_children_re + leaf_link_re)
            #
            # Every name captured from the page is decoded from gb2312.
            page_html = read_url(bankuai_url)

            leaf_only_re = re.compile(r'<dd class="node-item" data-key="\w+"><a href="(?P<url>[0-9a-zA-Z_,#\.]+)"><span class="text">(?P<name>[^<]+)</span></a></dd>')
            top_with_children_re = re.compile(r'<dd class="js-sub" data-id="\w+"><span class="node-item" data-key="\w*"><b class="icon-sub-title"></b><a href="(?P<url>[0-9a-zA-Z_,#\.]+)"[^>]*><span class="text">(?P<name>[^<]+)</span>(?P<content>.*?)(?=</ul>)</ul></dd>')
            mid_with_children_re = re.compile(r'<li class="node-sub-sub"><a href="(?P<url>[^"]+)" class="[^>]+"><span class="text">(?P<name>[^<]+)</span></a><b class="icon-right"></b><div class="[^>]+">(?P<content>.*?)(?=</div>)</div><div class="hover-mask"></div></li>')
            leaf_link_re = re.compile(r'<a href="(?P<url>[^"]+)"[^>]*><span class="text">(?P<name>[^<]+)</span></a>')

            def collect_leaves(fragment):
                # Map every plain link found in the html fragment to {"url": ...}.
                leaves = {}
                for link in leaf_link_re.finditer(fragment):
                    leaves[link.group("name").decode("gb2312")] = {"url": link.group("url")}
                return leaves

            tree = {}

            # Entries without sub categories; the first occurrence of a name wins.
            for hit in leaf_only_re.finditer(page_html):
                tree.setdefault(hit.group("name").decode("gb2312"), {"url": hit.group("url")})

            # Entries with sub categories; these replace any entry of the same name.
            for top in top_with_children_re.finditer(page_html):
                top_name = top.group("name").decode("gb2312")
                top_body = top.group("content")
                if 'class="node-sub-sub"' not in top_body:
                    # only one level below this entry
                    children = collect_leaves(top_body)
                else:
                    # two levels below this entry
                    children = {}
                    for mid in mid_with_children_re.finditer(top_body):
                        mid_name = mid.group("name").decode("gb2312")
                        children[mid_name] = {
                            "url": mid.group("url"),
                            "children": collect_leaves(mid.group("content")),
                        }
                tree[top_name] = {"url": top.group("url"), "children": children}

            return tree
Example #3
0
 def return_bankuai_in_bankuai(self, bankuai, sort_direction="desc"):
     # Download and parse the list of sub-bankuais that belong to one bankuai.
     #
     # Parameters:
     #   bankuai        - list naming the path from the top bankuai to the bottom
     #                    bankuai, e.g. ['板块','概念板块']
     #   sort_direction - "desc" or "asc" (case-insensitive); selects which of the
     #                    two lists embedded in the page is returned
     #
     # Returns:
     #   a shallow copy of bankuai with one extra trailing element:
     #   ['板块','概念板块',[
     #                    [bankuai_name,increase,amount(in 0.1billion),change_ratio,rising_count,falling_count,leading_stock_code,leading_stock_name,increase],
     #                    [bankuai_name,increase,amount(in 0.1billion),change_ratio,rising_count,falling_count,leading_stock_code,leading_stock_name,increase],
     #                    ...
     #                    ]]
     #
     # Raises:
     #   RuntimeError - when sort_direction is neither "desc" nor "asc"
     #
     # Normalise once and reuse: the validation is case-insensitive, and the regex
     # group names below are lower-case, so e.g. "DESC" must map to "..._desc".
     direction = sort_direction.lower()
     if direction not in ("desc", "asc"):
         raise RuntimeError("Incorrect parameter [%(direction)s]" % {"direction": sort_direction}, "in Eastmoney.py")

     bankuai_url = self.return_url_for_bankuai_bankuai(bankuai)
     while True: # Retry forever until the download succeeds
         try:
             bankuai_page = read_url(bankuai_url)
             break
         except Exception:
             # Exception (not a bare except) so that Ctrl-C / SystemExit can
             # still break out of this otherwise endless retry loop.
             warn_log('Connection lost, retry in 10 seconds ...')
             time.sleep(10)

     # The page carries two bracketed lists back to back, [[...],[...]]:
     # the first is captured as the "desc" group, the second as the "asc" group.
     # NOTE(review): re.search returns None when the page has no such section, in
     # which case the .group() call below raises AttributeError.
     r_return_bankuai_detail_grp = r'\[\[(?P<bankuai_detail_group_desc>[^\]]+)\],\[(?P<bankuai_detail_group_asc>[^\]]+)\]\]'
     match_objs = re.search(r_return_bankuai_detail_grp, bankuai_page)
     bankuai_detail_grp = match_objs.group("bankuai_detail_group_" + direction)

     # Each bankuai is one double-quoted, comma-separated record inside the list.
     r_code_detail = re.compile(r'"(?P<code_detail>[^"]*)"')

     bankuais = []
     for record in r_code_detail.finditer(bankuai_detail_grp):
         fields = record.group("code_detail").split(",")
         # Index explicitly (instead of slicing) so that a record with fewer than
         # nine fields still raises IndexError rather than yielding a short row.
         bankuais.append([fields[i] for i in range(9)])

     out_list = copy.copy(bankuai)
     out_list.append(bankuais)
     return out_list
Example #4
0
#!/usr/bin/python2.7
# coding:utf-8

# Ad-hoc script: download one jrj.com.cn share page and dump it to stdout.

import re
import sys
import pprint
import copy
import csv
import os

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so the
# process-wide default string encoding can be switched to gbk. This has to run
# before the project imports below, exactly as in the original ordering.
reload(sys)
sys.setdefaultencoding("gbk")

from tooling.common_tool import print_log, warn_log, read_url, get_date, return_new_name_for_existing_file
from Sys_paths import Sys_paths

SHARE_PAGE_URL = 'http://stock.jrj.com.cn/share,600225,jjcg_3.shtml'

page_code = read_url(SHARE_PAGE_URL)

# A single parenthesised argument prints the same text under the Python 2 print statement.
print(page_code)