Esempio n. 1
0
def qqfinance_InSt(code):
    """Fetch the financial-indicator tables for one stock from qq.com.

    Parameters
    ----------
    code : str
        Stock code listed on the Shanghai or Shenzhen exchange.

    Returns
    -------
    DataFrame
        Indexed by report date ('报表日期'); values are in yuan
        (or 10k yuan), converted to floats via ``_str2fl``.
    """
    url = 'http://stock.finance.qq.com/corp1/inst.php?zqdm={0}'.format(code)
    html = requests.get(url, headers=hds())
    h = lxml.html.parse(StringIO(html.text))
    urls = h.xpath('//div[@class="right"]/table[2]/tr/td/a/@href')
    frames = []
    for url in urls:
        html = requests.get(url, headers=hds())
        soup = BeautifulSoup(html.text, 'lxml')
        # BUG FIX: attrs must be a dict, not a set, to filter on class="list".
        table = soup.find_all('table', attrs={'class': 'list'})
        df = pd.read_html(str(table))[0]
        df = df.dropna(how='all', axis=1)
        df = df.T
        # .ix was removed from pandas; positional .iloc is the equivalent here.
        df.columns = df.iloc[0, :]
        df = df.drop(0, axis=0)
        frames.append(df)
    # pd.concat replaces the removed DataFrame.append accumulation.
    AT = pd.concat(frames) if frames else pd.DataFrame()
    AT = AT.set_index('报表日期')
    AT = AT.dropna(how='all', axis=1)
    AT = AT.applymap(lambda x: _str2fl(x))
    AT = AT.sort_index()
    return AT
Esempio n. 2
0
def get_holdernum_change_EM(year, qu):
    """Shareholder-count changes for all stocks from EastMoney.

    Parameters
    ----------
    year : int or None
        Report year; when None, no report-date filter is applied.
    qu : int
        Quarter key into the module-level ``quarter`` template
        (producing e.g. '2017-06-30').

    Returns
    -------
    DataFrame with one row per stock, all pages concatenated.
    """
    if year is None:
        reportdate = ''
    else:
        reportdate = quarter[qu] % year

    base = ('http://data.eastmoney.com/DataCenter_V3/gdhs/GetList.ashx'
            '?reportdate={0}&market=&changerate==&range==&pagesize=50000'
            '&page={1}&sortRule=-1&sortType=NoticeDate')

    data = json.loads(requests.get(base.format(reportdate, 1),
                                   headers=hds()).text)
    tp = data['pages']
    frames = [pd.DataFrame(data['data'])]

    # Fetch any remaining pages and stitch them together
    # (pd.concat replaces the removed DataFrame.append).
    for i in range(2, tp + 1):
        data = json.loads(requests.get(base.format(reportdate, i),
                                       headers=hds()).text)
        frames.append(pd.DataFrame(data['data']))
    return pd.concat(frames)
Esempio n. 3
0
def getlist(url, page=2):
    """Collect {date_title: href} links from court.gov.cn list pages.

    Parameters
    ----------
    url : str
        Base list-page URL.
    page : int
        Number of pages to scan: the base page plus ``?page=i``
        for i in 2..page.

    Returns
    -------
    dict mapping '<date>_<cleaned title>' to the absolute article URL.
    """
    datasets = {}

    def _collect(page_url):
        # Parse one list page and add its entries to ``datasets``
        # (previously this body was duplicated verbatim for page 1 and 2+).
        r = requests.get(page_url, headers=hds())
        txt = r.content.decode('utf8')
        html = lxml.html.parse(StringIO(txt))
        lis = html.xpath('//div[@id="container"]/div[@class="sec_list"]/ul/li')
        for li in lis:
            # Strip all whitespace and curly quotes from the title.
            name = re.sub(r'\s*', '', li.xpath('a/text()')[0].strip())
            name = name.replace('“', '').replace('”', '')
            date = li.xpath('i/text()')[0]
            name = date + '_' + name
            href = 'http://www.court.gov.cn' + li.xpath('a/@href')[0]
            datasets[name] = href

    _collect(url)
    for i in range(2, page + 1):
        _collect(url + '?page=%s' % i)
    return datasets
Esempio n. 4
0
def get_holder_analys_EM(year,qu):
    """Detailed top-shareholder holdings from EastMoney for one report
    period (e.g. reportdate='2017-06-30').

    year/qu are combined via the module-level ``quarter`` template into
    the report date; returns a DataFrame with one row per
    (stock, shareholder, rank) entry.
    """
    reportdate=quarter[qu]%year
    
    # The js= parameter is a URL-encoded template telling the API to wrap
    # the payload as {"pages":(tp),"data":(x)}.
    url="http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?type=NSHDDETAILLA&token=70f12f2f4f091e459a279469fe49eca5&cmd=&st=NDATE,SCODE,RANK&sr=1&p=1&ps=5000&js={1}&filter=(RDATE=%27^{0}^%27)".format(reportdate,'{%22pages%22:(tp),%22data%22:(x)}')

    #print(url)
    r=requests.get(url,headers=hds())
    text=r.text
    # The endpoint returns JS-style unquoted keys; quote them so json can parse.
    text=text.replace('pages:','"pages":').replace("data:",'"data":')
    #print(text[:30])
    data=json.loads(text)
    df=pd.DataFrame(data['data'])
    tp=data['pages']

    if tp>1:
        # Fetch the remaining pages (p=2..tp) and append them.
        for i in range(2,tp+1):
            url="http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?type=NSHDDETAILLA&token=70f12f2f4f091e459a279469fe49eca5&cmd=&st=NDATE,SCODE,RANK&sr=1&p={2}&ps=5000&js={1}&filter=(RDATE=%27^{0}^%27)".format(reportdate,'{%22pages%22:(tp),%22data%22:(x)}',i)

            r=requests.get(url,headers=hds())
            text=r.text
            text=text.replace('pages:','"pages":').replace("data:",'"data":')
            data=json.loads(text)
            df=df.append(pd.DataFrame(data['data']))

    # Trim timestamps to dates, normalise '-' to NaN, drop id columns,
    # coerce values to floats via the module-level wt._tofl helper.
    df['NDATE']=df['NDATE'].map(lambda x:x[:10])
    df['RDATE']=df['RDATE'].map(lambda x:x[:10])
    df=df.replace('-',np.nan)
    df=df.drop(['SHAREHDCODE','COMPANYCODE', 'SHARESTYPE', 'LTAG'],axis=1)
    df=df.applymap(lambda x:wt._tofl(x))
    # SCODE may come back numeric; keep 6-digit zero-padded string codes.
    df['SCODE']=df['SCODE'].map(lambda x:str(x).split('.')[0].zfill(6))
    return df
Esempio n. 5
0
def get_holders_num_qq(rpdate='2017-06-30'):
    """Fund-holding summary for all stocks from qq (gtimg) for one
    report date.

    Parameters
    ----------
    rpdate : str
        Report date, e.g. '2017-06-30'.

    Returns
    -------
    DataFrame with zero-padded 6-digit codes and a 'date' column.
    """
    base = ('http://web.ifzq.gtimg.cn/fund/zcjj/zcjj/allzc?colum=3&order=desc'
            '&page={1}&pagesize=50&bgrq={0}&_var=v_jjcg')

    # The response is a JS assignment "v_jjcg=<json>"; keep the JSON part.
    r = requests.get(base.format(rpdate, 1), headers=hds())
    data = json.loads(r.text.split("jjcg=")[1])
    frames = [pd.DataFrame(data['data']['data'])]
    tpage = data['data']['totalPages']

    # Remaining pages; pd.concat replaces the removed DataFrame.append.
    for i in range(2, tpage + 1):
        r = requests.get(base.format(rpdate, i), headers=hds())
        data = json.loads(r.text.split("jjcg=")[1])
        frames.append(pd.DataFrame(data['data']['data']))
    df = pd.concat(frames)

    df.columns = [
        'Total.Num(10K)', 'change(10K)', 'inst.Num', 'Curr.Rate%', 'code',
        'name'
    ]
    df['date'] = rpdate
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    return df
Esempio n. 6
0
def get_search_inst_num_EM(code=None):
    """Institutional research-visit list from EastMoney.

    Parameters
    ----------
    code : str or None
        Stock code; when None the list for all stocks is returned.

    Returns
    -------
    DataFrame (without the CompanyCode column when non-empty).
    """
    base = ('http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx'
            '?pagesize=5000&page={0}')

    def _page_url(page):
        # Per-stock queries just append the code filter.
        url = base.format(page)
        return url if code is None else url + '&code={0}'.format(code)

    data = json.loads(requests.get(_page_url(1), headers=hds()).text)
    frames = [pd.DataFrame(data['data'])]
    tp = data['pages']

    # Remaining pages; pd.concat replaces the removed DataFrame.append.
    for i in range(2, tp + 1):
        data = json.loads(requests.get(_page_url(i), headers=hds()).text)
        frames.append(pd.DataFrame(data['data']))
    df = pd.concat(frames)

    # Idiomatic emptiness test (was: `df.empty is False`).
    if not df.empty:
        del df['CompanyCode']
    return df
Esempio n. 7
0
def get_tick_today_EM(code, mtype=0):
    """Today's tick-by-tick trades for one stock from EastMoney.

    Parameters
    ----------
    code : str
        6-digit stock code; a market suffix is appended internally
        ('1' for SH 6xx/9xx, '2' for SZ 0xx/2xx/3xx).
    mtype : int
        Passed through as the API's ``limit`` parameter.

    Returns
    -------
    DataFrame with columns time/price/volume/type, all pages combined.
    """
    if code[0] in ['6', '9']:
        code = code + '1'
    elif code[0] in ['0', '2', '3']:
        code = code + '2'

    base = ('http://hqdigi2.eastmoney.com/EM_Quote2010NumericApplication/'
            'CompatiblePage.aspx?Type=OB&stk={0}&Reference=xml&limit={1}'
            '&page={2}')

    def _fetch(page):
        # Returns (DataFrame, raw JS text) for one page of ticks.
        r = requests.get(base.format(code, mtype, page), headers=hds())
        text = r.text.split('=')[1]
        rows = text.split('data:["')[1].replace('"]};', '').replace('","', '\n')
        return pd.read_csv(StringIO(rows), header=None), text

    df, text = _fetch(1)
    # Total page count is embedded in the JS payload before the data array.
    pages = int(text.split('{pages:')[1].split(',data:')[0])

    frames = [df]
    for i in range(2, pages + 1):
        frames.append(_fetch(i)[0])
    df = pd.concat(frames)

    df.columns = ['time', 'price', 'volume', 'type']
    df = df.applymap(lambda x: wt._tofl(x))
    return df
Esempio n. 8
0
def get_sharesholded_change_EM(code=None, mtype='all'):
    """Shareholder increase/decrease (增减持) records from EastMoney.

    Parameters
    ----------
    code : str or None
        Stock code; None queries all stocks.
    mtype : str
        'all' -- every change; 'jjc' -- decreases; 'jzc' -- increases.

    Returns
    -------
    DataFrame with one row per change record, all pages combined.
    """
    if code is None:
        code = ''

    base = ('http://data.eastmoney.com/DataCenter_V3/gdzjc.ashx?pagesize=5000'
            '&page={1}&param=&sortRule=-1&sortType=BDJZ&tabid={2}&code={0}'
            '&name=')

    def _fetch(page):
        # One page of records as (DataFrame, raw response text).
        r = requests.get(base.format(code, page, mtype), headers=hds())
        rows = r.text.split(',data:["')[1].split('"] ,"url')[0]
        rows = rows.replace('","', '\n')
        return pd.read_csv(StringIO(rows), header=None), r.text

    df, raw = _fetch(1)
    tp = int(raw.split(",data:[")[0].split("pages:")[1])

    frames = [df]
    # BUG FIX: follow-up pages previously hard-coded tabid=all, silently
    # ignoring the caller's mtype filter; the shared template fixes that.
    for i in range(2, tp + 1):
        frames.append(_fetch(i)[0])
    df = pd.concat(frames)

    df.columns = ['code', 'name', 'price', 'chg', 'holder', 'type', 'chg_num',
                  'chg.cur.rate', 'source', 'hold_num', 'tt.rate',
                  'hold_cur_num', 'cur.rate', 'start', 'end', 'date',
                  'chg.tt.rate']
    df = df.applymap(lambda x: wt._tofl(x))
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    return df.reset_index(drop=True)
Esempio n. 9
0
def qqhyxw(code):
    """
    Fetch industry-news headlines for a stock from qq.com finance.

    code : stock code used to locate the industry-news index page.
    Returns a DataFrame with columns name/href/datetime.
    """
    url = 'http://stockhtm.finance.qq.com/sstock/ggcx/{0}.shtml'.format(code)

    html = requests.get(url, headers=hds())
    soup = BeautifulSoup(html.content, 'lxml')
    dataset = []

    url = soup.find_all('a', text='行业新闻')[0].get('href')
    html = requests.get(url)
    content = html.content.decode('gbk')
    text = lxml.html.parse(StringIO(content))
    _read_table(text, dataset)

    # Follow the "next page" link for at most 11 more pages.
    for _ in range(11):
        soup = BeautifulSoup(content, 'lxml')
        try:
            nexturl = soup.find('a', text='下一页').get('href')
            html = requests.get(nexturl, headers=hds())
            content = html.content.decode('gbk')
            text = lxml.html.parse(StringIO(content))
            _read_table(text, dataset)
        except Exception as e:
            print(e)
            # BUG FIX: without this break the loop kept re-parsing the same
            # page (and re-printing the error) for every remaining iteration.
            break

    df = pd.DataFrame(dataset)
    df.columns = ['name', 'href', 'datetime']
    return df
Esempio n. 10
0
def get_pepb_Sina():
    """All-A-share quote/PE/PB snapshot from Sina, 500 rows per page.

    Pages are fetched until an empty page is returned; the result is
    indexed by 'code' with a 'date' column taken from the last response.
    """
    base = ('http://money.finance.sina.com.cn/d/api/openapi_proxy.php/'
            '?__s=[[%22hq%22,%22hs_a%22,%22%22,0,{0},500]]'
            '&callback=FDC_DC.theTableData')

    def _fetch(page_no):
        # Strip the JSONP wrapper and parse one page.
        r = requests.get(base.format(page_no), headers=hds())
        text = r.text.split('theTableData(')[1].replace(')\n', '')
        return json.loads(text)

    d = _fetch(1)
    df = pd.DataFrame(d[0]['items'])
    df.columns = d[0]['fields']
    frames = [df]

    pageNo = 2
    while True:
        d = _fetch(pageNo)
        if len(d[0]['items']) < 1:
            print('Exit ....')
            break
        df = pd.DataFrame(d[0]['items'])
        df.columns = d[0]['fields']
        frames.append(df)
        pageNo = pageNo + 1

    # pd.concat replaces the removed DataFrame.append accumulation.
    dff = pd.concat(frames)
    dff['date'] = d[0]['day']
    dff = dff.drop(["symbol", "favor", "guba"], axis=1)
    dff = dff.set_index('code')
    return dff
Esempio n. 11
0
def get_financeindex_all_THS(year, qt):
    """All-stock earnings-report (yjgg) table from 10jqka (THS) for one
    report period.

    year/qt are combined via the module-level ``seasons`` template into
    the date path segment of the URL; returns a DataFrame indexed by
    6-digit stock code with revenue/profit/eps/... columns.
    """
    yqt = seasons[qt] % year
    #print(yqt)

    url = 'http://data.10jqka.com.cn/financial/yjgg/date/{0}/board/ALL/field/DECLAREDATE/order/desc/page/1/ajax/1/'.format(
        yqt)
    r = requests.get(url, timeout=10, headers=hds())
    text = r.text
    html = lxml.html.parse(StringIO(text))
    res = html.xpath(
        '//table[@class="m-table J-ajax-table J-canvas-table"]/tbody//tr')
    # Serialise the matched <tr> nodes back to markup so pd.read_html can
    # parse them; tostring returns bytes on Python 3, str on Python 2.
    if PY3:
        sarr = [etree.tostring(node).decode('utf-8') for node in res]
    else:
        sarr = [etree.tostring(node) for node in res]
    sarr = ''.join(sarr)
    sarr = '<table>%s</table>' % sarr
    df = pd.read_html(sarr)[0]

    # Page count is shown as "current/total" in the pager span.
    pages = html.xpath('//div[@class="m-page J-ajax-page"]//span/text()')
    pages = int(pages[0].split('/')[1])
    #print('Total pages %s'%pages)
    if pages > 1:
        for i in range(2, pages + 1):
            url = 'http://data.10jqka.com.cn/financial/yjgg/date/{0}/board/ALL/field/DECLAREDATE/order/desc/page/{1}/ajax/1/'.format(
                yqt, i)
            r = requests.get(url, timeout=10, headers=hds())
            #print(url)
            text = r.text
            html = lxml.html.parse(StringIO(text))
            res = html.xpath(
                '//table[@class="m-table J-ajax-table J-canvas-table"]/tbody//tr'
            )
            if PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>' % sarr
            df = df.append(pd.read_html(sarr)[0])
            #print(df.tail())

    # Drop the row-number and trailing columns, then label the rest.
    df = df.drop([0, 15], axis=1)
    df.columns = [
        'code', 'name', 'pdate', 'rev', 'rev_yoy', 'rev_hb', 'profit',
        'profit_yoy', 'profit_hb', 'eps', 'nav', 'roe', 'cf_ps', 'margin'
    ]
    df = df.applymap(lambda x: _str2fl(x))
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.set_index('code')
    return df
Esempio n. 12
0
def get_forcast_qq(code=None, year=2017, qu=3, mtype=0):
    """Earnings-forecast announcements from qq.com.

    Parameters
    ----------
    code : str or None
        6-character stock code; None returns all stocks.
    year, qu : int
        Report period, combined via the module-level ``quarter`` template
        (e.g. 20170630).
    mtype : int
        Forecast type: 0-all, 1-increase, 2-decrease, 3-profit, 4-loss,
        5-big rise, 6-big fall, 7-turnaround to profit, 8-loss reduction,
        9-little change, 10-uncertain.
    """
    report = quarter[qu] % year
    # One URL template for both cases; empty gpdm means "all stocks".
    # (The original left ``url`` unbound for non-str, non-None code.)
    gpdm = '' if code is None else code
    base = ('http://message.finance.qq.com/stock/jbnb/get_yjyg.php?gpdm={0}'
            '&type={1}&bgq={2}&p={3}&lmt=50&sort_name=ggrq&order=desc')

    def _fetch(page):
        # Strip the "…yjyg=" JS prefix and parse the JSON payload.
        r = requests.get(base.format(gpdm, mtype, report, page), headers=hds())
        return json.loads(r.text.split("yjyg=")[1])

    data = _fetch(1)
    frames = [pd.DataFrame(data['data']['data'])]
    tpage = data['data']['totalPages']

    for i in range(2, tpage + 1):
        frames.append(pd.DataFrame(_fetch(i)['data']['data']))
    df = pd.concat(frames)

    # Label and zero-pad only when the expected 7 columns came back
    # (replaces a silent bare `except: pass` that hid all errors).
    if df.shape[1] == 7:
        df.columns = ['report.D', 'publish.D', 'code', 'name', 'eps_last',
                      'type', 'describe']
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))

    df = df.applymap(lambda x: wt._tofl(x))
    return df
Esempio n. 13
0
def get_sharesGroupby_f10_EM(code, mtype='hy'):
    """
    Peer-stock table from EastMoney F10 for the industry, region or
    concept a stock belongs to.

    Parameters
    ----------
    code : str
        6-digit stock code (an SH/SZ prefix is added internally).
    mtype : str
        'hy' industry peers, 'dy' region peers, 'gn' concept peers.
        The 'gn' branch is interactive: it prints the concept names and
        reads one from stdin.

    Raises
    ------
    ValueError
        For an unknown mtype (previously this crashed later with a
        NameError on the unbound ``url``).
    """
    if code[0] in ['6', '9']:
        code = 'SH' + code
    elif code[0] in ['0', '2', '3']:
        code = 'SZ' + code

    if mtype == 'hy':
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship/StockRelationshipAjax?code={0}&orderBy=1&isAsc=false'.format(
            code)
    elif mtype == 'dy':
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship3/StockRelationshipAjax?code={0}&orderBy=1&isAsc=false'.format(
            code)
    elif mtype == 'gn':
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship2/GetConceptList?code={0}&orderBy=1&isAsc=true&type=1'.format(
            code)
        r = requests.get(url, headers=hds())
        data = json.loads(r.text)
        dtt = pd.DataFrame(data["Result"])
        dtt = dtt.set_index('glid')
        print(dtt.loc[:, 'glmc'].tolist())
        # Interactive: ask which concept to expand, then query its peers.
        bkname = input("输入概念名称:")
        idd = dtt[dtt['glmc'] == bkname].index[0]
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship2/GetSameConceptStockRankList?code={0}&orderBy=1&typeId={1}&isAsc=false'.format(
            code, idd)
    else:
        raise ValueError("mtype must be one of 'hy', 'dy', 'gn'")

    r = requests.get(url, headers=hds())
    data = json.loads(r.text)

    # hy/dy responses nest the list under stockRandList; gn does not.
    try:
        df = pd.DataFrame(data["Result"]["stockRandList"])
    except (KeyError, TypeError):
        df = pd.DataFrame(data["Result"])

    df = df.replace('--', np.nan)
    df = df.dropna(how='all', axis=1)

    return df
Esempio n. 14
0
def finance_share_news(code):
    """
    Fetch the headline metadata of news items for one stock.
    --------------------------------
    Parameter:
             code: stock code, string like '600026'
    Return:
             DataFrame with columns:
             title:     headline
             url:       address
             datetime:  publication time
             id:        identifier
             code:      related stock code
    """
    code = _set_code(code)
    url = 'http://news2.gtimg.cn/lishinews.php?name=finance_news&symbol={0}&page=1'.format(
        code)
    r = requests.get(url, headers=hds())
    content = r.text
    # BUG FIX: regex patterns must be raw strings; '\d' in a plain string
    # is an invalid escape sequence on modern Python.
    pageN = re.findall(r'"total_page":\d+', content)
    pageNo = int(re.findall(r'\d+', pageN[0])[0]) + 1
    tem = []
    # NOTE(review): range(pageNo) starts at page 0, so page 1 appears to be
    # fetched twice -- preserved from the original implementation.
    for i in range(pageNo):
        url = 'http://news2.gtimg.cn/lishinews.php?name=finance_news&symbol={0}&page={1}'.format(
            code, i)
        r = requests.get(url, headers=hds())
        content = r.content.decode('unicode-escape')  #decode('gb18030')
        # Cut down to the JSON-ish record list and split it into records.
        r = content.split(':{"data":[')[1][:-1]
        r = r.split('}],')[0]
        newslist = r.split('},{')
        for news in newslist:
            news = news.split(',')
            tem1 = []
            for n in news:
                try:
                    # Keep only the value part of each "key":"value" pair;
                    # fields without the separator raise IndexError and
                    # are skipped (was a bare except).
                    value = n.split('":"', 1)[1]
                    value = value.replace('"', '')
                    value = value.replace('\\', '')
                    tem1.append(value)
                except IndexError:
                    pass
            tem.append(tem1)
    df = pd.DataFrame(tem)
    df.columns = ['title', 'url', 'datetime', 'id', 'code']
    return df
Esempio n. 15
0
def LawInterpretation(url='http://www.court.gov.cn/fabu-gengduo-16.html',
                      pgs=3):
    """Download judicial-interpretation articles from court.gov.cn into
    law/sikao/sifa/<date>_<title>.txt, skipping files that already exist.

    Parameters
    ----------
    url : str
        First list page.
    pgs : int
        Total number of list pages to scan (page 1 plus ?page=2..pgs).
    """
    uls = [url]
    if pgs > 1:
        for p in range(2, pgs + 1):
            uls.append('http://www.court.gov.cn/fabu-gengduo-16.html?page=%s' % p)

    datasets = {}
    for url in uls:
        r = requests.get(url, headers=hds())
        txt = r.content.decode('utf8')
        html = lxml.html.parse(StringIO(txt))
        lis = html.xpath('//div[@id="container"]/div[@class="sec_list"]/ul/li')

        for li in lis:
            # Clean the title: strip whitespace, quotes and stray entities.
            name = re.sub(r'\s*', '', li.xpath('a/@title')[0].strip())
            name = name.replace('“', '').replace('”',
                                                 '').replace(':', ':').replace(
                                                     '&nbsp', '')
            tm = li.xpath('i/text()')[0]
            href = 'http://www.court.gov.cn' + li.xpath('a/@href')[0]
            datasets[tm + '_' + name] = href

    for tt, ul in datasets.items():
        path = 'law/sikao/sifa/' + tt + '.txt'
        if os.path.exists(path):
            continue
        rr = requests.get(ul, headers=hds())
        soup = BeautifulSoup(rr.text, 'lxml')
        # BUG FIX: attrs must be a dict ({'class': 'txt_txt'}), not a set.
        txt = soup.find('div', attrs={'class': 'txt_txt'}).text
        print("getting file %s" % path)
        try:
            # 'with' guarantees the handle is closed; the original's
            # finally-block raised NameError when the first open() failed.
            with open(path, 'w', encoding='utf8') as f:
                f.write(txt)
        except Exception:
            with open(path, 'w', encoding='gbk') as f:
                f.write(txt)
        time.sleep(0.1)
    return
Esempio n. 16
0
def get_drogan_tiger_qq(code=None, start=None, end=None):
    """Dragon-tiger (daily billboard, 龙虎榜) records from qq.com.
    ------------------------
    code : stock code to query (None for all stocks)
    start: query start date, e.g. 20170101
    end  : query end date,  e.g. 20170630

    Returns a DataFrame indexed by date; values are floats via wt._tofl.
    """
    # Three query modes: t=0 (everything), t=2 (date range, all stocks),
    # t=1 (date range for one stock).
    if (code is None) and (start is None) and (end is None):
        url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=0&c=&b=&e=&p=1&l=&ol=6&o=desc'
    if (start is not None) and (end is not None):
        if code is None:
            code = ''
            url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=2&c={0}&b={1}&e={2}&p=1&l=&ol=6&o=desc'.format(
                code, start, end)
        else:
            url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=1&c={0}&b={1}&e={2}&p=1&l=&ol=6&o=desc'.format(
                code, start, end)

    #print(url)
    r = requests.get(url, headers=hds())
    text = r.text
    df = _text2pd(text)

    # Total page count is embedded in the JS payload as "_pages:N,_num:".
    tps = int(r.text.split(",_pages:")[1].split(",_num:")[0])
    tpss = tps + 1
    if tps > 1:
        # Re-build the matching URL for each follow-up page (p=2..tps).
        for i in range(2, tpss):
            if (code is None) and (start is None) and (end is None):
                url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=0&c=&b=&e=&p={0}&l=&ol=6&o=desc'.format(
                    i)
            if (start is not None) and (end is not None):
                if code is None:
                    code = ''
                    url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=2&c={0}&b={1}&e={2}&p={3}&l=&ol=6&o=desc'.format(
                        code, start, end, i)
                else:
                    url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=1&c={0}&b={1}&e={2}&p={3}&l=&ol=6&o=desc'.format(
                        code, start, end, i)
            r = requests.get(url, headers=hds())
            text = r.text
            df = df.append(_text2pd(text))

    # Drop the unused 5th column, label, zero-pad codes and sort by date.
    df = df.drop(4, axis=1)
    df.columns = ['date', 'code', 'name', 'descrise', 'price', 'chg%']
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.set_index('date')
    df = df.sort_index()
    df = df.applymap(lambda x: wt._tofl(x))
    return df
Esempio n. 17
0
    def hynewEM(self, code=None):
        """Collect EastMoney industry-news articles for this instance's
        stock, write the combined text to hynewsEM.txt and return it.

        code : optional stock code; when given, ``setcode`` is called first.
        """
        if code is not None:
            self.setcode(code)
        df=wd.get_cashf_alltoday_EM()[['name', 'price', 'Rank.T',\
                                       'Chg%.T','industry', 'indu.ID']]
        # Industry id slice used in the hy<id>.html page URL.
        hyid = df.loc[self.__code, 'indu.ID'][3:6]

        url = 'http://stock.eastmoney.com/hangye/hy{}.html'.format(hyid)
        r = requests.get(url, headers=hds())
        try:
            text = r.content.decode('gbk')
        except Exception:
            text = r.text
        html = lxml.html.parse(StringIO(text))
        dataset1 = []

        # Headline list of the industry page.
        hyinfo = html.xpath(
            '//div[@class="americaleft mt10"]/div[1]//div[@class="deta"]/ul/li'
        )

        for hy in hyinfo:
            text = hy.xpath('a/text()')[0]
            print(text)
            href = hy.xpath('a/@href')[0]
            dataset1.append([text, href])

        df = pd.DataFrame(dataset1, columns=['title', 'href'])

        # Fetch each article body and join its text nodes.
        dataset = []
        for url in df['href']:
            r = requests.get(url, headers=hds())
            try:
                text = r.content.decode('utf8')
            except Exception:
                text = r.text
            html = lxml.html.parse(StringIO(text))
            textc = html.xpath('//div[@id="ContentBody"]//text()')
            dataset.append('\n'.join(textc))

        data = [x.strip() for x in dataset]
        text = '\n'.join(data)
        text = text.replace('\r', '').replace(' ', '')
        text1 = re.sub(r'\n{1,}', r'\n\n', text)
        # BUG FIX: write with an explicit utf8 encoding (the platform
        # default can fail on Chinese text) and close via 'with'.
        with open('hynewsEM.txt', 'w', encoding='utf8') as f:
            f.write(text1)
        return text1
Esempio n. 18
0
def HK_notice_qq(tick):
    """
    Fetch the announcement list for one HK-listed company.
    -------------------------------
    tick: five-digit HK ticker, e.g. 00005
    --
    return :
        DataFrame indexed by announcement date.
    """
    tick = _set_code(tick)
    page = 1
    url = wt.notice_hk.format(tick, page)
    r = requests.get(url, headers=hds())
    text = r.content.decode('utf8')

    # Response is a JS assignment "…notice=<json>"; keep the JSON part.
    text = text.split('notice=')[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    # Keep only the date part of the timestamp.
    df['time'] = df['time'].map(lambda x: x[:10])
    # Drop noise columns when present. Replaces an all-or-nothing try/del
    # block that stopped at the first missing column.
    df = df.drop(columns=['symbols', 'type', 'url', 'src', 'id', 'summary'],
                 errors='ignore')
    df = df.set_index('time')
    return df
Esempio n. 19
0
def HK_news_qq(tick):
    """
    Fetch the news list for one HK-listed company.
    -------------------------------
    tick: five-digit HK ticker, e.g. 00005
    --
    return :
        DataFrame
    """
    tick = _set_code(tick)
    page = 1
    url = wt.news_hk.format(tick, page)
    r = requests.get(url, headers=hds())
    text = r.content.decode('utf8')

    # Response is a JS assignment "…news=<json>"; keep the JSON part.
    text = text.split('news=')[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    # Drop noise columns when present. Replaces an all-or-nothing try/del
    # block that stopped at the first missing column.
    df = df.drop(columns=['symbols', 'type', 'id'], errors='ignore')

    return df
Esempio n. 20
0
def get_cashfl_m163(code):
    """
    quotes.money.163.com
    Historical buy/sell money-flow for one stock, following the
    '下一页' (next page) link until it disappears.
    """
    df = pd.DataFrame()

    url = 'http://quotes.money.163.com/trade/lszjlx_{0},0.html'.format(code)
    # Pagination by exception: the loop ends when there is no next-page
    # link (page.get raises) or any request/parse step fails.
    while True:
        try:

            r = requests.get(url, headers=hds())
            soup = BeautifulSoup(r.text, 'lxml')

            tb = soup.find('table', attrs={'class': "table_bg001 border_box"})
            df = df.append(pd.read_html(str(tb))[0])
            # Row label 0 is the repeated header row of each page's table.
            df = df.drop(0, axis=0)

            page = soup.find('a', text='下一页')
            url = 'http://quotes.money.163.com' + page.get('href')
        except:
            break

    df = df.set_index('日期')
    df = df.sort_index()
    df = df.applymap(lambda x: wc._tofl(x))

    # The site wraps codes in a leading quote (e.g. '600026); strip it.
    try:
        df['股票代码'] = df['股票代码'].map(lambda x: x.replace("'", ''))
    except:
        pass

    return df
Esempio n. 21
0
def get_mk_data_m163(code):
    """
    Historical daily trading data for one stock from quotes.money.163.com,
    covering the last three full years plus the current year, by quarter.
    """
    frames = []
    ty = dt.datetime.today().year
    for year in range(ty - 3, ty + 1):
        for season in [1, 2, 3, 4]:
            url = ('http://quotes.money.163.com/trade/lsjysj_{2}.html'
                   '?year={0}&season={1}').format(year, season, code)
            try:
                r = requests.get(url, headers=hds())
                soup = BeautifulSoup(r.text, 'lxml')
                tb = soup.find(
                    attrs={"class": "table_bg001 border_box limit_sale"})
                frames.append(pd.read_html(str(tb))[0])
            except Exception:
                # Quarters with no data (e.g. future seasons) are skipped.
                pass
    # pd.concat replaces the removed DataFrame.append accumulation.
    df = pd.concat(frames)
    # Row label 0 is the repeated header row of each quarter's table.
    df = df.drop(0, axis=0)
    df = df.set_index('日期')
    df = df.sort_index()
    df = df.applymap(lambda x: wc._tofl(x))
    # Codes come wrapped in a leading quote (e.g. '600026); strip it.
    df['股票代码'] = df['股票代码'].map(lambda x: x.replace("'", ''))
    return df
Esempio n. 22
0
def get_cashflhy_m163(code):
    """
    Money-flow table for the top-10 movers in the same industry.

    Two list types are queried and combined:
        zc -- inflow ranking
        jc -- outflow ranking
    """
    collected = []
    for flow_type in ('zc', 'jc'):
        page_url = ("http://quotes.money.163.com/service/zjlx_table.html"
                    "?symbol={0}&type={1}").format(code, flow_type)
        response = requests.get(page_url, headers=hds())
        # First table on the page; its first row is a header duplicate.
        table = pd.read_html(response.text)[0].drop(0, axis=0)
        collected.append(table)

    result = pd.concat(collected)
    result = result.drop('排名', axis=1)
    result.columns = [
        'name', 'price', 'chg', 'turnover', 'amount', 'out_amount', 'in_amout',
        'net_amount'
    ]
    return result.reset_index(drop=True)
Esempio n. 23
0
def get_cashf_sharehist_min_EM(code, day=today):
    """Per-minute money in/out flow for one stock from EastMoney.

    code : 6-digit stock code; a market suffix is appended internally
           ('1' for SH 6xx/9xx, '2' for SZ 0xx/2xx/3xx).
    day  : number of trading days requested (module-level ``today`` default).

    Returns a DataFrame indexed by time string with columns
    Main/Super/Big/Middle/Small, floats via wt._tofl.
    """
    if code[0] in ['6', '9']:
        code = code + '1'
    elif code[0] in ['0', '2', '3']:
        code = code + '2'

    url = 'http://ff.eastmoney.com/EM_CapitalFlowInterface/api/js?id={0}&type=ff&check=MLBMS&cb=var%20aff_data={1}&js={2}&rtntype=3&acces_token=1942f5da9b46b069953c873404aad4b5'.format(
        code, day, '{(x)}')

    r = requests.get(url, headers=hds())
    # The response is "var aff_data=<day>{...}"; keep the JSON after "=<day>".
    pc = '=' + day
    text = r.text.split(pc)[1]
    #print(text)
    data = json.loads(text)
    # 'ya' holds one comma-separated row of flow values per minute.
    dataset = []
    for i in data['ya']:
        dataset.append(i.split(','))

    df = pd.DataFrame(dataset)

    # 'xa' is the comma-separated time axis; trim it to the row count.
    index = data['xa'].split(',')
    index = index[:df.shape[0]]
    df.index = index
    df = df.replace('', np.nan)
    df = df.dropna(how='all', axis=0)
    df.columns = ['Main', 'Super', 'Big', 'Middle', 'Small']
    df = df.applymap(lambda x: wt._tofl(x))
    return df
Esempio n. 24
0
def grow(year, quarter):
    """Growth-indicator table for all stocks from Sina vFinanceAnalyze.

    Pages are fetched until an empty or failing page ends the loop.
    Returns a DataFrame indexed by zero-padded 6-digit stock code with a
    'date' column of the form '<year>_<quarter:02d>'; all numeric columns
    are cast to float.
    """
    pn = 1
    frames = []
    ws._write_head()
    while True:
        try:
            ws._write_console()
            url = ('http://vip.stock.finance.sina.com.cn/q/go.php/'
                   'vFinanceAnalyze/kind/grow/index.phtml?s_i=&s_a=&s_c='
                   '&reportdate={0}&quarter={1}&p={2}').format(
                       year, quarter, pn)
            r = requests.get(url, headers=hds())
            html = BeautifulSoup(r.content.decode('gbk'), 'lxml')
            text = html.find(id='dataTable')
            df = pd.read_html(str(text), header=0)[0]
            if df.empty:
                break
            frames.append(df)
            pn = pn + 1
        except Exception:
            # Network/parse failure ends paging (best-effort, as before).
            break
    # pd.concat replaces the removed DataFrame.append accumulation.
    DF = pd.concat(frames)
    DF = DF.applymap(lambda x: np.where(x == '--', np.nan, x))
    DF = DF.set_index('股票代码')
    DF.index = DF.index.map(lambda x: str(x).split('.')[0].zfill(6))
    DF['date'] = str(year) + '_' + str(quarter).zfill(2)
    # Cast every column except the name and date columns to float.
    for label in DF.columns:
        if label not in ('股票名称', 'date'):
            DF[label] = DF[label].astype(float)
    return DF
Esempio n. 25
0
def get_text(url):
    """
    Return the article body text of a qq.com news page.
    --------------
    url: page address
    -----------------
    return:
        The text of the 'Cnt-Main-Article-QQ' paragraphs,
        joined with blank lines.
    """
    response = requests.get(url, headers=hds())
    doc = lxml.html.parse(StringIO(response.text))

    paragraphs = doc.xpath('//div[@id="Cnt-Main-Article-QQ"]/p')
    collected = []
    for node in paragraphs:
        try:
            # Paragraphs without a direct text node are skipped.
            collected.append(node.xpath('text()')[0].strip())
        except Exception:
            pass
    return '\n\n'.join(collected)
Esempio n. 26
0
def get_cashf_sharehist_EM(code):
    """Historical per-day money-flow for one stock (amounts in 10k yuan).
    --------------------------------------
    Main.I.net : today's main-force net inflow amount
    Main.I.R   : today's main-force net inflow ratio
    """
    # Append the EastMoney market suffix: '1' for SH, '2' for SZ codes.
    if code[0] in ('6', '9'):
        market_code = code + '1'
    elif code[0] in ('0', '2', '3'):
        market_code = code + '2'
    else:
        market_code = code

    url = ('http://ff.eastmoney.com//EM_CapitalFlowInterface/api/js'
           '?type=hff&rtntype=2&check=TMLBMSPROCR'
           '&acces_token=1942f5da9b46b069953c873404aad4b5'
           '&id={0}').format(market_code)

    response = requests.get(url, headers=hds())
    # Strip the JS array wrapper and turn the rows into CSV lines.
    payload = response.text.replace('(["', '').replace('"])', '')
    payload = payload.replace('","', '\n')
    table = pd.read_csv(StringIO(payload), header=None)
    table.columns = [
        'date', 'Main.I.net', 'Main.I.R', 'Su.I.net', 'Su.I.R', 'Big.I.net',
        'Big.I.R', 'Mid.I.net', 'Mid.I.R', 'Sm.I.net', 'Sm.I.R', 'price',
        'Chg%'
    ]

    table = table.applymap(lambda v: wt._tofl(v))
    return table.set_index('date')
Esempio n. 27
0
def get_shares_GroupbyClassify_EM(concept):
    """Snapshot quotes for all shares in one EastMoney concept/classify group.

    Parameters
    ----------
    concept : str
        Group name; must be a key of the pickled classify mapping stored
        at ``webdata/puse/eastmpy/classified.pkl``.

    Returns
    -------
    DataFrame
        Indexed by 6-digit stock code, with quote columns (close, chg,
        volume, amount, PE, ...) converted to float where possible.

    Notes
    -----
    When *concept* is unknown, the valid keys are printed and the process
    exits via ``sys.exit()`` (original behaviour preserved).
    """
    fp = 'webdata/puse/eastmpy/classified.pkl'
    # Context manager guarantees the handle is closed even if unpickling fails.
    with open(fp, 'rb') as f:
        classify = pickle.load(f)

    # Guard clause: unknown group -> report and exit, as before.
    if concept not in classify:
        print("%s not in classify......" % concept)
        print(classify.keys())
        sys.exit()

    url = 'http://nufm.dfcfw.com/EM_Finance2014NumericApplication/JS.aspx?type=CT&cmd=C.{0}1&sty=FCOIATA&sortType=(ChangePercent)&sortRule=-1&page=1&pageSize=1000&js={1}&token=7bc05d0d4c3c22ef9fca8c2a912d779c'.format(classify[concept], '{%22rank%22:[(x)],%22pages%22:(pc),%22total%22:(tot)}')
    r = requests.get(url, headers=hds())
    dtext = json.loads(r.text)
    # 'rank' is a list of CSV-like record strings; join into one CSV body.
    dd = '\n'.join(dtext['rank'])
    df = pd.read_csv(StringIO(dd), header=None)
    # Drop raw columns the caller never uses (exchange flags, spare fields).
    df = df.drop([0, 13, 14, 15, 16, 18, 17, 19, 20], axis=1)
    df.columns = ['code', 'name', 'close', 'chg', 'chgp%', 'zhengfu%', 'volume', 'amount', 'pre_close', 'open', 'high', 'low', 'chgp_in_5m%', 'liangbi', 'turnover%', 'PE', 'listed_date']
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.set_index('code')
    df = df.replace('-', np.nan)
    for label in df.columns:
        try:
            df[label] = df[label].astype(float)
        except Exception:
            # Columns with unit suffixes/percent signs go through the
            # project's string-to-float helper instead.
            df[label] = df[label].map(lambda x: wt._tofl(x))
    return df
Esempio n. 28
0
def get_financeindex_f9_EM(code, mtype='zhzb'):
    """Financial-indicator table from EastMoney's F9 page for one company.

    Parameters
    ----------
    code : str
        Shanghai/Shenzhen stock code.
    mtype : str
        'zhzb' for by-report-period figures, 'djzb' for single-quarter
        figures (key into the module-level ``f9url`` template table).

    Returns
    -------
    DataFrame
        Transposed indicator table (report-period dates as columns after
        the transpose), index sorted.
    """
    # EastMoney F9 URLs need a market suffix: '01' Shanghai, '02' Shenzhen.
    if code[0] in ['6', '9']:
        code = code + '01'
    elif code[0] in ['0', '2', '3']:
        code = code + '02'

    url = f9url[mtype].format(code)
    r = requests.get(url, headers=hds())

    text = r.content.decode('utf8')
    soup = BeautifulSoup(text, 'lxml')
    tbs = soup.find_all('table')
    # First table holds the indicators; its first row is the header.
    df = pd.read_html(str(tbs[0]))[0]
    df.columns = df.iloc[0, :].tolist()
    df = df.drop(0, axis=0)
    df = df.set_index('报告期日期')
    df = df.dropna(how='all', axis=0)
    df = df.T
    df = df.sort_index()
    return df
Esempio n. 29
0
def get_dadan(code, opt=4):
    """Large-order ("dadan") trade records for one stock from qq.com.

    Defaults to the 400-lot threshold.
    Parameters:
             code:  Shanghai/Shenzhen stock code
             opt:   1-9 selects a volume threshold of 100, 200, 300, 400,
                    500, 800, 1000, 1500, 2000 lots respectively;
                    10-13 selects a turnover threshold of >= 1M, 2M, 5M,
                    10M CNY.
   Return (DataFrame columns):
         time:      trade time
         price:     trade price (CNY)
         volume:    traded lots (1 lot = 100 shares)
         amount:    trade value (10,000 CNY)
         type:      buy (B) or sell (S)
    """
    code = _set_code(code)
    url = 'http://stock.finance.qq.com/sstock/list/view/dadan.php?t=js&c={0}&max=800000&p=1&opt={1}&o=0'.format(
        code, opt)
    raw = requests.get(url, headers=hds()).text
    # Payload sits between ,' and '] in the JS response; records are
    # '^'-separated and fields within a record are '~'-separated.
    body = raw.split(",'")[1]
    body = body.split("']")[0]
    rows = [record.split('~') for record in body.split('^')]
    df = pd.DataFrame(rows)
    df = df.drop(0, axis=1)
    df.columns = ['time', 'price', 'volume', 'amount', 'type']
    for col in ['price', 'volume', 'amount']:
        df[col] = df[col].astype(float)
    return df
Esempio n. 30
0
def get_cashf_concepthist_EM(bkname):
    """Historical money-flow series for an EastMoney sector/concept board.

    Inflow amounts are in units of 10,000 CNY.
    --------------------------------------
    Main.I.net : today's main-force net inflow amount
    Main.I.R   : today's main-force net inflow ratio

    Parameters
    ----------
    bkname : str
        Board name; looked up in the local board table
        (``output/eastmsource/bk.pkl`` with a GBK CSV fallback) to find
        the board id used by the EastMoney API.
    """
    # Prefer the pickled board table; fall back to the GBK-encoded CSV
    # copy. Was a bare `except:` which would also swallow SystemExit /
    # KeyboardInterrupt — narrowed to Exception.
    try:
        df = pd.read_pickle('output/eastmsource/bk.pkl')
    except Exception:
        df = pd.read_csv('output/eastmsource/bk.csv', encoding='gbk')
    # Raises IndexError when bkname is not present in the board table.
    idd = df[df['name'] == bkname]['id'].tolist()[0]
    url = 'http://ff.eastmoney.com//EM_CapitalFlowInterface/api/js?type=hff&rtntype=2&js={1}&cb=var%20aff_data=&check=TMLBMSPROCR&acces_token=1942f5da9b46b069953c873404aad4b5&id={0}1&_=1502340432743'.format(
        idd, '{%22data%22:(x)}')

    r = requests.get(url, headers=hds())
    # Strip the JSONP wrapper and turn the '","' record separator into
    # newlines so the payload parses as CSV.
    text = r.text.split('{"data":["')[1].replace('(["', '').replace('"])', '')
    text = text.replace('"]}', '')
    text = text.replace('","', '\n')
    df = pd.read_csv(StringIO(text), header=None)
    df.columns = [
        'date', 'Main.I.net', 'Main.I.R', 'Su.I.net', 'Su.I.R', 'Big.I.net',
        'Big.I.R', 'Mid.I.net', 'Mid.I.R', 'Sm.I.net', 'Sm.I.R', 'price',
        'Chg%'
    ]

    df = df.applymap(lambda x: wt._tofl(x))
    df = df.set_index('date')
    return df