def qqfinance_InSt(code):
    """Fetch the financial indicators of one stock.

    Parameter:
        code: stock code listed on the Shanghai or Shenzhen exchange
    -------------------------------------
    Return:
        DataFrame: all values in CNY (yuan or 1e4 yuan).
    """
    url = 'http://stock.finance.qq.com/corp1/inst.php?zqdm={0}'.format(code)
    html = requests.get(url, headers=hds())
    h = lxml.html.parse(StringIO(html.text))
    urls = h.xpath('//div[@class="right"]/table[2]/tr/td/a/@href')
    AT = pd.DataFrame()
    for url in urls:
        html = requests.get(url, headers=hds())
        soup = BeautifulSoup(html.text, 'lxml')
        # attrs must be a dict, not a set, for BeautifulSoup to filter on it
        table = soup.find_all('table', attrs={'class': 'list'})
        df = pd.read_html(str(table))[0]
        df = df.dropna(how='all', axis=1)
        df = df.T
        df.columns = df.iloc[0, :]  # .ix was removed from pandas; use .iloc
        df = df.drop(0, axis=0)
        AT = AT.append(df)
    AT = AT.set_index('报表日期')
    AT = AT.dropna(how='all', axis=1)
    AT = AT.applymap(lambda x: _str2fl(x))
    AT = AT.sort_index()
    return AT
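
# Usage sketch for qqfinance_InSt (hedged: assumes the QQ endpoint above is
# still live and that hds()/_str2fl are the module helpers used throughout;
# '600026' is only an illustrative code):
#
#     inst = qqfinance_InSt('600026')
#     inst.tail()          # indicators indexed by 报表日期 (report date)
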
def get_holdernum_change_EM(year, qu):
    """Shareholder counts of all stocks for one report date, e.g. reportdate='2017-06-30'."""
    if year is None:
        reportdate = ''
    else:
        reportdate = quarter[qu] % year
    url = 'http://data.eastmoney.com/DataCenter_V3/gdhs/GetList.ashx?reportdate={0}&market=&changerate==&range==&pagesize=50000&page=1&sortRule=-1&sortType=NoticeDate'.format(reportdate)
    r = requests.get(url, headers=hds())
    data = json.loads(r.text)
    tp = data['pages']
    df = pd.DataFrame(data['data'])
    if tp > 1:
        for i in range(2, tp + 1):
            url = 'http://data.eastmoney.com/DataCenter_V3/gdhs/GetList.ashx?reportdate={0}&market=&changerate==&range==&pagesize=50000&page={1}&sortRule=-1&sortType=NoticeDate'.format(reportdate, i)
            r = requests.get(url, headers=hds())
            data = json.loads(r.text)
            df = df.append(pd.DataFrame(data['data']))
    return df
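
# Usage sketch (hedged: assumes the module-level `quarter` dict maps a quarter
# number to a date template, as the `quarter[qu] % year` call implies):
#
#     df = get_holdernum_change_EM(2017, 2)   # holder counts as of 2017 Q2
#     df.head()
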
def getlist(url, page=2):
    datasets = {}
    for i in range(1, page + 1):
        # page 1 is the bare URL; later pages append ?page=N
        urll = url if i == 1 else url + '?page=%s' % i
        r = requests.get(urll, headers=hds())
        txt = r.content.decode('utf8')
        html = lxml.html.parse(StringIO(txt))
        lis = html.xpath('//div[@id="container"]/div[@class="sec_list"]/ul/li')
        for li in lis:
            name = re.sub(r'\s*', '', li.xpath('a/text()')[0].strip())
            name = name.replace('“', '').replace('”', '')
            date = li.xpath('i/text()')[0]
            name = date + '_' + name
            href = 'http://www.court.gov.cn' + li.xpath('a/@href')[0]
            # optional filter: if name.endswith('指导性案例')
            datasets[name] = href
    return datasets
def get_holder_analys_EM(year, qu):
    """Detailed top-shareholder holdings for one report date, e.g. reportdate='2017-06-30'."""
    reportdate = quarter[qu] % year
    url = "http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?type=NSHDDETAILLA&token=70f12f2f4f091e459a279469fe49eca5&cmd=&st=NDATE,SCODE,RANK&sr=1&p=1&ps=5000&js={1}&filter=(RDATE=%27^{0}^%27)".format(reportdate, '{%22pages%22:(tp),%22data%22:(x)}')
    r = requests.get(url, headers=hds())
    # the endpoint returns pseudo-JSON with unquoted keys; quote them first
    text = r.text.replace('pages:', '"pages":').replace('data:', '"data":')
    data = json.loads(text)
    df = pd.DataFrame(data['data'])
    tp = data['pages']
    if tp > 1:
        for i in range(2, tp + 1):
            url = "http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?type=NSHDDETAILLA&token=70f12f2f4f091e459a279469fe49eca5&cmd=&st=NDATE,SCODE,RANK&sr=1&p={2}&ps=5000&js={1}&filter=(RDATE=%27^{0}^%27)".format(reportdate, '{%22pages%22:(tp),%22data%22:(x)}', i)
            r = requests.get(url, headers=hds())
            text = r.text.replace('pages:', '"pages":').replace('data:', '"data":')
            data = json.loads(text)
            df = df.append(pd.DataFrame(data['data']))
    df['NDATE'] = df['NDATE'].map(lambda x: x[:10])
    df['RDATE'] = df['RDATE'].map(lambda x: x[:10])
    df = df.replace('-', np.nan)
    df = df.drop(['SHAREHDCODE', 'COMPANYCODE', 'SHARESTYPE', 'LTAG'], axis=1)
    df = df.applymap(lambda x: wt._tofl(x))
    df['SCODE'] = df['SCODE'].map(lambda x: str(x).split('.')[0].zfill(6))
    return df
def get_holders_num_qq(rpdate='2017-06-30'):
    url = 'http://web.ifzq.gtimg.cn/fund/zcjj/zcjj/allzc?colum=3&order=desc&page=1&pagesize=50&bgrq={0}&_var=v_jjcg'.format(rpdate)
    r = requests.get(url, headers=hds())
    text = r.text.split("jjcg=")[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    tpage = data['data']['totalPages']
    if tpage > 1:
        for i in range(2, tpage + 1):
            url = 'http://web.ifzq.gtimg.cn/fund/zcjj/zcjj/allzc?colum=3&order=desc&page={1}&pagesize=50&bgrq={0}&_var=v_jjcg'.format(rpdate, i)
            r = requests.get(url, headers=hds())
            text = r.text.split("jjcg=")[1]
            data = json.loads(text)
            df = df.append(pd.DataFrame(data['data']['data']))
    df.columns = ['Total.Num(10K)', 'change(10K)', 'inst.Num', 'Curr.Rate%',
                  'code', 'name']
    df['date'] = rpdate
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    return df
def get_search_inst_num_EM(code=None):
    """Institutional research visits: the full list, or one stock's list."""
    if code is None:
        url = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?pagesize=5000&page=1'
    else:
        url = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?pagesize=5000&page=1&code={0}'.format(code)
    r = requests.get(url, headers=hds())
    data = json.loads(r.text)
    df = pd.DataFrame(data['data'])
    tp = data['pages']
    if tp > 1:
        for i in range(2, tp + 1):
            if code is None:
                url = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?pagesize=5000&page={0}'.format(i)
            else:
                url = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?pagesize=5000&page={1}&code={0}'.format(code, i)
            r = requests.get(url, headers=hds())
            data = json.loads(r.text)
            df = df.append(pd.DataFrame(data['data']))
    if not df.empty:
        del df['CompanyCode']
    return df
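
# Usage sketch: pass no code for the full institutional-research list, or a
# 6-digit code for one stock, per the docstring above ('600026' is an example):
#
#     all_visits = get_search_inst_num_EM()
#     one_stock = get_search_inst_num_EM(code='600026')
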
def get_tick_today_EM(code, mtype=0):
    if code[0] in ['6', '9']:
        code = code + '1'
    if code[0] in ['0', '2', '3']:
        code = code + '2'
    url = 'http://hqdigi2.eastmoney.com/EM_Quote2010NumericApplication/CompatiblePage.aspx?Type=OB&stk={0}&Reference=xml&limit={1}&page=1'.format(code, mtype)
    r = requests.get(url, headers=hds())
    text = r.text.split('=')[1]
    data = text.split('data:["')[1].replace('"]};', '').replace('","', '\n')
    df = pd.read_csv(StringIO(data), header=None)
    page = int(text.split('{pages:')[1].split(',data:')[0])
    if page > 1:
        for i in range(2, page + 1):
            url = 'http://hqdigi2.eastmoney.com/EM_Quote2010NumericApplication/CompatiblePage.aspx?Type=OB&stk={0}&Reference=xml&limit={1}&page={2}'.format(code, mtype, i)
            r = requests.get(url, headers=hds())
            text = r.text.split('=')[1]
            data = text.split('data:["')[1].replace('"]};', '').replace('","', '\n')
            df = df.append(pd.read_csv(StringIO(data), header=None))
    df.columns = ['time', 'price', 'volume', 'type']
    df = df.applymap(lambda x: wt._tofl(x))
    return df
def get_sharesholded_change_EM(code=None, mtype='all'):
    """Shareholders' increases and decreases in their stock holdings.
    -------------------------
    mtype: all -- every change; jjc -- decreases; jzc -- increases.
    """
    if code is None:
        code = ''
    url = 'http://data.eastmoney.com/DataCenter_V3/gdzjc.ashx?pagesize=5000&page=1&param=&sortRule=-1&sortType=BDJZ&tabid={1}&code={0}&name='.format(code, mtype)
    r = requests.get(url, headers=hds())
    text = r.text.split(',data:["')[1].split('"] ,"url')[0]
    text = text.replace('","', '\n')
    df = pd.read_csv(StringIO(text), header=None)
    tp = int(r.text.split(",data:[")[0].split("pages:")[1])
    if tp > 1:
        for i in range(2, tp + 1):
            # propagate the requested tab (mtype) while paging instead of
            # hard-coding tabid=all
            url = 'http://data.eastmoney.com/DataCenter_V3/gdzjc.ashx?pagesize=5000&page={1}&param=&sortRule=-1&sortType=BDJZ&tabid={2}&code={0}&name='.format(code, i, mtype)
            r = requests.get(url, headers=hds())
            text = r.text.split(',data:["')[1].split('"] ,"url')[0]
            text = text.replace('","', '\n')
            df = df.append(pd.read_csv(StringIO(text), header=None))
    df.columns = ['code', 'name', 'price', 'chg', 'holder', 'type', 'chg_num',
                  'chg.cur.rate', 'source', 'hold_num', 'tt.rate',
                  'hold_cur_num', 'cur.rate', 'start', 'end', 'date',
                  'chg.tt.rate']
    df = df.applymap(lambda x: wt._tofl(x))
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.reset_index(drop=True)
    return df
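
# Usage sketch for the mtype switch documented above ('all' / 'jjc' = holder
# decreases / 'jzc' = holder increases); code=None scans the whole market:
#
#     sells = get_sharesholded_change_EM(mtype='jjc')
#     one = get_sharesholded_change_EM(code='600026', mtype='all')  # example code
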
def qqhyxw(code):
    """Fetch the headlines of industry news for a stock."""
    url = 'http://stockhtm.finance.qq.com/sstock/ggcx/{0}.shtml'.format(code)
    html = requests.get(url, headers=hds())
    soup = BeautifulSoup(html.content, 'lxml')
    dataset = []
    url = soup.find_all('a', text='行业新闻')[0].get('href')
    html = requests.get(url, headers=hds())
    content = html.content.decode('gbk')
    text = lxml.html.parse(StringIO(content))
    _read_table(text, dataset)
    for _ in range(11):
        soup = BeautifulSoup(content, 'lxml')
        try:
            nexturl = soup.find('a', text='下一页').get('href')
            html = requests.get(nexturl, headers=hds())
            content = html.content.decode('gbk')
            text = lxml.html.parse(StringIO(content))
            _read_table(text, dataset)
        except Exception as e:
            print(e)
            break  # no "next page" link left; stop paging
    df = pd.DataFrame(dataset)
    df.columns = ['name', 'href', 'datetime']
    return df
def get_pepb_Sina():
    dff = pd.DataFrame()
    url = 'http://money.finance.sina.com.cn/d/api/openapi_proxy.php/?__s=[[%22hq%22,%22hs_a%22,%22%22,0,1,500]]&callback=FDC_DC.theTableData'
    r = requests.get(url, headers=hds())
    text = r.text.split('theTableData(')[1]
    text = text.replace(')\n', '')
    d = json.loads(text)
    df = pd.DataFrame(d[0]['items'])
    df.columns = d[0]['fields']
    dff = dff.append(df)
    pageNo = 2
    while True:
        url = 'http://money.finance.sina.com.cn/d/api/openapi_proxy.php/?__s=[[%22hq%22,%22hs_a%22,%22%22,0,{0},500]]&callback=FDC_DC.theTableData'.format(pageNo)
        r = requests.get(url, headers=hds())
        text = r.text.split('theTableData(')[1]
        text = text.replace(')\n', '')
        d = json.loads(text)
        if len(d[0]['items']) < 1:
            print('Exit ....')
            break
        df = pd.DataFrame(d[0]['items'])
        df.columns = d[0]['fields']
        dff = dff.append(df)
        pageNo = pageNo + 1
    dff['date'] = d[0]['day']
    dff = dff.drop(["symbol", "favor", "guba"], axis=1)
    dff = dff.set_index('code')
    return dff
def get_financeindex_all_THS(year, qt):
    yqt = seasons[qt] % year
    url = 'http://data.10jqka.com.cn/financial/yjgg/date/{0}/board/ALL/field/DECLAREDATE/order/desc/page/1/ajax/1/'.format(yqt)
    r = requests.get(url, timeout=10, headers=hds())
    html = lxml.html.parse(StringIO(r.text))
    res = html.xpath('//table[@class="m-table J-ajax-table J-canvas-table"]/tbody//tr')
    if PY3:
        sarr = [etree.tostring(node).decode('utf-8') for node in res]
    else:
        sarr = [etree.tostring(node) for node in res]
    sarr = ''.join(sarr)
    sarr = '<table>%s</table>' % sarr
    df = pd.read_html(sarr)[0]
    pages = html.xpath('//div[@class="m-page J-ajax-page"]//span/text()')
    pages = int(pages[0].split('/')[1])
    if pages > 1:
        for i in range(2, pages + 1):
            url = 'http://data.10jqka.com.cn/financial/yjgg/date/{0}/board/ALL/field/DECLAREDATE/order/desc/page/{1}/ajax/1/'.format(yqt, i)
            r = requests.get(url, timeout=10, headers=hds())
            html = lxml.html.parse(StringIO(r.text))
            res = html.xpath('//table[@class="m-table J-ajax-table J-canvas-table"]/tbody//tr')
            if PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>' % sarr
            df = df.append(pd.read_html(sarr)[0])
    df = df.drop([0, 15], axis=1)
    df.columns = ['code', 'name', 'pdate', 'rev', 'rev_yoy', 'rev_hb',
                  'profit', 'profit_yoy', 'profit_hb', 'eps', 'nav', 'roe',
                  'cf_ps', 'margin']
    df = df.applymap(lambda x: _str2fl(x))
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.set_index('code')
    return df
def get_forcast_qq(code=None, year=2017, qu=3, mtype=0):
    """Fetch companies' earnings-forecast announcements.

    code: 6-character stock code
    report: reporting period, like 20170630
    mtype: forecast type, 0 - all, 1 - increase expected,
           2 - decrease expected, 3 - profit expected, 4 - loss expected,
           5 - sharp rise expected, 6 - sharp fall expected,
           7 - turnaround expected, 8 - reduced loss expected,
           9 - no major change, 10 - uncertain
    """
    report = quarter[qu] % year
    if code is None:
        url = 'http://message.finance.qq.com/stock/jbnb/get_yjyg.php?gpdm=&type={1}&bgq={0}&p=1&lmt=50&sort_name=ggrq&order=desc'.format(report, mtype)
    elif isinstance(code, str):
        url = 'http://message.finance.qq.com/stock/jbnb/get_yjyg.php?gpdm={0}&type={2}&bgq={1}&p=1&lmt=50&sort_name=ggrq&order=desc'.format(code, report, mtype)
    r = requests.get(url, headers=hds())
    text = r.text.split("yjyg=")[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    tpage = data['data']['totalPages']
    if tpage > 1:
        for i in range(2, tpage + 1):
            if code is None:
                url = 'http://message.finance.qq.com/stock/jbnb/get_yjyg.php?gpdm=&type={1}&bgq={0}&p={2}&lmt=50&sort_name=ggrq&order=desc'.format(report, mtype, i)
            elif isinstance(code, str):
                url = 'http://message.finance.qq.com/stock/jbnb/get_yjyg.php?gpdm={0}&type={3}&bgq={1}&p={2}&lmt=50&sort_name=ggrq&order=desc'.format(code, report, i, mtype)
            r = requests.get(url, headers=hds())
            text = r.text.split("yjyg=")[1]
            data = json.loads(text)
            df = df.append(pd.DataFrame(data['data']['data']))
    try:
        df.columns = ['report.D', 'publish.D', 'code', 'name', 'eps_last',
                      'type', 'describe']
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    except Exception:
        pass
    df = df.applymap(lambda x: wt._tofl(x))
    return df
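
# Usage sketch for the mtype codes enumerated in the docstring (0 = all
# forecasts, 4 = loss expected, 7 = turnaround expected, ...):
#
#     market = get_forcast_qq(year=2017, qu=3, mtype=0)        # whole market
#     one = get_forcast_qq(code='600026', year=2017, qu=3)     # example code
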
def get_sharesGroupby_f10_EM(code, mtype='hy'):
    """Collect peer stocks from the same industry ('hy'), concept ('gn')
    or region ('dy') grouping.
    --------------------------
    code: 6-digit stock code
    """
    if code[0] in ['6', '9']:
        code = 'SH' + code
    if code[0] in ['0', '2', '3']:
        code = 'SZ' + code
    if mtype == 'hy':  # industry
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship/StockRelationshipAjax?code={0}&orderBy=1&isAsc=false'.format(code)
    if mtype == 'dy':  # region
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship3/StockRelationshipAjax?code={0}&orderBy=1&isAsc=false'.format(code)
    if mtype == 'gn':  # concept
        url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship2/GetConceptList?code={0}&orderBy=1&isAsc=true&type=1'.format(code)
    r = requests.get(url, headers=hds())
    data = json.loads(r.text)
    dtt = pd.DataFrame(data["Result"])
    dtt = dtt.set_index('glid')
    print(dtt.loc[:, 'glmc'].tolist())
    bkname = input("Enter a group name: ")
    idd = dtt[dtt['glmc'] == bkname].index[0]
    url = 'http://emweb.securities.eastmoney.com/PC_HSF10/StockRelationship2/GetSameConceptStockRankList?code={0}&orderBy=1&typeId={1}&isAsc=false'.format(code, idd)
    r = requests.get(url, headers=hds())
    data = json.loads(r.text)
    try:
        df = pd.DataFrame(data["Result"]["stockRandList"])
    except Exception:
        df = pd.DataFrame(data["Result"])
    df = df.replace('--', np.nan)
    df = df.dropna(how='all', axis=1)
    return df
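
# Usage note: this function is interactive -- it prints the available group
# names (glmc) and then blocks on input() for one of them, so it is meant for
# console sessions rather than batch jobs:
#
#     peers = get_sharesGroupby_f10_EM('600026', mtype='gn')   # example code
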
def finance_share_news(code):
    """Fetch the headlines and metadata of news for one stock.
    --------------------------------
    Parameter:
        code: stock code, string, e.g. 600026
    Return:
        title: headline
        url: link
        datetime: publication time
        id: identifier
        code: related stock code
    """
    code = _set_code(code)
    url = 'http://news2.gtimg.cn/lishinews.php?name=finance_news&symbol={0}&page=1'.format(code)
    r = requests.get(url, headers=hds())
    content = r.text
    pageN = re.findall(r'"total_page":\d+', content)
    pageNo = int(re.findall(r'\d+', pageN[0])[0]) + 1
    tem = []
    for i in range(pageNo):
        url = 'http://news2.gtimg.cn/lishinews.php?name=finance_news&symbol={0}&page={1}'.format(code, i)
        r = requests.get(url, headers=hds())
        content = r.content.decode('unicode-escape')
        body = content.split(':{"data":[')[1][:-1]
        body = body.split('}],')[0]
        newslist = body.split('},{')
        for news in newslist:
            news = news.split(',')
            tem1 = []
            for n in news:
                try:
                    value = n.split('":"', 1)[1]
                    value = value.replace('"', '')
                    value = value.replace('\\', '')
                    tem1.append(value)
                except IndexError:
                    pass
            tem.append(tem1)
    df = pd.DataFrame(tem)
    df.columns = ['title', 'url', 'datetime', 'id', 'code']
    return df
def LawInterpretation(url='http://www.court.gov.cn/fabu-gengduo-16.html', pgs=3):
    uls = [url]
    datasets = {}
    if pgs > 1:
        for p in range(2, pgs + 1):
            uls.append('http://www.court.gov.cn/fabu-gengduo-16.html?page=%s' % p)
    for url in uls:
        r = requests.get(url, headers=hds())
        txt = r.content.decode('utf8')
        html = lxml.html.parse(StringIO(txt))
        lis = html.xpath('//div[@id="container"]/div[@class="sec_list"]/ul/li')
        for li in lis:
            name = re.sub(r'\s*', '', li.xpath('a/@title')[0].strip())
            name = name.replace('“', '').replace('”', '').replace('：', ':').replace(' ', '')
            tm = li.xpath('i/text()')[0]
            href = 'http://www.court.gov.cn' + li.xpath('a/@href')[0]
            # optional filter: if name.startswith('指导案例')
            name = tm + '_' + name
            datasets[name] = href
    for tt, ul in datasets.items():
        path = 'law/sikao/sifa/' + tt + '.txt'
        if not os.path.exists(path):
            rr = requests.get(ul, headers=hds())
            soup = BeautifulSoup(rr.text, 'lxml')
            # attrs must be a dict, not a set
            txt = soup.find('div', attrs={'class': 'txt_txt'}).text
            print("getting file %s" % path)
            try:
                with open(path, 'w', encoding='utf8') as f:
                    f.write(txt)
            except UnicodeEncodeError:
                with open(path, 'w', encoding='gbk') as f:
                    f.write(txt)
            time.sleep(0.1)
    return
def get_drogan_tiger_qq(code=None, start=None, end=None):
    """Query the dragon-tiger list (top trading seats).
    ------------------------
    code : stock code to query
    start: start date of the query, e.g. 20170101
    end  : end date of the query, e.g. 20170630
    """
    if (code is None) and (start is None) and (end is None):
        url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=0&c=&b=&e=&p=1&l=&ol=6&o=desc'
    if (start is not None) and (end is not None):
        if code is None:
            # market-wide date-range query: t=2 with an empty code; do not
            # overwrite `code`, otherwise the paging below would fall into t=1
            url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=2&c={0}&b={1}&e={2}&p=1&l=&ol=6&o=desc'.format('', start, end)
        else:
            url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=1&c={0}&b={1}&e={2}&p=1&l=&ol=6&o=desc'.format(code, start, end)
    r = requests.get(url, headers=hds())
    df = _text2pd(r.text)
    tps = int(r.text.split(",_pages:")[1].split(",_num:")[0])
    if tps > 1:
        for i in range(2, tps + 1):
            if (code is None) and (start is None) and (end is None):
                url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=0&c=&b=&e=&p={0}&l=&ol=6&o=desc'.format(i)
            if (start is not None) and (end is not None):
                if code is None:
                    url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=2&c={0}&b={1}&e={2}&p={3}&l=&ol=6&o=desc'.format('', start, end, i)
                else:
                    url = 'http://stock.finance.qq.com/cgi-bin/sstock/q_lhb_js?t=1&c={0}&b={1}&e={2}&p={3}&l=&ol=6&o=desc'.format(code, start, end, i)
            r = requests.get(url, headers=hds())
            df = df.append(_text2pd(r.text))
    df = df.drop(4, axis=1)
    df.columns = ['date', 'code', 'name', 'describe', 'price', 'chg%']
    df['code'] = df['code'].map(lambda x: str(x).zfill(6))
    df = df.set_index('date')
    df = df.sort_index()
    df = df.applymap(lambda x: wt._tofl(x))
    return df
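
# Usage sketch for the three query modes handled above:
#
#     get_drogan_tiger_qq()                                    # latest list
#     get_drogan_tiger_qq(start='20170101', end='20170630')    # by date range
#     get_drogan_tiger_qq(code='600026',                       # example code
#                         start='20170101', end='20170630')
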
def hynewEM(self, code=None):
    if code is not None:
        self.setcode(code)
    df = wd.get_cashf_alltoday_EM()[['name', 'price', 'Rank.T', 'Chg%.T',
                                     'industry', 'indu.ID']]
    hyid = df.loc[self.__code, 'indu.ID'][3:6]
    url = 'http://stock.eastmoney.com/hangye/hy{}.html'.format(hyid)
    r = requests.get(url, headers=hds())
    try:
        text = r.content.decode('gbk')
    except UnicodeDecodeError:
        text = r.text
    html = lxml.html.parse(StringIO(text))
    dataset1 = []
    hyinfo = html.xpath('//div[@class="americaleft mt10"]/div[1]//div[@class="deta"]/ul/li')
    for hy in hyinfo:
        text = hy.xpath('a/text()')[0]
        print(text)
        href = hy.xpath('a/@href')[0]
        dataset1.append([text, href])
    df = pd.DataFrame(dataset1, columns=['title', 'href'])
    dataset = []
    for url in df['href']:
        r = requests.get(url, headers=hds())
        try:
            text = r.content.decode('utf8')
        except UnicodeDecodeError:
            text = r.text
        html = lxml.html.parse(StringIO(text))
        textc = html.xpath('//div[@id="ContentBody"]//text()')
        dataset.append('\n'.join(textc))
    data = [x.strip() for x in dataset]
    text = '\n'.join(data)
    text = text.replace('\r', '').replace('\u3000', '')  # strip full-width spaces
    text1 = re.sub(r'\n{1,}', r'\n\n', text)
    with open('hynewsEM.txt', 'w', encoding='utf8') as f:
        f.write(text1)
    return text1
def HK_notice_qq(tick):
    """Fetch a listed company's announcements.
    -------------------------------
    tick: 5-digit Hong Kong ticker, e.g. 00005
    --
    return: DataFrame
    """
    tick = _set_code(tick)
    page = 1
    url = wt.notice_hk.format(tick, page)
    r = requests.get(url, headers=hds())
    text = r.content.decode('utf8')
    text = text.split('notice=')[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    df['time'] = df['time'].map(lambda x: x[:10])
    try:
        del df['symbols']
        del df['type']
        del df['url']
        del df['src']
        del df['id']
        del df['summary']
    except KeyError:
        pass
    df = df.set_index('time')
    return df
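
# Usage sketch ('00005' is the 5-digit HK ticker given as the docstring
# example; wt.notice_hk is the URL template the function assumes):
#
#     notices = HK_notice_qq('00005')
#     notices.head()     # announcements indexed by date
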
def HK_news_qq(tick):
    """Fetch a listed company's news feed.
    -------------------------------
    tick: 5-digit Hong Kong ticker, e.g. 00005
    --
    return: DataFrame
    """
    tick = _set_code(tick)
    page = 1
    url = wt.news_hk.format(tick, page)
    r = requests.get(url, headers=hds())
    text = r.content.decode('utf8')
    text = text.split('news=')[1]
    data = json.loads(text)
    df = pd.DataFrame(data['data']['data'])
    try:
        del df['symbols']
        del df['type']
        del df['id']
    except KeyError:
        pass
    return df
def get_cashfl_m163(code):
    """Fetch a stock's buy/sell cash-flow history from quotes.money.163.com."""
    df = pd.DataFrame()
    url = 'http://quotes.money.163.com/trade/lszjlx_{0},0.html'.format(code)
    while True:
        try:
            r = requests.get(url, headers=hds())
            soup = BeautifulSoup(r.text, 'lxml')
            tb = soup.find('table', attrs={'class': "table_bg001 border_box"})
            df = df.append(pd.read_html(str(tb))[0])
            df = df.drop(0, axis=0)  # drop the header row of each page
            page = soup.find('a', text='下一页')
            url = 'http://quotes.money.163.com' + page.get('href')
        except Exception:
            break  # no further pages
    df = df.set_index('日期')
    df = df.sort_index()
    df = df.applymap(lambda x: wc._tofl(x))
    try:
        df['股票代码'] = df['股票代码'].map(lambda x: x.replace("'", ''))
    except KeyError:
        pass
    return df
def get_mk_data_m163(code):
    """Fetch a stock's historical trading data."""
    df = pd.DataFrame()
    ty = dt.datetime.today().year
    # years = range(1998, ty + 1)  # full history
    years = range(ty - 3, ty + 1)
    for year in years:
        for season in [1, 2, 3, 4]:
            url = 'http://quotes.money.163.com/trade/lsjysj_{2}.html?year={0}&season={1}'.format(year, season, code)
            try:
                r = requests.get(url, headers=hds())
                soup = BeautifulSoup(r.text, 'lxml')
                tb = soup.find(attrs={"class": "table_bg001 border_box limit_sale"})
                df = df.append(pd.read_html(str(tb))[0])
            except Exception:
                pass
    df = df.drop(0, axis=0)
    df = df.set_index('日期')
    df = df.sort_index()
    df = df.applymap(lambda x: wc._tofl(x))
    df['股票代码'] = df['股票代码'].map(lambda x: x.replace("'", ''))
    return df
def get_cashflhy_m163(code):
    """Cash flows of the top-10 movers in the same industry.
    type: jc -- outflow; zc -- inflow.
    """
    Df = pd.DataFrame()
    for mtype in ['zc', 'jc']:
        url = "http://quotes.money.163.com/service/zjlx_table.html?symbol={0}&type={1}".format(code, mtype)
        r = requests.get(url, headers=hds())
        df = pd.read_html(r.text)[0]
        df = df.drop(0, axis=0)
        Df = Df.append(df)
    Df = Df.drop('排名', axis=1)
    Df.columns = ['name', 'price', 'chg', 'turnover', 'amount', 'out_amount',
                  'in_amount', 'net_amount']
    Df = Df.reset_index(drop=True)
    return Df
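
# Usage sketch: one call returns both the top-10 inflow ('zc') and top-10
# outflow ('jc') peers of the given stock, stacked in a single frame:
#
#     peers = get_cashflhy_m163('600026')   # example code
#     peers.sort_values('net_amount')
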
def get_cashf_sharehist_min_EM(code, day=today):
    """Per-minute money inflow/outflow of one stock.
    `day` is the number of trading-day records to request.
    """
    if code[0] in ['6', '9']:
        code = code + '1'
    elif code[0] in ['0', '2', '3']:
        code = code + '2'
    url = 'http://ff.eastmoney.com/EM_CapitalFlowInterface/api/js?id={0}&type=ff&check=MLBMS&cb=var%20aff_data={1}&js={2}&rtntype=3&acces_token=1942f5da9b46b069953c873404aad4b5'.format(code, day, '{(x)}')
    r = requests.get(url, headers=hds())
    pc = '=' + day
    text = r.text.split(pc)[1]
    data = json.loads(text)
    dataset = []
    for i in data['ya']:
        dataset.append(i.split(','))
    df = pd.DataFrame(dataset)
    index = data['xa'].split(',')
    index = index[:df.shape[0]]
    df.index = index
    df = df.replace('', np.nan)
    df = df.dropna(how='all', axis=0)
    df.columns = ['Main', 'Super', 'Big', 'Middle', 'Small']
    df = df.applymap(lambda x: wt._tofl(x))
    return df
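
# Usage sketch (hedged: relies on the module-level `today` default; `day` is
# the number of trading-day records, per the docstring):
#
#     flows = get_cashf_sharehist_min_EM('600026')   # 1-minute flow buckets
#     flows['Main'].tail()
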
def grow(year, quarter):
    pn = 1
    DF = pd.DataFrame()
    ws._write_head()
    while True:
        try:
            ws._write_console()
            url = 'http://vip.stock.finance.sina.com.cn/q/go.php/vFinanceAnalyze/kind/grow/index.phtml?s_i=&s_a=&s_c=&reportdate={0}&quarter={1}&p={2}'.format(year, quarter, pn)
            r = requests.get(url, headers=hds())
            r = r.content.decode('gbk')
            html = BeautifulSoup(r, 'lxml')
            text = html.find(id='dataTable')
            df = pd.read_html(str(text), header=0)[0]
            if df.empty:
                break
            else:
                pn = pn + 1
                DF = DF.append(df)
        except Exception:
            break
    DF = DF.applymap(lambda x: np.where(x == '--', np.nan, x))
    DF = DF.set_index('股票代码')
    DF.index = DF.index.map(lambda x: str(x).split('.')[0].zfill(6))
    DF['date'] = str(year) + '_' + str(quarter).zfill(2)
    name = list(DF.columns)
    name.remove('股票名称')
    name.remove('date')
    for label in name:
        DF[label] = DF[label].astype(float)
    return DF
def get_text(url):
    """Fetch the article text of the page at a URL.
    --------------
    url: the input URL
    -----------------
    return:
        the body text of the page's article paragraphs.
    """
    html = requests.get(url, headers=hds())
    con = lxml.html.parse(StringIO(html.text))
    tt = con.xpath('//div[@id="Cnt-Main-Article-QQ"]/p')
    textset = []
    for t in tt:
        try:
            text = t.xpath('text()')[0]
            text = text.strip()
            textset.append(text)
        except IndexError:
            pass
    return '\n\n'.join(textset)
def get_cashf_sharehist_EM(code):
    """Historical money flows of one stock; inflow amounts are in 1e4 CNY.
    --------------------------------------
    Main.I.net : the day's main-force net inflow amount
    Main.I.R   : the day's main-force net inflow ratio
    """
    if code[0] in ['6', '9']:
        code = code + '1'
    if code[0] in ['0', '2', '3']:
        code = code + '2'
    url = 'http://ff.eastmoney.com//EM_CapitalFlowInterface/api/js?type=hff&rtntype=2&check=TMLBMSPROCR&acces_token=1942f5da9b46b069953c873404aad4b5&id={0}'.format(code)
    r = requests.get(url, headers=hds())
    text = r.text.replace('(["', '').replace('"])', '')
    text = text.replace('","', '\n')
    df = pd.read_csv(StringIO(text), header=None)
    df.columns = ['date', 'Main.I.net', 'Main.I.R', 'Su.I.net', 'Su.I.R',
                  'Big.I.net', 'Big.I.R', 'Mid.I.net', 'Mid.I.R', 'Sm.I.net',
                  'Sm.I.R', 'price', 'Chg%']
    df = df.applymap(lambda x: wt._tofl(x))
    df = df.set_index('date')
    return df
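
# Usage sketch; the columns follow the docstring's naming (Main.I.net = daily
# main-force net inflow in 1e4 CNY, the *.R columns are net-inflow ratios):
#
#     hist = get_cashf_sharehist_EM('600026')   # example code
#     hist['Main.I.net'].tail()
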
def get_shares_GroupbyClassify_EM(concept):
    fp = 'webdata/puse/eastmpy/classified.pkl'
    with open(fp, 'rb') as f:
        classify = pickle.load(f)
    if concept in classify:
        url = 'http://nufm.dfcfw.com/EM_Finance2014NumericApplication/JS.aspx?type=CT&cmd=C.{0}1&sty=FCOIATA&sortType=(ChangePercent)&sortRule=-1&page=1&pageSize=1000&js={1}&token=7bc05d0d4c3c22ef9fca8c2a912d779c'.format(classify[concept], '{%22rank%22:[(x)],%22pages%22:(pc),%22total%22:(tot)}')
        r = requests.get(url, headers=hds())
        dtext = json.loads(r.text)
        dd = '\n'.join(dtext['rank'])
        df = pd.read_csv(StringIO(dd), header=None)
        df = df.drop([0, 13, 14, 15, 16, 17, 18, 19, 20], axis=1)
        df.columns = ['code', 'name', 'close', 'chg', 'chgp%', 'zhengfu%',
                      'volume', 'amount', 'pre_close', 'open', 'high', 'low',
                      'chgp_in_5m%', 'liangbi', 'turnover%', 'PE',
                      'listed_date']
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        df = df.set_index('code')
        df = df.replace('-', np.nan)
        for label in df.columns:
            try:
                df[label] = df[label].astype(float)
            except (TypeError, ValueError):
                df[label] = df[label].map(lambda x: wt._tofl(x))
        return df
    else:
        print("%s not in classify......" % concept)
        print(classify.keys())
        sys.exit()
def get_financeindex_f9_EM(code, mtype='zhzb'):
    """
    code: company stock code
    mtype: report type, zhzb -- by reporting period, djzb -- by single quarter
    """
    if code[0] in ['6', '9']:
        code = code + '01'
    elif code[0] in ['0', '2', '3']:
        code = code + '02'
    url = f9url[mtype].format(code)
    #print(url)
    r = requests.get(url, headers=hds())
    text = r.content.decode('utf8')
    soup = BeautifulSoup(text, 'lxml')
    tbs = soup.find_all('table')
    df = pd.read_html(str(tbs[0]))[0]
    df.columns = df.iloc[0, :].tolist()
    df = df.drop(0, axis=0)
    df = df.set_index('报告期日期')
    df = df.dropna(how='all', axis=0)
    df = df.T
    df = df.sort_index()
    return df
def get_dadan(code, opt=4):
    """Large-trade ("dadan") records; the default threshold is 400 lots.

    Parameters:
        code: Shanghai or Shenzhen stock code
        opt: 1-9 select a volume threshold of 100, 200, 300, 400, 500,
             800, 1000, 1500 or 2000 lots;
             10-13 select a turnover threshold of at least 1,000,000,
             2,000,000, 5,000,000 or 10,000,000 CNY
    Return:
        time: trade time
        price: trade price (CNY)
        volume: traded shares (lots, i.e. multiples of 100 shares)
        amount: trade turnover (1e4 CNY)
        type: buy (B) or sell (S)
    """
    code = _set_code(code)
    url = 'http://stock.finance.qq.com/sstock/list/view/dadan.php?t=js&c={0}&max=800000&p=1&opt={1}&o=0'.format(code, opt)
    r = requests.get(url, headers=hds())
    raw = r.text.split(",'")[1]
    raw = raw.split("']")[0]
    dataset = []
    for data in raw.split('^'):
        dataset.append(data.split('~'))
    df = pd.DataFrame(dataset)
    df = df.drop(0, axis=1)
    df.columns = ['time', 'price', 'volume', 'amount', 'type']
    for label in ['price', 'volume', 'amount']:
        df[label] = df[label].astype(float)
    return df
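
# Usage sketch for the opt buckets listed in the docstring (4 = trades of at
# least 400 lots; 10-13 switch to turnover thresholds):
#
#     big = get_dadan('600026', opt=4)          # example code
#     big[big['type'] == 'S']['amount'].sum()   # total sell turnover, 1e4 CNY
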
def get_cashf_concepthist_EM(bkname):
    """Historical money flows of a board/sector; inflow amounts are in 1e4 CNY.
    --------------------------------------
    Main.I.net : the day's main-force net inflow amount
    Main.I.R   : the day's main-force net inflow ratio
    """
    try:
        df = pd.read_pickle('output/eastmsource/bk.pkl')
    except Exception:
        df = pd.read_csv('output/eastmsource/bk.csv', encoding='gbk')
    idd = df[df['name'] == bkname]['id'].tolist()[0]
    url = 'http://ff.eastmoney.com//EM_CapitalFlowInterface/api/js?type=hff&rtntype=2&js={1}&cb=var%20aff_data=&check=TMLBMSPROCR&acces_token=1942f5da9b46b069953c873404aad4b5&id={0}1&_=1502340432743'.format(idd, '{%22data%22:(x)}')
    r = requests.get(url, headers=hds())
    text = r.text.split('{"data":["')[1].replace('(["', '').replace('"])', '')
    text = text.replace('"]}', '')
    text = text.replace('","', '\n')
    df = pd.read_csv(StringIO(text), header=None)
    df.columns = ['date', 'Main.I.net', 'Main.I.R', 'Su.I.net', 'Su.I.R',
                  'Big.I.net', 'Big.I.R', 'Mid.I.net', 'Mid.I.R', 'Sm.I.net',
                  'Sm.I.R', 'price', 'Chg%']
    df = df.applymap(lambda x: wt._tofl(x))
    df = df.set_index('date')
    return df
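
# Usage sketch (hedged: assumes the local id table output/eastmsource/bk.pkl
# or bk.csv, which the function reads, has been prepared beforehand; the board
# name must match a `name` entry in that table):
#
#     flows = get_cashf_concepthist_EM('证券')   # example board name
#     flows['Main.I.net'].tail()
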