def get_lhblist(date, proxy):
    """Return the deduplicated 6-digit stock codes appearing on the JRJ
    dragon-tiger (LHB) board for *date*, covering both SH and SZ lists."""
    url_sh = "http://stock.jrj.com.cn/action/lhb/getHsTodaylhb.jspa?vname=list&date=%s&dateType=2&order=desc&sort=netvalue_value&psize=2000" % (date)
    url_sz = "http://stock.jrj.com.cn/action/lhb/getHsTodaylhb.jspa?vname=list&date=%s&dateType=1&order=desc&sort=netvalue_value&psize=2000" % (date)
    codes = set()
    # Fetch SH first, then SZ, and pool every 6-digit code found in either page.
    for url in (url_sh, url_sz):
        page = myspyder(url, proxy=proxy).content.decode('utf-8')  # [9:-3]
        codes.update(re.findall('\d{6}', page))
    return list(codes)
def zrzdt(stype, sdate):
    """Fetch JRJ's limit-up/down statistics for *sdate* and append them to
    the local `zrzdt` table.

    :param stype: statistic-type token used in both the URL and the JS
                  variable name -- TODO confirm the valid values
    :param sdate: datetime.date of the trading day
    :return: the parsed DataFrame, or an empty DataFrame on any failure
    """
    print(sdate, stype)
    strdate = sdate.strftime("%Y%m%d")
    url = "http://home.flashdata2.jrj.com.cn/limitStatistic/%s/%s.js" % (
        stype, strdate)
    engine = conn()
    try:
        html = myspyder(url, proxy=0)
        js = html.content.decode('gbk')
        # Payload is "var zr_<stype>=<literal>;" -- strip the JS wrapper.
        js = js.replace('var zr_%s=' % (stype), '').replace(';', '')
        # NOTE(review): eval() on remote content is unsafe; kept as-is because
        # the payload is not valid JSON.
        js = eval(js)['Data']
        df = pd.DataFrame(js, columns=[
            'code', 'name', 'time', 'close', 'percentage', 'amo', 'amplitude',
            'turnoverate', '5day', 'pe', 'concept_e', 'concept'
        ])
        # Columns not stored in the table.
        del df['concept_e']
        del df['5day']
        # 'time' arrives as HH:MM:SS only; prefix the date for a full timestamp.
        df['time'] = str(sdate) + ' ' + df['time']
        try:
            df.to_sql('zrzdt', engine, schema='stockdata', if_exists='append', index=False)
        except Exception as e:
            print("local:%s[%s]" % (sdate, e))
        return df
    except Exception as e:
        print(sdate, e)
        return pd.DataFrame()
def get_tick_detail(code):
    """Download the full tick list for *code* from eastmoney and archive the
    raw record list gzip-compressed under ./data/tick/.

    :param code: market-suffixed stock code; the trailing character is
                 stripped for the output file name -- TODO confirm scheme
    """
    # print(code[:-1])
    # NOTE(review): '>volume=' in the query string looks like a typo for
    # '&volume=' -- confirm against the API before changing.
    url = "http://mdfm.eastmoney.com/EM_UBG_MinuteApi/Js/Get?dtype=all&id=%s&page=1&rows=10000>volume=&sort=desc" % (
        code)
    html = "error!"
    times_retry = 10
    # Retry until an HTTP 200 response is held or attempts run out.
    while (html == "error!" or html.status_code != 200) and times_retry != 0:
        html = myspyder(url, proxy=0)
        times_retry -= 1
    if html.status_code == 200:
        try:
            # Response body is wrapped in one extra character on each side.
            html = json.loads(html.content.decode('utf-8')[1:-1])
            table = []
            for i in html['value']['data']:
                table.append(re.split(",", i))
            # NOTE(review): df is built but never used or returned -- looks
            # like a leftover from an earlier version; kept as-is.
            df = pd.DataFrame(
                table,
                columns=['time', 'price', 'vol', 1, 2, 3, 4, '成交笔数'])
            with open(
                    "./data/tick/" + code[:-1] + "_" +
                    str(datetime.date.today()) + ".jz", 'wb') as f:
                jz = json.dumps(html['value']['data']).encode('utf-8')
                jz = gzip.compress(jz, compresslevel=9)
                f.write(jz)
        except:
            # Best-effort archive: any parse/IO error is silently ignored.
            pass
def split_pg(proxy):
    """Download the rights-issue ("配股") list from eastmoney.

    :param proxy: proxy switch forwarded positionally to myspyder
    :return: DataFrame with columns 股票代码/配股比例(10配)/配股价/除权日,
             empty on any failure
    """
    pg_url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=NS&sty=NSA&st=6&sr=true&p=1&ps=5000'
    get_url = "error!"
    times_retry = 10
    pgtable = []
    List_Fin_pg = pd.DataFrame()
    # Retry loop; myspyder returns a response object, so this normally
    # exits after the first successful call.
    while get_url == "error!" and times_retry != 0:
        get_url = myspyder(pg_url, proxy)
        times_retry = times_retry - 1
    if get_url != "error!":
        try:
            # Each quoted chunk in the JS payload is one CSV record.
            return_list = re.findall("\"(.*?)\"", get_url.text)
            print("FTSPLIT:配股信息查找完毕,正在写入数据库!")
            for i in range(len(return_list)):
                appd = re.split(",", return_list[i])
                pgtable.append(appd)
            pglist = pd.DataFrame(pgtable, columns=[
                'A', 'B', '股票代码', '股票简称', '配售代码', '配售名称',
                '配股比例(10配)', '配股价', '配股前总股本(万股)', '配股总数',
                '配股后总股本(万股)', '股权登记日', '缴款起始日期', '缴款截止日期',
                '配股上市日', '除权日', '募资总额', '募资净额', '承销方式',
                '公告日', '最新价', 'C'
            ])
            # Keep only the fields used downstream.
            List_Fin_pg = pglist[['股票代码', '配股比例(10配)', '配股价', '除权日']]
        except:
            # Best-effort: fall through and return the empty frame.
            pass
    return List_Fin_pg
def get_news(url, proxy):
    """Scrape an stcn.com news-list page into a DataFrame with columns
    source/type/title/link/datetime."""
    source = 'stcn.com'
    html = "error!"
    times_retry = 3
    # Up to 3 attempts; myspyder returns a response object, so this normally
    # exits after the first call.
    while html == "error!" and times_retry != 0:
        html = myspyder(url, proxy=proxy)
        times_retry = times_retry - 1
    html = html.content
    # print(news.decode('utf-8'))
    newsSoup = bs(html, 'html.parser')
    # The listing lives in the first ".mainlist" element, one <p> per item.
    newslist = newsSoup.select(".mainlist")
    newslist = bs(str(newslist[0]), 'html.parser')
    newslist = newslist.find_all("p")
    result = []
    for i in range(len(newslist)):
        news = str(newslist[i])
        # First <a> holds the category; the next element after it holds the
        # headline text and href; <span> holds the timestamp string.
        stype = bs(news, 'html.parser').a.text
        title = bs(news, 'html.parser').a.next_element.next_element.text
        link = bs(news, 'html.parser').a.next_element.next_element.attrs['href']
        # NOTE(review): this local shadows the datetime module for the rest
        # of the function body.
        datetime = str(bs(news, 'html.parser').span.text)[1:20]
        result.append((source, stype, title, link, datetime))
    result = pd.DataFrame(
        result, columns=['source', 'type', 'title', 'link', 'datetime'])
    result['datetime'] = result['datetime'].astype('datetime64[ns]')
    return result
def update_embasedata(stocklist, proxy=0):
    """Scrape company-profile pages from eastmoney F9 and update `basedata`.

    :param stocklist: DataFrame with columns 证券代码 / 证券简称
    :param proxy: proxy switch forwarded to myspyder
    :return: DataFrame of (证券代码, 证券简称, error) rows that failed;
             also written to error/update_basedata.csv
    """
    Errorlist = []
    engine = conn()
    for i in range(len(stocklist)):
        sqli = "UPDATE `basedata` SET `证券简称` = %s, `公司名称` = %s, `英文名称` = %s, " \
               " `曾用名` = %s, `公司简介` = %s, `成立日期` = %s, `工商登记号` = %s, `注册资本` = %s, `法人代表` = %s, " \
               " `所属证监会行业` = %s, `员工总数` = %s, `总经理` = %s, `董事会秘书` = %s, `省份` = %s, `城市` = %s, " \
               "`注册地址` = %s, `办公地址` = %s, `邮编` = %s, `电话` = %s, `传真` = %s, `电子邮件` = %s, " \
               "`公司网站` = %s, `审计机构` = %s, `法律顾问` = %s, `经营分析` = %s, `简史` = %s, `核心题材` = %s " \
               "WHERE `basedata`.`证券代码` = %s"
        # Suffix '01' for Shanghai (codes starting with 6), '02' otherwise.
        symbol = stocklist['证券代码'][i] + "01" if stocklist['证券代码'][i][
            0] == "6" else stocklist['证券代码'][i] + "02"
        sName = stocklist['证券简称'][i]
        try:
            # gp3 = company-introduction page, gp30 = core-concept page.
            Intr = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp3.php?code=%s' % (symbol),
                proxy=proxy).content
            Conc = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp30.php?code=%s' % (symbol),
                proxy=proxy).content
            IntrSoup = bs(Intr, 'html5lib')
            ConcSoup = bs(Conc, 'html5lib')
            stockdata = []
            stockdata.append(sName)
            # Each width=880 cell maps positionally onto one SQL column --
            # assumes the page layout is stable; verify if updates misalign.
            for tr in IntrSoup.find_all(width=880):
                stockdata.append(txt_pre(tr.text.strip()))
            point = ConcSoup.p
            del point['style']  # drop inline styling before storing raw HTML
            stockdata.append(str(point))
            stockdata.append(symbol[:-2])
            print("BASEDATA:", symbol[:-2], sName, i + 1, "/", len(stocklist),
                  round((i + 1) / (len(stocklist)) * 100, 2))
            engine.execute(sqli, tuple(stockdata))
            sleep((random() / 10 + 1))  # throttle to avoid being blocked
        except Exception as e:
            print(symbol[:-2], sName, e)
            Errorlist.append((symbol[:-2], sName, e))
    Errorlist = pd.DataFrame(Errorlist, columns=['证券代码', '证券简称', 'error'])
    Errorlist.to_csv(path() + '/error/update_basedata.csv')
    return Errorlist
def get_data(self, proxy=0):
    """Fetch every URL in self.urllist (up to 3 attempts each), split each
    GBK-decoded page on ';' and accumulate the fragments into self.htmllist.

    :param proxy: proxy switch forwarded to myspyder
    :return: the accumulated self.htmllist
    """
    for url in self.urllist:
        try:
            html = "error!"
            attempts = 3
            while html == "error!" and attempts != 0:
                html = myspyder(url, proxy=proxy)
                attempts -= 1
        except:
            # Swallow fetch errors; note html may still be "error!" below.
            pass
        text = html.content.decode('gbk').replace("\n", "")
        self.htmllist = self.htmllist + re.split("\;", text)
    return self.htmllist
def get_lhbdetail(code, date, proxy):
    """Fetch the dragon-tiger (LHB) seat-level detail for one stock/date
    from JRJ.

    :return: DataFrame of buy/sell seat records (column names as stored)
    """
    url = "http://stock.jrj.com.cn/action/lhb/getStockLhbDetatil.jspa?vname=detailInfo&stockcode=%s&date=%s"%(code,date)
    html = myspyder(url,proxy=proxy).content.decode('utf-8')
    html = re.split("\;",html)
    # The JSON payload starts 15 chars in (after the JS variable prefix) and
    # may itself contain ';', so re-join fragments until it ends with '}'.
    json_detail = html[0][15:]
    j=1
    while json_detail[-1] != "}":
        json_detail = json_detail+html[j]
        j=j+1
    # else:
    #     json_detail = html[0][15:]
    data = json.loads(json_detail)['data']
    df_detail =pd.DataFrame()
    # data[i][1] holds the seat rows for one list entry; prepend each batch.
    for i in range(len(data)):
        tmp_detail = pd.DataFrame(data[i][1],columns=['date', 'code', '买入金额', '卖出金额', '净买入金额', '净买入金额占总成交额', 'pl', '上榜原因', '买卖方向', '营业部代码', '营业部名称', '买入金额占总成交额', '卖出金额占总成交额', '上榜总成交额'])
        df_detail = pd.concat((tmp_detail,df_detail))
    return df_detail
def get_is_data(date, code):
    """Fetch institutional-shareholding detail for (date, code) from
    eastmoney.

    :return: (ok, DataFrame) -- ok is False only when parsing raised;
             an empty frame with ok=True means the server had no data.
    """
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=ZLSJ&sty=CCJGMX&p=1&ps=5000&fd=%s&code=%s&js=[(x)]" % (date, code)
    html = myspyder(url, proxy=0)
    try:
        doc = html.content.decode('utf-8')
        if doc == '[{stats:false}]':
            # Server reports no data for this pair.
            return True, pd.DataFrame()
        # NOTE(review): eval() on remote content is unsafe; the payload has
        # unquoted keys so it is not valid JSON -- kept for compatibility.
        rows = [re.split(',', record) for record in eval(doc)]
        df = pd.DataFrame(rows, columns=['code','stockname','fundcode','fundname','type','vol','amo','percent','percent_cir','date'])
        return True, df[['code','date','fundcode','fundname','type','vol','amo','percent','percent_cir']]
    except Exception as e:
        print("%s,%s:%s" % (code, date, e))
        return False, pd.DataFrame()
def zdtld(stype, sdate, ser='both'):
    """Fetch JRJ's limit-up/down "Force" statistics for *sdate* and append
    them to the local `zdt` table.

    :param stype: statistic-type token used in the JRJ URL
    :param sdate: datetime.date of the trading day
    :param ser: 'local' or 'both' writes to the DB; other values skip the write
    :return: the parsed DataFrame, or an empty DataFrame on any failure
    """
    strdate = sdate.strftime("%Y%m%d")
    url = "http://home.flashdata2.jrj.com.cn/limitStatistic/%sForce/%s.js" % (
        stype, strdate)
    engine = conn()
    try:
        html = myspyder(url, proxy=0)
        js = html.content.decode('gbk')
        # Payload is a JS assignment; extract just the "Data" array.
        js = re.findall(u'"Data":(.*)\};', js, re.DOTALL)[0]
        js = js.replace('Infinity', '0')  # JSON cannot represent Infinity
        js = json.loads(js)
        df = pd.DataFrame(js, columns=[
            'code', 'name', 'close', 'percentage', 'fcb', 'flb', 'fdmoney',
            'lasttime', 'firsttime', 'opentimes', 'amplitude', 'force'
        ])
        # Times arrive as HH:MM:SS only; prefix the date for full timestamps.
        df['firsttime'] = str(sdate) + ' ' + df['firsttime']
        df['lasttime'] = str(sdate) + ' ' + df['lasttime']
        if ser == 'local' or ser == 'both':
            # BUG FIX: the original contained this block twice, both copies
            # guarded by the same 'local'/'both' test (the second copy's error
            # message said "server"), so every row was appended twice to the
            # same local table. The duplicate insert has been removed.
            try:
                df.to_sql('zdt', engine, schema='stockdata',
                          if_exists='append', index=False)
            except Exception as e:
                print("local:%s[%s]" % (sdate, e))
        return df
    except Exception as e:
        print(sdate, e)
        return pd.DataFrame()
def split_szfh(iLong, proxy=0):
    """Fetch dividend/bonus-share ("送转派") assignments from eastmoney for
    the four most recent report quarters.

    :param iLong: page size passed to the API (rows per quarter)
    :param proxy: proxy switch forwarded to myspyder
    :return: DataFrame with columns code/date/红股/红利, newest first
    """
    today = datetime.date.today()  # - datetime.timedelta(days=lastday)
    iyear = int(str(today)[0:4])
    imonth = int(str(today)[5:7])
    # List_stock = get_stocklist()
    # iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)
    # Quarter report-period codes: pick last year's code until the current
    # year's period has closed.
    Q4 = str(iyear - 1) + '001002'
    Q3 = str(iyear - 1) + '001005' if imonth <= 9 else str(iyear) + '001005'
    Q2 = str(iyear - 1) + '001001' if imonth <= 6 else str(iyear) + '001001'
    Q1 = str(iyear - 1) + '001003' if imonth <= 3 else str(iyear) + '001003'
    List_Quarter = [Q1, Q2, Q3, Q4]
    result = []
    for param in List_Quarter:
        try:
            url = "http://emdatah5.eastmoney.com/FHSZ/V/GetAssignEffectList?SECURITYCODE=&REPORTDATE=%s&TYPE=0&ST=4&SR=2&PAGENUM=1&PAGESIZE=%s&selectedType=2" % (
                param, iLong)
            html = myspyder(url, proxy=proxy)
            js = html.json()
            # Renamed from `list`/`dict` -- the original shadowed builtins.
            records = js['Result']['List']
            for rec in records:
                content = rec['CONTENT']
                code = rec['SECURITYCODESimple']
                date = rec['EXDIVIDENDDATE']
                # Parse "送X转Y派Z" out of the free-text CONTENT field;
                # a missing component counts as 0.
                d1 = re.findall(u"送([0-9.]*)", content)
                d2 = re.findall(u"转([0-9.]*)", content)
                d3 = re.findall(u"派([0-9.]*)", content)
                d1 = float(d1[0]) if len(d1) == 1 else 0.0
                d2 = float(d2[0]) if len(d2) == 1 else 0.0
                d3 = float(d3[0]) if len(d3) == 1 else 0.0
                # 红股 = bonus + converted shares, 红利 = cash dividend.
                result.append([code, date, d1 + d2, d3])
        except Exception:
            # Best-effort per quarter: skip a quarter that fails to download
            # or parse (narrowed from a bare except).
            pass
    result = pd.DataFrame(result,
                          columns=['code', 'date', '红股', '红利']).sort_values(
                              'date', ascending=False).reset_index(drop=True)
    result['date'] = result['date'].astype('datetime64[ns]')
    return result
def update_focussql(N=5):
    """Crawl 163.com "公告汇总" (notice digest) pages, match paragraphs to
    stocks from `basedata`, and upsert per-stock focus text into
    `usefuldata`.

    :param N: number of index pages to crawl (pages 1..N-1)
    """
    engine=conn()
    urllist = []
    # Page 1 has no suffix; later pages are zero-padded two-digit suffixes.
    for i in range(1, N):
        if i == 1:
            urllist.append("http://money.163.com/special/002557S6/newsdata_gp_index.js")
        else:
            page = "0" + str(i) if len(str(i)) == 1 else str(i)
            urllist.append("http://money.163.com/special/002557S6/newsdata_gp_index_%s.js" % (page))
    noticelist = []
    for url in urllist:
        html = myspyder(url, proxy=0)
        doc = html.content.decode('gbk')
        # Strip the JSONP wrapper; NOTE(review): eval() on remote content.
        js = doc.replace('data_callback(', '').replace(')', '')
        js = eval(js)
        for i in range(len(js)):
            # titlelist.append([js[i]['title'],js[i]['time'],js[i]['docurl']])
            if '公告汇总' in js[i]['title']:
                noticelist.append([js[i]['title'], js[i]['time'], js[i]['docurl']])
    sql = "select `证券代码`,`证券简称` from `basedata`"
    stocklist = pd.read_sql(sql, engine)
    final = []
    for i, [title, stime, docurl] in enumerate(noticelist):
        print(i, title, stime, docurl)
        html = myspyder(docurl, proxy=0)
        doc = html.content
        soup = bs(doc, 'html5lib')
        result = soup.select('p')
        updateresult = []
        # NOTE(review): this inner loop shadows the outer enumerate index `i`.
        for i in result:
            if "<strong>" in str(i):
                if ':' in i.text:
                    # "name:text" paragraphs -- match on exact stock name.
                    # Assumes stime starts '%m/%d/%Y' -- TODO confirm feed format.
                    sname, stext = re.split(':', i.text)
                    for code, name in stocklist.values:
                        if sname == name:
                            updateresult.append([code, sname, datetime.datetime.strptime(stime[:10], '%m/%d/%Y'), stext])
                else:
                    # Otherwise match any stock name mentioned in the text.
                    for code, name in stocklist.values:
                        if name in i.text:
                            updateresult.append([code, name, datetime.datetime.strptime(stime[:10], '%m/%d/%Y'), i.text])
        df = pd.DataFrame(updateresult, columns=['code', 'name', 'time', 'text'])
        df['time'] = df['time'].astype('datetime64[ns]')
        # Merge duplicate rows per code.
        # NOTE(review): iterating df['code'].values revisits duplicated codes,
        # so `final` can receive repeated entries; and the pairwise merge below
        # only keeps the last comparison's result -- both look unintended,
        # but the downstream ON DUPLICATE KEY upsert masks them. Verify before
        # changing.
        for code in df['code'].values:
            duplicate = df[df['code'] == code].reset_index(drop=True)
            if len(df[df['code'] == code]) != 1:
                text = ''
                for i in range(len(duplicate) - 1):
                    if duplicate['text'][i][:8] == duplicate['text'][i + 1][:8]:
                        text = duplicate['text'][i]
                    else:
                        text = duplicate['text'][i] + duplicate['text'][i + 1]
                final.append([code, duplicate['name'][0], duplicate['time'][0], text])
            else:
                final.append([code, duplicate['name'][0], duplicate['time'][0], duplicate['text'][0]])
    final = pd.DataFrame(final, columns=['code', 'name', 'time', 'text'])
    # Clamp notice dates to the most recent trading date in `indexdb`.
    sql_lastdate="select DISTINCT `date` from `indexdb` ORDER BY `date` DESC limit 1"
    sdate = pd.read_sql(sql_lastdate, engine)['date'][0]
    checkdatelist=set(final['time'].values)
    for checkdate in checkdatelist:
        sql_check="select DISTINCT `focus` from `usefuldata` WHERE `date` = '%s'"%(str(checkdate))
        scheck = pd.read_sql(sql_check,engine)['focus']
        # Only (re)write dates that have at most one focus entry already.
        if len(scheck)<=1:
            df = final[final['time']==checkdate].reset_index(drop=True).values
            for scode,sname,stime,stext in df:
                stime = datetime.date(int(str(stime)[0:4]),int(str(stime)[5:7]),int(str(stime)[8:10]))
                if stime>sdate:
                    stime=sdate
                else:
                    stime=stime
                params = (scode, str(stime), stext, stext)
                # print(params)
                sql = "insert into `usefuldata` (`code`,`date`,`focus`) VALUE (%s,%s,%s) ON DUPLICATE KEY UPDATE `focus` = %s;"
                try:
                    engine.execute(sql,params)
                except Exception as e:
                    print("local:",e)
                    print(scode,sname,str(stime),stext)
def news_content():
    """Backfill/refresh the `content` column of the `news` table from the
    stcn.com fulltext API for rows with NULL content, plus recent rows whose
    title says the article is still being updated ("更新中")."""
    # ===================
    engine = conn()
    today = datetime.date.today() - datetime.timedelta(days=5)
    # ===================set requests================== #
    # rqs = rq.session()
    # rqs.keep_alive = False
    # ===================get news content================ #
    sql_news_null = text(
        "SELECT * FROM `news` WHERE `content` IS NULL OR (`title` LIKE :ud and `datetime`>=:dt)"
    )
    df_listurl = pd.read_sql(sql_news_null,
                             engine,
                             params={
                                 "ud": "%更新中%",
                                 "dt": str(today)
                             })
    list_url = df_listurl['link'].values
    list_title = df_listurl['title'].values
    list_content = df_listurl['content'].values
    errorlist = []
    # list_url=['http://kuaixun.stcn.com/2017/1110/13761584.shtml']
    #ip_list =pd.read_csv('ip.csv')['ip'].values
    for i in range(len(list_url)):
        time.sleep(random.random() / 10 + 3)  # throttle: ~3s between requests
        # Article id = file-name stem of the link, e.g. ".../13761584.shtml".
        newsid = re.split("\.", re.split("\/", list_url[i])[-1])[0]
        newurl = "http://app.stcn.com/?app=article&controller=article&action=fulltext&contentid=%s" % (
            newsid)
        try:
            # Response is wrapped in extra characters; strip before parsing.
            html = myspyder(newurl, proxy=0).content.decode('utf-8')[1:-2]
            newscontent = json.loads(html)['content']
            # print(newscontent)
            # newsSoup = bs(html, 'html.parser')
            # newsSouptitle = newsSoup.select(".intal_tit")[0].h2.text
            # newsSoup = newsSoup.select(".txt_con")[0]
            # [s.extract() for s in newsSoup('a')]
            # [s.extract() for s in newsSoup('script')]
            # [s.extract() for s in newsSoup('div')]
            # newscontent = str(newsSoup)
            # Drop embedded |STCNTTTP|...|STCNTTTP| media placeholders.
            newscontent = "".join(
                re.split("\|STCNTTTP\|.+\|STCNTTTP\|", newscontent))
            if newscontent == list_content[i]:
                pass  # unchanged -- skip the DB write
            else:
                print("NEW:", list_title[i])
                # print(newscontent)
                sql_update_newscontent = "update `news` set `content`=%s WHERE `link`=%s"
                param = (newscontent, list_url[i])
                engine.execute(sql_update_newscontent, param)
        except Exception as e:
            # print(url,e)
            errorlist.append((list_url[i], e))
    df_errorlist = pd.DataFrame(errorlist, columns=['link', 'error'])
    df_errorlist.to_csv(path() + '/error/update_newscontent.csv')
def get_shareholder_data(stocklist=get_stocklist_prefix('sh', 'sz', 1)):
    """Fetch top-10 holder, top-10 circulating holder, and holder-count data
    from eastmoney F10 for every prefixed code in *stocklist*, and insert
    into the `shareholder`, `cirholder` and `shareholdernumber` tables.

    NOTE(review): the default argument runs a network/DB call at import time
    and is evaluated only once -- prefer passing stocklist explicitly.

    :return: list of codes that raised during processing
    """
    errorlist = []
    engine = conn()
    for code in stocklist:
        url = "http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/ShareholderResearchAjax?code=%s" % (
            code)
        html = myspyder(url, proxy=0)
        try:
            doc = json.loads(html.content)
            ## get shareholder data (top-10 holders, key 'sdgd')
            if len(doc['sdgd']) > 0:
                for i in range(len(doc['sdgd'])):
                    table = pd.DataFrame(doc['sdgd'][i]['sdgd'])
                    del table['bdbl']
                    table = table.rename(
                        columns={
                            'cgs': 'quantity',
                            'gdmc': 'name',
                            'gflx': 'type',
                            'mc': 'rank',
                            'rq': 'date',
                            'zj': 'change',
                            'zltgbcgbl': 'percentage'
                        })
                    table['code'] = code[2:]  # strip the 'sh'/'sz' prefix
                    table = table[[
                        'code', 'date', 'rank', 'name', 'quantity',
                        'percentage', 'change', 'type'
                    ]]
                    # NOTE(review): this inner loop shadows the outer `i`, and
                    # the chained assignment below triggers pandas
                    # SettingWithCopy behavior -- kept as-is.
                    for i in range(len(table)):
                        # '不变' = unchanged -> 0; '新进' = new entry -> full
                        # holding counts as the change.
                        if table['change'][i] == '不变':
                            table['change'][i] = 0
                        if table['change'][i] == '新进':
                            table['change'][i] = table['quantity'][i]
                        params = [str(param) for param in table.values[i]]
                        sql_query = "INSERT IGNORE INTO `shareholder`(`code`, `date`, `rank`, `name`, `quantity`, `percentage`, `change`, `type`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
                        engine.execute(sql_query, params)
            ## get cirholder data (top-10 circulating holders, key 'sdltgd')
            if len(doc['sdltgd']) > 0:
                for j in range(len(doc['sdltgd'])):
                    table2 = pd.DataFrame(doc['sdltgd'][j]['sdltgd'])
                    del table2['bdbl']
                    table2 = table2.rename(
                        columns={
                            'cgs': 'quantity',
                            'gdmc': 'name',
                            'gdxz': 'type',
                            'gflx': 'abh',
                            'mc': 'rank',
                            'rq': 'date',
                            'zj': 'change',
                            'zltgbcgbl': 'percentage'
                        })
                    table2['code'] = code[2:]
                    table2 = table2[[
                        'code', 'date', 'rank', 'name', 'type', 'quantity',
                        'percentage', 'change', 'abh'
                    ]]
                    for i in range(len(table2)):
                        if table2['change'][i] == '不变':
                            table2['change'][i] = 0
                        if table2['change'][i] == '新进':
                            table2['change'][i] = table2['quantity'][i]
                        params = [str(param) for param in table2.values[i]]
                        sql_query = "INSERT ignore INTO `cirholder`(`code`, `date`, `rank`, `name`, `type`, `quantity`, `percentage`, `change`, `abh`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                        engine.execute(sql_query, params)
            ## Holder-count data ("股东人数", key 'gdrs')
            if len(doc['gdrs']) > 0:
                table3 = pd.DataFrame(doc['gdrs'])
                table3 = table3.rename(
                    columns={
                        'cmjzd': 'scr',
                        'gdrs': 'shareholders',
                        'gdrs_jsqbh': 'shschange',
                        'qsdgdcghj': 'top10',
                        'qsdltgdcghj': 'cirtop10',
                        'gj': 'close',
                        'rjcgje': 'avgamount',
                        'rjltg': 'avgcirquantity',
                        'rjltg_jsqbh': 'avgcirchange',
                        'rq': 'date'
                    })
                table3['code'] = code[2:]
                table3 = table3[[
                    'code', 'date', 'close', 'scr', 'top10', 'cirtop10',
                    'shareholders', 'shschange', 'avgamount', 'avgcirquantity',
                    'avgcirchange'
                ]]
                for i in range(len(table3)):
                    # Expand '万' (ten-thousand) suffixed figures to plain numbers.
                    if '万' in table3['shareholders'][i]:
                        table3['shareholders'][i] = float(
                            table3['shareholders'][i].replace('万', '')) * 10000
                    if '万' in table3['avgamount'][i]:
                        table3['avgamount'][i] = float(
                            table3['avgamount'][i].replace('万', '')) * 10000
                    if '万' in table3['avgcirquantity'][i]:
                        table3['avgcirquantity'][i] = float(
                            table3['avgcirquantity'][i].replace('万', '')) * 10000
                    # '--' placeholders become SQL NULLs.
                    params = [
                        str(param) if param != '--' else None
                        for param in table3.values[i]
                    ]
                    sql_query = "INSERT IGNORE INTO `shareholdernumber`(`code`, `date`, `close`, `scr`, `top10`, `cirtop10`, `shareholders`, `shschange`, `avgamount`, `avgcirquantity`, `avgcirchange`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                    engine.execute(sql_query, params)
        except Exception as e:
            errorlist.append(code)
            print("%s:%s" % (code, e))
    # # if ser == "both" or ser == "local":
    #     conn.close()
    # if ser == "both" or ser == "server":
    #     conns.close()
    return errorlist
def update_stocklist(proxy=0):
    """Pull the latest IPO/new-stock list from eastmoney and upsert it into
    `stocklist`; also backfill IPO date/price in `basedata` for entries
    missing a 首发价格.

    :param proxy: 0 close / 1 open
    :return: errorlist (list of codes that failed, or a single failure note)
    """
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=NS&sty=NSSTV5&st=12&sr=true&p=1&ps=100"
    html = "error!"
    times_retry = 3
    while html == "error!" and times_retry != 0:
        html = myspyder(url=url, proxy=proxy)
        times_retry -= 1
    try:
        html_doc = str(html.content, 'utf-8')
        # Each quoted chunk is one comma-separated record.
        return_list = re.findall("\"(.*?)\"", html_doc)
        xgtable = [re.split("\,", elem) for elem in return_list]
        # xg = [[xglist[3],xglist[4],xglist[10],xglist[13]] for xglist in xgtable ]
        xg = pd.DataFrame(xgtable)
        # Positional fields: 3=name, 4=code, 10=IPO price, 13=IPO date --
        # assumes the feed's column order is stable.
        xg = xg[[3, 4, 10, 13]].rename(columns={
            3: "name",
            4: "code",
            10: "ipoprice",
            13: "ipodate"
        })
        xg['ipodate'] = xg['ipodate'].astype('datetime64[ns]')
        xg['name'] = xg['name'].astype('str')
        xg['code'] = xg['code'].astype('str')
        xg = xg.dropna().reset_index(drop=True)
        print("STOCKLIST:数据获取成功,正在写入数据库...")
        engine = conn()
        stocklist = get_stocklist()
        Errorlist = []
        sql_ipocheck = "select `证券代码` from `basedata` WHERE `首发价格` is NULL"
        ipocheck = pd.read_sql(sql_ipocheck, engine)['证券代码'].values
        for i in range(len(xg)):
            code, name = xg['code'][i], xg['name'][i]
            ipoprice, ipodate = xg['ipoprice'][i], xg['ipodate'][i]
            # Pinyin abbreviation; '银行' abbreviates to YH rather than YX.
            pinyin = getpinyin(name) if '银行' not in name else getpinyin(
                name).replace('YX', 'YH')
            market = '上海证券交易所' if code[0] == '6' else '深圳证券交易所'
            # Insert brand-new codes, or refresh ones missing IPO data.
            if code not in stocklist or code in ipocheck:
                try:
                    sql_xg = "INSERT ignore INTO `stocklist`(`证券代码`, `证券简称`, `上市市场`,`拼音缩写`) VALUES (%s,%s,%s,%s)"
                    sql_ipo = "update `basedata` set `首发日期`=%s ,`首发价格`=%s WHERE `证券代码`=%s"
                    engine.execute(sql_xg, (code, name, market, pinyin))
                    # conn.commit()
                    engine.execute(sql_ipo, (str(ipodate), ipoprice, code))
                    # conn.commit()
                    print("STOCKLIST:", code, ":更新成功!")
                except Exception as e:
                    print("STOCKLIST:", code, ":更新失败!", e)
                    Errorlist.append(code)
        output = str(datetime.date.today()) + (" 更新完成!" if len(Errorlist) == 0
                                               else " 更新出错!请检查!")
        print("STOCKLIST:", output)
        return Errorlist
    except:
        # Any fetch/parse failure yields a single sentinel entry.
        return ['数据获取失败...']
def get_forecast(proxy=0, lastday=0, update=1):
    """Fetch earnings-forecast (业绩预告) records from eastmoney for the four
    most recent report quarters and upsert them into the `forecast` table.

    :param proxy: 0 disables / 1 enables the myspyder proxy
    :param lastday: how many days ago the last update ran (0 = today)
    :param update: 1 = incremental (only rows dated >= today-lastday),
                   anything else = process everything fetched
    :return: DataFrame of collected errors (also written to CSV)
    """
    errorList = []
    today = datetime.date.today() - datetime.timedelta(days=lastday)
    print("FORECAST:", today)
    iyear = int(str(today)[0:4])
    imonth = int(str(today)[5:7])
    List_stock = get_stocklist()
    # Page size: stock count rounded up to the next thousand.
    iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)
    # Quarter-end dates: use last year's period until the current year's
    # period has plausibly been reported.
    Q4 = datetime.datetime(iyear - 1, 12, 31).strftime('%Y-%m-%d') if imonth <= 2 else \
        datetime.datetime(iyear, 12, 31).strftime('%Y-%m-%d')
    Q3 = datetime.datetime(iyear - 1, 9, 30).strftime('%Y-%m-%d') if imonth < 8 else \
        datetime.datetime(iyear, 9, 30).strftime('%Y-%m-%d')
    Q2 = datetime.datetime(iyear - 1, 6, 30).strftime('%Y-%m-%d') if imonth < 5 else \
        datetime.datetime(iyear, 6, 30).strftime('%Y-%m-%d')
    Q1 = datetime.datetime(iyear, 3, 31).strftime('%Y-%m-%d')
    List_Quarter = [Q1, Q2, Q3, Q4]
    df_forecast = pd.DataFrame()
    for Quarter in List_Quarter:
        url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=YJYG&fd=%s&st=4&sr=true&p=1&ps=%s' \
            % (Quarter, iLong)
        content = "error!"
        times_retry = 3
        while content == "error!" and times_retry != 0:
            content = myspyder(url, proxy=proxy).content.decode('utf-8')
            times_retry -= 1
        print("FORECAST:", Quarter, "数据抓取完毕,正在对数据进行处理...")
        try:
            # Each quoted chunk is one comma-separated forecast record.
            return_list = re.findall("\"(.*?)\"", content)
            fctable = [re.split("\,", elem) for elem in return_list]
            fctable = pd.DataFrame(fctable,
                                   columns=['code', '股票简称', '业绩变动', '变动幅度',
                                            '预告类型', '同期净利润', '预喜预悲',
                                            'date', '财报日期'])
            fctable = fctable[['code', '业绩变动', '变动幅度', '预告类型',
                               '同期净利润', 'date', '财报日期']]
            df_forecast = pd.concat((df_forecast, fctable)).sort_values(
                'date', ascending=False).drop_duplicates()
        except Exception as e:
            errorList.append(e)
    # BUG FIX: astype()'s keyword is `errors`, not `error` -- the original
    # raised TypeError on this line.
    df_forecast['date'] = df_forecast['date'].astype('datetime64[ns]', errors='ignore')
    df_forecast['财报日期'] = df_forecast['财报日期'].astype('datetime64[ns]', errors='ignore')
    print("FORECAST: 数据处理完毕,正在更新数据库...")
    if update == 1:
        df_forecast = df_forecast[df_forecast['date'] >= today]
    # BUG FIX: after concat the index carries duplicate labels, which broke
    # the label-based get_value(j, ...) lookups below; re-index first and use
    # .at (get_value is removed from modern pandas).
    df_forecast = df_forecast.reset_index(drop=True)
    engine = conn()
    for j in range(len(df_forecast)):
        try:
            Scode = df_forecast.at[j, 'code']
            Sdate = str(df_forecast.at[j, 'date'])
            Schange = df_forecast.at[j, '业绩变动']
            Spercent = df_forecast.at[j, '变动幅度']
            Stype = df_forecast.at[j, '预告类型']
            Sprofit = df_forecast.at[j, '同期净利润']
            Srepdate = str(df_forecast.at[j, '财报日期'])
            # '变动幅度' is "low%~high%" or a single percentage.
            ul = re.split("~", Spercent)
            if len(ul) == 2:
                upper = ul[1].replace('%', '')
                lower = ul[0].replace('%', '')
            elif len(ul) == 1 and ul[0] != '':
                upper = ul[0].replace('%', '')
                lower = None
            else:
                upper = None
                lower = None
            sql_update = "INSERT IGNORE INTO `forecast`(`code`, `date`, `业绩变动`, `变动幅度`, `预告类型`, `同" \
                "期净利润`, `财报日期`,`上限`,`下限`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            param = (Scode, Sdate, Schange, Spercent, Stype, Sprofit,
                     Srepdate, upper, lower)
            engine.execute(sql_update, param)
        except Exception as e:
            errorList.append(e)
    dfErrorList = pd.DataFrame({'error': errorList})
    dfErrorList.to_csv(path() + '/error/update_forecast.csv')
    print("FORECAST: 更新数据库完毕!")
    return dfErrorList
def unusual():
    """Return the raw UTF-8-decoded unusual-movement ("异动") feed from
    eastmoney's position-changes API."""
    url = "http://nuyd.eastmoney.com/EM_UBG_PositionChangesInterface/api/js?style=top&js=[(x)]&dtformat=HH:mm:ss&ac=normal"
    response = myspyder(url, 0)
    return response.content.decode('utf-8')
def notices(page, ser='both', proxy=0):
    """Fetch one page of company announcements from eastmoney and insert new
    rows (by infocode) into the `notice` table.

    :param page: 1-based page index of the announcement listing
    :param ser: kept for interface compatibility; not consulted in this body
    :param proxy: proxy switch forwarded to myspyder
    :return: None on success, the page number on failure
    """
    today = datetime.date.today()  #- datetime.timedelta(days=2)
    sleep(random() / 10 * 2 + 0.5)  # small jitter to avoid hammering the API
    print("NOTICE:page", page)
    engine = conn()
    try:
        url = "http://data.eastmoney.com/notices/getdata.ashx?FirstNodeType=0&CodeType=1&PageIndex=%s&PageSize=1000" % (
            page)
        html = myspyder(url, proxy=proxy).content
        # Strip the JSONP wrapper (7 chars prefix, 1 char suffix).
        js = json.loads(html.decode('gbk')[7:-1])['data']
        table = pd.DataFrame()
        for i in range(len(js)):
            # Flatten the nested one-element lists into the top-level record.
            output = js[i]
            output1 = js[i]['CDSY_SECUCODES'][0]
            output2 = js[i]['ANN_RELCOLUMNS'][0]
            output3 = js[i]['ANN_RELCODES'][0]
            del output['CDSY_SECUCODES'], output['ANN_RELCOLUMNS'], output[
                'ANN_RELCODES']
            output.update(output1)
            output.update(output2)
            output.update(output3)
            # Drop the trailing timezone-ish suffix from both timestamps.
            output['NOTICEDATE'] = output['NOTICEDATE'][:-6]
            output['EUTIME'] = output['EUTIME'][:-6]
            # ATTACHTYPE '0' means the PDF URL must be derived from INFOCODE.
            output['Url'] = "http://pdf.dfcfw.com/pdf/H2_" + output[
                'INFOCODE'] + "_1.pdf" if output[
                    'ATTACHTYPE'] == '0' else output['Url']
            tmp_table = pd.DataFrame.from_dict(output, orient='index')
            table = pd.concat((tmp_table.T, table), ignore_index=True)
        table = table[[
            'NOTICEDATE', 'NOTICETITLE', 'INFOCODE', 'EUTIME', 'Url',
            'SECURITYCODE', 'SECURITYFULLNAME', 'SECURITYTYPE', 'TRADEMARKET',
            'COLUMNNAME'
        ]]
        table['NOTICEDATE'] = table['NOTICEDATE'].astype('datetime64[ns]')
        table['EUTIME'] = table['EUTIME'].astype('datetime64[ns]')
        table = table.rename(
            columns={
                'NOTICEDATE': 'date',
                'NOTICETITLE': 'title',
                'INFOCODE': 'infocode',
                'EUTIME': 'eutime',
                'Url': 'url',
                'SECURITYCODE': 'code',
                'SECURITYFULLNAME': 'name',
                'SECURITYTYPE': 'security_type',
                'TRADEMARKET': 'market',
                'COLUMNNAME': 'type'
            })
        # Only today's announcements are written.
        table = table[table['eutime'] >= today]
        # table.to_csv(path()+'/data/notice/'+str(today)+'.csv',encoding='utf-8')
        # Skip infocodes already stored since yesterday.
        sql_check = "select `infocode` from `notice` where `eutime`>'%s'" % (
            today - datetime.timedelta(days=1))
        list_infocode = pd.read_sql(sql_check, engine)['infocode'].values
        for line in table.values:
            param = [str(ele) for ele in line]
            if param[2] not in list_infocode:
                sql_updae = "insert ignore into `notice` (`date`, `title`, `infocode`, `eutime`, `url`, `code`, `name`, " \
                    "`security_type`, `market`, `type`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                engine.execute(sql_updae, tuple(param))
                # conn.commit()
            else:
                pass
        # return None
    except Exception as e:
        print(e)
        return page
def update_mb_single(code):
    """Fetch the main-business composition (产品/行业/地区 breakdowns) for one
    prefixed stock code from eastmoney F10, archive the raw JSON under
    ./data/mainbusiness/, and upsert the parsed rows via Database().

    :param code: market-prefixed code (e.g. 'sh600000'); prefix is stripped
                 when stored
    :return: the code on failure, otherwise None
    """
    # ser='both'
    proxy = 0
    # if ser == 'local' or ser == 'both':
    # if ser == 'server' or ser == 'both':
    #     conns = serverconn()
    # print("MAINBUSINESS:",code)
    url = "http://emweb.securities.eastmoney.com/PC_HSF10/BusinessAnalysis/BusinessAnalysisAjax?code="
    try:
        html = myspyder(url + code, proxy=proxy)
        js = json.loads(html.content.decode('utf-8'))
        # Keep a raw snapshot of the API response on disk.
        with open("./data/mainbusiness/" + code[2:] + ".json", 'w', encoding='utf-8') as f:
            f.write(str(html.content, encoding='utf-8'))
        # print(js['zyfw'][0]['ms'])
        # print(js['jyps'][0]['ms'])
        table = []
        # Each period in 'zygcfx' carries three breakdowns; tag each row with
        # its category (产品=product, 行业=industry, 地区=region).
        for line in js['zygcfx']:
            listcp = [list(line['cp'][i].values()) + ['产品'] for i in range(len(line['cp']))]
            listhy = [list(line['hy'][i].values()) + ['行业'] for i in range(len(line['hy']))]
            listqy = [list(line['qy'][i].values()) + ['地区'] for i in range(len(line['qy']))]
            list_all = listcp + listhy + listqy
            table = table + list_all
        df = pd.DataFrame(table, columns=['报表日期', '主营构成', '主营收入(元)', '收入比例', '主营成本(元)', '成本比例',
                                          '主营利润(元)', '利润比例', '毛利率(%)', '9', '主营收入', '分类'])
        df['code'] = code[2:]
        df = df[['code', '报表日期', '主营构成', '主营收入', '收入比例', '主营成本(元)', '成本比例',
                 '主营利润(元)', '利润比例', '毛利率(%)', '分类']].values
        # Normalize: strip '%' signs and expand 万/亿/万亿 suffixed amounts.
        for i in range(len(df)):
            df[i][4] = df[i][4].replace('%', '') if isinstance(df[i][4], str) else df[i][4]
            df[i][6] = df[i][6].replace('%', '') if isinstance(df[i][6], str) else df[i][6]
            df[i][8] = df[i][8].replace('%', '') if isinstance(df[i][8], str) else df[i][8]
            df[i][9] = df[i][9].replace('%', '') if isinstance(df[i][9], str) else df[i][9]
            if isinstance(df[i][5], str):
                if '万亿' in df[i][5]:
                    df[i][5] = float(df[i][5].replace('万亿', '')) * 1000000000000
                elif '亿' in df[i][5]:
                    df[i][5] = float(df[i][5].replace('亿', '')) * 100000000
                elif '万' in df[i][5]:
                    df[i][5] = float(df[i][5].replace('万', '')) * 10000
            # NOTE(review): column 7 is converted without the isinstance guard
            # the other columns get -- confirm the feed always sends strings.
            if '万亿' in df[i][7]:
                df[i][7] = float(df[i][7].replace('万亿', '')) * 1000000000000
            elif '亿' in df[i][7]:
                df[i][7] = float(df[i][7].replace('亿', '')) * 100000000
            elif '万' in df[i][7]:
                df[i][7] = float(df[i][7].replace('万', '')) * 10000
        df = pd.DataFrame(df, columns=['code', '报表日期', '主营构成', '主营收入', '收入比例', '主营成本', '成本比例',
                                       '主营利润', '利润比例', '毛利率', '分类'])
        df['报表日期'] = pd.to_datetime(df['报表日期'])
        # Discard pre-2001 and placeholder rows.
        df = df[df['报表日期'] > datetime.date(2001, 1, 1)]
        df = df[df['主营构成'] != '']
        df = df[df['主营构成'] != '--']
        df = df.replace('--', '')
        df['报表日期'] = df['报表日期'].astype('str')
        params = json.loads(df.to_json(orient='records', force_ascii=False))
        db = Database()
        mainbusiness = db.table.mainbusiness
        db.c(mainbusiness, params)
        # (Removed: legacy commented-out direct-SQL insert path that wrote the
        # same rows row-by-row via `insert ignore into mainbusiness`.)
    except Exception as e:
        print("MAINBUSINESS:", code, e)
        return code
def spo(proxy=0):
    """
    http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=5000&st=5
    Update the SPO (secondary public offering, 增发) data from eastmoney.com
    into the local `spo_done` table; only offerings dated within the last
    100 days are written, and a CSV snapshot is kept.

    :param proxy: use proxy: set proxy=1, else proxy=0; default 0
    :return: errorlist
    """
    print("SPO: Running...")
    errorlist = []
    today = datetime.date.today() - datetime.timedelta(days=100)
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=1000&st=5"
    html = myspyder(url, proxy=proxy).content
    # NOTE(review): eval() on remote content is unsafe; kept as-is because
    # the payload is a JS array literal, not strict JSON.
    table = eval(html.decode('utf-8'))
    # NOTE(review): shadows the builtin `list` for the rest of this scope.
    list = []
    for ele in table:
        list.append(re.split("\,", ele))
    df_spo = pd.DataFrame(list, columns=['code', 'name', '发行方式', '发行总数', '发行价格', '现价', '发行日期', '增发上市日期',
                                         '8', '增发代码', '网上发行', '中签号公布日', '中签率', '13', '14', '15', '16'])
    df_spo = df_spo[['code', 'name', '发行方式', '发行总数', '发行价格', '发行日期', '增发上市日期', '增发代码', '网上发行',
                     '中签号公布日', '中签率']]
    df_spo = df_spo.drop_duplicates()
    # df_spo = df_spo.replace('-','')
    # print(df_spo)
    df_spo['发行日期'] = df_spo['发行日期'].astype('datetime64[ns]')
    # Keep only offerings issued in the last 100 days.
    # NOTE(review): this local shadows the function name `spo`.
    spo = df_spo[df_spo['发行日期'] >= today]
    spo.to_csv(path() + '/data/spo_done/spo_' + str(today) + '.csv', encoding='utf-8')
    engine = conn()
    try:
        for elem in spo.values:
            sql_update_spo = "INSERT IGNORE INTO `spo_done`(`code`, `name`, `发行方式`, `发行总数`, `发行价格`, " \
                             "`发行日期`, `增发上市日期`, `增发代码`, `网上发行`, `中签号公布日`, `中签率`) VALUES" \
                             " (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            # '-' placeholders become SQL NULLs.
            params = []
            for param in elem:
                if param != '-':
                    params.append(str(param))
                else:
                    params.append(None)
            # if ser == 'local' or ser == 'both':
            #     conn = localconn()
            #     cur = conn.cursor()
            result = engine.execute(sql_update_spo, params)
            # result.close()
            # conn.commit()
            # spo.to_sql('spo_done',localconn(),flavor='mysql',schema='stockdata',if_exists='append',
            #            index=False,chunksize=10000)
            # if ser == 'server' or ser == 'both':
            #     conns = serverconn()
            #     curs = conns.cursor()
            #     results=conns.execute(sql_update_spo, params)
            #     results.close()
            #     conns.commit()
            #     spo.to_sql('spo_done',serverconn(),flavor='mysql',schema='stockdata',if_exists='append',
            #                index=False,chunksize=10000)
        # if ser == 'local' or ser == 'both':
        #     conn.close()
        # if ser == 'server' or ser == 'both':
        #     conns.close()
        print("SPO: Done!")
    except Exception as e:
        print("SPO:", e)
        errorlist.append(e)
    return errorlist