def get_daily_data(self, code, expire=60 * 6):
    """Return the full daily history of one security, cached as a TSV file.

    The cache file lives at ``CT.DAILY_DIR + code``.  It is downloaded again
    through akshare when ``UT.check_file_expired`` reports it as expired or
    when it does not exist; otherwise the cached copy is read back.

    Args:
        code: Security code; converted with ``self._code_to_symbol`` before
            it is sent to akshare.
        expire: Expiry threshold handed to ``UT.check_file_expired`` (its
            unit is defined by that helper).

    Returns:
        The DataFrame parsed from the cache file, or ``None`` when the
        download returned ``None`` or no cache file exists afterwards.
    """
    UT.check_dir(CT.DAILY_DIR)
    cache_file = CT.DAILY_DIR + code
    stale = UT.check_file_expired(cache_file, expire)

    if stale or not os.path.exists(cache_file):
        symbol = self._code_to_symbol(code)
        first_day = CT.START
        last_day = date_time.get_today_str()
        if is_index(code):
            fetched = ak.stock_zh_index_daily(symbol)
        else:
            # Forward-adjusted ('qfq') prices for ordinary stocks.
            fetched = ak.stock_zh_a_daily(symbol, first_day, last_day, 'qfq')
        if fetched is None:
            return None
        fetched.to_csv(cache_file, sep='\t')

    if not os.path.exists(cache_file):
        return None

    # Always hand back what is on disk so fresh and cached results look alike.
    return pd.read_csv(
        cache_file,
        sep='\t',
        skiprows=0,
        parse_dates=True,
        header=0,
        index_col=0,
    )
def get_quotation(self, stock_code):
    """Download quotes for a stock from the network and dump them to CSV.

    The code is prefixed with ``sh``; the fetched frame is printed together
    with its type and written to ``./data/tp/<symbol>.csv``.
    """
    stock_symbol = 'sh{0}'.format(stock_code)
    target_csv = './data/tp/{0}.csv'.format(stock_symbol)

    quotes = ak.stock_zh_a_daily(symbol=stock_symbol, factor="")
    print('df:{0}; {1}'.format(type(quotes), quotes))
    quotes.to_csv(path_or_buf=target_csv, sep=',')
def get_data_from_internet(self, code):
    """Fetch forward-adjusted daily data, add moving averages, save to CSV.

    The output file is ``self.csv_path + <stem> + ".csv"``.  The stem is the
    value mapped to ``code`` in the "first_team" or "second_team" section of
    ``code_list.json`` (first match wins), or ``code`` itself when neither
    section lists it.

    Returns:
        The DataFrame that was written: the first six columns of the
        index-reset akshare frame, rows containing NaN removed, sorted by
        date, plus the 10/60/250-row rolling means of ``close`` stored in
        columns '10', '60' and '250'.
    """
    raw = ak.stock_zh_a_daily(symbol=code, adjust="qfq")

    # Keep every row but only the first six columns, drop incomplete rows
    # and renumber from zero before ordering chronologically.
    frame = raw.reset_index().iloc[:, :6]
    frame = frame.dropna(how='any').reset_index(drop=True)
    frame = frame.sort_values(by='date', ascending=True)

    # Moving averages of the close price.
    for window in (10, 60, 250):
        frame[str(window)] = frame.close.rolling(window).mean()

    # Resolve the output file name from the configured code lists.
    body_json = ReadConfig().read_json("code_list.json")
    first_dict = body_json["first_team"]
    second_dict = body_json["second_team"]
    stem = code
    for team in (first_dict, second_dict):
        if code in team:
            stem = team.get(code)
            break

    frame.to_csv(self.csv_path + stem + ".csv")
    return frame
def get_historical_news(self, start_date=None, end_date=None, freq="day"):
    """Download backward-adjusted daily prices for every symbol into the DB.

    Symbols come from ``self.col_basic_info`` (or from
    ``self.get_stock_code_info()`` when that collection is empty).  One
    document per trading day is inserted into the collection named after the
    symbol.  When a symbol fails, its numeric code is written to
    ``config.STOCK_DAILY_EXCEPTION_TXT_FILE_PATH``; the next run skips every
    symbol whose numeric code is below that checkpoint.

    Args:
        start_date: First day to request ("YYYYMMDD").  Defaults to
            "20150101".
        end_date: Last day to request; forwarded to akshare unchanged, also
            when it is ``None``.
        freq: Only "day" is implemented.  "week", "month", "5mins",
            "15mins", "30mins" and "60mins" are accepted and do nothing.
    """
    stock_symbol_list = self.col_basic_info.distinct("symbol")
    if len(stock_symbol_list) == 0:
        stock_symbol_list = self.get_stock_code_info()

    if freq != "day":
        # Other frequencies are placeholders without an implementation.
        return

    checkpoint_path = config.STOCK_DAILY_EXCEPTION_TXT_FILE_PATH
    start_stock_code = 0
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r") as file:
            raw_checkpoint = file.read()
        logging.info(
            "read {} to get start code number is {} ... ".format(
                checkpoint_path, raw_checkpoint))
        # Parse once, outside the loop; an empty or corrupt checkpoint file
        # restarts from the beginning instead of aborting the whole run.
        try:
            start_stock_code = int(raw_checkpoint.strip())
        except ValueError:
            logging.warning(
                "invalid checkpoint %r in %s, starting from 0",
                raw_checkpoint, checkpoint_path)
            start_stock_code = 0

    for symbol in stock_symbol_list:
        if int(symbol[2:]) < start_stock_code:
            continue
        try:
            # TODO: when the symbol already has history, resume from the
            # latest stored date; otherwise start from 2015-01-01.
            if start_date is None:
                start_date = "20150101"
            daily_df = ak.stock_zh_a_daily(
                symbol=symbol, start_date=start_date, end_date=end_date,
                adjust="hfq")
            # Turn the date index into an ordinary first column.
            daily_df.insert(0, 'date', daily_df.index.tolist())
            daily_df.index = range(len(daily_df))
            _col = self.db_obj.get_collection(self.database_name, symbol)
            for _id in range(daily_df.shape[0]):
                _col.insert_one(daily_df.iloc[_id].to_dict())
            logging.info("{} finished saving ... ".format(symbol))
        except Exception:
            # Best effort: record what went wrong, remember where to resume
            # and carry on with the next symbol.
            logging.exception("failed to save daily prices for %s", symbol)
            with open(checkpoint_path, "w") as file:
                file.write(symbol[2:])
def k_line(symble, title, last=-30, adjust="qfq", dprint=False):
    """Plot a candlestick chart with MA5/MA10 for the latest trading days.

    Args:
        symble: Stock symbol, e.g. "sh601727".
        title: Chart title.
        last: Negative row offset selecting the trailing window; the default
            -30 keeps the last 30 rows.
        adjust: Price adjustment mode passed to akshare; the default "qfq"
            requests forward-adjusted prices.
        dprint: When true, print the raw frame returned by akshare.
    """
    # Keyword arguments: the second positional parameter of
    # stock_zh_a_daily is not ``adjust`` in every akshare release.
    pingan = ak.stock_zh_a_daily(symbol=symble, adjust=adjust)
    if dprint:
        print(pingan)

    df3 = pingan.reset_index().iloc[last:, :6]  # trailing window, 6 columns
    df3 = df3.dropna(how='any')
    # Renumber after sorting so index labels equal the x positions used by
    # the candlestick plot and by the min/max annotations below.
    df3 = df3.sort_values(by='date', ascending=True).reset_index(drop=True)
    df3.info()

    # Moving averages.
    df3['5'] = df3.close.rolling(5).mean()
    df3['10'] = df3.close.rolling(10).mean()

    # Candlesticks.
    fig, ax = plt.subplots(1, 1, figsize=(8, 3), dpi=200)
    candlestick2_ohlc(ax,
                      opens=df3['open'].values,
                      highs=df3['high'].values,
                      lows=df3['low'].values,
                      closes=df3['close'].values,
                      width=0.5,
                      colorup="r",
                      colordown="g")

    # Label the highest high and the lowest low of the window.
    ax.text(df3.high.idxmax(), df3.high.max(), s=df3.high.max(), fontsize=8)
    ax.text(df3.low.idxmin(), df3.low.min() - 2, s=df3.low.min(), fontsize=8)
    ax.set_facecolor("white")
    ax.set_title(title)

    # Moving-average lines.
    plt.plot(df3['5'].values, alpha=0.5, label='MA5')
    plt.plot(df3['10'].values, alpha=0.5, label='MA10')
    ax.legend(facecolor='white', edgecolor='white', fontsize=6)

    # X axis: one tick per row, labelled with the trading date.
    plt.xticks(ticks=np.arange(0, len(df3)),
               labels=df3.date.dt.strftime('%Y-%m-%d').to_numpy())
    plt.xticks(rotation=90, size=4)

    # Y axis: two decimals.
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.show()
def get_stockQuotes(self, stockcode, stardate, enddate):
    """Return forward-adjusted daily quotes for a bare stock code.

    Codes whose first two characters are '60' or '68' get the 'sh' prefix;
    every other code gets 'sz'.

    Args:
        stockcode: Stock code without exchange prefix.
        stardate: Start date forwarded to akshare as ``start_date``.
        enddate: End date forwarded to akshare as ``end_date``.

    Returns:
        Whatever ``ak.stock_zh_a_daily`` returns for the prefixed symbol.
    """
    prefix = 'sh' if stockcode[0:2] in ('60', '68') else 'sz'
    return ak.stock_zh_a_daily(symbol=prefix + stockcode,
                               start_date=stardate,
                               end_date=enddate,
                               adjust="qfq")
def startup(self, code='sh601318', start_cash=100000.0):
    """Run the demo backtest of ``TestStrategy`` on one stock.

    Prints a banner, the broker value before the run and the broker value
    after the run.

    Args:
        code: akshare symbol of the stock to backtest.
        start_cash: Initial cash given to the broker.
    """
    print('回测系统示例程序 v0.0.2')
    engine = bt.Cerebro()
    engine.addstrategy(TestStrategy)

    # Backward-adjusted daily bars from AkShare, wrapped as a backtrader feed.
    bars = ak.stock_zh_a_daily(symbol=code, adjust="hfq")
    feed = bt.feeds.PandasData(dataname=bars)
    engine.adddata(feed)

    # Starting capital and commission scheme.
    engine.broker.setcash(start_cash)
    engine.broker.set_commission_obj(AshareCommInfo())

    print('期初净值: {0:.2f}'.format(engine.broker.getvalue()))
    engine.run()
    print('期末净值: {0:.2f}'.format(engine.broker.getvalue()))
def main(code="sh601318", start_cash=1000000, stake=100, commission_fee=0.001):
    """Optimise ``MyStrategy.maperiod`` over 3..30 on one stock.

    Args:
        code: akshare symbol of the stock to backtest.
        start_cash: Initial cash given to the broker.
        stake: Fixed order size used by the sizer.
        commission_fee: Commission rate passed to the broker.
    """
    engine = bt.Cerebro()  # main controller
    # Parameter sweep: one strategy run per maperiod value.
    engine.optstrategy(MyStrategy, maperiod=range(3, 31))

    # Backward-adjusted daily bars from AkShare, wrapped as a backtrader feed.
    bars = ak.stock_zh_a_daily(symbol=code, adjust="hfq")
    feed = bt.feeds.PandasData(dataname=bars)
    engine.adddata(feed)

    engine.broker.setcash(start_cash)
    engine.broker.setcommission(commission=commission_fee)
    engine.addsizer(bt.sizers.FixedSize, stake=stake)

    print("期初总资金: %.2f" % engine.broker.getvalue())
    engine.run(maxcpus=1)  # run the optimisation on a single core
    print("期末总资金: %.2f" % engine.broker.getvalue())
def __init__(self, symbol):
    """Load the backward-adjusted daily history of ``symbol``.

    Each column of the akshare frame is exposed as ``self.<column>_pds``
    for open, high, low, close, volume, outstanding_share and turnover.

    Args:
        symbol: akshare symbol, e.g. 'sh600582'.
    """
    self.name = 'fas.bktr.MarketDataSource'
    self.event_tick = None
    self.symbol = symbol
    self.market_data = MarketData()

    history = ak.stock_zh_a_daily(symbol=self.symbol, adjust='hfq')
    columns = ('open', 'high', 'low', 'close', 'volume',
               'outstanding_share', 'turnover')
    for column in columns:
        setattr(self, column + '_pds', history[column])
def download(self):
    """Download price history for every ticker of the 'trade' and 'favourite' lists.

    Each list is read from ``self.BASIC + <list> + '.csv'`` (column 'tag').
    The two-character tag prefix selects the data source:

    * ``E_`` – ETF history (``ak.fund_etf_hist_sina``), written without index
    * ``S_`` – forward-adjusted stock history (``ak.stock_zh_a_daily``) minus
      the 'outstanding_share' and 'turnover' columns, written with index
    * ``I_`` – index history (``ak.stock_zh_index_daily``), written with index

    Files go to ``self.AKSHARE + <list> + '/' + <tag> + '.csv'``.  Tickers
    whose download fails are reported, any file left behind for them is
    deleted, and for the 'favourite' list the CSV is rewritten without them.
    """
    # prefix -> (label used in the failure message, fetcher, write index?)
    sources = {
        'E_': ('ETF',
               lambda code: ak.fund_etf_hist_sina(symbol=code),
               False),
        'S_': ('stock',
               lambda code: ak.stock_zh_a_daily(symbol=code, adjust="qfq")
               .drop(['outstanding_share', 'turnover'], axis=1),
               True),
        'I_': ('index',
               lambda code: ak.stock_zh_index_daily(symbol=code),
               True),
    }

    for kind in ['trade', 'favourite']:
        tickers = pd.read_csv(self.BASIC + kind + '.csv',
                              encoding='gb18030')['tag'].values.tolist()
        out_dir = self.AKSHARE + kind + '/'
        removed_tickers = []

        for ticker in tickers:
            source = sources.get(ticker[:2])
            if source is None:
                # Unknown prefix: nothing to download, ticker stays listed.
                continue
            label, fetch, keep_index = source
            try:
                data = fetch(ticker[2:]).drop_duplicates()
                data.to_csv(out_dir + ticker + '.csv',
                            encoding='gb18030', index=keep_index)
            except Exception:
                print('Can not download ' + label + ':', ticker)
                removed_tickers.append(ticker)

        # Remove files left behind for failed tickers, in the same directory
        # they are written to above.
        for ticker in removed_tickers:
            try:
                os.remove(out_dir + ticker + '.csv')
            except OSError:
                pass

        # Persist the cleaned list.  'archive' is not visited by the loop
        # above; it is kept here so adding it to the list keeps working.
        if kind in ('favourite', 'archive'):
            kept = set(tickers).difference(removed_tickers)
            data = pd.DataFrame(list(kept), columns=['tag'])
            data.sort_values(by=['tag'], ascending=[True], inplace=True)
            data.to_csv(self.BASIC + kind + '.csv',
                        encoding='gb18030', index=False)
def insertTodayValue(data,table):
    """Append missing forward-adjusted daily bars of each symbol to a SQL table.

    For every entry of ``data['symbol']`` the latest stored ``date`` and
    ``updateTime`` are looked up in ``table``.  Symbols refreshed less than
    24 hours ago are skipped; the others are downloaded from the day after
    their latest stored date (or from 1990-01-01 when nothing is stored)
    up to today and appended with ``updateTime`` and ``code`` columns.

    Args:
        data: Mapping/DataFrame whose 'symbol' entry lists the symbols.
        table: Target table name.  It is interpolated into the SQL text, so
            it must come from trusted code, never from user input.
    """
    configger.init()
    engine = configger.engine
    now = datetime.datetime.now()
    # Same compact format as start_date so the string comparison below is a
    # valid chronological comparison.
    end_date = now.strftime('%Y%m%d')
    symbols = tqdm(data['symbol'])

    sql = 'select code,max(date) as date,max(updateTime)as updateTime from {} GROUP BY code'.format(table)
    try:
        timeData = pd.read_sql(con=engine, sql=sql, index_col='code')
    except Exception:
        # Typically the table does not exist yet: treat as "nothing stored".
        timeData = pd.DataFrame()

    for code in symbols:
        code = util.removedotBysymbol(code)

        # Skip symbols refreshed within the last 24 hours.  total_seconds()
        # is required: ``.seconds`` is only the sub-day component and is
        # always below 86400.
        try:
            if (not timeData.empty and code in timeData.index
                    and (now - timeData.loc[code, 'updateTime']).total_seconds() < 3600 * 24):
                continue
        except Exception:
            # Unusable timestamp: fall through and refresh the symbol.
            pass

        # Resume the day after the latest stored bar, else from 1990.
        try:
            last_day = pd.to_datetime(timeData.loc[code, 'date'])
            start_date = (last_day + datetime.timedelta(days=1)).strftime('%Y%m%d')
        except Exception:
            start_date = '19900101'

        if start_date > end_date:
            # Already up to date.
            continue

        try:
            result = ak.stock_zh_a_daily(symbol=code, start_date=start_date,
                                         end_date=end_date, adjust="qfq")
            result.reset_index(inplace=True)
            # A positional index becomes a column named 'index'; a date
            # index becomes 'date' and there is nothing to drop.
            result.drop(columns='index', inplace=True, errors='ignore')
            result['updateTime'] = now
            result['code'] = code
            print(result)
            result.to_sql(table, con=engine, if_exists='append', index=False)
            symbols.set_description("查询代码为:{},数据条数为{}".format(code, len(result.index)))
        except Exception:
            traceback.print_exc()
def query_stock_codes_save():
    """Save the forward-adjusted daily prices of every stock in ``stock_2020_1218``.

    Each row of the table supplies a ``symbol``.  Every daily bar that has
    both an open price and a volume is passed to ``save_stock_price_info``
    as a dict holding the bar's columns plus ``date`` and ``stock_code``.
    """
    stock_infos = DBUtils.execute("select * from stock_2020_1218")
    for data in stock_infos:
        print(data['symbol'])
        stock_zh_a_daily_hfq_df = ak.stock_zh_a_daily(symbol=data['symbol'], adjust="qfq")
        # A float NaN never equals the string 'nan', so missing values must
        # be removed with dropna; the literal check below only covers
        # string-typed payloads.
        complete_rows = stock_zh_a_daily_hfq_df.dropna(subset=["open", "volume"])
        for index, stock_price in complete_rows.iterrows():
            if stock_price["open"] == 'nan' or stock_price["volume"] == 'nan':
                continue
            save_stock_price_info({
                "date": index,
                **dict(stock_price),
                "stock_code": data['symbol']
            })
def addData(self, code, name):
    """Download backward-adjusted daily bars of a stock and store them in MySQL.

    Every bar is appended to ``self.stock_datas`` as
    ``[date, open, high, low, close, volume]`` and inserted into the
    ``stock_detail`` table.  Rows are committed one by one; a row that the
    database rejects is rolled back and skipped.

    Args:
        code: Six-digit stock code without exchange prefix.  Codes from
            600000 upwards are treated as Shanghai ('sh'), lower ones as
            Shenzhen ('sz').
        name: Stock name stored alongside each row.
    """
    # ``>=`` so that 600000 itself is routed to Shanghai.
    tmp_stock = ("sh" if int(code) >= 600000 else "sz") + code
    stodk_data_daily = ak.stock_zh_a_daily(symbol=tmp_stock, adjust="hfq")
    print(stodk_data_daily)

    # Placeholders let the driver do the quoting (names may contain quotes).
    sql = ("insert into stock_detail"
           "(code, name, date, open, high, low, close, volume) "
           "values(%s, %s, %s, %s, %s, %s, %s, %s)")

    # Format all dates once instead of rebuilding the list for every row.
    dates = [t.strftime("%Y-%m-%d") for t in stodk_data_daily.index]
    # Columns 0..4 are read positionally as open, high, low, close, volume.
    bars = stodk_data_daily.iloc[:, :5].itertuples(index=False, name=None)

    mydb = mysql.connector.connect(host="localhost",
                                   user="******",
                                   password="******",
                                   database="mydatabase",
                                   charset='utf8')
    try:
        mycursor = mydb.cursor()
        for date, bar in zip(dates, bars):
            # Plain floats: the connector cannot convert numpy scalars.
            values = [float(v) for v in bar]
            self.stock_datas.append([date] + values)
            try:
                mycursor.execute(sql, (code, name, date) + tuple(values))
                mydb.commit()
            except mysql.connector.Error:
                # Best effort: undo the failed row and keep going.
                mydb.rollback()
    finally:
        mydb.close()
def query_bond_stock_codes_save():
    """Save daily prices of the underlying stocks of convertible bonds.

    Bonds are read from ``kezhuanzhai`` (subscription date between
    2015-01-01 and 2020-11-01, exclusive).  The akshare symbol is the
    lower-cased last two characters of the trading venue followed by the
    underlying stock code.  Every daily bar that has both an open price and
    a volume is passed to ``save_stock_price_info``.
    """
    bond_infos = DBUtils.execute(
        "select 正股代码, 申购日期, 交易场所 from kezhuanzhai "
        "where 申购日期>'2015-01-01' and 申购日期< '2020-11-01' order by 正股代码")
    for data in bond_infos:
        stock_code = str(data["交易场所"][-2:]).lower() + data['正股代码']
        print(data['正股代码'])
        stock_zh_a_daily_hfq_df = ak.stock_zh_a_daily(symbol=stock_code, adjust="qfq")
        # A float NaN never equals the string 'nan', so missing values must
        # be removed with dropna; the literal check below only covers
        # string-typed payloads.
        complete_rows = stock_zh_a_daily_hfq_df.dropna(subset=["open", "volume"])
        for index, stock_price in complete_rows.iterrows():
            if stock_price["open"] == 'nan' or stock_price["volume"] == 'nan':
                continue
            save_stock_price_info({
                "date": index,
                **dict(stock_price),
                "stock_code": data['正股代码']
            })
def price_trend(
        stock_code,
        start="20160101",
        end=None
) -> pd.DataFrame:
    """Return daily forward-adjusted prices joined with valuation indicators.

    The result is cached as ``<price_cache_dir()>/<stock_code>.csv``.  When
    that file exists it is returned as is, whatever ``start``/``end`` are;
    delete the file to force a refresh.

    Args:
        stock_code: akshare symbol; its last six characters are used for the
            indicator lookup.
        start: First day to download ("YYYYMMDD").
        end: Last day to download ("YYYYMMDD").  ``None`` means today,
            evaluated at call time rather than once at import time.

    Returns:
        DataFrame indexed by date: the daily price columns left-joined with
        the indicator columns on 'date'.
    """
    if end is None:
        end = datetime.datetime.now().strftime("%Y%m%d")

    data_file = os.path.join(price_cache_dir(), stock_code + '.csv')
    try:
        price_trend_df = pd.read_csv(data_file, index_col=0)
    except FileNotFoundError:
        pass
    else:
        price_trend_df.index = pd.to_datetime(price_trend_df.index)
        return price_trend_df

    # Cache miss: download both tables and join them on the trading date.
    stock_a_indicator_df = ak.stock_a_lg_indicator(stock=stock_code[-6:])
    stock_a_indicator_df['trade_date'] = pd.to_datetime(stock_a_indicator_df['trade_date'])
    stock_a_indicator_df['date'] = stock_a_indicator_df['trade_date']
    stock_zh_a_daily_qfq_df = ak.stock_zh_a_daily(symbol=stock_code, start_date=start,
                                                  end_date=end, adjust="qfq")
    price_trend_df = pd.merge(stock_zh_a_daily_qfq_df, stock_a_indicator_df,
                              how='left', on=['date'])
    price_trend_df.set_index(['date'], drop=True, inplace=True)
    price_trend_df.to_csv(data_file)
    return price_trend_df
def get_data_from_internet(self, code, name):
    """Fetch forward-adjusted daily data, add long moving averages, save to CSV.

    Args:
        code: Stock code as a digit string; left-padded with zeros to six
            characters.  Codes starting with '6' get the 'sh' prefix, codes
            starting with '3' or '0' get 'sz', anything else is passed to
            akshare unchanged.
        name: Stem of the output file ``self.data_path + name + ".csv"``.

    Returns:
        The DataFrame that was written: at most the last 600 rows and first
        six columns of the index-reset akshare frame, rows containing NaN
        removed, sorted by date, plus the 120/180/240-row rolling means of
        ``close`` in columns '120', '180' and '240'.
    """
    code = code.rjust(6, '0')
    if code.startswith('6'):
        code = "sh" + code
    elif code.startswith(('3', '0')):
        code = "sz" + code
    print(code)

    original_data = ak.stock_zh_a_daily(symbol=code, adjust="qfq")

    # Last 600 rows; slicing simply returns fewer rows for a shorter
    # history, so no fallback is needed.
    df = original_data.reset_index().iloc[-600:, :6]
    # Drop incomplete rows, renumber from zero, order chronologically.
    df = df.dropna(how='any').reset_index(drop=True)
    df = df.sort_values(by='date', ascending=True)

    # Moving averages of the close price.
    for window in (120, 180, 240):
        df[str(window)] = df.close.rolling(window).mean()

    file_name = self.data_path + name + ".csv"
    df.to_csv(file_name)
    print("<" + name + ">数据完成")
    return df
def create_df(self):
    """Create the empty per-field DataFrame templates.

    The backward-adjusted history of the first symbol is downloaded only to
    obtain its index; ``open_df``, ``high_df``, ``low_df``, ``close_df``,
    ``turnover_df`` and ``outstanding_df`` are then created as empty frames
    sharing that index.  Returns early, after logging an error, when the
    symbol list is empty or the download fails.
    """
    if len(self.symbols) == 0:
        log.error("股票列表为空,无法创建dataframe")
        return

    try:
        started = time.time()
        data = ak.stock_zh_a_daily(symbol=self.symbols[0], adjust="hfq")
        finished = time.time()
        log.debug("查询一次数据时间(只测了一次,仅做参考):{0}s".format(finished - started))
    except Exception as e:
        log.error("取不到此股票的历史数据,symbol:{0}".format(self.symbols[0]))
        log.error(e)
        return

    log.debug("行索引个数:{0}".format(len(data.index)))
    template_names = ("open_df", "high_df", "low_df", "close_df",
                      "turnover_df", "outstanding_df")
    for attr in template_names:
        setattr(self, attr, pd.DataFrame(index=data.index))
    log.debug("创建dataframe成功")
def extract_stock_records(code='sh600000', start=None, end=None, adjust='qfq', identifier='a'):
    """Download daily bars and reshape them into the project's record layout.

    Args:
        code: Symbol understood by the selected akshare endpoint.
        start: Optional inclusive lower date bound (anything
            ``pd.Timestamp`` accepts).  ``None`` means unbounded.
        end: Optional inclusive upper date bound.  ``None`` means unbounded.
        adjust: 'qfq' for forward-adjusted, 'hfq' for backward-adjusted prices.
        identifier: Market selector: 'a' (A shares), 'hk' or 'us'.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns date,
        open_price, close_price, low_price, high_price, trancaction, total,
        rate, amp.  ``date`` is the number of days since 0001-01-01;
        ``rate`` is the day-over-day change of the close in percent (0 for
        the first row, computed before date filtering); ``total`` and
        ``amp`` hold the placeholder string 'NaN'.

    Raises:
        ValueError: If ``identifier`` is not one of 'a', 'hk', 'us'.
    """
    if identifier == 'a':
        result = ak.stock_zh_a_daily(symbol=code, adjust=adjust)
    elif identifier == 'hk':
        result = ak.stock_hk_daily(symbol=code, adjust=adjust)
    elif identifier == 'us':
        result = ak.stock_us_daily(symbol=code, adjust=adjust)
    else:
        raise ValueError("unknown identifier: {!r}".format(identifier))

    # Strip time-zone information when the endpoint returns an aware index.
    if getattr(result.index, 'tz', None) is not None:
        result.index = result.index.tz_localize(None)

    result['date'] = result.index
    # .copy(): the column edits below must not act on a view of the download.
    result = result[['date', 'open', 'close', 'high', 'low', 'volume']].copy()
    result['total'] = 'NaN'
    result['amp'] = 'NaN'
    result.columns = ['date', 'open_price', 'close_price', 'high_price',
                      'low_price', 'trancaction', 'total', 'amp']

    # (close[i] - close[i-1]) / close[i-1] * 100, with 0 for the first row.
    close = result['close_price']
    rate = (close.diff() / close.shift()) * 100
    if len(rate) > 0:
        rate.iloc[0] = 0
    result['rate'] = rate

    # Each bound is applied on its own so a single bound works too.
    if start is not None:
        result = result[result['date'] >= pd.Timestamp(start)]
    if end is not None:
        result = result[result['date'] <= pd.Timestamp(end)]

    day_numbers = result['date'].apply(
        lambda x: (x.date() - datetime.date(1, 1, 1)).days)
    result = result.assign(date=day_numbers).reset_index(drop=True)
    return result[['date', 'open_price', 'close_price', 'low_price',
                   'high_price', 'trancaction', 'total', 'rate', 'amp']]
def get_history_data(self, adjust=""):
    """Download the history of the known symbols for one adjustment type.

    Refreshes the symbol list, creates the frame templates, downloads each
    symbol, feeds it to ``self.parse_data`` and finally saves the collected
    data as CSV and pickle.  A failing symbol is logged and skipped.

    Args:
        adjust: Adjustment mode forwarded to akshare (e.g. "", "qfq", "hfq").
    """
    self.adjust_type = adjust
    self.get_symbols()
    self.create_df()

    total = len(self.symbols)
    for count, symbol in enumerate(self.symbols, start=1):
        try:
            data = ak.stock_zh_a_daily(symbol=symbol, adjust=self.adjust_type)
            log.debug("当前获取进度{:.2%}:共{},第{}".format(
                count / total, total, count))
            self.parse_data(data, symbol)
            # NOTE(review): processing stops after the 10th symbol; this
            # looks like a debugging limit - confirm before relying on
            # complete data.
            if count == 10:
                break
        except Exception as e:
            # Exceptions raised without arguments have an empty ``args``.
            if e.args and e.args[0] == "No value to decode":
                log.debug("已经退市:{0}".format(symbol))
            else:
                log.error("得到历史数据出错:{0}, {1}".format(symbol, e))

    self.save_csv()
    self.save_pickle()
def get_kline(code, start_date: datetime.date):
    """Return backward-adjusted daily bars from ``start_date`` on as a list of dicts.

    Each dict has the keys datetime, open, high, low, close and volume,
    where ``datetime`` is the row's index value.  Rows are emitted in the
    order akshare returns them.  An empty list is returned when either the
    Sina daily frame or the Tencent index frame is missing or empty.

    NOTE(review): an earlier description promised newest-first ordering and
    use of the Tencent ``amount`` field; neither happens here - confirm
    what callers expect.
    """
    df = ak.stock_zh_a_daily(symbol=get_type(code) + code, adjust="hfq")
    # Tencent frame: only checked for presence below.
    dftx = ak.stock_zh_index_daily_tx(symbol=get_type(code) + code)

    for frame in (df, dftx):
        if frame is None or frame.empty:
            return []

    bars = []
    for row in df.itertuples():
        stamp = row.Index
        if stamp < start_date:
            continue
        bars.append({
            'datetime': stamp,
            'open': row.open,
            'high': row.high,
            'low': row.low,
            'close': row.close,
            'volume': row.volume,
        })
    return bars
elif self.crossover < 0: self.log('创建卖单') self.sell(size=100) ########################## # 主程序开始 ######################### # 创建大脑引擎对象 cerebro = bt.Cerebro() # start_date = datetime(2018, 1, 1) # 回测开始时间 # end_date = datetime(2020, 1, 1) # 回测结束时间 # 利用 AkShare 获取后复权数据 stock_hfq_df = ak.stock_zh_a_daily(symbol="sh600000", adjust="hfq",start_date='20180101',end_date='20200101') # data = bt.feeds.PandasDirectData(dataname=stock_hfq_df, fromdate=start_date, todate=end_date) # 加载数据 print(stock_hfq_df) data = bt.feeds.PandasDirectData(dataname=stock_hfq_df) # 加载数据 cerebro.adddata(data) # 将数据传入回测系统 cerebro.addstrategy(SmaCross) # 将交易策略加载到回测系统中 cerebro.broker.setcash(1000000.0) # 设置初始资金 cerebro.run() # 运行 print('最终市值: %.2f' % cerebro.broker.getvalue())
def stock_zh_a_daily_all(self, symbol):
    """Return raw, backward- and forward-adjusted daily bars plus both factor tables.

    Args:
        symbol: akshare symbol, e.g. "sh600000".

    Returns:
        dict with the keys
        "defaut" (unadjusted frame as returned by akshare),
        "hfq" (open/high/close/low multiplied by the hfq factor),
        "qfq" (open/high/close/low divided by the qfq factor),
        "hfq-factor" and "qfq-factor" (the factor tables indexed by date).
    """

    def _load_factor(url_template, column):
        """Download one adjustment-factor table, indexed by date."""
        res = requests.get(url_template.format(symbol))
        # NOTE(security): eval() executes whatever the remote endpoint
        # returns.  Kept because the payload format cannot be verified from
        # here; switch to json.loads/ast.literal_eval once it is confirmed
        # to be a plain literal.
        factor_df = pd.DataFrame(
            eval(res.text.split("=")[1].split("\n")[0])['data'])
        factor_df.columns = ["date", column]
        factor_df.index = pd.to_datetime(factor_df.date)
        del factor_df["date"]
        return factor_df

    def _apply_factor(raw_df, factor_df, column, divide):
        """Scale the four price columns by the forward-filled factor."""
        merged = pd.merge(raw_df.copy(), factor_df,
                          left_index=True, right_index=True, how="left")
        # A factor applies until the next one is published.
        merged = merged.ffill().astype(float)
        for price in ("open", "high", "close", "low"):
            if divide:
                merged[price] = merged[price] / merged[column]
            else:
                merged[price] = merged[price] * merged[column]
        # The factor is the last column; it is not part of the result.
        return merged.iloc[:, :-1]

    temp_df = ak.stock_zh_a_daily(symbol=symbol, adjust="")
    # Each factor table is downloaded once and reused below.
    hfq_factor_df = _load_factor(zh_sina_a_stock_hfq_url, "hfq_factor")
    qfq_factor_df = _load_factor(zh_sina_a_stock_qfq_url, "qfq_factor")

    ret = {}
    ret["defaut"] = temp_df
    ret["hfq"] = _apply_factor(temp_df, hfq_factor_df, "hfq_factor", divide=False)
    ret["qfq"] = _apply_factor(temp_df, qfq_factor_df, "qfq_factor", divide=True)
    ret["hfq-factor"] = hfq_factor_df
    ret["qfq-factor"] = qfq_factor_df
    return ret
import akshare
# Call the daily A-share endpoint with all-default arguments; the returned
# value is discarded.
akshare.stock_zh_a_daily()
import sqlite3
# Connect to the SQLite database file 'test.db' (path relative to the
# current working directory) and report success.
conn = sqlite3.connect('test.db')
print("Opened database successfully")
def get_historical_news(self, start_date=None, end_date=None, freq="day"):
    """Download forward-adjusted daily prices for every symbol into the DB.

    Progress is tracked in redis: the key "start_stock_code" holds the
    numeric code of the last processed symbol (reset to 0 after a complete
    pass) and each symbol key holds the latest stored date ("YYYY-MM-DD").

    Args:
        start_date: First day to request ("YYYYMMDD").  When ``None`` each
            symbol resumes the day after its latest stored date, or starts
            at ``config.STOCK_PRICE_REQUEST_DEFAULT_DATE`` when it has none.
        end_date: Last day to request ("YYYYMMDD"); defaults to today.
        freq: Only "day" is implemented.  "week", "month", "5mins",
            "15mins", "30mins" and "60mins" are accepted and do nothing.
    """
    if end_date is None:
        end_date = datetime.datetime.now().strftime("%Y%m%d")

    stock_symbol_list = self.col_basic_info.distinct("symbol")
    if len(stock_symbol_list) == 0:
        self.get_stock_code_info()
        stock_symbol_list = self.col_basic_info.distinct("symbol")

    if freq != "day":
        # Other frequencies are placeholders without an implementation.
        return

    raw_checkpoint = self.redis_client.get("start_stock_code")
    start_stock_code = 0 if raw_checkpoint is None else int(raw_checkpoint.decode())

    for symbol in stock_symbol_list:
        symbol_number = int(symbol[2:])
        if symbol_number <= start_stock_code:
            continue

        if start_date is not None:
            # An explicit start date applies to every symbol.
            symbol_start_date = start_date
        else:
            _latest_date = self.redis_client.get(symbol)
            if _latest_date is None:
                symbol_start_date = config.STOCK_PRICE_REQUEST_DEFAULT_DATE
            else:
                tmp_date_dt = datetime.datetime.strptime(
                    _latest_date.decode(), "%Y-%m-%d").date()
                symbol_start_date = (
                    tmp_date_dt + datetime.timedelta(days=1)).strftime('%Y%m%d')

        if symbol_start_date < end_date:
            daily_df = ak.stock_zh_a_daily(
                symbol=symbol, start_date=symbol_start_date,
                end_date=end_date, adjust="qfq")
            # Turn the date index into an ordinary first column.
            daily_df.insert(0, 'date', daily_df.index.tolist())
            daily_df.index = range(len(daily_df))
            _col = self.db_obj.get_collection(self.database_name, symbol)

            _tmp_dict = None
            for _id in range(daily_df.shape[0]):
                _tmp_dict = daily_df.iloc[_id].to_dict()
                _tmp_dict.pop("outstanding_share")
                _tmp_dict.pop("turnover")
                _col.insert_one(_tmp_dict)
            # Only move the per-symbol marker when rows were stored, so an
            # empty download never records another symbol's date.
            if _tmp_dict is not None:
                self.redis_client.set(
                    symbol, str(_tmp_dict["date"]).split(" ")[0])
            logging.info(
                "{} finished saving from {} to {} ... ".format(
                    symbol, symbol_start_date, end_date))

        self.redis_client.set("start_stock_code", symbol_number)

    # Full pass completed: the next run starts from the first symbol again.
    self.redis_client.set("start_stock_code", 0)
def statisticsGet(stockCode, stockRenewed=""):
    """Return the daily bars of a stock without incomplete rows.

    Args:
        stockCode: akshare symbol.
        stockRenewed: Adjustment mode forwarded to akshare as ``adjust``.

    Returns:
        The akshare frame with rows containing NaN dropped and the index
        turned back into a regular column.
    """
    raw = ak.stock_zh_a_daily(symbol=stockCode, adjust=stockRenewed)
    return raw.dropna().reset_index()
def query_stock(stock_name):
    """Return backward-adjusted daily bars for a stock looked up by name.

    ``stock_name`` must be a key of the module-level ``query_dis`` mapping,
    which supplies the akshare symbol.
    """
    symbol = query_dis[stock_name]
    return ak.stock_zh_a_daily(symbol=symbol, adjust="hfq")
def stock_zh_a_minute(symbol: str = "sh600751",
                      period: str = "5",
                      adjust: str = "",
                      datalen: int = 2000) -> pd.DataFrame:
    """Fetch minute bars from Sina with a selectable number of bars.

    Variant of ``akshare.stock_zh_a_minute`` that exposes ``datalen``.

    Args:
        symbol: Sina symbol, e.g. "sh600751".
        period: Bar size, sent to the API as ``scale``.
        adjust: "" for raw prices, "qfq" for forward-adjusted or "hfq" for
            backward-adjusted prices.
        datalen: Number of bars to request.

    Returns:
        DataFrame with the columns day, open, high, low, close, volume.
        For "qfq"/"hfq" the prices are scaled by the ratio between the
        adjusted daily close and the raw 15:00 close of the same day; when
        the daily data cannot be downloaded the raw bars are returned.
        Any other ``adjust`` value yields ``None``.
    """
    url = (
        "https://quotes.sina.cn/cn/api/jsonp_v2.php/=/CN_MarketDataService.getKLineData"
    )
    params = {
        "symbol": symbol,
        "scale": period,
        "datalen": datalen,
    }
    r = requests.get(url, params=params)
    # The payload is JSONP: strip the "=( ... );" wrapper before parsing.
    temp_df = pd.DataFrame(json.loads(
        r.text.split("=(")[1].split(");")[0])).iloc[:, :6]

    if adjust == "":
        return temp_df
    if adjust not in ("qfq", "hfq"):
        return None

    # A single download of the adjusted daily bars; without them the raw
    # minute bars are the best available answer.
    try:
        daily_df = ak.stock_zh_a_daily(symbol=symbol, adjust=adjust)
    except Exception:
        return temp_df

    temp_df[["date", "time"]] = temp_df["day"].str.split(" ", expand=True)
    # The 15:00 bar carries the raw daily close.
    need_df = temp_df[temp_df["time"] == "15:00:00"].copy()
    need_df.index = pd.to_datetime(need_df["date"])

    # Per-day adjustment ratio, aligned on the date index.
    ratio = daily_df.iloc[-len(need_df):, :]["close"].astype(float) \
        / need_df["close"].astype(float)

    temp_df.index = pd.to_datetime(temp_df["date"])
    # Both sides have a "close": they become close_x (minute) and close_y (ratio).
    merged_df = pd.merge(temp_df, ratio, left_index=True, right_index=True)
    for column in ("open", "high", "low"):
        merged_df[column] = merged_df[column].astype(float) * merged_df["close_y"]
    merged_df["close"] = merged_df["close_x"].astype(float) * merged_df["close_y"]

    columns = ["day", "open", "high", "low", "close", "volume"]
    return merged_df[columns].reset_index(drop=True)
import akshare as ak
import streamlit as st
# Request forward-adjusted ("qfq") daily bars for sz000002 with start_date
# 20101103 and end_date 20210318, then hand the frame to Streamlit.
df = ak.stock_zh_a_daily(symbol="sz000002", start_date="20101103", end_date="20210318", adjust="qfq")
st.write(df)
def GetStockHistory(symbol,startdate,enddate):
    """Return forward-adjusted daily bars tagged with their symbol.

    Args:
        symbol: Symbol in the project's notation; normalised with
            ``removedotBysymbol`` before it is sent to akshare.
        startdate: Start date forwarded to akshare as ``start_date``.
        enddate: End date forwarded to akshare as ``end_date``.

    Returns:
        The akshare frame with an extra 'symbol' column holding the
        normalised symbol.
    """
    symbol = removedotBysymbol(symbol)
    history = ak.stock_zh_a_daily(symbol=symbol,
                                  start_date=startdate,
                                  end_date=enddate,
                                  adjust="qfq")
    history['symbol'] = symbol
    return history
#%% import pandas as pd import akshare as ak import tushare as ts import matplotlib.pyplot as plt import numpy as np import dsw df_1 = ts.get_k_data('601727') df_1.set_index(['date'], inplace=True) df_2 = ak.stock_zh_a_daily("sh601727", adjust="hfq") plt.plot(df_1['close'].values, 'r') plt.plot(df_2['close'].values, 'b') plt.show() print(np.shape(df_1)) print(np.shape(df_2)) print(df_1.head()) print(df_2.head()) #%% [markdown] #### 我们可以看到,akshare的接口查询到的数据比tushare接口因子更多,样本量也更大,所以我选择akshare接口来训练我们的模型 #%% df = df_2 # df.to_csv('data/init_data.csv') # %%散点图 -成交量和股价 plt.scatter(df['volume'], df['close']) plt.xlabel('Volume')