# NOTE(review): `sh` (text of the Shanghai code list) is read before this
# excerpt begins; it is only referenced here.
with open("stock_code_sz.txt") as f:
    sz = f.read()

# Stock codes are whatever sits between parentheses in the listing text.
sh_code = re.findall(r'[(](.*?)[)]', sh)
sz_code = re.findall(r'[(](.*?)[)]', sz)
print(sh_code)
print(sz_code)

for i, code in enumerate(sz_code):
    # frequency="d" requests daily bars; adjustflag="3" means no price
    # adjustment (the baostock default).
    rs = bs.query_history_k_data_plus(
        "sz." + code,
        "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST",
        start_date='2019-12-01',
        end_date='2020-04-07',
        frequency="d",
        adjustflag="3")
    print('query_history_k_data_plus respond error_code:' + rs.error_code)
    print('query_history_k_data_plus respond error_msg:' + rs.error_msg)

    # Collect the result set row by row; `and` short-circuits so next() is
    # not called on a failed query.
    data_list = []
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)
    # Write the result set to a CSV file.
def _query_profit_fields(stock_number, year):
    """Return [MBRevenue, netProfit] (as strings) from the Q4 profit report.

    Raises IndexError when baostock returns no row for that year.
    """
    rs_profit = bs.query_profit_data(code=stock_number, year=year, quarter=4)
    profit_list = []
    while rs_profit.error_code == '0' and rs_profit.next():
        profit_list.append(rs_profit.get_row_data())
    result_profit = pd.DataFrame(profit_list, columns=rs_profit.fields)
    first_row = result_profit.iloc[0]  # IndexError when the report is missing
    return [str(first_row["MBRevenue"]), str(first_row["netProfit"])]


def stock_analysis():
    """Append valuation and profit columns to the industry-insight CSV.

    Reads ``<keywords>.csv`` (file name built from the module-level
    ``keywords``), and writes ``<keywords>_result.csv`` with, for each stock,
    the valuation ratios of 2019-04-04 and the 2017/2018 Q4 main-business
    revenue and net profit appended to the source row.
    """
    # Same order as the labels in the header written below.
    valuation_fields = ["peTTM", "psTTM", "pcfNcfTTM", "pbMRQ"]

    bs.login()
    try:
        source_path = os.path.join("..\\data\\industry_insight",
                                   "_".join(keywords) + ".csv")
        result_path = os.path.join("..\\data\\industry_insight",
                                   "_".join(keywords) + "_result.csv")
        with open(source_path, "r") as source_f, \
                open(result_path, "w") as result_f:
            first_line = True
            for source_line in source_f:
                if first_line:
                    # NOTE(review): [:-2] drops the last two characters of the
                    # header (newline plus one more); presumably the source
                    # header ends with a trailing separator - confirm.
                    result_f.write(
                        source_line[:-2] +
                        ",peTTM(滚动市盈率),psTTM(滚动市销率),pcfNcfTTM(滚动市现率),pbMRQ(市净率),2017MBRevenue, 2017netProfit, 2018MBRevenue, 2018netProfit\n"
                    )
                    first_line = False
                    continue

                stock_number = get_full_stock_number(source_line.split(",")[0])

                # Source columns, without the trailing newline.
                result_f.write(source_line.replace("\n", ""))

                # Valuation ratios for the single reference day.
                rs = bs.query_history_k_data_plus(
                    stock_number,
                    "date,code,peTTM,pbMRQ,psTTM,pcfNcfTTM",
                    start_date='2019-04-04',
                    end_date='2019-04-04',
                    frequency="d",
                    adjustflag="3")
                result_list = []
                while rs.error_code == '0' and rs.next():
                    result_list.append(rs.get_row_data())
                result = pd.DataFrame(result_list, columns=rs.fields)
                if result.empty:
                    # No bar for that day (or the query failed): keep the row
                    # aligned with blank cells instead of crashing.
                    valuation = [""] * len(valuation_fields)
                else:
                    # Select by name so values line up with the header labels.
                    valuation = [
                        str(v) for v in result.loc[0, valuation_fields]
                    ]
                result_f.write("," + ",".join(valuation))

                # 2017 and 2018 annual figures.
                try:
                    for year in (2017, 2018):
                        result_f.write(
                            "," +
                            ",".join(_query_profit_fields(stock_number, year)))
                except IndexError:
                    # Some annual reports are not published yet; the remaining
                    # profit columns are left out for this row.
                    pass
                result_f.write("\n")
    finally:
        bs.logout()
import baostock as bs
import pandas as pd

#### Log in ####
lg = bs.login()
# Show the login response.
print('login respond error_code:' + lg.error_code)
print('login respond error_msg:' + lg.error_msg)

#### Fetch historical K-line data for a Shanghai/Shenzhen A share ####
# Field names are listed in the "historical quote fields" chapter of the
# baostock docs; minute bars take a different field list than daily bars.
# Minute-bar fields: date,time,code,open,high,low,close,volume,amount,adjustflag
rs = bs.query_history_k_data_plus(
    "sz.300001",
    "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST",
    start_date='2019-01-01',
    end_date='2020-01-12',
    frequency="d",
    adjustflag="3")
print('query_history_k_data_plus respond error_code:' + rs.error_code)
print('query_history_k_data_plus respond error_msg:' + rs.error_msg)

#### Collect the result set ####
data_list = []
# `and` short-circuits, so next() is not called after a failed query.
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)
# print(type(result))

#### Write the result set to a CSV file ####
def _fill_missing(values, convert):
    """Apply *convert* to every non-empty string; map '' to the -999 sentinel."""
    return [convert(x) if x != '' else -999 for x in values]


def download_data(stock_index, start_date, end_date):
    """Download daily bars for one stock and save them as ``<stock_index>.csv``.

    Args:
        stock_index: Bare stock code; codes starting with "60" are queried
            with the "sh." prefix, everything else with "sz.".
        start_date: First day to fetch, passed through to baostock.
        end_date: Last day to fetch, passed through to baostock.

    Numeric columns are rounded and empty cells become -999. Any exception is
    printed and swallowed, so the function never raises.
    """

    def price(x):
        return np.round(float(x), 2)

    def ratio(x):
        return np.round(float(x), 3)

    def whole(x):
        # amount arrives as a decimal string; keep only the integer part.
        return int(float(x))

    converters = (
        ("open", price),
        ("high", price),
        ("close", price),
        ("low", price),
        ("amount", whole),
        ("turn", ratio),
        ("pctChg", ratio),
        ("peTTM", ratio),
        ("psTTM", ratio),
        ("pbMRQ", ratio),
        ("volume", int),
    )

    data_dir = data_dict.get("baostock")
    bs.login(user_id="anonymous", password="******")
    try:
        if stock_index[0:2] == "60":
            stock_index_in = "sh." + stock_index
        else:
            stock_index_in = "sz." + stock_index
        df2 = bs.query_history_k_data_plus(
            stock_index_in,
            "date,code,open,high,low,close,volume,amount,turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST",
            start_date=start_date,
            end_date=end_date,
            frequency="d",
            adjustflag="1")
        save_file = os.path.join(data_dir, stock_index + ".csv")
        df3 = df2.get_data()

        for column, convert in converters:
            df3[column] = _fill_missing(df3[column].tolist(), convert)
        # "sh.600000" -> "600000"
        df3["stock_index"] = [
            x.split(".")[1] for x in df3["code"].values.tolist()
        ]
        df3["dt"] = df3["date"]
        df3.to_csv(save_file, index=0)
    except Exception as e:
        # Best effort: report and carry on so a batch caller is not aborted.
        print("error")
        print(e)
import baostock as bs
import pandas as pd

#### Log in ####
lg = bs.login()
# Show the login response.
print('login respond error_code:' + lg.error_code)
print('login respond error_msg:' + lg.error_msg)

#### Fetch historical K-line data for a Shanghai/Shenzhen A share ####
# Field names are listed in the "historical quote fields" chapter of the
# baostock docs; minute bars take a different field list than daily bars.
rs = bs.query_history_k_data_plus(
    "sh.600000",
    "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST",
    start_date='2017-07-01',
    end_date='2017-12-31',
    frequency="d",
    adjustflag="2")
print('query_history_k_data_plus respond error_code:' + rs.error_code)
print('query_history_k_data_plus respond error_msg:' + rs.error_msg)

#### Collect the result set ####
data_list = []
# `and` short-circuits, so next() is not called after a failed query.
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)

#### Write the result set to a CSV file ####
result.to_csv("history_A_stock_k_data.csv", index=False)
print(result)
import baostock as bs
import pandas as pd
import pickle

#### Log in ####
lg = bs.login()

# Daily bars of index sh.000016, unadjusted.
rs = bs.query_history_k_data_plus(
    'sh.000016',
    "date,code,open,high,low,close,volume,amount,adjustflag",
    start_date='2010-01-01',
    end_date='2020-01-31',
    frequency="d",
    adjustflag="3")
print('query_history_k_data_plus respond error_code:'+rs.error_code)
print('query_history_k_data_plus respond error_msg:'+rs.error_msg)

data_list = []
# `and` short-circuits, so next() is not called after a failed query.
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)
#file=open('benchmark_sz50.pickle','wb')
#pickle.dump(result,file)
#file.close()
df.to_csv(保存路径, encoding='utf_8_sig', index=False) #### 登陆系统 #### lg = bs.login() # 显示登陆返回信息 print('login respond error_code:' + lg.error_code) print('login respond error_msg:' + lg.error_msg) #### 获取沪深A股历史K线数据 #### # 详细指标参数,参见“历史行情指标参数”章节;“分钟线”参数与“日线”参数不同。“分钟线”不包含指数。 # 分钟线指标:date,time,code,open,high,low,close,volume,amount,adjustflag # 周月线指标:date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg rs = bs.query_history_k_data_plus("sz.000778", "date,code,time,open,close", start_date='2021-03-01', end_date='2021-03-03', frequency="5", adjustflag="3") print('query_history_k_data_plus respond error_code:' + rs.error_code) print('query_history_k_data_plus respond error_msg:' + rs.error_msg) #### 打印结果集 #### data_list = [] while (rs.error_code == '0') & rs.next(): # 获取一条记录,将记录合并在一起 data_list.append(rs.get_row_data()) df = pd.DataFrame(data_list, columns=rs.fields) print(df) 保存路径 = 'C:/Users/YcAllenEffy/Desktop/222.csv' 保存csv文件(保存路径, df)
import com.dateUtils as du
import numpy as np

#### Log in ####
print("-----登陆系统:")
lg = bs.login(user_id="anonymous", password="******")
while True:
    print("-----请输入要查询的股票代码:")
    code = input()
    # Typing "exit" leaves the query loop.
    if code == 'exit':
        break
    #### Daily valuation ratios ####
    # peTTM: trailing P/E; psTTM: price/sales; pcfNcfTTM: price/cash-flow;
    # pbMRQ: price/book.
    rs = bs.query_history_k_data_plus(code,
                                      "date,pbMRQ,peTTM",
                                      start_date=du.getTenYearsAgoTime(),
                                      end_date=du.getYesterDayTime(),
                                      frequency="d",
                                      adjustflag="3")
    #### Collect the result set ####
    result_list = []
    while rs.error_code == '0' and rs.next():
        result_list.append(rs.get_row_data())
    result2 = pd.DataFrame(result_list, columns=rs.fields)
    # np.float no longer exists in current NumPy, and the date column is not
    # numeric anyway: convert only the ratio columns; blanks become NaN.
    for ratio_col in ("pbMRQ", "peTTM"):
        result2[ratio_col] = pd.to_numeric(result2[ratio_col], errors='coerce')
    print(result2.dtypes)
    result2 = result2.sort_values(by='pbMRQ', ascending=False)
    result2 = result2.reset_index(drop=True)
    #### Write the result set to a CSV file ####
    my_file = "/Users/mfhj-dz-001-068/pythonData/pe_" + code + "_data.csv"
    # os._exists() looks up names inside the os module, not files on disk.
    if os.path.exists(my_file):
        # Remove the stale file.
        os.remove(my_file)
import baostock as bs
import pandas as pd

lg = bs.login()
# Show the login response.
print('login respond error_code:' + lg.error_code)
print('login respond error_msg:' + lg.error_msg)

# Fetch historical K-line data.
rs = bs.query_history_k_data_plus(
    "sh.600000",
    "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST",
    start_date='2017-06-01',
    end_date='2019-2-28',
    frequency="d",
    adjustflag="3")
# Show the query response (these lines used to be labelled "login").
print('query_history_k_data_plus respond error_code:' + rs.error_code)
print('query_history_k_data_plus respond error_msg:' + rs.error_msg)

# Collect the result set.
data_list = []
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)

# Write the result set to a CSV file. Raw string: "\g" and "\h" are not valid
# escape sequences, so the plain literal triggers a syntax warning.
result.to_csv(r"D:\gupiaoshuju\history_k_data.csv", encoding="gbk", index=False)
print(result)

# Log out
import matplotlib.pyplot as plt

# Fetch the list of all currently listed tickers from tushare.
pro = ts.pro_api()
data = pro.stock_basic(exchange='',
                       list_status='L',
                       fields='ts_code,symbol,name,area,industry,list_date')
data.to_csv('result.csv', encoding='gbk')

# Log in to baostock.
lg = bs.login()

# Fetch historical K-line data for one Shenzhen stock.
rs = bs.query_history_k_data_plus("sz.300313",
                                  "date,code,open,high,low,close",
                                  start_date='2020-06-01',
                                  end_date='2020-09-29',
                                  frequency="d",
                                  adjustflag="3")

# Collect the result set; `and` short-circuits on a failed query.
data_list = []
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)

# Write the result set to a CSV file.
result.to_csv("szzz.csv", index=False)

# Log out.
bs.logout()
def _insertIndexRows(self, df, code, name):
    """Insert every row of *df* into ``index_data_bs``, newest row first."""
    # NOTE(review): "%s" placeholders assume a MySQL DB-API driver behind
    # self.cursor - confirm against initMysqlConn().
    sql_insert = (
        "INSERT INTO index_data_bs(date,code,name,open,high,low,close,"
        "preclose,volume,amount,p_change) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    for pos in range(df.shape[0] - 1, -1, -1):
        # Missing cells ("nan") are stored as -1.
        resu = [-1 if str(v) == 'nan' else v for v in df.iloc[pos]]
        try:
            params = [str(resu[0]), str(code), str(name)]
            # Columns 2..9: open, high, low, close, preclose, volume, amount,
            # pctChg - stored with two decimals.
            params.extend(round(float(v), 2) for v in resu[2:10])
            self.cursor.execute(sql_insert, params)
            self.db.commit()
        except Exception as err:
            print('insert err', err)
            continue


def getAllIndexHistoryData(self, days=1, save_to_db=False):
    """Refresh the CSV history of the three tracked indices.

    For each index, fetches the daily bars from ``days`` days ago until today,
    appends them to ``G:\\stockData\\originData\\<name>.csv`` (creating the
    file if needed), then rewrites the file de-duplicated by date and sorted
    ascending.

    Args:
        days: How many days back from today to fetch.
        save_to_db: When true, also insert the fetched rows into the
            ``index_data_bs`` table. Off by default, so a plain call only
            touches the CSV files.

    NOTE(review): the layout of this method was reconstructed from flattened
    source; the CSV-only default reflects an unconditional ``continue`` that
    skipped the insert loop - confirm this was intended.
    """
    self.initMysqlConn()  # connect to the database

    # Date window: from `days` days ago until today.
    now = datetime.datetime.now()
    end_dt1 = now.strftime('%Y-%m-%d')
    start_dt = (now - datetime.timedelta(days=days)).strftime('%Y-%m-%d')

    # Log in and show the login response.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond error_msg:' + lg.error_msg)

    # Index codes, CSV file stems and display names (parallel lists).
    stock_pool = ['sh.000001', 'sz.399001', 'sz.399006']
    ss = ['000001_sh', '399001_sz', '399006_cyb']
    name_pool = ['上证指数', '深证成指', '创业板']

    try:
        for code, file_stem, name in zip(stock_pool, ss, name_pool):
            try:
                rs = bs.query_history_k_data_plus(
                    code,
                    "date,code,open,high,low,close,preclose,volume,amount,pctChg",
                    start_date=start_dt,
                    end_date=end_dt1,
                    frequency="d")
                print('query_history_k_data_plus respond error_code:' +
                      rs.error_code)
                print('query_history_k_data_plus respond error_msg:' +
                      rs.error_msg)

                data_list = []
                while rs.error_code == '0' and rs.next():
                    data_list.append(rs.get_row_data())
                df = pd.DataFrame(data_list, columns=rs.fields)

                filename = 'G:\\stockData\\originData\\' + file_stem + '.csv'
                # Append to an existing file, otherwise create it.
                if not os.path.isfile(filename):
                    df.to_csv(filename, encoding='utf_8_sig')
                else:
                    df.to_csv(filename,
                              mode='a',
                              header=False,
                              encoding='utf_8_sig')

                # Re-read and normalise: drop incomplete rows, keep one row
                # per date, sort by date ascending.
                df1 = pd.read_csv(filename, encoding='utf_8_sig')
                df1 = df1.dropna()
                df1 = df1.drop_duplicates(['date'])
                df1 = df1.sort_values('date')
                df1.to_csv(filename, index=None, encoding='utf_8_sig')
            except Exception as aa:
                print(aa)
                print('get error')
                # Nothing usable was fetched for this index; move on instead
                # of falling through to code that needs `df`.
                continue

            if save_to_db:
                _insertIndexRows(self, df, code, name)
    finally:
        self.cursor.close()
        self.db.close()
        # Log out.
        bs.logout()
    print('getAllIndexHistoryData Finished!')
#### Log in ####
lg = bs.login()
# Show the login response.
print(f'login respond error_code:{lg.error_code}')
print(f'login respond error_msg:{lg.error_msg}')

stock_code = "sz.000507"

#### Fetch historical K-line data for a Shanghai/Shenzhen A share ####
# Field names are listed in the "historical quote fields" chapter of the
# baostock docs; minute bars take a different field list than daily bars.
# Minute-bar fields: date,time,code,open,high,low,close,volume,amount,adjustflag
# adjustflag: 3 = unadjusted (default), 1 = backward-adjusted,
# 2 = forward-adjusted; supported for minute, daily, weekly and monthly bars.
daily_fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,isST,pctChg,pbMRQ,peTTM,psTTM,pcfNcfTTM"
rs = bs.query_history_k_data_plus(
    stock_code,
    daily_fields,
    start_date='1960-01-01',
    end_date='2020-02-14',
    frequency="d",
    adjustflag="2",
)
# rs = bs.query_history_k_data_plus(stock_code,
#     "date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg",
#     start_date='1960-01-01',
#     end_date='2020-02-14',
#     frequency="m", adjustflag="2")
print(f'query_history_k_data_plus respond error_code:{rs.error_code}')
print(f'query_history_k_data_plus respond error_msg:{rs.error_msg}')

#### Collect the result set ####
data_list = []
import baostock as bs
import pandas as pd

#### Log in ####
lg = bs.login()
# Show the login response.
print('login respond error_code:' + lg.error_code)
print('login respond error_msg:' + lg.error_msg)

#### Fetch historical K-line data ####
rs = bs.query_history_k_data_plus(
    "sz.002739",
    "date,open,high,low,close,volume,amount,preclose,pctChg",
    start_date='2020-01-01',
    end_date='2020-08-10',
    frequency="d")
print('query_history_k_data_plus respond error_code:' + rs.error_code)
print('query_history_k_data_plus error_msg:' + rs.error_msg)

#### Collect the result set ####
data_list = []
# `and` short-circuits, so next() is not called after a failed query.
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)

#### Write the result to a CSV file ####
result.to_csv("C:/Users/大可/Desktop/sz.002739_k_data.csv", index=False)
print(result)
# Append log records to a file under E:\historystock.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename="E:\\historystock\\getostock.log",
    filemode='a')

if __name__ == '__main__':
    #### Log in ####
    lg = bs.login()
    # Show the login response.
    print('login respond error_code:'+lg.error_code)
    print('login respond error_msg:'+lg.error_msg)

    # Monthly bars (frequency="m") of sh.000015, forward-adjusted.
    rs = bs.query_history_k_data_plus("sh.000015",
                                      "date,code,close,pctChg",
                                      start_date='2017-07-11',
                                      end_date='2020-04-30',
                                      frequency="m",
                                      adjustflag="2")
    print('query_history_k_data_plus respond error_code:'+rs.error_code)
    print('query_history_k_data_plus respond error_msg:'+rs.error_msg)

    #### Collect the result set ####
    data_list = []
    # `and` short-circuits, so next() is not called after a failed query.
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    #### Write the result set to a CSV file ####
    result.to_csv("E:\\historystock\\zhongxiaoban.csv", index=False)
    print(result)
def _dropSuspendedDays(rows):
    """Return *rows* without days whose close and high both repeat the previous day's."""
    kept = []
    for idx, row in enumerate(rows):
        # The first row has no predecessor and is always kept.
        if idx == 0 or row[5] != rows[idx - 1][5] or row[3] != rows[idx - 1][3]:
            kept.append(row)
    return kept


def _lastWeekBar(day):
    """Build [date, code, open, high, low, close] for the week of the last daily bar."""
    last = day[-1]
    weekDate = getYearWeekFromDate(last[0])  # week id of the last trading day
    openLastWeek, highLastWeek, lowLastWeek = last[2], last[3], last[4]
    # Walk backwards through the days of that same week.
    for d in reversed(day):
        if getYearWeekFromDate(d[0]) != weekDate:
            break
        openLastWeek = d[2]  # ends up as the open of the week's first day
        # Prices arrive as strings; compare numerically, keep original values.
        if float(d[3]) > float(highLastWeek):
            highLastWeek = d[3]
        if float(d[4]) < float(lowLastWeek):
            lowLastWeek = d[4]
    # The week's close is the close of its last trading day.
    return [last[0], last[1], openLastWeek, highLastWeek, lowLastWeek, last[5]]


def getOnlyKline(stockCodeArray, toFile=1, start_date='2018-01-06', end_date='2023-10-31'):
    """Fetch daily and weekly K-lines for several stocks.

    Args:
        stockCodeArray: Stock codes; each is passed through ``codeFormat``.
        toFile: 0 reads a previously saved CSV instead of querying and returns
            one value array per stock; 1 additionally writes the merged daily
            data to xlsx/csv.
        start_date: First day to fetch.
        end_date: Last day to fetch.

    Returns:
        ``[klineArray, klineWeekArray]`` (per-stock daily rows and weekly
        rows), or, when ``toFile == 0``, a list of per-stock arrays.
    """
    # toFile == 0: read the local CSV and split it per stock.
    if toFile == 0:
        df = pd.read_csv('/Users/miketam/Downloads/getOnlyKline_300.csv', sep=',')
        dfArray = dfDivide(stockCodeArray, df)
        return [stockDf.values for stockDf in dfArray]

    arrayMerage = []  # all stocks' daily rows, concatenated
    klineArray = []   # one list of daily rows per stock
    for stockCode in stockCodeArray:
        code = codeFormat(stockCode)
        # Field positions: 0 date, 1 code, 2 open, 3 high, 4 low, 5 close,
        # 6 volume.
        rs = bs.query_history_k_data_plus(code,
                                          "date,code,open,high,low,close,volume",
                                          start_date=start_date,
                                          end_date=end_date,
                                          frequency="d",
                                          adjustflag="2")
        data_list = []
        while rs.error_code == '0' and rs.next():
            data_list.append(rs.get_row_data())

        # Drop suspended trading days.
        data_list = _dropSuspendedDays(data_list)

        arrayMerage.extend(data_list)
        klineArray.append(data_list)

    # Passing the column names to the constructor also works with no rows.
    result = pd.DataFrame(
        arrayMerage,
        columns=["date", "code", "open", "high", "low", "close", "volume"])

    # Weekly K-lines.
    klineWeekArray = getWeeklyKline(stockCodeArray, start_date, end_date)

    # Use the daily data to complete the most recent week.
    for week, day in zip(klineWeekArray, klineArray):
        if not day:
            continue  # no daily data, nothing to derive
        weekLast = _lastWeekBar(day)
        # Overwrite the last weekly bar when it carries the same date as the
        # last daily bar; otherwise add a new bar.
        if week and week[-1][0] == day[-1][0]:
            week[-1] = weekLast
        else:
            week.append(weekLast)

    if toFile == 1:
        result.to_excel('/Users/miketam/Downloads/getOnlyKline.xlsx',
                        float_format='%.5f',
                        index=False)
        result.to_csv("/Users/miketam/Downloads/getOnlyKline.csv",
                      encoding="gbk",
                      index=False)

    return [klineArray, klineWeekArray]
code = result["code"]

# In[ ]:

# Fetch roughly three years of prices for each stock.

# In[8]:

history_data = pd.DataFrame(columns=["date", "code", "close"])
for i in range(0, 50):
    # adjustflag="2": forward-adjusted history.
    data = bs.query_history_k_data_plus(code[i],
                                        "date,code,close",
                                        start_date='2018-03-01',
                                        end_date='2021-3-01',
                                        adjustflag="2")
    print('query_history_k_data_plus respond error_code:'+data.error_code)
    print('query_history_k_data_plus respond error_msg:'+data.error_msg)

    data_list = []
    while data.error_code == '0' and data.next():
        data_list.append(data.get_row_data())
    result = pd.DataFrame(data_list, columns=data.fields)
    x = result.shape
    print(x)
    # 729 rows is treated as a complete history for the window.
    if x[0] == 729:
        result.to_csv(code[i], index=0, header=1)
    else:
        # Drop stocks whose price history has gaps.
        code = code.drop([i])
def getMaLineTrend(stockCodeArray, stockNameArray):
    """Tag each trading day of several stocks with MA5/MA10 direction and export to xlsx.

    For every stock the 5-day and 10-day moving averages of the close are
    computed. A day is tagged "向上_5" / "向上_10" when the respective average
    is higher than the day before, otherwise "". On the last day a marker is
    appended to the MA5 tag when, within the last four days, MA5 rose on one
    or two days with MA10 not rising, and neither average rose on the other
    days.

    Args:
        stockCodeArray: Stock codes; each is passed through ``codeFormat``.
        stockNameArray: Display names, parallel to ``stockCodeArray``.

    Returns:
        None. The merged table is printed and written to
        ``/Users/miketam/Downloads/getMaline.xlsx``.
    """
    maMultiStockPd = None
    for i, stockName in zip(stockCodeArray, stockNameArray):
        code = codeFormat(i)
        # Field positions: 0 date, 1 code, 2 open, 3 high, 4 low, 5 close.
        rs = bs.query_history_k_data_plus(code,
                                          "date,code,open,high,low,close",
                                          start_date='2020-01-01',
                                          end_date='2021-12-31',
                                          frequency="d",
                                          adjustflag="2")
        data_list = []
        while rs.error_code == '0' and rs.next():
            data_list.append(rs.get_row_data())
        if not data_list:
            continue  # nothing returned for this stock
        result = pd.DataFrame(data_list)

        # Moving averages of the close; baostock delivers prices as strings.
        close = result[5].astype(float)
        result[6] = close.rolling(window=5).mean()   # 5-day line
        result[7] = close.rolling(window=10).mean()  # 10-day line

        array = result.values
        ma5 = ""
        ma10 = ""
        maArrayOneStock = []
        m = 0  # days in the last four with neither average rising
        n = 0  # days in the last four with only MA5 rising
        total = len(array)
        for j in range(total):
            Current = array[j]
            # For j == 0 this wraps to the last row; harmless because the
            # first averages are NaN and fail the "> 0" tests below.
            Prev = array[j - 1]
            # A tag is only re-evaluated when both averages exist; otherwise
            # the previous tag is carried over.
            if Current[6] > 0 and Prev[6] > 0:
                ma5 = "向上_5" if Current[6] > Prev[6] else ""
            if Current[7] > 0 and Prev[7] > 0:
                ma10 = "向上_10" if Current[7] > Prev[7] else ""

            if j >= total - 4:
                if ma5 == '' and ma10 == '':
                    m = m + 1
                if ma5 == '向上_5' and ma10 == '':
                    n = n + 1
            if j == total - 1:
                if (n == 1 and m == 3) or (n == 2 and m == 2):
                    ma5 = ma5 + '快来看我'
            maArrayOneStock.append([Current[0], ma5, ma10])

        temp = pd.DataFrame(maArrayOneStock)
        temp.columns = ["date", stockName + "_ma5", str(i) + "_ma10"]
        if maMultiStockPd is None:
            maMultiStockPd = temp
        else:
            # Inner join: only dates present for every stock survive.
            maMultiStockPd = pd.merge(maMultiStockPd, temp, on='date')

    ### Output ###
    print(maMultiStockPd)
    if maMultiStockPd is None:
        return  # no stock produced data; nothing to export
    # maMultiStockPd.to_csv("/Users/miketam/Downloads/getMaline.csv", encoding="gbk", index=False)
    maMultiStockPd.to_excel('/Users/miketam/Downloads/getMaline.xlsx',
                            float_format='%.5f',
                            index=False)
    return
df = mytech.get_index(df) df["code"] = code df_last = df_last.append(df) if i % 10 == 0: print(i, code) df_last.reset_index(drop=True, inplace=True) ''' SH300 data ''' lg = bs.login() rs = bs.query_history_k_data_plus("sh.000300", "date,code,open,high,low,close,volume", start_date="2000-01-01", frequency="d") data_list = [] while (rs.error_code == '0') & rs.next(): # 获取一条记录,将记录合并在一起 data_list.append(rs.get_row_data()) df = pd.DataFrame(data_list, columns=rs.fields) df.rename({"volume": "vol", "date": "trade_date"}, inplace=True, axis=1) for col in ["open", "high", "low", "close", "vol"]: df[col] = df[col].astype(float) mytech = common.TechnicalIndicatorPriceVol4Model() df = mytech.get_index(df) del df['pct_chg'] df.columns = ["trade_date" ] + [c + "_sh" for c in df.columns if c != "trade_date"] df["trade_date"] = pd.to_datetime(df["trade_date"])
import baostock as bs
import pandas as pd

#### Log in ####
lg = bs.login()
# Show the login response.
print('login respond error_code:'+lg.error_code)
print('login respond error_msg:'+lg.error_msg)

#### Fetch historical K-line data ####
rs = bs.query_history_k_data_plus("sz.002104",
                                  "date,open,high,low,close,volume,amount",
                                  start_date='2015-01-01',
                                  end_date='2020-08-10',
                                  frequency="d")
print('query_history_k_data_plus respond error_code:'+rs.error_code)
print('query_history_k_data_plus error_msg:'+rs.error_msg)

#### Collect the result set ####
data_list = []
while rs.error_code == '0' and rs.next():
    data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)

#### Write the result to a CSV file ####
result.to_csv("C:/Users/大可/Desktop/sz.002104_k_data.csv", index=False)
print(result)

#### Log out ####
# The parentheses matter: a bare `bs.logout` only names the function.
bs.logout()
import baostock as bs
import pandas as pd

lg = bs.login()

fields = 'date,open,high,low,close,volume'
df_bs = bs.query_history_k_data_plus("sh.000001",
                                     fields,
                                     start_date='2009-01-01',
                                     end_date='2019-06-01',
                                     frequency="d",
                                     adjustflag="2")
data_list = []
while df_bs.error_code == '0' and df_bs.next():
    data_list.append(df_bs.get_row_data())
result = pd.DataFrame(data_list, columns=df_bs.fields)

# baostock returns every cell as a string; give the columns numeric types.
for price_col in ('close', 'open', 'low', 'high'):
    result[price_col] = result[price_col].astype('float64')
result.volume = result.volume.astype('int')
# Index the frame by trading date.
result.index = pd.to_datetime(result.date)

print(result.head())
print(result.tail())
# info() prints by itself and returns None, so it is not wrapped in print().
result.info()
print(result.axes)

bs.logout()
# Only report the login response when login failed.
if lg.error_code != '0':
    print('login respond error_code:'+lg.error_code)
    print('login respond error_msg:'+lg.error_msg)

zz500_data = bs.query_zz500_stocks()

# Walk the constituent list.
zz500_stocks = []
while zz500_data.error_code == '0' and zz500_data.next():
    row_data = zz500_data.get_row_data()
    code = row_data[1]

    # Daily bars for this constituent.
    # NOTE(review): the field is spelled "tradeStatus" here - confirm
    # baostock accepts this casing.
    rs = bs.query_history_k_data_plus(code,
                                      "date,code,close,tradeStatus,open,volume",
                                      start_date=startdate,
                                      end_date=enddate,
                                      frequency="d",
                                      adjustflag="3")

    # Keep only rows whose fourth field (the trade status) is '1'.
    result_list = []
    while rs.error_code == '0' and rs.next():
        rs_data = rs.get_row_data()
        if rs_data[3] == '1':
            result_list.append(rs_data)
    pd_result = pd.DataFrame(result_list, columns=rs.fields)

    # 13-day moving average of the close.
    ma = 13
    pd_result['ma_'+str(ma)] = pd_result['close'].rolling(ma).mean()
+ symbol + "', " + str(last_index) + ")" conn.execute(macd_sql) truncate_tables() for stock_code in stock_df["code"]: symbol_string = stock_code[0:2] + stock_code[3:9] startDay = (datetime.datetime.now() - datetime.timedelta(days=9)).strftime("%Y-%m-%d") interval_type_string = "Min30" min_30_result = bs.query_history_k_data_plus(stock_code, "date,code,open,high,low,close," "volume,amount,adjustflag", start_date=startDay, end_date='', frequency="30", adjustflag="2") day_data_list = [] while (min_30_result.error_code == '0') & min_30_result.next(): day_data_list.append(min_30_result.get_row_data()) day_data_frame = pd.DataFrame(day_data_list, columns=min_30_result.fields) if day_data_frame.size == 0: continue volume_array = day_data_frame['volume'].astype(float).values open_array = day_data_frame['open'].astype(float).values close_array = day_data_frame['close'].astype(float).values day_array = day_data_frame['date'].values high_array = day_data_frame['high'].astype(float).values
def run(self): stock_df = self.get_codes_by_date(self.date_end) #return for index, row in stock_df.iterrows(): print(f'processing {index} {row["code"]} {row["code_name"]}') break start_time = time.time() #code = "sh.600037" #code = "sz.002007" #df_code = bs.query_history_k_data_plus(code, self.fields, # adjustflag:复权类型,默认不复权:3;1:后复权;2:前复权。 df_code = bs.query_history_k_data_plus(row["code"], self.fields, start_date=self.date_start, end_date=self.date_end, frequency="d", adjustflag="2").get_data() #frequency="d", adjustflag="3").get_data() #print('query_history_k_data_plus respond error_code:'+rs.error_code) #print('query_history_k_data_plus respond error_msg :'+rs.error_msg) #df_code = rs.get_data() # code_name = row["code_name"].replace('*', '') code = row["code"].replace('.', '').upper() #print(code) #code = code[0:2].upper()+code[3:9] df_code.columns = [ 'RQ', 'CODE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME', 'AMOUNT', 'ADJUSTFLAG', 'TURN', 'TRADESTATUS', 'PCTCHG', 'PETTM', 'PBMRQ', 'PSTTM', 'PCFNCFTTM', 'ISST' ] #print(df_code.columns) df_code['RQ'] = pd.to_datetime(df_code['RQ'], format='%Y-%m-%d') df_code['CODE'] = code # df_code['CODE'].apply( # lambda x: str(x[0:2]).upper()+x[3:9]) df_code['VOLUME'].replace('', '0', inplace=True) df_code['AMOUNT'].replace('', '0', inplace=True) df_code['TURN'].replace('', '0', inplace=True) df_code['PCTCHG'].replace('', '0', inplace=True) df_code['PETTM'].replace('', '0', inplace=True) df_code['PBMRQ'].replace('', '0', inplace=True) df_code['PSTTM'].replace('', '0', inplace=True) df_code['PCFNCFTTM'].replace('', '0', inplace=True) df_code['ISST'].replace('', '0', inplace=True) convert_dict = { 'CODE': str, 'OPEN': float, 'HIGH': float, 'LOW': float, 'CLOSE': float, 'VOLUME': int, 'AMOUNT': float, 'ADJUSTFLAG': int, 'TURN': float, 'TRADESTATUS': int, 'PCTCHG': float, 'PETTM': float, 'PBMRQ': float, 'PSTTM': float, 'PCFNCFTTM': float, 'ISST': int } #print(df_code.head()) df_code = df_code.astype(convert_dict) print(df_code) 
#print(df_code.dtypes) print(df_code.shape) df_code.to_sql('hq_baostock', engine, index=False, if_exists='append', dtype={ 'CODE': types.VARCHAR(length=8), 'ISST': types.INTEGER() }) end_time = time.time() print('elapsed ' + str(end_time - start_time)) #break '''
def bs_daily_mysql(pro, cur, engine):
    """Download daily bars for every stock in ``stock_basic`` into per-stock MySQL tables.

    Args:
        pro: Unused here; kept for interface compatibility.
        cur: DB-API cursor on the target database.
            NOTE(review): rows are read positionally, which assumes a
            tuple-returning cursor - confirm.
        engine: SQLAlchemy engine handed to ``DataFrame.to_sql``.

    Stocks whose ``<exchange>_<code>_daily`` table already exists and is not
    empty are skipped.
    """
    cur.execute('select ts_code from stock_basic')
    ts_code_list = [row[0] for row in cur.fetchall()]
    print(ts_code_list)

    # "000001.SZ" (tushare style) -> "sz.000001" (baostock style).
    bs_code_list = [each[-2:].lower() + '.' + each[0:6] for each in ts_code_list]

    cur.execute("show tables;")
    table_list = {row[0] for row in cur.fetchall()}

    cur.execute("select table_name, table_rows from information_schema.tables "
                "where table_schema = 'baostock' and table_rows <1;")
    empty_table_list = {row[0] for row in cur.fetchall()}

    for bs_code in bs_code_list:
        table_name = bs_code.replace('.', '_') + "_daily"
        name = table_name.lower()
        if name in table_list and name not in empty_table_list:
            print(bs_code + "对应的日线表已存在且不为空")
            continue

        # NOTE(review): `code` is the primary key although every row of a
        # per-stock table has the same code; harmless only because to_sql
        # below replaces the table - confirm the intended key.
        sql = ("CREATE TABLE IF NOT EXISTS " + table_name + "(date CHAR(11),"
               "code CHAR(10) PRIMARY KEY,open float(4,2),high float(4,2),low float(4,2),close float(4,2), preclose float(4,2),"
               "volume float(11,2),amount float(11,2),turn float(11,2),tradestatus float(11,2), pctChg float(11,2),peTTM float(11,2),psTTM float(11,2),pcfNcfTTM float(11,2),"
               "pbMRQ float(11,2),isST INT UNSIGNED) ENGINE=innodb DEFAULT CHARSET=utf8")
        cur.execute(sql)

        rs = bs.query_history_k_data_plus(
            bs_code,
            "date,code,open,high,low,close,preclose,volume,amount,"
            "turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST",
            start_date='2010-01-01',
            end_date='2020-11-15',
            frequency="d",
            adjustflag="1")
        data_list = []
        while rs.error_code == '0' and rs.next():
            data_list.append(rs.get_row_data())
        result = pd.DataFrame(data_list, columns=rs.fields)

        if result.empty:
            print(bs_code + "返回数据是空的,不导入数据")
        else:
            # "2020-01-02" -> "20200102"; assigning the whole column avoids
            # writing through a view of the frame.
            result['date'] = result['date'].str.replace("-", "", regex=False)
            print(bs_code + "返回数据不为空,导入数据")
            result.to_sql(table_name, engine, if_exists='replace')
for ts_date in ts_date_series: date = str(ts_date) date_series.append(date[:4] + '-' + date[4:6] + '-' + date[6:]) return date_series if download_stocks: rs = bs.query_hs300_stocks() # 获取股票名称与代码 result = data_load(rs) result.to_csv(base_data_path+'hs300_stocks.csv') for code in tqdm(result['code']): for start_date, end_date, path in ((data_start_date, data_end_date, data_path), (train_data_start_date, train_data_end_date, train_data_path)): rs = bs.query_history_k_data_plus(code, fields, start_date=start_date, end_date=end_date, frequency="d", adjustflag="3") status(rs) # baostock日线数据 df1 = data_load(rs) df1.set_index('date', inplace=True) # tushare指标数据 df2 = pro.daily_basic(ts_code=ts_c(code), start_date=ts_d(start_date), end_date=ts_d(end_date), fields=ts_basic_fields) df2['trade_date'] = bs_d(df2['trade_date']) df2.set_index('trade_date', inplace=True) # 按index横向拼接 df1 = df1.join(df2)
def downBarBySymbol(self, symbol, start_date=None, end_date=None, freq='d'):
    """Download K-line bars for one symbol from baostock and upsert them.

    Args:
        symbol: Code passed to ``bs.query_history_k_data_plus`` and used as
            the collection name for the database calls.
        start_date: Start of the range, as a ``datetime`` or a string that
            is forwarded unchanged.
        end_date: End of the range, same conventions as ``start_date``.
        freq: Bar frequency. A value starting with a digit is treated as a
            minute frequency (a trailing ``'...N'`` suffix such as ``'MIN'``
            is stripped before querying); a value starting with ``'d'``
            selects the daily database.

    Returns:
        None. Progress and timing are printed.
    """
    start = time.time()

    # Log in and show the login response.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond error_msg:' + lg.error_msg)

    db_freq = freq
    req_colume = "date,code,open,high,low,close,volume,amount,adjustflag"
    if freq[0] in '0123456789':
        if freq[-1] != 'N':
            # Bare number such as '5': the database key carries the suffix.
            db_freq = freq + 'MIN'
        else:
            # Already suffixed such as '5MIN': baostock wants the bare number.
            freq = freq[0:-3]
        # Minute bars additionally carry a time field.
        req_colume = "date,time,code,open,high,low,close,volume,amount,adjustflag"
    if freq.startswith('d'):
        db_freq = 'D'

    self.dbop.dbEnsureIndex(DB_NAME_DICT[db_freq], symbol, 'datetime')

    # Convert datetime arguments to date strings.
    if isinstance(start_date, datetime):
        start_date = start_date.strftime('%Y-%m-%d')
    if isinstance(end_date, datetime):
        end_date = end_date.strftime('%Y-%m-%d')

    rs = bs.query_history_k_data_plus(symbol, req_colume, start_date, end_date,
                                      frequency=freq, adjustflag="2")

    # Collect the result set one record at a time; `and` short-circuits so
    # next() is not called once an error has been reported.
    data_list = []
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    df = pd.DataFrame(data_list, columns=rs.fields)

    for ix, row in df.iterrows():
        bar = self.generateVtBar(row)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        self.dbop.dbUpdate(DB_NAME_DICT[db_freq], symbol, d, flt, upsert=True)

    end = time.time()
    cost = (end - start) * 1000

    # An empty frame has no first/last index entry; report the query status
    # instead of raising IndexError in the summary line.
    if df.empty:
        print(u'合约%s 周期%s没有返回数据(error_code:%s, error_msg:%s),耗时%s毫秒' %
              (symbol, freq, rs.error_code, rs.error_msg, cost))
        return

    print(u'合约%s 周期%s数据下载完成%s - %s,耗时%s毫秒' %
          (symbol, freq, df.index[0], df.index[-1], cost))
data_dir = tmp_data_dict.get("baostock") lg = bs.login(user_id="anonymous", password="******") stock_index = sys.argv[1] #stock_index = "601398" #stock_index = "601398" if stock_index[0:2] == "60": stock_index_in = "sh." + stock_index else: stock_index_in = "sz." + stock_index df2 = bs.query_history_k_data_plus( stock_index_in, "date,code,open,high,low,close,volume,turn,amount,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST", start_date='2006-10-27', end_date='2020-08-11', frequency="d", adjustflag="1") save_file = os.path.join(data_dir, stock_index + ".csv") df3 = df2.get_data() df3.to_csv(save_file, index=0) ''' 参数名称 参数描述 date 交易所行情日期 code 证券代码 open 开盘价 high 最高价 low 最低价 close 收盘价 preclose 昨日收盘价
def test_1():
    """Run a 10/20-day moving-average crossover backtest on sz.000001.

    Daily bars are downloaded from baostock, reshaped into the O/H/L/C/V
    frame, turned into buy/sell/short/cover signals and handed to
    ``pybacktest.Backtest``. Results are printed and plotted.

    The local names ``ohlc``, ``buy``, ``sell``, ``short`` and ``cover`` must
    not be renamed: the backtest receives ``locals()`` and looks them up by
    name.
    """
    # Log in.
    bs.login()
    try:
        # Fetch the historical daily K-line data (adjustflag="3": unadjusted).
        # Minute bars use a different field list:
        # date,time,code,open,high,low,close,volume,amount,adjustflag
        rs = bs.query_history_k_data_plus(
            "sz.000001",
            "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST",
            start_date='2018-04-01',
            end_date='2020-04-18',
            frequency="d",
            adjustflag="3")

        # Collect the result set one record at a time; `and` short-circuits
        # so next() is not called once an error has been reported.
        data_list = []
        while rs.error_code == '0' and rs.next():
            data_list.append(rs.get_row_data())
        result = pd.DataFrame(data_list, columns=rs.fields)
    finally:
        # Log out even when the query raises.
        bs.logout()

    ohlc = result.rename(
        columns={
            'date': 'Date',
            'open': 'O',
            'high': 'H',
            'low': 'L',
            'close': 'C',
            'volume': 'V'
        })
    ohlc['Date'] = pd.to_datetime(ohlc['Date'])
    ohlc.set_index("Date", inplace=True)
    # baostock returns every field as text; cast the price/volume columns.
    ohlc = ohlc.astype({"O": "float", "H": "float", "L": "float", "C": "float", "V": "int"})
    ohlc.info()

    short_ma = 10
    long_ma = 20

    ms = ohlc.C.rolling(short_ma).mean()
    ml = ohlc.C.rolling(long_ma).mean()

    buy = cover = (ms > ml) & (ms.shift() < ml.shift())  # ma cross up
    sell = short = (ms < ml) & (ms.shift() > ml.shift())  # ma cross down

    print('> Short MA\n%s\n' % ms.tail())
    print('> Long MA\n%s\n' % ml.tail())
    print('> Buy/Cover signals\n%s\n' % buy.tail())
    print('> Short/Sell signals\n%s\n' % sell.tail())

    bt = pybacktest.Backtest(locals(), 'ma_cross')

    print(list(filter(lambda x: not x.startswith('_'), dir(bt))))
    print('\n> bt.signals\n%s' % bt.signals.tail())
    print('\n> bt.trades\n%s' % bt.trades.tail())
    print('\n> bt.positions\n%s' % bt.positions.tail())
    print('\n> bt.equity\n%s' % bt.equity.tail())
    print('\n> bt.trade_price\n%s' % bt.trade_price.tail())

    bt.summary()

    import matplotlib
    import matplotlib.pyplot as plt
    matplotlib.rcParams['figure.figsize'] = (15.0, 8.0)

    bt.plot_equity()

    bt.plot_trades()
    # Overlay the two moving averages on the trade plot.
    ms.plot(c='green')
    ml.plot(c='blue')
    plt.legend(loc='upper left')
    # plt.show()
while (cd.error_code == '0') & cd.next(): # 获取一条记录,将记录合并在一起 cd_list.append(cd.get_row_data()) result1 = pd.DataFrame(cd_list, columns=cd.fields) print(result1) # 结果切片 result1.drop([i for i in range(0,4700)],inplace=True) # dataframe columns to list rlist = result1.code.values.tolist() # 求数组长度 rnum = len(rlist) data_list = [] for r in rlist: rs = bs.query_history_k_data_plus(r, "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST", start_date='2021-03-04', end_date='2021-03-05', frequency="d", adjustflag="3") #frequency="d"取日k线,adjustflag="3"默认不复权 print('query_history_k_data_plus respond error_code:{}, error_msg:{}'.format(rs.error_code, rs.error_msg)) while (cd.error_code == '0') & cd.next(): # 获取一条记录,将记录合并在一起 cd_list.append(cd.get_row_data()) result1 = pd.DataFrame(cd_list, columns=cd.fields) print(result1) result1.drop([i for i in range(0,4700)],inplace=True) print(result1) #result1 = result1.drop(’code_name‘,axis=1) del result1['code_name'] print(result1) #### 转换数据型 #### result1 = result1['tradeStatus'].astype(float)
def query_history_k_data_plus(
    taskEnum,
    frequency,
    adjustflag,
    load_data_func,
    load_data_func_params: dict = None,
):
    """Fetch historical A-share K-line data for every pending task.

    Every unfinished ``TaskTable`` row whose ``task`` equals
    ``taskEnum.value`` is queried from baostock (up to 8 attempts, 2 seconds
    apart). On success the rows are wrapped in a DataFrame and handed to
    ``load_data_func`` on a thread pool, and the task is marked finished.

    Args:
        taskEnum: Enum member whose ``value`` selects the tasks to run.
        frequency: Frequency forwarded to baostock and to ``load_data_func``.
        adjustflag: Adjust flag forwarded to baostock and ``load_data_func``.
        load_data_func: Callable receiving one dict with the keys
            ``result``, ``bs_code``, ``frequency`` and ``adjustflag`` plus
            a deep copy of ``load_data_func_params``.
        load_data_func_params: Optional extra entries for that dict.

    Returns:
        None.
    """
    fields = _get_fields(frequency)
    max_try = 8  # maximum number of attempts per task
    submitted = []  # (bs_code, future) pairs, inspected once loading is done

    # Log in.
    lg = bs.login()
    if lg.error_code != '0':
        _logger.error('baostock login failed/login respond error_code:' + lg.error_code)

    try:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            with session_scope() as sm:
                rp = sm.query(TaskTable).filter(
                    TaskTable.task == taskEnum.value,
                    TaskTable.finished == False).all()  # noqa: E712 (SQL expression)
                for task in tqdm(rp):
                    if task.finished:
                        continue

                    start_date = task.begin_date.strftime("%Y-%m-%d")
                    end_date = task.end_date.strftime("%Y-%m-%d")

                    for attempt in range(max_try):
                        rs = bs.query_history_k_data_plus(
                            task.bs_code,
                            fields,
                            start_date=start_date,
                            end_date=end_date,
                            frequency=frequency,
                            adjustflag=adjustflag)

                        data_list = []
                        # Collect the result set one record at a time.
                        while rs.error_code == '0' and rs.next():
                            data_list.append(rs.get_row_data())

                        # Re-check after draining: an error raised while
                        # paging must not be mistaken for a complete result.
                        if rs.error_code == '0':
                            result = pd.DataFrame(data_list, columns=rs.fields)
                            if load_data_func_params:
                                params = copy.deepcopy(load_data_func_params)
                            else:
                                params = {}
                            params['result'] = result
                            params['bs_code'] = task.bs_code
                            params['frequency'] = frequency
                            params['adjustflag'] = adjustflag
                            future = executor.submit(load_data_func, params)
                            submitted.append((task.bs_code, future))
                            task.finished = True
                            break

                        if attempt < max_try - 1:
                            time.sleep(2)
                    else:
                        # Every attempt failed; rs holds the last response.
                        _logger.error(
                            '获取历史A股K线数据失败/query_history_k_data_plus respond error_code:'
                            + rs.error_code)
                        _logger.error(
                            '获取历史A股K线数据失败/query_history_k_data_plus respond error_msg:'
                            + rs.error_msg)

                    # NOTE(review): committed once per task so progress is
                    # persisted as it is made; the final state matches a
                    # single commit after the loop.
                    sm.commit()

            # Surface loader failures that would otherwise be dropped with
            # the discarded futures. exception() waits for completion.
            for bs_code, future in submitted:
                error = future.exception()
                if error is not None:
                    _logger.error('load_data_func failed for %s: %r', bs_code, error)
    finally:
        # Log out even if a query or the session raised.
        bs.logout()