def get_benchmark_data(benchmark, start_date, end_data):
    """Fetch daily close prices for a benchmark index.

    Parameters
    ----------
    benchmark : str
        Index identifier understood by the configured data source.
    start_date, end_data : str
        Date bounds in 'YYYY-MM-DD' form.  ('end_data' is kept as-is for
        backward compatibility with keyword callers; it is the end date.)

    Returns
    -------
    pandas.DataFrame indexed by trade date with a 'closePrice' column.

    Raises
    ------
    KeyError
        When the DataYes source is selected and DATAYES_TOKEN is unset.
    """
    if Settings.data_source == DataSource.DXDataCenter:
        benchmark_data = api.GetIndexBarEOD(instrumentIDList=benchmark,
                                            startDate=start_date,
                                            endDate=end_data,
                                            field=['closePrice'])
    elif Settings.data_source == DataSource.DataYes:
        import os
        import tushare as ts
        try:
            ts.set_token(os.environ['DATAYES_TOKEN'])
        except KeyError:
            # Was a bare `raise`, which surfaced only KeyError('DATAYES_TOKEN');
            # re-raise with an actionable message (still a KeyError for callers).
            raise KeyError("environment variable DATAYES_TOKEN must be set "
                           "to use the DataYes data source")
        mt = ts.Market()
        # DataYes wants compact YYYYMMDD dates.
        benchmark_data = mt.MktIdxd(benchmark,
                                    beginDate=start_date.replace('-', ''),
                                    endDate=end_data.replace('-', ''),
                                    field='closeIndex,tradeDate')
        benchmark_data = benchmark_data.set_index('tradeDate')
        benchmark_data = benchmark_data.rename(columns={'closeIndex': 'closePrice'})
        benchmark_data.index = pd.to_datetime(benchmark_data.index, format="%Y-%m-%d")
    return benchmark_data
def import_datayes_daily_data(start_date, end_date, cont_list=None, is_replace=False):
    """Import DataYes daily futures bars into the 'fut_daily' DB table.

    Parameters:
        start_date, end_date: datetime.date bounds, inclusive.
        cont_list: optional list of contract tickers to restrict the import;
            None/empty means all contracts.  (None default replaces the old
            mutable default `[]`; behavior is unchanged.)
        is_replace: forwarded to db.insert_daily_data to overwrite rows.
    """
    if cont_list is None:
        cont_list = []
    # Candidate dates: weekdays that are not CHN holidays.
    numdays = (end_date - start_date).days + 1
    date_list = [start_date + datetime.timedelta(days=x) for x in range(numdays)]
    date_list = [d for d in date_list
                 if (d.weekday() < 5) and (d not in misc.CHN_Holidays)]
    # Token/session setup is loop-invariant; previously re-done every day.
    ts.set_token(misc.datayes_token)
    mkt = ts.Market()
    for d in date_list:
        cnt = 0
        dstring = d.strftime('%Y%m%d')
        df = mkt.MktFutd(tradeDate=dstring)
        if len(df.ticker) == 0:
            continue
        for cont in df.ticker:
            if (len(cont_list) > 0) and (cont not in cont_list):
                continue
            data = df[df.ticker == cont]
            if len(data) == 0:
                # Single-arg print(...) works identically under py2 and py3.
                print('no data for %s for %s' % (cont, dstring))
            else:
                data_dict = {
                    'date': d,
                    'open': float(data.openPrice),
                    'close': float(data.closePrice),
                    'high': float(data.highestPrice),
                    'low': float(data.lowestPrice),
                    'volume': int(data.turnoverVol),
                    'openInterest': int(data.openInt),
                }
                # Only persist rows that actually traded.
                if data_dict['volume'] > 0:
                    cnt += 1
                    db.insert_daily_data(cont, data_dict,
                                         is_replace=is_replace,
                                         dbtable='fut_daily')
        print('date=%s, insert count = %s' % (d, cnt))
def run():
    """Rank listed stocks by how far today's PE sits above its historical low.

    Reads current PE from DataYes MktEqud, joins against the minimum PE held
    in Mongo's year_min_value collection, and rewrites the lowest_pe_stock
    collection with {code, name, pe, min_pe, rate} records.
    """
    ts.set_token(ct.DATA_YES_TOKEN)
    st = ts.Market()
    today = datetime.strftime(datetime.today(), "%Y%m%d")
    # Use the computed trade date; the previous hard-coded "20160513" was
    # debug leftover that kept the job permanently stale (and left `today`
    # unused).
    stock_list = st.MktEqud(tradeDate=today, field="ticker,PE,secShortName")
    if not isinstance(stock_list, pd.DataFrame) or stock_list.empty:
        return
    stock_list['ticker'] = stock_list['ticker'].map(lambda x: str(x).zfill(6))
    result = []
    mongo = Mongo()
    db = mongo.getDB()
    for i in stock_list.index:
        code = stock_list.loc[i, 'ticker']
        pe = stock_list.loc[i, 'PE']
        name = stock_list.loc[i, 'secShortName']
        if np.isnan(pe):
            continue
        cursor = db.year_min_value.find({"ticker": code})
        if cursor.count() <= 0:
            continue
        pe_list = [row['pe'] for row in cursor]
        min_pe = min(pe_list)
        if min_pe == 0:
            # Guard against ZeroDivisionError on degenerate historical data.
            continue
        rate = (pe - min_pe) / min_pe
        result.append({"code": code, "name": name, "pe": pe,
                       "min_pe": min_pe, "rate": rate})
    df = pd.DataFrame(result)
    # Replace the previous snapshot wholesale.
    if db.lowest_pe_stock.find().count() > 0:
        db.lowest_pe_stock.remove()
    db.lowest_pe_stock.insert(json.loads(df.to_json(orient='records')))
def load_Internet_data(self):
    """Fetch dividend/adjustment records for every tracked A-share ticker.

    Queries tushare's MktAdjf once per ticker, stacks the per-ticker frames,
    and renames the raw API fields to the module's column constants.

    Returns:
        The combined DataFrame, or None when no ticker yielded any rows
        (previously this case crashed on `None.rename`).
    """
    mt = ts.Market()
    data = None
    for sid in self.ashare_stocks.get_all_sids():
        df = mt.MktAdjf(ticker=sid, field=','.join(FETCH_FIELDS))
        if df is not None and not df.empty:
            # Overwrite the echoed ticker column with our own sid so later
            # joins line up.  (The old astype('string') immediately before
            # this assignment was dead code -- the column was replaced on
            # the very next line.)
            df[FETCH_FIELDS[0]] = sid
            if data is None or data.empty:
                data = df
            else:
                data = data.append(df, ignore_index=True)
    if data is None:
        return None
    data.rename(columns={
        FETCH_FIELDS[0]: TICKER_FIELD,
        FETCH_FIELDS[1]: DIV_DATE_FIELD,
        FETCH_FIELDS[2]: PER_CASH_FIELD,
        FETCH_FIELDS[3]: PER_SHARE_DIV_FIELD,
        FETCH_FIELDS[4]: PER_SHARE_TRANS_FIELD,
        FETCH_FIELDS[5]: ALLOTMENT_RATIO_FIELD,
        FETCH_FIELDS[6]: ALLOTMENT_PRICE_FIELD,
        FETCH_FIELDS[7]: ADJ_FACTOR_FIELD,
        FETCH_FIELDS[8]: ACCUM_ADJ_FACTOR_FIELD
    }, inplace=True)
    return data
def get_equity_eod(instruments, start_date, end_date):
    """Fetch end-of-day close prices for a list of equities.

    Parameters
    ----------
    instruments : list of str
        Ticker identifiers.
    start_date, end_date : str
        Date bounds in 'YYYY-MM-DD' form.

    Returns
    -------
    pandas.DataFrame indexed by tradeDate, one closePrice column per ticker.

    Raises
    ------
    KeyError
        When the DataYes source is selected and DATAYES_TOKEN is unset.
    """
    if Settings.data_source == DataSource.DXDataCenter:
        data = api.GetEquityBarEOD(instrumentIDList=instruments,
                                   startDate=start_date,
                                   endDate=end_date,
                                   field='closePrice',
                                   instrumentIDasCol=True,
                                   baseDate='end')
    elif Settings.data_source == DataSource.DataYes:
        import os
        import tushare as ts
        try:
            ts.set_token(os.environ['DATAYES_TOKEN'])
        except KeyError:
            # Was a bare `raise`; keep the exception type but make the
            # message actionable.
            raise KeyError("environment variable DATAYES_TOKEN must be set "
                           "to use the DataYes data source")
        mt = ts.Market()
        res = []
        for ins in instruments:
            data = mt.MktEqud(ticker=ins,
                              beginDate=start_date.replace('-', ''),
                              endDate=end_date.replace('-', ''),
                              field='tradeDate,ticker,closePrice')
            res.append(data)
        data = pd.concat(res)
        data['tradeDate'] = pd.to_datetime(data['tradeDate'], format='%Y-%m-%d')
        # Zero-pad tickers (the API returns them as ints).
        data['ticker'] = data['ticker'].apply(lambda x: '{0:06d}'.format(x))
        data.set_index(['tradeDate', 'ticker'], inplace=True, verify_integrity=True)
        # Pivot tickers into columns.
        data = data.unstack(level=-1)
    return data
def getNews(self):
    """Print the active DataYes token and a realtime tick snapshot for 000001.XSHE."""
    # SECURITY NOTE(review): API token is hard-coded; consider moving it to
    # configuration or an environment variable.
    api_token = '60517739976b768e07823056c6f9cb0fee33ed55a1709b3eaa14a76c6a1b7a56'
    ts.set_token(api_token)
    print(ts.get_token())
    snapshot = ts.Market().TickRTSnapshot(securityID='000001.XSHE')
    print(snapshot)
def quanshan():
    """Attempt a realtime tick snapshot for 000001.XSHE with a fixed token.

    NOTE: marked '#Failed' by the original author -- the snapshot call was
    not working with this token at the time.
    """
    ts.set_token('de0596189f600d1dc59c509e5b6a1387e4e29cb6225697a25ef9d5d2a425d854')
    ts.get_token()
    market = ts.Market()
    print(market)
    snapshot = market.TickRTSnapshot(securityID='000001.XSHE')
    print(snapshot)
def get_hs300():
    """Download HS300 (000300.ZICN) daily quotes and append them to hs300_info."""
    market = ts.Market()
    # Trade date + close level for the index.
    quotes = market.MktIdxd(indexID='000300.ZICN', field=u"tradeDate,closeIndex")
    quotes.columns = ['nav_date', 'close_price']
    cfg = setting.DATABASES['fund_stat']
    conn, cr = create_db_connection(cfg['HOST'], cfg['DB'], cfg['PORT'],
                                    cfg['USER'], cfg['PASSWD'])
    # NOTE(review): flavor='mysql' exists only in legacy pandas (<0.19);
    # confirm the pinned pandas version before upgrading.
    pd.io.sql.to_sql(quotes, "hs300_info", conn, flavor='mysql',
                     if_exists='append', index=False)
    db_close(conn, cr)
def __init__(self, **kwargs):
    """Set up the DataYes market-data handler.

    Expected kwargs:
        logger, symbolList: forwarded to the base handler (required).
        token: optional DataYes API token; when absent or falsy, falls back
            to the DATAYES_TOKEN environment variable.
        startDate, endDate: datetime-like bounds for data retrieval (required).
        benchmark: optional benchmark symbol to preload alongside the data.

    Raises:
        ValueError: when no token is supplied and DATAYES_TOKEN is unset.
    """
    super(DataYesMarketDataHandler, self).__init__(kwargs['logger'],
                                                   kwargs['symbolList'])
    # .get() also covers a missing 'token' key; kwargs['token'] raised
    # KeyError before the env-var fallback could run.
    if kwargs.get('token'):
        ts.set_token(kwargs['token'])
    else:
        try:
            token = os.environ['DATAYES_TOKEN']
            ts.set_token(token)
        except KeyError:
            # Fixed typo ("envirement") in the user-facing message.
            raise ValueError("Please input token or set up DATAYES_TOKEN "
                             "in the environment.")
    self.idx = ts.Idx()
    self.mt = ts.Market()
    self.startDate = kwargs['startDate'].strftime("%Y%m%d")
    self.endDate = kwargs['endDate'].strftime("%Y%m%d")
    self._getDatas()
    if kwargs.get('benchmark'):
        self._getBenchmarkData(kwargs['benchmark'], self.startDate, self.endDate)
def run(): '''get qoute data ''' #set log LOGGER_NAME = "HISTORY_DATA" mylog = logger.getLogger(LOGGER_NAME) #get the stock list today = datetime.strftime(datetime.today(),"%Y%m%d") mongo = Mongo() db = mongo.getDB() cursor = db.stock_list.find({"listStatusCD":"L"}) for row in cursor: ticker = str(row['ticker']) mylog.info("update history data of %s"%(ticker)) exchangeCD = str(row['exchangeCD']) listDate = str(row['listDate']).replace("-", "").replace("NaN", "") if exchangeCD == 'XSHG' and not ticker.startswith("6"): continue #get hist data cursor2 = db.cn_stock_hist.find({"ticker":ticker}).sort("tradeDate",pymongo.DESCENDING).limit(1) st = ts.Market() if cursor2.count() > 0: start_date = str(cursor2[0]['tradeDate']).replace("-", "") df = st.MktEqud(ticker=ticker,beginDate=start_date, endDate=today, field="") if df is not None and not df.empty: if df['accumAdjFactor'][0] == 1: mylog.info("update new data of %s from %s"%(ticker,start_date)) df = df[1:] else: mylog.info("factor change remove data of %s"%(ticker)) db.cn_stock_hist.remove({"ticker":ticker}) df = st.MktEqud(ticker=ticker,beginDate=listDate, endDate=today, field="") else: mylog.info("fisrt insert data of %s"%(ticker)) df = st.MktEqud(ticker=ticker,beginDate=listDate, endDate=today, field="") if df is not None and not df.empty: mylog.info("processing insert of %s"%(ticker)) df['ticker'] = df['ticker'].map(lambda x: str(x).zfill(6)) db.cn_stock_hist.insert(json.loads(df.to_json(orient='records'))) time.sleep(1)
def getVolumeRatio(startDate, endDate, path, n=30):
    """Build a (trading day x stock) frame of adjusted average volume over
    the first `n` minute-bars of each session.

    Parameters:
        startDate, endDate: date bounds passed to Wind (w.tdays) and tushare.
        path: directory holding per-stock minute-bar CSVs (GB2312 encoded).
        n: number of opening minute-bars to average per day.

    Returns:
        (volume_ratio_df, wrong_list): the result frame and the list of
        tickers that failed to load or process.
    """
    stocklist = getStkcdList(path)
    tradingDays = pd.Series(
        w.tdays(startDate, endDate, "").Data[0]).map(
            lambda x: x.strftime("%Y-%m-%d")).tolist()
    volume_ratio_df = pd.DataFrame(columns=stocklist, index=tradingDays)
    t1 = time.time()
    wrong_list = []
    # Token/session setup is loop-invariant; previously re-done per stock.
    # SECURITY NOTE(review): token is hard-coded; consider configuration.
    ts.set_token(
        'b974d4912cd4b2cf9637a940100ae5b872576fcce85abd3db5e4c8173b130c47')
    st = ts.Market()
    for stk in stocklist:
        try:
            print(stk)
            data_stk = pd.read_csv(path + stk + ".csv", encoding='GB2312')
            # First n minute-bars of each trading day.
            data_stk = data_stk.sort_values(by=['dataDate', 'barTime'],
                                            ascending=[True, True])
            data_stk_n = data_stk.groupby(['dataDate']).head(n)
            data_stk_n_avg = pd.DataFrame(
                data_stk_n.groupby(['dataDate'])['totalVolume'].mean())
            # De-adjust volume with the cumulative adjustment factor.
            df_adjfactor = st.MktEqud(secID=stk,
                                      beginDate=startDate,
                                      endDate=endDate,
                                      field="tradeDate,accumAdjFactor")
            data_stk_n_avg_adj = pd.merge(data_stk_n_avg, df_adjfactor,
                                          left_index=True,
                                          right_on="tradeDate")
            data_stk_n_avg_adj["adjtotalVolume"] = (
                data_stk_n_avg_adj["totalVolume"]
                / data_stk_n_avg_adj["accumAdjFactor"])
            data_stk_n_avg_adj.index = data_stk_n_avg_adj['tradeDate']
            volume_ratio_df[stk] = data_stk_n_avg_adj['adjtotalVolume']
        except Exception:
            # Narrowed from a bare `except:` -- still best-effort per stock,
            # but no longer swallows KeyboardInterrupt/SystemExit.
            wrong_list.append(stk)
    t2 = time.time()
    print(t2 - t1)
    return volume_ratio_df, wrong_list
def collect(self):
    """Collect daily history bars for this stock and upsert them into Mongo.

    Fetches bars from tushare (full history when no local begin date exists,
    incremental otherwise), computes adjusted close and MA5/10/20 for open
    trading days, and upserts each record keyed by trade date.
    """
    begin_date = self.__get_begin_date()
    end_date = Util.get_today()
    _logger.info(
        'collect stock(%s) history data, begin date: %r, end date: %r.' %
        (self.__stock_code, begin_date, end_date))
    market = ts.Market()
    if begin_date == end_date:
        # Already up to date.
        return
    elif not begin_date or len(begin_date) == 0:
        # No local history yet: pull the full series.
        result = market.MktEqud(ticker=self.__stock_code, field=self.FIELDS)
    else:
        result = market.MktEqud(ticker=self.__stock_code,
                                beginDate=begin_date,
                                endDate=end_date,
                                field=self.FIELDS)
    if result is None:
        # warning() replaces the deprecated warn() alias; the old message
        # ("could get") said the opposite of what happened.
        _logger.warning('could not get stock(%r) history data from tushare.' %
                        self.__stock_code)
        return
    if begin_date:
        # Seed the rolling close-price window so the moving averages are
        # continuous across the incremental boundary.
        self.__get_history_close_price(begin_date)
    for i in range(len(result)):
        record = result.iloc[i].to_dict()
        if record['isOpen'] == 1:
            fq_factor = record['accumAdjFactor']
            record['fqPrice'] = record['closePrice'] * fq_factor
            self.__hist_close_price.append(record['fqPrice'])
            record['ma5'] = self.__get_ma5_price()
            record['ma10'] = self.__get_ma10_price()
            record['ma20'] = self.__get_ma20_price()
        # NOTE(review): upsert runs for every record, including non-open
        # days (which then lack fqPrice/ma fields) -- confirm this matches
        # the original indentation intent.
        self.__collection.insert_and_update('date', record['tradeDate'], **record)
def downDBFunc(codeid, startdate, enddate):
    """Download adjusted daily bars for one ticker and save them as CSV.

    Parameters:
        codeid: six-digit ticker string.
        startdate, enddate: date bounds passed to MktEqudAdj.

    Returns:
        True on success; False when the request times out.
    """
    try:
        tstool.set_token(
            'c8697bdda449438ececb003f8ec3ce15ab785d49d825b07d84330f33c2a614cf')
        st = tstool.Market()
        # %-formatted single-arg print(...) behaves identically under
        # py2 and py3 (the old two-arg `print 'get id =', codeid` statement
        # was py2-only syntax).
        print('get id = %s' % codeid)
        df2 = st.MktEqudAdj(beginDate=startdate, endDate=enddate, ticker=codeid)
        # os.path.join instead of manual separator concatenation.
        savename = os.path.join(dbDir, codeid + '.csv')
        df2.to_csv(savename)
        print('save file:%s' % (savename))
        return True
    except TimeoutError:
        # NOTE(review): TimeoutError is not a builtin on Python 2 -- confirm
        # it is imported/defined elsewhere in this module if py2 support is
        # still needed.
        print('time out')
        return False
def fetch_cnstock_hist_to_mongo():
    '''Backfill full listed-stock daily history into Mongo's cn_stock_hist.

    Skips tickers that already have any history rows; throttles one API
    request per second.
    '''
    # set log
    LOGGER_NAME = "TONGLIAN_DATA"
    mylog = logger.getLogger(LOGGER_NAME)
    # get the stock list
    today = datetime.strftime(datetime.today(), "%Y%m%d")
    mongo = Mongo()
    db = mongo.getDB()
    cursor = db.stock_list.find({
        "exchangeCD": {
            "$in": ["XSHE", "XSHG"]
        },
        "listStatusCD": "L"
    })
    # One Market client for the whole run; constructing it per ticker was
    # pure overhead.
    st = ts.Market()
    for row in cursor:
        ticker = str(row['ticker'])
        mylog.info("update history data of %s" % (ticker))
        exchangeCD = str(row['exchangeCD'])
        listDate = str(row['listDate']).replace("-", "").replace("NaN", "")
        # Shanghai-listed tickers are expected to start with '6'.
        if exchangeCD == 'XSHG' and not ticker.startswith("6"):
            continue
        # Already populated -> nothing to do.
        cursor2 = db.cn_stock_hist.find({"ticker": ticker})
        if cursor2.count() > 0:
            continue
        mylog.info("insert data of %s" % (ticker))
        df = st.MktEqud(ticker=ticker, beginDate=listDate, endDate=today,
                        field="")
        # Guard: MktEqud can return None/empty; the old code crashed on
        # df['ticker'] in that case (the sibling updater already guards).
        if df is None or df.empty:
            continue
        df['ticker'] = df['ticker'].map(lambda x: str(x).zfill(6))
        db.cn_stock_hist.insert(json.loads(df.to_json(orient='records')))
        # Throttle: one request per second.
        time.sleep(1)
def export_ratio_table(code, start, end, thread_id):
    """Compute and export the wave-ratio tables for one ticker.

    Fetches daily OHLC from DataYes, derives per-day max/min wave ratios,
    builds the ratio table and the length-ratio frame, and writes both to
    CSV paths supplied by cfg.

    Parameters:
        code: ticker string.
        start, end: date bounds for the quote fetch.
        thread_id: identifier used only in timing log output.

    Returns:
        The length-ratio DataFrame.
    """
    ts.set_token(cfg.get_datayes_key())
    mkt = ts.Market()
    st = time.time()
    df = mkt.MktEqud(ticker=code, beginDate=start, endDate=end,
                     field='ticker,tradeDate,preClosePrice,openPrice,highestPrice,lowestPrice,closePrice')
    print(" Thread {0} fetch online: {1}".format(thread_id, time.time() - st))
    wave_ratio_df = pd.DataFrame(columns=["max_ratio", "min_ratio"])
    for i, row in df.iterrows():
        # Renamed from 'dict' -- the old name shadowed the builtin.
        ratios = wv.calc_wave_ratio(row["preClosePrice"], row["openPrice"],
                                    row["highestPrice"], row["lowestPrice"])
        wave_ratio_df.loc[row["tradeDate"]] = ratios
    st = time.time()
    idx_col = wv.calc_ratio_table_index_and_columns(max_ratio=0.03,
                                                    min_ratio=-0.03)
    index, columns = idx_col["index"], idx_col["columns"]
    ratio_table = wv.calc_ratio_table(wave_ratio_df, index, columns)
    print(" Thread {0} calc ratio table: {1}".format(thread_id, time.time() - st))
    st = time.time()
    length_ratio_df = wv.calc_length_ratio(ratio_table, len(wave_ratio_df.index))
    print(" Thread {0} calc length ratio: {1}".format(thread_id, time.time() - st))
    # write csv
    ratio_table.to_csv(cfg.get_ratio_table_path(code, start, end))
    length_ratio_df.to_csv(cfg.get_length_ratio_path(code, start, end))
    return length_ratio_df
def load_Internet_data(self):
    """Fetch daily quote records for every tracked A-share ticker.

    Queries tushare's MktEqud once per ticker, stacks the per-ticker frames,
    and renames the raw API fields to the module's column constants.

    Returns:
        The combined DataFrame, or None when no ticker yielded any rows
        (previously this case crashed on `None.rename`).
    """
    mt = ts.Market()
    data = None
    for sid in self.ashare_stocks.get_all_sids():
        df = mt.MktEqud(ticker=sid, field=','.join(FETCH_FIELDS))
        if df is not None and not df.empty:
            # Overwrite the echoed ticker with our own sid.  (The old
            # astype('string') right before this assignment was dead code --
            # the column was replaced on the very next line.)
            df[FETCH_FIELDS[0]] = sid
            if data is None or data.empty:
                data = df
            else:
                data = data.append(df, ignore_index=True)
        # NOTE(review): printed per sid; len(data) assumes at least one
        # ticker has returned rows by now, matching the original behavior.
        print('sid: ' + str(sid) + ' completed, size ' + str(len(data)))
    if data is None:
        return None
    data.rename(columns={
        FETCH_FIELDS[0]: TICKER_FIELD,
        FETCH_FIELDS[1]: TRADE_DATE_FIELD,
        FETCH_FIELDS[2]: PRE_CLOSE_PRICE_FIELD,
        FETCH_FIELDS[3]: OPEN_PRICE_FIELD,
        FETCH_FIELDS[4]: HIGHEST_PRICE_FIELD,
        FETCH_FIELDS[5]: LOWEST_PRICE_FIELD,
        FETCH_FIELDS[6]: CLOSE_PRICE_FIELD,
        FETCH_FIELDS[7]: TURNOVER_VOL_FIELD,
        FETCH_FIELDS[8]: TURNOVER_VALUE_FIELD,
        FETCH_FIELDS[9]: DEAL_AMOUNT_FIELD,
        FETCH_FIELDS[10]: TURNOVER_RATE_FIELD,
        FETCH_FIELDS[11]: NEG_MARKET_VALUE_FIELD,
        FETCH_FIELDS[12]: MARKET_VALUE_FIELD,
        FETCH_FIELDS[13]: IS_OPEN_FIELD,
        FETCH_FIELDS[14]: PE_FIELD,
        FETCH_FIELDS[15]: SUPPOSED_PE_FIELD,
        FETCH_FIELDS[16]: SUPPOSED_PB_FIELD
    }, inplace=True)
    return data
'1075d9a4eb51461266520905807f7d68806f68511ad0c90d938dc81af9ea6dee') #get all stock code, name ticker,secShortName,exchangeCD,listDate b01 = ts.Master().SecID(assetClass='E', field='ticker,exchangeCD') b01 = b01[((b01['exchangeCD'] == 'XSHE') | (b01['exchangeCD'] == 'XSHG')) & (b01['ticker'].str.len() == 6)].rename(columns={u'ticker': 'code'}) b02 = ts.get_stock_basics().loc[:, ['name', 'outstanding', 'timeToMarket']] b02['code'] = b02.index.values b0 = pd.merge(b02, b01, how='left', on='code') b1 = list(b0.code) #get Adj factor adj0 = pd.DataFrame() for i in list(range(len(b1)))[::451]: adj1 = ts.Market().MktAdjf(ticker=','.join(b1[i:min(i + 450, len(b1))]), field='ticker,exDivDate,adjFactor') if adj0.empty: adj0 = adj1 else: adj0 = pd.concat([adj0, adj1]) adj0 = adj0.rename(columns={u'ticker': 'code', u'exDivDate': 'date'}) adj0['code'] = adj0['code'].map(lambda x: str(x).zfill(6)) #if csv file not exist, then get all history data if not [ x for x in os.listdir('./rawdata') if os.path.splitext(x)[1] == '.csv' ]: h0 = pd.DataFrame() for code in b1: h1 = ts.get_hist_data(code)
import pandas as pd import math import os from datetime import datetime from pandas.stats.api import ols from pandas.tools.plotting import scatter_matrix #from __future__ import print_function import statsmodels.api as sm import matplotlib.pyplot as plt import matplotlib from statsmodels.sandbox.regression.predstd import wls_prediction_std from scipy.integrate import quad from sympy import * #ts.set_token('a76443ee238b9549846083d5535bae10b278fa59daddba4bb6dbae6b9f2b6d1a') mkt = ts.Market() print(start_date) # In[13]: CACHE_FOLDER = "./cache/" def CacheConstructor(function): def CachedFunction(*args, **kargs): cached = True file_name = CACHE_FOLDER + function.__module__ + '.' + function.__name__ + '().hdf5' with pd.HDFStore(file_name, format='table') as storage: #Build key and check for alpha_numerics: karg_list = []
# 市场行情数据 db_MktEqud_coll = DB_GETRICH.MktEqud ## 沪深股票日行情 db_MktFutd_coll = DB_GETRICH.MktFutd ## 期货日行情(主力以持仓量计算) db_MktIdxd_coll = DB_GETRICH.MktIdxd ## 指数日行情 db_MktBlockd_coll = DB_GETRICH.MktBlockd ## 沪深大宗交易 db_MktRepod_coll = DB_GETRICH.MktRepod ## 债券回购交易日行情 db_MktBondd_coll = DB_GETRICH.MktBondd ## 债券日行情 db_MktHKEqud_coll = DB_GETRICH.MktHKEqud ## 港股日行情 db_TickRTSnapshot_coll = DB_GETRICH.TickRTSnapshot ## 获取最新市场信息快照 db_TickRTSnapshotIndex_coll = DB_GETRICH.TickRTSnapshotIndex ## 获取指数成分股的最新市场信息快照 db_FutureTickRTSnapshot_coll = DB_GETRICH.FutureTickRTSnapshot ## 获取期货最新市场信息快照 # 获取历史某一日股票行情数据,包括了停牌股票(停牌的报价都是0) tlMarket = ts.Market() print("Get market data") calendarDate = db_TradeCal_coll.find() for i in calendarDate: print(i.replace(lambda x:x.)) #for date in db_TradeCal_coll.find({"calendarDate"}): for date in ['20160316']: df = tlMarket.MktEqud(tradeDate=date) js_df = df[1:].to_json(orient="records") db_json = json.loads(js_df) db_MktEqud_coll.insert(db_json) df = tlMarket.MktIdxd(tradeDate=date) js_df = df[1:].to_json(orient="records") db_json = json.loads(js_df)
path = 'D:\\Box Sync\\DATAbase\\Finance-Data\\' # get stock basic information eq = ts.Equity() stock_basic = eq.Equ( equTypeCD='A', listStatusCD='L', field='secID,ticker,secShortName,totalShares,nonrestFloatShares,TShEquity') stock_small_list = stock_basic.loc[stock_basic['secID'].str.contains('002'), ] # get stock industy eq = ts.Equity() stock_industry = eq.EquIndustry(industryVersionCD='010301') mk = ts.Market() stock_all_history = mk.MktEqud(secID=','.join( stock_small_list['secID'].values[0:5]), beginDate='20150101', endDate='20170905') stock_all_history.to_csv(path + "5_15-17.csv", header=True, encoding='utf-8', sep='\t') # financial report info 要分银行 证券 和一般工商业 bd = ts.Fundamental() df_BSIndu_temp = bd.FdmtBSIndu( ticker='000002', field= 'reportType,secID,ticker,endDate,cashCEquiv,tradingFA,NotesReceiv,AR,prepayment,inventories,TCA,LTReceive,LTEquityInvest,investRealEstate,fixedAssets,RD,TNCA,TAssets,STBorr,tradingFL,NotesPayable,AP,advanceReceipts,TCL,LTBorr,TNCL,TLiab,paidInCapital,retainedEarnings,TShEquity'
# -*- coding: utf-8 -*- ########################################################################### ##dzhdaydata.py 大智慧上深股日线数据处理 ########################################################################### import tushare as ts st = ts.Market() from sqlalchemy import create_engine import os import struct import datetime import pandas as pd import time nowtime = time.strftime('%Y-%m-%d %H:%M:%S') engine = create_engine('mysql://*****:*****@127.0.0.1/db_dzh?charset=utf8') import MySQLdb db = MySQLdb.connect("127.0.0.1", "root", "root", "db_dzh", charset='utf8') cursor = db.cursor() #获取目录下所有文件列表名,形如['000001.day', '000002.day'] def get_recursive_file_list(path): current_files = os.listdir(path) all_files = [] for file_name in current_files: #full_file_name = os.path.join(path, file_name)#全路径名 full_file_name = file_name all_files.append(full_file_name) if os.path.isdir(full_file_name): next_level_files = get_recursive_file_list(full_file_name)