Beispiel #1
0
def DownloadData(local, st, q):
    '''
    Download the daily history of one stock and store it in a shared HDF5 file.

    local: HDF key of the form '/<kind>/<code>'. <kind> is passed to tushare
        as ``autype`` ('cq' is mapped to None); <code> is the stock code.
    st: start date string; it is only used when longer than 8 characters
        (presumably 'YYYY-MM-DD' -- confirm with callers).
    q: queue holding the HDF5 file name. The name is taken out while this
        call writes and is always put back, even when writing fails.

    Returns [True, code] when data was written, otherwise [False, code].
    '''
    t = local.split('/')
    kind = t[1]
    code = t[2]

    if kind == 'cq':
        kind = None
    try:
        if len(st) > 8:
            df = ts.get_h_data(code, start=st, autype=kind)
        else:
            df = ts.get_h_data(code, autype=kind)

        # Nothing downloaded: report failure instead of falling off the end.
        if df is None or len(df) == 0:
            return [False, code]

        df = df.sort_index()
        df = df.sort_index(axis=1)
        filename = q.get()
        try:
            try:
                df.to_hdf(filename, key=local)
            except Exception:
                # Fall back to writing through an explicit store handle.
                store = pd.HDFStore(filename, mode='a')
                try:
                    store[local] = df
                    store.flush()
                finally:
                    store.close()
        finally:
            # Hand the file name back no matter what, so other users of the
            # queue are not left waiting for it.
            q.put(filename)
        return [True, code]
    except Exception:
        return [False, code]
Beispiel #2
0
def plotstock(name,realname=u'代码'):
    """Plot a stock's close price together with the 'sh' series.

    Top panel: the full history from ``getstartdate()``, with the 'sh' close
    rescaled so that its mean equals the stock's mean. Bottom panel: the last
    10 rows of both series, rescaled the same way over that window.
    """
    start = getstartdate()
    stock = ts.get_h_data(name, start=start)
    stock.index = stock.index.astype('datetime64[D]')
    market = ts.get_h_data('sh', start=start)
    market.index = market.index.astype('datetime64[D]')
    stock.sort_index(inplace=True)
    market.sort_index(inplace=True)

    stock_close = stock["close"]
    market_close = market["close"]
    stock_mean = sum(stock_close) / len(stock_close)
    market_mean = sum(market_close) / len(market_close)
    ratio = market_mean / stock_mean

    # Full-history panel.
    plt.figure()
    plt.subplot(211)
    stock["close"].plot()
    market["close"] = market["close"] / ratio
    market["close"].plot()
    plt.legend([realname + ':' + name, u'上证综指'])

    # Last-10-rows panel.
    plt.subplot(212)
    recent_stock = stock["close"][-10:]
    recent_stock.plot()
    recent_market = market["close"][-10:]
    recent_stock_mean = sum(recent_stock) / len(recent_stock)
    recent_market_mean = sum(recent_market) / len(recent_market)
    (recent_market / recent_market_mean * recent_stock_mean).plot()
    plt.show()
Beispiel #3
0
def load_data_from_tushare(stock_number, start_date, end_date):
    """Fetch unadjusted and default-adjusted daily data and merge them.

    Downloads the history twice through tushare: once with ``autype=None``
    and once with the default ``autype``. Returns a DataFrame with the
    unadjusted open/high/low/close/vol/amount columns plus the "fuquan"
    open and close of the second download.

    On any failure an empty DataFrame with the same columns is returned.
    """
    columns = ["open price", "high price", "low price", "close price",
               "vol", "amount", "fuquan close price", "fuquan open price"]
    try:
        print("stock number for tushare {}".format(stock_number))
        raw_data = ts.get_h_data(stock_number, start=start_date, end=end_date, autype=None)
        # DataFrame.sort() no longer exists; sort_index() is the equivalent.
        raw_data = raw_data.sort_index()

        fuquan_data = ts.get_h_data(stock_number, start=start_date, end=end_date)
        fuquan_data = fuquan_data.sort_index()

        data = {
            "open price": raw_data["open"],
            "high price": raw_data["high"],
            "low price": raw_data["low"],
            "close price": raw_data["close"],
            "vol": raw_data["volume"],
            "amount": raw_data["amount"],
            "fuquan close price": fuquan_data["close"],
            "fuquan open price": fuquan_data["open"],
        }
        return pd.DataFrame(data)
    except Exception:
        # Best effort: callers get an empty frame with the usual columns.
        return pd.DataFrame({column: [] for column in columns})
Beispiel #4
0
def initData(code,Index=False):
    """
    Return the daily data set for a security, downloading it when needed.

    code: security code.
    Index: passed to tushare as ``index``. When truthy the data is downloaded
        and written to table 'zs<code>' (replacing it).

    For a stock, ``readydata('qfq<code>')`` decides what happens:
      1 -> read table 'qfq<code>' from the database, ordered by date;
      0 -> download, store as 'qfq<code>', record it in tb_stamp, and return
           None when the stock has no row for the latest date in zs159915
           (e.g. no data or trading suspended);
      anything else -> None.

    Returns the DataFrame, or None when no up-to-date data is available.
    """
    b2.log('获取%s数据..........'%code)
    print('获取%s数据..........'%code)
    if Index :
        ds=ts.get_h_data(code,index=Index)
        ds.to_sql('zs'+code,engine,if_exists='replace')
    else:
        tbstatus=readydata('qfq%s'%code)
        if  tbstatus==1:
            b2.log('qfq%s......Data Ready'%code)
            sqlcmd='select * from qfq%s order by date'%code
            ds=sql.read_sql(sqlcmd,engine,index_col='date')
        elif tbstatus==0:
            ds=ts.get_h_data(code)
            if ds is None:
                # Nothing to store for this code.
                return None
            # DataFrame.sort() no longer exists; sort_index() is the equivalent.
            ds=ds.sort_index()
            ds.to_sql('qfq'+code,engine,if_exists='replace')
            # NOTE(review): table names cannot be bound as SQL parameters, so
            # `code` must come from a trusted source.
            engine.execute('''insert into tb_stamp values ('qfq%s',curdate())'''%code)
            sqlcmd='''select count(*) from qfq%s where date=(select max(date) from zs159915)'''%(code)
            result=engine.execute(sqlcmd)
            if list(result)[0][0]==0:
                ds=None
        else:
            ds=None
    return ds
 def download_stock_hist_price(self, code, start, end=None):
     """Download the history of ``code`` from ``start`` (up to ``end`` when given)."""
     # print() with one argument behaves the same on Python 2 and 3.
     print("downloading " + code)
     if end is None:
         return ts.get_h_data(code, start)
     return ts.get_h_data(code, start, end)
def get_adj_price_data(stocks_index, timeToMarket):
    '''
    Download the 'hfq' adjusted price history in windows of 730 days.

    stocks_index ::  a string of the stock number like '002292'
    timeToMarket ::  a string of the IPO date like '20120229'

    return :: a DataFrame containing the adj. price data, with the chunk of
              the most recent window first; None when no window had data.
    '''
    # Local import: the original block only relied on `ts` and `datetime`.
    import pandas as pd

    window = datetime.timedelta(days=730)  # getting data of 2 years each time
    today = datetime.datetime.today()
    d0 = datetime.datetime(int(timeToMarket[0:4]),
                           int(timeToMarket[4:6]),
                           int(timeToMarket[6:8]))

    chunks = []
    while True:
        d1 = d0 + window
        is_last = (today - d1).days <= 0
        end = today if is_last else d1
        chunk = ts.get_h_data(stocks_index, autype='hfq',
                              start=d0.strftime('%Y-%m-%d'),
                              end=end.strftime('%Y-%m-%d'))
        # Skip windows without data explicitly instead of relying on a
        # swallowed exception, which also threw away good data.
        if chunk is not None and len(chunk) > 0:
            chunks.append(chunk)
        if is_last:
            break
        d0 = d1 + datetime.timedelta(days=1)

    if not chunks:
        return None
    # Keep the original ordering: newest window first.
    chunks.reverse()
    return pd.concat(chunks)
Beispiel #7
0
def getdata(code):
    """Download the history of ``code`` and append it to the 'h_data' table.

    The start date is looked up in ``stock_timetomarket``. When the plain
    download yields nothing, the download is retried with ``index=True``.
    Nothing is written when both downloads yield nothing.
    """
    dayhistory = ts.get_h_data(code, stock_timetomarket[code])
    # `not dataframe` raises ValueError, so test for "no data" explicitly.
    if dayhistory is None or dayhistory.empty:
        dayhistory = ts.get_h_data(code, stock_timetomarket[code], index=True)
    if dayhistory is None or dayhistory.empty:
        return
    dayhistory['code'] = code
    dayhistory.index = dayhistory.index.date
    print('\n code %s insert......\n' % code)
    dayhistory.to_sql('h_data', engine, if_exists='append')
Beispiel #8
0
	def __init__(self, industry, benchmark, datatype, dic, start,end):
		"""
		Build the benchmark returns and the aggregated return series of an industry.

		industry: industry label used to select rows of ``dic['stock']``.
		benchmark: code passed to ``ts.get_hist_data`` for the benchmark.
		datatype: 'cap' weights each stock by its share of ``totalAssets``;
			'beta' weights each stock by 1/beta against the benchmark returns.
		dic: mapping with at least the keys 'stock' and 'interest'.
		start, end: date strings; ``start`` has its '-' removed for the
			trade calendar query.

		Raises KeyError when ``datatype`` is neither 'cap' nor 'beta'.
		"""
		global ms
		ms = ts.Master()
		self.stock = dic['stock']
		self.interest = dic['interest']
		self.industry = industry
		self.date = pd.date_range(start, end)
		self.days = len(self.date)
		self.beta, self.alpha, self.ir = None, None, None
		# Trading day before `start`, so a return can be computed for `start`.
		first_prev = ms.TradeCal(exchangeCD='XSHG',
			beginDate=start.replace("-",""),
			endDate=start.replace("-",""),
			field='calendarDate,prevTradeDate').prevTradeDate.iloc[0]

		# Build the benchmark
		self.benchmark = ts.get_hist_data(benchmark, first_prev, end)
		self.benchreturn = returns(
			self.benchmark[self.benchmark.index >= start].close,
			self.benchmark.close[:-1])
		self.benchreturn.index = pd.DatetimeIndex([pd.to_datetime(i) for i in self.benchreturn.keys()])

		# Aggregate the industry return according to `datatype`
		if datatype == 'cap':
			weight = self.stock.totalAssets[self.stock.industry == industry]
			weight = 1.0 * weight / weight.sum() # a series of cap weights
			d = 0
			for i in weight.index:
				a = ts.get_h_data(i, first_prev, end)
				a_close = a.close
				if len(a.index) != len(self.benchmark.index):
					# presumably aligns the stock series to the benchmark dates -- confirm in equal_len
					a_close = equal_len(a.close, self.benchmark)
				return_of_i = returns(
					a_close[a_close.index >= start],
					a_close[a_close.index < end]
					) * weight.loc[i] # cap-weighted return of this stock (index is date)
				d = d + return_of_i
		elif datatype == 'beta':
			index_list = self.stock.index[self.stock.industry == industry]
			total_beta, d = 0, 0
			for i in index_list:
				a = ts.get_h_data(i, first_prev, end)
				return_of_i = returns(
					a[a.index >= start].close,
					a.close[:-1])
				beta_i = 1.0 / beta(return_of_i, self.benchreturn)
				d += return_of_i * beta_i
				# was `beta_is`, an undefined name
				total_beta += beta_i
			d = 1.0 * d / total_beta
		else:
			raise KeyError('datatype can only be cap or beta')

		self.returns = d
Beispiel #9
0
def request_dayk(table, code, engine, start_date = '1990-01-01', end_date = '2050-01-01'):
    """Download unadjusted and 'hfq' daily bars for ``code`` and append them to ``table``.

    The unadjusted frame gets four extra columns (open/high/low/close with an
    ``_hfq`` suffix) taken from the 'hfq' download, plus a ``code`` column.
    Failures are logged with their traceback and are not raised.
    """
    try:
        dayK_bfq = ts.get_h_data(code, start_date, end_date, None, retry_count=500)
        dayK_hfq = ts.get_h_data(code, start_date, end_date, 'hfq', retry_count=500)
        for column in ('open', 'high', 'low', 'close'):
            dayK_bfq[column + '_hfq'] = dayK_hfq[column]
        dayK_bfq['code'] = code
        dayK_bfq.to_sql(table, engine, if_exists='append', dtype={'date': Date})
        logging.info(str(code) + ', request_dayk success')
    except Exception:
        # logging.exception keeps the ERROR level and adds the traceback, so
        # the reason for the failure is not lost.
        logging.exception(str(code) + ' request_dayk failed on ' + str(threading.current_thread()))
Beispiel #10
0
def DownloadCqAll(code,st):
    '''
    Download the history of one stock with ``autype=None``.

    code: stock code string.
    st: start date string; it is only passed to tushare when longer than
        two characters.

    Returns [code, frame] with the frame sorted along both axes.
    '''
    common = dict(autype=None, retry_count=5, pause=1)
    if len(st) > 2:
        frame = ts.get_h_data(code, start=st, **common).sort_index(ascending=1)
    else:
        frame = ts.get_h_data(code, **common)
    frame = frame.sort_index(axis=0).sort_index(axis=1)
    return [code, frame]
Beispiel #11
0
def load_data_from_tushare_real_time(stock_number, start_date):
    """Fetch the daily history and append the latest quote from ``get_sina_data``.

    Downloads the history twice through tushare (``autype=None`` and the
    default ``autype``), appends the tuple returned by ``get_sina_data`` as
    one more row, and returns everything as a DataFrame of floats plus a
    ``date`` column of strings.

    On any failure an empty DataFrame with the same columns is returned.
    """
    columns = ["open price", "high price", "low price", "close price",
               "vol", "amount", "date",
               "fuquan close price", "fuquan open price"]
    try:
        print("stock number for tushare {}".format(stock_number))
        raw_data = ts.get_h_data(stock_number, start=start_date, autype=None)
        # DataFrame.sort() no longer exists; sort_index() is the equivalent.
        raw_data = raw_data.sort_index()

        fuquan_data = ts.get_h_data(stock_number, start=start_date)
        fuquan_data = fuquan_data.sort_index()

        o, h, l, c, v, a, d = get_sina_data(stock_number)

        def with_latest(series, latest):
            # Real lists (not map objects) so this also works on Python 3.
            return [float(value) for value in list(series.values) + [latest]]

        # Keep only the date part of each index entry.
        date = [str(stamp).split(" ")[0] for stamp in raw_data.index]
        date.append(d)

        data = {
            "open price": with_latest(raw_data["open"], o),
            "high price": with_latest(raw_data["high"], h),
            "low price": with_latest(raw_data["low"], l),
            "close price": with_latest(raw_data["close"], c),
            "vol": with_latest(raw_data["volume"], v),
            "amount": with_latest(raw_data["amount"], a),
            "date": date,
            # The latest row reuses the plain close/open from get_sina_data.
            "fuquan close price": with_latest(fuquan_data["close"], c),
            "fuquan open price": with_latest(fuquan_data["open"], o),
        }
        return pd.DataFrame(data)
    except Exception:
        # Best effort: callers get an empty frame with the usual columns.
        return pd.DataFrame({column: [] for column in columns})
Beispiel #12
0
def get_data():
    """Refill the global ``DatasrcMap`` with the latest rows of ``gl.STCode``.

    Downloads the history between the globals ``startDate`` and ``endDate``,
    keeps the last 70 rows in ascending date order and stores each as
    ``DatasrcMap[i] = [date, open, high, low, close, volume, amount]``.

    Returns 1 on success and -1 when the download fails or yields None.
    """
    global DatasrcMap
    DatasrcMap.clear()
    try:
        dataframe = ts.get_h_data(gl.STCode, start=startDate, end=endDate)  #, retry_count=10
    except Exception as e:
        print(e)
        print('sleep。。。。。。。。。。。。。。。。。')
        time.sleep(1)  # short pause after a failed download
        return -1

    if dataframe is None:
        print('\nNone。。。。。。。。。。。。。。。。。。。。')
        return -1

    print('\n0:tushare获取成功')
    dataframe.sort_index(inplace=True)  # ascending by date
    dataframe = dataframe.tail(10+60)  # keep the most recent 70 rows
    fields = ('open', 'high', 'low', 'close', 'volume', 'amount')
    # Read scalars row by row; float() on a one-row Series is deprecated.
    for day, (stamp, row) in enumerate(dataframe.iterrows()):
        DatasrcMap[day] = [stamp.strftime('%Y-%m-%d')] + [float(row[name]) for name in fields]
    return 1
Beispiel #13
0
def down_dk_all(code, i):
    """Download 'hfq' daily data for ``code`` and append it to table 'a<code>'.

    The start date is '2013-01-01' when the table does not exist, otherwise
    the latest date already stored. Only the columns left after removing
    open/high/low/volume are written, ``amount`` is divided by 10000, and the
    last row of the download is dropped before writing. The number of rows
    kept is stored in ``G_CODE.iat[i, 4]``.

    A ValueError during the download is printed and the download is retried.
    """
    global G_CODE
    # Local import: used only for the isinstance check below.
    import pandas as pd

    t_name = 'a' + code
    if me.IsTableExist(t_name, G_DBengine) == False:
        s_date = '2013-01-01'
    else:
        s_date = me.GetLatestDateFromTable(t_name, G_DBengine)

    # NOTE(review): a persistent ValueError retries forever without a pause.
    while True:
        try:
            df = ts.get_h_data(code, autype='hfq', start=s_date)
            if isinstance(df, pd.DataFrame):
                print('%s %s' % (s_date, df.index.size))
                for column in ('open', 'high', 'low', 'volume'):
                    del df[column]
                df['amount'] = df['amount'] / 10000
                # Guard against an empty download: there is no last row to drop.
                if df.index.size != 0:
                    df = df.drop(df.index.values[df.index.size-1])
                G_CODE.iat[i,4] = df.index.size
                if df.index.size != 0:
                    df.to_sql(t_name, G_DBengine, if_exists='append')
            return
        except ValueError as e:
            print('ValueError: %s' % e)
Beispiel #14
0
def download_kline_source_select(code, date_start, date_end):
    """Download k-line data for ``code`` and return the selected columns.

    Codes of length 6 are fetched with ``ts.get_h_data``; anything else with
    ``ts.get_hist_data``. The result gets a code column and a date column
    (copied from the index) and is reduced to the KEY_* columns.

    Returns the DataFrame, or None when there is no data or the download or
    processing fails (the error is printed).
    """
    try:
        if len(code) == 6:
            df_qfq = ts.get_h_data(str(code), start=date_start, end=date_end)
        else:
            df_qfq = ts.get_hist_data(str(code), start=date_start, end=date_end)
        # tushare may return None as well as an empty frame.
        if df_qfq is None or len(df_qfq) == 0:
            return None
        df_qfq[KEY_CODE] = code
        df_qfq[KEY_DATE] = df_qfq.index

        columns = [KEY_CODE, KEY_DATE, KEY_OPEN, KEY_HIGH, KEY_CLOSE, KEY_LOW, KEY_VOLUME]
        df_qfq = df_qfq[columns]

        print(df_qfq.head())

        return df_qfq
    except Exception as e:
        print(str(e))
        return None
Beispiel #15
0
def get_stock_data(stock_list, start_date):
    """Download the history of every code in ``stock_list`` from ``start_date``.

    Returns a list of ``{'code': <code>, 'data': <DataFrame>}`` dicts in the
    order of ``stock_list``; each frame is sorted by its index, ascending.
    """
    # DataFrame.sort() no longer exists; sort_index() is the equivalent.
    return [
        {
            'code': stockcode,
            'data': ts.get_h_data(code=stockcode, start=start_date).sort_index(ascending=True),
        }
        for stockcode in stock_list
    ]
Beispiel #16
0
def __get_data_tushare(code, start, end, look_back):
    """Fetch open/high/low/close/volume for ``code`` with extra leading history.

    The download starts ``look_back + 210`` calendar days before ``start``.
    When that date lies three or more calendar years back, ``ts.get_h_data``
    (``index=True``) is used, otherwise ``ts.get_hist_data``.

    Returns a DataFrame with the five columns; it is empty when nothing was
    fetched. Problems are only reported with print().
    """
    # FIXME: look_back_pos may not equal (start - look_back).
    # look_back is meant as a number of trading days, not calendar days, so a
    # margin is added to make sure enough rows are fetched.
    ADJUST = 210
    look_back_date = (parse(start) - timedelta(look_back + ADJUST)).date()
    look_back_pos = str(look_back_date)

    if look_back_date.year <= datetime.now().year - 3:
        hist_data = ts.get_h_data(
            code=code,
            start=look_back_pos,
            end=end,
            index=True,
            pause=1,
            retry_count=5)
    else:
        hist_data = ts.get_hist_data(
            code=code, start=look_back_pos, end=end)

    # we only need selected columns
    columns = ['open', 'high', 'low', 'close', 'volume']

    # Test for None first: len(None) raises before the old check reached it.
    if hist_data is None or len(hist_data) == 0:
        print('fetched data returned 0 row, something was wrong')
        hist_data = pd.DataFrame(columns=columns)
    if len(hist_data) < look_back:
        print('did not retrieve enough data')

    return pd.DataFrame(hist_data[columns])
    def _getIndexDaysFromTuShare(self, code, startDate, endDate, fields, name=None):
        """
            Fetch daily index bars from TuShare.

            The last three characters of ``code`` are removed before the
            request (presumably an exchange suffix -- confirm with callers).
            Returns a DataFrame with a 'datetime' column followed by
            ``fields``, or None when the download raises (the error is
            reported through ``self._info``).
        """
        bareCode = code[:-3]

        sleep(self.tuShareDaysSleepTimeConst)
        try:
            fetched = ts.get_h_data(bareCode, startDate, endDate, index=True)
            if fetched is not None and not fetched.empty:
                df = fetched.sort_index()
            else:
                # TuShare returns None when there is no data
                df = pd.DataFrame(columns=['open', 'high', 'close', 'low', 'volume', 'amount'])
        except Exception as ex:
            self._info.print("从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(code, name, startDate, endDate, ex), DyLogData.error)
            return None

        # an index has no turnover and no adjust factor
        df['turnover'] = 0
        df['factor'] = 1
        df.index.name = None

        # move the time index into a column and switch to Wind's indicator names
        df.reset_index(inplace=True)
        wind_names = {'index': 'datetime', 'amount': 'amt', 'turnover': 'turn', 'factor': 'adjfactor'}
        df.rename(columns=wind_names, inplace=True)

        # drop the time of day so every timestamp is at 00:00:00
        day_strings = df['datetime'].map(lambda stamp: stamp.strftime('%Y-%m-%d'))
        df['datetime'] = day_strings
        df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')

        # keep only the requested fields
        return df[['datetime'] + fields]
Beispiel #18
0
def main():
    """Demo: run ``StockBox.base_function`` and print tushare history for one stock."""
    sb = StockBox()
    stock_id = '300333'
    sb.base_function(stock_id)
    # Other StockBox calls that were previously tried here: looper, realtime,
    # longhuban, getNews, fund, get_stock_chengfeng, date_store, profit_test,
    # daily_longhu.

    # History as returned by get_hist_data
    history = ts.get_hist_data(stock_id)

    print(u"历史3年的数据")

    print(history.head(10))

    # NOTE(review): the start date used to be '20015101', which is not a valid
    # date. It is assumed to be a typo for 2015-01-01; both dates are written
    # as YYYY-MM-DD here. Confirm the intended range.
    history_all = ts.get_h_data(stock_id, '2015-01-01', '2016-01-01')

    print(u'所有的历史数据')
    print(history_all)
Beispiel #19
0
def getHistoryData(datapath, sid, start=None, end=None):
    '''
    Download the history of ``sid`` in windows of at most three years and
    save the result as '<sid>_<start>_<end>.csv' (lower-cased) in ``datapath``.

    :param datapath: directory the CSV file is written to
    :param sid: stock code passed to tushare
    :param start: first day, 'YYYY-MM-DD' (required)
    :param end: last day, 'YYYY-MM-DD'; defaults to today
    :return: None
    :raises ValueError: when ``start`` is missing, or when no window
        returned any data (raised by ``pandas.concat``)
    '''
    DAYFORMAT = '%Y-%m-%d'
    MAXINTERVAL = 365 * 3
    if start is None:
        raise ValueError('start is required (YYYY-MM-DD)')
    if end is None:
        end = datetime.strftime(datetime.today(), DAYFORMAT)

    first_day = datetime.strptime(start, DAYFORMAT)
    window_end = datetime.strptime(end, DAYFORMAT)

    totalData = []
    # Walk backwards from `end`. Each window is clamped to `start`, so the
    # oldest partial window is fetched too, and the next window ends the day
    # before this one starts, so no day is fetched twice.
    while window_end >= first_day:
        window_start = max(first_day, window_end - timedelta(days=MAXINTERVAL))
        temp = tushare.get_h_data(sid,
                                  start=window_start.strftime(DAYFORMAT),
                                  end=window_end.strftime(DAYFORMAT))
        if temp is not None:
            totalData.append(temp)
        window_end = window_start - timedelta(days=1)

    AllData = pandas.concat(totalData)
    filename = os.path.join(datapath, '_'.join([sid, start, end])) + '.csv'
    AllData.to_csv(filename.lower())
Beispiel #20
0
 def append_days(self,stock, start, end):
     '''
     Append the data of ``stock`` for the given date range to table 'day_<stock>'.

     Nothing is written when tushare returns no data for the range.
     '''
     data = ts.get_h_data(stock,start=start,end=end)
     if data is None or len(data) == 0:
         return
     data = data.sort_index(ascending=True)
     data.to_sql('day_'+stock, self.engine,if_exists='append')
Beispiel #21
0
def getDayLine(from_num, to_num,startDay = 0, endDay = time.strftime('%Y-%m-%d', time.localtime())):
    """Download daily data for a slice of ``df_base`` into table 'qfq_day'.

    Rows of the global ``df_base`` are counted with the global
    ``control_num``: rows counted below ``from_num`` are skipped and the loop
    stops when the counter reaches ``to_num``.

    startDay: start date string, or 0 to start each stock at its own
        ``timeToMarket`` date.
    endDay: end date string. NOTE(review): the default is evaluated once,
        when the function is defined, not on each call.

    Stocks whose download or insert fails are appended to the file
    'qfq_err<endDay>'. NOTE(review): as before, a failed stock does not
    advance ``control_num``.
    """
    global df_base, control_num, engine
    for stocknum in df_base.index:
        if control_num >= from_num:
            if control_num == to_num:
                break
            try:
                # Per-stock start date: the old code overwrote `startDay`
                # with the first stock's listing date and then reused it for
                # every following stock.
                stock_start = startDay
                if stock_start == 0:  # download from the listing date
                    listed = str(df_base.loc[stocknum, 'timeToMarket'])
                    stock_start = listed[:4] + '-' + listed[4:6] + '-' + listed[6:8]

                qfq_history = ts.get_h_data(stocknum, start=stock_start, end=endDay, retry_count=10)
                qfq_history.insert(0, 'stocknum', stocknum)
                qfq_history.to_sql('qfq_day', engine, if_exists='append')
            except Exception:
                with open('qfq_err' + endDay, 'a') as f:
                    f.write(stocknum + '\n')
                continue
        control_num += 1
Beispiel #22
0
def recoveDayline(startDay, endDay):
    """Retry the stocks listed in the error file 'qfq_err<endDay>'.

    Each line of the file starts with a 6-character stock code. Every stock
    is downloaded again and appended to table 'qfq_day'. Afterwards the file
    holds only the stocks that failed again; it is removed when none did.

    startDay: start date string, or 0 to start each stock at its own
        ``timeToMarket`` date from the global ``df_base``.
    """
    global df_base, control_num, engine
    err_path = 'qfq_err' + endDay
    if not os.path.exists(err_path):
        return

    with open(err_path) as f:
        lines = f.readlines()

    still_failing = []
    for lineNo, line in enumerate(lines):
        if not line.strip():
            continue
        stocknum = line[:6]
        try:
            # Per-stock start date (do not reuse the first stock's date).
            stock_start = startDay
            if stock_start == 0:
                listed = str(df_base.loc[stocknum, 'timeToMarket'])
                stock_start = listed[:4] + '-' + listed[4:6] + '-' + listed[6:8]
            qfq_history = ts.get_h_data(stocknum, start=stock_start, end=endDay, retry_count=10)
            qfq_history.insert(0, 'stocknum', stocknum)
            qfq_history.to_sql('qfq_day', engine, if_exists='append')
            print(lineNo)
        except Exception:
            still_failing.append(line)

    # Always bring the file in line with the outcome; before, it was only
    # rewritten when something failed, so recovered stocks stayed listed.
    if still_failing:
        with open(err_path, 'w') as f:
            f.writelines(still_failing)
    else:
        os.remove(err_path)
    def get_url_data_(self):
        """Store the stock code list and every stock's history in the database.

        Writes table 'code_list_' (columns ``code`` and a running ``index``
        that becomes the primary key), then one table per stock code holding
        its tushare history with a ``date`` column and the same kind of
        running ``index`` primary key.
        """
        # fetch the list of stock codes from tushare
        code_list_ = pandas.DataFrame((tushare.get_today_all())['code'])

        # sort (DataFrame.sort() no longer exists)
        code_list_ = code_list_.sort_values(by='code', ascending=True)

        # add a running number column to serve as the key
        code_list_['index'] = list(range(len(code_list_)))

        # Write to the database. The frame already carries an 'index' column,
        # so the DataFrame index is not written as well: writing it under the
        # same label clashes with that column.
        code_list_.to_sql('code_list_', self.engine_, if_exists='replace', index=False)

        # make `index` the primary key
        with self.engine_.connect() as connection:
            connection.execute('alter table testdb.code_list_ add primary key(`index`)')

        # fetch every stock of the list above
        for stock_code in code_list_['code']:
            # data comes back already adjusted
            stock_data_ = tushare.get_h_data(stock_code)

            # the date index is copied into a regular column so it is stored
            stock_data_['date'] = pandas.Series(stock_data_.index, stock_data_.index)
            stock_data_ = stock_data_.sort_values(by='date', ascending=True)
            stock_data_['index'] = list(range(len(stock_data_)))
            stock_data_.to_sql(stock_code, self.engine_, if_exists='replace', index=False)
            # NOTE(review): the table name is concatenated into the SQL text;
            # codes must come from a trusted source.
            with self.engine_.connect() as connection:
                connection.execute('alter table testdb.' + stock_code + ' add primary key(`index`)')
def getLowestGrowth(startDate, endDate,stockList):
    """Compute the highest/lowest close and their relative spread per stock.

    Works through ``stockList`` from the end and removes each code once it
    has been handled (the caller's list is emptied). Only stocks whose
    realtime price is above 0 are evaluated.

    Returns a dict ``{code: {'maxPxAll', 'minPxAll', 'maxGrowthRate'}}``
    where maxGrowthRate = (max - min) / min of the close price.

    NOTE(review): a URLError leaves the code in the list, so it is retried
    without limit.
    """
    # Local import: used only for the numeric conversion below.
    import pandas as pd

    result = {}
    while len(stockList) > 0:
        stockCode = stockList[-1]
        try:
            print('%s is started' % stockCode)
            # only stocks that traded today
            quotes = ts.get_realtime_quotes(stockCode)
            if float(quotes.price.iloc[0]) > 0:
                df_tran = ts.get_h_data(stockCode, start=startDate, end=endDate)
                # make the close price numeric (convert_objects was removed)
                df_tran['close'] = pd.to_numeric(df_tran['close'], errors='coerce')
                # oldest date first
                df_tran = df_tran.sort_index()

                highest = max(df_tran.close)
                lowest = min(df_tran.close)
                result[stockCode] = {
                    'maxPxAll': highest,
                    'minPxAll': lowest,
                    'maxGrowthRate': (highest - lowest) / lowest,
                }
                print('%s is finished' % stockCode)
            stockList.pop()
        except URLError as e:
            print('Error %s %s' % (stockCode, str(e)))
            continue
        except Exception as e:
            print('Error %s %s' % (stockCode, str(e)))
            stockList.pop()
    # The collected figures used to be thrown away.
    return result
def inital_stock_data(stock_code):
    '''
    Load the data of one stock into table 'hdata_<stock_code>'.

    The table is created first when it does not exist. The download starts
    one day after the date returned by ``last_date_table`` and ends today,
    so the same function serves the first full load and later increments.

    Returns 1 when the load succeeded or there was nothing to load, and 0
    when anything failed (the error is printed).
    '''
    try:
        # create the table if it does not exist yet
        create_stock_info_table(stock_code)
        # start one day after the last stored date
        startdate=int(dt.mktime(last_date_table(stock_code).timetuple()))+3600*24
        startdate=dt.localtime(startdate)
        startdate=dt.strftime("%Y-%m-%d", startdate)

        enddate=dt.strftime("%Y-%m-%d",dt.localtime())
        print("startdate is :",startdate,"   end date is:",enddate, "  stock code is:",stock_code)
        rs=ts.get_h_data(stock_code,start=startdate,end=enddate)
        # no rows, e.g. trading suspended or nothing new in this range
        if rs is None or len(rs) == 0:
            print("股票%s在这段日期内没有交易数据\n"%stock_code)
            return 1
        # NOTE(review): `flavor` was removed from DataFrame.to_sql in newer
        # pandas; it is kept because `conn` may be a raw MySQL connection
        # that relies on it. Confirm against the pandas version in use.
        pd.DataFrame.to_sql(rs, "hdata_"+stock_code, con=conn, flavor='mysql', if_exists='append',index=True)
        print("提取股票%s数据正确"%stock_code)
        return 1  # loaded successfully
    except Exception as exc:
        print("提取股票%s数据出错"%stock_code)
        print(exc)
        return 0  # load failed
Beispiel #26
0
def get_his_data(code, start='2012-01-01', end=str(datetime.datetime.today())[0:10], ma=[5, 12, 13, 18, 20, 30, 60, 120], period='day',
                 column_name='close',index=False,if_ma=True):
    """Fetch the history of ``code`` and optionally add moving averages.

    The download starts ``max(ma) * 2`` days before ``start`` so the moving
    averages have leading data; the result is cut back to ``start`` when
    ``if_ma`` is set. ``ts.get_hist_data`` is used when ``ts.get_h_data``
    raises.

    NOTE(review): the defaults of ``end`` and ``ma`` are evaluated once, when
    the function is defined; ``ma`` is only read here.

    Raises ValueError when the result still contains NaN after the cut.
    """
    _start = str(datetime.datetime.strptime(start,'%Y-%m-%d')-datetime.timedelta(max(ma)*2))[:10]
    try :
        df = ts.get_h_data(code,start=_start,end=end,index=index)
    except Exception as e:
        print(e)
        print('Using ts.get_hist_data instead,only achieve 3 year data')
        df = ts.get_hist_data(code,start=_start,end=end)
    try:
        # np.str was only an alias of the builtin and has been removed.
        df.index = df.index.astype(str)
        print(df.index)
    except Exception as e:
        print(e)
        print('Probabaily because of no data entry')
    if if_ma == True:
        df = MA_CALCULATOR(df)
        df = df.get_ma(ll=ma, period=period, column_name=column_name)
        df = df[start:]
        if df.isnull().any().any():
            print(df)
            # Raising a plain string is itself a TypeError.
            raise ValueError('Need more date info for calculating MA before last NaN')
    return df
Beispiel #27
0
def fetchStockData(code, output_csv=None):
    """Download the history of ``code`` and add 'ema' and 'rise' columns.

    Rows are sorted by date, oldest first. For row n (0-based):
      ema[0] = close[0], ema[n] = ((n-1) * ema[n-1] + 2 * close[n]) / (n+1)
      rise[0] = 0,       rise[n] = (close[n] - close[n-1]) / close[n-1]

    When ``output_csv`` is given the frame is also written to that path.
    Returns the DataFrame.
    """
    StockDf = ts.get_h_data(code)
    StockDf = StockDf.sort_index(axis=0, ascending=True)

    closes = StockDf['close'].tolist()
    ema_values = []
    rise_values = []
    ema = 0
    for n, close in enumerate(closes):
        if n == 0:
            ema = close
            rise = 0
        else:
            # Uses the EMA of the previous row. The old code read row
            # `idx-11`, which for early rows wrapped around to the end of
            # the frame.
            ema = ((n - 1) * ema + 2 * close) / (n + 1)
            previous = closes[n - 1]
            rise = (close - previous) / previous
        ema_values.append(ema)
        rise_values.append(rise)

    StockDf['ema'] = ema_values
    StockDf['rise'] = rise_values

    if output_csv is not None:
        StockDf.to_csv(output_csv)

    return StockDf
def get_security_all_history(code_list):
    '''
    Download the history of every code in ``code_list`` and save each one as
    '../data/origin/tushare/sh/<code>.csv'.

    tushare.get_h_data() can query the full history of a stock; the data has
    7 columns: date, open, hight, close, low, volume, amount.

    tushare.get_hist_data() only covers 3 years of history; the data has
    these columns: date, open, hight, close, low, volume, price_change,
    p_change, ma5, ma10, ma20, v_ma5, v_ma10, v_ma20, turnover.

    Parameters
    ------
        code_list : iterable of stock codes
    return
    -------
        None
    '''

    for code in code_list:
        try:
            tmp_data_h = tushare.get_h_data(code, start='2000-01-01', end='2016-08-01')
        except Exception as exception:
            stl_logger.data_manager_logger(__file__).error('tushare.get_h_data(%s) excpetion, args: %s' % (code, exception.args.__str__()))
            # Skip this code. Going on would use an unbound name, or the
            # previous code's data under this code's file name.
            continue

        if tmp_data_h is None:
            stl_logger.data_manager_logger(__file__).warning('tushare.get_h_data(%s) return none' % code)
            continue

        with open('../data/origin/tushare/sh/%s.csv' % code, 'wt') as fout:
            fout.write(tmp_data_h.to_csv())
Beispiel #29
0
def get_stock_his_day_Data(code, startDay, endDay):
    """Yield the history of ``code`` one calendar year at a time.

    startDay / endDay: 'YYYY-MM-DD', 'YYYYMMDD' or an integer YYYYMMDD.
    The range is clipped to the stock's ``timeToMarket`` date (from
    ``ts.get_stock_basics()``) and to today. Each yielded item is what
    ``ts.get_h_data`` returns for that year's part of the range.
    """
    df = ts.get_stock_basics()
    tmDate = df.loc[code]['timeToMarket']

    def as_yyyymmdd(day):
        # Accepts dashed strings, plain digit strings and integers alike.
        return np.int64(str(day).replace('-', ''))

    startDay = as_yyyymmdd(startDay)
    endDay = as_yyyymmdd(endDay)

    if startDay < tmDate:
        startDay = tmDate

    today = np.int64(str(datetime.date.today()).replace('-', ''))

    if endDay > today:
        endDay = today

    # search by year, for the reliability
    # Floor division keeps the years integral on Python 3 as well.
    first_year = int(startDay // 10000)
    last_year = int(endDay // 10000)
    sstartDay, sendDay = str(startDay), str(endDay)
    for nyear in range(first_year, last_year + 1):
        if nyear == first_year:
            tmpStart = sstartDay[0:4] + '-' + sstartDay[4:6] + '-' + sstartDay[6:8]
        else:
            tmpStart = str(nyear) + '-01-01'
        if nyear == last_year:
            tmpEnd = sendDay[0:4] + '-' + sendDay[4:6] + '-' + sendDay[6:8]
        else:
            tmpEnd = str(nyear) + '-12-31'
        logging.debug("get code:%s history data from %s to %s" %(code, tmpStart, tmpEnd))
        yield ts.get_h_data(code, start=tmpStart, end=tmpEnd)
Beispiel #30
0
 def load_data(self, pcontract, dt_start=None, dt_end=None):
     """Load the tushare history of ``pcontract.contract.code``.

     Both dates go through ``_process_dt``; a falsy start is replaced by
     ``_VERY_EARLY_START``. The rows are handed to ``_process_tushare_data``
     in reversed order.
     """
     start = _process_dt(dt_start) or _VERY_EARLY_START
     end = _process_dt(dt_end)
     raw = ts.get_h_data(pcontract.contract.code, start=start, end=end)
     reversed_rows = raw.iloc[::-1]
     return _process_tushare_data(reversed_rows)
    def get_history_index_data_by_date(self, code, start_date_str,
                                       end_date_str, frequency):
        """Return index history with a ``pct_chg`` column added.

        ``code`` must be 9 characters long, otherwise None is returned; only
        its first 6 characters are sent to tushare. ``pct_chg`` is taken from
        the ``p_change`` column of ``ts.get_hist_data`` (requested with
        ``ktype=frequency``), matched by date against ``ts.get_h_data``.
        """
        if len(code) != 9:
            return None
        code = code[0:6]

        # Source of p_change, re-indexed by parsed dates.
        hist = ts.get_hist_data(code,
                                start=start_date_str,
                                end=end_date_str,
                                ktype=frequency)
        hist['tradedate'] = hist.index
        hist = hist.reset_index(drop=True)
        hist['date'] = hist['tradedate'].map(
            lambda day: datetime.strptime(day, '%Y-%m-%d'))
        by_date = hist.set_index('date')

        result = ts.get_h_data(code,
                               start=start_date_str,
                               end=end_date_str,
                               index=True)
        result['pct_chg'] = by_date['p_change']

        return result
Beispiel #32
0
    def getByDate(self, mongo, func, code, date):
        """Fetch one day of data with the tushare call named by ``func`` and
        insert the rows into ``mongo.trading[func]``.

        func: one of "tick_data", "h_data", "hist_data", "sina_dd".
        Every stored row gets ``code`` and ``date`` added. Nothing is
        inserted for an unknown ``func`` or when tushare returns no data.
        """
        if (func == "tick_data"):
            df = ts.get_tick_data(code, date=date)
        elif (func == "h_data"):
            df = ts.get_h_data(code, start=date, end=date)
        elif (func == "hist_data"):
            print(date)
            print(type(date))
            # get_hist_data takes a start/end range, not a `date` argument.
            df = ts.get_hist_data(code, start=date, end=date)
        elif (func == "sina_dd"):
            df = ts.get_sina_dd(code, date=date)
        else:
            # Unknown source: nothing to fetch, nothing to store.
            return

        if df is None:
            return

        tmpJson = json.loads(df.to_json(orient='records'))
        for record in tmpJson:
            record[u'code'] = code
            record[u'date'] = date
            print(record)
        coll = mongo.trading[func]
        coll.insert(tmpJson)
Beispiel #33
0
def download_history_data_fq(autype='qfq', startTime=None):
    '''
    Download adjusted historical daily k-line data for every stock code.

    autype: adjustment type passed to tushare; it also selects the target
        table ``history_data_<autype>``.
    startTime: first date to download; when None,
        ``utils.today_last_year(6)`` is used.

    Failures for a single stock are logged and do not stop the loop.
    '''

    conn = db.get_history_data_db('D')
    start = startTime
    if startTime is None:
        start = utils.today_last_year(6)

    for code in get_all_stock_code():
        # Pass autype through so the stored data matches the table name.
        df = ts.get_h_data(code, start=start, autype=autype,
                           drop_factor=False)
        if df is None:
            continue
        try:
            df.insert(0, 'code', code)
            sql.to_sql(df,
                       name='history_data_%s' % autype,
                       con=conn,
                       index=True,
                       if_exists='append')
            log.info('%s,%s history %s data download ok.' %
                     (code, start, autype))
        except Exception as e:
            # Include the cause; the original message dropped it.
            log.error('error:code:%s,start:%s,%s' % (code, start, e))
Beispiel #34
0
 def update_everyday(self):
     '''
     Update the stored daily data of every stock.

     For each stock the newest stored date is read from table ``day_<code>``
     and ``append_days`` is asked for everything from the following day up to
     today. If that fails for any reason, the full history is downloaded and
     appended to the table instead.
     '''
     basics = TS.memchaced_data(ts.get_stock_basics, 'get_stock_basics')
     for stock in basics.index:
         try:
             search_sql = "select * from {0} order by date desc limit 1".format(
                 'day_' + stock)
             origin = SQL.read_sql(search_sql, self.engine)
             date_64 = (origin.tail(1))['date'].values[0]
             next_day = pd.to_datetime(str(date_64)) + timedelta(1)
             start = next_day.strftime("%Y-%m-%d")
             end = datetime.datetime.now().strftime("%Y-%m-%d")
             self.append_days(stock, start=start, end=end)
         except Exception as err:
             # ``as`` works on Python 2.6+ and 3, and no longer rebinds the
             # stock-basics frame the way ``except Exception, data`` did.
             print("更新股票数据失败:{0} {1}".format(stock, err))
             # NOTE(review): this also runs when the table already exists
             # (e.g. a transient error), which would append duplicates.
             history = ts.get_h_data(stock)
             if history is not None:
                 history = history.sort_index(ascending=True)
                 history.to_sql('day_' + stock,
                                self.engine,
                                if_exists='append')
                 print("尝试加载该新股票成功")
Beispiel #35
0
    def get_h_data(self, symbol, expire=60*6):
        """
        Return the full history of one stock, cached as a local TSV file.

        The file ``CT.HIS_DIR + symbol`` is refreshed from tushare when
        ``date_time.check_file_expired`` reports it as expired or when it
        does not exist. Returns the cached frame, or None when the download
        yields nothing or no cache file exists.
        """
        if not os.path.exists(CT.HIS_DIR):
            os.makedirs(CT.HIS_DIR)

        file_path = CT.HIS_DIR + symbol
        is_stale = date_time.check_file_expired(file_path, expire)
        needs_download = is_stale or not os.path.exists(file_path)

        if needs_download:
            today = date_time.get_today_str()
            history = ts.get_h_data(symbol, autype=None, start=CT.START,
                                    end=today, drop_factor=False)
            if history is None:
                return None
            history.to_csv(CT.HIS_DIR + symbol, sep='\t')

        if os.path.exists(file_path):
            return pd.read_csv(file_path, sep='\t', index_col=0)
        return None
Beispiel #36
0
 def __init__(self):
     """Load the HS300 table from FILE_LOCATION, creating or refreshing it.

     * no file yet: delegate to ``download_hs300``;
     * file whose first row is dated today: use it as is;
     * otherwise: download from the newest stored date onwards, replace the
       first stored row with the fresh rows and rewrite the file.
     """
     if not Path(FILE_LOCATION).is_file():
         print("HS300 file created!\n")
         self.download_hs300()
         return

     cached = pd.read_csv(FILE_LOCATION)
     today = time.strftime("%Y-%m-%d")
     newest = cached.loc[0, 'date']
     if today == newest:
         print("HS300 file already exists!\n")
         self.__hs300 = cached
         return

     print("Updating HS300 file!\n")
     fresh = ts.get_h_data(HS300_INDEX, index=True, start=newest)
     fresh = fresh.reset_index()
     # Store dates as ISO strings, matching what is read back from the CSV.
     fresh['date'] = fresh['date'].apply(
         lambda x: pd.to_datetime(x).date().isoformat())
     # The download starts at ``newest``, so the first cached row is dropped.
     self.__hs300 = pd.concat([fresh, cached[1:]])
     self.__hs300.to_csv(FILE_LOCATION, encoding='utf-8', index=False)
Beispiel #37
0
    def _getCodeDaysFromTuShare(self, code, startDate, endDate, fields, name=None):
        """
            Fetch daily bars of a single stock through TuShare.

            Combines the price/factor columns from ts.get_h_data with the
            'turnover' column from self._getDaysFrom163 and renames the
            columns to Wind-style names.
            @return: DataFrame with 'datetime' plus the requested @fields,
                     or None when either download raises.
        """
        sinaColumns = ['open', 'high', 'close', 'low', 'volume', 'amount', 'factor']
        tuShareCode = code[:-3]

        try:
            # Turnover comes from NetEase (163).
            netEasyDf = self._getDaysFrom163(code, startDate, endDate).sort_index()

            # Adjustment factor comes from Sina; volume is in shares. Sina data
            # is backward-adjusted; tushare derives the unadjusted values from
            # the factor.
            sinaDf = ts.get_h_data(tuShareCode, startDate, endDate, autype=None, drop_factor=False)
            # TuShare returns None when there is no data.
            sinaDf = pd.DataFrame(columns=sinaColumns) if sinaDf is None else sinaDf.sort_index()
        except Exception as ex:
            self._info.print("从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(code, name, startDate, endDate, ex), DyLogData.error)
            return None

        # Build the combined frame.
        df = pd.concat([sinaDf[sinaColumns], netEasyDf['turnover']], axis=1)
        df.index.name = None

        # Turn the time index into a column and switch to Wind's names.
        df = df.reset_index()
        df = df.rename(columns={'index': 'datetime', 'amount': 'amt', 'turnover': 'turn', 'factor': 'adjfactor'})

        # Reset the time-of-day part of every date to 00:00:00.
        dayStrings = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
        df['datetime'] = pd.to_datetime(dayStrings, format='%Y-%m-%d')

        # Keep only the requested @fields.
        return df[['datetime'] + fields]
Beispiel #38
0
def get_all_hist_data(code, stock_basics, year_interval=3, max_retries=3):
    """Download the whole history of ``code`` in windows of ``year_interval`` years.

    The first window starts at the listing date in
    ``stock_basics.loc[code]['timeToMarket']`` (``YYYYMMDD``), or at
    2000-01-01 when that value is falsy. Each window ends on 1 January,
    ``year_interval`` years after the year it starts in.

    Each window is attempted up to ``max_retries`` times. The last error is
    re-raised, where the original retried the same window forever.

    Returns the concatenation of all non-empty batches, or an empty
    DataFrame when no batch contained data.
    """
    def format_date(d):
        return d.strftime('%Y-%m-%d')

    proxies = ts.get_proxies(count=10)
    now_date = datetime.now().date()
    # Listing date
    time_to_market = stock_basics.loc[code]['timeToMarket']
    if time_to_market:
        start_date = datetime.strptime(str(time_to_market), '%Y%m%d').date()
    else:
        start_date = date(2000, 1, 1)

    attempts = max(1, max_retries)
    data_frames = []
    while now_date >= start_date:
        end_date = date(start_date.year + year_interval, 1, 1)
        for attempt in range(attempts):
            try:
                batch_df = ts.get_h_data(code,
                                         start=format_date(start_date),
                                         end=format_date(end_date),
                                         proxies=proxies)
                break
            except Exception:
                if attempt == attempts - 1:
                    raise
        # tushare returns None for a window without data.
        if batch_df is not None:
            data_frames.append(batch_df)
        start_date = end_date

    if not data_frames:
        return pd.DataFrame()
    return pd.concat(data_frames)
Beispiel #39
0
def import_h_data(code, start, end):
    """Download index history for ``code`` and store one HData row per day.

    ``start`` and ``end`` are formatted with ``%Y-%m-%d``. When tushare
    returns a frame, every row is added to ``session`` and the session is
    committed once at the end.
    """
    print('=== import h data ===')
    print('start: {:%Y-%m-%d} end: {:%Y-%m-%d}'.format(start, end))

    day_format = '{:%Y-%m-%d}'
    df: pd.DataFrame = tushare.get_h_data(code=code,
                                          start=day_format.format(start),
                                          end=day_format.format(end),
                                          index=True)

    if df is None:
        print('done')
        return

    # NOTE(review): 'low' is not copied into HData - confirm that is intended.
    for day, bar in df.iterrows():
        session.add(HData(code=code,
                          date=day,
                          open=bar['open'],
                          close=bar['close'],
                          high=bar['high'],
                          volume=bar['volume'],
                          amount=bar['amount']))

    session.commit()
    print('done')
Beispiel #40
0
def _update(stock, conn):
    try:
        print "update ----- :", stock
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        df = df.set_index('date')

        # print df.head(10),df.ix[-1],df.ix[-1].name
        if dt.now().weekday() == 5:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=1))[:10]
        elif dt.now().weekday() == 6:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=2))[:10]
        else:
            today = str(pd.Timestamp(dt.now()))[:10]
        if today != df.ix[-1].name[:10]:
            df = ts.get_h_data(stock,
                               start=df.ix[-1].name[:10],
                               retry_count=5,
                               pause=1)
            df[['open', 'high', 'close', 'low',
                'volume']].to_sql(stock, conn, if_exists='append')
    except Exception, arg:
        print "exceptionu:", stock, arg
        errorlist.append(stock)
def saveHistoricalDailyTrade_qfq(_symbols, _start_dte, _end_dte):
    """Download forward-adjusted daily data and append it to the database.

    The frame returned by tushare gets a ``symbol`` column and is appended to
    table ``AStocks_Trade_Daily_qfq``. Any failure is printed and written
    through ``dlog.writeLog``. The engine is disposed afterwards if it was
    created.
    """
    # Bound before the try block so ``finally`` cannot hit an unbound name
    # when getDBConn() itself fails.
    engine = None
    try:
        print('----Start processing historical data')
        # get DB connection
        engine = APIs.getDBConn()

        # config parameters
        # qfq = forward-adjusted, hfq = backward-adjusted, None = unadjusted
        _autype = 'qfq'
        # when True, the code is treated as an index code
        _index = False

        df = ts.get_h_data(code=_symbols,
                           start=_start_dte,
                           end=_end_dte,
                           autype=_autype,
                           index=_index,
                           retry_count=3,
                           pause=0.5)
        if df is None:
            # Log a clear cause instead of a TypeError from indexing None.
            raise ValueError('tushare returned no data')
        df['symbol'] = _symbols
        df.to_sql('AStocks_Trade_Daily_qfq', con=engine, if_exists='append')
        print('----Finish processing historical data')
    except Exception as e:
        print('----Failed at {}'.format(sys._getframe().f_code.co_name))
        dlog.writeLog(
            time.strftime('%Y%m%d %H:%M:%S', time.localtime()),
            sys._getframe().f_code.co_name,
            'symbol = {}, start = {}, end = {}'.format(
                _symbols,
                _start_dte,
                _end_dte,
            ), e)
    finally:
        if engine is not None:
            engine.dispose()
Beispiel #42
0
def get_high_test():
    """Print the 30-day high/low of stock 300141 and the days they occurred.

    Downloads the bars between the module-level ``day30`` and ``day0``,
    prints them, and prints the stock name when the high of 2016-07-15
    reaches the maximum high of the window.
    """
    df = ts.get_h_data('300141', start=day30, end=day0)
    print(df)

    # NOTE(review): ``.ix`` is gone from current pandas and the date is fixed.
    current = df.ix['2016-07-15']
    print(current)
    current_high = current['high'].values[0]
    print(current_high)

    price_30_max = df['high'].max()
    price_30_min = df['low'].min()

    # Day(s) on which the maximum occurred.
    print(df[df.high >= price_30_max])
    # Day(s) on which the minimum occurred.
    print(df[df.low <= price_30_min])

    print(price_30_max)
    print(price_30_min)

    if current_high >= price_30_max:
        print(stock_info.ix['300141']['name'].decode('utf-8'))
Beispiel #43
0
import tushare as ts
# Backward-adjusted ('hfq') daily data for 600606.
df = ts.get_h_data('600606', autype='hfq',start='2015-01-01',end='2017-12-31')
# Write once with the chosen columns; the earlier unfiltered write to the same
# path was immediately overwritten by this one.
df.to_csv('D:/day/600606.csv',columns=['open','high','close','low','volume','amount'])
import tushare as ts

# load stock codes:
# dtype=int because the ``np.int`` alias has been removed from NumPy.
tt = loadtxt('stock_codes_2017-06-04.txt', dtype=int)
codes = array([str(x).zfill(6) for x in tt])

# get and save qian-fu-quan (forward-adjusted) data
for i, c in enumerate(codes):
    print('\n\n', i, ':', c)
    # Yearly files qfq_<code>_2010.csv ... qfq_<code>_2016.csv (up to
    # 2016-06-08) came from earlier runs of this loop with matching
    # start/end dates; only the remaining range is fetched here.
    # NOTE(review): start (2016-06-09) is later than end (2016-06-08), so this
    # range is empty; the '_16to17' name suggests a 2017 end date - confirm.
    df = ts.get_h_data(c, start='2016-06-09', end='2016-06-08')
    # tushare returns None when there is no data.
    if df is not None:
        df.to_csv('qfq_' + c + '_16to17.csv')
Beispiel #45
0
import tushare as ts
import os
import numpy


filename = 'c:/bigfile.csv'


# NOTE(review): the API token is hard-coded in source; consider loading it
# from the environment or a config file kept out of version control.
ts.set_token("3e3ed9ba576210c210d0aa08959fdd3b32de36515af178850f05e151")

pro = ts.pro_api()

data = pro.query('stock_basic', exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')

allcode=data['symbol'].values.tolist()

# Print the population standard deviation of the closing price per stock.
for a in allcode:
    b=ts.get_h_data(a, start='2018-08-01', end='2019-03-26')
    # tushare returns None when a stock has no data in the range.
    if b is None or b.empty:
        continue
    narray=numpy.asarray(b['close'].values, dtype=float)
    # The original ``var^0.5`` is XOR on a float and raises TypeError.
    # ndarray.std() is the same quantity, sqrt(E[x^2] - E[x]^2), and cannot
    # go negative under the root through rounding.
    print(narray.std())
Beispiel #46
0
#-*- coding: utf-8 -*-
"""
   Created  on 2017/3/27.
"""

import pandas
import tushare
from matplotlib import pyplot as plt
import matplotlib

matplotlib.style.use("ggplot")

codes = tushare.get_stock_basics()


def _load_index_history(index_code, start):
    """Download one index's history from ``start``, sorted by its index."""
    return tushare.get_h_data(index_code, index=True, start=start).sort_index()


market = _load_index_history('000001', "2006-01-01")
market_sz = _load_index_history('399004', "2006-01-01")
market_hs = _load_index_history('000300', "2006-01-01")
market_cy = _load_index_history('399606', "2010-01-01")


def try_para(market, p):
    """Compute a weekly amount baseline and weekly open-to-close change.

    NOTE(review): the body visible here stops after computing ``delta`` and
    returns nothing; the snippet looks truncated - confirm against the
    original source before relying on it.
    """
    market = market.sort_index()
    # Weekly mean of "amount", smoothed by a rolling mean over ``p`` weeks.
    base = market["amount"].resample("w").mean().dropna().rolling(p).mean()

    # Weekly relative change: (last close - first open) / first open.
    delta = ((market.close.resample("w").last() -
              market.open.resample("w").first()) /
             market.open.resample("w").first()).dropna()
Beispiel #47
0



缺点:??
'''

# print(...) with one argument behaves the same on Python 2 and 3.
print('#####1. 数据获取和处理')
CODE = '002398'
BODY_SIZE = 0.03  # hammer body size: relative range, at most 3%
HEAD_SIZE = 0.5  # upper shadow: at most 0.5x the lower shadow length
TAIL_SIZE = 2  # lower shadow: more than 2x the body length
LENGTH = 10  # observation window
STOP_LOST_TRIGGER = 1  # stop-loss multiple: stop out when price deviates from the moving average by this many standard deviations, i.e. mean - (1 * sigma)

data = ts.get_h_data(CODE, '2012-01-01', '2017-01-01')
data.sort_index(ascending=True, inplace=True)

data.reset_index(inplace=True)  # reset the index to default integers instead of dates
data['pct_change'] = data['close'].pct_change()
data['ma'] = data['close'].rolling(LENGTH).mean()
data['std'] = data['close'].rolling(LENGTH).std()
del data['volume']
del data['amount']
data['yesterday_ma'] = data['ma'].shift(1)  # previous day's moving average
data['yesterday_std'] = data['std'].shift(1)  # previous day's standard deviation

print('#####2. 识别锤子形态和特征')

# Candle body: absolute difference between close and open.
data['body'] = abs(data['close'] - data['open'])
# Upper shadow: high minus the larger of close and open.
data['head'] = data['high'] - data[['close', 'open']].max(axis=1)
Beispiel #48
0
#df=ts.get_hist_data('600848',ktype='M')   # monthly k-line data
#df=ts.get_hist_data('600848',ktype='5')   # 5-minute k-line data
#ts.get_hist_data('600848',ktype='15')     # 15-minute k-line data
#ts.get_hist_data('600848',ktype='30')     # 30-minute k-line data
#df=ts.get_hist_data('000425',ktype='60')  # 60-minute k-line data
#df=ts.get_hist_data('sh')                 # Shanghai Composite index k-line data; other parameters as for single stocks (same below)
#df=ts.get_hist_data('sz')                 # Shenzhen Component index k-line data
#df=ts.get_hist_data('hs300')                 # CSI 300 index k-line data
#ts.get_hist_data('sz50')                  # SSE 50 index k-line data
#df=ts.get_hist_data('zxb')                   # SME board index k-line data
#ts.get_hist_data('cyb')                   # ChiNext index k-line data

#df = ts.get_stock_basics()
#date = df.ix['600848']['timeToMarket'] # listing date, YYYYMMDD
#
# The only live code in this snippet: download the history of 002292 and plot
# its open and close prices as lines.
df=ts.get_h_data('002292',start='2000-11-12',end='2018-11-12')                # forward-adjusted
df[['open','close']].plot(kind='line')
#ts.get_h_data('002337',autype='hfq')   # backward-adjusted
#ts.get_h_data('002337',autype=None)    # unadjusted
#ts.get_h_data('002337',start='2015-01-01',end='2015-03-16') # forward-adjusted data between two dates
#
#df=ts.get_h_data('000425', index=True)    # Shenzhen composite index
#df=ts.get_today_all()                     # real-time quotes

#
#df = ts.get_tick_data('600848',date='2014-01-09')
#df.head(10)                            # historical ticks
#df = ts.get_today_ticks('601333')
#df.head(10)                            # today's ticks so far
#df = ts.get_realtime_quotes('000581') #Single stock symbol
#df[['code','name','price','bid','ask','volume','amount','time']] # real-time ticks
Beispiel #49
0
__author__ = 'xierui774'
import tushare as ts
import pandas as pd
import numpy as np
import os
import datetime
import matplotlib.pyplot as plt


import tushare as ts
import numpy as np
import pandas as pd
# Closing prices of index '000001' in two windows, plotted on top of each
# other with their peaks aligned.
df_07 = ts.get_h_data('000001',start='2007-10-01',end='2009-10-01',index=True)
df_15 = ts.get_h_data('000001',start='2014-10-01',end='2017-04-27',index=True)
df = df_07['close'].sort_index()
df2 = df_15['close'].sort_index()
# Row position of the highest close in each window.
idx = np.argmax(df.values)
idx2 = np.argmax(df2.values)
# Row i of the first window is paired with row i + (idx2 - idx) of the second
# so both peaks share a row. Rows with no counterpart stay NaN. This covers a
# negative offset and a longer second series, which the former ``.ix`` slice
# assignment could not (``.ix`` is also gone from current pandas).
src = np.arange(len(df)) + (idx2 - idx)
valid = (src >= 0) & (src < len(df2))
df_new = np.full(len(df), np.nan)
df_new[valid] = df2.values[src[valid]]
df = pd.DataFrame(df)
df['new_close'] = df_new
df.plot()
plt.show()
Beispiel #50
0
__author__ = 'Administrator'
import tushare as ts
import pandas as pd

data = ts.get_hist_data('000016')
print(data)
data2a = ts.get_h_data('000300')  # forward-adjusted
print(data2a)

data2 = ts.get_h_data('002337', start='2010-01-15',
                      end='2019-01-01')  # forward-adjusted data between two dates
#df = ts.get_today_ticks('601333')
#print(df)

data3 = ts.get_hist_data('sh')  # Shanghai Composite index k-line data; other parameters as for single stocks (same below)
data4 = ts.get_hist_data('sz')  # Shenzhen Component index k-line data
data5 = ts.get_hist_data('hs300')  # CSI 300 index k-line data
data6 = ts.get_hist_data('sz50')  # SSE 50 index k-line data
data7 = ts.get_hist_data('zxb')  # SME board index k-line data
data8 = ts.get_hist_data('cyb')  # ChiNext index k-line data

#print(data5)
pd.set_option('display.width', 1000)
df = ts.get_stock_basics()
# ``.ix`` is gone from current pandas; this is a label lookup, so ``.loc``.
date = df.loc['002337']['timeToMarket']  # listing date, YYYYMMDD
#print(df)
#print(date)

df = ts.get_index()
print(data5)
print(df)
Beispiel #51
0
    def _getDaysFromTuShareOld(self,
                               code,
                               startDate,
                               endDate,
                               fields,
                               name=None,
                               verify=False):
        """
            Fetch daily bars of a stock through tushare.
            Kept consistent with the Wind interface. Net inflow volume and
            amount cannot be obtained online, so those two fields are absent.
            @verify: True - the same fields from different web sources are
                     cross-checked against each other.
            @return: df['datetime', indicators]
                     None - errors
            NOTE(review): the original docstring also listed "[] - no data",
            but no path in this body returns a list.
        """
        code = code[:-3]
        sinaColumns = [
            'open', 'high', 'close', 'low', 'volume', 'amount', 'factor'
        ]

        def reportError(template):
            # Mismatch messages share the same four format arguments.
            self._info.print(
                template.format(code, name, startDate, endDate),
                DyLogData.error)

        try:
            # Turnover from ifeng; volume is in lots (not rounded, e.g.
            # 2004.67 lots).
            ifengDf = ts.get_hist_data(code, startDate, endDate).sort_index()

            # Unadjusted OHLCV from Tencent; volume is in lots (rounded).
            if verify:
                tcentDf = ts.get_k_data(code, startDate, endDate,
                                        autype=None).sort_index()

            # Adjustment factor from Sina; volume is in shares. Sina data is
            # backward-adjusted; tushare derives the unadjusted values from
            # the factor.
            sinaDf = ts.get_h_data(code,
                                   startDate,
                                   endDate,
                                   autype=None,
                                   drop_factor=False)
            # TuShare returns None when there is no data.
            if sinaDf is None:
                sinaDf = pd.DataFrame(columns=sinaColumns)
            else:
                sinaDf = sinaDf.sort_index()
        except Exception as ex:
            self._info.print(
                "从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(
                    code, name, startDate, endDate, ex), DyLogData.error)
            return None

        # Cross-check the sources against each other.
        if verify:
            # OHLC: Tencent vs. Sina, first by length and then by value.
            for indicator in ['open', 'high', 'close', 'low']:
                tcentValues = tcentDf[indicator].values
                sinaValues = sinaDf[indicator].values
                if (len(tcentValues) != len(sinaValues)
                        or (tcentValues != sinaValues).sum() > 0):
                    reportError("{}({})日线数据OHLC[{}, {}]: 腾讯和新浪不相同")
                    return None

            # Volume: ifeng (lots, hence the factor 100) vs. Sina (shares).
            ifengVolume = ifengDf['volume'].values
            sinaVolume = sinaDf['volume'].values
            if (len(ifengVolume) != len(sinaVolume)
                    or (np.round(ifengVolume * 100) !=
                        np.round(sinaVolume)).sum() > 0):
                reportError("{}({})日线数据Volume[{}, {}]: 凤凰网和新浪不相同")
                return None

        # Build the combined frame.
        df = pd.concat([sinaDf[sinaColumns], ifengDf['turnover']], axis=1)
        df.index.name = None

        # Turn the time index into a column and switch to Wind's names.
        df = df.reset_index()
        df = df.rename(columns={
            'index': 'datetime',
            'amount': 'amt',
            'turnover': 'turn',
            'factor': 'adjfactor'
        })

        # Reset the time-of-day part of every date to 00:00:00.
        dayStrings = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
        df['datetime'] = pd.to_datetime(dayStrings, format='%Y-%m-%d')

        # Keep only the requested @fields.
        return df[['datetime'] + fields]
Beispiel #52
0
import tushare as ts

# NOTE(review): 'sz50' is an index label; confirm get_h_data accepts it (it
# may need the numeric index code together with index=True).
df = ts.get_h_data('sz50', start='2006-01-01', end='2017-12-31')

# tushare returns None when nothing could be downloaded; stop with a clear
# message instead of an AttributeError on None.
if df is None:
    raise SystemExit("tushare returned no data for 'sz50'")
df.to_csv('C:/Users/jacky/Desktop/XMU/HMM/sz501.csv')
Beispiel #53
0
import tushare as ts
import sys

# Command line: <output_csv> <stock_code> <autype>
if len(sys.argv) < 4:
    raise SystemExit('usage: %s <output_csv> <stock_code> <autype>' %
                     sys.argv[0])

path=sys.argv[1]
df = ts.get_h_data(sys.argv[2], start='2005-01-01', end='2018-01-01', autype=sys.argv[3])
# tushare returns None when nothing could be downloaded.
if df is None:
    raise SystemExit('tushare returned no data for %s' % sys.argv[2])
df.to_csv(path,encoding="utf8")
Beispiel #54
0
 def Get_hist_data(self, code):
     """Download the history of ``code`` and pass it to ``Mysql().SaveMySqlTWO``.

     The frame is passed with the fixed name 'Stock_Basics_Info' and the key
     ``code + 'stock_basics'``.
     """
     history = ts.get_h_data(code)
     saver = Mysql()
     saver.SaveMySqlTWO(history, 'Stock_Basics_Info', code + 'stock_basics')
Beispiel #55
0
#coding=UTF-8

from sqlalchemy import create_engine
import tushare as ts
import time
import MySQLdb as mariadb

mariadb_connection = mariadb.connect('localhost', 'xxxxxxx', 'xxxxxx',
                                     'xxxxxx')
cursor = mariadb_connection.cursor()
# One engine is enough; it used to be re-created for every stock.
engine = create_engine(
    'mysql://*****:*****@127.0.0.1/xxxxx?charset=utf8')

df_basics = ts.get_stock_basics()
# Download each stock's history into its own table, skipping existing tables.
for code in df_basics.index:
    print(code)
    tb_name = 'hist' + code
    # Let the driver quote the pattern instead of formatting it into the SQL.
    cursor.execute("show tables like %s", (tb_name,))
    if cursor.fetchall():
        continue
    df = ts.get_h_data(code, start='2008-02-10', end='2018-02-14', pause=5)
    # tushare returns None when a stock has no data in the range.
    if df is None:
        continue
    df.to_sql(tb_name, engine)
    print(" ")
    #    time.sleep(2)
Beispiel #56
0
def M1_notification(bot):
    '''
    Push a daily message with the market turnover relative to M0/M1.

    Runs forever, waking every 600 seconds. After 15:30 on a weekday that is
    not in the hard-coded 2018 holiday list, and if no row for today is
    stored yet, it downloads today's bars of indices '000001' and '399001',
    relates their summed amount to the M0/M1 values read from a local Excel
    file, stores the result and sends the message to two contacts.

    :param bot: object whose ``friends().search(name)[0].send(msg)`` delivers
        the message.
    :return: never returns; the loop has no exit.
    '''
    print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + ' - Start M1_notification thread ')
    path = sys.path[0] + '/notification_monitoring_files/'
    m1_columns = ['date', 'M1', 'index_volume_total', 'M1_percentage']
    ## 2018 holiday calendar (loop-invariant, so built once)
    holiday_calendar = ['2018-04-05', '2018-04-06', '2018-04-30',
                        '2018-05-01', '2018-06-18', '2018-09-24',
                        '2018-10-01', '2018-10-02', '2018-10-03', '2018-10-04', '2018-10-05']

    while True:
        now = strftime("%H:%M", localtime())
        today_ISO = datetime.today().date().isoformat()
        holiday = datetime.today().weekday() >= 5 or (today_ISO in holiday_calendar)

        if (now > '15:30') and (not holiday):
            # Has today's notification already been sent?
            filename = '成交量M1占比'
            try:
                M1_last = pd.read_excel(path + filename + '.xlsx')
            except Exception:
                # No readable history yet: start from a single empty row.
                M1_last = pd.DataFrame(columns=m1_columns, index=["0"])
            # Look the date up by column name: a file written with its index
            # carries an extra leading column, which made the positional
            # iloc[0, 0] read the index instead of the date.
            M1_last_msg_sent_date = M1_last['date'].iloc[0]

            # Send the message
            if M1_last_msg_sent_date != today_ISO:
                try:
                    index_sh = ts.get_h_data('000001', index=True, start=today_ISO, end=today_ISO)
                    index_sz = ts.get_h_data('399001', index=True, start=today_ISO, end=today_ISO)
                    if (not index_sh.empty) and (not index_sz.empty):
                        # M0/M1 share analysis.
                        # presumably column 5 is 'amount' and column 4 is
                        # 'volume' - verify against the get_h_data layout.
                        M1_index_amount = (index_sh.iloc[0, 5] + index_sz.iloc[0, 5]) / 100000000
                        M1_index_volume = (index_sh.iloc[0, 4] + index_sz.iloc[0, 4]) / 100000000
                        filename = '货币供应量_宏观数据_新浪财经'
                        M1_sina = pd.read_excel(path + filename + '.xlsx')
                        M0 = M1_sina.iloc[0, 5]
                        M0_percentage = 100 * M1_index_amount / M0
                        M1 = M1_sina.iloc[0, 3]
                        M1_percentage = 100 * M1_index_amount / M1

                        M1_row = pd.DataFrame(
                            [[today_ISO, M1, M1_index_amount, M1_percentage]],
                            columns=m1_columns, index=["0"])

                        # pd.concat replaces DataFrame.append, which is gone
                        # from current pandas.
                        M1_last = pd.concat([M1_last, M1_row])
                        M1_last.sort_values(by='date', ascending=False, inplace=True)
                        filename = '成交量M1占比'
                        # index=False keeps the stored layout stable between
                        # runs. NOTE(review): the ``encoding`` argument is
                        # rejected by current pandas - confirm the version.
                        M1_last.to_excel(path + filename + '.xlsx',
                                         encoding='GBK', index=False)

                        msg =   '==========================' + '\n' + \
                            today_ISO + ' - \n' + \
                            '==========================' + '\n' + \
                            '两市总成交额:' + str(round(M1_index_amount, 3)) + '(亿)\n' + \
                            '两市总成交量:' + str(round(M1_index_volume, 3)) + '(亿)\n' + \
                            'M0:' + str(M0) + '(亿)\n' + \
                            'M1:' + str(M1) + '(亿)\n' + \
                            '两市总成交额占M0:' + str(round(M0_percentage, 3)) + '% \n' + \
                            '两市总成交额占M1:' + str(round(M1_percentage, 3)) + '% \n' + \
                            '=========================='

                        # Deliver the message
                        bot.friends().search('Yang Hui')[0].send(msg)
                        bot.friends().search('欣')[0].send(msg)
                except Exception as ex:
                    # Still best effort (retried on the next wake-up), but the
                    # failure is now visible and Ctrl-C is no longer swallowed.
                    print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + ' - M1_notification failed: ' + str(ex))
        time.sleep(600)
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 26 11:57:59 2016

@author: Richard
修改股票代码和保存文件名称,得到不同股票的价格
"""

import tushare as ts

if __name__ == "__main__":

    # Change the stock code here (forward-adjusted prices for one year).
    stock_code = '600036'
    # Change the output file name here.
    output_path = r'E:\study\master of TJU\0Subject research\data\core\price_600036.csv'

    myData = ts.get_h_data(stock_code, start='2015-10-01', end='2016-10-01')
    myData.to_csv(output_path)
    # Alternative output location used during testing:
    #myData.to_csv(r'E:\study\master of TJU\0Subject research\code\Important\get_features_of_firm\test_price.csv')
Beispiel #58
0
    def _getCodeDaysFromTuShare(self,
                                code,
                                startDate,
                                endDate,
                                fields,
                                name=None):
        """
            Fetch daily bars of a single stock through TuShare.

            Combines the price/factor columns from ts.get_h_data with the
            'turnover' column from self._getDaysFrom163 and renames the
            columns to Wind-style names. The extra per-request pause grows
            by one step after an IOError and shrinks by one step after a
            successful request.
            @return: DataFrame with 'datetime' plus the requested @fields,
                     or None when downloading or combining fails, or when
                     the combined frame contains missing values.
        """
        print("{}, {} ~ {}".format(code, startDate, endDate))

        tuShareCode = code[:-3]
        sinaColumns = [
            'open', 'high', 'close', 'low', 'volume', 'amount', 'factor'
        ]

        try:
            # Turnover comes from NetEase (163).
            netEasyDf = self._getDaysFrom163(code, startDate,
                                             endDate).sort_index()
            # Drop the rows of suspended trading days.
            netEasyDf = netEasyDf[netEasyDf['volume'] > 0]
            netEasyDf.index = pd.to_datetime(netEasyDf.index,
                                             format='%Y-%m-%d')

            # Adjustment factor comes from Sina; volume is in shares. Sina
            # data is backward-adjusted; tushare derives the unadjusted values
            # from the factor.
            sleepTime = self.tuShareDaysSleepTimeConst + self.tuShareDaysSleepTime
            try:
                sinaDf = ts.get_h_data(tuShareCode,
                                       startDate,
                                       endDate,
                                       autype=None,
                                       drop_factor=False,
                                       pause=sleepTime)
            except IOError:  # We think Sina is anti-crawling
                self.tuShareDaysSleepTime += self.tuShareDaysSleepTimeStep
                print(
                    "Sina is anti-crawling, setting additional sleep time to {}s for each request"
                    .format(self.tuShareDaysSleepTime))
                raise

            # The request went through: relax the extra pause by one step.
            if self.tuShareDaysSleepTime > 0:
                self.tuShareDaysSleepTime -= self.tuShareDaysSleepTimeStep

            # TuShare returns None when there is no data.
            hasData = not (sinaDf is None or sinaDf.empty)
            if hasData:
                sinaDf = sinaDf.sort_index()
            else:
                sinaDf = pd.DataFrame(columns=sinaColumns)
        except Exception as ex:
            self._info.print(
                "从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(
                    code, name, startDate, endDate, ex), DyLogData.warning)
            return None

        # Build the combined frame.
        try:
            df = pd.concat([sinaDf[sinaColumns], netEasyDf['turnover']],
                           axis=1)
            df.index.name = None
        except Exception as ex:
            print("netEasyDf")
            print(netEasyDf)
            print("sinaDf")
            print(sinaDf)

            self._info.print(
                "从TuShare获取的{}({})日线数据[{}, {}]格式错误: {}".format(
                    code, name, startDate, endDate, ex), DyLogData.warning)
            return None

        # Any missing value means the two sources cover different days.
        if df.isnull().sum().sum() > 0:
            self._info.print(
                "{}({})新浪日线和网易日线数据不一致[{}, {}]".format(code, name, startDate,
                                                      endDate),
                DyLogData.warning)
            return None

        # Turn the time index into a column and switch to Wind's names.
        df = df.reset_index()
        df = df.rename(columns={
            'index': 'datetime',
            'amount': 'amt',
            'turnover': 'turn',
            'factor': 'adjfactor'
        })

        # Reset the time-of-day part of every date to 00:00:00.
        dayStrings = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
        df['datetime'] = pd.to_datetime(dayStrings, format='%Y-%m-%d')

        # Keep only the requested @fields.
        return df[['datetime'] + fields]
Beispiel #59
0
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

import tushare as ts
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.style.use('ggplot')

# <codecell>

# NOTE(review): ``id`` shadows the builtin; kept because other notebook cells
# may refer to it.
id = '300220'
df = ts.get_h_data(id, autype='qfq', start='2013-06-10')

# <codecell>

df = pd.DataFrame(df.query('date > "2015-07-01"'))

# <codecell>

# Centred moving averages of the close. Series.rolling(...).mean() replaces
# pd.rolling_mean, which is gone from current pandas.
df['m1'] = df['close'].rolling(window=15, min_periods=1, center=True).mean()
df['m2'] = df['close'].rolling(window=30, min_periods=1, center=True).mean()
df['m4'] = df['close'].rolling(window=45, min_periods=1, center=True).mean()

# <codecell>

# print(...) with one argument behaves the same on Python 2 and 3.
print(df.plot(y=['close', 'm1', 'm2', 'm4', 'volume'], title=id,  secondary_y='volume', grid=True, legend=True, figsize=(16, 10)))
Beispiel #60
0
def down_stk_cn010(qx, startTime):
    ''' Download (or incrementally update) daily data of one China A-share.

    The data is kept in ``<qx.rDay>/<qx.code>.csv`` (gbk encoded, newest row
    first). When the file exists, the download resumes from its newest date
    and is merged into the stored rows; on duplicate dates the freshly
    downloaded row wins.

    [Input]
        qx (zwDatX): provides ``code`` (stock code) and ``rDay`` (target
            directory, created when missing)
        startTime (str): start date used when no file exists yet

    [Output]
        the merged frame that was written; None when tushare returned no
        data; an empty list when the download raised IOError
    '''

    xcod, rss, = qx.code, qx.rDay
    if not os.path.exists(rss):
        os.makedirs(rss)

    tim0 = startTime
    fss = os.path.join(rss, xcod + '.csv')

    xfg = os.path.exists(fss)
    xd0 = []
    xd = []
    if xfg:
        xd0 = pd.read_csv(fss, index_col=0, parse_dates=[0], encoding='gbk')
        xd0 = xd0.sort_index(ascending=False)
        # Resume from the newest stored date (date part only).
        tim0 = str(xd0.index[0]).split(" ")[0]

    print('\n', xfg, fss, ",", tim0)
    try:
        xd = ts.get_h_data(xcod, start=tim0, end=None, retry_count=5,
                           pause=1)  #Day9
        if xd is not None:
            if len(xd0) > 0:
                # pd.concat replaces DataFrame.append, which is gone from
                # current pandas. Duplicate dates keep the last (fresh) row.
                xd2 = pd.concat([xd0, xd])
                xd = xd2[~xd2.index.duplicated(keep='last')]

            xd = xd.sort_index(ascending=False)
            xd = np.round(xd, 3)
            xd.to_csv(fss, encoding='gbk')
    except IOError:
        # Best effort: an I/O failure leaves any existing file untouched.
        pass

    return xd