def cmodel(company, refcompany, dt1, dt2, num_of_states):
    """Fit a two-feature Gaussian HMM for a company and a reference company.

    Both features are day-to-day differences of column 2 of the quote rows
    (the close, given the OCHL row order of ``quotes_historical_yahoo_ochl``)
    between ``dt1`` and ``dt2``.  The fitted model is dumped with joblib to
    ``./sims3/<company>_<num_of_states>_states_model_adv.pkl``.

    Args:
        company: ticker whose differences form the first feature column.
        refcompany: ticker whose differences form the second feature column.
        dt1: start of the time range.
        dt2: end of the time range.
        num_of_states: number of hidden states of the HMM.

    Returns:
        None.  The model is only written to disk.
    """
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    ref_quotes = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)

    # np.diff yields len(close) - 1 values per series.
    diff = np.diff(np.array([q[2] for q in quotes]))
    ref_diff = np.diff(np.array([q[2] for q in ref_quotes]))

    # Left-pad the SHORTER series with zeros so both columns end on the most
    # recent observation.  The previous code always padded the company
    # series, which made column_stack fail whenever the reference series was
    # the shorter one.
    gap = ref_diff.shape[0] - diff.shape[0]
    if gap > 0:
        diff = np.pad(diff, (gap, 0), mode='constant', constant_values=0)
    elif gap < 0:
        ref_diff = np.pad(ref_diff, (-gap, 0), mode='constant',
                          constant_values=0)

    X = np.column_stack([diff, ref_diff])

    # Create HMM instance and fit
    model = GaussianHMM(n_components=num_of_states, covariance_type="full",
                        n_iter=1000).fit(X)

    fname = str(company) + "_" + str(num_of_states) + "_states_model_adv.pkl"
    joblib.dump(model, os.path.join('./sims3', fname))
def stock_month_plot():
    """Plot INTC opening prices between 2002-01-05 and 2003-12-01.

    Demonstrates matplotlib date tick locators and formatters: major ticks
    come from a month locator (first day of month, interval 3) labelled like
    "Jan '02", minor ticks fall on Mondays.  Prints a message and raises
    SystemExit when no quotes are returned.
    """
    first_day = datetime.date(2002, 1, 5)
    last_day = datetime.date(2003, 12, 1)

    history = quotes_historical_yahoo_ochl('INTC', first_day, last_day)
    if len(history) == 0:
        print('Found no quotes')
        raise SystemExit

    # Column 0 holds the date, column 1 the opening price.
    day_numbers = [row[0] for row in history]
    open_prices = [row[1] for row in history]

    quarter_ticks = MonthLocator(range(1, 13), bymonthday=1, interval=3)
    quarter_labels = DateFormatter("%b '%y")
    monday_ticks = WeekdayLocator(MONDAY)

    fig, ax = plt.subplots()
    ax.plot_date(day_numbers, open_prices, '-')

    x_axis = ax.xaxis
    x_axis.set_major_locator(quarter_ticks)
    x_axis.set_major_formatter(quarter_labels)
    x_axis.set_minor_locator(monday_ticks)

    ax.autoscale_view()
    ax.grid(True)
    fig.autofmt_xdate()
    plt.show()
def ret_f(ticker, begdate, enddate):
    """Return the simple period-over-period returns of the adjusted close.

    Quotes are requested with ``asobject=True, adjusted=True`` and the result
    is ``(aclose[t] - aclose[t-1]) / aclose[t-1]`` for every consecutive
    pair, i.e. one element fewer than the number of quotes.
    """
    history = quotes_historical_yahoo_ochl(
        ticker, begdate, enddate, asobject=True, adjusted=True)
    current = history.aclose[1:]
    previous = history.aclose[:-1]
    return (current - previous) / previous
def get_close(symbol):
    """Return the last year of quotes for ``symbol`` as a transposed array.

    Despite the name, the whole quote table is returned: the rows fetched
    from ``quotes_historical_yahoo_ochl`` are converted to a numpy array and
    transposed, so each row of the result is one quote field.
    """
    end = date.today()
    # Same calendar day one year earlier, passed as a (year, month, day) tuple.
    begin = (end.year - 1, end.month, end.day)
    rows = quotes_historical_yahoo_ochl(symbol, begin, end)
    return np.array(rows).T
def PlotData(code, start, end, list):
    """Plot the selected quote fields of one ticker with point markers.

    Args:
        code: ticker symbol passed to ``quotes_historical_yahoo_ochl``.
        start: start date, converted with ``_wxdate2pydate``.
        end: end date, converted with ``_wxdate2pydate``.
        list: names of the columns to plot, taken from 'open', 'close',
            'high', 'low', 'volume'.  (The name shadows the builtin but is
            kept because callers may pass it by keyword.)

    The debug output uses single-argument ``print(...)`` calls, which behave
    the same on Python 2 and Python 3.
    """
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print(code)
    print(start_date)
    print(end_date)
    print(list)

    # Fetch all quotes for the company code and date range.
    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']

    # Column 0 is a date ordinal; format it as 'YYYY-MM-DD' for the index.
    list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d')
             for row in quotes]
    print(list1)

    # Build the DataFrame from the rows, the date labels and all fields.
    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    # Drop the raw date column: its format differs from the index labels.
    quotesdf = quotesdf.drop(['date'], axis=1)

    # Copy only the selected fields (e.g. close, open) into a temporary frame.
    quotesdftemp = pd.DataFrame()
    for field in list:
        quotesdftemp[field] = quotesdf[field]
    print(quotesdftemp)
    print("ready to plot")

    # Plot
    quotesdftemp.plot(marker='o')
    plt.show()
def PlotData(code, start, end, list):
    """Plot the selected quote fields of one ticker, titled with the ticker.

    Args:
        code: ticker symbol passed to ``quotes_historical_yahoo_ochl``; also
            used as the plot title.
        start: start date, converted with ``_wxdate2pydate``.
        end: end date, converted with ``_wxdate2pydate``.
        list: names of the columns to plot, taken from 'open', 'close',
            'high', 'low', 'volume'.  (The name shadows the builtin but is
            kept because callers may pass it by keyword.)

    The debug output uses single-argument ``print(...)`` calls, which behave
    the same on Python 2 and Python 3.
    """
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print(code)
    print(start_date)
    print(end_date)

    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']

    # Column 0 is a date ordinal; format it as 'YYYY-MM-DD' for the index.
    list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d')
             for row in quotes]
    print(list1)

    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    quotesdf = quotesdf.drop(['date'], axis=1)

    quotesdftemp = pd.DataFrame()
    # Printed while still empty, as before.
    print(quotesdftemp)
    for field in list:
        quotesdftemp[field] = quotesdf[field]
    print("ready to plot")

    quotesdftemp.plot()
    plt.title(code)
    plt.xlabel('Time')
    plt.show()
def getrealprice(company, dt2, days_future):
    """Return the last available column-2 quote value after ``dt2``.

    Quotes are fetched for the window ``dt2 .. dt2 + days_future`` days and
    the final entry of column 2 (named ``close_v`` by the original author)
    is returned.  Indexing fails if the window contains no quotes.
    """
    window_end = dt2 + datetime.timedelta(days=days_future)
    history = quotes_historical_yahoo_ochl(company, dt2, window_end)
    closes = np.array([row[2] for row in history])
    return closes[-1]
def stock_year_plot():
    """Plot INTC opening prices between 1995-01-01 and 2004-04-12.

    Demonstrates matplotlib date tick locators and formatters: yearly major
    ticks labelled with the four-digit year, monthly minor ticks, and custom
    formatting of the coordinate read-out.  Prints a message and raises
    SystemExit when no quotes are returned.
    """
    first_day = datetime.date(1995, 1, 1)
    last_day = datetime.date(2004, 4, 12)

    history = quotes_historical_yahoo_ochl('INTC', first_day, last_day)
    if len(history) == 0:
        print('Found no quotes')
        raise SystemExit

    # Column 0 holds the date, column 1 the opening price.
    day_numbers = [row[0] for row in history]
    open_prices = [row[1] for row in history]

    year_ticks = YearLocator()      # every year
    month_ticks = MonthLocator()    # every month
    year_labels = DateFormatter('%Y')

    fig, ax = plt.subplots()
    ax.plot_date(day_numbers, open_prices, '-')

    # Tick layout
    x_axis = ax.xaxis
    x_axis.set_major_locator(year_ticks)
    x_axis.set_major_formatter(year_labels)
    x_axis.set_minor_locator(month_ticks)
    ax.autoscale_view()

    # Coordinate message box: ISO date on x, dollars with two decimals on y.
    def _as_dollars(value):
        return '$%1.2f' % value

    ax.fmt_xdata = DateFormatter('%Y-%m-%d')
    ax.fmt_ydata = _as_dollars

    ax.grid(True)
    fig.autofmt_xdate()
    plt.show()
def symbolsToPriceDict(symbols, startDate, endDate):
    """Map each stock symbol to the list of its opening prices.

    Args:
        symbols: iterable of stock symbols.  It is walked exactly once, so
            one-shot iterators work too (the previous implementation
            iterated it twice and produced an empty dict for them).
        startDate: start of the date range.
        endDate: end of the date range.

    Returns:
        dict of ``symbol -> list`` built from the ``open`` attribute of the
        quotes fetched with ``asobject=True``.  If a symbol occurs more than
        once, the last fetch wins.
    """
    # TODO: add a check to account for missing values in the data.
    return {
        symbol: list(
            finance.quotes_historical_yahoo_ochl(
                symbol, startDate, endDate, asobject=True).open)
        for symbol in symbols
    }
def predict_one(filename, company, dt1, dt2, num_of_states, days_future, tr_prob):
    """Sample one price path from a saved HMM, starting in the likeliest state.

    The model loaded from ``filename`` is evaluated on the differences of
    column 2 of the quotes between ``dt1`` and ``dt2``; the state with the
    highest posterior probability on the last observation becomes the only
    possible start state of a sampling model that reuses the loaded model's
    transition matrix, means and covariances.

    Args:
        filename: path of the joblib-dumped model.
        company: ticker symbol.
        dt1: start of the history used to locate the current hidden state.
        dt2: end of that history; also passed to ``getrealprice_series``.
        num_of_states: number of hidden states of the stored model.
        days_future: passed to ``getrealprice_series``.
        tr_prob: not used by this function.

    Returns:
        numpy array with as many entries as the series returned by
        ``getrealprice_series``.  Entry 0 is the first real price; every
        later entry adds the sampled difference at the same index.
    """
    stored_model = joblib.load(filename)
    real_prices = getrealprice_series(company, dt2, days_future)
    horizon = real_prices.size

    history = quotes_historical_yahoo_ochl(company, dt1, dt2)
    closes = np.array([row[2] for row in history])
    observations = np.column_stack([np.diff(closes)])

    # Posterior over hidden states for the most recent observation.
    last_state_probs = stored_model.predict_proba(observations)[-1]

    # With more than one state, force sampling to begin in the likeliest one.
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[last_state_probs.argmax()] = 1.0
    else:
        startprob = [1.]

    # Sampling model: same dynamics and emissions, fixed start state.
    sampler = GaussianHMM(n_components=num_of_states, covariance_type="full")
    sampler.startprob_ = startprob
    sampler.transmat_ = stored_model.transmat_
    sampler.means_ = stored_model.means_
    sampler.covars_ = stored_model.covars_

    # Fresh seed on every call so repeated calls give different paths.
    random.seed()
    seed = random.randrange(0, max_int_value)
    sampled, _hidden = sampler.sample(horizon, random_state=seed)

    predictions = np.zeros(horizon)
    running_price = real_prices[0]      # day 0 equals the real price
    predictions[0] = running_price
    for day in range(1, horizon):
        running_price += sampled[day][0]
        predictions[day] = running_price
    return predictions
def predictions_rand(filename, company, dt1, dt2, num_of_states, test_num, days_future):
    """Simulate final prices from a saved HMM, starting in a random state.

    Args:
        filename: path of the joblib-dumped model (expected to have a single
            feature: the difference of column 2 of the quotes).
        company: ticker symbol.
        dt1: start of the history; only its last column-2 value is used as
            the starting price.
        dt2: end of that history.
        num_of_states: number of hidden states of the stored model.
        test_num: number of independent simulations to run.
        days_future: horizon in calendar days.

    Returns:
        list with ``test_num`` simulated final prices.

    Fixes over the previous version:
    * ``364 / 251`` truncated to 1 under Python 2 integer division, so the
      calendar-to-trading-day conversion was silently skipped (and on
      Python 3 it produced a float that ``range`` rejects).
    * ``sys.maxint`` exists only on Python 2 and, on 64-bit builds, exceeds
      the ``2**32 - 1`` seed limit of NumPy's RandomState.
    * ``final_price[0]`` failed when no sampled step was accepted, because
      the price was then still a scalar.
    * an unused ``predict_proba`` evaluation and one discarded sample per
      call were dropped.
    """
    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    last_close = close_v[-1]

    # 251 open-market days out of 364: convert calendar days to trading days.
    days = int(days_future * 251 // 364)
    print(days)

    # Every hidden state is equally likely as the starting state.
    startprob = np.full(num_of_states, 1.0 / num_of_states)

    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    random.seed()
    predictions = []
    for _test in range(test_num):
        rseed = random.randrange(0, 2 ** 32)
        steps, _states = model_2_sample.sample(days, random_state=rseed)

        final_price = last_close
        for i in range(days):
            step = steps[i][0]
            # Skip any step that would push the price to zero or below.
            if final_price + step > 0:
                final_price += step
        predictions.append(final_price)
    return predictions
def symbolsToPrices(symbols, startDate, endDate):
    """Return the opening prices of several symbols as one transposed array.

    Args:
        symbols: iterable of stock symbols.
        startDate: start of the date range.
        endDate: end of the date range.

    Returns:
        ``np.array(per_symbol_opens).T``.  When every symbol has the same
        number of quotes this is a prices-by-symbols 2-D array (one column
        per symbol).

    The shape is printed for debugging; single-argument ``print(...)`` calls
    behave the same on Python 2 and Python 3.
    """
    # TODO: add a check to account for missing values in the data.
    quotes = [
        finance.quotes_historical_yahoo_ochl(
            symbol, startDate, endDate, asobject=True).open
        for symbol in symbols
    ]
    # Build the array once instead of once per use.
    prices = np.array(quotes).T
    print("prices shape:")
    print(prices.shape)
    return prices
def cmodel(company, dt1, dt2, num_of_states):
    """Fit a Gaussian HMM on one company's price differences and save it.

    The single feature is the day-to-day difference of column 2 of the quote
    rows between ``dt1`` and ``dt2``.  The model is dumped with joblib to
    ``./sims_final/<company>_<num_of_states>_states_model_final.pkl``.

    Args:
        company: ticker symbol.
        dt1: start of the time range.
        dt2: end of the time range.
        num_of_states: number of hidden states.

    Returns:
        ``(expected_days, tr_mls)``: ``tr_mls`` is the self-transition
        probability of the most likely last hidden state and
        ``expected_days = 1 / (1 - tr_mls)`` the expected number of days
        spent in it.  Both are 1 when ``num_of_states`` is 1.
    """
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)

    close_v = np.array([q[2] for q in quotes])
    # Pack the differences as a single-column training matrix.
    X = np.column_stack([np.diff(close_v)])

    model = GaussianHMM(n_components=num_of_states, covariance_type="full",
                        n_iter=1000).fit(X)

    expected_days = 1
    tr_mls = 1
    if num_of_states > 1:
        # Identify the most likely last hidden state.
        try:
            hidden_probs = model.predict_proba(X)
        except Exception:
            # Scoring with the full-covariance fit failed; refit with
            # diagonal covariances.  ``Exception`` instead of a bare except
            # so Ctrl-C and SystemExit are no longer swallowed.
            model = GaussianHMM(n_components=num_of_states,
                                covariance_type="diag", n_iter=1000).fit(X)
            hidden_probs = model.predict_proba(X)
        mls = hidden_probs[-1].argmax()

        # Self-transition probability of that state.
        tr_mls = model.transmat_[mls][mls]
        # Geometric series: expected number of days staying in the state.
        expected_days = 1.0 / (1 - tr_mls)

    # Save the model for future use.
    fname = str(company) + "_" + str(num_of_states) + "_states_model_final.pkl"
    joblib.dump(model, os.path.join('./sims_final', fname))

    return expected_days, tr_mls
def ret_monthly(ticker):
    """Return monthly log returns of ``ticker`` as a one-column DataFrame.

    Adjusted closes from Jan 1 of ``begYear`` to Dec 31 of ``endYear``
    (module-level names) are turned into log returns; each return is
    labelled with the ``YYYYMM`` of the earlier of its two dates and the
    returns are summed per label.
    """
    history = quotes_historical_yahoo_ochl(
        ticker, (begYear, 1, 1), (endYear, 12, 31),
        asobject=True, adjusted=True)

    adj_close = history.aclose
    logret = log(adj_close[1:] / adj_close[:-1])

    days = history.date
    month_keys = [days[i].strftime("%Y%m") for i in range(size(logret))]

    frame = pd.DataFrame(logret, month_keys, columns=[ticker])
    return frame.groupby(frame.index).sum()
def main(symbols, percent, days, verbose):
    """Report how far each ticker's latest close is below its recent maximum.

    Args:
        symbols: iterable of ticker symbols (upper-cased before use).
        percent: decline threshold in percent.
        days: look-back window in days; the sign is ignored.
        verbose: when true, echo the arguments first.

    For every ticker the maximum close over the window, the most recent
    close, the change between them (via ``rate_of_return``) and the target
    price are printed; an alert follows when the change is below
    ``-percent``.

    All output goes through single-argument ``print(...)`` calls so the
    function runs on Python 2 and Python 3 alike; ``print("")`` replaces the
    bare ``print`` statement for blank lines.
    """
    if verbose:
        print("Symbols: %s" % symbols)
        print("Percent: %s" % percent)
        print("Days: %d" % days)
        print("Verbose: %s" % verbose)
        print("")

    # The window is expressed as a negative offset from now.
    if days > 0:
        days = -days

    print("Checking for a %s%% decline over the past "
          "%d days" % (percent, abs(days)))
    print("")

    for ticker in symbols:
        ticker = ticker.upper()
        print("Stock: %s" % ticker)

        # Sample the clock once so both ends of the window are consistent.
        end_date = datetime.datetime.now()
        start_date = end_date + datetime.timedelta(days)
        quotes_objects = quotes_historical_yahoo_ochl(
            ticker, start_date, end_date, asobject=True)

        max_value = round(float(max(quotes_objects.close)), 5)
        print("- max value over %d days: %s" % (abs(days), max_value))

        most_recent_close = round(float(quotes_objects.close[-1]), 5)
        print("- most recent close: %s" % most_recent_close)

        percent_change = round(
            float(rate_of_return(max_value, most_recent_close)), 2)
        print("- percent change: %s%%" % percent_change)

        target_price = max_value - (max_value * (percent / 100.0))
        print("- target price: %s (%s%% below %s)" % (target_price, percent,
                                                      max_value))

        if percent_change < -percent:
            print("")
            print("ALERT! %s has dropped %s%% "
                  "over the last %s days" % (ticker, percent_change,
                                             abs(days)))
        print("")
def main(symbols, percent, days, verbose):
    """Report how far each ticker's latest close is below its recent maximum.

    Args:
        symbols: iterable of ticker symbols (upper-cased before use).
        percent: decline threshold in percent.
        days: look-back window in days; the sign is ignored.
        verbose: when true, echo the arguments first.

    For every ticker the maximum close over the window, the most recent
    close, the change between them (via ``rate_of_return``) and the target
    price are printed; an alert follows when the change is below
    ``-percent``.

    All output goes through single-argument ``print(...)`` calls so the
    function runs on Python 2 and Python 3 alike; ``print("")`` replaces the
    bare ``print`` statement for blank lines.
    """
    if verbose:
        print("Symbols: %s" % symbols)
        print("Percent: %s" % percent)
        print("Days: %d" % days)
        print("Verbose: %s" % verbose)
        print("")

    # The window is expressed as a negative offset from now.
    if days > 0:
        days = -days

    print("Checking for a %s%% decline over the past "
          "%d days" % (percent, abs(days)))
    print("")

    for ticker in symbols:
        ticker = ticker.upper()
        print("Stock: %s" % ticker)

        # Sample the clock once so both ends of the window are consistent.
        end_date = datetime.datetime.now()
        start_date = end_date + datetime.timedelta(days)
        quotes_objects = quotes_historical_yahoo_ochl(
            ticker, start_date, end_date, asobject=True)

        max_value = round(float(max(quotes_objects.close)), 5)
        print("- max value over %d days: %s" % (abs(days), max_value))

        most_recent_close = round(float(quotes_objects.close[-1]), 5)
        print("- most recent close: %s" % most_recent_close)

        percent_change = round(
            float(rate_of_return(max_value, most_recent_close)), 2)
        print("- percent change: %s%%" % percent_change)

        target_price = max_value - (max_value * (percent / 100.0))
        print("- target price: %s (%s%% below %s)" % (target_price, percent,
                                                      max_value))

        if percent_change < -percent:
            print("")
            print("ALERT! %s has dropped %s%% "
                  "over the last %s days" % (ticker, percent_change,
                                             abs(days)))
        print("")
def symbolsToPriceDict(symbols, startDate, endDate):
    """Map each stock symbol to the list of its opening prices.

    Args:
        symbols: iterable of stock symbols.  It is walked exactly once, so
            one-shot iterators work too (the previous implementation
            iterated it twice and produced an empty dict for them).
        startDate: start of the date range.
        endDate: end of the date range.

    Returns:
        dict of ``symbol -> list`` built from the ``open`` attribute of the
        quotes fetched with ``asobject=True``.  If a symbol occurs more than
        once, the last fetch wins.
    """
    # TODO: add a check to account for missing values in the data.
    return {
        symbol: list(
            finance.quotes_historical_yahoo_ochl(
                symbol, startDate, endDate, asobject=True).open)
        for symbol in symbols
    }
def symbolsToPrices(symbols, startDate, endDate):
    """Return the opening prices of several symbols as one transposed array.

    Args:
        symbols: iterable of stock symbols.
        startDate: start of the date range.
        endDate: end of the date range.

    Returns:
        ``np.array(per_symbol_opens).T``.  When every symbol has the same
        number of quotes this is a prices-by-symbols 2-D array (one column
        per symbol).

    The shape is printed for debugging; single-argument ``print(...)`` calls
    behave the same on Python 2 and Python 3.
    """
    # TODO: add a check to account for missing values in the data.
    quotes = [
        finance.quotes_historical_yahoo_ochl(
            symbol, startDate, endDate, asobject=True).open
        for symbol in symbols
    ]
    # Build the array once instead of once per use.
    prices = np.array(quotes).T
    print("prices shape:")
    print(prices.shape)
    return prices
def draw_k_svg(id_str, from_date_str, to_date_str):
    u"""Render a candlestick chart for one Shanghai-listed stock as SVG.

    Parameters:
        id_str (str):
            - 6-digit Shanghai stock code ('.ss' is appended for the lookup)
        from_date_str (str):
            - start date formatted like '2016-6-20'
        to_date_str (str):
            - end date formatted like '2016-6-20'
    Returns:
        The value returned by ``deal_with_svg`` for the in-memory SVG buffer
        (documented by the original author as the SVG string content).

    Side effect: the chart is also written to ``ts_o.svg`` in the current
    working directory, as before.  The figure and the buffer are now always
    released, so repeated calls no longer accumulate open figures.
    """
    # Tick layout of the x axis.
    mondays = WeekdayLocator(MONDAY)              # major ticks
    alldays = DayLocator()                        # minor ticks
    mondayFormatter = DateFormatter('%m-%d-%Y')   # e.g. 2-29-2015

    from_date = tuple(int(part) for part in from_date_str.strip().split("-"))
    to_date = tuple(int(part) for part in to_date_str.strip().split("-"))
    quotes_ochl = quotes_historical_yahoo_ochl(id_str + '.ss', from_date,
                                               to_date)

    fig, ax = plt.subplots()
    try:
        fig.subplots_adjust(bottom=0.2)
        ax.xaxis.set_major_locator(mondays)
        ax.xaxis.set_minor_locator(alldays)
        ax.xaxis.set_major_formatter(mondayFormatter)

        candlestick_ochl(ax, quotes_ochl, width=0.6, colorup='r',
                         colordown='g')
        ax.xaxis_date()
        ax.autoscale_view()
        plt.setp(plt.gca().get_xticklabels(), rotation=45,
                 horizontalalignment='right')
        ax.grid(True)
        plt.title(symbol_dict.get(id_str, u"未知"))

        f = BytesIO()
        try:
            plt.savefig("ts_o.svg", format="svg")
            plt.savefig(f, format="svg")
            return deal_with_svg(f)
        finally:
            f.close()
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
def symbols(stock_symbol):
    """Return the mean close of ``stock_symbol`` from the previous weekday to today.

    The window used to start at ``(year, month, today.day - 1)``, which is
    not a date on the first day of a month (day 0) and which returned no
    rows on weekends.  The start is now computed with real date arithmetic
    and moved back to the nearest weekday.  Market holidays are still not
    detected.

    Args:
        stock_symbol: ticker symbol.

    Returns:
        The mean of the 'Close' column of the fetched quotes.

    Raises:
        ValueError: when no quotes are returned for the window (previously a
            pandas ValueError from the column-label assignment).
    """
    from datetime import timedelta

    today = date.today()
    start_day = today - timedelta(days=1)
    while start_day.weekday() >= 5:     # 5 = Saturday, 6 = Sunday
        start_day -= timedelta(days=1)
    start = (start_day.year, start_day.month, start_day.day)

    quotes = quotes_historical_yahoo_ochl(stock_symbol, start, today)
    if len(quotes) == 0:
        raise ValueError('no quotes returned for %s' % stock_symbol)

    df = pd.DataFrame(quotes)
    df.columns = [u'Date', u'Open', u'Close', u'High', u'Low', u'Volume']

    return df['Close'].mean()
def getVariations(companySymbol, startDate, endDate):
    """Return day-over-day opening-price changes and their dates.

    Quotes are fetched from one day (module-level ``oneDay``) before
    ``startDate`` through ``endDate``.  The first row is skipped because it
    has no predecessor, and rows dated on a Monday are skipped because the
    gap to the previous row is longer than one day.

    Returns:
        ``(variation, tradingDays)``: two parallel lists holding
        ``open[i] - open[i - 1]`` and the date of row ``i``.
    """
    quote = quotes_historical_yahoo_ochl(
        companySymbol, startDate - oneDay, endDate, asobject=True)

    variation, tradingDays = [], []
    for i in range(1, quote.open.size):
        day = quote.date[i]
        if day.weekday() == 0:
            continue
        variation.append(quote.open[i] - quote.open[i - 1])
        tradingDays.append(day)
    return variation, tradingDays
def PlotData(code, start, end, list):
    """Draw the selected quote fields of one ticker with point markers.

    The plot is created but not shown; displaying it is left to the caller.

    Args:
        code: ticker symbol passed to ``quotes_historical_yahoo_ochl``.
        start: start date, converted with ``_wxdate2pydate``.
        end: end date, converted with ``_wxdate2pydate``.
        list: names of the columns to plot, taken from 'open', 'close',
            'high', 'low', 'volume'.  (The name shadows the builtin but is
            kept because callers may pass it by keyword.)

    The debug output uses single-argument ``print(...)`` calls, which behave
    the same on Python 2 and Python 3.
    """
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print(code)
    print(start_date)
    print(end_date)

    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']

    # Column 0 is a date ordinal; format it as 'YYYY-MM-DD' for the index.
    list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d')
             for row in quotes]
    print(list1)

    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    quotesdf = quotesdf.drop(['date'], axis=1)

    quotesdftemp = pd.DataFrame()
    # Printed while still empty, as before.
    print(quotesdftemp)
    for field in list:
        quotesdftemp[field] = quotesdf[field]
    print("ready to plot")

    quotesdftemp.plot(marker='o')
"601668": "中国建筑", "601688": "华泰证券", "601766": "中国中车", "601800": "中国交建", "601818": "光大银行", "601857": "中国石油", "601901": "方正证券", "601988": "中国银行", "601989": "中国重工", "601998": "中信银行"} symbols, names = np.array(list(symbol_dict.items())).T #quotes = [ts.get_hist_data(symbol,start=d1,end=d2) for symbol in symbols] ## 一次最多30个 然后需要等待了 quotes = [pd.DataFrame(quotes_historical_yahoo_ochl(symbol+".ss", d1, d2),columns=['date','open','high','low','close','volume']) for symbol in symbols] for symbol in symbols: quotes.append(pd.DataFrame(quotes_historical_yahoo_ochl(symbol+".ss", d1, d2),columns=['date','open','high','low','close','volume'])) open_price = np.array([q.open.values for q in quotes]) close_price = np.array([q.close.values for q in quotes]) # 每日价格浮动包含了重要信息! variation = close_price - open_price ############################################################################### # Learn a graphical structure from the correlations edge_model = covariance.GraphLassoCV()
import matplotlib.pyplot as plt
from matplotlib.dates import MONDAY
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import MonthLocator, WeekdayLocator, DateFormatter

# Date range of the quotes to download and plot.
date1 = datetime.date(2002, 1, 5)
date2 = datetime.date(2003, 12, 1)

# every monday
mondays = WeekdayLocator(MONDAY)

# every 3rd month
months = MonthLocator(range(1, 13), bymonthday=1, interval=3)
monthsFmt = DateFormatter("%b '%y")

quotes = quotes_historical_yahoo_ochl('INTC', date1, date2)
# Stop the script when nothing was downloaded.
if len(quotes) == 0:
    print('Found no quotes')
    raise SystemExit

# Column 0 of each row is the date, column 1 the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig, ax = plt.subplots()
ax.plot_date(dates, opens, '-')
# Major ticks from the month locator, minor ticks on Mondays.
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(monthsFmt)
ax.xaxis.set_minor_locator(mondays)
ax.autoscale_view()
#ax.xaxis.grid(False, 'major')
#ax.xaxis.grid(True, 'minor')
import datetime
import numpy as np
import pylab as pl
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from hmmlearn.hmm import GaussianHMM

print(__doc__)

###############################################################################
# Downloading the data
date1 = datetime.date(1995, 1, 1)  # start date
date2 = datetime.date(2012, 1, 6)  # end date
# get quotes from yahoo finance
quotes = quotes_historical_yahoo_ochl("INTC", date1, date2)
# Stop the script when nothing was downloaded.
if len(quotes) == 0:
    raise SystemExit

# unpack quotes: column 0 is the date, column 2 is read as the close value,
# column 5 as the volume (its first entry is dropped to match the diff below)
dates = np.array([q[0] for q in quotes], dtype=int)
close_v = np.array([q[2] for q in quotes])
volume = np.array([q[5] for q in quotes])[1:]

# take diff of close value
# this makes len(diff) = len(close_t) - 1
# therefore, the other quantities also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]
from matplotlib.dates import MonthLocator
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.finance import candlestick_ochl
from datetime import date

# One-year window ending today; the start is a (year, month, day) tuple.
today = date.today()
start = (today.year - 1, today.month, today.day)

alldays = DayLocator()
months = MonthLocator()
month_formatter = DateFormatter("%b %Y")

# Download the stock price data from the finance service
symbol = 'BIDU'  # ticker symbol of Baidu
quotes = quotes_historical_yahoo_ochl(symbol, start, today)

# Create the figure object, the top-level container of the plot components
fig = plt.figure()
# Add a subplot
ax = fig.add_subplot(111)
# Use the month locator as the major locator of the x axis; it controls the
# coarse ticks
ax.xaxis.set_major_locator(months)
# Use the day locator as the minor locator of the x axis; it controls the
# fine ticks
ax.xaxis.set_minor_locator(alldays)
# Use the month formatter as the major formatter of the x axis; it controls
# the labels of the coarse ticks
ax.xaxis.set_major_formatter(month_formatter)
# Draw the candlestick chart with the candlestick function of
# matplotlib.finance
candlestick_ochl(ax, quotes)
# Format the labels on the x axis as dates
# Cluster the 30 listed tickers by their daily up/down pattern in Q3 2014.
# ``vstack`` is used below, so its import must be active (it was commented
# out, which made the script fail with a NameError).
from numpy import vstack
from scipy.cluster.vq import kmeans, vq
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import datetime

start = datetime(2014, 7, 1)
end = datetime(2014, 9, 30)
listDji = [
    'AXP', 'BA', 'CAT', 'CSCO', 'CVX', 'DD', 'DIS', 'GE', 'GS', 'HD', 'IBM',
    'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PFE',
    'PG', 'T', 'TRV', 'UNH', 'UTX', 'V', 'VZ', 'WMT', 'XOM'
]

# 30 rows (one per ticker) by 90 columns, pre-filled with zeros; only the
# first ``days - 1`` columns of listTemp are overwritten below.
quotes = [[0 for col in range(90)] for row in range(30)]
listTemp = [[0 for col in range(90)] for row in range(30)]

for i in range(30):
    quotes[i] = quotes_historical_yahoo_ochl(listDji[i], start, end)

# The number of rows of the first ticker is used for every ticker.
days = len(quotes[0])

# 1.0 when column 2 did not fall from one row to the next (and both values
# are non-zero), otherwise -1.0.
for i in range(30):
    for j in range(days - 1):
        if (quotes[i][j][2] and quotes[i][j + 1][2]
                and (quotes[i][j + 1][2] >= quotes[i][j][2])):
            listTemp[i][j] = 1.0
        else:
            listTemp[i][j] = -1.0

data = vstack(listTemp)
centroids, _ = kmeans(data, 4)  # float or double is supported
result, _ = vq(data, centroids)
# Single-argument print() call: same output on Python 2 and Python 3.
print(result)
# Technical indicator plots are shown last ax = subplots[i - len(technicals)] ax.plot(x, technical) if i < len(technicals_titles): ax.set_title(technicals_titles[i]) plt.show() fig.savefig(stock + '.png', facecolor=fig.get_facecolor()) for stock in stocklist: print("Fetching", stock) columns = ['date', 'open_price', 'close_price', 'high', 'low', 'volume'] ticker = quotes_historical_yahoo_ochl(stock, start, today) list1 = [] for i in range(0, len(ticker)): x = date.fromordinal(int(ticker[i][0])) y = date.strftime(x, '%Y-%m-%d') list1.append(y) day_pricing = pd.DataFrame(ticker, index=list1, columns=columns) # day_pricing = day_pricing.drop(['date'], axis=1) last_hour = day_pricing[-100:] openp = last_hour['open_price'].as_matrix() highp = last_hour['high'].as_matrix() lowp = last_hour['low'].as_matrix() closep = last_hour['close_price'].as_matrix()
import numpy as np
import pylab as pl
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import matplotlib.pyplot as plt
#from sklearn.hmm import GaussianHMM

print(__doc__)

###############################################################################
# Downloading the data
date1 = datetime.date(1995, 1, 1)  # start date
date2 = datetime.date(2012, 1, 6)  # end date

# get goog quotes from yahoo finance
goog_quotes = quotes_historical_yahoo_ochl("GOOG", date1, date2)
# Stop the script when nothing was downloaded.
if len(goog_quotes) == 0:
    raise SystemExit

# get yahoo quotes from yahoo finance
yhoo_quotes = quotes_historical_yahoo_ochl("YHOO", date1, date2)
if len(yhoo_quotes) == 0:
    raise SystemExit

# unpack goog quotes: column 0 is the date, column 2 is read as the close
# value, column 5 as the volume (first entry dropped)
goog_dates = np.array([q[0] for q in goog_quotes], dtype=int)
goog_close_v = np.array([q[2] for q in goog_quotes])
goog_volume = np.array([q[5] for q in goog_quotes])[1:]

# unpack yhoo quotes
yhoo_dates = np.array([q[0] for q in yhoo_quotes], dtype=int)
# coding:utf-8 """ pandas 作图 """ from matplotlib.finance import quotes_historical_yahoo_ochl # 注matplotlib包里已经没有了quotes_historical_yahoo方法了,改为quotes_historical_yahoo_ochl from datetime import date from datetime import datetime import pandas as pd import numpy as np import time import matplotlib.pyplot as plt # 使用pylab 就直接import pylab as pl today = date.today() start = (today.year - 1, today.month, today.day) quotes = quotes_historical_yahoo_ochl('AXP', start, today) #美国运通公司最近一年股票代码 fields = ['date', 'open', 'close', 'high', 'low', 'volume'] list1 = [] for i in range(0, len(quotes)): x = date.fromordinal(int(quotes[i][0])) y = datetime.strftime(x, "%Y-%m-%d") list1.append(y) qutoesdf = pd.DataFrame(quotes, index=list1, columns=fields) # 利用index属性可以将索引改变。 日期为格里高利时间,用函数改变 qutoesdf = qutoesdf.drop(['date'], axis=1) # print qutoesdf # 统计近一年每个月的股票开盘天数 listtemp = [] for i in range(0, len(qutoesdf)):
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.finance as mpf

# Date range as (year, month, day) tuples.
start = (2014, 5, 1)
end = (2014, 6, 30)

# NOTE: the string below is a bare expression statement used as a comment.
''' Nowadays, a couple of Python libraries provide convenience functions to retrieve data from Yahoo! Finance. Be aware that, although this is a convenient way to visualize financial data sets, the data quality is not sufficient to base any important investment decision on it. For example, stock splits, leading to “price drops,” are often not correctly accounted for in the data provided by Yahoo! Finance. This holds true for a number of other freely available data sources as well. '''

quotes = mpf.quotes_historical_yahoo_ochl('^GDAXI', start, end)
# Bare expression: has no effect in a script (it only displays the first two
# rows in an interactive session).
quotes[:2]

# First figure: candlestick chart.
fig, ax = plt.subplots(figsize=(8, 5))
fig.subplots_adjust(bottom=0.2)
mpf.candlestick_ochl(ax, quotes, width=0.6, colorup='b', colordown='r')
plt.grid(True)
ax.xaxis_date()  # dates on the x-axis
ax.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=30)

# Second figure: daily summary plot of the same quotes.
fig, ax = plt.subplots(figsize=(8, 5))
mpf.plot_day_summary2_ochl(ax, quotes, ticksize=4, colorup='k', colordown='r')
plt.grid(True)
ax.xaxis_date()
#stock max from matplotlib.finance import quotes_historical_yahoo_ochl from datetime import datetime from datetime import date import pandas as pd import matplotlib.pyplot as plt import numpy as np import time start = datetime(2014,1,1) end = datetime(2014,12,31) quotesMS = quotes_historical_yahoo_ochl('MSFT', start, end) quotesIntl = quotes_historical_yahoo_ochl('INTC', start, end) fields = ['date','open','close', 'high', 'low', 'volume'] #quotedfMS = pd.DataFrame(quotesMS, columns= fields) list1 = [] for i in range(0, len(quotesMS)): x = date.fromordinal(int(quotesMS[i][0])) y = date.strftime(x, '%Y-%m-%d') list1.append(y) list2 = [] for i in range(0, len(quotesIntl)): x = date.fromordinal(int(quotesIntl[i][0])) y = date.strftime(x, '%Y-%m-%d') list2.append(y) quotedfMS = pd.DataFrame(quotesMS, index = list1, columns= fields) quotedfMS = quotedfMS.drop(['date'], axis = 1) quotedfIntl = pd.DataFrame(quotesIntl, index = list2, columns= fields) quotedfIntl = quotedfIntl.drop(['date'], axis = 1)
from matplotlib.dates import MonthLocator
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.finance import candlestick_ochl
import sys
from datetime import date

# One-year window ending today; the start is a (year, month, day) tuple.
today = date.today()
start = (today.year - 1, today.month, today.day)

alldays = DayLocator()
months = MonthLocator()
month_formatter = DateFormatter("%b %Y")

# Download the stock price data from the finance service
symbol = 'BIDU'  # ticker symbol of Baidu
quotes = quotes_historical_yahoo_ochl(symbol, start, today)

# Create the figure object, the top-level container of the plot components
fig = plt.figure()
# Add a subplot
ax = fig.add_subplot(111)
# Use the month locator as the major locator of the x axis; it controls the
# coarse ticks
ax.xaxis.set_major_locator(months)
# Use the day locator as the minor locator of the x axis; it controls the
# fine ticks
ax.xaxis.set_minor_locator(alldays)
# Use the month formatter as the major formatter of the x axis; it controls
# the labels of the coarse ticks
ax.xaxis.set_major_formatter(month_formatter)
# Draw the candlestick chart with the candlestick function of
# matplotlib.finance
candlestick_ochl(ax, quotes)
# Format the labels on the x axis as dates
'MMM', 'MRK', 'MSFT', 'NKE', 'PFE', 'PG', 'T', 'TRV', 'UNH', 'UTX', 'V', 'VZ', 'WMT', 'XOM'] quotes = [[0 for col in range(90)] for row in range(30)] listTemp = [[0 for col in range(90)] for row in range(30)] for i in range(30): quotes[i] = quotes_historical_yahoo_ochl(listDji[i], start, end) days = len(quotes[0]) for i in range(30): for j in range(days - 1): if (quotes[i][j][2] and quotes[i][j + 1][2] and (quotes[i][j + 1][2] >= quotes[i][j][2])): listTemp[i][j] = 1.0 else: listTemp[i][j] = -1.0 data = vstack(listTemp) centroids, _ = kmeans(data, 4) # float or double is supported result, _ = vq(data, centroids) print(result)
import datetime
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

try:
    from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
    from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl

# Get quotes from Yahoo finance
quotes = quotes_historical_yahoo_ochl("INTC",
        datetime.date(1994, 4, 5), datetime.date(2015, 7, 3))

# Extract the required values.  ``np.int`` was only an alias of the builtin
# ``int`` and no longer exists in NumPy >= 1.24, so the builtin is used.
dates = np.array([quote[0] for quote in quotes], dtype=int)
closing_values = np.array([quote[2] for quote in quotes])
# The first volume entry is dropped to line up with the differences below.
volume_of_shares = np.array([quote[5] for quote in quotes])[1:]

# Take diff of closing values and compute the rate of change in percent
diff_percentage = 100.0 * np.diff(closing_values) / closing_values[:-1]
dates = dates[1:]

# Stack the percentage diff and volume values column-wise for training
X = np.column_stack([diff_percentage, volume_of_shares])

# Create and train Gaussian HMM.  Single-argument print() call: same output
# on Python 2 and Python 3.
print("\nTraining HMM....")
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
model.fit(X)
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np

# Quotes of MSFT and INTC for the calendar year 2014.
start = datetime(2014, 1, 1)
end = datetime(2014, 12, 31)
quotesMSFT = quotes_historical_yahoo_ochl("MSFT", start, end)
quotesINTC = quotes_historical_yahoo_ochl("INTC", start, end)
fields = ["date", "open", "close", "high", "low", "volume"]
# quotesdf = pd.DataFrame(quotes, columns = fields)
# quotesdf = pd.DataFrame(quotes, index = range(1,len(quotes)+1),columns = fields)

# Column 0 is a date ordinal; format it as 'YYYY-MM-DD' index labels (MSFT).
list1 = []
for i in range(0, len(quotesMSFT)):
    x = date.fromordinal(int(quotesMSFT[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    list1.append(y)
# print list1

# Same index labels for INTC.
list2 = []
for i in range(0, len(quotesINTC)):
    x = date.fromordinal(int(quotesINTC[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    list2.append(y)

quotesmsftdf = pd.DataFrame(quotesMSFT, index=list1, columns=fields)
# One pre-joined argument keeps the output identical to the former
# two-argument print statement on both Python 2 and Python 3.
print("---------------- " + str(type(quotesmsftdf["open"])))
quotesmsftdf = quotesmsftdf.drop(["date"], axis=1)
try:
    from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
    # For Matplotlib prior to 1.5.
    from matplotlib.finance import (
        quotes_historical_yahoo as quotes_historical_yahoo_ochl
    )

from hmmlearn.hmm import GaussianHMM

print(__doc__)

###############################################################################
# Get quotes from Yahoo! finance
quotes = quotes_historical_yahoo_ochl(
    "INTC", datetime.date(1995, 1, 1), datetime.date(2012, 1, 6))

# Unpack quotes: column 0 is the date, column 2 is read as the close value,
# column 5 as the volume (its first entry is dropped to match the diff).
dates = np.array([q[0] for q in quotes], dtype=int)
close_v = np.array([q[2] for q in quotes])
volume = np.array([q[5] for q in quotes])[1:]

# Take diff of close value. Note that this makes
# ``len(diff) = len(close_t) - 1``, therefore, other quantities also
# need to be shifted by 1.
diff = np.diff(close_v)
dates = dates[1:]
close_v = close_v[1:]

# Pack diff and volume for training.
X = np.column_stack([diff, volume])
def myindex():
    """Basic data analysis of one stock or index.

    Prompts for a ticker symbol, downloads daily quotes starting five years
    before today, and then:

    1. plots a histogram of the open-to-close daily returns together with a
       fitted normal density,
    2. prints summary statistics and the lag-1 autocorrelation of the returns,
    3. prints the three months with the most days on which the close was
       above the open,
    4. prints the first three monthly return volatilities and plots the whole
       monthly volatility series.

    Returns:
        None.  Results are printed or shown in Matplotlib windows.
    """
    myquote = input('Enter your interested symbol (example:GE ^NY):')

    # Yahoo Finance data interface.  The start date is handed over as a
    # (year, month, day) tuple.
    today = date.today()
    start = (today.year - 5, today.month, today.day)
    aapl = quotes_historical_yahoo_ochl(myquote, start, today)
    if len(aapl) == 0:
        # Nothing to fit or plot without data.
        print('Found no quotes')
        return

    # 1. Data cleaning: name the columns and index the rows by date string.
    fields = ['Date', 'Open', 'Close', 'High', 'Low', 'Volume']
    # The first field of each quote is a Gregorian ordinal.
    list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d') for row in aapl]
    aapldf = pd.DataFrame(aapl, index=list1, columns=fields)
    # DataFrame.drop returns a new frame; the result has to be re-bound for
    # the redundant 'Date' column to actually disappear.
    aapldf = aapldf.drop(['Date'], axis=1)
    aapldf['DRet'] = aapldf['Close'] / aapldf['Open'] - 1

    # 2. Plotting: histogram of the daily returns plus the fitted density.
    bins = [
        -0.06, -0.05, -0.04, -0.03, -0.02, -0.01, 0, 0.01, 0.02, 0.03, 0.04,
        0.05, 0.06
    ]
    # Fit a normal distribution to the data.
    mu, sigma = norm.fit(aapldf.DRet)
    plt.hist(aapldf.DRet, bins, normed=True, alpha=0.6, color='green',
             rwidth=1)
    # Evaluate the fitted PDF across the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, sigma)
    plt.plot(x, p, 'k', linewidth=1)
    # Raw string: "\m" and "\s" are not valid escape sequences.
    title = r"Fit results: $\mu = %.4f$, $\sigma = %.4f$" % (mu, sigma)
    plt.title(title)
    plt.show()

    # 3. Data analysis.
    print('1) Statistics', '\n')
    print(aapldf.describe().T, '\n')
    print('The autocorrelation of daily return is %.4f \n' %
          aapldf.DRet.autocorr(lag=1))

    # Count the rising days per month.  Work on an explicit copy so that the
    # helper column is not written onto a slice of ``aapldf``.
    tmpdf = aapldf.copy()
    # The first seven characters of 'YYYY-MM-DD' are the 'YYYY-MM' key.
    tmpdf['YearMon'] = [label[:7] for label in tmpdf.index]
    print('2) Stock increase summary', '\n')
    print('The days when the stock increase in a month:\n')
    print(tmpdf[tmpdf.Close > tmpdf.Open]['YearMon'].value_counts().head(3))

    # Volatility (standard deviation of the daily return) for every month.
    print('\n')
    print('3) Volatility by month', '\n')
    print('The monthly volatility of the stock returns:\n')
    voldf = tmpdf.groupby('YearMon')['DRet'].std()
    print(voldf.head(3))
    plt.figure()
    voldf.plot(figsize=(6, 3), title='Monthly Volatility', grid=True,
               legend=True)
    plt.show()
# coding:utf-8
"""Average closing price of Microsoft (MSFT) stock in Q1 2016, per month."""
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
import pandas as pd

today = date.today()  # today's date
start = (today.year-2, today.month, today.day)  # start two years back
# NOTE(review): the download window is relative to today, so it only contains
# 2016 data while 2016 lies within the last two years - confirm.
quotesMS = quotes_historical_yahoo_ochl('MSFT', start, today)  # fetch data
attributes = ['date', 'open', 'close', 'high', 'low', 'volume']  # column names
quotesdfMS = pd.DataFrame(quotesMS, columns=attributes)  # build the DataFrame

# Use the quote date, rendered as 'yy/mm/dd', as the row index.  The first
# field of each quote is a Gregorian ordinal.
quotesdfMS.index = [
    date.fromordinal(int(quote[0])).strftime('%y/%m/%d') for quote in quotesMS
]
quotesdfMS = quotesdfMS.drop(['date'], axis=1)  # drop the original date column

# Select the first quarter.  Label slices include both end points, so the
# slice stops at March 31 rather than April 1.  The explicit copy avoids
# writing the helper column onto a slice of ``quotesdfMS``.
quotesdfMS16 = quotesdfMS['16/01/01':'16/03/31'].copy()
# Characters 3-4 of 'yy/mm/dd' hold the month, e.g. '02'.
quotesdfMS16['month'] = [int(label[3:5]) for label in quotesdfMS16.index]
print(quotesdfMS16.groupby('month').mean().close)
""" get google's stock exchange data using matplotlib """ from matplotlib.finance import quotes_historical_yahoo_ochl from datetime import date, datetime, timedelta import pandas as pd today = date.today() start = today - timedelta(days=365) quotes = quotes_historical_yahoo_ochl("GOOG", start, today) fields = ["date", "open", "close", "high", "low", "volume"] # convert date format dates = [] for i in range(0, len(quotes)): x = date.fromordinal(int(quotes[i][0])) y = datetime.strftime(x, "%Y-%m-%d") dates.append(y) # set dates to index quotesdf = pd.DataFrame(quotes, index=dates, columns=fields) quotesdf = quotesdf.drop(["date"], axis=1) # print # print quotesdf # print quotesdf[u'2014-12-02' : u'2014-12-09'] # print quotesdf.loc[1:5, ] # [row, col] # print quotesdf.loc[:, ['low', 'volume']] print quotesdf[(quotesdf.index >= u"2015-01-30") & (quotesdf.close > 600)] # group (example) # g = tempdf.groupby('month') # gvolume = g['volume'] # print gvolume.sum()
This example requires an active internet connection since it uses
yahoo finance to get the data for plotting
"""
import matplotlib.pyplot as plt
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime

# Date range of the INTC quotes to plot.
date1 = datetime.date(1995, 1, 1)
date2 = datetime.date(2004, 4, 12)

years = YearLocator()  # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')

quotes = quotes_historical_yahoo_ochl(
    'INTC', date1, date2)
# Nothing to plot without data: stop the script.
if len(quotes) == 0:
    raise SystemExit

# First and second field of every quote: the date and the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig, ax = plt.subplots()
ax.plot_date(dates, opens, '-')

# format the ticks: major ticks per year (labelled with the year), minor
# ticks per month
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
ax.xaxis.set_minor_locator(months)
ax.autoscale_view()
Created on Wed Jul 6 21:07:16 2016

@author: liulei
"""
''' 时间序列 '''
# The string literal above reads "time series".
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
import pandas as pd

# Download window: from one year before today, given as a
# (year, month, day) tuple, up to today.
today = date.today()
start = (today.year-1, today.month, today.day)
quotes = quotes_historical_yahoo_ochl('AXP', start , today)
fields = ['date', 'open', 'close', 'high','low','volume']

# Build 'YYYY-MM-DD' row labels from the Gregorian ordinal stored in the
# first field of each quote.
list1=[]
for i in range(0, len(quotes)):
    x= date.fromordinal(int(quotes[i][0]))
    y = date.strftime(x,'%Y-%m-%d')
    list1.append(y)

# Index the frame by date string and drop the now-redundant date column.
quotesdf = pd.DataFrame(quotes, index = list1, columns=fields)
quotesdf = quotesdf.drop(['date'], axis=1)
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np

# Microsoft daily quotes for calendar year 2014.
start = datetime(2014, 1, 1)
end = datetime(2014, 12, 31)
quotesMS14 = quotes_historical_yahoo_ochl("MSFT", start, end)
fields = ["date", "open", "close", "high", "low", "volume"]

# Row labels: Gregorian ordinal in the first field -> 'YYYY-MM-DD' string.
list1 = [
    date.fromordinal(int(row[0])).strftime("%Y-%m-%d") for row in quotesMS14
]
quotesdfMS14 = pd.DataFrame(quotesMS14, index=list1, columns=fields)

# Month number parsed back out of every row label.
listtemp1 = [
    time.strptime(label, "%Y-%m-%d").tm_mon for label in quotesdfMS14.index
]
quotesdfMS14["month"] = listtemp1

# Mean opening price of each month.
openMS = quotesdfMS14.groupby("month").mean().open
listopen = []
From a list of stock symbols and a range of dates, returns a dict that maps
    each symbol to the list of its opening prices in the given date range.
    """
    # TODO: add a check to account for missing values in the data
    quotes = [list(finance.quotes_historical_yahoo_ochl(symbol, startDate, endDate,asobject = True).open) for symbol in symbols]
    return dict(zip(symbols,quotes))

if __name__ == "__main__":
    # SQLite database file that receives the price table.
    disk_engine = create_engine('sqlite:///returnData.db')
    symbols = ['COP', 'AXP', 'RTN', 'BA', 'AAPL', 'PEP', 'NAV', 'GSK', 'MSFT', 'KMB', 'R', 'SAP', 'GS', 'CL', 'WMT', 'GE', 'SNE', 'PFE', 'AMZN', 'MAR', 'NVS', 'KO', 'MMM', 'CMCSA', 'SNY', 'IBM', 'CVX', 'WFC', 'DD', 'CVS', 'TOT', 'CAT', 'CAJ', 'BAC', 'WBA', 'AIG', 'TWX', 'HD', 'TXN', 'VLO', 'F', 'CVC', 'TM', 'PG', 'LMT', 'HMC', 'GD', 'HPQ', 'MTU', 'XRX', 'YHOO', 'XOM', 'MCD', 'CSCO', 'NOC', 'MDLZ','ORCL','INTC','BP','EBAY']
    startDate = dt.datetime(2004, 1, 1)
    endDate = dt.datetime(2014, 1, 1)
    # create price matrix
    prices = symbolsToPriceDict(symbols,startDate,endDate)
    #pdb.set_trace()
    # Find the list of dates for trading days.  Initially we assume all assets
    # trade on all days, so we pull the dates from the first symbol.
    # In the future we need to find all trading dates and fill in missing data
    # for symbols with missing days.
    dates = list(finance.quotes_historical_yahoo_ochl(symbols[0], startDate, endDate,asobject = True).date)
    # One column per symbol, one row per trading date, stored as table 'returns'.
    df = pd.DataFrame(data = prices, columns = symbols, index = dates)
    df.to_sql('returns', disk_engine)
def _quotes_to_frame(quotes):
    """Return *quotes* as a frame indexed by 'YYYY-MM-DD' with a 'DRet' column."""
    fields = ['Date', 'Open', 'Close', 'High', 'Low', 'Volume']
    # The first field of each quote is a Gregorian ordinal.
    labels = [
        date.fromordinal(int(row[0])).strftime('%Y-%m-%d') for row in quotes
    ]
    frame = pd.DataFrame(quotes, index=labels, columns=fields)
    # DataFrame.drop returns a new frame, so the result has to be re-bound.
    frame = frame.drop(['Date'], axis=1)
    # Daily return measured from open to close.
    frame['DRet'] = frame['Close'] / frame['Open'] - 1
    return frame


def _plot_return_fit(returns, bins, color, title_format):
    """Show a histogram of *returns* overlaid with its fitted normal density."""
    mu, sigma = norm.fit(returns)
    plt.hist(returns, bins, normed=True, alpha=0.6, color=color, rwidth=1)
    # Evaluate the fitted PDF across the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    grid = np.linspace(xmin, xmax, 100)
    plt.plot(grid, norm.pdf(grid, mu, sigma), 'k', linewidth=1)
    plt.title(title_format % (mu, sigma))
    plt.show()


def _report_returns(frame, header, plot_title, color):
    """Print return statistics for *frame* and plot its monthly volatility."""
    print(header, '\n')
    print('1) Statistics', '\n')
    print(frame.describe().T, '\n')
    print('The autocorrelation of daily return is %.4f \n' %
          frame.DRet.autocorr(lag=1))

    # Count the rising days per month.  Work on an explicit copy so that the
    # helper column is not written onto the caller's frame.
    tmpdf = frame.copy()
    # The first seven characters of 'YYYY-MM-DD' are the 'YYYY-MM' key.
    tmpdf['YearMon'] = [label[:7] for label in tmpdf.index]
    print('2) Stock increase summary', '\n')
    print('The days when the stock increase in a month:\n')
    print(tmpdf[tmpdf.Close > tmpdf.Open]['YearMon'].value_counts().head(3))

    # Volatility (standard deviation of the daily return) for every month.
    print('\n')
    print('3) Volatility by month', '\n')
    print('The monthly volatility of the stock returns:\n')
    voldf = tmpdf.groupby('YearMon')['DRet'].std()
    print(voldf.head(3))
    plt.figure()
    voldf.plot(figsize=(6, 3), title=plot_title, color=color, grid=True,
               legend=True)
    plt.show()


def stockmarket():
    """Calculate the correlation between stock and market of your choice.

    Prompts for a stock symbol and a market-index symbol, downloads daily
    quotes for both starting two years before today, prints the correlation
    of their open-to-close daily returns, and then shows for each of the two
    series a return histogram with a fitted normal density, summary
    statistics and the monthly volatility.

    Returns:
        None.  Results are printed or shown in Matplotlib windows.
    """
    astock = input('Enter your interested symbol (example:GE):')
    amarket = input('Enter the relative market index (example:^NY):')

    # Yahoo Finance data interface.  The start date is handed over as a
    # (year, month, day) tuple.
    today = date.today()
    start = (today.year - 2, today.month, today.day)
    astock = quotes_historical_yahoo_ochl(astock, start, today)
    amarket = quotes_historical_yahoo_ochl(amarket, start, today)
    if len(astock) == 0 or len(amarket) == 0:
        # Nothing to compare without data for both series.
        print('Found no quotes')
        return

    # 1. Data cleaning.  Each frame is indexed by its own quote dates; the
    # market frame must not reuse the stock's dates, because the two
    # downloads need not cover the same trading days.
    astockdf = _quotes_to_frame(astock)
    amarketdf = _quotes_to_frame(amarket)

    # Correlate only the days present in both series.
    paired = pd.concat([astockdf['DRet'], amarketdf['DRet']], axis=1,
                       join='inner', keys=['stock', 'market'])
    print(
        '\n', 'The correlation between Stock and Market is: %.4f.' %
        np.corrcoef(paired['stock'], paired['market'])[0][1])

    # 2. Plotting: histogram and fitted density, stock first, then market.
    bins = [
        -0.06, -0.05, -0.04, -0.03, -0.02, -0.01, 0, 0.01, 0.02, 0.03, 0.04,
        0.05, 0.06
    ]
    # Raw strings: "\m" and "\s" are not valid escape sequences.
    _plot_return_fit(astockdf.DRet, bins, 'green',
                     r"Stock fit results: $\mu = %.4f$, $\sigma = %.4f$")
    _plot_return_fit(amarketdf.DRet, bins, 'blue',
                     r"Market fit results: $\mu = %.4f$, $\sigma = %.4f$")

    # 3. Data analysis, stock first, then market.
    _report_returns(astockdf, '============For Stock============',
                    'Stock Monthly Volatility', 'green')
    _report_returns(amarketdf, '============For Market============',
                    'Market Monthly Volatility', 'blue')
def predictions_mls(filename, company, refcompany, dt1, dt2, num_of_states,
                    test_num):
    """Simulate a year of future prices for *company* from a saved HMM.

    The model stored in *filename* is evaluated on two features, the daily
    close difference of *company* and of *refcompany* between *dt1* and
    *dt2*, to find the most likely current hidden state.  A sampling model
    that starts in that state then generates *test_num* random price paths.
    The per-day mean over all paths is written to
    ``./sims3/Year_of_predictions_<company>_States_<num_of_states>_adv.csv``.

    Relies on the names ``total2active``, ``yr`` and ``max_int_value``, which
    are defined outside this function.

    Args:
        filename: Path of the joblib-serialized model.
        company: Ticker symbol whose price is simulated.
        refcompany: Ticker symbol of the reference series.
        dt1: Start of the historical window.
        dt2: End of the historical window.
        num_of_states: Number of hidden states of the sampling model.
        test_num: Number of simulated price paths.

    Returns:
        tuple: Three 1-D arrays with one entry per path, taken from columns
        ``days_future - 2``, ``(days_future - 2) // 4`` and
        ``(days_future - 2) // 36`` of the prediction matrix.
    """
    # Generate samples starting in the most likely actual current state
    days_future = 365
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    model = joblib.load(filename)

    # Close values of the company.  np.diff returns one element fewer than
    # its input, so the close series is shifted by one to stay aligned.
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    diff = np.diff(close_v)
    close_v = close_v[1:]

    # Close differences of the reference company.
    quotes2 = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)
    close_v2 = np.array([q[2] for q in quotes2])
    diff2 = np.diff(close_v2)

    # Left-pad the company series with zeros by the length difference.
    # NOTE(review): this only equalizes the lengths when the reference
    # series is the longer one - confirm that this always holds.
    delta = abs(diff2.shape[0] - diff.shape[0])
    diff0 = np.pad(diff, (delta, 0), mode='constant', constant_values=0)
    close_v = np.pad(close_v, (delta, 0), mode='constant', constant_values=0)
    X = np.column_stack([diff0, diff2])

    # Predict the most likely current internal hidden state
    hidden_probs = model.predict_proba(X)
    lstate_prob = hidden_probs[-1]

    days = int(days_future // total2active)  # 251 open market days in a year
    print(days, strftime("%Y-%m-%d %H:%M:%S", gmtime()))  # debugging purposes

    # With more than one state, start sampling in the most likely one.
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    # Sampling model: same dynamics as the stored model, fixed start state.
    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    random.seed()
    allpredictions = np.zeros((test_num, yr))
    for test in range(test_num):
        # Fresh random path for every run.
        rseed = random.randrange(0, max_int_value)
        steps, _ = model_2_sample.sample(days, random_state=rseed)

        final_price = close_v[-1]
        j = 0
        for i in range(days):
            # Skip a step that would push the price to zero or below.
            if (final_price + steps[i][0]) > 0:
                final_price += steps[i][0]
            if j > 1 and i % 5 == 0:
                # Every fifth step the price is written to three consecutive
                # columns - presumably to cover the two weekend days
                # (TODO confirm).
                allpredictions[test][j] = final_price
                allpredictions[test][j + 1] = final_price
                allpredictions[test][j + 2] = final_price
                j = j + 3
            else:
                allpredictions[test][j] = final_price
                j = j + 1
        # Carry the last price forward through the remaining columns.
        while j < allpredictions.shape[1]:
            allpredictions[test][j] = final_price
            j = j + 1

    predictions_year = allpredictions.mean(axis=0)
    print("Avg. Prediction: ", predictions_year[-1])
    fname = "Year_of_predictions_" + str(company) + "_States_" + str(
        num_of_states) + "_adv.csv"
    fname = os.path.join('./sims3', fname)
    np.savetxt(fname, predictions_year, delimiter=",")

    # Floor division keeps the column indices integral; true division yields
    # floats on Python 3, which NumPy rejects as indices.
    return (allpredictions[:, days_future - 2],
            allpredictions[:, (days_future - 2) // 4],
            allpredictions[:, (days_future - 2) // 36])
if len(sys.argv) < 4 : print "Usage : python stock_draw.py code contry fontpath." print "eg : python stock_draw.py AAPL US D:\Dev\stockdb\ipagp.ttf" raise SystemExit symbol = sys.argv[1] contry = sys.argv[2] fontpath = sys.argv[3] # (Year, month, day) tuples suffice as args for quotes_historical_yahoo date0 = (2016, 1, 1) date1 = (2016, 3, 1) date2 = (2016, 6, 1) start = '2016-01-01' quotes = quotes_historical_yahoo_ochl(symbol, date1, date2) if len(quotes) == 0: raise SystemExit ds, opens, closes, highs, lows, volumes = zip(*quotes) days = len(closes) ''' idx = pd.Index(closes) vales = np.arange(len(idx)).astype(float) s = Series(vales, index=idx) # SMA EMA sma5 = pd.rolling_mean(s, window=5) sma5 = sma5.dropna() ewma = pd.stats.moments.ewma
symbols_all, names_all = stock_list.Symbol.values, stock_list.Name.values
# The call form of print works on Python 2 and Python 3.
print(symbols_all)
print(type(symbols_all))

ticker_index = 1
# NOTE(review): the excerpt does not show whether the array statements belong
# inside this loop; they are kept inside, which leaves the same final values
# once the loop has finished - confirm against the full file.
for symbol in symbols_all:
    # One-element list so that the extraction below can iterate over it.
    quotes = [finance.quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)]
    # The builtin ``float`` replaces the ``np.float`` alias, which newer
    # NumPy releases no longer provide.
    # NOTE(review): ``open`` shadows the builtin; the name is kept because
    # code outside this excerpt may read it.
    open = np.array([q.open for q in quotes]).astype(float)
    close = np.array([q.close for q in quotes]).astype(float)
    # Difference between closing and opening price.
    variation = close - open
symbols_all, names_all = stock_list.Symbol.values, stock_list.Name.values
# The call form of print works on Python 2 and Python 3.
print(symbols_all)
print(type(symbols_all))

ticker_index = 1
# NOTE(review): the excerpt does not show whether the array statements belong
# inside this loop; they are kept inside, which leaves the same final values
# once the loop has finished - confirm against the full file.
for symbol in symbols_all:
    # One-element list so that the extraction below can iterate over it.
    quotes = [
        finance.quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
    ]
    # The builtin ``float`` replaces the ``np.float`` alias, which newer
    # NumPy releases no longer provide.
    # NOTE(review): ``open`` shadows the builtin; the name is kept because
    # code outside this excerpt may read it.
    open = np.array([q.open for q in quotes]).astype(float)
    close = np.array([q.close for q in quotes]).astype(float)
    # Difference between closing and opening price.
    variation = close - open
Created on Wed Dec 02 11:25:23 2015 @author: welion """ #Filename:quotes.py #Get Dow Jones Industrial Avarage from Yahoo with quotes_historical_yahoo #!/bin/python from matplotlib.finance import quotes_historical_yahoo_ochl from datetime import date from datetime import datetime import pandas as pd today = date.today() start = (today.year,today.month-3,today.day) quotes = quotes_historical_yahoo_ochl('AXP',start,today,asobject=False) fields = ['date','open','close','high','low','volume'] timelist=[] for i in range(0,len(quotes)): x = date.fromordinal(int(quotes[i][0])) y = datetime.strftime(x,'%Y%m%d') timelist.append(y) quotesdf = pd.DataFrame(quotes,index=timelist,columns=fields) quotesdf = quotesdf.drop(['date'],axis=1) print quotesdf
""" 数据的简单处理与筛选 """ from matplotlib.finance import quotes_historical_yahoo_ochl # 注matplotlib包里已经没有了quotes_historical_yahoo方法了,改为quotes_historical_yahoo_ochl from datetime import date from datetime import datetime import pandas as pd import numpy as np import time today = date.today() start = (today.year - 5, today.month, today.day) quotes = quotes_historical_yahoo_ochl('AXP', start, today) #美国运通公司最近一年股票代码 fields = ['date', 'open', 'close', 'high', 'low', 'volume'] list1 = [] for i in range(0,len(quotes)): x = date.fromordinal(int(quotes[i][0])) y = datetime.strftime(x, "%Y-%m-%d") list1.append(y) qutoesdf = pd.DataFrame(quotes, index=list1, columns=fields) # 利用index属性可以将索引改变。 日期为格里高利时间,用函数改变 qutoesdf = qutoesdf.drop(['date'], axis = 1) # print qutoesdf #求平均值 print qutoesdf.mean(columns='close') #求开盘价大于80的成交量
def hist_stock(symbol, start_date, end_date):
    """Return a list of (field 0, field 6) pairs for *symbol*.

    The quotes are fetched with ``asobject=True`` and read positionally.
    Going by the record layout noted for this call, (date, year, month, day,
    d, open, close, high, low, volume, adjusted_close), field 0 is the date
    and field 6 the close.
    """
    records = fin.quotes_historical_yahoo_ochl(
        symbol, start_date, end_date, asobject=True)
    stock = []
    for record in records:
        stock.append((record[0], record[6]))
    return stock
from matplotlib.finance import quotes_historical_yahoo_ochl

# retrieve symbol lists: one 'symbols-<market>-unique.txt' file per market,
# one symbol per line
markets = ['amex', 'nasdaq', 'nyse', 'otcbb']
symbols = []
for m in markets:
    fname = 'symbols-' + m + '-unique.txt'
    with open(fname, 'r') as f:
        symbols += f.read().splitlines()
# A single pre-formatted argument prints the same line on Python 2 and 3.
print('%d symbols listed' % len(symbols))

# set date range
date1 = date(1984, 1, 1)
date2 = date(2014, 12, 31)
# Alternative: only the last two weeks
# date2 = date.today()
# date1 = date2 - timedelta(days=14)

# retrieve all data, writing one CSV file per symbol
for symbol in symbols:
    try:
        data = quotes_historical_yahoo_ochl(symbol, date1, date2)
        if data is not None and len(data) > 0:
            print('%s %d' % (symbol, len(data)))
            with open('csv/' + symbol + '.csv', 'w') as f:
                writer = csv.writer(f)
                writer.writerows(data)
    except Exception as err:
        # Best effort: a symbol that cannot be fetched or written is skipped.
        # Catching Exception rather than everything keeps Ctrl-C working, and
        # the message records which symbol was skipped and why.
        print('%s skipped: %s' % (symbol, err))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt

__author__ = 'wangjj'
__mtime__ = '20161022下午 11:39'

# Coca-Cola (KO) quotes starting one year before today.
today = date.today()
start = (today.year - 1, today.month, today.day)
quotes = quotes_historical_yahoo_ochl('KO', start, today)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# Row labels: Gregorian ordinal in the first field -> 'YYYY-MM-DD' string.
list1 = [
    date.fromordinal(int(row[0])).strftime('%Y-%m-%d') for row in quotes
]
quoteskodf = pd.DataFrame(quotes, index=list1, columns=fields)
quoteskodf = quoteskodf.drop(['date'], axis=1)

# Month number parsed back out of every row label.
listtemp = [
    time.strptime(label, "%Y-%m-%d").tm_mon for label in quoteskodf.index
]
print(listtemp)  # "print listtemp" in Python 2.x

# Attach the month to a copy of the frame and average the close per month.
tempkodf = quoteskodf.copy()
tempkodf['month'] = listtemp
closeMeansKO = tempkodf.groupby('month').mean().close
def predictions_mls(filename, company, dt1, dt2,num_of_states,test_num, days_future, tr_prob):
    """Simulate price paths from a saved HMM and compare them to real prices.

    The model in *filename* is evaluated on the daily close differences of
    *company* between *dt1* and *dt2* to find the most likely current hidden
    state.  Starting from that state, *test_num* random paths are sampled,
    each beginning at the first real price returned by
    ``getrealprice_series(company, dt2, days_future)``.  The per-day mean,
    the real prices, their difference, a vector filled with *tr_prob*, the
    per-day variance and the per-day median are written row-wise to
    ``./sims_final/Predictions_<company>_States_<num_of_states>_stats.csv``.

    Relies on ``max_int_value``, which is defined outside this function.
    Returns None.
    """
    # Generate samples starting in the most likely actual current state
    model = joblib.load(filename)

    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size

    # One-column observation matrix: day-over-day close differences.
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    closes = np.array([row[2] for row in quotes])
    observations = np.column_stack([np.diff(closes)])

    # State probabilities of the most recent observation.
    latest_state_probs = model.predict_proba(observations)[-1]

    # If more than one state, start at the most likely current state.
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[latest_state_probs.argmax()] = 1.0
    else:
        startprob = [ 1.]

    # Sampling model: same dynamics as the stored model, fixed start state.
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Make sure to randomize the samples
    random.seed()

    def draw_steps():
        # One freshly seeded sample of ``days`` observations.
        seed = random.randrange(0,max_int_value)
        steps, _ = model_2_sample.sample(days, random_state=seed)
        return steps

    # One row per run; column 0 holds the first real price and every later
    # column adds the sampled step for that day (sample 0 is not used).
    allpredictions = np.zeros((test_num, days))
    steps = draw_steps()
    for run in range(test_num):
        allpredictions[run] = np.cumsum(
            np.concatenate(([rp[0]], steps[1:, 0])))
        steps = draw_steps()

    predictions = allpredictions.mean(axis=0)
    predictions_var = allpredictions.var(axis=0)
    predictions_median = np.median(allpredictions, axis=0)
    errors = predictions - rp
    tr_prob_vector = np.full((predictions.size),tr_prob)
    data = [predictions,rp, errors, tr_prob_vector, predictions_var,predictions_median]

    err_final = errors[-1]
    print ("Start Price: ",rp[0],"Avg. Prediction: ",str(num_of_states),"states:" , predictions[-1]," Real Price:", rp[-1])
    print (" Error end of predictions:", err_final,"Delta Start-End:", rp[0]-rp[-1],"\n")

    fname = os.path.join(
        './sims_final',
        "Predictions_"+str(company)+"_States_"+str(num_of_states)+"_stats.csv")
    np.savetxt(fname, data, delimiter=",")
    return
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import ch

ch.set_ch()
__author__ = 'wangjj'
__mtime__ = '20161023下午 10:11'

# Microsoft daily quotes for calendar year 2015.
start = datetime(2015, 1, 1)
end = datetime(2015, 12, 31)
quotes = quotes_historical_yahoo_ochl('MSFT', start, end)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# The first field of each quote is a Gregorian ordinal; derive the
# 'yy/mm/dd' row label and the month number from it.
quote_days = [date.fromordinal(int(quote[0])) for quote in quotes]
list1 = [day.strftime('%y/%m/%d') for day in quote_days]
list2 = [day.month for day in quote_days]

quotes_of_MSFT = pd.DataFrame(quotes, index=list1, columns=fields)
quotes_of_MSFT = quotes_of_MSFT.drop(['date'], axis=1)
quotes_of_MSFT['month'] = list2

# Mean opening price of each month.
open_MSFT = quotes_of_MSFT.groupby('month').mean().open
# A single pre-formatted argument prints the same text on Python 2 and 3.
print('%s %s' % (open_MSFT, open_MSFT.index))
list_open = []
'AMZN': 'Amazon',
    'KO': 'Coca Cola',
    'PEP': 'Pepsi',
    'MCD': 'Mc Donalds',
    'YUM': 'Taco Bell',
    'CMG': 'Chipotle Mexican Grill',
    'WMT': 'Wal-Mart',
    'HD': 'Home Depot',
    'CVS': 'CVS'
}

# Split the mapping into parallel arrays of ticker symbols and company names.
symbols, names = np.array(list(symbol_dict.items())).T

# Print the quotes of every symbol between a separator line and a closing one.
print('_'*140 + "\n>>> quotes:")
for symbol in symbols:
    try:
        quote = finance.quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
        print("\n%s %s:\n%s" % (symbol, symbol_dict[symbol], quote))
    except Exception as e:
        # this is usually a 404 error, coz of the date range (ie a stock may not have existed),
        # so we could enter a zeroed entry ???
        # NOTE(review): the excerpt does not show the indentation; the print
        # is read as the body of this handler because the handler would be
        # empty otherwise - confirm against the full file.
        print('_'*140 + "\n")

# Fetch one symbol over a fixed 2009-2015 window and print it with its type.
cls1 = datetime.datetime(2009, 1, 1)
cls2 = datetime.datetime(2015, 1, 1)
symbol = 'YUM'  # Taco Bell
# symbol = 'CMG'  # Chipotle Mexican Grill
quote = finance.quotes_historical_yahoo_ochl(symbol, cls1, cls2, asobject=True)
print("\n%s %s\ntype(quote)=%s=\nquote:\n%s" % (symbol, symbol_dict[symbol], type(quote), quote))
# quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True) for symbol in symbols]