Beispiel #1
0
def cmodel(company, refcompany, dt1, dt2, num_of_states):
    """Fit a two-feature Gaussian HMM on daily close changes and save it.

    The observation matrix has one row per trading day and two columns:
    the day-over-day close difference of ``company`` and of ``refcompany``.
    The fitted model is written with joblib to
    ``./sims3/<company>_<num_of_states>_states_model_adv.pkl``.

    Parameters:
        company: ticker symbol of the company being modelled.
        refcompany: ticker symbol of the reference company.
        dt1, dt2: start and end of the quote range, passed straight to
            ``quotes_historical_yahoo_ochl``.
        num_of_states: number of hidden states for the HMM.

    Returns:
        None. The model is only persisted to disk.
    """
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    quotes2 = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)

    # Index 2 of each quote row is read as the close value; np.diff makes
    # each series one element shorter than its quote list.
    diff = np.diff(np.array([q[2] for q in quotes]))
    diff2 = np.diff(np.array([q[2] for q in quotes2]))

    # column_stack needs equally long columns. Left-pad whichever series is
    # shorter with zeros. The previous code always padded `diff`, which made
    # the mismatch worse (and raised) when the reference series was shorter.
    delta = diff2.shape[0] - diff.shape[0]
    if delta > 0:
        diff = np.pad(diff, (delta, 0), mode='constant', constant_values=0)
    elif delta < 0:
        diff2 = np.pad(diff2, (-delta, 0), mode='constant', constant_values=0)

    X = np.column_stack([diff, diff2])

    # Create HMM instance and fit
    model = GaussianHMM(n_components=num_of_states, covariance_type="full", n_iter=1000).fit(X)
    fname = str(company)+"_"+str(num_of_states)+"_states_model_adv.pkl"
    joblib.dump(model, os.path.join('./sims3', fname))
def stock_month_plot():
    '''
    Plot INTC opening prices from 2002-01-05 to 2003-12-01 on a date axis.

    Major ticks fall on the first day of every third month and are labelled
    like "Jan '02"; minor ticks fall on every Monday. Exits via SystemExit
    when no quotes are returned.
    '''
    quotes = quotes_historical_yahoo_ochl(
        'INTC', datetime.date(2002, 1, 5), datetime.date(2003, 12, 1))
    if len(quotes) == 0:
        print('Found no quotes')
        raise SystemExit

    # Split each quote row into its date (index 0) and open (index 1).
    dates, opens = [], []
    for row in quotes:
        dates.append(row[0])
        opens.append(row[1])

    fig, ax = plt.subplots()
    ax.plot_date(dates, opens, '-')

    xaxis = ax.xaxis
    # every 3rd month, on the first of the month
    xaxis.set_major_locator(
        MonthLocator(range(1, 13), bymonthday=1, interval=3))
    xaxis.set_major_formatter(DateFormatter("%b '%y"))
    # every monday
    xaxis.set_minor_locator(WeekdayLocator(MONDAY))
    ax.autoscale_view()
    ax.grid(True)

    fig.autofmt_xdate()
    plt.show()
Beispiel #3
0
def ret_f(ticker, begdate, enddate):
    """Return the simple period-over-period returns of the adjusted close.

    Each element is ``(aclose[i + 1] - aclose[i]) / aclose[i]``, so the
    result is one element shorter than the quote series.
    """
    history = quotes_historical_yahoo_ochl(ticker, begdate, enddate,
                                           asobject=True, adjusted=True)
    previous = history.aclose[:-1]
    current = history.aclose[1:]
    return (current - previous) / previous
Beispiel #4
0
def get_close(symbol):
    """Fetch one year of quotes for ``symbol`` and return them transposed.

    The range runs from the same month/day one year ago up to today. The
    whole quote table is returned (not only the close), transposed so that
    each row of the result holds one quote field.
    """
    end = date.today()
    one_year_back = (end.year - 1, end.month, end.day)
    table = np.array(quotes_historical_yahoo_ochl(symbol, one_year_back, end))
    return table.T
Beispiel #5
0
def PlotData(code, start, end, list):
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print code
    print start_date
    print end_date
    print list
    #根据公司代码,起止时间得到所有数据
    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']
    list1 = []
    #格式化时间,将时间参数放入list1列表
    for i in range(0, len(quotes)):
        x = date.fromordinal(int(quotes[i][0]))
        y = datetime.strftime(x, '%Y-%m-%d')
        list1.append(y)
    print list1
    #根据数据,时间列表,所有指标生成dataFrame
    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    #剔除date数据,这里是因为格式不一致
    quotesdf = quotesdf.drop(['date'], axis=1)
    quotesdftemp = pd.DataFrame()
    #将所选择的指标,如close,open的dateFrame赋予一个临时dateFrame中
    for i in range(0, len(list)):
        quotesdftemp[list[i]] = quotesdf[list[i]]
    print quotesdftemp
    print "ready to plot"
    #画图
    quotesdftemp.plot(marker='o')
    plt.show()
Beispiel #6
0
def PlotData(code, start, end, list):
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print code
    print start_date
    print end_date
    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']
    list1 = []
    for i in range(0, len(quotes)):
        x = date.fromordinal(int(quotes[i][0]))
        y = datetime.strftime(x, '%Y-%m-%d')
        list1.append(y)
    print list1

    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    quotesdf = quotesdf.drop(['date'], axis=1)
    quotesdftemp = pd.DataFrame()
    print quotesdftemp

    for i in range(0, len(list)):
        quotesdftemp[list[i]] = quotesdf[list[i]]
    print "ready to plot"
    quotesdftemp.plot()
    plt.title(code)
    plt.xlabel('Time')
    plt.show()
Beispiel #7
0
def getrealprice(company, dt2, days_future):
    """Return the last close value in the window [dt2, dt2 + days_future].

    The window end is ``dt2`` plus ``days_future`` calendar days; index 2 of
    each quote row is read as the close value.
    """
    window_end = dt2 + datetime.timedelta(days=days_future)
    rows = quotes_historical_yahoo_ochl(company, dt2, window_end)
    closes = np.array([row[2] for row in rows])
    return closes[-1]
def stock_year_plot():
    '''
    Plot INTC opening prices from 1995-01-01 to 2004-04-12 on a date axis.

    Major ticks mark every year (labelled with the 4-digit year) and minor
    ticks mark every month. The coordinate read-out shows the date as
    YYYY-MM-DD and the price as a dollar amount with two decimals. Exits via
    SystemExit when no quotes are returned.
    '''
    quotes = quotes_historical_yahoo_ochl(
        'INTC', datetime.date(1995, 1, 1), datetime.date(2004, 4, 12))
    if len(quotes) == 0:
        print('Found no quotes')
        raise SystemExit

    # Split each quote row into its date (index 0) and open (index 1).
    dates, opens = [], []
    for row in quotes:
        dates.append(row[0])
        opens.append(row[1])

    fig, ax = plt.subplots()
    ax.plot_date(dates, opens, '-')

    # format the ticks
    xaxis = ax.xaxis
    xaxis.set_major_locator(YearLocator())    # every year
    xaxis.set_major_formatter(DateFormatter('%Y'))
    xaxis.set_minor_locator(MonthLocator())   # every month
    ax.autoscale_view()

    # format the coords message box
    ax.fmt_xdata = DateFormatter('%Y-%m-%d')
    ax.fmt_ydata = lambda x: '$%1.2f' % x
    ax.grid(True)

    fig.autofmt_xdate()
    plt.show()
def symbolsToPriceDict(symbols,startDate,endDate):
    """
    Map each stock symbol to the list of its opening prices in the date range.

    Quotes are fetched per symbol with ``asobject=True`` and the ``open``
    series is converted to a plain list. Missing values are not checked.
    """
    opens_per_symbol = []
    for symbol in symbols:
        history = finance.quotes_historical_yahoo_ochl(
            symbol, startDate, endDate, asobject=True)
        opens_per_symbol.append(list(history.open))
    return dict(zip(symbols, opens_per_symbol))
Beispiel #10
0
def predict_one(filename, company, dt1, dt2,num_of_states, days_future, tr_prob):
    """Sample one simulated price path from a saved HMM.

    The saved model is used to find the most likely hidden state on the last
    historical day; a fresh GaussianHMM with the same transition matrix,
    means and covariances is then forced to start in that state and sampled.

    Parameters:
        filename: path of the joblib-pickled model to load.
        company: ticker symbol.
        dt1, dt2: historical quote range used to infer the current state.
        num_of_states: number of hidden states of the saved model.
        days_future: horizon passed to ``getrealprice_series``; the number of
            simulated days equals the size of the series it returns.
        tr_prob: unused; kept so existing callers keep working.

    Returns:
        numpy array with one entry per simulated day. Entry 0 is the first
        real price; every later entry adds the sampled close change.
    """
    # NOTE(review): joblib.load unpickles the file; only load trusted models.
    model = joblib.load(filename)

    rp = getrealprice_series(company, dt2,days_future)
    days = rp.size

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])

    # Single feature: day-over-day difference of the close value.
    X = np.column_stack([np.diff(close_v)])

    # State probabilities of the last historical observation.
    lstate_prob = model.predict_proba(X)[-1]

    # If more than one state, make sure we start at the most likely current state
    if (num_of_states>1):
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [ 1.]

    # Prepare the model for sampling
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Re-seed from system entropy so every call draws a different path.
    random.seed()
    rseed = random.randrange(0,max_int_value)
    samples, _ = model_2_sample.sample(days, random_state=rseed)

    predictions = np.zeros(days)

    final_price = rp[0]            # start at day 0 of the real prices
    predictions[0] = final_price   # day 0 prediction same as current real price

    # The sample at index 0 is skipped: day 0 is pinned to the real price.
    for i in range(1, days):
        final_price += samples[i][0]
        predictions[i] = final_price

    return predictions
def predictions_rand(filename, company, dt1, dt2, num_of_states, test_num,
                     days_future):
    """Simulate final prices with a saved HMM started in a random state.

    A fresh GaussianHMM is given a uniform start distribution plus the
    transition matrix, means and covariances of the saved model. For each of
    ``test_num`` runs one path of close changes is sampled and accumulated
    onto the last historical close; a step is skipped when it would make the
    price non-positive.

    Parameters:
        filename: path of the joblib-pickled model to load.
        company: ticker symbol.
        dt1, dt2: historical quote range; its last close is the start price.
        num_of_states: number of hidden states of the saved model.
        test_num: number of simulated paths.
        days_future: horizon in calendar days.

    Returns:
        list with the final simulated price of every run.
    """
    # NOTE(review): joblib.load unpickles the file; only load trusted models.
    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    last_close = np.array([q[2] for q in quotes])[-1]

    # Ratio of all days to days the market is open (251 per year). The float
    # literal matters: `364 / 251` is 1 under Python 2 integer division,
    # which silently disabled the conversion, and the day count must be an
    # int for sampling and slicing.
    total2active = 364.0 / 251
    days = int(days_future // total2active)
    print(days)

    # Uniform start distribution: every hidden state is equally likely.
    startprob = np.ones(num_of_states) / float(num_of_states)

    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # NumPy seeds must lie in [0, 2**32 - 1]; sys.maxint exceeds that on
    # 64-bit Python 2 and does not exist on Python 3.
    max_seed = 2 ** 32

    random.seed()
    predictions = []
    for _ in range(test_num):
        rseed = random.randrange(0, max_seed)
        samples, _states = model_2_sample.sample(days, random_state=rseed)

        final_price = last_close
        # Work on scalars (column 0) so the result stays a scalar even when
        # no step is accepted; the old `final_price[0]` failed in that case.
        for step in samples[:days, 0]:
            if final_price + step > 0:
                final_price += step

        predictions.append(final_price)

    return predictions
Beispiel #12
0
def symbolsToPrices(symbols,startDate,endDate):
    """
    From a list of stock symbols and a range of dates, returns a prices by symbols numpy array
    listing the opening prices for the stocks in the given date range.
    """
    #add check to account for missing values in data
    quotes = [finance.quotes_historical_yahoo_ochl(symbol, startDate, endDate,asobject = True).open for symbol in symbols]
    print "prices shape:"
    print np.array(quotes).T.shape
    return np.array(quotes).T
Beispiel #13
0
def cmodel(company, dt1, dt2, num_of_states):
    """Fit a Gaussian HMM on daily close changes, save it, report persistence.

    The model is trained on the day-over-day difference of the close value
    and written with joblib to
    ``./sims_final/<company>_<num_of_states>_states_model_final.pkl``.

    Parameters:
        company: ticker symbol.
        dt1, dt2: quote range passed to ``quotes_historical_yahoo_ochl``.
        num_of_states: number of hidden states.

    Returns:
        (expected_days, tr_mls): the expected number of days to stay in the
        most likely last hidden state and that state's self-transition
        probability. Both are 1 when ``num_of_states`` is 1 or less.
    """
    quotes = quotes_historical_yahoo_ochl(company, dt1,
                                          dt2)  #Here we set the time range

    # Index 2 of each quote row is read as the close value.
    close_v = np.array([q[2] for q in quotes])

    # Pack the close differences as a single-feature training matrix.
    X = np.column_stack([np.diff(close_v)])

    # Create HMM instance and fit
    model = GaussianHMM(n_components=num_of_states,
                        covariance_type="full",
                        n_iter=1000).fit(X)

    expected_days = 1
    tr_mls = 1

    if (num_of_states > 1):
        # Identify the most likely last hidden state. If the full-covariance
        # model cannot score the data, refit with diagonal covariances.
        # `except Exception` keeps that fallback without also swallowing
        # KeyboardInterrupt/SystemExit as the former bare `except:` did.
        try:
            hidden_probs = model.predict_proba(X)
        except Exception:
            model = GaussianHMM(n_components=num_of_states,
                                covariance_type="diag",
                                n_iter=1000).fit(X)
            hidden_probs = model.predict_proba(X)

        lstate_prob = hidden_probs[-1]
        mls = lstate_prob.argmax()

        # self transition probability for the most likely last hidden state
        tr_mls = model.transmat_[mls][mls]

        # we make use of the geometric series formula to calculate the number
        # of days expected to stay at the current state
        expected_days = (1.0 / (1 - tr_mls))

    # we save the model for future use
    fname = str(company) + "_" + str(num_of_states) + "_states_model_final.pkl"
    joblib.dump(model, os.path.join('./sims_final', fname))

    return expected_days, tr_mls
Beispiel #14
0
def ret_monthly(ticker):
    """Return the monthly sums of daily log returns for ``ticker``.

    Quotes are fetched from (begYear, 1, 1) to (endYear, 12, 31). Each daily
    log return of the adjusted close is labelled with the 'YYYYMM' of the
    quote date at the same position, and the returns are summed per label.
    The result is a one-column DataFrame named after the ticker.
    """
    x = quotes_historical_yahoo_ochl(ticker, (begYear, 1, 1),
                                     (endYear, 12, 31),
                                     asobject=True,
                                     adjusted=True)
    logret = log(x.aclose[1:] / x.aclose[:-1])
    days = x.date
    # One 'YYYYMM' label per log return, taken from the date at that index.
    month_keys = [days[i].strftime("%Y%m") for i in range(size(logret))]
    y = pd.DataFrame(logret, index=month_keys, columns=[ticker])
    return y.groupby(y.index).sum()
def main(symbols, percent, days, verbose):
    """Report, per ticker, how far the latest close is below the period high.

    Parameters:
        symbols: iterable of ticker strings; each is upper-cased before use.
        percent: decline threshold in percent.
        days: look-back window in days; a positive value is negated so it can
            be added to "now" as a timedelta.
        verbose: when true, the arguments are echoed first.

    Prints the period maximum close, the most recent close, the percent
    change, a target price and, when the change is below ``-percent``, an
    alert line. Returns nothing.
    """
    if verbose:
        print "Symbols: %s" % symbols
        print "Percent: %s" % percent
        print "Days: %d" % days
        print "Verbose: %s" % verbose

    print

    # Normalise to a negative offset so `now + timedelta(days)` lies in the past.
    if days > 0:
        days = -days

    print "Checking for a %s%% decline over the past " \
          "%d days" % (percent, abs(days))
    print

    for ticker in symbols:
        ticker = ticker.upper()
        print "Stock: %s" % ticker

        start_date = datetime.datetime.now() + datetime.timedelta(days)
        end_date = datetime.datetime.now()

        quotes_objects = quotes_historical_yahoo_ochl(ticker,
                                                      start_date,
                                                      end_date,
                                                      asobject=True)

        # Highest close in the window, rounded to 5 decimals.
        max_value = round(float(max(quotes_objects.close)), 5)
        print "- max value over %d days: %s" % (abs(days), max_value)

        most_recent_close = round(float(quotes_objects.close[-1]), 5)
        print "- most recent close: %s" % most_recent_close

        # NOTE(review): rate_of_return is defined elsewhere; presumably the
        # percentage change from max_value to most_recent_close - confirm.
        percent_change = round(float(
                               rate_of_return(max_value, most_recent_close)
                               ), 2)
        print "- percent change: %s%%" % percent_change

        # Price that lies `percent` percent below the window maximum.
        target_price = max_value - (max_value * (percent / 100.0))

        print "- target price: %s (%s%% below %s)" % (target_price, percent,
                                                      max_value)

        # Alert only when the drop is larger than the threshold.
        if percent_change < -percent:
            print
            print "ALERT! %s has dropped %s%% " \
                  "over the last %s days" % (ticker,
                                             percent_change,
                                             abs(days))

        print
        print
def main(symbols, percent, days, verbose):
    """Report, per ticker, how far the latest close is below the period high.

    Parameters:
        symbols: iterable of ticker strings; each is upper-cased before use.
        percent: decline threshold in percent.
        days: look-back window in days; a positive value is negated so it can
            be added to "now" as a timedelta.
        verbose: when true, the arguments are echoed first.

    Prints the period maximum close, the most recent close, the percent
    change, a target price and, when the change is below ``-percent``, an
    alert line. Returns nothing.
    """
    if verbose:
        print "Symbols: %s" % symbols
        print "Percent: %s" % percent
        print "Days: %d" % days
        print "Verbose: %s" % verbose

    print

    # Normalise to a negative offset so `now + timedelta(days)` lies in the past.
    if days > 0:
        days = -days

    print "Checking for a %s%% decline over the past " \
          "%d days" % (percent, abs(days))
    print

    for ticker in symbols:
        ticker = ticker.upper()
        print "Stock: %s" % ticker

        start_date = datetime.datetime.now() + datetime.timedelta(days)
        end_date = datetime.datetime.now()

        quotes_objects = quotes_historical_yahoo_ochl(ticker,
                                                      start_date,
                                                      end_date,
                                                      asobject=True)

        # Highest close in the window, rounded to 5 decimals.
        max_value = round(float(max(quotes_objects.close)), 5)
        print "- max value over %d days: %s" % (abs(days), max_value)

        most_recent_close = round(float(quotes_objects.close[-1]), 5)
        print "- most recent close: %s" % most_recent_close

        # NOTE(review): rate_of_return is defined elsewhere; presumably the
        # percentage change from max_value to most_recent_close - confirm.
        percent_change = round(
            float(rate_of_return(max_value, most_recent_close)), 2)
        print "- percent change: %s%%" % percent_change

        # Price that lies `percent` percent below the window maximum.
        target_price = max_value - (max_value * (percent / 100.0))

        print "- target price: %s (%s%% below %s)" % (target_price, percent,
                                                      max_value)

        # Alert only when the drop is larger than the threshold.
        if percent_change < -percent:
            print
            print "ALERT! %s has dropped %s%% " \
                  "over the last %s days" % (ticker,
                                             percent_change,
                                             abs(days))

        print
Beispiel #17
0
def symbolsToPriceDict(symbols, startDate, endDate):
    """
    Map each stock symbol to the list of its opening prices in the date range.

    Quotes are fetched per symbol with ``asobject=True`` and the ``open``
    series is converted to a plain list. Missing values are not checked.
    """
    def opening_prices(symbol):
        history = finance.quotes_historical_yahoo_ochl(symbol,
                                                       startDate,
                                                       endDate,
                                                       asobject=True)
        return list(history.open)

    quotes = [opening_prices(symbol) for symbol in symbols]
    return dict(zip(symbols, quotes))
Beispiel #18
0
def symbolsToPrices(symbols, startDate, endDate):
    """
    From a list of stock symbols and a range of dates, returns a prices by symbols numpy array
    listing the opening prices for the stocks in the given date range.
    """
    #add check to account for missing values in data
    quotes = [
        finance.quotes_historical_yahoo_ochl(symbol,
                                             startDate,
                                             endDate,
                                             asobject=True).open
        for symbol in symbols
    ]
    print "prices shape:"
    print np.array(quotes).T.shape
    return np.array(quotes).T
Beispiel #19
0
def draw_k_svg(id_str,from_date_str,to_date_str):
    u"""
    Render a candlestick (K-line) chart for one stock as SVG.

    Parameters:
        id_str (str): - 6-digit Shanghai Stock Exchange stock code
        from_date_str (str): - start date formatted like '2016-6-20'
        to_date_str (str): - end date formatted like '2016-6-20'
    Returns:
        the value returned by ``deal_with_svg`` for the in-memory SVG buffer
        (described by the original author as the SVG content string)
    """

    # X axis ticks: Mondays are the major ticks, every day is a minor tick.
    mondays = WeekdayLocator(MONDAY)
    alldays = DayLocator()
    mondayFormatter = DateFormatter('%m-%d-%Y')  # e.g. 02-29-2016

    # 'Y-M-D' strings become (year, month, day) integer tuples.
    from_date = tuple(int(i) for i in from_date_str.strip().split("-"))
    to_date = tuple(int(i) for i in to_date_str.strip().split("-"))
    quotes_ochl = quotes_historical_yahoo_ochl(id_str+'.ss', from_date ,to_date)

    fig, ax = plt.subplots()
    try:
        fig.subplots_adjust(bottom=0.2)

        ax.xaxis.set_major_locator(mondays)
        ax.xaxis.set_minor_locator(alldays)
        ax.xaxis.set_major_formatter(mondayFormatter)

        candlestick_ochl(ax, quotes_ochl, width=0.6, colorup='r', colordown='g')
        ax.xaxis_date()
        ax.autoscale_view()
        plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment='right')

        ax.grid(True)
        ax.set_title(symbol_dict.get(id_str,u"未知"))

        f = BytesIO()
        try:
            # NOTE(review): the on-disk copy looks like a debugging aid; it
            # is kept because callers may rely on the file being written.
            fig.savefig("ts_o.svg", format="svg")
            fig.savefig(f, format="svg")
            return deal_with_svg(f)
        finally:
            f.close()
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call leaks one figure.
        plt.close(fig)
Beispiel #20
0
def symbols(stock_symbol):
    """Return the mean close of ``stock_symbol`` from yesterday through today.

    The start date is computed with a timedelta, so it stays valid on the
    first day of a month (the former ``today.day - 1`` produced day 0).

    NOTE: when the window contains no trading day (for example on a weekend)
    no rows come back and the division below fails, as it did before.

    Returns:
        the arithmetic mean of the 'Close' column over the returned rows.
    """
    from datetime import timedelta  # local import keeps the block self-contained

    today = date.today()
    start = today - timedelta(days=1)
    quotes = quotes_historical_yahoo_ochl(stock_symbol, start, today)
    df = pd.DataFrame(quotes)
    df.columns = [u'Date', u'Open',u'Close',u'High',u'Low',u'Volume']

    # Accumulate in row order; `total` avoids shadowing the builtin `sum`.
    total = 0
    for close in df['Close']:
        total += close

    return (total/df.shape[0])
Beispiel #21
0
def getVariations(companySymbol, startDate, endDate):
    """Return day-over-day opening price changes and their dates.

    Quotes are fetched starting one ``oneDay`` before ``startDate``. The
    first row is skipped because it has no predecessor, and rows dated on a
    Monday are skipped because the gap to the previous row is too long.

    Returns:
        (variation, tradingDays): parallel lists of ``open[i] - open[i - 1]``
        and the date of row ``i``.
    """
    quote = quotes_historical_yahoo_ochl(companySymbol,
                                         startDate - oneDay,
                                         endDate,
                                         asobject=True)
    variation = []
    tradingDays = []
    # Start at 1: row 0 has nothing to be compared with.
    for i in range(1, quote.open.size):
        if quote.date[i].weekday() == 0:
            continue
        variation.append(quote.open[i] - quote.open[i - 1])
        tradingDays.append(quote.date[i])
    return variation, tradingDays
Beispiel #22
0
def PlotData(code, start, end, list):
    start_date = _wxdate2pydate(start)
    end_date = _wxdate2pydate(end)
    print code
    print start_date
    print end_date
    quotes = quotes_historical_yahoo_ochl(code, start_date, end_date)
    fields = ['date', 'open', 'close', 'high', 'low', 'volume']
    list1 = []
    for i in range(0, len(quotes)):
        x = date.fromordinal(int(quotes[i][0]))
        y = datetime.strftime(x, '%Y-%m-%d')
        list1.append(y)
    print list1

    quotesdf = pd.DataFrame(quotes, index=list1, columns=fields)
    quotesdf = quotesdf.drop(['date'], axis=1)
    quotesdftemp = pd.DataFrame()
    print quotesdftemp

    for i in range(0, len(list)):
        quotesdftemp[list[i]] = quotesdf[list[i]]
    print "ready to plot"
    quotesdftemp.plot(marker='o')
Beispiel #23
0
    "601668": "中国建筑",
    "601688": "华泰证券",
    "601766": "中国中车",
    "601800": "中国交建",
    "601818": "光大银行",
    "601857": "中国石油",
    "601901": "方正证券",
    "601988": "中国银行",
    "601989": "中国重工",
    "601998": "中信银行"}

# Split the code -> name mapping into two parallel arrays.
symbols, names = np.array(list(symbol_dict.items())).T

#quotes = [ts.get_hist_data(symbol,start=d1,end=d2) for symbol in symbols]
## At most 30 requests at a time; after that the service makes you wait.
# One DataFrame per symbol, fetched exactly once. The former extra loop
# appended every symbol a second time, so `quotes` no longer lined up with
# `symbols`/`names`. Column labels follow the OCHL order used by
# quotes_historical_yahoo_ochl (date, open, close, high, low, volume); the
# old labels made `.close` read the low column.
quotes = [pd.DataFrame(quotes_historical_yahoo_ochl(symbol+".ss", d1, d2),
                       columns=['date','open','close','high','low','volume'])
          for symbol in symbols]

open_price  = np.array([q.open.values for q in quotes])
close_price = np.array([q.close.values for q in quotes])

# The daily price movement carries the important information.
variation = close_price - open_price

###############################################################################
# Learn a graphical structure from the correlations
edge_model = covariance.GraphLassoCV()
Beispiel #24
0
import matplotlib.pyplot as plt
from matplotlib.dates import MONDAY
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import MonthLocator, WeekdayLocator, DateFormatter

# Script: plot INTC opening prices on a date axis with quarterly major ticks.
# NOTE(review): `datetime` is not imported in the visible import lines - confirm.
date1 = datetime.date(2002, 1, 5)
date2 = datetime.date(2003, 12, 1)

# every monday
mondays = WeekdayLocator(MONDAY)

# every 3rd month
months = MonthLocator(range(1, 13), bymonthday=1, interval=3)
# tick labels such as "Jan '02"
monthsFmt = DateFormatter("%b '%y")

quotes = quotes_historical_yahoo_ochl('INTC', date1, date2)
# Nothing to plot: report it and stop the script.
if len(quotes) == 0:
    print('Found no quotes')
    raise SystemExit

# Index 0 of each quote row is the date, index 1 the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig, ax = plt.subplots()
ax.plot_date(dates, opens, '-')
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(monthsFmt)
ax.xaxis.set_minor_locator(mondays)
ax.autoscale_view()
#ax.xaxis.grid(False, 'major')
#ax.xaxis.grid(True, 'minor')
Beispiel #25
0
import datetime
import numpy as np
import pylab as pl
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from hmmlearn.hmm import GaussianHMM

# Script: download INTC quotes and derive the daily close differences.
print(__doc__)

###############################################################################
# Downloading the data
date1 = datetime.date(1995, 1, 1)  # start date
date2 = datetime.date(2012, 1, 6)  # end date
# get quotes from yahoo finance
quotes = quotes_historical_yahoo_ochl("INTC", date1, date2)
# Stop the script when nothing was downloaded.
if len(quotes) == 0:
    raise SystemExit

# unpack quotes
# Row layout used here: index 0 date, index 2 close, index 5 volume.
dates = np.array([q[0] for q in quotes], dtype=int)
close_v = np.array([q[2] for q in quotes])
# volume already drops its first entry to match the shifted series below
volume = np.array([q[5] for q in quotes])[1:]

# take diff of close value
# this makes len(diff) = len(close_t) - 1
# therefore, others quantity also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]
Beispiel #26
0
from matplotlib.dates import MonthLocator
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.finance import candlestick_ochl
from datetime import date

# Script: candlestick chart of the last year of BIDU quotes.
# NOTE(review): DayLocator, DateFormatter and plt are not imported in the
# visible import lines - confirm.
today = date.today()
# same month/day one year ago, as a (year, month, day) tuple
start = (today.year - 1, today.month, today.day)

alldays = DayLocator()
months = MonthLocator()

# tick labels such as "Jan 2016"
month_formatter = DateFormatter("%b %Y")

# Download the stock price data from the finance channel
symbol = 'BIDU'  # Baidu's ticker symbol
quotes = quotes_historical_yahoo_ochl(symbol, start, today)

# Create the figure object, the top-level container of the plot
fig = plt.figure()
# Add one subplot
ax = fig.add_subplot(111)
# Major locator of the x axis is the month locator (the coarser ticks)
ax.xaxis.set_major_locator(months)
# Minor locator of the x axis is the day locator (the finer ticks)
ax.xaxis.set_minor_locator(alldays)
# Major formatter of the x axis is the month formatter (labels of the coarser ticks)
ax.xaxis.set_major_formatter(month_formatter)

# Draw the candlestick chart with matplotlib.finance's candlestick function
candlestick_ochl(ax, quotes)
# Format the x axis labels as dates
Beispiel #27
0
#from numpy import vstack
#import numpy
from scipy.cluster.vq import kmeans, vq
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import datetime
# Script: cluster 30 tickers by the day-to-day direction of quote column 2.
start = datetime(2014, 7, 1)
end = datetime(2014, 9, 30)
listDji = [
    'AXP', 'BA', 'CAT', 'CSCO', 'CVX', 'DD', 'DIS', 'GE', 'GS', 'HD', 'IBM',
    'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PFE',
    'PG', 'T', 'TRV', 'UNH', 'UTX', 'V', 'VZ', 'WMT', 'XOM'
]
# 30 x 90 grids of zeros; each row of `quotes` is replaced by the download
# below, while `listTemp` keeps its 90 columns.
quotes = [[0 for col in range(90)] for row in range(30)]
listTemp = [[0 for col in range(90)] for row in range(30)]
for i in range(30):
    quotes[i] = quotes_historical_yahoo_ochl(listDji[i], start, end)
# Number of rows of the first ticker; used as the length for every ticker.
days = len(quotes[0])
for i in range(30):
    for j in range(days - 1):
        # 1.0 when both values are non-zero and the next one is not lower,
        # otherwise -1.0. Columns from days - 1 onward stay 0.
        if (quotes[i][j][2] and quotes[i][j + 1][2]
                and (quotes[i][j + 1][2] >= quotes[i][j][2])):
            listTemp[i][j] = 1.0
        else:
            listTemp[i][j] = -1.0
# NOTE(review): `vstack` is not imported in the visible lines (the numpy
# import above is commented out) - confirm.
data = vstack(listTemp)
centroids, _ = kmeans(data, 4)  #float or double is supported
# Assign every ticker to its nearest centroid.
result, _ = vq(data, centroids)
print result
Beispiel #28
0
        # Technical indicator plots are shown last
        ax = subplots[i - len(technicals)]
        ax.plot(x, technical)
        if i < len(technicals_titles):
            ax.set_title(technicals_titles[i])

    plt.show()
    fig.savefig(stock + '.png', facecolor=fig.get_facecolor())


# For every stock: download its quotes, index them by date string and pull
# out the price columns of the most recent rows.
for stock in stocklist:

    print("Fetching", stock)

    columns = ['date', 'open_price', 'close_price', 'high', 'low', 'volume']
    ticker = quotes_historical_yahoo_ochl(stock, start, today)

    # Turn each row's ordinal date into a 'YYYY-MM-DD' index label.
    list1 = []
    for i in range(0, len(ticker)):
        x = date.fromordinal(int(ticker[i][0]))
        y = date.strftime(x, '%Y-%m-%d')
        list1.append(y)

    day_pricing = pd.DataFrame(ticker, index=list1, columns=columns)
    # day_pricing = day_pricing.drop(['date'], axis=1)
    # Despite the name, this is the last 100 rows of the frame.
    last_hour = day_pricing[-100:]

    # Price columns as plain arrays.
    openp = last_hour['open_price'].as_matrix()
    highp = last_hour['high'].as_matrix()
    lowp = last_hour['low'].as_matrix()
    closep = last_hour['close_price'].as_matrix()
Beispiel #29
0
import numpy as np
import pylab as pl
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import matplotlib.pyplot as plt
#from sklearn.hmm import GaussianHMM

# Script: download GOOG and YHOO quotes and unpack them into arrays.
# NOTE(review): `datetime` is not imported in the visible import lines - confirm.
print(__doc__)

###############################################################################
# Downloading the data
date1 = datetime.date(1995, 1, 1)  # start date
date2 = datetime.date(2012, 1, 6)  # end date

# get goog quotes from yahoo finance
goog_quotes = quotes_historical_yahoo_ochl("GOOG", date1, date2)
# Stop the script when nothing was downloaded.
if len(goog_quotes) == 0:
    raise SystemExit

# get yahoo quotes from yahoo finance
yhoo_quotes = quotes_historical_yahoo_ochl("YHOO", date1, date2)
if len(yhoo_quotes) == 0:
    raise SystemExit

# unpack goog quotes
# Row layout used here: index 0 date, index 2 close, index 5 volume.
goog_dates = np.array([q[0] for q in goog_quotes], dtype=int)
goog_close_v = np.array([q[2] for q in goog_quotes])
# volume drops its first entry
goog_volume = np.array([q[5] for q in goog_quotes])[1:]

# unpack yhoo quotes
yhoo_dates = np.array([q[0] for q in yhoo_quotes], dtype=int)
Beispiel #30
0
# coding:utf-8
"""
pandas 作图
"""
from matplotlib.finance import quotes_historical_yahoo_ochl  # 注matplotlib包里已经没有了quotes_historical_yahoo方法了,改为quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt  # 使用pylab 就直接import pylab as pl

# Script: load one year of AXP quotes into a DataFrame indexed by date string.
today = date.today()
# same month/day one year ago, as a (year, month, day) tuple
start = (today.year - 1, today.month, today.day)
quotes = quotes_historical_yahoo_ochl('AXP', start, today)  # American Express quotes for the most recent year
fields = ['date', 'open', 'close', 'high', 'low', 'volume']
# Turn each row's ordinal date into a 'YYYY-MM-DD' index label.
list1 = []
for i in range(0, len(quotes)):
    x = date.fromordinal(int(quotes[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    list1.append(y)

qutoesdf = pd.DataFrame(quotes, index=list1,
                        columns=fields)  # the index argument replaces the default index; the raw dates are Gregorian ordinals converted above
# The raw date column is redundant once the index carries the date.
qutoesdf = qutoesdf.drop(['date'], axis=1)
# print qutoesdf

# Count the trading days of every month in the past year

listtemp = []
for i in range(0, len(qutoesdf)):
Beispiel #31
0
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.finance as mpf

# Script: candlestick and day-summary charts of ^GDAXI for May-June 2014.
start = (2014, 5, 1)
end = (2014, 6, 30)
'''
Nowadays, a couple of Python libraries provide convenience functions to retrieve data from Yahoo! Finance. Be
aware that, although this is a convenient way to visualize financial data sets, the data quality is not sufficient to
base any important investment decision on it. For example, stock splits, leading to “price drops,” are often not
correctly accounted for in the data provided by Yahoo! Finance. This holds true for a number of other freely
available data sources as well.
'''
quotes = mpf.quotes_historical_yahoo_ochl('^GDAXI', start, end)
# Bare expression: has no effect when run as a script.
quotes[:2]

# First figure: candlestick chart.
fig, ax = plt.subplots(figsize=(8, 5))
fig.subplots_adjust(bottom=0.2)
mpf.candlestick_ochl(ax, quotes, width=0.6, colorup='b', colordown='r')
plt.grid(True)
ax.xaxis_date()
# dates on the x-axis
ax.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=30)

# Second figure: day summary plot; `fig` and `ax` are rebound here.
fig, ax = plt.subplots(figsize=(8, 5))
mpf.plot_day_summary2_ochl(ax, quotes, ticksize=4, colorup='k', colordown='r')
plt.grid(True)
ax.xaxis_date()
Beispiel #32
0
#stock max
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import datetime
from datetime import date
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time

# Script: load the 2014 quotes of MSFT and INTC into date-indexed DataFrames.
start = datetime(2014,1,1)
end = datetime(2014,12,31)
quotesMS = quotes_historical_yahoo_ochl('MSFT', start, end)
quotesIntl = quotes_historical_yahoo_ochl('INTC', start, end)
fields = ['date','open','close', 'high', 'low', 'volume']
#quotedfMS = pd.DataFrame(quotesMS, columns= fields)
# 'YYYY-MM-DD' index labels for the MSFT rows.
list1 = []
for i in range(0, len(quotesMS)):
    x = date.fromordinal(int(quotesMS[i][0]))
    y = date.strftime(x, '%Y-%m-%d')
    list1.append(y)
# 'YYYY-MM-DD' index labels for the INTC rows.
list2 = []
for i in range(0, len(quotesIntl)):
    x = date.fromordinal(int(quotesIntl[i][0]))
    y = date.strftime(x, '%Y-%m-%d')
    list2.append(y)
# The raw date column is dropped once the index carries the date.
quotedfMS = pd.DataFrame(quotesMS, index = list1, columns= fields)
quotedfMS = quotedfMS.drop(['date'], axis = 1)

quotedfIntl = pd.DataFrame(quotesIntl, index = list2, columns= fields)
quotedfIntl = quotedfIntl.drop(['date'], axis = 1)
from matplotlib.dates import MonthLocator
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.finance import candlestick_ochl
import sys
from datetime import date

# Plot one year of daily quotes as a candlestick chart.
today = date.today()
# (year, month, day) tuple for the same calendar day one year ago.
start = (today.year - 1, today.month, today.day)

alldays = DayLocator()
months = MonthLocator()
month_formatter = DateFormatter("%b %Y")

# Download the share-price data from the finance channel
symbol = 'BIDU' # Baidu's ticker symbol
quotes = quotes_historical_yahoo_ochl(symbol, start, today)

# Create the figure object, the top-level container for plot components
fig = plt.figure()
# Add a subplot
ax = fig.add_subplot(111)
# Major locator on the x-axis is the month locator (the coarser ticks)
ax.xaxis.set_major_locator(months)
# Minor locator on the x-axis is the day locator (the finer ticks)
ax.xaxis.set_minor_locator(alldays)
# Major formatter on the x-axis is the month formatter (labels of the coarser ticks)
ax.xaxis.set_major_formatter(month_formatter)

# Draw the candlestick chart with the candlestick function from matplotlib.finance
candlestick_ochl(ax, quotes)
# Format the x-axis labels as dates
    'MMM',
    'MRK',
    'MSFT',
    'NKE',
    'PFE',
    'PG',
    'T',
    'TRV',
    'UNH',
    'UTX',
    'V',
    'VZ',
    'WMT',
    'XOM']
# Download the quotes of every symbol in listDji (defined above).
quotes = [quotes_historical_yahoo_ochl(symbol, start, end)
          for symbol in listDji]

# Use the shortest history so every stock can be indexed safely. The
# original fixed 30x90 matrix raised IndexError for ranges longer than
# 90 trading days and left stray zero columns for shorter ranges.
days = min(len(stock_quotes) for stock_quotes in quotes)

# One row per stock: +1.0 when the close (field 2) did not fall from one
# day to the next, -1.0 otherwise (including when either close is zero/missing).
listTemp = []
for stock_quotes in quotes:
    moves = []
    for j in range(days - 1):
        prev_close = stock_quotes[j][2]
        next_close = stock_quotes[j + 1][2]
        if prev_close and next_close and next_close >= prev_close:
            moves.append(1.0)
        else:
            moves.append(-1.0)
    listTemp.append(moves)

# Cluster the stocks by their up/down patterns into four groups.
data = vstack(listTemp)
centroids, _ = kmeans(data, 4)  # float or double is supported
result, _ = vq(data, centroids)
print(result)
import datetime

import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

try:
    from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
    from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl

# Get quotes from Yahoo finance
quotes = quotes_historical_yahoo_ochl(
    "INTC", datetime.date(1994, 4, 5), datetime.date(2015, 7, 3))

# Extract the required values. Field 0 is the date ordinal, field 2 the
# closing price and field 5 the traded volume. The builtin `int` replaces
# the `np.int` alias, which newer NumPy releases no longer provide.
dates = np.array([quote[0] for quote in quotes], dtype=int)
closing_values = np.array([quote[2] for quote in quotes])
volume_of_shares = np.array([quote[5] for quote in quotes])[1:]

# Take diff of closing values and compute the percentage rate of change.
# The diff is one element shorter than the input, so dates (and the
# volume above) are shifted by one to stay aligned.
diff_percentage = 100.0 * np.diff(closing_values) / closing_values[:-1]
dates = dates[1:]

# Stack the percentage diff and volume values column-wise for training
X = np.column_stack([diff_percentage, volume_of_shares])

# Create and train Gaussian HMM
# Single-argument print() behaves the same on Python 2 and Python 3.
print("\nTraining HMM....")
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
model.fit(X)
Beispiel #36
0
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np

start = datetime(2014, 1, 1)
end = datetime(2014, 12, 31)
quotesMSFT = quotes_historical_yahoo_ochl("MSFT", start, end)
quotesINTC = quotes_historical_yahoo_ochl("INTC", start, end)
fields = ["date", "open", "close", "high", "low", "volume"]
# quotesdf = pd.DataFrame(quotes, columns = fields)
# quotesdf = pd.DataFrame(quotes, index = range(1,len(quotes)+1),columns = fields)
list1 = []
for i in range(0, len(quotesMSFT)):
    x = date.fromordinal(int(quotesMSFT[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    list1.append(y)
# print list1
list2 = []
for i in range(0, len(quotesINTC)):
    x = date.fromordinal(int(quotesINTC[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    list2.append(y)
quotesmsftdf = pd.DataFrame(quotesMSFT, index=list1, columns=fields)
print "----------------", type(quotesmsftdf["open"])
quotesmsftdf = quotesmsftdf.drop(["date"], axis=1)
try:
    from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
    # For Matplotlib prior to 1.5.
    from matplotlib.finance import (
        quotes_historical_yahoo as quotes_historical_yahoo_ochl
    )

from hmmlearn.hmm import GaussianHMM


print(__doc__)

###############################################################################
# Download INTC daily quotes from Yahoo! finance
quotes = quotes_historical_yahoo_ochl(
    "INTC", datetime.date(1995, 1, 1), datetime.date(2012, 1, 6))

# Field 0 of a quote is the date ordinal, field 2 the close and field 5
# the volume. Dates and volume drop their first entry right away because
# the price difference below is one element shorter than the close series.
dates = np.array([row[0] for row in quotes], dtype=int)[1:]
close_v = np.array([row[2] for row in quotes])
volume = np.array([row[5] for row in quotes])[1:]

# Day-over-day change of the close, then align the close series with it.
diff = close_v[1:] - close_v[:-1]
close_v = close_v[1:]

# Training matrix: one row per day, columns (price change, volume).
X = np.column_stack([diff, volume])
Beispiel #38
0
def myindex():
    """Run a basic data analysis of one stock or index.

    Prompts for a ticker symbol, downloads its daily quotes from five years
    ago until today, and then:

    * plots a histogram of the open-to-close daily return together with a
      fitted normal density,
    * prints descriptive statistics and the lag-1 autocorrelation of the
      daily return,
    * prints the three months with the most "close above open" days,
    * prints and plots the monthly standard deviation of the daily return.

    Returns nothing; all results are printed or shown as figures.
    """
    myquote = input('Enter your interested symbol (example:GE ^NY):')

    # Yahoo Finance data interface: same calendar day five years ago.
    today = date.today()
    start = (today.year - 5, today.month, today.day)
    aapl = quotes_historical_yahoo_ochl(myquote, start, today)

    # 1. Data cleaning
    # Index the frame by 'YYYY-MM-DD' labels derived from the ordinal date
    # stored in field 0 of every quote.
    fields = ['Date', 'Open', 'Close', 'High', 'Low', 'Volume']
    list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d')
             for row in aapl]
    aapldf = pd.DataFrame(aapl, index=list1, columns=fields)
    # drop() returns a new frame; the original discarded the result, so the
    # raw ordinal column leaked into describe() below.
    aapldf = aapldf.drop(['Date'], axis=1)

    # Open-to-close daily return.
    aapldf['DRet'] = aapldf['Close'] / aapldf['Open'] - 1

    # 2. Plot the histogram and the fitted probability density
    bins = [
        -0.06, -0.05, -0.04, -0.03, -0.02, -0.01, 0, 0.01, 0.02, 0.03, 0.04,
        0.05, 0.06
    ]

    # Fit a normal distribution to the data:
    mu, sigma = norm.fit(aapldf.DRet)

    # Plot the histogram and fitted line.
    # NOTE(review): `normed` is the spelling used by the matplotlib releases
    # that still ship matplotlib.finance; newer releases call it `density`.
    plt.hist(aapldf.DRet,
             bins,
             normed=True,
             alpha=0.6,
             color='green',
             rwidth=1)

    # Plot the PDF over the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, sigma)
    plt.plot(x, p, 'k', linewidth=1)
    # Raw string: same text as before, without invalid escape sequences.
    title = r"Fit results: $\mu = %.4f$,  $\sigma = %.4f$" % (mu, sigma)
    plt.title(title)
    plt.show()

    # 3. Data analysis
    print('1) Statistics', '\n')

    print(aapldf.describe().T, '\n')

    print('The autocorrelation of daily return is %.4f \n' %
          pd.Series.autocorr(aapldf.DRet, lag=1))

    # Count, per month, the days on which the price rose. Work on an
    # explicit copy so adding the helper column never touches aapldf.
    tmpdf = aapldf.copy()
    tmpdf['YearMon'] = [label[:7] for label in tmpdf.index]

    print('2) Stock increase summary', '\n')

    print('The days when the stock increase in a month:\n')
    print(tmpdf[tmpdf.Close > tmpdf.Open]['YearMon'].value_counts().head(3))

    # Monthly volatility of the daily return.
    print('\n')
    print('3) Volatility by month', '\n')
    print('The monthly volatility of the stock returns:\n')

    voldf = tmpdf.groupby('YearMon')['DRet'].std()
    print(voldf.head(3))

    plt.figure()
    voldf.plot(figsize=(6, 3),
               title='Monthly Volatility',
               grid=True,
               legend=True)
    plt.show()
# coding:utf-8

"""
求微软公司(MSFT)2016年第一季股票收盘价平均值
"""
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
import pandas as pd

today = date.today()  # today's date
start = (today.year-2, today.month, today.day)  # start two years ago
quotesMS = quotes_historical_yahoo_ochl('MSFT', start, today)  # fetch the data
attributes = ['date', 'open', 'close', 'high', 'low', 'volume']  # column names
quotesdfMS = pd.DataFrame(quotesMS, columns=attributes)  # build the DataFrame

# Convert each Gregorian ordinal (field 0) into a 'yy/mm/dd' label and use
# the labels as the index. The list is no longer called `list`, which
# shadowed the builtin for the rest of the script.
date_labels = [date.fromordinal(int(row[0])).strftime('%y/%m/%d')
               for row in quotesMS]
quotesdfMS.index = date_labels
quotesdfMS = quotesdfMS.drop(['date'], axis=1)  # remove the raw date column

# Select the first quarter of 2016. Label slicing is inclusive, so the end
# label must be 16/03/31; the original '16/04/01' pulled April 1st in and
# produced a spurious month-4 group. Copy so the new column is added to an
# independent frame rather than to a slice of quotesdfMS.
# NOTE(review): the download window is relative to today, so 2016 data is
# only present when the script runs within two years of that quarter.
quotesdfMS16 = quotesdfMS['16/01/01':'16/03/31'].copy()

# Characters 3-4 of a 'yy/mm/dd' label are the month, e.g. '02'.
quotesdfMS16['month'] = [int(label[3:5]) for label in quotesdfMS16.index]
print(quotesdfMS16.groupby('month').mean().close)
"""
get google's stock exchange data using matplotlib
"""
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date, datetime, timedelta
import pandas as pd

today = date.today()
start = today - timedelta(days=365)
quotes = quotes_historical_yahoo_ochl("GOOG", start, today)
fields = ["date", "open", "close", "high", "low", "volume"]
# convert date format
dates = []
for i in range(0, len(quotes)):
    x = date.fromordinal(int(quotes[i][0]))
    y = datetime.strftime(x, "%Y-%m-%d")
    dates.append(y)
# set dates to index
quotesdf = pd.DataFrame(quotes, index=dates, columns=fields)
quotesdf = quotesdf.drop(["date"], axis=1)
# print
# print quotesdf
# print quotesdf[u'2014-12-02' : u'2014-12-09']
# print quotesdf.loc[1:5, ] # [row, col]
# print quotesdf.loc[:, ['low', 'volume']]
print quotesdf[(quotesdf.index >= u"2015-01-30") & (quotesdf.close > 600)]
# group (example)
# g = tempdf.groupby('month')
# gvolume = g['volume']
# print gvolume.sum()
This example requires an active internet connection since it uses
yahoo finance to get the data for plotting
"""

import matplotlib.pyplot as plt
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime
# Date range of the quotes to download.
date1 = datetime.date(1995, 1, 1)
date2 = datetime.date(2004, 4, 12)

# Tick locators and the label format for the x-axis.
years = YearLocator()   # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')

quotes = quotes_historical_yahoo_ochl(
    'INTC', date1, date2)
# Nothing to plot when the download returned no rows.
if len(quotes) == 0:
    raise SystemExit

# Field 0 of each quote is the date, field 1 the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

# Plot the opening price against the date as a solid line.
fig, ax = plt.subplots()
ax.plot_date(dates, opens, '-')

# format the ticks
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
ax.xaxis.set_minor_locator(months)
ax.autoscale_view()
Created on Wed Jul  6 21:07:16 2016

@author: liulei
"""

'''
时间序列
'''

from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
import pandas as pd

# Download window: same calendar day one year ago until today.
today = date.today()
start = (today.year - 1, today.month, today.day)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']
quotes = quotes_historical_yahoo_ochl('AXP', start, today)

# Render the ordinal date in field 0 of every quote as 'YYYY-MM-DD'.
list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d') for row in quotes]

# Frame indexed by the date labels, without the raw ordinal column.
quotesdf = pd.DataFrame(quotes, index=list1, columns=fields).drop(
    ['date'], axis=1)

Beispiel #43
0
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np

# Date window: calendar year 2014.
start = datetime(2014, 1, 1)
end = datetime(2014, 12, 31)
fields = ["date", "open", "close", "high", "low", "volume"]
quotesMS14 = quotes_historical_yahoo_ochl("MSFT", start, end)

# Render the ordinal date in field 0 of every quote as "YYYY-MM-DD" and use
# the labels as the frame index.
list1 = [date.fromordinal(int(row[0])).strftime("%Y-%m-%d")
         for row in quotesMS14]
quotesdfMS14 = pd.DataFrame(quotesMS14, index=list1, columns=fields)

# Parse the month number back out of every index label.
listtemp1 = [time.strptime(label, "%Y-%m-%d").tm_mon
             for label in quotesdfMS14.index]
quotesdfMS14["month"] = listtemp1

# Mean opening price per month.
openMS = quotesdfMS14.groupby("month").mean().open
listopen = []
    From a list of stock symbols and a range of dates, returns a prices by symbols numpy array
    listing the opening prices for the stocks in the given date range.
    """
    #add check to account for missing values in data
    quotes = [list(finance.quotes_historical_yahoo_ochl(symbol, startDate, endDate,asobject = True).open) for symbol in symbols]
    return dict(zip(symbols,quotes))


if __name__ == "__main__":
    # Script entry point: download opening prices for a fixed list of
    # symbols and store them in the 'returns' table of a local SQLite file.
    disk_engine = create_engine('sqlite:///returnData.db')
    symbols = ['COP', 'AXP', 'RTN', 'BA', 'AAPL', 'PEP', 'NAV', 'GSK', 'MSFT',
       'KMB', 'R', 'SAP', 'GS', 'CL', 'WMT', 'GE', 'SNE', 'PFE', 'AMZN',
       'MAR', 'NVS', 'KO', 'MMM', 'CMCSA', 'SNY', 'IBM', 'CVX', 'WFC',
       'DD', 'CVS', 'TOT', 'CAT', 'CAJ', 'BAC', 'WBA', 'AIG', 'TWX', 'HD',
       'TXN', 'VLO', 'F', 'CVC', 'TM', 'PG', 'LMT', 'HMC', 'GD',
       'HPQ', 'MTU', 'XRX', 'YHOO', 'XOM', 'MCD', 'CSCO',
       'NOC', 'MDLZ','ORCL','INTC','BP','EBAY']
    startDate = dt.datetime(2004, 1, 1)
    endDate = dt.datetime(2014, 1, 1)
    # Create the price matrix (symbol -> list of opening prices).
    prices = symbolsToPriceDict(symbols,startDate,endDate)

    #pdb.set_trace()
    # Find the list of dates for trading days. Initially we assume all
    # assets trade on all days, so we pull the dates from the first symbol.
    # In the future we need to find all trading dates and fill in missing
    # data for symbols with missing days.
    dates = list(finance.quotes_historical_yahoo_ochl(symbols[0], startDate, endDate,asobject = True).date)
    # One column per symbol, one row per trading day of the first symbol.
    df = pd.DataFrame(data = prices, columns = symbols, index = dates)
    df.to_sql('returns', disk_engine)

Beispiel #45
0
def _daily_return_frame(quotes):
    """Build a date-indexed quote frame with a daily-return column.

    `quotes` is the row sequence returned by quotes_historical_yahoo_ochl;
    field 0 of every row is an ordinal date. The returned frame is indexed
    by 'YYYY-MM-DD' labels, has the columns Open/Close/High/Low/Volume and
    an added 'DRet' column holding the open-to-close return.
    """
    fields = ['Date', 'Open', 'Close', 'High', 'Low', 'Volume']
    labels = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d')
              for row in quotes]
    frame = pd.DataFrame(quotes, index=labels, columns=fields)
    # drop() returns a new frame, so the result has to be kept.
    frame = frame.drop(['Date'], axis=1)
    frame['DRet'] = frame['Close'] / frame['Open'] - 1
    return frame


def _plot_return_fit(frame, label, color):
    """Show a histogram of frame.DRet with a fitted normal density."""
    bins = [
        -0.06, -0.05, -0.04, -0.03, -0.02, -0.01, 0, 0.01, 0.02, 0.03, 0.04,
        0.05, 0.06
    ]
    # Fit a normal distribution to the data:
    mu, sigma = norm.fit(frame.DRet)

    # NOTE(review): `normed` is the spelling used by the matplotlib releases
    # that still ship matplotlib.finance; newer releases call it `density`.
    plt.hist(frame.DRet,
             bins,
             normed=True,
             alpha=0.6,
             color=color,
             rwidth=1)

    # Plot the PDF over the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    xs = np.linspace(xmin, xmax, 100)
    plt.plot(xs, norm.pdf(xs, mu, sigma), 'k', linewidth=1)
    # Raw string: same text as before, without invalid escape sequences.
    plt.title(r"%s fit results: $\mu = %.4f$,  $\sigma = %.4f$" %
              (label, mu, sigma))
    plt.show()


def _print_report(frame, heading, plot_title, color):
    """Print statistics for one frame and plot its monthly volatility."""
    print(heading, '\n')
    print('1) Statistics', '\n')

    print(frame.describe().T, '\n')

    print('The autocorrelation of daily return is %.4f \n' %
          pd.Series.autocorr(frame.DRet, lag=1))

    # Tag every row with its 'YYYY-MM' month on an explicit copy so the
    # caller's frame is left untouched.
    tmpdf = frame.copy()
    tmpdf['YearMon'] = [label[:7] for label in tmpdf.index]

    print('2) Stock increase summary', '\n')

    print('The days when the stock increase in a month:\n')
    print(tmpdf[tmpdf.Close > tmpdf.Open]['YearMon'].value_counts().head(3))

    # Monthly volatility of the daily return.
    print('\n')
    print('3) Volatility by month', '\n')
    print('The monthly volatility of the stock returns:\n')

    voldf = tmpdf.groupby('YearMon')['DRet'].std()
    print(voldf.head(3))

    plt.figure()
    voldf.plot(figsize=(6, 3),
               title=plot_title,
               color=color,
               grid=True,
               legend=True)
    plt.show()


def stockmarket():
    """Calculate the correlation between stock and market of your choice.

    Prompts for a stock symbol and a market-index symbol, downloads two
    years of daily quotes for both, prints the correlation of their
    open-to-close daily returns, shows a return histogram with a fitted
    normal density for each, and prints/plots the same statistics report
    for the stock and then for the market.

    Returns nothing; all results are printed or shown as figures.
    """
    astock = input('Enter your interested symbol (example:GE):')
    amarket = input('Enter the relative market index (example:^NY):')

    # Yahoo Finance data interface: same calendar day two years ago.
    today = date.today()
    start = (today.year - 2, today.month, today.day)
    astock = quotes_historical_yahoo_ochl(astock, start, today)
    amarket = quotes_historical_yahoo_ochl(amarket, start, today)

    # 1. Data cleaning
    # Each frame is indexed by its OWN dates. The original reused the
    # stock's date list for the market frame, which fails (or mislabels
    # rows) whenever the two downloads differ in their trading days.
    astockdf = _daily_return_frame(astock)
    amarketdf = _daily_return_frame(amarket)

    # Correlate only the days present in both series.
    paired = pd.concat([astockdf['DRet'], amarketdf['DRet']],
                       axis=1,
                       join='inner',
                       keys=['stock', 'market'])
    print(
        '\n', 'The correlation between Stock and Market is: %.4f.' %
        np.corrcoef(paired['stock'], paired['market'])[0][1])

    # 2. Plot the histograms and fitted probability densities
    _plot_return_fit(astockdf, 'Stock', 'green')
    _plot_return_fit(amarketdf, 'Market', 'blue')

    # 3. Data analysis
    _print_report(astockdf, '============For Stock============',
                  'Stock Monthly Volatility', 'green')
    _print_report(amarketdf, '============For Market============',
                  'Market Monthly Volatility', 'blue')
def predictions_mls(filename, company, refcompany, dt1, dt2, num_of_states,
                    test_num):
    """Simulate future prices from the most likely current hidden state.

    Loads a fitted HMM from `filename`, rebuilds the two-column training
    matrix (daily close differences of `company` and `refcompany` between
    `dt1` and `dt2`), finds the most probable hidden state of the last
    observation and draws `test_num` sample paths that start in that state.
    The per-day mean over all paths is written to
    ./sims3/Year_of_predictions_<company>_States_<num_of_states>_adv.csv.

    Relies on the module-level names total2active, max_int_value and yr,
    which are defined outside this function.

    Returns a 3-tuple of arrays with one entry per simulated path: the
    price at column days_future - 2, at (days_future - 2) // 4 and at
    (days_future - 2) // 36 of the prediction matrix.
    """
    # Generate samples starting in the most likely actual current state

    days_future = 365

    # NOTE(review): joblib.load unpickles the file; only load model files
    # from a trusted source.
    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])

    # Take diff of close value. Note that this makes
    # len(diff) = len(close_t) - 1 therefore, other quantities also need to be shifted by 1
    diff = np.diff(close_v)
    close_v = close_v[1:]

    # Unpack quotes Company2
    quotes2 = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)
    close_v2 = np.array([q[2] for q in quotes2])
    diff2 = np.diff(close_v2)

    # Left-pad the company series with zeros up to the reference length.
    # NOTE(review): this only lines the columns up when the reference
    # history is at least as long as the company history; otherwise
    # column_stack below fails on mismatched lengths.
    delta = abs(diff2.shape[0] - diff.shape[0])

    diff0 = np.pad(diff, (delta, 0), mode='constant', constant_values=0)
    close_v = np.pad(close_v, (delta, 0), mode='constant', constant_values=0)

    X = np.column_stack([diff0, diff2])

    # Predict the most likely current internal hidden state
    hidden_probs = model.predict_proba(X)
    lstate_prob = hidden_probs[-1]

    days = int(days_future // total2active)  # 251 open market days in a year
    print(days, strftime("%Y-%m-%d %H:%M:%S", gmtime()))  #debugging purposes

    # With more than one state, start the sampler in the most likely one.
    if (num_of_states > 1):
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    # Sampling model: fitted parameters plus the forced start state.
    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Fresh random seed for every sample path.
    random.seed()
    rseed = random.randrange(0, max_int_value)
    X, Z = model_2_sample.sample(days, random_state=rseed)

    allpredictions = np.zeros((test_num, yr))
    for test in range(test_num):
        final_price = close_v[-1]
        j = 0
        for i in range(days):
            # Apply the sampled change unless it would drive the price
            # to zero or below.
            if ((final_price + X[i][0]) > 0):
                final_price += X[i][0]
            # Every fifth sampled day (once j > 1) the price is written to
            # three consecutive columns instead of one.
            if (j > 1 and i % 5 == 0):
                allpredictions[test][j] = final_price
                allpredictions[test][j + 1] = final_price
                allpredictions[test][j + 2] = final_price
                j = j + 3
            else:
                allpredictions[test][j] = final_price
                j = j + 1

        # Carry the last price forward over the remaining columns.
        while (j < allpredictions.shape[1]):
            allpredictions[test][j] = final_price
            j = j + 1

        # Draw the sample path for the next iteration.
        rseed = random.randrange(0, max_int_value)
        X, Z = model_2_sample.sample(days, random_state=rseed)

    predictions_year = allpredictions.mean(axis=0)
    print("Avg. Prediction: ", predictions_year[-1])

    fname = "Year_of_predictions_" + str(company) + "_States_" + str(
        num_of_states) + "_adv.csv"
    fname = os.path.join('./sims3', fname)
    np.savetxt(fname, predictions_year, delimiter=",")

    # Floor division keeps the column indices integral; plain `/` produces
    # floats on Python 3, which are not valid array indices.
    return (allpredictions[:, days_future - 2],
            allpredictions[:, (days_future - 2) // 4],
            allpredictions[:, (days_future - 2) // 36])
    if len(sys.argv) < 4 :
        print "Usage : python stock_draw.py code contry fontpath."
        print "eg : python stock_draw.py AAPL US D:\Dev\stockdb\ipagp.ttf"
        raise SystemExit

    symbol = sys.argv[1]
    contry = sys.argv[2]
    fontpath = sys.argv[3]

    # (Year, month, day) tuples suffice as args for quotes_historical_yahoo
    date0 = (2016, 1, 1)
    date1 = (2016, 3, 1)
    date2 = (2016, 6, 1)
    start = '2016-01-01'

    quotes = quotes_historical_yahoo_ochl(symbol, date1, date2)
    if len(quotes) == 0:
        raise SystemExit

    ds, opens, closes, highs, lows, volumes = zip(*quotes)
    days = len(closes)

    '''
    idx = pd.Index(closes)
    vales = np.arange(len(idx)).astype(float)
    s = Series(vales, index=idx)

    # SMA EMA
    sma5 = pd.rolling_mean(s, window=5)
    sma5 = sma5.dropna()
    ewma = pd.stats.moments.ewma
Beispiel #48
0
symbols_all, names_all = stock_list.Symbol.values, stock_list.Name.values

# Single-argument print() behaves the same on Python 2 and Python 3.
print(symbols_all)

print(type(symbols_all))

ticker_index = 1

# Collect the quotes of EVERY symbol. The original rebound `quotes` to a
# one-element list on each iteration, so only the last symbol survived.
quotes = []
for symbol in symbols_all:
    quotes.append(
        finance.quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True))

# One row of opening / closing prices per symbol. The builtin `float`
# replaces the `np.float` alias, which newer NumPy releases no longer
# provide. The names `open` and `close` are kept from the original script
# even though `open` shadows the builtin.
# NOTE(review): the conversion assumes every symbol returned the same
# number of trading days.
open = np.array([q.open for q in quotes]).astype(float)

close = np.array([q.close for q in quotes]).astype(float)

# Daily open-to-close variation per symbol.
variation = close - open
Beispiel #49
0
symbols_all, names_all = stock_list.Symbol.values, stock_list.Name.values

# Single-argument print() behaves the same on Python 2 and Python 3.
print(symbols_all)

print(type(symbols_all))

ticker_index = 1

# Collect the quotes of EVERY symbol. The original rebound `quotes` to a
# one-element list on each iteration, so only the last symbol survived.
quotes = []
for symbol in symbols_all:
    quotes.append(
        finance.quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True))

# One row of opening / closing prices per symbol. The builtin `float`
# replaces the `np.float` alias, which newer NumPy releases no longer
# provide. The names `open` and `close` are kept from the original script
# even though `open` shadows the builtin.
# NOTE(review): the conversion assumes every symbol returned the same
# number of trading days.
open = np.array([q.open for q in quotes]).astype(float)

close = np.array([q.close for q in quotes]).astype(float)

# Daily open-to-close variation per symbol.
variation = close - open
import datetime
import numpy as np
import pylab as pl
from matplotlib.finance import quotes_historical_yahoo_ochl
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from hmmlearn.hmm import GaussianHMM


print(__doc__)

###############################################################################
# Downloading the data
date1 = datetime.date(1995, 1, 1)  # start date
date2 = datetime.date(2012, 1, 6)  # end date
# get quotes from yahoo finance
quotes = quotes_historical_yahoo_ochl("INTC", date1, date2)
if len(quotes) == 0:
    raise SystemExit

# Field 0 of a quote is the date ordinal, field 2 the close and field 5
# the volume. Dates and volume drop their first entry right away because
# the price difference below is one element shorter than the close series.
dates = np.array([row[0] for row in quotes], dtype=int)[1:]
close_v = np.array([row[2] for row in quotes])
volume = np.array([row[5] for row in quotes])[1:]

# Day-over-day change of the close, then align the close series with it.
diff = np.diff(close_v)
close_v = close_v[1:]
Beispiel #51
0
Created on Wed Dec 02 11:25:23 2015

@author: welion
"""


#Filename:quotes.py
#Get Dow Jones Industrial Avarage from Yahoo with quotes_historical_yahoo
#!/bin/python

from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd

import calendar

today = date.today()

# Start three calendar months before today. The original passed
# today.month - 3 directly, which is zero or negative from January to
# March; wrap into the previous year instead, and clamp the day to the
# length of the target month (e.g. May 31st -> February 28th/29th).
start_year, start_month = today.year, today.month - 3
if start_month < 1:
    start_month += 12
    start_year -= 1
start_day = min(today.day, calendar.monthrange(start_year, start_month)[1])
start = (start_year, start_month, start_day)

quotes = quotes_historical_yahoo_ochl('AXP', start, today, asobject=False)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# Render the ordinal date in field 0 of every quote as 'YYYYMMDD'.
timelist = [date.fromordinal(int(row[0])).strftime('%Y%m%d') for row in quotes]

# Frame indexed by the date labels, without the raw ordinal column.
quotesdf = pd.DataFrame(quotes, index=timelist, columns=fields)
quotesdf = quotesdf.drop(['date'], axis=1)

# Single-argument print() behaves the same on Python 2 and Python 3.
print(quotesdf)


Beispiel #52
0
"""
数据的简单处理与筛选

"""
from matplotlib.finance import quotes_historical_yahoo_ochl  # 注matplotlib包里已经没有了quotes_historical_yahoo方法了,改为quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd 
import numpy as np 
import time


today = date.today()
start = (today.year - 5, today.month, today.day)
# American Express quotes from five years ago until today.
quotes = quotes_historical_yahoo_ochl('AXP', start, today)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# The dates arrive as Gregorian ordinals in field 0; convert them to
# "YYYY-MM-DD" labels.
list1 = [date.fromordinal(int(row[0])).strftime("%Y-%m-%d") for row in quotes]

# Passing index= replaces the default integer index with the date labels.
qutoesdf = pd.DataFrame(quotes, index=list1, columns=fields)
qutoesdf = qutoesdf.drop(['date'], axis=1)

# Mean of the closing price. DataFrame.mean() has no `columns` argument,
# so the column is selected first. Single-argument print() behaves the
# same on Python 2 and Python 3.
print(qutoesdf['close'].mean())
# Volume on the days whose opening price is above 80
Beispiel #53
0
def hist_stock(symbol, start_date, end_date):
    """Return a list of (field 0, field 6) pairs for every quote of `symbol`.

    Per the original comment each record is laid out as (date, year, month,
    day, d, open, close, high, low, volume, adjusted_close), which would
    make the pairs (date, close) -- TODO confirm against the installed
    matplotlib.finance version.
    """
    records = fin.quotes_historical_yahoo_ochl(
        symbol, start_date, end_date, asobject=True)
    stock = []
    for record in records:
        stock.append((record[0], record[6]))
    return stock
Beispiel #54
0
from matplotlib.finance import quotes_historical_yahoo_ochl

# retrieve symbol lists: one 'symbols-<market>-unique.txt' file per market,
# one symbol per line
markets = ['amex', 'nasdaq', 'nyse', 'otcbb']
symbols = []
for m in markets:
    fname = 'symbols-' + m + '-unique.txt'
    with open(fname, 'r') as f:
        symbols += f.read().splitlines()

# %-formatting keeps the output identical on Python 2 and Python 3.
print('%d symbols listed' % len(symbols))

# set date range
date1 = date(1984, 1, 1)
date2 = date(2014, 12, 31)
# date2 = date.today()
# date1 = date2 - timedelta(days=14)

# retrieve all data, one CSV file per symbol under csv/
for symbol in symbols:
    # The download stays best-effort, but a failure is now reported instead
    # of being silently swallowed, and Ctrl-C is no longer caught (the
    # original used a bare `except`).
    try:
        data = quotes_historical_yahoo_ochl(symbol, date1, date2)
    except Exception as exc:
        print('%s skipped: %s' % (symbol, exc))
        continue

    if data is None or len(data) == 0:
        continue

    print('%s %d' % (symbol, len(data)))
    try:
        with open('csv/' + symbol + '.csv', 'w') as f:
            writer = csv.writer(f)
            writer.writerows(data)
    except (IOError, OSError) as exc:
        print('%s not written: %s' % (symbol, exc))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
__author__ = 'wangjj'
__mtime__ = '20161022下午 11:39'
# Download window: same calendar day one year ago until today.
today = date.today()
start = (today.year - 1, today.month, today.day)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']
quotes = quotes_historical_yahoo_ochl('KO', start, today)

# Render the ordinal date in field 0 of every quote as 'YYYY-MM-DD'.
list1 = [date.fromordinal(int(row[0])).strftime('%Y-%m-%d') for row in quotes]

# Frame indexed by the date labels, without the raw ordinal column.
quoteskodf = pd.DataFrame(quotes, index=list1, columns=fields).drop(
    ['date'], axis=1)

# Parse the month number back out of every index label.
listtemp = [time.strptime(label, "%Y-%m-%d").tm_mon
            for label in quoteskodf.index]
print(listtemp)

# Add the month to a copy of the frame and average the close per month.
tempkodf = quoteskodf.copy()
tempkodf['month'] = listtemp
closeMeansKO = tempkodf.groupby('month').mean().close
Beispiel #56
0
def predictions_mls(filename, company, dt1, dt2, num_of_states, test_num,
                    days_future, tr_prob):
    """Sample price paths from a stored HMM and compare them to real prices.

    Loads the model from `filename`, finds the most probable hidden state
    of the last observed close difference of `company` between `dt1` and
    `dt2`, and draws `test_num` sample paths that start in that state and
    at the first real price returned by getrealprice_series. Mean,
    variance and median over the paths, the real prices, the errors and a
    constant `tr_prob` vector are written to
    ./sims_final/Predictions_<company>_States_<num_of_states>_stats.csv.

    Relies on the module-level name max_int_value. Returns None.
    """
    model = joblib.load(filename)

    # Real prices to compare against; their count fixes the path length.
    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size

    # Single-feature observation matrix: day-over-day close differences.
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    closes = np.array([q[2] for q in quotes])
    X = np.column_stack([np.diff(closes)])

    # State probabilities of the most recent observation.
    lstate_prob = model.predict_proba(X)[-1]

    # With more than one state, force the sampler to start in the most
    # likely current state.
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    # Sampling model: fitted parameters plus the forced start state.
    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Fresh random seed for every sample path.
    random.seed()
    rseed = random.randrange(0, max_int_value)
    X, Z = model_2_sample.sample(days, random_state=rseed)

    allpredictions = np.zeros((test_num, days))
    for test in range(test_num):
        # Day 0 of every path is the first real price; later days add the
        # sampled changes one after another.
        running_price = rp[0]
        allpredictions[test][0] = running_price
        for day, change in enumerate(X[1:days, 0], 1):
            running_price += change
            allpredictions[test][day] = running_price

        # Draw the sample path for the next iteration.
        rseed = random.randrange(0, max_int_value)
        X, Z = model_2_sample.sample(days, random_state=rseed)

    # Per-day statistics over all sampled paths.
    predictions = allpredictions.mean(axis=0)
    predictions_var = allpredictions.var(axis=0)
    predictions_median = np.median(allpredictions, axis=0)

    errors = predictions - rp
    tr_prob_vector = np.full(predictions.size, tr_prob)

    data = [predictions, rp, errors, tr_prob_vector,
            predictions_var, predictions_median]

    err_final = errors[-1]

    print ("Start Price: ",rp[0],"Avg. Prediction: ",str(num_of_states),"states:" ,
           predictions[-1]," Real Price:", rp[-1])
    print (" Error end of predictions:", err_final,"Delta Start-End:", rp[0]-rp[-1],"\n")

    out_name = ("Predictions_" + str(company) + "_States_" +
                str(num_of_states) + "_stats.csv")
    np.savetxt(os.path.join('./sims_final', out_name), data, delimiter=",")

    return
Beispiel #57
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import ch
ch.set_ch()
__author__ = 'wangjj'
__mtime__ = '20161023下午 10:11'
start = datetime(2015, 1, 1)
end = datetime(2015, 12, 31)
quotes = quotes_historical_yahoo_ochl('MSFT', start, end)
fields = ['date', 'open', 'close', 'high', 'low', 'volume']
list1 = []
list2 = []
for i in range(0, len(quotes)):
    x = date.fromordinal(int(quotes[i][0]))
    y = datetime.strftime(x, '%y/%m/%d')
    list1.append(y)
    list2.append(x.month)
# print(list1)
quotes_of_MSFT = pd.DataFrame(quotes, index=list1, columns=fields)
quotes_of_MSFT = quotes_of_MSFT.drop(['date'], axis=1)
quotes_of_MSFT['month'] = list2
# print(quotes_of_MSFT)
open_MSFT = quotes_of_MSFT.groupby('month').mean().open
print open_MSFT, open_MSFT.index
list_open = []
  'AMZN': 'Amazon',
  'KO':   'Coca Cola',
  'PEP':  'Pepsi',
  'MCD':  'Mc Donalds',
  'YUM':  'Taco Bell',
  'CMG':  'Chipotle Mexican Grill',
  'WMT':  'Wal-Mart',
  'HD':   'Home Depot',
  'CVS':  'CVS'
}

# Split the ticker -> company-name mapping into two parallel arrays.
symbols, names = np.array(list(symbol_dict.items())).T
print('_'*140 + "\n>>> quotes:")
for symbol in symbols:
    try:
        quote = finance.quotes_historical_yahoo_ochl(
            symbol, d1, d2, asobject=True)
        print("\n%s %s:\n%s" % (symbol, symbol_dict[symbol], quote))
    except Exception as e:
        # This is usually a 404 error caused by the date range (i.e. the
        # stock may not have existed yet). The original `except` had no
        # body at all, which is a syntax error; report the symbol and move
        # on to the next one.
        print("\n%s %s: no quotes available (%s)" %
              (symbol, symbol_dict[symbol], e))
print('_'*140 + "\n")

# Single-symbol download over a different date range.
cls1 = datetime.datetime(2009, 1, 1)
cls2 = datetime.datetime(2015, 1, 1)
symbol = 'YUM' # Taco Bell
# symbol = 'CMG' # Chipotle Mexican Grill
quote = finance.quotes_historical_yahoo_ochl(symbol, cls1, cls2, asobject=True)
print("\n%s %s\ntype(quote)=%s=\nquote:\n%s" % (symbol, symbol_dict[symbol], type(quote), quote))


# quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True) for symbol in symbols]