Ejemplo n.º 1
0
    def run(self):
        """Download adjusted close prices from Yahoo and store simple returns.

        The 'SPY' ticker is fetched first and its dates become the reference
        index. Every ticker in ``self.assets`` is then left-joined onto that
        index, gaps are forward- and then backward-filled, and prices are
        converted to simple returns. The result is written to ``self.df``
        with a fresh integer index; the first row (which has no previous
        price) is dropped.
        """
        # Reference dates come from the 'SPY' price history.
        spy_close = DataReader('SPY', 'yahoo', self.start, self.end)['Adj Close']
        spy_close = spy_close.sort_index()

        if 'SPY' in self.assets:
            # Reuse the series that was just downloaded as the first column.
            self.df = spy_close.to_frame('SPY')
        else:
            self.df = pd.DataFrame(index=spy_close.index)

        # Retrieve the adjusted close of every other asset.
        for asset in self.assets:
            if asset in self.df.columns:
                # Already present (e.g. SPY); joining it again would fail on
                # the duplicate column name.
                continue
            asset_close = DataReader(asset, 'yahoo', self.start,
                                     self.end)['Adj Close'].rename(asset)
            self.df = self.df.join(asset_close, how='left')

        # Fill gaps: carry the last known price forward, then backfill
        # whatever is still missing at the start.
        self.df = self.df.ffill().bfill()

        # Simple returns; the first row is NaN by construction and is dropped.
        self.df = (self.df / self.df.shift(1) - 1.0).iloc[1:, :]

        # Replace the date index with a plain 0..n-1 index.
        self.df.reset_index(drop=True, inplace=True)
Ejemplo n.º 2
0
def Econ_env(YYYY, m, dd):
    """Return the adjusted close of the '^GSPC' ticker from a start date on.

    Parameters
    ----------
    YYYY, m, dd : int
        Year, month and day of the first date to download.

    Returns
    -------
    DataFrame
        A single column named 'S&P' holding the 'Adj Close' values that
        Yahoo returns for '^GSPC'.
    """
    start_date = datetime.datetime(YYYY, m, dd)
    # The FRED 'GDP' series used to be downloaded here as well, but it was
    # never used in the result, so the extra request has been removed.
    sp500 = DataReader('^GSPC', "yahoo", start=start_date)

    return DataFrame({'S&P': sp500["Adj Close"]})
Ejemplo n.º 3
0
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """

    assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

    if start is None:
        start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print(stock)
            stock_pathsafe = stock.replace(os.path.sep, '--')
            cache_filename = "{stock}-{start}-{end}.csv".format(
                stock=stock_pathsafe,
                start=start,
                end=end).replace(':', '-')
            cache_filepath = get_cache_filepath(cache_filename)
            if os.path.exists(cache_filepath):
                stkd = pd.DataFrame.from_csv(cache_filepath)
            else:
                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None:
        for name, ticker in iteritems(indexes):
            print(name)
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data
Ejemplo n.º 4
0
def create_lagged_series(symbol, start_date, end_date, lags=5):
    """
    Build a pandas DataFrame of percentage returns for a Yahoo Finance stock.

    The frame holds today's return computed from the adjusted close, a series
    of lagged returns ('Lag1' .. 'Lag<lags>'), the trading volume, and the
    direction (sign) of today's move. Data is requested from one year before
    `start_date` and only rows dated `start_date` or later are returned.
    """
    ts = DataReader(symbol, "yahoo", start_date - datetime.timedelta(days=365),
                    end_date)

    # Price levels: today's adjusted close plus its shifted copies.
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["Adj Close"]
    tslag["Volume"] = ts["Volume"]

    for i in range(lags):
        tslag["Lag%s" % (i + 1)] = ts["Adj Close"].shift(i + 1)

    # Percentage returns derived from the price levels.
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    # Returns closer to zero than 0.0001 are replaced by 0.0001. This is done
    # in one masked assignment rather than item by item through a chained
    # index, which is not guaranteed to write into the frame. NaN compares
    # False and is therefore left untouched, as before.
    tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

    for i in range(lags):
        column = "Lag%s" % (i + 1)
        tsret[column] = tslag[column].pct_change() * 100.0

    tsret["Direction"] = np.sign(tsret["Today"])
    return tsret[tsret.index >= start_date]
Ejemplo n.º 5
0
 def get_prices_df(self, ticker, date_start, date_end):
     """Download Yahoo quotes for `ticker` and drop the zero-volume rows.

     Any exception raised while downloading or filtering is re-raised as
     ``ErrorInternetConnexion('yahoo DataReader', <original exception>)``.
     """
     try:
         quotes = DataReader(ticker, "yahoo", date_start, date_end)
         # Rows with zero volume are discarded (public holidays, e.g. 01/01).
         traded = quotes[quotes.Volume != 0]
     except Exception as e:
         raise ErrorInternetConnexion('yahoo DataReader', e)
     else:
         return traded
Ejemplo n.º 6
0
def stockhistorynobackfilltodataframeusingcache(symbol, fromdate, todate):
    """Return Yahoo price history for `symbol`, using a CSV cache on disk.

    The cache file lives in ``config.mycachefolder`` and is named after the
    symbol and both dates, so `fromdate` and `todate` must be strings. On a
    cache miss the data is downloaded and written to that file.

    Both the cached and the freshly downloaded frame are indexed by parsed
    dates.
    """
    print('--------------------------')
    print('Initialized pullprices.stockhistorydailytodataframeusingcache')
    import os
    import pandas as pd
    from pandas.io.data import DataReader

    import config
    import mytools
    mycachefolder = config.mycachefolder
    mytools.general().make_sure_path_exists(mycachefolder)

    # os.path.join instead of a hard-coded '\\' so the file lands inside the
    # cache folder on every platform.
    cachedfilename = ('stockhistorynobackfill ' + symbol + ' ' + fromdate +
                      ' ' + todate + '.csv')
    cachedfilepathname = os.path.join(mycachefolder, cachedfilename)

    if os.path.isfile(cachedfilepathname):
        print('   Found cached file:  ' + cachedfilepathname)
        # parse_dates keeps the index type consistent with a fresh download.
        df_hist = pd.read_csv(cachedfilepathname, index_col=0,
                              parse_dates=True)
    else:
        print('   Getting new file:' + cachedfilepathname)
        df_hist = DataReader(symbol, "yahoo", fromdate, todate)
        df_hist.to_csv(cachedfilepathname,
                       columns=('Open', 'High', 'Low', 'Close', 'Volume',
                                'Adj Close'))

    return df_hist
Ejemplo n.º 7
0
def stockhistory(symbol, fromdate, todate):
    """Return what DataReader yields for `symbol` on Yahoo between two dates."""
    from pandas.io.data import DataReader

    return DataReader(symbol, "yahoo", fromdate, todate)
Ejemplo n.º 8
0
 def test_read_famafrench(self):
     """Each listed Fama-French dataset must load as a non-empty dict."""
     dataset_names = (
         "F-F_Research_Data_Factors",
         "F-F_Research_Data_Factors_weekly",
         "6_Portfolios_2x3",
         "F-F_ST_Reversal_Factor",
     )
     for dataset in dataset_names:
         loaded = DataReader(dataset, "famafrench")
         assert loaded
         assert isinstance(loaded, dict)
Ejemplo n.º 9
0
 def test_read_famafrench(self):
     """Each listed Fama-French dataset must load as a dict (not None)."""
     dataset_names = (
         "F-F_Research_Data_Factors",
         "F-F_Research_Data_Factors_weekly",
         "6_Portfolios_2x3",
         "F-F_ST_Reversal_Factor",
         "F-F_Momentum_Factor",
     )
     for dataset in dataset_names:
         loaded = DataReader(dataset, "famafrench")
         self.assertTrue(loaded is not None)
         self.assertIsInstance(loaded, dict)
Ejemplo n.º 10
0
def ADF(ticker, start, end):
    """Run ``ts.adfuller`` on a ticker's adjusted closes and print the outcome.

    Downloads the Yahoo history for `ticker`, calls ``ts.adfuller`` on the
    'Adj Close' column with 1 as second argument, prints the raw result and
    then reports the first critical value (1%, 5%, 10%, in that order) that
    the test statistic falls below. The downloaded frame is returned in
    every case.
    """
    print('ADF')

    stock = DataReader(ticker, "yahoo", start, end)

    result = ts.adfuller(stock['Adj Close'], 1)
    print(result)
    print('')

    statistic, critical = result[0], result[4]
    # Strictest level first; the first one that is satisfied is reported.
    thresholds = (
        (critical['1%'], 'Lesser than 1%'),
        (critical['5%'], 'Lesser than 5%'),
        (critical['10%'], 'Lesser than 10%'),
    )

    verdict = 'Cannot reject Null Hypothesis'
    for bound, message in thresholds:
        if statistic < bound:
            verdict = message
            break

    print(verdict)
    print('-----------------------------------------')
    return stock
Ejemplo n.º 11
0
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """
    Read time series data. Use cached version if it exists and
    use_cache is True, otherwise retrieve, cache, then read.

    The cache is ``<base_dir>/<dir_name>/<symbol>.csv`` where ``base_dir``
    comes from ``pf.read_config()`` when that succeeds and is empty
    otherwise. The directory is created when missing. The frame is always
    read back from the CSV file and passed through ``_adj_column_names``.
    """
    base_dir = ''
    try:
        base_dir = pf.read_config()['base_dir']
    except Exception:
        # Best effort: without a usable config entry the cache directory is
        # taken relative to the current directory. Unlike a bare ``except``
        # this lets KeyboardInterrupt and SystemExit through.
        pass
    dir_name = os.path.join(base_dir, dir_name)

    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    timeseries_cache = os.path.join(dir_name, symbol + '.csv')

    if not (use_cache and os.path.isfile(timeseries_cache)):
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')

    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
Ejemplo n.º 12
0
def historical_pandas_yahoo(symbol, source='yahoo', start=None, end=None):
    '''
    Return historical quotes for `symbol` as delivered by DataReader.

    `source` is handed to DataReader as the data source (yahoo! finance by
    default); `start` and `end` are forwarded unchanged.
    '''
    # NOTE: should multiple symbols come back as a Panel?
    # NOTE: the "Adj Close" column name contains a space, which is awkward.
    quotes = DataReader(symbol, source, start=start, end=end)
    return quotes
Ejemplo n.º 13
0
    def importData(self):
        """
        Import (New) Data from Yahoo.

        Downloads quotes for ``self._tickerCode`` between the dates given by
        ``self._getLatestDate()`` and ``self._getTodaysDate()``, rescales
        Open/High/Low by the ratio Adj Close / Close, replaces Close with
        Adj Close and stores the rows in the ``Equities`` table with
        "insert or replace". The downloaded frame is kept in ``self._data``.
        """

        start = self._getLatestDate()
        end = self._getTodaysDate()

        Logger.log(logging.INFO, "Loading Data", {"scope":__name__, "tickerCode":self._tickerCode, "start":str(start), "end":str(end)})
        self._data = DataReader(self._tickerCode, "yahoo", start, end)

        self._data['Code'] = self._tickerCode

        # Apply the same adjustment factor to the remaining price columns.
        for item in ['Open', 'High', 'Low']:
            self._data[item] = self._data[item] * self._data['Adj Close'] / self._data['Close']

        self._data.drop('Close', axis=1, inplace=True)
        self._data.rename(columns={'Adj Close':'Close'}, inplace=True)
        self._data['Volume'] = self._data['Volume'].astype(float)

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        try:
            query = "insert or replace into Equities (Date, Open, High, Low, Volume, Close, Code) values (?,?,?,?,?,?,?)"
            connection.executemany(query, self._data.to_records(index=True))
            connection.commit()
        finally:
            # Release the database handle even when the insert fails.
            connection.close()
def peak_begin_dates(start="01/01/1972", end=None):
    """
    Use the fred dataset `USRECQ` to determine the beginning of the
    peaks before all recessions between dates start and end

    Parameters
    ----------
    start : string or datetime.datetime, optional(default='01/01/1972')
        A string or other acceptable pandas date identifier that marks
        the beginning of the window for which we will search for starts
        of peaks

    end : string or datetime.datetime, optional(default=None)
        The ending date of the search window. ``None`` means "now",
        evaluated at call time.

    Returns
    -------
    rec_startind : pd.DatetimeIndex
        A pandas DatetimeIndex representing the starting points of each
        "peak" from start to end. Empty when the window contains no
        recession quarter.
    """
    if end is None:
        # Resolved per call; a default of ``datetime.now()`` in the signature
        # would be frozen at import time.
        end = datetime.now()

    # Get quarterly recession dates from FRED
    rec_dates = DataReader("USRECQ", "fred", start=start, end=end)
    one_vals = np.where(rec_dates == 1)[0]

    if len(one_vals) == 0:
        return rec_dates.index[:0]

    rec_start = [one_vals[0]]

    # Find the beginning of the recession dates (Don't include ones that
    # begin within three years of a previous one -- hence the `+12`).
    # `one_vals` is ascending, so the last accepted start is the largest.
    for d in one_vals:
        if d > rec_start[-1] + 12:
            rec_start.append(d)

    rec_startind = rec_dates.index[rec_start]

    return rec_startind
Ejemplo n.º 15
0
def historical_volatility(sym, days):
    """Return the annualized stddev of daily log returns of `sym`.

    NOTE(review): only the download step and its error path are present in
    this block. The volatility computation and the success-path return are
    not shown here, so the summary line above cannot be confirmed from this
    code. On a download error the function prints a message and returns
    ``(None, None)``.
    """
    try:
        # Keep the last `days` entries of the 'Close' column.
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception, e:
        # Python 2 syntax: report the failure and hand back a (None, None) pair.
        print "Error getting data for symbol '{}'.\n".format(sym), e
        return None, None
Ejemplo n.º 16
0
def get_data(stock, starttime, endtime):
    """Return scaled daily returns and raw adjusted closes for `stock`.

    Downloads the Yahoo history between `starttime` and `endtime` and walks
    through the 'Adj Close' column.

    Returns
    -------
    (daily_ret_arr, return_array)
        ``daily_ret_arr`` holds ``(price - previous) / previous * scale`` for
        every row, where ``scale`` is a module-level name; the first entry
        is 0 because the first price is compared with itself.
        ``return_array`` holds the adjusted closes themselves. Both lists
        are empty when no rows come back.
    """
    prices = DataReader(stock, 'yahoo', starttime, endtime)['Adj Close']

    daily_ret_arr = []
    return_array = []

    # Seeded from the first price inside the loop, which avoids a positional
    # ``[0]`` lookup on a date-indexed Series and copes with empty data.
    last_price = None
    for price in prices:
        return_array.append(price)
        if last_price is None:
            last_price = price
        daily_return = (price - last_price) / last_price
        daily_ret_arr.append(daily_return * scale)
        last_price = price

    print("Records found:" + str(len(daily_ret_arr)))
    return daily_ret_arr, return_array
Ejemplo n.º 17
0
    def set_source(self, source, tickers, start, end):
        """Load 'Close' prices for `tickers` and flatten them into events.

        Each ticker is downloaded from `source`; a failure is logged and that
        ticker is skipped. The resulting table is then turned into a list of
        ``(timestamp, ticker, price)`` tuples, row by row, with the tickers
        of each row visited in random order and non-finite prices left out.
        The list is stored in ``self._source``.
        """
        prices = pd.DataFrame()
        for position, ticker in enumerate(tickers):
            try:
                # The logged value is the fraction of tickers handled so far.
                progress = float(position) / len(tickers)
                self._logger.info('Loading ticker %s' % progress)
                frame = DataReader(ticker, source, start, end)
                prices[ticker] = frame.loc[:, 'Close']
            except Exception as e:
                self._logger.error(e)

        events = []
        for timestamp, series in prices.iterrows():
            vals = series.values
            labels = series.index
            # Shuffle the column order within the row.
            order = np.random.choice(len(vals), replace=False,
                                     size=len(vals))
            events.extend((timestamp, labels[k], vals[k])
                          for k in order if np.isfinite(vals[k]))

        self._source = events

        self._logger.info('Loaded data!')
Ejemplo n.º 18
0
def stockhistorybackfilledtodictionary(symbol, fromdate, todate):
    """Return a day-by-day dict of adjusted closes with gaps carried forward.

    `fromdate` and `todate` are 'YYYY-MM-DD' strings. Every calendar day in
    that range becomes a key; its value is ``[('AdjClose', price)]`` where
    `price` is that day's 'Adj Close' from Yahoo when the day is present in
    the download, otherwise the most recent earlier value (the string
    'NaN' until a first value has been seen). Each day is also printed.
    """
    from pandas.io.data import DataReader
    from datetime import datetime, timedelta

    hist = DataReader(symbol, "yahoo", fromdate, todate)

    date_format = "%Y-%m-%d"
    d = datetime.strptime(fromdate, date_format)
    # Parsed once instead of on every loop iteration.
    last_day = datetime.strptime(todate, date_format)
    delta = timedelta(days=1)
    last_adjclose = 'NaN'

    dictAdjClose = {}

    while d <= last_day:
        d_string = d.strftime(date_format)
        if d_string in hist.index:
            # ``.loc`` replaces the removed ``.ix`` indexer.
            last_adjclose = hist.loc[d_string]['Adj Close']
            print(d_string, last_adjclose)
        else:
            print(d_string, 'nothing', last_adjclose)
        dictAdjClose[d_string] = [('AdjClose', last_adjclose)]
        d += delta

    return dictAdjClose
Ejemplo n.º 19
0
def main():
    '''
    Plot the closing price of the stock named by the module-level `stock_num`.

    1. The data from Yahoo! Finance is not grabbed by calling url apis, but by
       using Pandas APIs.
    2. This program gets TWSE data only; for OTC data the code needs to be
       modified.

    On any error the program exits with a usage hint followed by the reason.
    '''
    import sys

    # Setup figure
    plt.figure()
    stock_plt = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    stock_title = "{} day price".format(stock_num)
    plt.suptitle(stock_title)
    startday = dtime.date(2000, 1, 1)

    # Add ".TW" to tell yahoo!Finance to query TWSE stock data.
    # To query OTC instead, add ".TWO".
    stock_str = "{}.TW".format(stock_num)

    # For how DataReader() works, refer to data.py from pandas.
    try:
        stock_data = DataReader(stock_str, 'yahoo', startday)
        # Clear the current axes
        stock_plt.cla()
        # Turn the axes grids on
        stock_plt.grid(True)
        # Plot date and price
        stock_plt.plot(stock_data.index, stock_data['Close'])
        # Show
        plt.show()
    except Exception as err:
        # ``Exception`` rather than a bare except so Ctrl-C still works, and
        # the cause is reported instead of being hidden.
        sys.exit("Error happened!!\nTry: python TwanStkEx1.py 2330\n"
                 "Reason: {}".format(err))
Ejemplo n.º 20
0
 def DownloadStocks(self, startingDate, endDate):
     """Download every stock in ``self._names`` and collect the frames.

     Each frame is requested from the "google" source for the given date
     range and appended to ``self._data``; ``self._columns`` is overwritten
     with the columns of the most recently downloaded frame. Progress is
     printed. Returns ``self._data``.
     """
     for name in self._names:
         print("Getting data from {0}...".format(name))
         frame = DataReader(name, "google", startingDate, endDate)
         self._columns = frame.columns
         row_count = frame.shape[0]
         print("    Number of lines:{0}".format(row_count))
         self._data.append(frame)
     return self._data
Ejemplo n.º 21
0
def load_from_yahoo(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Returns:
        DataFrame with one 'Close' column per stock / index name and an
        index localized to UTC.

    :Raises:
        AssertionError if `start` is not earlier than `end`.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    # Local import: ``pd.datetime`` was only an alias for this class and has
    # been removed from pandas.
    from datetime import datetime as _datetime

    if indexes is None:
        indexes = {'SPX': '^GSPC'}
    if stocks is None:
        stocks = ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO']
    if start is None:
        start = _datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    if end is None:
        end = _datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)

    assert start < end, "start date is later than end date."

    data = OrderedDict()

    for stock in stocks:
        print(stock)
        stkd = DataReader(stock, 'yahoo', start, end).sort_index()
        data[stock] = stkd

    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    for name, ticker in indexes.items():
        print(name)
        stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
        data[name] = stkd

    df = pd.DataFrame({key: d['Close'] for key, d in data.items()})
    df.index = df.index.tz_localize(pytz.utc)

    return df
Ejemplo n.º 22
0
def get_stock_history(stock):
    """Return roughly the last year of Yahoo prices for `stock`.

    The window starts 365 days before today. ``None`` is returned when the
    download raises ``HTTPError`` or ``BadStatusLine``.
    """
    try:
        one_year_ago = date.today() - timedelta(days=365)
        return DataReader(stock, "yahoo", start=one_year_ago)
    except (HTTPError, BadStatusLine):
        return None
Ejemplo n.º 23
0
def downloadStock(ticker, dataSource, start, end):
    """Download `ticker` from `dataSource`, returning an empty frame on failure.

    On success the ticker is printed and the downloaded frame is returned.
    Any ordinary error is swallowed on purpose (best effort) and an empty
    ``pd.DataFrame`` is returned instead.
    """
    try:
        gtemp = DataReader(ticker, dataSource, start, end)
    except Exception:
        # ``Exception`` rather than a bare except so KeyboardInterrupt and
        # SystemExit still propagate.
        return pd.DataFrame()
    print(ticker)
    return gtemp
Ejemplo n.º 24
0
 def test_read_famafrench(self):
     """Skipped unconditionally; the checks below the raise never run."""
     raise nose.SkipTest('buggy as of 2/14/16; maybe a data revision?')
     dataset_names = (
         "F-F_Research_Data_Factors",
         "F-F_Research_Data_Factors_weekly",
         "6_Portfolios_2x3",
         "F-F_ST_Reversal_Factor",
         "F-F_Momentum_Factor",
     )
     for dataset in dataset_names:
         loaded = DataReader(dataset, "famafrench")
         self.assertTrue(loaded is not None)
         self.assertIsInstance(loaded, dict)
Ejemplo n.º 25
0
def factors_df(end=True):
    """Return the "F-F_Research_Data_Factors" table as a date-indexed frame.

    The first table of the famafrench dataset is taken, its columns are
    renamed to 'Mkt_rf', 'SMB', 'HML' and 'rf', and the index values (read
    as integers of the form YYYYMM) are converted to the first day of that
    month.

    Parameters
    ----------
    end : bool, optional
        When true (the default) the index is moved to month-end timestamps.
    """
    ff = pd.DataFrame(DataReader("F-F_Research_Data_Factors", "famafrench")[0])
    ff.columns = ['Mkt_rf', 'SMB', 'HML', 'rf']

    # Floor division: ``d / 100`` is a float on Python 3 and datetime()
    # rejects it.
    ff.index = [dt.datetime(d // 100, d % 100, 1) for d in ff.index]
    if end:
        ff.index = ff.index.to_period('M').to_timestamp('M')

    return ff
Ejemplo n.º 26
0
    def get(self, ticker):
        ''' Fetches the full available history for a ticker.

        The request goes to DataReader with ``self.data_source`` as source
        and 1 January 1900 as start date.

        :param ticker: The stock symbol, such as `AAPL`.

        :returns: Whatever DataReader returns for that request.
        '''
        earliest = datetime(1900, 1, 1)
        return DataReader(ticker, data_source=self.data_source, start=earliest)
Ejemplo n.º 27
0
 def get_history(self, stock):
     """Return roughly the last year of Yahoo prices for `stock`.

     A progress line is printed first. The window starts 365 days before
     now. ``None`` is returned when the download raises ``HTTPError`` or
     ``BadStatusLine``.
     """
     # print() with a single argument behaves the same on Python 2 and 3.
     print("Retrieving data for %s" % (stock,))
     prices = None
     try:
         start_date = datetime.today() - timedelta(days=365)
         prices = DataReader(stock, "yahoo", start=start_date)
     except (HTTPError, BadStatusLine):
         pass
     return prices
Ejemplo n.º 28
0
    def data_StockView_import(self):
        """Load GOOGL prices into ``self.data`` and print a netfonds dump.

        ``self.data`` receives the "google" DataReader result for the range
        ``self.start_date`` .. ``self.end_date``. Separately, a fixed
        netfonds URL is fetched, parsed with ``pandas.read_table`` and
        printed; that table is not stored.
        """
        self.data = DataReader("GOOGL", "google", self.start_date,
                               self.end_date)

        source = urllib2.urlopen(
            'http://hopey.netfonds.no/posdump.php?date=20140530&paper=AAPL.O&csv_format=txt'
        )
        try:
            data = pandas.read_table(source)
        finally:
            # Always release the HTTP connection.
            source.close()
        print(data)
Ejemplo n.º 29
0
def stocks():
  """Download one year of Yahoo prices for four tickers and render them.

  Each frame is published as a module-level global named after its ticker,
  because ``build_stock_analyses()`` is called without arguments (presumably
  it reads those globals -- confirm against its definition). The frames are
  then passed to the 'stocks.html' template under the same names.
  """
  tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
  end = datetime.now()
  try:
    start = datetime(end.year - 1, end.month, end.day)
  except ValueError:
    # 29 February has no counterpart in the previous year; use the 28th.
    start = datetime(end.year - 1, end.month, 28)

  frames = {}
  for ticker in tickers:
    frames[ticker] = DataReader(ticker, 'yahoo', start, end)
    globals()[ticker] = frames[ticker]

  build_stock_analyses()
  return render_template('stocks.html', **frames)
Ejemplo n.º 30
0
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """

    assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

    if start is None:
        start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)

    if not start is None and not end is None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print stock
            stkd = DataReader(stock, 'yahoo', start, end).sort_index()
            data[stock] = stkd

    if indexes is not None:
        for name, ticker in indexes.iteritems():
            print name
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data