Example #1
    def importData(self):
        """
        Import (New) Data from Yahoo.
        """

        start = self._getLatestDate()
        end = self._getTodaysDate()

        Logger.log(logging.INFO, "Loading Data", {"scope":__name__, "tickerCode":self._tickerCode, "start":str(start), "end":str(end)})
        self._data = DataReader(self._tickerCode, "yahoo", start, end)

        self._data['Code'] = self._tickerCode

        for item in ['Open', 'High', 'Low']:
            self._data[item] = self._data[item] * self._data['Adj Close'] / self._data['Close']

        self._data.drop('Close', axis=1, inplace=True)
        self._data.rename(columns={'Adj Close':'Close'}, inplace=True)
        self._data['Volume'] = self._data['Volume'].astype(float)

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        query = "insert or replace into Equities (Date, Open, High, Low, Volume, Close, Code) values (?,?,?,?,?,?,?)"
        connection.executemany(query, self._data.to_records(index=True))
        connection.commit()

        connection.close()
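Note: the pandas.io.data module that these examples import DataReader from was deprecated and later removed from pandas; the same interface now lives in the standalone pandas-datareader package (and the Yahoo backend itself has been unreliable over the years). A minimal sketch, assuming pandas-datareader is installed and the data source is reachable:

from pandas_datareader.data import DataReader
from datetime import datetime

# Same call shape as the examples below, just a different import location.
df = DataReader("AAPL", "yahoo", datetime(2015, 1, 1), datetime(2015, 12, 31))
print(df.head())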
Example #2
def stockhistorynobackfilltodataframeusingcache(symbol, fromdate, todate):
    print('--------------------------')
    print('Initialized pullprices.stockhistorydailytodataframeusingcache')
    import pandas as pd
    #import numpy as np
    from pandas.io.data import DataReader
    #from datetime import datetime, timedelta

    import config
    mycachefolder = config.mycachefolder
    import mytools
    mytools.general().make_sure_path_exists(mycachefolder)

    cachedfilepathname = mycachefolder + '\\stockhistorynobackfill ' + symbol + ' ' + fromdate + ' ' + todate + '.csv'
    import os
    if os.path.isfile(cachedfilepathname):

        print('   Found cached file:  ' + cachedfilepathname)
        df_hist = pd.read_csv(cachedfilepathname, index_col=0)
    else:
        print('   Getting new file:' + cachedfilepathname)
        df_hist = DataReader(symbol, "yahoo", fromdate, todate)
        df_hist.to_csv(cachedfilepathname,
                       columns=('Open', 'High', 'Low', 'Close', 'Volume',
                                'Adj Close'))

    return df_hist
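A hypothetical call of the helper above; the dates are plain strings that simply become part of the cache file name, and config.mycachefolder must point at a writable directory:

df = stockhistorynobackfilltodataframeusingcache('AAPL', '2015-01-01', '2015-12-31')
print(df.tail())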
Example #3
 def readData(self, lookupTicker, source, start, end):
     
     '''Read the data - assumes start and end are datetime.date objects'''
     
     try:  
         lookupTicker = str(lookupTicker)
         if source == 'Quandl':
             #use Quandl reader
             start = str(start)
             end = str(end)
             data = Quandl.get(lookupTicker,
                               authtoken = self.quandlAuthToken,
                               trim_start = start, 
                               trim_end= end)
         else:
             #use pandas.io DataReader
             data = DataReader(lookupTicker, source , start, end)
             
         data = data.reset_index()
         logging.info("Read ticker {}".format(lookupTicker))
     except:
         logging.error("importData: Can't read ticker {}".format(lookupTicker))
         raise
     else:
         return data
Example #4
def main():
    #    db = psql.get_db()
    #    product_codes = db.prepare('''SELECT p.code AS code, p.id as id
    #        FROM products p LEFT JOIN companies c ON p.company_id = c.id
    #        WHERE c.sector IS NOT NULL
    #        and p.id < 9354
    #        ORDER BY p.id''')()

    product_codes = get_components_yahoo('^DJI').index
    total = len(product_codes)
    cur = 0
    products=[]

    logger.info('Start downloading of %d products.', len(product_codes))
    for code in product_codes:
        cur += 1
        try:
            product = DataReader(code, 'yahoo', start=date_from, end=date_to)
        except:
            logger.warn('(%d/%d) Fail downloading %s.', cur, total, code)
            continue

        product.code = code
        products.append(product)
        logger.info('(%d/%d) Downloaded %s.', cur, total, code)

    logger.info('Download complete.')
    return opt(products)
Example #5
def save_data():
    start = '1/1/1990'

    # Get S&P 500 data from yahoo
    sp500 = get_data_yahoo('^GSPC', start=start)['Adj Close']
    sp500.name = 'SP500'

    vix = get_data_yahoo('^VIX', start=start)['Adj Close']
    vix.name = 'VIX'

    # Get ten year and 3 month t-bill rates
    ten_yr = DataReader('DGS10', 'fred', start=start)
    three_mon = DataReader('DGS3MO', 'fred', start=start)

    ten_yr = ten_yr.ix[ten_yr.DGS10.str.count(r'^\.') != 1].astype(float)
    three_mon = three_mon.ix[three_mon.DGS3MO.str.count(r'^\.') != 1].astype(float)

    data = ten_yr.join(three_mon)
    data = data.join(sp500)
    data = data.join(vix)

    # Drop non-like observations (obs on different days)
    data = data.dropna()

    data.save('SP_YC.db')
    data.to_csv('the_data.csv')
Example #6
def getTimewindowStockPrice(ticker):
    timewindowStockPrice = {}#key: year; value: weekly price
    stockDF = DataReader(ticker,  "google", "2009-01-01", datetime.today().date())
#     print stockDF
    for idx, row in stockDF.iterrows():
#         print row[0], row['Close']
#         print datetime.fromtimestamp(idx)
#         print str(idx)
        dt = dateutil.parser.parse(str(idx)).date()
        year = dt.isocalendar()[0]
        week = dt.isocalendar()[1]
        price = row['Close']
        print year, week, price
        if year not in timewindowStockPrice:
            timewindowStockPrice[year] = {}
        if week not in timewindowStockPrice[year]:
            timewindowStockPrice[year][week] = []
#         print row['Close']
        timewindowStockPrice[year][week].append(price)
        
    #normalized weekly price
    for year in timewindowStockPrice.keys():
        for week in timewindowStockPrice[year].keys():
            timewindowStockPrice[year][week] = scipy.mean(timewindowStockPrice[year][week])
    
#     for year in timewindowStockPrice.keys():
#         print timewindowStockPrice[year]
    return timewindowStockPrice
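A minimal standalone illustration of the isocalendar() lookup the weekly bucketing above relies on (toy date, no data download needed):

from datetime import date

iso_year, iso_week, iso_weekday = date(2009, 1, 1).isocalendar()
# iso_year=2009, iso_week=1, iso_weekday=4 -- 2009-01-01 is a Thursday in ISO week 1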
Example #7
def pull_stocks_data(retries=2, start_date=None, end_date=None):
    """
    Pulling stocks raw data, of the stocks in the symbol list.
    :param retries: number of retries for getting each stock's data
    :param start_date: the first day of the data (datetime format), default value is 2 years before end_date.
    :param end_date: the last day of the data (datetime format), default value is today
    """
    symbols = get_stocks_symbols(write_to_files=False)
    log.notice("Starting to pull stocks data")
    end_date = datetime.today() if end_date is None else end_date
    start_date = end_date - timedelta(days=365*2) if start_date is None else start_date  # take 2 years backwards as a default

    for retry in range(retries):
        for symbol in symbols[:]:  # iterate over a copy; fetched symbols are removed from the list below
            filepath = make_filepath(DATA_PATH+"symbols", symbol, 'csv')  # optimize by avoiding calling this function every time
            try:
                data = DataReader(symbol,  'yahoo', start_date, end_date, retry_count=1)
            except IOError as e:
                log.error("IOError for data query of symbol: {}\n\tError msg: {}".format(symbol, e))
                continue
            data.to_csv(filepath)
            symbols.remove(symbol)
        log.warning("Unable to get {} symbols on try #{}".format(len(symbols), retry+1))

    log.error("Unable to get {} symbols after {} retries:\n{}".format(len(symbols), retries, symbols))
Example #8
def refreshYahooDatabase(db):
    
    rs = yahoo.yahooRecordset(db)
    ts = timeseries.timeseriesRecordset(db)
    
    # step 1: get the yahoo index end date
    idx = rs.select('*')
    
    for i in idx:
        
        try:
            r = ts.getLastDate(i['id'])
            
            start = r[0]['max_date']
            
            data = pd.DataFrame()
            
            if start != None:
                start = start + dt.timedelta(days=1)
                
                data = DataReader(  str(i['key']), 'yahoo', start)
            else:
                data = DataReader(  str(i['key']), 'yahoo')
            
            # insert new data
            add = []
            
            for index, row in data.iterrows():
                add.append((i['id'], index, row[i['field']]))
                
            ts.insert(add)
            
        except Exception as e:
            print('error processing index ' + str(i['key'])
                  + '/' + str(i['field']) + ': ' + str(e) + '\n')
Example #9
 def get_riskfree_rate(self,startdate,enddate,freq="M",maturity='1M'):
     """
     Rates from FRED
     http://research.stlouisfed.org/fred2/categories/116
     """
     rfcache = self.__class__._cacherfrate
     grabdata = False
     if rfcache is None:
         grabdata = True
     elif rfcache[0] > startdate or rfcache[1] < enddate:
         # re-grab if the cached range does not cover the requested window
         grabdata = True
          
     if grabdata:
         dt          = DataReader('DTB4WK',"fred", startdate,enddate)
         dt.columns  = ['RFRate']
         dt.fillna(method='backfill',inplace=True)
         rfcache     = (startdate,enddate,dt)
         self.__class__._cacherfrate= rfcache
     else:
         dt          = rfcache[2]
     
     dsm     = dt[startdate:enddate].resample('M')
     return dsm
Example #10
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """
    Read time series data. Use cached version if it exists and
    use_cache is True, otherwise retrieve, cache, then read.
    """
    base_dir = ''
    try:
        conf = pf.read_config()
        base_dir = conf['base_dir']
    except:
        pass
    finally:
        dir_name = os.path.join(base_dir, dir_name)

    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    timeseries_cache = os.path.join(dir_name, symbol + '.csv')

    if os.path.isfile(timeseries_cache) and use_cache:
        pass
    else:
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')

    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
Example #11
def get_data_from_yahoo(ticker):
    # No older computations OK
    start_date = date(2010, 01, 01)
    end_date = date.today()
    df = DataReader(ticker, 'yahoo', start_date, end_date)
    df.to_pickle(TMP_dir + ticker)
    print "Getting data for " + ticker + " from yahoo"
    return df
Example #12
def Econ_env(YYYY, m, dd):	
	start_date = datetime.datetime(YYYY, m, dd)
	GDP = DataReader('GDP', "fred", start=start_date)
	sp500 = DataReader('^GSPC', "yahoo", start=start_date)

	Array = DataFrame({'S&P':sp500["Adj Close"]})

	return Array
Example #13
def getPayOff(year,month):
    nifty = DataReader("^NSEI","yahoo",datetime(year,month,1),datetime(year,month,25))
    bn=DataReader("^NSEBANK","yahoo",datetime(year,month,1),datetime(year,month,25))

    nifty_daily=nifty.diff()['Close']
    bn_daily=bn.diff()['Close']

    nifty_daily_return =50*(nifty_daily)
    bn_daily_return =25*(bn_daily)
    return -nifty_daily_return.sum()+bn_daily_return.sum()
Example #14
def hist_vol(sym, days=10):
    try:
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception:
        print "Problem getting historical volatility!"
        raise SystemExit(1)
    logreturns = np.log(quotes / quotes.shift(1))
    vol = np.sqrt(252*logreturns.var()) #252 trading days in year (annualized volatility)
    return float(vol)
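For reference, the same annualized-volatility arithmetic on a tiny synthetic price series (made-up numbers, no download involved):

import numpy as np
import pandas as pd

prices = pd.Series([100.0, 101.0, 99.5, 102.0, 103.0, 101.5])
logreturns = np.log(prices / prices.shift(1))
annualized_vol = float(np.sqrt(252 * logreturns.var()))  # 252 trading days per year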
Example #15
def main(symbol):

    t1 = datetime.now()
    t2 = t1 - timedelta(days=PAST_DAYS)

    df  = DataReader(symbol,  FIN_SERVICE_PROVIDER , t2, t1)
    print df.head()
    print '...' * 20
    print df.tail()

    return df
Example #16
    def ts(self, symbol):
        parse = self.parsesymbol(symbol)
        df    = DataReader(parse['eq'], parse['proto'],start=datetime.datetime(1950,1,1))
        df    = df.rename(columns=lambda x: '_'.join(x.split()).lower()) # Need for Adj Close :(
        #print df.columns
        ts = df[parse['hlocv']]
        ts.index = map(lambda x: x.date(), ts.index)
        ts.name  = parse['eq']
        if '@' in symbol:
            ts.name += '@%s' % (parse['hlocv'])
        return ts
Example #17
def getLongShortPayOff(year,month,longSymbol,longQty,shortSymbol,shortQty):
    shortStk = DataReader(shortSymbol,"yahoo",datetime(year,month,1),datetime(year,month,25))
    longStk=DataReader(longSymbol,"yahoo",datetime(year,month,1),datetime(year,month,25))

    short_daily=shortStk.diff()['Close']
    long_daily=longStk.diff()['Close']

    short_daily_return =shortQty*(short_daily)
    long_daily_return =longQty*(long_daily)
    #print abs(long_daily_return.sum())-abs(short_daily_return.sum())
    return abs(long_daily_return.sum())-abs(short_daily_return.sum())
Example #18
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """

    assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

    if start is None:
        start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print(stock)
            stock_pathsafe = stock.replace(os.path.sep, '--')
            cache_filename = "{stock}-{start}-{end}.csv".format(
                stock=stock_pathsafe,
                start=start,
                end=end).replace(':', '-')
            cache_filepath = get_cache_filepath(cache_filename)
            if os.path.exists(cache_filepath):
                stkd = pd.DataFrame.from_csv(cache_filepath)
            else:
                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None:
        for name, ticker in iteritems(indexes):
            print(name)
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data
Example #19
 def get_prices_yahoo(self):
     """
     It get prices data from yahoo
     """
     try:
         self.df_prices = DataReader(self.symbol, "yahoo", self.from_date,
                                           self.to_date)
         self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close'] 
         self.data = True
     except Exception, e:
         print e
         sleep(20)
Example #20
 def importData():
     
     #Start Time
     start = datetime(2010,1,1)
     end = datetime.date(datetime.now())
     data = DataReader(sp500constituents[0], "yahoo", start, end)
     
     
     en = enumerate(sp500constituents)
     [i for i, x in en if x=='WFMI']
     
     
     sp500constituents[200:len(sp500constituents)]
     problems = []
     dataImportProblems = []
     for series in sp500constituents[485:len(sp500constituents)]:
         print series 
         try:  
             data = DataReader(series, "yahoo", start, end)
             data = data.reset_index()
         except:
             print "Can't read {}".format(series)
             dataImportProblems.append(series)
             continue
         con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
         try:
             psql.write_frame( data, series, con)
             con.commit()
         except:
             print "Problems with {}".format(series)
             problems.append(series)
         finally:
             con.close()
     
     #changing tables to have date formats so RODBC driver recognizes
     #Should check that this is occurring above.
     con = sqlite3.connect("/home/phcostello/Documents/Data/FinanceData.sqlite")
     for tb in sp500constituents:
         if psql.has_table(tb, con):
             sqltxt = "SELECT * FROM {}".format(tb)
             #print sqltxt
             data = psql.read_frame(sqltxt, con)
             sqlDropTxt = 'DROP TABLE "main"."{}"'.format(tb)
             #print sqlDropTxt
             psql.execute(sqlDropTxt, con)
             con.commit()
             psql.write_frame( data, tb, con)
             con.commit()
     
     con.close()
Example #21
 def get_prices_yahoo(self):
     """
     It get prices data from yahoo (last year)
     """
     try:
         self.df_prices = DataReader(self.symbol, "yahoo", self.from_date,
                                           self.to_date)
         
         self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close'] 
         
         
     except Exception, e:
         print e
         raise 
Example #22
    def run(self):
        """Collects data from Yahoo finance and preprocesses them."""

        # Get reference dates from SP500
        df_ref = DataReader('SPY', 'yahoo', self.start, self.end)['Adj Close']
        df_ref.sort_index(inplace=True)

        if 'SPY' in self.assets:
            self.df = df_ref
            self.df.rename({'AdjClose': 'SPY'})
        else:
            self.df = pd.DataFrame(index=df_ref.index)

        # Retrieve AdjClose price for other assets
        for asset in self.assets:
            df_asset = DataReader(asset, 'yahoo', self.start,
                                  self.end)['Adj Close']
            df_asset.rename(asset, inplace=True)
            self.df = self.df.join(df_asset, how='left')

        # Fill NaN
        self.df.fillna(method='ffill', inplace=True)
        self.df.fillna(method='bfill', inplace=True)

        # Compute assets simple returns
        self.df = (self.df / self.df.shift(1) - 1.0).ix[1:, :]

        # Reset indices
        self.df.reset_index(drop=True, inplace=True)
Example #23
def download_ohlc(sector_tickers, start, end):
    sector_ohlc = {}
    for sector, tickers in sector_tickers.iteritems():
        print 'Downloading data from Yahoo for %s sector' % sector
        data = DataReader(tickers, 'yahoo', start, end)
        for item in ['Open', 'High', 'Low']:
            data[item] = data[item] * data['Adj Close'] / data['Close']
        data.rename(items={'Open': 'open', 'High': 'high', 'Low': 'low',
                           'Adj Close': 'close', 'Volume': 'volume'},
                    inplace=True)
        data.drop(['Close'], inplace=True)
        sector_ohlc[sector] = data
    print 'Finished downloading data'
    return sector_ohlc
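The Open/High/Low rescaling used here (and in Example #1) can be checked on a toy one-row frame; the prices below are made up:

import pandas as pd

df = pd.DataFrame({'Open': [10.0], 'High': [11.0], 'Low': [9.5],
                   'Close': [10.5], 'Adj Close': [5.25]})
for item in ['Open', 'High', 'Low']:
    df[item] = df[item] * df['Adj Close'] / df['Close']
# Open/High/Low now sit on the same split/dividend-adjusted scale as 'Adj Close'.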
Example #24
 def getHistoricalQuotes(self, symbol, index, market=None):
     assert (isinstance(index, pd.Index))
     source = 'yahoo'
     try:
         quotes = DataReader(symbol, source, index[0], index[-1])
     except:
         log.error('** Could not get {} quotes'.format(symbol))
         return pd.DataFrame()
     if index.freq != pd.datetools.BDay() and index.freq != pd.datetools.Day():
         #NOTE reIndexDF has a column arg but here not provided
         quotes = utils.reIndexDF(quotes, delta=index.freq, reset_hour=False)
     if not quotes.index.tzinfo:
         quotes.index = quotes.index.tz_localize(self.tz)
     quotes.columns = utils.Fields.QUOTES
     return quotes
Example #25
 def getHistoricalQuotes(self, symbol, index, market=None):
     assert (isinstance(index, pd.Index))
     source = 'yahoo'
     try:
         quotes = DataReader(symbol, source, index[0], index[-1])
     except:
         log.error('** Could not get {} quotes'.format(symbol))
         return pd.DataFrame()
     if index.freq != pd.datetools.BDay() and index.freq != pd.datetools.Day():
         #NOTE reIndexDF has a column arg but here not provided
         quotes = utils.reIndexDF(quotes, delta=index.freq, reset_hour=False)
     if not quotes.index.tzinfo:
         quotes.index = quotes.index.tz_localize(self.tz)
     quotes.columns = utils.Fields.QUOTES
     return quotes
Example #26
def ADF(ticker, start, end):
    print('ADF')

    stock = DataReader(ticker, "yahoo", start, end)

    result = ts.adfuller(stock['Adj Close'], 1)
    print(result)
    print('')

    test = result[0]
    crit = result[4]
    one = crit['1%']
    five = crit['5%']
    ten = crit['10%']

    if test < one:
        print('Lesser than 1%')
        print('-----------------------------------------')
        return stock

    if test < five:
        print('Lesser than 5%')
        print('-----------------------------------------')
        return stock

    if test < ten:
        print('Lesser than 10%')
        print('-----------------------------------------')
        return stock

    print('Cannot reject Null Hypothesis')
    print('-----------------------------------------')
    return stock
Example #27
def peak_begin_dates(start="01/01/1972", end=datetime.now()):
    """
    Use the fred dataset `USRECQ` to determine the beginning of the
    peaks before all recessions between dates start and end

    Parameters
    ----------
    start : string or datetime.datetime, optional(default='01/01/1972')
        A string or other acceptable pandas date identifier that marks
        the beginning of the window for which we will search for starts
        of peaks

    end : string or datetime.datetime, optional(default=datetime.now())
        The ending date of the search window

    Returns
    -------
    rec_startind : pd.DatetimeIndex
        A pandas DatetimeIndex representing the starting points of each
        "peak" from start to end
    """
    # Get quarterly recession dates from FRED
    rec_dates = DataReader("USRECQ", "fred", start=start)
    one_vals = np.where(rec_dates == 1)[0]
    rec_start = [one_vals[0]]

    # Find the beginning of the recession dates (Don't include ones that
    # begin within three years of a previous one -- hence the `+12`)
    for d in one_vals:
        if d > max(rec_start) + 12:
            rec_start.append(d)

    rec_startind = rec_dates.index[rec_start]

    return rec_startind
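The "keep the start of each run of 1s, ignoring starts within 12 quarters of the previous one" logic above can be seen on a toy array:

import numpy as np

rec = np.array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])
one_vals = np.where(rec == 1)[0]   # positions of the 1s
starts = [one_vals[0]]
for d in one_vals:
    if d > max(starts) + 12:
        starts.append(d)
# starts == [2, 15]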
Example #28
def create_lagged_series(symbol, start_date, end_date, lags=5):
    """
    这个函数创建一个pandas的DataFrame,存储某个来自于Yahoo财经的股票
    的以调整收盘价计算的收益,以及一系列滞后的收益,还包括交易量以及某一天
    变动的方向
    """
    ts = DataReader(symbol, "yahoo", start_date - datetime.timedelta(days=365),
                    end_date)

    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["Adj Close"]
    tslag["Volume"] = ts["Volume"]

    for i in range(0, lags):
        tslag["Lag%s" % str(i + 1)] = ts["Adj Close"].shift(i + 1)

    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    for i, x in enumerate(tsret["Today"]):
        if (abs(x) < 0.0001):
            tsret["Today"][i] = 0.0001

    for i in range(0, lags):
        tsret["Lag%s" %
              str(i + 1)] = tslag["Lag%s" % str(i + 1)].pct_change() * 100.0

    tsret["Direction"] = np.sign(tsret["Today"])
    tsret = tsret[tsret.index >= start_date]
    return tsret
Example #29
def stockhistory(symbol, fromdate, todate):
    from pandas.io.data import DataReader
    #from datetime import datetime
    #dfromdate = fromdate.strftime('%b%d')
    #datetime(2000,1,1), datetime(2012,1,1)
    hist = DataReader(symbol, "yahoo", fromdate, todate)
    return hist
Example #30
def historical_volatility(sym, days):
    "Return the annualized stddev of daily log returns of `sym`."
    try:
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception, e:
        print "Error getting data for symbol '{}'.\n".format(sym), e
        return None, None
    logreturns = np.log(quotes / quotes.shift(1))
    return np.sqrt(252 * logreturns.var())  # 252 trading days in a year
Example #31
class Individual_screener():
    """
    This class will store the relevant information and functions for the 
    signal detection of entry points of one Strategy in one Security
    """
    def __init__(self, symbol, to_date=dt.datetime.today()):
        self.data = False
        self.symbol = symbol
        self.to_date = to_date
        self.from_date = self.to_date.replace(to_date.year - 1)
        self.signal = False

        self.get_prices_yahoo()

    def get_prices_yahoo(self):
        """
        It get prices data from yahoo (last year)
        """
        try:
            self.df_prices = DataReader(self.symbol, "yahoo", self.from_date,
                                        self.to_date)

            self.df_prices['pct Adj Close'] = self.df_prices.pct_change(
            )['Adj Close']
            self.data = True

        except Exception, e:
            print e
            sleep(5)
Example #32
class Individual_screener():
    """
    This class will store the relevant information and functions for the 
    signal detection of entry points of one Strategy in one Security
    """
    def __init__(self, symbol, to_date=dt.datetime.today()):
        self.data = False
        self.symbol = symbol
        self.to_date = to_date  
        self.from_date = self.to_date.replace(to_date.year - 1)
        self.signal = False
        
        self.get_prices_yahoo()
        
    def get_prices_yahoo(self):
        """
        It get prices data from yahoo (last year)
        """
        try:
            self.df_prices = DataReader(self.symbol, "yahoo", self.from_date,
                                              self.to_date)
            
            self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close'] 
            self.data = True
            
            
        except Exception, e:
            print e
            sleep(5)
Example #33
 def get_prices_df(self,ticker, date_start, date_end):
     try:
         cotation_data = DataReader(ticker,  "yahoo", date_start, date_end)
         cotation_data = cotation_data[cotation_data.Volume != 0]  # skip non-trading days, e.g. holidays such as 01/01
     except Exception as e:
         raise ErrorInternetConnexion('yahoo DataReader',  e)
     return cotation_data
Example #34
def main():
    '''
    1. The data from Yahoo! Finance is not grabbed by calling URL APIs; it is retrieved through the pandas APIs.
    2. This program gets TWSE data only; to fetch OTC data, the code needs to be modified.
    '''
    #Setup figure
    stock_fig = plt.figure()
    stock_plt = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    stock_title = "{} day price".format(stock_num)
    plt.suptitle(stock_title)
    startday = dtime.date(2000, 1, 1)

    # Add ".TW" to tell yahoo!Finance to query TWSE stock data.
    # If want to query OTC, please add ".TWO"
    stock_str = "{}.TW".format(stock_num)
    #print stock_str

    #about how the DataReader() works, please refer to data.py from pandas
    try:
        stock_data = DataReader(stock_str, 'yahoo', startday)
        #Clear the current axes
        stock_plt.cla()
        #Turn the axes grids on
        stock_plt.grid(True)
        #plot date and price
        stock_plt.plot(stock_data.index, stock_data['Close'])
        #show
        plt.show()
    except:
        exit("Error happened!!\nTry: python TwanStkEx1.py 2330")
Example #35
 def test_read_famafrench(self):
     for name in ("F-F_Research_Data_Factors",
                  "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
                  "F-F_ST_Reversal_Factor"):
         ff = DataReader(name, "famafrench")
         assert ff
         assert isinstance(ff, dict)
Example #36
    def set_source(self, source, tickers, start, end):
        prices = pd.DataFrame()
        counter = 0.
        for ticker in tickers:
            try:
                self._logger.info('Loading ticker %s (%.0f%% done)' %
                                  (ticker, 100.0 * counter / len(tickers)))
                prices[ticker] = DataReader(ticker, source, start,
                                            end).loc[:, 'Close']
            except Exception as e:
                self._logger.error(e)
                pass
            counter += 1

        events = []
        for row in prices.iterrows():
            timestamp = row[0]
            series = row[1]
            vals = series.values
            indx = series.index
            for k in np.random.choice(len(vals), replace=False,
                                      size=len(vals)):  # Shuffle!
                if np.isfinite(vals[k]):
                    events.append((timestamp, indx[k], vals[k]))

        self._source = events

        self._logger.info('Loaded data!')
Example #37
def get_data(stock, starttime, endtime):
    ibm = DataReader(stock,  'yahoo', starttime, endtime)
    #print(ibm['Adj Close'])
    
    daily_returns = deque(maxlen=c.normalize_std_len)
    daily_ret_arr = []
    size = len(ibm['Adj Close'])
    return_array = []

    i=0
    lastAc = ibm['Adj Close'][0]
    for stock in ibm['Adj Close']:
            return_array.append(stock)
            i+=1
            #for rec_date in (c.start + timedelta(days=n) for n in xrange((c.end-c.start).days)):
            #idx = next(i for i,d in enumerate(segment_start_dates) if rec_date >= d)
            try:
                    #d = rec_date.strftime("%Y-%m-%d")
                    ac = stock
                    daily_return = (ac - lastAc)/lastAc
                    #if len(daily_returns) == daily_returns.maxlen:
                    #    seq[idx].append(daily_return/np.std(daily_returns))
                    daily_returns.append(daily_return*scale)
                    daily_ret_arr.append(daily_return*scale)
                    lastAc = ac
                    #print "---"
                    #print stock 
                    #print daily_return
            except KeyError:
                    pass

    print "Records found:" + str(len(daily_ret_arr))
    return daily_ret_arr, return_array
Example #38
def stockhistorybackfilledtodictionary(symbol, fromdate, todate):

    from pandas.io.data import DataReader
    from datetime import datetime, timedelta

    hist = DataReader(symbol, "yahoo", fromdate, todate)

    date_format = "%Y-%m-%d"
    d = datetime.strptime(fromdate, date_format)
    delta = timedelta(days=1)
    last_adjclose = 'NaN'

    dictAdjClose = {}

    while d <= datetime.strptime(todate, date_format):
        #print(d.strftime(date_format))
        d_string = d.strftime(date_format)
        if d_string in hist.index:
            last_adjclose = hist.ix[d_string]['Adj Close']
            print(d_string, last_adjclose)
        else:
            print(d_string, 'nothing', last_adjclose)
        dictAdjClose[d_string] = [('AdjClose', last_adjclose)]
        d += delta

    return dictAdjClose
Example #39
 def test_read_famafrench(self):
     for name in ("F-F_Research_Data_Factors",
                  "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
                  "F-F_ST_Reversal_Factor", "F-F_Momentum_Factor"):
         ff = DataReader(name, "famafrench")
         self.assertTrue(ff is not None)
         self.assertIsInstance(ff, dict)
Example #40
def historical_pandas_yahoo(symbol, source='yahoo', start=None, end=None):
    '''
    Fetch from yahoo! finance historical quotes
    '''
    #NOTE Panel for multiple symbols ?
    #NOTE Adj Close column  name not cool (a space)
    return DataReader(symbol, source, start=start, end=end)
Example #41
 def DownloadStocks(self, startingDate, endDate):
     for stock in self._names:
         print("Getting data from {0}...".format(stock))
         stockData = DataReader(stock, "google", startingDate, endDate)
         self._columns = stockData.columns
         print("    Number of lines:{0}".format(stockData.shape[0]))
         self._data.append(stockData)
     return self._data
Example #42
def downloadStock(ticker, dataSource, start, end):
    gtemp = pd.DataFrame()
    try:
        gtemp = DataReader(ticker, dataSource, start, end)
        print ticker
    except:
        pass
    return gtemp
Example #43
def load_from_yahoo(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """

    if indexes is None:
        indexes = {'SPX': '^GSPC'}
    if stocks is None:
        stocks = ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO']
    if start is None:
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    if end is None:
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)

    assert start < end, "start date is later than end date."

    data = OrderedDict()

    for stock in stocks:
        print stock
        stkd = DataReader(stock, 'yahoo', start, end).sort_index()
        data[stock] = stkd

    for name, ticker in indexes.iteritems():
        print name
        stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
        data[name] = stkd

    df = pd.DataFrame({key: d['Close'] for key, d in data.iteritems()})
    df.index = df.index.tz_localize(pytz.utc)

    return df
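A hypothetical call of load_from_yahoo (requires network access to the old Yahoo endpoint; the tickers and dates are only illustrative):

import pytz
from datetime import datetime

prices = load_from_yahoo(stocks=['AAPL', 'IBM'],
                         start=datetime(1998, 1, 2, tzinfo=pytz.utc),
                         end=datetime(2001, 12, 31, tzinfo=pytz.utc))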
Example #44
def get_stock_history(stock):
    prices = None
    try:
        start_date = date.today() - timedelta(days=365)
        prices = DataReader(stock, "yahoo", start=start_date)
    except (HTTPError, BadStatusLine):
        pass
    return prices
Example #45
 def test_read_famafrench(self):
     raise nose.SkipTest('buggy as of 2/14/16; maybe a data revision?')
     for name in ("F-F_Research_Data_Factors",
                  "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
                  "F-F_ST_Reversal_Factor", "F-F_Momentum_Factor"):
         ff = DataReader(name, "famafrench")
         self.assertTrue(ff is not None)
         self.assertIsInstance(ff, dict)
Example #46
 def get_history(self, stock):
     print "Retrieving data for %s" % stock
     prices = None
     try:
         start_date = datetime.today() - timedelta(days=365)
         prices = DataReader(stock, "yahoo", start=start_date)
     except (HTTPError, BadStatusLine):
         pass
     return prices
Example #47
    def data_StockView_import(self):
        self.data = DataReader("GOOGL", "google", self.start_date,
                               self.end_date)

        source = urllib2.urlopen(
            'http://hopey.netfonds.no/posdump.php?date=20140530&paper=AAPL.O&csv_format=txt'
        )
        data = pandas.read_table(source)
        print data
Example #48
def stocks():
  tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
  end = datetime.now()
  start = datetime(end.year-1, end.month, end.day)
  for ticker in tickers:
    globals()[ticker] = DataReader(ticker, 'yahoo', start, end)

  build_stock_analyses()
  return render_template('stocks.html', AAPL=AAPL, GOOG=GOOG, MSFT=MSFT, AMZN=AMZN)
Example #49
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """
    Read time series data. Use cached version if it exists and
    use_cache is True, otherwise retrieve, cache, then read.
    """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)    

    timeseries_cache = os.path.join(dir_name, symbol + '.csv')
    
    if os.path.isfile(timeseries_cache) and use_cache:
        pass
    else:
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')
    
    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
Example #50
def import_data_yahoo_to_files( list_symbols,path,startdate):       
        list_error=[]
        logger.info("importing from "+str(startdate))    
        for symbol in  list_symbols:
            try :                
                    prices_df = DataReader(symbol, "yahoo", startdate)                
                    count_newdata = len(prices_df)
                    print  symbol , " ", count_newdata     
                    if(count_newdata <=0):
                        raise Exception("NO DATA for Dates for %s"%symbol)           
                    prices_df = prices_df.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high',
                                                   'Low': 'low', 'Close': 'actualclose', 'Adj Close': 'close',
                                                   'Volume': 'volume', 'Symbol': 'symbol'})
                    prices_df['symbol'] = symbol             
                    prices_df['symbol'] = prices_df.apply(lambda x: x['symbol'].replace('\r','').upper(), axis=1 )    
                    prices_df.to_csv(path + "/" + symbol + '.csv')                    
            except Exception as ex:
                logger.error(ex)
                list_error.append(symbol)
                logger.error(traceback.format_exc())
Example #51
def calculateTrend(sym, edate):
    DD = datetime.timedelta(days=365*5)
    sdate = (datetime.datetime.now() - DD).strftime("%Y-%m-%d")
    df = DataReader(sym, 'yahoo', sdate, edate)
    dfb = DataReader('^GSPC', 'yahoo', sdate, edate)

    # create a time-series of monthly data points
    rts = df.resample('M',how='last')
    rbts = dfb.resample('M',how='last')
    dfsm = pd.DataFrame({'s_adjclose' : rts['Adj Close'],
                            'b_adjclose' : rbts['Adj Close']},
                            index=rts.index)

    # compute returns
    dfsm[['s_returns', 'b_returns']] = dfsm[['s_adjclose', 'b_adjclose']]/\
        dfsm[['s_adjclose', 'b_adjclose']].shift(1) -1
    dfsm = dfsm.dropna()
    covmat = np.cov(dfsm["s_returns"], dfsm["b_returns"])

    # calculate measures now
    beta = covmat[0,1]/covmat[1,1]
    alpha= np.mean(dfsm["s_returns"])-beta*np.mean(dfsm["b_returns"])

    # r_squared     = 1. - SS_res/SS_tot
    ypred = alpha + beta * dfsm["b_returns"]
    SS_res = np.sum(np.power(ypred-dfsm["s_returns"],2))
    SS_tot = covmat[0,0]*(len(dfsm)-1) # SS_tot is sample_variance*(n-1)
    r_squared = 1. - SS_res/SS_tot
    # 5-year volatility and 1-year momentum
    volatility = np.sqrt(covmat[0,0])
    momentum = np.prod(1+dfsm["s_returns"].tail(12).values) -1

    # annualize the numbers
    prd = 12. # used monthly returns; 12 periods to annualize
    alpha = alpha*prd
    volatility = volatility*np.sqrt(prd)

    return beta, alpha, r_squared, volatility, momentum
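The beta/alpha calculation above reduces to a covariance ratio; a toy check on made-up monthly returns:

import numpy as np

s_returns = np.array([0.02, -0.01, 0.03, 0.00, 0.015])   # stock
b_returns = np.array([0.01, -0.02, 0.02, 0.01, 0.005])   # benchmark
covmat = np.cov(s_returns, b_returns)
beta = covmat[0, 1] / covmat[1, 1]
alpha = s_returns.mean() - beta * b_returns.mean()        # per-period; multiply by 12 to annualize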
Example #52
def determine_trend(symbol, trade_date=datetime.datetime.now(), 
                    trend_length=20, trend_end_days_ago=1): 
    """
    returns a "trend score" derived from performing a linear 
    regression on the daily closing price of the stock 
    identified by symbol.  
    
    This score is on the following scale: 
    "Negative Trend" ----- "Positive Trend"
       -1.0 -----------0-----------1.0

    The score considers both the slope of the linear model and
    the "fit" (based on the r^2 output of the ols function)

    trade_date -- date used to determine the trend from
    symbol -- the stock symbol to determine trend for
    trend_length -- the number of days to derive trend for
    trend_end_days_ago -- the number of days prior to trend_date to determine
                          when to end the trend analysis
    """
    end_date = trade_date.date() - datetime.timedelta(days=trend_end_days_ago)
    start_date = end_date - datetime.timedelta(days=trend_length)
    stock_df = DataReader(symbol, "yahoo", start=start_date, end=end_date)
    stock_df = stock_df.reset_index()
    result = ols(y=stock_df['Adj Close'], x=Series(stock_df.index))
    
    # This is the formula for the score without adjusting to fit within the
    # -1.0 - 1.0 scale.  Basically this takes the slope/starting price to get
    # the % change per day.  This is divided by a somewhat arbitrary value of 
    score = (result.beta['x']/result.beta['intercept'])/LARGE_DAILY_GAIN * result.r2

    # Now adjust the score to keep it in our trend range:
    if score > 1.0:
        return 1.0
    elif score < -1.0:
        return -1.0
    else:
        return score
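Note that the ols helper used here came from the old pandas.stats module, which has since been removed. A rough sketch of obtaining the same slope, intercept and r-squared inputs with statsmodels (assuming it is installed), on toy closing prices:

import numpy as np
import statsmodels.api as sm

y = np.array([10.0, 10.2, 10.1, 10.5, 10.8])         # toy adjusted closes
X = sm.add_constant(np.arange(len(y), dtype=float))  # regress price on time index
res = sm.OLS(y, X).fit()
intercept, slope = res.params
r_squared = res.rsquared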
Example #53
class Indicator1():
    def __init__(self, indicator, stock_name, start_date, end_date, buy_value, sell_value):
        if not isinstance(stock_name, str):
            raise TypeError("Sorry. 'Stock Name' must be string")
        self.stock = DataReader(stock_name, "yahoo", start_date, end_date)
        self.stock['returnValue'] = 'NaN'

        high_array = []
        for index, row in self.stock.iterrows():
            high_array.append(row['High'])
        high_nparray = numpy.asarray(high_array)

        low_array = []
        for index, row in self.stock.iterrows():
            low_array.append(row['Low'])
        low_nparray = numpy.asarray(low_array)

        close_array = []
        for index, row in self.stock.iterrows():
            close_array.append(row['Close'])
        close_nparray = numpy.asarray(close_array)
        real = getattr(talib, indicator)(high_nparray, low_nparray, close_nparray, timeperiod=14)

        self.stock['indicator'] = real
        self.stock['indicator_trigger'] = "NaN"
        count = 0
        flag = False
        for index, row in self.stock.iterrows():
            if count < len(self.stock.index):
                if float(row['indicator']) < buy_value and (not flag):
                    self.stock['indicator_trigger'][count] = "Buy"
                    flag = True
                elif float(row['indicator']) > sell_value and flag:
                    self.stock['indicator_trigger'][count] = "Sell"
                    flag = False
            count += 1
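A hypothetical instantiation, assuming TA-Lib is installed: WILLR is one of the TA-Lib functions that matches the (high, low, close, timeperiod) call used above, and the -80/-20 buy/sell thresholds are purely illustrative:

from datetime import datetime

ind = Indicator1('WILLR', 'AAPL', datetime(2014, 1, 1), datetime(2015, 1, 1), -80, -20)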
Example #54
def load(symbol, startDate="19910428", forceDownload=False):
    symbol = symbol.lower()
    startDate = datetools.to_datetime(startDate) + datetools.bday - datetools.bday

    dataPath = "data_from_yhoo"

    # first check if data is in directory, or if there is even a directory
    if not os.path.exists(dataPath):
        print "data path doesn't exist. creating."
        os.makedirs(dataPath)

        # check if file is even there
        # get all stock symbols by reading csv names from data folder
    symbols = []
    os.chdir(dataPath + "/")
    for afile in glob.glob("*.df"):
        symbols.append(afile[:-3])  # slice out csv extension
    os.chdir("../")  # reset dir

    # if it is, read data. right now, if you don't download
    if symbol in symbols and not forceDownload:

        df = DataFrame.load(dataPath + "/" + symbol + ".df")

        print "read " + symbol + " data from binary file"
        df = df[df.index >= startDate]
        return df

    else:
        # otherwise, redownload data from yahoo
        print symbol + " data not downloaded. downloading from yhoo now..."
        df = DataReader(symbol, "yahoo", startDate)
        # save locally
        df.save(dataPath + "/" + symbol + ".df")

        # return data df
        return df
Example #55
class Simulation():
    """
    Class that will store all data and functions related to a simulation
    """
    def __init__(self, symbol, from_date=None, to_date=None):
        """
        If dates not entered it will take a default to determine ######### TODO
        """
        self.symbol = symbol
        self.from_date = from_date
        self.to_date = to_date
        self.df_prices = pd.DataFrame()
        self.open = None # Price of current open trade
        self.close = None # Price of current closed trade
        self.status = 'out' # 'out' not invested, 'in' invested
        self.signal = False  # It will get records in which the signal activates
        self.max_open = 0.0 # Max individual investment (for % profit calculation)
        ### Measures
        self.nperiods = 0
        self.ntrades = 0
        self.abs_profit = 0.0 # Accumulated abs_profit ($ value gained/loss)
#         self.pct_simple_profit = 0.0 # Profit over max investment (without reinvestment)
        self.pct_compound_profit = 1.0 # Profit over max investment (with reinvestment)
#         self.pct_annual_simple_profit = 0.0 # Annualized simple profit
#         self.pct_annual_compound_profit = 0.0 # Annualized compound profit
        self.volatility = 0.0 # Volatility of returns (annualized)
        self.sharpe = 0.0 # Sharpe ratio (Rf = 0)
        self.drawdown = 0.0 # It will store the worst abs_profit
        
        ### Years calculation
        d_from_date = date(int(from_date[0:4]), int(from_date[4:6]), int(from_date[6:8]))
        d_to_date = date(int(to_date[0:4]), int(to_date[4:6]), int(to_date[6:8]))
        self.years = (d_to_date-d_from_date).days/365.0
        self.profit_trades = []
        
    def get_prices_yahoo(self):
        """
        It get prices data from yahoo
        """
        try:
            self.df_prices = DataReader(self.symbol, "yahoo", self.from_date,
                                              self.to_date)
            
            self.df_prices['pct Adj Close'] = self.df_prices.pct_change()['Adj Close'] 
            
            
        except Exception, e:
            print e
            raise 
Example #56
 def get_historical(self):
   """
   If mode = "online", download Yahoo quotes from start to end date into a pandas DataFrame.
   If mode = "disk", the data is extracted from the hard drive.
   """
   if self._mode == "online":
     self._historical = DataReader(self._symbol, 'yahoo', self._start, self._end)
   else:
     # check dictionary of symbols and open the file at the correct location
     data = stock_database.extract_series(self._symbol)
     df = pd.DataFrame(data, columns=['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'])
     self._historical = df[pd.to_datetime(df['date']) > self._start]
     self._historical = self._historical[pd.to_datetime(df['date']) < self._end]
     self._historical = self._historical.set_index('date')
   return self._historical
Example #57
# Parameters---------------------------------------------------------
start=dt.datetime(2007, 01, 01)
end=dt.date.today()
LI3=pd.date_range(start, end, freq='D')


curr=['USD','GBP','EUR']
libor_1m=[]
for i in curr:
    tick=i+'1MTD156N'
    libor_1m.append(tick)


df_libor_1m=pd.DataFrame(index=LI3)
for i in libor_1m:
    df2 = DataReader(i, "fred", start,end)
    df2=df2.applymap(f)
    df2=df2.ffill()
    df_libor_1m[i]=df2
df_libor_1m=df_libor_1m.ffill()
df_libor_1m.columns=curr
print df_libor_1m.head(8)


libor_1w=[]
for i in curr:
    tick=i+'1WKD156N'
    libor_1w.append(tick)

df_libor_1w=pd.DataFrame(index=LI3)
for i in libor_1w: