Example #1
0
class DataAgent(object):
    ''' Quote object - fill everything related:
            name, code, current value, bought value,
            market, actions owned,
            and date, open, close, volume, of asked period'''
    def __init__(self, sources=None, tz=pytz.utc, logger=None, lvl='debug'):
        self.tz = tz
        if logger is None:
            self._logger = LogSubsystem(DataAgent.__name__, lvl).getLog()
        else:
            self._logger = logger
        self.connected = {'remote': False, 'database': False, 'csv': False}
        if isinstance(sources, list):
            self.connectTo(sources, lvl=lvl, timezone=tz, logger=logger)

    def connectTo(self, connections, **kwargs):
        ''' Set up allowed data sources '''
        tz = kwargs.get('timezone', self.tz)
        log = kwargs.get('logger', None)
        lvl = kwargs.get('lvl', 'debug')
        if 'database' in connections:
            db_name = kwargs.get('db_name', str(os.environ['QTRADEDB']))
            self.db = QuantSQLite(db_name, timezone=tz, logger=log, lvl=lvl)
            self._logger.info('Database location provided, Connected to %s' %
                              db_name)
            self.connected['database'] = True
        if 'remote' in connections:
            self.remote = Fetcher(timezone=tz, logger=log, lvl=lvl)
            self.connected['remote'] = True
        if 'csv' in connections:
            raise NotImplementedError()

    def _makeIndex(self, args):
        ''' Take getQuotes input and produce suitable index '''
        #TODO implement elapse
        start_day = args.get('start', None)
        end_day = args.get('end', dt.datetime.now(self.tz))
        delta = args.get('delta', self._guessResolution(start_day, end_day))
        period = args.get('period', None)
        if period:
            if not start_day:
                start_day = end_day - period
        if not start_day:
            if not delta:
                start_day = dt.datetime.now(self.tz).date()
                delta = pd.datetools.Minute()
            else:
                self._logger.error('** No suitable date informations provided')
                return None
        return pd.date_range(start=start_day, end=end_day, freq=delta)

    def _inspectDB(self, ticker, request_idx, fields=Fields.QUOTES):
        ''' Check available df in db, according to the requested index '''
        self._logger.info('Inspecting database.')
        assert (isinstance(ticker, str))
        assert (isinstance(request_idx, pd.Index))
        assert (isinstance(fields, list))
        #TODO comparisons are too strics
        #TODO identificate NaN columns, that will erase everything at dropna
        if not self.connected['database']:
            self._logger.info('No database access allowed.')
            return DataFrame(), request_idx

        db_index = self.db.getDataIndex(ticker, fields, summary=False)
        if not isinstance(db_index, pd.Index):
            self._logger.info('No quotes stored in database.')
            return DataFrame(), request_idx
        elif db_index.freq > request_idx.freq and not db_index.freq == request_idx.freq:
            self._logger.info('Superior asked frequency,\
                    dropping and downloading along whole timestamp')
            return DataFrame(), request_idx
        else:
            if db_index[0] > request_idx[0] and db_index[-1] < request_idx[-1]:
                return DataFrame(), request_idx
                raise NotImplementedError()
                #TODO Two different timestamps to download !
                #to dl: start_day -> first_db_date and last_db_date -> end_day
            else:
                #NOTE Intersection and other seemed cool but don't work
                if db_index[0].hour != 0:
                    db_index = pd.date_range(
                        db_index[0] -
                        pd.datetools.relativedelta(hours=db_index[0].hour),
                        db_index[-1] -
                        pd.datetools.relativedelta(hours=db_index[0].hour),
                        freq=db_index.freq)
                intersect = db_index & request_idx
                #start_to_get = intersect[0] - request_idx.freq
                #if not start_to_get.tzinfo:
                #self.tz.localize(start_to_get)
                #idx_to_get = pd.date_range(start_to_get, intersect[-1], freq=request_idx.freq)
                idx_to_get = pd.date_range(intersect[0] - request_idx.freq,
                                           intersect[-1],
                                           freq=request_idx.freq)
                if idx_to_get is None or idx_to_get.size == 0:
                    self._logger.info('No quotes available in database.')
                    return DataFrame(), request_idx
                #NOTE try union minus intersect
                #TODO getQuotesDB and others take care of dataframe cast and fields cut
                if db_index[0] > request_idx[0]:
                    idx_to_dl = request_idx[request_idx < idx_to_get[0]]
                else:
                    idx_to_dl = request_idx[request_idx > idx_to_get[-1]]
                db_df = DataFrame(self.db.getQuotesDB(ticker, idx_to_get),
                                  columns=fields).dropna()
                if not db_df.index.tzinfo:
                    db_df.index.tz_localize(self.tz)
        return db_df, idx_to_dl

    #TODO each retriever, remote and db, takes care of dropna and columns
    def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
        '''
        @summary: retrieve google finance data asked while initializing
        and store it: Date, open, low, high, close, volume
        @param quotes: list of quotes to fetch
        @param fields: list of fields to store per quotes
        @param index: pandas.Index object, used for dataframes
        @param kwargs.start: date or datetime of the first values
               kwargs.end: date or datetime of the last value
               kwargs.delta: datetime.timedelta object, period of time to fill
               kwargs.save: save to database downloaded quotes
               kwargs.reverse: reverse companie name and field in panel
               kwargs.symbols
               kwargs.markets
        @return a panel/dataframe/timeserie like close = data['google']['close'][date]
        '''
        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------  Index check and build  -----------------'''
        #FIXME reversed dataframe could be store in database ?
        df = dict()
        save = kwargs.get('save', False)
        reverse = kwargs.get('reverse', False)
        markets = kwargs.get('markets', None)
        symbols = kwargs.get('symbols', None)
        if not isinstance(index, pd.DatetimeIndex):
            index = self._makeIndex(kwargs)
            if not isinstance(index, pd.DatetimeIndex):
                return None
        if not index.tzinfo:
            index = index.tz_localize(self.tz)
        assert (index.tzinfo)

        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols or not markets:
            self._logger.error('** No database neither informations provided')
            return None

        for ticker in tickers:
            if not ticker in symbols:
                self._logger.warning(
                    'No code availablefor {}, going on'.format(ticker))
                continue
            self._logger.info('Processing {} stock'.format(ticker))
            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Database check  ------------'''
            db_df, index = self._inspectDB(ticker, index, fields)
            assert (index.tzinfo)
            if not db_df.empty:
                assert (db_df.index.tzinfo)
                if index.size == 0:
                    save = False
                    df[ticker] = db_df
                    continue
            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Remote retrievers  ---------'''
            self._logger.info('Downloading missing data, from {} to {}'.format(
                index[0], index[-1]))
            #FIXME No index.freq for comaprison?
            #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
            if index.freq > pd.datetools.BDay():
                self._logger.info('Fetching minutely quotes ({})'.format(
                    index.freq))
                #TODO truncate in the method
                network_df = DataFrame(
                    self.remote.getMinutelyQuotes(symbols[ticker],
                                                  markets[ticker], index),
                    columns=fields).truncate(after=index[-1])
            else:
                network_df = DataFrame(self.remote.getHistoricalQuotes(
                    symbols[ticker], index),
                                       columns=fields)
            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Merging  -------------------'''
            if not db_df.empty:
                self._logger.debug(
                    'Checking db index ({}) vs network index ({})'.format(
                        db_df.index, network_df.index))
                if db_df.index[0] > network_df.index[0]:
                    df[ticker] = pd.concat([network_df, db_df])
                else:
                    df[ticker] = pd.concat([db_df, network_df]).sort_index()
            else:
                df[ticker] = network_df
        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------------------  Manage final panel  --------'''
        data = Panel.from_dict(df, intersect=True)
        if save:
            #TODO: accumulation and compression of data issue, drop always true at the moment
            if self.connected['database']:
                self.db.updateStockDb(data, Fields.QUOTES, drop=True)
            else:
                self._logger.warning('! No database connection for saving.')
        if reverse:
            return Panel.from_dict(df, intersect=True, orient='minor')
        #NOTE if data used here, insert every FIELD.QUOTES columns
        #NOTE Only return Panel when one ticker and/or one field ?
        return Panel.from_dict(df, intersect=True)

    def _guessResolution(self, start, end):
        if not start or not end:
            return None
        elapse = end - start
        if abs(elapse.days) > 5 and abs(elapse.days) < 30:
            delta = pd.datetools.BDay()
        elif abs(elapse.days) <= 5 and abs(elapse.days) > 1:
            delta = pd.datetools.Hour()
        elif abs(elapse.days) >= 30:
            delta = pd.datetools.BMonthEnd(round(elapse.days / 30))
        else:
            delta = pd.datetools.Minute(10)
        self._logger.info('Automatic delta fixing: {}'.format(delta))
        return delta

    def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
        ''' Return a quote panel '''
        #TODO Replace adj_close with actual_close
        #TODO Add reindex methods, and start, end, delta parameters
        reverse = kwargs.get('reverse', False)
        verbose = kwargs.get('verbose', False)
        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols:
            self._logger.error('** No database neither informations provided')
            return None
        timestamps = du.getNYSEdays(index[0], index[-1],
                                    dt.timedelta(hours=16))
        csv = da.DataAccess('Yahoo')
        df = csv.get_data(timestamps,
                          symbols.values(),
                          fields,
                          verbose=verbose)
        quotes_dict = dict()
        for ticker in tickers:
            j = 0
            quotes_dict[ticker] = dict()
            for field in fields:
                serie = df[j][symbols[ticker]].groupby(
                    index.freq.rollforward).aggregate(np.mean)
                #TODO add a function parameter to decide what to do about it
                clean_serie = serie.fillna(method='pad')
                quotes_dict[ticker][field] = clean_serie
                j += 1
        if reverse:
            return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
        return Panel.from_dict(quotes_dict, intersect=True)

    def help(self, category):
        #TODO: stuff to know like fields and functions
        print('{} help'.format(category))
Example #2
0
class DataAgent(object):
    ''' Quote object - fill everything related:
            name, code, current value, bought value,
            market, actions owned,
            and date, open, close, volume, of asked period'''
    def __init__(self, sources=None, tz=pytz.utc, logger=None, lvl='debug'):
        self.tz = tz
        if logger is None:
            self._logger = LogSubsystem(DataAgent.__name__, lvl).getLog()
        else:
            self._logger = logger
        self.connected = {'remote': False, 'database': False, 'csv': False}
        if isinstance(sources, list):
            self.connectTo(sources, lvl=lvl, timezone=tz, logger=logger)

    def connectTo(self, connections, **kwargs):
        ''' Set up allowed data sources '''
        tz = kwargs.get('timezone', self.tz)
        log = kwargs.get('logger', None)
        lvl = kwargs.get('lvl', 'debug')
        if 'database' in connections:
            db_name = kwargs.get('db_name', str(os.environ['QTRADEDB']))
            self.db = QuantSQLite(db_name, timezone=tz, logger=log, lvl=lvl)
            self._logger.info('Database location provided, Connected to %s' % db_name)
            self.connected['database'] = True
        if 'remote' in connections:
            self.remote = Fetcher(timezone=tz, logger=log, lvl=lvl)
            self.connected['remote'] = True
        if 'csv' in connections:
            raise NotImplementedError()

    def _makeIndex(self, args):
        ''' Take getQuotes input and produce suitable index '''
        #TODO implement elapse
        start_day = args.get('start', None)
        end_day = args.get('end', dt.datetime.now(self.tz))
        delta = args.get('delta', self._guessResolution(start_day, end_day))
        period = args.get('period', None)
        if period:
            if not start_day:
                start_day = end_day - period
        if not start_day:
            if not delta:
                start_day = dt.datetime.now(self.tz).date()
                delta = pd.datetools.Minute()
            else:
                self._logger.error('** No suitable date informations provided')
                return None
        return pd.date_range(start=start_day, end=end_day, freq=delta)

    def _inspectDB(self, ticker, request_idx, fields=Fields.QUOTES):
        ''' Check available df in db, according to the requested index '''
        self._logger.info('Inspecting database.')
        assert (isinstance(ticker, str))
        assert (isinstance(request_idx, pd.Index))
        assert (isinstance(fields, list))
        #TODO comparisons are too strics
        #TODO identificate NaN columns, that will erase everything at dropna
        if not self.connected['database']:
            self._logger.info('No database access allowed.')
            return DataFrame(), request_idx

        db_index = self.db.getDataIndex(ticker, fields, summary=False)
        if not isinstance(db_index, pd.Index):
            self._logger.info('No quotes stored in database.')
            return DataFrame(), request_idx
        elif db_index.freq > request_idx.freq and not db_index.freq == request_idx.freq:
            self._logger.info('Superior asked frequency,\
                    dropping and downloading along whole timestamp')
            return DataFrame(), request_idx
        else:
            if db_index[0] > request_idx[0] and db_index[-1] < request_idx[-1]:
                return DataFrame(), request_idx
                raise NotImplementedError()
                #TODO Two different timestamps to download !
                #to dl: start_day -> first_db_date and last_db_date -> end_day
            else:
                #NOTE Intersection and other seemed cool but don't work
                if db_index[0].hour != 0:
                    db_index = pd.date_range(db_index[0] - pd.datetools.relativedelta(hours=db_index[0].hour),
                            db_index[-1] - pd.datetools.relativedelta(hours=db_index[0].hour), freq=db_index.freq)
                intersect = db_index & request_idx
                #start_to_get = intersect[0] - request_idx.freq
                #if not start_to_get.tzinfo:
                    #self.tz.localize(start_to_get)
                #idx_to_get = pd.date_range(start_to_get, intersect[-1], freq=request_idx.freq)
                idx_to_get = pd.date_range(intersect[0] - request_idx.freq,
                                           intersect[-1], freq=request_idx.freq)
                if idx_to_get is None or idx_to_get.size == 0:
                    self._logger.info('No quotes available in database.')
                    return DataFrame(), request_idx
                #NOTE try union minus intersect
                #TODO getQuotesDB and others take care of dataframe cast and fields cut
                if db_index[0] > request_idx[0]:
                    idx_to_dl = request_idx[request_idx < idx_to_get[0]]
                else:
                    idx_to_dl = request_idx[request_idx > idx_to_get[-1]]
                db_df = DataFrame(self.db.getQuotesDB(ticker, idx_to_get),
                                  columns=fields).dropna()
                if not db_df.index.tzinfo:
                    db_df.index.tz_localize(self.tz)
        return db_df, idx_to_dl

    #TODO each retriever, remote and db, takes care of dropna and columns
    def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
        '''
        @summary: retrieve google finance data asked while initializing
        and store it: Date, open, low, high, close, volume
        @param quotes: list of quotes to fetch
        @param fields: list of fields to store per quotes
        @param index: pandas.Index object, used for dataframes
        @param kwargs.start: date or datetime of the first values
               kwargs.end: date or datetime of the last value
               kwargs.delta: datetime.timedelta object, period of time to fill
               kwargs.save: save to database downloaded quotes
               kwargs.reverse: reverse companie name and field in panel
               kwargs.symbols
               kwargs.markets
        @return a panel/dataframe/timeserie like close = data['google']['close'][date]
        '''
        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------  Index check and build  -----------------'''
        #FIXME reversed dataframe could be store in database ?
        df      = dict()
        save    = kwargs.get('save', False)
        reverse = kwargs.get('reverse', False)
        markets = kwargs.get('markets', None)
        symbols = kwargs.get('symbols', None)
        if not isinstance(index, pd.DatetimeIndex):
            index = self._makeIndex(kwargs)
            if not isinstance(index, pd.DatetimeIndex):
                return None
        if not index.tzinfo:
            index = index.tz_localize(self.tz)
        assert (index.tzinfo)

        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols or not markets:
            self._logger.error('** No database neither informations provided')
            return None

        for ticker in tickers:
            if not ticker in symbols:
                self._logger.warning('No code availablefor {}, going on'.format(ticker))
                continue
            self._logger.info('Processing {} stock'.format(ticker))

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Database check  ------------'''
            db_df, index = self._inspectDB(ticker, index, fields)
            assert (index.tzinfo)
            if not db_df.empty:
                assert (db_df.index.tzinfo)
                if index.size == 0:
                    save       = False
                    df[ticker] = db_df
                    continue

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Remote retrievers  ---------'''
            self._logger.info('Downloading missing data, from {} to {}'
                              .format(index[0], index[-1]))
            #FIXME No index.freq for comaprison?
            #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
            if index.freq > pd.datetools.BDay():
                self._logger.info('Fetching minutely quotes ({})'.format(index.freq))
                #TODO truncate in the method
                network_df = DataFrame(self.remote.getMinutelyQuotes(
                                       symbols[ticker], markets[ticker], index),
                                       columns=fields).truncate(after=index[-1])
            else:
                network_df = DataFrame(self.remote.getHistoricalQuotes(
                                       symbols[ticker], index), columns=fields)

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Merging  -------------------'''
            if not db_df.empty:
                self._logger.debug('Checking db index ({}) vs network index ({})'
                                   .format(db_df.index, network_df.index))
                if db_df.index[0] > network_df.index[0]:
                    df[ticker] = pd.concat([network_df, db_df])
                else:
                    df[ticker] = pd.concat([db_df, network_df]).sort_index()
            else:
                df[ticker] = network_df

        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------------------  Manage final panel  --------'''
        data = Panel.from_dict(df, intersect=True)
        if save:
            #TODO: accumulation and compression of data issue, drop always true at the moment
            if self.connected['database']:
                self.db.updateStockDb(data, Fields.QUOTES, drop=True)
            else:
                self._logger.warning('! No database connection for saving.')
        if reverse:
            return Panel.from_dict(df, intersect=True, orient='minor')
        #NOTE if data used here, insert every FIELD.QUOTES columns
        #NOTE Only return Panel when one ticker and/or one field ?
        return Panel.from_dict(df, intersect=True)

    def _guessResolution(self, start, end):
        if not start or not end:
            return None
        elapse = end - start
        if abs(elapse.days) > 5 and abs(elapse.days) < 30:
            delta = pd.datetools.BDay()
        elif abs(elapse.days) <= 5 and abs(elapse.days) > 1:
            delta = pd.datetools.Hour()
        elif abs(elapse.days) >= 30:
            delta = pd.datetools.BMonthEnd(round(elapse.days / 30))
        else:
            delta = pd.datetools.Minute(10)
        self._logger.info('Automatic delta fixing: {}'.format(delta))
        return delta

    def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
        ''' Return a quote panel '''
        #TODO Replace adj_close with actual_close
        #TODO Add reindex methods, and start, end, delta parameters
        reverse = kwargs.get('reverse', False)
        verbose = kwargs.get('verbose', False)
        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols:
            self._logger.error('** No database neither informations provided')
            return None
        timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
        csv = da.DataAccess('Yahoo')
        df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
        quotes_dict = dict()
        for ticker in tickers:
            j = 0
            quotes_dict[ticker] = dict()
            for field in fields:
                serie = df[j][symbols[ticker]].groupby(index.freq.rollforward).aggregate(np.mean)
                #TODO add a function parameter to decide what to do about it
                clean_serie = serie.fillna(method='pad')
                quotes_dict[ticker][field] = clean_serie
                j += 1
        if reverse:
            return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
        return Panel.from_dict(quotes_dict, intersect=True)

    def help(self, category):
        #TODO: stuff to know like fields and functions
        print('{} help'.format(category))
Example #3
0
class QuantSQLite(SQLiteWrapper):
    #NOTE @singleton
    """
    Specific database facilities for quanTrade software
    """
    def __init__(self, db_file='stocks.db', timezone=pytz.utc,
                 logger=None, lvl='debug'):
        self.tz = timezone
        if logger is None:
            self._log = LogSubsystem(self.__class__.__name__, lvl).getLog()
        else:
            self._log = logger
        dbRootDir = os.environ['QTRADEDATA']
        db_file = '/'.join((dbRootDir, db_file))
        SQLiteWrapper.__init__(self, db_file, self._log)

    #TODO: code that !
    @deprecated
    def findDB(self, dbName):
        ''' search database specified in QuanTrade directories '''

    def getTickersCodes(self, tickers):
        ''' simple function often used '''
        if self._db_name.find('stocks.db') == -1:
            self._log.error('** Bad DB, You are not connected to stocks.db')
            return None, None
        symbols = dict()
        markets = dict()
        table = 'properties'
        fields = ('symbol', 'market')
        for t in tickers:
            selector = {'ticker': t}
            res = self.getData(table, selector, fields)
            if res is None:
                self._log.warning('No references found in database.')
                continue
            symbols[t] = res[fields[0]]
            markets[t] = res[fields[1]]
        return symbols, markets

    def updateStockDb(self, quotes, fields, drop=False):
        '''
        @summary store quotes and information in SQLiteDB
        @param quotes: pandas.Panel not reversed,
                       like quotes[companies][fields]
        '''
        #TODO: maybe a general update function ?
        self._log.info('Updating database...')
        #TODO: Handling data accumulation and compression, right now just drop
        for name, frame in quotes.iteritems():
            self._log.info('saving {} quotes'.format(name))
            for item in fields:
                if item not in frame.columns:
                    frame[item] = np.empty(len(frame.index))
                    frame[item].fill(np.NaN)
            try:
                if drop:
                    self.execute('drop table if exists ' + name)
                    self.execute('create table ' + name +
                                 '(date int, {} real, {} real, {} real,\
                                 {} real, {} int, {} real)'
                                 .format(*Fields.QUOTES))
                #NOTE: could it be possible in one line with executemany ?
                frame = frame.fillna(method='pad')
                #frame = frame.fillna(-1)
                #frame = frame.dropna()
                for i in range(len(frame.index)):
                    raw = (dateToEpoch(frame.index[i]),
                           frame['open'][i],
                           frame['close'][i],
                           frame['high'][i],
                           frame['low'][i],
                           frame['volume'][i],
                           frame['adj_close'][i])
                    self.execute('insert into ' + name +
                                 ' values(?, ?, ?, ?, ?, ?, ?)', raw)
            except:
                self._log.error('While insering new values in database')
                return 1
        return 0

    def getDataIndex(self, ticker, fields=None, summary=True):
        '''
        @summary retrieve the ticker data index from db,
                 for storage optimization
        @return a pandas.Index
        '''
        assert isinstance(ticker, str)
        if not self.isTableExists(ticker):
            self._log.warning('Table does not exist')
            return None, None, None
        self._log.info('Getting index quotes properties.')
        try:
            dates = self.execute('select date from {}'.format(ticker))
        except sql.Error, e:
            self._log.error('** While getting index: {}'.format(e.args[0]))
            return None
        if len(dates) < 2 or not isinstance(dates[0][0], int):
            return None
        if isinstance(fields, list):
            selector = {'date': dates[0][0]}
            res = self.getData(ticker, selector, fields)
            if not res:
                self._log.error('** No data index found in database about {}'.format(ticker))
                return None
            #FIXME res is sometime a float, sometime a list
            for value in res:
                if not value:
                    #TODO may be keep available fields and pop the others
                    return None
        if summary:
            oldest_date = epochToDate(dates[0][0], self.tz)
            latest_date = epochToDate(dates[-1][0], self.tz)
            freq = epochToDate(dates[1][0], self.tz) - oldest_date
            self._log.debug('Data from {} to {} with an interval \
                                of {} days, {}mins'
                .format(oldest_date, latest_date,
                        freq.days, freq.seconds / 60))
            return [oldest_date, latest_date, freq]
        else:
            dates = [epochToDate(dates[i][0]) for i in range(len(dates))]
            return dateToIndex(dates, self.tz)
Example #4
0
class QuantSQLite(SQLiteWrapper):
    #NOTE @singleton
    """
    Specific database facilities for quanTrade software
    """
    def __init__(self,
                 db_file='stocks.db',
                 timezone=pytz.utc,
                 logger=None,
                 lvl='debug'):
        self.tz = timezone
        if logger is None:
            self._log = LogSubsystem(self.__class__.__name__, lvl).getLog()
        else:
            self._log = logger
        dbRootDir = os.environ['QTRADEDATA']
        db_file = '/'.join((dbRootDir, db_file))
        SQLiteWrapper.__init__(self, db_file, self._log)

    #TODO: code that !
    @deprecated
    def findDB(self, dbName):
        ''' search database specified in QuanTrade directories '''

    def getTickersCodes(self, tickers):
        ''' simple function often used '''
        if self._db_name.find('stocks.db') == -1:
            self._log.error('** Bad DB, You are not connected to stocks.db')
            return None, None
        symbols = dict()
        markets = dict()
        table = 'properties'
        fields = ('symbol', 'market')
        for t in tickers:
            selector = {'ticker': t}
            res = self.getData(table, selector, fields)
            if res is None:
                self._log.warning('No references found in database.')
                continue
            symbols[t] = res[fields[0]]
            markets[t] = res[fields[1]]
        return symbols, markets

    def updateStockDb(self, quotes, fields, drop=False):
        '''
        @summary store quotes and information in SQLiteDB
        @param quotes: pandas.Panel not reversed,
                       like quotes[companies][fields]
        '''
        #TODO: maybe a general update function ?
        self._log.info('Updating database...')
        #TODO: Handling data accumulation and compression, right now just drop
        for name, frame in quotes.iteritems():
            self._log.info('saving {} quotes'.format(name))
            for item in fields:
                if item not in frame.columns:
                    frame[item] = np.empty(len(frame.index))
                    frame[item].fill(np.NaN)
            try:
                if drop:
                    self.execute('drop table if exists ' + name)
                    self.execute('create table ' + name +
                                 '(date int, {} real, {} real, {} real,\
                                 {} real, {} int, {} real)'.format(
                                     *Fields.QUOTES))
                #NOTE: could it be possible in one line with executemany ?
                frame = frame.fillna(method='pad')
                #frame = frame.fillna(-1)
                #frame = frame.dropna()
                for i in range(len(frame.index)):
                    raw = (dateToEpoch(frame.index[i]), frame['open'][i],
                           frame['close'][i], frame['high'][i],
                           frame['low'][i], frame['volume'][i],
                           frame['adj_close'][i])
                    self.execute(
                        'insert into ' + name + ' values(?, ?, ?, ?, ?, ?, ?)',
                        raw)
            except:
                self._log.error('While insering new values in database')
                return 1
        return 0

    def getDataIndex(self, ticker, fields=None, summary=True):
        '''
        @summary retrieve the ticker data index from db,
                 for storage optimization
        @return a pandas.Index
        '''
        assert isinstance(ticker, str)
        if not self.isTableExists(ticker):
            self._log.warning('Table does not exist')
            return None, None, None
        self._log.info('Getting index quotes properties.')
        try:
            dates = self.execute('select date from {}'.format(ticker))
        except sql.Error, e:
            self._log.error('** While getting index: {}'.format(e.args[0]))
            return None
        if len(dates) < 2 or not isinstance(dates[0][0], int):
            return None
        if isinstance(fields, list):
            selector = {'date': dates[0][0]}
            res = self.getData(ticker, selector, fields)
            if not res:
                self._log.error(
                    '** No data index found in database about {}'.format(
                        ticker))
                return None
            #FIXME res is sometime a float, sometime a list
            for value in res:
                if not value:
                    #TODO may be keep available fields and pop the others
                    return None
        if summary:
            oldest_date = epochToDate(dates[0][0], self.tz)
            latest_date = epochToDate(dates[-1][0], self.tz)
            freq = epochToDate(dates[1][0], self.tz) - oldest_date
            self._log.debug('Data from {} to {} with an interval \
                                of {} days, {}mins'.format(
                oldest_date, latest_date, freq.days, freq.seconds / 60))
            return [oldest_date, latest_date, freq]
        else:
            dates = [epochToDate(dates[i][0]) for i in range(len(dates))]
            return dateToIndex(dates, self.tz)