Exemple #1
0
    def get_index_eod_data(
            self, index_list=None, index_type=None,
            start=None, end=None,
        ):
        '''
        Return end-of-day rows for the requested indices within a date range.

        The index EOD table is read from the local HDF store. When its key
        is not present in the store, force_load_data('index_eod_data') is
        called first and the table is read afterwards.

        index_list, index_type: forwarded to get_index_list() to resolve
            which index symbols are kept.
        start, end: range bounds, converted with get_date(..., out='dt');
            rows whose `date` lies between them (both ends inclusive) are
            returned.

        Returns the filtered DataFrame with its stored index reset to columns.
        '''
        if NSE.__INDEX_EOD_DATA_KEY not in get_store_keys(NSE.__NSE_DATA_PATH):
            self.force_load_data(force_load='index_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()

        index_list = self.get_index_list(
            index_list=index_list, index_type=index_type
        )

        eod_data = eod_data[eod_data.symbol.isin(index_list)]
        start = get_date(start, out='dt', start=True)
        end = get_date(end, out='dt', start=False)
        # Boolean-mask selection goes through .loc; the former .ix indexer
        # no longer exists in pandas >= 1.0.
        in_range = (eod_data.date >= start) & (eod_data.date <= end)
        return eod_data.loc[in_range]
Exemple #2
0
 def get_symbol_eod_data(
         self, symbol_list=None,
         index=None, index_type=None, start=None, end=None,
         min_rows=0, missing_count=0
     ):
     '''
     Return end-of-day rows for the selected symbols between start and end.

     The symbol EOD table comes from the local HDF store; when its key is
     absent, force_load_data('symbol_eod_data') is run before reading.
     Symbols are resolved through get_symbol_list() (which receives the
     unconverted `start`), then both bounds are converted with get_date()
     and rows are filtered on the `date` column, bounds inclusive.
     '''
     store_path = NSE.__NSE_DATA_PATH
     data_key = NSE.__SYMBOL_EOD_DATA_KEY
     if data_key not in get_store_keys(store_path):
         self.force_load_data(force_load='symbol_eod_data')
     eod_data = pd.read_hdf(store_path, data_key).reset_index()

     symbol_list = self.get_symbol_list(
         symbol_list=symbol_list, index=index, index_type=index_type,
         start=start, missing_count=missing_count, min_rows=min_rows
     )
     wanted = eod_data.symbol.isin(symbol_list)
     eod_data = eod_data[wanted]

     start = get_date(start, out='dt', start=True)
     end = get_date(end, out='dt', start=False)
     not_before = eod_data.date >= start
     not_after = eod_data.date <= end
     return eod_data.loc[not_before & not_after]
Exemple #3
0
    def get_profits_per_day(self, trades=None, trades_type='all'):
        '''
        Build a date x symbol table of day-over-day profit changes.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise trades are loaded through
        get_trades(trades_type=trades_type).

        For each trade, the values from the trade date onwards become the
        difference between consecutive rows of get_profits(); the trade
        date itself is set to the profit on that date.
        '''
        if trades is None:
            if Zerodha.PROFITS_PER_DAY_KEY in get_store_keys(
                    Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.PROFITS_PER_DAY_KEY)
            trades = self.get_trades(trades_type=trades_type)

        traded_index = self.get_traded_dates(start=trades.date.min()).index
        profits_per_day = pd.DataFrame(
            0, index=traded_index, columns=trades.symbol.unique())
        profits = self.get_profits(trades=trades, trades_type=trades_type)

        for trade in trades.itertuples():
            name, day = trade.symbol, trade.date
            current = profits.loc[day:, name]
            previous = profits.loc[day:, name].shift(1)
            profits_per_day.loc[day:, name] = current - previous
            # The first row of the shifted difference is undefined, so the
            # trade date takes the profit value directly.
            profits_per_day.loc[day, name] = profits.loc[day, name]

        return profits_per_day
Exemple #4
0
    def get_profits(self, trades=None, trades_type='all'):
        '''
        Build a date x symbol table of profits derived from trades.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise trades are loaded through
        get_trades(trades_type=trades_type).

        For every "inter" trade, profit from the trade date onwards is
        holdings minus investments. Each "intra" trade then adds its
        cash_flow to the profit on its own date.
        '''
        if trades is None:
            if Zerodha.PROFITS_KEY in get_store_keys(Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.PROFITS_KEY)
            trades = self.get_trades(trades_type=trades_type)

        traded_index = self.get_traded_dates(start=trades.date.min()).index
        profits = pd.DataFrame(
            0, index=traded_index, columns=trades.symbol.unique())
        investments = self.get_investments(trades=trades)
        holdings = self.get_holdings(trades=trades)

        intra_trades = trades.query('trade_type == "intra"')
        inter_trades = trades.query('trade_type == "inter"')

        for trade in inter_trades.itertuples():
            onwards = slice(trade.date, None)
            held = holdings.loc[onwards, trade.symbol]
            invested = investments.loc[onwards, trade.symbol]
            profits.loc[onwards, trade.symbol] = held - invested

        for trade in intra_trades.itertuples():
            so_far = profits.loc[trade.date, trade.symbol]
            profits.loc[trade.date, trade.symbol] = so_far + trade.cash_flow
        return profits
Exemple #5
0
    def get_holdings(self, trades=None):
        '''
        Build a date x symbol table of holding values derived from trades.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise all trades are loaded via get_trades().

        For each trade, values from the trade date onwards become
        total_qty times the close price, or 0 once total_qty is 0.
        Symbols without a close-price column are filled with NaN.
        '''
        if trades is None:
            if Zerodha.HOLDINGS_KEY in get_store_keys(Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.HOLDINGS_KEY)
            trades = self.get_trades(trades_type='all')

        traded_index = self.get_traded_dates(start=trades.date.min()).index
        holdings = pd.DataFrame(
            0, index=traded_index, columns=trades.symbol.unique())
        close = self.get_symbol_eod_values(data='close')

        for trade in trades.itertuples():
            name = trade.symbol
            day = trade.date
            held_qty = trade.total_qty
            if name not in close.columns:
                # No price history for this symbol: blank the whole column.
                holdings[name] = np.nan
                continue
            if held_qty == 0:
                worth = 0
            else:
                worth = held_qty * close[name][day:]
            holdings.loc[day:, name] = worth
        return holdings
Exemple #6
0
    def get_investments(self, trades=None):
        '''
        Build a date x symbol table of invested amounts derived from trades.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise all trades are loaded via get_trades().

        Each trade adds trade_qty * trade_rate to the symbol's column from
        the trade date onwards; a trade that leaves total_qty at 0 resets
        the column to 0 from that date onwards instead.
        '''
        if trades is None:
            if Zerodha.INVESTMENTS_KEY in get_store_keys(
                    Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.INVESTMENTS_KEY)
            trades = self.get_trades(trades_type='all')

        traded_index = self.get_traded_dates(start=trades.date.min()).index
        investments = pd.DataFrame(
            0, index=traded_index, columns=trades.symbol.unique())

        for trade in trades.itertuples():
            name, day = trade.symbol, trade.date
            amount = trade.trade_qty * trade.trade_rate
            if trade.total_qty == 0:
                investments.loc[day:, name] = 0
                continue
            running = investments.loc[day:, name]
            investments.loc[day:, name] = running + amount
        return investments
Exemple #7
0
    def get_index_meta(self):
        '''
        Return metadata for indices and their components.

        The table is read from the local HDF store. When its key is not
        present, a warning is issued and force_load_data('index') is called
        before reading. Literal 'nan' strings in the table are replaced by
        np.nan before it is returned.
        '''
        if Market.__INDEX_META_KEY not in get_store_keys(Market.__Market_PATH):
            # The message names index_meta: this is the index metadata
            # reader, not the symbol one.
            warnings.warn(
                'Unable to read index_meta locally. Fetching data from NSE website'
            )
            self.force_load_data('index')

        index_meta = pd.read_hdf(Market.__Market_PATH,
                                 Market.__INDEX_META_KEY)
        index_meta = index_meta.replace('nan', np.nan)
        return index_meta
Exemple #8
0
 def get_symbol_meta(self):
     '''
     Return the symbol metadata table from the local HDF store.

     When the key is not present in the store, a warning is issued and
     force_load_data('symbol') is called before the table is read.
     '''
     store_path = Market.__Market_PATH
     meta_key = Market.__SYMBOL_META_KEY
     if meta_key not in get_store_keys(store_path):
         warnings.warn(
             'Unable to read symbol_meta locally. Fetching data from NSE website'
         )
         self.force_load_data('symbol')
     return pd.read_hdf(store_path, meta_key)
Exemple #9
0
    def get_trades(self, trades_type='all', path=None):
        '''
        Return the trades table, optionally restricted to one trade type.

        trades_type: 'all' keeps every row; any other value keeps rows whose
            trade_type column equals it (e.g. 'intra', 'inter').
        path: when given, trades are read with read_trades(path=path);
            otherwise they come from the local HDF store.

        Raises AttributeError when no path is given and the store holds
        no trades.
        '''
        if path is None:
            if Zerodha.TRADES_KEY not in get_store_keys(
                    Zerodha.TRADES_DATA_PATH):
                raise AttributeError('Data not avaliable')
            trades = pd.read_hdf(Zerodha.TRADES_DATA_PATH, Zerodha.TRADES_KEY)
        else:
            trades = self.read_trades(path=path)

        if trades_type == 'all':
            return trades
        # The query string refers to the local `trades_type` through '@'.
        return trades.query('trade_type == @trades_type')
Exemple #10
0
    def get_symbol_eod_values(
            self, data='returns', symbol_list=None,
            index=None, index_type=None, start=None, end=None,
            min_rows=0, missing_count=0
        ):
        '''
        Return one EOD column as a table with one column per symbol.

        data: name of an EOD column, or the aliases 'returns'
            (log_returns) and 'deliverble' (pct_deliverble). Any other
            value triggers a warning and falls back to log_returns.
        symbol_list, index, index_type, min_rows, missing_count, start:
            forwarded to get_symbol_list() to choose the symbols.
        start, end: converted with get_date() and used to slice the rows.

        The table is read from the local HDF store, after a call to
        force_load_data('symbol_eod_values') when its key is missing.
        Columns that are entirely NaN in the selected range are dropped.
        '''
        symbol_list = self.get_symbol_list(
            symbol_list=symbol_list, index=index, index_type=index_type,
            start=start, missing_count=missing_count, min_rows=min_rows
        )
        eod_data_schema = [
            'symbol', 'date', 'prev_close', 'open', 'high',
            'low', 'last', 'close', 'vwap',
            'trades', 'volume', 'turnover', 'pct_deliverble',
            'simple_returns', 'log_returns', 'high_low_spread', 'open_close_spread'
        ]
        if data in eod_data_schema:
            values = data
        elif data == 'returns':
            values = 'log_returns'
        elif data == 'deliverble':
            values = 'pct_deliverble'
        else:
            warnings.warn(
                'Invalid type of data requested. Returning returns data'
            )
            values = 'log_returns'

        store_key = 'symbol_eod_values_{0}'.format(values)
        if store_key not in get_store_keys(NSE.__NSE_DATA_PATH):
            self.force_load_data(force_load='symbol_eod_values', values=values)
        table = pd.read_hdf(NSE.__NSE_DATA_PATH, store_key)

        # Keep only the requested symbols that actually exist in the table.
        table = table[table.columns.intersection(symbol_list)]

        start = get_date(start, 'str', True)
        end = get_date(end, 'str', False)
        table = table[start:end]
        return table.dropna(how='all', axis=1)
Exemple #11
0
    def get_returns(self, trades=None):
        '''
        Build a date x symbol table of log returns derived from trades.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise "inter" trades are loaded through
        get_trades().

        For each "inter" trade, returns from the trade date onwards are the
        log ratio of consecutive holdings values. On the trade date itself:
          * a 'buy' uses log(holdings / investments);
          * any other trade type uses log(trade_rate / previous close),
            provided a previous date exists.
        Once total_qty reaches 0, returns after the trade date are blanked.

        Returns a float DataFrame; cells never written stay NaN.
        '''
        if trades is None:
            if Zerodha.RETURNS_KEY in get_store_keys(Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.RETURNS_KEY)
            trades = self.get_trades(trades_type='inter')

        returns = pd.DataFrame(
            index=self.get_traded_dates(start=trades.date.min()).index,
            columns=trades.symbol.unique())
        investments = self.get_investments(trades=trades)
        holdings = self.get_holdings(trades=trades)
        close = self.get_symbol_eod_values(data='close')

        inter_trades = trades.query('trade_type == "inter"')
        for trade in inter_trades.itertuples():
            symbol = trade.symbol
            date = trade.date
            rate = trade.trade_rate
            total_qty = trade.total_qty
            previous_date, next_date = get_adjacent_dates(index=returns.index,
                                                          date=date)

            held = holdings.loc[date:, symbol]
            returns.loc[date:, symbol] = np.log(held / held.shift(1))
            if trade.type == 'buy':
                returns.loc[date, symbol] = np.log(
                    holdings.loc[date, symbol] / investments.loc[date, symbol])
            elif previous_date is not None:
                returns.loc[date, symbol] = np.log(
                    rate / close.loc[previous_date, symbol])

            # NOTE(review): previous_date is checked against None above, so
            # next_date is presumably None on the last traded date. Slicing
            # with None would blank the whole column, hence the guard.
            if total_qty == 0 and next_date is not None:
                returns.loc[next_date:, symbol] = np.nan
        # Builtin float: the np.float alias was removed in NumPy 1.24.
        return returns.astype(float)
Exemple #12
0
    def get_traded_dates(self, start=None, end=None):
        '''
        Return the stored traded-dates table restricted to [start, end].

        The table is read from the local HDF store, after a call to
        force_load_data('traded_dates') when its key is missing.
        start and end are converted with get_date() and used to slice the
        rows. A `specific_date_count` column numbering the selected rows
        1..n is added to the result.
        '''
        if Market.__TRADED_DATES_KEY not in get_store_keys(Market.__Market_PATH):
            self.force_load_data('traded_dates')
        traded_dates = pd.read_hdf(Market.__Market_PATH,
                                   Market.__TRADED_DATES_KEY)

        start = get_date(start, 'str', True)
        end = get_date(end, 'str', False)

        # Work on an explicit copy: adding a column to a bare slice raises
        # pandas' SettingWithCopyWarning and is ambiguous about its target.
        traded_dates = traded_dates[start:end].copy()
        traded_dates['specific_date_count'] = list(
            range(1, len(traded_dates) + 1))

        return traded_dates
Exemple #13
0
    def get_quantity(self, trades=None):
        '''
        Build a date x symbol table of held quantities derived from trades.

        With trades omitted, the cached table is returned from the local
        store when present; otherwise all trades are loaded via get_trades().

        Each trade writes its total_qty into the symbol's column from the
        trade date onwards, so later trades overwrite earlier ones.
        '''
        if trades is None:
            if Zerodha.QUANTITY_KEY in get_store_keys(Zerodha.TRADES_DATA_PATH):
                return pd.read_hdf(Zerodha.TRADES_DATA_PATH,
                                   Zerodha.QUANTITY_KEY)
            trades = self.get_trades(trades_type='all')

        traded_index = self.get_traded_dates(start=trades.date.min()).index
        quantity = pd.DataFrame(
            0, index=traded_index, columns=trades.symbol.unique())

        for trade in trades.itertuples():
            onwards = slice(trade.date, None)
            quantity.loc[onwards, trade.symbol] = trade.total_qty
        return quantity
Exemple #14
0
    def get_risk_free_rate(self,
                           returns=None,
                           freq=None,
                           start=None,
                           end=None,
                           excess=False):
        '''
        Return the risk-free rate, or returns in excess of it.

        returns: Series or DataFrame of returns. When omitted, a single
            'returns' column of zeros over get_traded_dates(start, end) is
            used. A DataFrame argument is copied, not modified.
        freq: 'daily'/'d'/None, 'monthly'/'m' or 'yearly'/'a'/'annual'/'y';
            selects the rf_daily, rf_monthly or rf_yearly column.
        start, end: only used when `returns` is omitted.
        excess: when True each column becomes returns minus the rate;
            otherwise each column is replaced by the rate itself.

        Raises ValueError for an unrecognised freq.
        '''
        # Validate first: an unknown freq used to fall through and leave the
        # whole rate table in place of a single rate column.
        if freq in ['daily', 'd', None]:
            rate_column = 'rf_daily'
        elif freq in ['monthly', 'm']:
            rate_column = 'rf_monthly'
        elif freq in ['yearly', 'a', 'annual', 'y']:
            rate_column = 'rf_yearly'
        else:
            raise ValueError(
                'Unknown freq {0!r}; expected daily/d, monthly/m '
                'or yearly/a/annual/y'.format(freq))

        if Market.__RISK_FREE_RATE_KEY not in get_store_keys(Market.__Market_PATH):
            self.force_load_data('rf')
        risk_free_rate = pd.read_hdf(Market.__Market_PATH,
                                     Market.__RISK_FREE_RATE_KEY)
        risk_free_rate = risk_free_rate[rate_column]

        if returns is None:
            traded_dates = self.get_traded_dates(start, end)
            returns = pd.DataFrame(0,
                                   index=traded_dates.index,
                                   columns=['returns'])
        elif isinstance(returns, pd.Series):
            returns = pd.DataFrame(returns)
        else:
            returns = returns.copy()

        for symbol in returns.columns:
            if excess:
                returns[symbol] = returns[symbol] - risk_free_rate
            else:
                returns[symbol] = risk_free_rate

        return returns
Exemple #15
0
    def get_eod_meta(self, eod_data=None, eod_type='symbol'):
        '''
        Compute per-symbol (or per-index) coverage metadata for EOD data.

        eod_data: DataFrame with at least `symbol`, `date` and `volume`
            columns. When omitted, stored metadata is returned if present;
            otherwise stored EOD data is summarised, and if neither exists
            a table of placeholder defaults is returned.
        eod_type: 'symbol' or 'index'; selects which metadata table supplies
            the row index and which store keys are consulted.

        Returns a DataFrame indexed like the symbol/index metadata with
        from_date, to_date, row_count, missing_count, non_traded_dates and
        missing_dates columns.

        Raises KeyError('Wrong eod_type') for any other eod_type.
        '''
        # Reject a bad eod_type up front. Previously this was only checked
        # when eod_data was omitted; otherwise the function failed later on
        # an unbound variable.
        if eod_type not in ('symbol', 'index'):
            raise KeyError(
                'Wrong eod_type'
            )

        if eod_type == 'symbol':
            base_meta = self.get_symbol_meta()
        else:
            base_meta = self.get_index_meta()
        eod_data_meta = pd.DataFrame(
            index=base_meta.index.copy(),
        )

        if eod_data is None:
            if eod_type == 'symbol':
                meta_key = NSE.__SYMBOL_EOD_META_KEY
                data_key = NSE.__SYMBOL_EOD_DATA_KEY
            else:
                meta_key = NSE.__INDEX_EOD_META_KEY
                data_key = NSE.__INDEX_EOD_DATA_KEY

            store_keys = get_store_keys(NSE.__NSE_DATA_PATH)
            if meta_key in store_keys:
                return pd.read_hdf(NSE.__NSE_DATA_PATH, meta_key)
            if data_key not in store_keys:
                # Nothing stored yet: return placeholder values.
                eod_data_meta['from_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['to_date'] = pd.to_datetime('1994-01-01')
                eod_data_meta['row_count'] = 0
                eod_data_meta['missing_count'] = np.inf
                eod_data_meta['non_traded_dates'] = np.inf
                eod_data_meta['missing_dates'] = np.nan
                return eod_data_meta
            eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, data_key)
            eod_data = eod_data.reset_index()

        def counts(data):
            '''Summarise date coverage for the rows of a single symbol.'''
            data = data.set_index('date')
            name = data.symbol.unique()[0]
            # Explicit object dtype: the entries mix timestamps, ints and a
            # list, and a dtype-less empty Series is deprecated in pandas.
            count_data = pd.Series(name=name, dtype=object)
            count_data['from_date'] = data.index.min()
            count_data['to_date'] = data.index.max()
            count_data['row_count'] = len(data)

            traded_dates = self.get_traded_dates(
                start=count_data['from_date'],
                end=count_data['to_date']
            )
            missing_dates = traded_dates.index.difference(data.index)
            count_data['missing_count'] = len(traded_dates) - len(data)
            count_data['non_traded_dates'] = len(data.query('volume == 0'))
            count_data['missing_dates'] = missing_dates.tolist()
            return count_data

        count_data = eod_data.groupby('symbol').apply(counts)
        eod_data_meta = eod_data_meta.join(count_data)

        # Rows with no EOD data get the same placeholders as the empty case.
        placeholder_date = datetime(1994, 1, 1)
        eod_data_meta['from_date'] = eod_data_meta['from_date'].fillna(placeholder_date)
        eod_data_meta['to_date'] = eod_data_meta['to_date'].fillna(placeholder_date)
        eod_data_meta['row_count'] = eod_data_meta['row_count'].fillna(0).astype(int)
        # Builtin float: the np.float alias was removed in NumPy 1.24.
        eod_data_meta['missing_count'] = eod_data_meta['missing_count'].fillna(np.inf).astype(float)
        eod_data_meta['non_traded_dates'] = eod_data_meta['non_traded_dates'].fillna(np.inf).astype(float)

        return eod_data_meta
Exemple #16
0
    def force_load_data(self, force_load, values=None):
        '''
        Rebuild one locally cached NSE dataset in the HDF store.

        force_load selects what is refreshed:
          'symbol_eod_meta'   - recompute symbol EOD metadata from the stored
                                symbol EOD data (if any) and save it.
          'symbol_eod_data'   - fetch EOD rows for stale symbols with
                                fetch_eod_data(), merge them with the stored
                                rows, save, then refresh metadata, traded
                                dates and the per-column value tables.
          'symbol_eod_values' - pivot the stored symbol EOD data into a
                                date x symbol table for column `values`.
          'index_eod_meta', 'index_eod_data', 'index_eod_values'
                              - the same three operations for indices.
          'all'               - traded dates, symbol EOD data, index EOD data.
          anything else       - delegated to the parent class (without
                                `values`).

        values: column to pivot; only used by the *_eod_values modes.

        The *_eod_data modes return early, without writing anything, when
        too few entries are stale (fewer than 120 symbols / 20 indices).
        '''

        if force_load == 'symbol_eod_meta':
            print('Updating symbol eod metadata')
            if NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
                eod_data = eod_data.reset_index()
            else:
                eod_data = None

            eod_data_meta = self.get_eod_meta(eod_data, eod_type='symbol')
            eod_data_meta.to_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_META_KEY)

        elif force_load == 'symbol_eod_data':
            eod_data_meta = self.get_eod_meta(eod_type='symbol')
            # Stale = last stored date at least 5 days old, or no rows at all.
            date_diff = (TODAY - eod_data_meta.to_date).dt.days
            eod_data_meta = eod_data_meta[
                (date_diff >= 5) | (eod_data_meta.row_count == 0)
            ]

            # Skip the refresh when fewer than 120 symbols are stale.
            if len(eod_data_meta) < 120:
                print(eod_data_meta.row_count)
                return

            if len(eod_data_meta) > 500:
                # Process the first 200 rows per pass; the recursion further
                # down picks up the rest. Positional slice replaces the
                # removed .ix indexer (assumes the metadata index holds
                # non-integer symbol labels, where .ix was positional).
                eod_data_meta = eod_data_meta.iloc[0:200]

            print('Fetching Data from NSE website for {0} symbols'.format(len(eod_data_meta)))

            fetched_frames = []
            for symbol in eod_data_meta.itertuples():
                eod_data = self.fetch_eod_data(
                    symbol=symbol.Index,
                    start=symbol.to_date,
                    index=False,
                )
                if eod_data.empty:
                    continue
                eod_data = eod_data.reset_index()
                print(
                    'Recieved {0} records from NSE for {1} from {2} to {3}'.
                    format(len(eod_data), symbol.Index,
                           eod_data.date.min().date(),
                           eod_data.date.max().date())
                )
                fetched_frames.append(eod_data)

            if NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                old_eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
                old_eod_data = old_eod_data.reset_index()
            else:
                old_eod_data = pd.DataFrame()

            # DataFrame.append was removed in pandas 2.0; a single concat
            # keeps the same row order (fetched rows first, stored rows last).
            fresh_eod_data = pd.concat(fetched_frames + [old_eod_data])
            del old_eod_data

            # NOTE(review): with keep='last' the stored row wins over a
            # freshly fetched one for the same (symbol, date) - confirm
            # this is intended.
            fresh_eod_data = fresh_eod_data.drop_duplicates(['symbol', 'date'], keep='last')
            fresh_eod_data = fresh_eod_data.sort_values(['symbol', 'date'])
            eod_data_schema = [
                'symbol', 'date', 'prev_close', 'open', 'high',
                'low', 'last', 'close', 'vwap',
                'trades', 'volume', 'turnover', 'pct_deliverble',
                'simple_returns', 'log_returns', 'high_low_spread', 'open_close_spread'
            ]
            fresh_eod_data = fresh_eod_data.reset_index()[eod_data_schema]
            fresh_eod_data = fresh_eod_data.set_index(['symbol', 'date'])
            fresh_eod_data.to_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
            del fresh_eod_data
            self.force_load_data('symbol_eod_meta')
            self.force_load_data('traded_dates')
            eod_data_meta = self.get_eod_meta(eod_type='symbol')
            date_diff = (TODAY - eod_data_meta.to_date).dt.days
            eod_data_meta = eod_data_meta[
                (date_diff >= 5) | (eod_data_meta.row_count == 0)
            ]
            # Recurse while a meaningful number of symbols is still stale.
            if len(eod_data_meta) > 20:
                self.force_load_data('symbol_eod_data')
            eod_data_columns = [
                'open', 'high', 'low', 'close', 'vwap',
                'simple_returns', 'log_returns',
                'high_low_spread', 'open_close_spread'
            ]
            for column in eod_data_columns:
                self.force_load_data(force_load='symbol_eod_values', values=column)
            # clean_file(NSE.__NSE_DATA_PATH)

        elif force_load == 'symbol_eod_values':
            print('Generating time series data for {0} from local data'.format(values))
            eod_data = self.get_symbol_eod_data(symbol_list='all')

            data = pd.pivot_table(data=eod_data, index='date',
                                  columns='symbol', values=values)
            data.to_hdf(NSE.__NSE_DATA_PATH, 'symbol_eod_values_{0}'.format(values))

        elif force_load == 'index_eod_meta':
            print('Updating index eod metadata')
            if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
                eod_data = eod_data.reset_index()
            else:
                eod_data = None

            eod_data_meta = self.get_eod_meta(eod_data, eod_type='index')
            eod_data_meta.to_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_META_KEY)

        elif force_load == 'index_eod_data':
            eod_data_meta = self.get_eod_meta(eod_type='index')
            index_meta = self.get_index_meta()
            eod_data_meta = eod_data_meta.join(index_meta.index_code)
            date_diff = (TODAY - eod_data_meta.to_date).dt.days
            eod_data_meta = eod_data_meta[
                (date_diff >= 5) | (eod_data_meta.row_count == 0)
            ]
            # Indices without an index_code cannot be fetched.
            eod_data_meta = eod_data_meta.dropna(subset=['index_code'])

            # Skip the refresh when fewer than 20 indices are stale.
            if len(eod_data_meta) < 20:
                return

            print('Fetching Data from NSE website for {0} indices'.format(len(eod_data_meta)))
            fetched_frames = []
            for index in eod_data_meta.itertuples():
                eod_data = self.fetch_eod_data(
                    symbol=index.index_code,
                    start=index.to_date,
                    index=True,
                )
                if eod_data.empty:
                    continue
                eod_data = eod_data.reset_index()
                # Label every fetched row with the metadata index label.
                eod_data['symbol'] = [index.Index] * len(eod_data)
                print(
                    'Recieved {0} records from NSE for {1} from {2} to {3}'.
                    format(len(eod_data), index.Index,
                           eod_data.date.min().date(),
                           eod_data.date.max().date())
                )
                fetched_frames.append(eod_data)
            # Only non-empty frames are collected, so an empty list means
            # nothing new was fetched.
            if not fetched_frames:
                return

            if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
                old_eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
                old_eod_data = old_eod_data.reset_index()
            else:
                old_eod_data = pd.DataFrame()

            # DataFrame.append was removed in pandas 2.0; a single concat
            # keeps the same row order (fetched rows first, stored rows last).
            fresh_eod_data = pd.concat(fetched_frames + [old_eod_data])
            del old_eod_data

            fresh_eod_data = fresh_eod_data.drop_duplicates(['symbol', 'date'], keep='last')
            fresh_eod_data = fresh_eod_data.sort_values(['symbol', 'date'])
            eod_data_schema = [
                'symbol', 'date', 'open', 'high',
                'low', 'close', 'volume', 'turnover',
                'simple_returns', 'log_returns', 'high_low_spread', 'open_close_spread'
            ]
            fresh_eod_data = fresh_eod_data.reset_index()[eod_data_schema]
            fresh_eod_data = fresh_eod_data.set_index(['symbol', 'date'])
            fresh_eod_data.to_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
            del fresh_eod_data
            self.force_load_data('index_eod_meta')
            self.force_load_data('traded_dates')
            eod_data_meta = self.get_eod_meta(eod_type='index')
            date_diff = (TODAY - eod_data_meta.to_date).dt.days
            eod_data_meta = eod_data_meta[
                (date_diff >= 5) | (eod_data_meta.row_count == 0)
            ]
            # Recurse while more than a handful of indices is still stale.
            if len(eod_data_meta) > 5:
                self.force_load_data('index_eod_data')
            eod_data_columns = [
                'open', 'high', 'low', 'close',
                'simple_returns', 'log_returns',
                'high_low_spread', 'open_close_spread'
            ]
            for column in eod_data_columns:
                self.force_load_data(force_load='index_eod_values', values=column)
            clean_file(NSE.__NSE_DATA_PATH)

        elif force_load == 'index_eod_values':
            print('Generating time series data for {0} from local data'.format(values))
            eod_data = self.get_index_eod_data(index_list='all')

            data = pd.pivot_table(data=eod_data, index='date',
                                  columns='symbol', values=values)
            data.to_hdf(NSE.__NSE_DATA_PATH, 'index_eod_values_{0}'.format(values))

        elif force_load == 'all':
            self.force_load_data('traded_dates')
            self.force_load_data('symbol_eod_data')
            self.force_load_data('index_eod_data')
        else:
            super().force_load_data(force_load)
Exemple #17
0
    def force_load_data(self, force_load, values=None):
        '''
        Force loading helper method for saving symbol data from NSE Website to local HDFStores

        Parameters
        ----------
        force_load : str
            Name of the dataset to rebuild:
            'symbol'                 - symbol meta data
            'index'                  - index meta data and the component
                                       tables of every index type
            'traded_dates'           - traded dates calendar
            'rf' or 'risk_free_rate' - risk free rates
            'all'                    - all of the above, in that order
            Any other value is silently ignored.
        values : object, optional
            Not used for loading. It is handed back unchanged when
            force_load is 'all'.

        Returns
        -------
        `values` when force_load is 'all', otherwise None.
        '''
        if force_load == 'symbol':
            self._force_load_symbol_meta()
        elif force_load == 'index':
            self._force_load_index_components()
        elif force_load == 'traded_dates':
            self._force_load_traded_dates()
        elif force_load == 'rf' or force_load == 'risk_free_rate':
            self._force_load_risk_free_rate()
        elif force_load == 'all':
            self.force_load_data('symbol')
            self.force_load_data('index')
            self.force_load_data('traded_dates')
            self.force_load_data('rf')
            return values

    def _force_load_symbol_meta(self):
        '''Fetch symbol meta data and write it to the market HDF store.'''
        print('Loading Symbol Meta data from NSE website')
        symbol_meta = self.fetch_symbol_meta()
        data_dir = os.path.join(Market.__CURRENT_PATH, 'data')
        if not os.path.isdir(data_dir):
            os.mkdir(data_dir)
        symbol_meta.to_hdf(Market.__Market_PATH, Market.__SYMBOL_META_KEY)

    @staticmethod
    def _index_name_from_link_text(text):
        '''Reduce the text of a download link to a lower case index name.'''
        text = re.sub(r'\r\n', ' ', text)
        text = re.sub(' +', ' ', text)
        # str.find returns -1 when the marker is missing, in which case the
        # slice only drops the last character; the two checks below retry
        # with the other markers used on the NSE pages.
        text = text[text.find('NIFTY'):text.find('Index')]
        if text[-3:] == 'csv':
            text = text[text.find('NIFTY'):text.find('stocks')]
        if text[-2:] == 'cs':
            text = text[text.find('NIFTY'):text.find('Indices')]
        return text.lower().strip()

    @staticmethod
    def _merge_index_components(index, csv_text, csv_link, symbol_meta,
                                index_meta, index_components_data):
        '''
        Parse one index components csv and fold it into the running tables.

        Fills missing `industry` values of symbol_meta and the `url` /
        `number_of_symbols` cells of index_meta in place. Returns
        index_components_data joined with a boolean membership column
        named after `index`.
        '''
        index_components = pd.read_csv(StringIO(csv_text), index_col='Symbol')
        index_components.index = index_components.index.str.lower()
        symbol_meta['industry'] = symbol_meta['industry'].fillna(
            index_components['Industry'])
        index_meta.loc[index, 'url'] = csv_link
        index_meta.loc[index, 'number_of_symbols'] = len(index_components)
        membership = pd.Series(
            True, index=index_components.index, name=index)
        index_components_data = index_components_data.join(membership)
        print('Component data loaded successfully for {0}'.format(index))
        return index_components_data

    def _load_listed_index_components(self, session, index_type_label,
                                      symbol_meta, index_meta):
        '''
        Load the components of every index of a non broad market index type.

        The index type page links directly to the csv files. Returns the
        components table, or None when the index type page can not be
        loaded.
        '''
        info_url = 'https://www.nseindia.com/products/content/equities/indices/{0}_indices.htm'
        response = session.get(info_url.format(index_type_label))
        if response.status_code != 200:
            print(
                'Unable to load url data for {0} index type due to {1} status code'
                .format(index_type_label, response.status_code))
            return None
        index_components_data = pd.DataFrame(index=symbol_meta.index)
        soup = BeautifulSoup(response.text, 'html.parser')
        content = soup.find('div', {'class': 'abt_equities_content'})
        for anchor in content.find_all('a', {'class': 'download'}):
            text = self._index_name_from_link_text(anchor.text)
            csv_link = 'https://www.nseindia.com' + anchor['href']
            if csv_link[-3:] != 'csv':
                continue
            try:
                index = index_meta[
                    index_meta.index_name.str.lower() == text].index[0]
            except IndexError:
                # No row of index_meta carries this name.
                warnings.warn(
                    '{0} index not found in index_meta table'.format(text))
                continue
            response = session.get(csv_link)
            if response.status_code != 200:
                print(
                    'Unable to fetch csv data for {0} index due to {1} status code'
                    .format(index, response.status_code))
                continue
            index_components_data = self._merge_index_components(
                index, response.text, csv_link, symbol_meta, index_meta,
                index_components_data)
        return index_components_data

    def _load_broad_market_components(self, session, symbol_meta, index_meta):
        '''
        Load the components of every broad market index.

        The broad market page links to one page per index, and the csv link
        has to be looked up on that page. Returns the components table, or
        None when the broad market page can not be loaded.
        '''
        info_url = 'https://www.nseindia.com/products/content/equities/indices/broad_indices.htm'
        response = session.get(info_url)
        if response.status_code != 200:
            print(
                'Unable to load url data for {0} index type due to {1} status code'
                .format('broad_market', response.status_code))
            return None
        index_components_data = pd.DataFrame(index=symbol_meta.index)
        soup = BeautifulSoup(response.text, 'html.parser')
        content = soup.find('div', {'class': 'content'})
        for anchor in content.find_all('a'):
            text = self._index_name_from_link_text(anchor.text)
            index_page_link = (
                'https://www.nseindia.com/products/content/equities/indices/'
                + anchor['href'])
            try:
                index = index_meta[
                    index_meta.index_name.str.lower() == text].index[0]
            except IndexError:
                # No row of index_meta carries this name.
                warnings.warn(
                    '{0} index not found in index_meta table'.format(text))
                continue
            response = session.get(index_page_link)
            index_page = BeautifulSoup(response.text, 'html.parser')
            # Reset for every index so a page without a csv link can never
            # pick up the csv of the previously processed index.
            csv_link = None
            for download in index_page.find_all('a', {'class': 'download'}):
                href = download['href']
                if href[-3:] == 'csv':
                    csv_link = 'https://www.nseindia.com' + href
                    break
            if csv_link is None:
                warnings.warn(
                    'csv download link not found for {0} index'.format(index))
                continue
            response = session.get(csv_link)
            if response.status_code != 200:
                print(
                    'Unable to fetch csv data for {0} index due to {1} status code'
                    .format(index, response.status_code))
                continue
            index_components_data = self._merge_index_components(
                index, response.text, csv_link, symbol_meta, index_meta,
                index_components_data)
        return index_components_data

    def _force_load_index_components(self):
        '''
        Scrape index meta data and index components and store them.

        Writes one '<index_type>_components' table per index type, the
        symbol meta data (with the `industry` column filled from the
        component files) and the index meta data to the market HDF store.
        Nothing is written when the base page can not be loaded.
        '''
        print('Loading Index components data from NSE website')
        symbol_meta = self.get_symbol_meta()
        symbol_meta['industry'] = np.nan
        session = requests.session()
        url = 'https://www.nseindia.com/products/content/equities/indices/historical_index_data.htm'
        response = session.get(url)
        if response.status_code != 200:
            print('Unable to load base url data due to {0} status code'.
                  format(response.status_code))
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        index_meta = pd.DataFrame(columns=[
            'index_code', 'index_name', 'index_type', 'url',
            'number_of_symbols'
        ])
        index_type_select = soup.find('select', {'id': 'indexType'})

        for index_type in index_type_select.find_all('optgroup'):
            # The index type key is the optgroup label without its last
            # word, lower cased and joined with underscores.
            label_words = index_type['label'].strip().split(' ')
            index_type_label = '_'.join(label_words[0:-1]).lower()
            index_type_label = index_type_label.replace(
                'strategy', 'strategic')
            for option in index_type.find_all('option'):
                index_code = option['value'].strip()
                index_key = index_code.lower().replace(
                    ' ', '_').replace('%', '')
                index_meta.loc[index_key] = [
                    index_code, option.text.strip(), index_type_label,
                    np.nan, np.nan
                ]
            if index_type_label != 'broad_market':
                index_components_data = self._load_listed_index_components(
                    session, index_type_label, symbol_meta, index_meta)
            else:
                index_components_data = self._load_broad_market_components(
                    session, symbol_meta, index_meta)
            if index_components_data is None:
                # The index type page could not be loaded; skip this type.
                continue
            index_components_data = index_components_data.fillna(
                False).astype(bool)
            hdf_key = index_type_label + '_components'
            index_components_data.to_hdf(Market.__Market_PATH, hdf_key)

        symbol_meta['name_of_company'] = symbol_meta[
            'name_of_company'].astype(str)
        symbol_meta['isin_number'] = symbol_meta['isin_number'].astype(str)
        symbol_meta['industry'] = symbol_meta['industry'].fillna('unknown')
        symbol_meta['industry'] = symbol_meta['industry'].str.lower(
        ).str.replace(' ', '_')
        symbol_meta.to_hdf(Market.__Market_PATH, Market.__SYMBOL_META_KEY)
        index_meta = index_meta.astype(str)
        index_meta.to_hdf(Market.__Market_PATH, Market.__INDEX_META_KEY)

    def _force_load_traded_dates(self):
        '''
        Rebuild the traded dates calendar and store it.

        Uses the union of the symbol and index end of day dates when both
        close tables exist in the NSE store, otherwise the traded dates
        kept in the constants store.
        '''
        print('Updating traded dates')
        from nse import NSE

        store_keys = get_store_keys(NSE.NSE_DATA_PATH)
        if ('symbol_eod_values_close' in store_keys
                and 'index_eod_values_close' in store_keys):
            nse = NSE(symbol_list='infy', index='nifty_50')
            symbol_returns = nse.get_symbol_eod_values()
            index_returns = nse.get_index_eod_values()
            traded_dates = symbol_returns.index.union(index_returns.index)
        else:
            traded_dates = pd.read_hdf(Market.__CONSTANTS_PATH,
                                       Market.__TRADED_DATES_KEY)
            traded_dates = traded_dates.index

        traded_dates = pd.DataFrame(index=traded_dates)
        # Temporary column so the .dt accessor can be used on the dates.
        traded_dates['date'] = traded_dates.index
        traded_dates['date_count'] = list(range(1, len(traded_dates) + 1))
        traded_dates['day'] = traded_dates.date.dt.day
        traded_dates['month'] = traded_dates.date.dt.month
        traded_dates['year'] = traded_dates.date.dt.year
        traded_dates['day_of_week'] = traded_dates.date.dt.dayofweek
        traded_dates['day_of_year'] = traded_dates.date.dt.dayofyear
        # NOTE(review): Series.dt.week was deprecated in pandas 1.1 and
        # removed in 2.0 (replacement: dt.isocalendar().week). Kept as is
        # because the replacement does not exist in older pandas releases.
        traded_dates['week_of_year'] = traded_dates.date.dt.week
        traded_dates = traded_dates.drop(['date'], axis=1)
        traded_dates.to_hdf(Market.__Market_PATH,
                            Market.__TRADED_DATES_KEY)

    def _force_load_risk_free_rate(self):
        '''
        Download daily, monthly and yearly risk free rates and store them.

        The rates are read from the `rf_pct_` column of the downloaded csv
        files, divided by 100, aligned on the traded dates and forward
        filled.
        '''
        intervals = ['Daily', 'Monthly', 'Yearly']
        date_formats = ['%Y%m%d', '%Y%m', '%Y']
        month_end = TODAY + MonthEnd(-1)
        month_end = datetime.strftime(month_end, format='%Y%m%d')
        link = 'http://www.iimahd.ernet.in/~iffm/Indian-Fama-French-Momentum/DATA/{0}_FourFactors_and_Market_Returns_{1}.csv'

        session = requests.session()
        traded_dates = self.get_traded_dates()
        fama_french_rf = pd.DataFrame(index=traded_dates.index)
        for interval, date_format in zip(intervals, date_formats):
            response = session.get(link.format(month_end, interval))
            interval_fama_french = pd.read_csv(StringIO(response.text))
            rename_columns(interval_fama_french)
            # Relabel the first column through a fresh label list instead
            # of writing into the Index' backing array.
            column_names = list(interval_fama_french.columns)
            column_names[0] = 'date'
            interval_fama_french.columns = column_names

            rf_column = 'rf_{0}'.format(interval.lower())
            # Work on an explicit copy so the assignments below do not
            # target a slice of the downloaded frame.
            interval_rf = interval_fama_french[['date', 'rf_pct_']].copy()
            interval_rf.columns = ['date', rf_column]
            interval_rf['date'] = pd.to_datetime(
                interval_rf['date'], format=date_format)
            interval_rf[rf_column] = interval_rf[rf_column] / 100
            interval_rf = interval_rf.set_index('date')

            fama_french_rf = fama_french_rf.join(interval_rf, how='outer')
        fama_french_rf = fama_french_rf.ffill()
        fama_french_rf.to_hdf(Market.__Market_PATH,
                              Market.__RISK_FREE_RATE_KEY)