def SearchForSeriesCode(self, series_code):
    """
    Scan every ``*.xls`` workbook in ``self.Directory`` and return the first
    column whose "Series ID" row equals ``series_code``.

    Workbooks that cannot be read are logged and skipped. Sheets without a
    "Series ID" row label are ignored.

    :param series_code: str
    :return: the matching column of the sheet (``sheet[c]``), with the first
        index label replaced by 'series_name'
    :raises econ_platform_core.TickerNotFoundError: if no column matches
    """
    # Now for the ugly solution: brute-force search through the workbooks.
    flist = glob.glob(os.path.join(self.Directory, '*.xls'))
    for fname in flist:
        # This query pattern will work on the "data" sheets; ignore the index.
        try:
            sheets = pandas.read_excel(fname, sheet_name=None, header=None,
                                       index_col=0)
        except Exception:
            # Best effort: one unreadable workbook must not abort the search,
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            econ_platform_core.log_last_error()
            log_warning('Problem with Excel {0}'.format(fname))
            continue
        for sheet in sheets.values():
            list_index = list(sheet.index)
            # We ignore sheets that do not match the desired format.
            if 'Series ID' not in list_index:
                continue
            for c in sheet.columns:
                if sheet[c]["Series ID"] == series_code:
                    # Relabel the first row before handing the column back.
                    list_index[0] = 'series_name'
                    sheet.index = list_index
                    return sheet[c]
    # Did not find it.
    raise econ_platform_core.TickerNotFoundError(
        'Could not find series ID = {0}'.format(series_code))
def Retrieve(self, series_meta):
    """
    Load one series from the table named by ``self.DataTable``.

    Each axis entry is parsed as an ISO date first; if that fails it is
    converted with ``float``.

    :param series_meta: object whose ``ticker_full`` identifies the series
    :return: pandas.Series named after the full ticker, indexed by the
        parsed axis values
    :raises econ_platform_core.TickerNotFoundError: if ``GetSeriesID``
        returns None for the ticker
    :raises econ_platform_core.PlatformError: if an axis entry is neither
        an ISO date nor convertible to float
    """
    self.GetConnection()
    ticker_full = str(series_meta.ticker_full)
    series_id = self.GetSeriesID(ticker_full)
    if series_id is None:
        raise econ_platform_core.TickerNotFoundError(
            '{0} not found on database'.format(ticker_full))
    cmd = """ SELECT series_dates, series_values FROM {0} WHERE series_id = ? """.format(self.DataTable)
    res = self.Execute(cmd, series_id, commit_after=False).fetchall()

    def mapper(s):
        try:
            return econ_platform_core.utils.iso_string_to_date(s)
        except Exception:
            # Fix: the fallback value used to be computed and then dropped,
            # which put None on the axis instead of the number.
            return float(s)

    try:
        dates = [mapper(x[0]) for x in res]
    except Exception as exc:
        raise econ_platform_core.PlatformError(
            'Corrupted date axis for {0}'.format(ticker_full)) from exc
    valz = [x[1] for x in res]
    ser = pandas.Series(valz)
    ser.index = dates
    ser.name = ticker_full
    return ser
def GetMeta(self, full_ticker):
    """
    Return the metadata stored in the text file belonging to a ticker.

    The file name comes from ``DatabaseText.GetFileName`` and is looked up
    inside ``self.Directory``.

    :param full_ticker: ticker whose metadata file is wanted
    :return: whatever ``self.GetMetaFromFile`` returns for that file
    :raises econ_platform_core.TickerNotFoundError: if the file is missing
    """
    self.CheckDirectory()
    directory = self.Directory
    file_name = DatabaseText.GetFileName(full_ticker)
    path = os.path.join(directory, file_name)
    if os.path.exists(path):
        return self.GetMetaFromFile(path)
    raise econ_platform_core.TickerNotFoundError(
        'Unknown ticker: {0}'.format(full_ticker))
def fetch(self, series_meta):
    """
    Do the fetch. Can only support single series queries...

    The query ticker has the form ``<table>|<vector>``. The parsed,
    tab-separated table file is scanned for rows whose first field equals
    the vector; the second field is parsed as an ISO date and the third as
    a float.

    :param series_meta: econ_platform_core.SeriesMetaData
    :return: list holding a single pandas.Series, sorted by date
    :raises econ_platform_core.TickerError: if the ticker does not split
        into exactly two '|'-separated parts
    :raises econ_platform_core.TickerNotFoundError: if the table cannot be
        unzipped or the vector has no rows in it
    """
    query_ticker = str(series_meta.ticker_query)
    try:
        table_name, vector = query_ticker.split('|')
    except ValueError as exc:
        # Unpacking fails when there are not exactly two parts.
        raise econ_platform_core.TickerError(
            'CANSIM_CSV ticker format: <table>|<vector>; invalid ticker = {0}'
            .format(query_ticker)) from exc
    parsed_name = self.GetTimeSeriesFile(table_name)
    if not os.path.exists(parsed_name):
        econ_platform_core.log(
            'Table file does not exist, attempting to unzip')
        try:
            self.UnzipFile(table_name)
        except Exception as exc:
            raise econ_platform_core.TickerNotFoundError(
                'Table {0} needs to be downloaded as a zip file'.format(
                    table_name)) from exc
        self.ParseUnzipped(table_name)
    items = []
    with open(parsed_name, 'r') as f:
        for row_raw in f:
            row = row_raw.split('\t')
            if row[0] == vector:
                ddate = econ_platform_core.utils.iso_string_to_date(row[1])
                items.append((ddate, float(row[2])))
    if not items:
        raise econ_platform_core.TickerNotFoundError(
            'Vector {0} not found in CANSIM table {1}'.format(
                vector, table_name))
    # Rows are not assumed to be in date order in the file.
    items.sort()
    values = [x[1] for x in items]
    dates = [x[0] for x in items]
    data = pandas.Series(values)
    data.index = dates
    data.name = '{0}@{1}'.format(self.ProviderCode, query_ticker)
    return [
        data,
    ]
def fetch(self, series_meta):
    """
    Load the single XLSX workbook in ``self.Directory``, build one series
    per (country, variable) pair, and return the requested one.

    The whole table is cached on the instance in ``self.TableSeries`` and
    ``self.TableMeta`` (keyed by full ticker string), and
    ``self.TableWasFetched`` is set to True.

    :param series_meta: object whose ``ticker_full`` selects the series
    :return: tuple (pandas.Series, econ_platform_core.SeriesMetadata)
    :raises econ_platform_core.PlatformError: if the directory holds no
        usable XLSX file, or more than one
    :raises econ_platform_core.TickerNotFoundError: if the requested ticker
        is not among the generated series
    """
    if self.Directory is None:
        self.Directory = econ_platform_core.utils.parse_config_path(
            econ_platform_core.PlatformConfiguration['P_JST']['directory'])
    flist = glob.glob(os.path.join(self.Directory, '*.xlsx'))
    # Excel can lock files, throw them out. Only the file name is tested:
    # a '~' elsewhere in the directory path must not discard every workbook.
    flist = [x for x in flist if '~' not in os.path.basename(x)]
    if not flist:
        raise econ_platform_core.PlatformError(
            'No XLSX file in {0}'.format(self.Directory))
    if len(flist) > 1:
        raise econ_platform_core.PlatformError(
            'More than one XLSX file in {0}: cannot tell which to use'.
            format(self.Directory))
    fname = flist[0]
    log_debug('Reading %s', fname)
    data_sheet = pandas.read_excel(fname, sheet_name='Data', header=0)
    description_sheet = pandas.read_excel(
        fname, sheet_name='Variable description', index_col=0, header=None)
    # All data is in one giant DataFrame, with one row per country per year.
    # To generate time series, need to select one country at a time.
    country_list = set(data_sheet['country'])
    self.TableWasFetched = True
    self.TableMeta = {}
    self.TableSeries = {}
    # Columns that identify a row rather than hold a data series.
    exclusions = ('year', 'iso', 'country', 'ifs')
    for country in country_list:
        df = data_sheet.loc[data_sheet['country'] == country]
        iso_code = df['iso'][df.index[0]]
        # Now, blast through the data types.
        dates = df['year']
        # Each year is mapped to January 1st of that year.
        cal_dates = [datetime.date(x, 1, 1) for x in dates]
        for c in df.columns:
            if c in exclusions:
                continue
            ser = pandas.Series(df[c])
            ser.index = cal_dates
            meta = econ_platform_core.SeriesMetadata()
            meta.series_provider_code = econ_platform_core.tickers.TickerProviderCode(
                self.ProviderCode)
            meta.ticker_query = econ_platform_core.tickers.TickerFetch(
                '{0} {1}'.format(iso_code, c))
            meta.ticker_full = econ_platform_core.tickers.create_ticker_full(
                meta.series_provider_code, meta.ticker_query)
            meta.series_name = '{0} {1}'.format(country,
                                                description_sheet.at[c, 1])
            meta.series_description = '{0} from Jordà-Schularick-Taylor Macrohistory Database'.format(
                meta.series_name)
            full_str = str(meta.ticker_full)
            self.TableSeries[full_str] = ser
            self.TableMeta[full_str] = meta
    try:
        ser = self.TableSeries[str(series_meta.ticker_full)]
        meta = self.TableMeta[str(series_meta.ticker_full)]
        return ser, meta
    except KeyError:
        raise econ_platform_core.TickerNotFoundError(
            '{0} not found'.format(str(series_meta.ticker_full)))
def MapTicker(self, query_ticker):
    """
    Look up the entry registered for a query ticker in ``self.SeriesMapper``.

    Monkey patch this method if you have a complex set of user functions.

    :param query_ticker: str (anything else is converted with ``str``)
    :return: the ``self.SeriesMapper`` entry stored under that ticker
    :raises econ_platform_core.TickerNotFoundError: if nothing is registered
    """
    key = str(query_ticker)
    try:
        handler = self.SeriesMapper[key]
    except KeyError:
        raise econ_platform_core.TickerNotFoundError(
            'There is no function that handles the query ticker: {0}'.
            format(query_ticker))
    return handler
def fetch(self, series_meta):
    """
    Return the test series for the query ticker.

    As a side effect, ``series_meta`` is given a name and a description
    before the lookup happens.

    :param series_meta: econ_platform_core.SeriesMetadata
    :return: whatever ``get_test_series`` returns for the ticker
    :raises econ_platform_core.TickerNotFoundError: if ``get_test_series``
        raises KeyError
    """
    ticker = str(series_meta.ticker_query)
    # Give a name and description
    series_meta.series_name = 'Test Series {0}'.format(ticker)
    series_meta.series_description = 'Series generated by test code.'
    try:
        result = get_test_series(ticker)
    except KeyError:
        raise econ_platform_core.TickerNotFoundError(
            'Not found on TEST: {0}'.format(ticker))
    else:
        return result
def fetch(self, series_meta):
    """
    Do the fetch from CSV. Increments may be via JSON.

    The query ticker has the form ``<table>|<vector>``. The whole table is
    rebuilt on every call and cached on the instance in
    ``self.TableSeries`` / ``self.TableMeta`` (keyed by full ticker string).

    :param series_meta: econ_platform_core.SeriesMetadata
    :return: tuple (series, metadata) for ``series_meta.ticker_full``
    :raises econ_platform_core.TickerError: if the ticker does not split
        into exactly two '|'-separated parts
    :raises econ_platform_core.PlatformError: if the table has to be
        unzipped and that fails
    :raises econ_platform_core.TickerNotFoundError: if the full ticker is
        not in the rebuilt table
    """
    query_ticker = str(series_meta.ticker_query)
    try:
        table_name, vector = query_ticker.split('|')
    except ValueError as exc:
        # Unpacking fails when there are not exactly two parts.
        raise econ_platform_core.TickerError(
            'CANSIM_CSV ticker format: <table>|<vector>; invalid ticker = {0}'
            .format(query_ticker)) from exc
    parsed_name = self.GetTimeSeriesFile(table_name)
    if not os.path.exists(parsed_name):
        econ_platform_core.log(
            'Table file does not exist, attempting to unzip')
        try:
            self.UnzipFile(table_name)
        except Exception as exc:
            raise econ_platform_core.PlatformError(
                'Table {0} needs to be downloaded as a zip file'.format(
                    table_name)) from exc
    # Do the whole table
    self.TableWasFetched = True
    self.TableMeta = {}
    self.TableSeries = {}
    self.MetaMapper = {}
    self.ParseUnzipped(table_name)
    self.BuildSeries()
    self.ArchiveFiles(table_name)
    full_ticker = str(series_meta.ticker_full)
    try:
        ser = self.TableSeries[full_ticker]
        meta = self.TableMeta[full_ticker]
    except KeyError:
        raise econ_platform_core.TickerNotFoundError(
            '{0} was not found'.format(full_ticker))
    return ser, meta
def SearchForSeriesCode(self, series_code):
    """
    Scan every ``*.xls`` workbook in ``self.Directory`` and collect all
    columns whose ``self.TickerLabel`` row equals ``series_code``.

    Workbooks that cannot be read are logged and skipped. Sheets without
    the ticker label among their row labels are ignored.

    :param series_code: str
    :return: list of matching sheet columns (``sheet[c]``), each with the
        first index label replaced by 'series_name'
    :raises econ_platform_core.TickerNotFoundError: if nothing matches
    """
    # Now for the ugly solution: brute-force search through the workbooks.
    flist = glob.glob(os.path.join(self.Directory, '*.xls'))
    targ_field = self.TickerLabel
    out = []
    for fname in flist:
        log_debug('Reading %s', fname)
        # This query pattern will work on the "data" sheets; ignore the index.
        try:
            sheets = pandas.read_excel(fname, sheet_name=None, header=None,
                                       index_col=0)
        except Exception:
            # Best effort: one unreadable workbook must not abort the search,
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            econ_platform_core.log_last_error()
            log_warning('Problem with Excel {0}'.format(fname))
            continue
        for sheet in sheets.values():
            list_index = list(sheet.index)
            # We ignore sheets that do not match the desired format.
            if targ_field not in list_index:
                continue
            # Find every match BEFORE relabelling the index. Relabelling
            # mid-scan would break the lookup of targ_field on later columns
            # whenever targ_field is itself the first index entry.
            matches = [c for c in sheet.columns
                       if sheet[c][targ_field] == series_code]
            if not matches:
                continue
            # Fixes ABS spreadsheet
            list_index[0] = 'series_name'
            sheet.index = list_index
            out.extend(sheet[c] for c in matches)
    # Did not find it.
    if not out:
        raise econ_platform_core.TickerNotFoundError(
            'Could not find series ID = {0}'.format(series_code))
    return out
def fetch(self, series_meta):
    """
    Return a fixed two-point series for the query ticker 'TEST1'.

    :param series_meta: econ_platform_core.SeriesMetaData
    :return: list holding one pandas.Series with values 1 and 2, dated
        2000-01-01 and 2000-01-02, named after ``series_meta.ticker_full``
    :raises econ_platform_core.TickerNotFoundError: for any other ticker
    """
    query_ticker = str(series_meta.ticker_query)
    if query_ticker != 'TEST1':
        raise econ_platform_core.TickerNotFoundError(
            'Not found on TEST: {0}'.format(query_ticker))
    stamps = [datetime.date(2000, 1, day) for day in (1, 2)]
    series = pandas.Series([1, 2])
    series.index = stamps
    series.name = str(series_meta.ticker_full)
    return [series]
def fetch(self, series_meta):
    """
    Rebuild the table via ``self.BuildTable()`` and return the requested
    series together with its metadata.

    The instance caches ``self.TableSeries`` and ``self.TableMeta`` are
    reset before the rebuild and ``self.TableWasFetched`` is set to True.

    :param series_meta: econ_platform_core.SeriesMetadata
    :return: tuple (series, metadata) stored under ``str(ticker_full)``
    :raises NotImplementedError: if ``self.Dialect`` is not 'Australian'
    :raises econ_platform_core.TickerNotFoundError: if the full ticker is
        not in the rebuilt table
    """
    if self.Dialect != 'Australian':
        raise NotImplementedError('Only "Australian" XLS format supported')
    if self.Directory is None:
        self.SetDirectory()
    self.TableWasFetched = True
    self.TableMeta = {}
    self.TableSeries = {}
    full_ticker = str(series_meta.ticker_full)
    self.BuildTable()
    try:
        ser = self.TableSeries[full_ticker]
        meta = self.TableMeta[full_ticker]
    except KeyError:
        # Only a missing key means "not found"; other errors propagate
        # instead of being reported as an unknown ticker.
        raise econ_platform_core.TickerNotFoundError(
            '{0} not found'.format(full_ticker))
    return ser, meta