def fetch(self, series_meta):
    """
    Parse the single XLSX workbook in the provider directory and return one series from it.

    Every call re-reads the workbook and rebuilds self.TableSeries / self.TableMeta, holding
    one entry per (country, data column) pair keyed by the full ticker string.

    :param series_meta: econ_platform_core.SeriesMetadata; only ticker_full is read.
    :return: tuple (pandas.Series, econ_platform_core.SeriesMetadata)
    """
    if self.Directory is None:
        configured = econ_platform_core.PlatformConfiguration['P_JST']['directory']
        self.Directory = econ_platform_core.utils.parse_config_path(configured)
    pattern = os.path.join(self.Directory, '*.xlsx')
    # Excel can lock files, which shows up as extra '~' entries: throw them out.
    workbooks = [path for path in glob.glob(pattern) if '~' not in path]
    if not workbooks:
        raise econ_platform_core.PlatformError(
            'No XLSX file in {0}'.format(self.Directory))
    if len(workbooks) > 1:
        raise econ_platform_core.PlatformError(
            'More than one XLSX file in {0}: cannot tell which to use'.format(self.Directory))
    workbook = workbooks[0]
    log_debug('Reading %s', workbook)
    data_sheet = pandas.read_excel(workbook, sheet_name='Data', header=0)
    description_sheet = pandas.read_excel(
        workbook, sheet_name='Variable description', index_col=0, header=None)
    # The data sheet is one big frame with a row per country per year, so a time
    # series is obtained by filtering on one country at a time.
    countries = set(data_sheet['country'])
    self.TableWasFetched = True
    self.TableMeta = {}
    self.TableSeries = {}
    # Identifier columns: these label the rows and are not series themselves.
    skipped_columns = ('year', 'iso', 'country', 'ifs')
    for country in countries:
        rows = data_sheet.loc[data_sheet['country'] == country]
        first_label = rows.index[0]
        iso_code = rows['iso'][first_label]
        # Each yearly observation is stamped on January 1st of its year.
        cal_dates = [datetime.date(year, 1, 1) for year in rows['year']]
        for column in rows.columns:
            if column in skipped_columns:
                continue
            values = pandas.Series(rows[column])
            values.index = cal_dates
            info = econ_platform_core.SeriesMetadata()
            info.series_provider_code = econ_platform_core.tickers.TickerProviderCode(
                self.ProviderCode)
            info.ticker_query = econ_platform_core.tickers.TickerFetch(
                '{0} {1}'.format(iso_code, column))
            info.ticker_full = econ_platform_core.tickers.create_ticker_full(
                info.series_provider_code, info.ticker_query)
            info.series_name = '{0} {1}'.format(country, description_sheet.at[column, 1])
            info.series_description = '{0} from Jordà-Schularick-Taylor Macrohistory Database'.format(
                info.series_name)
            key = str(info.ticker_full)
            self.TableSeries[key] = values
            self.TableMeta[key] = info
    wanted = str(series_meta.ticker_full)
    try:
        return self.TableSeries[wanted], self.TableMeta[wanted]
    except KeyError:
        raise econ_platform_core.TickerNotFoundError(
            '{0} not found'.format(wanted))
def Retrieve(self, series_meta):
    """
    Read one series from the data table.

    :param series_meta: metadata object; only ticker_full is read.
    :return: pandas.Series named after the full ticker, indexed by the stored axis values.
    :raises econ_platform_core.TickerNotFoundError: no series ID is known for the ticker.
    :raises econ_platform_core.PlatformError: an axis entry is neither an ISO date nor a number.
    """
    self.GetConnection()
    ticker_full = str(series_meta.ticker_full)
    series_id = self.GetSeriesID(ticker_full)
    if series_id is None:
        raise econ_platform_core.TickerNotFoundError(
            '{0} not found on database'.format(ticker_full))
    cmd = """
    SELECT series_dates, series_values FROM {0} WHERE series_id = ?
    """.format(self.DataTable)
    res = self.Execute(cmd, series_id, commit_after=False).fetchall()

    def mapper(s):
        # Try an ISO date first; anything else has to be a plain number.
        try:
            return econ_platform_core.utils.iso_string_to_date(s)
        except Exception:
            # Fix: the fallback value used to be computed and discarded, which
            # turned every non-date axis entry into None.
            return float(s)

    try:
        dates = [mapper(x[0]) for x in res]
    except Exception as ex:
        # Reached when an entry is neither a date nor a number.
        raise econ_platform_core.PlatformError(
            'Corrupted date axis for {0}'.format(ticker_full)) from ex
    valz = [x[1] for x in res]
    ser = pandas.Series(valz)
    ser.index = dates
    ser.name = ticker_full
    return ser
def Delete(self, series_meta, warn_if_non_existent=True):
    """
    Deletes a series; if it does not exist, does nothing (optionally logging a warning).

    :param series_meta: econ_platform_core.SeriesMetadata
    :param warn_if_non_existent: bool; log a warning when no row matched the ticker.
    :return: None
    :raises NotImplementedError: ticker_full is None or empty.
    :raises econ_platform_core.PlatformError: more than one row was affected.
    """
    if (series_meta.ticker_full is None) or (len(series_meta.ticker_full) == 0):
        raise NotImplementedError(
            'Must delete by TickerFull specification')
    # Fix: the statement used to be built from self.TableMeta; the table name
    # attribute is self.MetaTable.
    cmd = """
    DELETE FROM {0} WHERE ticker_full = ?""".format(self.MetaTable)
    self.GetConnection()
    # Commit is deferred so the row count can be checked first.
    self.Execute(cmd, str(series_meta.ticker_full), commit_after=False)
    affected = self.Cursor.rowcount
    if affected > 1:  # pragma: nocover
        # This should never be hit, but it could happen if the SQL command is mangled.
        # Unless cascade deletions are counted...
        raise econ_platform_core.PlatformError(
            'Internal Error! Attempted to delete more than one row!')
    if warn_if_non_existent and affected == 0:
        econ_platform_core.log_warning(
            'Series to be deleted did not exist: {0}'.format(
                str(series_meta.ticker_full)))
    self.Connection.commit()
def TestTablesExist(self):
    """
    Probe the meta table with a one-row SELECT to see whether the tables have been created.

    Returns None when the query runs. A sqlite3.OperationalError is logged and re-raised as
    econ_platform_core.PlatformError; the message depends on whether the error text names the
    meta table.
    """
    cur = self.Connection.cursor()
    # Only the meta table is probed; the values table is taken to exist alongside it.
    # Running a query is used as the existence check.
    try:
        cur.execute('SELECT * FROM {0} LIMIT 1'.format(self.MetaTable))
    except sqlite3.OperationalError as ex:
        # Expected error message: 'no such table: {table_name}'
        econ_platform_core.log_warning('sqlite3 error %s', ex)
        names_meta_table = self.MetaTable.lower() in ex.args[0].lower()
        if not names_meta_table:
            # Some other operational failure: echo it before raising.
            print(str(ex))
            raise econ_platform_core.PlatformError(
                'Error when testing for table existence')
        raise econ_platform_core.PlatformError(
            'Tables do not exist. Need to run the initialisation script init_sqlite.py in the scripts directory.'
        )
def _GetSeriesUrlImplementation(self, series_meta):
    """
    Build the fred.stlouisfed.org series page URL from the query ticker.

    The ticker is substituted as-is; nothing checks that the resulting page exists.

    :param series_meta: object whose ticker_query identifies the series
    :return: str
    """
    query = series_meta.ticker_query
    if query is not None:
        return 'https://fred.stlouisfed.org/series/{0}'.format(str(query))
    raise econ_platform_core.PlatformError('Need to see the query ticker in order to fetch the metadata')
def GetMetaFromFile(self, full_name):
    """
    Build series metadata from the header line of a file.

    The first line must consist of exactly two tab-separated fields; the second one is
    taken as the full ticker.

    :param full_name: path of the file to read
    :return: econ_platform_core.SeriesMetadata with ticker_full, Exists,
        series_provider_code and ticker_query filled in
    :raises econ_platform_core.PlatformError: the header does not have two fields, or the
        full ticker cannot be split.
    """
    # Just read the first line; the context manager closes the handle, which
    # used to be left open.
    with open(full_name, 'r') as f:
        header = f.readline().rstrip()
    try:
        dummy, full_ticker = header.split('\t')
    except ValueError as ex:
        # Wrong number of tab-separated fields.
        raise econ_platform_core.PlatformError(
            'Corrupt file: {0}'.format(full_name)) from ex
    # Spelled SeriesMetadata, matching how the class is instantiated elsewhere.
    meta = econ_platform_core.SeriesMetadata()
    meta.ticker_full = full_ticker
    meta.Exists = True
    try:
        meta.series_provider_code, meta.ticker_query = econ_platform_core.utils.split_ticker_information(
            full_ticker)
    except Exception as ex:
        raise econ_platform_core.PlatformError('Invalid full ticker') from ex
    return meta
def fetch(self, series_meta):
    """
    Do the fetch from CSV. Increments may be via JSON.

    The whole table is parsed on each call; self.TableSeries and self.TableMeta are rebuilt
    and the requested series is then looked up by its full ticker.

    :param series_meta: econ_platform_core.SeriesMetadata
    :return: tuple (series, metadata) taken from self.TableSeries / self.TableMeta
    :raises econ_platform_core.TickerError: the query ticker is not of the form <table>|<vector>.
    :raises econ_platform_core.PlatformError: the table file is missing and unzipping failed.
    :raises econ_platform_core.TickerNotFoundError: the table does not contain the full ticker.
    """
    query_ticker = str(series_meta.ticker_query)
    try:
        # Only the table part is needed here, but the ticker must have exactly two parts.
        table_name, _vector = query_ticker.split('|')
    except ValueError as ex:
        raise econ_platform_core.TickerError(
            'CANSIM_CSV ticker format: <table>|<vector>; invalid ticker = {0}'
            .format(query_ticker)) from ex
    parsed_name = self.GetTimeSeriesFile(table_name)
    if not os.path.exists(parsed_name):
        econ_platform_core.log(
            'Table file does not exist, attempting to unzip')
        try:
            self.UnzipFile(table_name)
        except Exception as ex:
            # Any unzip failure is reported as a missing download; interpreter-exit
            # signals (KeyboardInterrupt, SystemExit) are no longer swallowed.
            raise econ_platform_core.PlatformError(
                'Table {0} needs to be downloaded as a zip file'.format(
                    table_name)) from ex
    # Do the whole table
    self.TableWasFetched = True
    self.TableMeta = {}
    self.TableSeries = {}
    self.MetaMapper = {}
    self.ParseUnzipped(table_name)
    self.BuildSeries()
    self.ArchiveFiles(table_name)
    full_ticker = str(series_meta.ticker_full)
    try:
        ser = self.TableSeries[full_ticker]
        meta = self.TableMeta[full_ticker]
    except KeyError as ex:
        raise econ_platform_core.TickerNotFoundError(
            '{0} was not found'.format(full_ticker)) from ex
    return ser, meta