def SearchForSeriesCode(self, series_code):
    """
    Search every *.xls workbook in self.Directory for a column whose
    'Series ID' cell equals series_code; return the first matching column.

    Workbooks that pandas cannot parse are logged and skipped (best effort);
    sheets without a 'Series ID' row label are ignored as non-data sheets.

    :param series_code: str
    :return: pandas.Series (the matching column, with row 0 relabelled 'series_name')
    :raises econ_platform_core.entity_and_errors.TickerNotFoundError: no match found.
    """
    # Now for the ugly solution: brute-force scan of the directory.
    flist = glob.glob(os.path.join(self.Directory, '*.xls'))
    # Search until we find it; if we don't, puke
    for fname in flist:
        # This query pattern will work on the "data" sheets; ignore the index.
        try:
            sheets = pandas.read_excel(fname, sheet_name=None, header=None, index_col=0)
        except Exception:
            # FIX: was a bare except:, which also swallowed KeyboardInterrupt/
            # SystemExit. Narrowed to Exception; the best-effort skip is kept.
            econ_platform_core.log_last_error()
            log_warning('Problem with Excel {0}'.format(fname))
            continue
        for sheet_name in sheets:
            sheet = sheets[sheet_name]
            list_index = list(sheet.index)
            # We ignore sheets that do not match the desired format.
            if 'Series ID' not in list_index:
                continue
            for c in sheet.columns:
                if sheet[c]["Series ID"] == series_code:
                    # Relabel row 0 so downstream code can read the series name.
                    list_index[0] = 'series_name'
                    sheet.index = list_index
                    return sheet[c]
    # Did not find it; puke.
    raise econ_platform_core.entity_and_errors.TickerNotFoundError(
        'Could not find series ID = {0}'.format(series_code))
def Delete(self, series_meta, warn_if_non_existent=True):
    """
    Delete one series row from the meta table, keyed by the full ticker.
    Deleting a non-existent series is a no-op (optionally logged).

    :param series_meta: econ_platform_core.SeriesMetadata
    :param warn_if_non_existent: bool; log a warning if nothing was deleted.
    :return: None
    """
    full_ticker = series_meta.ticker_full
    # Only deletion by full ticker is supported.
    if full_ticker is None or len(full_ticker) == 0:
        raise NotImplementedError('Must delete by TickerFull specification')
    self.Connect()
    cmd = """ DELETE FROM {0} WHERE ticker_full = ?""".format(self.TableMeta)
    self.Execute(cmd, str(full_ticker), commit_after=False)
    deleted = self.Cursor.rowcount
    if deleted > 1:  # pragma: nocover
        # Should be impossible, but could happen if the SQL command is mangled.
        # Unless cascade deletions are counted...
        raise econ_platform_core.entity_and_errors.PlatformError(
            'Internal Error! Attempted to delete more than one row!')
    if warn_if_non_existent and deleted == 0:
        econ_platform_core.log_warning(
            'Series to be deleted did not exist: {0}'.format(str(full_ticker)))
    self.Connection.commit()
def UnzipFile(self, table_name):
    """
    Extract the contents of the zip archive for table_name into
    self.DataDirectory. The data and metadata CSVs are the expected members;
    anything else is still extracted, but with a warning logged.
    """
    archive_path = os.path.join(self.DataDirectory, table_name + self.ZipTail)
    log('Unzipping %s', archive_path)
    wanted = (table_name + '.csv', table_name + '_MetaData.csv')
    with zipfile.ZipFile(archive_path, 'r') as archive:
        for member in archive.infolist():
            if member.filename in wanted:
                log('Extracting file %s', member.filename)
            else:
                log_warning('Unexpected file name: %s', member.filename)
            # Extract every member, expected or not.
            archive.extract(member, self.DataDirectory)
def TestTablesExist(self):
    """
    Probe the meta table with a trivial SELECT to confirm the database schema
    has been initialised; raise PlatformError if the table is missing or the
    probe fails for any other operational reason.
    """
    cursor = self.Connection.cursor()
    # If the meta table is there, so it the values table?
    # Don't know how to validate existence of SQL objects, so do it this way
    try:
        cursor.execute('SELECT * FROM {0} LIMIT 1'.format(self.MetaTable))
    except sqlite3.OperationalError as ex:
        # Expected error message: 'no such table: {table_name}'
        econ_platform_core.log_warning('sqlite3 error %s', ex)
        if self.MetaTable.lower() in ex.args[0].lower():
            raise econ_platform_core.PlatformError(
                'Tables do not exist. Need to run the initialisation script init_sqlite.py in the scripts directory.')
        # Some other operational failure: surface it, then raise.
        print(str(ex))
        raise econ_platform_core.PlatformError(
            'Error when testing for table existence')
def TestTablesExist(self):
    """
    Connect, then issue a throwaway SELECT against the meta table to verify
    that the schema exists; raise PlatformError when it does not (or when the
    probe fails for another operational reason).
    """
    self.Connect()
    cursor = self.Connection.cursor()
    # Have not looked up the right way to do this. TODO: Fix.
    probe = 'SELECT * FROM {0} LIMIT 1'.format(self.TableMeta)
    try:
        cursor.execute(probe)
    except sqlite3.OperationalError as ex:
        # Expected error message: 'no such table: {table_name}'
        econ_platform_core.log_warning('sqlite3 error %s', ex)
        missing_table = self.TableMeta.lower() in ex.args[0].lower()
        if missing_table:
            raise econ_platform_core.entity_and_errors.PlatformError(
                'Tables do not exist. Need to run the initialisation script init_sqlite.py in the scripts directory.')
        # Some other operational failure: surface it, then raise.
        print(str(ex))
        raise econ_platform_core.entity_and_errors.PlatformError(
            'Error when testing for table existence')
def BuildTable(self): """ Get all series in all xls in directory (!). """ # Now for the ugly solution... flist = glob.glob(os.path.join(self.Directory, '*.xls')) # Search until we find it; if we don't, puke out = [] for fname in flist: log_debug('Reading %s', fname) # This query pattern will work on the "data" sheets; ignore the index. try: sheets = pandas.read_excel(fname, sheet_name=None, header=None, index_col=0) except: econ_platform_core.log_last_error() log_warning('Problem with Excel {0}'.format(fname)) continue for sheet_name in sheets: sheet = sheets[sheet_name] sheet = self.PatchSheet(sheet) list_index = list(sheet.index) # We ignore sheets that do not match the desired format. for targ_field in self.TickerLabels: if targ_field not in list_index: continue list_index = self.FixIndex(list_index) sheet.index = list_index for c in sheet.columns: try: [ser, meta] = self.ConvertDFtoSeries(sheet[c]) except SkipColumn: continue full_ticker = str(meta.ticker_full) if full_ticker in self.TableSeries: ser = ser.combine_first( self.TableSeries[full_ticker]) self.TableSeries[full_ticker] = ser else: self.TableSeries[full_ticker] = ser self.TableMeta[full_ticker] = meta
def SearchForSeriesCode(self, series_code):
    """
    Search every *.xls workbook in self.Directory for columns whose ticker
    cell (row label self.TickerLabel) equals series_code.

    Unlike the single-match variant, this collects *all* matching columns
    across all workbooks and sheets.

    :param series_code: str
    :return: list of pandas.Series (one per matching column, with row 0
             relabelled 'series_name')
    :raises econ_platform_core.TickerNotFoundError: no column matched.
    """
    # Now for the ugly solution: brute-force scan of the directory.
    flist = glob.glob(os.path.join(self.Directory, '*.xls'))
    # Search until we find it; if we don't, puke
    out = []
    for fname in flist:
        log_debug('Reading %s', fname)
        # This query pattern will work on the "data" sheets; ignore the index.
        try:
            sheets = pandas.read_excel(fname, sheet_name=None, header=None, index_col=0)
        except Exception:
            # FIX: was a bare except:, which also swallowed KeyboardInterrupt/
            # SystemExit. Narrowed to Exception; the best-effort skip is kept.
            econ_platform_core.log_last_error()
            log_warning('Problem with Excel {0}'.format(fname))
            continue
        for sheet_name in sheets:
            sheet = sheets[sheet_name]
            list_index = list(sheet.index)
            # We ignore sheets that do not match the desired format.
            targ_field = self.TickerLabel
            if targ_field not in list_index:
                continue
            for c in sheet.columns:
                if sheet[c][targ_field] == series_code:
                    # Fixes ABS spreadsheet: relabel row 0 as the series name.
                    list_index[0] = 'series_name'
                    sheet.index = list_index
                    out.append(sheet[c])
    # Did not find it; puke.
    if len(out) == 0:
        raise econ_platform_core.TickerNotFoundError(
            'Could not find series ID = {0}'.format(series_code))
    return out