def download_file_from_s3(
        filename: str, file_dir: str, dl_dir: str = TEMP_DIR) -> str:
    """Download a file from AWS S3

    Args:
        filename (str): Name of file to restore
        file_dir (str): Path to dir where file is stored
        dl_dir (str): Path to directory to download file

    Side effects:
        - Creates 'dl_dir' (and any missing parents) if it doesn't exist
        - Writes the downloaded file into 'dl_dir'

    Raises:
        PmaApiDbInteractionError: If botocore raises a ClientError during
        the download. The message distinguishes a '404' error code from
        any other failure.

    Returns:
        str: Path to downloaded file
    """
    from botocore.exceptions import ClientError

    # Ensure the directory we actually write to exists. Previously only
    # TEMP_DIR was created, so a custom 'dl_dir' that did not exist yet was
    # never created. An empty 'dl_dir' means "current directory"; nothing to
    # create in that case.
    if dl_dir:
        os.makedirs(dl_dir, exist_ok=True)

    s3 = boto3.resource('s3')
    download_from_path: str = os.path.join(file_dir, filename)
    download_to_path: str = os.path.join(dl_dir, filename)

    try:
        s3.Bucket(BUCKET).download_file(download_from_path, download_to_path)
    except ClientError as err:
        msg = 'The file requested was not found on AWS S3.\n' \
            if err.response['Error']['Code'] == '404' \
            else 'An error occurred while trying to download from AWS S3.\n'
        msg += '- File requested: ' + download_from_path
        # Chain the original error so the botocore details stay visible
        raise PmaApiDbInteractionError(msg) from err

    return download_to_path
def run_proc(cmd, shell: bool = False, raises: bool = True,
             prints: bool = False) -> Dict[str, str]:
    """Run a process

    Wrapper for run_proc_simple. Adds exception handling and a fallback
    lookup of the binary: if the command as given cannot be found, the
    first token is replaced by the designated system binary path and, if
    that also fails, by the designated project binary path.

    Args:
        cmd (str | list): Command to run. A string is split on single
        spaces. A list is copied, so the caller's list is never modified.
        shell (bool): Shell argument to pass to subprocess.Popen
        raises (bool): Raise exceptions?
        prints (bool): Print captured stderr and stdout? Prints both to
        stdout.

    Raises:
        PmaApiDbInteractionError: If output['stderr'] is non-empty and
        'raises' is True
        FileNotFoundError: If the last fallback (project binary) cannot be
        found either

    Returns:
        dict: {
            'stdout': Popen.stdout.read(),
            'stderr': Popen.stderr.read()
        }
    """
    # Copy list input: the fallbacks below overwrite element 0, and doing
    # that on the caller's own list would leak the substituted path back.
    cmd_line: List[str] = \
        cmd.split(' ') if isinstance(cmd, str) \
        else list(cmd)
    this_bin: str = cmd_line[0]

    # Maybe try project binary first?
    try:
        # Try command passed literally
        output: Dict[str, str] = run_proc_simple(cmd_line, shell)
    except FileNotFoundError:
        try:
            # Try designated system binary
            cmd_line[0] = _get_bin_path_from_ref_config(
                bin_name=this_bin, system=True)
            output = run_proc_simple(cmd_line, shell)
        except FileNotFoundError:
            # Try designated project binary
            cmd_line[0] = _get_bin_path_from_ref_config(
                bin_name=this_bin, project=True)
            output = run_proc_simple(cmd_line, shell)

    stdout: str = output['stdout']
    stderr: str = output['stderr']

    if stdout and prints:
        print(stdout)

    # Any stderr output is treated as failure when 'raises' is set; callers
    # that expect benign stderr output must pass raises=False.
    if stderr and raises:
        msg = '\n' + stderr + \
              'Offending command: ' + str(cmd)
        raise PmaApiDbInteractionError(msg)
    if stderr and prints:
        print(stderr)

    return output
def commit_from_sheet(ws: Sheet, model: db.Model, **kwargs):
    """Initialize DB table data from XLRD Worksheet.

    Initialize table data from source data associated with corresponding
    data model. The first worksheet row is used as the header; every later
    row becomes one model instance built from {header: cell value}.

    Args:
        ws (xlrd.sheet.Sheet): XLRD worksheet object.
        model (class): SqlAlchemy model class.
        **kwargs: When 'model' is Data, must contain 'survey', 'indicator'
        and 'characteristic'. Each is queried with .get(<code>) to resolve
        the row's codes into ids.

    Side effects:
        - Adds one record per data row to db.session (no commit is issued
        in this function)
        - Logs an error message if a record cannot be created

    Raises:
        PmaApiDbInteractionError: If the model cannot be instantiated from
        a row
    """
    is_data_model = model == Data
    survey, indicator, characteristic = '', '', ''
    if is_data_model:
        survey = kwargs['survey']
        indicator = kwargs['indicator']
        characteristic = kwargs['characteristic']

    header = None
    for i, row in enumerate(ws.get_rows()):
        row = [cell.value for cell in row]
        if i == 0:
            header = row
            continue

        row_dict = dict(zip(header, row))
        if is_data_model:
            # Translate the codes found in the sheet into foreign key ids
            row_dict['survey_id'] = survey.get(row_dict.get('survey_code'))
            row_dict['indicator_id'] = \
                indicator.get(row_dict.get('indicator_code'))
            row_dict['char1_id'] = \
                characteristic.get(row_dict.get('char1_code'))
            row_dict['char2_id'] = \
                characteristic.get(row_dict.get('char2_code'))

        try:
            record = model(**row_dict)
        except Exception as err:
            # Fill in the template BEFORE appending the original error
            # text. The error text may itself contain '{' or '}', which
            # would make str.format() fail and hide the real problem.
            msg = 'Error when processing data import.\n' \
                  '- Worksheet name: {}\n' \
                  '- Row number: {}\n' \
                  '- Cell values: {}\n\n' \
                  '- Original Error:\n'.format(ws.name, i + 1, row)
            msg += type(err).__name__ + ': ' + str(err)
            logging.error(msg)
            raise PmaApiDbInteractionError(msg) from err

        db.session.add(record)
def restore_using_heroku_postgres(
        s3_url: str = '', s3_url_is_signed: bool = False,
        app_name: str = APP_NAME, silent: bool = False,
        ok_tells: tuple = ('Restoring... done',)):
    """Restore Heroku PostgreSQL DB using Heroku CLI

    Args:
        s3_url (str): AWS S3 object url. Treated as unsigned unless
        's3_url_is_signed' is True.
        s3_url_is_signed (bool): Is 's3_url' already signed? If not, it is
        signed via s3_signed_url() before the restore.
        app_name (str): Name of app as recognized by Heroku
        silent (bool): Don't print updates?
        ok_tells (tuple(str)): Strings to look for in the stderr output of
        the command. If any of them is present, the result is considered
        ok even though stderr is not empty. Different Heroku CLI versions
        may word their output differently; if so, find another 'ok tell'
        in the output and add it here.

    Side effects:
        - Signs url if needed
        - Restores database
        - Drops any tables and other database objects before recreating
        them

    Raises:
        PmaApiDbInteractionError: If stderr is non-empty and contains none
        of the 'ok_tells'
    """
    if s3_url_is_signed:
        url_to_restore: str = s3_url
    else:
        url_to_restore: str = s3_signed_url(s3_url)

    command: str = \
        'heroku pg:backups:restore "{s3_url}" DATABASE_URL ' \
        '--confirm {app} --app {app}'.format(
            s3_url=url_to_restore, app=app_name)

    # raises=False: stderr is judged here instead, because the CLI may
    # write to stderr even when the restore went fine.
    result: Dict[str, str] = run_proc(
        cmd=command, prints=not silent, raises=False)
    stderr_text = result['stderr']

    if not stderr_text:
        return
    for tell in ok_tells:
        if tell in stderr_text:
            return

    raise PmaApiDbInteractionError(
        '\n' + stderr_text + 'Offending command: ' + str(command))
def run(self) -> Dict:
    """Create a fresh database instance.

    Executes every queued subtask inside the application context, handles
    errors, and reports the result.

    Side effects:
        - Runs subtasks
        - Sets attributes ('seconds_elapsed', entries of 'final_status')
        - On failure: rolls back the db session, restores the db from
        'backup_path' and prints 'restore_msg'

    Raises:
        PmaApiDbInteractionError: If a subtask raised DatabaseError or
        AttributeError. Raised after the rollback and restore were done.

    Returns:
        dict: Final results
    """
    with self._app.app_context():
        try:
            for queued_subtask in self.subtask_queue:
                self.begin(queued_subtask)
        except (DatabaseError, AttributeError) as err:
            # Put the db back into its previous state before reporting
            db.session.rollback()
            restore_db(self.backup_path)
            print(self.restore_msg)

            # Pick the most helpful message for the kind of failure
            detail: str = str(err)
            if isinstance(err, OperationalError):
                report: str = connection_error.format(detail)
            elif isinstance(err, AttributeError) \
                    and env_access_err_tell in detail:
                report: str = env_access_err_msg.format(
                    type(err).__name__ + ': ' + detail)
            else:
                report: str = detail
            raise PmaApiDbInteractionError(report)
        else:
            elapsed = int(time() - self.start_time)
            self.seconds_elapsed = elapsed
            self.final_status['seconds_elapsed'] = elapsed
            self.final_status['success'] = True
            self.complete(seconds_elapsed=self.seconds_elapsed)

            return self.final_status
def backup_using_pgdump(path: str = None) -> str:
    """Backup using pg_dump

    Args:
        path (str): Path of file to save. If omitted, a path is obtained
        from new_backup_path() at call time.

    Side effects:
        - Grants full permissions to .pgpass file
        - Reads and writes to .pgpass file
        - Runs pg_dump process, storing result to file system

    Raises:
        PmaApiDbInteractionError: If errors during process

    Returns:
        str: path to backup file saved
    """
    # Resolve the default per call. A default of 'new_backup_path()' in the
    # signature is evaluated only once, when the function is defined, so
    # every default call would reuse that single path.
    if path is None:
        path = new_backup_path()

    pgpass_path = os.path.expanduser('~/.pgpass')
    pgpass_url_base = '{hostname}:{port}:{database}:{username}:{password}'
    pgpass_url: str = pgpass_url_base.format(**db_connection_info)
    grant_full_permissions_to_file(pgpass_path)
    update_pgpass(path=pgpass_path, creds=pgpass_url)

    cmd_base: str = \
        'pg_dump --format=custom --host={hostname} --port={port} ' \
        '--username={username} --dbname={database} --file {path}'
    cmd: str = cmd_base.format(**db_connection_info, path=path)
    # NOTE(review): run_proc is called with its default 'raises' setting.
    # If that default raises on any stderr output, the branch below is
    # never reached and only acts as a safeguard. Confirm which error
    # report is wanted.
    output: Dict = run_proc(cmd)

    errors: str = output['stderr']
    if errors:
        # NOTE(review): this puts the .pgpass contents (credentials) into
        # the exception message. Consider redacting before this message can
        # reach logs.
        with open(pgpass_path, 'r') as file:
            pgpass_contents: str = file.read()
        msg = '\n' + errors + \
              'Offending command: ' + cmd + \
              'Pgpass contents: ' + pgpass_contents
        raise PmaApiDbInteractionError(msg)

    return path
def restore_db_local(path: str, silent: bool = False):
    """Restore database

    Takes a last-minute backup of the current db first. If the restore
    fails and that backup exists, the backup is restored and an error is
    raised that names both files.

    Args:
        path (str): Path to backup file
        silent (bool): Don't print updates?

    Side effects:
        - Reverts database to state of backup file
        - Creates a last-minute backup file. It is removed again when it
        is empty or when the restore succeeds, and kept when the restore
        fails.

    Raises:
        PmaApiDbInteractionError: If the restore failed and the
        last-minute backup was restored instead
        Exception: The original error, if the restore failed and no
        last-minute backup file exists
    """
    failure_template = (
        '\n\nAn error occurred while trying to restore db from file: {}'
        '.In the process of db restoration, a last-minute backup of db'
        ' was created: {}. If you are seeing this message, then this '
        'last-minute backup should have already been restored. However'
        ', if it appears that your db has been dropped, you may restore'
        ' from this file manually.\n\n'
        '- Original error: \n{}')

    safety_copy = new_backup_path()
    backup_local(safety_copy)
    if os.path.getsize(safety_copy) == 0:  # no db existed
        os.remove(safety_copy)

    # noinspection PyBroadException
    try:
        restore_using_pgrestore(path=path, dropdb=True, silent=silent)
    except Exception as err:
        # Without a safety copy there is nothing to fall back on
        if not os.path.exists(safety_copy):
            raise err
        restore_using_pgrestore(path=safety_copy, dropdb=True)
        report = failure_template.format(path, safety_copy, str(err))
        raise PmaApiDbInteractionError(report)

    # Restore went fine; the safety copy is no longer needed
    if os.path.exists(safety_copy):
        os.remove(safety_copy)
def wrap(*args, **kwargs):
    """Wrapped function

    Calls the wrapped function with the given arguments, minus the
    'silent' keyword argument. 'verbose' is read here and also passed on.

    Args:
        *args: Positional arguments passed on unchanged
        **kwargs: Keyword arguments. 'silent' is removed before the call.
        A truthy 'verbose' prints the name of the function being run.

    Raises:
        PmaApiDbInteractionError: If the call raises a ClientError whose
        text contains 'Access Denied' or 'AccessDenied'
        ClientError: Any other ClientError is re-raised unchanged

    Returns:
        Whatever the wrapped function returns
    """
    if kwargs.get('verbose'):
        print('Executing: ' + func.__name__)

    forwarded_kwargs = \
        {k: v for k, v in kwargs.items() if k != 'silent'}

    try:
        # TODO: Output suppression (test.SuppressStdoutStderr) works when
        #  running tests in Pycharm, but running `make backup` from
        #  terminal hangs. S3 has unfixed resource warnings. Until that is
        #  solved the function is called without suppression. The helper
        #  is no longer imported here: it was unused, and a failing import
        #  would have broken every wrapped call.
        return func(*args, **forwarded_kwargs)
    except ClientError as err:
        if 'Access Denied' in str(err) or 'AccessDenied' in str(err):
            # 'msg' comes from the enclosing scope
            raise PmaApiDbInteractionError(msg) from err
        raise
def restore_using_pgrestore(
        path: str, attempt: int = 1, dropdb: bool = False,
        silent: bool = False):
    """Restore postgres database using pg_restore

    Tries a list of candidate pg_restore binaries one at a time:
    'pg_restore', '/usr/local/bin/pg_restore', the registered system
    binary and the registered project binary. If a candidate cannot be
    found, the function calls itself with the next attempt number.

    Args:
        path (str): Path of file to restore
        attempt (int): Attempt number. 1-based index into the candidate
        binaries.
        dropdb (bool): Drop database in process?
        silent (bool): Don't print updates?

    Side effects:
        - Restores database
        - Drops database (if dropdb)
        - Prints pg_restore's stdout (unless silent)

    Raises:
        PmaApiDbInteractionError: If pg_restore wrote anything to stderr
        FileNotFoundError: If none of the candidate binaries can be found
    """
    system_bin_paths: List[str] = \
        ['pg_restore', '/usr/local/bin/pg_restore']
    system_bin_path_registered: str = \
        _get_bin_path_from_ref_config(bin_name='pg_restore', system=True)
    if system_bin_path_registered not in system_bin_paths:
        system_bin_paths.append(system_bin_path_registered)
    project_bin_path: str = \
        _get_bin_path_from_ref_config(bin_name='pg_restore', project=True)

    pg_restore_paths: List[str] = list(system_bin_paths)
    if project_bin_path:
        pg_restore_paths.append(project_bin_path)
    pg_restore_path: str = pg_restore_paths[attempt - 1] if pg_restore_paths \
        else ''
    max_attempts: int = len(pg_restore_paths)

    cmd_base: str = '{pg_restore_path} --exit-on-error --create {drop}' \
                    '--dbname={database} --host={hostname} --port={port} ' \
                    '--username={username} {path}'
    cmd_str: str = cmd_base.format(
        **root_connection_info,
        path=path,
        pg_restore_path=pg_restore_path,
        drop='--clean ' if dropdb else '')
    cmd: List[str] = cmd_str.split(' ')

    try:
        # The context manager closes both pipes and waits for the process
        # on every exit path, so nothing is left open when an error is
        # raised further down.
        with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True) as proc:
            if silent:
                # Drain stdout as well, even though it is discarded.
                # Otherwise a full stdout pipe could block pg_restore.
                _, errors = proc.communicate()
            else:
                # NOTE(review): stdout is streamed line by line and stderr
                # is read afterwards. Very large stderr output could still
                # stall the child here.
                try:
                    for line in iter(proc.stdout.readline, ''):
                        print(line.encode('utf-8'))
                except AttributeError:
                    print(proc.stdout)
                errors = proc.stderr.read()
    except FileNotFoundError:
        # This binary does not exist; move on to the next candidate
        if attempt < max_attempts:
            restore_using_pgrestore(
                path=path, dropdb=dropdb, silent=silent, attempt=attempt + 1)
            return
        raise

    if errors:
        msg = '\n' + errors + \
              'Offending command: ' + cmd_str
        log_process_stderr(msg, err_msg=db_mgmt_err)
        raise PmaApiDbInteractionError(msg)