예제 #1
0
def download_file_from_s3(
        filename: str, file_dir: str, dl_dir: str = TEMP_DIR) -> str:
    """Download a file from AWS S3

    Args:
        filename (str): Name of file to restore
        file_dir (str): Path to dir where file is stored
        dl_dir (str): Path to directory to download file

    Side effects:
        - Creates 'dl_dir' (including parents) if it does not exist
        - Writes the downloaded file into 'dl_dir'

    Raises:
        PmaApiDbInteractionError: If the S3 client reports an error, e.g.
        the requested object was not found.

    Returns:
        str: Path to downloaded file
    """
    from botocore.exceptions import ClientError

    # Create the directory we actually download into. This must be 'dl_dir'
    # rather than the module default, otherwise a custom 'dl_dir' that does
    # not exist yet makes the download fail. 'exist_ok' avoids a race between
    # an existence check and the creation.
    os.makedirs(dl_dir, exist_ok=True)

    s3 = boto3.resource('s3')
    # NOTE(review): os.path.join uses the OS path separator; S3 keys normally
    # use '/', so this presumably assumes a POSIX host - confirm.
    download_from_path: str = os.path.join(file_dir, filename)
    download_to_path: str = os.path.join(dl_dir, filename)

    try:
        s3.Bucket(BUCKET).download_file(download_from_path, download_to_path)
    except ClientError as err:
        msg = 'The file requested was not found on AWS S3.\n' \
            if err.response['Error']['Code'] == '404' \
            else 'An error occurred while trying to download from AWS S3.\n'
        msg += '- File requested: ' + download_from_path
        # Chain explicitly so the underlying S3 error stays visible.
        raise PmaApiDbInteractionError(msg) from err

    return download_to_path
예제 #2
0
파일: utils.py 프로젝트: samvalour1/pma-api
def run_proc(cmd, shell: bool = False, raises: bool = True,
             prints: bool = False) -> Dict[str, str]:
    """Run a process

    Wrapper for run_proc_simple. Adds binary-path fallbacks, optional
    printing of captured output, and exception handling.

    The command is first run as given. If the executable is not found, the
    first element of the command is replaced by the designated system binary
    and, failing that, by the designated project binary.

    Args:
        cmd (str | list): Command to run. A string is split on single spaces.
        A list is copied, so the caller's list is never modified.
        shell (bool): Shell argument to pass to subprocess.Popen
        raises (bool): Raise exceptions?
        prints (bool): Print captured stderr and stdout? Prints both to stdout.

    Raises:
        PmaApiDbInteractionError: If output['stderr'] and 'raises'
        FileNotFoundError: If none of the candidate binaries can be found

    Returns:
        dict: {
            'stdout': Popen.stdout.read(),
            'stderr': Popen.stderr.read()
        }
    """
    # Always work on our own list: the fallbacks below overwrite element 0,
    # which must not leak back into a list owned by the caller.
    cmd_line: List[str] = \
        cmd.split(' ') if isinstance(cmd, str) \
        else list(cmd)
    this_bin: str = cmd_line[0]

    # Maybe try project binary first?
    try:  # Try command passed literally
        output: Dict[str, str] = run_proc_simple(cmd_line, shell)
    except FileNotFoundError:
        try:  # Try designated system binary
            cmd_line[0] = _get_bin_path_from_ref_config(
                bin_name=this_bin, system=True)
            output = run_proc_simple(cmd_line, shell)
        except FileNotFoundError:  # Try designated project binary
            cmd_line[0] = _get_bin_path_from_ref_config(
                bin_name=this_bin, project=True)
            output = run_proc_simple(cmd_line, shell)

    stdout: str = output['stdout']
    stderr: str = output['stderr']

    if stdout and prints:
        print(stdout)

    if stderr and raises:
        # Report the command as the caller passed it, not the rewritten one.
        msg = '\n' + stderr + \
              'Offending command: ' + str(cmd)
        raise PmaApiDbInteractionError(msg)
    if stderr and prints:
        print(stderr)

    return output
예제 #3
0
def commit_from_sheet(ws: Sheet, model: db.Model, **kwargs):
    """Initialize DB table data from XLRD Worksheet.

    Initialize table data from source data associated with corresponding
    data model. The first worksheet row is used as the header; every other
    row becomes one model instance that is added to the DB session.

    Args:
        ws (xlrd.sheet.Sheet): XLRD worksheet object.
        model (class): SqlAlchemy model class.
        **kwargs: When 'model' is Data, must contain 'survey', 'indicator'
        and 'characteristic': lookups (objects with a '.get(code)' method)
        used to resolve the row's code columns to ids.

    Side effects:
        - Adds one record per data row to db.session (does not commit)
        - Logs an error before raising

    Raises:
        PmaApiDbInteractionError: If a row cannot be turned into a model
        instance.
    """
    survey, indicator, characteristic = '', '', ''
    if model == Data:
        survey = kwargs['survey']
        indicator = kwargs['indicator']
        characteristic = kwargs['characteristic']
    header = None

    for i, row in enumerate(ws.get_rows()):
        row = [r.value for r in row]
        if i == 0:
            header = row
            continue

        row_dict = dict(zip(header, row))
        if model == Data:
            # Resolve each '<x>_code' column to the matching '<x>_id'.
            row_dict['survey_id'] = survey.get(row_dict.get('survey_code'))
            row_dict['indicator_id'] = \
                indicator.get(row_dict.get('indicator_code'))
            row_dict['char1_id'] = \
                characteristic.get(row_dict.get('char1_code'))
            row_dict['char2_id'] = \
                characteristic.get(row_dict.get('char2_code'))

        try:
            record = model(**row_dict)
        except Exception as err:
            # Fill in the template BEFORE appending the original error text.
            # Error text may itself contain '{' or '}', and running
            # str.format over it would raise and mask the real error.
            msg = 'Error when processing data import.\n' \
                  '- Worksheet name: {}\n' \
                  '- Row number: {}\n' \
                  '- Cell values: {}\n\n' \
                  '- Original Error:\n'.format(ws.name, i + 1, row)
            msg += type(err).__name__ + ': ' + str(err)
            logging.error(msg)
            raise PmaApiDbInteractionError(msg) from err

        db.session.add(record)
예제 #4
0
def restore_using_heroku_postgres(
        s3_url: str = '',
        s3_url_is_signed: bool = False,
        app_name: str = APP_NAME,
        silent: bool = False,
        ok_tells: tuple = ('Restoring... done',)):
    """Restore a Heroku PostgreSQL DB from an S3 object via the Heroku CLI

    Args:
        s3_url (str): AWS S3 object url. Treated as unsigned unless
        's3_url_is_signed' is True.
        s3_url_is_signed (bool): Whether 's3_url' is already signed. When
        False, the url is signed before the restore is attempted.
        app_name (str): Name of app as recognized by Heroku
        silent (bool): Don't print updates?
        ok_tells (tuple(str)): Strings that mark the command's stderr output
        as a success. If any of them occurs in the output, the result is
        considered ok. Other versions of the Heroku CLI may word their
        output differently; in that case find another 'ok tell' in the
        output and add it here.

    Side effects:
        - Signs url if needed
        - Restores database
        - Drops any tables and other database objects before recreating them

    Raises:
        PmaApiDbInteractionError: If the command wrote to stderr and none of
        the 'ok_tells' was found in it.
    """
    if s3_url_is_signed:
        url_to_restore: str = s3_url
    else:
        url_to_restore: str = s3_signed_url(s3_url)

    command: str = (
        'heroku pg:backups:restore "{s3_url}" DATABASE_URL '
        '--confirm {app} --app {app}'
    ).format(s3_url=url_to_restore, app=app_name)

    # Errors are judged here (against 'ok_tells'), not inside run_proc.
    result: Dict[str, str] = run_proc(
        cmd=command,
        prints=not silent,
        raises=False)

    stderr_text = result['stderr']
    if not stderr_text:
        return
    for tell in ok_tells:
        if tell in stderr_text:
            return

    raise PmaApiDbInteractionError(
        '\n' + stderr_text + 'Offending command: ' + str(command))
예제 #5
0
    def run(self) -> Dict:
        """Execute every queued subtask and report the outcome.

        Runs inside the application context. If a subtask fails with a
        database or attribute error, the session is rolled back, the DB is
        restored from the backup, and the failure is re-raised as a
        PmaApiDbInteractionError.

        Side effects:
            - Runs subtasks
            - Sets 'seconds_elapsed' and updates 'final_status'
            - On failure: rolls back the session, restores the DB from
              'backup_path' and prints the restore message

        Raises:
            PmaApiDbInteractionError: If a subtask raised DatabaseError or
            AttributeError.

        Returns:
            dict: Final results
        """
        with self._app.app_context():
            try:
                for queued_subtask in self.subtask_queue:
                    self.begin(queued_subtask)
            except (DatabaseError, AttributeError) as err:
                # Undo partial work and bring back the previous DB state
                # before reporting the failure.
                db.session.rollback()
                restore_db(self.backup_path)
                print(self.restore_msg)

                err_text: str = str(err)
                if isinstance(err, OperationalError):
                    report: str = connection_error.format(err_text)
                elif isinstance(err, AttributeError) \
                        and env_access_err_tell in err_text:
                    report: str = env_access_err_msg.format(
                        type(err).__name__ + ': ' + err_text)
                else:
                    report: str = err_text
                raise PmaApiDbInteractionError(report)

        self.seconds_elapsed = int(time() - self.start_time)
        self.final_status['seconds_elapsed'] = self.seconds_elapsed
        self.final_status['success'] = True
        self.complete(seconds_elapsed=self.seconds_elapsed)

        return self.final_status
예제 #6
0
def backup_using_pgdump(path: str = None) -> str:
    """Backup using pg_dump

    Args:
        path (str): Path of file to save. If omitted, a new backup path is
        generated at call time.

    Side effects:
        - Grants full permissions to .pgpass file
        - Reads and writes to .pgpass file
        - Runs pg_dump process, storing result to file system

    Raises:
        PmaApiDbInteractionError: If errors during process

    Returns:
        str: path to backup file saved
    """
    # Resolve the default per call. A default of 'new_backup_path()' in the
    # signature is evaluated only once, when the function is defined, so
    # every default call would reuse the same path.
    if path is None:
        path = new_backup_path()

    pgpass_url_base = '{hostname}:{port}:{database}:{username}:{password}'
    pgpass_url: str = pgpass_url_base.format(**db_connection_info)
    pgpass_path = os.path.expanduser('~/.pgpass')
    grant_full_permissions_to_file(pgpass_path)
    update_pgpass(path=pgpass_path, creds=pgpass_url)

    cmd_base: str = \
        'pg_dump --format=custom --host={hostname} --port={port} ' \
        '--username={username} --dbname={database} --file {path}'
    cmd: str = cmd_base.format(**db_connection_info, path=path)
    # Handle stderr here rather than in run_proc, so that the error report
    # built below is actually reachable.
    output: Dict = run_proc(cmd, raises=False)

    errors: str = output['stderr']
    if errors:
        # Report where the credentials file lives, not its contents: the
        # file holds the database password and must not end up in error
        # messages or logs.
        msg = '\n' + errors + \
            'Offending command: ' + cmd + \
            '\nPgpass file: ' + pgpass_path
        raise PmaApiDbInteractionError(msg)

    return path
예제 #7
0
def restore_db_local(path: str, silent: bool = False):
    """Restore the local database from a backup file

    A last-minute ("emergency") backup of the current database is taken
    first. If restoring from 'path' fails, the emergency backup is restored
    instead and the failure is reported.

    Args:
        path (str): Path to backup file
        silent (bool): Don't print updates?

    Side effects:
        - Reverts database to state of backup file
        - Creates an emergency backup file; it is removed again when it is
          empty or when the restore succeeds

    Raises:
        PmaApiDbInteractionError: If the restore failed and the emergency
        backup was restored in its place.
        Exception: The original error, if the restore failed and no
        emergency backup file exists.
    """
    failure_template = (
        '\n\nAn error occurred while trying to restore db from file: {}'
        '.In the process of db restoration, a last-minute backup of db'
        ' was created: {}. If you are seeing this message, then this '
        'last-minute backup should have already been restored. However'
        ', if it appears that your db has been dropped, you may restore'
        ' from this file manually.\n\n'
        '- Original error: \n{}')

    safety_copy = new_backup_path()
    backup_local(safety_copy)

    # An empty backup file means there was no db to back up.
    if not os.path.getsize(safety_copy):
        os.remove(safety_copy)

    try:
        restore_using_pgrestore(path=path, dropdb=True, silent=silent)
    except Exception as err:
        if not os.path.exists(safety_copy):
            # Nothing to fall back to; surface the original failure.
            raise err
        restore_using_pgrestore(path=safety_copy, dropdb=True)
        raise PmaApiDbInteractionError(
            failure_template.format(path, safety_copy, str(err)))

    # Restore succeeded; the emergency backup is no longer needed.
    if os.path.exists(safety_copy):
        os.remove(safety_copy)
예제 #8
0
    def wrap(*args, **kwargs):
        """Call the wrapped function, translating S3 access-denied errors.

        Keyword handling:
            - 'verbose': if truthy, the wrapped function's name is printed
              before it is called. The keyword is still forwarded.
            - 'silent': removed from the keyword arguments before the
              wrapped function is called.

        Raises:
            PmaApiDbInteractionError: If the wrapped function raises a
            ClientError whose text mentions 'Access Denied'/'AccessDenied'.
            ClientError: Any other ClientError is re-raised unchanged.

        Returns:
            Whatever the wrapped function returns.
        """
        if kwargs.get('verbose'):
            print('Executing: ' + func.__name__)
        forwarded_kwargs = \
            {k: v for k, v in kwargs.items() if k != 'silent'}
        try:
            # TODO: output suppression (test.SuppressStdoutStderr) works when
            #  running tests in Pycharm, but running `make backup` from
            #  terminal hangs; S3 has unfixed resource warnings. Until that
            #  is resolved the function is called without suppression, so
            #  the helper is not imported here: an unused import would only
            #  add a way for every call to fail with ImportError.
            return func(*args, **forwarded_kwargs)
        except ClientError as err:
            if 'Access Denied' in str(err) or 'AccessDenied' in str(err):
                # 'msg' comes from the enclosing scope (not visible here).
                raise PmaApiDbInteractionError(msg) from err
            raise
예제 #9
0
def restore_using_pgrestore(
        path: str, attempt: int = 1, dropdb: bool = False,
        silent: bool = False):
    """Restore postgres database using pg_restore

    Several candidate pg_restore binaries are known: two system defaults,
    the registered system binary and the registered project binary.
    'attempt' selects which one to use (1-based). If that binary cannot be
    found, the function calls itself with the next candidate.

    Args:
        path (str): Path of file to restore
        attempt (int): Attempt number
        dropdb (bool): Drop database in process?
        silent (bool): Don't print updates?

    Side effects:
        - Restores database
        - Drops database (if dropdb)
        - Prints pg_restore's stdout (unless silent)

    Raises:
        PmaApiDbInteractionError: If pg_restore wrote anything to stderr
        FileNotFoundError: If none of the candidate binaries was found
    """
    candidates: List[str] = ['pg_restore', '/usr/local/bin/pg_restore']
    registered_system_bin: str = \
        _get_bin_path_from_ref_config(bin_name='pg_restore', system=True)
    # Skip empty/None registrations: they can never be executed.
    if registered_system_bin and registered_system_bin not in candidates:
        candidates.append(registered_system_bin)
    project_bin: str = \
        _get_bin_path_from_ref_config(bin_name='pg_restore', project=True)
    if project_bin:
        candidates.append(project_bin)

    max_attempts: int = len(candidates)
    pg_restore_path: str = candidates[attempt - 1]

    # Build the argument list directly instead of splitting a formatted
    # string on spaces, so that paths containing spaces stay one argument.
    cmd: List[str] = [pg_restore_path, '--exit-on-error', '--create']
    if dropdb:
        cmd.append('--clean')
    cmd += [
        '--dbname={database}'.format(**root_connection_info),
        '--host={hostname}'.format(**root_connection_info),
        '--port={port}'.format(**root_connection_info),
        '--username={username}'.format(**root_connection_info),
        str(path),
    ]
    cmd_str: str = ' '.join(cmd)  # Only used for error reporting

    try:
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
    except FileNotFoundError:
        if attempt < max_attempts:
            restore_using_pgrestore(
                path=path, dropdb=dropdb, silent=silent, attempt=attempt+1)
            return
        raise

    # communicate() drains both pipes concurrently, closes them and waits
    # for the process. Reading stdout and stderr one after the other can
    # deadlock once the pipe that is not being read fills up, and leaves
    # the process unreaped when an error is raised.
    stdout, errors = proc.communicate()

    if not silent:
        for line in stdout.splitlines(keepends=True):
            print(line.encode('utf-8'))

    if errors:
        msg = '\n' + errors + \
            'Offending command: ' + cmd_str
        log_process_stderr(msg, err_msg=db_mgmt_err)
        raise PmaApiDbInteractionError(msg)