Example #1
def main(argv=None):
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument('-k', '--aws-access-key-id',
                        help='public AWS access key. Can also be defined in an '
                        'environment variable. If both are defined, '
                        'the one passed as a command-line argument takes '
                        'precedence.')

    parser.add_argument('--s3-prefix',
                        help='S3 prefix to run all commands against.  '
                        'Can also be defined via environment variable '
                        'WALE_S3_PREFIX')

    subparsers = parser.add_subparsers(title='subcommands',
                                       dest='subcommand')

    # Common options for backup-fetch and backup-push
    backup_fetchpush_parent = argparse.ArgumentParser(add_help=False)
    backup_fetchpush_parent.add_argument('PG_CLUSTER_DIRECTORY',
                                         help="Postgres cluster path, "
                                         "such as '/var/lib/database'")
    backup_fetchpush_parent.add_argument('--pool-size', '-p',
                                         type=int, default=4,
                                         help='Transfer pool size')

    wal_fetchpush_parent = argparse.ArgumentParser(add_help=False)
    wal_fetchpush_parent.add_argument('WAL_SEGMENT',
                                      help='Path to a WAL segment to upload')

    backup_fetch_parser = subparsers.add_parser(
        'backup-fetch', help='fetch a hot backup from S3',
        parents=[backup_fetchpush_parent])
    subparsers.add_parser('backup-list', help='list backups in S3')
    backup_push_parser = subparsers.add_parser(
        'backup-push', help='push a fresh hot backup to S3',
        parents=[backup_fetchpush_parent])
    backup_push_parser.add_argument(
        '--cluster-read-rate-limit',
        help='Rate limit reading the PostgreSQL cluster directory to a '
        'tunable number of bytes per second', dest='rate_limit',
        metavar='BYTES_PER_SECOND',
        type=int, default=None)

    wal_fetch_parser = subparsers.add_parser(
        'wal-fetch', help='fetch a WAL file from S3',
        parents=[wal_fetchpush_parent])
    subparsers.add_parser('wal-push', help='push a WAL file to S3',
                          parents=[wal_fetchpush_parent])

    wal_fark_parser = subparsers.add_parser('wal-fark',
                                            help='The FAke Arkiver')

    # XXX: Partial copy-paste, because parallel archiving is not
    # supported and having a --pool option here would be confusing.
    wal_fark_parser.add_argument('PG_CLUSTER_DIRECTORY',
                                 help="Postgres cluster path, "
                                 "such as '/var/lib/database'")

    # backup-fetch operator section
    backup_fetch_parser.add_argument('BACKUP_NAME',
                                     help='the name of the backup to fetch')

    # wal-fetch operator section
    wal_fetch_parser.add_argument('WAL_DESTINATION',
                                  help='Path to download the WAL segment to')

    args = parser.parse_args()

    secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    if secret_key is None:
        print >>sys.stderr, ('Must define AWS_SECRET_ACCESS_KEY to ask S3 '
                             'to do anything')
        sys.exit(1)

    s3_prefix = args.s3_prefix or os.getenv('WALE_S3_PREFIX')

    if s3_prefix is None:
        print >>sys.stderr, ('Must pass --s3-prefix or define environment '
                             'variable WALE_S3_PREFIX')
        sys.exit(1)

    if args.aws_access_key_id is None:
        aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
        if aws_access_key_id is None:
            print >>sys.stderr, ('Must define an AWS_ACCESS_KEY_ID, '
                                 'either via the environment variable or '
                                 'the --aws-access-key-id argument')
            sys.exit(1)
    else:
        aws_access_key_id = args.aws_access_key_id

    backup_cxt = S3Backup(aws_access_key_id, secret_key, s3_prefix)

    subcommand = args.subcommand

    try:
        if subcommand == 'backup-fetch':
            external_program_check([S3CMD_BIN, DECODE_BIN])
            backup_cxt.database_s3_fetch(args.PG_CLUSTER_DIRECTORY,
                                         args.BACKUP_NAME,
                                         pool_size=args.pool_size)
        elif subcommand == 'backup-list':
            external_program_check([S3CMD_BIN])
            backup_cxt.backup_list()
        elif subcommand == 'backup-push':
            external_program_check([S3CMD_BIN, ENCODE_BIN, PSQL_BIN, MBUFFER_BIN])
            rate_limit = args.rate_limit
            if rate_limit is not None and rate_limit < 8192:
                print >>sys.stderr, ('--cluster-read-rate-limit must be an '
                                     'integer greater than or equal to 8192')
                sys.exit(1)

            backup_cxt.database_s3_backup(
                args.PG_CLUSTER_DIRECTORY, rate_limit=rate_limit,
                pool_size=args.pool_size)
        elif subcommand == 'wal-fetch':
            external_program_check([S3CMD_BIN, DECODE_BIN])
            backup_cxt.wal_s3_restore(args.WAL_SEGMENT, args.WAL_DESTINATION)
        elif subcommand == 'wal-push':
            external_program_check([S3CMD_BIN, ENCODE_BIN])
            backup_cxt.wal_s3_archive(args.WAL_SEGMENT)
        elif subcommand == 'wal-fark':
            external_program_check([S3CMD_BIN, ENCODE_BIN])
            backup_cxt.wal_fark(args.PG_CLUSTER_DIRECTORY)
        else:
            print >>sys.stderr, ('Subcommand {0} not implemented!'
                                 .format(subcommand))
            sys.exit(127)
    except UserException as e:
        logger.log(level=e.severity,
                   msg=log_help.fmt_logline(e.msg, e.detail, e.hint))
        sys.exit(1)
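
For reference, here is a hypothetical launcher stanza for the parser built in main() above. The module name and the S3 prefix value below are placeholders; the environment variables are the ones main() actually checks.

# Assumed entry point (not part of the excerpt above), e.g.:
#
#   export AWS_ACCESS_KEY_ID=...        # or pass -k/--aws-access-key-id
#   export AWS_SECRET_ACCESS_KEY=...
#   export WALE_S3_PREFIX=s3://example-bucket/example-prefix
#   python cmd.py backup-list
#
# main() calls sys.exit() itself on errors, so a bare call suffices.
if __name__ == '__main__':
    main()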
Example #2
    def database_s3_backup(self, *args, **kwargs):
        """
        Uploads a PostgreSQL file cluster to S3

        Mechanism: wraps _s3_upload_pg_cluster_dir between start-backup
        and stop-backup actions, with exception handling.

        In particular, a 'finally' block stops the backup in most
        situations.

        """

        upload_good = False
        backup_stop_good = False
        try:
            start_backup_info = PgBackupStatements.run_start_backup()
            version = PgBackupStatements.pg_version()['version']
            uploaded_to, expanded_size_bytes = \
                self._s3_upload_pg_cluster_dir(
                    start_backup_info, version=version,
                    *args, **kwargs)
            upload_good = True
        finally:
            if not upload_good:
                logger.warning(
                    log_help.fmt_logline(
                        'blocking on sending WAL segments',
                        'The backup was not completed successfully, '
                        'but we have to wait anyway.  '
                        'See README: TODO about pg_cancel_backup'))

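            # pg_stop_backup must be attempted even when the upload
            # failed, hence its placement inside 'finally' rather than
            # the 'try' body.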
            stop_backup_info = PgBackupStatements.run_stop_backup()
            backup_stop_good = True

        if upload_good and backup_stop_good:
            # Make a best-effort attempt to write a sentinel file to
            # the cluster backup directory that indicates that the
            # base backup upload has definitely run its course (it may
            # have, even without this file, though) and also
            # communicates what WAL segments are needed to get to
            # consistency.
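            #
            # Hypothetical example of the resulting JSON (values made up):
            #     {"wal_segment_backup_stop": "000000010000000000000002",
            #      "wal_segment_offset_backup_stop": 312,
            #      "expanded_size_bytes": 123456789}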
            try:
                with self.s3cmd_temp_config as s3cmd_config:
                    with tempfile.NamedTemporaryFile(mode='w') as sentinel:
                        json.dump(
                            {'wal_segment_backup_stop':
                                 stop_backup_info['file_name'],
                             'wal_segment_offset_backup_stop':
                                 stop_backup_info['file_offset'],
                             'expanded_size_bytes': expanded_size_bytes},
                            sentinel)
                        sentinel.flush()

                        # Avoid do_lzo_aes_s3_put and store the sentinel
                        # uncompressed: plain JSON is easier to read,
                        # open, or dump to a terminal.
                        check_call_wait_sigint(
                            [S3CMD_BIN, '-c', s3cmd_config.name,
                             '--mime-type=application/json', 'put',
                             sentinel.name,
                             uploaded_to + '_backup_stop_sentinel.json'])
            except KeyboardInterrupt:
                # Specially re-raise exception on SIGINT to allow
                # propagation.
                raise
            except: