Example #1
0
def setup_database(cluster, cursor):
    """
    Prepares the database behind ``cursor`` for use with pgshovel and returns
    the node identifier associated with it.

    The statements issued directly here use ``IF NOT EXISTS`` / ``OR
    REPLACE``, so the function may be run against an existing installation to
    repair it or to refresh its log trigger function.
    """
    # The PgQ extension and the plpythonu language are fixed statements, so
    # they are announced and executed one after the other.
    prerequisites = (
        ('Creating PgQ extension (if it does not already exist)...',
         'CREATE EXTENSION IF NOT EXISTS pgq'),
        ('Creating (or updating) plpythonu language...',
         'CREATE OR REPLACE LANGUAGE plpythonu'),
    )
    for announcement, statement in prerequisites:
        logger.info(announcement)
        cursor.execute(statement)

    # The schema name comes from the cluster and is passed through ``quote``
    # before being interpolated into the statement.
    logger.info('Creating schema (if it does not already exist)...')
    create_schema = 'CREATE SCHEMA IF NOT EXISTS {schema}'.format(
        schema=quote(cluster.schema))
    cursor.execute(create_schema)

    # Supporting tables: configuration first, then replication state.
    logger.info('Creating configuration table (if it does not already exist)...')
    create_configuration_table(cluster, cursor)

    logger.info('Creating replication state table (if it does not already exist)...')
    create_replication_state_table(cluster, cursor)

    # Record the local version: insert it when nothing is stored yet, update
    # it when the stored value differs, and leave it alone when it matches.
    stored_version = get_configuration_value(cluster, cursor, 'version')
    if stored_version is None:
        set_configuration_value(cluster, cursor, 'version', __version__)
    elif str(stored_version) != __version__:
        update_configuration_value(cluster, cursor, 'version', __version__)

    # The log trigger function is installed with the node identifier, so the
    # identifier has to be looked up (or assigned) first.
    logger.info('Checking for node ID...')
    node_identifier = get_or_set_node_identifier(cluster, cursor)

    logger.info('Installing (or updating) log trigger function...')
    create_log_trigger_function(cluster, cursor, node_identifier)

    return node_identifier
Example #2
0
def setup_database(cluster, cursor):
    """
    Configures the database reachable through ``cursor`` for pgshovel and
    returns its node identifier.

    Running this against an already configured database is also the way to
    repair a broken installation or to update the installed log trigger
    function.
    """
    log = logger.info
    run = cursor.execute

    # PgQ extension (left untouched if it is already installed).
    log('Creating PgQ extension (if it does not already exist)...')
    run('CREATE EXTENSION IF NOT EXISTS pgq')

    # plpythonu language (created or replaced).
    log('Creating (or updating) plpythonu language...')
    run('CREATE OR REPLACE LANGUAGE plpythonu')

    # Cluster schema; the name is passed through ``quote`` before being
    # interpolated into the statement.
    log('Creating schema (if it does not already exist)...')
    schema_name = quote(cluster.schema)
    run('CREATE SCHEMA IF NOT EXISTS {schema}'.format(schema=schema_name))

    # Configuration table.
    log('Creating configuration table (if it does not already exist)...')
    create_configuration_table(cluster, cursor)

    # Keep the stored version in step with the local one: insert when nothing
    # is stored, update when the stored value differs, otherwise do nothing.
    recorded = get_configuration_value(cluster, cursor, 'version')
    if recorded is None:
        set_configuration_value(cluster, cursor, 'version', __version__)
    elif str(recorded) != __version__:
        update_configuration_value(cluster, cursor, 'version', __version__)

    # The node identifier is needed to install the log trigger function.
    log('Checking for node ID...')
    identifier = get_or_set_node_identifier(cluster, cursor)

    log('Installing (or updating) log trigger function...')
    create_log_trigger_function(cluster, cursor, identifier)

    return identifier
Example #3
0
def get_managed_databases(cluster,
                          dsns,
                          configure=True,
                          skip_inaccessible=False,
                          same_version=True):
    """
    Returns a dictionary of managed databases by their unique node ID, where
    each value is the open ``psycopg2`` connection to that node. If the same
    node is referenced multiple times (either by the same, or by different
    DSNs), an error is raised. An empty ``dsns`` returns an empty dictionary
    without connecting to anything.

    If the database has not already been configured for use with pgshovel, the
    database will be implicitly configured, unless the ``configure`` argument
    is ``False``, in which case the ``psycopg2.ProgrammingError`` raised by
    the failed lookup is re-raised. If the same node is attempted to be
    configured multiple times (by providing the same DSN multiple times, or
    different DSNs that point to the same database) an error is raised to
    prevent deadlocking during configuration. Databases that are already
    configured must report the same version as the local ``__version__``.

    By default, all databases must be accessible. If partial results are
    acceptable (such as cases where databases may be expected to have
    permanently failed), the ``skip_inaccessible`` argument allows returning
    only those databases that are able to be connected to; a warning is
    logged for each database that is skipped.

    The ``same_version`` argument selects how the ZooKeeper transaction is
    obtained: from ``check_version(cluster)`` when true, otherwise from
    ``cluster.zookeeper.transaction()``. That transaction is only passed to
    ``commit`` when at least one database had to be set up.

    Failed consistency checks (missing node ID, advisory lock not acquired,
    version mismatch, duplicate node) raise ``AssertionError`` explicitly
    rather than through ``assert`` statements, so they remain enforced when
    Python runs with ``-O``.
    """
    if not dsns:
        return {}

    nodes = {}

    if same_version:
        ztransaction = check_version(cluster)
    else:
        ztransaction = cluster.zookeeper.transaction()

    # A single lock ID is shared by every connection opened during this call,
    # which is what lets a second connection to the same database notice that
    # the first one is already configuring it.
    lock_id = random.randint(-2**63, 2**63 - 1)  # bigint max/min
    logger.debug('Connecting to databases: %s', FormattedSequence(dsns))

    transactions = []

    # NOTE(review): nothing in this function closes connections that were
    # opened before a later failure -- consider explicit cleanup.
    for dsn in dsns:
        try:
            connection = psycopg2.connect(dsn)
        except Exception as error:
            if not skip_inaccessible:
                raise
            logger.warning('%s is inaccessible due to error, skipping: %s',
                           dsn, error)
            continue

        logger.debug('Checking if %s has been configured...', dsn)
        try:
            with connection.cursor() as cursor:
                node_id = get_node_identifier(cluster, cursor)
                if node_id is None:
                    raise AssertionError(
                        'no node identifier found for %s' % (dsn, ))
        except psycopg2.ProgrammingError:
            if not configure:
                raise

            # TODO: Check this better to ensure this is the right type of error
            # (make sure that is specific enough to the table not being
            # present.)
            logger.info(
                '%s has not been configured for use, setting up now...', dsn)
            connection.rollback()  # start over

            transaction = Transaction(connection, 'setup-database')
            transactions.append(transaction)
            with connection.cursor() as cursor:
                # To ensure that we're not attempting to configure the same
                # database multiple times (which would result in a deadlock,
                # since the second transaction will block indefinitely, waiting
                # for the first transaction to be committed or rolled back) we
                # take out an advisory lock to check that we haven't already
                # prepared this database. (We can't simply check for the
                # existence of the configuration table at this point, since
                # that transaction has not been committed yet.)
                cursor.execute('SELECT pg_try_advisory_lock(%s) as acquired',
                               (lock_id, ))
                ((acquired, ), ) = cursor.fetchall()
                if not acquired:
                    # Raised explicitly (not asserted): this is the deadlock
                    # guard and must not vanish under ``python -O``.
                    raise AssertionError(
                        'could not take out advisory lock on %s (possible deadlock?)' % (
                            connection, ))

                node_id = setup_database(cluster, cursor)
        else:
            # Check to ensure that the remote database is configured using the
            # same version as the local version. This is important since a
            # previously configured database that has not been used for some
            # time can still have an old version of the schema, log trigger,
            # etc. Adding it back to the cluster without upgrading it can cause
            # strange compatibility issues.
            # TODO: It would make sense here to provide an easy upgrade path --
            # right now, there is no direct path to upgrading a database that
            # has no groups associated with it!
            with connection.cursor() as cursor:
                version = str(
                    get_configuration_value(cluster, cursor, 'version'))
                if version != __version__:
                    raise AssertionError(
                        'local and node versions do not match (local: %s, node: %s)' % (
                            __version__, version))

            logger.debug('%s is already configured as %s (version %s).', dsn,
                         node_id, version)
            connection.commit()  # don't leave idle in transaction

        # Without this check a repeated node would silently replace the
        # connection stored for it earlier.
        if node_id in nodes:
            raise AssertionError('found duplicate node: %s and %s' % (
                connection, nodes[node_id]))
        nodes[node_id] = connection

    # Only databases that were set up above have pending transactions; they
    # are handed to ``managed`` around the commit of the ZooKeeper transaction.
    if transactions:
        with managed(transactions):
            commit(ztransaction)

    return nodes
Example #4
0
def get_managed_databases(cluster, dsns, configure=True, skip_inaccessible=False, same_version=True):
    """
    Returns a dictionary of managed databases by their unique node ID, where
    each value is the open ``psycopg2`` connection to that node. If the same
    node is referenced multiple times (either by the same, or by different
    DSNs), an error is raised. An empty ``dsns`` returns an empty dictionary
    without connecting to anything.

    If the database has not already been configured for use with pgshovel, the
    database will be implicitly configured, unless the ``configure`` argument
    is ``False``, in which case the ``psycopg2.ProgrammingError`` raised by
    the failed lookup is re-raised. If the same node is attempted to be
    configured multiple times (by providing the same DSN multiple times, or
    different DSNs that point to the same database) an error is raised to
    prevent deadlocking during configuration. Databases that are already
    configured must report the same version as the local ``__version__``.

    By default, all databases must be accessible. If partial results are
    acceptable (such as cases where databases may be expected to have
    permanently failed), the ``skip_inaccessible`` argument allows returning
    only those databases that are able to be connected to; a warning is
    logged for each database that is skipped.

    The ``same_version`` argument selects how the ZooKeeper transaction is
    obtained: from ``check_version(cluster)`` when true, otherwise from
    ``cluster.zookeeper.transaction()``. That transaction is only passed to
    ``commit`` when at least one database had to be set up.

    Failed consistency checks (missing node ID, advisory lock not acquired,
    version mismatch, duplicate node) raise ``AssertionError`` explicitly
    rather than through ``assert`` statements, so they remain enforced when
    Python runs with ``-O``.
    """
    if not dsns:
        return {}

    nodes = {}

    if same_version:
        ztransaction = check_version(cluster)
    else:
        ztransaction = cluster.zookeeper.transaction()

    # A single lock ID is shared by every connection opened during this call,
    # which is what lets a second connection to the same database notice that
    # the first one is already configuring it.
    lock_id = random.randint(-2**63, 2**63-1)  # bigint max/min
    logger.debug('Connecting to databases: %s', FormattedSequence(dsns))

    transactions = []

    # NOTE(review): nothing in this function closes connections that were
    # opened before a later failure -- consider explicit cleanup.
    for dsn in dsns:
        try:
            connection = psycopg2.connect(dsn)
        except Exception as error:
            if not skip_inaccessible:
                raise
            logger.warning('%s is inaccessible due to error, skipping: %s', dsn, error)
            continue

        logger.debug('Checking if %s has been configured...', dsn)
        try:
            with connection.cursor() as cursor:
                node_id = get_node_identifier(cluster, cursor)
                if node_id is None:
                    raise AssertionError('no node identifier found for %s' % (dsn,))
        except psycopg2.ProgrammingError:
            if not configure:
                raise

            # TODO: Check this better to ensure this is the right type of error
            # (make sure that is specific enough to the table not being
            # present.)
            logger.info('%s has not been configured for use, setting up now...', dsn)
            connection.rollback()  # start over

            transaction = Transaction(connection, 'setup-database')
            transactions.append(transaction)
            with connection.cursor() as cursor:
                # To ensure that we're not attempting to configure the same
                # database multiple times (which would result in a deadlock,
                # since the second transaction will block indefinitely, waiting
                # for the first transaction to be committed or rolled back) we
                # take out an advisory lock to check that we haven't already
                # prepared this database. (We can't simply check for the
                # existence of the configuration table at this point, since
                # that transaction has not been committed yet.)
                cursor.execute('SELECT pg_try_advisory_lock(%s) as acquired', (lock_id,))
                ((acquired,),) = cursor.fetchall()
                if not acquired:
                    # Raised explicitly (not asserted): this is the deadlock
                    # guard and must not vanish under ``python -O``.
                    raise AssertionError('could not take out advisory lock on %s (possible deadlock?)' % (connection,))

                node_id = setup_database(cluster, cursor)
        else:
            # Check to ensure that the remote database is configured using the
            # same version as the local version. This is important since a
            # previously configured database that has not been used for some
            # time can still have an old version of the schema, log trigger,
            # etc. Adding it back to the cluster without upgrading it can cause
            # strange compatibility issues.
            # TODO: It would make sense here to provide an easy upgrade path --
            # right now, there is no direct path to upgrading a database that
            # has no groups associated with it!
            with connection.cursor() as cursor:
                version = str(get_configuration_value(cluster, cursor, 'version'))
                if version != __version__:
                    raise AssertionError('local and node versions do not match (local: %s, node: %s)' % (__version__, version))

            logger.debug('%s is already configured as %s (version %s).', dsn, node_id, version)
            connection.commit()  # don't leave idle in transaction

        # Without this check a repeated node would silently replace the
        # connection stored for it earlier.
        if node_id in nodes:
            raise AssertionError('found duplicate node: %s and %s' % (connection, nodes[node_id]))
        nodes[node_id] = connection

    # Only databases that were set up above have pending transactions; they
    # are handed to ``managed`` around the commit of the ZooKeeper transaction.
    if transactions:
        with managed(transactions):
            commit(ztransaction)

    return nodes