Beispiel #1
0
def session_scope(id_, versioned=True):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every commit()
    on the session. Your database backend's transaction semantics are important
    here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.

    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    Raises
    ------
    Whatever the wrapped block raises; a failed rollback surfaces the
    original exception after logging the OperationalError.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get("LOG_DB_SESSIONS"):
            start_time = time.time()
            # Two frames up: past this generator frame and its wrapper,
            # landing on the code that opened the scope.
            # NOTE(review): assumes a fixed call depth (e.g. a
            # @contextmanager wrapper) — confirm against the decorator.
            calling_frame = sys._getframe().f_back.f_back
            call_loc = "{}:{}".format(
                calling_frame.f_globals.get("__name__"), calling_frame.f_lineno
            )
            # Bind identifying context so the matching "closing" log line can
            # be correlated with this one.
            logger = log.bind(
                engine_id=id(engine), session_id=id(session), call_loc=call_loc
            )
            logger.info("creating db_session", sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except BaseException as exc:
        # Roll back on ANY error (including KeyboardInterrupt/GreenletExit)
        # and re-raise the original exception.
        try:
            session.rollback()
            raise
        except OperationalError:
            # The rollback itself failed (e.g. lost DB connection). Log it,
            # but surface the exception that triggered the rollback.
            log.warn(
                "Encountered OperationalError on rollback", original_exception=type(exc)
            )
            raise exc
    finally:
        if config.get("LOG_DB_SESSIONS"):
            # NOTE(review): start_time/logger are only bound when
            # LOG_DB_SESSIONS was truthy at entry; if the config value flips
            # while the session is open this raises NameError here.
            lifetime = time.time() - start_time
            logger.info(
                "closing db_session",
                lifetime=lifetime,
                sessions_used=engine.pool.checkedout(),
            )
        session.close()
Beispiel #2
0
def empty_db(config):
    """Reset the test database, then yield an engine carrying a fresh session."""
    from inbox.models.session import new_session
    from inbox.ignition import engine_manager

    setup_test_db()
    db_engine = engine_manager.get_for_id(0)
    db_engine.session = new_session(db_engine)
    yield db_engine
    db_engine.session.close()
Beispiel #3
0
def empty_db(config):
    """Fixture: set up a clean test DB and yield an engine with a session attached."""
    from inbox.ignition import engine_manager
    from inbox.models.session import new_session

    # Wipe and re-create the test database before handing out the engine.
    setup_test_db()
    eng = engine_manager.get_for_id(0)
    eng.session = new_session(eng)
    yield eng
    # Teardown: release the session once the consumer resumes us.
    eng.session.close()
Beispiel #4
0
def session_scope(id_, versioned=True, explicit_begin=False):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every commit()
    on the session. Your database backend's transaction semantics are important
    here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.
    versioned : bool
        Do you want to enable the transaction log?
    explicit_begin: bool
        If True, issue an explicit BEGIN statement instead of relying on
        implicit transactional semantics.

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        # BUGFIX: explicit_begin was previously accepted but never acted on;
        # issue the explicit BEGIN the docstring promises.
        if explicit_begin:
            session.execute('BEGIN')
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            # Two frames up: past this generator frame and its wrapper,
            # landing on the code that opened the scope.
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except (gevent.GreenletExit, gevent.Timeout):
        # A green thread was killed or timed out mid-transaction: the
        # connection may be in an unknown state, so invalidate it rather
        # than try to roll back. (Deliberately swallows the exception.)
        log.info('Invalidating connection on gevent exception', exc_info=True)
        session.invalidate()
    except BaseException as exc:
        # Roll back on any other error and re-raise the original exception.
        try:
            session.rollback()
            raise
        except OperationalError:
            # The rollback itself failed (e.g. lost DB connection). Log it,
            # but surface the exception that triggered the rollback.
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
Beispiel #5
0
def db(dbloader):
    """Yield the shard-0 engine with a shared session attached for tests."""
    from inbox.models.session import new_session
    from inbox.ignition import engine_manager

    eng = engine_manager.get_for_id(0)
    # TODO(emfree): tests should really either instantiate their own sessions,
    # or take a fixture that is itself a session.
    eng.session = new_session(eng)
    yield eng
    eng.session.close()
Beispiel #6
0
def db(dbloader):
    """Fixture: engine for shard 0 carrying a session shared across the test."""
    from inbox.ignition import engine_manager
    from inbox.models.session import new_session

    db_engine = engine_manager.get_for_id(0)
    # TODO(emfree): tests should really either instantiate their own sessions,
    # or take a fixture that is itself a session.
    db_engine.session = new_session(db_engine)
    yield db_engine
    db_engine.session.close()
Beispiel #7
0
def session_scope(id_, versioned=True):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every commit()
    on the session. Your database backend's transaction semantics are important
    here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.
    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            # Two frames up: past this generator frame and its wrapper,
            # landing on the code that opened the scope.
            # NOTE(review): assumes a fixed call depth — confirm against
            # how this generator is wrapped.
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except BaseException as exc:
        # Roll back on ANY error and re-raise the original exception.
        try:
            session.rollback()
            raise
        except OperationalError:
            # The rollback itself failed (e.g. lost DB connection). Log it,
            # but surface the exception that triggered the rollback.
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            # NOTE(review): start_time/logger are only bound when
            # LOG_DB_SESSIONS was truthy at entry; a mid-session config flip
            # raises NameError here.
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
Beispiel #8
0
def delete_namespace(account_id, namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Parameters
    ----------
    account_id : int
        Primary key of the account being deleted.
    namespace_id : int
        Primary key of the namespace whose data is deleted.
    throttle : bool
        If True, pause between bulk deletes when check_throttle() says so.
    dry_run : bool
        If True, only log the statements that would be executed.

    """
    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    # Chunk delete for tables that might have a large concurrent write volume
    # to prevent those transactions from blocking.
    # NOTE: ImapFolderInfo does not fall into this category but we include it
    # here for simplicity.

    filters = OrderedDict()

    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if account.discriminator != 'easaccount':
            filters['imapuid'] = ('account_id', account_id)
            filters['imapfoldersyncstatus'] = ('account_id', account_id)
            filters['imapfolderinfo'] = ('account_id', account_id)
        else:
            filters['easuid'] = ('easaccount_id', account_id)
            filters['easfoldersyncstatus'] = ('account_id', account_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    # BUGFIX: dict.iteritems() is Python-2-only; .items() behaves
    # equivalently here and works on both Python 2 and 3.
    for table, (column, id_) in filters.items():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()
Beispiel #9
0
def delete_namespace(account_id, namespace_id, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Parameters
    ----------
    account_id : int
        Primary key of the account being deleted.
    namespace_id : int
        Primary key of the namespace whose data is deleted.
    dry_run : bool
        If True, only print the statements that would be executed.

    """
    from inbox.models.session import session_scope
    from inbox.models import Account
    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    # Chunk delete for tables that might have a large concurrent write volume
    # to prevent those transactions from blocking.
    # NOTE: ImapFolderInfo does not fall into this category but we include it
    # here for simplicity.

    filters = OrderedDict()

    for table in [
            'message', 'block', 'thread', 'transaction', 'actionlog',
            'contact', 'event', 'dataprocessingcache'
    ]:
        filters[table] = ('namespace_id', namespace_id)

    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if account.discriminator != 'easaccount':
            filters['imapuid'] = ('account_id', account_id)
            filters['imapfoldersyncstatus'] = ('account_id', account_id)
            filters['imapfolderinfo'] = ('account_id', account_id)
        else:
            filters['easuid'] = ('easaccount_id', account_id)
            filters['easfoldersyncstatus'] = ('account_id', account_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    # BUGFIX: dict.iteritems() and print *statements* are Python-2-only.
    # .items() and single-argument print(...) calls behave identically on
    # Python 2 and are valid Python 3.
    for table, (column, id_) in filters.items():
        print('Performing bulk deletion for table: {}'.format(table))
        start = time.time()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            print(query.format(table, column, id_))

        end = time.time()
        print('Completed bulk deletion for table: {}, time taken: {}'
              .format(table, end - start))

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()
Beispiel #10
0
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Parameters
    ----------
    namespace_id : int
        Primary key of the namespace whose data is deleted.
    throttle : bool
        If True, call bulk_throttle() before each bulk delete to pace load.
    dry_run : bool
        If True, only log the statements that would be executed.

    Raises AccountDeletionErrror with message if there are problems
    """

    # Look up the account and verify it has been explicitly flagged for
    # deletion before destroying anything.
    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume.  See _batch_delete
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.

    # OrderedDict: deletion order matters because of foreign-key
    # relationships between these tables.
    filters = OrderedDict()
    for table in [
            "message",
            "block",
            "thread",
            "transaction",
            "actionlog",
            "event",
            "contact",
            "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    # EAS and IMAP accounts store sync state in different tables.
    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine,
                      cls,
                      filters[cls],
                      account_id,
                      throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data ( heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
Beispiel #11
0
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Parameters
    ----------
    namespace_id : int
        Primary key of the namespace whose data is deleted.
    throttle : bool
        If True, pause between bulk deletes when check_throttle() says so.
    dry_run : bool
        If True, only log the statements that would be executed.

    Raises AccountDeletionErrror with message if there are problems
    """

    # Look up the account and verify it has been explicitly flagged for
    # deletion before destroying anything.
    with session_scope(namespace_id) as db_session:
        try:
            account = db_session.query(Account).join(Namespace).filter(Namespace.id == namespace_id).one()
        except NoResultFound:
            raise AccountDeletionErrror(
                'Could not find account in database')

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                'Account is_marked_for_deletion is False. '
                'Change this to proceed with deletion.')
        account_id = account.id
        account_discriminator = account.discriminator

    log.info('Deleting account', account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume.  See _batch_delete
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.

    # OrderedDict: deletion order matters because of foreign-key
    # relationships between these tables.
    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    # EAS and IMAP accounts store sync state in different tables.
    if account_discriminator == 'easaccount':
        filters['easuid'] = ('easaccount_id', account_id)
        filters['easfoldersyncstatus'] = ('account_id', account_id)
    else:
        filters['imapuid'] = ('account_id', account_id)
        filters['imapfoldersyncstatus'] = ('account_id', account_id)
        filters['imapfolderinfo'] = ('account_id', account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    # BUGFIX: dict.iteritems() is Python-2-only; .items() behaves
    # equivalently here and works on both Python 2 and 3.
    for table, (column, id_) in filters.items():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data ( heartbeats)
    log.debug('Deleting liveness data', account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         time.time() - start_time)