Esempio n. 1
0
def main(options):
    """Dump, load, or delete per-user shard data depending on options.action.

    options is expected to provide: action (DUMP/LOAD/DELETE), path,
    from_shard_id, to_shard_id and user_ids -- presumably a pre-formatted
    list of ids spliced into the SQL templates (TODO confirm with caller).
    """
    store = get_shard_store(options.from_shard_id)

    user_ids = options.user_ids or ""
    if options.action == DUMP:
        # Refresh COPYINFO metadata; NOTE(review): the meaning of the
        # boolean flag is not visible from here -- confirm in
        # get_column_names before relying on it.
        get_column_names(store, True)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            select = c[1]['query']
            replacements = dict(columns=columns,
                                userids=user_ids,
                                shard_id=options.to_shard_id)
            # Server-side COPY of the per-user SELECT into a flat file.
            sql = "COPY (%s) TO '%s/%s.dat';" % (select % replacements,
                                                 options.path, table)
            print "Dumping %s TO %s/%s.dat" % (table, options.path, table),
            # Abort (not commit) on purpose: COPY ... TO only reads, so
            # there is nothing to persist.
            storage_tm.begin()
            execute_sql(store, sql)
            storage_tm.abort()
        return

    if options.action == LOAD:
        get_column_names(store, False)
        shard_store = get_shard_store(options.to_shard_id)
        storage_tm.begin()
        print "Dropping Indexes..."
        # Drop indexes/constraints first so the bulk COPY FROM below is
        # not slowed down by per-row index maintenance.
        for sql in DROP_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in CREATE_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in DROP_INDEXES_SQL:
            shard_store.execute(sql)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            sql = "COPY %s (%s) FROM '%s/%s.dat';" % (table, columns,
                                                      options.path, table)
            print "Loading %s From %s/%s.dat" % (table, options.path,
                                                 table),
            execute_sql(shard_store, sql)
        user_store = get_user_store()
        # Point the moved users at their new shard.
        user_store.execute(INITUSER % dict(shard_id=options.to_shard_id,
                                           userids=user_ids))
        print "Rebuilding Indexes..."
        # Recreate the real indexes now that the data is loaded.
        for sql in DROP_NO_DUPLICATE_SQL:
            execute_sql(shard_store, sql)
        for sql in CREATE_INDEXES_SQL:
            execute_sql(shard_store, sql)
        storage_tm.commit()
        return

    if options.action == DELETE:
        shard_store = get_shard_store(options.from_shard_id)
        storage_tm.begin()
        print "Deleting user data..."
        for sql in DELETE_SQL:
            execute_sql(shard_store, sql % dict(userids=user_ids))
        storage_tm.commit()
Esempio n. 2
0
def main(options):
    """Dump, load, or delete per-user shard data depending on options.action.

    options is expected to provide: action (DUMP/LOAD/DELETE), path,
    from_shard_id, to_shard_id and user_ids -- presumably a pre-formatted
    list of ids spliced into the SQL templates (TODO confirm with caller).
    """
    store = get_shard_store(options.from_shard_id)

    user_ids = options.user_ids or ""
    if options.action == DUMP:
        # NOTE(review): the boolean flag's meaning is not visible here;
        # confirm in get_column_names.
        get_column_names(store, True)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            select = c[1]['query']
            replacements = dict(columns=columns,
                                userids=user_ids,
                                shard_id=options.to_shard_id)
            # Server-side COPY of the per-user SELECT into a flat file.
            sql = "COPY (%s) TO '%s/%s.dat';" % (select % replacements,
                                                 options.path, table)
            print "Dumping %s TO %s/%s.dat" % (table, options.path, table),
            # Abort (not commit) on purpose: COPY ... TO only reads.
            storage_tm.begin()
            execute_sql(store, sql)
            storage_tm.abort()
        return

    if options.action == LOAD:
        get_column_names(store, False)
        shard_store = get_shard_store(options.to_shard_id)
        storage_tm.begin()
        print "Dropping Indexes..."
        # Drop indexes/constraints first so the bulk COPY FROM below is
        # not slowed down by per-row index maintenance.
        for sql in DROP_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in CREATE_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in DROP_INDEXES_SQL:
            shard_store.execute(sql)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            sql = "COPY %s (%s) FROM '%s/%s.dat';" % (table, columns,
                                                      options.path, table)
            print "Loading %s From %s/%s.dat" % (table, options.path, table),
            execute_sql(shard_store, sql)
        user_store = get_user_store()
        # Point the moved users at their new shard.
        user_store.execute(
            INITUSER % dict(shard_id=options.to_shard_id, userids=user_ids))
        print "Rebuilding Indexes..."
        # Recreate the real indexes now that the data is loaded.
        for sql in DROP_NO_DUPLICATE_SQL:
            execute_sql(shard_store, sql)
        for sql in CREATE_INDEXES_SQL:
            execute_sql(shard_store, sql)
        storage_tm.commit()
        return

    if options.action == DELETE:
        shard_store = get_shard_store(options.from_shard_id)
        storage_tm.begin()
        print "Deleting user data..."
        for sql in DELETE_SQL:
            execute_sql(shard_store, sql % dict(userids=user_ids))
        storage_tm.commit()
    def test_stores(self):
        """Every configured shard store is reachable and cached per shard."""
        # Talk to each shard once to prove the connection works.
        for shard_id in dbm.get_shard_ids():
            dbm.get_shard_store(shard_id).execute('SELECT 1')

        # Repeated lookups for one shard must hand back the same object...
        first = dbm.get_shard_store('shard0')
        second = dbm.get_shard_store('shard0')
        self.assertTrue(first is second)
        # ...while a different shard yields a different store.
        other = dbm.get_shard_store('shard1')
        self.assertFalse(first is other)
    def test_stores(self):
        """get_shard_store returns one working, cached store per shard id."""
        # Exercise a trivial query against every shard store.
        for shard_id in dbm.get_shard_ids():
            store = dbm.get_shard_store(shard_id)
            store.execute('SELECT 1')

        # Same shard id -> identical store instance.
        store_a = dbm.get_shard_store('shard0')
        self.assertTrue(store_a is dbm.get_shard_store('shard0'))
        # Distinct shard id -> distinct store instance.
        self.assertFalse(store_a is dbm.get_shard_store('shard1'))
Esempio n. 5
0
def keep_last_rows_for_worker_names(store_name, worker_names):
    """Remove every txlog.db_worker_last_row entry whose worker_id does
    not appear in worker_names."""
    shard_store = dbmanager.get_shard_store(store_name)
    delete_sql = ("DELETE FROM txlog.db_worker_last_row "
                  "WHERE worker_id NOT IN ?;")
    shard_store.execute(delete_sql, (tuple(worker_names), ))
Esempio n. 6
0
def delete_old_txlogs(store_name, timestamp_limit, quantity_limit=None):
    """Remove transaction logs stamped at or before timestamp_limit.

    The store is resolved by name (not passed as a reference) to
    sidestep potential thread safety problems.  timestamp_limit is a
    datetime.datetime; rows newer than it are kept.  When
    quantity_limit is not None it caps how many rows one call removes.
    Returns the number of rows deleted.
    """
    shard_store = dbmanager.get_shard_store(store_name)
    subquery = "SELECT id FROM txlog.transaction_log WHERE timestamp <= ?"
    args = [timestamp_limit]
    if quantity_limit is not None:
        subquery += " LIMIT ?"
        args.append(quantity_limit)
    delete_sql = ("DELETE FROM txlog.transaction_log WHERE id IN (%s);" %
                  subquery)
    return shard_store.execute(delete_sql, args).rowcount
Esempio n. 7
0
def delete_expired_unseen(store_name,
                          worker_id,
                          unseen_ids=None,
                          expire_secs=None):
    """Delete expired rows from txlog.db_worker_unseen for worker_id.

    Rows older than expire_secs (default UNSEEN_EXPIRES) are removed;
    when unseen_ids is given (any iterable) those ids are also deleted
    explicitly, in chunks of CHUNK_SIZE.  Returns the total number of
    rows deleted.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    worker_id = unicode(worker_id)
    store = dbmanager.get_shard_store(store_name)
    deleted = 0
    # "Expired" means created more than expire_secs before UTC now.
    condition = (u"created < TIMEZONE('UTC'::text, NOW()) "
                 "     - INTERVAL '{} seconds'".format(expire_secs))
    query = (u"DELETE FROM txlog.db_worker_unseen " "WHERE worker_id = ? ")
    if unseen_ids is not None:
        fmt = u"({})".format
        # Normalize to an iterator so ichunk() consumes it incrementally
        # (Python 2 iterators expose .next).
        if getattr(unseen_ids, "next", None) is None:
            unseen_ids = iter(unseen_ids)
        while True:
            # Build a "(id1),(id2),..." VALUES list for the next chunk;
            # an empty join means the iterator is exhausted.
            unseen_args = u",".join(imap(fmt, ichunk(unseen_ids, CHUNK_SIZE)))
            if not unseen_args:
                break
            result = store.execute(
                query + u"AND ({} OR id = ANY(VALUES {}));".format(
                    condition, unseen_args), (worker_id, ))
            deleted += result.rowcount
    else:
        query += u"AND {};".format(condition)
        result = store.execute(query, (worker_id, ))
        deleted = result.rowcount
    return deleted
Esempio n. 8
0
def update_last_row(worker_name, row_id, timestamp, store_name):
    """Upsert the last processed txlog row (id + timestamp) for a worker.

    The store is resolved by name to sidestep potential thread safety
    problems.  Transaction management belongs to the caller; since this
    writes, call it from fsync_commit-decorated code.  Raises
    RuntimeError if neither the UPDATE nor the INSERT touches a row.
    """
    store = dbmanager.get_shard_store(store_name)
    worker_key = unicode(worker_name)
    # Common case first: the worker already has a row to update.
    updated = store.execute(u"""UPDATE txlog.db_worker_last_row
        SET row_id=?, timestamp=?
        WHERE worker_id=?""", (row_id, timestamp, worker_key)).rowcount
    if updated == 0:
        inserted = store.execute(
            u"""INSERT INTO txlog.db_worker_last_row
            (worker_id, row_id, timestamp) VALUES (?, ?, ?)""",
            (worker_key, row_id, timestamp)).rowcount
        if inserted == 0:
            raise RuntimeError(
                'Failed to update or insert last row id for worker %s' %
                worker_key)
Esempio n. 9
0
def get_last_row(worker_name, store_name):
    """Return (row_id, timestamp) of the last row processed by a worker.

    Falls back to the oldest entry in txlog.db_worker_last_row when the
    worker has no row of its own, and to NEW_WORKER_LAST_ROW when the
    table is empty.  The store is resolved by name to sidestep thread
    safety problems; transaction management belongs to the caller
    (read-only, so fsync_readonly is fine).
    """
    worker_key = unicode(worker_name)
    store = dbmanager.get_shard_store(store_name)

    row = store.execute(u"""SELECT row_id, timestamp
        FROM txlog.db_worker_last_row
        WHERE worker_id=?""", (worker_key,)).get_one()
    if row:
        return row
    # No per-worker entry: use the oldest known position instead.
    row = store.execute(u"""SELECT row_id, timestamp
            FROM txlog.db_worker_last_row
            ORDER BY row_id LIMIT 1""").get_one()
    return row or NEW_WORKER_LAST_ROW
Esempio n. 10
0
def keep_last_rows_for_worker_names(store_name, worker_names):
    """Prune txlog.db_worker_last_row down to the rows whose worker_id
    appears in worker_names."""
    names = tuple(worker_names)
    dbmanager.get_shard_store(store_name).execute(
        "DELETE FROM txlog.db_worker_last_row "
        "WHERE worker_id NOT IN ?;", (names, ))
Esempio n. 11
0
def delete_old_txlogs(store_name, timestamp_limit, quantity_limit=None):
    """Purge transaction_log rows stamped at or before timestamp_limit.

    The store is looked up by name rather than passed by reference to
    avoid thread safety issues.  timestamp_limit is a
    datetime.datetime; later rows are left alone.  quantity_limit, when
    given, bounds how many rows a single call deletes.  Returns the
    number of rows removed.
    """
    args = [timestamp_limit]
    picker = "SELECT id FROM txlog.transaction_log WHERE timestamp <= ?"
    if quantity_limit is not None:
        picker += " LIMIT ?"
        args.append(quantity_limit)

    store = dbmanager.get_shard_store(store_name)
    result = store.execute(
        "DELETE FROM txlog.transaction_log WHERE id IN (%s);" % picker, args)
    return result.rowcount
Esempio n. 12
0
def delete_expired_unseen(store_name, worker_id, unseen_ids=None,
                          expire_secs=None):
    """Delete expired rows from txlog.db_worker_unseen for worker_id.

    Rows older than expire_secs (default UNSEEN_EXPIRES) are removed;
    when unseen_ids is given (any iterable) those ids are also deleted
    explicitly, in chunks of CHUNK_SIZE.  Returns the total number of
    rows deleted.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    worker_id = unicode(worker_id)
    store = dbmanager.get_shard_store(store_name)
    deleted = 0
    # "Expired" means created more than expire_secs before UTC now.
    condition = (u"created < TIMEZONE('UTC'::text, NOW()) "
                 "     - INTERVAL '{} seconds'".format(expire_secs))
    query = (u"DELETE FROM txlog.db_worker_unseen "
             "WHERE worker_id = ? ")
    if unseen_ids is not None:
        fmt = u"({})".format
        # Normalize to an iterator so ichunk() consumes it incrementally
        # (Python 2 iterators expose .next).
        if getattr(unseen_ids, "next", None) is None:
            unseen_ids = iter(unseen_ids)
        while True:
            # Build a "(id1),(id2),..." VALUES list for the next chunk;
            # an empty join means the iterator is exhausted.
            unseen_args = u",".join(imap(fmt, ichunk(unseen_ids, CHUNK_SIZE)))
            if not unseen_args:
                break
            result = store.execute(
                query + u"AND ({} OR id = ANY(VALUES {}));".format(
                    condition, unseen_args), (worker_id,))
            deleted += result.rowcount
    else:
        query += u"AND {};".format(condition)
        result = store.execute(query, (worker_id,))
        deleted = result.rowcount
    return deleted
Esempio n. 13
0
def get_last_row(worker_name, store_name):
    """Fetch (row_id, timestamp) of the last row processed by worker_name.

    When the worker has no entry, fall back to the oldest row in
    txlog.db_worker_last_row; when the table is empty, return
    NEW_WORKER_LAST_ROW.  The store is resolved by name for thread
    safety; transactions are the caller's responsibility (read-only,
    so fsync_readonly is fine).
    """
    store = dbmanager.get_shard_store(store_name)
    worker_key = unicode(worker_name)

    for_worker = store.execute(
        u"""SELECT row_id, timestamp
        FROM txlog.db_worker_last_row
        WHERE worker_id=?""", (worker_key, )).get_one()
    if for_worker:
        return for_worker
    # No per-worker entry: take the oldest known position instead.
    oldest = store.execute(u"""SELECT row_id, timestamp
            FROM txlog.db_worker_last_row
            ORDER BY row_id LIMIT 1""").get_one()
    return oldest or NEW_WORKER_LAST_ROW
Esempio n. 14
0
def update_last_row(worker_name, row_id, timestamp, store_name):
    """Record the most recently processed txlog entry for a worker.

    Tries an UPDATE first and falls back to INSERT when the worker has
    no row yet.  The store is resolved by name to sidestep thread
    safety problems; callers manage the transaction (this writes, so
    use fsync_commit blocks).  Raises RuntimeError when neither
    statement affects a row.
    """
    store = dbmanager.get_shard_store(store_name)
    worker_key = unicode(worker_name)
    update_result = store.execute(
        u"""UPDATE txlog.db_worker_last_row
        SET row_id=?, timestamp=?
        WHERE worker_id=?""", (row_id, timestamp, worker_key))
    if update_result.rowcount != 0:
        return
    insert_result = store.execute(
        u"""INSERT INTO txlog.db_worker_last_row
        (worker_id, row_id, timestamp) VALUES (?, ?, ?)""",
        (worker_key, row_id, timestamp))
    if insert_result.rowcount == 0:
        raise RuntimeError(
            'Failed to update or insert last row id for worker %s' %
            worker_key)
Esempio n. 15
0
    def test_bootstrap_picks_up_user(self):
        """Bootstrap records an OP_USER_CREATED txlog for the user."""
        new_user = self.obj_factory.make_user()

        TransactionLog.bootstrap(new_user)

        shard_store = get_shard_store(new_user.shard_id)
        txlog = shard_store.find(
            TransactionLog, op_type=TransactionLog.OP_USER_CREATED).one()
        self.assertTxLogDetailsMatchesUserDetails(new_user, txlog)
Esempio n. 16
0
    def test_bootstrap_picks_up_user(self):
        """A bootstrapped user gets a matching OP_USER_CREATED entry."""
        user = self.obj_factory.make_user()

        TransactionLog.bootstrap(user)

        result = get_shard_store(user.shard_id).find(
            TransactionLog, op_type=TransactionLog.OP_USER_CREATED)
        self.assertTxLogDetailsMatchesUserDetails(user, result.one())
Esempio n. 17
0
 def setUp(self):
     """Install a txlog-free make_user and grab the shard store."""
     super(TransactionLogUtilsTestCase, self).setUp()
     self._orig_make_user = self.obj_factory.make_user
     # Replace make_user so fixture creation does not emit
     # TransactionLogs that would pollute these tests.
     patcher = patch.object(self.obj_factory, 'make_user')
     self.addCleanup(patcher.stop)
     factory_mock = patcher.start()
     factory_mock.side_effect = self._make_user_without_txlog
     self.store = dbmanager.get_shard_store(self.obj_factory.sstore_name)
Esempio n. 18
0
 def setUp(self):
     """Patch make_user to skip TransactionLogs; cache the shard store."""
     super(TransactionLogUtilsTestCase, self).setUp()
     self._orig_make_user = self.obj_factory.make_user
     # The stock make_user would create TransactionLogs and pollute
     # the assertions in these tests, so swap in a silent variant.
     make_user_patch = patch.object(self.obj_factory, 'make_user')
     self.addCleanup(make_user_patch.stop)
     make_user_patch.start().side_effect = self._make_user_without_txlog
     self.store = dbmanager.get_shard_store(self.obj_factory.sstore_name)
Esempio n. 19
0
    def test_txlog_for_new_storageuser(self):
        """Creating a StorageUser writes a matching txlog entry."""
        new_id = self.obj_factory.get_unique_integer()
        username = self.obj_factory.get_unique_unicode()
        display_name = self.obj_factory.get_unique_unicode()

        user = StorageUser.new(
            self.ustore, new_id, username, display_name, default_shard_id)

        shard_store = get_shard_store(user.shard_id)
        txlog = shard_store.find(TransactionLog, owner_id=user.id).one()
        self.assertTxLogDetailsMatchesUserDetails(user, txlog)
Esempio n. 20
0
    def test_txlog_for_new_storageuser(self):
        """A new StorageUser leaves a txlog row it can be matched against."""
        uid = self.obj_factory.get_unique_integer()
        uname = self.obj_factory.get_unique_unicode()
        vname = self.obj_factory.get_unique_unicode()

        user = StorageUser.new(self.ustore, uid, uname, vname,
                               default_shard_id)

        found = get_shard_store(user.shard_id).find(
            TransactionLog, owner_id=user.id)
        self.assertTxLogDetailsMatchesUserDetails(user, found.one())
Esempio n. 21
0
    def test_bootstrap_picks_up_shares(self):
        """Bootstrap records an OP_SHARE_ACCEPTED txlog for each share."""
        user = self.obj_factory.make_user()
        folder = self.obj_factory.make_directory(user)
        share = self.obj_factory.make_share(folder)
        self.sstore.commit()

        TransactionLog.bootstrap(user)

        shard_store = get_shard_store(user.shard_id)
        txlog = shard_store.find(
            TransactionLog, op_type=TransactionLog.OP_SHARE_ACCEPTED).one()
        expected_attrs = self._get_dict_with_txlog_attrs_from_share(
            share, folder, TransactionLog.OP_SHARE_ACCEPTED)
        self.assert_txlog_correct(txlog, expected_attrs)
Esempio n. 22
0
def get_row_by_time(store_name, timestamp):
    """Return (id, timestamp) of the first txlog at or after timestamp.

    "First" is the smallest row id matching the cutoff; (None, None)
    when no row qualifies.
    """
    store = dbmanager.get_shard_store(store_name)
    sql = """
        SELECT id, timestamp FROM txlog.transaction_log
        WHERE timestamp >= ? ORDER BY id LIMIT 1;
    """
    row = store.execute(sql, (timestamp, )).get_one()
    if row is None:
        return None, None
    return row[0], row[1]
Esempio n. 23
0
    def test_bootstrap_picks_up_shares(self):
        """An accepted share is replayed as OP_SHARE_ACCEPTED on bootstrap."""
        owner = self.obj_factory.make_user()
        directory = self.obj_factory.make_directory(owner)
        share = self.obj_factory.make_share(directory)
        self.sstore.commit()

        TransactionLog.bootstrap(owner)

        entry = get_shard_store(owner.shard_id).find(
            TransactionLog, op_type=TransactionLog.OP_SHARE_ACCEPTED).one()
        expected = self._get_dict_with_txlog_attrs_from_share(
            share, directory, TransactionLog.OP_SHARE_ACCEPTED)
        self.assert_txlog_correct(entry, expected)
Esempio n. 24
0
def get_row_by_time(store_name, timestamp):
    """Locate the earliest txlog row stamped at timestamp or later.

    Returns an (id, timestamp) pair, or (None, None) when nothing
    matches.  "Earliest" means the lowest matching row id.
    """
    shard_store = dbmanager.get_shard_store(store_name)
    sql = """
        SELECT id, timestamp FROM txlog.transaction_log
        WHERE timestamp >= ? ORDER BY id LIMIT 1;
    """
    match = shard_store.execute(sql, (timestamp,)).get_one()
    return match if match is not None else (None, None)
Esempio n. 25
0
 def _record_share_accepted_or_deleted(cls, share, op_type):
     """Add a TransactionLog row for a share being accepted or deleted.

     The entry is created in the shard of the user who shared the node;
     the share details are serialized into extra_data as JSON.
     """
     store = get_shard_store(share.sharedbyuser.shard_id)
     node = store.get(StorageObject, share.subtree)
     when_last_changed = share.when_last_changed
     extra_data = dict(
         shared_to=share.shared_to, share_id=str(share.id),
         share_name=share.name, access_level=share.access,
         when_shared=get_epoch_secs(share.when_shared),
         when_last_changed=get_epoch_secs(when_last_changed))
     # generation is deliberately left as None for share operations.
     txlog = cls(
         node.id, node.owner_id, node.volume_id, op_type, node.full_path,
         node.mimetype, generation=None,
         extra_data=json.dumps(extra_data).decode('ascii'))
     # Add the txlog to whichever store owns the node object.
     return Store.of(node).add(txlog)
Esempio n. 26
0
    def record_user_created(cls, user):
        """Store a TransactionLog row describing a newly created user.

        The TransactionLog table is reused for user-creation events so
        derived services can learn about new users as well as their
        files.  Such rows carry no node_id, volume_id, generation or
        path, and owner_id is the new user's id.
        """
        details = dict(name=user.username, visible_name=user.visible_name)
        payload = json.dumps(details).decode("ascii")
        txlog = cls(
            None, user.id, None, cls.OP_USER_CREATED, None, None,
            extra_data=payload)
        return get_shard_store(user.shard_id).add(txlog)
Esempio n. 27
0
def delete_txlogs_slice(store_name, date, quantity_limit):
    """Delete up to quantity_limit txlogs stamped exactly on date.

    Like delete_old_txlogs, but matches timestamp::date equality
    against the given datetime.date instead of a <= cutoff, and the
    quantity limit is mandatory.  Returns the number of rows deleted.
    """
    store = dbmanager.get_shard_store(store_name)
    picker = ("SELECT id FROM txlog.transaction_log "
              "WHERE timestamp::date = ? LIMIT ?")
    sql = "DELETE FROM txlog.transaction_log WHERE id IN (%s);" % picker
    return store.execute(sql, [date, quantity_limit]).rowcount
Esempio n. 28
0
def delete_txlogs_slice(store_name, date, quantity_limit):
    """Remove a bounded batch of txlogs from one specific day.

    Unlike delete_old_txlogs this matches rows whose timestamp::date
    equals the given datetime.date exactly, and quantity_limit is
    required.  Returns how many rows were removed.
    """
    shard_store = dbmanager.get_shard_store(store_name)
    args = [date, quantity_limit]
    id_query = ("SELECT id FROM txlog.transaction_log "
                "WHERE timestamp::date = ? LIMIT ?")
    result = shard_store.execute(
        "DELETE FROM txlog.transaction_log WHERE id IN (%s);" % id_query,
        args)
    return result.rowcount
Esempio n. 29
0
    def record_user_created(cls, user):
        """Persist a TransactionLog row announcing a new user.

        User creation is recorded in the TransactionLog table because
        derived services need user information alongside file events.
        The row has no node_id, volume_id, generation or path; its
        owner_id is the id of the newly created user.
        """
        user_info = dict(
            name=user.username, visible_name=user.visible_name)
        entry = cls(
            None, user.id, None, cls.OP_USER_CREATED, None, None,
            extra_data=json.dumps(user_info).decode('ascii'))
        shard_store = get_shard_store(user.shard_id)
        return shard_store.add(entry)
Esempio n. 30
0
    def test_over_quota(self):
        """An over-quota user sees 0 free bytes, never a negative value."""
        self.usr0.update(max_storage_bytes=2 ** 16)
        # Force an impossible state (usage above the cap) directly in
        # the shard store, since normal operations can't produce it.
        shard_store = dbmanager.get_shard_store(self.usr0.shard_id)
        user_info = shard_store.get(model.StorageUserInfo, 0)
        user_info.used_storage_bytes = 2 ** 17
        shard_store.commit()

        @defer.inlineCallbacks
        def do_test(client):
            """Authenticate and check the reported free space."""
            yield client.dummy_authenticate("open sesame")
            result = yield client.get_free_space(request.ROOT)
            self.assertEqual(0, result.free_bytes)
            self.assertEqual(request.ROOT, result.share_id)

        return self.callback_test(do_test, add_default_callbacks=True)
Esempio n. 31
0
    def test_over_quota(self):
        """Free space reports 0 (not negative) when usage exceeds quota."""
        self.usr0.update(max_storage_bytes=2 ** 16)
        # Normal operations can't exceed the quota, so write the
        # impossible usage straight into the shard store.
        store = dbmanager.get_shard_store(self.usr0.shard_id)
        store.get(model.StorageUserInfo, 0).used_storage_bytes = 2 ** 17
        store.commit()

        @defer.inlineCallbacks
        def do_test(client):
            """Verify the free-space response for the root volume."""
            yield client.dummy_authenticate("open sesame")
            result = yield client.get_free_space(request.ROOT)
            self.assertEqual(0, result.free_bytes)
            self.assertEqual(request.ROOT, result.share_id)
        return self.callback_test(do_test,
                                  add_default_callbacks=True)
Esempio n. 32
0
 def _record_share_accepted_or_deleted(cls, share, op_type):
     """Add a TransactionLog row for a share being accepted or deleted.

     The entry is created in the shard of the user who shared the node;
     the share details are serialized into extra_data as JSON.
     """
     store = get_shard_store(share.sharedbyuser.shard_id)
     node = store.get(StorageObject, share.subtree)
     when_last_changed = share.when_last_changed
     extra_data = dict(
         shared_to=share.shared_to,
         share_id=str(share.id),
         share_name=share.name,
         access_level=share.access,
         when_shared=get_epoch_secs(share.when_shared),
         when_last_changed=get_epoch_secs(when_last_changed),
     )
     # generation is deliberately left as None for share operations.
     txlog = cls(
         node.id,
         node.owner_id,
         node.volume_id,
         op_type,
         node.full_path,
         node.mimetype,
         generation=None,
         extra_data=json.dumps(extra_data).decode("ascii"),
     )
     # Add the txlog to whichever store owns the node object.
     return Store.of(node).add(txlog)
Esempio n. 33
0
    def bootstrap(cls, user):
        """Create TransactionLog rows describing user's current state.

        Records, in order: the user itself, every live UDF, live public
        directories, all live non-directory objects (via one bulk
        INSERT ... SELECT), and accepted live shares.  Returns the
        total number of TransactionLog rows inserted.
        """
        store = get_shard_store(user.shard_id)
        cls.record_user_created(user)
        # Number of TransactionLog rows we inserted.
        rows = 1

        for udf in store.find(UserVolume, owner_id=user.id, status=STATUS_LIVE):
            cls.record_udf_created(udf)
            rows += 1

        # If this becomes a problem it can be done as a single INSERT, but
        # we'd need to duplicate the get_public_file_url() in plpython.
        udf_join = Join(StorageObject, UserVolume, StorageObject.volume_id == UserVolume.id)
        conditions = [
            StorageObject.kind == "Directory",
            StorageObject.owner_id == user.id,
            StorageObject.status == STATUS_LIVE,
            StorageObject._publicfile_id != None,  # NOQA
            UserVolume.status == STATUS_LIVE,
        ]
        dirs = store.using(udf_join).find(StorageObject, *conditions)
        for directory in dirs:
            cls.record_public_access_change(directory)
            rows += 1

        # XXX: If this takes too long it will get killed by the transaction
        # watcher. Need to check what's the limit we could have here.
        # Things to check:
        #  * If it still takes too long, we could find out the IDs of the
        #    people who have a lot of music/photos, run it just for them with
        #    the transaction watcher disabled and then run it for everybody
        #    else afterwards.
        # Bulk-insert txlogs for every live non-directory object in one
        # server-side statement.
        query = """
            INSERT INTO txlog.transaction_log (
                node_id, owner_id, volume_id, op_type, path, generation,
                mimetype, extra_data)
            SELECT O.id, O.owner_id, O.volume_id, ?,
                   txlog.path_join(O.path, O.name), O.generation, O.mimetype,
                   txlog.get_extra_data_to_recreate_file_1(
                        kind, size, storage_key, publicfile_id,
                        public_uuid, content_hash,
                        extract(epoch from O.when_created at time zone 'UTC'),
                        extract(epoch
                                from O.when_last_modified at time zone 'UTC'),
                        UserDefinedFolder.path
                    ) as extra_data
            FROM Object as O
            JOIN UserDefinedFolder on UserDefinedFolder.id = O.volume_id
            LEFT JOIN ContentBlob on ContentBlob.hash = O.content_hash
            WHERE
                O.kind != 'Directory'
                AND O.owner_id = ?
                AND O.status = 'Live'
                AND UserDefinedFolder.status = 'Live'
            """
        params = (cls.OPERATIONS_MAP[cls.OP_PUT_CONTENT], user.id)
        rows += store.execute(query, params=params).rowcount

        # Cannot create TransactionLogs for Shares in a single INSERT like
        # above because TransactionLogs and Shares live in separate databases.
        share_join = LeftJoin(Share, StorageUser, Share.shared_to == StorageUser.id)
        conditions = [Share.shared_by == user.id, Share.status == STATUS_LIVE, Share.accepted == True]  # NOQA
        shares = get_user_store().using(share_join).find(Share, *conditions)
        for share in shares:
            cls.record_share_accepted(share)
            rows += 1

        return rows
Esempio n. 34
0
def get_txn_recs(store_name,
                 num_recs,
                 last_id=0,
                 worker_id=None,
                 expire_secs=None,
                 num_partitions=None,
                 partition_id=None):
    """Attempt to read num_recs records from the transaction log, starting from
    the row after last_id, plus any records whose ID is in the db_worker_unseen
    table.

    Use the store name, rather than a store reference, to sidestep potential
    thread safety problems.

    Return a list of up to num_rec dicts representing records from the
    transaction log, starting from the row after last_id, or the beginning of
    the table if last_id is None.  If num_recs records are not available, all
    remaining records will be returned.  If no new records are available, an
    empty list is returned.

    When worker_id is given, any ID gaps found below the highest returned ID
    (IDs with no transaction_log row yet) are registered for this worker in
    txlog.db_worker_unseen so they can be retried on later calls.

    Transaction management should be performed by the caller.  Since this
    function is read-only, it may be called from code decorated with
    fsync_readonly, or as part of a block of operations decorated with
    fsync_commit.
    """
    if expire_secs is None:
        # Fall back to the module-wide staleness window for unseen entries.
        expire_secs = UNSEEN_EXPIRES
    store = dbmanager.get_shard_store(store_name)
    parameters = (last_id, )
    # Base query pieces; they are combined (and possibly UNIONed) below.
    select = u"""
        SELECT txlog.id, owner_id, node_id, volume_id, op_type, path,
               generation, timestamp, mimetype, old_path, extra_data
        FROM txlog.transaction_log AS txlog"""
    condition = u"WHERE id > ?"
    order_limit = u"ORDER BY id LIMIT {}".format(num_recs)

    if num_partitions is not None and partition_id is not None:
        # Partition rows by MOD(owner_id, num_partitions), but always let
        # share accepted/deleted events through to every partition.
        unfilter_op_types = (
            TransactionLog.OP_SHARE_ACCEPTED,
            TransactionLog.OP_SHARE_DELETED,
        )
        unfilter_op_ids = (str(TransactionLog.OPERATIONS_MAP[id])
                           for id in unfilter_op_types)
        condition += u" AND (MOD(owner_id, ?) = ? OR op_type in ({}))".format(
            ','.join(unfilter_op_ids))
        parameters = parameters + (num_partitions, partition_id)

    query = "({} {} {})".format(select, condition, order_limit)

    if worker_id is not None and expire_secs:
        worker_id = unicode(worker_id)
        # UNION in this worker's not-yet-expired unseen records; the trailing
        # order_limit re-sorts and re-caps the combined result set.
        join = (u"NATURAL JOIN txlog.db_worker_unseen as unseen "
                "WHERE unseen.worker_id = ? "
                "AND unseen.created > "
                "TIMEZONE('UTC'::text, NOW()) - INTERVAL '{} seconds'".format(
                    expire_secs))
        query += u" UNION ({} {} {}) {};".format(select, join, order_limit,
                                                 order_limit)
        parameters = parameters + (worker_id, )

    records = store.execute(query, parameters)

    result = []
    for record in records:
        (row_id, owner_id, node_id, volume_id, op_type, path, generation,
         timestamp, mimetype, old_path, extra_data) = record
        # Translate the numeric op_type back into its symbolic name.
        op_type = TransactionLog.OPERATIONS_REVERSED_MAP[op_type]
        result.append(
            dict(txn_id=row_id,
                 node_id=node_id,
                 owner_id=owner_id,
                 volume_id=volume_id,
                 op_type=op_type,
                 path=path,
                 generation=generation,
                 timestamp=timestamp,
                 mimetype=mimetype,
                 old_path=old_path,
                 extra_data=extra_data))

    # Now insert unseen directly into db_worker_unseen, avoiding duplicates by
    # joining again with db_worker_unseen.  The inner generate_series subquery
    # finds IDs in (last_id .. highest returned ID) with no transaction_log
    # row; the outer LEFT JOIN drops IDs this worker already recorded.
    if result and result[-1]["txn_id"] > last_id and worker_id is not None:
        insert = (u"INSERT INTO txlog.db_worker_unseen (id, worker_id) "
                  "SELECT gs.id, ? FROM ("
                  "SELECT gs.id "
                  "FROM generate_series(?, ?) AS gs(id) "
                  "LEFT OUTER JOIN txlog.transaction_log AS txlog "
                  "ON gs.id = txlog.id "
                  "WHERE txlog.id IS NULL) AS gs "
                  "LEFT OUTER JOIN txlog.db_worker_unseen AS unseen "
                  "ON gs.id = unseen.id "
                  "AND unseen.worker_id = ? "
                  "WHERE unseen.id IS NULL;")
        store.execute(insert,
                      (worker_id, last_id, result[-1]["txn_id"], worker_id))

    return result
Esempio n. 35
0
def get_txn_recs(store_name, num_recs, last_id=0,
                 worker_id=None, expire_secs=None,
                 num_partitions=None, partition_id=None):
    """Read up to num_recs transaction-log records following last_id.

    A store *name* is taken instead of a store object so no store
    reference is shared across threads.  When worker_id is given, this
    worker's not-yet-expired rows from txlog.db_worker_unseen are merged
    into the result, and any ID gaps found below the highest returned ID
    are registered there for later retries.

    Returns a list of dicts (possibly empty), one per transaction-log
    row, in ascending id order.  Transaction management is left entirely
    to the caller; the reads are safe under fsync_readonly as well as
    inside an fsync_commit block.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    store = dbmanager.get_shard_store(store_name)

    # Assemble the base query from three pieces that may be reused in a
    # UNION below.
    base_select = u"""
        SELECT txlog.id, owner_id, node_id, volume_id, op_type, path,
               generation, timestamp, mimetype, old_path, extra_data
        FROM txlog.transaction_log AS txlog"""
    where_clause = u"WHERE id > ?"
    tail = u"ORDER BY id LIMIT {}".format(num_recs)
    params = (last_id, )

    if num_partitions is not None and partition_id is not None:
        # Partition rows by owner, but share accepted/deleted events are
        # never filtered out: every partition must see them.
        always_included = [
            str(TransactionLog.OPERATIONS_MAP[op])
            for op in (TransactionLog.OP_SHARE_ACCEPTED,
                       TransactionLog.OP_SHARE_DELETED)]
        where_clause += (
            u" AND (MOD(owner_id, ?) = ? OR op_type in ({}))".format(
                ','.join(always_included)))
        params += (num_partitions, partition_id)

    query = "({} {} {})".format(base_select, where_clause, tail)

    if worker_id is not None and expire_secs:
        worker_id = unicode(worker_id)
        unseen_clause = (
            u"NATURAL JOIN txlog.db_worker_unseen as unseen "
            "WHERE unseen.worker_id = ? "
            "AND unseen.created > "
            "TIMEZONE('UTC'::text, NOW()) - INTERVAL '{} seconds'".format(
                expire_secs))
        # The trailing tail re-sorts and re-caps the UNIONed result set.
        query += u" UNION ({} {} {}) {};".format(
            base_select, unseen_clause, tail, tail)
        params += (worker_id, )

    result = []
    for row in store.execute(query, params):
        (row_id, owner_id, node_id, volume_id, op_type, path, generation,
         timestamp, mimetype, old_path, extra_data) = row
        result.append(dict(
            txn_id=row_id, node_id=node_id, owner_id=owner_id,
            volume_id=volume_id,
            op_type=TransactionLog.OPERATIONS_REVERSED_MAP[op_type],
            path=path, generation=generation, timestamp=timestamp,
            mimetype=mimetype, old_path=old_path, extra_data=extra_data))

    # Record ID gaps (ids without a transaction_log row yet) as unseen for
    # this worker; the outer LEFT JOIN skips ids it already recorded.
    if result and result[-1]["txn_id"] > last_id and worker_id is not None:
        insert = (u"INSERT INTO txlog.db_worker_unseen (id, worker_id) "
                  "SELECT gs.id, ? FROM ("
                  "SELECT gs.id "
                  "FROM generate_series(?, ?) AS gs(id) "
                  "LEFT OUTER JOIN txlog.transaction_log AS txlog "
                  "ON gs.id = txlog.id "
                  "WHERE txlog.id IS NULL) AS gs "
                  "LEFT OUTER JOIN txlog.db_worker_unseen AS unseen "
                  "ON gs.id = unseen.id "
                  "AND unseen.worker_id = ? "
                  "WHERE unseen.id IS NULL;")
        store.execute(insert, (worker_id, last_id,
                               result[-1]["txn_id"], worker_id))

    return result
Esempio n. 36
0
 def sstore(self):
     """Return the shard store named by ``self.sstore_name``.

     The store is looked up on every call rather than cached, because a
     cached store reference may cause threading issues.
     """
     return get_shard_store(self.sstore_name)
Esempio n. 37
0
 def sstore(self):
     """Look up and return this instance's shard store.

     Deliberately not cached: holding on to a store reference could
     cause threading issues, so a fresh lookup is done each time.
     """
     store_name = self.sstore_name
     return get_shard_store(store_name)
Esempio n. 38
0
    def bootstrap(cls, user):
        """Create TransactionLog entries describing *user*'s current state.

        Records, in order: the user creation, one entry per live UDF, one
        public-access entry per live public directory, a PUT_CONTENT entry
        for every live non-directory node (done as a single bulk INSERT on
        the shard), and one entry per accepted share given out by the user.

        Returns the total number of TransactionLog rows created.

        NOTE(review): presumably invoked as a classmethod — the decorator is
        outside this view; confirm at the definition site.
        """
        store = get_shard_store(user.shard_id)
        cls.record_user_created(user)
        # Number of TransactionLog rows we inserted.
        rows = 1

        # One entry per live UDF owned by the user.
        for udf in store.find(UserVolume, owner_id=user.id,
                              status=STATUS_LIVE):
            cls.record_udf_created(udf)
            rows += 1

        # If this becomes a problem it can be done as a single INSERT, but
        # we'd need to duplicate the get_public_file_url() in plpython.
        udf_join = Join(
            StorageObject,
            UserVolume, StorageObject.volume_id == UserVolume.id)
        conditions = [StorageObject.kind == 'Directory',
                      StorageObject.owner_id == user.id,
                      StorageObject.status == STATUS_LIVE,
                      StorageObject._publicfile_id != None,  # NOQA
                      UserVolume.status == STATUS_LIVE]
        dirs = store.using(udf_join).find(StorageObject, *conditions)
        for directory in dirs:
            cls.record_public_access_change(directory)
            rows += 1

        # XXX: If this takes too long it will get killed by the transaction
        # watcher. Need to check what's the limit we could have here.
        # Things to check:
        #  * If it still takes too long, we could find out the IDs of the
        #    people who have a lot of music/photos, run it just for them with
        #    the transaction watcher disabled and then run it for everybody
        #    else afterwards.
        # Bulk-insert a PUT_CONTENT log row for every live non-directory
        # node; extra_data is assembled server-side by a plpython helper.
        query = """
            INSERT INTO txlog.transaction_log (
                node_id, owner_id, volume_id, op_type, path, generation,
                mimetype, extra_data)
            SELECT O.id, O.owner_id, O.volume_id, ?,
                   txlog.path_join(O.path, O.name), O.generation, O.mimetype,
                   txlog.get_extra_data_to_recreate_file_1(
                        kind, size, storage_key, publicfile_id,
                        public_uuid, content_hash,
                        extract(epoch from O.when_created at time zone 'UTC'),
                        extract(epoch
                                from O.when_last_modified at time zone 'UTC'),
                        UserDefinedFolder.path
                    ) as extra_data
            FROM Object as O
            JOIN UserDefinedFolder on UserDefinedFolder.id = O.volume_id
            LEFT JOIN ContentBlob on ContentBlob.hash = O.content_hash
            WHERE
                O.kind != 'Directory'
                AND O.owner_id = ?
                AND O.status = 'Live'
                AND UserDefinedFolder.status = 'Live'
            """
        params = (cls.OPERATIONS_MAP[cls.OP_PUT_CONTENT], user.id)
        rows += store.execute(query, params=params).rowcount

        # Cannot create TransactionLogs for Shares in a single INSERT like
        # above because TransactionLogs and Shares live in separate databases.
        share_join = LeftJoin(
            Share, StorageUser, Share.shared_to == StorageUser.id)
        conditions = [Share.shared_by == user.id,
                      Share.status == STATUS_LIVE,
                      Share.accepted == True]  # NOQA
        shares = get_user_store().using(share_join).find(Share, *conditions)
        for share in shares:
            cls.record_share_accepted(share)
            rows += 1

        return rows