def main(options):
    """The main function.

    Dispatches on options.action:
    - DUMP: COPY each table's user rows out of the source shard to flat
      files under options.path.
    - LOAD: bulk COPY those files into the target shard, register the
      users, and rebuild indexes.
    - DELETE: remove the user data from the source shard.
    """
    store = get_shard_store(options.from_shard_id)
    # NOTE(review): empty string appears to mean "no user filter" in the
    # SQL templates below — confirm against COPYINFO/DELETE_SQL.
    user_ids = options.user_ids or ""
    if options.action == DUMP:
        get_column_names(store, True)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            select = c[1]['query']
            replacements = dict(columns=columns, userids=user_ids,
                                shard_id=options.to_shard_id)
            sql = "COPY (%s) TO '%s/%s.dat';" % (
                select % replacements, options.path, table)
            print "Dumping %s TO %s/%s.dat" % (table, options.path, table),
            # COPY ... TO is read-only, so each dump runs in its own
            # transaction that is aborted rather than committed.
            storage_tm.begin()
            execute_sql(store, sql)
            storage_tm.abort()
        return
    if options.action == LOAD:
        get_column_names(store, False)
        shard_store = get_shard_store(options.to_shard_id)
        storage_tm.begin()
        print "Dropping Indexes..."
        # Drop the indexes up front so the bulk COPY FROM below isn't
        # slowed down by per-row index maintenance.
        for sql in DROP_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in CREATE_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in DROP_INDEXES_SQL:
            shard_store.execute(sql)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            sql = "COPY %s (%s) FROM '%s/%s.dat';" % (
                table, columns, options.path, table)
            print "Loading %s From %s/%s.dat" % (table, options.path, table),
            execute_sql(shard_store, sql)
        # Point the loaded users at their new shard in the user database.
        user_store = get_user_store()
        user_store.execute(INITUSER % dict(shard_id=options.to_shard_id,
                                           userids=user_ids))
        print "Rebuilding Indexes..."
        for sql in DROP_NO_DUPLICATE_SQL:
            execute_sql(shard_store, sql)
        for sql in CREATE_INDEXES_SQL:
            execute_sql(shard_store, sql)
        storage_tm.commit()
        return
    if options.action == DELETE:
        shard_store = get_shard_store(options.from_shard_id)
        storage_tm.begin()
        print "Deleting user data..."
        for sql in DELETE_SQL:
            execute_sql(shard_store, sql % dict(userids=user_ids))
        storage_tm.commit()
def main(options):
    """The main function.

    Acts according to options.action: DUMP writes each table's user rows
    to per-table .dat files, LOAD bulk-loads those files into the target
    shard and rebuilds indexes, DELETE removes the user data from the
    source shard.
    """
    store = get_shard_store(options.from_shard_id)
    # NOTE(review): empty user_ids seems to mean "all/no filter" in the
    # SQL templates — verify against COPYINFO and DELETE_SQL.
    user_ids = options.user_ids or ""
    if options.action == DUMP:
        get_column_names(store, True)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            select = c[1]['query']
            replacements = dict(columns=columns, userids=user_ids,
                                shard_id=options.to_shard_id)
            sql = "COPY (%s) TO '%s/%s.dat';" % (
                select % replacements, options.path, table)
            print "Dumping %s TO %s/%s.dat" % (table, options.path, table),
            # Dumping is read-only: begin/abort instead of commit.
            storage_tm.begin()
            execute_sql(store, sql)
            storage_tm.abort()
        return
    if options.action == LOAD:
        get_column_names(store, False)
        shard_store = get_shard_store(options.to_shard_id)
        storage_tm.begin()
        print "Dropping Indexes..."
        # Remove indexes first so the bulk COPY FROM runs fast.
        for sql in DROP_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in CREATE_NO_DUPLICATE_SQL:
            shard_store.execute(sql)
        for sql in DROP_INDEXES_SQL:
            shard_store.execute(sql)
        for c in COPYINFO:
            table = c[0]
            columns = c[1]['columns']
            sql = "COPY %s (%s) FROM '%s/%s.dat';" % (
                table, columns, options.path, table)
            print "Loading %s From %s/%s.dat" % (table, options.path, table),
            execute_sql(shard_store, sql)
        # Register the moved users against the destination shard.
        user_store = get_user_store()
        user_store.execute(
            INITUSER % dict(shard_id=options.to_shard_id, userids=user_ids))
        print "Rebuilding Indexes..."
        for sql in DROP_NO_DUPLICATE_SQL:
            execute_sql(shard_store, sql)
        for sql in CREATE_INDEXES_SQL:
            execute_sql(shard_store, sql)
        storage_tm.commit()
        return
    if options.action == DELETE:
        shard_store = get_shard_store(options.from_shard_id)
        storage_tm.begin()
        print "Deleting user data..."
        for sql in DELETE_SQL:
            execute_sql(shard_store, sql % dict(userids=user_ids))
        storage_tm.commit()
def test_stores(self):
    """Check every shard store is reachable and that lookups are cached."""
    # Every configured shard must answer a trivial query.
    for shard_id in dbm.get_shard_ids():
        dbm.get_shard_store(shard_id).execute('SELECT 1')
    # Repeated lookups for the same shard must return the very same
    # store object...
    first = dbm.get_shard_store('shard0')
    second = dbm.get_shard_store('shard0')
    self.assertTrue(first is second)
    # ...while a different shard gets a distinct store.
    other = dbm.get_shard_store('shard1')
    self.assertFalse(first is other)
def keep_last_rows_for_worker_names(store_name, worker_names):
    """Clean rows from txlog.db_worker_last_row that don't match the given worker names."""
    sql = "DELETE FROM txlog.db_worker_last_row WHERE worker_id NOT IN ?;"
    store = dbmanager.get_shard_store(store_name)
    # The whole tuple is bound as a single parameter for the IN clause.
    store.execute(sql, (tuple(worker_names), ))
def delete_old_txlogs(store_name, timestamp_limit, quantity_limit=None):
    """Delete transaction logs no newer than timestamp_limit.

    A store name (not a store reference) is used to sidestep potential
    thread safety problems. timestamp_limit must be a datetime.datetime;
    rows with a later timestamp are kept. When quantity_limit is given,
    at most that many rows are removed. Returns the number of rows
    deleted.
    """
    subquery = "SELECT id FROM txlog.transaction_log WHERE timestamp <= ?"
    args = [timestamp_limit]
    if quantity_limit is not None:
        subquery += " LIMIT ?"
        args.append(quantity_limit)
    delete_sql = (
        "DELETE FROM txlog.transaction_log WHERE id IN (%s);" % subquery)
    store = dbmanager.get_shard_store(store_name)
    return store.execute(delete_sql, args).rowcount
def delete_expired_unseen(store_name, worker_id, unseen_ids=None,
                          expire_secs=None):
    """Deletes expired unseen ids for a given worker id.

    If a list of unseen ids is given, also delete those explicitly.
    Returns the number of rows deleted.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    worker_id = unicode(worker_id)
    store = dbmanager.get_shard_store(store_name)
    deleted = 0
    condition = (u"created < TIMEZONE('UTC'::text, NOW()) "
                 " - INTERVAL '{} seconds'".format(expire_secs))
    query = (u"DELETE FROM txlog.db_worker_unseen "
             "WHERE worker_id = ? ")
    if unseen_ids is not None:
        fmt = u"({})".format
        # A Python 2 iterator has a .next method; anything else (list,
        # tuple, ...) is wrapped so ichunk() can consume it lazily.
        if getattr(unseen_ids, "next", None) is None:
            unseen_ids = iter(unseen_ids)
        # Delete in CHUNK_SIZE batches to keep each statement bounded.
        while True:
            unseen_args = u",".join(imap(fmt, ichunk(unseen_ids, CHUNK_SIZE)))
            if not unseen_args:
                break
            result = store.execute(
                query + u"AND ({} OR id = ANY(VALUES {}));".format(
                    condition, unseen_args), (worker_id, ))
            deleted += result.rowcount
    else:
        # No explicit ids: delete only the expired rows for this worker.
        query += u"AND {};".format(condition)
        result = store.execute(query, (worker_id, ))
        deleted = result.rowcount
    return deleted
def update_last_row(worker_name, row_id, timestamp, store_name):
    """Record the most recently processed txlog entry for a worker.

    Tries an UPDATE first and falls back to an INSERT when the worker
    has no row yet; raises RuntimeError when neither touches a row.
    A store name (not a store reference) is used to sidestep potential
    thread safety problems. Transaction management is up to the caller;
    since this writes to the database it should run under fsync_commit.
    """
    worker_name = unicode(worker_name)
    store = dbmanager.get_shard_store(store_name)
    updated = store.execute(
        u"""UPDATE txlog.db_worker_last_row SET row_id=?, timestamp=?
        WHERE worker_id=?""",
        (row_id, timestamp, worker_name)).rowcount
    if updated == 0:
        inserted = store.execute(
            u"""INSERT INTO txlog.db_worker_last_row
            (worker_id, row_id, timestamp) VALUES (?, ?, ?)""",
            (worker_name, row_id, timestamp)).rowcount
        if inserted == 0:
            raise RuntimeError(
                'Failed to update or insert last row id for worker %s'
                % worker_name)
def get_last_row(worker_name, store_name):
    """Return (row_id, timestamp) of the last row processed by a worker.

    Falls back to the oldest row in the table when the worker has no
    entry, and to the NEW_WORKER_LAST_ROW default when the table is
    empty. A store name (not a store reference) is used to sidestep
    potential thread safety problems. Read-only: callers may wrap it in
    fsync_readonly or fsync_commit.
    """
    worker_name = unicode(worker_name)
    store = dbmanager.get_shard_store(store_name)
    by_worker = (u"SELECT row_id, timestamp "
                 u"FROM txlog.db_worker_last_row WHERE worker_id=?")
    oldest = (u"SELECT row_id, timestamp "
              u"FROM txlog.db_worker_last_row ORDER BY row_id LIMIT 1")
    last_row = store.execute(by_worker, (worker_name,)).get_one()
    if not last_row:
        last_row = store.execute(oldest).get_one()
    if not last_row:
        last_row = NEW_WORKER_LAST_ROW
    return last_row
def delete_expired_unseen(store_name, worker_id, unseen_ids=None,
                          expire_secs=None):
    """Deletes expired unseen ids for a given worker id.

    If a list of unseen ids is given, also delete those explicitly.
    Returns the number of rows deleted.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    worker_id = unicode(worker_id)
    store = dbmanager.get_shard_store(store_name)
    deleted = 0
    condition = (u"created < TIMEZONE('UTC'::text, NOW()) "
                 " - INTERVAL '{} seconds'".format(expire_secs))
    query = (u"DELETE FROM txlog.db_worker_unseen "
             "WHERE worker_id = ? ")
    if unseen_ids is not None:
        fmt = u"({})".format
        # In Python 2 an iterator exposes .next; wrap plain iterables so
        # ichunk() can drain them incrementally.
        if getattr(unseen_ids, "next", None) is None:
            unseen_ids = iter(unseen_ids)
        # Process the ids in CHUNK_SIZE batches so no single DELETE
        # statement grows unboundedly.
        while True:
            unseen_args = u",".join(imap(fmt, ichunk(unseen_ids, CHUNK_SIZE)))
            if not unseen_args:
                break
            result = store.execute(
                query + u"AND ({} OR id = ANY(VALUES {}));".format(
                    condition, unseen_args), (worker_id,))
            deleted += result.rowcount
    else:
        # Without explicit ids, only expired rows are removed.
        query += u"AND {};".format(condition)
        result = store.execute(query, (worker_id,))
        deleted = result.rowcount
    return deleted
def get_last_row(worker_name, store_name):
    """Fetch the id and timestamp of a worker's last processed row.

    Lookup order: the worker's own row, then the oldest row in the
    table, then the NEW_WORKER_LAST_ROW default for brand-new workers.
    A store name (not a store reference) is used to sidestep potential
    thread safety problems. Read-only, so fsync_readonly or
    fsync_commit blocks may call it.
    """
    store = dbmanager.get_shard_store(store_name)
    row = store.execute(
        u"SELECT row_id, timestamp FROM txlog.db_worker_last_row "
        u"WHERE worker_id=?", (unicode(worker_name), )).get_one()
    if not row:
        row = store.execute(
            u"SELECT row_id, timestamp FROM txlog.db_worker_last_row "
            u"ORDER BY row_id LIMIT 1").get_one()
    return row if row else NEW_WORKER_LAST_ROW
def update_last_row(worker_name, row_id, timestamp, store_name):
    """Persist the last processed txlog row id/timestamp for a worker.

    UPDATE-then-INSERT upsert: when the UPDATE matches nothing the row
    is inserted; when that also fails, RuntimeError is raised. A store
    name (not a store reference) is used to sidestep potential thread
    safety problems. Writes to the database, so call it from code
    decorated with fsync_commit.
    """
    worker_key = unicode(worker_name)
    store = dbmanager.get_shard_store(store_name)
    outcome = store.execute(
        u"""UPDATE txlog.db_worker_last_row SET row_id=?, timestamp=?
        WHERE worker_id=?""", (row_id, timestamp, worker_key))
    if outcome.rowcount == 0:
        outcome = store.execute(
            u"""INSERT INTO txlog.db_worker_last_row
            (worker_id, row_id, timestamp) VALUES (?, ?, ?)""",
            (worker_key, row_id, timestamp))
        if outcome.rowcount == 0:
            raise RuntimeError(
                'Failed to update or insert last row id for worker %s'
                % worker_key)
def test_bootstrap_picks_up_user(self):
    """Bootstrap must record an OP_USER_CREATED txlog for the user."""
    user = self.obj_factory.make_user()
    TransactionLog.bootstrap(user)
    store = get_shard_store(user.shard_id)
    txlog = store.find(
        TransactionLog, op_type=TransactionLog.OP_USER_CREATED).one()
    self.assertTxLogDetailsMatchesUserDetails(user, txlog)
def setUp(self):
    super(TransactionLogUtilsTestCase, self).setUp()
    self._orig_make_user = self.obj_factory.make_user
    # Swap obj_factory.make_user for a variant that skips creating
    # TransactionLogs, which would otherwise pollute these tests.
    patcher = patch.object(self.obj_factory, 'make_user')
    self.addCleanup(patcher.stop)
    patcher.start().side_effect = self._make_user_without_txlog
    self.store = dbmanager.get_shard_store(self.obj_factory.sstore_name)
def test_txlog_for_new_storageuser(self):
    """Creating a StorageUser leaves a matching txlog entry."""
    new_id = self.obj_factory.get_unique_integer()
    username = self.obj_factory.get_unique_unicode()
    visible = self.obj_factory.get_unique_unicode()
    user = StorageUser.new(
        self.ustore, new_id, username, visible, default_shard_id)
    txlog = get_shard_store(user.shard_id).find(
        TransactionLog, owner_id=user.id).one()
    self.assertTxLogDetailsMatchesUserDetails(user, txlog)
def test_txlog_for_new_storageuser(self):
    """A freshly created StorageUser gets its own txlog row."""
    uid = self.obj_factory.get_unique_integer()
    uname = self.obj_factory.get_unique_unicode()
    vname = self.obj_factory.get_unique_unicode()
    user = StorageUser.new(
        self.ustore, uid, uname, vname, default_shard_id)
    shard = get_shard_store(user.shard_id)
    txlog = shard.find(TransactionLog, owner_id=user.id).one()
    self.assertTxLogDetailsMatchesUserDetails(user, txlog)
def test_bootstrap_picks_up_shares(self):
    """Bootstrap records an OP_SHARE_ACCEPTED txlog for accepted shares."""
    user = self.obj_factory.make_user()
    directory = self.obj_factory.make_directory(user)
    share = self.obj_factory.make_share(directory)
    self.sstore.commit()
    TransactionLog.bootstrap(user)
    store = get_shard_store(user.shard_id)
    txlog = store.find(
        TransactionLog, op_type=TransactionLog.OP_SHARE_ACCEPTED).one()
    expected = self._get_dict_with_txlog_attrs_from_share(
        share, directory, TransactionLog.OP_SHARE_ACCEPTED)
    self.assert_txlog_correct(txlog, expected)
def get_row_by_time(store_name, timestamp):
    """Return (id, timestamp) of the first txlog row at/after timestamp.

    Rows are scanned in id order; (None, None) is returned when no row
    has a timestamp greater than or equal to the given one.
    """
    store = dbmanager.get_shard_store(store_name)
    sql = """
        SELECT id, timestamp FROM txlog.transaction_log
        WHERE timestamp >= ? ORDER BY id LIMIT 1;
    """
    row = store.execute(sql, (timestamp, )).get_one()
    if row is None:
        return None, None
    return row[0], row[1]
def get_row_by_time(store_name, timestamp):
    """Find the smallest txlog row id whose timestamp is >= the given one.

    Returns an (id, timestamp) pair, or (None, None) when nothing
    matches.
    """
    sql = """
        SELECT id, timestamp FROM txlog.transaction_log
        WHERE timestamp >= ? ORDER BY id LIMIT 1;
    """
    store = dbmanager.get_shard_store(store_name)
    match = store.execute(sql, (timestamp,)).get_one()
    txid, tstamp = match if match is not None else (None, None)
    return txid, tstamp
def _record_share_accepted_or_deleted(cls, share, op_type):
    """Store a txlog entry describing a share being accepted or deleted."""
    store = get_shard_store(share.sharedbyuser.shard_id)
    node = store.get(StorageObject, share.subtree)
    details = dict(
        shared_to=share.shared_to,
        share_id=str(share.id),
        share_name=share.name,
        access_level=share.access,
        when_shared=get_epoch_secs(share.when_shared),
        when_last_changed=get_epoch_secs(share.when_last_changed))
    entry = cls(
        node.id, node.owner_id, node.volume_id, op_type,
        node.full_path, node.mimetype, generation=None,
        extra_data=json.dumps(details).decode('ascii'))
    return Store.of(node).add(entry)
def record_user_created(cls, user):
    """Create a TransactionLog entry representing a new user.

    The TransactionLog table is (ab)used to carry newly created users
    because derived services need user information as well as file
    information. Such an entry has no node_id, volume_id, generation or
    path; its owner_id is the new user's ID.
    """
    details = dict(name=user.username, visible_name=user.visible_name)
    txlog = cls(
        None, user.id, None, cls.OP_USER_CREATED, None, None,
        extra_data=json.dumps(details).decode("ascii"))
    return get_shard_store(user.shard_id).add(txlog)
def delete_txlogs_slice(store_name, date, quantity_limit):
    """Delete up to quantity_limit txlogs from one calendar date.

    Like delete_old_txlogs, but matches rows whose timestamp falls
    exactly on the given date (a datetime.date object), and the
    quantity limit is mandatory. Returns the number of rows deleted.
    """
    inner = ("SELECT id FROM txlog.transaction_log "
             "WHERE timestamp::date = ? LIMIT ?")
    sql = "DELETE FROM txlog.transaction_log WHERE id IN (%s);" % inner
    store = dbmanager.get_shard_store(store_name)
    return store.execute(sql, [date, quantity_limit]).rowcount
def record_user_created(cls, user):
    """Create a TransactionLog entry representing a new user.

    We reuse the TransactionLog table for newly created users because
    derived services need information about users as well as their
    files. The resulting entry has no node_id, volume_id, generation or
    path, and its owner_id is the ID of the newly created user.
    """
    payload = json.dumps(
        dict(name=user.username, visible_name=user.visible_name))
    entry = cls(
        None, user.id, None, cls.OP_USER_CREATED, None, None,
        extra_data=payload.decode('ascii'))
    store = get_shard_store(user.shard_id)
    return store.add(entry)
def test_over_quota(self):
    """Report 0 free bytes (never a negative number) when over quota."""
    self.usr0.update(max_storage_bytes=2 ** 16)
    # Force used > max straight in the DB; this can't happen normally.
    store = dbmanager.get_shard_store(self.usr0.shard_id)
    info = store.get(model.StorageUserInfo, 0)
    info.used_storage_bytes = 2 ** 17
    store.commit()

    @defer.inlineCallbacks
    def check(client):
        """Authenticate and verify the reported free space."""
        yield client.dummy_authenticate("open sesame")
        result = yield client.get_free_space(request.ROOT)
        self.assertEqual(0, result.free_bytes)
        self.assertEqual(request.ROOT, result.share_id)

    return self.callback_test(check, add_default_callbacks=True)
def test_over_quota(self):
    """Over-quota users must see zero free space, not a negative value."""
    self.usr0.update(max_storage_bytes=2 ** 16)
    # Push usage above the quota directly in the database — something
    # that just can't happen through the normal code paths.
    shard = dbmanager.get_shard_store(self.usr0.shard_id)
    user_info = shard.get(model.StorageUserInfo, 0)
    user_info.used_storage_bytes = 2 ** 17
    shard.commit()

    @defer.inlineCallbacks
    def do_test(client):
        """Do the actual test."""
        yield client.dummy_authenticate("open sesame")
        reply = yield client.get_free_space(request.ROOT)
        self.assertEqual(0, reply.free_bytes)
        self.assertEqual(request.ROOT, reply.share_id)

    return self.callback_test(do_test, add_default_callbacks=True)
def _record_share_accepted_or_deleted(cls, share, op_type):
    """Record a txlog entry for a share acceptance or deletion."""
    store = get_shard_store(share.sharedbyuser.shard_id)
    node = store.get(StorageObject, share.subtree)
    when_last_changed = share.when_last_changed
    extra_data = dict(
        shared_to=share.shared_to,
        share_id=str(share.id),
        share_name=share.name,
        access_level=share.access,
        when_shared=get_epoch_secs(share.when_shared),
        when_last_changed=get_epoch_secs(when_last_changed))
    encoded = json.dumps(extra_data).decode("ascii")
    txlog = cls(
        node.id, node.owner_id, node.volume_id, op_type, node.full_path,
        node.mimetype, generation=None, extra_data=encoded)
    return Store.of(node).add(txlog)
def bootstrap(cls, user):
    """Create txlog entries for everything the user already has.

    Records the user itself, every live UDF, the public-access state of
    public directories, one entry per live file (done as a single SQL
    INSERT), and every accepted share. Returns the number of rows
    inserted.
    """
    store = get_shard_store(user.shard_id)
    cls.record_user_created(user)
    # Number of TransactionLog rows we inserted.
    rows = 1
    for udf in store.find(UserVolume, owner_id=user.id,
                          status=STATUS_LIVE):
        cls.record_udf_created(udf)
        rows += 1
    # If this becomes a problem it can be done as a single INSERT, but
    # we'd need to duplicate the get_public_file_url() in plpython.
    udf_join = Join(StorageObject, UserVolume,
                    StorageObject.volume_id == UserVolume.id)
    conditions = [
        StorageObject.kind == "Directory",
        StorageObject.owner_id == user.id,
        StorageObject.status == STATUS_LIVE,
        StorageObject._publicfile_id != None,  # NOQA
        UserVolume.status == STATUS_LIVE,
    ]
    dirs = store.using(udf_join).find(StorageObject, *conditions)
    for directory in dirs:
        cls.record_public_access_change(directory)
        rows += 1
    # XXX: If this takes too long it will get killed by the transaction
    # watcher. Need to check what's the limit we could have here.
    # Things to check:
    # * If it still takes too long, we could find out the IDs of the
    #   people who have a lot of music/photos, run it just for them with
    #   the transaction watcher disabled and then run it for everybody
    #   else afterwards.
    query = """
        INSERT INTO txlog.transaction_log (
            node_id, owner_id, volume_id, op_type, path, generation,
            mimetype, extra_data)
        SELECT O.id, O.owner_id, O.volume_id, ?,
            txlog.path_join(O.path, O.name), O.generation, O.mimetype,
            txlog.get_extra_data_to_recreate_file_1(
                kind, size, storage_key, publicfile_id,
                public_uuid, content_hash,
                extract(epoch from O.when_created at time zone 'UTC'),
                extract(epoch from O.when_last_modified at time zone 'UTC'),
                UserDefinedFolder.path
            ) as extra_data
        FROM Object as O
        JOIN UserDefinedFolder on UserDefinedFolder.id = O.volume_id
        LEFT JOIN ContentBlob on ContentBlob.hash = O.content_hash
        WHERE O.kind != 'Directory'
            AND O.owner_id = ?
            AND O.status = 'Live'
            AND UserDefinedFolder.status = 'Live'
        """
    params = (cls.OPERATIONS_MAP[cls.OP_PUT_CONTENT], user.id)
    rows += store.execute(query, params=params).rowcount
    # Cannot create TransactionLogs for Shares in a single INSERT like
    # above because TransactionLogs and Shares live in separate databases.
    share_join = LeftJoin(Share, StorageUser,
                          Share.shared_to == StorageUser.id)
    conditions = [Share.shared_by == user.id,
                  Share.status == STATUS_LIVE,
                  Share.accepted == True]  # NOQA
    shares = get_user_store().using(share_join).find(Share, *conditions)
    for share in shares:
        cls.record_share_accepted(share)
        rows += 1
    return rows
def get_txn_recs(store_name, num_recs, last_id=0, worker_id=None,
                 expire_secs=None, num_partitions=None, partition_id=None):
    """Attempt to read num_recs records from the transaction log,
    starting from the row after last_id, plus any records whose ID is in
    the db_worker_unseen table.

    Use the store name, rather than a store reference, to sidestep
    potential thread safety problems.

    Return a list of up to num_rec dicts representing records from the
    transaction log, starting from the row after last_id, or the
    beginning of the table if last_id is None. If num_recs records are
    not available, all remaining records will be returned. If no new
    records are available, an empty list is returned.

    Transaction management should be performed by the caller. Since this
    function is read-only, it may be called from code decorated with
    fsync_readonly, or as part of a block of operations decorated with
    fsync_commit.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    store = dbmanager.get_shard_store(store_name)
    parameters = (last_id, )
    select = u"""
    SELECT txlog.id, owner_id, node_id, volume_id, op_type, path,
        generation, timestamp, mimetype, old_path, extra_data
    FROM txlog.transaction_log AS txlog"""
    condition = u"WHERE id > ?"
    order_limit = u"ORDER BY id LIMIT {}".format(num_recs)
    if num_partitions is not None and partition_id is not None:
        # Partitioned consumers filter rows by MOD(owner_id, ...), but
        # share accept/delete events are delivered to every partition.
        unfilter_op_types = (
            TransactionLog.OP_SHARE_ACCEPTED,
            TransactionLog.OP_SHARE_DELETED,
        )
        unfilter_op_ids = (str(TransactionLog.OPERATIONS_MAP[id])
                           for id in unfilter_op_types)
        condition += u" AND (MOD(owner_id, ?) = ? OR op_type in ({}))".format(
            ','.join(unfilter_op_ids))
        parameters = parameters + (num_partitions, partition_id)
    query = "({} {} {})".format(select, condition, order_limit)
    if worker_id is not None and expire_secs:
        # Also pick up this worker's not-yet-expired "unseen" rows.
        worker_id = unicode(worker_id)
        join = (u"NATURAL JOIN txlog.db_worker_unseen as unseen "
                "WHERE unseen.worker_id = ? "
                "AND unseen.created > "
                "TIMEZONE('UTC'::text, NOW()) - INTERVAL '{} seconds'".format(
                    expire_secs))
        query += u" UNION ({} {} {}) {};".format(select, join, order_limit,
                                                 order_limit)
        parameters = parameters + (worker_id, )
    records = store.execute(query, parameters)
    result = []
    for record in records:
        (row_id, owner_id, node_id, volume_id, op_type, path, generation,
         timestamp, mimetype, old_path, extra_data) = record
        op_type = TransactionLog.OPERATIONS_REVERSED_MAP[op_type]
        result.append(
            dict(txn_id=row_id, node_id=node_id, owner_id=owner_id,
                 volume_id=volume_id, op_type=op_type, path=path,
                 generation=generation, timestamp=timestamp,
                 mimetype=mimetype, old_path=old_path,
                 extra_data=extra_data))
    # Now insert unseen directly into db_worker_unseen, avoiding duplicates by
    # joining again with db_worker_unseen.
    if result and result[-1]["txn_id"] > last_id and worker_id is not None:
        insert = (u"INSERT INTO txlog.db_worker_unseen (id, worker_id) "
                  "SELECT gs.id, ? FROM ("
                  "SELECT gs.id "
                  "FROM generate_series(?, ?) AS gs(id) "
                  "LEFT OUTER JOIN txlog.transaction_log AS txlog "
                  "ON gs.id = txlog.id "
                  "WHERE txlog.id IS NULL) AS gs "
                  "LEFT OUTER JOIN txlog.db_worker_unseen AS unseen "
                  "ON gs.id = unseen.id "
                  "AND unseen.worker_id = ? "
                  "WHERE unseen.id IS NULL;")
        store.execute(insert, (worker_id, last_id, result[-1]["txn_id"],
                               worker_id))
    return result
def get_txn_recs(store_name, num_recs, last_id=0, worker_id=None,
                 expire_secs=None, num_partitions=None, partition_id=None):
    """Attempt to read num_recs records from the transaction log,
    starting from the row after last_id, plus any records whose ID is in
    the db_worker_unseen table.

    Use the store name, rather than a store reference, to sidestep
    potential thread safety problems.

    Return a list of up to num_rec dicts representing records from the
    transaction log, starting from the row after last_id, or the
    beginning of the table if last_id is None. If num_recs records are
    not available, all remaining records will be returned. If no new
    records are available, an empty list is returned.

    Transaction management should be performed by the caller. Since this
    function is read-only, it may be called from code decorated with
    fsync_readonly, or as part of a block of operations decorated with
    fsync_commit.
    """
    if expire_secs is None:
        expire_secs = UNSEEN_EXPIRES
    store = dbmanager.get_shard_store(store_name)
    parameters = (last_id, )
    select = u"""
    SELECT txlog.id, owner_id, node_id, volume_id, op_type, path,
        generation, timestamp, mimetype, old_path, extra_data
    FROM txlog.transaction_log AS txlog"""
    condition = u"WHERE id > ?"
    order_limit = u"ORDER BY id LIMIT {}".format(num_recs)
    if num_partitions is not None and partition_id is not None:
        # Rows are partitioned by MOD(owner_id, num_partitions); share
        # accept/delete operations bypass the filter for all partitions.
        unfilter_op_types = (
            TransactionLog.OP_SHARE_ACCEPTED,
            TransactionLog.OP_SHARE_DELETED,
        )
        unfilter_op_ids = (str(TransactionLog.OPERATIONS_MAP[id])
                           for id in unfilter_op_types)
        condition += u" AND (MOD(owner_id, ?) = ? OR op_type in ({}))".format(
            ','.join(unfilter_op_ids)
        )
        parameters = parameters + (num_partitions, partition_id)
    query = "({} {} {})".format(select, condition, order_limit)
    if worker_id is not None and expire_secs:
        # UNION in the worker's unexpired "unseen" rows.
        worker_id = unicode(worker_id)
        join = (u"NATURAL JOIN txlog.db_worker_unseen as unseen "
                "WHERE unseen.worker_id = ? "
                "AND unseen.created > "
                "TIMEZONE('UTC'::text, NOW()) - INTERVAL '{} seconds'".format(
                    expire_secs))
        query += u" UNION ({} {} {}) {};".format(
            select, join, order_limit, order_limit
        )
        parameters = parameters + (worker_id,)
    records = store.execute(query, parameters)
    result = []
    for record in records:
        (row_id, owner_id, node_id, volume_id, op_type, path, generation,
         timestamp, mimetype, old_path, extra_data) = record
        op_type = TransactionLog.OPERATIONS_REVERSED_MAP[op_type]
        result.append(
            dict(txn_id=row_id, node_id=node_id, owner_id=owner_id,
                 volume_id=volume_id, op_type=op_type, path=path,
                 generation=generation, timestamp=timestamp,
                 mimetype=mimetype, old_path=old_path,
                 extra_data=extra_data))
    # Now insert unseen directly into db_worker_unseen, avoiding duplicates by
    # joining again with db_worker_unseen.
    if result and result[-1]["txn_id"] > last_id and worker_id is not None:
        insert = (u"INSERT INTO txlog.db_worker_unseen (id, worker_id) "
                  "SELECT gs.id, ? FROM ("
                  "SELECT gs.id "
                  "FROM generate_series(?, ?) AS gs(id) "
                  "LEFT OUTER JOIN txlog.transaction_log AS txlog "
                  "ON gs.id = txlog.id "
                  "WHERE txlog.id IS NULL) AS gs "
                  "LEFT OUTER JOIN txlog.db_worker_unseen AS unseen "
                  "ON gs.id = unseen.id "
                  "AND unseen.worker_id = ? "
                  "WHERE unseen.id IS NULL;")
        store.execute(insert, (worker_id, last_id, result[-1]["txn_id"],
                               worker_id))
    return result
def sstore(self):
    """Look up the shard store on every access.

    Deliberately not cached: holding on to a store across threads may
    cause threading issues.
    """
    return get_shard_store(self.sstore_name)
def bootstrap(cls, user):
    """Backfill txlog entries for all of a user's pre-existing data.

    Inserts entries for the user itself, each live UDF, the public
    access state of public directories, every live file (via one bulk
    SQL INSERT), and each accepted share. Returns how many rows were
    inserted.
    """
    store = get_shard_store(user.shard_id)
    cls.record_user_created(user)
    # Number of TransactionLog rows we inserted.
    rows = 1
    for udf in store.find(UserVolume, owner_id=user.id,
                          status=STATUS_LIVE):
        cls.record_udf_created(udf)
        rows += 1
    # If this becomes a problem it can be done as a single INSERT, but
    # we'd need to duplicate the get_public_file_url() in plpython.
    udf_join = Join(
        StorageObject, UserVolume,
        StorageObject.volume_id == UserVolume.id)
    conditions = [StorageObject.kind == 'Directory',
                  StorageObject.owner_id == user.id,
                  StorageObject.status == STATUS_LIVE,
                  StorageObject._publicfile_id != None,  # NOQA
                  UserVolume.status == STATUS_LIVE]
    dirs = store.using(udf_join).find(StorageObject, *conditions)
    for directory in dirs:
        cls.record_public_access_change(directory)
        rows += 1
    # XXX: If this takes too long it will get killed by the transaction
    # watcher. Need to check what's the limit we could have here.
    # Things to check:
    # * If it still takes too long, we could find out the IDs of the
    #   people who have a lot of music/photos, run it just for them with
    #   the transaction watcher disabled and then run it for everybody
    #   else afterwards.
    query = """
        INSERT INTO txlog.transaction_log (
            node_id, owner_id, volume_id, op_type, path, generation,
            mimetype, extra_data)
        SELECT O.id, O.owner_id, O.volume_id, ?,
            txlog.path_join(O.path, O.name), O.generation, O.mimetype,
            txlog.get_extra_data_to_recreate_file_1(
                kind, size, storage_key, publicfile_id,
                public_uuid, content_hash,
                extract(epoch from O.when_created at time zone 'UTC'),
                extract(epoch from O.when_last_modified at time zone 'UTC'),
                UserDefinedFolder.path
            ) as extra_data
        FROM Object as O
        JOIN UserDefinedFolder on UserDefinedFolder.id = O.volume_id
        LEFT JOIN ContentBlob on ContentBlob.hash = O.content_hash
        WHERE O.kind != 'Directory'
            AND O.owner_id = ?
            AND O.status = 'Live'
            AND UserDefinedFolder.status = 'Live'
        """
    params = (cls.OPERATIONS_MAP[cls.OP_PUT_CONTENT], user.id)
    rows += store.execute(query, params=params).rowcount
    # Cannot create TransactionLogs for Shares in a single INSERT like
    # above because TransactionLogs and Shares live in separate databases.
    share_join = LeftJoin(
        Share, StorageUser,
        Share.shared_to == StorageUser.id)
    conditions = [Share.shared_by == user.id,
                  Share.status == STATUS_LIVE,
                  Share.accepted == True]  # NOQA
    shares = get_user_store().using(share_join).find(Share, *conditions)
    for share in shares:
        cls.record_share_accepted(share)
        rows += 1
    return rows