def test_decompress_no_metadata(tensor_key, named_tensor):
    """Test that decompress raises exception without metadata."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = []
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata
        )

def test_find_dependencies_without_send_model_deltas(tensor_key):
    """Test that find_dependencies returns empty list when send_model_deltas is False."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, 5, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, False)
    assert len(tensor_key_dependencies) == 0

def test_find_dependencies_with_zero_round(tensor_key):
    """Test that find_dependencies returns empty list when round number is 0."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)
    assert len(tensor_key_dependencies) == 0

def test_decompress_no_tags(tensor_key, named_tensor):
    """Test that decompress raises exception without tags."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata
        )

def test_decompress_require_lossless_no_compressed_in_tags(tensor_key, named_tensor):
    """Test that decompress raises an error when require_lossless is True and there is no compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('lossy_compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata,
            require_lossless=True
        )

def test_find_dependencies(tensor_key):
    """Test that find_dependencies works correctly."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    round_number = 2
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)
    assert len(tensor_key_dependencies) == 2
    tensor_key_dependency_0, tensor_key_dependency_1 = tensor_key_dependencies
    assert tensor_key_dependency_0.round_number == round_number - 1
    assert tensor_key_dependency_0.tags == tensor_key.tags
    assert tensor_key_dependency_1.tags == ('aggregated', 'delta', 'compressed')

def test_decompress_compressed_in_tags(tensor_key, named_tensor):
    """Test that decompress works correctly when there is a compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    decompressed_tensor_key, decompressed_nparray = tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata
    )
    assert 'compressed' not in decompressed_tensor_key.tags

def test_generate_delta(tensor_key, named_tensor):
    """Test that generate_delta works correctly."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)
    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    delta_tensor_key, delta_nparray = tensor_codec.generate_delta(
        tensor_key, nparray, nparray)
    assert np.array_equal(delta_nparray, nparray - nparray)
    assert 'delta' in delta_tensor_key.tags

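# Several of the tests in this module rebuild the same metadata list and
# nparray from the named_tensor fixture. A hypothetical helper that could
# factor out that boilerplate; the name _metadata_and_nparray is ours, not
# part of the test suite, and numpy is assumed imported as np, as in the
# tests themselves.
def _metadata_and_nparray(named_tensor):
    """Rebuild transformer metadata and the original nparray (sketch)."""
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    # The first metadata entry carries the original array shape.
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)
    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    return metadata, nparray
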
def __init__(self,
             collaborator_name,
             aggregator_uuid,
             federation_uuid,
             client,
             task_runner,
             tensor_pipe,
             task_config,
             opt_treatment='RESET',
             delta_updates=False,
             db_store_rounds=1,
             **kwargs):
    """Initialize."""
    # '' instead of None is just for protobuf compatibility;
    # we would really want this as an object
    self.single_col_cert_common_name = ''

    self.collaborator_name = collaborator_name
    self.aggregator_uuid = aggregator_uuid
    self.federation_uuid = federation_uuid

    self.tensor_pipe = tensor_pipe or NoCompressionPipeline()
    self.tensor_codec = TensorCodec(self.tensor_pipe)
    self.tensor_db = TensorDB()
    self.db_store_rounds = db_store_rounds

    self.task_runner = task_runner
    self.delta_updates = delta_updates

    self.client = client
    self.task_config = task_config

    self.logger = getLogger(__name__)

    # opt_treatment is the name of an OptTreatment member:
    # RESET / CONTINUE_LOCAL / CONTINUE_GLOBAL
    if hasattr(OptTreatment, opt_treatment):
        self.opt_treatment = OptTreatment[opt_treatment]
    else:
        self.logger.error('Unknown opt_treatment: %s.', opt_treatment)
        raise NotImplementedError(
            f'Unknown opt_treatment: {opt_treatment}.')

    self.task_runner.set_optimizer_treatment(self.opt_treatment.name)

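# The constructor above validates opt_treatment by enum member name. A
# minimal sketch of what the OptTreatment enum looks like, assuming the
# three modes named in the comment; the real definition lives elsewhere
# in the package.
from enum import Enum


class OptTreatment(Enum):
    """How the collaborator treats optimizer state between rounds (sketch)."""

    RESET = 1            # re-initialize the optimizer every round
    CONTINUE_LOCAL = 2   # keep the collaborator's own optimizer state
    CONTINUE_GLOBAL = 3  # use the globally aggregated optimizer state


# Usage mirrors the constructor's check: OptTreatment['RESET'] looks the
# member up by name, which is why opt_treatment is passed as a string.
assert OptTreatment['RESET'] is OptTreatment.RESET
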
def test_decompress_call_compression_pipeline(tensor_key, named_tensor):
    """Test that decompress calls compression pipeline when there is no compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('lossy_compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    tensor_codec.compression_pipeline = mock.Mock()
    tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata
    )
    tensor_codec.compression_pipeline.backward.assert_called_with(
        named_tensor.data_bytes, metadata)

def test_decompress_call_lossless_pipeline_with_require_lossless(tensor_key, named_tensor):
    """Test that decompress calls lossless pipeline when require_lossless is True."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    tensor_codec.lossless_pipeline = mock.Mock()
    tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata,
        require_lossless=True
    )
    tensor_codec.lossless_pipeline.backward.assert_called_with(
        named_tensor.data_bytes, metadata)

def test_compress_lossless(tensor_key, named_tensor):
    """Test that compress works correctly with require_lossless flag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)
    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    compressed_tensor_key, compressed_nparray, metadata = tensor_codec.compress(
        tensor_key, nparray, require_lossless=True)
    assert 'compressed' in compressed_tensor_key.tags
    assert compressed_tensor_key.tensor_name == tensor_key.tensor_name
    assert compressed_tensor_key.origin == tensor_key.origin
    assert compressed_tensor_key.round_number == tensor_key.round_number

def test_generate_delta_assert_model_in_tags(tensor_key, named_tensor):
    """Test that generate_delta raises an exception when there is a model tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)
    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    with pytest.raises(AssertionError):
        tensor_codec.generate_delta(tensor_key, nparray, nparray)

def test_apply_delta_agg(tensor_key, named_tensor):
    """Test that apply_delta works for aggregator tensor_key."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, 'aggregator_1', round_number, report, ('delta',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)
    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    new_model_tensor_key, nparray_with_delta = tensor_codec.apply_delta(
        tensor_key, nparray, nparray)
    assert 'delta' not in new_model_tensor_key.tags
    assert np.array_equal(nparray_with_delta, nparray + nparray)

def __init__(self,
             aggregator_uuid,
             federation_uuid,
             authorized_cols,
             init_state_path,
             best_state_path,
             last_state_path,
             assigner,
             rounds_to_train=256,
             single_col_cert_common_name=None,
             compression_pipeline=None,
             db_store_rounds=1,
             **kwargs):
    """Initialize."""
    self.round_number = 0
    self.single_col_cert_common_name = single_col_cert_common_name

    if self.single_col_cert_common_name is not None:
        self._log_big_warning()
    else:
        # FIXME: '' instead of None is just for protobuf compatibility.
        # Cleaner solution?
        self.single_col_cert_common_name = ''

    self.rounds_to_train = rounds_to_train

    # if the collaborator requests a delta, this value is set to true
    self.authorized_cols = authorized_cols
    self.uuid = aggregator_uuid
    self.federation_uuid = federation_uuid
    self.assigner = assigner
    self.quit_job_sent_to = []

    self.tensor_db = TensorDB()
    self.db_store_rounds = db_store_rounds
    self.compression_pipeline = compression_pipeline \
        or NoCompressionPipeline()
    self.tensor_codec = TensorCodec(self.compression_pipeline)
    self.logger = getLogger(__name__)

    self.init_state_path = init_state_path
    self.best_state_path = best_state_path
    self.last_state_path = last_state_path

    self.best_tensor_dict: dict = {}
    self.last_tensor_dict: dict = {}
    self.best_model_score = None

    self.model: ModelProto = utils.load_proto(self.init_state_path)

    self._load_initial_tensors()  # keys are TensorKeys

    self.log_dir = f'logs/{self.uuid}_{self.federation_uuid}'
    # TODO use native tensorboard
    # self.tb_writer = tb.SummaryWriter(self.log_dir, flush_secs=10)

    self.collaborator_tensor_results = {}  # {TensorKey: nparray}

    # these enable getting all tensors for a task
    self.collaborator_tasks_results = {}  # {TaskResultKey: list of TensorKeys}

    self.collaborator_task_weight = {}  # {TaskResultKey: data_size}

class TensorCodec:
    """TensorCodec is responsible for the following.

    1. Tracking the compression/decompression related dependencies of
       a given tensor
    2. Acting as a TensorKey aware wrapper for the compression_pipeline
       functionality
    """

    def __init__(self, compression_pipeline):
        """Initialize."""
        self.compression_pipeline = compression_pipeline
        if self.compression_pipeline.is_lossy():
            self.lossless_pipeline = NoCompressionPipeline()
        else:
            self.lossless_pipeline = compression_pipeline

    def set_lossless_pipeline(self, lossless_pipeline):
        """Set lossless pipeline."""
        assert not lossless_pipeline.is_lossy(), (
            'The provided pipeline is not lossless')
        self.lossless_pipeline = lossless_pipeline

    def compress(self, tensor_key, data, require_lossless=False, **kwargs):
        """
        Function-wrapper around the tensor_pipeline.forward function.

        It also keeps track of the tensorkeys associated with the compressed
        nparray.

        Args:
            tensor_key: TensorKey is provided to verify it should be
                compressed, and new TensorKeys returned will be derivatives
                of the existing tensor_name
            data: (uncompressed) numpy array associated with the tensor_key
            require_lossless: boolean. Does the tensor require lossless
                compression

        Returns:
            compressed_tensor_key: TensorKey corresponding to the compressed
                tensor
            compressed_nparray: The compressed tensor
            metadata: metadata associated with the compressed tensor
        """
        if require_lossless:
            compressed_nparray, metadata = self.lossless_pipeline.forward(
                data, **kwargs)
        else:
            compressed_nparray, metadata = self.compression_pipeline.forward(
                data, **kwargs)
        # Define the compressed tensorkey that should be
        # returned ('trained.delta'->'trained.delta.lossy_compressed')
        tensor_name, origin, round_number, report, tags = tensor_key
        if not self.compression_pipeline.is_lossy() or require_lossless:
            new_tags = tuple(list(tags) + ['compressed'])
        else:
            new_tags = tuple(list(tags) + ['lossy_compressed'])
        compressed_tensor_key = TensorKey(
            tensor_name, origin, round_number, report, new_tags)
        return compressed_tensor_key, compressed_nparray, metadata

    def decompress(self, tensor_key, data, transformer_metadata,
                   require_lossless=False, **kwargs):
        """
        Function-wrapper around the tensor_pipeline.backward function.

        It also keeps track of the tensorkeys associated with the decompressed
        nparray.

        Args:
            tensor_key: TensorKey is provided to verify it should be
                decompressed, and new TensorKeys returned will be derivatives
                of the existing tensor_name
            data: (compressed) numpy array associated with the tensor_key
            transformer_metadata: metadata associated with the compressed
                tensor
            require_lossless: boolean, does data require lossless
                decompression

        Returns:
            decompressed_tensor_key: TensorKey corresponding to the
                decompressed tensor
            decompressed_nparray: The decompressed tensor
        """
        tensor_name, origin, round_number, report, tags = tensor_key

        assert len(transformer_metadata) > 0, (
            'metadata must be included for decompression')
        assert ('compressed' in tags) or ('lossy_compressed' in tags), (
            'Cannot decompress an uncompressed tensor')
        if require_lossless:
            assert 'compressed' in tags, (
                'Cannot losslessly decompress lossy tensor')

        if require_lossless or 'compressed' in tags:
            decompressed_nparray = self.lossless_pipeline.backward(
                data, transformer_metadata, **kwargs)
        else:
            decompressed_nparray = self.compression_pipeline.backward(
                data, transformer_metadata, **kwargs)
        # Define the decompressed tensorkey that should be returned
        if 'lossy_compressed' in tags:
            lc_idx = tags.index('lossy_compressed')
            new_tags = list(tags)
            new_tags[lc_idx] = 'lossy_decompressed'
            decompressed_tensor_key = TensorKey(
                tensor_name, origin, round_number, report, tuple(new_tags))
        elif 'compressed' in tags:
            # 'compressed' == lossless compression; no need for
            # compression related tag after decompression
            new_tags = list(tags)
            new_tags.remove('compressed')
            decompressed_tensor_key = TensorKey(
                tensor_name, origin, round_number, report, tuple(new_tags))
        else:
            raise NotImplementedError(
                'Decompression is only supported on compressed data')
        return decompressed_tensor_key, decompressed_nparray

    @staticmethod
    def generate_delta(tensor_key, nparray, base_model_nparray):
        """
        Create delta from the updated layer and base layer.

        Args:
            tensor_key: This is the tensor_key associated with the nparray.
                Should have a tag of 'trained' or 'aggregated'
            nparray: The nparray that corresponds to the tensorkey
            base_model_nparray: The base model tensor that will be subtracted
                from the new weights

        Returns:
            delta_tensor_key: TensorKey that corresponds to the delta weight
                array
            delta: Difference between the provided tensors
        """
        tensor_name, origin, round_number, report, tags = tensor_key
        if not np.isscalar(nparray):
            assert nparray.shape == base_model_nparray.shape, (
                f'Shape of updated layer ({nparray.shape}) is not equal to '
                f'base layer shape of ({base_model_nparray.shape})')
        assert 'model' not in tags, (
            'The tensorkey should be provided '
            'from the layer with new weights, not the base model')
        if isinstance(tags, str):
            new_tags = (tags, 'delta')
        else:
            new_tags = tuple(list(tags) + ['delta'])
        delta_tensor_key = TensorKey(
            tensor_name, origin, round_number, report, new_tags)
        return delta_tensor_key, nparray - base_model_nparray

    @staticmethod
    def apply_delta(tensor_key, delta, base_model_nparray):
        """
        Add delta to the nparray.

        Args:
            tensor_key: This is the tensor_key associated with the delta.
                Should have a tag of 'trained' or 'aggregated'
            delta: Weight delta between the new model and old model
            base_model_nparray: The nparray that corresponds to the prior
                weights

        Returns:
            new_model_tensor_key: Latest model layer tensorkey
            new_model_nparray: Latest layer weights
        """
        tensor_name, origin, round_number, report, tags = tensor_key
        if not np.isscalar(base_model_nparray):
            assert delta.shape == base_model_nparray.shape, (
                f'Shape of delta ({delta.shape}) is not equal to shape of '
                f'model layer ({base_model_nparray.shape})')
        # assert 'model' in tags, (
        #     'The tensorkey should be provided from the base model')
        # Aggregator UUID has the prefix 'aggregator'
        if 'aggregator' in origin:
            new_tags = list(tags)
            new_tags.remove('delta')
            new_model_tensor_key = TensorKey(
                tensor_name, origin, round_number, report, tuple(new_tags))
        else:
            new_model_tensor_key = TensorKey(
                tensor_name, origin, round_number, report, ('model',))
        return new_model_tensor_key, base_model_nparray + delta

    def find_dependencies(self, tensor_key, send_model_deltas):
        """Resolve the tensors required to do the specified operation."""
        tensor_key_dependencies = []

        tensor_name, origin, round_number, report, tags = tensor_key

        if 'model' in tags and send_model_deltas:
            if round_number >= 1:
                # The new model can be generated by previous model + delta
                tensor_key_dependencies.append(
                    TensorKey(tensor_name, origin, round_number - 1,
                              report, tags))
                if self.compression_pipeline.is_lossy():
                    new_tags = ('aggregated', 'delta', 'lossy_compressed')
                else:
                    new_tags = ('aggregated', 'delta', 'compressed')
                tensor_key_dependencies.append(
                    TensorKey(tensor_name, origin, round_number,
                              report, new_tags))
        return tensor_key_dependencies

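# A minimal end-to-end sketch of the codec's round-trip contract, assuming
# TensorKey and NoCompressionPipeline are importable as in the tests above;
# the key values below are made up for illustration.
def _codec_roundtrip_example():
    """Sketch: compress/decompress round-trip with a lossless pipeline."""
    codec = TensorCodec(NoCompressionPipeline())
    key = TensorKey('conv1', 'col_1', 0, False, ('trained',))
    nparray = np.ones((2, 2), dtype=np.float32)

    # A lossless pipeline appends the 'compressed' tag.
    compressed_key, compressed, metadata = codec.compress(key, nparray)
    assert 'compressed' in compressed_key.tags

    # Decompression strips the tag and restores the original array.
    decompressed_key, restored = codec.decompress(
        compressed_key, compressed, metadata)
    assert 'compressed' not in decompressed_key.tags
    assert np.array_equal(restored, nparray)
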
def test_find_dependencies_without_model_in_tags(tensor_key):
    """Test that find_dependencies returns empty list when there is no model tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)
    assert len(tensor_key_dependencies) == 0