def test_build_network():
    cfg_model = {
        "network": {
            "import": "tests.unit.test_model.simple_net",
            "params": {"dense_dim": 7},
        }
    }

    with tf.name_scope("result"):
        net = model.build_network(cfg_model, {"input_dim": 6})
        assert isinstance(net, tf.keras.models.Model)
        result = net.get_config()

    # TODO remove
    tf.keras.backend.reset_uids()

    with tf.name_scope("expected"):
        expected = simple_net(dense_dim=7, input_dim=6).get_config()

    assert result == expected

    cfg_model = {"network": {"import": "tests.unit.test_model.fake_net"}}
    with pytest.raises(TypeError):
        model.build_network(cfg_model, {})

    cfg_model = {"network": {"import": "tests.unit.models.net_bear"}}
    with pytest.raises(ImportError):
        model.build_network(cfg_model, {})
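
# The test above imports simple_net and fake_net from tests.unit.test_model.
# A minimal sketch of fixtures consistent with how the tests call them; the
# signatures, defaults, and the fake_net behavior are inferred here, not
# copied from the repo:
import tensorflow as tf


def simple_net(input_dim, dense_dim, output_dim=4):
    # Functional model mirroring the Input -> Dense(relu) -> Dense(linear)
    # stack that the config-driven tests compare against.
    inputs = tf.keras.layers.Input(shape=(input_dim,), name="input")
    dense = tf.keras.layers.Dense(dense_dim, activation="relu")(inputs)
    outputs = tf.keras.layers.Dense(output_dim, name="output", activation="linear")(dense)
    return tf.keras.models.Model(inputs=inputs, outputs=outputs)


def fake_net():
    # Returns something that is not a tf.keras Model, assuming build_network
    # type-checks its result and raises TypeError (as the test expects).
    return "not a keras model"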

def test_sequential_from_config():
    cfg_model = {
        "network": {
            "import": "barrage.model.sequential_from_config",
            "params": {
                "layers": [
                    {"import": "Input", "params": {"shape": 6, "name": "input"}},
                    {"import": "Dense", "params": {"units": 5, "activation": "relu"}},
                    {
                        "import": "Dense",
                        "params": {
                            "units": 4,
                            "name": "output",
                            "activation": "linear",
                        },
                    },
                ]
            },
        }
    }

    with tf.name_scope("result1"):
        net = model.build_network(cfg_model, {})
        result1 = net.get_config()

    # TODO remove
    tf.keras.backend.reset_uids()

    with tf.name_scope("result2"):
        net = model.sequential_from_config(cfg_model["network"]["params"]["layers"])
        result2 = net.get_config()

    # TODO remove
    tf.keras.backend.reset_uids()

    with tf.name_scope("expected"):
        expected = simple_net(output_dim=4, dense_dim=5, input_dim=6).get_config()

    assert result1 == expected
    assert result2 == expected

    with pytest.raises(KeyError):
        invalid_layers = [{"params": {"shape": 6, "name": "input"}}]
        model.sequential_from_config(invalid_layers)

    with pytest.raises(KeyError):
        invalid_layers = [
            {
                "import": "Input",
                "extra_param": 1,
                "params": {"shape": 6, "name": "input"},
            }
        ]
        model.sequential_from_config(invalid_layers)
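
# For reference, the layer-list convention exercised above: each spec is a dict
# with an "import" key naming an attribute of tf.keras.layers plus optional
# "params"; any other key is rejected. A hypothetical reimplementation for
# illustration only (see barrage.model.sequential_from_config for the real one):
import tensorflow as tf


def sequential_from_config_sketch(layer_specs):
    inputs = outputs = None
    for spec in layer_specs:
        if "import" not in spec:
            raise KeyError("layer spec is missing the 'import' key")
        extra_keys = set(spec) - {"import", "params"}
        if extra_keys:
            raise KeyError(f"layer spec has unexpected keys: {extra_keys}")
        params = spec.get("params", {})
        layer = getattr(tf.keras.layers, spec["import"])
        if spec["import"] == "Input":
            inputs = outputs = layer(**params)  # Input returns a tensor, not a layer
        else:
            outputs = layer(**params)(outputs)
    return tf.keras.models.Model(inputs=inputs, outputs=outputs)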

def load(self):
    """Load the best performing checkpoint."""
    # Load artifacts needed to recreate the network
    self.cfg = io_utils.load_pickle("config.pkl", self.artifact_dir)
    network_params = io_utils.load_pickle("network_params.pkl", self.artifact_dir)

    # Build network
    self.net = model.build_network(self.cfg["model"], network_params)

    # Load best checkpoint
    path = services.get_best_checkpoint_filepath(self.artifact_dir)
    self.net.load_weights(path).expect_partial()  # not loading optimizer

    return self

def test_sequential_from_config_render_params():
    cfg_model = {
        "network": {
            "import": "barrage.model.sequential_from_config",
            "params": {
                "layers": [
                    {
                        "import": "Input",
                        "params": {"shape": "{{input_shape}}", "name": "input"},
                    },
                    {"import": "Dense", "params": {"units": 5, "activation": "relu"}},
                    {
                        "import": "Dense",
                        "params": {
                            "units": "{{num_classes}}",
                            "name": "output",
                            "activation": "linear",
                        },
                    },
                ]
            },
        }
    }
    network_params = {"num_classes": 4, "input_shape": 6}

    with tf.name_scope("result1"):
        net = model.build_network(cfg_model, network_params)
        result1 = net.to_json()
    tf.keras.backend.reset_uids()

    with tf.name_scope("result2"):
        net = model.sequential_from_config(
            cfg_model["network"]["params"]["layers"], **network_params
        )
        result2 = net.to_json()
    tf.keras.backend.reset_uids()

    with tf.name_scope("expected"):
        expected = simple_net(output_dim=4, dense_dim=5, input_dim=6).to_json()

    assert result1 == expected
    assert result2 == expected
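
# The "{{...}}" placeholders above are filled in from network_params before the
# layers are constructed. A minimal sketch of that substitution, assuming each
# placeholder spans the whole value so types are preserved (hypothetical helper,
# not barrage's actual renderer):
def render_params_sketch(params, **network_params):
    rendered = {}
    for key, value in params.items():
        if isinstance(value, str) and value.startswith("{{") and value.endswith("}}"):
            # Swap in the referenced value wholesale, e.g. "{{num_classes}}" -> 4.
            rendered[key] = network_params[value[2:-2].strip()]
        else:
            rendered[key] = value
    return rendered


# Example: render_params_sketch({"units": "{{num_classes}}"}, num_classes=4)
# returns {"units": 4}.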

def load(self):
    """Load the best performing checkpoint."""
    # Load artifacts needed to recreate the network
    self.cfg = io_utils.load_pickle("config.pkl", self._artifact_dir)
    network_params = io_utils.load_pickle("network_params.pkl", self._artifact_dir)

    # Build network
    self.net = model.build_network(self.cfg["model"], network_params)

    # Load best checkpoint
    path = services.get_best_checkpoint_filepath(self._artifact_dir)
    # TODO: remove expect_partial, _make_predict_function
    self.net.load_weights(path).expect_partial()  # not loading optimizer
    self.net._make_predict_function()  # needed for threading in scoring

    self._is_loaded = True
    return self
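
# Hypothetical scoring flow built on load(), assuming the method belongs to
# barrage's BarrageModel engine pointed at a completed training run (the
# artifact path and scoring_batch below are illustrative):
#
#     from barrage import BarrageModel
#
#     engine = BarrageModel("artifacts/run_0").load()
#     predictions = engine.net.predict(scoring_batch)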

def train(
    self,
    cfg: dict,
    records_train: api.InputRecords,
    records_validation: api.InputRecords,
) -> tf.keras.Model:
    """Train the network.

    Args:
        cfg: dict, config.
        records_train: InputRecords, training records.
        records_validation: InputRecords, validation records.

    Returns:
        tf.keras.Model, trained network.
    """
    logger.info("Starting training")
    tf_utils.reset()

    cfg = config.prepare_config(cfg)

    logger.info(f"Creating artifact directory: {self.artifact_dir}")
    services.make_artifact_dir(self.artifact_dir)
    io_utils.save_json(cfg, "config.json", self.artifact_dir)
    io_utils.save_pickle(cfg, "config.pkl", self.artifact_dir)

    logger.info("Creating datasets")
    ds_train = dataset.RecordDataset(
        artifact_dir=self.artifact_dir,
        cfg_dataset=cfg["dataset"],
        records=records_train,
        mode=api.RecordMode.TRAIN,
        batch_size=cfg["solver"]["batch_size"],
    )
    ds_validation = dataset.RecordDataset(
        artifact_dir=self.artifact_dir,
        cfg_dataset=cfg["dataset"],
        records=records_validation,
        mode=api.RecordMode.VALIDATION,
        batch_size=cfg["solver"]["batch_size"],
    )
    network_params = ds_train.transformer.network_params
    io_utils.save_json(network_params, "network_params.json", self.artifact_dir)
    io_utils.save_pickle(network_params, "network_params.pkl", self.artifact_dir)

    logger.info("Building network")
    net = model.build_network(cfg["model"], network_params)
    model.check_output_names(cfg["model"], net)

    logger.info("Compiling network")
    opt = solver.build_optimizer(cfg["solver"])
    objective = model.build_objective(cfg["model"])
    net.compile(optimizer=opt, **objective)

    logger.info("Creating services")
    callbacks = services.create_all_services(self.artifact_dir, cfg["services"])

    if "learning_rate_reducer" in cfg["solver"]:
        logger.info("Creating learning rate reducer")
        callbacks.append(solver.create_learning_rate_reducer(cfg["solver"]))

    logger.info("Training network")
    net.summary()
    net.fit(
        ds_train,
        validation_data=ds_validation,
        epochs=cfg["solver"]["epochs"],
        steps_per_epoch=cfg["solver"].get("steps"),
        callbacks=callbacks,
        verbose=1,
    )

    return net

def train(
    self,
    cfg: dict,
    records_train: Union[pd.DataFrame, api.Records],
    records_validation: Union[pd.DataFrame, api.Records],
    workers: int = 10,
    max_queue_size: int = 10,
) -> tf.keras.Model:
    """Train the network.

    Args:
        cfg: dict, config.
        records_train: Union[pd.DataFrame, Records], training records.
        records_validation: Union[pd.DataFrame, Records], validation records.
        workers: int (OPTIONAL = 10), number of process threads for the sequence.
        max_queue_size: int (OPTIONAL = 10), queue size for the sequence.

    Returns:
        tf.keras.Model, trained network.
    """
    logger.info("Starting training")
    tf_utils.reset()

    logger.info("Validating config schema and applying defaults")
    cfg = config.prepare_config(cfg)

    logger.info(f"Making artifact directory: {self._artifact_dir}")
    services.make_artifact_dir(self._artifact_dir)

    logger.info("Saving config")
    io_utils.save_json(cfg, "config.json", self._artifact_dir)
    io_utils.save_pickle(cfg, "config.pkl", self._artifact_dir)

    logger.info("Building datasets")
    ds_train = dataset.RecordDataset(
        artifact_dir=self._artifact_dir,
        cfg_dataset=cfg["dataset"],
        records=records_train,
        mode=api.RecordMode.TRAIN,
        batch_size=cfg["solver"]["batch_size"],
    )
    ds_validation = dataset.RecordDataset(
        artifact_dir=self._artifact_dir,
        cfg_dataset=cfg["dataset"],
        records=records_validation,
        mode=api.RecordMode.VALIDATION,
        batch_size=cfg["solver"]["batch_size"],
    )
    network_params = ds_train.transformer.network_params
    io_utils.save_json(network_params, "network_params.json", self._artifact_dir)
    io_utils.save_pickle(network_params, "network_params.pkl", self._artifact_dir)

    logger.info("Building network")
    net = model.build_network(cfg["model"], network_params)

    logger.info("Checking network output names match config output names")
    model.check_output_names(cfg["model"], net)

    logger.info("Building optimizer")
    opt = solver.build_optimizer(cfg["solver"])

    logger.info("Building objective")
    objective = model.build_objective(cfg["model"])

    logger.info("Compiling network")
    net.compile(optimizer=opt, **objective)
    metrics_names = net.metrics_names

    logger.info("Creating services")
    callbacks = services.create_all_services(
        self._artifact_dir, cfg["services"], metrics_names
    )

    if "learning_rate_reducer" in cfg["solver"]:
        logger.info("Creating learning rate reducer")
        callbacks.append(
            solver.create_learning_rate_reducer(cfg["solver"], metrics_names)
        )

    logger.info("Training network")
    net.summary(print_fn=logger.info)  # summary() returns None, so route its output through the logger
    net.fit_generator(
        ds_train,
        validation_data=ds_validation,
        epochs=cfg["solver"]["epochs"],
        steps_per_epoch=cfg["solver"].get("steps"),
        callbacks=callbacks,
        use_multiprocessing=(workers > 1),
        max_queue_size=max_queue_size,
        workers=workers,
        verbose=1,
    )

    return net
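
# Hypothetical end-to-end call to train(), assuming the surrounding class is
# barrage's BarrageModel; the cfg skeleton names only the keys this method
# reads, and every value (including the network import path) is a placeholder:
#
#     from barrage import BarrageModel
#     import pandas as pd
#
#     cfg = {
#         "model": {"network": {"import": "my_pkg.nets.simple_net"}},
#         "solver": {"optimizer": {"import": "Adam"}, "batch_size": 32, "epochs": 10},
#         "dataset": {...},   # loader / transformer / augmentor configuration
#         "services": {...},  # checkpointing, CSV logging, TensorBoard, etc.
#     }
#     records_train = pd.DataFrame(...)
#     records_validation = pd.DataFrame(...)
#
#     net = BarrageModel("artifacts/run_0").train(cfg, records_train, records_validation)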