def _test_evaluate(
    self,
    feature_shape,
    model_def,
    model_params="",
    dataset_name=DatasetName.IMAGE_DEFAULT,
):
    """Run an evaluation-only pass against freshly created parameter servers.

    Two parameter-server pods are created with ``grads_to_wait=1`` in
    synchronous mode, then ``distributed_train_and_evaluate`` is invoked
    with ``training=False``.

    Args:
        feature_shape: Forwarded unchanged to
            ``distributed_train_and_evaluate``.
        model_def: Model definition passed to both ``create_pserver`` and
            ``distributed_train_and_evaluate``.
        model_params: Model parameter string forwarded to
            ``distributed_train_and_evaluate`` (empty by default).
        dataset_name: Dataset selector forwarded to
            ``distributed_train_and_evaluate``.

    Returns:
        The value returned by ``distributed_train_and_evaluate``.
    """
    num_ps_pods = 2
    grads_to_wait = 1
    _, ps_channels, pservers = create_pserver(
        _model_zoo_path,
        model_def,
        grads_to_wait,
        False,  # use_async: evaluation runs in synchronous mode
        num_ps_pods,
    )
    try:
        model_version = distributed_train_and_evaluate(
            feature_shape,
            _model_zoo_path,
            model_def,
            model_params=model_params,
            training=False,
            dataset_name=dataset_name,
            ps_channels=ps_channels,
            pservers=pservers,
        )
    finally:
        # Always tear down the servers, even when evaluation raises.
        for pserver in pservers:
            pserver.server.stop(0)
        # Release the client channels as well, so they do not outlive the
        # servers they were connected to.
        for channel in ps_channels:
            channel.close()
    return model_version
def _create_pserver(self, model_def, num):
    """Call ``create_pserver`` and keep its results on the instance.

    ``create_pserver`` is invoked with ``grads_to_wait=1``,
    ``use_async=True`` and ``num_ps_pods=num``. The three values it returns
    are stored as ``self._ports``, ``self._channels`` and
    ``self._pservers``; ``model_def`` is remembered as ``self._model_def``.

    Args:
        model_def: Model definition handed to ``create_pserver``.
        num: Number of parameter-server pods to request.
    """
    created = create_pserver(
        self._model_zoo_path,
        model_def,
        grads_to_wait=1,
        use_async=True,
        num_ps_pods=num,
    )
    # Unpack only after the call has succeeded; the attribute order matches
    # the order of the returned triple.
    self._ports, self._channels, self._pservers = created
    self._model_def = model_def
def _test_train(
    self,
    feature_shape,
    model_def,
    model_params="",
    dataset_name=DatasetName.IMAGE_DEFAULT,
):
    """Train once in synchronous mode and once in asynchronous mode.

    For each mode a new set of two parameter-server pods is created,
    ``distributed_train_and_evaluate`` is run with ``training=True``, and
    the servers and channels are torn down afterwards.

    Args:
        feature_shape: Forwarded unchanged to
            ``distributed_train_and_evaluate``.
        model_def: Model definition passed to both ``create_pserver`` and
            ``distributed_train_and_evaluate``.
        model_params: Model parameter string forwarded to
            ``distributed_train_and_evaluate`` (empty by default).
        dataset_name: Dataset selector forwarded to
            ``distributed_train_and_evaluate``.

    Returns:
        A list with the results of ``distributed_train_and_evaluate``, the
        synchronous run first and the asynchronous run second.
    """
    num_ps_pods = 2
    collected_versions = []
    # Synchronous mode is exercised first, asynchronous mode second.
    for use_async in (False, True):
        # Synchronous mode waits for 2 gradients; asynchronous mode for 1.
        if use_async:
            grads_to_wait = 1
        else:
            grads_to_wait = 2
        _, ps_channels, pservers = create_pserver(
            _model_zoo_path,
            model_def,
            grads_to_wait,
            use_async,
            num_ps_pods,
        )
        run_options = dict(
            model_params=model_params,
            training=True,
            dataset_name=dataset_name,
            use_async=use_async,
            ps_channels=ps_channels,
            pservers=pservers,
        )
        try:
            version = distributed_train_and_evaluate(
                feature_shape, _model_zoo_path, model_def, **run_options
            )
        finally:
            # Tear down this mode's servers and channels before the next
            # iteration, whether or not training succeeded.
            for ps in pservers:
                ps.server.stop(0)
            for ch in ps_channels:
                ch.close()
        collected_versions.append(version)
    return collected_versions