def forward_datasets(stub, client, secret, last_fetch):
    """Fetch the available datasets from the data wrapper and forward a reduced
    summary of them to the global server over gRPC.

    The refresh is rate-limited to once every two hours. For every
    non-categorical feature only a whitelisted subset of keys is forwarded,
    augmented with outlier warnings derived from the wrapper's statistics.

    Args:
        stub: current gRPC stub (may be replaced by get_grpc_connection).
        client: client identifier sent with every request.
        secret: client secret sent with every request.
        last_fetch: datetime of the previous successful refresh, or None.

    Returns:
        (stub, last_fetch) — the possibly-refreshed stub and fetch timestamp.
    """
    # total_seconds() is required here: timedelta.seconds wraps at one day,
    # so with .seconds a last_fetch older than 24h could wrongly look fresh.
    if not last_fetch or (datetime.datetime.now() - last_fetch).total_seconds() > 60 * 60 * 2:
        last_fetch = datetime.datetime.now()
        try:
            response = requests.get(
                f"{os.getenv('DATA_WRAPPER_URL', 'http://data_wrapper/')}get_available_datasets")
            datasets = response.json()
        except Exception as error:
            # Best effort: report the failure to the server, then continue
            # with an empty dataset list instead of crashing the poll loop.
            server_ok, stub, start_experiment_response = get_grpc_connection(
                stub=stub,
                grpc_function='send_datasets',
                request=globalserver_pb2.DefaultRequest(
                    client=client, secret=secret, protocol=str(error)))
            datasets = []
        for i_dataset, dataset in enumerate(datasets):
            for i_feature, feature in enumerate(dataset['features']):
                # Categorical features are forwarded unchanged.
                if feature['type'] == 'categorical':
                    continue
                # Strip the feature down to the whitelisted keys before
                # sending it to the server.
                feature_reduced = {key: value for key, value in feature.items()
                                   if key in ['feature', 'type', 'categories']}
                feature_reduced['warning'] = []
                if feature['iqr_outliers'] > 0:
                    feature_reduced['warning'].append("This feature has outliers (iqr=1.5)")
                if feature['3std-percentage'] > 0.03:
                    feature_reduced['warning'].append("This feature many outliers (3std>0.97)")
                datasets[i_dataset]['features'][i_feature] = feature_reduced
        server_ok, stub, start_experiment_response = get_grpc_connection(
            stub=stub,
            grpc_function='send_datasets',
            request=globalserver_pb2.DefaultRequest(
                client=client, secret=secret, protocol=json.dumps(datasets)))
    return stub, last_fetch
def P2P_train_model(self, experiment_id, task_id):
    """Run one local XGBoost boosting round and acknowledge completion.

    Pulls a single (features, labels) batch from the data generator,
    continues training the current booster, then notifies the server via
    the 'train_model_response' gRPC call.

    Args:
        experiment_id: experiment this training round belongs to.
        task_id: server-side task identifier to acknowledge.

    Returns:
        True on completion.
    """
    logging.info("Training...%s", self.experiment_id)
    batch_source = self.data_generator(
        "train",
        preprocessing=self.preprocessing_function,
        config=self.config,
        client=self.client)
    features, labels = next(batch_source)
    local_dmatrix = xgb.DMatrix(features, label=labels)
    training_cfg = self.config['training']
    # Start from the compiled model parameters and overlay runtime knobs.
    booster_params = dict(self.config['compile']['model_params'])
    booster_params['nthread'] = training_cfg.get('nthread', -1)
    booster_params['verbosity'] = training_cfg.get('verbosity', 0)
    # xgb_model=self.model continues training from the current booster.
    self.model = xgb.train(
        booster_params,
        local_dmatrix,
        num_boost_round=training_cfg['client_steps_per_round'],
        xgb_model=self.model)
    _, self.stub, _ = grpc_util.get_grpc_connection(
        grpc_function='train_model_response',
        request=globalserver_pb2.DefaultRequest(
            client=self.client,
            secret=self.secret,
            task_id=task_id,
            experiment_id=experiment_id))
    logging.info("Training finished. %s", self.experiment_id)
    gc.collect()
    return True
def RF_train_model(self, experiment_id, task_id):
    """Compute local histogram data for the random-forest protocol.

    Assumes RF_fetch_model was called previously and that the server
    process has set the following fields in the model-configuration-file:
        - current_condition_list
        - current_feature_list
        - random_state

    The resulting histograms are written to ``self.model.model_update``,
    then the server is notified via 'train_model_response'.

    NOTE: assumes positive-label=1, negative-label=0; passing this
    information to the worker still needs to be incorporated.

    Returns:
        True on completion.
    """
    logging.info("Training...%s", self.experiment_id)
    local_batch = self.batch
    # Store the computed histograms on the model (serialized as string).
    self.model.model_update = utils.RF_create_histograms(
        local_batch, self.config, self.model)
    _, self.stub, _ = grpc_util.get_grpc_connection(
        grpc_function='train_model_response',
        request=globalserver_pb2.DefaultRequest(
            client=self.client,
            secret=self.secret,
            task_id=task_id,
            experiment_id=experiment_id))
    logging.info("Training finished. %s", self.experiment_id)
    gc.collect()
    return True
def fetch_model(self, experiment_id, task_id, protocol):
    """Fetch the global model from the server and prepare local training state.

    Streams the model rows via the 'fetch_model_request' gRPC call and
    dispatches each row to the protocol-specific loader (e.g. NN_load_model),
    then initializes dataset, training config and preprocessing.

    Args:
        experiment_id: experiment whose model is fetched.
        task_id: server-side task identifier.
        protocol: protocol prefix selecting the loader method.

    Returns:
        True on completion.
    """
    # %-style lazy logging; the literals previously carried a spurious
    # f-prefix (no braces), which has been dropped — same runtime string.
    logging.info("Parsing Model...%s", experiment_id)
    _, self.stub, responses = grpc_util.get_grpc_connection(
        grpc_function='fetch_model_request',
        request=globalserver_pb2.DefaultRequest(
            client=self.client,
            task_id=task_id,
            secret=self.secret,
            experiment_id=experiment_id))
    for row in responses:  # pseudo stream
        # TODO: split into load-config and compile-model; set custom
        # config during load-config.
        load_model = getattr(self, protocol + "_load_model")
        load_model(model=row)
    logging.info("Model parsed...%s", experiment_id)
    self._set_dataset()
    self._set_custom_training_config()  # TODO: allow to change everything
    self._set_preprocessing()
    tf.keras.backend.clear_session()
    gc.collect()
    return True
def stop_workers(worker_instances, stub, client, secret):
    """Ask the server which experiments must stop and cancel their workers.

    Args:
        worker_instances: mapping of experiment_id -> running worker.
        stub: current gRPC stub (may be replaced by get_grpc_connection).
        client: client identifier.
        secret: client secret.

    Returns:
        (worker_instances, stub) after cancellations.
    """
    # Nothing running locally -> nothing to stop, skip the server round-trip.
    if not worker_instances:
        return worker_instances, stub
    _, stub, stop_response = get_grpc_connection(
        stub=stub,
        grpc_function='stop_experiment',
        request=globalserver_pb2.DefaultRequest(client=client, secret=secret))
    for experiment_id in json.loads(stop_response.experiment_id):
        worker_instances, stub = cancel_worker(
            worker_instances=worker_instances,
            experiment_id=experiment_id,
            stub=stub,
            client=client,
            secret=secret,
            grpc_function='stopped_experiment_response')
    return worker_instances, stub
def cancel_worker(worker_instances, experiment_id, stub, client, secret, grpc_function, error_msg=''):
    """Cancel the worker for *experiment_id* (if any) and confirm to the server.

    If the worker is still running one second after cancellation, return
    without confirming so a later poll can retry. Otherwise send the given
    gRPC confirmation (optionally carrying an error message).

    Returns:
        (worker_instances, stub).
    """
    worker = worker_instances.get(experiment_id)
    if worker is not None:
        logging.info(f"Try to cancel {experiment_id} Worker. {grpc_function}")
        worker.cancel()
        time.sleep(1)
        if not worker.done():
            # Worker has not terminated yet; don't confirm, retry later.
            return worker_instances, stub
    _, stub, _ = get_grpc_connection(
        stub=stub,
        grpc_function=grpc_function,
        request=globalserver_pb2.DefaultRequest(
            experiment_id=experiment_id,
            protocol=error_msg,
            client=client,
            secret=secret))
    return worker_instances, stub
def __init__(self, client, secret, experiment_id):
    """Initialize the worker and verify the gRPC connection.

    Args:
        client: client identifier used for all server requests.
        secret: client secret used for all server requests.
        experiment_id: experiment this worker is responsible for.
    """
    # f-prefixes removed from placeholder-free log literals (ruff F541);
    # the emitted strings are unchanged.
    logging.info("Initialize worker...")
    self.experiment_id = experiment_id
    self.client = client
    self.secret = secret
    self.model = None                     # set by the protocol-specific loader
    self.global_weights = None
    self.config = {}                      # model/training configuration
    self.data_generator = None            # set by _set_dataset
    self.preprocessing_function = None    # set by _set_preprocessing
    # This is only needed until the memory leak is fixed, to read memory usage.
    self.process = psutil.Process(os.getpid())
    self.batch = None  # todo ugly
    # Establish (and implicitly test) the gRPC connection to the server.
    _, self.stub, _ = grpc_util.get_grpc_connection(
        grpc_function='test_connection',
        request=globalserver_pb2.DefaultRequest(
            client=client, secret=secret, experiment_id=experiment_id))
    self.dataset_metadata = {}
    logging.info("GRPC Connection established...")
def start_workers(worker, worker_instances, error_queue, stub, client, secret):
    """Poll the server for experiments to run; launch new workers, reap stale ones.

    Args:
        worker: worker factory/class called to start a new instance.
        worker_instances: mapping of experiment_id -> running worker.
        error_queue: queue handed to new workers for error reporting.
        stub: current gRPC stub (may be replaced by get_grpc_connection).
        client: client identifier.
        secret: client secret.

    Returns:
        (worker_instances, stub) after reconciliation.
    """
    _, stub, start_response = get_grpc_connection(
        stub=stub,
        grpc_function='start_experiment',
        request=globalserver_pb2.DefaultRequest(client=client, secret=secret))
    active_ids = json.loads(start_response.experiment_id)
    # Launch a worker for every server-side experiment we don't run yet.
    for experiment_id in active_ids:
        if experiment_id in worker_instances:
            continue
        logging.info(f"starting {experiment_id} Worker")
        worker_instances[experiment_id] = worker(
            client=client,
            error_queue=error_queue,
            secret=secret,
            experiment_id=experiment_id)
    # Reap local workers whose experiment is no longer running server-side.
    # Iterate over a snapshot since entries may be popped.
    for experiment_id in list(worker_instances):
        if experiment_id in active_ids:
            continue
        if worker_instances[experiment_id].done():
            worker_instances.pop(experiment_id)
        else:
            worker_instances, stub = cancel_worker(
                worker_instances=worker_instances,
                experiment_id=experiment_id,
                stub=stub,
                client=client,
                secret=secret,
                grpc_function='stopped_experiment_response')
    return worker_instances, stub
def NN_train_model(self, experiment_id, task_id):
    """Fit the local Keras model for one round and acknowledge completion.

    All fit() hyperparameters come from ``self.config['training']`` with
    defaults; afterwards the server is notified via 'train_model_response'
    and the TF session is cleared to limit memory growth.

    Returns:
        True on completion.
    """
    logging.info("Training...%s", self.experiment_id)
    training_cfg = self.config['training']
    # class_weight keys arrive as strings (JSON config) and must be ints
    # for Keras; evaluate the config lookup once.
    raw_class_weight = training_cfg.get("class_weight", None)
    class_weight = (
        {int(label): weight for label, weight in raw_class_weight.items()}
        if raw_class_weight else None)
    self.model.fit(
        self.data_generator(
            "train",
            preprocessing=self.preprocessing_function,
            config=self.config,
            client=self.client),
        epochs=training_cfg.get("epochs", 1),
        verbose=training_cfg.get("verbose", 0),
        callbacks=training_cfg.get("callback", []),
        shuffle=training_cfg.get("shuffle", True),
        class_weight=class_weight,
        initial_epoch=training_cfg.get("initial_epoch", 0),
        steps_per_epoch=training_cfg.get("steps_per_epoch", 12),
        max_queue_size=training_cfg.get("max_queue_size", 10),
        workers=1,  # pinned; was training_cfg.get("workers", 1)
        use_multiprocessing=training_cfg.get("use_multiprocessing", False),
    )
    _, self.stub, _ = grpc_util.get_grpc_connection(
        grpc_function='train_model_response',
        request=globalserver_pb2.DefaultRequest(
            client=self.client,
            secret=self.secret,
            task_id=task_id,
            experiment_id=experiment_id))
    logging.info("Training finished. %s", self.experiment_id)
    tf.keras.backend.clear_session()
    gc.collect()
    return True