def delete_data(self, data_id, var_name, phase):
    """Delete data from the backend, after checking that data_id exists."""
    backend = self._get_backend(data_id, var_name, phase)

    if data_id not in self._db[backend].get_data_ids():
        logger.info(f'data_id:{data_id} does not exist in backend:{backend}')
        return

    self._db[backend].delete_data(data_id, var_name, phase)
def setup_tensorflow(seed=None, igpu=0):
    """Set up TensorFlow logging, GPU visibility, memory growth and random seeds."""
    # Tensorflow
    import warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)
    warnings.simplefilter(action='ignore', category=DeprecationWarning)

    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'

    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # This is necessary for K.random_normal
    # tf.compat.v1.disable_eager_execution()

    physical_devices = tf.config.list_physical_devices('GPU')
    useGPU = len(physical_devices) > 0

    from multiml import logger
    logger.info(f'useGPU = {useGPU}')

    if isinstance(igpu, int):
        igpu = [igpu]

    # GPU memory option
    if useGPU:
        if tf.__version__[0] == '2':
            gpus = [physical_devices[i] for i in igpu]
            tf.config.set_visible_devices(gpus, 'GPU')
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.list_logical_devices('GPU')
            print('available GPU:', logical_gpus)
        else:
            tf_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
            from keras import backend as K
            sess = tf.Session(config=tf_config)
            K.set_session(sess)

    # Random seed
    import random
    import numpy as np

    if seed is None:
        random.seed(None)
        np.random.seed(None)
        if tf.__version__[0] == '2':
            tf.random.set_seed(None)
        else:
            tf.set_random_seed(None)
    else:
        random.seed(1234 + seed)
        np.random.seed(12345 + seed)
        if tf.__version__[0] == '2':
            tf.random.set_seed(123456 + seed)
        else:
            tf.set_random_seed(123456 + seed)
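# Usage sketch (assumes setup_tensorflow() defined above is in scope): fix the
# python/numpy/tensorflow random seeds and expose only GPU 0, with memory
# growth enabled so TensorFlow does not reserve the whole card up front.
setup_tensorflow(seed=42, igpu=0)

# A list selects several GPUs, e.g. setup_tensorflow(seed=42, igpu=[0, 1]).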
def _get_best_submodels(alphas):
    """Return, for each alpha variable, the index of the submodel with the largest weight."""
    import tensorflow as tf

    subtask_index = []
    for var in alphas:
        if tf.executing_eagerly():
            values = var.numpy().reshape(-1)
        else:
            values = tf.keras.backend.eval(var).reshape(-1)
        idx_max = values.argmax()
        subtask_index.append(idx_max)
        logger.info(f'darts_final_alpha_index_{var.name}: {idx_max}')
    return subtask_index
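# Sketch (assumes _get_best_submodels() above and the multiml logger are in
# scope): two hand-made alpha vectors; the variable names are illustrative only.
import numpy as np
import tensorflow as tf

alphas = [
    tf.Variable(np.array([0.1, 0.7, 0.2], dtype=np.float32), name='alpha_tau4vec'),
    tf.Variable(np.array([0.6, 0.3, 0.1], dtype=np.float32), name='alpha_higgsid'),
]
print(_get_best_submodels(alphas))  # -> [1, 0]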
def _dump_keras(self, key, model=None, model_path=None, **kwargs):
    """Dump keras model and parameters."""
    kwargs = self._get_basic_kwargs(**kwargs)
    kwargs['model_type'] = 'keras'
    kwargs['timestamp'] = logger.get_now()

    if model is not None:
        if model_path is None:
            model_path = f'{self._save_dir}/{key}'
        logger.info(f'keras model saved path = {model_path}')
        model.save_weights(model_path, save_format='tf')
        # model.save(model_path, save_format='tf')  # does not work if the model is complicated
        kwargs['model_path'] = model_path

    self[key] = kwargs
def get_best_submodels(self):
    """Return the submodels selected by the trained alpha parameters.

    Returns:
        list: selected submodels, one per subtask
    """
    subtask_ids_best = []
    for subtask_env, i_model in zip(self._subtasks, self._index_of_best_submodels):
        subtask_ids_best.append(subtask_env.get_submodel(i_model))
        logger.info(
            f'Submodels used in DARTS = {[m.subtask_id for m in subtask_env._subtasks]}')
        logger.info(f'  Selected = {subtask_ids_best[-1].subtask_id}')
    return subtask_ids_best
def show_info(self):
    """Show information of registered tasks and subtasks."""
    logger.header2('')
    for task_id in self._tasktuples:
        in_dag = self._in_dag(task_id)
        parents = self.get_parents_task_ids(task_id)
        children = self.get_children_task_ids(task_id)
        logger.info(
            f'  task_id: {task_id}, DAG: {in_dag} (parents: {parents}, children: {children}):')
        for subtask in self._tasktuples[task_id].subtasks:
            hp_name = subtask.hps.get_hp_names()
            logger.info(f'    subtask_id: {subtask.subtask_id}, hps: {hp_name}')
    logger.header2('')
def load_boston(storegate, target_var_name='true', phase='train', shuffle=True):
    """Load the Boston housing dataset into a storegate."""
    # NOTE: sklearn.datasets.load_boston was removed in scikit-learn 1.2;
    # this loader requires an older scikit-learn release.
    from sklearn.datasets import load_boston

    logger.info(f'Load boston dataset: {target_var_name}')

    boston_dataset = load_boston()
    feature_names = boston_dataset.feature_names.tolist()
    data = boston_dataset.data.astype(np.float32)
    target = boston_dataset.target.astype(np.float32)

    storegate.add_data(feature_names, data, phase=phase, shuffle=shuffle)
    storegate.add_data(target_var_name, target, phase=phase, shuffle=shuffle)
    storegate.compile()
def load_iris(storegate,
              data_var_name='data',
              target_var_name='true',
              var_names=None,
              phase='train',
              shuffle=True):
    from sklearn.datasets import load_iris

    if var_names is not None:
        data_var_name, target_var_name = var_names.split()

    logger.info(f'Load iris dataset: {data_var_name}, {target_var_name}')

    iris_dataset = load_iris()
    data = iris_dataset.data.astype(np.float32)
    target = iris_dataset.target

    storegate.add_data(data_var_name, data, phase=phase, shuffle=shuffle)
    storegate.add_data(target_var_name, target, phase=phase, shuffle=shuffle)
    storegate.compile()
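# Usage sketch: the StoreGate constructor arguments below follow typical
# multiml usage and are an assumption, not confirmed by this snippet.
from multiml import StoreGate

storegate = StoreGate(backend='numpy', data_id='sklearn_iris')

# Register the iris features/labels under the 'train' phase; load_iris() also
# calls storegate.compile(), so the data can be read back afterwards.
load_iris(storegate, data_var_name='data', target_var_name='true', phase='train')
storegate.show_info()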
def _print_result(self, result):
    """Show result."""
    logger.header2('Result')
    for task_id, subtask_id, subtask_hp in zip(result.task_ids, result.subtask_ids,
                                               result.subtask_hps):
        logger.info(f'task_id {task_id} and subtask_id {subtask_id} with:')
        if subtask_hp is None or len(subtask_hp) == 0:
            logger.info('  No hyperparameters')
        else:
            for key, value in subtask_hp.items():
                logger.info(f'  {key} = {value}')
    logger.info(f'Metric ({self._metric.name}) is {result.metric_value}')
def show_info(self):
    """Show information currently registered in storegate."""
    self._check_valid_data_id()

    headers = dict(
        phase='phase'.ljust(6),
        backend='backend'.ljust(8),
        var_names='var_names'.ljust(15),
        var_types='var_types'.ljust(15),
        total_events='total_events'.ljust(15),
        var_shape='var_shape'.ljust(15),
    )

    is_compiled = self._is_compiled()
    logger.header3('')
    logger.info(f'data_id : {self._data_id}, compiled : {is_compiled}')

    for phase in ['train', 'valid', 'test']:
        metadata = self._db.get_metadata(self._data_id, phase)
        if not metadata.keys():
            continue

        logger.header2('')
        logger.info(' '.join(headers.values()))
        logger.header3('')

        for var_name, data in metadata.items():
            phase_str = phase.ljust(6)
            backend = data['backend'].ljust(8)
            var_name = var_name.ljust(15)
            dtype = data['type'].ljust(15)
            total_events = str(data['total_events']).ljust(15)
            shape = data['shape']
            logger.info(f'{phase_str} {backend} {var_name} {dtype} {total_events} {shape}')

    logger.header3('')
agent.execute()

with timer(timer_reg, 'finalize'):
    agent.finalize()

if not args.load_weights:
    with open(f'{saver.save_dir}/timer.pkl', 'wb') as f:
        import pickle
        pickle.dump(timer_reg, f)

# Evaluate the best parameters
result = agent.result
task_scheduler.show_info()

for task_id, subtask_id, params in zip(result['task_ids'], result['subtask_ids'],
                                       result['subtask_hps']):
    subtask = task_scheduler.get_subtask(task_id=task_id, subtask_id=subtask_id)
    params.update(save_weights=False, load_weights=True, phases=['test'])
    subtask.env.set_hps(params)
    agent._execute_subtask(subtask)

metric.storegate = storegate
result_metric = metric.calculate()

from multiml import logger
logger.info(f'metric = {result_metric}')

from run_utils import postprocessing
postprocessing(saver, storegate, args, do_probability=True, do_tau4vec=True)
def main(conf: str, seed: int, gpu_index: int, data_path: str, event: int,
         weight: float, load_weights: bool, nopretraining: bool):
    global DEVICE

    from utils import load_config
    from run_utils import get_multi_loss, set_seed

    config = load_config(conf)
    if seed is not None:
        config.seed = seed
    if gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{gpu_index}')
    if data_path is not None:
        config['dataset']['params']['data_path'] = data_path
    if event is not None:
        config['dataset']['params']['max_events'] = int(event)

    set_seed(config.seed)

    use_multi_loss, loss_weights = get_multi_loss(weight)

    from run_utils import preprocessing
    # save_dir is assumed to be defined at module scope in the original script
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    fix_submodel_weights = False
    phases = ['test'] if load_weights else ['train', 'valid', 'test']

    # Agent
    from multiml.agent.pytorch import PytorchConnectionGridSearchAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        agent = PytorchConnectionGridSearchAgent(
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            metric_type='max',
            dump_all_results=True,
            # ConnectionGridAgent
            reuse_pretraining=True,
            # ConnectionSimpleAgent
            freeze_model_weights=fix_submodel_weights,
            do_pretraining=not nopretraining,
            connectiontask_args={
                "num_epochs": 100,
                "max_patience": 10,
                "batch_size": 100,
                "save_weights": not load_weights,
                "load_weights": load_weights,
                "phases": phases,
                "loss_weights": loss_weights,
                "optimizer": "Adam",
                "optimizer_args": dict(lr=1e-3),
                "variable_mapping": mapping_truth_corr,
                "device": DEVICE,
            },
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    if not load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)

    # Evaluate the best parameters
    result, config = agent.get_best_result()

    subtasks = []
    job_id = None
    for task_id, subtask_id, params in zip(result['task_ids'], result['subtask_ids'],
                                           result['subtask_hps']):
        subtask = task_scheduler.get_subtask(task_id=task_id, subtask_id=subtask_id)
        params.update(load_weights=True, phases=['test'])
        subtask.env.set_hps(params)
        agent._execute_subtask(subtask, is_pretraining=True)
        subtasks.append(subtask.env)
        job_id = params['job_id']

    subtask = agent._build_connected_models(subtasks,
                                            job_id=config["job_id"],
                                            use_task_scheduler=False)
    subtask.env.task_id = 'connection'
    subtask.env.subtask_id = subtask.env.name
    subtask.env.set_hps({"load_weights": True, "phases": ['test']})
    agent._execute_subtask(subtask, is_pretraining=False)

    metric.storegate = storegate
    result_metric = metric.calculate()

    from multiml import logger
    logger.info(f'metric = {result_metric}')
def training_keras_model(model,
                         num_epochs,
                         batch_size,
                         max_patience,
                         x_train,
                         y_train,
                         x_valid,
                         y_valid,
                         chpt_path=None,
                         tensorboard_path=None):
    """Train a keras model.

    Args:
        model (tf.keras.Model): compiled keras model to be trained
        num_epochs (int): maximum number of epochs
        batch_size (int): mini-batch size
        max_patience (int): maximum patience for early stopping
        x_train (np.ndarray): input array for training
        y_train (np.ndarray): output array for training
        x_valid (np.ndarray): input array for validation
        y_valid (np.ndarray): output array for validation
        chpt_path (str): path for Keras check-point saving. If None, a
            temporary directory is used.
        tensorboard_path (str): path for TensorBoard callbacks. If None, the
            TensorBoard callback is not used.

    Returns:
        dict: training results, which contain loss histories.
    """
    if chpt_path is None:
        import tempfile
        tmpdir = tempfile.TemporaryDirectory()
        chpt_path = f'{tmpdir.name}/tf_chpt'

    logger.info(f'chpt_path = {chpt_path}')

    cbs = []

    if max_patience is not None:
        from tensorflow.keras.callbacks import EarlyStopping
        es_cb = EarlyStopping(monitor='val_loss',
                              patience=max_patience,
                              verbose=0,
                              mode='min',
                              restore_best_weights=True)
        cbs.append(es_cb)

    from tensorflow.keras.callbacks import ModelCheckpoint
    cp_cb = ModelCheckpoint(filepath=chpt_path,
                            monitor='val_loss',
                            verbose=0,
                            save_best_only=True,
                            save_weights_only=True,
                            mode='min')
    cbs.append(cp_cb)

    if tensorboard_path is not None:
        from tensorflow.keras.callbacks import TensorBoard
        tb_cb = TensorBoard(log_dir=tensorboard_path,
                            histogram_freq=1,
                            profile_batch=5)
        cbs.append(tb_cb)

    training_verbose_mode = 0
    if logger.MIN_LEVEL <= logger.DEBUG:
        training_verbose_mode = 1

    history = model.fit(x_train,
                        y_train,
                        epochs=num_epochs,
                        batch_size=batch_size,
                        validation_data=(x_valid, y_valid),
                        callbacks=cbs,
                        verbose=training_verbose_mode)

    # Save loss history
    loss_train = history.history.get('loss', [-1])
    loss_valid = history.history.get('val_loss', [-1])

    return {
        'loss_train': loss_train,
        'loss_valid': loss_valid,
    }
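# Usage sketch (assumes training_keras_model() above and the multiml logger
# are in scope); the toy regression model and random data are illustrative only.
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([Dense(16, activation='relu', input_shape=(4,)), Dense(1)])
model.compile(optimizer='adam', loss='mse')

x = np.random.rand(256, 4).astype(np.float32)
y = np.random.rand(256, 1).astype(np.float32)

result = training_keras_model(model,
                              num_epochs=10,
                              batch_size=32,
                              max_patience=3,
                              x_train=x[:200], y_train=y[:200],
                              x_valid=x[200:], y_valid=y[200:])
print(result['loss_train'][-1], result['loss_valid'][-1])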
def load_model(self):
    """Load pre-trained keras model weights."""
    model_path = super().load_model()
    logger.info(f'load {model_path}')
    self.ml.model.load_weights(model_path).expect_partial()
def main(opts):
    logger.set_level(opts.loglevel)
    global DEVICE

    from utils import load_config
    from run_utils import get_multi_loss, set_seed

    config = load_config(opts.config)
    verbose = 1

    if opts.seed is not None:
        config['seed'] = opts.seed
    if opts.gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{opts.gpu_index}')
    if opts.data_path is not None:
        config['dataset']['params']['data_path'] = opts.data_path
    if opts.event is not None:
        config['dataset']['params']['max_events'] = int(opts.event)
    if opts.clip_value is not None:
        config['ASNG']['clip'] = opts.clip_value
    if opts.alpha is not None:
        config['ASNG']['alpha'] = opts.alpha
    if opts.lam is not None:
        config['ASNG']['lam'] = opts.lam
    if opts.delta is not None:
        config['ASNG']['delta'] = opts.delta
    if opts.epochs is not None:
        config['ASNG']['epochs'] = opts.epochs

    set_seed(config.seed)

    if opts.do_pretrain:
        jobid = 'pretrain_' + opts.jobid
    else:
        jobid = 'no_train_' + opts.jobid

    save_dir = f'output/{os.path.basename(__file__)[:-3]}_{opts.event}evt_weight{opts.weight}_{jobid}'

    use_multi_loss, loss_weights = get_multi_loss(opts.weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if opts.load_weights else ['train', 'valid', 'test']

    # Agent
    logger.info(f'lambda / alpha / delta is {config.ASNG.lam} / {config.ASNG.alpha} / {config.ASNG.delta}')

    from multiml.agent.pytorch import PytorchASNGNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        config['ASNG']['connectiontask_args']['phases'] = phases
        config['ASNG']['connectiontask_args']['variable_mapping'] = mapping_truth_corr
        config['ASNG']['connectiontask_args']['device'] = DEVICE
        config['ASNG']['connectiontask_args']['loss_weights'] = loss_weights

        agent = PytorchASNGNASAgent(
            verbose=verbose,
            num_epochs=config.ASNG.epochs,
            max_patience=config.ASNG.patience,
            batch_size=config.ASNG.batch_size,
            asng_args=config.ASNG.asng_args,
            optimizer=config.ASNG.optimizer.name,
            optimizer_args=config.ASNG.optimizer.params,
            scheduler=config.ASNG.scheduler,
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent
            # ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=opts.do_pretrain,
            connectiontask_args=config.ASNG.connectiontask_args,
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    results = agent.results_json
    results['walltime'] = timer_reg['execute'][1]
    results['timer_reg'] = timer_reg
    results['seed'] = opts.seed
    results['nevents'] = opts.event * 2

    def print_dict(key, val):
        if type(val) is dict:
            for k, v in val.items():
                print_dict(f'{key} {k}', v)
        else:
            logger.info(f'{key: <30} : {val}')

    for key, val in results.items():
        print_dict(key, val)

    with open(f'{saver.save_dir}/result.run_connection_asngnas_{opts.event}evt_weight{opts.weight}.json', 'w') as fo:
        json.dump([results], fo, indent=2)

    if not opts.load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)

    ### post processing
    variables = []
    from my_tasks import corr_tau_4vec
    variables.extend(corr_tau_4vec)
    variables.extend(['probability'])

    for phase in phases:
        # dump prediction
        storegate.set_data_id("")
        y_pred = np.array(storegate.get_data(phase=phase, var_names=variables))

        os.makedirs(f'{saver.save_dir}/pred/{phase}', exist_ok=True)
        for i, v in enumerate(variables):
            np.save(f'{saver.save_dir}/pred/{phase}/{v}', y_pred[i])
def print_dict(key, val):
    """Recursively log a (possibly nested) dict as flat 'key subkey : value' lines."""
    if type(val) is dict:
        for k, v in val.items():
            print_dict(f'{key} {k}', v)
    else:
        logger.info(f'{key: <30} : {val}')
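# Usage sketch (assumes the multiml logger is configured): nested dicts are
# flattened into 'key subkey : value' lines; the results dict is illustrative.
results = {
    'seed': 0,
    'timer_reg': {'initialize': [0.0, 1.2], 'execute': [1.2, 35.7]},
}
for key, val in results.items():
    print_dict(key, val)
# seed                           : 0
# timer_reg initialize           : [0.0, 1.2]
# timer_reg execute              : [1.2, 35.7]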