Example 1
import os

save_dir = f'output/{os.path.basename(__file__)[:-3]}'

from run_utils import common_parser
parser = common_parser()
args = parser.parse_args()

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)

from run_utils import get_config_multi_loss
use_multi_loss, loss_weights = get_config_multi_loss(args)

from run_utils import preprocessing
saver, storegate, task_scheduler, metric = preprocessing(
    save_dir=save_dir,
    args=args,
    tau4vec_tasks=['MLP'],
    higgsId_tasks=['lstm'],
)

# Time measurements
from timer import timer
timer_reg = {}

# Agent
from multiml.agent.keras import KerasConnectionRandomSearchAgent
with timer(timer_reg, "initialize"):
    from my_tasks import mapping_truth_corr
    agent = KerasConnectionRandomSearchAgent(
        # BaseAgent
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
    )
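
The timer helper imported above (from timer import timer) is not shown in these snippets. A minimal sketch of a compatible context manager, assuming it stores a (start, elapsed) pair per key so that timer_reg['execute'][1] (used as the walltime in Example 5) is the elapsed time in seconds:

import time
from contextlib import contextmanager

@contextmanager
def timer(reg, name):
    # Record wall-clock time spent inside the with-block under the given key
    start = time.time()
    yield
    reg[name] = (start, time.time() - start)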
Example 2
def main(conf: str,
         seed: int,
         gpu_index: int,
         data_path: str,
         event: int,
         weight: float,
         load_weights: bool,
         epoch: int,
         nopretraining: bool):
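    """Run the tau4vec + higgsId pipeline with the SPOS-NAS agent (PytorchSPOSNASAgent)."""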
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(conf)
    if seed is not None:
        config.seed = seed
    if gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{gpu_index}')
    if data_path is not None:
        config['dataset']['params']['data_path'] = data_path
    if event is not None:
        config['dataset']['params']['max_events'] = int(event)
    set_seed(config.seed)

    use_multi_loss, loss_weights = get_multi_loss(weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['MLP', 'conv2D', 'SF'],
        higgsId_tasks=['mlp', 'lstm', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if load_weights else ['train', 'valid', 'test']

    # Agent
    from multiml.agent.pytorch import PytorchSPOSNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        agent = PytorchSPOSNASAgent(
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent
            training_choiceblock_model=['test'],
            # ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=not nopretraining,
            connectiontask_args={
                "num_epochs": epoch,
                "max_patience": 10,
                "batch_size": 100,
                "load_weights": load_weights,
                "phases": phases,
                "loss_weights": loss_weights,
                "optimizer": "Adam",
                "optimizer_args": dict(lr=1e-3),
                "variable_mapping": mapping_truth_corr,
                "device": DEVICE,
            }
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    if not load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)
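
The main() function above (and the similar ones below) expects parsed command-line options. A hypothetical launcher, assuming plain argparse (the real scripts may parse arguments differently); the flag names simply mirror main()'s parameters:

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--conf', type=str, required=True)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--gpu_index', type=int, default=None)
    parser.add_argument('--data_path', type=str, default=None)
    parser.add_argument('--event', type=int, default=None)
    parser.add_argument('--weight', type=float, default=None)
    parser.add_argument('--load_weights', action='store_true')
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--nopretraining', action='store_true')
    args = parser.parse_args()

    main(**vars(args))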

Example 3

def main(conf: str,
         seed: int,
         gpu_index: int,
         data_path: str,
         event: int,
         weight: float,
         load_weights: bool,
         nopretraining: bool):
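    """Run a connection grid search with PytorchConnectionGridSearchAgent and re-evaluate the best configuration."""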
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(conf)
    if seed is not None:
        config.seed = seed
    if gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{gpu_index}')
    if data_path is not None:
        config['dataset']['params']['data_path'] = data_path
    if event is not None:
        config['dataset']['params']['max_events'] = int(event)
    set_seed(config.seed)

    use_multi_loss, loss_weights = get_multi_loss(weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    fix_submodel_weights = False
    phases = ['test'] if load_weights else ['train', 'valid', 'test']
    # Agent
    from multiml.agent.pytorch import PytorchConnectionGridSearchAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        agent = PytorchConnectionGridSearchAgent(
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            metric_type='max',
            dump_all_results=True,
            # ConnectionGridAgent
            reuse_pretraining=True,
            # ConnectionSimpleAgent
            freeze_model_weights=fix_submodel_weights,
            do_pretraining=not nopretraining,
            connectiontask_args={
                "num_epochs": 100,
                "max_patience": 10,
                "batch_size": 100,
                "save_weights": not load_weights,
                "load_weights": load_weights,
                "phases": phases,
                "loss_weights": loss_weights,
                "optimizer": "Adam",
                "optimizer_args": dict(lr=1e-3),
                "variable_mapping": mapping_truth_corr,
                "device": DEVICE,
            }
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    if not load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)

    # Evaluate the best parameters
    result, config = agent.get_best_result()

    subtasks = []
    job_id = None
    for task_id, subtask_id, params in zip(result['task_ids'],
                                           result['subtask_ids'],
                                           result['subtask_hps']):
        subtask = task_scheduler.get_subtask(task_id=task_id,
                                             subtask_id=subtask_id)
        params.update(load_weights=True, phases=['test'])
        subtask.env.set_hps(params)
        agent._execute_subtask(subtask, is_pretraining=True)
        subtasks.append(subtask.env)
        job_id = params['job_id']

    subtask = agent._build_connected_models(subtasks,
                                            job_id=config["job_id"],
                                            use_task_scheduler=False)
    subtask.env.task_id = 'connection'
    subtask.env.subtask_id = subtask.env.name
    subtask.env.set_hps({"load_weights": True, "phases": ['test']})
    agent._execute_subtask(subtask, is_pretraining=False)

    metric.storegate = storegate
    result_metric = metric.calculate()
    from multiml import logger
    logger.info(f'metric = {result_metric}')
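
The set_seed helper from run_utils appears throughout but is not shown. A plausible minimal implementation, assuming it seeds Python, NumPy, and PyTorch (the actual helper may cover more RNG sources):

import random
import numpy as np
import torch

def set_seed(seed):
    # Seed every RNG these pipelines touch, for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)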

Example 4

import os

save_dir = f'output/{os.path.basename(__file__)[:-3]}'

from run_utils import common_parser
parser = common_parser()
args = parser.parse_args()

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)

from run_utils import preprocessing
saver, storegate, task_scheduler, metric = preprocessing(
    save_dir=save_dir,
    args=args,
    tau4vec_tasks=['MLP', 'conv2D', 'SF', 'zero', 'noise'],
    higgsId_tasks=['mlp', 'lstm', 'mass', 'zero', 'noise'],
    truth_intermediate_inputs=False,
)

# Time measurements
from timer import timer
timer_reg = {}

# Agent
from multiml.agent.basic import GridSearchAgent
with timer(timer_reg, "initialize"):
    agent = GridSearchAgent(
        # BaseAgent
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
    )
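
The add_suffix helper used in Examples 1 and 4 is likewise not shown. A purely illustrative sketch, assuming it appends an argument-derived tag to the output directory so runs with different settings do not overwrite each other (the attribute name 'suffix' is hypothetical):

def add_suffix(save_dir, args):
    # Hypothetical: append a user-supplied tag to the save directory
    suffix = getattr(args, 'suffix', None)
    if suffix:
        save_dir = f'{save_dir}_{suffix}'
    return save_dir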

Example 5

def main(opts):
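    """Run ASNG-NAS architecture search with PytorchASNGNASAgent, then dump results and per-variable predictions."""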
    logger.set_level(opts.loglevel)
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(opts.config)
    
    verbose = 1

    if opts.seed is not None:
        config['seed'] = opts.seed
    if opts.gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{opts.gpu_index}')
    if opts.data_path is not None:
        config['dataset']['params']['data_path'] = opts.data_path
    if opts.event is not None:
        config['dataset']['params']['max_events'] = int(opts.event)
    if opts.clip_value is not None:
        config['ASNG']['clip'] = opts.clip_value
    if opts.alpha is not None:
        config['ASNG']['alpha'] = opts.alpha
    if opts.lam is not None:
        config['ASNG']['lam'] = opts.lam
    if opts.delta is not None:
        config['ASNG']['delta'] = opts.delta
    if opts.epochs is not None:
        config['ASNG']['epochs'] = opts.epochs

    set_seed(config.seed)
    
    if opts.do_pretrain:
        jobid = 'pretrain_' + opts.jobid
    else:
        jobid = 'no_train_' + opts.jobid

    save_dir = f'output/{os.path.basename(__file__)[:-3]}_{opts.event}evt_weight{opts.weight}_{jobid}'

    use_multi_loss, loss_weights = get_multi_loss(opts.weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if opts.load_weights else ['train', 'valid', 'test']

    # Agent
    logger.info(f'lambda / alpha / delta is {config.ASNG.lam} / {config.ASNG.alpha} / {config.ASNG.delta}')

    from multiml.agent.pytorch import PytorchASNGNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        config['ASNG']['connectiontask_args']['phases'] = phases
        config['ASNG']['connectiontask_args']['variable_mapping'] = mapping_truth_corr
        config['ASNG']['connectiontask_args']['device'] = DEVICE
        config['ASNG']['connectiontask_args']['loss_weights'] = loss_weights

        agent = PytorchASNGNASAgent(
            verbose=verbose,
            num_epochs=config.ASNG.epochs,
            max_patience=config.ASNG.patience,
            batch_size=config.ASNG.batch_size,
            asng_args=config.ASNG.asng_args,
            optimizer=config.ASNG.optimizer.name,
            optimizer_args=config.ASNG.optimizer.params,
            scheduler=config.ASNG.scheduler,
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent
            # ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=opts.do_pretrain,
            connectiontask_args=config.ASNG.connectiontask_args,
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    results = agent.results_json
    results['walltime'] = timer_reg['execute'][1]
    results['timer_reg'] = timer_reg
    results['seed'] = opts.seed
    results['nevents'] = opts.event * 2
        
    def print_dict(key, val):
        # Recursively log nested dicts as flattened 'key subkey : value' lines
        if isinstance(val, dict):
            for k, v in val.items():
                print_dict(f'{key} {k}', v)
        else:
            logger.info(f'{key: <30} : {val}')

    for key, val in results.items():
        print_dict(key, val)

    with open(f'{saver.save_dir}/result.run_connection_asngnas_{opts.event}evt_weight{opts.weight}.json', 'w') as fo:
        json.dump([results], fo, indent=2)
    
    if not opts.load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)
            
    # Post-processing: dump per-variable predictions for each phase
    variables = []
    from my_tasks import corr_tau_4vec
    variables.extend(corr_tau_4vec)
    variables.extend(['probability'])

    for phase in phases:
        storegate.set_data_id("")
        y_pred = np.array(storegate.get_data(phase=phase, var_names=variables))

        os.makedirs(f'{saver.save_dir}/pred/{phase}', exist_ok=True)

        for i, v in enumerate(variables):
            np.save(f'{saver.save_dir}/pred/{phase}/{v}', y_pred[i])
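
Since np.save appends the .npy extension, the predictions dumped above can be reloaded later, e.g. for the test phase:

import numpy as np

# save_dir here is the same directory the run above wrote into (saver.save_dir)
probability = np.load(f'{save_dir}/pred/test/probability.npy')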