Example #1
def test_keras_module_ensemble():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    subblock1 = MLPBlock(layers=[2, 1])
    subblock2 = MLPBlock(layers=[2, 1])

    model1 = EnsembleModel(models=[subblock1, subblock2],
                           prefix='test',
                           ensemble_type='linear',
                           individual_loss=True)
    model2 = EnsembleModel(
        models=[subblock1, subblock2],
        prefix='test',
        ensemble_type='softmax',
    )

    input_tensor = np.random.normal(size=(3, 2))
    model1(input_tensor)
    model2(input_tensor)

    with pytest.raises(ValueError):
        EnsembleModel(
            models=[subblock1, subblock2],
            prefix='test',
            ensemble_type='dummy_type',
        )
Example #2
def test_keras_module_conv2d_invalid_layer():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    with pytest.raises(ValueError):
        Conv2DBlock(layers_conv2d=[('dummy_layers', {})])
Example #3
def test_agent():
    env0 = MyTask()
    env1 = MyTask()
    env2 = MyTask()
    env3 = MyTask()

    hps_dict = {'hp_layer': [5, 10, 15], 'hp_epoch': [128, 256, 512]}
    hps = Hyperparameters(hps_dict)

    task_scheduler = TaskScheduler(['step0', 'step1'])

    task_scheduler.add_subtask('step0', 'task0', env=env0, hps=hps)
    task_scheduler.add_subtask('step0', 'task1', env=env1)

    task_scheduler.add_subtask('step1', 'task2', env=env2, hps=hps)
    task_scheduler.add_subtask('step1', 'task3', env=env3)

    task_scheduler.show_info()
    task_scheduler.get_sorted_task_ids()

    assert task_scheduler.get_sorted_task_ids() == ['step0', 'step1']
    assert task_scheduler.get_subtask_ids('step0') == ['task0', 'task1']
    assert task_scheduler.get_children_task_ids('step0') == ['step1']
    assert task_scheduler.get_parents_task_ids('step1') == ['step0']
    assert task_scheduler.get_subtask('step0', 'task0').env == env0

    storegate = StoreGate(backend='numpy', data_id='test_agent')
    saver = Saver()
    metric = RandomMetric()

    logger.set_level(logger.DEBUG)
    agent = GridSearchAgent(saver=saver,
                            storegate=storegate,
                            task_scheduler=task_scheduler,
                            metric=metric,
                            dump_all_results=True)

    assert agent._storegate is storegate
    assert agent._saver is saver
    assert agent._task_scheduler is task_scheduler
    assert agent._metric is metric

    agent.storegate = storegate
    agent.saver = saver
    agent.task_scheduler = task_scheduler
    agent.metric = metric

    assert agent.storegate is storegate
    assert agent.saver is saver
    assert agent.task_scheduler is task_scheduler
    assert agent.metric is metric

    agent.execute()
    agent.finalize()

    best_result = agent.get_best_result()
    assert best_result.metric_value > 0
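
test_agent above also assumes a RandomMetric. It is either provided by multiml.agent.metric or defined in the test module; if you need a stand-in, a minimal sketch follows, assuming that BaseMetric only requires calculate() to be overridden (that interface is an assumption, not confirmed by the source):

import random

from multiml.agent.metric import BaseMetric


class RandomMetric(BaseMetric):
    # Hypothetical stub: returns a random value in (0, 1], so the
    # `best_result.metric_value > 0` assertion above always holds.
    def calculate(self):
        return 1.0 - random.random()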
Example #4
def test_mytask():
    logger.set_level(logger.DEBUG)
    task = MyTask()
    task.set_hps({'hp_layer': 5, 'hp_epoch': 256})
    assert task._hp_layer == 5
    assert task._hp_epoch == 256

    task.execute()
    task.finalize()
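
Examples #3 and #4 assume a MyTask class defined in the test module. A minimal sketch is given below; it assumes multiml's BaseTask provides set_hps() mapping each hyperparameter key k to an attribute _k (which the assertions above imply) and a no-op finalize(), and the import path is likewise an assumption:

from multiml import logger
from multiml.task import BaseTask


class MyTask(BaseTask):
    # Hypothetical stub with two tunable hyperparameters.
    def __init__(self, hp_layer=5, hp_epoch=128, **kwargs):
        super().__init__(**kwargs)
        self._hp_layer = hp_layer
        self._hp_epoch = hp_epoch

    def execute(self):
        logger.info(f'hp_layer={self._hp_layer}, hp_epoch={self._hp_epoch}')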
Example #5
def test_keras_base():
    KerasBaseTask.__abstractmethods__ = set()

    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()

    task = KerasBaseTask(saver=saver)
    task.load_metadata()  # Do nothing
Example #6
def test_keras_module_mlp():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    block = MLPBlock(
        layers=[2, 1],
        batch_norm=True,
    )

    input_tensor = np.random.normal(size=(3, 2))
    block(input_tensor)
Example #7
def test_keras_module_connection_model():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    task0 = MLPTask(input_var_names=['var0', 'var1'],
                    output_var_names=['output0', 'output1'],
                    true_var_names=['var2', 'var3'],
                    layers=[4, 2],
                    **args_mlptask)
    task1 = MLPTask(input_var_names=['output0', 'output1'],
                    output_var_names=['output2'],
                    true_var_names=['var4'],
                    layers=[4, 1],
                    **args_mlptask)
    task2 = MLPTask(input_var_names=['output2'],
                    output_var_names=['output3'],
                    true_var_names=['label'],
                    layers=[4, 1],
                    **args_mlptask)
    task0.execute()
    task1.execute()
    task2.execute()

    model = ConnectionModel(
        models=[task0.ml.model, task1.ml.model, task2.ml.model],
        input_var_index=[[0, 1], [-1, -2], [-3]],
        output_var_index=[[0, 1], [2], [3]])

    trainable_variables = model._get_variables()
    assert len(trainable_variables) > 0

    input_tensor = np.random.normal(size=(3, 2))
    model(input_tensor)
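
Several of these tests rely on a build_storegate() helper defined in the test module. A minimal stand-in is sketched below; it generates random toy data for var0..var4 plus a binary label, and the add_data()/compile() calls reflect one reading of the multiml StoreGate API rather than the original helper:

import numpy as np

from multiml import StoreGate


def build_storegate():
    # Hypothetical stand-in: random toy data for the tests above.
    storegate = StoreGate(backend='numpy', data_id='test_keras')
    for phase in ('train', 'valid', 'test'):
        storegate.add_data(var_names=['var0', 'var1', 'var2', 'var3', 'var4'],
                           data=np.random.normal(size=(100, 5)),
                           phase=phase)
        storegate.add_data(var_names='label',
                           data=np.random.binomial(n=1, p=0.5, size=(100, )),
                           phase=phase)
    storegate.compile()
    return storegate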
Example #8
def test_keras_module_softmax_dense_layer():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    layer1 = SoftMaxDenseLayer()
    layer2 = SoftMaxDenseLayer(dropout_rate=0.3)

    layer1.get_config()

    input_tensor = np.random.normal(size=(3, 2))
    layer1(input_tensor, training=None)
    layer2(input_tensor, training=True)
Example #9
def test_keras_module_darts_model_sum_tensor():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    sum_tensor = SumTensor(name='test_sum')

    with pytest.raises(ValueError):
        sum_tensor.result()

    sum_tensor(0.1)
    sum_tensor(0.2)
    sum_tensor.result()
Example #10
def test_mypytorchtask():
    logger.set_level(logger.DEBUG)
    saver = Saver()
    task = MyPytorchTask(saver=saver,
                         model=MyPytorchModel,
                         optimizer='SGD',
                         optimizer_args=dict(lr=0.1),
                         loss='CrossEntropyLoss',
                         metrics=['acc', 'lrs'])
    task.set_hps({'num_epochs': 5})

    task.execute()
    task.finalize()

    assert saver.load_ml(key='mypytorch')['_num_epochs'] == 5
Example #11
def preprocessing(tau4vec_tasks=['MLP', 'conv2D', 'SF', 'zero', 'noise'],
                  higgsId_tasks=['mlp', 'lstm', 'mass', 'zero', 'noise'],
                  truth_intermediate_inputs=True,
                  run_eagerly=True):

    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    load_weights = False

    from multiml.saver import Saver
    saver = Saver()

    # Storegate
    from my_storegate import get_storegate
    storegate = get_storegate(max_events=100)

    # Task scheduler
    from multiml.task_scheduler import TaskScheduler
    from my_tasks import get_higgsId_subtasks, get_tau4vec_subtasks
    task_scheduler = TaskScheduler()

    subtask1 = get_higgsId_subtasks(saver,
                                    subtask_names=higgsId_tasks,
                                    truth_input=truth_intermediate_inputs,
                                    load_weights=load_weights,
                                    run_eagerly=run_eagerly)
    task_scheduler.add_task(task_id='higgsId',
                            parents=['tau4vec'],
                            children=[],
                            subtasks=subtask1)

    subtask2 = get_tau4vec_subtasks(saver,
                                    subtask_names=tau4vec_tasks,
                                    load_weights=load_weights,
                                    run_eagerly=run_eagerly)
    task_scheduler.add_task(task_id='tau4vec',
                            parents=[],
                            children=['higgsId'],
                            subtasks=subtask2)

    # Metric
    from multiml.agent.metric import AUCMetric
    metric = AUCMetric(pred_var_name='probability',
                       true_var_name='label',
                       phase='test')

    return saver, storegate, task_scheduler, metric
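
For reference, the tuple returned by preprocessing() plugs directly into a multiml agent, as in Example #3 (GridSearchAgent is shown here only as one possible choice):

saver, storegate, task_scheduler, metric = preprocessing(
    tau4vec_tasks=['MLP', 'conv2D'],
    higgsId_tasks=['mlp', 'lstm'],
)

agent = GridSearchAgent(saver=saver,
                        storegate=storegate,
                        task_scheduler=task_scheduler,
                        metric=metric)
agent.execute()
agent.finalize()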
Example #12
def test_keras_util():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()

    model = MLPBlock(
        layers=[2, 1],
        batch_norm=True,
    )
    model.compile(optimizer='adam', loss='binary_crossentropy')

    x_train = np.random.normal(size=(100, 2))
    x_valid = np.random.normal(size=(100, 2))
    y_train = np.random.binomial(n=1, p=0.5, size=(100, ))
    y_valid = np.random.binomial(n=1, p=0.5, size=(100, ))
    chpt_path = f"{saver.save_dir}/test_keras_util"
    from multiml.task.keras.keras_util import training_keras_model
    training_keras_model(model=model,
                         num_epochs=2,
                         batch_size=10,
                         max_patience=2,
                         x_train=x_train,
                         y_train=y_train,
                         x_valid=x_valid,
                         y_valid=y_valid,
                         chpt_path=chpt_path,
                         tensorboard_path=f'{saver.save_dir}/test_keras_util')

    from multiml.task.keras.keras_util import get_optimizer
    get_optimizer('adam', dict(learning_rate=None))
    get_optimizer('adam', dict(learning_rate=0.001))
    get_optimizer('sgd', dict(learning_rate=None))
    get_optimizer('sgd', dict(learning_rate=0.1))
    from tensorflow.keras import optimizers
    get_optimizer(optimizers.Adam())

    with pytest.raises(ValueError):
        get_optimizer(None)

    with pytest.raises(NotImplementedError):
        get_optimizer('dummyoptimizer')
Example #13
def test_keras_module_conv2d():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    block = Conv2DBlock(
        layers_conv2d=[
            ('conv2d', {
                'filters': 4,
                'kernel_size': (2, 2)
            }),
            ('maxpooling2d', {
                'pool_size': (2, 2)
            }),
            ('upsampling2d', {
                'size': (2, 2)
            }),
        ],
        conv2d_padding='valid',
    )

    input_tensor = np.random.normal(size=(3, 3, 3, 2))
    block(input_tensor)
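Example #14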
def main(opts):
    logger.set_level(opts.loglevel)
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(opts.config)

    verbose = 1

    if opts.seed is not None:
        config['seed'] = opts.seed

    if opts.gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{opts.gpu_index}')

    if opts.data_path is not None:
        config['dataset']['params']['data_path'] = opts.data_path

    if opts.event is not None:
        config['dataset']['params']['max_events'] = int(opts.event)

    if opts.clip_value is not None:
        config['ASNG']['clip'] = opts.clip_value

    if opts.alpha is not None:
        config['ASNG']['alpha'] = opts.alpha

    if opts.lam is not None:
        config['ASNG']['lam'] = opts.lam

    if opts.delta is not None:
        config['ASNG']['delta'] = opts.delta

    if opts.epochs is not None:
        config['ASNG']['epochs'] = opts.epochs

    set_seed(config.seed)

    if opts.do_pretrain:
        jobid = 'pretrain_' + opts.jobid
    else:
        jobid = 'no_train_' + opts.jobid

    save_dir = f'output/{os.path.basename(__file__)[:-3]}_{opts.event}evt_weight{opts.weight}_{jobid}'

    use_multi_loss, loss_weights = get_multi_loss(opts.weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if opts.load_weights else ['train', 'valid', 'test']

    # Agent
    logger.info(f'lambda / alpha / delta is {config.ASNG.lam} / {config.ASNG.alpha} / {config.ASNG.delta}')

    from multiml.agent.pytorch import PytorchASNGNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        config['ASNG']['connectiontask_args']['phases'] = phases
        config['ASNG']['connectiontask_args']['variable_mapping'] = mapping_truth_corr
        config['ASNG']['connectiontask_args']['device'] = DEVICE
        config['ASNG']['connectiontask_args']['loss_weights'] = loss_weights

        agent = PytorchASNGNASAgent(
            verbose=verbose,
            num_epochs=config.ASNG.epochs,
            max_patience=config.ASNG.patience,
            batch_size=config.ASNG.batch_size,
            asng_args=config.ASNG.asng_args,
            optimizer=config.ASNG.optimizer.name,
            optimizer_args=config.ASNG.optimizer.params,
            scheduler=config.ASNG.scheduler,
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent / ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=opts.do_pretrain,
            connectiontask_args=config.ASNG.connectiontask_args,
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    results = agent.results_json
    results['walltime'] = timer_reg['execute'][1]
    results['timer_reg'] = timer_reg
    results['seed'] = opts.seed
    results['nevents'] = int(opts.event) * 2

    def print_dict(key, val):
        if isinstance(val, dict):
            for k, v in val.items():
                print_dict(f'{key} {k}', v)
        else:
            logger.info(f'{key: <30} : {val}')

    for key, val in results.items():
        print_dict(key, val)

    with open(f'{saver.save_dir}/result.run_connection_asngnas_{opts.event}evt_weight{opts.weight}.json', 'w') as fo:
        json.dump([results], fo, indent=2)

    if not opts.load_weights:
        import pickle
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            pickle.dump(timer_reg, f)

    # Post-processing: dump predictions for each phase
    variables = []
    from my_tasks import corr_tau_4vec
    variables.extend(corr_tau_4vec)
    variables.extend(['probability'])

    for phase in phases:
        storegate.set_data_id("")
        y_pred = np.array(storegate.get_data(phase=phase, var_names=variables))

        os.makedirs(f'{saver.save_dir}/pred/{phase}', exist_ok=True)

        for i, v in enumerate(variables):
            np.save(f'{saver.save_dir}/pred/{phase}/{v}', y_pred[i])
Example #15
def preprocessing(save_dir,
                  args,
                  tau4vec_tasks=['MLP', 'conv2D', 'SF', 'zero', 'noise'],
                  higgsId_tasks=['mlp', 'lstm', 'mass', 'zero', 'noise'],
                  truth_intermediate_inputs=True):
    from multiml import logger
    logger.set_level(args.log_level)

    from setup_tensorflow import setup_tensorflow
    setup_tensorflow(args.seed, args.igpu)

    load_weights = args.load_weights

    from multiml.saver import Saver
    saver = Saver(save_dir, serial_id=args.seed)
    saver.add("seed", args.seed)

    # Storegate
    from my_storegate import get_storegate
    storegate = get_storegate(
        data_path=args.data_path,
        max_events=args.max_events,
    )

    # Task scheduler
    from multiml.task_scheduler import TaskScheduler
    from my_tasks import get_higgsId_subtasks, get_tau4vec_subtasks
    task_scheduler = TaskScheduler()

    if args.remove_dummy_models:
        tau4vec_tasks = [
            v for v in tau4vec_tasks if v not in ['zero', 'noise']
        ]
        higgsId_tasks = [
            v for v in higgsId_tasks if v not in ['zero', 'noise']
        ]

    if len(tau4vec_tasks) > 0 and len(higgsId_tasks) > 0:
        subtask1 = get_higgsId_subtasks(saver,
                                        subtask_names=higgsId_tasks,
                                        truth_input=truth_intermediate_inputs,
                                        load_weights=load_weights,
                                        run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='higgsId',
                                parents=['tau4vec'],
                                children=[],
                                subtasks=subtask1)

        subtask2 = get_tau4vec_subtasks(saver,
                                        subtask_names=tau4vec_tasks,
                                        load_weights=load_weights,
                                        run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='tau4vec',
                                parents=[],
                                children=['higgsId'],
                                subtasks=subtask2)

    elif len(higgsId_tasks) > 0:
        subtask = get_higgsId_subtasks(saver,
                                       subtask_names=higgsId_tasks,
                                       load_weights=load_weights,
                                       run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='higgsId', subtasks=subtask)

    elif len(tau4vec_tasks) > 0:
        subtask = get_tau4vec_subtasks(saver,
                                       subtask_names=tau4vec_tasks,
                                       load_weights=load_weights,
                                       run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='tau4vec', subtasks=subtask)

    else:
        raise ValueError("Strange task combination...")

    # Metric
    if len(tau4vec_tasks) > 0 and len(higgsId_tasks) == 0:
        from multiml_htautau.task.metrics import CustomMSEMetric
        from my_tasks import corr_tau_4vec, truth_tau_4vec
        metric = CustomMSEMetric(pred_var_name=corr_tau_4vec,
                                 true_var_name=truth_tau_4vec,
                                 phase='test')
    else:
        from multiml.agent.metric import AUCMetric
        metric = AUCMetric(pred_var_name='probability',
                           true_var_name='label',
                           phase='test')

    return saver, storegate, task_scheduler, metric
Example #16
def test_keras_mlp():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    storegate = build_storegate()

    saver = Saver()

    args_task = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # MLBaseTask
        'phases': None,
        'save_weights': True,
        # KerasBaseTask
        'input_var_names': ['var0', 'var1'],
        'output_var_names': 'output0',
        'true_var_names': 'label',
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'binary_crossentropy',
        'run_eagerly': True,
        # MLPTask
        'layers': [8, 1],
        'activation': 'relu',
        'activation_last': 'sigmoid',
        'batch_norm': True,
    }
    task = MLPTask(**args_task)
    assert task._layers == [8, 1]

    task.set_hps({
        'layers': [4, 1],
        'input_shapes': None,
        'output_var_names': 'output0'
    })
    assert task._layers == [4, 1]
    assert task._input_shapes == [len(['var0', 'var1'])]
    assert task.get_inputs()[0].shape[1] == len(['var0', 'var1'])

    task.execute()
    task.finalize()

    # Validate model save/load
    args_task['phases'] = ['test']
    args_task['save_weights'] = False
    args_task['load_weights'] = saver.load_ml(task._name)['model_path']
    args_task['layers'] = [4, 1]
    task2 = MLPTask(**args_task)

    # Fails because predict() is called before the model has been built
    with pytest.raises(ValueError):
        task2.predict(phase='test')

    task2.execute()
    task2.finalize()

    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    # assert (np.sum(np.square(y_pred - y_pred_load)) < 1e-10)
    assert (np.array_equal(y_pred, y_pred_load))
Example #17
def test_keras_ensemble():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    subtask0.execute()
    subtask1.execute()

    task = EnsembleTask(
        subtasks=[subtask0, subtask1],
        dropout_rate=None,
        individual_loss=True,
        individual_loss_weights=1.0,
        saver=saver,
        storegate=storegate,
        save_weights=True,
        # do_training=False,
        phases=["train", "valid", "test"],
    )

    assert task.get_inputs()[0].shape[1] == len(['var0', 'var1'])

    assert task.input_var_names == ['var0', 'var1']
    assert task.output_var_names == ['output0']

    task.execute()
    task.finalize()

    task2 = EnsembleTask(
        name='EnsembleTask2',
        subtasks=[subtask0, subtask1],
        dropout_rate=None,
        individual_loss=True,
        individual_loss_weights=1.0,
        saver=saver,
        storegate=storegate,
        phases=["test"],
        load_weights=saver.load_ml(task.get_unique_id())['model_path'],
    )
    task2.execute()
    task2.finalize()

    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert (np.array_equal(y_pred, y_pred_load))
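Example #18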
import os

save_dir = f'output/{os.path.basename(__file__)[:-3]}'

from run_utils import common_parser
parser = common_parser()
parser.add_argument("--model",
                    dest="model",
                    help="model",
                    type=str,
                    default='MLP')
args = parser.parse_args()

from multiml import logger
logger.set_level(args.log_level)

from setup_tensorflow import setup_tensorflow
setup_tensorflow(args.seed, args.igpu)

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)
save_dir += f'_{args.model}'

from multiml.saver import Saver
saver = Saver(save_dir, serial_id=args.seed)
saver.add("seed", args.seed)

# Storegate
from my_storegate import get_storegate
storegate = get_storegate(
    data_path=args.data_path,
    max_events=args.max_events,  # assumed; the original snippet is cut off here
)
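Example #19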
def test_keras_model_connection():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    subtask0.execute()
    subtask1.execute()

    chpt_path = f"{saver.save_dir}/test_keras_model_connection"
    task = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=saver,
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        load_weights=False,
        save_weights=chpt_path,
        phases=['train', 'valid', 'test'],
    )

    task.execute()
    task.finalize()

    # Load model weights
    task2 = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=saver,
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        save_weights=False,
        load_weights=chpt_path,
        phases=['test'],
    )

    task2.execute()
    task2.finalize()

    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert (np.array_equal(y_pred[0], y_pred_load[0]))
    assert (np.array_equal(y_pred[1], y_pred_load[1]))

    # Connect as a series DAG
    task3 = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=Saver(),
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        load_weights=False,
        save_weights=chpt_path,
        phases=['train', 'valid', 'test'],
    )
    task3.execute()
    task3.finalize()
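Example #20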
def test_agent_keras_connection_simple():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    logger.set_level(logger.DEBUG)

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       batch_norm=True,
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step0', 'subtask0', env=subtask0)
    task_scheduler.add_subtask('step1', 'subtask1', env=subtask1)
    task_scheduler.show_info()

    import copy
    task_scheduler0 = copy.deepcopy(task_scheduler)
    task_scheduler1 = copy.deepcopy(task_scheduler)

    metric = RandomMetric()

    agent_args = {
        # BaseAgent
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler0,
        'metric': metric,
        # KerasConnectionSimpleAgent
        'freeze_model_weights': False,
        'do_pretraining': False,
        'connectiontask_name': 'connectiontask',
        'connectiontask_args': {
            'num_epochs': 2,
            'max_patience': 1,
            'batch_size': 1200,
            'save_weights': True,
            'phases': None,
            'use_multi_loss': True,
            'loss_weights': [0.5, 0.5],
            'optimizer': 'adam',
            'optimizer_args': dict(learning_rate=1e-3),
            'variable_mapping': [('var2', 'output0'), ('var3', 'output1')],
            'run_eagerly': True,
        },
    }

    agent = KerasConnectionRandomSearchAgent(**agent_args)
    agent.execute()
    agent.finalize()

    # Get the first model of the first task
    subtask = agent._task_prod[0][0]

    # Apply pretraining
    agent._execute_subtask(subtask, is_pretraining=True)

    # Set all variables in keras model to trainable
    KerasConnectionRandomSearchAgent._set_trainable_flags(subtask.env._model,
                                                          do_trainable=True)

    # Build connect-model
    agent._build_connected_models(
        subtasks=[agent._task_prod[0][0].env, agent._task_prod[1][0].env],
        job_id='dummy',
        use_task_scheduler=False)

    # Load saved model
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step1', 'subtask1'),
    ]:
        task = task_scheduler1.get_subtask(task_id, subtask_id).env
        task._save_weights = False
        task._load_weights = False
        task._phases = ['test']
    agent_args['connectiontask_args']['save_weights'] = False
    agent_args['connectiontask_args']['load_weights'] = True
    #agent_args['saver'] = Saver()
    agent_args['saver'] = saver
    agent_args['task_scheduler'] = task_scheduler1
    agent_args['connectiontask_args']['phases'] = ['test']
    agent2 = KerasConnectionRandomSearchAgent(**agent_args)
    agent2.execute()
    agent2.finalize()

    task_scheduler0.show_info()
    task_scheduler1.show_info()
    task0 = task_scheduler0.get_subtask('connection', 'connectiontask').env
    task1 = task_scheduler1.get_subtask('connection', 'connectiontask').env
    y_pred = task0.predict(phase='test')
    y_pred_load = task1.predict(phase='test')
    assert (np.array_equal(y_pred[0], y_pred_load[0]))
    assert (np.array_equal(y_pred[1], y_pred_load[1]))
Example #21
def test_agent_keras_ensemble():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    logger.set_level(logger.DEBUG)

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        'save_weights': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       batch_norm=True,
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask2 = MLPTask(name='subtask2',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step0', 'subtask0', env=subtask0)
    task_scheduler.add_subtask('step0', 'subtask1', env=subtask1)
    task_scheduler.add_subtask('step1', 'subtask2', env=subtask2)
    task_scheduler.show_info()

    import copy
    task_scheduler0 = copy.deepcopy(task_scheduler)
    task_scheduler1 = copy.deepcopy(task_scheduler)

    metric = RandomMetric()

    agent_args = {
        # BaseAgent
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler0,
        'metric': metric,
        # KerasConnectionSimpleAgent
        'freeze_model_weights': False,
        'do_pretraining': True,
        'connectiontask_name': 'connectiontask',
        'connectiontask_args': {
            'num_epochs': 2,
            'max_patience': 1,
            'batch_size': 1200,
            'save_weights': True,
            'phases': None,
            'use_multi_loss': True,
            'loss_weights': [0.5, 0.5],
            'optimizer': 'adam',
            'optimizer_args': dict(learning_rate=1e-3),
            'variable_mapping': [('var2', 'output0'), ('var3', 'output1')],
            'run_eagerly': True,
        },
        # KerasEnsembleAgent
        'ensembletask_args': {
            'dropout_rate': None,
            'individual_loss': False,
            'individual_loss_weights': 0.0,
            'phases': ['test'],
            'save_weights': True,
            'run_eagerly': True,
        },
    }

    agent = KerasEnsembleAgent(**agent_args)
    agent.execute()
    agent.finalize()

    # The case of phases containing 'train' is not implemented for the ensemble task
    with pytest.raises(ValueError):
        agent_args['ensembletask_args']['phases'] = ['train', 'valid', 'test']
        KerasEnsembleAgent(**agent_args)

    # Load saved model
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
    ]:
        task = task_scheduler1.get_subtask(task_id, subtask_id).env
        task._save_weights = False
        task._load_weights = True
        task._phases = ['test']
    agent_args['ensembletask_args']['phases'] = ['test']
    agent_args['connectiontask_args']['save_weights'] = False
    agent_args['connectiontask_args']['load_weights'] = True
    agent_args['saver'] = saver
    agent_args['task_scheduler'] = task_scheduler1
    agent_args['connectiontask_args']['phases'] = ['test']
    agent2 = KerasEnsembleAgent(**agent_args)
    agent2.execute()
    agent2.finalize()

    task_scheduler0.show_info()
    task_scheduler1.show_info()

    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
        ('step0', 'step0_ensemble'),
        ('connection', 'connectiontask'),
    ]:
        task0 = task_scheduler0.get_subtask(task_id, subtask_id).env
        task1 = task_scheduler1.get_subtask(task_id, subtask_id).env
        y_pred = task0.predict(phase='test')
        y_pred_load = task1.predict(phase='test')
        for y, y_load in zip(y_pred, y_pred_load):
            assert (np.array_equal(y, y_load))
Example #22
def test_keras_module_darts_model():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    subtask0 = MLPTask(input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask1 = MLPTask(input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask2 = MLPTask(input_var_names=['output0', 'output1'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)
    subtask3 = MLPTask(input_var_names=['output0', 'output1'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)
    subtask0.execute()
    subtask1.execute()
    subtask2.execute()
    subtask3.execute()

    task0 = EnsembleTask(
        subtasks=[subtask0, subtask1],
        dropout_rate=None,
        saver=saver,
        storegate=storegate,
        phases=['test'],
    )
    task1 = EnsembleTask(
        subtasks=[subtask2, subtask3],
        dropout_rate=None,
        saver=saver,
        storegate=storegate,
        phases=['test'],
    )
    task0.execute()
    task1.execute()

    model = DARTSModel(
        # DARTSModel
        optimizer_alpha='adam',
        optimizer_weight='adam',
        learning_rate_alpha=1e-3,
        learning_rate_weight=1e-3,
        zeta=1e-3,
        # ConnectionModel
        models=[task0.ml.model, task1.ml.model],
        input_var_index=[[0, 1], [-1, -2]],
        output_var_index=[[0, 1], [2]])

    # Pass optimizer objects directly instead of strings
    DARTSModel(
        # DARTSModel
        optimizer_alpha=Adam(learning_rate=0.001),
        optimizer_weight=Adam(learning_rate=0.001),
        learning_rate_alpha=-1,  # dummy
        learning_rate_weight=-1,  # dummy
        zeta=1e-3,
        # ConnectionModel
        models=[task0.ml.model, task1.ml.model],
        input_var_index=[[0, 1], [-1, -2]],
        output_var_index=[[0, 1], [2]])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        run_eagerly=True,
    )

    input_tensor_train = [
        np.random.normal(size=(10, 1)),
        np.random.normal(size=(10, 1))
    ]
    input_tensor_test = [
        np.random.normal(size=(10, 1)),
        np.random.normal(size=(10, 1))
    ]
    output_tensor_train = [
        np.random.normal(size=(10, 2)),
        np.random.normal(size=(10, 1))
    ]
    output_tensor_test = [
        np.random.normal(size=(10, 2)),
        np.random.normal(size=(10, 1))
    ]

    model._batch_size_train.assign(2)
    model.fit(
        x=input_tensor_train,
        y=output_tensor_train,
        validation_data=(input_tensor_test, output_tensor_test),
        epochs=2,
    )

    alphas = model._get_variable_numpy(model.alpha_vars)
    assert alphas[0].shape == (2, 1)  # alphas of step0
    assert alphas[1].shape == (2, 1)  # alphas of step1

    best_submodel_index = model.get_index_of_best_submodels()
    assert len(best_submodel_index) == 2
Example #23
def test_keras_conv2d():
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    storegate = build_storegate()

    saver = Saver()

    args_task = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # MLBaseTask
        'phases': None,
        'save_weights': True,
        # KerasBaseTask
        'input_var_names': ['var0'],
        'output_var_names': ['output0'],
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'binary_crossentropy',
        'run_eagerly': True,
        # MLPTask
        'true_var_names': ['label'],
        'layers': [4, 1],
        'input_shapes': (1, 3, 3, 1),
        'activation': 'relu',
        'activation_last': 'sigmoid',
        # Conv2DTask
        'conv2d_layers': [
            ('conv2d', {
                'filters': 4,
                'kernel_size': (2, 2)
            }),
            ('maxpooling2d', {
                'pool_size': (2, 2)
            }),
            ('upsampling2d', {
                'size': (2, 2)
            }),
        ],
    }
    from multiml.task.keras import Conv2DTask
    task = Conv2DTask(**args_task)

    task.execute()
    task.finalize()

    # Validate model save/load
    args_task['phases'] = ['test']
    args_task['save_weights'] = False
    args_task['load_weights'] = saver.load_ml(task._name)['model_path']
    task2 = Conv2DTask(**args_task)
    task2.execute()
    task2.finalize()

    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert (np.array_equal(y_pred, y_pred_load))