# Example #1
def test_mypytorchtask():
    """Run MyPytorchTask end-to-end and verify the persisted epoch count."""
    logger.set_level(logger.DEBUG)

    num_epochs = 5
    saver = Saver()
    task_kwargs = {
        'saver': saver,
        'model': MyPytorchModel,
        'optimizer': 'SGD',
        'optimizer_args': {'lr': 0.1},
        'loss': 'CrossEntropyLoss',
        'metrics': ['acc', 'lrs'],
    }
    task = MyPytorchTask(**task_kwargs)

    # Override the epoch count through the hyperparameter interface.
    task.set_hps({'num_epochs': num_epochs})

    task.execute()
    task.finalize()

    # finalize() should persist ML metadata under the 'mypytorch' key.
    assert saver.load_ml(key='mypytorch')['_num_epochs'] == num_epochs
# Example #2
def test_agent():
    """Build a two-step scheduler, check its topology and the agent
    accessors, then run a full grid search and inspect the best result."""
    envs = [MyTask() for _ in range(4)]

    hps = Hyperparameters({
        'hp_layer': [5, 10, 15],
        'hp_epoch': [128, 256, 512],
    })

    task_scheduler = TaskScheduler(['step0', 'step1'])
    task_scheduler.add_subtask('step0', 'task0', env=envs[0], hps=hps)
    task_scheduler.add_subtask('step0', 'task1', env=envs[1])
    task_scheduler.add_subtask('step1', 'task2', env=envs[2], hps=hps)
    task_scheduler.add_subtask('step1', 'task3', env=envs[3])

    task_scheduler.show_info()
    task_scheduler.get_sorted_task_ids()

    # Scheduler topology checks.
    assert task_scheduler.get_sorted_task_ids() == ['step0', 'step1']
    assert task_scheduler.get_subtask_ids('step0') == ['task0', 'task1']
    assert task_scheduler.get_children_task_ids('step0') == ['step1']
    assert task_scheduler.get_parents_task_ids('step1') == ['step0']
    assert task_scheduler.get_subtask('step0', 'task0').env == envs[0]

    storegate = StoreGate(backend='numpy', data_id='test_agent')
    saver = Saver()
    metric = RandomMetric()

    logger.set_level(logger.DEBUG)
    agent = GridSearchAgent(saver=saver,
                            storegate=storegate,
                            task_scheduler=task_scheduler,
                            metric=metric,
                            dump_all_results=True)

    # The constructor stores components on private attributes ...
    for private_attr, obj in (('_storegate', storegate),
                              ('_saver', saver),
                              ('_task_scheduler', task_scheduler),
                              ('_metric', metric)):
        assert getattr(agent, private_attr) is obj

    # ... and the public properties round-trip the same objects.
    agent.storegate = storegate
    agent.saver = saver
    agent.task_scheduler = task_scheduler
    agent.metric = metric

    assert agent.storegate is storegate
    assert agent.saver is saver
    assert agent.task_scheduler is task_scheduler
    assert agent.metric is metric

    agent.execute()
    agent.finalize()

    best_result = agent.get_best_result()
    assert best_result.metric_value > 0
# Example #3
def test_keras_base():
    """Instantiate KerasBaseTask directly by clearing its abstract methods."""
    # Allow direct instantiation of the abstract base class for testing.
    KerasBaseTask.__abstractmethods__ = set()

    logger.set_level(logger.DEBUG)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    task = KerasBaseTask(saver=saver)

    # load_metadata() is a no-op on the base class.
    task.load_metadata()
# Example #4
def test_keras_module_connection_model():
    """Chain three trained MLP models through a ConnectionModel and call it."""
    logger.set_level(logger.DEBUG)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    common_args = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # Three MLPs wired in sequence: outputs of one feed the next.
    tasks = [
        MLPTask(input_var_names=['var0', 'var1'],
                output_var_names=['output0', 'output1'],
                true_var_names=['var2', 'var3'],
                layers=[4, 2],
                **common_args),
        MLPTask(input_var_names=['output0', 'output1'],
                output_var_names=['output2'],
                true_var_names=['var4'],
                layers=[4, 1],
                **common_args),
        MLPTask(input_var_names=['output2'],
                output_var_names=['output3'],
                true_var_names=['label'],
                layers=[4, 1],
                **common_args),
    ]
    for task in tasks:
        task.execute()

    # Negative indices refer to variables produced by upstream models.
    model = ConnectionModel(
        models=[task.ml.model for task in tasks],
        input_var_index=[[0, 1], [-1, -2], [-3]],
        output_var_index=[[0, 1], [2], [3]])

    trainable_variables = model._get_variables()
    assert len(trainable_variables) > 0

    # Forward pass with a small random batch of two input features.
    input_tensor = np.random.normal(size=(3, 2))
    model(input_tensor)
# Example #5
def preprocessing(tau4vec_tasks=None,
                  higgsId_tasks=None,
                  truth_intermediate_inputs=True,
                  load_weights=False,
                  run_eagerly=True):
    """Build and return ``(saver, storegate, task_scheduler, metric)``.

    Args:
        tau4vec_tasks: subtask names for the 'tau4vec' step
            (default: ``['MLP', 'conv2D', 'SF', 'zero', 'noise']``).
        higgsId_tasks: subtask names for the 'higgsId' step
            (default: ``['mlp', 'lstm', 'mass', 'zero', 'noise']``).
        truth_intermediate_inputs: feed truth-level intermediate inputs
            to the higgsId subtasks.
        load_weights: load pre-trained weights in the subtask builders
            (previously a hard-coded local set to False).
        run_eagerly: run Keras models eagerly.  Bug fix: this name was
            referenced below but never defined, raising NameError at
            call time; it is now a keyword argument.

    Returns:
        Tuple of (Saver, StoreGate, TaskScheduler, AUCMetric).
    """
    # Resolve list defaults here to avoid mutable default arguments.
    if tau4vec_tasks is None:
        tau4vec_tasks = ['MLP', 'conv2D', 'SF', 'zero', 'noise']
    if higgsId_tasks is None:
        higgsId_tasks = ['mlp', 'lstm', 'mass', 'zero', 'noise']

    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    from multiml.saver import Saver
    saver = Saver()

    # Storegate
    storegate = get_storegate(max_events=100)

    # Task scheduler: tau4vec feeds higgsId (parent -> child).
    from multiml.task_scheduler import TaskScheduler
    from my_tasks import get_higgsId_subtasks, get_tau4vec_subtasks
    task_scheduler = TaskScheduler()

    subtask1 = get_higgsId_subtasks(saver,
                                    subtask_names=higgsId_tasks,
                                    truth_input=truth_intermediate_inputs,
                                    load_weights=load_weights,
                                    run_eagerly=run_eagerly)
    task_scheduler.add_task(task_id='higgsId',
                            parents=['tau4vec'],
                            children=[],
                            subtasks=subtask1)

    subtask2 = get_tau4vec_subtasks(saver,
                                    subtask_names=tau4vec_tasks,
                                    load_weights=load_weights,
                                    run_eagerly=run_eagerly)
    task_scheduler.add_task(task_id='tau4vec',
                            parents=[],
                            children=['higgsId'],
                            subtasks=subtask2)

    # Metric: AUC of the predicted probability against the truth label.
    from multiml.agent.metric import AUCMetric
    metric = AUCMetric(pred_var_name='probability',
                       true_var_name='label',
                       phase='test')

    return saver, storegate, task_scheduler, metric
# Example #6
def test_keras_util():
    """Exercise the training_keras_model and get_optimizer helpers."""
    logger.set_level(logger.DEBUG)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()

    model = MLPBlock(layers=[2, 1], batch_norm=True)
    model.compile(optimizer='adam', loss='binary_crossentropy')

    # Tiny random binary-classification dataset.
    x_train = np.random.normal(size=(100, 2))
    x_valid = np.random.normal(size=(100, 2))
    y_train = np.random.binomial(n=1, p=0.5, size=(100, ))
    y_valid = np.random.binomial(n=1, p=0.5, size=(100, ))

    chpt_path = f"{saver.save_dir}/test_keras_util"
    from multiml.task.keras.keras_util import training_keras_model
    training_keras_model(model=model,
                         num_epochs=2,
                         batch_size=10,
                         max_patience=2,
                         x_train=x_train,
                         y_train=y_train,
                         x_valid=x_valid,
                         y_valid=y_valid,
                         chpt_path=chpt_path,
                         tensorboard_path=chpt_path)

    from multiml.task.keras.keras_util import get_optimizer
    # Valid string specs, with and without an explicit learning rate.
    for opt_name, lr in (('adam', None), ('adam', 0.001),
                         ('sgd', None), ('sgd', 0.1)):
        get_optimizer(opt_name, dict(learning_rate=lr))

    # A pre-built optimizer instance is passed through.
    from tensorflow.keras import optimizers
    get_optimizer(optimizers.Adam())

    # Invalid specs raise.
    with pytest.raises(ValueError):
        get_optimizer(None)

    with pytest.raises(NotImplementedError):
        get_optimizer('dummyoptimizer')
def test_agent_basic_grid_scan():
    """GridSearchAgent runs for both metric types; unknown types raise."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    metric = RandomMetric()

    # Minimal two-step DAG; step1 carries a two-point hyperparameter grid.
    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_subtask('step0', 'task0', env=BaseTask())
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step1',
                               'task1',
                               env=BaseTask(),
                               hps=Hyperparameters({'job_id': [0, 1]}))

    common_args = dict(saver=saver,
                       storegate=storegate,
                       task_scheduler=task_scheduler,
                       metric=metric,
                       dump_all_results=True)

    # Both supported metric orderings complete a full grid scan.
    for metric_type in ('min', 'max'):
        agent = GridSearchAgent(metric_type=metric_type, **common_args)
        agent.execute()
        agent.finalize()

        agent.get_best_result()

    # An unsupported metric_type is rejected.
    with pytest.raises(NotImplementedError):
        agent = GridSearchAgent(metric_type='dummy', **common_args)
        agent.execute()
        agent.finalize()
def test_agent_basic_base():
    """BaseAgent property setters/getters round-trip; execute is callable."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    task_scheduler = TaskScheduler()
    metric = RandomMetric()

    agent = BaseAgent()

    # Assign each component through its public property ...
    components = {
        'storegate': storegate,
        'saver': saver,
        'task_scheduler': task_scheduler,
        'metric': metric,
    }
    for attr, obj in components.items():
        setattr(agent, attr, obj)

    # ... and verify the identical object comes back.
    for attr, obj in components.items():
        assert getattr(agent, attr) is obj

    agent.execute()
    agent.finalize()

    print(agent)
# Example #9
def test_agent_basic_simple():
    """RandomSearchAgent completes a run on a minimal two-step scheduler."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    metric = RandomMetric()

    # Two-step DAG; the second step carries a two-point hps grid.
    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_subtask('step0', 'task0', env=BaseTask())
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step1',
                               'task1',
                               env=BaseTask(),
                               hps=Hyperparameters({'job_id': [0, 1]}))

    agent = RandomSearchAgent(saver=saver,
                              storegate=storegate,
                              task_scheduler=task_scheduler,
                              metric=metric)
    agent.execute()
    agent.finalize()
def preprocessing(save_dir,
                  args,
                  tau4vec_tasks=['MLP', 'conv2D', 'SF', 'zero', 'noise'],
                  higgsId_tasks=['mlp', 'lstm', 'mass', 'zero', 'noise'],
                  truth_intermediate_inputs=True):
    """Build the saver, storegate, task scheduler and metric for a run.

    Args:
        save_dir: base directory handed to the Saver.
        args: parsed command-line namespace; this function reads
            log_level, seed, igpu, load_weights, data_path, max_events,
            remove_dummy_models and run_eagerly from it.
        tau4vec_tasks: subtask names for the 'tau4vec' step.
        higgsId_tasks: subtask names for the 'higgsId' step.
        truth_intermediate_inputs: feed truth-level intermediate inputs
            to the higgsId subtasks (only used when both steps exist).

    Returns:
        Tuple of (saver, storegate, task_scheduler, metric).

    Raises:
        ValueError: if both task-name lists end up empty.
    """
    from multiml import logger
    logger.set_level(args.log_level)

    from setup_tensorflow import setup_tensorflow
    setup_tensorflow(args.seed, args.igpu)

    load_weights = args.load_weights

    from multiml.saver import Saver
    saver = Saver(save_dir, serial_id=args.seed)
    saver.add("seed", args.seed)

    # Storegate
    from my_storegate import get_storegate
    storegate = get_storegate(
        data_path=args.data_path,
        max_events=args.max_events,
    )

    # Task scheduler
    from multiml.task_scheduler import TaskScheduler
    from my_tasks import get_higgsId_subtasks, get_tau4vec_subtasks
    task_scheduler = TaskScheduler()

    # Optionally drop the dummy baseline models from both task lists.
    if args.remove_dummy_models:
        tau4vec_tasks = [
            v for v in tau4vec_tasks if v not in ['zero', 'noise']
        ]
        higgsId_tasks = [
            v for v in higgsId_tasks if v not in ['zero', 'noise']
        ]

    # Both steps present: build the full tau4vec -> higgsId pipeline.
    if len(tau4vec_tasks) > 0 and len(higgsId_tasks) > 0:
        subtask1 = get_higgsId_subtasks(saver,
                                        subtask_names=higgsId_tasks,
                                        truth_input=truth_intermediate_inputs,
                                        load_weights=load_weights,
                                        run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='higgsId',
                                parents=['tau4vec'],
                                children=[],
                                subtasks=subtask1)

        subtask2 = get_tau4vec_subtasks(saver,
                                        subtask_names=tau4vec_tasks,
                                        load_weights=load_weights,
                                        run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='tau4vec',
                                parents=[],
                                children=['higgsId'],
                                subtasks=subtask2)

    # higgsId only: a single standalone classification step.
    elif len(higgsId_tasks) > 0:
        subtask = get_higgsId_subtasks(saver,
                                       subtask_names=higgsId_tasks,
                                       load_weights=load_weights,
                                       run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='higgsId', subtasks=subtask)

    # tau4vec only: a single standalone regression step.
    elif len(tau4vec_tasks) > 0:
        subtask = get_tau4vec_subtasks(saver,
                                       subtask_names=tau4vec_tasks,
                                       load_weights=load_weights,
                                       run_eagerly=args.run_eagerly)
        task_scheduler.add_task(task_id='tau4vec', subtasks=subtask)

    else:
        raise ValueError("Strange task combination...")

    # Metric: MSE for the tau4vec-only setup, AUC otherwise.
    if len(tau4vec_tasks) > 0 and len(higgsId_tasks) == 0:
        from multiml_htautau.task.metrics import CustomMSEMetric
        from my_tasks import corr_tau_4vec, truth_tau_4vec
        metric = CustomMSEMetric(pred_var_name=corr_tau_4vec,
                                 true_var_name=truth_tau_4vec,
                                 phase='test')
    else:
        from multiml.agent.metric import AUCMetric
        metric = AUCMetric(pred_var_name='probability',
                           true_var_name='label',
                           phase='test')

    return saver, storegate, task_scheduler, metric
# Example #11
def test_saver():
    """Exercise the Saver shelve/dict backends and data-movement helpers.

    Statement order matters throughout: every assertion checks the saver
    state produced by the call immediately before it.
    """
    saver = Saver(mode='shelve')
    assert saver._serial_id == 0

    saver.open()
    assert saver._state == 'open'

    # In 'shelve' mode, items land in the shelve backend only.
    saver['key0'] = 'value0'
    assert saver.keys('shelve') == ['key0']
    assert saver['key0'] == 'value0'
    assert saver._shelve['key0'] == 'value0'

    # change shelve to dict
    saver.set_mode('dict')

    # In 'dict' mode, items land in the in-memory dict only.
    saver['key1'] = 'value1'
    assert saver.keys('dict') == ['key1']
    assert saver['key1'] == 'value1'
    assert saver._dict['key1'] == 'value1'

    # The two backends are disjoint: each key lives in exactly one.
    assert 'key0' not in saver._dict
    assert 'key1' not in saver._shelve

    # save dict objects to shelve
    saver.save()

    # After save(), everything has been flushed to the shelve backend.
    assert set(saver.keys()) == set(['key0', 'key1'])
    assert 'key0' not in saver._dict
    assert 'key1' not in saver._dict
    assert 'key0' in saver._shelve
    assert 'key1' in saver._shelve

    # move data from shelve to dict
    saver.to_memory('key0')
    assert 'key0' not in saver._shelve
    assert 'key0' in saver._dict

    # move data from dict to shelve
    saver.to_storage('key0')
    assert 'key0' in saver._shelve
    assert 'key0' not in saver._dict

    saver.set_mode('shelve')
    # dump_ml/load_ml round-trip arbitrary keyword parameters per key.
    saver.dump_ml(key='ml0', param0='param0', param1='param1')
    assert saver.load_ml(key='ml0')['param0'] == 'param0'

    saver.close()
    assert saver._state == 'close'
# Example #12
def test_keras_mlp():
    """Train an MLPTask, then rebuild it from saved weights and check that
    the reloaded model reproduces the original predictions exactly."""
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    storegate = build_storegate()

    saver = Saver()

    args_task = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # MLBaseTask
        'phases': None,
        'save_weights': True,
        # KerasBaseTask
        'input_var_names': ['var0', 'var1'],
        'output_var_names': 'output0',
        'true_var_names': 'label',
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'binary_crossentropy',
        'run_eagerly': True,
        # MLPTask
        'layers': [8, 1],
        'activation': 'relu',
        'activation_last': 'sigmoid',
        'batch_norm': True,
    }
    task = MLPTask(**args_task)
    assert task._layers == [8, 1]

    # set_hps overrides constructor arguments after instantiation.
    task.set_hps({
        'layers': [4, 1],
        'input_shapes': None,
        'output_var_names': 'output0'
    })
    assert task._layers == [4, 1]
    # With input_shapes=None the shape is derived from input_var_names.
    assert task._input_shapes == [len(['var0', 'var1'])]
    assert task.get_inputs()[0].shape[1] == len(['var0', 'var1'])

    task.execute()
    task.finalize()

    # Validate model save/load: rebuild in test-only mode from the
    # weights that finalize() registered with the saver.
    args_task['phases'] = ['test']
    args_task['save_weights'] = False
    args_task['load_weights'] = saver.load_ml(task._name)['model_path']
    args_task['layers'] = [4, 1]
    task2 = MLPTask(**args_task)

    # Fail due to call models before defining the model
    with pytest.raises(ValueError):
        y_pred_load = task2.predict(phase='test')

    task2.execute()
    task2.finalize()

    # Reloaded weights must give bit-identical predictions.
    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    # assert (np.sum(np.square(y_pred - y_pred_load)) < 1e-10)
    assert (np.array_equal(y_pred, y_pred_load))
                    type=str,
                    default='MLP')
args = parser.parse_args()

from multiml import logger
logger.set_level(args.log_level)

from setup_tensorflow import setup_tensorflow
setup_tensorflow(args.seed, args.igpu)

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)
save_dir += f'_{args.model}'

from multiml.saver import Saver
saver = Saver(save_dir, serial_id=args.seed)
saver.add("seed", args.seed)

# Storegate
from my_storegate import get_storegate
storegate = get_storegate(
    data_path=args.data_path,
    max_events=args.max_events,
)

from multiml.task_scheduler import TaskScheduler
task_scheduler = TaskScheduler()

subtask_args = {
    'saver': saver,
    'output_var_names': ('probability', ),
def test_keras_ensemble():
    """Build an EnsembleTask over two trained MLP subtasks, then reload it
    from saved weights and check the predictions match exactly."""
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    # Arguments shared by both ensemble members.
    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # Two identical-topology subtasks with the same input/output variables.
    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    # Pre-train the members before wrapping them in the ensemble.
    subtask0.execute()
    subtask1.execute()

    task = EnsembleTask(
        subtasks=[subtask0, subtask1],
        dropout_rate=None,
        individual_loss=True,
        individual_loss_weights=1.0,
        saver=saver,
        storegate=storegate,
        save_weights=True,
        # do_training=False,
        phases=["train", "valid", "test"],
    )

    assert task.get_inputs()[0].shape[1] == len(['var0', 'var1'])

    # The ensemble inherits the members' input/output variable names.
    assert task.input_var_names == ['var0', 'var1']
    assert task.output_var_names == ['output0']

    task.execute()
    task.finalize()

    # Rebuild in test-only mode, loading the weights saved by `task`.
    task2 = EnsembleTask(
        name='EnsembleTask2',
        subtasks=[subtask0, subtask1],
        dropout_rate=None,
        individual_loss=True,
        individual_loss_weights=1.0,
        saver=saver,
        storegate=storegate,
        phases=["test"],
        load_weights=saver.load_ml(task.get_unique_id())['model_path'],
    )
    task2.execute()
    task2.finalize()

    # Reloaded weights must give bit-identical predictions.
    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert (np.array_equal(y_pred, y_pred_load))
def test_agent_keras_connection_grid_scan():
    """Run KerasConnectionGridSearchAgent over a 2x2 subtask grid, with and
    without pretraining reuse, then reload the saved models and verify the
    reloaded pipeline reproduces the original predictions.

    NOTE: the three deep-copied schedulers are essential — each agent run
    mutates its scheduler's tasks in place, so every run needs a fresh copy.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    # logger.set_level(logger.DEBUG)

    saver = Saver()
    storegate = build_storegate()

    # Arguments shared by all four MLP subtasks.
    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        'save_weights': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # step0 candidates: regress (var2, var3) from (var0, var1).
    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       batch_norm=True,
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    # step1 candidates: classify the label from (var2, var3).
    subtask2 = MLPTask(name='subtask2',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)
    subtask3 = MLPTask(name='subtask3',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    # Two steps with two candidate subtasks each -> a 2x2 grid.
    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step0', 'subtask0', env=subtask0)
    task_scheduler.add_subtask('step0', 'subtask1', env=subtask1)
    task_scheduler.add_subtask('step1', 'subtask2', env=subtask2)
    task_scheduler.add_subtask('step1', 'subtask3', env=subtask3)
    task_scheduler.show_info()

    # One independent scheduler copy per agent run (runs mutate tasks).
    import copy
    task_scheduler0 = copy.deepcopy(task_scheduler)
    task_scheduler1 = copy.deepcopy(task_scheduler)
    task_scheduler2 = copy.deepcopy(task_scheduler)

    metric = MSEMetric(
        storegate=storegate,
        pred_var_name="output2",
        true_var_name="label",
    )

    agent_args = {
        # BaseAgent
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler0,
        'metric': metric,
        # KerasConnectionSimpleAgent
        'freeze_model_weights': False,
        'do_pretraining': True,
        'connectiontask_name': 'connectiontask',
        'connectiontask_args': {
            'num_epochs': 2,
            'max_patience': 1,
            'batch_size': 1200,
            'save_weights': True,
            'phases': None,
            'use_multi_loss': True,
            'loss_weights': [0.5, 0.5],
            'optimizer': 'adam',
            'optimizer_args': dict(learning_rate=1e-3),
            # step0 outputs replace these step1 input variables.
            'variable_mapping': [('var2', 'output0'), ('var3', 'output1')],
            'run_eagerly': True,
        },
        # KerasConnectionGridScanAgent
        'reuse_pretraining': True,
    }

    # First run: pretraining results are reused across grid points.
    agent = KerasConnectionGridSearchAgent(**agent_args)
    agent.execute()
    agent.finalize()

    agent.get_best_result()

    # Test reuse_pretraining = False
    agent_args2 = {**agent_args}
    agent_args2['reuse_pretraining'] = False
    agent_args2['saver'] = saver
    agent_args2['task_scheduler'] = task_scheduler1
    agent2 = KerasConnectionGridSearchAgent(**agent_args2)
    agent2.execute()
    agent2.finalize()

    # Load saved model: flip every subtask into test-only mode so the
    # third run restores weights instead of training.
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
        ('step1', 'subtask3'),
    ]:
        # break
        task = task_scheduler2.get_subtask(task_id, subtask_id).env
        task._save_weights = False
        task._load_weights = False
        task._phases = ['test']
    agent_args['connectiontask_args']['save_weights'] = False
    agent_args['connectiontask_args']['load_weights'] = True
    agent_args['saver'] = saver
    agent_args['task_scheduler'] = task_scheduler2
    agent_args['connectiontask_args']['phases'] = ['test']
    agent2 = KerasConnectionGridSearchAgent(**agent_args)
    agent2.execute()
    agent2.finalize()

    task_scheduler0.show_info()
    task_scheduler2.show_info()

    # Every reloaded subtask (and the connection task itself) must
    # reproduce the predictions of the originally trained run exactly.
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
        ('step1', 'subtask3'),
        ('connection', 'connectiontask.0'),
    ]:
        task0 = task_scheduler0.get_subtask(task_id, subtask_id).env
        task1 = task_scheduler2.get_subtask(task_id, subtask_id).env
        y_pred = task0.predict(phase='test')
        y_pred_load = task1.predict(phase='test')
        for y, y_load in zip(y_pred, y_pred_load):
            assert (np.array_equal(y, y_load))
def test_keras_model_connection():
    """Train a ModelConnectionTask over two MLP subtasks, reload it from the
    saved checkpoint and check the predictions match, then train a second
    series-DAG connection with a fresh saver."""
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    # Arguments shared by both subtasks.
    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # subtask0 regresses (var2, var3); subtask1 classifies from them.
    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    # Pre-train both subtasks before connecting them.
    subtask0.execute()
    subtask1.execute()

    chpt_path = f"{saver.save_dir}/test_keras_model_connection"
    task = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        # subtask0's outputs replace subtask1's (var2, var3) inputs.
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=saver,
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        load_weights=False,
        save_weights=chpt_path,
        phases=['train', 'valid', 'test'],
    )

    task.execute()
    task.finalize()

    # Load model weights: same configuration, test-only phase, restoring
    # from the checkpoint written by the first task.
    task2 = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=saver,
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        save_weights=False,
        load_weights=chpt_path,
        phases=['test'],
    )

    task2.execute()
    task2.finalize()

    # Reloaded weights must give bit-identical predictions per output.
    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert (np.array_equal(y_pred[0], y_pred_load[0]))
    assert (np.array_equal(y_pred[1], y_pred_load[1]))

    # Connect as series dag
    task3 = ModelConnectionTask(
        # ConnectionModel
        subtasks=[subtask0, subtask1],
        use_multi_loss=True,
        loss_weights=[0.5, 0.5],
        variable_mapping=[('var2', 'output0'), ('var3', 'output1')],
        # KerasBaseTask
        saver=Saver(),
        storegate=storegate,
        optimizer='adam',
        num_epochs=2,
        max_patience=1,
        loss='binary_crossentropy',
        run_eagerly=True,
        load_weights=False,
        save_weights=chpt_path,
        phases=['train', 'valid', 'test'],
    )
    task3.execute()
    task3.finalize()
def test_keras_conv2d():
    """Train a Conv2DTask, then reload it from saved weights and verify the
    reloaded model reproduces the original predictions exactly."""
    logger.set_level(logger.DEBUG)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    storegate = build_storegate()
    saver = Saver()

    # Convolutional stack: conv -> pool -> upsample.
    conv2d_layers = [
        ('conv2d', {'filters': 4, 'kernel_size': (2, 2)}),
        ('maxpooling2d', {'pool_size': (2, 2)}),
        ('upsampling2d', {'size': (2, 2)}),
    ]

    args_task = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # MLBaseTask
        'phases': None,
        'save_weights': True,
        # KerasBaseTask
        'input_var_names': ['var0'],
        'output_var_names': ['output0'],
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'binary_crossentropy',
        'run_eagerly': True,
        # MLPTask
        'true_var_names': ['label'],
        'layers': [4, 1],
        'input_shapes': (1, 3, 3, 1),
        'activation': 'relu',
        'activation_last': 'sigmoid',
        # Conv2DTask
        'conv2d_layers': conv2d_layers,
    }

    from multiml.task.keras import Conv2DTask
    task = Conv2DTask(**args_task)
    task.execute()
    task.finalize()

    # Rebuild in test-only mode, loading the weights saved above.
    args_task['phases'] = ['test']
    args_task['save_weights'] = False
    args_task['load_weights'] = saver.load_ml(task._name)['model_path']
    task2 = Conv2DTask(**args_task)
    task2.execute()
    task2.finalize()

    # Reloaded weights must give bit-identical predictions.
    y_pred = task.predict(phase='test')
    y_pred_load = task2.predict(phase='test')
    assert np.array_equal(y_pred, y_pred_load)
# Example #18
def test_agent_keras_ensemble():
    """End-to-end check of KerasEnsembleAgent: train, save, reload, compare.

    Two parallel MLP subtasks in step0 are ensembled, a subtask in step1
    consumes their outputs, and a connection task joins the steps. A second
    agent then reloads the saved weights and must reproduce the first
    agent's test-phase predictions exactly.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    logger.set_level(logger.DEBUG)

    # One shared saver/storegate across both agent runs so that the second
    # run can load what the first run saved.
    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        'save_weights': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # step0 holds two parallel candidates over the same variables (the
    # ensemble members); step1 maps their target variables to the label.
    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       batch_norm=True,
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       **args_mlptask)
    subtask2 = MLPTask(name='subtask2',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step0', 'subtask0', env=subtask0)
    task_scheduler.add_subtask('step0', 'subtask1', env=subtask1)
    task_scheduler.add_subtask('step1', 'subtask2', env=subtask2)
    task_scheduler.show_info()

    # Independent copies: task_scheduler0 for the training run,
    # task_scheduler1 for the reload run below.
    import copy
    task_scheduler0 = copy.deepcopy(task_scheduler)
    task_scheduler1 = copy.deepcopy(task_scheduler)

    metric = RandomMetric()

    agent_args = {
        # BaseAgent
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler0,
        'metric': metric,
        # KerasConnectionSimpleAgent
        'freeze_model_weights': False,
        'do_pretraining': True,
        'connectiontask_name': 'connectiontask',
        'connectiontask_args': {
            'num_epochs': 2,
            'max_patience': 1,
            'batch_size': 1200,
            'save_weights': True,
            'phases': None,
            'use_multi_loss': True,
            'loss_weights': [0.5, 0.5],
            'optimizer': 'adam',
            'optimizer_args': dict(learning_rate=1e-3),
            'variable_mapping': [('var2', 'output0'), ('var3', 'output1')],
            'run_eagerly': True,
        },
        # KerasEnsembleAgent
        'ensembletask_args': {
            'dropout_rate': None,
            'individual_loss': False,
            'individual_loss_weights': 0.0,
            'phases': ['test'],
            'save_weights': True,
            'run_eagerly': True,
        },
    }

    agent = KerasEnsembleAgent(**agent_args)
    agent.execute()
    agent.finalize()

    # Phases including 'train' are not implemented for the ensemble task,
    # so the constructor must reject them.
    with pytest.raises(ValueError):
        agent_args['ensembletask_args']['phases'] = ['train', 'valid', 'test']
        KerasEnsembleAgent(**agent_args)

    # Reconfigure the second scheduler's subtasks to load (not save) the
    # weights produced by the first run, test phase only.
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
    ]:
        task = task_scheduler1.get_subtask(task_id, subtask_id).env
        task._save_weights = False
        task._load_weights = True
        task._phases = ['test']
    agent_args['ensembletask_args']['phases'] = ['test']  # restore valid phases
    agent_args['connectiontask_args']['save_weights'] = False
    agent_args['connectiontask_args']['load_weights'] = True
    agent_args['saver'] = saver
    agent_args['task_scheduler'] = task_scheduler1
    agent_args['connectiontask_args']['phases'] = ['test']
    agent2 = KerasEnsembleAgent(**agent_args)
    agent2.execute()
    agent2.finalize()

    task_scheduler0.show_info()
    task_scheduler1.show_info()

    # Every task — including the ensemble and connection tasks the agents
    # created — must predict identically before and after reloading.
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step0', 'subtask1'),
        ('step1', 'subtask2'),
        ('step0', 'step0_ensemble'),
        ('connection', 'connectiontask'),
    ]:
        task0 = task_scheduler0.get_subtask(task_id, subtask_id).env
        task1 = task_scheduler1.get_subtask(task_id, subtask_id).env
        y_pred = task0.predict(phase='test')
        y_pred_load = task1.predict(phase='test')
        for y, y_load in zip(y_pred, y_pred_load):
            assert (np.array_equal(y, y_load))
# Example #19
def test_keras_module_darts_model():
    """Build a DARTSModel over two ensemble steps and run a short fit.

    Checks that one architecture weight (alpha) exists per candidate
    sub-model in each step and that a best sub-model index is reported
    for both steps.
    """
    logger.set_level(logger.DEBUG)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    saver = Saver()
    storegate = build_storegate()

    shared_mlp_args = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # Two candidate sub-models per step: step0 maps the raw inputs to
    # intermediate outputs, step1 maps those intermediates to the label.
    step0_a = MLPTask(input_var_names=['var0', 'var1'],
                      output_var_names=['output0', 'output1'],
                      true_var_names=['var2', 'var3'],
                      layers=[4, 2],
                      **shared_mlp_args)
    step0_b = MLPTask(input_var_names=['var0', 'var1'],
                      output_var_names=['output0', 'output1'],
                      true_var_names=['var2', 'var3'],
                      layers=[4, 2],
                      **shared_mlp_args)
    step1_a = MLPTask(input_var_names=['output0', 'output1'],
                      output_var_names=['output2'],
                      true_var_names=['label'],
                      layers=[4, 1],
                      **shared_mlp_args)
    step1_b = MLPTask(input_var_names=['output0', 'output1'],
                      output_var_names=['output2'],
                      true_var_names=['label'],
                      layers=[4, 1],
                      **shared_mlp_args)
    for candidate in (step0_a, step0_b, step1_a, step1_b):
        candidate.execute()

    ensemble0 = EnsembleTask(
        subtasks=[step0_a, step0_b],
        dropout_rate=None,
        saver=saver,
        storegate=storegate,
        phases=['test'],
    )
    ensemble1 = EnsembleTask(
        subtasks=[step1_a, step1_b],
        dropout_rate=None,
        saver=saver,
        storegate=storegate,
        phases=['test'],
    )
    ensemble0.execute()
    ensemble1.execute()

    darts = DARTSModel(
        # DARTSModel
        optimizer_alpha='adam',
        optimizer_weight='adam',
        learning_rate_alpha=1e-3,
        learning_rate_weight=1e-3,
        zeta=1e-3,
        # ConnectionModel
        models=[ensemble0.ml.model, ensemble1.ml.model],
        input_var_index=[[0, 1], [-1, -2]],
        output_var_index=[[0, 1], [2]])

    # The optimizers may also be passed as objects instead of str names.
    DARTSModel(
        # DARTSModel
        optimizer_alpha=Adam(learning_rate=0.001),
        optimizer_weight=Adam(learning_rate=0.001),
        learning_rate_alpha=-1,  # dummy
        learning_rate_weight=-1,  # dummy
        zeta=1e-3,
        # ConnectionModel
        models=[ensemble0.ml.model, ensemble1.ml.model],
        input_var_index=[[0, 1], [-1, -2]],
        output_var_index=[[0, 1], [2]])

    darts.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        run_eagerly=True,
    )

    # Random toy tensors: two single-column inputs; targets are the
    # two-column step0 output and the single-column step1 output.
    x_train = [np.random.normal(size=(10, 1)) for _ in range(2)]
    x_valid = [np.random.normal(size=(10, 1)) for _ in range(2)]
    y_train = [np.random.normal(size=(10, 2)), np.random.normal(size=(10, 1))]
    y_valid = [np.random.normal(size=(10, 2)), np.random.normal(size=(10, 1))]

    darts._batch_size_train.assign(2)
    darts.fit(
        x=x_train,
        y=y_train,
        validation_data=(x_valid, y_valid),
        epochs=2,
    )

    # One alpha per candidate sub-model in each of the two steps.
    alphas = darts._get_variable_numpy(darts.alpha_vars)
    assert alphas[0].shape == (2, 1)  # alphas of step0
    assert alphas[1].shape == (2, 1)  # alphas of step1

    best_submodel_index = darts.get_index_of_best_submodels()
    assert len(best_submodel_index) == 2
def test_agent_keras_connection_simple():
    """Check KerasConnectionRandomSearchAgent: train, connect, reload, compare.

    Trains two chained MLP subtasks via the agent, exercises pretraining
    and manual connected-model building, then reloads the saved connection
    task with a second agent and verifies identical test-phase predictions.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    logger.set_level(logger.DEBUG)

    # One shared saver/storegate so the second agent run can load what the
    # first run saved.
    saver = Saver()
    storegate = build_storegate()

    args_mlptask = {
        # BaseTask
        'saver': saver,
        'storegate': storegate,
        # KerasBaseTask
        'optimizer': 'adam',
        'num_epochs': 2,
        'max_patience': 1,
        'loss': 'mse',
        'run_eagerly': True,
        # MLPTask
        'activation': 'relu',
        'activation_last': 'sigmoid',
    }

    # step0 maps the raw inputs to intermediate outputs; step1 maps the
    # corresponding target variables to the label.
    subtask0 = MLPTask(name='subtask0',
                       input_var_names=['var0', 'var1'],
                       output_var_names=['output0', 'output1'],
                       true_var_names=['var2', 'var3'],
                       layers=[4, 2],
                       batch_norm=True,
                       **args_mlptask)
    subtask1 = MLPTask(name='subtask1',
                       input_var_names=['var2', 'var3'],
                       output_var_names=['output2'],
                       true_var_names=['label'],
                       layers=[4, 1],
                       **args_mlptask)

    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step0', 'subtask0', env=subtask0)
    task_scheduler.add_subtask('step1', 'subtask1', env=subtask1)
    task_scheduler.show_info()

    # Independent copies: task_scheduler0 for the training run,
    # task_scheduler1 for the reload run below.
    import copy
    task_scheduler0 = copy.deepcopy(task_scheduler)
    task_scheduler1 = copy.deepcopy(task_scheduler)

    metric = RandomMetric()

    agent_args = {
        # BaseAgent
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler0,
        'metric': metric,
        # KerasConnectionSimpleAgent
        'freeze_model_weights': False,
        'do_pretraining': False,
        'connectiontask_name': 'connectiontask',
        'connectiontask_args': {
            'num_epochs': 2,
            'max_patience': 1,
            'batch_size': 1200,
            'save_weights': True,
            'phases': None,
            'use_multi_loss': True,
            'loss_weights': [0.5, 0.5],
            'optimizer': 'adam',
            'optimizer_args': dict(learning_rate=1e-3),
            'variable_mapping': [('var2', 'output0'), ('var3', 'output1')],
            'run_eagerly': True,
        },
    }

    agent = KerasConnectionRandomSearchAgent(**agent_args)
    agent.execute()
    agent.finalize()

    # Get the 1st task 1st model
    subtask = agent._task_prod[0][0]

    # Apply pretraining
    agent._execute_subtask(subtask, is_pretraining=True)

    # Set all variables in keras model to trainable
    KerasConnectionRandomSearchAgent._set_trainable_flags(subtask.env._model,
                                                          do_trainable=True)

    # Build connect-model
    agent._build_connected_models(
        subtasks=[agent._task_prod[0][0].env, agent._task_prod[1][0].env],
        job_id='dummy',
        use_task_scheduler=False)

    # Load saved model: reconfigure the second scheduler's subtasks for the
    # test phase without saving or loading their own weights (the connection
    # task carries the weights instead).
    for (task_id, subtask_id) in [
        ('step0', 'subtask0'),
        ('step1', 'subtask1'),
    ]:
        task = task_scheduler1.get_subtask(task_id, subtask_id).env
        task._save_weights = False
        task._load_weights = False
        task._phases = ['test']
    agent_args['connectiontask_args']['save_weights'] = False
    agent_args['connectiontask_args']['load_weights'] = True
    agent_args['saver'] = saver
    agent_args['task_scheduler'] = task_scheduler1
    agent_args['connectiontask_args']['phases'] = ['test']
    agent2 = KerasConnectionRandomSearchAgent(**agent_args)
    agent2.execute()
    agent2.finalize()

    task_scheduler0.show_info()
    task_scheduler1.show_info()
    # The reloaded connection task must reproduce both output arrays of the
    # originally trained connection task.
    task0 = task_scheduler0.get_subtask('connection', 'connectiontask').env
    task1 = task_scheduler1.get_subtask('connection', 'connectiontask').env
    y_pred = task0.predict(phase='test')
    y_pred_load = task1.predict(phase='test')
    assert (np.array_equal(y_pred[0], y_pred_load[0]))
    assert (np.array_equal(y_pred[1], y_pred_load[1]))