Exemple #1
0
    def test_train(self):
        n_jobs = 2
        with Pool(n_jobs) as pool,\
                tempfile.TemporaryDirectory() as tmp_dir:
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=EpochStoppingCondition(10),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=True)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment = Experiment('test__reuters_experiment',
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=1, data_dir=tmp_dir),
                                    parameters=experiment_parameters)

            blueprints = [
                create_random_blueprint(experiment) for _ in range(n_jobs)
            ]
            pool.map(multiprocess_fit, blueprints)
Exemple #2
0
def search_model(experiment_label, steps, batch_size=32):
    """ This is where we put everythin together.
    We get the dataset, build the Training and Experiment objects, and run the experiment.
    The experiments logs are generated in ~/minos/experiment_label
    We use the CpuEnvironment to have the experiment run on the cpu, with 2 parralel processes.
    We could use GpuEnvironment to use GPUs, and specify which GPUs to use, and how many tasks
    per GPU
    """
    batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
        batch_size, max_words)
    layout = build_layout(max_words, nb_classes)
    training = Training(Objective('categorical_crossentropy'),
                        Optimizer(optimizer='Adam'),
                        Metric('categorical_accuracy'),
                        epoch_stopping_condition(), batch_size)
    parameters = custom_experiment_parameters()
    experiment = Experiment(experiment_label,
                            layout,
                            training,
                            batch_iterator,
                            test_batch_iterator,
                            CpuEnvironment(n_jobs=2),
                            parameters=parameters)
    run_ga_search_experiment(experiment,
                             population_size=100,
                             generations=steps,
                             resume=False,
                             log_level='DEBUG')
Exemple #3
0
    def test_mutate_w_custom_definitions(self):

        def custom_activation(x):
            return x

        register_custom_activation('custom_activation', custom_activation)
        register_custom_layer('Dense2', Dense, deepcopy(reference_parameters['layers']['Dense']))

        layout = Layout(
            input_size=100,
            output_size=10,
            output_activation='softmax',
            block=['Dense', 'Dense2'])
        training = Training(
            objective=Objective('categorical_crossentropy'),
            optimizer=None,
            metric=Metric('categorical_accuracy'),
            stopping=EpochStoppingCondition(5),
            batch_size=250)

        experiment_parameters = ExperimentParameters(use_default_values=False)
        experiment_parameters.layout_parameter('blocks', int_param(1, 5))
        experiment_parameters.layout_parameter('layers', int_param(1, 5))
        experiment_parameters.layer_parameter('Dense2.output_dim', int_param(10, 500))
        experiment_parameters.layer_parameter('Dropout.p', float_param(0.1, 0.9))
        experiment_parameters.all_search_parameters(True)
        experiment = Experiment(
            'test',
            layout,
            training,
            batch_iterator=None,
            test_batch_iterator=None,
            environment=None,
            parameters=experiment_parameters)
        check_experiment_parameters(experiment)
        for _ in range(10):
            blueprint = create_random_blueprint(experiment)
            mutant = mutate_blueprint(
                blueprint,
                parameters=experiment.parameters,
                p_mutate_layout=0,
                p_mutate_param=1,
                mutate_in_place=False)
            for row_idx, row in enumerate(mutant.layout.rows):
                for block_idx, block in enumerate(row.blocks):
                    for layer_idx, layer in enumerate(block.layers):
                        original_row = blueprint.layout.rows[row_idx]
                        original_block = original_row.blocks[block_idx]
                        original_layer = original_block.layers[layer_idx]
                        for name, value in layer.parameters.items():
                            self.assertTrue(
                                value != original_layer.parameters[name],
                                'Should have mutated parameter')
Exemple #4
0
    def test_early_stopping_condition_test(self):
        disable_sysout()
        with tempfile.TemporaryDirectory() as tmp_dir:
            batch_size = 50
            min_epoch = 10
            max_epoch = 15
            batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
                batch_size, 1000)
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=AccuracyDecreaseStoppingCondition(
                    metric='categorical_accuracy',
                    noprogress_count=2,
                    min_epoch=min_epoch,
                    max_epoch=max_epoch),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=True)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment = Experiment('test__reuters_experiment',
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=1, data_dir=tmp_dir),
                                    parameters=experiment_parameters)
            _assert_valid_training_parameters(experiment)

            blueprint = create_random_blueprint(experiment)
            trainer = ModelTrainer(batch_iterator, test_batch_iterator)
            model, history, _duration = trainer.train(blueprint,
                                                      cpu_device(),
                                                      save_best_model=False)
            self.assertTrue(
                len(history.epoch) >= min_epoch,
                'Should have trained for at least min epoch')
            self.assertTrue(
                len(history.epoch) <= max_epoch,
                'Should have trained for max epoch')
            self.assertIsNotNone(model, 'should have fit the model')
            score = model.evaluate_generator(
                test_batch_iterator,
                val_samples=test_batch_iterator.sample_count)
            self.assertIsNotNone(score, 'should have evaluated the model')
Exemple #5
0
    def test_save(self):
        disable_sysout()

        def custom_activation(x):
            return x

        register_custom_activation('custom_activation', custom_activation)
        register_custom_layer('custom_layer', CustomLayer,
                              {'output_dim': int_param(1, 100)})

        with tempfile.TemporaryDirectory() as tmp_dir:
            batch_size = 50
            batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
                batch_size, 1000)
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=EpochStoppingCondition(10),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=True)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment_parameters.layout_parameter('block.layer_type',
                                                   'custom_layer')
            experiment = Experiment('test__reuters_experiment',
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=1, data_dir=tmp_dir),
                                    parameters=experiment_parameters)

            blueprint = create_random_blueprint(experiment)
            model = ModelBuilder().build(blueprint, default_device())
            model.fit_generator(
                generator=batch_iterator,
                samples_per_epoch=batch_iterator.samples_per_epoch,
                nb_epoch=10,
                validation_data=test_batch_iterator,
                nb_val_samples=test_batch_iterator.sample_count)
            filepath = join(tmp_dir, 'model')
            model.save(filepath)
            model = load_keras_model(filepath)
            self.assertIsNotNone(model, 'Should have loaded the model')
Exemple #6
0
    def test_build(self):
        layout = Layout(input_size=100,
                        output_size=10,
                        output_activation='softmax')
        training = Training(objective=Objective('categorical_crossentropy'),
                            optimizer=None,
                            metric=Metric('categorical_accuracy'),
                            stopping=EpochStoppingCondition(10),
                            batch_size=250)

        experiment_parameters = ExperimentParameters(use_default_values=False)
        experiment_parameters.layout_parameter('blocks', int_param(1, 5))
        experiment_parameters.layout_parameter('layers', int_param(1, 5))
        experiment_parameters.layer_parameter('Dense.output_dim',
                                              int_param(10, 500))
        experiment_parameters.layer_parameter('Dense.activation',
                                              string_param(['relu', 'tanh']))
        experiment_parameters.layer_parameter('Dropout.p',
                                              float_param(0.1, 0.9))
        experiment_parameters.all_search_parameters(True)
        experiment = Experiment('test',
                                layout,
                                training,
                                batch_iterator=None,
                                test_batch_iterator=None,
                                environment=None,
                                parameters=experiment_parameters)
        check_experiment_parameters(experiment)
        for _ in range(5):
            blueprint1 = create_random_blueprint(experiment)
            model = ModelBuilder().build(blueprint1, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint2 = create_random_blueprint(experiment)
            model = ModelBuilder().build(blueprint2, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint3 = mix_blueprints(blueprint1, blueprint2,
                                        experiment_parameters)
            model = ModelBuilder().build(blueprint3, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint4 = mutate_blueprint(blueprint1,
                                          experiment_parameters,
                                          mutate_in_place=False)
            model = ModelBuilder().build(blueprint4, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
Exemple #7
0
def create_experiment(input_size, output_size, batch_size):
    training = Training(objective=Objective('categorical_crossentropy'),
                        optimizer=Optimizer(optimizer='Adam'),
                        metric=Metric('categorical_accuracy'),
                        stopping=AccuracyDecreaseStoppingCondition(
                            metric='categorical_accuracy',
                            min_epoch=5,
                            max_epoch=25,
                            noprogress_count=5),
                        batch_size=batch_size)
    parameters = ExperimentParameters(use_default_values=True)
    layout = Layout(input_size=input_size,
                    output_size=output_size,
                    output_activation='softmax')
    experiment = Experiment(label='reuters_train_multi_gpu',
                            layout=layout,
                            training=training,
                            parameters=parameters)
    return experiment
Exemple #8
0
    def test_train(self):
        disable_sysout()
        with tempfile.TemporaryDirectory() as tmp_dir:
            batch_size = 50
            batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
                batch_size, 1000)
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=EpochStoppingCondition(10),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=True)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment = Experiment('test__reuters_experiment',
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=1, data_dir=tmp_dir),
                                    parameters=experiment_parameters)

            blueprint = create_random_blueprint(experiment)
            model = ModelBuilder().build(blueprint, default_device())
            result = model.fit_generator(
                generator=batch_iterator,
                samples_per_epoch=batch_iterator.samples_per_epoch,
                nb_epoch=10,
                validation_data=test_batch_iterator,
                nb_val_samples=test_batch_iterator.sample_count)
            self.assertIsNotNone(result, 'should have fit the model')
            score = model.evaluate_generator(
                test_batch_iterator,
                val_samples=test_batch_iterator.sample_count)
            self.assertIsNotNone(score, 'should have evaluated the model')
Exemple #9
0
    def test_model_trainer(self):
        disable_sysout()
        with tempfile.TemporaryDirectory() as tmp_dir:
            batch_size = 50
            batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
                batch_size, 1000)
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=EpochStoppingCondition(10),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=True)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment = Experiment('test__reuters_experiment',
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=1, data_dir=tmp_dir),
                                    parameters=experiment_parameters)

            blueprint = create_random_blueprint(experiment)
            trainer = ModelTrainer(batch_iterator, test_batch_iterator)
            model_filename = join(tmp_dir, 'model')
            model, history, _duration = trainer.train(
                blueprint,
                cpu_device(),
                save_best_model=True,
                model_filename=model_filename)
            self.assertIsNotNone(model, 'should have fit the model')
            self.assertTrue(isfile(model_filename),
                            'Should have saved the model')
            self.assertIsNotNone(history, 'Should have the training history')
Exemple #10
0
    def test_build_w_custom_definitions(self):
        def custom_activation(x):
            return x

        register_custom_activation('custom_activation', custom_activation)
        register_custom_layer(
            'Dense2', Dense, deepcopy(reference_parameters['layers']['Dense']),
            True)

        layout = Layout(input_size=100,
                        output_size=10,
                        output_activation='softmax',
                        block=['Dense2'])
        training = Training(objective=Objective('categorical_crossentropy'),
                            optimizer=None,
                            metric=Metric('categorical_accuracy'),
                            stopping=EpochStoppingCondition(5),
                            batch_size=250)

        experiment_parameters = ExperimentParameters(use_default_values=False)
        experiment_parameters.layout_parameter('blocks', int_param(1, 5))
        experiment_parameters.layout_parameter('layers', int_param(1, 5))
        experiment_parameters.layout_parameter('layer.type',
                                               string_param(['Dense2']))
        experiment_parameters.layer_parameter('Dense2.output_dim',
                                              int_param(10, 500))
        experiment_parameters.layer_parameter(
            'Dense2.activation', string_param(['custom_activation']))
        experiment_parameters.layer_parameter('Dropout.p',
                                              float_param(0.1, 0.9))
        experiment_parameters.all_search_parameters(True)
        experiment = Experiment('test',
                                layout,
                                training,
                                batch_iterator=None,
                                test_batch_iterator=None,
                                environment=None,
                                parameters=experiment_parameters)
        check_experiment_parameters(experiment)
        for _ in range(5):
            blueprint1 = create_random_blueprint(experiment)
            for layer in blueprint1.layout.get_layers():
                self.assertEqual('Dense2', layer.layer_type,
                                 'Should have used custom layer')
            model = ModelBuilder().build(blueprint1, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint2 = create_random_blueprint(experiment)
            for layer in blueprint2.layout.get_layers():
                self.assertEqual('Dense2', layer.layer_type,
                                 'Should have used custom layer')
            model = ModelBuilder().build(blueprint2, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint3 = mix_blueprints(blueprint1, blueprint2,
                                        experiment_parameters)
            for layer in blueprint3.layout.get_layers():
                self.assertEqual('Dense2', layer.layer_type,
                                 'Should have used custom layer')
            model = ModelBuilder().build(blueprint3, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
            blueprint4 = mutate_blueprint(blueprint1,
                                          experiment_parameters,
                                          mutate_in_place=False)
            for layer in blueprint4.layout.get_layers():
                self.assertEqual('Dense2', layer.layer_type,
                                 'Should have used custom layer')
            model = ModelBuilder().build(blueprint4, cpu_device())
            self.assertIsNotNone(model, 'Should have built a model')
Exemple #11
0
    def test_ga_search(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            epoch = 3
            generations = 2
            batch_size = 50
            batch_iterator, test_batch_iterator, nb_classes = get_reuters_dataset(
                batch_size, 1000)
            layout = Layout(input_size=1000,
                            output_size=nb_classes,
                            output_activation='softmax')
            training = Training(
                objective=Objective('categorical_crossentropy'),
                optimizer=Optimizer(optimizer='Adam'),
                metric=Metric('categorical_accuracy'),
                stopping=EpochStoppingCondition(epoch),
                batch_size=batch_size)
            experiment_parameters = ExperimentParameters(
                use_default_values=False)
            experiment_parameters.layout_parameter('rows', 1)
            experiment_parameters.layout_parameter('blocks', 1)
            experiment_parameters.layout_parameter('layers', 1)
            experiment_parameters.layer_parameter('Dense.output_dim',
                                                  int_param(10, 500))
            experiment_parameters.all_search_parameters(True)

            experiment_label = 'test__reuters_experiment'
            experiment = Experiment(experiment_label,
                                    layout,
                                    training,
                                    batch_iterator,
                                    test_batch_iterator,
                                    CpuEnvironment(n_jobs=2, data_dir=tmp_dir),
                                    parameters=experiment_parameters)
            run_ga_search_experiment(experiment,
                                     population_size=2,
                                     generations=2)
            self.assertTrue(isfile(experiment.get_log_filename()),
                            'Should have logged')
            self.assertTrue(isfile(experiment.get_step_data_filename(0)),
                            'Should have logged')
            self.assertTrue(isfile(experiment.get_step_log_filename(0)),
                            'Should have logged')
            blueprints = load_experiment_blueprints(
                experiment_label, 0, Environment(data_dir=tmp_dir))
            self.assertTrue(
                len(blueprints) > 0, 'Should have saved/loaded blueprints')
            model = ModelBuilder().build(blueprints[0], cpu_device())
            disable_sysout()
            model.fit_generator(
                generator=batch_iterator,
                samples_per_epoch=batch_iterator.samples_per_epoch,
                nb_epoch=5,
                validation_data=test_batch_iterator,
                nb_val_samples=test_batch_iterator.sample_count)
            score = model.evaluate_generator(
                test_batch_iterator,
                val_samples=test_batch_iterator.sample_count)
            self.assertTrue(score[1] > 0, 'Should have valid score')

            step, population = load_experiment_checkpoint(experiment)
            self.assertEqual(generations - 1, step,
                             'Should have loaded checkpoint')
            self.assertIsNotNone(population, 'Should have loaded checkpoint')
            blueprint = load_experiment_best_blueprint(
                experiment.label,
                environment=CpuEnvironment(n_jobs=2, data_dir=tmp_dir))
            model = ModelBuilder().build(blueprint,
                                         cpu_device(),
                                         compile_model=False)
            self.assertIsNotNone(
                model,
                'Should have loaded and built best model from experiment')