Code example #1
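These test snippets omit the file-level imports of their source module. Based on the names used below, a plausible header looks like the following sketch; the torch imports are standard, while the eisen import paths and aliases are assumptions rather than lines copied from the source file:

import torch
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader

from eisen.utils import EisenModuleWrapper
from eisen.utils.workflows import Testing as WorkflowTesting      # assumed alias
from eisen.utils.workflows import Training as WorkflowTraining    # assumed alias
from eisen.utils.workflows import Validation as WorkflowValidation  # assumed alias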
class TestWorkflowTesting:
    def setup_class(self):
        self.batch = {
            "x": torch.rand(
                (2, 1, 8, 8)),  # batch size 2, 1 input channel, 8x8 pixels
            "y": torch.LongTensor(
                [0, 1]),  # class for those two images (0 and 1 respectively)
        }

        self.data_loader = DataLoader(DummyDataset(),
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=4)

        self.module = EisenModuleWrapper(Net(),
                                         input_names=["x"],
                                         output_names=["pred"])

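        # Note: the optimizer and loss built below are not passed to
        # WorkflowTesting; the testing workflow only consumes the module,
        # the data loader, and the metrics.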
        self.optimizer = Adam(self.module.parameters(), 0.001)

        self.loss = EisenModuleWrapper(module=CrossEntropyLoss(),
                                       input_names=["pred", "y"],
                                       output_names=["loss"])

        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["metric"],
        )

        self.testing_workflow = WorkflowTesting(self.module,
                                                self.data_loader,
                                                [self.metric],
                                                gpu=False)

        assert isinstance(self.testing_workflow, WorkflowTesting)

    def test_call(self):
        output, losses, metrics = self.testing_workflow(self.batch)

        assert isinstance(losses, list)
        assert len(losses) == 0

        assert isinstance(metrics, list)
        assert len(metrics) == 1
        assert isinstance(metrics[0], dict)
        assert isinstance(metrics[0]["metric"], torch.Tensor)

        assert isinstance(output, dict)

        pred = output["pred"]

        assert isinstance(pred, torch.Tensor)

        assert pred.size()[0] == 2
        assert pred.size()[1] == 2

    def test_run(self):
        self.testing_workflow.run()
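The examples on this page reference DummyDataset and Net without defining them. A minimal sketch consistent with the assertions (8x8 single-channel inputs, two output classes) might look like this; the actual definitions in the source test suite may differ:

import torch
from torch.utils.data import Dataset


class DummyDataset(Dataset):
    # Yields dictionary samples matching the input/target names used above.
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return {
            "x": torch.rand((1, 8, 8)),                    # one 8x8 single-channel image
            "y": torch.tensor(idx % 2, dtype=torch.long),  # binary class label
        }


class Net(torch.nn.Module):
    # Tiny classifier: flattens the image and maps it to two logits.
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(64, 2)

    def forward(self, x):
        return self.fc(x.view(x.size(0), -1))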
Code example #2
class TestDataParallelTraining(TestWorkflowTraining):
    def setup_class(self):
        self.batch = {
            "x": torch.rand(
                (2, 1, 8, 8)),  # batch size 2, 1 input channel, 8x8 pixels
            "y": torch.LongTensor(
                [0, 1]),  # class for those two images (0 and 1 respectively)
        }

        self.data_loader = DataLoader(DummyDataset(),
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=4)

        data_parallel_net = torch.nn.DataParallel(Net())

        self.module = EisenModuleWrapper(data_parallel_net,
                                         input_names=["x"],
                                         output_names=["pred"])

        self.optimizer = Adam(self.module.parameters(), 0.001)

        self.loss = EisenModuleWrapper(module=CrossEntropyLoss(),
                                       input_names=["pred", "y"],
                                       output_names=["loss"])

        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["metric"],
        )

        self.training_workflow = WorkflowTraining(
            self.module,
            self.data_loader,
            [self.loss],
            self.optimizer,
            [self.metric],
            gpu=False,
        )

        assert isinstance(self.training_workflow, WorkflowTraining)
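Since this class inherits from TestWorkflowTraining, the parent's test methods (not reproduced on this page) exercise the DataParallel-wrapped module. A hedged sketch of what such an inherited check plausibly looks like, assuming the training workflow exposes the same epoch counter as the validation workflow in example #3:

    def test_run(self):
        # One pass over the data loader should advance the epoch counter.
        self.training_workflow.run()
        assert self.training_workflow.epoch == 1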
Code example #3
File: workflows_test.py  Project: nick917/eisen-core
class TestWorkflowValidation:
    def setup_class(self):
        self.batch = {
            'x': torch.rand(
                (2, 1, 8, 8)),  # batch size 2, 1 input channel, 8x8 pixels
            'y': torch.LongTensor(
                [0, 1])  # class for those two images (0 and 1 respectively)
        }

        self.data_loader = DataLoader(DummyDataset(),
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=4)

        self.module = EisenModuleWrapper(Net(),
                                         input_names=['x'],
                                         output_names=['pred'])

        self.optimizer = Adam(self.module.parameters(), 0.001)

        self.loss = EisenModuleWrapper(module=CrossEntropyLoss(),
                                       input_names=['pred', 'y'],
                                       output_names=['loss'])

        self.metric = EisenModuleWrapper(module=CrossEntropyLoss(),
                                         input_names=['pred', 'y'],
                                         output_names=['metric'])

        self.validation_workflow = WorkflowValidation(self.module,
                                                      self.data_loader,
                                                      [self.loss],
                                                      [self.metric],
                                                      gpu=False)

        assert isinstance(self.validation_workflow, WorkflowValidation)

    def test_call(self):
        output, losses, metrics = self.validation_workflow(self.batch)

        assert isinstance(losses, list)
        assert len(losses) == 1
        assert isinstance(losses[0], dict)
        assert isinstance(losses[0]['loss'], torch.Tensor)

        assert isinstance(metrics, list)
        assert len(metrics) == 1
        assert isinstance(metrics[0], dict)
        assert isinstance(metrics[0]['metric'], torch.Tensor)

        assert isinstance(output, dict)

        pred = output['pred']

        assert isinstance(pred, torch.Tensor)

        assert pred.size()[0] == 2
        assert pred.size()[1] == 2

    def test_run(self):
        self.validation_workflow.run()

        assert self.validation_workflow.epoch == 1

        self.validation_workflow.run()

        assert self.validation_workflow.epoch == 2
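All three test classes rely on EisenModuleWrapper to adapt a plain torch module to dictionary-based batches. A hypothetical stand-in illustrating the idea (not the eisen implementation) could look like this:

import torch


class DictAdapter(torch.nn.Module):
    # Pull inputs out of a batch dict by name; return outputs keyed by name.
    def __init__(self, module, input_names, output_names):
        super().__init__()
        self.module = module
        self.input_names = input_names
        self.output_names = output_names

    def forward(self, **batch):
        outputs = self.module(*[batch[name] for name in self.input_names])
        if not isinstance(outputs, tuple):
            outputs = (outputs,)
        return dict(zip(self.output_names, outputs))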
Code example #4
File: train.py  Project: eisen-ai/eisen-cli
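The imports of train.py are not shown on this page. Judging from the names used in the function, they plausibly include the following; the eisen / eisen-cli helper paths are assumptions and are therefore only listed in a comment:

import logging

import numpy as np
import torch
from torch.utils.data import DataLoader

# Helpers referenced below whose exact import paths are not visible here:
# json_file_to_dict, import_string, Compose, EisenModuleWrapper, SUPPORTED_PHASES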
def eisen_training(configuration, epochs, data_dir, artifacts_dir, resume):
    """
    This function parses a configuration file and creates all the necessary objects and workflow
    components to execute training. Everything is built according to the configuration file.

    :param configuration: path of configuration file for the job
    :type configuration: Path
    :param epochs: number of epochs requested for training
    :type epochs: int
    :param data_dir: base path for the data of this job
    :type data_dir: Path
    :param artifacts_dir: base path for the artifacts of this job
    :type artifacts_dir: Path
    :param resume: use pre-existing artifacts to resume training
    :type resume: bool

    :return: None
    """

    # Read configuration which is stored in JSON format

    configuration_dictionary = json_file_to_dict(configuration)

    # Getting hyperparameters

    hyperparameters = configuration_dictionary['General'].get(
        'Hyperparameters', {})

    model = configuration_dictionary['General'].get('Models')[0]

    batch_size = 4
    num_workers = 4

    for entry in hyperparameters:
        if entry['type'] == '.BatchSize':
            batch_size = entry['params']['value']

        if entry['type'] == '.NumWorkers':
            num_workers = entry['params']['value']

        if entry['type'] == '.Seed':
            np.random.seed(entry['params']['value'])
            torch.manual_seed(entry['params']['value'])
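    # Each hyperparameter entry above is expected to be a dict shaped like
    # (hypothetical values): {'type': '.BatchSize', 'params': {'value': 8}}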

    # For each phase of the workflow [training, validation, testing]

    workflows = {}

    hooks = []

    for phase in [
            key for key in configuration_dictionary.keys()
            if key in SUPPORTED_PHASES
    ]:
        logging.info(
            'INFO: setting up everything related to {}'.format(phase))

        # For each subsection in the configuration do appropriate actions
        phase_dictionary = configuration_dictionary[phase]

        # TRANSFORMS
        # instantiate transforms for the data and combine them together

        logging.info('INFO: setting up transforms...')

        readers_list = []

        for reader in phase_dictionary['Readers']:
            logging.debug('DEBUG: setting up readers type {}'.format(
                reader['type']))

            transform_object = import_string(reader['type'])
            readers_list.append(
                transform_object(data_dir=data_dir, **reader['params']))

        reader = Compose(readers_list)

        transform_list = []

        for transform in phase_dictionary['Transforms']:
            logging.debug('DEBUG: setting up transform type {}'.format(
                transform['type']))

            transform_object = import_string(transform['type'])
            transform_list.append(transform_object(**transform['params']))

        transform = Compose(transform_list)

        data_pipeline = Compose([reader, transform])

        # DATA
        # Instantiate datasets and their data loaders (torch.utils.data.DataLoader)

        logging.info('INFO: setting up the dataset...')

        dataset_object = import_string(phase_dictionary['Datasets'][0]['type'])

        dataset = dataset_object(transform=data_pipeline,
                                 data_dir=data_dir,
                                 **phase_dictionary['Datasets'][0]['params'])

        data_loader = DataLoader(dataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 num_workers=num_workers)

        # MODEL
        # Instantiate network

        logging.info('INFO: setting up the model...')

        model_object = import_string(model['type'])

        # Work on a copy and bind the wrapper to a new name, so the shared
        # configuration entry in `model` stays intact for later phases
        model_params = dict(model['params'])

        input_names = model_params.pop('input_names')
        output_names = model_params.pop('output_names')

        wrapped_model = EisenModuleWrapper(module=model_object(**model_params),
                                           input_names=input_names,
                                           output_names=output_names)

        # Instantiate metrics

        logging.info('INFO: setting up the metrics...')

        metrics = []

        for metric in phase_dictionary['Metrics']:
            logging.debug('DEBUG: setting up metric type {}'.format(
                metric['type']))

            metric_object = import_string(metric['type'])

            input_names = metric['params'].pop('input_names')
            output_names = metric['params'].pop('output_names')

            metrics.append(
                EisenModuleWrapper(module=metric_object(**metric['params']),
                                   input_names=input_names,
                                   output_names=output_names))

        losses = None
        optimizer = None

        if phase == 'Training':
            # Instantiate losses

            logging.info('INFO: setting up the losses...')

            losses = []

            for loss in phase_dictionary['Losses']:
                logging.debug('DEBUG: setting up loss type {}'.format(
                    loss['type']))

                loss_object = import_string(loss['type'])

                input_names = loss['params'].pop('input_names')
                output_names = loss['params'].pop('output_names')

                losses.append(
                    EisenModuleWrapper(module=loss_object(**loss['params']),
                                       input_names=input_names,
                                       output_names=output_names))

            # Instantiate optimizer

            logging.info('INFO: setting up the optimizer...')

            optimizer_object = import_string(
                phase_dictionary['Optimizer'][0]['type'])

            optimizer = optimizer_object(
                params=wrapped_model.parameters(),
                **phase_dictionary['Optimizer'][0]['params'])

        # WORKFLOW
        # Instantiate workflows

        logging.info('INFO: setting up the workflow...')

        workflow_object = import_string(
            phase_dictionary['Workflow'][0]['type'])

        workflow = workflow_object(
            model=wrapped_model,
            losses=losses,
            optimizer=optimizer,
            metrics=metrics,
            data_loader=data_loader,
            **phase_dictionary['Workflow'][0]['params'],
        )

        workflows[phase] = workflow

        # HOOKS
        # Instantiate Hooks
        logging.info('INFO: setting up the Hooks...')

        for hook in phase_dictionary['Hooks']:
            logging.debug('DEBUG: setting up hook type {}'.format(
                hook['type']))

            hook_object = import_string(hook['type'])

            hooks.append(
                hook_object(workflows[phase].id, phase, artifacts_dir,
                            **hook['params']))

    # RUN
    # run training for the requested number of epochs

    logging.info('INFO: running workflows...')

    training_context = workflows.get('Training', None)
    validation_context = workflows.get('Validation', None)

    for epoch in range(epochs):
        if training_context:
            logging.info('INFO: running TRAINING epoch {}'.format(epoch))
            training_context.run()

        if validation_context:
            logging.info('INFO: running VALIDATION epoch {}'.format(epoch))

            validation_context.run()
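A minimal sketch of invoking this entry point; the paths and epoch count are hypothetical, and in eisen-cli the function is normally driven by the CLI rather than called directly:

from pathlib import Path

eisen_training(
    configuration=Path('configs/experiment.json'),
    epochs=10,
    data_dir=Path('/data/my_dataset'),
    artifacts_dir=Path('/artifacts/my_experiment'),
    resume=False,
)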