Exemple #1
0
    def setup_class(self):
        """
        Creates the objects shared by the tests: a sample batch, a data loader, the wrapped model,
        an optimizer, a loss, a metric and the testing workflow built from them.
        """
        # batch size 2, 1 input channel, 8x8 pixels
        sample_images = torch.rand((2, 1, 8, 8))
        # class for those two images (0 and 1 respectively)
        sample_labels = torch.LongTensor([0, 1])

        self.batch = {'x': sample_images, 'y': sample_labels}

        self.data_loader = DataLoader(
            DummyDataset(),
            batch_size=1,
            shuffle=False,
            num_workers=4,
        )

        wrapped_net = EisenModuleWrapper(
            Net(),
            input_names=['x'],
            output_names=['pred'],
        )
        self.module = wrapped_net

        self.optimizer = Adam(wrapped_net.parameters(), 0.001)

        # Loss and metric wrap the same criterion type but publish under different output names
        self.loss = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=['pred', 'y'],
            output_names=['loss'],
        )
        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=['pred', 'y'],
            output_names=['metric'],
        )

        workflow = WorkflowTesting(
            wrapped_net,
            self.data_loader,
            [self.metric],
            gpu=False,
        )
        self.testing_workflow = workflow

        assert isinstance(workflow, WorkflowTesting)
Exemple #2
0
    def initialize(self, ctx):
        """
        Initializes the fields of the EisenServingHandler object based on the context.

        The device and the model directory are read from ``ctx.system_properties`` (keys ``gpu_id`` and
        ``model_dir``). The model directory is expected to contain ``model.pt``, ``pre_process_tform.pkl``,
        ``post_process_tform.pkl`` and ``metadata.json``.

        :param ctx: context of an inference request
        :return: None
        """
        properties = ctx.system_properties

        gpu_id = properties.get("gpu_id")

        # Use CUDA only when it is available AND a GPU id was actually assigned; otherwise the device
        # string would be the invalid "cuda:None"
        if torch.cuda.is_available() and gpu_id is not None:
            self.device = torch.device("cuda:" + str(gpu_id))
        else:
            self.device = torch.device("cpu")

        model_dir = properties.get("model_dir")

        # Model file
        model_pt_path = os.path.join(model_dir, "model.pt")

        # Pre processing chain
        pre_processing_pkl = os.path.join(model_dir, "pre_process_tform.pkl")

        # Post processing chain
        post_processing_pkl = os.path.join(model_dir, "post_process_tform.pkl")

        # unpickle serialized transform chain
        # NOTE(security): unpickling executes arbitrary code; these files must come from a trusted archive
        with open(pre_processing_pkl, "rb") as f:
            self.pre_process_tform = dill.load(f)

        with open(post_processing_pkl, "rb") as f:
            self.post_process_tform = dill.load(f)

        # Metadata about the model
        metadata_json = os.path.join(model_dir, "metadata.json")

        self.metadata = json_file_to_dict(metadata_json)

        self.input_name_list = self.metadata['model_input_list']

        self.output_name_list = self.metadata['model_output_list']

        # deserialize pytorch model directly onto the selected device
        base_model = torch.load(model_pt_path, map_location=self.device)

        self.model = EisenModuleWrapper(base_model, self.input_name_list,
                                        self.output_name_list)

        # put model in eval mode
        self.model.eval()

        logger.debug('Model file %s loaded successfully', model_pt_path)

        self.initialized = True
Exemple #3
0
class TestWorkflowTesting:
    """Tests for WorkflowTesting built around a small wrapped network and a dummy dataset."""

    def setup_class(self):
        """Creates the sample batch, data loader, wrapped model, loss, metric and testing workflow."""
        # batch size 2, 1 input channel, 8x8 pixels
        sample_images = torch.rand((2, 1, 8, 8))
        # class for those two images (0 and 1 respectively)
        sample_labels = torch.LongTensor([0, 1])

        self.batch = {"x": sample_images, "y": sample_labels}

        self.data_loader = DataLoader(
            DummyDataset(),
            batch_size=1,
            shuffle=False,
            num_workers=4,
        )

        wrapped_net = EisenModuleWrapper(
            Net(),
            input_names=["x"],
            output_names=["pred"],
        )
        self.module = wrapped_net

        self.optimizer = Adam(wrapped_net.parameters(), 0.001)

        self.loss = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["loss"],
        )

        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["metric"],
        )

        workflow = WorkflowTesting(
            wrapped_net,
            self.data_loader,
            [self.metric],
            gpu=False,
        )
        self.testing_workflow = workflow

        assert isinstance(workflow, WorkflowTesting)

    def test_call(self):
        """Calls the workflow on the sample batch and checks the structure of what it returns."""
        output, losses, metrics = self.testing_workflow(self.batch)

        # No losses are expected from the testing workflow
        assert isinstance(losses, list)
        assert len(losses) == 0

        # Exactly one metric entry, holding a tensor under the "metric" key
        assert isinstance(metrics, list)
        assert len(metrics) == 1
        first_metric = metrics[0]
        assert isinstance(first_metric, dict)
        assert isinstance(first_metric["metric"], torch.Tensor)

        assert isinstance(output, dict)

        pred = output["pred"]

        assert isinstance(pred, torch.Tensor)

        # First two dimensions of the prediction must both be 2
        assert pred.size()[0] == 2
        assert pred.size()[1] == 2

    def test_run(self):
        """Runs the workflow once over the data loader."""
        self.testing_workflow.run()
Exemple #4
0
    def setup_class(self):
        """
        Creates the objects shared by the tests: a sample batch, a data loader, a DataParallel model
        wrapped for Eisen, an optimizer, a loss, a metric and the training workflow built from them.
        """
        # batch size 2, 1 input channel, 8x8 pixels
        sample_images = torch.rand((2, 1, 8, 8))
        # class for those two images (0 and 1 respectively)
        sample_labels = torch.LongTensor([0, 1])

        self.batch = {"x": sample_images, "y": sample_labels}

        self.data_loader = DataLoader(
            DummyDataset(),
            batch_size=1,
            shuffle=False,
            num_workers=4,
        )

        # The network is wrapped in DataParallel before being handed to the Eisen wrapper
        parallel_net = torch.nn.DataParallel(Net())

        wrapped_net = EisenModuleWrapper(
            parallel_net,
            input_names=["x"],
            output_names=["pred"],
        )
        self.module = wrapped_net

        self.optimizer = Adam(wrapped_net.parameters(), 0.001)

        self.loss = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["loss"],
        )

        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=["pred", "y"],
            output_names=["metric"],
        )

        workflow = WorkflowTraining(
            wrapped_net,
            self.data_loader,
            [self.loss],
            self.optimizer,
            [self.metric],
            gpu=False,
        )
        self.training_workflow = workflow

        assert isinstance(workflow, WorkflowTraining)
Exemple #5
0
    def setup_class(self):
        """Creates a sample batch, the wrapped model and a GenericWorkflow built around that model."""
        # batch size 2, 1 input channel, 8x8 pixels
        sample_images = torch.rand((2, 1, 8, 8))
        # class for those two images (0 and 1 respectively)
        sample_labels = torch.LongTensor([0, 1])

        self.batch = {'x': sample_images, 'y': sample_labels}

        wrapped_net = EisenModuleWrapper(
            Net(),
            input_names=['x'],
            output_names=['pred'],
        )
        self.module = wrapped_net

        workflow = GenericWorkflow(wrapped_net, gpu=False)
        self.generic_module = workflow

        assert isinstance(workflow, GenericWorkflow)
Exemple #6
0
class TestWorkflowValidation:
    """Tests for WorkflowValidation built around a small wrapped network and a dummy dataset."""

    def setup_class(self):
        """Creates the sample batch, data loader, wrapped model, loss, metric and validation workflow."""
        # batch size 2, 1 input channel, 8x8 pixels
        sample_images = torch.rand((2, 1, 8, 8))
        # class for those two images (0 and 1 respectively)
        sample_labels = torch.LongTensor([0, 1])

        self.batch = {'x': sample_images, 'y': sample_labels}

        self.data_loader = DataLoader(
            DummyDataset(),
            batch_size=1,
            shuffle=False,
            num_workers=4,
        )

        wrapped_net = EisenModuleWrapper(
            Net(),
            input_names=['x'],
            output_names=['pred'],
        )
        self.module = wrapped_net

        self.optimizer = Adam(wrapped_net.parameters(), 0.001)

        self.loss = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=['pred', 'y'],
            output_names=['loss'],
        )

        self.metric = EisenModuleWrapper(
            module=CrossEntropyLoss(),
            input_names=['pred', 'y'],
            output_names=['metric'],
        )

        workflow = WorkflowValidation(
            wrapped_net,
            self.data_loader,
            [self.loss],
            [self.metric],
            gpu=False,
        )
        self.validation_workflow = workflow

        assert isinstance(workflow, WorkflowValidation)

    def test_call(self):
        """Calls the workflow on the sample batch and checks the structure of what it returns."""
        output, losses, metrics = self.validation_workflow(self.batch)

        # Exactly one loss entry, holding a tensor under the 'loss' key
        assert isinstance(losses, list)
        assert len(losses) == 1
        first_loss = losses[0]
        assert isinstance(first_loss, dict)
        assert isinstance(first_loss['loss'], torch.Tensor)

        # Exactly one metric entry, holding a tensor under the 'metric' key
        assert isinstance(metrics, list)
        assert len(metrics) == 1
        first_metric = metrics[0]
        assert isinstance(first_metric, dict)
        assert isinstance(first_metric['metric'], torch.Tensor)

        assert isinstance(output, dict)

        pred = output['pred']

        assert isinstance(pred, torch.Tensor)

        # First two dimensions of the prediction must both be 2
        assert pred.size()[0] == 2
        assert pred.size()[1] == 2

    def test_run(self):
        """Runs the workflow twice and checks that its epoch counter reads 1, then 2."""
        workflow = self.validation_workflow

        workflow.run()

        assert workflow.epoch == 1

        workflow.run()

        assert workflow.epoch == 2
Exemple #7
0
class EisenServingHandler(object):
    """
    EisenServingHandler is a custom object to handle inference request within TorchServing. It is usually included
    automatically in the MAR.
    """
    def __init__(self):
        self.model = None
        self.device = None
        self.pre_process_tform = None
        self.post_process_tform = None
        self.metadata = None
        self.initialized = False
        self.input_name_list = []
        self.output_name_list = []

    def initialize(self, ctx):
        """
        Initializes the fields of the EisenServingHandler object based on the context.

        The device and the model directory are read from ``ctx.system_properties`` (keys ``gpu_id`` and
        ``model_dir``). The model directory is expected to contain ``model.pt``, ``pre_process_tform.pkl``,
        ``post_process_tform.pkl`` and ``metadata.json``.

        :param ctx: context of an inference request
        :return: None
        """
        properties = ctx.system_properties

        gpu_id = properties.get("gpu_id")

        # Use CUDA only when it is available AND a GPU id was actually assigned; otherwise the device
        # string would be the invalid "cuda:None"
        if torch.cuda.is_available() and gpu_id is not None:
            self.device = torch.device("cuda:" + str(gpu_id))
        else:
            self.device = torch.device("cpu")

        model_dir = properties.get("model_dir")

        # Model file
        model_pt_path = os.path.join(model_dir, "model.pt")

        # Pre processing chain
        pre_processing_pkl = os.path.join(model_dir, "pre_process_tform.pkl")

        # Post processing chain
        post_processing_pkl = os.path.join(model_dir, "post_process_tform.pkl")

        # unpickle serialized transform chain
        # NOTE(security): unpickling executes arbitrary code; these files must come from a trusted archive
        with open(pre_processing_pkl, "rb") as f:
            self.pre_process_tform = dill.load(f)

        with open(post_processing_pkl, "rb") as f:
            self.post_process_tform = dill.load(f)

        # Metadata about the model
        metadata_json = os.path.join(model_dir, "metadata.json")

        self.metadata = json_file_to_dict(metadata_json)

        self.input_name_list = self.metadata['model_input_list']

        self.output_name_list = self.metadata['model_output_list']

        # deserialize pytorch model directly onto the selected device
        base_model = torch.load(model_pt_path, map_location=self.device)

        self.model = EisenModuleWrapper(base_model, self.input_name_list,
                                        self.output_name_list)

        # put model in eval mode
        self.model.eval()

        logger.debug('Model file %s loaded successfully', model_pt_path)

        self.initialized = True

    def get_metadata(self):
        """
        This function returns metadata about the model as JSON

        :return: list containing a single JSON string
        """
        return [json.dumps(self.metadata)]

    def pre_process(self, data):
        """
        Applies pre-processing transform using de-pickled transform chain in the MAR.

        :param data: dictionary containing a collated batch of data
        :type data: dict

        :return: result of the pre-processing transform chain applied to ``data``
        """
        return self.pre_process_tform(data)

    def inference(self, input_dict):
        """
        Performs prediction using the model. Feeds the necessary information to the model starting from the
        received data and creates an output dictionary as a result.

        The entries of ``input_dict`` named in ``self.model.input_names`` are replaced in place by tensors
        placed on ``self.device``. The entries of the result named in ``self.model.output_names`` are
        converted to numpy arrays.

        :param input_dict: input batch, in form of a dictionary of collated datapoints
        :type input_dict: dict

        :return: dict
        """

        for name in self.model.input_names:
            input_dict[name] = torch.Tensor(input_dict[name]).to(self.device)

        # Gradients are never needed when serving; disabling autograd avoids building the graph
        with torch.no_grad():
            output_dict = self.model(**input_dict)

        for name in self.model.output_names:
            output_dict[name] = output_dict[name].detach().cpu().numpy()

        return output_dict

    def post_process(self, output_dict):
        """
        Applies post-processing transform using de-pickled transform chain in the MAR.

        :param output_dict: dictionary containing the result of inference on a collated batch of data
        :type output_dict: dict

        :return: result of the post-processing transform chain applied to ``output_dict``
        """
        return self.post_process_tform(output_dict)

    def handle(self, data):
        """
        Handles one request.

        Only the entries named in ``self.metadata['inputs']`` are taken from ``data``, and only the entries
        named in ``self.metadata['outputs']`` are serialized into the response.

        :param data: dictionary of data
        :type data: dict

        :return: list containing a single msgpack-encoded buffer
        """
        input_data = {
            spec['name']: data[spec['name']]
            for spec in self.metadata['inputs']
        }

        model_input = self.pre_process(input_data)

        model_out = self.inference(model_input)

        # output dictionary still contains inputs (which may be useful for tforms)
        model_out.update(model_input)

        prediction = self.post_process(model_out)

        # Keep only the declared outputs; a missing declared output raises KeyError here
        output_data = {
            spec['name']: prediction[spec['name']]
            for spec in self.metadata['outputs']
        }

        buffer = msgpack.packb(output_data,
                               default=encode_data,
                               use_bin_type=True)

        return [buffer]
Exemple #8
0
def _build_wrapped_module(spec):
    """
    Instantiates the object described by a configuration entry and wraps it in an EisenModuleWrapper.

    :param spec: configuration entry holding 'type' (import string) and 'params' (keyword arguments, which
        must include 'input_names' and 'output_names')
    :type spec: dict

    :return: EisenModuleWrapper around the instantiated object
    """
    module_class = import_string(spec['type'])

    # Work on a copy so the configuration entry is left untouched and can be read again
    params = dict(spec['params'])

    input_names = params.pop('input_names')
    output_names = params.pop('output_names')

    return EisenModuleWrapper(module=module_class(**params),
                              input_names=input_names,
                              output_names=output_names)


def eisen_training(configuration, epochs, data_dir, artifacts_dir, resume):
    """
    This function parses a configuration file and creates all the necessary objects and workflow components
    to execute training. Everything will be built according to the configuration file.

    The model is instantiated once and shared by every configured phase, so that validation runs on the
    very model that is being trained.

    :param configuration: path of configuration file for the job
    :type configuration: Path
    :param epochs: number of epochs requested for training
    :type epochs: int
    :param data_dir: base path for the data of this job
    :type data_dir: Path
    :param artifacts_dir: base path for the artifacts of this job
    :type artifacts_dir: Path
    :param resume: use pre-existing artifacts to resume training (currently not used by this function)
    :type resume: bool

    :raises ValueError: if at least one epoch is requested but no 'Training' phase is configured

    :return: None
    """

    # Read configuration which is stored in JSON format

    configuration_dictionary = json_file_to_dict(configuration)

    # Getting hyperparameters

    general = configuration_dictionary['General']

    hyperparameters = general.get('Hyperparameters', [])

    model_spec = general.get('Models')[0]

    batch_size = 4
    num_workers = 4

    for entry in hyperparameters:
        if entry['type'] == '.BatchSize':
            batch_size = entry['params']['value']

        if entry['type'] == '.NumWorkers':
            num_workers = entry['params']['value']

        if entry['type'] == '.Seed':
            np.random.seed(entry['params']['value'])
            torch.manual_seed(entry['params']['value'])

    # For each phase of the workflow [training, validation, testing]

    workflows = {}

    # Hooks are only kept referenced here; nothing below calls them directly
    hooks = []

    # Built lazily by the first phase and then reused by the following ones
    model = None

    for phase in configuration_dictionary:
        if phase not in SUPPORTED_PHASES:
            continue

        logging.info('INFO: setting up everything relative to %s', phase)

        # For each subsection in the configuration do appropriate actions
        phase_dictionary = configuration_dictionary[phase]

        # TRANSFORMS
        # instantiate transforms for the data and combine them together

        logging.info('INFO: setting up transforms...')

        readers_list = []

        for reader_spec in phase_dictionary['Readers']:
            logging.debug('DEBUG: setting up readers type %s',
                          reader_spec['type'])

            reader_class = import_string(reader_spec['type'])
            readers_list.append(
                reader_class(data_dir=data_dir, **reader_spec['params']))

        reader = Compose(readers_list)

        transform_list = []

        for transform_spec in phase_dictionary['Transforms']:
            logging.debug('DEBUG: setting up transform type %s',
                          transform_spec['type'])

            transform_class = import_string(transform_spec['type'])
            transform_list.append(transform_class(**transform_spec['params']))

        transform = Compose(transform_list)

        data_pipeline = Compose([reader, transform])

        # DATA
        # Instantiate data-sets and relative data loaders <torch.utils.DataLoader>

        logging.info('INFO: setting up the dataset...')

        dataset_spec = phase_dictionary['Datasets'][0]

        dataset_class = import_string(dataset_spec['type'])

        dataset = dataset_class(transform=data_pipeline,
                                data_dir=data_dir,
                                **dataset_spec['params'])

        data_loader = DataLoader(dataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 num_workers=num_workers)

        # MODEL
        # Instantiate network (once; later phases reuse the same instance)

        logging.info('INFO: setting up the model...')

        if model is None:
            model = _build_wrapped_module(model_spec)

        # Instantiate metrics

        logging.info('INFO: setting up the metrics...')

        metrics = []

        for metric_spec in phase_dictionary['Metrics']:
            logging.debug('DEBUG: setting up metric type %s',
                          metric_spec['type'])

            metrics.append(_build_wrapped_module(metric_spec))

        losses = None
        optimizer = None

        if phase == 'Training':
            # Instantiate losses

            logging.info('INFO: setting up the losses...')

            losses = []

            for loss_spec in phase_dictionary['Losses']:
                logging.debug('DEBUG: setting up loss type %s',
                              loss_spec['type'])

                losses.append(_build_wrapped_module(loss_spec))

            # Instantiate optimizer

            logging.info('INFO: setting up the optimizer...')

            optimizer_spec = phase_dictionary['Optimizer'][0]

            optimizer_class = import_string(optimizer_spec['type'])

            optimizer = optimizer_class(params=model.parameters(),
                                        **optimizer_spec['params'])

        # WORKFLOW
        # Instantiate work-flows

        logging.info('INFO: setting up the workflow...')

        workflow_spec = phase_dictionary['Workflow'][0]

        workflow_class = import_string(workflow_spec['type'])

        workflow = workflow_class(
            model=model,
            losses=losses,
            optimizer=optimizer,
            metrics=metrics,
            data_loader=data_loader,
            **workflow_spec['params'],
        )

        workflows[phase] = workflow

        # HOOKS
        # Instantiate Hooks
        logging.info('INFO: setting up the Hooks...')

        for hook_spec in phase_dictionary['Hooks']:
            logging.debug('DEBUG: setting up hook type %s', hook_spec['type'])

            hook_class = import_string(hook_spec['type'])

            hooks.append(
                hook_class(workflow.id, phase, artifacts_dir,
                           **hook_spec['params']))

    # RUN
    # run training for the requested number of epochs

    logging.info('INFO: running workflows...')

    training_context = workflows.get('Training', None)
    validation_context = workflows.get('Validation', None)

    # Fail with an explicit message instead of an AttributeError on None inside the loop
    if training_context is None and epochs > 0:
        raise ValueError(
            "The configuration does not define a 'Training' phase: nothing to train")

    for epoch in range(epochs):
        logging.info('INFO: running TRAINING epoch %s', epoch)
        training_context.run()

        if validation_context:
            logging.info('INFO: running VALIDATION epoch %s', epoch)

            validation_context.run()