Beispiel #1
0
    def __init__(self, model: ECD, config: Dict[str, Any],
                 training_set_metadata: Dict[str, Any]):
        """Wrap a scripted model with per-feature pre/post-processing modules.

        Args:
            model: model to wrap; it is moved to the CPU and converted with
                ``to_torchscript()``. Its ``output_features`` mapping supplies
                the prediction modules.
            config: mapping whose ``"input_features"`` and
                ``"output_features"`` entries are iterated; every entry is
                indexed by ``NAME`` and ``TYPE``.
            training_set_metadata: mapping indexed by feature name; each value
                is handed to that feature's ``create_preproc_module`` or
                ``create_postproc_module``.
        """
        super().__init__()

        # The model is moved to the CPU before it is scripted.
        model.cpu()
        self.model = model.to_torchscript()

        # Resolve each configured input feature through the input registry.
        resolved_inputs = {}
        for feature_config in config["input_features"]:
            name = feature_config[NAME]
            resolved_inputs[name] = get_from_registry(feature_config[TYPE],
                                                      input_type_registry)

        preproc = {}
        for name, resolved in resolved_inputs.items():
            preproc[name] = resolved.create_preproc_module(
                training_set_metadata[name])
        self.preproc_modules = nn.ModuleDict(preproc)

        # Resolve each configured output feature through the output registry.
        resolved_outputs = {}
        for feature_config in config["output_features"]:
            name = feature_config[NAME]
            resolved_outputs[name] = get_from_registry(feature_config[TYPE],
                                                       output_type_registry)

        # Prediction modules come from the (unscripted) model's own output
        # features, not from the registry entries resolved above.
        predict = {}
        for name, output_feature in model.output_features.items():
            predict[name] = output_feature.prediction_module
        self.predict_modules = nn.ModuleDict(predict)

        postproc = {}
        for name, resolved in resolved_outputs.items():
            postproc[name] = resolved.create_postproc_module(
                training_set_metadata[name])
        self.postproc_modules = nn.ModuleDict(postproc)
Beispiel #2
0
def test_tied_micro_level(input_feature_options):
    """Check that two input features share an encoder exactly when tied.

    ``input_feature_options`` supplies ``feature_type``, optional
    ``feature_options`` merged into both feature configs, and the
    ``tie_features`` flag that decides whether the second feature is tied to
    the first.
    """
    # build the two input feature configs, which differ only by name
    input_feature_configs = []
    for feature_name in ("input_feature_1", "input_feature_2"):
        feature_config = {
            "name": feature_name,
            "type": input_feature_options.feature_type,
        }
        if input_feature_options.feature_options is not None:
            feature_config.update(input_feature_options.feature_options)
        input_feature_configs.append(feature_config)

    # add tied option to the second feature
    if input_feature_options.tie_features:
        input_feature_configs[1]["tied"] = "input_feature_1"

    input_features = ECD.build_inputs(input_feature_configs)

    first_encoder = input_features["input_feature_1"].encoder_obj
    second_encoder = input_features["input_feature_2"].encoder_obj

    if input_feature_options.tie_features:
        # should be same encoder
        assert first_encoder is second_encoder
    else:
        # no tied parameter, encoders should be different
        assert first_encoder is not second_encoder
Beispiel #3
0
    def __init__(self, model: ECD, batch_size=128, horovod=None, debug=False, **kwargs):
        """Store prediction settings and place the model on the chosen device.

        Args:
            model: model to run; moved with ``model.to(self.device)``.
            batch_size: stored as ``self._batch_size``.
            horovod: stored as ``self._horovod``.
            debug: stored as ``self._debug``.
            **kwargs: accepted but not used by this constructor.
        """
        self._batch_size = batch_size
        self._horovod = horovod
        self._debug = debug

        # Use CUDA when torch reports it as available, otherwise the CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.model = model.to(self.device)
Beispiel #4
0
 def create_model(model_definition):
     """Build an ``ECD`` from the feature and combiner sections of a definition.

     ``model_definition`` is indexed by ``'input_features'``, ``'combiner'``
     and ``'output_features'``; the constructed ``ECD`` is returned.
     """
     # todo: support loading other model types based on definition
     inputs_def = model_definition['input_features']
     combiner_def = model_definition['combiner']
     outputs_def = model_definition['output_features']
     return ECD(
         input_features_def=inputs_def,
         combiner_def=combiner_def,
         output_features_def=outputs_def,
     )
Beispiel #5
0
def tune_learning_rate_fn(
    dataset: RayDataset,
    config: Dict[str, Any],
    data_loader_kwargs: Dict[str, Any] = None,
    executable_kwargs: Dict[str, Any] = None,
    model: ECD = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
) -> float:
    """Run learning-rate tuning for ``model`` on a shard of ``dataset``.

    Args:
        dataset: dataset whose ``pipeline(shuffle=False, ...)`` feeds the shard.
        config: passed through to ``trainer.tune_learning_rate``.
        data_loader_kwargs: extra keyword arguments for ``dataset.pipeline``;
            ``None`` is treated as no extra arguments.
        executable_kwargs: extra keyword arguments for ``RemoteTrainer``;
            ``None`` is treated as no extra arguments.
        model: model to tune; moved to the device from ``get_torch_device()``.
        training_set_metadata: passed to ``RayDatasetShard``.
        features: passed to ``RayDatasetShard``.
        **kwargs: forwarded to ``trainer.tune_learning_rate``.

    Returns:
        Whatever ``trainer.tune_learning_rate`` returns.
    """
    # ``**None`` raises TypeError, so omitted kwargs dicts must become empty
    # mappings before they are expanded below.
    if data_loader_kwargs is None:
        data_loader_kwargs = {}
    if executable_kwargs is None:
        executable_kwargs = {}

    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        pipe = dataset.pipeline(shuffle=False, **data_loader_kwargs)
        train_shard = RayDatasetShard(
            pipe,
            features,
            training_set_metadata,
        )

        device = get_torch_device()
        model = model.to(device)

        trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
        return trainer.tune_learning_rate(config, train_shard, **kwargs)
    finally:
        # Always release cached GPU memory and shut horovod down, even when
        # tuning fails.
        torch.cuda.empty_cache()
        hvd.shutdown()
Beispiel #6
0
 def create_model(model_definition, random_seed=default_random_seed):
     """Build an ``ECD`` from a model definition and a random seed.

     ``model_definition`` is indexed by ``'input_features'``, ``'combiner'``
     and ``'output_features'``; ``random_seed`` is forwarded unchanged to the
     ``ECD`` constructor.
     """
     # todo: support loading other model types based on definition
     inputs_def = model_definition['input_features']
     combiner_def = model_definition['combiner']
     outputs_def = model_definition['output_features']
     return ECD(
         input_features_def=inputs_def,
         combiner_def=combiner_def,
         output_features_def=outputs_def,
         random_seed=random_seed,
     )
Beispiel #7
0
    def batch_predict(
        self,
        model: ECD,
        dataset: Dataset,
        dataset_name: str = None,
    ):
        """Run ``model.predict_step`` over every batch and gather the results.

        Args:
            model: model whose ``input_features`` select the batch columns and
                whose ``predict_step`` produces the per-batch predictions.
            dataset: dataset providing ``initialize_batcher``.
            dataset_name: optional name shown in the progress-bar description.

        Returns:
            The result of ``from_numpy_dataset`` applied to a mapping from
            ``"<output feature>_<prediction name>"`` to the concatenated
            predictions; names in ``EXCLUE_PRED_SET`` are left out.
        """
        collected = defaultdict(list)

        with dataset.initialize_batcher(self._batch_size,
                                        should_shuffle=False,
                                        horovod=self._horovod) as batcher:

            # Only the coordinator shows a progress bar.
            bar = None
            if self.is_coordinator():
                if dataset_name is None:
                    description = 'Prediction'
                else:
                    description = 'Prediction {0: <5.5}'.format(dataset_name)
                bar = tqdm(desc=description,
                           total=batcher.steps_per_epoch,
                           file=sys.stdout,
                           disable=is_progressbar_disabled())

            while not batcher.last_batch():
                batch = batcher.next_batch()

                # Map each input feature name to its processed batch column.
                inputs = {}
                for input_feature in model.input_features.values():
                    input_name = input_feature.feature_name
                    inputs[input_name] = batch[input_feature.proc_column]

                step_outputs = model.predict_step(inputs)

                # accumulate predictions from batch for each output feature
                for of_name, of_preds in step_outputs.items():
                    for pred_name, pred_values in of_preds.items():
                        if pred_name in EXCLUE_PRED_SET:
                            continue
                        collected[f'{of_name}_{pred_name}'].append(pred_values)

                if self.is_coordinator():
                    bar.update(1)

            if self.is_coordinator():
                bar.close()

        # consolidate predictions from each batch to a single tensor
        for key in list(collected):
            collected[key] = tf.concat(collected[key], axis=0).numpy()

        return from_numpy_dataset(collected)
Beispiel #8
0
    def _predict(self, model: ECD, batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        """Run one prediction step on a batch.

        Each input feature's processed column is read from ``batch``, wrapped
        with ``torch.from_numpy`` and moved to ``self.device`` before the
        model is called.

        Params:
            model: ECD model
            batch: batch of data, indexed by each input feature's ``proc_column``

        Returns:
            predictions: the result of ``model.predict_step`` on the inputs
        """
        inputs = {}
        for input_feature in model.input_features.values():
            input_name = input_feature.feature_name
            column = batch[input_feature.proc_column]
            inputs[input_name] = torch.from_numpy(column).to(self.device)

        return model.predict_step(inputs)
def test_number_input_feature(
    number_config: Dict,
) -> None:
    """Build a number input feature and check one forward pass.

    The feature is built from a copy of ``number_config`` with defaults
    populated, moved to ``DEVICE``, and called on a random float tensor of
    ``BATCH_SIZE`` values. The encoder output must have shape
    ``(BATCH_SIZE, *output_shape)``.
    """
    # setup number input feature definition
    number_def = deepcopy(number_config)

    # pickup any other missing parameters
    NumberInputFeature.populate_defaults(number_def)

    # ensure no exceptions raised during build
    input_feature_obj = ECD.build_single_input(number_def, None).to(DEVICE)

    # check one forward pass through input feature; the batch dimension uses
    # BATCH_SIZE so it always agrees with the shape asserted below
    input_tensor = torch.rand(BATCH_SIZE, dtype=torch.float32).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (BATCH_SIZE, *input_feature_obj.output_shape)
def test_category_input_feature(
    category_config: Dict,
    encoder: str,
) -> None:
    """Build a category input feature and check one forward pass.

    The feature is built from a copy of ``category_config`` with ``encoder``
    set and defaults populated, then called on a random int32 tensor of
    ``BATCH_SIZE`` values drawn from ``[0, 3)``. The encoder output must have
    shape ``(BATCH_SIZE, *output_shape)``.
    """
    # setup category input feature definition
    category_def = deepcopy(category_config)
    category_def["encoder"] = encoder

    # pickup any other missing parameters
    CategoryInputFeature.populate_defaults(category_def)

    # ensure no exceptions raised during build; the module is moved to DEVICE
    # so it lives on the same device as the input tensor below
    input_feature_obj = ECD.build_single_input(category_def, None).to(DEVICE)

    # check one forward pass through input feature
    input_tensor = torch.randint(0, 3, size=(BATCH_SIZE,), dtype=torch.int32).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (BATCH_SIZE, *input_feature_obj.output_shape)
Beispiel #11
0
def test_image_input_feature(image_config: Dict, encoder: str, height: int, width: int, num_channels) -> None:
    """Build an image input feature and check one forward pass.

    The feature is built from a copy of ``image_config`` overridden with the
    given encoder and image dimensions, then called on a random uint8 tensor
    of shape ``(BATCH_SIZE, num_channels, height, width)``. The encoder output
    must have shape ``(BATCH_SIZE, *output_shape)``.
    """
    # setup image input feature definition
    image_def = deepcopy(image_config)
    overrides = (
        ("encoder", encoder),
        ("height", height),
        ("width", width),
        ("num_channels", num_channels),
    )
    for option, value in overrides:
        image_def[option] = value

    # pickup any other missing parameters
    ImageInputFeature.populate_defaults(image_def)

    # ensure no exceptions raised during build
    input_feature_obj = ECD.build_single_input(image_def, None)

    # check one forward pass through input feature
    image_shape = (BATCH_SIZE, num_channels, height, width)
    input_tensor = torch.randint(0, 256, size=image_shape, dtype=torch.uint8)

    encoder_output = input_feature_obj(input_tensor)
    expected_shape = (BATCH_SIZE, *input_feature_obj.output_shape)
    assert encoder_output["encoder_output"].shape == expected_shape
Beispiel #12
0
def test_set_input_feature(set_config: Dict, ) -> None:
    """Build a set input feature and check one forward pass.

    The feature is built from a copy of ``set_config`` with defaults
    populated, moved to ``DEVICE``, and called on a random 0/1 int64 tensor
    of shape ``(BATCH_SIZE, len(vocab))``. The encoder output must have shape
    ``(BATCH_SIZE, *output_shape)``.
    """
    # setup set input feature definition
    set_def = deepcopy(set_config)

    # pickup any other missing parameters
    SetInputFeature.populate_defaults(set_def)

    # ensure no exceptions raised during build
    input_feature_obj = ECD.build_single_input(set_def, None).to(DEVICE)

    # check one forward pass through input feature
    membership_shape = (BATCH_SIZE, len(set_def["vocab"]))
    input_tensor = torch.randint(0, 2, size=membership_shape, dtype=torch.int64).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    expected_shape = (BATCH_SIZE, *input_feature_obj.output_shape)
    assert encoder_output["encoder_output"].shape == expected_shape
Beispiel #13
0
 def __init__(self, model: ECD):
     """Capture what is needed to refer to ``model`` later.

     Stores the model's type as ``self.cls``, the result of
     ``model.get_args()`` as ``self.args``, and the handle returned by
     ``ray.put`` for the weights buffer as ``self.state``.
     """
     # The weights buffer is produced first, before the model is inspected.
     weights_buffer = save_weights_to_buffer(model)

     self.cls = type(model)
     self.args = model.get_args()

     self.state = ray.put(weights_buffer)
Beispiel #14
0
 def __init__(self, model: ECD, **predictor_kwargs):
     """Keep the predictor settings and a CPU copy reference of the model.

     Args:
         model: model to predict with; stored as the result of ``model.cpu()``.
         **predictor_kwargs: stored as-is; ``"batch_size"`` (default 128)
             is also exposed as ``self.batch_size``.
     """
     self.predictor_kwargs = predictor_kwargs
     self.batch_size = predictor_kwargs.get("batch_size", 128)

     # No actor handles are registered at construction time.
     self.actor_handles = []

     self.model = model.cpu()
Beispiel #15
0
 def __init__(self, model: ECD, **predictor_kwargs):
     """Keep the predictor settings and place the model on the chosen device.

     Args:
         model: model to predict with; moved with ``model.to(self.device)``.
         **predictor_kwargs: stored as-is; ``"batch_size"`` (default 128)
             is also exposed as ``self.batch_size``.
     """
     self.predictor_kwargs = predictor_kwargs
     self.batch_size = predictor_kwargs.get("batch_size", 128)

     # No actor handles are registered at construction time.
     self.actor_handles = []

     # Use CUDA when torch reports it as available, otherwise the CPU.
     if torch.cuda.is_available():
         self.device = "cuda"
     else:
         self.device = "cpu"
     self.model = model.to(self.device)