Example #1
    def forward(self, preds: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, Any]:
        pred_predictions = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.predictions_key)
        pred_probabilities = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.probabilities_key)

        predictions: List[List[str]] = []
        for sequence in pred_predictions:
            sequence_predictions: List[str] = []
            for i in range(self.max_sequence_length):
                unit_id = int(sequence[i].item())
                if unit_id < len(self.idx2str):
                    unit_prediction = self.idx2str[unit_id]
                else:
                    unit_prediction = self.unknown_symbol
                sequence_predictions.append(unit_prediction)
            predictions.append(sequence_predictions)

        probabilities, _ = torch.max(pred_probabilities, dim=-1)
        probability = torch.sum(torch.log(probabilities), dim=-1)

        return {
            self.predictions_key: predictions,
            self.probabilities_key: probabilities,
            self.probability_key: probability,
        }
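The sequence-level probability returned above is the sum over time steps of the log of the per-step maximum class probability, i.e. the log-probability of the greedily decoded sequence under a per-step independence assumption. A minimal standalone sketch with made-up numbers:

import torch

# [batch=1, seq=2, vocab=3]; the values are illustrative only
pred_probabilities = torch.tensor([[[0.7, 0.2, 0.1],
                                    [0.1, 0.8, 0.1]]])
step_max, _ = torch.max(pred_probabilities, dim=-1)         # tensor([[0.7000, 0.8000]])
sequence_log_prob = torch.sum(torch.log(step_max), dim=-1)  # log(0.7) + log(0.8) ~= -0.58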
Example #2
def test_output_feature_utils():
    tensor_dict = {}
    output_feature_utils.set_output_feature_tensor(tensor_dict, "feature_1",
                                                   "1", torch.Tensor([1]))
    output_feature_utils.set_output_feature_tensor(tensor_dict, "feature_1",
                                                   "10", torch.Tensor([10]))
    output_feature_utils.set_output_feature_tensor(tensor_dict, "feature_2",
                                                   "2", torch.Tensor([2]))
    output_feature_utils.set_output_feature_tensor(tensor_dict, "feature_2",
                                                   "20", torch.Tensor([20]))

    assert list(tensor_dict.keys()) == [
        "feature_1::1", "feature_1::10", "feature_2::2", "feature_2::20"
    ]
    assert output_feature_utils.get_output_feature_tensor(
        tensor_dict, "feature_1", "1") == torch.Tensor([1])
    assert list(
        output_feature_utils.get_single_output_feature_tensors(
            tensor_dict, "feature_1").keys()) == ["1", "10"]
    assert list(
        output_feature_utils.get_single_output_feature_tensors(
            tensor_dict, "feature_3").keys()) == []
    with pytest.raises(Exception):
        output_feature_utils.get_output_feature_tensor(tensor_dict,
                                                       "feature_1", "2")
Example #3
    def forward(self, preds: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, Any]:
        predictions = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.predictions_key)
        logits = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.logits_key)

        return {self.predictions_key: predictions, self.logits_key: logits}
Example #4
    def forward(self, preds: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, Any]:
        predictions = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.predictions_key)
        probabilities = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.probabilities_key)

        inv_preds = [self.idx2str.get(pred, self.unk) for pred in predictions]

        return {
            self.predictions_key: inv_preds,
            self.probabilities_key: probabilities,
        }
Example #5
    def forward(self, inputs: Dict[str, torch.Tensor], feature_name: str) -> Dict[str, torch.Tensor]:
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, self.logits_key)
        probabilities = torch.sigmoid(logits)

        predictions = torch.greater_equal(probabilities, self.threshold)
        predictions = predictions.type(torch.int64)

        return {self.predictions_key: predictions, self.probabilities_key: probabilities, self.logits_key: logits}
Example #6
    def forward(self, inputs: Dict[str, torch.Tensor], feature_name: str) -> Dict[str, torch.Tensor]:
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, self.logits_key)
        predictions = logits

        if self.clip is not None:
            predictions = torch.clamp(logits, self.clip[0], self.clip[1])
            logger.debug(f"  clipped_predictions: {predictions}")

        return {self.predictions_key: predictions, self.logits_key: logits}
Example #7
    def predictions(self, inputs: Dict[str, torch.Tensor], feature_name: str,
                    **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(
            inputs, feature_name, LOGITS)
        probabilities = torch.sigmoid(logits)
        predictions = probabilities >= self.threshold
        return {
            PROBABILITIES: probabilities,
            PREDICTIONS: predictions,
            LOGITS: logits,
        }
Example #8
    def predictions(self, inputs: Dict[str, torch.Tensor], feature_name: str, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, LOGITS)
        probabilities = torch.softmax(logits, -1)
        predictions = torch.argmax(logits, -1)
        predictions = predictions.long()

        # EXPECTED SHAPE OF RETURNED TENSORS
        # predictions: [batch_size]
        # probabilities: [batch_size, num_classes]
        # logits: [batch_size, num_classes]
        return {PREDICTIONS: predictions, PROBABILITIES: probabilities, LOGITS: logits}
Example #9
    def forward(self, preds: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, Any]:
        predictions = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.predictions_key)
        probabilities = output_feature_utils.get_output_feature_tensor(
            preds, feature_name, self.probabilities_key)

        if self.bool2str is not None:
            predictions = predictions.to(dtype=torch.int32)
            predictions = [
                self.bool2str.get(pred, self.bool2str[0])
                for pred in predictions
            ]

        probabilities = torch.stack([1 - probabilities, probabilities], dim=-1)

        return {
            self.predictions_key: predictions,
            self.probabilities_key: probabilities,
        }
Example #10
    def predictions(self, inputs, feature_name, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(
            inputs, feature_name, LOGITS)
        probabilities = torch.sigmoid(logits)

        predictions = torch.greater_equal(probabilities, self.threshold)
        predictions = predictions.type(torch.int64)

        return {
            PREDICTIONS: predictions,
            PROBABILITIES: probabilities,
            LOGITS: logits
        }
Example #11
    def forward(self, preds: Dict[str, torch.Tensor], feature_name: str) -> Dict[str, Any]:
        predictions = output_feature_utils.get_output_feature_tensor(preds, feature_name, self.predictions_key)
        probabilities = output_feature_utils.get_output_feature_tensor(preds, feature_name, self.probabilities_key)

        inv_preds: List[List[str]] = []
        filtered_probs: List[torch.Tensor] = []
        for sample_idx, sample in enumerate(predictions):
            sample_preds: List[str] = []
            pos_sample_idxs: List[int] = []
            pos_class_idxs: List[int] = []
            for class_idx, is_positive in enumerate(sample):
                if is_positive == 1:
                    sample_preds.append(self.idx2str.get(class_idx, self.unk))
                    pos_sample_idxs.append(sample_idx)
                    pos_class_idxs.append(class_idx)
            inv_preds.append(sample_preds)
            filtered_probs.append(probabilities[pos_sample_idxs, pos_class_idxs])

        return {
            self.predictions_key: inv_preds,
            self.probabilities_key: filtered_probs,
        }
Example #12
    def forward(self, inputs: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, torch.Tensor]:
        logits = output_feature_utils.get_output_feature_tensor(
            inputs, feature_name, self.logits_key)
        probabilities = torch.softmax(logits, -1)
        predictions = torch.argmax(logits, -1)

        # predictions: [batch_size, sequence_length]
        # probabilities: [batch_size, sequence_length, vocab_size]
        # logits: [batch_size, sequence_length, vocab_size]
        return {
            self.predictions_key: predictions,
            self.probabilities_key: probabilities,
            self.logits_key: logits
        }
Example #13
    def predictions(self, inputs: Dict[str, torch.Tensor], feature_name: str, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, LOGITS)
        predictions = logits

        if self.clip is not None:
            if isinstance(self.clip, (list, tuple)) and len(self.clip) == 2:
                predictions = torch.clamp(logits, self.clip[0], self.clip[1])

                logger.debug(f"  clipped_predictions: {predictions}")
            else:
                raise ValueError(
                    "The clip parameter of {} is {}. "
                    "It must be a list or a tuple of length 2.".format(self.feature_name, self.clip)
                )

        return {PREDICTIONS: predictions, LOGITS: logits}
Example #14
    def forward(self, inputs: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, torch.Tensor]:
        logits = output_feature_utils.get_output_feature_tensor(
            inputs, feature_name, self.logits_key)

        if self.calibration_module is not None:
            probabilities = self.calibration_module(logits)
        else:
            probabilities = torch.sigmoid(logits)

        predictions = probabilities >= self.threshold
        return {
            self.probabilities_key: probabilities,
            self.predictions_key: predictions,
            self.logits_key: logits,
        }
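In the branch above, self.calibration_module is expected to map logits directly to probabilities (it stands in for torch.sigmoid). Below is a minimal sketch of one module satisfying that contract, using temperature scaling purely as an illustration; the calibration module actually used by the source is not shown in this collection:

import torch
from torch import nn


class SigmoidTemperatureScaling(nn.Module):
    """Illustrative calibration module: logits in, calibrated probabilities out."""

    def __init__(self, temperature: float = 1.5):
        super().__init__()
        self.temperature = nn.Parameter(torch.tensor(temperature))

    def forward(self, logits: torch.Tensor) -> torch.Tensor:
        # Same output contract as torch.sigmoid(logits) in the else branch above.
        return torch.sigmoid(logits / self.temperature)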
Example #15
    def forward(self, inputs: Dict[str, torch.Tensor],
                feature_name: str) -> Dict[str, torch.Tensor]:
        logits = output_feature_utils.get_output_feature_tensor(
            inputs, feature_name, self.logits_key)
        probabilities = torch.softmax(logits, -1)
        predictions = torch.argmax(logits, -1)
        predictions = predictions.long()

        # EXPECTED SHAPE OF RETURNED TENSORS
        # predictions: [batch_size]
        # probabilities: [batch_size, num_classes]
        # logits: [batch_size, num_classes]
        return {
            self.predictions_key: predictions,
            self.probabilities_key: probabilities,
            self.logits_key: logits
        }
Example #16
    def predictions(self, inputs, feature_name, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, LOGITS)
Example #17
    def logits(self, inputs, **kwargs):  # hidden
        hidden = inputs[HIDDEN]
        return self.decoder_obj(hidden)

    def predictions(self, inputs, feature_name, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, LOGITS)
        probabilities = torch.sigmoid(logits)

        predictions = torch.greater_equal(probabilities, self.threshold)
        predictions = predictions.type(torch.int64)

        return {PREDICTIONS: predictions, PROBABILITIES: probabilities, LOGITS: logits}

    def loss_kwargs(self):
        return self.loss
Example #18
    def predictions(self, inputs: Dict[str, torch.Tensor], feature_name: str, **kwargs):
        logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, LOGITS)
Example #19
def test_torchscript(csv_filename, should_load_model):
    #######
    # Setup
    #######
    with tempfile.TemporaryDirectory() as tmpdir:
        dir_path = tmpdir
        data_csv_path = os.path.join(tmpdir, csv_filename)
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        audio_dest_folder = os.path.join(tmpdir, "generated_audio")

        # Wide mix of input feature types; multiple output feature types
        input_features = [
            binary_feature(),
            numerical_feature(),
            category_feature(vocab_size=3),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            vector_feature(),
            image_feature(image_dest_folder),
            audio_feature(audio_dest_folder),
            timeseries_feature(),
            date_feature(),
            date_feature(),
            h3_feature(),
            set_feature(vocab_size=3),
            bag_feature(vocab_size=3),
        ]

        output_features = [
            category_feature(vocab_size=3),
            binary_feature(),
            numerical_feature(),
            set_feature(vocab_size=3),
            vector_feature()
            # TODO(#1333): Re-enable.
            # sequence_feature(vocab_size=3),
            # text_feature(vocab_size=3),
        ]

        predictions_column_name = "{}_predictions".format(output_features[0]["name"])

        # Generate test data
        data_csv_path = generate_data(input_features, output_features, data_csv_path)

        #############
        # Train model
        #############
        backend = LocalTestBackend()
        config = {"input_features": input_features, "output_features": output_features, "training": {"epochs": 2}}
        ludwig_model = LudwigModel(config, backend=backend)
        ludwig_model.train(
            dataset=data_csv_path,
            skip_save_training_description=True,
            skip_save_training_statistics=True,
            skip_save_model=True,
            skip_save_progress=True,
            skip_save_log=True,
            skip_save_processed_input=True,
        )

        ###################
        # save Ludwig model
        ###################
        ludwigmodel_path = os.path.join(dir_path, "ludwigmodel")
        shutil.rmtree(ludwigmodel_path, ignore_errors=True)
        ludwig_model.save(ludwigmodel_path)

        ###################
        # load Ludwig model
        ###################
        if should_load_model:
            ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)

        ######################################
        # get original predictions and weights
        ######################################
        original_predictions_df, _ = ludwig_model.predict(dataset=data_csv_path)
        original_weights = deepcopy(list(ludwig_model.model.parameters()))

        ##################
        # save torchscript
        ##################
        torchscript_path = os.path.join(dir_path, "torchscript")
        shutil.rmtree(torchscript_path, ignore_errors=True)
        ludwig_model.model.save_torchscript(torchscript_path)

        ###################################################
        # load Ludwig model, obtain predictions and weights
        ###################################################
        ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)
        loaded_prediction_df, _ = ludwig_model.predict(dataset=data_csv_path)
        loaded_weights = deepcopy(list(ludwig_model.model.parameters()))

        #####################################################
        # restore torchscript, obtain predictions and weights
        #####################################################
        training_set_metadata_json_fp = os.path.join(ludwigmodel_path, TRAIN_SET_METADATA_FILE_NAME)

        dataset, training_set_metadata = preprocess_for_prediction(
            ludwig_model.config,
            dataset=data_csv_path,
            training_set_metadata=training_set_metadata_json_fp,
            backend=backend,
        )

        restored_model = torch.jit.load(torchscript_path)

        # Check the outputs for one of the features for correctness
        # Here we choose the first output feature (categorical)
        of_name = list(ludwig_model.model.output_features.keys())[0]

        data_to_predict = {
            name: torch.from_numpy(dataset.dataset[feature.proc_column])
            for name, feature in ludwig_model.model.input_features.items()
        }

        # Get predictions from restored torchscript.
        logits = restored_model(data_to_predict)
        restored_predictions = torch.argmax(
            output_feature_utils.get_output_feature_tensor(logits, of_name, "logits"), -1
        )

        restored_predictions = [training_set_metadata[of_name]["idx2str"][idx] for idx in restored_predictions]

        restored_weights = deepcopy(list(restored_model.parameters()))

        #########
        # Cleanup
        #########
        shutil.rmtree(ludwigmodel_path, ignore_errors=True)
        shutil.rmtree(torchscript_path, ignore_errors=True)

        ###############################################
        # Check if weights and predictions are the same
        ###############################################

        # Check that weight values match the original model.
        assert utils.is_all_close(original_weights, loaded_weights)
        assert utils.is_all_close(original_weights, restored_weights)

        # Check that predictions are identical to the original model.
        assert np.all(original_predictions_df[predictions_column_name] == loaded_prediction_df[predictions_column_name])

        assert np.all(original_predictions_df[predictions_column_name] == restored_predictions)
Example #20
    def forward(self, preds: Dict[str, torch.Tensor], feature_name: str) -> Dict[str, Any]:
        predictions = output_feature_utils.get_output_feature_tensor(preds, feature_name, self.predictions_key)

        return {self.predictions_key: self.numeric_transformer.inverse_transform_inference(predictions)}