Example #1
    def test_reasonable_predictions(self):
        model = Comparison(**self.default_config(n_epochs=3))

        # Generate a synthetic dataset: "similar" pairs are drawn from the same word list,
        # "different" pairs take one word from each list
        animals = ["dog", "cat", "horse", "cow", "pig", "sheep", "goat", "chicken", "guinea pig", "donkey", "turkey", "duck", "camel", "goose", "llama", "rabbit", "fox"]
        numbers = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen"]

        n_per = 50
        similar = []
        different = []
        for dataset in [animals, numbers]:
            for i in range(n_per // 2):
                similar.append([random.choice(dataset), random.choice(dataset)])
        for i in range(n_per):
            different.append([random.choice(animals), random.choice(numbers)])
        
        targets = np.asarray(["similar"] * len(similar) + ["different"] * len(different))
        data = similar + different

        x_tr, x_te, t_tr, t_te = train_test_split(data, targets, test_size=0.3)
        model.finetune(*list_transpose(x_tr), t_tr)

        predictions = model.predict(*list_transpose(x_te))
        accuracy = np.mean([pred == true for pred, true in zip(predictions, t_te)])
        naive_baseline = max(np.mean(targets == "similar"), np.mean(targets == "different"))
        self.assertGreater(accuracy, naive_baseline)
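The snippet above leans on list_transpose to flip between [batch, n_inputs] and [n_inputs, batch] layouts before unpacking the columns into finetune and predict. Its implementation is not shown in these examples; a minimal sketch of the assumed behaviour:

    # Sketch of the assumed behaviour of list_transpose (not the library's actual code):
    # it swaps the two leading dimensions of a nested list.
    def list_transpose(nested):
        # [["a1", "b1"], ["a2", "b2"]] -> [["a1", "a2"], ["b1", "b2"]]
        return [list(column) for column in zip(*nested)]

    assert list_transpose([["dog", "cat"], ["one", "two"]]) == [["dog", "one"], ["cat", "two"]]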
Example #2
    def finetune_grid_search(cls,
                             Xs,
                             Y,
                             *,
                             test_size,
                             config=None,
                             eval_fn=None,
                             probs=False,
                             return_all=False):
        """
        Performs grid search over config items defined using "GridSearchable" objects and returns either full results or
        the config object that relates to the best results. The default config contains grid searchable objects for the
        most important parameters to search over.

        :param Xs: Input text. Either [num_samples] or [sequence, num_samples] for single or multi input models respectively.
        :param Y: Targets, A list of targets, [num_samples] that correspond to each sample in Xs.
        :param test_size: Int or float. If an int is given this number of samples is used to validate, if a float is
         given then that fraction of samples is used.
        :param config: A config object, or None to use the default config.
        :param eval_fn: An eval function that takes 2 inputs (prediction, truth) and returns a float, with a max value being desired.
        :param probs: If true, eval_fn is passed probability outputs from predict_proba, otherwise the output of predict is used.
        :param return_all: If True, all results are returned, if False, only the best config is returned.
        :return: default is to return the best config object. If return_all is true, it returns a list of tuples of the
            form [(config, eval_fn output), ... ]

        """
        if isinstance(Xs[0], str):
            Xs = [Xs]
        config = config or get_default_config()
        config.val_size = 0.0
        eval_fn = eval_fn or cls.get_eval_fn()

        trainXs, testXs, trainY, testY = train_test_split(list_transpose(Xs),
                                                          Y,
                                                          test_size=test_size,
                                                          shuffle=True)
        trainXs = list_transpose(trainXs)
        testXs = list_transpose(testXs)
        gs = config.get_grid_searchable()
        ranged_keys = gs.keys()
        ranged_iterators = gs.values()
        grid_gen = itertools.product(*ranged_iterators)
        results = []
        for grid_item in grid_gen:
            config_ = deepcopy(config)
            config_.update(dict(zip(ranged_keys, grid_item)))
            instance = cls(config=config_)
            instance.finetune(*trainXs, Y=trainY)
            if probs:
                res = instance.predict_proba(*testXs)
            else:
                res = instance.predict(*testXs)
            results.append((config_, eval_fn(res, testY)))
            del instance

        if return_all:
            return results
        return max(results, key=lambda x: x[1])[0]
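A minimal usage sketch of the classmethod above, with a hypothetical Classifier model and toy data (both are assumptions, not part of the source): one instance is trained per grid point and the configs are ranked by eval_fn on the held-out split.

    # Hypothetical usage sketch; Classifier and the toy data are assumptions.
    texts = ["great film", "terrible plot", "loved every minute", "fell asleep halfway through"]
    labels = ["positive", "negative", "positive", "negative"]

    # Returns the config object that scored best under the default eval_fn.
    best_config = Classifier.finetune_grid_search(texts, labels, test_size=0.25)

    # Re-train a fresh model on all of the data with the winning configuration.
    model = Classifier(config=best_config)
    model.finetune(texts, Y=labels)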
Example #3
    def finetune_grid_search(cls, Xs, Y, *, test_size, config=None, eval_fn=None, probs=False, return_all=False):
        """
        Performs grid search over config items defined using "GridSearchable" objects and returns either full results or
        the config object that relates to the best results. The default config contains grid searchable objects for the
        most important parameters to search over.

        :param Xs: Input text. Either [num_samples] or [sequence, num_samples] for single or multi input models respectively.
        :param Y: Targets, A list of targets, [num_samples] that correspond to each sample in Xs.
        :param test_size: Int or float. If an int is given this number of samples is used to validate, if a float is
         given then that fraction of samples is used.
        :param config: A config object, or None to use the default config.
        :param eval_fn: An eval function that takes 2 inputs (prediction, truth) and returns a float, with a max value being desired.
        :param probs: If true, eval_fn is passed probability outputs from predict_proba, otherwise the output of predict is used.
        :param return_all: If True, all results are returned, if False, only the best config is returned.
        :return: default is to return the best config object. If return_all is true, it returns a list of tuples of the
            form [(config, eval_fn output), ... ]

        """
        if isinstance(Xs[0], str):
            Xs = [Xs]
        config = config or get_default_config()
        config.val_size = 0.0
        eval_fn = eval_fn or cls.get_eval_fn()

        trainXs, testXs, trainY, testY = train_test_split(list_transpose(Xs), Y, test_size=test_size, shuffle=True)
        trainXs = list_transpose(trainXs)
        testXs = list_transpose(testXs)
        gs = config.get_grid_searchable()
        ranged_keys = gs.keys()
        ranged_iterators = gs.values()
        grid_gen = itertools.product(*ranged_iterators)
        results = []
        for grid_item in grid_gen:
            config_ = deepcopy(config)
            config_.update(dict(zip(ranged_keys, grid_item)))
            instance = cls(config=config_)
            instance.finetune(*trainXs, Y=trainY)
            if probs:
                res = instance.predict_proba(*testXs)
            else:
                res = instance.predict(*testXs)
            results.append((config_, eval_fn(res, testY)))
            del instance

        if return_all:
            return results
        return max(results, key=lambda x: x[1])[0]
Example #4
    def predict_proba(self, questions, answers):
        """
        Produces a probability distribution over classes for each example in X.


        :param question: List or array of text, shape [batch]
        :param answers: List or array of text, shape [batch, n_answers]
        :returns: list of dictionaries.  Each dictionary maps from a class label to its assigned class probability.
        """
        answers = list_transpose(answers)
        raw_probas = self._predict_proba(zip(questions, answers))

        formatted_predictions = []
        for probas, *answers_per_sample in zip(raw_probas, *answers):
            formatted_predictions.append(dict(zip(answers_per_sample, probas)))
        return formatted_predictions
Example #5
    def predict(self, question, answers, max_length=None):
        """
        Produces a list of most likely class labels as determined by the fine-tuned model.


        :param question: List or array of text, shape [batch]
        :param answers: List or array of text, shape [n_answers, batch]
        :param max_length: the number of byte-pair encoded tokens to be included in the document representation.
                           Providing more than `max_length` tokens as input will result in truncation.
        :returns: list of class labels.
        """
        answers = list_transpose(answers)
        raw_ids = BaseModel.predict(self,
                                    question,
                                    answers,
                                    max_length=max_length)
        return [ans[i] for ans, i in zip(zip(*answers), raw_ids)]
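A rough usage sketch of predict above (the data are assumptions, and `model` is presumed to be an already fine-tuned multiple-choice model such as the one shown in the finetune example further down):

    # Hypothetical usage sketch; assumes `model` is an already fine-tuned multiple-choice model.
    questions = ["Which animal barks?", "Which number comes after two?"]
    answers = [["cat", "dog", "horse"],     # candidate answers for questions[0]
               ["one", "three", "five"]]    # candidate answers for questions[1]

    predictions = model.predict(questions, answers)
    # One chosen answer string per question, e.g. ["dog", "three"]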
Example #6
    def predict_proba(self, question, answers, max_length=None):
        """
        Produces a probability distribution over classes for each example in X.


        :param question: List or array of text, shape [batch]
        :param answers: List or array of text, shape [n_answers, batch]
        :param max_length: the number of byte-pair encoded tokens to be included in the document representation.
                           Providing more than `max_length` tokens as input will result in truncation.
        :returns: list of dictionaries.  Each dictionary maps from a class label to its assigned class probability.
        """
        answers = list_transpose(answers)
        raw_probas = self._predict_proba(question, answers, max_length)

        formatted_predictions = []
        for probas, *answers_per_sample in zip(raw_probas, *answers):
            formatted_predictions.append(dict(zip(answers_per_sample, probas)))
        return formatted_predictions
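To make the return format concrete, a small illustration of the formatting loop at the end (toy probabilities, no model involved; assumes list_transpose swaps the leading two dimensions as sketched earlier):

    raw_probas = [[0.1, 0.8, 0.1], [0.2, 0.7, 0.1]]                  # one row of probabilities per example
    answers = [["cat", "dog", "horse"], ["one", "three", "five"]]    # [batch, n_answers]
    answers = list_transpose(answers)                                # -> [n_answers, batch]

    formatted = [dict(zip(answers_per_sample, probas))
                 for probas, *answers_per_sample in zip(raw_probas, *answers)]
    # formatted == [{"cat": 0.1, "dog": 0.8, "horse": 0.1},
    #               {"one": 0.2, "three": 0.7, "five": 0.1}]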
Example #7
    def finetune(self,
                 question,
                 answers,
                 correct_answer,
                 batch_size=None,
                 fit_lm_only=False):
        """
        :param question: List or array of text, shape [batch]
        :param correct_answer: List or array of correct answers [batch] either in the format of an idx to the correct
                answer or a string of the correct answer.
        :param answers: List or array of text, shape [batch, n_answers], must contain the correct answer for each entry.
        :param batch_size: integer number of examples per batch. When N_GPUS > 1, this number
                           corresponds to the number of training examples provided to each GPU.
        """
        answer_idx = []
        if not len(correct_answer) == len(answers) == len(question):
            raise ValueError(
                "question, answers, and correct_answer are not all the same length: {}, {}, {}"
                .format(len(question), len(answers), len(correct_answer)))
        for correct, others in zip(correct_answer, answers):
            if isinstance(correct, int):
                if not 0 <= correct < len(others):
                    raise ValueError(
                        "Correct answer index {} is out of range for {} possible answers"
                        .format(correct, len(others)))
                answer_idx.append(correct)
            else:
                try:
                    ans_idx = others.index(correct)
                    answer_idx.append(ans_idx)
                except ValueError:
                    raise ValueError(
                        "Correct answer {} is not contained in possible answers {}"
                        .format(correct, others))

        answers = list_transpose(answers)
        self.num_answers = len(answers)
        arr_encoded = self._text_to_ids(question, answers)
        labels = None if fit_lm_only else answer_idx
        return self._training_loop(arr_encoded,
                                   Y=labels,
                                   batch_size=batch_size)
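A short usage sketch of finetune above (MultipleChoice and the data are assumptions, not part of the source): because each entry of correct_answer is validated independently, it may be given either as the index of the correct option or as the answer string itself.

    # Hypothetical usage sketch; MultipleChoice and the data are assumptions.
    questions = ["Which animal barks?", "Which number comes after two?"]
    answers = [["cat", "dog", "horse"],
               ["one", "three", "five"]]

    model = MultipleChoice()
    # Entries of correct_answer may be answer strings ("dog") or indices into
    # the corresponding answer list (1 -> "three"); both resolve to the same label format.
    model.finetune(questions, answers, correct_answer=["dog", 1])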