Example #1
def test_evaluate_on_string_default_args_not_result_per_line(
        mocker: MockFixture):
    # given
    text = 'MyClass\n{'
    result = Mock(spec=EvaluationResult)
    scenarios = {EvaluationScenario('full_token_entropy'): result}

    trained_model_mock = Mock(spec=TrainedModel)

    mocked_metric = Mock(spec=callable,
                         return_value={TokenTypeSubset.full_set(): result})
    mocker.patch('langmodels.evaluation.evaluation._get_metric_by_name',
                 new=lambda x: mocked_metric)
    mocker.patch('langmodels.evaluation.evaluation.get_metrics_name',
                 new=lambda x: 'full_token_entropy')

    # when
    actual = evaluate_model_on_string(trained_model_mock,
                                      text,
                                      result_per_line=False)

    # then
    mocked_metric.assert_called_with(trained_model_mock, text, 'java', False,
                                     None, sys.maxsize)
    assert actual == Evaluation(text, scenarios)
Example #2
def test_bin_entropy_simple_args():
    trained_model_mock = Mock(spec=TrainedModel)
    entropies = [1.0, 2.0]
    prep_text = ['My', 'Class</t>']
    types = [SplitContainer, SplitContainer]
    context_lengths = [1, 2]
    trained_model_mock.get_entropies_for_text.return_value = (entropies,
                                                              prep_text, types,
                                                              context_lengths)
    token_set = TokenTypeSubset.Builder().add(SplitContainer).build()

    expected = {
        token_set:
        EvaluationResult(prep_text, list(map(lambda tt: tt.__name__, types)),
                         entropies, 1.5, [(0.0, 0), (1.0, 1), (2.0, 1),
                                          (0.0, 0)])
    }
    actual = bin_entropy(trained_model_mock,
                         'MyClass',
                         extension=any_1,
                         append_eof=False,
                         token_type_subsets={token_set},
                         max_context_allowed=4,
                         full_tokens=False)

    assert actual == expected
Example #3
# Frozen dataclass: instances are immutable, hashable, and compared by value,
# which is what lets the surrounding tests use scenarios as dict keys.
@dataclass(frozen=True)
class EvaluationScenario(object):
    metric_name: MetricName
    type_subset: TokenTypeSubset = TokenTypeSubset.full_set()

    def __str__(self):
        return f'{self.metric_name}/{self.type_subset}'

    def __repr__(self):
        return str(self)
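
A minimal usage sketch (not part of the library source; the stored string value is illustrative). It shows the consequence of the frozen-dataclass semantics above: scenarios built from equal field values are interchangeable dictionary keys.

scenario = EvaluationScenario('full_token_entropy')
results = {scenario: 'some evaluation result'}

# A scenario built from the same arguments hashes and compares equal,
# so it retrieves the same entry.
assert results[EvaluationScenario('full_token_entropy')] == 'some evaluation result'
print(scenario)  # -> full_token_entropy/<str of TokenTypeSubset.full_set()>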
Example #4
def test_mrr_default_args():
    trained_model_mock = Mock(spec=TrainedModel)
    trained_model_mock.get_predictions_and_feed.side_effect = [[
        ([('a1</t>', 0.), ('b1</t>', 0.)], 'a1</t>', SplitContainer),
        ([('a2</t>', 0.), ('b2</t>', 0.)], 'b2</t>', SplitContainer)
    ]]

    expected = {
        TokenTypeSubset.full_set():
        EvaluationResult(['a1</t>', 'b2</t>'],
                         ['SplitContainer', 'SplitContainer'], [1.0, 0.5],
                         0.75)
    }

    actual = mrr(trained_model_mock,
                 'a1 b2',
                 extension='java',
                 append_eof=False,
                 token_type_subsets={TokenTypeSubset.full_set()})

    assert actual == expected
Example #5
def test_bin_entropy_empty():
    trained_model_mock = Mock(spec=TrainedModel)
    trained_model_mock.get_entropies_for_text.return_value = ([], [], [], [])

    expected = {TokenTypeSubset.full_set(): EvaluationResult([], [], [], 0.)}
    actual = bin_entropy(
        trained_model_mock,
        '',
        extension=any_1,
        append_eof=False,
    )

    assert expected == actual
Example #6
def test_evaluate_on_string_non_default_token_types_and_metrics_multiline(
        mocker: MockFixture):
    text = 'MyClass\n{'
    token_type_subsets = {
        TokenTypeSubset.full_set(),
        TokenTypeSubset.full_set_without_comments()
    }

    metrics = {'full_token_entropy', 'mrr'}
    scenarios = {
        EvaluationScenario(metric, token_type_subset)
        for token_type_subset in token_type_subsets for metric in metrics
    }

    trained_model_mock = Mock(spec=TrainedModel)

    # One distinct mock per line; [Mock(...)] * 2 would alias a single mock.
    evaluation_mocks = [Mock(spec=Evaluation) for _ in range(2)]
    mocked_evaluate_on_line = Mock(spec=callable)
    mocked_evaluate_on_line.side_effect = evaluation_mocks
    mocker.patch('langmodels.evaluation.evaluation._evaluate_model_on_line',
                 new=mocked_evaluate_on_line)

    actual = evaluate_model_on_string(trained_model_mock,
                                      text,
                                      'java',
                                      metrics,
                                      token_type_subsets,
                                      result_per_line=True,
                                      append_eof=True)

    mocked_evaluate_on_line.assert_has_calls([
        mock.call(trained_model_mock, 'MyClass', 'java', metrics,
                  token_type_subsets, False, sys.maxsize),
        mock.call(trained_model_mock, '{', 'java', metrics, token_type_subsets,
                  True, sys.maxsize)
    ])

    assert actual == evaluation_mocks
Example #7
def bin_entropy(model: TrainedModel, line: str, extension: str, append_eof: bool,
                token_type_subsets: Optional[Set[TokenTypeSubset]] = None, max_context_allowed: int = sys.maxsize,
                full_tokens: bool = True) \
        -> Dict[TokenTypeSubset, EvaluationResult]:
    """
    Changes the state of the model!
    """
    token_type_subsets = token_type_subsets or {TokenTypeSubset.full_set()}

    all_entropies, tokens, all_token_types, context_lengths = model.get_entropies_for_text(
        line,
        extension,
        full_tokens=full_tokens,
        append_eof=append_eof,
        max_context_allowed=max_context_allowed)
    evaluation_results: Dict[TokenTypeSubset, EvaluationResult] = {}
    for token_type_subset in token_type_subsets:
        res = []
        entropy_sum = 0.0
        count = 0
        # Tokens covered by this subset contribute their entropy to the average;
        # all other positions are kept as None placeholders.
        for entropy, token_type in zip(all_entropies, all_token_types):
            if token_type_subset.contains(token_type):
                res.append(entropy)
                entropy_sum += entropy
                count += 1
            else:
                res.append(None)
        # Per-context-length bins are only built when the context cap is small
        # enough for a fixed-size table; otherwise they are skipped.
        if max_context_allowed < 1000:
            of_context_length_cumul = [(0.0, 0)] * max_context_allowed
            for entropy, token_type, context_length in zip(
                    all_entropies, all_token_types, context_lengths):
                if token_type_subset.contains(token_type):
                    if context_length is not None:
                        of_context_length_cumul[context_length] = (
                            of_context_length_cumul[context_length][0] +
                            entropy,
                            of_context_length_cumul[context_length][1] + 1)
            of_context_length = [(val / n if n != 0 else 0.0, n)
                                 for (val, n) in of_context_length_cumul]
        else:
            of_context_length = None
        evaluation_results[token_type_subset] = EvaluationResult(
            tokens, list(map(lambda tt: tt.__name__, all_token_types)), res,
            entropy_sum / count if count else 0., of_context_length)
    return evaluation_results
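
A usage sketch for bin_entropy, following the mocking pattern of the tests above (no real TrainedModel is loaded; all numbers are illustrative):

trained_model_mock = Mock(spec=TrainedModel)
trained_model_mock.get_entropies_for_text.return_value = (
    [1.0, 2.0],                        # per-token entropies
    ['My', 'Class</t>'],               # prepped tokens
    [SplitContainer, SplitContainer],  # token types
    [None, None])                      # context lengths (not tracked here)

per_subset = bin_entropy(trained_model_mock, 'MyClass',
                         extension='java', append_eof=False)

# With the default arguments there is a single entry, keyed by
# TokenTypeSubset.full_set(), whose average entropy is (1.0 + 2.0) / 2 = 1.5;
# no per-context-length bins are built since max_context_allowed is sys.maxsize.
result = per_subset[TokenTypeSubset.full_set()]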
Example #8
def mrr(model: TrainedModel, line: str, extension: str, append_eof: bool,
        token_type_subsets: Optional[Set[TokenTypeSubset]] = None) \
        -> Dict[TokenTypeSubset, EvaluationResult]:
    """
    Changes the state of the model!
    """
    token_type_subsets = token_type_subsets or {TokenTypeSubset.full_set()}

    evaluation_results: Dict[TokenTypeSubset, EvaluationResult] = {}
    for token_type_subset in token_type_subsets:
        inverse_rank_sum = .0
        count = 0
        inverse_ranks: List[Optional[float]] = []
        all_tokens: List[str] = []
        all_token_types: List[str] = []

        for predictions, prep_token, token_type in \
                model.get_predictions_and_feed(line, extension,
                                               n_suggestions=DEFAULT_N_MODEL_SUGGESTIONS,
                                               append_eof=append_eof):
            all_tokens.append(prep_token)
            all_token_types.append(token_type.__name__)
            predicted_tokens = list(map(lambda p: p[0], predictions))
            if token_type_subset.contains(token_type):
                try:
                    rank = predicted_tokens.index(prep_token) + 1
                    inverse_rank = 1. / rank
                except ValueError:  # actual token is not in prediction list
                    inverse_rank = 0.
                inverse_rank_sum += inverse_rank
                inverse_ranks.append(inverse_rank)
                count += 1
            else:
                inverse_ranks.append(None)
        evaluation_results[token_type_subset] = EvaluationResult(
            all_tokens, all_token_types, inverse_ranks,
            inverse_rank_sum / count if count else 1.)

    return evaluation_results
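
A matching sketch for mrr with a stubbed model (cf. Example #4; values are illustrative):

trained_model_mock = Mock(spec=TrainedModel)
trained_model_mock.get_predictions_and_feed.return_value = [
    # (top-n predictions with scores, actual prepped token, token type)
    ([('a1</t>', 0.), ('b1</t>', 0.)], 'a1</t>', SplitContainer),
]

per_subset = mrr(trained_model_mock, 'a1', extension='java', append_eof=False)
# 'a1</t>' is ranked first among the predictions, so its inverse rank
# (and hence the mean reciprocal rank) is 1.0.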
Example #9
def test_evaluate_on_string_default_args(mocker: MockFixture):
    text = 'MyClass'
    prep_line = ['My', 'Class</t>']
    types = [SplitContainer, SplitContainer]
    result = EvaluationResult(prep_line, list(map(lambda t: t.__name__,
                                                  types)), [1.0, 2.0], 3.0)
    scenarios = {EvaluationScenario('full_token_entropy'): result}

    trained_model_mock = Mock(spec=TrainedModel)
    trained_model_mock.get_entropies_for_text.return_value = (
        [1.0, 2.0], prep_line, types, [None, None])

    mocked_metric = Mock(spec=callable,
                         return_value={TokenTypeSubset.full_set(): result})
    mocker.patch('langmodels.evaluation.evaluation._get_metric_by_name',
                 new=lambda x: mocked_metric)
    mocker.patch('langmodels.evaluation.evaluation.get_metrics_name',
                 new=lambda x: 'full_token_entropy')

    actual = evaluate_model_on_string(trained_model_mock, text)

    mocked_metric.assert_called_with(trained_model_mock, text, 'java', False,
                                     None, sys.maxsize)
    assert actual == [Evaluation(text, scenarios)]
Example #10
def test_mrr_default_all_token_types():
    trained_model_mock = Mock(spec=TrainedModel)
    prep_tokens = ['a1</t>', 'b2</t>', '/</t>', '/</t>']
    method_call_result = [
        ([('a1</t>', 0.), ('b1</t>', 0.)], prep_tokens[0], SplitContainer),
        ([('a2</t>', 0.), ('b2</t>', 0.)], prep_tokens[1], SplitContainer),
        ([('a3</t>', 0.), ('b3</t>', 0.)], prep_tokens[2], OneLineComment),
        ([('a4</t>', 0.), ('b4</t>', 0.)], prep_tokens[3], OneLineComment)
    ]
    str_types = [
        'SplitContainer', 'SplitContainer', 'OneLineComment', 'OneLineComment'
    ]

    trained_model_mock.get_predictions_and_feed.side_effect = [
        method_call_result
    ] * 3

    expected = {
        TokenTypeSubset.full_set():
        EvaluationResult(prep_tokens, str_types,
                         [1.0, 0.5, 0., 0.], 0.375),
        TokenTypeSubset.only_comments():
        EvaluationResult(prep_tokens, str_types,
                         [None, None, 0., 0.], 0.),
        TokenTypeSubset.full_set_without_comments():
        EvaluationResult(prep_tokens, str_types,
                         [1.0, 0.5, None, None], 0.75)
    }
    actual = mrr(trained_model_mock,
                 'a1 b2 //',
                 'java',
                 append_eof=False,
                 token_type_subsets={
                     TokenTypeSubset.full_set(),
                     TokenTypeSubset.only_comments(),
                     TokenTypeSubset.full_set_without_comments()
                 })

    assert actual == expected
Example #11
def test_bin_entropy_with_comment():
    trained_model_mock = Mock(spec=TrainedModel)
    prep_text = ['My', 'Class</t>', '/', '/']
    types = [SplitContainer, SplitContainer, OneLineComment, OneLineComment]
    types_str = list(map(lambda tt: tt.__name__, types))
    trained_model_mock.get_entropies_for_text.return_value = (
        [1.0, 2.0, 3.0, 6.0], prep_text, types, [None, None, None, None])

    expected = {
        TokenTypeSubset.full_set():
        EvaluationResult(prep_text, types_str, [1.0, 2.0, 3.0, 6.0], 3.0,
                         [(0.0, 0)]),
        TokenTypeSubset.only_comments():
        EvaluationResult(prep_text, types_str, [None, None, 3.0, 6.0], 4.5,
                         [(0.0, 0)]),
        TokenTypeSubset.full_set_without_comments():
        EvaluationResult(prep_text, types_str, [1.0, 2.0, None, None], 1.5,
                         [(0.0, 0)])
    }

    actual = bin_entropy(trained_model_mock,
                         'MyClass //',
                         extension='java',
                         append_eof=False,
                         token_type_subsets={
                             TokenTypeSubset.full_set(),
                             TokenTypeSubset.only_comments(),
                             TokenTypeSubset.full_set_without_comments()
                         },
                         full_tokens=False,
                         max_context_allowed=1)

    assert actual == expected