def test_evaluate_on_string_default_args_not_result_per_line(mocker: MockFixture):
    # given
    text = 'MyClass\n{'
    result = Mock(spec=EvaluationResult)
    scenarios = {EvaluationScenario('full_token_entropy'): result}
    trained_model_mock = Mock(spec=TrainedModel)
    mocked_metric = Mock(spec=callable, return_value={TokenTypeSubset.full_set(): result})
    mocker.patch('langmodels.evaluation.evaluation._get_metric_by_name',
                 new=lambda x: mocked_metric)
    mocker.patch('langmodels.evaluation.evaluation.get_metrics_name',
                 new=lambda x: 'full_token_entropy')

    # when
    actual = evaluate_model_on_string(trained_model_mock, text, result_per_line=False)

    # then
    mocked_metric.assert_called_with(trained_model_mock, text, 'java', False, None, sys.maxsize)
    assert actual == Evaluation(text, scenarios)
from dataclasses import dataclass


# Instances are constructed positionally and used as dict keys and set members in the
# tests below, so this must be a frozen (hence hashable) dataclass.
@dataclass(frozen=True)
class EvaluationScenario(object):
    metric_name: MetricName
    type_subset: TokenTypeSubset = TokenTypeSubset.full_set()

    def __str__(self) -> str:
        return f'{self.metric_name}/{self.type_subset}'

    def __repr__(self) -> str:
        return str(self)
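# A minimal sketch (not part of the original code) of why the scenario class must be
# hashable: equal scenarios retrieve the same mapping entry. The 'placeholder-result'
# value is made up for illustration.
def demo_evaluation_scenario_as_key():
    results = {EvaluationScenario('full_token_entropy'): 'placeholder-result'}
    # frozen dataclasses compare (and hash) by field values, so an equal instance
    # constructed later looks up the same entry
    assert results[EvaluationScenario('full_token_entropy')] == 'placeholder-result'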
def test_mrr_default_args():
    trained_model_mock = Mock(spec=TrainedModel)
    trained_model_mock.get_predictions_and_feed.side_effect = [[
        ([('a1</t>', 0.), ('b1</t>', 0.)], 'a1</t>', SplitContainer),
        ([('a2</t>', 0.), ('b2</t>', 0.)], 'b2</t>', SplitContainer)
    ]]

    # 'a1' is ranked first (reciprocal rank 1.0), 'b2' second (0.5) -> MRR 0.75
    expected = {
        TokenTypeSubset.full_set():
            EvaluationResult(['a1</t>', 'b2</t>'], ['SplitContainer', 'SplitContainer'],
                             [1.0, 0.5], 0.75)
    }

    actual = mrr(trained_model_mock, 'a1 b2', extension='java', append_eof=False,
                 token_type_subsets={TokenTypeSubset.full_set()})

    assert actual == expected
def test_mrr_default_all_token_types():
    trained_model_mock = Mock(spec=TrainedModel)
    prep_tokens = ['a1</t>', 'b2</t>', '/</t>', '/</t>']
    method_call_result = [
        ([('a1</t>', 0.), ('b1</t>', 0.)], prep_tokens[0], SplitContainer),
        ([('a2</t>', 0.), ('b2</t>', 0.)], prep_tokens[1], SplitContainer),
        ([('a3</t>', 0.), ('b3</t>', 0.)], prep_tokens[2], OneLineComment),
        ([('a4</t>', 0.), ('b4</t>', 0.)], prep_tokens[3], OneLineComment)
    ]
    str_types = ['SplitContainer', 'SplitContainer', 'OneLineComment', 'OneLineComment']
    # mrr iterates over the model's predictions once per requested subset,
    # so three identical return values are needed
    trained_model_mock.get_predictions_and_feed.side_effect = [method_call_result] * 3

    # one reciprocal rank per token: 1.0 and 0.5 for the identifiers, 0. for the two
    # comment tokens that are absent from their prediction lists, None outside the subset
    expected = {
        TokenTypeSubset.full_set():
            EvaluationResult(prep_tokens, str_types, [1.0, 0.5, 0., 0.], 0.375),
        TokenTypeSubset.only_comments():
            EvaluationResult(prep_tokens, str_types, [None, None, 0., 0.], 0.),
        TokenTypeSubset.full_set_without_comments():
            EvaluationResult(prep_tokens, str_types, [1.0, 0.5, None, None], 0.75)
    }

    actual = mrr(trained_model_mock, 'a1 b2 //', 'java', append_eof=False,
                 token_type_subsets={TokenTypeSubset.full_set(),
                                     TokenTypeSubset.only_comments(),
                                     TokenTypeSubset.full_set_without_comments()})

    # dict equality is order-independent, so no sorting is needed
    assert actual == expected
def test_bin_entropy_empty():
    trained_model_mock = Mock(spec=TrainedModel)
    trained_model_mock.get_entropies_for_text.return_value = ([], [], [], [])

    expected = {TokenTypeSubset.full_set(): EvaluationResult([], [], [], 0.)}
    actual = bin_entropy(trained_model_mock, '', extension=any_1, append_eof=False)

    assert expected == actual
def test_bin_entropy_with_comment():
    trained_model_mock = Mock(spec=TrainedModel)
    prep_text = ['My', 'Class</t>', '/', '/']
    types = [SplitContainer, SplitContainer, OneLineComment, OneLineComment]
    types_str = list(map(lambda tt: tt.__name__, types))
    trained_model_mock.get_entropies_for_text.return_value = (
        [1.0, 2.0, 3.0, 6.0], prep_text, types, [None, None, None, None])

    expected = {
        TokenTypeSubset.full_set():
            EvaluationResult(prep_text, types_str, [1.0, 2.0, 3.0, 6.0], 3.0, [(0.0, 0)]),
        TokenTypeSubset.only_comments():
            EvaluationResult(prep_text, types_str, [None, None, 3.0, 6.0], 4.5, [(0.0, 0)]),
        TokenTypeSubset.full_set_without_comments():
            EvaluationResult(prep_text, types_str, [1.0, 2.0, None, None], 1.5, [(0.0, 0)])
    }

    actual = bin_entropy(trained_model_mock, 'MyClass //', extension='java', append_eof=False,
                         token_type_subsets={TokenTypeSubset.full_set(),
                                             TokenTypeSubset.only_comments(),
                                             TokenTypeSubset.full_set_without_comments()},
                         full_tokens=False,
                         max_context_allowed=1)

    assert actual == expected
def bin_entropy(model: TrainedModel, line: str, extension: str, append_eof: bool,
                token_type_subsets: Optional[Set[TokenTypeSubset]] = None,
                max_context_allowed: int = sys.maxsize,
                full_tokens: bool = True) -> Dict[TokenTypeSubset, EvaluationResult]:
    """
    Computes the per-token entropies of `line` and aggregates them for each requested
    token-type subset.

    Changes the state of the model!
    """
    token_type_subsets = token_type_subsets or {TokenTypeSubset.full_set()}

    all_entropies, tokens, all_token_types, context_lengths = model.get_entropies_for_text(
        line, extension, full_tokens=full_tokens, append_eof=append_eof,
        max_context_allowed=max_context_allowed)
    evaluation_results: Dict[TokenTypeSubset, EvaluationResult] = {}
    for token_type_subset in token_type_subsets:
        res = []
        entropy_sum = 0.0  # renamed so as not to shadow the `sum` builtin
        count = 0
        for entropy, token_type in zip(all_entropies, all_token_types):
            if token_type_subset.contains(token_type):
                res.append(entropy)
                entropy_sum += entropy
                count += 1
            else:
                res.append(None)
        if max_context_allowed < 1000:
            # per-context-length averages are collected only when the context window is
            # explicitly restricted; a sys.maxsize-long accumulator is not feasible
            of_context_length_cumul = [(0.0, 0)] * max_context_allowed
            for entropy, token_type, context_length in zip(all_entropies, all_token_types,
                                                           context_lengths):
                if token_type_subset.contains(token_type) and context_length is not None:
                    cumul, n = of_context_length_cumul[context_length]
                    of_context_length_cumul[context_length] = (cumul + entropy, n + 1)
            of_context_length = [(val / n if n != 0 else 0.0, n)
                                 for (val, n) in of_context_length_cumul]
        else:
            of_context_length = None
        evaluation_results[token_type_subset] = EvaluationResult(
            tokens, list(map(lambda tt: tt.__name__, all_token_types)), res,
            entropy_sum / count if count else 0., of_context_length)
    return evaluation_results
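# A minimal usage sketch for bin_entropy (an illustration, not part of the original
# suite), following the mocked-model style of the tests above; the tokens and entropy
# values are made up. With the default full-set subset, the aggregate is the plain mean.
def demo_bin_entropy():
    model = Mock(spec=TrainedModel)
    model.get_entropies_for_text.return_value = (
        [1.0, 3.0], ['My', 'Class</t>'], [SplitContainer, SplitContainer], [None, None])
    results = bin_entropy(model, 'MyClass', extension='java', append_eof=False)
    # the mean of [1.0, 3.0] is 2.0; per-context-length stats stay None unless
    # max_context_allowed is restricted
    assert results[TokenTypeSubset.full_set()] == EvaluationResult(
        ['My', 'Class</t>'], ['SplitContainer', 'SplitContainer'], [1.0, 3.0], 2.0)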
def mrr(model: TrainedModel, line: str, extension: str, append_eof: bool,
        token_type_subsets: Optional[Set[TokenTypeSubset]] = None) \
        -> Dict[TokenTypeSubset, EvaluationResult]:
    """
    Computes the mean reciprocal rank of the model's suggestions for each token of
    `line`, aggregated per token-type subset.

    Changes the state of the model!
    """
    token_type_subsets = token_type_subsets or {TokenTypeSubset.full_set()}

    evaluation_results: Dict[TokenTypeSubset, EvaluationResult] = {}
    # the loop variable must not reuse the name of the set it iterates over
    for token_type_subset in token_type_subsets:
        inverse_rank_sum = 0.0
        count = 0
        inverse_ranks: List[Optional[float]] = []
        all_tokens: List[str] = []
        all_token_types: List[str] = []
        for predictions, prep_token, token_type in \
                model.get_predictions_and_feed(line, extension,
                                               n_suggestions=DEFAULT_N_MODEL_SUGGESTIONS,
                                               append_eof=append_eof):
            all_tokens.append(prep_token)
            all_token_types.append(token_type.__name__)
            predicted_tokens = list(map(lambda p: p[0], predictions))
            if token_type_subset.contains(token_type):
                try:
                    rank = predicted_tokens.index(prep_token) + 1
                    inverse_rank = 1. / rank
                except ValueError:  # the actual token is not in the prediction list
                    inverse_rank = 0.
                inverse_rank_sum += inverse_rank
                inverse_ranks.append(inverse_rank)
                count += 1
            else:
                inverse_ranks.append(None)
        evaluation_results[token_type_subset] = EvaluationResult(
            all_tokens, all_token_types, inverse_ranks,
            inverse_rank_sum / count if count else 1.)
    return evaluation_results
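# A minimal usage sketch for mrr (an illustration, not part of the original suite),
# again driving the function with a mocked model: the correct token is ranked first
# for 'a1' (reciprocal rank 1.0) and second for 'b2' (0.5), so the MRR is 0.75.
def demo_mrr():
    model = Mock(spec=TrainedModel)
    model.get_predictions_and_feed.return_value = [
        ([('a1</t>', 0.), ('b1</t>', 0.)], 'a1</t>', SplitContainer),
        ([('a2</t>', 0.), ('b2</t>', 0.)], 'b2</t>', SplitContainer)
    ]
    results = mrr(model, 'a1 b2', extension='java', append_eof=False)
    assert results[TokenTypeSubset.full_set()] == EvaluationResult(
        ['a1</t>', 'b2</t>'], ['SplitContainer', 'SplitContainer'], [1.0, 0.5], 0.75)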
def test_evaluate_on_string_non_default_token_types_and_metrics_multiline(mocker: MockFixture):
    text = 'MyClass\n{'
    token_type_subsets = {TokenTypeSubset.full_set(),
                          TokenTypeSubset.full_set_without_comments()}
    metrics = {'full_token_entropy', 'mrr'}
    scenarios = {EvaluationScenario(metric, token_type_subset)
                 for token_type_subset in token_type_subsets
                 for metric in metrics}
    trained_model_mock = Mock(spec=TrainedModel)
    # two distinct mocks, one per line of the input text
    evaluation_mocks = [Mock(spec=Evaluation) for _ in range(2)]
    mocked_evaluate_on_line = Mock(spec=callable)
    mocked_evaluate_on_line.side_effect = evaluation_mocks
    mocker.patch('langmodels.evaluation.evaluation._evaluate_model_on_line',
                 new=mocked_evaluate_on_line)

    actual = evaluate_model_on_string(trained_model_mock, text, 'java', metrics,
                                      token_type_subsets,
                                      result_per_line=True, append_eof=True)

    # append_eof is propagated only for the last line of the text
    mocked_evaluate_on_line.assert_has_calls([
        mock.call(trained_model_mock, 'MyClass', 'java', metrics, token_type_subsets,
                  False, sys.maxsize),
        mock.call(trained_model_mock, '{', 'java', metrics, token_type_subsets,
                  True, sys.maxsize)
    ])
    assert actual == evaluation_mocks
def test_evaluate_on_string_default_args(mocker: MockFixture):
    text = 'MyClass'
    prep_line = ['My', 'Class</t>']
    types = [SplitContainer, SplitContainer]
    result = EvaluationResult(prep_line, list(map(lambda t: t.__name__, types)),
                              [1.0, 2.0], 3.0)
    scenarios = {EvaluationScenario('full_token_entropy'): result}
    trained_model_mock = Mock(spec=TrainedModel)
    # never reached (the metric itself is mocked below), but kept consistent with the
    # four-element tuple that get_entropies_for_text actually returns
    trained_model_mock.get_entropies_for_text.return_value = ([1.0, 2.0], prep_line, types,
                                                              [None, None])
    mocked_metric = Mock(spec=callable, return_value={TokenTypeSubset.full_set(): result})
    mocker.patch('langmodels.evaluation.evaluation._get_metric_by_name',
                 new=lambda x: mocked_metric)
    mocker.patch('langmodels.evaluation.evaluation.get_metrics_name',
                 new=lambda x: 'full_token_entropy')

    actual = evaluate_model_on_string(trained_model_mock, text)

    mocked_metric.assert_called_with(trained_model_mock, text, 'java', False, None, sys.maxsize)
    assert actual == [Evaluation(text, scenarios)]