def test_Query_tokenize_from_string(self):
    """
    Tokenize a query string against a one-rule index and check the tokens.

    Verifies the query-level token list, the same list with ``None``
    placeholders (presumably positions of words unknown to the index),
    and that exactly one query run covering positions 0 through 9 is built.
    """
    rule_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
    idx = index.LicenseIndex([Rule(_text=rule_text, licenses=['bsd'])])

    # NOTE(review): literal reproduced exactly as visible here; the original
    # layout may have spanned several lines -- confirm against the repository.
    querys = ''' The Redistribution and use in source and binary are permitted. Athena capital of Grece Paris and Athene Always'''

    qry = Query(query_string=querys, idx=idx, _test_mode=True)
    qry.tokenize_and_build_runs(qry.tokens_by_line())

    def as_strings(token_ids):
        """Map each token id to its token string, keeping None as None."""
        strings = []
        for tid in token_ids:
            strings.append(None if tid is None else idx.tokens_by_tid[tid])
        return strings

    # The query and its only run are expected to hold the same known tokens.
    known_tokens = [
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted', 'and',
    ]

    assert known_tokens == as_strings(qry.tokens)

    query_with_unknowns = [
        None,
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted',
        None, None, None, None, None,
        'and',
        None, None,
    ]
    assert query_with_unknowns == as_strings(qry.tokens_with_unknowns())

    assert len(qry.query_runs) == 1
    first_run = qry.query_runs[0]

    assert first_run.start == 0
    assert first_run.end == 9
    assert len(first_run) == 10

    assert known_tokens == as_strings(first_run.tokens)

    # Same as the query-level list, minus the two trailing None entries.
    run_with_unknowns = [
        None,
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted',
        None, None, None, None, None,
        'and',
    ]
    assert run_with_unknowns == as_strings(first_run.tokens_with_unknowns())
def test_Query_tokenize_from_string(self):
    """
    Tokenize a query string against a one-rule index and check the tokens.

    Verifies the query-level token list, the same list with ``None``
    placeholders (presumably positions of words unknown to the index),
    and that exactly one query run covering positions 0 through 9 is built.
    """
    rule_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
    idx = index.LicenseIndex([Rule(stored_text=rule_text, license_expression='bsd')])

    # NOTE(review): literal reproduced exactly as visible here; the original
    # layout may have spanned several lines -- confirm against the repository.
    querys = ''' The Redistribution and use in source and binary are permitted. Athena capital of Grece Paris and Athene Always'''

    qry = Query(query_string=querys, idx=idx, _test_mode=True)
    # Materialize the per-line tokens into a list before building the runs.
    qry.tokenize_and_build_runs(list(qry.tokens_by_line(query_string=querys)))

    def to_text(token_ids):
        """Render token ids with the shared helper, bound to this index."""
        return tks_as_str(token_ids, idx=idx)

    # The query and its only run are expected to hold the same known tokens.
    known_tokens = [
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted', 'and',
    ]

    assert to_text(qry.tokens) == known_tokens

    query_with_unknowns = [
        None,
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted',
        None, None, None, None, None,
        'and',
        None, None,
    ]
    assert to_text(query_tokens_with_unknowns(qry)) == query_with_unknowns

    assert 1 == len(qry.query_runs)
    first_run = qry.query_runs[0]

    assert 0 == first_run.start
    assert 9 == first_run.end
    assert 10 == len(first_run)

    assert to_text(first_run.tokens) == known_tokens

    # Same as the query-level list, minus the two trailing None entries.
    run_with_unknowns = [
        None,
        'redistribution', 'and', 'use', 'in', 'source',
        'and', 'binary', 'are', 'permitted',
        None, None, None, None, None,
        'and',
    ]
    assert to_text(query_run_tokens_with_unknowns(first_run)) == run_with_unknowns