def sample_sentence_autocomplete(data: Dict) -> List[Tuple[str, float]]:
    """Run the sentence autocomplete engine once and return its suggestions.

    <data> must provide the following keys:
        'file'          - file location
        'autocompleter' - 'simple' or 'compressed'
        'weight_type'   - 'sum' or 'average'
        'search'        - search input
        'limit'         - autocomplete limit
    """
    # Only these three settings are handed to the engine constructor; the
    # remaining keys describe the query itself.
    config_keys = ('file', 'autocompleter', 'weight_type')
    config = {key: data[key] for key in config_keys}
    return SentenceAutocompleteEngine(config).autocomplete(
        data['search'], data['limit'])
def test_sentence_autocompleter() -> None:
    """Smoke-test SentenceAutocompleteEngine on the sample sentence file.

    This test reads 'data/sample_sentences.csv', a data file that has to be
    downloaded from the course website. The file has only a few lines, but
    it exercises three details:

    1. The second entry of each csv line is the weight of the sentence, and
       it can be a float (do not assume it is an int).
    2. Two sentences in the file sanitize to the same string, so that value
       is inserted twice and its weight is the *sum* of the weights of the
       two lines.
    3. Numbers *are allowed* in the strings (for both kinds of text-based
       autocomplete engine) and must not be removed.
    """
    config = {
        'file': 'data/sample_sentences.csv',
        'autocompleter': 'simple',
        'weight_type': 'average'
    }
    engine = SentenceAutocompleteEngine(config)

    # Plain autocompletion, including sanitization of the stored sentence.
    matches = engine.autocomplete('what a')
    assert len(matches) == 1
    top = matches[0]
    assert top[0] == 'what a wonderful world'
    assert top[1] == 1.0

    # Digits inside a sentence must be kept.
    matches = engine.autocomplete('numbers')
    assert len(matches) == 1
    assert matches[0][0] == 'numbers are 0k4y'

    # A sentence inserted twice appears once, with the two weights added.
    matches = engine.autocomplete('a')
    assert len(matches) == 1
    top = matches[0]
    assert top[0] == 'a star is born'
    assert top[1] == 15.0 + 6.5
def test_engine() -> None:
    """Check that two identically configured engines agree with each other.

    Two SentenceAutocompleteEngine instances are built from
    'data/google_searches.csv' with the same settings, and autocompleting
    'why' with a limit of 20 must give equal results from both.
    """
    def build_engine():
        # A fresh config dict per engine, so the two instances share nothing.
        return SentenceAutocompleteEngine({
            'file': 'data/google_searches.csv',
            'autocompleter': 'simple',
            'weight_type': 'sum'
        })

    first_engine = build_engine()
    second_engine = build_engine()

    first_results = first_engine.autocomplete('why', 20)
    second_results = second_engine.autocomplete('why', 20)
    assert first_results == second_results
def test_sample_sentence_autocomplete() -> None:
    """Check tree and output properties of SentenceAutocompleteEngine.

    Every combination of autocompleter ('simple', 'compressed') and weight
    type ('sum', 'average') is built from 'data/google_searches.csv' and
    checked as follows:

    1. Compressed prefix tree autocompleter
        - compressibility check
        - subtrees in non-increasing order
        - subtree weight check
    2. Simple prefix tree autocompleter
        - subtree value check
        - subtrees in non-increasing order
        - subtree weight check
    3. Autocompleter properties
        - number of leaves == total inputs - duplicate inputs
        - len(output) <= limit whenever a limit is given
        - output weights are in non-increasing order
        - a duplicated sentence has weight >= its duplicate count
    """
    autocompleters = ['simple', 'compressed']
    files = ['data/google_searches.csv']
    weight_types = ['sum', 'average']
    google_searches = ['how', 'why', 'when', 'who', 'what']
    # None is always tried; the 50 numeric limits are sampled at random and
    # no seed is set here, so they differ from run to run.
    limits = [None] + random.sample(range(1, 200), 50)

    for file in files:
        for autocompleter in autocompleters:
            for weight_type in weight_types:
                engine = SentenceAutocompleteEngine({
                    'file': file,
                    'autocompleter': autocompleter,
                    'weight_type': weight_type
                })

                # Structural checks; each tree type has its own helpers.
                if autocompleter == 'simple':
                    assert scheck_subtrees_non_increasing_order(
                        engine.autocompleter)
                    assert scheck_subtrees_value(engine.autocompleter)
                    assert stree_weight_check(engine.autocompleter,
                                              weight_type)
                else:  # autocompleter == 'compressed'
                    assert check_subtrees_non_increasing_order(
                        engine.autocompleter)
                    assert check_subtrees_compressibility(
                        engine.autocompleter)
                    assert tree_weight_check(engine.autocompleter,
                                             weight_type)

                # duplicates[0] maps each duplicated input to a count and
                # duplicates[1] is the total number of inputs.
                duplicates = num_duplicate_inputs(
                    file, 'SentenceAutocompleteEngine')
                duplicate_counts = duplicates[0]
                total_duplicates = sum(duplicate_counts.values())
                assert engine.autocompleter._num_leaves == \
                    duplicates[1] - total_duplicates

                for search in google_searches:
                    for limit in limits:
                        output = engine.autocomplete(search, limit)
                        if limit is not None:
                            assert len(output) <= limit
                        assert autocomplete_non_increasing_order(output)

                # A sentence that was inserted more than once must weigh at
                # least as much as its duplicate count.
                for duplicate, count in duplicate_counts.items():
                    output = engine.autocomplete(duplicate, 20)
                    for val in output:
                        # Each result is a (sentence, weight) tuple, so the
                        # sentence val[0] is what must match. Comparing the
                        # whole tuple to the string is never true and would
                        # skip the assertion entirely.
                        if val[0] == duplicate:
                            assert val[1] >= count