def spt_method_3(spt: SimplePrefixTree, largest_prefix: int,
                 weights: List[Any],
                 prefixes: List[Any] = None) -> List[List[Any]]:
    """Populate <spt> with a full binary fan of prefixes for testing purposes.

    Starting from each prefix in <prefixes>, insert one value for every
    extension by 0 and by 1, then recurse on each extension until the
    prefixes reach length <largest_prefix>:

                    []
            [0]             [1]
        [0,0] [0,1]     [1,0] [1,1]
         ...                 ...

    Note: height of spt = len(largest prefix tree) + 2
                        = largest_prefix + 2

    Parameters:
        spt: the tree to insert into; it is mutated.
        largest_prefix: length of the longest prefix to generate.
        weights: weights to use; one is popped from the END of this list
            for every insertion, so the caller's list is consumed.
        prefixes: the prefixes to extend. Defaults to a single empty prefix.

    Return every prefix that was inserted, in insertion order. Return []
    if <spt>.value is not a list or if the first prefix is already
    <largest_prefix> long.
    """
    # A fresh list per call: a literal default would be shared by all calls.
    if prefixes is None:
        prefixes = [[]]

    if not isinstance(spt.value, list):
        return []
    if len(prefixes[0]) == largest_prefix:
        return []

    accum_prefixes = []
    # The inserted values don't matter; two are consumed per prefix, so
    # draw exactly as many as this call needs.
    values = random.sample(range(1000000), 2 * len(prefixes))
    for prefix in prefixes:
        for n in range(2):
            pref = prefix + [n]
            accum_prefixes.append(pref)
            spt.insert(values.pop(), float(weights.pop()), pref)
            accum_prefixes.extend(
                spt_method_3(spt, largest_prefix, weights, [pref]))
    return accum_prefixes
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a text file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Each line of the specified file counts as one input string.
    Each string is sanitized: it is lowercased and every character that
    is neither alphanumeric nor a space is dropped. If the result contains
    at least one alphanumeric character it is inserted with a weight of 1,
    using its list of characters as the prefix sequence.

    *Lines without any alphanumeric character are skipped.*

    The same string appearing on several lines is inserted several times,
    which gives it a larger weight (because of how Autocompleter.insert
    works).
    """
    with open(config['file'], encoding='utf8') as f:
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif config['autocompleter'] == 'compressed':
            # The compressed option must build the compressed tree.
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        for line in f:
            text = ''.join(ch for ch in line.lower()
                           if ch.isalnum() or ch == ' ')
            # A line made only of spaces/punctuation has nothing to index.
            if any(ch.isalnum() for ch in text):
                self.autocompleter.insert(text, 1, list(text))
def test_insert_num_nodes(length: int) -> None:
    """Check the node count after a single insertion into a fresh tree.

    Inserting one value under a length-n prefix [x_1, .., x_n] must give
    a tree with n + 2 nodes: n + 1 internal nodes plus the inserted value.
    """
    import sys
    # Long prefixes produce deep trees; leave room for the recursion.
    sys.setrecursionlimit(5000)

    tree = SimplePrefixTree('sum')
    tree.insert('x', 1, list(range(length)))

    expected_nodes = length + 2
    assert num_nodes(tree) == expected_nodes
    assert len(tree) == 1
    assert tree.weight == 1
def test_insert(length: int) -> None:
    """Test aggregate weight, length and ordering of a SimplePrefixTree.

    Three insertion patterns are exercised (n = length):
      - method 1: prefixes [[0,..,n-1], [1,..,n-1], ..., [n-1]];
        the root must end up with len(prefixes) subtrees.
      - method 2: prefixes [[0,..,n-1], [0,..,n-2], ..., [0]];
        the root must end up with exactly 1 subtree.
      - method 3: the binary fan built by spt_method_3 (depth 3);
        the root must end up with exactly 2 subtrees.
    """
    import sys
    sys.setrecursionlimit(5000)

    for method in ('1', '2', '3'):
        prefixes = []
        values = []
        weights = []
        spt = SimplePrefixTree('sum')
        spt_avg = SimplePrefixTree('average')

        if method == '3':
            prefixes = spt_method_3(spt, 3, list(range(15)))
            spt_method_3(spt_avg, 3, list(range(15)))
            # values is only ever used for its length below
            values = prefixes
            weights = list(reversed(range(15)))
        else:
            for x in range(length):
                if method == '1':
                    start, stop = x, length
                else:
                    start, stop = 0, length - x
                prefix = list(range(start, stop))
                amount = length - x
                prefixes.append(prefix)
                values.append(amount)
                weights.append(amount)
                spt.insert(amount, amount, prefix)
                spt_avg.insert(amount, amount, prefix)

        # Expected number of subtrees directly under the root.
        if method == '1':
            expected_subtrees = len(prefixes)
        elif method == '2':
            expected_subtrees = 1
        else:
            expected_subtrees = 2
        assert len(spt.subtrees) == expected_subtrees

        total = sum(weights)
        assert spt.weight == total
        assert spt_avg.weight == total / len(values)
        assert len(spt) == len(values)
        # subtrees must be kept in non-increasing weight order
        assert scheck_subtrees_non_increasing_order(spt)
        assert stree_weight_check(spt_avg, 'average')
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the following
    format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    Each row is handed to parse_melody, which yields the interval
    sequence and the melody; the melody is inserted with a weight of 1
    under that interval sequence.
    """
    # Any value other than 'simple' selects the compressed tree.
    tree_cls = (SimplePrefixTree if config['autocompleter'] == 'simple'
                else CompressedPrefixTree)
    self.autocompleter = tree_cls(config['weight_type'])

    with open(config['file']) as csvfile:
        for row in csv.reader(csvfile):
            interval, melody = parse_melody(row)
            self.autocompleter.insert(melody, 1, interval)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the following
    format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    There may be blank entries (stored as an empty string ''); reading
    of a line stops at the first blank entry, and the notes read up to
    that point form the melody.

    Each melody is inserted into the Autocompleter with a weight of 1,
    under the sequence of differences between consecutive pitches.
    Completely empty lines are ignored.
    """
    import csv  # local import so this method is self-contained

    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    else:
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    # newline='' lets the csv module do its own line-ending handling.
    with open(config['file'], newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue

            entries = row[1:]
            notes = []
            # Walk the (pitch, duration) pairs; an unpaired trailing
            # entry is dropped by zip.
            for pitch, duration in zip(entries[::2], entries[1::2]):
                if pitch == '' or duration == '':
                    break
                notes.append((int(pitch), int(duration)))

            intervals = [after[0] - before[0]
                         for before, after in zip(notes, notes[1:])]
            self.autocompleter.insert(Melody(row[0], notes), 1, intervals)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a text file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Each line of the specified file counts as one input string and is
    passed through letter_engine_sanitizer; element 0 of its result is
    used as the value to store and element 1 as the prefix sequence.

    *Lines whose sanitized value is empty or only whitespace are
    skipped.* Every other line is inserted with a weight of 1.0, so a
    string that appears on several lines accumulates a larger weight
    (because of how Autocompleter.insert works).
    """
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    elif config['autocompleter'] == 'compressed':
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file'], encoding='utf8') as f:
        for line in f:
            result = letter_engine_sanitizer(line)
            # strip() must be *called*: comparing the bound method itself
            # with '' is always unequal and would never skip a line.
            if result[0].strip() != '':
                self.autocompleter.insert(result[0], 1.0, result[1])
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    Each string is sanitized: surrounding whitespace is stripped,
    characters that are neither alphanumeric nor a space are dropped,
    and the result is lowercased.

    *Lines that do not contain at least one alphanumeric character are
    skipped*, as are empty rows. Every other string is inserted with the
    weight given on its line; a string that appears on several lines
    therefore accumulates a larger weight.
    """
    self.autocompleter = SimplePrefixTree(config['weight_type']) \
        if config['autocompleter'] == 'simple' \
        else CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as f:
        for line in csv.reader(f):
            if not line:
                continue
            weight = float(line[1])
            string = ''.join(c for c in line[0].strip()
                             if c.isalnum() or c == ' ').lower()
            # Interior spaces survive sanitizing, so a plain truthiness
            # test would accept strings such as ' '; require a real
            # alphanumeric character instead.
            if any(c.isalnum() for c in string):
                # NOTE(review): the prefix here is the list of
                # characters; presumably a sentence engine should key on
                # words (string.split()) -- confirm before changing.
                self.autocompleter.insert(string, weight, list(string))
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a text file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    The two configuration values are also kept on the instance as
    _weight_type and _autocompleter_type.

    Each line of the specified file counts as one input string. A line
    is sanitized by lowercasing it and dropping every character that is
    neither alphanumeric nor a space (this also removes the newline).

    *Lines that do not contain at least one alphanumeric character are
    skipped.* Every other line is inserted with a weight of 1.0, using
    its list of characters as the prefix sequence, so a string that
    appears on several lines accumulates a larger weight.
    """
    self._weight_type = config['weight_type']
    self._autocompleter_type = config['autocompleter']

    if self._autocompleter_type == 'simple':
        self.autocompleter = SimplePrefixTree(self._weight_type)
    else:
        self.autocompleter = CompressedPrefixTree(self._weight_type)

    with open(config['file'], encoding='utf8') as f:
        for line in f:
            cleaned = ''.join(ch for ch in line.lower()
                              if ch.isalnum() or ch == ' ')
            # Spaces alone do not make a line worth storing.
            if any(ch.isalnum() for ch in cleaned):
                self.autocompleter.insert(cleaned, 1.0, list(cleaned))
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    The string is split on single spaces and every piece is reduced to
    its lowercased alphanumeric characters. The stored value is those
    pieces joined by single spaces; the prefix sequence is the list of
    non-empty pieces (the words).

    *Lines that yield no word at all are skipped*, as are empty rows.
    Every other string is inserted with the weight given on its line; a
    string that appears on several lines accumulates a larger weight.
    """
    import csv  # local import so this method is self-contained

    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    else:
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    # csv.reader copes with quoted fields that contain commas, which a
    # plain split(',') does not; newline='' is what the csv module expects.
    with open(config['file'], newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue

            pieces = [''.join(ch for ch in chunk if ch.isalnum()).lower()
                      for chunk in row[0].split(' ')]
            # Pieces emptied by sanitizing (or produced by doubled
            # spaces) are not words and must not appear in the prefix.
            words = [piece for piece in pieces if piece]
            if not words:
                continue

            self.autocompleter.insert(' '.join(pieces), float(row[1]),
                                      words)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the following
    format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    There may be blank entries (stored as an empty string ''); reading
    of a line stops at the first blank entry, whether it sits in a pitch
    or in a duration position.

    Each melody is inserted into the Autocompleter with a weight of 1,
    under the sequence of differences between consecutive pitches.
    Completely empty rows are ignored.
    """
    self.autocompleter = None
    # determine tree type
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    elif config['autocompleter'] == 'compressed':
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as file:
        for item in csv.reader(file):
            if not item:
                # csv.reader yields [] for a blank line
                continue

            name = item[0]
            notes = []
            interval = []
            prev_pit = None
            entries = item[1:]
            # zip pairs pitch with duration and silently drops a
            # trailing pitch that has no duration column at all.
            for pitch_text, duration_text in zip(entries[::2],
                                                 entries[1::2]):
                if pitch_text == '' or duration_text == '':
                    break
                pitch = int(pitch_text)
                if prev_pit is not None:
                    interval.append(pitch - prev_pit)
                prev_pit = pitch
                notes.append((pitch, int(duration_text)))

            self.autocompleter.insert(Melody(name, notes), 1, interval)
def test_simple_prefix_tree_structure() -> None:
    """Check the shape of a small simple prefix tree.

    NOTE: the insertion order should not affect the outcome; reordering
    the insertions is a useful extra experiment.
    """
    t = SimplePrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        t.insert(word, weight, list(word))

    # __len__ counts only the inserted values, which live at the leaves.
    assert len(t) == 3

    # With the 'sum' option the root weight is the total of all weights.
    assert t.weight == 2.0 + 3.0 + 4.0

    # Two subtrees under the root, ordered by weight.
    assert len(t.subtrees) == 2
    heavier, lighter = t.subtrees[0], t.subtrees[1]

    assert heavier.value == ['c']
    assert heavier.weight == 5.0

    assert lighter.value == ['d']
    assert lighter.weight == 4.0
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    The configuration is also kept on the instance as self.config.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    Each string is sanitized by lowercasing it and dropping every
    character that is neither alphanumeric nor a space. The prefix
    sequence is the list of words of the sanitized string.

    *Lines whose sanitized string contains no word are skipped.* Every
    other string is inserted with the weight given on its line; a string
    that appears on several lines accumulates a larger weight.
    """
    self.config = config
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    else:
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as csvfile:
        for line in csv.reader(csvfile):
            cleaned_str = ''.join(char for char in line[0].lower()
                                  if char.isalnum() or char == ' ')
            words = cleaned_str.split()
            if not words:
                # nothing but spaces/punctuation on this line
                continue

            raw_weight = line[1]
            try:
                # Parse the number as written so that signs and
                # exponents ('1e-05') keep their meaning.
                weight = float(raw_weight)
            except ValueError:
                # Fall back to salvaging digits and dots from a weight
                # that carries stray characters.
                cleaned_num = ''.join(num for num in raw_weight
                                      if num.isnumeric() or num == '.')
                if cleaned_num == '':
                    continue
                weight = float(cleaned_num)

            self.autocompleter.insert(cleaned_str, weight, words)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a text file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use. Any value
          other than 'compressed' selects the simple tree.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Each line of the specified file counts as one input string and is
    passed through _sanitize; element 0 of its result is lowercased and
    stored as the value, and element 1 (lowercased item by item) is used
    as the prefix sequence.

    *Lines whose sanitized value has no alphanumeric character are
    skipped.* Every other line is inserted with a weight of 1.0, so a
    string that appears on several lines accumulates a larger weight
    (because of how Autocompleter.insert works).

    >>> import sys
    >>> sys.setrecursionlimit(5000)
    >>> a = LetterAutocompleteEngine({'file': 'data/lotr.txt', 'autocompleter': 'simple', 'weight_type': 'sum'})
    """
    # Honour the requested tree type instead of always using the simple one.
    if config['autocompleter'] == 'compressed':
        self.autocompleter = CompressedPrefixTree(config['weight_type'])
    else:
        self.autocompleter = SimplePrefixTree(config['weight_type'])

    with open(config['file'], encoding='utf8') as f:
        for line in f:
            # NOTE(review): assumes _sanitize always returns a
            # (string, sequence) pair -- confirm against its definition.
            sanitized_t = _sanitize(line)
            value = sanitized_t[0].lower()
            if not any(char.isalnum() for char in value):
                continue
            self.autocompleter.insert(value, 1.0,
                                      [x.lower() for x in sanitized_t[1]])
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the following
    format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    There may be blank entries (stored as an empty string ''); reading
    of a line stops at the first blank entry and the notes read so far
    form the melody. (A line is NOT discarded just because it contains
    blank padding.)

    Each melody is inserted into the Autocompleter with a weight of 1.0,
    under the sequence of differences between consecutive pitches.
    Completely empty rows are ignored.
    """
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    else:
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as csvfile:
        for line in csv.reader(csvfile):
            if not line:
                continue

            name = line[0]
            entries = line[1:]
            notes = []
            # Even positions are pitches, odd positions their durations.
            for pitch, duration in zip(entries[::2], entries[1::2]):
                if pitch == "" or duration == "":
                    break
                notes.append((int(pitch), int(duration)))

            interval = [notes[i][0] - notes[i - 1][0]
                        for i in range(1, len(notes))]
            self.autocompleter.insert(Melody(name, notes), 1.0, interval)
def spt_height(spt: SimplePrefixTree) -> int:
    """Compute the height of <spt>, counting <spt> itself as one level.

    A leaf has height 1; any other tree is one level taller than its
    tallest subtree.

    Precondition: spt is not an empty SimplePrefixTree
    """
    if spt.is_leaf():
        return 1

    tallest = max((spt_height(child) for child in spt.subtrees), default=0)
    return tallest + 1  # + 1 for this tree's own level
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a text file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Each line of the specified file counts as one input string. A line
    is sanitized by keeping only its alphanumeric characters and spaces,
    each lowercased. Lines whose sanitized form is empty or consists
    solely of spaces are skipped.

    Every remaining string is inserted with a weight of 1, using its
    list of (lowercased) characters as the prefix sequence; a string
    that appears on several lines accumulates a larger weight (because
    of how Autocompleter.insert works).
    """
    tree_cls = (SimplePrefixTree if config['autocompleter'] == 'simple'
                else CompressedPrefixTree)
    self.autocompleter = tree_cls(config['weight_type'])

    with open(config['file'], encoding='utf8') as f:
        # Read everything up front, then sanitize and insert line by line.
        for raw in f.readlines():
            kept = [ch.lower() for ch in raw.strip('\n')
                    if ch.isalnum() or ch == ' ']
            text = ''.join(kept)
            # Non-empty after trimming spaces <=> at least one real character.
            if text.strip(' '):
                self.autocompleter.insert(text, 1, kept)
def test_insert_2() -> None:
    """Exercise SimplePrefixTree.insert() on a few small 'sum' trees."""
    # A brand-new tree holds nothing and has the empty prefix as value.
    tree = SimplePrefixTree('sum')
    assert len(tree) == 0
    assert tree.value == []

    # One value under a one-element prefix: root, ['x'] node and the leaf.
    tree.insert('x', 1, ['x'])
    assert len(tree) == 1
    assert num_nodes(tree) == 3

    # (One value under a longer prefix is covered by
    # test_insert_num_nodes().)

    # One value under the empty prefix: just the root and the leaf.
    root_only = SimplePrefixTree('sum')
    root_only.insert('x', 1, [])
    assert len(root_only) == 1
    assert num_nodes(root_only) == 2
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the following
    format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    There may be blank entries (stored as an empty string ''); reading
    of a line stops at the first blank entry and the notes read so far
    form the melody.

    Each melody is inserted into the Autocompleter with a weight of 1.0,
    under the sequence of differences between consecutive pitches (one
    fewer than the number of notes). Completely empty rows are ignored.
    """
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    else:
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file'], encoding='utf8') as csvfile:
        for line in csv.reader(csvfile):
            if not line:
                continue

            entries = line[1:]
            notes = []
            # Pair each pitch with the duration that follows it by
            # position; looking durations up by value would pick the
            # wrong column whenever a number repeats in the row.
            for pitch, duration in zip(entries[::2], entries[1::2]):
                if pitch == '' or duration == '':
                    break
                notes.append((int(pitch), int(duration)))

            # One interval per adjacent pair of notes, including the last.
            prefix = [notes[i][0] - notes[i - 1][0]
                      for i in range(1, len(notes))]

            melody = Melody(line[0], notes)
            self.autocompleter.insert(melody, 1.0, prefix)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    The first entry is passed through _sanitize. If that yields None, or
    a string that contains no word, the line is skipped. Otherwise the
    sanitized string is inserted with the weight given on its line,
    using its list of words as the prefix sequence. A string that
    appears on several lines accumulates a larger weight.
    """
    self.autocompleter = None
    # determine tree type
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    elif config['autocompleter'] == 'compressed':
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as file:
        for item in csv.reader(file):
            sanitized = _sanitize(item[0])  # sanitize each line
            if sanitized is None:
                continue
            # split() with no argument collapses runs of whitespace, so
            # doubled spaces cannot produce empty-string "words".
            words = sanitized.split()
            if words:
                self.autocompleter.insert(sanitized, float(item[1]), words)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    Each string is sanitized by dropping every character that is neither
    alphanumeric nor a space and lowercasing the rest. The prefix
    sequence is the list of words of the sanitized string.

    *Lines whose sanitized string contains no word are skipped.* Every
    other string is inserted with the weight given on its line; a string
    that appears on several lines accumulates a larger weight.
    """
    if config['autocompleter'] == 'simple':
        self.autocompleter = SimplePrefixTree(config['weight_type'])
    elif config['autocompleter'] == 'compressed':
        self.autocompleter = CompressedPrefixTree(config['weight_type'])

    with open(config['file']) as csvfile:
        for line in csv.reader(csvfile):
            dirty, weight_str = line[0], line[1]
            clean = ''.join(c.lower() for c in dirty
                            if c.isalnum() or c == ' ')
            prefix = clean.split()
            weight = float(weight_str)
            # Test the words, not the length: a string of spaces is
            # non-empty but has nothing alphanumeric in it.
            if prefix:
                self.autocompleter.insert(clean, weight, prefix)
def test_simple_prefix_tree_autocomplete() -> None:
    """Check autocomplete results on a small simple prefix tree.

    NOTE: the insertion order should not affect the outcome; reordering
    the insertions is a useful extra experiment.
    """
    t = SimplePrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        t.insert(word, weight, list(word))

    # Without a limit, every value comes back, sorted in non-increasing
    # weight order (the sorting is SimplePrefixTree.autocomplete's job).
    everything = t.autocomplete([])
    assert everything == [('dog', 4.0), ('car', 3.0), ('cat', 2.0)]

    # With a limit the search is greedy and need not return the
    # highest-weight value overall: the ['c'] subtree is explored first.
    limited = t.autocomplete([], 1)
    assert limited == [('car', 3.0)]
def test_simple_prefix_tree_remove() -> None:
    """Check remove behaviour on a small simple prefix tree.

    NOTE: the insertion order should not affect the outcome; reordering
    the insertions is a useful extra experiment.
    """
    t = SimplePrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        t.insert(word, weight, list(word))

    # Only *values* may sit at leaves, so removing a prefix can also
    # take its now-childless ancestors out of the tree.
    t.remove(['c', 'a'])

    assert len(t) == 1
    assert t.weight == 4.0

    # The whole ['c'] branch is gone; only ['d'] remains under the root.
    remaining = t.subtrees
    assert len(remaining) == 1
    assert remaining[0].value == ['d']
def num_nodes(spt: SimplePrefixTree) -> int:
    """Count the nodes of <spt>, internal nodes and leaves alike.

    A tree whose weight is 0, or that is a leaf, counts as a single
    node; otherwise the count is one plus the counts of its subtrees.

    >>> spt = SimplePrefixTree('sum')
    >>> spt.insert('x', 1, ['x'])
    >>> num_nodes(spt)
    3
    >>> spt.insert('che', 3,['c','h','e'])
    >>> num_nodes(spt)
    7
    >>> spt.insert('xenon', 2, ['x','e','n','o','n'])
    >>> num_nodes(spt)
    12
    """
    if spt.weight == 0 or spt.is_leaf():
        return 1

    # this node plus everything below it
    return 1 + sum(num_nodes(child) for child in spt.subtrees)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has the format:
        - The first entry is the name of a melody (a string).
        - The remaining entries are grouped into pairs where the first
          number in each pair is a note pitch, and the second number is
          the corresponding duration.

    There may be blank entries (stored as an empty string ''). As soon as
    a blank entry is encountered, the rest of that line is ignored: the
    melody consists of the notes read before the blank.

    Each melody is inserted into the Autocompleter with a weight of 1.
    Its prefix sequence is the list of pitch differences between
    consecutive notes.

    Completely empty CSV rows are skipped.  A non-blank entry that is not
    an integer raises ValueError.
    """
    self._weight_type = config['weight_type']
    self._autocompleter_type = config['autocompleter']
    if self._autocompleter_type == 'simple':
        self.autocompleter = SimplePrefixTree(self._weight_type)
    else:
        self.autocompleter = CompressedPrefixTree(self._weight_type)

    # newline='' is what the csv module asks for when opening its input.
    with open(config['file'], encoding='utf8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines; there is no
                # melody name to read.
                continue

            name = row[0]
            notes = []
            # Walk the (pitch, duration) pairs; a dangling final entry
            # without a partner is ignored.
            for i in range(1, len(row) - 1, 2):
                pitch, duration = row[i], row[i + 1]
                # Check for blanks *before* converting: int('') raises.
                if pitch == '' or duration == '':
                    break
                notes.append((int(pitch), int(duration)))

            # Interval between each note's pitch and the previous one's.
            interval_sequence = [
                current[0] - previous[0]
                for previous, current in zip(notes, notes[1:])
            ]
            self.autocompleter.insert(Melody(name, notes), 1,
                                      interval_sequence)
def __init__(self, config: Dict[str, Any]) -> None:
    """Initialize this engine with the given configuration.

    <config> is a dictionary consisting of the following keys:
        - 'file': the path to a CSV file
        - 'autocompleter': either the string 'simple' or 'compressed',
          specifying which subclass of Autocompleter to use.
        - 'weight_type': either 'sum' or 'average', which specifies the
          weight type for the prefix tree.

    Precondition:
    The given file is a *CSV file* where each line has two entries:
        - the first entry is a string
        - the second entry is a number representing the weight of that
          string

    Note that the line may or may not contain spaces.
    Each string must be sanitized (lowercased, keeping only alphanumeric
    characters and spaces), and if the resulting string contains at least
    one word, it is inserted into the Autocompleter.

    *Skip lines that do not contain at least one alphanumeric character!*

    When each string is inserted, it is given THE WEIGHT SPECIFIED ON THE
    LINE FROM THE CSV FILE.

    Note that it is possible for the same string to appear on more than
    one line of the input file; this would result in that string getting
    a larger weight.

    Rows with fewer than two entries (for example blank lines) are
    skipped.  A weight that cannot be converted to a float raises
    ValueError.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    _weight_type: either 'sum' or 'average', which specifies the
        weight type for the prefix tree.
    _autocompleter_type: stores the type of the autocompleter
    """
    self._weight_type = config['weight_type']
    self._autocompleter_type = config['autocompleter']
    if self._autocompleter_type == 'simple':
        self.autocompleter = SimplePrefixTree(self._weight_type)
    else:
        self.autocompleter = CompressedPrefixTree(self._weight_type)

    # newline='' is what the csv module asks for when opening its input.
    with open(config['file'], encoding='utf8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if len(row) < 2:
                # Blank or truncated row: no (string, weight) pair here.
                continue

            # Sanitize in a single pass: lowercase, then keep only
            # alphanumeric characters and spaces.
            text = ''.join(
                char for char in row[0].lower()
                if char.isalnum() or char == ' '
            )

            # The prefix sequence is the list of words; an empty list
            # means nothing alphanumeric survived sanitization.
            words = text.split()
            if words:
                self.autocompleter.insert(text, float(row[1]), words)
def setUp(self) -> None:
    """Create one SimplePrefixTree per weight type and store both on self.

    self.sum_tree is built with weight type 'sum' and self.avg_tree with
    weight type 'average'.
    """
    sum_weighted = SimplePrefixTree('sum')
    self.sum_tree = sum_weighted

    average_weighted = SimplePrefixTree('average')
    self.avg_tree = average_weighted
"""CSC148 Assignment 2: Autocomplete engines
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string must be sanitized (lowercased, keeping only
        alphanumeric characters and spaces), and if the resulting string
        contains at least one word, it is inserted into the Autocompleter.

        *Skip lines that do not contain at least one alphanumeric character!*

        When each string is inserted, it is given THE WEIGHT SPECIFIED ON THE
        LINE FROM THE CSV FILE.

        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.

        Rows with fewer than two entries (for example blank lines) are
        skipped.

        Raises ValueError if config['autocompleter'] is neither 'simple'
        nor 'compressed', or if a weight cannot be converted to a float.
        """
        # Local import so this class does not depend on a module-level
        # `import csv` being present in the file.
        import csv

        kind = config['autocompleter']
        if kind == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif kind == 'compressed':
            # NOTE(review): assumes CompressedPrefixTree is in scope next to
            # SimplePrefixTree -- confirm against the file's imports.
            self.autocompleter = CompressedPrefixTree(config['weight_type'])
        else:
            raise ValueError(f"unknown autocompleter type: {kind!r}")

        # newline='' is what the csv module asks for when opening its input.
        with open(config['file'], encoding='utf8', newline='') as f:
            for row in csv.reader(f):
                if len(row) < 2:
                    # Blank or truncated row: no (string, weight) pair.
                    continue

                text = ''.join(
                    char for char in row[0].lower()
                    if char.isalnum() or char == ' '
                )

                # The prefix sequence is the list of words; an empty list
                # means nothing alphanumeric survived sanitization.
                words = text.split()
                if words:
                    self.autocompleter.insert(text, float(row[1]), words)

    def autocomplete(self, prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        prefix_list = prefix.split()
        return self.autocompleter.autocomplete(prefix_list, limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        prefix_list = prefix.split()
        self.autocompleter.remove(prefix_list)
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string must be sanitized (lowercased, keeping only
        alphanumeric characters and spaces), and if the resulting string
        contains at least one alphanumeric character, it is inserted into
        the Autocompleter.

        *Skip lines that do not contain at least one alphanumeric character!*

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).

        Raises ValueError if config['autocompleter'] is neither 'simple'
        nor 'compressed'.
        """
        kind = config['autocompleter']
        if kind == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif kind == 'compressed':
            # NOTE(review): assumes CompressedPrefixTree is in scope next to
            # SimplePrefixTree -- confirm against the file's imports.
            self.autocompleter = CompressedPrefixTree(config['weight_type'])
        else:
            raise ValueError(f"unknown autocompleter type: {kind!r}")

        with open(config['file'], encoding='utf8') as f:
            for line in f:
                # Sanitizing also drops the trailing newline, since it is
                # neither alphanumeric nor a space.
                text = ''.join(
                    char for char in line.lower()
                    if char.isalnum() or char == ' '
                )

                # A line made only of spaces (or nothing) is skipped.
                if any(char.isalnum() for char in text):
                    self.autocompleter.insert(text, 1, list(text))

    def autocomplete(self, prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string must be transformed into a list
        of letters before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        prefix_list = list(prefix)
        return self.autocompleter.autocomplete(prefix_list, limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        Note that the given prefix string must be transformed into a list
        of letters before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        prefix_list = list(prefix)
        self.autocompleter.remove(prefix_list)