def textutils_normalize_line():
    """Check TextUtils.normalize_line against the expected-output fixture.

    The expected fixture is flattened into its whitespace-separated tokens.
    Every line of the input fixture is passed through
    ``TextUtils.normalize_line`` and the items it returns are accumulated
    into one flat list; the two lists must be equal.
    """
    # Trailing newline characters are stripped before each line is split.
    expected_tokens = [
        token
        for raw_line in Utilities.open_file(TST_DIR + TXT_NORMAL_FILE_EXP)
        for token in raw_line.rstrip().split()
    ]

    # Normalize the input fixture line by line, flattening the results.
    actual_tokens = []
    for raw_line in Utilities.open_file(TST_DIR + TXT_NORMAL_FILE):
        actual_tokens.extend(TextUtils.normalize_line(raw_line))

    assert actual_tokens == expected_tokens
def file_to_ngram(file_path, n):
    """Build n-grams over the normalized contents of a file.

    Each line of the file is passed through ``TextUtils.normalize_line`` and
    the results are concatenated into one sequence, with a single ``' '``
    element appended after every line. The sequence is then handed to
    ``ngrams``.

    Args:
        file_path: Path of the file to read.
        n: Size of the n-grams; must be at least 1.

    Returns:
        The result of ``ngrams(sequence, n)``, or ``None`` when ``n`` is less
        than 1 or ``Utilities.is_file(file_path)`` is falsy.
    """
    # The size check comes first so an invalid n never touches the filesystem.
    if n < 1 or not Utilities.is_file(file_path):
        return None

    tokens = []
    # NOTE(review): the handle returned by Utilities.open_file is never
    # closed here -- confirm whether the helper manages its lifetime.
    for line in Utilities.open_file(file_path):
        tokens.extend(TextUtils.normalize_line(line))
        # A lone space element is appended after every line's tokens.
        tokens.append(' ')

    return ngrams(tokens, n)
def file_to_ngram(file_path, n):
    """Build n-grams over the normalized contents of a file.

    Each line of the file is passed through ``TextUtils.normalize_line`` and
    the results are concatenated into one sequence, with a single ``' '``
    element appended after every line. The sequence is then handed to
    ``ngrams``.

    Args:
        file_path: Path of the file to read.
        n: Size of the n-grams; must be at least 1.

    Returns:
        The result of ``ngrams(sequence, n)``, or ``None`` when ``n`` is less
        than 1 or ``Utilities.is_file(file_path)`` is falsy.
    """
    # The size check comes first so an invalid n never touches the filesystem.
    if n < 1 or not Utilities.is_file(file_path):
        return None

    tokens = []
    # NOTE(review): the handle returned by Utilities.open_file is never
    # closed here -- confirm whether the helper manages its lifetime.
    for line in Utilities.open_file(file_path):
        tokens.extend(TextUtils.normalize_line(line))
        # A lone space element is appended after every line's tokens.
        tokens.append(' ')

    return ngrams(tokens, n)