def prototype_test(self, text, expected):
    """Check af.avg_word_length(text) against expected using self.approx.

    text is handed unchanged to af.avg_word_length, and expected is the
    value its result is compared with. If self.approx reports the two as
    different, the assertion fails with a message built from
    TestAverageWordLength.failure_message (call string, expected value,
    returned value).
    """
    # Human-readable rendering of the call, used only in the failure message.
    call_description = 'author_functions.avg_word_length({})'.format(text)
    actual = af.avg_word_length(text)
    failure_text = TestAverageWordLength.failure_message.format(
        call_description, expected, actual
    )

    # Compared through self.approx so that some error margin is allowed;
    # an exact check would be self.assertEqual(actual, expected, failure_text).
    values_match = self.approx(actual, expected)
    self.assertTrue(values_match, failure_text)
# We assume there is a minimum of one file.
files = os.listdir(dir_name)

# ####################################################################
# The following code parses the mystery file and calculates its
# linguistic signature.
# ####################################################################

# readlines() gives us a list of strings, one for each line of the file.
# The with-block closes the file even if reading raises, which the
# previous open()/close() pair did not guarantee.
with open(mystery_filename, 'r') as mystery_file:
    text = mystery_file.readlines()

# Calculate the signature for the mystery file: the file name followed by
# the five feature values computed by author_functions, in this fixed order.
mystery_signature = [
    mystery_filename,
    author_functions.avg_word_length(text),
    author_functions.type_token_ratio(text),
    author_functions.hapax_legomena_ratio(text),
    author_functions.avg_sentence_length(text),
    author_functions.avg_sentence_complexity(text),
]

# ####################################################
# The following code reads the linguistic signatures,
# compares them with the mystery_signature,
# and reports the author that was the best match.
# ####################################################

# Weights of linguistic features.
# NOTE(review): six weights for the six signature entries; presumably they
# line up by position (index 0, weight 0, matching the file-name slot) --
# confirm against the comparison code that consumes them.
weights = [0, 11, 33, 50, 0.4, 4]

# We assume there is at least one signature in the dir_name directory
""" (float, float) -> bool Return True iff v1 and v2 are approximately equal. """ return v1-0.0001 < v2 < v1+0.0001 # Test avg_word_length. text = [ "James Fennimore Cooper\n", "Peter, Paul, and Mary\n", ] error_message = "average_word_length on the text:\n\n" + repr(text) + \ "\n\n should return 5.142857142857143" assert approx(author_functions.avg_word_length(text), 5.142857142857143),\ error_message # Test type_token_ratio. text = [ "James Fennimore Cooper\n", "Peter, Paul, and Mary\n", "James Gosling\n" ] error_message = "type_token_ratio on the text:\n\n" + repr(text) + \ "\n\n should return 0.8888888888888888" assert approx(author_functions.type_token_ratio(text), 0.8888888888888888),\ error_message