def prototype_test(self, text, expected):
     """Check af.avg_word_length(text) against expected, approximately.

     text is handed unchanged to af.avg_word_length.  The value it returns
     is compared to expected through self.approx rather than by strict
     equality, so some error margin is tolerated.  When the check fails,
     the reported message is TestAverageWordLength.failure_message filled
     in with the call text, the expected value and the returned value, in
     that order.
     """
     invocation = 'author_functions.avg_word_length({})'.format(text)
     actual = af.avg_word_length(text)
     template = TestAverageWordLength.failure_message
     failure_text = template.format(invocation, expected, actual)
     # Approximate comparison is deliberate; the strict alternative would
     # be self.assertEqual(actual, expected, failure_text).
     is_close = self.approx(actual, expected)
     self.assertTrue(is_close, failure_text)
    # We assume there is a minimum of one file.
    files = os.listdir(dir_name)

    # ####################################################################
    # The following code parses the mystery file and calculates its
    # linguistic signature.
    # ####################################################################

    mystery_file = open(mystery_filename, 'r')
    # readlines() gives us a list of strings, one for each line of the file
    text = mystery_file.readlines()
    mystery_file.close()

    # Calculate the signature for the mystery file
    mystery_signature = [mystery_filename]
    mystery_signature.append(author_functions.avg_word_length(text))
    mystery_signature.append(author_functions.type_token_ratio(text))
    mystery_signature.append(author_functions.hapax_legomena_ratio(text))
    mystery_signature.append(author_functions.avg_sentence_length(text))
    mystery_signature.append(author_functions.avg_sentence_complexity(text))

    # ####################################################
    # The following code reads the linguistic signatures,
    # compares them with the mystery_signature,
    # and reports the author that was the best match.
    # ####################################################

    # Weights of linguistic features.
    weights = [0, 11, 33, 50, 0.4, 4]

    # We assume there is at least one signature in the dir_name directory
    """ (float, float) -> bool

    Return True iff v1 and v2 are approximately equal.
    """

    return v1-0.0001 < v2 < v1+0.0001


# Test avg_word_length.
# The expected value equals 36 / 7: the seven words below contain 36
# letters once punctuation and newlines are left out.
text = [
    "James Fennimore Cooper\n",
    "Peter, Paul, and Mary\n",
]
# The message names the function exactly as it is called below, so a
# failure points the reader at the right definition.
error_message = (
    "avg_word_length on the text:\n\n" + repr(text)
    + "\n\n should return 5.142857142857143"
)
assert approx(
    author_functions.avg_word_length(text), 5.142857142857143
), error_message


# Test type_token_ratio.
# The three lines below hold nine words, with "James" occurring twice.
text = [
    "James Fennimore Cooper\n",
    "Peter, Paul, and Mary\n",
    "James Gosling\n",
]

# Failure text: the function name, the input as a repr, and the target value.
error_message = "".join((
    "type_token_ratio on the text:\n\n",
    repr(text),
    "\n\n should return 0.8888888888888888",
))
assert approx(
    author_functions.type_token_ratio(text), 0.8888888888888888
), error_message
# Example #4
# 0
    # We assume there is a minimum of one file.
    files = os.listdir(dir_name)

    # ####################################################################
    # The following code parses the mystery file and calculates its 
    # linguistic signature.                                         
    # ####################################################################

    mystery_file = open(mystery_filename, 'r')
    # readlines() gives us a list of strings, one for each line of the file
    text = mystery_file.readlines()
    mystery_file.close()

    # Calculate the signature for the mystery file
    mystery_signature = [mystery_filename]
    mystery_signature.append(author_functions.avg_word_length(text))
    mystery_signature.append(author_functions.type_token_ratio(text))
    mystery_signature.append(author_functions.hapax_legomena_ratio(text))
    mystery_signature.append(author_functions.avg_sentence_length(text))
    mystery_signature.append(author_functions.avg_sentence_complexity(text))
    
    # ####################################################
    # The following code reads the linguistic signatures, 
    # compares them with the mystery_signature,           
    # and reports the author that was the best match.					
    # ####################################################
    
    # Weights of linguistic features.
    weights = [0, 11, 33, 50, 0.4, 4]
    
    # We assume there is at least one signature in the dir_name directory