def save_characteristics_in_db(file_path, author_name, xml_file_path):
    """Compute text characteristics for one text and store them for an author.

    The text at ``file_path`` is read as a list of lines and passed to the
    characteristic functions. If ``collection`` holds no document named
    ``author_name``, the new characteristics are inserted as-is. Otherwise
    the stored document is combined with the new values, weighted by the
    respective word counts, and written back.

    Args:
        file_path: Path of the text file to analyse.
        author_name: Stored under "name" and used as the lookup key.
        xml_file_path: Currently unused; only the commented-out XmlParser
            code below refers to it.

    Raises:
        KeyError: In the existing-author branch, because "words_count" is
            not present in the freshly computed characteristics (see the
            NOTE below).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(file_path, 'r') as text_file:
        text = text_file.readlines()

    # xml_tree = XmlParser(xml_file_path)
    author_position_characteristics = {
        "name": author_name,
        "average_word_length": average_word_length(text),
        "type_token_ratio": type_token_ratio(text),
        "hapax_legomana_ratio": hapax_legomana_ratio(text),
        "average_sentence_length": average_sentence_length(text),
        "average_sentence_complexity": avg_sentence_complexity(text),
        # NOTE(review): the three entries below are disabled, yet the
        # existing-author branch still reads "words_count" and passes
        # "parts_of_speech_frequencies" on. Until they are restored, that
        # branch fails with KeyError on the first "words_count" access.
        # "base_words": xml_tree.get_base_form_words(),
        # "parts_of_speech_frequencies": xml_tree.get_parts_of_speech_frequency(),
        # "words_count": xml_tree.get_number_of_words()
    }

    author_data = collection.find_one({"name": author_name})
    if author_data:
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(author_position_characteristics["words_count"])
        # Share of the combined word count contributed by the stored data.
        old_positions_impact = author_data["words_count"] / float(
            author_position_characteristics["words_count"] + author_data["words_count"])
        print(old_positions_impact)
        new_position_impact = 1 - old_positions_impact
        characteristics_to_recalculate = [
            "average_word_length",
            "type_token_ratio",
            "hapax_legomana_ratio",
            "average_sentence_length",
            "average_sentence_complexity",
        ]
        # The helpers are defined elsewhere; presumably they update
        # author_data in place, since author_data is what gets written back.
        recalculate_simple_characteristics_keeping_impact(
            author_data, old_positions_impact,
            author_position_characteristics, new_position_impact,
            characteristics_to_recalculate)
        recalculate_morphological_characteristics_keeping_impact(
            author_data, old_positions_impact,
            author_position_characteristics, new_position_impact,
            "parts_of_speech_frequencies")
        collection.find_one_and_replace({"name": author_name}, author_data)
    else:
        collection.insert_one(author_position_characteristics)
def save_characteristics_in_db(file_path, author_name, xml_file_path):
    """Compute text characteristics for one text and store them for an author.

    The text at ``file_path`` is read as a list of lines and passed to the
    characteristic functions. If ``collection`` holds no document named
    ``author_name``, the new characteristics are inserted as-is. Otherwise
    the stored document is combined with the new values, weighted by the
    respective word counts, and written back.

    Args:
        file_path: Path of the text file to analyse.
        author_name: Stored under "name" and used as the lookup key.
        xml_file_path: Currently unused; only the commented-out XmlParser
            code below refers to it.

    Raises:
        KeyError: In the existing-author branch, because "words_count" is
            not present in the freshly computed characteristics (see the
            NOTE below).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(file_path, 'r') as text_file:
        text = text_file.readlines()

    # xml_tree = XmlParser(xml_file_path)
    author_position_characteristics = {
        "name": author_name,
        "average_word_length": average_word_length(text),
        "type_token_ratio": type_token_ratio(text),
        "hapax_legomana_ratio": hapax_legomana_ratio(text),
        "average_sentence_length": average_sentence_length(text),
        "average_sentence_complexity": avg_sentence_complexity(text),
        # NOTE(review): the three entries below are disabled, yet the
        # existing-author branch still reads "words_count" and passes
        # "parts_of_speech_frequencies" on. Until they are restored, that
        # branch fails with KeyError on the first "words_count" access.
        # "base_words": xml_tree.get_base_form_words(),
        # "parts_of_speech_frequencies": xml_tree.get_parts_of_speech_frequency(),
        # "words_count": xml_tree.get_number_of_words()
    }

    author_data = collection.find_one({"name": author_name})
    if author_data:
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(author_position_characteristics["words_count"])
        # Share of the combined word count contributed by the stored data.
        old_positions_impact = author_data["words_count"] / float(
            author_position_characteristics["words_count"] + author_data["words_count"])
        print(old_positions_impact)
        new_position_impact = 1 - old_positions_impact
        characteristics_to_recalculate = [
            "average_word_length",
            "type_token_ratio",
            "hapax_legomana_ratio",
            "average_sentence_length",
            "average_sentence_complexity",
        ]
        # The helpers are defined elsewhere; presumably they update
        # author_data in place, since author_data is what gets written back.
        recalculate_simple_characteristics_keeping_impact(
            author_data, old_positions_impact,
            author_position_characteristics, new_position_impact,
            characteristics_to_recalculate)
        recalculate_morphological_characteristics_keeping_impact(
            author_data, old_positions_impact,
            author_position_characteristics, new_position_impact,
            "parts_of_speech_frequencies")
        collection.find_one_and_replace({"name": author_name}, author_data)
    else:
        collection.insert_one(author_position_characteristics)
avg_sentence_complexity
# NOTE(review): the name above appears to be the tail of an import statement
# that begins outside this view; it is kept verbatim as the first line.


def print_results_in_file(path, content):
    """Write every item of ``content`` to ``path``, one item per line.

    Args:
        path: Destination file; it is opened in "w" mode, so any existing
            content is overwritten.
        content: Iterable of items; each is formatted with ``%s``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and
    # also parses on Python 3.
    print("Writing results to file results")
    with open(path, "w") as result_file:
        # Wrap the item in a 1-tuple so an item that is itself a tuple is
        # written as one value instead of being unpacked as format arguments.
        result_file.writelines("%s\n" % (result,) for result in content)


def save_text_as_file(name, text):
    """Write ``text`` followed by a newline to the file ``name``.

    Args:
        name: Destination file; opened in "w" mode (overwrites).
        text: String to write.
    """
    # The context manager closes the file even if a write raises, and avoids
    # shadowing the ``file`` builtin.
    with open(name, "w") as output_file:
        output_file.write(text)
        output_file.write("\n")


if __name__ == '__main__':
    # Read the text to analyse as a list of lines, closing the handle afterwards.
    with open('./texts/toDetectFile', 'r') as source_file:
        text = source_file.readlines()

    # Alternating label / value entries.
    resultFileContent = [
        'average word length: ', average_word_length(text),
        'type token ratio: ', type_token_ratio(text),
        'hapax legomana ratio: ', hapax_legomana_ratio(text),
        'average sentence length: ', average_sentence_length(text),
        'average sentence complexity: ', avg_sentence_complexity(text),
    ]
    XmlParser.produce_xml_with_morphological_data('./numericalResults', './xmlResults/toDetect.xml')
    xml_tree = XmlParser('./xmlResults/toDetect.xml')
from XmlParser import XmlParser
from textCharacteristics import (
    average_word_length,
    type_token_ratio,
    hapax_legomana_ratio,
    average_sentence_length,
    avg_sentence_complexity,
)


def print_results_in_file(path, content):
    """Write every item of ``content`` to ``path``, one item per line.

    Args:
        path: Destination file; it is opened in "w" mode, so any existing
            content is overwritten.
        content: Iterable of items; each is formatted with ``%s``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and
    # also parses on Python 3.
    print("Writing results to file results")
    with open(path, "w") as result_file:
        # Wrap the item in a 1-tuple so an item that is itself a tuple is
        # written as one value instead of being unpacked as format arguments.
        result_file.writelines("%s\n" % (result,) for result in content)


def save_text_as_file(name, text):
    """Write ``text`` followed by a newline to the file ``name``.

    Args:
        name: Destination file; opened in "w" mode (overwrites).
        text: String to write.
    """
    # The context manager closes the file even if a write raises, and avoids
    # shadowing the ``file`` builtin.
    with open(name, "w") as output_file:
        output_file.write(text)
        output_file.write("\n")


if __name__ == '__main__':
    # Read the text to analyse as a list of lines, closing the handle afterwards.
    with open('./texts/toDetectFile', 'r') as source_file:
        text = source_file.readlines()

    # Alternating label / value entries.
    resultFileContent = [
        'average word length: ', average_word_length(text),
        'type token ratio: ', type_token_ratio(text),
        'hapax legomana ratio: ', hapax_legomana_ratio(text),
        'average sentence length: ', average_sentence_length(text),
        'average sentence complexity: ', avg_sentence_complexity(text),
    ]
    XmlParser.produce_xml_with_morphological_data('./numericalResults', './xmlResults/toDetect.xml')
    xml_tree = XmlParser('./xmlResults/toDetect.xml')