from XmlParser import XmlParser
from textCharacteristics import average_word_length, type_token_ratio, hapax_legomana_ratio, \
    average_sentence_length, avg_sentence_complexity


def save_characteristics_in_db(file_path, author_name, xml_file_path):
    # Assumes a module-level MongoDB `collection` object is provided elsewhere
    # in the project.
    with open(file_path, 'r') as text_file:
        text = text_file.readlines()
    xml_tree = XmlParser(xml_file_path)

    author_position_characteristics = {
        "name": author_name,
        "average_word_length": average_word_length(text),
        "type_token_ratio": type_token_ratio(text),
        "hapax_legomana_ratio": hapax_legomana_ratio(text),
        "average_sentence_length": average_sentence_length(text),
        "average_sentence_complexity": avg_sentence_complexity(text),
        # "base_words": xml_tree.get_base_form_words(),
        # "parts_of_speech_frequencies": xml_tree.get_parts_of_speech_frequency(),
        # "words_count": xml_tree.get_number_of_words()
    }
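    # Look up an existing profile for this author: merge into it if present,
    # otherwise store this text's characteristics as a new profile.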
    author_data = collection.find_one({"name": author_name})
    if author_data:
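        # Blend the stored averages with the new text's averages, weighting
        # each side by its share of the combined word count.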
        print author_position_characteristics["words_count"]
        old_positions_impact = author_data["words_count"] / float(
            author_position_characteristics["words_count"] +
            author_data["words_count"])
        print old_positions_impact
        new_position_impact = 1 - old_positions_impact
        characteristics_to_recalculate = [
            "average_word_length", "type_token_ratio", "hapax_legomana_ratio",
            "average_sentence_length", "average_sentence_complexity"
        ]
        recalculate_simple_characteristics_keeping_impact(
            author_data, old_positions_impact, author_position_characteristics,
            new_position_impact, characteristics_to_recalculate)

        recalculate_morphological_characteristics_keeping_impact(
            author_data, old_positions_impact, author_position_characteristics,
            new_position_impact, "parts_of_speech_frequencies")
        collection.find_one_and_replace({"name": author_name}, author_data)

    else:
        collection.insert_one(author_position_characteristics)
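

# The recalculation helpers called above are not shown in this file. Below is a
# minimal sketch of what they presumably do, assuming each characteristic is a
# weighted average of the stored value and the new text's value (weights =
# old_positions_impact / new_position_impact). This is an illustrative guess,
# not the project's actual implementation.
def recalculate_simple_characteristics_keeping_impact(author_data, old_impact,
                                                      new_data, new_impact,
                                                      characteristics):
    # Blend each scalar characteristic by word-count weight.
    for key in characteristics:
        author_data[key] = (author_data[key] * old_impact +
                            new_data[key] * new_impact)
    # Keep the running word count in step so future weights stay proportional.
    author_data["words_count"] += new_data["words_count"]


def recalculate_morphological_characteristics_keeping_impact(author_data, old_impact,
                                                             new_data, new_impact,
                                                             key):
    # Same blending for a dict of part-of-speech frequencies; missing tags count as 0.
    merged = {}
    for tag in set(author_data[key]) | set(new_data[key]):
        merged[tag] = (author_data[key].get(tag, 0) * old_impact +
                       new_data[key].get(tag, 0) * new_impact)
    author_data[key] = merged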


def print_results_in_file(path, content):
    print "Writing results to file results"
    with open(path, "w") as result_file:
        for result in content:
            result_file.write("%s\n" % result)


def save_text_as_file(name, text):
    # Write the text followed by a trailing newline.
    with open(name, "w") as output_file:
        output_file.write(text)
        output_file.write("\n")


if __name__ == '__main__':
    with open('./texts/toDetectFile', 'r') as text_file:
        text = text_file.readlines()
    resultFileContent = [
        'average word length: ',
        average_word_length(text), 'type token ratio: ',
        type_token_ratio(text), 'hapax legomana ratio: ',
        hapax_legomana_ratio(text), 'average sentence length: ',
        average_sentence_length(text), 'average sentence complexity: ',
        avg_sentence_complexity(text)
    ]
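    # Hypothetical step: write the numerical results to './numericalResults',
    # the path that produce_xml_with_morphological_data is given below.
    print_results_in_file('./numericalResults', resultFileContent)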
    XmlParser.produce_xml_with_morphological_data('./numericalResults',
                                                  './xmlResults/toDetect.xml')
    xml_tree = XmlParser('./xmlResults/toDetect.xml')
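    # xml_tree presumably supplies the morphological data (parts-of-speech
    # frequencies, word counts) for later comparison against the stored author
    # profiles.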