Example #1
    def build_language_model(self, word_occurrences_model):
        """
        From a given word occurrences model, build a probability language model.
        """

        start_time = time.time()
        print(
            "Message=\"Starting to train a language model from a word occurrences model\""
        )

        for word, word_occurrence_node in word_occurrences_model.items():
            word_count = word_occurrence_node.word_count
            self.total_words_count += word_count

            probability_node = self.build_word_occurrence_probability_node(
                word, word_count, word_occurrence_node.children_nodes)
            self.probability_nodes[word] = probability_node

        self.calculate_nodes_probability(self.probability_nodes,
                                         self.total_words_count)
        self.sorted_probability_nodes = sort_probability_list(
            self.probability_nodes.values())

        print(
            "ElapsedTime={}, TotalWordsCount={}, TotalDistinctWordsCount={}, Message=\"Finished training language model from word occurrences model\""
            .format(get_elapsed_time(start_time), self.total_words_count,
                    len(self.probability_nodes)))
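The helper routines referenced above (build_word_occurrence_probability_node, calculate_nodes_probability and sort_probability_list) are not part of this example. A minimal sketch of the probability logic they plausibly implement, written here as free functions and assuming each probability node exposes word_count, probability and children_nodes attributes:

# Hypothetical sketch -- the real helpers are not shown in this example.
def calculate_nodes_probability(probability_nodes, total_words_count):
    """Assign each node the relative frequency of its word, then recurse into
    its children using the parent's count as the denominator."""
    for node in probability_nodes.values():
        node.probability = node.word_count / total_words_count
        if node.children_nodes:
            calculate_nodes_probability(node.children_nodes, node.word_count)


def sort_probability_list(probability_nodes):
    """Return the probability nodes ordered from most to least probable."""
    return sorted(probability_nodes, key=lambda node: node.probability, reverse=True)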
Example #2
def extract_all_sentences_from_files(source_directory):
    """
    Extract all sentences from all files located in a source directory.
    """

    start_time = time.time()

    sentences = list()
    files = os.listdir(source_directory)

    print("FilesCount={}, Message=\"Starting to extract sentences from files\"".format(len(files)))

    for file in files:
        if not file.endswith(".txt"):
            print("FileName={}, Message=\"Ignoring file with invalid structure. File extension needs to be .txt for sentences to be loaded\"".format(file))
            continue

        file_path = os.path.join(source_directory, file)

        sentences_file = extract_sentences(file_path)
        sentences.extend(sentences_file)

    print("ElapsedTime={}, FilesCount={}, TotalSentencesCount={}, "
          "Message=\"Finished extracting sentences from files\"".format(get_elapsed_time(start_time), len(files), len(sentences)))

    return sentences
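extract_sentences is not included in this snippet. A minimal sketch under the assumption that every corpus file is plain UTF-8 text with one sentence per line:

def extract_sentences(file_path):
    """Read a text file and return its non-empty lines as sentences."""
    with open(file_path, "r", encoding="utf-8") as text_file:
        return [line.strip() for line in text_file if line.strip()]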
Example #3
def save_language_model(language_model_data, path):
    """
    Save the language model to a pickle file so it can later be used by the language model service.
    """

    start_time = time.time()
    print("Message=\"Starting to save language model\"")

    with open(path, "wb") as pickle_file:
        pickle.dump(language_model_data, pickle_file)

    print("ElapsedTime={}, Message=\"Finished saving language model.\"".format(
        get_elapsed_time(start_time)))
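The loading counterpart used by the language model service is not shown here. Reading the model back is a plain pickle.load call; this is only a sketch, the service's actual loader may differ:

import pickle

def load_language_model(path):
    """Load a language model previously saved with save_language_model."""
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)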
Example #4
def trim_word_occurrences_model(word_occurrences_model):
    """
    Trim the word occurrences model by removing rarely occurring nodes, in order to reduce the model size and cut down noisy data.
    """

    start_time = time.time()
    print("BaseLevelNodes={}, Message=\"Starting to trim base word occurrence model\"".format(len(word_occurrences_model)))

    total_nodes, trimmed_nodes = trim_nodes(word_occurrences_model, 10)

    print("ElapsedTime={}, OriginalNodeCount={}, TrimmedNodes={}, UpdatedBaseLevelNodes={}, Message=\"Finished trimming word occurrence model\""
          .format(get_elapsed_time(start_time), total_nodes, trimmed_nodes, len(word_occurrences_model)))

    return word_occurrences_model
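trim_nodes is called with a hard-coded threshold of 10 but is not defined in this example. A minimal sketch, assuming it recursively drops nodes seen fewer times than the threshold and reports how many nodes it visited and removed:

def trim_nodes(nodes, minimum_word_count):
    """Recursively remove nodes whose word_count is below minimum_word_count.
    Returns a tuple (total_nodes_visited, trimmed_nodes_count)."""
    total_nodes = 0
    trimmed_nodes = 0
    for word in list(nodes.keys()):
        node = nodes[word]
        total_nodes += 1
        if node.word_count < minimum_word_count:
            # Dropping the node also drops its whole subtree.
            del nodes[word]
            trimmed_nodes += 1
            continue
        child_total, child_trimmed = trim_nodes(node.children_nodes,
                                                minimum_word_count)
        total_nodes += child_total
        trimmed_nodes += child_trimmed
    return total_nodes, trimmed_nodes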
Example #5
def create_word_occurrence_model(n_gram_level, list_of_sentences_as_words):
    """
    Create an n-gram word occurrences model represented as a tree. We will only consider words that are valid by using
    the dictionary of words.
    """

    start_time = time.time()
    print("SentencesCount={}, Message=\"Starting to create base word occurrence model from sentences\"".format(len(list_of_sentences_as_words)))

    word_occurrences_model = dict()

    mk_dictionary = get_dictionary_words(settings["dictionary_file_path"])

    for words in list_of_sentences_as_words:

        sentence_word_count = len(words)

        for index, current_word in enumerate(words):
            if current_word not in mk_dictionary:
                continue

            if current_word not in word_occurrences_model:
                word_occurrences_model[current_word] = Node(current_word)

            current_word_combination = word_occurrences_model[current_word]
            current_word_combination.word_count += 1

            for n_gram_step in range(1, n_gram_level):
                if index + n_gram_step >= sentence_word_count:
                    break

                next_word = words[index+n_gram_step]

                if next_word not in mk_dictionary:
                    break

                current_word_combination.add_occurrence(next_word)
                current_word_combination = current_word_combination.get_or_add_node(next_word)
                current_word_combination.word_count += 1

    print("ElapsedTime={}, SentencesCount={}, Message=\"Finished creating base word occurrence model from sentences\""
          .format(get_elapsed_time(start_time), len(list_of_sentences_as_words)))

    return word_occurrences_model
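The Node class with add_occurrence and get_or_add_node is assumed by this example but not included. A minimal sketch consistent with how it is used above; attribute names other than word_count and children_nodes are guesses:

class Node:
    """A word in the occurrence tree, with counts for the words that follow it."""

    def __init__(self, word):
        self.word = word
        self.word_count = 0
        self.occurrences = dict()     # next word -> how often it follows this word
        self.children_nodes = dict()  # next word -> child Node

    def add_occurrence(self, next_word):
        """Count one more occurrence of next_word directly after this word."""
        self.occurrences[next_word] = self.occurrences.get(next_word, 0) + 1

    def get_or_add_node(self, next_word):
        """Return the child node for next_word, creating it if it does not exist yet."""
        if next_word not in self.children_nodes:
            self.children_nodes[next_word] = Node(next_word)
        return self.children_nodes[next_word]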
Example #6
def build_word_occurrence_model(list_of_sentences_as_words):
    """
    For a given list of sentences represented as a list of words, build a word occurrences model. The depth of the model
    should depend on the setting 'n_gram_level'.
    The building of the model has 2 phases:
    1. Create the model using all the sentences.
    2. Trim the model in order to reduce the size of the model and to cut down noisy data.
    The word occurrences model will be represented as a dictionary where keys are the first-level words and values are
    Node objects.
    """

    n_gram_level = settings["n_gram_level"]

    start_time = time.time()
    print("SentencesCount={}, Message=\"Starting to build word occurrence model\"".format(len(list_of_sentences_as_words)))

    word_occurrences_model = create_word_occurrence_model(n_gram_level, list_of_sentences_as_words)

    word_occurrences_model = trim_word_occurrences_model(word_occurrences_model)

    print("ElapsedTime={}s, SentencesCount={}, Message=\"Finished building word occurrence model\""
          .format(get_elapsed_time(start_time), len(list_of_sentences_as_words)))

    return word_occurrences_model
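A usage sketch with hypothetical input; the real settings object (providing 'n_gram_level' and 'dictionary_file_path') is loaded elsewhere in the project:

# Hypothetical, already tokenised sentences -- real corpora are far larger.
list_of_sentences_as_words = [["the", "cat", "sleeps"], ["the", "cat", "purrs"]]

# Phase 1 builds the occurrence tree, phase 2 trims rare chains, so with a
# corpus this small most nodes would be trimmed away again.
word_occurrences_model = build_word_occurrence_model(list_of_sentences_as_words)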
Example #7
def convert_sentences_to_list_of_words(sentences, names_set, keep_names=False):
    """
    For a given list of sentences, convert each sentence to a list of words. Optionally (unless keep_names is set),
    all names and digits are replaced with special placeholder tokens so that they do not carry a distinct meaning.
    We return a list of lists of words where each word is lower-cased.
    """

    start_time = time.time()
    print("SentencesCount={}, Message=\"Starting to convert sentences to lists of words\"".format(len(sentences)))

    sentences_to_words = list()

    for sentence in sentences:
        words = token_to_words(sentence)

        if keep_names:
            sentences_to_words.append(words)
        else:
            sentences_to_words.append([check_word_type(word, names_set) for word in words])

    print("ElapsedTime={}, SentencesCount={}, Message=\"Finished converting sentences to list of words\""
          .format(get_elapsed_time(start_time), len(sentences)))

    return sentences_to_words
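token_to_words and check_word_type are helpers assumed by this example but not part of the snippet. A minimal sketch of the replacement behaviour the docstring describes; the placeholder tokens are illustrative, not the project's actual values:

def token_to_words(sentence):
    """Split a sentence into lower-cased word tokens."""
    return sentence.lower().split()


def check_word_type(word, names_set):
    """Replace names and digit tokens with placeholders; keep other words unchanged."""
    if word in names_set:
        return "<name>"
    if word.isdigit():
        return "<number>"
    return word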
        sys.exit(0)

    start_time = time.time()
    print("ModelName=\"{}\", Message=\"Starting language model training\"".
          format(model_name))

    sentences = extract_all_sentences_from_files(
        settings["text_corpus_directory"])

    # TODO: Delete
    print(sentences[:20])

    names_set = read_names(settings["male_names_file_path"],
                           settings["female_names_file_path"])

    list_of_sentences_as_words = convert_sentences_to_list_of_words(
        sentences, names_set)

    word_occurrence_model = build_word_occurrence_model(
        list_of_sentences_as_words)

    language_model = LanguageModel()
    language_model.build_language_model(word_occurrence_model)

    model_data = language_model.prepare_model_data_for_saving()
    save_language_model(model_data, model_name)

    print(
        "TotalElapsedTime={}, ModelName=\"{}\", Message=\"Finished language model training\""
        .format(get_elapsed_time(start_time), model_name))
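get_elapsed_time appears in every example above but is never defined in them. A minimal sketch that formats the wall-clock seconds elapsed since the given start time:

import time

def get_elapsed_time(start_time):
    """Return the seconds elapsed since start_time as a formatted string."""
    return "{:.2f}".format(time.time() - start_time)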