def safe_commit(conn, try_number, state_queue, id_process):
    import os
    import time
    import sqlite3 as sql
    from loacore.utils.status import ProcessState
    from loacore.conf import MAX_DB_COMMIT_ATTEMPTS
    try:
        conn.commit()
    except sql.OperationalError:
        if try_number <= MAX_DB_COMMIT_ATTEMPTS:
            try_number += 1
            if id_process is not None:
                print("[Process " + str(id_process) + "] Commit attempt number : " + str(try_number))
            else:
                print("Commit attempt number : " + str(try_number))
            if state_queue is not None:
                state_queue.put(
                    ProcessState(id_process, os.getpid(), "DB failed, retry.", str(try_number)))
            time.sleep(10)
            # try_number was already incremented above: pass it as-is so the
            # counter advances by one per retry, not two.
            safe_commit(conn, try_number, state_queue, id_process)
        else:
            if state_queue is not None:
                state_queue.put(
                    ProcessState(id_process, os.getpid(), "DB commit failed.", " X "))
            if id_process is not None:
                print("[Process " + str(id_process) + "] Commit fail.")
            else:
                print("Commit fail.")
def safe_execute(c, request, try_number, state_queue, id_process, mark_args=None, execute_many=False):
    import os
    import time
    import sqlite3 as sql
    from loacore.utils.status import ProcessState
    from loacore.conf import MAX_DB_COMMIT_ATTEMPTS
    try:
        if mark_args is not None:
            if not execute_many:
                c.execute(request, mark_args)
            else:
                c.executemany(request, mark_args)
        else:
            c.execute(request)
    except sql.OperationalError:
        if try_number <= MAX_DB_COMMIT_ATTEMPTS:
            try_number += 1
            if id_process is not None:
                print("[Process " + str(id_process) + "] Execute attempt number : " + str(try_number))
            else:
                print("Execute attempt number : " + str(try_number))
            if state_queue is not None:
                state_queue.put(
                    ProcessState(id_process, os.getpid(), "DB failed, retry.", str(try_number)))
            time.sleep(10)
            # Pass the already-incremented try_number, and keep the
            # execute_many flag, which the retry call previously dropped.
            safe_execute(c, request, try_number, state_queue, id_process,
                         mark_args=mark_args, execute_many=execute_many)
        else:
            if state_queue is not None:
                state_queue.put(
                    ProcessState(id_process, os.getpid(), "DB execute failed.", " X "))
            if id_process is not None:
                print("[Process " + str(id_process) + "] Execute fail.")
            else:
                print("Execute fail.")
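# Hedged usage sketch for the two retry helpers above, not part of the
# original API. It assumes DB_PATH and DB_TIMEOUT are importable from
# loacore.conf (the processing functions below use them this way), and that
# callers start the retry budget at try_number=0. Outside a worker process,
# the state queue and process id can simply be None.
def _demo_safe_db_usage():
    import sqlite3 as sql
    from loacore.conf import DB_PATH, DB_TIMEOUT

    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()
    # Any statement works; last_insert_rowid() is the pattern used below.
    safe_execute(c, "SELECT last_insert_rowid()", 0, None, None)
    safe_commit(conn, 0, None, None)
    conn.close()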
def printer(stdscr):
    import curses
    try:
        refresh_count = 0

        def plot_window():
            nonlocal refresh_count
            refresh_count += 1
            if refresh_count >= 1000:
                # Periodic full clear to get rid of any leftover artifacts.
                refresh_count = 0
                stdscr.clear()
            stdscr.move(0, 0)
            stdscr.addstr(0, 0, "Process")
            stdscr.addstr(0, 14, "PID")
            stdscr.addstr(0, 21, "Activity")
            stdscr.addstr(0, 45, "Progress")
            stdscr.move(0, 0)
            stdscr.chgat(curses.A_REVERSE)
            for i in range(min(curses.LINES - 1, num_process)):
                items = processes[i + 1].state_str()
                stdscr.move(i + 1, 0)
                stdscr.clrtoeol()
                stdscr.addstr(i + 1, 0, items[0])
                stdscr.addstr(i + 1, 14, items[1])
                stdscr.addstr(i + 1, 21, items[2])
                stdscr.addstr(i + 1, 45, items[3])
            if num_process + 1 <= curses.LINES:
                stdscr.move(num_process + 1, 0)
            stdscr.refresh()

        print("Printer initialized")
        for n in unterminated_processes:
            processes[n] = ProcessState(n, "-", "Waiting", "-")
        old_lines = curses.LINES
        while len(unterminated_processes) > 0:
            curses.update_lines_cols()
            if curses.LINES != old_lines:
                plot_window()
                old_lines = curses.LINES
            while not q.empty():
                state = q.get()
                processes[state.id_process] = state
                if state.activity == "Terminated" or state.activity == "DB error":
                    unterminated_processes.remove(state.id_process)
                plot_window()

        import os
        from loacore.conf import OUTPUT_PATH
        f = open(os.path.join(OUTPUT_PATH, "result.log"), "w")
        for i in processes.keys():
            items = processes[i].state_str()
            f.write(items[0] + '\t' + items[1] + '\t' + items[2] + '\t' + items[3] + '\t\n')
        f.close()
    except:
        # Leave a marker file behind: curses owns the terminal here, so a
        # printed traceback would be lost. "w" mode creates the file.
        import os
        from loacore.conf import OUTPUT_PATH
        file = open(os.path.join(OUTPUT_PATH, "debug_curse.txt"), "w")
        file.close()
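# Hedged usage sketch, not part of the original module: printer relies on
# q (a Queue of ProcessState updates), processes, num_process and
# unterminated_processes being visible in its enclosing scope, so the
# coordinating process is expected to drive it through curses.wrapper,
# which sets up and restores the terminal around printer(stdscr).
def _demo_run_printer():
    import curses
    curses.wrapper(printer)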
def _commit_state(state_queue, id_process, sentence_count, total_sentence):
    if state_queue is not None:
        state_queue.put(
            ProcessState(id_process, os.getpid(), "Synset DB commit...",
                         str(sentence_count) + " / " + str(total_sentence)))
    else:
        print("\r" + str(sentence_count) + " / " + str(total_sentence) + " sentences added.", end="")
def _tokenization_state(state_queue, id_process, review_count, total_review):
    if state_queue is not None:
        state_queue.put(
            ProcessState(id_process, os.getpid(), "Tokenization",
                         str(review_count) + " / " + str(total_review)))
    else:
        print("\r" + str(review_count) + " / " + str(total_review) + " reviews processed.", end="")
def _commit_polarity_state(state_queue, id_process, sentence_count, total_sentence):
    if state_queue is not None:
        state_queue.put(
            ProcessState(id_process, os.getpid(), "Add polarity to synset",
                         str(sentence_count) + " / " + str(total_sentence)))
    else:
        print("\r" + str(sentence_count) + " / " + str(total_sentence) + " polarities added.", end="")
def add_synsets_to_sentences(sentences, print_synsets=False, _state_queue=None, _id_process=None,
                             freeling_modules=None):
    """
    Performs a Freeling process to disambiguate the words of the sentences according to their
    context (UKB algorithm), linking each of them to a unique synset when possible.\n
    Our sentences are converted to Freeling Sentences before processing.\n
    Notice that even if we may have already computed the lemmas, for example, the Freeling
    Sentences generated from our sentences are "raw sentences", without any analysis linked to
    their Words. So the whole Freeling process is redone from scratch every time, except
    *tokenization* and *sentence splitting*, to avoid any confusion.

    .. note:: This function should be used only inside the :func:`file_process.add_files()` function.

    :param sentences: Sentences to process
    :type sentences: :obj:`list` of |Sentence|
    :param print_synsets: If True, print disambiguation results
    :type print_synsets: boolean
    """
    from loacore.conf import DB_TIMEOUT
    from loacore.utils.db import safe_commit, safe_execute

    freeling_sentences = [sentence.compute_freeling_sentence() for sentence in sentences]

    if freeling_modules is None:
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Loading Freeling...", " - "))
        morfo, tagger, sen, wsd = init_freeling()
    else:
        morfo, tagger, sen, wsd = freeling_modules

    _disambiguation_state(_state_queue, _id_process)

    # Perform morphosyntactic analysis and tagging
    processed_sentences = morfo.analyze(freeling_sentences)
    processed_sentences = tagger.analyze(processed_sentences)

    # Annotate and disambiguate senses
    processed_sentences = sen.analyze(processed_sentences)
    processed_sentences = wsd.analyze(processed_sentences)

    # Copy Freeling results into our Words
    for s in range(len(sentences)):
        sentence = sentences[s]
        if not len(sentence.words) == len(processed_sentences[s]):
            print("/!\\ Warning, sentence offset error in synset_process /!\\")
            print(sentence.sentence_str())
            print([w.get_form() for w in processed_sentences[s]])
        for w in range(len(sentence.words)):
            word = sentence.words[w]
            rank = processed_sentences[s][w].get_senses()
            if len(rank) > 0:
                if not rank[0][0][0] == '8':
                    # Ignore synset offsets "8.......-":
                    # they are odd synsets that WordNet can't find.
                    word.synset = Synset(None, word.id_word, rank[0][0],
                                         wn.of2ss(rank[0][0]).name(), None, None, None)
                    if print_synsets:
                        print("Word : " + word.word)
                        print("Synset code : " + rank[0][0])
                        print("Synset name : " + wn.of2ss(rank[0][0]).name())

    # Add synsets to database
    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()
    sentence_count = 0
    total_sentence = len(sentences)
    for sentence in sentences:
        # Print state
        sentence_count += 1
        _commit_state(_state_queue, _id_process, sentence_count, total_sentence)

        for word in sentence.words:
            synset = word.synset
            if synset is not None:
                # Add synset
                safe_execute(c,
                             "INSERT INTO Synset (ID_Word, Synset_Code, Synset_Name) "
                             "VALUES (?, ?, ?)",
                             0, _state_queue, _id_process,
                             mark_args=(word.id_word, synset.synset_code, synset.synset_name))

                # Get back the id of the last inserted synset
                safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
                id_synset = c.fetchone()[0]

                # Update Word table
                safe_execute(c,
                             "UPDATE Word SET ID_Synset = " + str(id_synset) +
                             " WHERE ID_Word = " + str(word.id_word),
                             0, _state_queue, _id_process)

    safe_commit(conn, 0, _state_queue, _id_process)
    conn.close()
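# Hedged usage sketch, not part of the original module: run disambiguation
# on sentences reloaded with their words. It assumes
# sentence_load.load_sentences accepts id_sentences/load_words the same way
# _split_reviews_process calls it below.
def _demo_add_synsets(id_sentences):
    import loacore.load.sentence_load as sentence_load
    sentences = sentence_load.load_sentences(id_sentences=id_sentences, load_words=True)
    # Sequential use: no state queue or process id needed.
    add_synsets_to_sentences(sentences, print_synsets=True)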
def _disambiguation_state(state_queue, id_process):
    if state_queue is not None:
        state_queue.put(ProcessState(id_process, os.getpid(), "Disambiguation", "-"))
    else:
        print("Disambiguation", end="\n")
def _split_reviews_process(reviews, freeling_modules, _state_queue=None, _id_process=None, interrupt=None):
    try:
        import os
        from loacore.utils.status import ProcessState

        # Tokenization + add all sentences and all words from all reviews
        import loacore.process.sentence_process as sentence_process
        added_sentences = sentence_process.add_sentences_from_reviews(
            reviews,
            _state_queue=_state_queue,
            _id_process=_id_process,
            freeling_modules=(freeling_modules["morfo"],
                              freeling_modules["tk"],
                              freeling_modules["sp"]))

        # Reload sentences with words
        import loacore.load.sentence_load as sentence_load
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Reload Sentences", "-"))
        else:
            print("Reload Sentences...")
        sentences = sentence_load.load_sentences(
            id_sentences=[s.id_sentence for s in added_sentences],
            load_words=True)

        # Some test outputs ############################################
        from loacore.conf import OUTPUT_PATH
        f = open(os.path.join(OUTPUT_PATH, "test_sentence.txt"), 'w')
        f.write(str(len(sentences)) + "\n")
        for s in sentences:
            f.write(str(len(s.words)) + "\t" + s.sentence_str() + "\n")
        f.close()
        #################################################################

        # Lemmatization
        import loacore.process.lemma_process as lemma_process
        lemma_process.add_lemmas_to_sentences(
            sentences,
            _state_queue=_state_queue,
            _id_process=_id_process,
            freeling_modules=freeling_modules["morfo"])

        # Disambiguation
        import loacore.process.synset_process as synset_process
        synset_process.add_synsets_to_sentences(
            sentences,
            _state_queue=_state_queue,
            _id_process=_id_process,
            freeling_modules=(freeling_modules["morfo"],
                              freeling_modules["tagger"],
                              freeling_modules["sen"],
                              freeling_modules["wsd"]))

        # Synset polarities
        id_words = [w.id_word for s in sentences for w in s.words]
        synset_process.add_polarity_to_synsets(id_words,
                                               _state_queue=_state_queue,
                                               _id_process=_id_process)

        # Dep tree
        import loacore.process.deptree_process as deptree_process
        deptree_process.add_dep_tree_from_sentences(
            sentences,
            _state_queue=_state_queue,
            _id_process=_id_process,
            freeling_modules=(freeling_modules["morfo"],
                              freeling_modules["tagger"],
                              freeling_modules["sen"],
                              freeling_modules["wsd"],
                              freeling_modules["parser"]))

        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Terminated", " - "))
    except:
        from loacore.conf import OUTPUT_PATH
        import logging
        import os
        # Leave a per-PID marker file ("w" creates it), then log the
        # full traceback and signal the parent through the interrupt queue.
        file = open(os.path.join(OUTPUT_PATH, str(os.getpid()) + ".txt"), "w")
        file.close()
        logging.basicConfig(filename=os.path.join(OUTPUT_PATH, "error_log.out"))
        logging.exception("Process " + str(os.getpid()) + " interrupted.")
        if interrupt is not None:
            interrupt.put("error")
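# Hedged usage sketch, not part of the original module: _split_reviews_process
# is written to run as a worker, reporting through a ProcessState queue and
# signalling fatal errors on an interrupt queue. The freeling_modules dict
# keys ("morfo", "tk", "sp", "tagger", "sen", "wsd", "parser") are exactly
# the ones the function accesses; building the modules is left to the
# caller's Freeling setup, and depending on how they pickle they may need to
# be initialised inside the worker instead of being passed in.
def _demo_spawn_split_process(reviews, freeling_modules):
    import multiprocessing as mp
    state_queue = mp.Queue()
    interrupt = mp.Queue()
    p = mp.Process(target=_split_reviews_process,
                   args=(reviews, freeling_modules),
                   kwargs={"_state_queue": state_queue,
                           "_id_process": 1,
                           "interrupt": interrupt})
    p.start()
    p.join()
    return state_queue, interrupt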
def _parsing_state(state_queue, state, id_process):
    if state_queue is not None:
        state_queue.put(ProcessState(id_process, os.getpid(), state, "-"))
    else:
        print(state, end="\n")
def add_dep_tree_from_sentences(sentences, print_result=False, _state_queue=None, _id_process=None,
                                freeling_modules=None):
    """
    Generates the dependency trees of the specified sentences and adds the results to the
    database.\n
    Sentences are first converted into "raw" Freeling sentences (without any analysis), and then
    all the necessary Freeling processes are performed.\n
    The PoS_tags of the words are also computed and added to the database in this function.\n

    .. note:: This function should be used only inside the :func:`file_process.add_files()` function.

    .. note:: This process can be quite long (at least a few minutes).

    :param sentences: Sentences to process
    :type sentences: :obj:`list` of |Sentence|
    :param print_result: Print PoS_tags and labels associated to each |Word|
    :type print_result: boolean
    """
    from loacore.utils.db import safe_commit, safe_execute
    from loacore.conf import DB_TIMEOUT

    if freeling_modules is None:
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Loading Freeling...", " - "))
        morfo, tagger, sen, wsd, parser = init_freeling()
    else:
        morfo, tagger, sen, wsd, parser = freeling_modules

    freeling_sentences = [sentence.compute_freeling_sentence() for sentence in sentences]

    # Print state
    _parsing_state(_state_queue, "DT Tagging...", _id_process)

    # Perform morphosyntactic analysis
    processed_sentences = morfo.analyze(freeling_sentences)
    processed_sentences = tagger.analyze(processed_sentences)

    # Print state
    _parsing_state(_state_queue, "DT Disambiguation...", _id_process)

    # Annotate and disambiguate senses
    processed_sentences = sen.analyze(processed_sentences)
    processed_sentences = wsd.analyze(processed_sentences)

    # Print state
    _parsing_state(_state_queue, "Dep Tree Parsing...", _id_process)

    # Dependency tree parsing
    processed_sentences = parser.analyze(processed_sentences)

    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()
    sentence_count = 0
    total_sentence = len(sentences)
    for s in range(len(sentences)):
        # Print state
        sentence_count += 1
        _commit_state(_state_queue, _id_process, sentence_count, total_sentence)

        sentence = sentences[s]

        # Add dep_tree to database
        dt = processed_sentences[s].get_dep_tree()
        dep_tree = DepTree(None, None, sentence.id_sentence)
        safe_execute(c, "INSERT INTO Dep_Tree (ID_Sentence) VALUES (?)", 0, _state_queue, _id_process,
                     mark_args=[dep_tree.id_sentence])

        # Get back id_dep_tree
        safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
        id_dep_tree = c.fetchone()[0]
        dep_tree.id_dep_tree = id_dep_tree

        # Database process
        root = None
        if not len(sentence.words) == len(processed_sentences[s]):
            print("/!\\ Warning, sentence offset error in deptree_process /!\\")
            print(sentence.sentence_str())
            print([w.get_form() for w in processed_sentences[s]])
        for w in range(len(sentence.words)):
            word = sentence.words[w]
            rank = processed_sentences[s][w].get_senses()
            if len(rank) > 0:
                word.PoS_tag = processed_sentences[s][w].get_tag()
                if print_result:
                    print("Word : " + word.word)
                    print("PoS_tag : " + processed_sentences[s][w].get_tag())
                    print("Label : " + dt.get_node_by_pos(w).get_label())

            # We use the get_node_by_pos function to map the tree to our sentence
            node = dt.get_node_by_pos(w)
            dep_tree_node = DepTreeNode(None, id_dep_tree, word.id_word, node.get_label(), 0)
            if node == dt.begin():
                dep_tree_node.root = 1
                root = dep_tree_node

            # Add DepTreeNode to database
            safe_execute(c,
                         "INSERT INTO Dep_Tree_Node (ID_Dep_Tree, ID_Word, Label, root) "
                         "VALUES (?, ?, ?, ?)",
                         0, _state_queue, _id_process,
                         mark_args=(dep_tree_node.id_dep_tree, dep_tree_node.id_word,
                                    dep_tree_node.label, dep_tree_node.root))

            # Get back id_dep_tree_node
            safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
            id_dep_tree_node = c.fetchone()[0]
            dep_tree_node.id_dep_tree_node = id_dep_tree_node

            # Use the Freeling set_node_id function to store our db node id in the Freeling node
            node.set_node_id(str(id_dep_tree_node))

            # Add PoS_tag to Word
            if word.PoS_tag is not None:
                safe_execute(c,
                             "UPDATE Word SET PoS_tag = '" + word.PoS_tag + "' "
                             "WHERE ID_Word = " + str(word.id_word),
                             0, _state_queue, _id_process)

        # Add dep_tree root to database
        dep_tree.root = root
        safe_execute(c,
                     "UPDATE Dep_Tree SET ID_Dep_Tree_Node = " + str(root.id_dep_tree_node) + " "
                     "WHERE ID_Dep_Tree = " + str(id_dep_tree),
                     0, _state_queue, _id_process)

        # Add children relations
        root_node = dt.begin()
        _rec_children(c, root_node, _state_queue, _id_process)

    if _state_queue is None:
        print("")
    safe_commit(conn, 0, _state_queue, _id_process)
    conn.close()
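# Hedged usage sketch, not part of the original module: when the Freeling
# modules have already been initialised by the caller, the
# (morfo, tagger, sen, wsd, parser) tuple can be passed in to skip the
# per-call init_freeling() load, mirroring the unpacking above.
def _demo_add_dep_trees(sentences, morfo, tagger, sen, wsd, parser):
    add_dep_tree_from_sentences(sentences,
                                print_result=False,
                                freeling_modules=(morfo, tagger, sen, wsd, parser))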
def add_sentences_from_reviews(reviews, _state_queue=None, _id_process=None, freeling_modules=None):
    """
    Performs the first Freeling process applied to each normalized review.\n
    Each review is tokenized and then split into sentences, thanks to the corresponding Freeling
    modules.\n
    A representation of the Sentences and their Words (tokens) is then added to the corresponding
    tables.

    .. note:: This function should be used only inside the :func:`file_process.add_files()` function.

    :param reviews: Reviews to process
    :type reviews: :obj:`list` of |Review|
    :return: added sentences
    :rtype: :obj:`list` of |Sentence|
    """
    from loacore.classes.classes import Word
    from loacore.utils.db import safe_commit, safe_execute
    from loacore.conf import DB_TIMEOUT

    if freeling_modules is None:
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Loading Freeling...", " - "))
        morfo, tk, sp = init_freeling()
    else:
        morfo, tk, sp = freeling_modules

    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()

    added_sentences = []

    review_count = 0
    try:
        total_review = len(reviews)
    except TypeError:
        # reviews is a ReviewIterator of unknown length.
        # Note: the second pass below re-iterates reviews, so a one-shot
        # iterator would arrive there exhausted.
        total_review = " - "

    for review in reviews:
        # Print state
        review_count += 1
        _tokenization_state(_state_queue, _id_process, review_count, total_review)

        raw_review = review.review
        tokens = tk.tokenize(raw_review)
        sentences = sp.split(tokens)
        sentences = morfo.analyze(sentences)

        review_index = 0
        for sentence in sentences:
            if len(sentence) <= 50:
                review_sentence = Sentence(None, review.id_review, review_index, None)
                review_index += 1
                # Add words
                sentence_index = 0
                for word in sentence:
                    review_sentence.words.append(
                        Word(None, None, sentence_index, word.get_form(), None, None, None))
                    sentence_index += 1
                review.sentences.append(review_sentence)

    sentence_count = 0
    total_sentence = len([s for r in reviews for s in r.sentences])
    for r in reviews:
        for s in r.sentences:
            # Print state
            sentence_count += 1
            _commit_state(_state_queue, _id_process, sentence_count, total_sentence)

            # Add sentence
            safe_execute(c,
                         "INSERT INTO Sentence (ID_Review, Review_Index) VALUES (?, ?)",
                         0, _state_queue, _id_process,
                         mark_args=(s.id_review, s.review_index))

            # Get back id of last inserted sentence
            safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
            id_sentence = c.fetchone()[0]
            s.id_sentence = id_sentence

            sql_words = []
            for w in s.words:
                w.id_sentence = id_sentence
                sql_words.append((id_sentence, w.sentence_index, w.word))
            safe_execute(c,
                         "INSERT INTO Word (ID_Sentence, Sentence_Index, word) VALUES (?, ?, ?)",
                         0, _state_queue, _id_process,
                         mark_args=sql_words, execute_many=True)

            added_sentences.append(s)

    if _state_queue is None:
        print("")
    safe_commit(conn, 0, _state_queue, _id_process)
    conn.close()
    return added_sentences
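# Hedged usage sketch, not part of the original module: tokenize and split a
# list of already-normalized reviews, then read back the database ids that
# the function fills in on the returned Sentence objects.
def _demo_add_sentences(reviews):
    added = add_sentences_from_reviews(reviews)
    return [s.id_sentence for s in added]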
def add_lemmas_to_sentences(sentences, print_lemmas=False, _state_queue=None, _id_process=None,
                            freeling_modules=None):
    """
    Performs a Freeling process to add lemmas to words.\n
    Although lemmas are attached to words, the argument is a list of sentences, to better fit
    Freeling usage.\n
    Our sentences are converted to Freeling Sentences before processing.

    .. note:: This function should be used only inside the :func:`file_process.add_files()` function.

    :param sentences: Sentences to process
    :type sentences: :obj:`list` of |Sentence|
    :param print_lemmas: If True, print lemmatization results
    :type print_lemmas: boolean
    """
    from loacore.utils.db import safe_commit, safe_execute
    from loacore.conf import DB_TIMEOUT

    freeling_sentences = [sentence.compute_freeling_sentence() for sentence in sentences]

    if freeling_modules is None:
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Loading Freeling...", " - "))
        morfo = init_freeling()
    else:
        morfo = freeling_modules

    # Print state
    _lemmatization_state(_state_queue, _id_process)

    processed_sentences = morfo.analyze(freeling_sentences)

    # Copy Freeling results into our Words
    for s in range(len(sentences)):
        sentence = sentences[s]
        if not len(sentence.words) == len(processed_sentences[s]):
            print("/!\\ Warning, sentence offset error in lemma_process /!\\")
            print(sentence.sentence_str())
            print([w.get_form() for w in processed_sentences[s]])
        for w in range(len(sentence.words)):
            word = sentence.words[w]
            word.lemma = processed_sentences[s][w].get_lemma()
            if print_lemmas:
                print(word.word + " : " + word.lemma)

    # Add lemmas to database
    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()
    sentence_count = 0
    total_sentence = len(sentences)
    _commit_state(_state_queue, _id_process, " - ", " - ")
    for sentence in sentences:
        # Print state
        sentence_count += 1
        _commit_state(_state_queue, _id_process, sentence_count, total_sentence)

        for word in sentence.words:
            # Add lemma to Lemma table
            safe_execute(c, "INSERT INTO Lemma (Lemma, ID_Word) VALUES (?, ?)",
                         0, _state_queue, _id_process,
                         mark_args=(word.lemma, word.id_word))

            # Get back id of last inserted lemma
            safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
            id_lemma = c.fetchone()[0]

            # Update Word table
            safe_execute(c,
                         "UPDATE Word SET ID_Lemma = " + str(id_lemma) +
                         " WHERE ID_Word = " + str(word.id_word),
                         0, _state_queue, _id_process)

    if _state_queue is None:
        print("")
    safe_commit(conn, 0, _state_queue, _id_process)
    conn.close()
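# Hedged usage sketch, not part of the original module. Unlike the other
# processing functions, freeling_modules here is a single morphological
# analyzer rather than a tuple, as the unpacking above shows.
def _demo_add_lemmas(sentences, morfo=None):
    if morfo is not None:
        add_lemmas_to_sentences(sentences, print_lemmas=True, freeling_modules=morfo)
    else:
        # Falls back to the function's own init_freeling() call.
        add_lemmas_to_sentences(sentences, print_lemmas=True)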
def _lemmatization_state(state_queue, id_process):
    if state_queue is not None:
        state_queue.put(ProcessState(id_process, os.getpid(), "Lemmatization", "-"))
    else:
        print("Lemmatization", end="\n")