Esempio n. 1
0
    def process_raw_data_literal(self, parser_name, strings, correct_patterns):
        """
        Score the literal patterns found in the given strings against the "golden standard".

        parser_name is handed to compare_strings as the parser and strings as the text.
        Every detected pattern whose third element equals the third element of any entry in
        correct_patterns adds 100 / len(correct_patterns) to the score.

        Returns a tuple ( score, score_info ); score_info is a string listing both the detected
        patterns and the correct patterns.
        """

        regex = regex4dummies()

        # Detecting the literal patterns within the given strings
        detected_patterns = regex.compare_strings(parser=parser_name, pattern_detection="literal", text=strings)

        # Every detected pattern that is part of the "golden standard" earns an equal share of 100
        # NOTE(review): the score is not capped, so it can exceed 100 when more detected patterns
        # match than there are correct patterns -- confirm this is intended
        score = 0.00
        for detected in detected_patterns:
            if any(detected[2] == correct[2] for correct in correct_patterns):
                score += 100.00 / len(correct_patterns)

        # Creating the information used to debug a failing comparison
        score_info = "Test pattern: " + str(detected_patterns) + "\n\n" + "Correct pattern: " + str(correct_patterns) + "\n\n"

        return score, score_info
Esempio n. 2
0
    def generate_parsed_text( self ):
        """
        Poll the input box once per second and display its literal and semantic parse.

        On every pass the contents of self.input_text are read. When they differ from the
        "Enter text to parse" placeholder, the text is split into sentences with TextBlob,
        compared with regex4dummies and the resulting report replaces the contents of
        self.output_text. Otherwise any existing output is cleared.

        The loop never returns, so the caller is expected to run it where blocking is acceptable.
        """

        def describe_sentences( parsed_sentences ):
            # Appending ( not overwriting ) one labelled line per field, so that every parsed
            # sentence shows up in the report instead of only the last reliability score
            information = ""
            for parsed in parsed_sentences:
                information += "[ Pattern ]          : " + parsed.pattern + "\n"
                information += "[ Subject ]          : " + parsed.subject + "\n"
                information += "[ Verb ]             : " + parsed.verb + "\n"
                information += "[ Object ]           : " + parsed.object[0] + "\n"
                information += "[ Reliability Score ]: " + str( parsed.reliability_score ) + "\n"

            return information

        while True:
            input_text = self.input_text.get( "1.0", tk.END )
            output_text = self.output_text.get( "1.0", tk.END )

            if str( input_text ) != "Enter text to parse\n":
                # Splitting the input into plain sentence strings
                sentences = [ str( sentence ) for sentence in TextBlob( input_text ).sentences ]

                regex = regex4dummies()

                # Literal parse and the information gathered for it
                final_literal_text = regex.compare_strings( 'pattern', True, sentences )
                final_literal_information = describe_sentences( regex.get_sentence_information() )

                # Semantic parse and the information gathered for it
                # NOTE(review): this call passes two arguments while the literal call above passes
                # three ( parser first ) -- confirm against the installed compare_strings signature
                final_semantic_text = regex.compare_strings( False, sentences )
                final_semantic_information = describe_sentences( regex.get_sentence_information() )

                # Preparing final display data
                final_text = (
                    "Literal Parse:\n" + str( final_literal_text )
                    + "\n\nInformation:\n" + final_literal_information
                    + "\n---------------------------------\n\n"
                    + "Semantic Parse:\n" + str( final_semantic_text )
                    + "\n\nInformation:\n" + final_semantic_information
                )

                # The output box is read-only, so it is unlocked only while it is rewritten
                self.output_text['state'] = 'normal'
                self.output_text.delete( "1.0", tk.END )
                self.output_text.insert( "1.0", final_text )
                self.output_text['state'] = 'disabled'
            elif output_text != "":
                self.output_text['state'] = 'normal'
                self.output_text.delete( "1.0", tk.END )
                self.output_text['state'] = 'disabled'

            time.sleep( 1 )
Esempio n. 3
0
    def process_topics(self, strings, correct_topics):
        """
        process_topics finds the topics of the given strings and returns the topic score.

        strings is handed to get_topics as the text. Every entry of correct_topics that equals
        the first element of any identified topic adds 100 / len(correct_topics) to the score.

        Returns a tuple ( topic_score, topic_score_info ); topic_score_info is a string listing
        the identified topics and the correct topics.
        """

        regex = regex4dummies()

        # Identifying topics discovered by the parsers in the given strings
        test_topics = regex.get_topics(text=strings)

        # Comparing topics & Generating topic score
        # 100.0 forces true division, so the shares are not truncated by integer division on
        # Python 2 ( this matches the float arithmetic used for the literal pattern score )
        topic_score = 0
        for topic in correct_topics:
            if any(topic == test_topic[0] for test_topic in test_topics):
                topic_score += 100.0 / len(correct_topics)

        # Creating the information surrounding the test used to debug regex4dummies
        topic_score_info = "Topics Identified: " + str(test_topics) + "\n\n" + "Correct Topics: " + str(correct_topics)

        # Returning the final information
        return topic_score, topic_score_info
Esempio n. 4
0
# Importing regex4dummies
from regex4dummies import regex4dummies
from toolkit import Toolkit
from tests import MainTests

# Building the test runner first, then the two objects it exercises, and running the tests
tester = MainTests()
library_under_test = regex4dummies()
toolkit_under_test = Toolkit()
tester.run_tests(library_under_test, toolkit_under_test)
Esempio n. 5
0
    def generate_parsed_text(self):
        """
        Poll the input box once per second and display the parse results.

        On every pass the contents of self.input_text are read. When they differ from the
        "Enter text to parse" placeholder, the text is split into sentences with TextBlob and
        run through regex4dummies in literal and semantic mode. The report ( parses, pattern
        information, topics and, on version 1.4.5, the tokenized input and its dependencies )
        replaces the contents of self.output_text. Otherwise any existing output is cleared.

        The loop never returns, so the caller is expected to run it where blocking is acceptable.
        """

        def describe_patterns(parsed_patterns):
            # Appending ( not overwriting ) one labelled line per field, so that every parsed
            # pattern shows up in the report instead of only the last reliability score
            information = ""
            for parsed in parsed_patterns:
                information += "[ Pattern ]          : " + parsed.pattern + "\n"
                information += "[ Subject ]          : " + parsed.subject + "\n"
                information += "[ Verb ]             : " + parsed.verb + "\n"
                information += "[ Object ]           : " + parsed.object[0] + "\n"
                information += "[ Reliability Score ]: " + str(parsed.reliability_score) + "\n"

            return information

        while True:
            input_text = self.input_text.get("1.0", tk.END)
            output_text = self.output_text.get("1.0", tk.END)

            if str(input_text) != "Enter text to parse\n":
                # Splitting the input into plain sentence strings
                sentences = [str(sentence) for sentence in TextBlob(input_text).sentences]

                regex = regex4dummies()

                # Getting tokenized string and dependencies ( if library is up to date )
                # NOTE(review): this is an exact match, so every other version string
                # ( including newer releases ) takes the legacy branch -- confirm this is intended
                if regex.__version__ == "1.4.5":
                    final_literal_text = regex.compare_strings(parser='default', pattern_detection="literal", text=sentences)
                    final_literal_information = describe_patterns(regex.get_pattern_information())

                    final_semantic_text = regex.compare_strings(parser='', pattern_detection="semantic", text=sentences)
                    final_semantic_information = describe_patterns(regex.get_pattern_information())

                    # Getting possible topics for semantic information
                    topics = regex.get_topics(text=sentences)

                    # Instantiating toolkit object
                    gui_toolkit = Toolkit()

                    # Getting the tokenized input
                    tokenized_input = gui_toolkit.tokenize(text=input_text, parser="pattern")

                    # Getting the dependency input
                    dependency_input = gui_toolkit.find_dependencies(text=input_text, parser="pattern")

                    # Sections only this branch can provide
                    extra_sections = "\n\nTokenized input:\n" + str(tokenized_input) + "\n\nDependencies of input:\n" + str(dependency_input)
                else:
                    final_literal_text = regex.compare_strings('default', True, sentences)
                    final_literal_information = describe_patterns(regex.get_sentence_information())

                    final_semantic_text = regex.compare_strings('', False, sentences)
                    final_semantic_information = describe_patterns(regex.get_sentence_information())

                    # Getting possible topics for semantic information
                    topics = regex.get_pattern_topics()

                    extra_sections = ""

                # Preparing final display data
                final_text = (
                    "Literal Parse:\n" + str(final_literal_text)
                    + "\n\nInformation:\n" + final_literal_information
                    + "\n---------------------------------\n\n"
                    + "Semantic Parse:\n" + str(final_semantic_text)
                    + "\n\nInformation:\n" + final_semantic_information
                    + "\n\nTopics:\n" + str(topics)
                    + extra_sections
                )

                # Displaying final data ( the output box is unlocked only while it is rewritten )
                self.output_text['state'] = 'normal'
                self.output_text.delete("1.0", tk.END)
                self.output_text.insert("1.0", final_text)
                self.output_text['state'] = 'disabled'
            elif output_text != "":
                self.output_text['state'] = 'normal'
                self.output_text.delete("1.0", tk.END)
                self.output_text['state'] = 'disabled'

            time.sleep(1)
Esempio n. 6
0

"""
Test information:

    Tests:
    - nltk parser
    - literal parser
    - pattern parser
    - nlpnet parser

Version: 1.4.6
"""

# Creating test object
regex = regex4dummies()
tool_tester = Toolkit()

# Testing version variable
print(regex.__version__)

# Testing the 'nltk' parser
print(regex.compare_strings(parser='nltk', pattern_detection="semantic", text=["Back at my desk, I poured and killed him a rattlesnake and some more rattlesnake", "the cat and the mouse in the house is sitting, in the house, on the mat", "time is it?", "what time is it here?", "This is the cat's hat"]))

# Displaying the information gathered for every pattern
# Single-argument print( ... ) calls are used throughout: they print the same text under the
# Python 2 print statement and are valid syntax on Python 3
sentence_information = regex.get_pattern_information()
for sentence in sentence_information:
    print("[ Pattern ]          : " + sentence.pattern)
    print("[ Subject ]          : " + sentence.subject)
    print("[ Verb ]             : " + sentence.verb)
    print("[ Object ]           : " + sentence.object[0])
    print("[ Reliability Score ]: " + str(sentence.reliability_score))
Esempio n. 7
0
    def process_raw_data_semantic(self, parser_name, information, correct_topics):
        """
        This function processes the raw data and turns it into useful information.
        That data provides useful statistics about the library's current "health".
        The function returns the overall score for the data, and specific statistics of the data.

        parser_name    -- handed to compare_strings as the parser
        information    -- information[0] is the text to parse; information[1] maps every expected
                          pattern to [ subject, verb, object, prepositional phrases,
                          reliability score, applicability score ]
        correct_topics -- not used by this function

        Every expected pattern can earn up to 100 points, one sixth for each field that equals
        the parsed value; a pattern the parser did not produce earns nothing. The score is the
        average over all expected patterns, or 100 when there are no expected patterns.

        Returns a tuple ( compatibility_score, compatibility_score_info ); the info string holds
        the parsed information followed by the expected information.
        """

        regex = regex4dummies()

        # Getting the semantic patterns within the test strings
        regex.compare_strings(parser=parser_name, pattern_detection="semantic", text=information[0])

        # Gathering the "meaning" in these sentences, keyed by pattern
        test_information = {}
        for sentence in regex.get_pattern_information():
            test_information[sentence.pattern] = [
                sentence.subject,
                sentence.verb,
                sentence.object[0],
                sentence.prepositional_phrases,
                sentence.reliability_score,
                sentence.applicability_score,
            ]

        # 100.0 forces true division: with integer division ( Python 2 ) each field would only
        # be worth 16 points and a perfect match could never reach 100
        field_count = 6
        field_weight = 100.0 / field_count

        compatibility_score = 0.00
        compare_index = 0
        for base_pattern in information[1]:
            base_entry = information[1][base_pattern]
            base_fields = [base_entry[field_index] for field_index in range(field_count)]

            # Every expected pattern counts towards the average, whether it was found or not
            compare_index += 1

            try:
                test_fields = test_information[base_pattern]
            except KeyError:
                # The pattern was not in the gathered data. The score will not have anything additional added to it, and the loop will continue
                continue

            # Comparing field by field and generating score
            for test_value, base_value in zip(test_fields, base_fields):
                if test_value == base_value:
                    compatibility_score += field_weight

        # Generating the semantic score
        if compare_index != 0:
            compatibility_score = compatibility_score / compare_index
        else:
            compatibility_score = 100

        compatibility_score_info = str(test_information) + "\n\n"
        compatibility_score_info += str(information[1])

        # Returning the final score and all other relevant information
        return compatibility_score, compatibility_score_info
Esempio n. 8
0
from regex4dummies import regex4dummies
from regex4dummies import Toolkit
"""
Test information:

    Tests:
    - nltk parser
    - literal parser
    - pattern parser
    - nlpnet parser

Version: 1.4.6
"""

# Creating the library object and the toolkit object under test
regex = regex4dummies()
tool_tester = Toolkit()

# Showing which version of the library is being tested
print(regex.__version__)

# Running the 'nltk' parser in semantic mode over the sample sentences and showing the result
nltk_sample_sentences = [
    "Back at my desk, I poured and killed him a rattlesnake and some more rattlesnake",
    "the cat and the mouse in the house is sitting, in the house, on the mat",
    "time is it?",
    "what time is it here?",
    "This is the cat's hat",
]
nltk_semantic_patterns = regex.compare_strings(
    parser='nltk',
    pattern_detection="semantic",
    text=nltk_sample_sentences,
)
print(nltk_semantic_patterns)
Esempio n. 9
0
    def generate_parsed_text(self):
        """
        Poll the input box once per second and display the parse results.

        On every pass the contents of self.input_text are read. When they differ from the
        "Enter text to parse" placeholder, the text is split into sentences with TextBlob and
        run through regex4dummies in literal and semantic mode. The report ( parses, pattern
        information, topics and, on version 1.4.5, the tokenized input and its dependencies )
        replaces the contents of self.output_text. Otherwise any existing output is cleared.

        The loop never returns, so the caller is expected to run it where blocking is acceptable.
        """

        def describe_patterns(parsed_patterns):
            # Appending ( not overwriting ) one labelled line per field, so that every parsed
            # pattern shows up in the report instead of only the last reliability score
            lines = []
            for parsed in parsed_patterns:
                lines.append("[ Pattern ]          : " + parsed.pattern + "\n")
                lines.append("[ Subject ]          : " + parsed.subject + "\n")
                lines.append("[ Verb ]             : " + parsed.verb + "\n")
                lines.append("[ Object ]           : " + parsed.object[0] + "\n")
                lines.append("[ Reliability Score ]: " +
                             str(parsed.reliability_score) + "\n")

            return "".join(lines)

        while True:
            input_text = self.input_text.get("1.0", tk.END)
            output_text = self.output_text.get("1.0", tk.END)

            if str(input_text) != "Enter text to parse\n":
                # Splitting the input into plain sentence strings
                sentences = [
                    str(sentence) for sentence in TextBlob(input_text).sentences
                ]

                regex = regex4dummies()

                # Getting tokenized string and dependencies ( if library is up to date )
                # NOTE(review): this is an exact match, so every other version string
                # ( including newer releases ) takes the legacy branch -- confirm this is intended
                if regex.__version__ == "1.4.5":
                    final_literal_text = regex.compare_strings(
                        parser='default',
                        pattern_detection="literal",
                        text=sentences)
                    final_literal_information = describe_patterns(
                        regex.get_pattern_information())

                    final_semantic_text = regex.compare_strings(
                        parser='',
                        pattern_detection="semantic",
                        text=sentences)
                    final_semantic_information = describe_patterns(
                        regex.get_pattern_information())

                    # Getting possible topics for semantic information
                    topics = regex.get_topics(text=sentences)

                    # Instantiating toolkit object
                    gui_toolkit = Toolkit()

                    # Getting the tokenized input
                    tokenized_input = gui_toolkit.tokenize(text=input_text,
                                                           parser="pattern")

                    # Getting the dependency input
                    dependency_input = gui_toolkit.find_dependencies(
                        text=input_text, parser="pattern")

                    # Sections only this branch can provide
                    extra_sections = "\n\nTokenized input:\n" + str(
                        tokenized_input
                    ) + "\n\nDependencies of input:\n" + str(dependency_input)
                else:
                    final_literal_text = regex.compare_strings(
                        'default', True, sentences)
                    final_literal_information = describe_patterns(
                        regex.get_sentence_information())

                    final_semantic_text = regex.compare_strings(
                        '', False, sentences)
                    final_semantic_information = describe_patterns(
                        regex.get_sentence_information())

                    # Getting possible topics for semantic information
                    topics = regex.get_pattern_topics()

                    extra_sections = ""

                # Preparing final display data
                final_text = (
                    "Literal Parse:\n" + str(final_literal_text)
                    + "\n\nInformation:\n" + final_literal_information
                    + "\n---------------------------------\n\n"
                    + "Semantic Parse:\n" + str(final_semantic_text)
                    + "\n\nInformation:\n" + final_semantic_information
                    + "\n\nTopics:\n" + str(topics)
                    + extra_sections
                )

                # Displaying final data ( the output box is unlocked only while it is rewritten )
                self.output_text['state'] = 'normal'
                self.output_text.delete("1.0", tk.END)
                self.output_text.insert("1.0", final_text)
                self.output_text['state'] = 'disabled'
            elif output_text != "":
                self.output_text['state'] = 'normal'
                self.output_text.delete("1.0", tk.END)
                self.output_text['state'] = 'disabled'

            time.sleep(1)