def process_raw_data_literal(self, parser_name, strings, correct_patterns):
    """
    Score the literal patterns detected in `strings` against the expected
    ( "golden standard" ) patterns.

    Every detected pattern whose element at index 2 equals the element at
    index 2 of at least one expected pattern adds 100 / len(correct_patterns)
    to the score. Because the count is driven by the detected patterns, the
    score can exceed 100 when more detected patterns match than there are
    expected patterns.

    Returns a ( score, score_info ) tuple, where score_info is a printable
    dump of both pattern collections.
    """

    library = regex4dummies()

    # Asking the requested parser for the literal patterns in the strings
    found_patterns = library.compare_strings(parser=parser_name, pattern_detection="literal", text=strings)

    # Crediting every detected pattern that appears in the golden standard
    score = 0.00
    for found in found_patterns:
        # any() stops at the first hit, so one detected pattern is credited at most once
        if any(found[2] == expected[2] for expected in correct_patterns):
            score += 100.00 / len(correct_patterns)

    # Debug dump of what was detected and what was expected
    score_info = "Test pattern: " + str(found_patterns) + "\n\n" + "Correct pattern: " + str(correct_patterns) + "\n\n"

    return score, score_info
def generate_parsed_text(self):
    """
    Keep the output widget in sync with the text typed into the input widget.

    Runs forever. Once per second the input widget is read; unless it still
    holds the "Enter text to parse" placeholder, the text is split into
    sentences with TextBlob, parsed literally and semantically with
    regex4dummies, and the combined report replaces the contents of the
    output widget. While the placeholder is showing, the output widget is
    cleared instead. The output widget is only switched to 'normal' for the
    duration of each update and is left 'disabled' afterwards.

    This method never returns.
    NOTE(review): presumably it is started on a background thread - confirm
    against the caller.
    """

    def describe_patterns(sentence_information):
        """ Return one five-line summary per parsed sentence, concatenated. """
        lines = []
        for sentence in sentence_information:
            # Each field is appended; assigning with "=" here would discard
            # everything but the very last line of the very last sentence.
            lines.append("[ Pattern ] : " + sentence.pattern + "\n")
            lines.append("[ Subject ] : " + sentence.subject + "\n")
            lines.append("[ Verb ] : " + sentence.verb + "\n")
            lines.append("[ Object ] : " + sentence.object[0] + "\n")
            lines.append("[ Reliability Score ]: " + str(sentence.reliability_score) + "\n")

        return "".join(lines)

    while True:
        input_text = self.input_text.get("1.0", tk.END)
        output_text = self.output_text.get("1.0", tk.END)

        if str(input_text) != "Enter text to parse\n":
            sentences = [str(sentence) for sentence in TextBlob(input_text).sentences]

            regex = regex4dummies()

            # Literal parse and the details of every pattern it produced
            final_literal_text = regex.compare_strings('pattern', True, sentences)
            final_literal_information = describe_patterns(regex.get_sentence_information())

            # Semantic parse and the details of every pattern it produced
            # NOTE(review): the literal call above passes three positional
            # arguments while this one passes two; left untouched - confirm
            # against the compare_strings signature of the targeted release.
            final_semantic_text = regex.compare_strings(False, sentences)
            final_semantic_information = describe_patterns(regex.get_sentence_information())

            final_text = (
                "Literal Parse:\n" + str(final_literal_text)
                + "\n\nInformation:\n" + final_literal_information
                + "\n---------------------------------\n\n"
                + "Semantic Parse:\n" + str(final_semantic_text)
                + "\n\nInformation:\n" + final_semantic_information
            )

            # The widget is read-only for the user, so it is unlocked only while writing
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text.insert("1.0", final_text)
            self.output_text['state'] = 'disabled'
        elif output_text != "":
            # Placeholder is showing: wipe any stale report
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text['state'] = 'disabled'

        time.sleep(1)
def process_topics(self, strings, correct_topics):
    """
    process_topics finds the topics of the given strings and returns the
    topic score.

    The score is the percentage ( 0.0 - 100.0 ) of entries in correct_topics
    that equal the first element of at least one topic returned by
    regex4dummies.get_topics. It is computed in floating point so that it is
    not truncated by integer division on Python 2 ( e.g. 3 of 3 topics now
    scores 100.0 rather than 99 ). An empty correct_topics scores 0.0.

    Returns a ( topic_score, topic_score_info ) tuple, where topic_score_info
    is a printable dump of the identified and the expected topics.
    """

    regex = regex4dummies()

    # Identifying topics discovered by the parsers in the given set of strings
    test_topics = regex.get_topics(text=strings)

    # Comparing topics: an expected topic counts once, however often it was identified
    matched_topics = sum(
        1 for topic in correct_topics
        if any(topic == test_topic[0] for test_topic in test_topics)
    )

    # Generating topic score ( guarding the division for an empty expectation list )
    topic_total = len(correct_topics)
    if topic_total != 0:
        topic_score = 100.0 * matched_topics / topic_total
    else:
        topic_score = 0.0

    # Creating the information surrounding the test used to debug regex4dummies
    topic_score_info = "Topics Identified: " + str(test_topics) + "\n\n" + "Correct Topics: " + str(correct_topics)

    # Returning the final information
    return topic_score, topic_score_info
# Importing regex4dummies, its toolkit and the test suite
from regex4dummies import regex4dummies
from toolkit import Toolkit
from tests import MainTests

# Creating the test runner and the objects it exercises
tester = MainTests()
library = regex4dummies()
toolkit_instance = Toolkit()

# Running tests
tester.run_tests(library, toolkit_instance)
def generate_parsed_text(self):
    """
    Keep the output widget in sync with the text typed into the input widget.

    Runs forever. Once per second the input widget is read; unless it still
    holds the "Enter text to parse" placeholder, the text is split into
    sentences with TextBlob and parsed literally and semantically with
    regex4dummies. The report always contains both parses, the details of
    every pattern and the topics; when the library reports version "1.4.5"
    it additionally contains the tokenized input and its dependencies as
    produced by Toolkit. While the placeholder is showing, the output widget
    is cleared instead. The output widget is only switched to 'normal' for
    the duration of each update and is left 'disabled' afterwards.

    This method never returns.
    NOTE(review): presumably it is started on a background thread - confirm
    against the caller.
    """

    def describe_patterns(pattern_information):
        """ Return one five-line summary per parsed sentence, concatenated. """
        lines = []
        for sentence in pattern_information:
            # Each field is appended; assigning with "=" here would discard
            # everything but the very last line of the very last sentence.
            lines.append("[ Pattern ] : " + sentence.pattern + "\n")
            lines.append("[ Subject ] : " + sentence.subject + "\n")
            lines.append("[ Verb ] : " + sentence.verb + "\n")
            lines.append("[ Object ] : " + sentence.object[0] + "\n")
            lines.append("[ Reliability Score ]: " + str(sentence.reliability_score) + "\n")

        return "".join(lines)

    while True:
        input_text = self.input_text.get("1.0", tk.END)
        output_text = self.output_text.get("1.0", tk.END)

        if str(input_text) != "Enter text to parse\n":
            sentences = [str(sentence) for sentence in TextBlob(input_text).sentences]

            regex = regex4dummies()

            # Getting tokenized string and dependencies ( if library is up to date )
            # NOTE(review): this is an exact match on "1.4.5"; every other
            # version, newer ones included, takes the legacy branch - confirm
            # that this is intended.
            if regex.__version__ == "1.4.5":
                final_literal_text = regex.compare_strings(parser='default', pattern_detection="literal", text=sentences)
                final_literal_information = describe_patterns(regex.get_pattern_information())

                final_semantic_text = regex.compare_strings(parser='', pattern_detection="semantic", text=sentences)
                final_semantic_information = describe_patterns(regex.get_pattern_information())

                # Getting possible topics for semantic information
                topics = regex.get_topics(text=sentences)

                # Getting the tokenized input and the dependency input
                gui_toolkit = Toolkit()
                tokenized_input = gui_toolkit.tokenize(text=input_text, parser="pattern")
                dependency_input = gui_toolkit.find_dependencies(text=input_text, parser="pattern")

                extra_sections = (
                    "\n\nTokenized input:\n" + str(tokenized_input)
                    + "\n\nDependencies of input:\n" + str(dependency_input)
                )
            else:
                # Legacy API: positional arguments and differently named accessors
                final_literal_text = regex.compare_strings('default', True, sentences)
                final_literal_information = describe_patterns(regex.get_sentence_information())

                final_semantic_text = regex.compare_strings('', False, sentences)
                final_semantic_information = describe_patterns(regex.get_sentence_information())

                # Getting possible topics for semantic information
                topics = regex.get_pattern_topics()

                extra_sections = ""

            # Preparing final display data
            final_text = (
                "Literal Parse:\n" + str(final_literal_text)
                + "\n\nInformation:\n" + final_literal_information
                + "\n---------------------------------\n\n"
                + "Semantic Parse:\n" + str(final_semantic_text)
                + "\n\nInformation:\n" + final_semantic_information
                + "\n\nTopics:\n" + str(topics)
                + extra_sections
            )

            # Displaying final data ( the widget is unlocked only while writing )
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text.insert("1.0", final_text)
            self.output_text['state'] = 'disabled'
        elif output_text != "":
            # Placeholder is showing: wipe any stale report
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text['state'] = 'disabled'

        time.sleep(1)
"""
Test information:

Tests:
- nltk parser
- literal parser
- pattern parser
- nlpnet parser

Version: 1.4.6
"""

# Creating test object
regex = regex4dummies()
tool_tester = Toolkit()

# Testing version variable
print(regex.__version__)

# Testing the 'nltk' parser
print(regex.compare_strings(
    parser='nltk',
    pattern_detection="semantic",
    text=[
        "Back at my desk, I poured and killed him a rattlesnake and some more rattlesnake",
        "the cat and the mouse in the house is sitting, in the house, on the mat",
        "time is it?",
        "what time is it here?",
        "This is the cat's hat",
    ]))

# Printing the details of every pattern found by the comparison above.
# Each print takes a single parenthesised expression, which prints the same
# text under Python 2 and is also valid Python 3, matching the calls above.
sentence_information = regex.get_pattern_information()
for sentence in sentence_information:
    print("[ Pattern ] : " + sentence.pattern)
    print("[ Subject ] : " + sentence.subject)
    print("[ Verb ] : " + sentence.verb)
    print("[ Object ] : " + sentence.object[0])
    print("[ Reliability Score ]: " + str(sentence.reliability_score))
def process_raw_data_semantic(self, parser_name, information, correct_topics):
    """
    This function processes the raw data and turns it into useful
    information. That data provides useful statistics about the library's
    current "health".

    information[0] holds the test strings and information[1] maps every
    expected pattern to its six expected fields, in this order: subject,
    verb, object, prepositional phrases, reliability score, applicability
    score. The strings are parsed semantically with the requested parser and
    every expected pattern is compared field by field with the pattern of
    the same name that the parser produced.

    The score is the percentage ( 0.0 - 100.0 ) of expected fields that were
    reproduced exactly; an expected pattern the parser did not produce
    contributes six misses. It is computed in floating point so that it is
    not truncated by integer division on Python 2 ( a perfect match now
    scores 100.0 rather than 96.0 ). When there are no expected patterns the
    score is 100.0.

    correct_topics is accepted for interface compatibility and is not used.

    Returns a ( compatibility_score, compatibility_score_info ) tuple, where
    compatibility_score_info is a printable dump of the parsed and the
    expected information.
    """

    # Number of fields stored per pattern ( see the order in the docstring )
    field_count = 6

    regex = regex4dummies()

    # Getting the semantic patterns within the test strings
    regex.compare_strings(parser=parser_name, pattern_detection="semantic", text=information[0])

    # Gathering the "meaning" the parser found in these sentences
    test_information = {}
    for sentence in regex.get_pattern_information():
        test_pattern = sentence.pattern
        test_information[test_pattern] = [
            sentence.subject,
            sentence.verb,
            sentence.object[0],
            sentence.prepositional_phrases,
            sentence.reliability_score,
            sentence.applicability_score,
        ]

    # Comparing the gathered data with the expected data
    expected_information = information[1]
    matched_fields = 0
    pattern_count = 0
    for pattern in expected_information:
        pattern_count += 1

        # Read before the lookup below so a malformed expectation still
        # raises, whether or not the parser produced this pattern
        expected_fields = [expected_information[pattern][index] for index in range(field_count)]

        # The pattern was not in the gathered data. Nothing is added to the
        # score, but the pattern still counts towards the average
        if pattern not in test_information:
            continue

        found_fields = test_information[pattern]
        matched_fields += sum(
            1 for index in range(field_count)
            if found_fields[index] == expected_fields[index]
        )

    # Generating the semantic score
    if pattern_count != 0:
        compatibility_score = 100.0 * matched_fields / (field_count * pattern_count)
    else:
        compatibility_score = 100.0

    compatibility_score_info = str(test_information) + "\n\n"
    compatibility_score_info += str(expected_information)

    # Returning the final score and all other relevant information
    return compatibility_score, compatibility_score_info
from regex4dummies import regex4dummies
from regex4dummies import Toolkit

"""
Test information:

Tests:
- nltk parser
- literal parser
- pattern parser
- nlpnet parser

Version: 1.4.6
"""

# Creating test object
regex = regex4dummies()
tool_tester = Toolkit()

# Testing version variable
print(regex.__version__)

# Sentences fed to the 'nltk' parser
nltk_test_sentences = [
    "Back at my desk, I poured and killed him a rattlesnake and some more rattlesnake",
    "the cat and the mouse in the house is sitting, in the house, on the mat",
    "time is it?",
    "what time is it here?",
    "This is the cat's hat",
]

# Testing the 'nltk' parser
nltk_patterns = regex.compare_strings(
    parser='nltk',
    pattern_detection="semantic",
    text=nltk_test_sentences)
print(nltk_patterns)
def generate_parsed_text(self):
    """
    Refresh the output widget from the input widget, once per second, forever.

    On every pass the input widget is read. If it holds anything other than
    the "Enter text to parse" placeholder, the text is split into sentences
    with TextBlob, parsed literally and semantically with regex4dummies, and
    a report is written to the output widget. The report lists both parses,
    the details of every pattern found by each parse, and the topics; when
    the library reports version "1.4.5" the tokenized input and its
    dependencies ( from Toolkit ) are appended as well. If the placeholder
    is showing, the output widget is cleared. The output widget is set to
    'normal' only while it is being modified and is returned to 'disabled'
    afterwards.

    This method never returns.
    NOTE(review): presumably it is started on a background thread - confirm
    against the caller.
    """

    def describe_patterns(pattern_information):
        """ Return one five-line summary per parsed sentence, concatenated. """
        lines = []
        for sentence in pattern_information:
            # Lines are accumulated; the previous plain assignments kept
            # only the final "Reliability Score" line of the last sentence.
            lines.append("[ Pattern ] : " + sentence.pattern + "\n")
            lines.append("[ Subject ] : " + sentence.subject + "\n")
            lines.append("[ Verb ] : " + sentence.verb + "\n")
            lines.append("[ Object ] : " + sentence.object[0] + "\n")
            lines.append("[ Reliability Score ]: " + str(sentence.reliability_score) + "\n")

        return "".join(lines)

    while True:
        input_text = self.input_text.get("1.0", tk.END)
        output_text = self.output_text.get("1.0", tk.END)

        if str(input_text) != "Enter text to parse\n":
            sentences = [str(sentence) for sentence in TextBlob(input_text).sentences]

            regex = regex4dummies()

            # Getting tokenized string and dependencies ( if library is up to date )
            # NOTE(review): the check is an exact match on "1.4.5", so any
            # other version ( newer ones included ) uses the legacy branch -
            # confirm that this is intended.
            if regex.__version__ == "1.4.5":
                final_literal_text = regex.compare_strings(
                    parser='default', pattern_detection="literal", text=sentences)
                final_literal_information = describe_patterns(regex.get_pattern_information())

                final_semantic_text = regex.compare_strings(
                    parser='', pattern_detection="semantic", text=sentences)
                final_semantic_information = describe_patterns(regex.get_pattern_information())

                # Getting possible topics for semantic information
                topics = regex.get_topics(text=sentences)

                # Instantiating toolkit object
                gui_toolkit = Toolkit()

                # Getting the tokenized input
                tokenized_input = gui_toolkit.tokenize(text=input_text, parser="pattern")

                # Getting the dependency input
                dependency_input = gui_toolkit.find_dependencies(text=input_text, parser="pattern")

                extra_sections = (
                    "\n\nTokenized input:\n" + str(tokenized_input)
                    + "\n\nDependencies of input:\n" + str(dependency_input)
                )
            else:
                # Legacy API: positional arguments and differently named accessors
                final_literal_text = regex.compare_strings('default', True, sentences)
                final_literal_information = describe_patterns(regex.get_sentence_information())

                final_semantic_text = regex.compare_strings('', False, sentences)
                final_semantic_information = describe_patterns(regex.get_sentence_information())

                # Getting possible topics for semantic information
                topics = regex.get_pattern_topics()

                extra_sections = ""

            # Preparing final display data
            final_text = (
                "Literal Parse:\n" + str(final_literal_text)
                + "\n\nInformation:\n" + final_literal_information
                + "\n---------------------------------\n\n"
                + "Semantic Parse:\n" + str(final_semantic_text)
                + "\n\nInformation:\n" + final_semantic_information
                + "\n\nTopics:\n" + str(topics)
                + extra_sections
            )

            # Displaying final data
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text.insert("1.0", final_text)
            self.output_text['state'] = 'disabled'
        elif output_text != "":
            # Placeholder is showing: remove any report left from earlier input
            self.output_text['state'] = 'normal'
            self.output_text.delete("1.0", tk.END)
            self.output_text['state'] = 'disabled'

        time.sleep(1)