def test_lowercase(self):
    """Mixed-case and lower-case input must normalize to the same string."""
    mixed_case = "This Is A Text"
    lower_case = "this is a text"

    normalized_mixed = NormalizeText(mixed_case).returnfinalstring()
    normalized_lower = NormalizeText(lower_case).returnfinalstring()

    self.assertEqual(normalized_mixed, normalized_lower)
def test_punctuation(self):
    """Differently punctuated inputs must normalize to the same string."""
    a = "a-b{} (ON)"
    b = "a*b** *ON*"

    test1 = NormalizeText(a).returnfinalstring()
    test2 = NormalizeText(b).returnfinalstring()

    # assertTrue(first, second) uses ``second`` only as the failure message,
    # so it never compared the two results; assertEqual actually does.
    self.assertEqual(test1, test2)
def test_whitespace(self):
    """Both spellings of the sentence must normalize to the same string."""
    a = "Hello This is SPDX."
    b = "Hello`This`is`SPDX."

    test1 = NormalizeText(a).returnfinalstring()
    test2 = NormalizeText(b).returnfinalstring()

    # assertTrue(first, second) uses ``second`` only as the failure message,
    # so it never compared the two results; assertEqual actually does.
    self.assertEqual(test1, test2)
def test_bullets(self):
    """Inputs differing in leading letter and version must stay different.

    The two texts differ in their leading ``A.``/``B.`` marker and in the
    version number (2.3 vs 2.4); their normalized forms must not be equal.
    """
    a = "A. Hello. version 2.3"
    b = "B. Hello. version 2.4"

    test1 = NormalizeText(a).returnfinalstring()
    test2 = NormalizeText(b).returnfinalstring()

    # assertNotEqual reports both values on failure, unlike comparing the
    # boolean ``test1 == test2`` against False.
    self.assertNotEqual(test1, test2)
def test_copyrightsymbol(self):
    """The word "copyright" and the "(c)" symbol must normalize identically."""
    word_form = "copyright"
    symbol_form = "(c)"

    normalized_word = NormalizeText(word_form).returnfinalstring()
    normalized_symbol = NormalizeText(symbol_form).returnfinalstring()

    self.assertEqual(normalized_word, normalized_symbol)
def test_equivalent(self):
    """Equivalent word pairs (analogue/analog, while/whilst) must match."""
    first_variant = "I study analogue while "
    second_variant = "I study analog whilst "

    normalized_first = NormalizeText(first_variant).returnfinalstring()
    normalized_second = NormalizeText(second_variant).returnfinalstring()

    self.assertEqual(normalized_first, normalized_second)
def test_omitable_text(self):
    """Template text wrapped in optional markers must match the bare text.

    The template wraps the sentence in ``<<beginOptional>>`` /
    ``<<endOptional>>``; the license text is the same sentence without the
    markers. After the omitable-text pass the two must compare as a match.
    """
    template_source = "<<beginOptional>>Hello I am a Test.<<endOptional>>"
    text_source = "Hello I am a Test."

    prepared_template = NormalizeText(
        template_source).returnfinalstring_for_template()
    prepared_text = NormalizeText(
        text_source).returnfinalstring_for_template()

    normalizer = NormalizeTemplate(prepared_text, prepared_template)
    # Same pass order as the original: squeeze, drop optional text, squeeze.
    normalizer.remove_repeating_chars()
    normalizer.remove_omitable_text()
    normalizer.remove_repeating_chars()

    final_text = normalizer.return_normalized_text()
    final_template = normalizer.return_normalized_template()

    self.assertTrue(CompareNormalizedFiles(final_template, final_text))
def test_replaceable_text(self):
    """A ``<<var;...>>`` field in the template must match the supplied text.

    The template carries a replaceable field with ``match=".{0,20}"``; the
    license text supplies its own wording in that position. After the
    replaceable-text pass the two must compare as a match.
    """
    template_source = " <<var;name=\"copyright\";original=\"Hello This is a test.\";match=\".{0,20}\">> Test"
    text_source = " Hello This Test"

    prepared_template = NormalizeText(
        template_source).returnfinalstring_for_template()
    prepared_text = NormalizeText(
        text_source).returnfinalstring_for_template()

    normalizer = NormalizeTemplate(prepared_text, prepared_template)
    # Same pass order as the original: squeeze, resolve variables, squeeze.
    normalizer.remove_repeating_chars()
    normalizer.remove_replaceable_text()
    normalizer.remove_repeating_chars()

    final_text = normalizer.return_normalized_text()
    final_template = normalizer.return_normalized_template()

    self.assertTrue(CompareNormalizedFiles(final_template, final_text))
def test_normalize_unmatch(self):
    """Check that the two input text files normalize to the same string.

    Reads the files named by ``input_text`` and ``input_text_unmatch``,
    passes each through ``NormalizeText`` and compares the results of
    ``returnfinalstring()``.
    """
    # The ``with`` block closes each file on exit; no explicit close() needed.
    with open(input_text, 'r') as inputfile:
        input_text_string = inputfile.read()
    normalized_text_string = NormalizeText(
        input_text_string).returnfinalstring()

    with open(input_text_unmatch, 'r') as input_file:
        input_text_string2 = input_file.read()
    normalized_text_string2 = NormalizeText(
        input_text_string2).returnfinalstring()

    # Compare the strings directly so a failure shows the actual difference
    # rather than just "True != False".
    self.assertEqual(normalized_text_string2, normalized_text_string)
def test_template_mismatch(self):
    """Check that the mismatching text does not match the template.

    Reads the files named by ``input_text_mismatch`` and ``input_template``,
    prepares both with ``returnfinalstring_for_template()``, runs
    ``NormalizeTemplate.normalize_template()`` and expects
    ``CompareNormalizedFiles`` to report no match.
    """
    # The ``with`` block closes each file on exit; no explicit close() needed.
    with open(input_text_mismatch, 'r') as inputfile:
        input_text_string = inputfile.read()
    prepared_text = NormalizeText(
        input_text_string).returnfinalstring_for_template()

    with open(input_template, 'r') as input_file:
        raw_template = input_file.read()
    # Separate names for the raw and the prepared template keep the two
    # stages distinguishable.
    prepared_template = NormalizeText(
        raw_template).returnfinalstring_for_template()

    y = NormalizeTemplate(prepared_text, prepared_template)
    y.normalize_template()
    normalized_template_string = y.return_normalized_template()
    normalized_text_string = y.return_normalized_text()

    self.assertFalse(
        CompareNormalizedFiles(normalized_template_string,
                               normalized_text_string))
def test_main_script(self):
    """Match the input text against every template and check the hit list.

    Reads the file named by ``input_text``, then walks every entry of
    ``directory``. Each entry is normalized as a template and compared with
    the text. The names of matching entries must equal
    ``["AAL.template.txt"]`` (order-insensitive).
    """
    matches_list = ["AAL.template.txt"]
    list_of_matches = []

    # The ``with`` block closes the file on exit; no explicit close() needed.
    with open(input_text, 'r') as inputfile:
        input_text_string = inputfile.read()
    normalized_text_string = NormalizeText(
        input_text_string).returnfinalstring_for_template()

    for filename in os.scandir(directory):
        # DirEntry.name is the bare file name, which is what the manual
        # prefix/separator stripping produced.
        file_name = filename.name
        print(file_name)
        try:
            with open(filename.path, 'r') as input_file:
                raw_template = input_file.read()
            prepared_template = NormalizeText(
                raw_template).returnfinalstring_for_template()
            y = NormalizeTemplate(normalized_text_string, prepared_template)
            y.normalize_template()
            normalized_template_string = y.return_normalized_template()
            # Keep the per-template result in its own variable. Writing it
            # back to ``normalized_text_string`` would feed text already
            # adjusted for this template into every later comparison.
            text_for_this_template = y.return_normalized_text()
        except Exception:
            # Best effort: skip entries that cannot be read or normalized.
            # ``Exception`` (not ``BaseException``) lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
        if CompareNormalizedFiles(normalized_template_string,
                                  text_for_this_template):
            list_of_matches.append(file_name)
            print("The Text matches with the Template- " + file_name)

    self.assertCountEqual(list_of_matches, matches_list)
def main():
    """Compare one license text with one license template.

    Reads the files named by ``input_license_text`` and
    ``input_license_template``, prepares both with ``NormalizeText``, runs
    them through ``NormalizeTemplate`` and prints whether they match. On a
    mismatch the differences produced by ``Generate_Differences`` are
    pretty-printed.
    """
    with open(input_license_text, 'r') as text_handle:
        raw_text = text_handle.read()
    prepared_text = NormalizeText(raw_text).returnfinalstring_for_template()

    with open(input_license_template, 'r') as template_handle:
        raw_template = template_handle.read()
    prepared_template = NormalizeText(
        raw_template).returnfinalstring_for_template()

    normalizer = NormalizeTemplate(prepared_text, prepared_template)
    normalizer.normalize_template()
    normalized_text = normalizer.return_normalized_text()
    normalized_template = normalizer.return_normalized_template()

    if not CompareNormalizedFiles(normalized_template, normalized_text):
        nl = "\n"
        print(f"The Text and the Template do not Match.{nl}"
              f"Following text produces a mismatch{nl}")
        diff_source = Generate_Differences(
            normalized_template, normalized_text)
        pprint(diff_source.pretty_print_differences())
        return

    print("The Text and the Template Match.")
def main():
    """Compare one license text against every template in ``directory``.

    Reads the file named by ``input_license_text``, prepares it with
    ``NormalizeText`` and then walks every entry of ``directory``. Each
    entry is normalized as a template and compared with the text; a line is
    printed for every template that matches. Entries that cannot be read or
    normalized are skipped.
    """
    # The ``with`` block closes the file on exit; no explicit close() needed.
    with open(input_license_text, 'r') as inputfile:
        input_text_string = inputfile.read()
    normalized_text_string = NormalizeText(
        input_text_string).returnfinalstring_for_template()

    for filename in os.scandir(directory):
        # DirEntry.name is the bare file name. Stripping ``directory`` from
        # the path leaves a leading separator whenever ``directory`` has no
        # trailing one.
        file_name = filename.name
        try:
            with open(filename.path, 'r') as input_file:
                raw_template = input_file.read()
            prepared_template = NormalizeText(
                raw_template).returnfinalstring_for_template()
            y = NormalizeTemplate(normalized_text_string, prepared_template)
            y.normalize_template()
            normalized_template_string = y.return_normalized_template()
            # Keep the per-template result in its own variable. Writing it
            # back to ``normalized_text_string`` would feed text already
            # adjusted for this template into every later comparison.
            text_for_this_template = y.return_normalized_text()
        except Exception:
            # Best effort: skip this entry. ``Exception`` (not
            # ``BaseException``) lets KeyboardInterrupt and SystemExit
            # propagate.
            continue
        if CompareNormalizedFiles(normalized_template_string,
                                  text_for_this_template):
            print("The Text matches with the Template- " + file_name)
# Project-local helpers for text normalization and template comparison.
from normalize_license_text.normalize_class import NormalizeText
from compare_template_text.normalize_template_text import NormalizeTemplate
from compare_template_text.compare_normalized_files import compare_normalized_files

# Both paths are built from PACKAGE_PATH with backslash-separated fragments;
# the backslashes are then rewritten to the platform separator (os.sep).
Text_Directory = str(
    Path(PACKAGE_PATH + "\\match_against_all_templates\\input_text_files\\"))
Text_Directory = Text_Directory.replace('\\', os.sep)
directory = str(Path(PACKAGE_PATH + '\\data\\templates\\'))
directory = directory.replace('\\', os.sep)

if __name__ == '__main__':
    # Sample input; it is prepared once before the templates are walked.
    a = "Any Sample Text passed into Module"
    object_a = NormalizeText(a)
    normalized_text_string = object_a.get_final_string_for_template()
    # Visit every entry of the template directory.
    for filename in os.scandir(directory):
        # Strip the directory prefix from the entry path before printing it.
        file_name = str(filename.path)
        file_name = file_name.replace(str(directory), '')
        print(file_name)
        # NOTE(review): this fragment ends inside the ``try`` block; the
        # matching handler and the comparison step are not visible here.
        try:
            with open(filename.path, 'r') as input_file:
                input_template_file = input_file.read()
                # Redundant with the enclosing ``with``, which already closes
                # the file.
                input_file.close()
            # The raw template text is replaced by its prepared form.
            object_normalization = NormalizeText(input_template_file)
            input_template_file = object_normalization.get_final_string_for_template(
            )
# Normalizes two license text strings and reports whether they match.
if __name__ == '__main__':
    from normalize_license_text.normalize_class import NormalizeText
    from compare_template_text.compare_normalized_files import compare_normalized_files
    from compare_template_text.normalize_template_text import NormalizeTemplate
    from generate_differences.differences import DifferenceGenerator

    a = "Any Sample Text passed into Module"
    b = "Any Sample Text passed into Module"

    # Both inputs go through the same normalization before being compared.
    stringtemplate = NormalizeText(a).get_final_string()
    stringtext = NormalizeText(b).get_final_string()

    if stringtemplate != stringtext:
        # Mismatch: print the differences between the normalized strings.
        nl = "\n"
        print(f"The Texts do not Match.{nl}")
        differences = DifferenceGenerator(
            stringtemplate, stringtext).pretty_print_differences()
        print(differences)
    else:
        print("The License Text \" " + b +
              "\" matches with the Text \"" + a + " \" ")
# Project-local helpers for text normalization and template comparison.
from normalize_license_text.normalize_class import NormalizeText
from compare_template_text.normalize_template_text import NormalizeTemplate
from compare_template_text.compare_normalized_files import CompareNormalizedFiles

# Both paths are built from PACKAGE_PATH with backslash-separated fragments;
# the backslashes are then rewritten to the platform separator (os.sep).
Text_Directory = str(
    Path(PACKAGE_PATH + "\\match_against_all_templates\\input_text_files\\"))
Text_Directory = Text_Directory.replace('\\', os.sep)
directory = str(Path(PACKAGE_PATH + '\\data\\templates\\'))
directory = directory.replace('\\', os.sep)

if __name__ == '__main__':
    # Sample input; it is prepared once before the templates are walked.
    a = "Any Sample Text passed into Module"
    object_a = NormalizeText(a)
    normalized_text_string = object_a.returnfinalstring_for_template()
    # Visit every entry of the template directory.
    for filename in os.scandir(directory):
        # Strip the directory prefix from the entry path before printing it.
        file_name = str(filename.path)
        file_name = file_name.replace(str(directory), '')
        print(file_name)
        # NOTE(review): this fragment ends inside the ``try`` block; the
        # matching handler and the comparison step are not visible here.
        try:
            with open(filename.path, 'r') as input_file:
                input_template_file = input_file.read()
                # Redundant with the enclosing ``with``, which already closes
                # the file.
                input_file.close()
            # The raw template text is replaced by its prepared form.
            object_normalization = NormalizeText(input_template_file)
            input_template_file = object_normalization.returnfinalstring_for_template(
            )
# Normalizes two license text strings and reports whether they match.
if __name__ == '__main__':
    from normalize_license_text.normalize_class import NormalizeText
    from compare_template_text.compare_normalized_files import CompareNormalizedFiles
    from compare_template_text.normalize_template_text import NormalizeTemplate
    from generate_differences.differences import Generate_Differences

    a = "Any Sample Text passed into Module"
    b = "Any Sample Text passed into Module"

    # Both inputs go through the same normalization before being compared.
    stringtemplate = NormalizeText(a).returnfinalstring()
    stringtext = NormalizeText(b).returnfinalstring()

    if stringtemplate != stringtext:
        # Mismatch: print the differences between the normalized strings.
        nl = "\n"
        print(f"The Texts do not Match.{nl}")
        differences = Generate_Differences(
            stringtemplate, stringtext).pretty_print_differences()
        print(differences)
    else:
        print("The License Text \" " + b +
              "\" matches with the Text \"" + a + " \" ")