def test_paths_in_dir_recursive_ignore_ython(self):
    """
    Call paths_in_dir_recursive with the file ignore pattern 'ython'.
    The expected set contains no path whose file name contains 'ython'.
    """
    dir_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$'])
    file_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.DS_Store$', r'ython'])

    base = pathlib.Path('.').joinpath('searcher_data', 'search_dir')

    found = file_helper.paths_in_dir_recursive(base, dir_regexes, file_regexes)

    # Each tuple holds the path components relative to base
    relative_parts = (
        ('httpwww.beepscore.comhubcape',),
        ('level_1', 'level_2', 'level_3', 'level_4', 'test_result01.txt'),
        ('level_1', '.git_fake', 'objects_fake', 'object_fake'),
        ('level_1', 'c.txt alias'),
        ('level_1', 'something.exe'),
        ('level_1', 'a.txt'),
        ('level_1', 'level_2', 'c.txt'),
        ('level_1', 'level_2', 'b.txt'),
        ('level_1', 'level_2', 'd.txt'),
        ('level_1', 'level_2', 'level_3', 'd.txt alias'),
    )
    expected = {base.joinpath(*parts) for parts in relative_parts}

    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_files_in_dir_recursive(self):
    """
    Call files_in_dir_recursive on search_dir, ignoring only '.', '..'
    directories and '.DS_Store' files. Expects string paths for files
    at every level below search_dir.
    """
    dir_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$'])
    file_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.DS_Store$'])

    # TODO: fix recursive to work with pathlib.Path
    root = os.path.join('.', 'searcher_data', 'search_dir')

    found = file_helper.files_in_dir_recursive(root, dir_regexes, file_regexes)

    # Each tuple holds the path components relative to root
    relative_parts = (
        ('httpwww.beepscore.comhubcape',),
        ('level_1', 'level_2', 'level_3', 'level_4', 'test_result01.txt'),
        ('level_1', '.git_fake', 'objects_fake', 'object_fake'),
        ('level_1', 'c.txt alias'),
        ('httpswww.google.com#q=python',),
        ('level_1', 'something.exe'),
        ('level_1', 'a.txt'),
        ('level_1', 'level_2', 'c.txt'),
        ('httpsen.wikipedia.orgwikiPython_%28programming_language%29',),
        ('httppython.org',),
        ('level_1', 'level_2', 'b.txt'),
        ('level_1', 'level_2', 'd.txt'),
        ('level_1', 'level_2', 'level_3', 'd.txt alias'),
    )
    expected = {os.path.join(root, *parts) for parts in relative_parts}

    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_is_string_matched_in_regular_expression_objects_dot(self):
    r"""
    Match '.' representing the current directory.

    \A matches only at start of string
    $ matches at end of string (or just before a trailing newline)
    https://docs.python.org/3/library/re.html
    """
    # Raw string: \A and \. are not valid string escapes, so a plain
    # literal triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\A\.$']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.', ignored_regex_objects))

    # Anything other than exactly '.' must not match
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '..', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a.', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.c', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a.c', ignored_regex_objects))
def test_is_string_matched_in_regular_expression_objects_dot_git(self):
    """
    Match a literal '.git' anywhere within the string.
    'git' without the leading dot must not match.
    """
    # Raw string: \. is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\.git']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            ".git", ignored_regex_objects))
    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "a/.git/objects", ignored_regex_objects))

    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "git", ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "a/git/objects", ignored_regex_objects))
def test_is_string_matched_in_regular_expression_objects_inner(self):
    """
    Pattern 'ython' should be found inside a longer string.
    Matching is case sensitive.
    """
    regexes = expression_helper.regex_objects_from_patterns(['ython'])
    is_matched = expression_helper.is_string_matched_in_regular_expression_objects

    # 'python' contains 'ython'
    self.assertTrue(is_matched("A big python is here.", regexes))
    # 'pythoxyz' does not contain 'ython'
    self.assertFalse(is_matched("A big pythoxyz", regexes))
def test_files_in_dir_recursive_level_3(self):
    """
    Call files_in_dir_recursive starting at level_3.
    Expects the file in level_3 and the file in level_4 below it.
    """
    dir_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$'])
    file_regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.DS_Store$'])

    level_3 = os.path.join('.', 'searcher_data', 'search_dir',
                           'level_1', 'level_2', 'level_3')

    found = file_helper.files_in_dir_recursive(level_3, dir_regexes, file_regexes)

    expected = {
        os.path.join(level_3, 'level_4', 'test_result01.txt'),
        os.path.join(level_3, 'd.txt alias'),
    }
    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_directories_in_dir_recursive_ignore1(self):
    """
    Call directories_in_dir_recursive with ignore pattern 'level_1'.
    Expects only the search directory itself in the result.
    """
    regexes = expression_helper.regex_objects_from_patterns(['level_1'])
    root = os.path.join('.', 'searcher_data', 'search_dir')

    found = file_helper.directories_in_dir_recursive(root, regexes)

    # Result order is irrelevant, so compare as sets
    self.assertEqual({root}, set(found))
def test_files_in_dir_level_1(self):
    """
    Call files_in_dir on level_1 using expression_helper's
    ignored_filename_patterns. Expects file names, not full paths.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)
    level_1 = pathlib.Path('.').joinpath('searcher_data', 'search_dir', 'level_1')

    found = file_helper.files_in_dir(level_1, regexes)

    # Result order is irrelevant, so compare as sets
    self.assertEqual({'a.txt', 'c.txt alias'}, set(found))
def test_paths_in_dir_level_3(self):
    """
    Call paths_in_dir on level_3.
    Expects a single path, for the file directly inside level_3.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$', r'\A\.DS_Store$'])
    level_3 = pathlib.Path('.').joinpath(
        'searcher_data', 'search_dir', 'level_1', 'level_2', 'level_3')

    found = file_helper.paths_in_dir(level_3, regexes)

    # Result order is irrelevant, so compare as sets
    expected = {level_3.joinpath('d.txt alias')}
    self.assertEqual(expected, set(found))
def test_files_in_dir_ignore_ython(self):
    """
    Call files_in_dir on search_dir with ignore pattern 'ython' added.
    Expects the one file name that does not contain 'ython'.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$', r'\A\.DS_Store$', r'ython'])
    base = pathlib.Path('.').joinpath('searcher_data', 'search_dir')

    found = file_helper.files_in_dir(base, regexes)

    # Result order is irrelevant, so compare as sets
    self.assertEqual({'httpwww.beepscore.comhubcape'}, set(found))
def test_is_string_matched_in_regular_expression_objects_dotDS_Store(self):
    r"""
    Match '.DS_Store', an OSX file system file.

    \A matches only at start of string
    $ matches at end of string (or just before a trailing newline)
    https://docs.python.org/3/library/re.html
    """
    # Raw string: \A and \. are not valid string escapes, so a plain
    # literal triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\A\.DS_Store$']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.DS_Store', ignored_regex_objects))

    # Extra characters before or after the name must not match
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a.DS_Store', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.DS_Storeb', ignored_regex_objects))
def test_is_string_matched_in_regular_expression_objects_inner(self):
    """
    Pattern 'ython' should match when it occurs within the string.
    Matching is case sensitive.
    """
    patterns = ['ython']
    regexes = expression_helper.regex_objects_from_patterns(patterns)

    contains_ython = expression_helper.is_string_matched_in_regular_expression_objects(
        "A big python is here.", regexes)
    self.assertTrue(contains_ython)

    lacks_ython = expression_helper.is_string_matched_in_regular_expression_objects(
        "A big pythoxyz", regexes)
    self.assertFalse(lacks_ython)
def test_is_string_matched_in_regular_expression_objects_dotdot(self):
    r"""
    Match '..' representing the directory above the current directory.

    \A matches only at start of string
    $ matches at end of string (or just before a trailing newline)
    https://docs.python.org/3/library/re.html
    """
    # Raw string: \A and \. are not valid string escapes, so a plain
    # literal triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\A\.\.$']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '..', ignored_regex_objects))

    # Anything other than exactly '..' must not match
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a..', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '..c', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a..c', ignored_regex_objects))
def test_directories_number_of_files_containing_expression_test_result(self):
    """
    Count files per directory that contain a '<letters>_TESTResult' line.
    Expects one matching file, in level_4.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    counts = expression_searcher.directories_number_of_files_containing_expression(
        './searcher_data/search_dir', regexes, "^[a-zA-Z]+_TESTResult.*")

    # (directory, number of matching files)
    expected = dict([
        ('./searcher_data/search_dir', 0),
        ('./searcher_data/search_dir/level_1', 0),
        ('./searcher_data/search_dir/level_1/level_2', 0),
        ('./searcher_data/search_dir/level_1/level_2/level_3', 0),
        ('./searcher_data/search_dir/level_1/level_2/level_3/level_4', 1),
    ])
    self.assertEqual(expected, counts)
def test_directory_paths_in_dir_recursive_ignore2(self):
    """
    Call directory_paths_in_dir_recursive with ignore pattern 'level_2'.
    Expects search_dir, level_1 and the .git_fake tree, but nothing
    at or below level_2.
    """
    regexes = expression_helper.regex_objects_from_patterns(['level_2'])
    base = pathlib.Path('.').joinpath('searcher_data', 'search_dir')

    found = file_helper.directory_paths_in_dir_recursive(base, regexes)

    level_1 = base.joinpath('level_1')
    git_fake = level_1.joinpath('.git_fake')
    expected = {base, level_1, git_fake, git_fake.joinpath('objects_fake')}

    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_paths_in_dir(self):
    """
    Call paths_in_dir on search_dir.
    Expects paths for the four files directly inside search_dir.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        [r'\A\.$', r'\A\.\.$', r'\A\.DS_Store$'])
    base = pathlib.Path('.').joinpath('searcher_data', 'search_dir')

    found = file_helper.paths_in_dir(base, regexes)

    file_names = (
        'httppython.org',
        'httpsen.wikipedia.orgwikiPython_%28programming_language%29',
        'httpswww.google.com#q=python',
        'httpwww.beepscore.comhubcape',
    )
    expected = {base.joinpath(name) for name in file_names}

    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_directories_number_of_files_containing_expression_foo(self):
    """
    Count files per directory that contain 'foo'.
    Expects four matching files, all directly in search_dir.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    counts = expression_searcher.directories_number_of_files_containing_expression(
        './searcher_data/search_dir', regexes, "foo")

    # foo matches 'footer' in several html files
    expected = dict([
        ('./searcher_data/search_dir', 4),
        ('./searcher_data/search_dir/level_1', 0),
        ('./searcher_data/search_dir/level_1/level_2', 0),
        ('./searcher_data/search_dir/level_1/level_2/level_3', 0),
        ('./searcher_data/search_dir/level_1/level_2/level_3/level_4', 0),
    ])
    self.assertEqual(expected, counts)
def test_directories_number_of_files_containing_expression_this(self):
    r"""
    Count files per directory that contain text matching \AThis.
    """
    root_dir = './searcher_data/search_dir'
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    # \A matches only at the start of the searched string
    # (presumably each line of a file; verify against expression_searcher).
    # Raw string: \A is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    expression = r"\AThis"

    actual = expression_searcher.directories_number_of_files_containing_expression(
        root_dir, ignored_regex_objects, expression)

    # searcher searches the alias text, not the text of the file it links to
    expected = {
        './searcher_data/search_dir': 0,
        './searcher_data/search_dir/level_1': 1,
        './searcher_data/search_dir/level_1/level_2': 2,
        './searcher_data/search_dir/level_1/level_2/level_3': 0,
        './searcher_data/search_dir/level_1/level_2/level_3/level_4': 0,
    }
    self.assertEqual(expected, actual)
def test_is_string_matched_in_regular_expression_objects_dot_git(self):
    """
    Match a literal '.git' anywhere within the string.
    'git' without the leading dot must not match.
    """
    # Raw string: \. is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\.git']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            ".git", ignored_regex_objects))
    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "a/.git/objects", ignored_regex_objects))

    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "git", ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            "a/git/objects", ignored_regex_objects))
def test_directories_in_dir_recursive_ignore_git(self):
    """
    Call directories_in_dir_recursive with ignore pattern '.git'.
    Expects search_dir and level_1 through level_4, without the
    .git_fake tree.
    """
    # Note: git version control normally ignores its own database .git
    # So for testing, committed a file
    # search_dir/level_1/.git_fake/objects_fake/object_fake
    regexes = expression_helper.regex_objects_from_patterns([r'\.git'])
    root = os.path.join('.', 'searcher_data', 'search_dir')

    found = file_helper.directories_in_dir_recursive(root, regexes)

    expected = {
        root,
        os.path.join(root, 'level_1'),
        os.path.join(root, 'level_1', 'level_2'),
        os.path.join(root, 'level_1', 'level_2', 'level_3'),
        os.path.join(root, 'level_1', 'level_2', 'level_3', 'level_4'),
    }
    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_directories_number_of_files_containing_expression_ython(self):
    """
    Count files per directory that contain 'ython'.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    counts = expression_searcher.directories_number_of_files_containing_expression(
        './searcher_data/search_dir', regexes, "ython")

    # (directory, number of matching files)
    expected = dict([
        ('./searcher_data/search_dir', 2),
        ('./searcher_data/search_dir/level_1', 1),
        ('./searcher_data/search_dir/level_1/level_2', 2),
        ('./searcher_data/search_dir/level_1/level_2/level_3', 1),
        ('./searcher_data/search_dir/level_1/level_2/level_3/level_4', 0),
    ])
    self.assertEqual(expected, counts)
def test_is_string_matched_in_regular_expression_objects_dotDS_Store(self):
    r"""
    Match '.DS_Store', an OSX file system file.

    \A matches only at start of string
    $ matches at end of string (or just before a trailing newline)
    https://docs.python.org/3/library/re.html
    """
    # Raw string: \A and \. are not valid string escapes, so a plain
    # literal triggers an invalid escape sequence warning.
    ignored_filename_patterns = [r'\A\.DS_Store$']
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        ignored_filename_patterns)

    self.assertTrue(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.DS_Store', ignored_regex_objects))

    # Extra characters before or after the name must not match
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            'a.DS_Store', ignored_regex_objects))
    self.assertFalse(
        expression_helper.is_string_matched_in_regular_expression_objects(
            '.DS_Storeb', ignored_regex_objects))
def test_directories_number_of_files_containing_expression_foo(self):
    """
    Count files per directory that contain 'foo'.
    Expects four matching files, all directly in search_dir.
    """
    search_root = './searcher_data/search_dir'
    pattern = "foo"
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    counts = expression_searcher.directories_number_of_files_containing_expression(
        search_root, regexes, pattern)

    # foo matches 'footer' in several html files
    expected = {}
    expected['./searcher_data/search_dir'] = 4
    expected['./searcher_data/search_dir/level_1'] = 0
    expected['./searcher_data/search_dir/level_1/level_2'] = 0
    expected['./searcher_data/search_dir/level_1/level_2/level_3'] = 0
    expected['./searcher_data/search_dir/level_1/level_2/level_3/level_4'] = 0

    self.assertEqual(expected, counts)
def test_directories_in_dir_recursive_dont_ignore(self):
    """
    Call directories_in_dir_recursive with no ignore patterns.
    Expects every directory from search_dir down, including the
    .git_fake tree.
    """
    regexes = expression_helper.regex_objects_from_patterns([])

    # use os.path.join so macos and linux will use separator '/'
    # and Windows will use separator '\'
    root = os.path.join('.', 'searcher_data', 'search_dir')

    found = file_helper.directories_in_dir_recursive(root, regexes)

    # Each tuple holds the directory components relative to root
    relative_parts = (
        ('level_1',),
        ('level_1', '.git_fake'),
        ('level_1', '.git_fake', 'objects_fake'),
        ('level_1', 'level_2'),
        ('level_1', 'level_2', 'level_3'),
        ('level_1', 'level_2', 'level_3', 'level_4'),
    )
    expected = {root}
    expected.update(os.path.join(root, *parts) for parts in relative_parts)

    # Result order is irrelevant, so compare as sets
    self.assertEqual(expected, set(found))
def test_lines_in_files_containing_expression_test_result(self):
    """
    Call lines_in_files_containing_expression with a
    '<letters>_TESTResult' pattern. Expects one (file name, lines)
    tuple per file, with a matching line only for test_result01.txt.
    """
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    found = expression_searcher.lines_in_files_containing_expression(
        "^[a-zA-Z]+_TESTResult.*", './searcher_data/search_dir', regexes)

    # Files with no matching lines, in the order the result lists them
    files_without_match = (
        'httpwww.beepscore.comhubcape',
        'httpsen.wikipedia.orgwikiPython_%28programming_language%29',
        'httppython.org',
        'httpswww.google.com#q=python',
        'a.txt',
        'c.txt alias',
        'c.txt',
        'b.txt',
        'd.txt',
        'd.txt alias',
    )
    expected = [(name, []) for name in files_without_match]
    expected.append(('test_result01.txt', ['line 1 a_TESTResult.txt']))

    self.assertEqual(expected, found)
def test_lines_in_files_containing_expression_test_result(self):
    """
    Search search_dir for lines matching a '<letters>_TESTResult'
    pattern. Expects an ordered list of (file name, matching lines),
    where only test_result01.txt has a matching line.
    """
    search_root = './searcher_data/search_dir'
    pattern = "^[a-zA-Z]+_TESTResult.*"
    regexes = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    found = expression_searcher.lines_in_files_containing_expression(
        pattern, search_root, regexes)

    # List order matters here: the lists are compared directly
    expected = []
    for file_name in ('httpwww.beepscore.comhubcape',
                      'httpsen.wikipedia.orgwikiPython_%28programming_language%29',
                      'httppython.org',
                      'httpswww.google.com#q=python',
                      'a.txt',
                      'c.txt alias',
                      'c.txt',
                      'b.txt',
                      'd.txt',
                      'd.txt alias'):
        expected.append((file_name, []))
    expected.append(('test_result01.txt', ['line 1 a_TESTResult.txt']))

    self.assertEqual(expected, found)
def test_directories_number_of_files_containing_expression_this(self):
    r"""
    Count files per directory that contain text matching \AThis.
    """
    root_dir = './searcher_data/search_dir'
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    # \A matches only at the start of the searched string
    # (presumably each line of a file; verify against expression_searcher).
    # Raw string: \A is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    expression = r"\AThis"

    actual = expression_searcher.directories_number_of_files_containing_expression(
        root_dir, ignored_regex_objects, expression)

    # searcher searches the alias text, not the text of the file it links to
    expected = {
        './searcher_data/search_dir': 0,
        './searcher_data/search_dir/level_1': 1,
        './searcher_data/search_dir/level_1/level_2': 2,
        './searcher_data/search_dir/level_1/level_2/level_3': 0,
        './searcher_data/search_dir/level_1/level_2/level_3/level_4': 0,
    }
    self.assertEqual(expected, actual)
def test_lines_in_files_containing_expression_this(self):
    r"""
    Call lines_in_files_containing_expression with pattern \AThis.
    Expects one (file name, matching lines) tuple per file.
    """
    root_dir = './searcher_data/search_dir'
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    # \A matches only at the start of the searched string. The expected
    # lines below begin with 'This', which suggests the searcher tests
    # each line separately - verify against expression_searcher.
    # Raw string: \A is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    expression = r"\AThis"

    actual = expression_searcher.lines_in_files_containing_expression(
        expression, root_dir, ignored_regex_objects)

    expected = [
        ('httpwww.beepscore.comhubcape', []),
        ('httpsen.wikipedia.orgwikiPython_%28programming_language%29', []),
        ('httppython.org', []),
        ('httpswww.google.com#q=python', []),
        ('a.txt', ['line 1 This file has at least one "a".']),
        ('c.txt alias', []),
        ('c.txt', ['line 1 This file has Python Jython pythonic.']),
        ('b.txt', ['line 1 This file has at least one big "b".']),
        ('d.txt', []),
        ('d.txt alias', []),
        ('test_result01.txt', []),
    ]
    self.assertEqual(expected, actual)
def test_lines_in_files_containing_expression_this(self):
    r"""
    Call lines_in_files_containing_expression with pattern \AThis.
    Expects one (file name, matching lines) tuple per file.
    """
    root_dir = './searcher_data/search_dir'
    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    # \A matches only at the start of the searched string. The expected
    # lines below begin with 'This', which suggests the searcher tests
    # each line separately - verify against expression_searcher.
    # Raw string: \A is not a valid string escape, so a plain literal
    # triggers an invalid escape sequence warning.
    expression = r"\AThis"

    actual = expression_searcher.lines_in_files_containing_expression(
        expression, root_dir, ignored_regex_objects)

    expected = [
        ('httpwww.beepscore.comhubcape', []),
        ('httpsen.wikipedia.orgwikiPython_%28programming_language%29', []),
        ('httppython.org', []),
        ('httpswww.google.com#q=python', []),
        ('a.txt', ['line 1 This file has at least one "a".']),
        ('c.txt alias', []),
        ('c.txt', ['line 1 This file has Python Jython pythonic.']),
        ('b.txt', ['line 1 This file has at least one big "b".']),
        ('d.txt', []),
        ('d.txt alias', []),
        ('test_result01.txt', []),
    ]
    self.assertEqual(expected, actual)
import pprint
from searcher import expression_helper
from searcher import expression_searcher
from searcher import searcher_arg_reader

if __name__ == '__main__':
    # Search for expression without instantiating an instance.
    # Use command line arguments.

    # Call args() without an argument list so it reads from command line.
    cli_args = searcher_arg_reader.SearcherArgReader().args()
    expression = cli_args.expression
    search_dir = cli_args.root_dir

    banner = "Searching root_dir " + search_dir
    banner = banner + " for expression " + expression
    print(banner)

    ignored_regex_objects = expression_helper.regex_objects_from_patterns(
        expression_helper.ignored_filename_patterns)

    # Maps to the per-directory result printed below
    results = expression_searcher.directories_number_of_files_containing_expression(
        search_dir, ignored_regex_objects, expression)

    print("Results")
    pprint.PrettyPrinter(indent=4).pprint(results)