def test_keyword_in_comment(self):
    """
    A keyword inside a comment must be flagged even when the
    AnnotationBear dependency delivered unusable results.

    Three malformed dependency payloads are tried; in every case the
    bear must fall back to plain keyword search and report line 1 of
    the file with no attached diffs.
    """
    text = ['# todo 123']
    # Each variant is broken in a different way: no result at all,
    # a HiddenResult whose 'comments' value is not iterable ranges,
    # and a HiddenResult whose contents is not even a dict.
    invalid_dep_results = (
        {'AnnotationBear': {}},
        {'AnnotationBear': HiddenResult('AnnotationBear',
                                        {'comments': 123})},
        {'AnnotationBear': HiddenResult('AnnotationBear', 123)},
    )
    for dep_results in invalid_dep_results:
        with execute_bear(self.uut, filename='F', file=text,
                          dependency_results=dep_results) as result:
            self.assertEqual(result[0].diffs, {})
            self.assertEqual(result[0].affected_code[0].start.line, 1)
            self.assertEqual(len(result), 1)
def run(self, filename, file, language: str, coalang_dir: str = None):
    """
    Locate every string and comment annotation in a file.

    The coalang specification of ``language`` supplies the delimiters;
    all matches are reported as SourceRange objects wrapped in a single
    HiddenResult.

    :param language:    The programming language of the source code.
    :param coalang_dir: External directory for coalang file.
    :return:            One HiddenResult containing a dictionary with
                        keys being 'strings' or 'comments' and values
                        being a tuple of SourceRanges pointing to the
                        strings and a tuple of SourceRanges pointing to
                        all comments respectively. The ranges do include
                        string quotes or the comment starting separator
                        but not anything before (e.g. when using
                        ``u"string"``, the ``u`` will not be in the
                        source range).
    """
    try:
        definition = LanguageDefinition(language, coalang_dir=coalang_dir)
    except FileNotFoundError:
        # No specification available: report that fact and stop.
        yield HiddenResult(
            self, 'coalang specification for ' + language + ' not found.')
        return

    single_strings = dict(definition['string_delimiters'])
    multi_strings = dict(definition['multiline_string_delimiters'])
    multi_comments = dict(definition['multiline_comment_delimiters'])
    single_comments = dict(definition['comment_delimiter'])

    # Defaults in case an unclosed annotation aborts the search below.
    string_ranges = comment_ranges = ()
    try:
        string_ranges, comment_ranges = self.find_annotation_ranges(
            file, filename, single_strings, multi_strings,
            single_comments, multi_comments)
    except NoCloseError as error:
        yield Result(self, str(error), severity=RESULT_SEVERITY.MAJOR,
                     affected_code=(error.code,))

    yield HiddenResult(
        self, {'strings': string_ranges, 'comments': comment_ranges})
def setUp(self):
    """Build a QuotesBear, fixture files and AnnotationBear results."""
    self.section = Section('')
    self.uut = QuotesBear(self.section, Queue())
    self.filename = 'f'

    self.double_quote_file = dedent("""
    '''
    Multiline string
    '''
    "a string with double quotes!"
    'A single quoted string with " in it'
    """).splitlines(True)

    self.single_quote_file = dedent("""
    '''
    Multiline string
    '''
    'a string with single quotes!'
    "A double quoted string with ' in it"
    """).splitlines(True)

    # Ranges of the multiline string (lines 2-4) and the two
    # single-line strings (lines 5 and 6) of the fixture files.
    string_ranges = (SourceRange.from_values(self.filename, 2, 1, 4, 3),
                     SourceRange.from_values(self.filename, 5, 1, 5, 30),
                     SourceRange.from_values(self.filename, 6, 1, 6, 37))
    annotations = HiddenResult('AnnotationBear',
                               {'comments': (),
                                'strings': string_ranges})
    self.dep_results = {'AnnotationBear': [annotations]}
def run(self, filename, file,
        network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT) = dict(),
        link_ignore_regex: str = r'([.\/]example\.com|\{|\$)',
        link_ignore_list: typed_list(str) = ''):
    """
    Find links in any text file.

    Warning: This bear will make HEAD requests to all URLs mentioned in
    your codebase, which can potentially be destructive. As an example,
    this bear would naively just visit the URL from a line that goes like
    `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
    all your data.

    :param network_timeout:   A dict mapping URLs and timeout to be
                              used for that URL. All the URLs that have
                              the same host as that of URLs provided
                              will be passed that timeout. It can also
                              contain a wildcard timeout entry with key
                              '*'. The timeout of all the websites not
                              in the dict will be the value of the key
                              '*'.
    :param link_ignore_regex: A regex for urls to ignore.
    :param link_ignore_list:  Comma separated url globs to ignore
    """
    # NOTE: the regex default is now a raw string; the resulting value is
    # byte-identical to the old non-raw literal but no longer relies on
    # invalid escape sequences (a warning/future error in CPython).
    # The mutable ``dict()`` default is only read, never mutated: it is
    # replaced below by a freshly built dict keyed on each URL's host.
    network_timeout = {
        urlparse(url).netloc if url != '*' else '*': timeout
        for url, timeout in network_timeout.items()
    }

    for line_number, link, code, context in self.analyze_links_in_file(
            file, network_timeout, link_ignore_regex, link_ignore_list):
        # Payload shape consumed by dependent bears:
        # [line_number, link, HTTP status code, context].
        yield HiddenResult(self, [line_number, link, code, context])
def setUp(self):
    """Create a KeywordBear configured to hunt for TODO in python3."""
    self.section = Section('')
    for name, value in (('language', 'python3'), ('keywords', 'TODO')):
        self.section.append(Setting(name, value))
    self.uut = KeywordBear(self.section, Queue())
    self.annotation_bear_result_type = namedtuple('result', ['contents'])
    # Well-formed AnnotationBear output with no comments at all.
    self.dep_results = {
        'AnnotationBear': HiddenResult('AnnotationBear', {'comments': ()}),
    }
def test_error_handling(self):
    """Unusable dependency results must yield no output at all."""
    # Variant one: a plain (non-hidden) Result; variant two: a
    # HiddenResult whose contents is not the expected dict.
    for dep_results in ({'AnnotationBear': [Result('test', 'test')]},
                        {'AnnotationBear': [HiddenResult('a', 'error!')]}):
        with execute_bear(self.uut, self.filename, self.double_quote_file,
                          dependency_results=dep_results) as results:
            self.assertEqual(len(results), 0)
def run(self, filename, file, language: str, coalang_dir: str = None):
    """
    Finds out all the positions of strings and comments in a file.

    The Bear searches for valid comments and strings and yields their
    ranges as SourceRange objects in HiddenResults.

    :param language:    The language whose annotations are to be
                        searched.
    :param coalang_dir: External directory for the coalang file.
    :return:            HiddenResults containing a dictionary with keys
                        as 'strings' or 'comments' and values as a tuple
                        of SourceRanges of strings and a tuple of
                        SourceRanges of comments respectively.
    """
    try:
        spec = LanguageDefinition(language, coalang_dir=coalang_dir)
    except FileNotFoundError:
        yield HiddenResult(
            self, 'coalang specification for ' + language + ' not found.')
        return

    delimiters = {
        'string': dict(spec['string_delimiters']),
        'multiline_string': dict(spec['multiline_string_delimiters']),
        'multiline_comment': dict(spec['multiline_comment_delimiters']),
        'comment': dict(spec['comment_delimiter']),
    }

    string_ranges = comment_ranges = ()
    try:
        string_ranges, comment_ranges = self.find_annotation_ranges(
            file, filename,
            delimiters['string'], delimiters['multiline_string'],
            delimiters['comment'], delimiters['multiline_comment'])
    except NoCloseError as error:
        # An annotation was opened but never closed: surface it as a
        # visible MAJOR result instead of a hidden one.
        yield Result(self, str(error), severity=RESULT_SEVERITY.MAJOR,
                     affected_code=(error.code,))

    yield HiddenResult(self, {'strings': string_ranges,
                              'comments': comment_ranges})
def test_autoapply_override(self):
    """
    Tests that the default_actions aren't automatically applied when the
    autoapply setting overrides that.
    """
    self.section.append(Setting('default_actions',
                                'somebear: PrintDebugMessageAction'))

    # Verify that it would apply the action, i.e. remove the result
    result_list = [5,
                   HiddenResult('origin', []),
                   Result('somebear', 'message', debug_msg='debug')]
    retval, remaining = print_result(
        results, {}, 0, lambda *args: None, self.section,
        self.log_printer, {}, [], console_printer=self.console_printer)
    self.assertEqual(remaining, [])
def run(self, filename, file, language: str, language_family: str):
    """
    Finds out all the positions of strings and comments in a file.

    The Bear searches for valid comments and strings and yields their
    ranges as SourceRange objects in HiddenResults.

    :param language:        The language whose annotations are to be
                            searched.
    :param language_family: Language family whose annotations are to be
                            searched.
    :return:                HiddenResults containing a dictionary with
                            keys as 'strings' or 'comments' and values
                            as a tuple of SourceRanges of strings and a
                            tuple of SourceRanges of comments
                            respectively.
    """
    lang_dict = LanguageDefinition(language, language_family)

    # Strings
    # TODO treat single-line and multiline strings differently
    strings = dict(lang_dict['string_delimiters'])
    strings.update(lang_dict['multiline_string_delimiters'])
    strings_found = self.find_with_start_end(filename, file, strings)

    # multiline Comments
    comments_found = self.find_with_start_end(
        filename, file, dict(lang_dict['multiline_comment_delimiters']))
    # single-line Comments
    comments_found.update(self.find_singleline_comments(
        filename, file, list(lang_dict['comment_delimiter'])))

    every_match = strings_found | comments_found
    # Drop matches that begin inside another match (nested annotations).
    unnested = {match for match in every_match
                if not starts_within_ranges(match, every_match)}

    # Yield different results for strings and comments
    strings_found = tuple(match for match in strings_found
                          if match in unnested)
    comments_found = tuple(match for match in comments_found
                           if match in unnested)
    yield HiddenResult(self, {'comments': comments_found,
                              'strings': strings_found})
def test_process_queues(self):
    """
    Feed a scripted sequence of control elements through process_queues
    and check that only the non-ignored local and global results reach
    the print callback, in the expected batches.
    """
    ctrlq = queue.Queue()

    # Append custom controlling sequences.

    # Simulated process 1
    ctrlq.put((CONTROL_ELEMENT.LOCAL, 1))
    ctrlq.put((CONTROL_ELEMENT.LOCAL_FINISHED, None))
    ctrlq.put((CONTROL_ELEMENT.GLOBAL, 1))

    # Simulated process 2
    ctrlq.put((CONTROL_ELEMENT.LOCAL, 2))

    # Simulated process 1
    ctrlq.put((CONTROL_ELEMENT.GLOBAL_FINISHED, None))

    # Simulated process 2
    ctrlq.put((CONTROL_ELEMENT.LOCAL_FINISHED, None))
    ctrlq.put((CONTROL_ELEMENT.GLOBAL, 1))
    ctrlq.put((CONTROL_ELEMENT.GLOBAL_FINISHED, None))

    first_local = Result.from_values('o', 'The first result.', file='f')
    second_local = Result.from_values('ABear', 'The second result.',
                                      file='f', line=1)
    third_local = Result.from_values('ABear', 'The second result.',
                                     file='f', line=4)
    fourth_local = Result.from_values('ABear', 'Another result.',
                                      file='f', line=7)
    first_global = Result('o', 'The one and only global result.')
    section = Section('')
    # INFO results are filtered out by this minimum severity.
    section.append(Setting('min_severity', 'normal'))
    process_queues(
        [DummyProcess(control_queue=ctrlq) for i in range(3)],
        ctrlq,
        {1: [first_local,
             second_local,
             third_local,
             # The following are to be ignored
             Result('o', 'm', severity=RESULT_SEVERITY.INFO),
             Result.from_values('ABear', 'u', 'f', 2, 1),
             Result.from_values('ABear', 'u', 'f', 3, 1)],
         2: [fourth_local,
             # The following are to be ignored
             HiddenResult('t', 'c'),
             Result.from_values('ABear', 'u', 'f', 5, 1),
             Result.from_values('ABear', 'u', 'f', 6, 1)]},
        {1: [first_global]},
        # The fake file content exercises the inline ignore-directive
        # parsing (``# ignore all``, ``# Start/Stop ignoring ...``).
        {'f': ['first line # stop ignoring, invalid ignore range\n',
               'second line # ignore all\n',
               'third line\n',
               "fourth line # gnore shouldn't trigger without i!\n",
               '# Start ignoring ABear, BBear and CBear\n',
               '# Stop ignoring\n',
               'seventh']},
        lambda *args: self.queue.put(args[2]),
        section,
        None,
        self.log_printer,
        self.console_printer)

    # first_local is dropped (line 2 is '# ignore all'); the ignored
    # results above never appear. The GLOBAL batch is printed twice as
    # it is queued twice in the control sequence.
    self.assertEqual(self.queue.get(timeout=0), ([second_local,
                                                  third_local]))
    self.assertEqual(self.queue.get(timeout=0), ([fourth_local]))
    self.assertEqual(self.queue.get(timeout=0), ([first_global]))
    self.assertEqual(self.queue.get(timeout=0), ([first_global]))
def test_process_queues(self): ctrlq = queue.Queue() # Append custom controlling sequences. # Simulated process 1 ctrlq.put((CONTROL_ELEMENT.LOCAL, 1)) ctrlq.put((CONTROL_ELEMENT.LOCAL_FINISHED, None)) ctrlq.put((CONTROL_ELEMENT.GLOBAL, 1)) # Simulated process 2 ctrlq.put((CONTROL_ELEMENT.LOCAL, 2)) # Simulated process 1 ctrlq.put((CONTROL_ELEMENT.GLOBAL_FINISHED, None)) # Simulated process 2 ctrlq.put((CONTROL_ELEMENT.LOCAL_FINISHED, None)) ctrlq.put((CONTROL_ELEMENT.GLOBAL, 1)) ctrlq.put((CONTROL_ELEMENT.GLOBAL_FINISHED, None)) first_local = Result.from_values("o", "The first result.", file="f") second_local = Result.from_values("ABear", "The second result.", file="f", line=1) third_local = Result.from_values("ABear", "The second result.", file="f", line=4) fourth_local = Result.from_values("ABear", "Another result.", file="f", line=7) first_global = Result("o", "The one and only global result.") section = Section("") section.append(Setting('min_severity', "normal")) process_queues( [DummyProcess(control_queue=ctrlq) for i in range(3)], ctrlq, { 1: [ first_local, second_local, third_local, # The following are to be ignored Result('o', 'm', severity=RESULT_SEVERITY.INFO), Result.from_values("ABear", "u", file="f", line=2), Result.from_values("ABear", "u", file="f", line=3) ], 2: [ fourth_local, # The following are to be ignored HiddenResult("t", "c"), Result.from_values("ABear", "u", file="f", line=5), Result.from_values("ABear", "u", file="f", line=6) ] }, {1: [first_global]}, { "f": [ "first line # stop ignoring, invalid ignore range\n", "second line # ignore all\n", "third line\n", "fourth line\n", "# Start ignoring ABear, BBear and CBear\n", "# Stop ignoring\n", "seventh" ] }, lambda *args: self.queue.put(args[2]), section, self.log_printer) self.assertEqual(self.queue.get(timeout=0), ([first_local, second_local, third_local])) self.assertEqual(self.queue.get(timeout=0), ([fourth_local])) self.assertEqual(self.queue.get(timeout=0), ([first_global])) 
self.assertEqual(self.queue.get(timeout=0), ([first_global]))
def run(self, filename, file):
    """Emit one visible and one hidden result regardless of input."""
    visible = Result('LocalTestBear', 'test msg')
    hidden = HiddenResult('LocalTestBear', 'hidden msg')
    return [visible, hidden]
def run(self,
        counting_conditions: counting_condition_dict = default_cc_dict,
        average_calculation: bool = False,
        poly_postprocessing: bool = True,
        exp_postprocessing: bool = False,
        extra_include_paths: path_list = ()):
    '''
    Retrieves similarities for code clone detection. Those can be reused
    in another bear to produce results.

    Postprocessing may be done because small functions are less likely
    to be clones at the same difference value than big functions which
    may provide a better refactoring opportunity for the user.

    :param counting_conditions:  A comma separated list of counting
                                 conditions. Possible values are: used,
                                 returned, is_condition, in_condition,
                                 in_second_level_condition,
                                 in_third_level_condition, is_assignee,
                                 is_assigner, loop_content,
                                 second_level_loop_content,
                                 third_level_loop_content, is_param,
                                 in_sum, in_product, in_binary_operation,
                                 member_accessed.
                                 Weightings can be assigned to each
                                 condition due to providing a dict
                                 value, i.e. having used weighted in
                                 half as much as other conditions would
                                 simply be: "used: 0.5, is_assignee".
                                 Weightings default to 1 if unset.
    :param average_calculation:  If set to true the difference
                                 calculation function will take the
                                 average of all variable differences as
                                 the difference, else it will normalize
                                 the function as a whole and thus
                                 weighting in variables dependent on
                                 their size.
    :param poly_postprocessing:  If set to true, the difference value of
                                 big function pairs will be reduced
                                 using a polynomial approach.
    :param extra_include_paths:  A list containing additional include
                                 paths.
    :param exp_postprocessing:   If set to true, the difference value of
                                 big function pairs will be reduced
                                 using an exponential approach.
    '''
    self.debug("Using the following counting conditions:")
    for key, val in counting_conditions.items():
        self.debug(" *", key.__name__, "(weighting: {})".format(val))

    self.debug("Creating count matrices...")
    # One count matrix per function, built by counting how often each
    # variable satisfies each (weighted) condition.
    count_matrices = get_count_matrices(
        ClangCountVectorCreator(list(counting_conditions.keys()),
                                list(counting_conditions.values())),
        list(self.file_dict.keys()),
        lambda prog: self.debug("{:2.4f}%...".format(prog)),
        self.section["files"].origin,
        collect_dirs(extra_include_paths))

    self.debug("Calculating differences...")

    differences = []
    function_count = len(count_matrices)
    # That's n over 2, hardcoded to simplify calculation
    combination_length = function_count * (function_count - 1) / 2
    partial_get_difference = functools.partial(
        get_difference,
        count_matrices=count_matrices,
        average_calculation=average_calculation,
        poly_postprocessing=poly_postprocessing,
        exp_postprocessing=exp_postprocessing)

    # Compare every unordered pair of functions; progress is logged
    # every 50 pairs to keep the debug output manageable.
    for i, elem in enumerate(
            map(partial_get_difference,
                [(f1, f2)
                 for f1, f2 in combinations(count_matrices, 2)])):
        if i % 50 == 0:
            self.debug("{:2.4f}%...".format(100 * i / combination_length))
        differences.append(elem)

    # Two separate hidden results: the pairwise differences first,
    # then the raw count matrices for dependent bears to reuse.
    yield HiddenResult(self, differences)
    yield HiddenResult(self, count_matrices)
def test_hidden_result(self):
    # The payload handed to a HiddenResult must be stored verbatim
    # on its ``contents`` attribute.
    result = HiddenResult("any", "anything")
    self.assertEqual(result.contents, "anything")
def test_hidden_result(self):
    """A HiddenResult exposes its payload unchanged via ``contents``."""
    hidden = HiddenResult('any', 'anything')
    self.assertEqual(hidden.contents, 'anything')