def start(self, basedir):
    LOG.info("Starting CDSW runner...")
    repo_type_env = OsUtils.get_env_value(BranchComparatorEnvVar.REPO_TYPE.value, RepoType.DOWNSTREAM.value)
    repo_type: RepoType = RepoType[repo_type_env.upper()]

    if repo_type == RepoType.DOWNSTREAM:
        self.run_clone_downstream_repos_script(basedir)
    elif repo_type == RepoType.UPSTREAM:
        # In upstream mode, make sure the downstream dir exists.
        # Currently, yarndevtools requires both repos to be present when initializing,
        # while BranchComparator is happy with a single repository, upstream or downstream, exclusively.
        # Git init the other repository so that initialization succeeds.
        FileUtils.create_new_dir(HADOOP_CLOUDERA_BASEDIR)
        FileUtils.change_cwd(HADOOP_CLOUDERA_BASEDIR)
        os.system("git init")
        self.run_clone_upstream_repos_script(basedir)

    # TODO investigate why the legacy script fails!
    self.run_comparator_and_send_mail(repo_type, algorithm="simple", run_legacy_script=False)
    self.run_comparator_and_send_mail(repo_type, algorithm="grouped", run_legacy_script=False)
def find_upstream_commits_and_save_to_file(self):
    # It's quite complex to grep for multiple jira IDs with gitpython, so let's rather call an external command
    git_log_result = self.upstream_repo.log(ORIGIN_TRUNK, oneline_with_date=True)
    cmd, output = CommandRunner.egrep_with_cli(git_log_result, self.intermediate_results_file, self.data.piped_jira_ids)
    normal_commit_lines = output.split("\n")
    modified_log_lines = self._find_missing_upstream_commits_by_message(git_log_result, normal_commit_lines)
    self.data.matched_upstream_commit_list = normal_commit_lines + modified_log_lines
    if not self.data.matched_upstream_commit_list:
        raise ValueError(f"Cannot find any commits for jira: {self.config.jira_id}")

    LOG.info("Number of matched commits: %s", self.data.no_of_matched_commits)
    LOG.debug("Matched commits: \n%s", StringUtils.list_to_multiline_string(self.data.matched_upstream_commit_list))

    # Reverse the commits so they are in chronological order (oldest first)
    self.data.matched_upstream_commit_list.reverse()
    self.convert_to_commit_data_objects_upstream()
    FileUtils.save_to_file(
        self.commits_file, StringUtils.list_to_multiline_string(self.data.matched_upstream_commit_hashes)
    )
def test_YARN_10496(self):
    project_out_root = ProjectUtils.get_test_output_basedir(PROJECT_NAME, allow_python_commons_as_project=True)
    result_basedir = FileUtils.join_path(project_out_root, "jira-data")
    FileUtils.ensure_dir_created(result_basedir)
    jira_id = "YARN-10496"
    jira_html_file = FileUtils.join_path(result_basedir, "jira.html")
    jira_list_file = FileUtils.join_path(result_basedir, "jira-list.txt")

    jira_html = JiraUtils.download_jira_html("https://issues.apache.org/jira/browse/", jira_id, jira_html_file)
    jira_ids_and_titles = JiraUtils.parse_subjiras_and_jira_titles_from_umbrella_html(
        jira_html, jira_list_file, filter_ids=[jira_id]
    )

    expected_jira_ids = [
        "YARN-10169", "YARN-10504", "YARN-10505", "YARN-10506", "YARN-10512", "YARN-10513",
        "YARN-10521", "YARN-10522", "YARN-10524", "YARN-10525", "YARN-10531", "YARN-10532",
        "YARN-10535", "YARN-10564", "YARN-10565", "YARN-10571", "YARN-10573", "YARN-10574",
        "YARN-10576", "YARN-10577", "YARN-10578", "YARN-10579", "YARN-10581", "YARN-10582",
        "YARN-10583", "YARN-10584", "YARN-10587", "YARN-10590", "YARN-10592", "YARN-10596",
        "YARN-10598", "YARN-10599", "YARN-10600", "YARN-10604", "YARN-10605", "YARN-10609",
        "YARN-10614", "YARN-10615", "YARN-10620", "YARN-10622", "YARN-10624",
    ]
    all_list_items_found = all(jid in jira_ids_and_titles for jid in expected_jira_ids)
    self.assertTrue(all_list_items_found)

    expected_mappings = {
        "YARN-10624": "Support max queues limit configuration in new auto created queue, consistent with old auto created."
    }
    self.assertEqual(expected_mappings["YARN-10624"], jira_ids_and_titles["YARN-10624"])
    self.assertTrue(isinstance(jira_ids_and_titles["YARN-10624"], str))
def _validate_args(self, parser, args):
    if args.gsheet and (
        args.gsheet_client_secret is None or args.gsheet_spreadsheet is None or args.gsheet_worksheet is None
    ):
        parser.error(
            "--gsheet requires the following arguments: "
            "--gsheet-client-secret, --gsheet-spreadsheet and --gsheet-worksheet."
        )

    if args.do_print:
        self.operation_mode = OperationMode.PRINT
    elif args.gsheet:
        self.operation_mode = OperationMode.GSHEET
        self.gsheet_options = GSheetOptions(args.gsheet_client_secret, args.gsheet_spreadsheet, worksheet=None)

    self.gsheet_jira_table = getattr(args, "gsheet_compare_with_jira_table", None)
    if self.operation_mode not in VALID_OPERATION_MODES:
        raise ValueError(
            f"Unknown state! "
            f"Operation mode should be any of {VALID_OPERATION_MODES}, but it is set to: {self.operation_mode}"
        )
    if hasattr(args, "gmail_credentials_file"):
        FileUtils.ensure_file_exists(args.gmail_credentials_file)
def parse_subjiras_and_jira_titles_from_umbrella_html(html_doc, to_file, filter_ids, find_all_links=True):
    soup = BeautifulSoup(html_doc, "html.parser")
    result_dict = {}
    links = []
    if find_all_links:
        links = soup.find_all("a", attrs={"class": "issue-link"})
    else:
        table = soup.find("table", id="issuetable")
        if table is not None:
            links = table.find_all("a", attrs={"class": "issue-link"})

    for link in links:
        jira_id = link.attrs["data-issue-key"]
        jira_title = str(link.contents[0])
        # There are 2 anchors with class 'issue-link' per row. Only store the one with a valid (non-empty) title.
        if len(jira_title.strip()) > 0:
            result_dict[jira_id] = jira_title

    if filter_ids:
        LOG.info("Filtering ids from result list: %s", filter_ids)
        result_dict = {jira_id: title for jira_id, title in result_dict.items() if jira_id not in filter_ids}

    FileUtils.save_to_file(to_file, StringUtils.dict_to_multiline_string(result_dict))
    return result_dict
def add_result_files_table(self):
    result_files_data = sorted(
        FileUtils.find_files(self.summary_data.output_dir, regex=".*", full_path_result=True)
    )
    table_type = RenderedTableType.RESULT_FILES
    header = [HEADER_ROW, HEADER_FILE, HEADER_NO_OF_LINES]
    gen_tables = ResultPrinter.print_tables(
        result_files_data,
        lambda file: (file, len(FileUtils.read_file(file).splitlines())),
        header=header,
        print_result=False,
        max_width=200,
        max_width_separator=os.sep,
        tabulate_fmts=DEFAULT_TABLE_FORMATS,
    )
    for table_fmt, table in gen_tables.items():
        self.add_table(
            table_type,
            TableWithHeader(
                table_type.header,
                header,
                result_files_data,
                table,
                table_fmt=table_fmt,
                colorized=False,
                branch=None,
            ),
        )
def get_output_basedir(cls, basedir_name: str, ensure_created=True, allow_python_commons_as_project=False, project_root_determination_strategy=None):
    if not basedir_name:
        raise ValueError("Basedir name should be specified!")

    project_name = cls.verify_caller_filename_valid(
        allow_python_commons_as_project=allow_python_commons_as_project,
        project_root_determination_strategy=project_root_determination_strategy,
    )
    proj_basedir = FileUtils.join_path(PROJECTS_BASEDIR, basedir_name)
    if project_name in cls.PROJECT_BASEDIR_DICT:
        old_basedir = cls.PROJECT_BASEDIR_DICT[project_name]
        if old_basedir != proj_basedir:
            raise ValueError(
                "Project is already registered with a different output basedir. Details: \n"
                f"Old basedir name: {old_basedir.split(os.sep)[-1]}\n"
                f"Project basedir's old full path: {old_basedir}\n"
                f"New basedir name would be: {basedir_name}\n"
                f"Project basedir's new full path would be: {proj_basedir}\n"
            )
    cls.PROJECT_BASEDIR_DICT[project_name] = proj_basedir

    if ensure_created:
        FileUtils.ensure_dir_created(proj_basedir)
    return proj_basedir
def test_run_in_a_non_git_repo_working_dir(self):
    working_dir = FileUtils.join_path("/tmp", "dummydir")
    FileUtils.ensure_dir_created(working_dir)

    format_patch_saver = FormatPatchSaver(self.setup_args(), working_dir, self.current_datetime)
    self.assertRaises(ValueError, format_patch_saver.run)
def test_fetch_with_upstream_umbrella_cached_mode(self):
    self.utils.checkout_trunk()
    umbrella_fetcher = UpstreamJiraUmbrellaFetcher(
        self.setup_args(force_mode=False),
        self.repo_wrapper,
        self.repo_wrapper,
        self.utils.jira_umbrella_data_dir,
        self.base_branch,
    )
    output_dir = FileUtils.join_path(self.utils.jira_umbrella_data_dir, UPSTREAM_JIRA_ID)

    # Run first, to surely have results pickled for this umbrella
    umbrella_fetcher.run()
    original_mod_dates = FileUtils.get_mod_dates_of_files(output_dir, *ALL_OUTPUT_FILES)

    # Run again, using the cache
    umbrella_fetcher.run()
    self._verify_files_and_mod_dates(output_dir)

    # Since we are using non-force mode (cached mode), we expect the files to be untouched
    new_mod_dates = FileUtils.get_mod_dates_of_files(output_dir, *ALL_OUTPUT_FILES)
    self.assertDictEqual(original_mod_dates, new_mod_dates)
def extract_zip_file(file: str, path: str):
    # Apparently, ZipFile does not resolve symlinks, so let's do it manually
    if os.path.islink(file):
        file = os.path.realpath(file)
    FileUtils.ensure_file_exists(file)
    # Use a context manager so the archive is closed even if extraction fails
    with zipfile.ZipFile(file, "r") as zip_file:
        zip_file.extractall(path)
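# Hypothetical usage sketch, not part of the original sources: exercising extract_zip_file on an
# archive that may be reachable through a symlink. Calling it as a module-level helper and the
# paths used here are assumptions for illustration; only the (file, path) signature and the
# FileUtils helpers seen above are relied on.
def _demo_extract_zip_file():
    archive = "/tmp/demo-archive.zip"      # may itself be a symlink; it is resolved before opening
    target_dir = "/tmp/demo-extracted"
    FileUtils.ensure_dir_created(target_dir)
    extract_zip_file(archive, target_dir)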
def _get_session_dir_under_child_dir(cls, child_dir_name, test: bool = False):
    child_dir_type: str = "child dir" if not test else "test child dir"
    dir_dict = cls.CHILD_DIR_DICT if not test else cls.CHILD_DIR_TEST_DICT
    if not child_dir_name:
        raise ValueError(f"Project {child_dir_type} name should be specified!")

    project_name = cls._validate_project_for_child_dir_creation()
    if project_name in dir_dict and child_dir_name in dir_dict[project_name]:
        stored_dir = dir_dict[project_name][child_dir_name]
        LOG.debug(f"Found already stored {child_dir_type} for project '{project_name}': {stored_dir}")
        session_dir = FileUtils.join_path(stored_dir, f"session-{DateUtils.now_formatted('%Y%m%d_%H%M%S')}")
        FileUtils.ensure_dir_created(session_dir)
        return session_dir
    else:
        raise ValueError(
            f"Cannot find stored {child_dir_type} for project. "
            f"Project: {project_name}, "
            f"Child dir: {child_dir_name}, "
            f"All stored {child_dir_type}s: {dir_dict}"
        )
def determine_new_patch_filename(self):
    patch_dir = FileUtils.join_path(self.patch_basedir, self.patch_branch)
    FileUtils.ensure_dir_created(patch_dir)
    found_patches = FileUtils.find_files(patch_dir, regex=self.patch_branch + PATCH_FILE_REGEX, single_level=True)
    new_patch_filename, new_patch_num = PatchUtils.get_next_filename(patch_dir, found_patches)

    # Double-check the new filename against one put together manually
    new_patch_filename_sanity = FileUtils.join_path(
        self.patch_basedir, self.patch_branch, f"{self.patch_branch}.{str(new_patch_num)}{PATCH_EXTENSION}"
    )

    # If this is the first patch, use the manually concatenated name;
    # otherwise, use the generated filename
    if new_patch_num == FIRST_PATCH_NUMBER:
        new_patch_filename = new_patch_filename_sanity
    if new_patch_filename != new_patch_filename_sanity:
        raise ValueError(
            "File paths do not match. "
            f"Calculated: {new_patch_filename}, Concatenated: {new_patch_filename_sanity}"
        )
    self.new_patch_filename = new_patch_filename
def branch_comparator(self, args, parser=None):
    branch_comparator = BranchComparator(args, self.downstream_repo, self.upstream_repo, self.branch_comparator_output_dir)
    FileUtils.create_symlink_path_dir(
        CommandType.BRANCH_COMPARATOR.session_link_name,
        branch_comparator.config.output_dir,
        self.project_out_root,
    )
    branch_comparator.run()
def print_table_fancy_grid(converter, to_file):
    FileUtils.ensure_file_exists_and_writable(to_file)
    converted_data = converter.convert(ExportMode.TEXT)
    tabulated = tabulate(converted_data, converter.headers, tablefmt="fancy_grid")
    LOG.info("Writing results to file: %s", to_file)
    FileUtils.write_to_file(to_file, tabulated)
def _write_to_file_or_console(self, contents, output_type, add_sep_to_end=False):
    if self.config.console_mode:
        LOG.info(f"Printing {output_type}: {contents}")
    else:
        fn_prefix = self._convert_output_type_str_to_file_prefix(output_type, add_sep_to_end=add_sep_to_end)
        f = self._generate_filename(self.config.output_dir, fn_prefix)
        LOG.info(f"Saving {output_type} to file: {f}")
        FileUtils.save_to_file(f, contents)
def _write_to_file_or_console_branch_data(self, branch: BranchData, contents, output_type):
    if self.config.console_mode:
        LOG.info(f"Printing {output_type} for branch {branch.type.name}: {contents}")
    else:
        fn_prefix = self._convert_output_type_str_to_file_prefix(output_type)
        f = self._generate_filename(self.config.output_dir, fn_prefix, branch.shortname)
        LOG.info(f"Saving {output_type} for branch {branch.type.name} to file: {f}")
        FileUtils.save_to_file(f, contents)
def unit_test_result_aggregator(self, args, parser=None):
    ut_results_aggregator = UnitTestResultAggregator(args, parser, self.unit_test_result_aggregator_output_dir)
    FileUtils.create_symlink_path_dir(
        CommandType.UNIT_TEST_RESULT_AGGREGATOR.session_link_name,
        ut_results_aggregator.config.session_dir,
        self.project_out_root,
    )
    ut_results_aggregator.run()
def main():
    # Check for connected devices
    adb_out = os.popen("adb devices -l").read()
    if not adb_out:
        raise ValueError("Unexpected output from adb: '{}'".format(adb_out))
    second_line = adb_out.split('\n', 1)[1]
    device_info_list = second_line.split("device")[1:]
    if not device_info_list:
        print("Found no connected device!")
        exit(1)
    device_info = "".join([d.strip() for d in device_info_list])
    print("Detected connected device: " + device_info)

    # Get the forward list
    forward_list = os.popen("adb forward --list").read().strip()
    if not forward_list:
        print("Port forward not detected. Opening one port forwarding TCP socket on port %s" % PORT)
        os.system("adb forward tcp:{} localabstract:{}".format(PORT, ABSTRACT_SOCKET_NAME))
        forward_list = os.popen("adb forward --list").read().strip()
        if not forward_list:
            raise ValueError("Cannot create port forwarding TCP socket on port %s!" % PORT)
    print("Forward list: " + forward_list)

    data = None
    try:
        data = load_json("http://localhost:{}/json/list".format(PORT))
    except ConnectionError as e:
        print("Error while querying Chrome history. Make sure Google Chrome is launched on the device.")
        print(e)
        exit(1)

    # Order by ids
    ordered_data = sorted(data, key=lambda d: d['id'])
    # print("Ordered data: " + str(ordered_data))
    urls = [d['url'] for d in ordered_data]
    # print("URLs: " + str(urls))
    if not urls:
        print("Opened pages could not be found. Exiting...")
        return

    final_result = "\n".join(urls)
    file_name = "webpages-phone-" + datetime.datetime.now().strftime('%Y%m%d_%H%M%S.txt')
    file_path = os.path.join("/tmp", file_name)
    FileUtils.write_to_file(file_path, final_result)
    print("Pages saved to file: " + file_path)
    print("Please execute command: cp {} ~/Downloads/ && subl ~/Downloads/{}".format(file_path, file_name))
def setUp(self):
    self.current_datetime = DateUtils.get_current_datetime()
    self.patches_basedir = FileUtils.join_path(self.saved_patches_dir, DEST_DIR_PREFIX, self.current_datetime)
    self.assertIsNotNone(self.patches_basedir)
    self.assertNotEqual(self.patches_basedir, "~")
    self.assertNotEqual(self.patches_basedir, "/")
    self.assertTrue(self.saved_patches_dir in self.patches_basedir)
    FileUtils.remove_files(self.patches_basedir, FORMAT_PATCH_FILE_PREFIX)
def save_diff_to_patch_file(diff, file):
    if not diff or diff == "":
        LOG.error("Diff was empty. Patch file is not created!")
        return False
    else:
        diff += os.linesep
        LOG.info("Saving diff to patch file: %s", file)
        LOG.debug("Diff: %s", diff)
        FileUtils.save_to_file(file, diff)
        return True
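# Hypothetical usage sketch, not part of the original sources: writing an already computed unified
# diff string to a patch file and reacting to the boolean result. Only the save_diff_to_patch_file
# signature above and the FileUtils.join_path / LOG helpers seen elsewhere in these snippets are
# relied on; the function name and target path are illustrative assumptions.
def _demo_save_diff(diff_text: str):
    patch_file = FileUtils.join_path("/tmp", "my-change.patch")
    if not save_diff_to_patch_file(diff_text, patch_file):
        LOG.warning("No diff content, patch file was not written: %s", patch_file)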
def fetch_jira_umbrella_data(self, args, parser=None):
    jira_umbrella_fetcher = UpstreamJiraUmbrellaFetcher(
        args, self.upstream_repo, self.downstream_repo, self.jira_umbrella_data_dir, DEFAULT_BASE_BRANCH
    )
    FileUtils.create_symlink_path_dir(
        CommandType.FETCH_JIRA_UMBRELLA_DATA.session_link_name,
        jira_umbrella_fetcher.config.umbrella_result_basedir,
        self.project_out_root,
    )
    jira_umbrella_fetcher.run()
def test_with_bad_patch_content(self):
    patch_file = FileUtils.join_path(self.dummy_patches_dir, PATCH_FILENAME)
    FileUtils.save_to_file(patch_file, "dummycontents")
    args = Object()
    args.patch_file = patch_file

    review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper, BASE_BRANCH, REMOTE_BASE_BRANCH)
    self.assertRaises(ValueError, review_branch_creator.run)
def test_with_oddly_named_patch(self):
    patch_file = FileUtils.join_path(self.dummy_patches_dir, "testpatch1.patch")
    FileUtils.create_files(patch_file)
    args = Object()
    args.patch_file = patch_file

    review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper, BASE_BRANCH, REMOTE_BASE_BRANCH)
    self.assertRaises(ValueError, review_branch_creator.run)
def remove_test_files_and_recreate_dir(cls, dir_name: str, clazz):
    project_name = cls._validate_project_for_child_dir_creation()
    cls.validate_test_child_dir(dir_name, project_name)
    dir_path = cls.CHILD_DIR_TEST_DICT[project_name][dir_name]

    LOG.info(f"Removing dir: {dir_path}")
    FileUtils.remove_files(dir_path, ".*")
    LOG.info(f"Recreating dir: {dir_path}")
    new_dir = FileUtils.ensure_dir_created(dir_path)
    LOG.info("Basedir of %s is: %s", clazz.__name__, new_dir)
    return new_dir
def save_to_test_file(cls, dir_name: str, filename: str, file_contents: str):
    if not dir_name:
        raise ValueError("Dir name should be specified!")
    if not filename:
        raise ValueError("Filename should be specified!")

    project_name = cls._validate_project_for_child_dir_creation()
    cls.validate_test_child_dir(dir_name, project_name)
    dir_path = cls.CHILD_DIR_TEST_DICT[project_name][dir_name]
    FileUtils.save_to_file(FileUtils.join_path(dir_path, filename), file_contents)
def print_table_html(converter, to_file):
    import html

    FileUtils.ensure_file_exists_and_writable(to_file)
    converted_data = converter.convert(ExportMode.HTML)
    tabulated = tabulate(converted_data, converter.headers, tablefmt="html")
    # Unescape manually here, as tabulate automatically escapes HTML content and there's no way to turn this off.
    tabulated = html.unescape(tabulated)
    LOG.info("Writing results to file: %s", to_file)
    FileUtils.write_to_file(to_file, tabulated)
def __init__(self, args, attachment_file: str = None):
    if attachment_file:
        FileUtils.ensure_file_exists_and_readable(attachment_file)
    self.attachment_file = attachment_file
    self.email_account: EmailAccount = EmailAccount(args.account_user, args.account_password)
    self.email_conf: EmailConfig = EmailConfig(args.smtp_server, args.smtp_port, self.email_account)
    self.sender: str = args.sender
    self.recipients = args.recipients
    self.subject: str = args.subject if "subject" in args else None
    self.attachment_filename: str = args.attachment_filename if "attachment_filename" in args else None
def _determine_project_by_common_files(file_of_caller):
    LOG.debug(
        "Execution environment is not local, "
        "trying to determine project name with common files strategy. "
        f"Current sys.path: \n{ProjectUtils.get_sys_path_human_readable()}\n"
        f"Current caller file: {file_of_caller}"
    )
    project_root_path = FileUtils.find_repo_root_dir_auto(file_of_caller)
    LOG.debug(f"Found project root: {project_root_path}")
    comps = FileUtils.get_path_components(project_root_path)
    project = comps[-1]
    path = comps[0:-1]
    LOG.info(f"Determined path: {path}, project: {project}")
    return path, project
def get_test_output_basedir(cls, basedir_name: str, allow_python_commons_as_project=False, project_root_determination_strategy=None):
    """
    :param basedir_name: Name of the project's output basedir.
    :param allow_python_commons_as_project: This is useful and a must for test executions of ProjectUtils
        (e.g. JiraUtilsTests) as stackframes calling pythoncommons are only the methods of the unittest framework.
    :return: The test output basedir of the project.
    """
    cls.test_execution = True
    project_name = cls.verify_caller_filename_valid(
        allow_python_commons_as_project=allow_python_commons_as_project,
        project_root_determination_strategy=project_root_determination_strategy,
    )
    if project_name not in cls.PROJECT_BASEDIR_DICT:
        # Creating the project dir for the first time
        proj_basedir = cls.get_output_basedir(
            basedir_name,
            allow_python_commons_as_project=allow_python_commons_as_project,
            project_root_determination_strategy=project_root_determination_strategy,
        )
    else:
        proj_basedir = cls.PROJECT_BASEDIR_DICT[project_name]

    return FileUtils.join_path(proj_basedir, TEST_OUTPUT_DIR_NAME)
def test_with_not_existing_patch(self):
    args = Object()
    args.patch_file = FileUtils.join_path("tmp", "blablabla")

    review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper, BASE_BRANCH, REMOTE_BASE_BRANCH)
    self.assertRaises(ValueError, review_branch_creator.run)