Example #1
    def start(self, basedir):
        LOG.info("Starting CDSW runner...")
        repo_type_env = OsUtils.get_env_value(
            BranchComparatorEnvVar.REPO_TYPE.value, RepoType.DOWNSTREAM.value)
        repo_type: RepoType = RepoType[repo_type_env.upper()]

        if repo_type == RepoType.DOWNSTREAM:
            self.run_clone_downstream_repos_script(basedir)
        elif repo_type == RepoType.UPSTREAM:
            # If we are in upstream mode, make sure the downstream dir exists.
            # Currently, yarndevtools requires both repos to be present when initializing,
            # while BranchComparator is happy with a single repository, upstream or downstream.
            # Git init the other repository so initialization succeeds.
            FileUtils.create_new_dir(HADOOP_CLOUDERA_BASEDIR)
            FileUtils.change_cwd(HADOOP_CLOUDERA_BASEDIR)
            os.system("git init")
            self.run_clone_upstream_repos_script(basedir)

        # TODO investigate why legacy script fails!
        self.run_comparator_and_send_mail(repo_type,
                                          algorithm="simple",
                                          run_legacy_script=False)
        self.run_comparator_and_send_mail(repo_type,
                                          algorithm="grouped",
                                          run_legacy_script=False)
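
A side note on the selection logic above: reading an env var with a default and upper-casing it into an Enum lookup needs nothing beyond the stdlib. A minimal sketch of just that pattern, where the plain "REPO_TYPE" name and the re-declared RepoType enum are stand-ins for the real BranchComparatorEnvVar/RepoType definitions:

import os
from enum import Enum

class RepoType(Enum):
    DOWNSTREAM = "downstream"
    UPSTREAM = "upstream"

# Fall back to downstream when the env var is unset, then map by enum member name.
repo_type_env = os.environ.get("REPO_TYPE", RepoType.DOWNSTREAM.value)
repo_type = RepoType[repo_type_env.upper()]  # raises KeyError for unknown values
print(repo_type)  # RepoType.DOWNSTREAM unless REPO_TYPE says otherwise
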
    def find_upstream_commits_and_save_to_file(self):
        # It's quite complex to grep for multiple jira IDs with gitpython, so call an external command instead
        git_log_result = self.upstream_repo.log(ORIGIN_TRUNK,
                                                oneline_with_date=True)
        cmd, output = CommandRunner.egrep_with_cli(
            git_log_result, self.intermediate_results_file,
            self.data.piped_jira_ids)
        normal_commit_lines = output.split("\n")
        modified_log_lines = self._find_missing_upstream_commits_by_message(
            git_log_result, normal_commit_lines)
        self.data.matched_upstream_commit_list = normal_commit_lines + modified_log_lines
        if not self.data.matched_upstream_commit_list:
            raise ValueError(
                f"Cannot find any commits for jira: {self.config.jira_id}")

        LOG.info("Number of matched commits: %s",
                 self.data.no_of_matched_commits)
        LOG.debug(
            "Matched commits: \n%s",
            StringUtils.list_to_multiline_string(
                self.data.matched_upstream_commit_list))

        # Commits in reverse order (oldest first)
        self.data.matched_upstream_commit_list.reverse()
        self.convert_to_commit_data_objects_upstream()
        FileUtils.save_to_file(
            self.commits_file,
            StringUtils.list_to_multiline_string(
                self.data.matched_upstream_commit_hashes))
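
CommandRunner.egrep_with_cli is not shown here; its effect, filtering a multi-line git log for several Jira IDs at once, can be approximated in pure Python. A sketch under the assumption that piped_jira_ids is an egrep alternation such as "YARN-10504|YARN-10505"; the log lines are made-up one-liners:

import re

git_log_result = [
    "abc1234 YARN-10504. Fix queue metrics 2021-01-01",
    "def5678 HADOOP-1000. Unrelated change 2021-01-02",
    "9abcdef YARN-10505. Another fix 2021-01-03",
]
piped_jira_ids = "YARN-10504|YARN-10505"

pattern = re.compile(piped_jira_ids)
matched = [line for line in git_log_result if pattern.search(line)]
assert len(matched) == 2  # both YARN commits matched, the HADOOP one filtered out
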
    def test_YARN_10496(self):
        project_out_root = ProjectUtils.get_test_output_basedir(PROJECT_NAME, allow_python_commons_as_project=True)
        result_basedir = FileUtils.join_path(project_out_root, "jira-data")
        FileUtils.ensure_dir_created(result_basedir)
        jira_id = "YARN-10496"
        jira_html_file = FileUtils.join_path(result_basedir, "jira.html")
        jira_list_file = FileUtils.join_path(result_basedir, "jira-list.txt")

        jira_html = JiraUtils.download_jira_html(
            "https://issues.apache.org/jira/browse/", jira_id, jira_html_file
        )
        jira_ids_and_titles = JiraUtils.parse_subjiras_and_jira_titles_from_umbrella_html(
            jira_html, jira_list_file, filter_ids=[jira_id]
        )

        expected_jira_ids = ['YARN-10169', 'YARN-10504', 'YARN-10505', 'YARN-10506', 'YARN-10512', 'YARN-10513',
                             'YARN-10521', 'YARN-10522', 'YARN-10524', 'YARN-10525', 'YARN-10531', 'YARN-10532',
                             'YARN-10535', 'YARN-10564', 'YARN-10565', 'YARN-10571', 'YARN-10573', 'YARN-10574',
                             'YARN-10576', 'YARN-10577', 'YARN-10578', 'YARN-10579', 'YARN-10581', 'YARN-10582',
                             'YARN-10583', 'YARN-10584', 'YARN-10587', 'YARN-10590', 'YARN-10592', 'YARN-10596',
                             'YARN-10598', 'YARN-10599', 'YARN-10600', 'YARN-10604', 'YARN-10605', 'YARN-10609',
                             'YARN-10614', 'YARN-10615', 'YARN-10620', 'YARN-10622', 'YARN-10624']
        all_list_items_found = all(id1 in jira_ids_and_titles for id1 in expected_jira_ids)
        self.assertTrue(all_list_items_found)

        expected_mappings = {'YARN-10624': 'Support max queues limit configuration in new auto created queue, consistent with old auto created.'}
        self.assertEqual(expected_mappings['YARN-10624'], jira_ids_and_titles['YARN-10624'])
        self.assertIsInstance(jira_ids_and_titles['YARN-10624'], str)
Example #4
    def _validate_args(self, parser, args):
        if args.gsheet and (args.gsheet_client_secret is None
                            or args.gsheet_spreadsheet is None
                            or args.gsheet_worksheet is None):
            parser.error(
                "--gsheet requires the following arguments: "
                "--gsheet-client-secret, --gsheet-spreadsheet and --gsheet-worksheet."
            )

        if args.do_print:
            self.operation_mode = OperationMode.PRINT
        elif args.gsheet:
            self.operation_mode = OperationMode.GSHEET
            self.gsheet_options = GSheetOptions(args.gsheet_client_secret,
                                                args.gsheet_spreadsheet,
                                                worksheet=args.gsheet_worksheet)
            self.gsheet_jira_table = getattr(args,
                                             "gsheet_compare_with_jira_table",
                                             None)
        if self.operation_mode not in VALID_OPERATION_MODES:
            raise ValueError(
                f"Unknown state! "
                f"Operation mode should be any of {VALID_OPERATION_MODES}, but it is set to: {self.operation_mode}"
            )
        if hasattr(args, "gmail_credentials_file"):
            FileUtils.ensure_file_exists(args.gmail_credentials_file)
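
The --gsheet validation follows a common argparse pattern: options that are only mandatory in the presence of another flag are checked after parsing, with parser.error() producing a normal usage failure. A self-contained sketch of that pattern (option names mirror the ones above; the sample argv is made up):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--gsheet", action="store_true")
parser.add_argument("--gsheet-client-secret")
parser.add_argument("--gsheet-spreadsheet")
parser.add_argument("--gsheet-worksheet")
args = parser.parse_args(["--gsheet", "--gsheet-client-secret", "secret.json",
                          "--gsheet-spreadsheet", "mysheet", "--gsheet-worksheet", "ws1"])

# Enforce the dependent options only when --gsheet was given.
if args.gsheet and None in (args.gsheet_client_secret,
                            args.gsheet_spreadsheet,
                            args.gsheet_worksheet):
    parser.error("--gsheet requires --gsheet-client-secret, "
                 "--gsheet-spreadsheet and --gsheet-worksheet.")
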
    def parse_subjiras_and_jira_titles_from_umbrella_html(
            html_doc, to_file, filter_ids, find_all_links=True):
        soup = BeautifulSoup(html_doc, "html.parser")
        result_dict = {}

        links = []
        if find_all_links:
            links = soup.find_all("a", attrs={"class": "issue-link"})
        else:
            table = soup.find("table", id="issuetable")
            if table is not None:
                links = table.find_all("a", attrs={"class": "issue-link"})
        for link in links:
            jira_id = link.attrs["data-issue-key"]
            # There are 2 anchors with class 'issue-link' per row; only keep the one with a valid title.
            jira_title = str(link.contents[0]) if link.contents else ""
            if len(jira_title.strip()) > 0:
                result_dict[jira_id] = jira_title

        if filter_ids:
            LOG.info("Filtering ids from result list: %s", filter_ids)
            result_dict = {
                jira_id: title
                for jira_id, title in result_dict.items()
                if jira_id not in filter_ids
            }

        FileUtils.save_to_file(
            to_file, StringUtils.dict_to_multiline_string(result_dict))
        return result_dict
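
The parser above keys off Jira's issue-link anchors and their data-issue-key attribute. A self-contained usage sketch against a trimmed-down, hypothetical HTML fragment (requires beautifulsoup4); note the empty twin anchor that the title check filters out:

from bs4 import BeautifulSoup

html_doc = """
<table id="issuetable">
  <tr>
    <td><a class="issue-link" data-issue-key="YARN-10504"></a></td>
    <td><a class="issue-link" data-issue-key="YARN-10504">Fix queue metrics</a></td>
  </tr>
</table>
"""
soup = BeautifulSoup(html_doc, "html.parser")
result = {}
for link in soup.find_all("a", attrs={"class": "issue-link"}):
    title = str(link.contents[0]) if link.contents else ""
    if title.strip():  # skip the anchor without a title
        result[link.attrs["data-issue-key"]] = title
print(result)  # {'YARN-10504': 'Fix queue metrics'}
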
    def add_result_files_table(self):
        result_files_data = sorted(
            FileUtils.find_files(self.summary_data.output_dir, regex=".*", full_path_result=True)
        )
        table_type = RenderedTableType.RESULT_FILES
        header = [HEADER_ROW, HEADER_FILE, HEADER_NO_OF_LINES]
        gen_tables = ResultPrinter.print_tables(
            result_files_data,
            lambda file: (file, len(FileUtils.read_file(file).splitlines())),
            header=header,
            print_result=False,
            max_width=200,
            max_width_separator=os.sep,
            tabulate_fmts=DEFAULT_TABLE_FORMATS,
        )

        for table_fmt, table in gen_tables.items():
            self.add_table(
                table_type,
                TableWithHeader(
                    table_type.header,
                    header,
                    result_files_data,
                    table,
                    table_fmt=table_fmt,
                    colorized=False,
                    branch=None,
                ),
            )
    def get_output_basedir(cls,
                           basedir_name: str,
                           ensure_created=True,
                           allow_python_commons_as_project=False,
                           project_root_determination_strategy=None):
        if not basedir_name:
            raise ValueError("Basedir name should be specified!")

        project_name = cls.verify_caller_filename_valid(
            allow_python_commons_as_project=allow_python_commons_as_project,
            project_root_determination_strategy=project_root_determination_strategy)
        proj_basedir = FileUtils.join_path(PROJECTS_BASEDIR, basedir_name)
        if project_name in cls.PROJECT_BASEDIR_DICT:
            old_basedir = cls.PROJECT_BASEDIR_DICT[project_name]
            if old_basedir != proj_basedir:
                raise ValueError(
                    "Project is already registered with a different output basedir. Details: \n"
                    f"Old basedir name: {old_basedir.split(os.sep)[-1]}\n"
                    f"Project basedir's old full path: {old_basedir}\n"
                    f"New basedir name would be: {basedir_name}\n"
                    f"Project basedir's new full path would be: {proj_basedir}\n"
                )
        cls.PROJECT_BASEDIR_DICT[project_name] = proj_basedir

        if ensure_created:
            FileUtils.ensure_dir_created(proj_basedir)
        return proj_basedir
    def test_run_in_a_non_git_repo_working_dir(self):
        working_dir = FileUtils.join_path("/tmp", "dummydir")
        FileUtils.ensure_dir_created(working_dir)

        format_patch_saver = FormatPatchSaver(self.setup_args(), working_dir,
                                              self.current_datetime)
        self.assertRaises(ValueError, format_patch_saver.run)
    def test_fetch_with_upstream_umbrella_cached_mode(self):
        self.utils.checkout_trunk()
        umbrella_fetcher = UpstreamJiraUmbrellaFetcher(
            self.setup_args(force_mode=False),
            self.repo_wrapper,
            self.repo_wrapper,
            self.utils.jira_umbrella_data_dir,
            self.base_branch,
        )
        # Run first, to make sure results are pickled for this umbrella
        umbrella_fetcher.run()

        # Run again, this time using the cache
        umbrella_fetcher.run()

        output_dir = FileUtils.join_path(self.utils.jira_umbrella_data_dir,
                                         UPSTREAM_JIRA_ID)
        original_mod_dates = FileUtils.get_mod_dates_of_files(
            output_dir, *ALL_OUTPUT_FILES)

        self._verify_files_and_mod_dates(output_dir)

        # Since we are using non-force (cached) mode, we expect the files to be untouched
        new_mod_dates = FileUtils.get_mod_dates_of_files(
            output_dir, *ALL_OUTPUT_FILES)
        self.assertDictEqual(original_mod_dates, new_mod_dates)
Example #10
    def extract_zip_file(file: str, path: str):
        # Apparently, ZipFile does not resolve symlinks so let's do it manually
        if os.path.islink(file):
            file = os.path.realpath(file)
        FileUtils.ensure_file_exists(file)
        # Use a context manager so the archive handle is closed after extraction
        with zipfile.ZipFile(file, "r") as zip_file:
            zip_file.extractall(path)
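
A stdlib-only usage sketch of the same symlink-then-extract flow, building a throwaway archive first (POSIX assumed for os.symlink; the manual realpath step mirrors the comment above):

import os
import tempfile
import zipfile

workdir = tempfile.mkdtemp()
archive = os.path.join(workdir, "data.zip")
with zipfile.ZipFile(archive, "w") as zf:
    zf.writestr("hello.txt", "hello")

link = os.path.join(workdir, "data-link.zip")
os.symlink(archive, link)

# Resolve the symlink manually before handing the path to ZipFile.
target = os.path.realpath(link) if os.path.islink(link) else link
with zipfile.ZipFile(target, "r") as zf:
    zf.extractall(os.path.join(workdir, "out"))
print(os.listdir(os.path.join(workdir, "out")))  # ['hello.txt']
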
    def _get_session_dir_under_child_dir(cls,
                                         child_dir_name,
                                         test: bool = False):
        child_dir_type: str = "child dir" if not test else "test child dir"
        dir_dict = cls.CHILD_DIR_DICT if not test else cls.CHILD_DIR_TEST_DICT

        if not child_dir_name:
            raise ValueError(
                f"Project {child_dir_type} name should be specified!")

        project_name = cls._validate_project_for_child_dir_creation()
        if project_name in dir_dict and child_dir_name in dir_dict[project_name]:
            stored_dir = dir_dict[project_name][child_dir_name]
            LOG.debug(
                f"Found already stored {child_dir_type} for project '{project_name}': {stored_dir}"
            )

            session_dir = FileUtils.join_path(
                stored_dir,
                f"session-{DateUtils.now_formatted('%Y%m%d_%H%M%S')}")
            FileUtils.ensure_dir_created(session_dir)
            return session_dir
        else:
            raise ValueError(
                f"Cannot find stored {child_dir_type} for project. "
                f"Project: {project_name}, "
                f"Child dir: {child_dir_name}, "
                f"All stored {child_dir_type}s: {dir_dict}")
    def determine_new_patch_filename(self):
        patch_dir = FileUtils.join_path(self.patch_basedir, self.patch_branch)
        FileUtils.ensure_dir_created(patch_dir)
        found_patches = FileUtils.find_files(patch_dir,
                                             regex=self.patch_branch + PATCH_FILE_REGEX,
                                             single_level=True)
        new_patch_filename, new_patch_num = PatchUtils.get_next_filename(
            patch_dir, found_patches)

        # Double-check the new filename against assembling it manually
        new_patch_filename_sanity = FileUtils.join_path(
            self.patch_basedir, self.patch_branch,
            f"{self.patch_branch}.{str(new_patch_num)}{PATCH_EXTENSION}")

        # If this is the first patch, use the manually assembled name;
        # otherwise, use the generated filename
        if new_patch_num == FIRST_PATCH_NUMBER:
            new_patch_filename = new_patch_filename_sanity
        if new_patch_filename != new_patch_filename_sanity:
            raise ValueError(
                "File paths do not match. "
                f"Calculated: {new_patch_filename}, Concatenated: {new_patch_filename_sanity}"
            )
        self.new_patch_filename = new_patch_filename
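
PatchUtils.get_next_filename is not shown; the numbering it implies, branch.N.patch with N one past the highest existing number, can be sketched with a regex over the existing file names. The ".patch" extension and FIRST_PATCH_NUMBER = 1 are assumptions inferred from the sanity check above:

import re

PATCH_EXTENSION = ".patch"
FIRST_PATCH_NUMBER = 1

def next_patch_filename(branch, existing_names):
    # Pull the numeric suffix out of names like 'mybranch.3.patch'.
    rx = re.compile(rf"{re.escape(branch)}\.(\d+){re.escape(PATCH_EXTENSION)}$")
    nums = [int(m.group(1)) for f in existing_names if (m := rx.match(f))]
    next_num = max(nums) + 1 if nums else FIRST_PATCH_NUMBER
    return f"{branch}.{next_num}{PATCH_EXTENSION}", next_num

print(next_patch_filename("mybranch", ["mybranch.1.patch", "mybranch.2.patch"]))
# ('mybranch.3.patch', 3)
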
Example #13
    def branch_comparator(self, args, parser=None):
        branch_comparator = BranchComparator(args, self.downstream_repo,
                                             self.upstream_repo,
                                             self.branch_comparator_output_dir)
        FileUtils.create_symlink_path_dir(
            CommandType.BRANCH_COMPARATOR.session_link_name,
            branch_comparator.config.output_dir, self.project_out_root)
        branch_comparator.run()
Example #14
    def print_table_fancy_grid(converter, to_file):
        FileUtils.ensure_file_exists_and_writable(to_file)
        converted_data = converter.convert(ExportMode.TEXT)
        tabulated = tabulate(converted_data,
                             converter.headers,
                             tablefmt="fancy_grid")
        LOG.info("Writing results to file: %s", to_file)
        FileUtils.write_to_file(to_file, tabulated)
    def _write_to_file_or_console(self, contents, output_type, add_sep_to_end=False):
        if self.config.console_mode:
            LOG.info(f"Printing {output_type}: {contents}")
        else:
            fn_prefix = self._convert_output_type_str_to_file_prefix(output_type, add_sep_to_end=add_sep_to_end)
            f = self._generate_filename(self.config.output_dir, fn_prefix)
            LOG.info(f"Saving {output_type} to file: {f}")
            FileUtils.save_to_file(f, contents)
    def _write_to_file_or_console_branch_data(self, branch: BranchData, contents, output_type):
        if self.config.console_mode:
            LOG.info(f"Printing {output_type} for branch {branch.type.name}: {contents}")
        else:
            fn_prefix = self._convert_output_type_str_to_file_prefix(output_type)
            f = self._generate_filename(self.config.output_dir, fn_prefix, branch.shortname)
            LOG.info(f"Saving {output_type} for branch {branch.type.name} to file: {f}")
            FileUtils.save_to_file(f, contents)
Example #17
    def unit_test_result_aggregator(self, args, parser=None):
        ut_results_aggregator = UnitTestResultAggregator(
            args, parser, self.unit_test_result_aggregator_output_dir)
        FileUtils.create_symlink_path_dir(
            CommandType.UNIT_TEST_RESULT_AGGREGATOR.session_link_name,
            ut_results_aggregator.config.session_dir,
            self.project_out_root,
        )
        ut_results_aggregator.run()
def main():
    # Check for connected devices
    adb_out = os.popen("adb devices -l").read()
    if not adb_out:
        raise ValueError("Unexpected output from adb: '{}'".format(adb_out))

    # Everything after the first line ("List of devices attached") describes devices
    devices_section = adb_out.split('\n', 1)[1]
    device_info_list = devices_section.split("device")[1:]
    if not device_info_list:
        print("Found no device connected!")
        exit(1)
    device_info = "".join([d.strip() for d in device_info_list])
    print("Detected connected device: " + device_info)

    # Get forward list
    forward_list = os.popen("adb forward --list").read().strip()
    if not forward_list:
        print(
            "Port forward not detected. Opening one port forwarding TCP socket on port %s"
            % PORT)
        os.system("adb forward tcp:{} localabstract:{}".format(
            PORT, ABSTRACT_SOCKET_NAME))
    forward_list = os.popen("adb forward --list").read().strip()
    if not forward_list:
        raise ValueError(
            "Cannot create port forwarding TCP socket on port %s!" % PORT)
    print("Forward list: " + forward_list)

    data = None
    try:
        data = load_json("http://localhost:{}/json/list".format(PORT))
    except ConnectionError as e:
        print(
            "Error while querying Chrome history. Make sure Google Chrome is launched on the device."
        )
        print(e)
        exit(1)

    # Order by ids
    ordered_data = sorted(data, key=lambda d: d['id'])
    # print("Ordered data: " + str(ordered_data))

    urls = [d['url'] for d in ordered_data]
    # print("URLs: " + str(urls))

    if not urls:
        print("Opened pages could not be found. Exiting...")
        return
    final_result = "\n".join(urls)
    file_name = "webpages-phone-" + datetime.datetime.now().strftime(
        '%Y%m%d_%H%M%S.txt')
    file_path = os.path.join("/tmp", file_name)
    FileUtils.write_to_file(file_path, final_result)
    print("Pages saved to file: " + file_path)
    print("Please execute command: cp {} ~/Downloads/ && subl ~/Downloads/{}".
          format(file_path, file_name))
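
load_json above is not shown; querying the DevTools endpoint that the forwarded port exposes can be done with the stdlib alone. A sketch, assuming Chrome's remote debugging HTTP service is reachable on the forwarded port:

import json
from urllib.request import urlopen

PORT = 9222  # assumed forwarded port

def load_json(url):
    with urlopen(url, timeout=5) as resp:
        return json.load(resp)

tabs = load_json(f"http://localhost:{PORT}/json/list")
urls = [tab["url"] for tab in sorted(tabs, key=lambda tab: tab["id"])]
print("\n".join(urls))
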
    def setUp(self):
        self.current_datetime = DateUtils.get_current_datetime()
        self.patches_basedir = FileUtils.join_path(self.saved_patches_dir,
                                                   DEST_DIR_PREFIX,
                                                   self.current_datetime)
        self.assertIsNotNone(self.patches_basedir)
        self.assertNotEqual(self.patches_basedir, "~")
        self.assertNotEqual(self.patches_basedir, "/")
        self.assertTrue(self.saved_patches_dir in self.patches_basedir)
        FileUtils.remove_files(self.patches_basedir, FORMAT_PATCH_FILE_PREFIX)
    def save_diff_to_patch_file(diff, file):
        if not diff:
            LOG.error("Diff was empty. Patch file is not created!")
            return False
        diff += os.linesep
        LOG.info("Saving diff to patch file: %s", file)
        LOG.debug("Diff: %s", diff)
        FileUtils.save_to_file(file, diff)
        return True
Example #21
    def fetch_jira_umbrella_data(self, args, parser=None):
        jira_umbrella_fetcher = UpstreamJiraUmbrellaFetcher(
            args, self.upstream_repo, self.downstream_repo,
            self.jira_umbrella_data_dir, DEFAULT_BASE_BRANCH)
        FileUtils.create_symlink_path_dir(
            CommandType.FETCH_JIRA_UMBRELLA_DATA.session_link_name,
            jira_umbrella_fetcher.config.umbrella_result_basedir,
            self.project_out_root,
        )
        jira_umbrella_fetcher.run()
Example #22
    def test_with_bad_patch_content(self):
        patch_file = FileUtils.join_path(self.dummy_patches_dir,
                                         PATCH_FILENAME)
        FileUtils.save_to_file(patch_file, "dummycontents")
        args = Object()
        args.patch_file = patch_file

        review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper,
                                                    BASE_BRANCH,
                                                    REMOTE_BASE_BRANCH)
        self.assertRaises(ValueError, review_branch_creator.run)
Example #23
    def test_with_oddly_named_patch(self):
        patch_file = FileUtils.join_path(self.dummy_patches_dir,
                                         "testpatch1.patch")
        FileUtils.create_files(patch_file)
        args = Object()
        args.patch_file = patch_file

        review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper,
                                                    BASE_BRANCH,
                                                    REMOTE_BASE_BRANCH)
        self.assertRaises(ValueError, review_branch_creator.run)
    def remove_test_files_and_recreate_dir(cls, dir_name: str, clazz):
        project_name = cls._validate_project_for_child_dir_creation()
        cls.validate_test_child_dir(dir_name, project_name)
        dir_path = cls.CHILD_DIR_TEST_DICT[project_name][dir_name]

        LOG.info(f"Removing dir: {dir_path}")
        FileUtils.remove_files(dir_path, ".*")
        LOG.info(f"Recreating dir: {dir_path}")
        new_dir = FileUtils.ensure_dir_created(dir_path)
        LOG.info("Basedir of %s is: %s", clazz.__name__, new_dir)
        return new_dir
    def save_to_test_file(cls, dir_name: str, filename: str,
                          file_contents: str):
        if not dir_name:
            raise ValueError("Dir name should be specified!")
        if not filename:
            raise ValueError("Filename should be specified!")

        project_name = cls._validate_project_for_child_dir_creation()
        cls.validate_test_child_dir(dir_name, project_name)
        dir_path = cls.CHILD_DIR_TEST_DICT[project_name][dir_name]
        FileUtils.save_to_file(FileUtils.join_path(dir_path, filename),
                               file_contents)
Example #26
    def print_table_html(converter, to_file):
        import html
        FileUtils.ensure_file_exists_and_writable(to_file)
        converted_data = converter.convert(ExportMode.HTML)
        tabulated = tabulate(converted_data,
                             converter.headers,
                             tablefmt="html")

        # Unescape manually here, as tabulate automatically escapes HTML content and there's no way to turn this off.
        tabulated = html.unescape(tabulated)

        LOG.info("Writing results to file: " + to_file)
        FileUtils.write_to_file(to_file, tabulated)
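
The unescape step matters because tabulate escapes cell content in its "html" format, as the comment above notes. A minimal round trip showing the difference (requires the tabulate package; the sample row is made up):

import html
from tabulate import tabulate

rows = [["YARN-10504", '<a href="https://example.org">link</a>']]
escaped = tabulate(rows, ["Jira", "Link"], tablefmt="html")
print("&lt;a" in escaped)                    # True when tabulate escapes the anchor
print("<a href" in html.unescape(escaped))   # True after unescaping
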
Example #27
    def __init__(self, args, attachment_file: str = None):
        if attachment_file:
            FileUtils.ensure_file_exists_and_readable(attachment_file)
            self.attachment_file = attachment_file
        self.email_account: EmailAccount = EmailAccount(
            args.account_user, args.account_password)
        self.email_conf: EmailConfig = EmailConfig(args.smtp_server,
                                                   args.smtp_port,
                                                   self.email_account)
        self.sender: str = args.sender
        self.recipients = args.recipients
        self.subject: str = args.subject if "subject" in args else None
        self.attachment_filename: str = args.attachment_filename if "attachment_filename" in args else None
    def _determine_project_by_common_files(file_of_caller):
        LOG.debug(
            "Execution environment is not local, "
            "trying to determine project name with common files strategy. "
            f"Current sys.path: \n{ProjectUtils.get_sys_path_human_readable()}\n"
            f"Current caller file: {file_of_caller}")
        project_root_path = FileUtils.find_repo_root_dir_auto(file_of_caller)
        LOG.debug(f"Found project root: {project_root_path}")
        comps = FileUtils.get_path_components(project_root_path)
        project = comps[-1]
        path = comps[0:-1]
        LOG.info(f"Determined path: {path}, project: {project}")
        return path, project
    def get_test_output_basedir(cls,
                                basedir_name: str,
                                allow_python_commons_as_project=False,
                                project_root_determination_strategy=None):
        """

        :param basedir_name:
        :param allow_python_commons_as_project: This is useful and a must for test executions of ProjectUtils (e.g. JiraUtilsTests)
        as stackframes calling pythoncommons are only the methods of the unittest framework.
        :return:
        """
        cls.test_execution = True
        project_name = cls.verify_caller_filename_valid(
            allow_python_commons_as_project=allow_python_commons_as_project,
            project_root_determination_strategy=project_root_determination_strategy)
        if project_name not in cls.PROJECT_BASEDIR_DICT:
            # Creating the project dir for the first time
            proj_basedir = cls.get_output_basedir(
                basedir_name,
                allow_python_commons_as_project=allow_python_commons_as_project,
                project_root_determination_strategy=project_root_determination_strategy)
        else:
            proj_basedir = cls.PROJECT_BASEDIR_DICT[project_name]

        return FileUtils.join_path(proj_basedir, TEST_OUTPUT_DIR_NAME)
Example #30
    def test_with_not_existing_patch(self):
        args = Object()
        args.patch_file = FileUtils.join_path("tmp", "blablabla")
        review_branch_creator = ReviewBranchCreator(args, self.repo_wrapper,
                                                    BASE_BRANCH,
                                                    REMOTE_BASE_BRANCH)
        self.assertRaises(ValueError, review_branch_creator.run)