def on_buildbot_event(data, message, dry_run, stage=False):
    """Act upon buildbot events.

    Looks up the Treeherder job named by the event, triggers a backfill when
    the requested action is "backfill" and publishes a pulse message that
    describes what was done.

    Args:
        data: event payload; the keys "action", "requester", "project" and
            "job_id" are read.
        message: queue message; ``ack()`` is called on it when the job cannot
            be found, or at the end when not running in dry-run mode.
        dry_run: forwarded to ``manual_backfill``; when true the message is
            not acked at the end.
        stage: when true, query treeherder.allizom.org instead of the default
            Treeherder host.
    """
    action = data["action"]
    requester = data["requester"]
    repo_name = data["project"]
    job_id = data["job_id"]

    # Pulse gives us a job_id and a job_guid, we need request_id.
    LOG.info(
        "%s action requested by %s on repo_name %s with job_id: %s",
        action, requester, repo_name, job_id
    )
    # Cleaning mozci caches
    buildjson.BUILDS_CACHE = {}
    query_jobs.JOBS_CACHE = {}

    if stage:
        treeherder_client = TreeherderClient(host="treeherder.allizom.org")
    else:
        treeherder_client = TreeherderClient()

    jobs = treeherder_client.get_jobs(repo_name, id=job_id)
    # If result not found, ignore
    if not jobs:
        LOG.info("We could not find any result for repo_name: %s and " "job_id: %s", repo_name, job_id)
        message.ack()
        return

    job = jobs[0]
    requested_name = job["ref_data_name"]
    result_sets = treeherder_client.get_resultsets(repo_name, id=job["result_set_id"])
    revision = result_sets[0]["revision"]
    status = None

    # Treeherder can send us invalid builder names
    # https://bugzilla.mozilla.org/show_bug.cgi?id=1242038
    buildername = filter_invalid_builders(requested_name)
    if buildername is None:
        # `buildername` is None in this branch, so report the name that
        # Treeherder gave us instead of indexing into None.
        status = "Builder %s was invalid." % requested_name

    # Backfill action
    elif action == "backfill":
        manual_backfill(revision, buildername, max_revisions=get_maxRevisions(buildername), dry_run=dry_run)
        if not dry_run:
            status = "Backfill request sent"
        else:
            status = "Dry-run mode, nothing was backfilled"

    # Send a pulse message showing what we did
    message_sender = MessageHandler()
    pulse_message = {"job_id": job_id, "action": action, "requester": requester, "status": status}
    routing_key = "{}.{}".format(repo_name, action)
    try:
        message_sender.publish_message(pulse_message, routing_key)
    except Exception:
        # Publishing is best effort; a failure must not prevent the ack below.
        LOG.warning("Failed to publish message over pulse stream.")

    if not dry_run:
        # We need to ack the message to remove it from our queue
        message.ack()
def get_all_jobs(repo_name, revision):
    '''Collect the Treeherder jobs of a revision, keyed by job id.

    Return: {'<revision>': {'<job_id_1>': <job_id_1_metadata>}}
    The inner dictionary is empty when no resultset is returned for the
    revision.
    '''
    print("Fetching Treeherder jobs for {}/{}".format(repo_name, revision))
    client = TreeherderClient()
    resultsets = client.get_resultsets(repo_name, revision=revision)
    if not resultsets:
        return {revision: {}}

    resultset_id = resultsets[0]["id"]
    jobs = client.get_jobs(repo_name, count=6000, result_set_id=resultset_id)
    # Index the job metadata by job id
    return {revision: {job['id']: job for job in jobs}}
Example #3
0
class TreeherderApi(QueryApi):
    """Job queries answered by a Treeherder server through `TreeherderClient`."""

    def __init__(self, server_url='https://treeherder.mozilla.org', treeherder_host=None):
        """
        Create the underlying Treeherder client.

        `treeherder_host` is deprecated; when given it overrides `server_url`
        with ``https://<treeherder_host>`` and a warning is logged.
        """
        if treeherder_host:
            LOG.warning("The `TreeherderApi()` parameter `treeherder_host` is deprecated. "
                        "Use `server_url` instead, or omit entirely to use the default of "
                        "production Treeherder.")
            server_url = 'https://%s' % treeherder_host
        self.treeherder_client = TreeherderClient(server_url=server_url)

    def get_all_jobs(self, repo_name, revision, **params):
        """
        Return all jobs for a given revision.
        If we can't query about this revision in treeherder api, we return an empty list.

        Extra keyword arguments are forwarded to both the resultset query and
        the jobs query.
        """
        # We query treeherder for its internal revision_id, and then get the jobs from them.
        # We cannot get jobs directly from revision and repo_name in TH api.
        # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1165401
        results = self.treeherder_client.get_resultsets(repo_name, revision=revision, **params)
        all_jobs = []
        if results:
            revision_id = results[0]["id"]
            all_jobs = self.treeherder_client.get_jobs(repo_name, count=2000,
                                                       result_set_id=revision_id, **params)
        return all_jobs

    def get_buildapi_request_id(self, repo_name, job):
        """
        Method to return buildapi's request_id.

        Raises a ValueError if the job has no 'buildbot_request_id' detail.
        """
        job_details = self.treeherder_client.get_job_details(
            job_id=job["id"],
            title='buildbot_request_id',
            repository=repo_name)
        if not job_details:
            raise ValueError("No buildbot request id for job ({}, {}, {})".format(
                job["id"], 'buildbot_request_id', repo_name
            ))

        return int(job_details[0]["value"])

    def get_hidden_jobs(self, repo_name, revision):
        """ Return all hidden jobs on Treeherder """
        return self.get_all_jobs(repo_name, revision=revision, visibility='excluded')

    def get_matching_jobs(self, repo_name, revision, buildername):
        """
        Return all jobs whose 'ref_data_name' equals `buildername`.
        """
        LOG.debug("Find jobs matching '%s'", buildername)
        all_jobs = self.get_all_jobs(repo_name, revision)
        matching_jobs = [j for j in all_jobs if j["ref_data_name"] == buildername]

        LOG.debug("We have found %d job(s) of '%s'.", len(matching_jobs), buildername)
        return matching_jobs

    def get_job_status(self, job):
        """
        Helper to determine the scheduling status of a job from treeherder.

        Raises a TreeherderError if the job is in an unexpected state or
        completed with a result this helper does not know about.
        """
        if job["job_coalesced_to_guid"] is not None:
            return COALESCED

        if job["result"] == "unknown":
            if job["state"] == "pending":
                return PENDING
            elif job["state"] == "running":
                return RUNNING
            else:
                return UNKNOWN

        # If the job 'state' is completed, we can have the following possible statuses:
        # https://github.com/mozilla/treeherder/blob/master/treeherder/etl/buildbot.py#L7
        status_dict = {
            "success": SUCCESS,
            "busted": FAILURE,
            "testfailed": FAILURE,
            "skipped": SKIPPED,
            "exception": EXCEPTION,
            "retry": RETRY,
            "usercancel": CANCELLED
            }

        if job["state"] == "completed":
            try:
                return status_dict[job["result"]]
            except KeyError:
                # A result missing from the table must surface as the
                # documented TreeherderError, not as a bare KeyError that
                # callers such as find_all_jobs_by_status do not catch.
                LOG.debug(job)
                raise TreeherderError(
                    "Unexpected result '%s' for a completed job" % job["result"])

        LOG.debug(job)
        raise TreeherderError("Unexpected status")

    def find_all_jobs_by_status(self, repo_name, revision, status):
        """
        Return the names of the jobs of a revision that are in `status`.

        Jobs whose status cannot be determined are skipped. The name is the
        'job_type_name' for taskcluster jobs and the 'ref_data_name' otherwise.
        """
        builder_names = []
        jobs = self.get_all_jobs(repo_name, revision)
        # filter out those jobs without builder name
        jobs = [job for job in jobs if job['machine_name'] != 'unknown']
        for job in jobs:
            try:
                job_status = self.get_job_status(job)
            except TreeherderError:
                continue
            if job_status == status:
                if job['build_system_type'] == 'taskcluster':
                    job_name = job['job_type_name']
                else:
                    job_name = job['ref_data_name']
                builder_names.append(job_name)
        return builder_names

    def query_revision_for_job(self, repo_name, job_id):
        '''Return revision for a known Treeherder job id.'''
        job_info = self.treeherder_client.get_jobs(repo_name, id=job_id)[0]
        result_sets = self.treeherder_client.get_resultsets(repo_name, id=job_info["result_set_id"])
        revision = result_sets[0]["revision"]

        return revision

    def query_revision_for_resultset(self, repo_name, resultset_id):
        '''Return revision for a known Treeherder resultset id.'''
        return self.treeherder_client.get_resultsets(repo_name, id=resultset_id)[0]["revision"]
Example #4
0
def get_test_packages_url(properties):
    """Return the URL of the test packages JSON file.

    For localized daily builds the en-US build can be queried to get the
    URL. Candidate builds need the tinderbox build of the first parent
    changeset that has a successful opt build job, i.e. one which was not
    checked-in by the release automation process (necessary until
    bug 1242035 is fixed).
    """
    platform_map = {
        'linux': {'build_platform': 'linux32'},
        'linux64': {'build_platform': 'linux64'},
        'mac': {'build_os': 'mac', 'build_architecture': 'x86_64'},
        'win32': {'build_os': 'win', 'build_architecture': 'x86'},
        'win64': {'build_os': 'win', 'build_architecture': 'x86_64'},
    }
    overrides = {
        'locale': 'en-US',
        'extension': 'test_packages.json',
        'build_type': 'tinderbox',
        'retry_attempts': 0,
    }

    revision = properties['revision'][:12]

    client = TreeherderClient(host='treeherder.mozilla.org', protocol='https')
    resultsets = client.get_resultsets(properties['branch'],
                                       tochange=revision,
                                       count=50)

    # Retrieve the option hashes to filter for opt builds
    option_hash = None
    for candidate_hash, options in client.get_option_collection_hash().iteritems():
        if any(option['name'] == 'opt' for option in options):
            option_hash = candidate_hash
            if option_hash:
                break

    # Set filters to speed-up querying jobs
    job_filters = dict(
        platform_map[properties['platform']],
        job_type_name='Build',
        exclusion_profile=False,
        option_collection_hash=option_hash,
        result='success',
    )

    # Walk the resultsets until one has a matching successful build job
    for resultset in resultsets:
        job_filters['result_set_id'] = resultset['id']
        jobs = client.get_jobs(properties['branch'], **job_filters)
        if len(jobs) == 0:
            continue
        revision = resultset['revision']
        break

    overrides['revision'] = revision

    # For update tests we need the test package of the target build. That allows
    # us to add fallback code in case major parts of the ui are changing in Firefox.
    target_buildid = properties.get('target_buildid')
    if target_buildid:
        overrides['build_id'] = target_buildid

    # The test package json file has a prefix with bug 1239808 fixed. Older builds need
    # a fallback to a prefix-less filename.
    try:
        return query_file_url(properties, property_overrides=overrides)
    except download_errors.NotFoundError:
        overrides.pop('extension')
        build_url = query_file_url(properties, property_overrides=overrides)
        build_folder = build_url[:build_url.rfind('/')]
        return '{}/test_packages.json'.format(build_folder)
Example #5
0
class GetBuild(object):
    """Download Firefox builds (try or latest nightly) from archive.mozilla.org.

    The Treeherder client is used to find the push (resultset) of a user and
    the build job of the wanted platform.
    """
    ARCHIVE_URL = "https://archive.mozilla.org"
    NIGHTLY_LATEST_URL_FOLDER = "/pub/firefox/nightly/latest-mozilla-central/"
    # Per platform tag: file name key, archive extension, try folder suffix
    # and the keywords that must all occur in a job's platform name.
    PLATFORM_FN_MAPPING = {
        'linux32': {'key': 'linux-i686', 'ext': 'tar.bz2', 'trydl': 'linux', 'job': ['linux32']},
        'linux64': {'key': 'linux-x86_64', 'ext': 'tar.bz2', 'trydl': 'linux64', 'job': ['linux64']},
        'mac': {'key': 'mac', 'ext': 'dmg', 'trydl': 'macosx64', 'job': ['osx']},
        'win32': {'key': 'win32', 'ext': 'zip', 'trydl': 'win32', 'job': ['windows', '32']},
        'win64': {'key': 'win64', 'ext': 'zip', 'trydl': 'win64', 'job': ['windows', '64']},
    }
    # Size of the chunks streamed to disk by download_file
    DOWNLOAD_CHUNK_SIZE = 512 * 1024

    def __init__(self, repo, platform, status_check):
        """Remember the repository and platform tag and create the client.

        `status_check` is stored as `skip_status_check`: when true,
        get_try_build downloads without looking at the job result.
        """
        self.repo = repo
        self.platform = platform
        self.platform_option = 'opt'
        self.resultsets = []
        self.skip_status_check = status_check
        self.thclient = TreeherderClient()

    def fetch_resultset(self, user_email, build_hash, default_count=500):
        """Return the resultset pushed by `user_email`, or None.

        With `build_hash` None the first resultset of the author is returned,
        otherwise the one whose revision equals `build_hash`. Every resultset
        of the author seen on the way is appended to `self.resultsets`.
        """
        tmp_resultsets = self.thclient.get_resultsets(self.repo, count=default_count)
        for resultset in tmp_resultsets:
            if resultset['author'].lower() == user_email.lower():
                self.resultsets.append(resultset)
                if build_hash is None or resultset['revision'] == build_hash:
                    return resultset
        print("Can't find the specify build hash [%s] in resultsets!!" % build_hash)
        return None

    def get_job(self, resultset, platform_keyword_list):
        """Return the first job of `resultset` for the wanted platform, or None.

        A job matches when every keyword occurs in its platform name and its
        platform option equals `self.platform_option`.
        """
        jobs = self.thclient.get_jobs(self.repo, result_set_id=resultset['id'])
        for job in jobs:
            platform_matches = all(keyword in job['platform'] for keyword in platform_keyword_list)
            if job['platform_option'] == self.platform_option and platform_matches:
                return job
        print("Can't find the specify platform [%s] and platform_options [%s] in jobs!!!" % (self.platform, self.platform_option))
        return None

    def get_files_from_remote_url_folder(self, remote_url_str):
        """Return {file name: href} for the links of a remote folder listing.

        Any error is printed and leads to the entries collected so far
        (possibly none) being returned.
        """
        return_dict = {}
        href_pattern = re.compile(r'(?<=href=").*?(?=")')
        try:
            response_obj = urllib2.urlopen(remote_url_str)
            try:
                if response_obj.getcode() == 200:
                    for line in response_obj.readlines():
                        match = href_pattern.search(line)
                        if match:
                            href_link = match.group(0)
                            f_name = href_link.split("/")[-1]
                            return_dict[f_name] = href_link
                else:
                    print("ERROR: fetch remote file list error with code [%s]" % str(response_obj.getcode()))
            finally:
                # Do not leak the connection
                response_obj.close()
        except Exception as e:
            # Format the exception itself; `e.message` is deprecated and is
            # empty for many exception types.
            print("ERROR: [%s]" % e)
        return return_dict

    def download_file(self, output_dp, download_link):
        """Stream `download_link` into `output_dp`.

        Returns the path of the downloaded file, or None when writing failed.
        """
        print("Prepare to download the build from link [%s]" % download_link)
        # NOTE(review): verify=False disables TLS certificate verification
        # for the download; confirm this is still required.
        response = requests.get(download_link, verify=False, stream=True)
        download_fn = download_link.split("/")[-1]
        if not os.path.exists(output_dp):
            os.makedirs(output_dp)
        download_fp = os.path.join(output_dp, download_fn)
        chunk_size = self.DOWNLOAD_CHUNK_SIZE
        try:
            try:
                total_len = int(response.headers['content-length'])
            except (KeyError, TypeError, ValueError):
                total_len = None
            # Without a content-length the number of chunks is unknown; pass
            # None to the progress bar instead of dividing None by an int.
            total_chunks = None
            if total_len is not None:
                total_chunks = (total_len + chunk_size - 1) // chunk_size
            with open(download_fp, 'wb') as fh:
                for data in tqdm(response.iter_content(chunk_size=chunk_size), total=total_chunks):
                    fh.write(data)
            return download_fp
        except Exception as e:
            print("ERROR: [%s]" % e)
            return None
        finally:
            response.close()

    def download_from_remote_url_folder(self, remote_url_str, output_dp):
        """Download the build and its json file listed in a remote folder.

        Returns True when both files were downloaded, False otherwise.
        """
        # get latest nightly build list from remote url folder
        remote_file_dict = self.get_files_from_remote_url_folder(remote_url_str)

        # filter with platform, and return file name with extension
        if not remote_file_dict:
            print("ERROR: can't get remote file list, could be the network error, or url path[%s] wrong!!" % remote_url_str)
            return False
        if self.platform not in self.PLATFORM_FN_MAPPING:
            print("ERROR: we are currently not support the platform[%s] you specified!" % self.platform)
            print("We are currently support the platform tag: [%s]" % list(self.PLATFORM_FN_MAPPING))
            return False

        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        matched_keyword = platform_info['key'] + "." + platform_info['ext']
        matched_file_list = [fn for fn in remote_file_dict if matched_keyword in fn and "firefox" in fn]
        if len(matched_file_list) != 1:
            print("WARN: the possible match file list is not equal 1, list as below: [%s]" % matched_file_list)
            if not matched_file_list:
                return False
            # Several candidates: pick the last one in sorted order
            matched_file_list = sorted(matched_file_list)[-1:]
            print("WARN: select following file [%s]" % matched_file_list)

        # combine file name with json
        matched_file_name = matched_file_list[0]
        json_file_name = matched_file_name.replace(matched_keyword, platform_info['key'] + ".json")
        if json_file_name not in remote_file_dict:
            print("ERROR: can't find the json file[%s] in remote file list[%s]!" % (json_file_name, remote_file_dict))
            return False
        print("DEBUG: matched file name: [%s], json_file_name: [%s]" % (matched_file_name, json_file_name))

        # download files
        download_fx_url = self.ARCHIVE_URL + remote_file_dict[matched_file_name]
        download_fx_fp = self.download_file(output_dp, download_fx_url)
        download_json_url = self.ARCHIVE_URL + remote_file_dict[json_file_name]
        download_json_fp = self.download_file(output_dp, download_json_url)

        # check download status
        if download_fx_fp and download_json_fp:
            print("SUCCESS: build files download in [%s], [%s] " % (download_fx_fp, download_json_fp))
            return True
        print("ERROR: build files download in [%s,%s] " % (download_fx_fp, download_json_fp))
        return False

    def get_try_build(self, user_email, build_hash, output_dp):
        """Download the try build pushed by `user_email`.

        With `build_hash` None the latest push of the user is used. Unless
        the status check is skipped, the build is only downloaded when its
        job finished with result "success". Returns True on success.
        """
        resultset = self.fetch_resultset(user_email, build_hash)

        # check result set
        if not resultset:
            print("ERROR: can't get result set! skip download build from try server, [%s, %s]" % (user_email, build_hash))
            return False

        # if build hash is not provided, use the latest revision as build hash value
        if build_hash is None:
            build_hash = resultset['revision']
        print("Resultset is found, and build hash is [%s]" % build_hash)

        # compose remote folder url
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        build_folder_url_template = "%s/pub/firefox/%s-builds/%s-%s/%s-%s/"
        build_folder_url = build_folder_url_template % (self.ARCHIVE_URL,
                                                        self.repo, user_email, build_hash,
                                                        self.repo,
                                                        platform_info['trydl'])

        # skip status check will retrieve the files list from remote folder url
        if self.skip_status_check:
            return self.download_from_remote_url_folder(build_folder_url, output_dp)

        job = self.get_job(resultset, platform_info['job'])
        if not job:
            print("ERROR: can't find the job!")
            return False

        job_result = job['result'].lower()
        if job_result != "success":
            # Tell the user why nothing was downloaded
            print("Current job status is [%s] !!" % job_result)
            return False
        return self.download_from_remote_url_folder(build_folder_url, output_dp)

    def get_nightly_build(self, output_dp):
        """Download the latest mozilla-central nightly build into `output_dp`."""
        remote_url_str = self.ARCHIVE_URL + self.NIGHTLY_LATEST_URL_FOLDER
        return self.download_from_remote_url_folder(remote_url_str, output_dp)
Example #6
0
class TreeherderApi(QueryApi):
    """Job queries answered by a Treeherder server through `TreeherderClient`."""

    def __init__(self,
                 server_url='https://treeherder.mozilla.org',
                 treeherder_host=None):
        """
        Create the underlying Treeherder client.

        `treeherder_host` is deprecated; when given it overrides `server_url`
        with ``https://<treeherder_host>`` and a warning is logged.
        """
        if treeherder_host:
            LOG.warning(
                "The `TreeherderApi()` parameter `treeherder_host` is deprecated. "
                "Use `server_url` instead, or omit entirely to use the default of "
                "production Treeherder.")
            server_url = 'https://%s' % treeherder_host
        self.treeherder_client = TreeherderClient(server_url=server_url)

    def get_all_jobs(self, repo_name, revision, **params):
        """
        Return all jobs for a given revision.
        If we can't query about this revision in treeherder api, we return an empty list.

        Extra keyword arguments are forwarded to both the resultset query and
        the jobs query.
        """
        # We query treeherder for its internal revision_id, and then get the jobs from them.
        # We cannot get jobs directly from revision and repo_name in TH api.
        # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1165401
        results = self.treeherder_client.get_resultsets(repo_name,
                                                        revision=revision,
                                                        **params)
        all_jobs = []
        if results:
            revision_id = results[0]["id"]
            all_jobs = self.treeherder_client.get_jobs(
                repo_name, count=2000, result_set_id=revision_id, **params)
        return all_jobs

    def get_buildapi_request_id(self, repo_name, job):
        """
        Method to return buildapi's request_id.

        Raises a ValueError if the job has no 'buildbot_request_id' detail.
        """
        job_details = self.treeherder_client.get_job_details(
            job_id=job["id"],
            title='buildbot_request_id',
            repository=repo_name)
        if not job_details:
            raise ValueError(
                "No buildbot request id for job ({}, {}, {})".format(
                    job["id"], 'buildbot_request_id', repo_name))

        return int(job_details[0]["value"])

    def get_hidden_jobs(self, repo_name, revision):
        """ Return all hidden jobs on Treeherder """
        return self.get_all_jobs(repo_name,
                                 revision=revision,
                                 visibility='excluded')

    def get_matching_jobs(self, repo_name, revision, buildername):
        """
        Return all jobs whose 'ref_data_name' equals `buildername`.
        """
        LOG.debug("Find jobs matching '%s'", buildername)
        all_jobs = self.get_all_jobs(repo_name, revision)
        matching_jobs = [
            j for j in all_jobs if j["ref_data_name"] == buildername
        ]

        LOG.debug("We have found %d job(s) of '%s'.", len(matching_jobs),
                  buildername)
        return matching_jobs

    def get_job_status(self, job):
        """
        Helper to determine the scheduling status of a job from treeherder.

        Raises a TreeherderError if the job is in an unexpected state or
        completed with a result this helper does not know about.
        """
        if job["job_coalesced_to_guid"] is not None:
            return COALESCED

        if job["result"] == "unknown":
            if job["state"] == "pending":
                return PENDING
            elif job["state"] == "running":
                return RUNNING
            else:
                return UNKNOWN

        # If the job 'state' is completed, we can have the following possible statuses:
        # https://github.com/mozilla/treeherder/blob/master/treeherder/etl/buildbot.py#L7
        status_dict = {
            "success": SUCCESS,
            "busted": FAILURE,
            "testfailed": FAILURE,
            "skipped": SKIPPED,
            "exception": EXCEPTION,
            "retry": RETRY,
            "usercancel": CANCELLED
        }

        if job["state"] == "completed":
            try:
                return status_dict[job["result"]]
            except KeyError:
                # A result missing from the table must surface as the
                # documented TreeherderError, not as a bare KeyError that
                # callers such as find_all_jobs_by_status do not catch.
                LOG.debug(job)
                raise TreeherderError(
                    "Unexpected result '%s' for a completed job" %
                    job["result"])

        LOG.debug(job)
        raise TreeherderError("Unexpected status")

    def find_all_jobs_by_status(self, repo_name, revision, status):
        """
        Return the names of the jobs of a revision that are in `status`.

        Jobs whose status cannot be determined are skipped. The name is the
        'job_type_name' for taskcluster jobs and the 'ref_data_name' otherwise.
        """
        builder_names = []
        jobs = self.get_all_jobs(repo_name, revision)
        # filter out those jobs without builder name
        jobs = [job for job in jobs if job['machine_name'] != 'unknown']
        for job in jobs:
            try:
                job_status = self.get_job_status(job)
            except TreeherderError:
                continue
            if job_status == status:
                if job['build_system_type'] == 'taskcluster':
                    job_name = job['job_type_name']
                else:
                    job_name = job['ref_data_name']
                builder_names.append(job_name)
        return builder_names

    def query_revision_for_job(self, repo_name, job_id):
        '''Return revision for a known Treeherder job id.'''
        job_info = self.treeherder_client.get_jobs(repo_name, id=job_id)[0]
        result_sets = self.treeherder_client.get_resultsets(
            repo_name, id=job_info["result_set_id"])
        revision = result_sets[0]["revision"]

        return revision

    def query_revision_for_resultset(self, repo_name, resultset_id):
        '''Return revision for a known Treeherder resultset id.'''
        return self.treeherder_client.get_resultsets(
            repo_name, id=resultset_id)[0]["revision"]
def on_event(data, message, dry_run, treeherder_server_url, acknowledge, **kwargs):
    """Act upon Treeherder job events.

    Args:
        data: event payload; the keys "action", "job_id", "project" and
            "requester" are read.
        message: queue message; ``ack()`` is called on it on every handled
            path when `acknowledge` is true.
        dry_run: forwarded to ``manual_backfill``.
        treeherder_server_url: Treeherder instance to query and to link to.
        acknowledge: whether the message should be removed from the queue.

    Return if the outcome was successful or not (0 for success, -1 for an
    invalid builder name or an unknown action, otherwise whatever
    ``manual_backfill`` returns).
    """
    exit_code = 0  # SUCCESS

    if ignored(data):
        if acknowledge:
            # We need to ack the message to remove it from our queue
            message.ack()
        return exit_code

    # Cleaning mozci caches
    buildjson.BUILDS_CACHE = {}
    query_jobs.JOBS_CACHE = {}

    treeherder_client = TreeherderClient(server_url=treeherder_server_url)

    action = data["action"].capitalize()
    job_id = data["job_id"]
    repo_name = data["project"]
    status = None

    # We want to know the status of the job we're processing
    try:
        job_info = treeherder_client.get_jobs(repo_name, id=job_id)[0]
    except IndexError:
        LOG.info("We could not find any job_info for repo_name: %s and " "job_id: %s", repo_name, job_id)
        if acknowledge:
            # Nothing can be done for a job Treeherder does not know about;
            # ack like on the other paths so the message leaves the queue.
            message.ack()
        return exit_code

    requested_name = job_info["ref_data_name"]

    # We want to know the revision associated for this job
    result_sets = treeherder_client.get_resultsets(repo_name, id=job_info["result_set_id"])
    revision = result_sets[0]["revision"]

    link_to_job = "{}/#/jobs?repo={}&revision={}&selectedJob={}".format(
        treeherder_server_url, repo_name, revision, job_id
    )

    LOG.info("{} action requested by {} for '{}'".format(action, data["requester"], requested_name))
    LOG.info("Request for {}".format(link_to_job))

    buildername = filter_invalid_builders(requested_name)

    if buildername is None:
        LOG.info("Treeherder can send us invalid builder names.")
        LOG.info("See https://bugzilla.mozilla.org/show_bug.cgi?id=1242038.")
        LOG.warning('Requested job name "%s" is invalid.', requested_name)
        exit_code = -1  # FAILURE

    # There are various actions that can be taken on a job, however, we currently
    # only process the backfill one
    elif action == "Backfill":
        exit_code = manual_backfill(revision=revision, buildername=buildername, dry_run=dry_run)
        if not dry_run:
            status = "Backfill request sent"
        else:
            status = "Dry-run mode, nothing was backfilled."
        LOG.debug(status)

    else:
        LOG.error('We were not aware of the "{}" action. Please file an issue'.format(action))
        exit_code = -1  # FAILURE

    if acknowledge:
        # We need to ack the message to remove it from our queue
        message.ack()

    return exit_code
class TreeherderApi(QueryApi):
    """Job queries answered by Treeherder through a default `TreeherderClient`."""

    def __init__(self):
        self.treeherder_client = TreeherderClient()

    def _get_all_jobs(self, repo_name, revision, **params):
        """
        Return all jobs for a given revision.
        If we can't query about this revision in treeherder api, we return an empty list.

        Extra keyword arguments are forwarded to both the resultset query and
        the jobs query.
        """
        # We query treeherder for its internal revision_id, and then get the jobs from them.
        # We cannot get jobs directly from revision and repo_name in TH api.
        # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1165401
        results = self.treeherder_client.get_resultsets(repo_name,
                                                        revision=revision,
                                                        **params)
        all_jobs = []
        if results:
            revision_id = results[0]["id"]
            all_jobs = self.treeherder_client.get_jobs(
                repo_name, count=2000, result_set_id=revision_id, **params)
        return all_jobs

    def get_buildapi_request_id(self, repo_name, job):
        """
        Method to return buildapi's request_id.

        The id is read from the first 'buildapi' artifact of the job.
        """
        job_id = job["id"]
        query_params = {'job_id': job_id, 'name': 'buildapi'}
        LOG.debug("We are fetching request_id from treeherder artifacts api")
        artifact_content = self.treeherder_client.get_artifacts(
            repo_name, **query_params)
        return artifact_content[0]["blob"]["request_id"]

    def get_hidden_jobs(self, repo_name, revision):
        """ Return all hidden jobs on Treeherder """
        return self._get_all_jobs(repo_name,
                                  revision=revision,
                                  visibility='excluded')

    def get_matching_jobs(self, repo_name, revision, buildername):
        """
        Return all jobs whose 'ref_data_name' equals `buildername`.
        """
        LOG.debug("Find jobs matching '%s'", buildername)
        all_jobs = self._get_all_jobs(repo_name, revision)
        matching_jobs = [
            j for j in all_jobs if j["ref_data_name"] == buildername
        ]

        LOG.debug("We have found %d job(s) of '%s'.", len(matching_jobs),
                  buildername)
        return matching_jobs

    def get_job_status(self, job):
        """
        Helper to determine the scheduling status of a job from treeherder.

        Raises a TreeherderError if the job is in an unexpected state or
        completed with a result this helper does not know about.
        """
        if job["job_coalesced_to_guid"] is not None:
            return COALESCED

        if job["result"] == "unknown":
            if job["state"] == "pending":
                return PENDING
            elif job["state"] == "running":
                return RUNNING
            else:
                return UNKNOWN

        # If the job 'state' is completed, we can have the following possible statuses:
        # https://github.com/mozilla/treeherder/blob/master/treeherder/etl/buildbot.py#L7
        status_dict = {
            "success": SUCCESS,
            "busted": FAILURE,
            "testfailed": FAILURE,
            "skipped": SKIPPED,
            "exception": EXCEPTION,
            "retry": RETRY,
            "usercancel": CANCELLED
        }

        if job["state"] == "completed":
            try:
                return status_dict[job["result"]]
            except KeyError:
                # A result missing from the table must surface as the
                # documented TreeherderError, not as a bare KeyError.
                LOG.debug(job)
                raise TreeherderError(
                    "Unexpected result '%s' for a completed job" %
                    job["result"])

        LOG.debug(job)
        raise TreeherderError("Unexpected status")
Example #9
0
def get_test_packages_url(properties):
    """Return the URL of the test packages JSON file.

    For localized daily builds the en-US build can be queried to get the
    URL. Candidate builds need the tinderbox build of the first parent
    changeset that has a successful opt build job, i.e. one which was not
    checked-in by the release automation process (necessary until
    bug 1242035 is fixed).
    """
    x86 = 'x86'
    x86_64 = 'x86_64'
    platform_map = {
        'linux': {'build_platform': 'linux32'},
        'linux64': {'build_platform': 'linux64'},
        'mac': {'build_os': 'mac', 'build_architecture': x86_64},
        'win32': {'build_os': 'win', 'build_architecture': x86},
        'win64': {'build_os': 'win', 'build_architecture': x86_64},
    }
    overrides = dict(
        locale='en-US',
        extension='test_packages.json',
        build_type='tinderbox',
        retry_attempts=0,
    )

    revision = properties['revision'][:12]

    client = TreeherderClient(host='treeherder.mozilla.org', protocol='https')
    resultsets = client.get_resultsets(
        properties['branch'], tochange=revision, count=50)

    # Retrieve the option hashes to filter for opt builds
    option_hash = None
    for hash_key, option_list in client.get_option_collection_hash().iteritems():
        for option in option_list:
            if option['name'] != 'opt':
                continue
            option_hash = hash_key
            break
        if option_hash:
            break

    # Set filters to speed-up querying jobs
    filters = {
        'job_type_name': 'Build',
        'exclusion_profile': False,
        'option_collection_hash': option_hash,
        'result': 'success',
    }
    for name, value in platform_map[properties['platform']].items():
        filters[name] = value

    # Use the revision of the first resultset that has a matching job
    for resultset in resultsets:
        filters['result_set_id'] = resultset['id']
        found_jobs = client.get_jobs(properties['branch'], **filters)
        if len(found_jobs) != 0:
            revision = resultset['revision']
            break

    overrides['revision'] = revision

    # For update tests we need the test package of the target build. That allows
    # us to add fallback code in case major parts of the ui are changing in Firefox.
    if properties.get('target_buildid'):
        overrides['build_id'] = properties['target_buildid']

    # The test package json file has a prefix with bug 1239808 fixed. Older builds need
    # a fallback to a prefix-less filename.
    try:
        url = query_file_url(properties, property_overrides=overrides)
    except download_errors.NotFoundError:
        overrides.pop('extension')
        build_url = query_file_url(properties, property_overrides=overrides)
        last_slash = build_url.rfind('/')
        url = '{}/test_packages.json'.format(build_url[:last_slash])

    return url
Example #10
0
def on_event(data, message, dry_run, treeherder_server_url, **kwargs):
    """Process one Treeherder job-action event.

    Only the "backfill" action is handled. TaskCluster jobs are backfilled by
    scheduling an action task against the push's decision task; every other
    job goes through ``manual_backfill``.

    :param data: Event payload; ``action``, ``job_id``, ``project`` and
        ``requester`` are read from it.
    :param message: Unused by this handler.
    :param dry_run: When true, nothing is actually scheduled.
    :param treeherder_server_url: Base URL of the Treeherder instance to query.
    :returns: 0 on success (or when the event is ignored / the job cannot be
        found), -1 for an invalid builder name or an unknown action, otherwise
        whatever ``manual_backfill`` returns.
    """
    success, failure = 0, -1

    if ignored(data):
        return success

    # Cleaning mozci caches
    buildjson.BUILDS_CACHE = {}
    query_jobs.JOBS_CACHE = {}

    treeherder_client = TreeherderClient(server_url=treeherder_server_url)

    action = data['action'].capitalize()
    job_id = data['job_id']
    repo_name = data['project']

    # Look up the job the event refers to; an unknown job is not an error.
    try:
        job_info = treeherder_client.get_jobs(repo_name, id=job_id)[0]
    except IndexError:
        LOG.info("We could not find any job_info for repo_name: %s and "
                 "job_id: %s" % (repo_name, job_id))
        return success

    # The push (result set) tells us which revision the job ran against.
    push_id = job_info["result_set_id"]
    revision = treeherder_client.get_resultsets(repo_name, id=push_id)[0]["revision"]

    link_to_job = '{}/#/jobs?repo={}&revision={}&selectedJob={}'.format(
        treeherder_server_url, repo_name, revision, job_id)

    # Backfill is the only action we currently know how to process.
    if action != "Backfill":
        LOG.error('We were not aware of the "{}" action. Please file an issue'.
                  format(action))
        return failure

    if job_info["build_system_type"] == "taskcluster":
        # Page through every job of the push to locate its decision task.
        page_size = 250
        offset = 0
        push_jobs = []
        while True:
            page = treeherder_client.get_jobs(
                repo_name,
                push_id=job_info["result_set_id"],
                count=page_size,
                offset=offset)
            push_jobs += page
            if len(page) < page_size:
                break
            offset += page_size

        decision_tasks = [
            job for job in push_jobs
            if job["job_type_name"] == "Gecko Decision Task"
        ]
        decision = decision_tasks[0]

        details = treeherder_client.get_job_details(
            job_guid=decision["job_guid"])
        inspect_urls = [
            detail["url"] for detail in details
            if detail["value"] == "Inspect Task"
        ]
        inspect_url = inspect_urls[0]

        # Pull out the taskId from the URL e.g.
        # oN1NErz_Rf2DZJ1hi7YVfA from <tc_tools_site>/task-inspector/#oN1NErz_Rf2DZJ1hi7YVfA/
        after_hash = inspect_url.partition("#")[-1]
        decision_id = after_hash.rpartition("/")[0]

        action_args = {
            "project": repo_name,
            "job": job_info["id"]
        }
        manager = TaskClusterManager(dry_run=dry_run)
        manager.schedule_action_task(decision_id=decision_id,
                                     action="backfill",
                                     action_args=action_args)
        return success

    requested_name = job_info["ref_data_name"]

    LOG.info("{} action requested by {} for '{}'".format(
        action,
        data['requester'],
        requested_name,
    ))
    LOG.info('Request for {}'.format(link_to_job))

    buildername = filter_invalid_builders(requested_name)

    if buildername is None:
        LOG.info('Treeherder can send us invalid builder names.')
        LOG.info(
            'See https://bugzilla.mozilla.org/show_bug.cgi?id=1242038.'
        )
        LOG.warning('Requested job name "%s" is invalid.' %
                    job_info['ref_data_name'])
        return failure

    exit_code = manual_backfill(
        revision=revision,
        buildername=buildername,
        dry_run=dry_run,
    )
    if dry_run:
        status = 'Dry-run mode, nothing was backfilled.'
    else:
        status = 'Backfill request sent'
    LOG.debug(status)

    return exit_code
Example #11
0
# Scan recent pushes on the branch for the configured version and log every
# completed build job that matches the expected job type for each platform.
# NOTE(review): this snippet is cut off; the loop body continues beyond
# `found_test = False` in the original script.
tb_branch = branches[tb_version]

# testapps.json is indexed by version first, then by platform.
with open("testapps.json", "r") as jf:
    data = json.load(jf)

nightly_data = data[tb_version]

pushes = client.get_pushes(tb_branch, )  # gets last 10 by default
for platform in nightly_data:
    platform_data = nightly_data[platform]
    found_artifacts = False
    # Drop the archive extension so 'testzip' holds the bare package name.
    platform_data['testzip'] = \
        platform_data['testzip'].replace('.zip', '').replace('.tar.gz', '')

    for push in pushes:
        jobs = client.get_jobs(tb_branch, push_id=push['id'])

        for job in jobs:
            logging.debug(job['job_type_name'])
            # Only completed jobs of the build type mapped to this
            # version/platform are of interest.
            if (
                    job['state'] == 'completed' and
                    job['job_type_name'] ==
                    mapping_builds[tb_version][platform]
                    ):
                logging.info("%d\t%s\t%s\t%s\t%s\t%s" % (
                    job['start_timestamp'], job['build_platform'],
                    job['job_type_name'], job['platform'],
                    job['platform_option'], job['state'])
                )

                found_test = False
Example #12
0
class TriggerBuild(object):
    """Download a Firefox build and hand a job description to the Hasal agent.

    The build comes either from the try server (when an ``EMAIL`` entry is
    present in the environment data) or from the latest nightly folder on
    archive.mozilla.org. After a successful download a ``<build number>.json``
    file is written and moved into the agent's configuration directory.

    All ``print`` calls use the single-argument parenthesised form, which
    produces the same output as the Python 2 print statement.
    """

    ARCHIVE_URL = "https://archive.mozilla.org"
    NIGHTLY_LATEST_URL_FOLDER = "/pub/firefox/nightly/latest-mozilla-central/"
    PLATFORM_FN_MAPPING = {'linux32': {'key': 'linux-i686', 'ext': 'tar.bz2', 'trydl': 'linux', 'job': ['linux32']},
                           'linux64': {'key': 'linux-x86_64', 'ext': 'tar.bz2', 'trydl': 'linux64', 'job': ['linux64']},
                           'mac': {'key': 'mac', 'ext': 'dmg', 'trydl': 'macosx64', 'job': ['osx']},
                           'win32': {'key': 'win32', 'ext': 'zip', 'trydl': 'win32', 'job': ['windows', '32']},
                           'win64': {'key': 'win64', 'ext': 'zip', 'trydl': 'win64', 'job': ['windows', '64']}}
    ENV_KEY_TRY_REPO_USER_EMAIL = "EMAIL"
    ENV_KEY_ENABLE_WIN32 = "WIN32_FLAG"
    ENV_KEY_SKIP_STATUS_CHECK = "SKIP_STATUS_CHECK"
    ENV_KEY_OUTPUT_DP = "OUTPUT_DP"
    ENV_KEY_BUILD_HASH = "BUILD_HASH"
    ENV_KEY_BUILD_NO = "BUILD_NUMBER"
    REPO_NAME = {'TRY': "try", "NIGHTLY": "nightly"}
    DEFAULT_AGENT_CONF_DIR_LINUX = "/home/hasal/Hasal/agent"
    DEFAULT_AGENT_CONF_DIR_MAC = "/Users/hasal/Hasal/agent"
    DEFAULT_AGENT_CONF_DIR_WIN = "C:\\Users\\user\\Hasal\\agent"
    DEFAULT_AGENT_STATUS_DIR = "agent_status"
    DEFAULT_AGENT_JOB_STATUS = {'BEGIN': 'begin', 'FINISH': 'finish', 'EXCEPTION': 'exception'}
    # Number of polls in check_agent_status (10 seconds apart).
    DEFAULT_AGENT_JOB_WACTH_TIMEOUT = 180

    def __init__(self, input_env_data):
        """Store the environment data (keys upper-cased) and derive settings.

        :param input_env_data: Mapping of option name to value.
        """
        self.platform_option = 'opt'
        self.thclient = TreeherderClient()
        self.resultsets = []
        self.env_data = {key.upper(): value for key, value in input_env_data.items()}
        self.dispatch_variables(self.env_data)

    def dispatch_variables(self, input_env_data):
        """Translate the environment data into instance attributes.

        Sets ``repo`` (and ``user_email`` for try), ``platform``,
        ``skip_status_check``, ``build_hash``, ``output_dp``,
        ``jenkins_build_no`` and ``HASAL_JSON_FN``.

        :param input_env_data: Mapping with upper-cased option names.
        """
        # if user email not in environment data, repo will be the nightly
        if self.ENV_KEY_TRY_REPO_USER_EMAIL in input_env_data:
            self.user_email = input_env_data[self.ENV_KEY_TRY_REPO_USER_EMAIL]
            self.repo = self.REPO_NAME['TRY']
        else:
            self.repo = self.REPO_NAME['NIGHTLY']

        # sys.platform is "linux2" on Python 2 and "linux" on Python 3, so
        # match on the prefix. Anything that is neither Linux nor macOS is
        # treated as Windows, where the win32 flag picks the architecture.
        if sys.platform.startswith("linux"):
            self.platform = "linux64"
        elif sys.platform == "darwin":
            self.platform = "mac"
        elif input_env_data.get(self.ENV_KEY_ENABLE_WIN32) == 'true':
            self.platform = "win32"
        else:
            self.platform = "win64"

        self.skip_status_check = input_env_data.get(self.ENV_KEY_SKIP_STATUS_CHECK) == 'true'
        self.build_hash = input_env_data.get(self.ENV_KEY_BUILD_HASH)
        self.output_dp = input_env_data.get(self.ENV_KEY_OUTPUT_DP, os.getcwd())
        self.jenkins_build_no = input_env_data.get(self.ENV_KEY_BUILD_NO, 0)
        self.HASAL_JSON_FN = str(self.jenkins_build_no) + ".json"

    def check_agent_status(self):
        """Wait until the most recent agent job has finished.

        Status files in ``<cwd>/agent_status`` are named ``<job id>.<status>``.
        The highest job id is the current job and, because statuses are
        compared alphabetically, its greatest status is taken as the latest.
        Files that do not start with a numeric job id are ignored instead of
        aborting the check.

        :returns: True when there is no job to wait for or the current job is
            finished (the status folder is emptied in that case); False when
            the job did not finish within the polling budget.
        """
        agent_status_dir_path = os.path.join(os.getcwd(), self.DEFAULT_AGENT_STATUS_DIR)
        for _attempt in range(self.DEFAULT_AGENT_JOB_WACTH_TIMEOUT):
            print("INFO: housekeeping the agent status folder [%s]" % agent_status_dir_path)
            if not os.path.exists(agent_status_dir_path):
                os.mkdir(agent_status_dir_path)
            agent_status_file_list = os.listdir(agent_status_dir_path)
            print("DEBUG: current agent status file list [%s]" % agent_status_file_list)

            # Collect (job id, status) pairs from well-formed file names only.
            job_entries = []
            for file_name in agent_status_file_list:
                name_parts = file_name.split(".")
                if len(name_parts) >= 2 and name_parts[0].isdigit():
                    job_entries.append((int(name_parts[0]), name_parts[1]))

            if not job_entries:
                return True

            current_id = max(entry[0] for entry in job_entries)
            current_job_status = max(entry[1] for entry in job_entries if entry[0] == current_id)

            if current_job_status == self.DEFAULT_AGENT_JOB_STATUS['FINISH']:
                for target_name in agent_status_file_list:
                    os.remove(os.path.join(agent_status_dir_path, target_name))
                return True

            time.sleep(10)
        return False

    def trigger(self):
        """Download the build, write the Hasal job file and exit the process.

        Exits with status 1 when the agent is still busy or the download
        failed, and with status 0 once the job file has been moved into the
        agent configuration directory.
        """
        # check agent status folder
        if not self.check_agent_status():
            sys.exit(1)

        # download build
        if self.repo == self.REPO_NAME['TRY']:
            download_fx_fp, download_json_fp = self.get_try_build(self.user_email, self.build_hash, self.output_dp)
        else:
            download_fx_fp, download_json_fp = self.get_nightly_build(self.output_dp)

        if download_fx_fp is None or download_json_fp is None:
            print("ERROR: something wrong with your build download process, please check the setting and job status.")
            sys.exit(1)

        # generate hasal.json data
        with open(download_json_fp) as dl_json_fh:
            dl_json_data = json.load(dl_json_fh)
        perfherder_revision = dl_json_data['moz_source_stamp']
        build_pkg_platform = dl_json_data['moz_pkg_platform']
        # mapping the perfherder pkg platform to nomenclature of builddot
        builddot_mapping_platform = {"linux-i686": "linux32",
                                     "linux-x86_64": "linux64",
                                     "mac": "osx-10-10",
                                     "win32": "windows7-32",
                                     "win64": "windows8-64"}
        write_data = copy.deepcopy(self.env_data)
        write_data['FX-DL-PACKAGE-PATH'] = download_fx_fp
        write_data['FX-DL-JSON-PATH'] = download_json_fp
        write_data['--PERFHERDER-REVISION'] = perfherder_revision
        write_data['--PERFHERDER-PKG-PLATFORM'] = builddot_mapping_platform[build_pkg_platform]
        with open(self.HASAL_JSON_FN, "w") as write_fh:
            json.dump(write_data, write_fh)

        hasal_json_fp = os.path.join(os.getcwd(), self.HASAL_JSON_FN)
        if os.path.exists(hasal_json_fp):
            print("INFO: current json file created at [%s]" % hasal_json_fp)
        else:
            print("ERROR: json file not exist in expected path [%s]" % hasal_json_fp)

        # create agent status folder
        agent_status_dir_path = os.path.join(os.getcwd(), self.DEFAULT_AGENT_STATUS_DIR)
        if not os.path.exists(agent_status_dir_path):
            os.mkdir(agent_status_dir_path)

        # move to agent config folder
        if sys.platform.startswith("linux"):
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_LINUX
        elif sys.platform == "darwin":
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_MAC
        else:
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_WIN
        new_hasal_json_fp = os.path.join(agent_conf_dir, self.HASAL_JSON_FN)
        os.rename(self.HASAL_JSON_FN, new_hasal_json_fp)

        if os.path.exists(new_hasal_json_fp):
            print("INFO: hasal json file move to new location [%s]" % new_hasal_json_fp)
        else:
            print("ERROR: hasal json file in not in new location [%s]" % new_hasal_json_fp)
        sys.exit(0)

    def fetch_resultset(self, user_email, build_hash, default_count=500):
        """Find a result set pushed by ``user_email``.

        Every result set of that author seen before the match is appended to
        ``self.resultsets``.

        :param user_email: Author to look for (case-insensitive).
        :param build_hash: Revision to match; None selects the author's first
            result set in the returned list.
        :param default_count: How many result sets to request.
        :returns: The matching result set, or None when there is none.
        """
        wanted_author = user_email.lower()
        for resultset in self.thclient.get_resultsets(self.repo, count=default_count):
            if resultset['author'].lower() != wanted_author:
                continue
            self.resultsets.append(resultset)
            if build_hash is None or resultset['revision'] == build_hash:
                return resultset
        print("Can't find the specify build hash [%s] in resultsets!!" % build_hash)
        return None

    def get_job(self, resultset, platform_keyword_list):
        """Return the first job of ``resultset`` built for the wanted platform.

        A job matches when its ``platform_option`` equals
        ``self.platform_option`` and its ``platform`` contains every keyword.

        :returns: The job dict, or None when nothing matches.
        """
        jobs = self.thclient.get_jobs(self.repo, result_set_id=resultset['id'])
        for job in jobs:
            has_all_keywords = all(keyword in job['platform'] for keyword in platform_keyword_list)
            if has_all_keywords and job['platform_option'] == self.platform_option:
                return job
        print("Can't find the specify platform [%s] and platform_options [%s] in jobs!!!" % (self.platform, self.platform_option))
        return None

    def get_files_from_remote_url_folder(self, remote_url_str):
        """List the links found on a remote directory page.

        :returns: Dict mapping file name (last path segment of the link) to
            the link itself; empty when the page could not be fetched.
        """
        return_dict = {}
        href_pattern = re.compile(r'(?<=href=").*?(?=")')
        try:
            response_obj = urllib2.urlopen(remote_url_str)
            if response_obj.getcode() == 200:
                for line in response_obj.readlines():
                    match = href_pattern.search(line)
                    if match:
                        href_link = match.group(0)
                        return_dict[href_link.split("/")[-1]] = href_link
            else:
                print("ERROR: fetch remote file list error with code [%s]" % str(response_obj.getcode()))
        except Exception as e:
            # Best effort: report and fall through to the (possibly empty) dict.
            # Format the exception itself; ``e.message`` is frequently empty.
            print("ERROR: [%s]" % e)
        return return_dict

    def download_file(self, output_dp, download_link):
        """Stream ``download_link`` into ``output_dp``.

        :returns: Path of the downloaded file, or None when writing failed.
        """
        chunk_size = 512 * 1024
        print("Prepare to download the build from link [%s]" % download_link)
        # NOTE(review): certificate verification is disabled here; confirm
        # this is intentional before relying on the downloaded artifacts.
        response = requests.get(download_link, verify=False, stream=True)
        download_fn = download_link.split("/")[-1]
        if not os.path.exists(output_dp):
            os.makedirs(output_dp)
        download_fp = os.path.join(output_dp, download_fn)

        # Content-Length is optional; without it the progress bar simply has
        # no total instead of the whole download failing.
        try:
            total_len = int(response.headers['content-length'])
        except (KeyError, TypeError, ValueError):
            total_len = None
        if total_len is None:
            total_chunks = None
        else:
            total_chunks = (total_len + chunk_size - 1) // chunk_size

        try:
            with open(download_fp, 'wb') as fh:
                for data in tqdm(response.iter_content(chunk_size=chunk_size), total=total_chunks):
                    fh.write(data)
            return download_fp
        except Exception as e:
            print("ERROR: [%s]" % e)
            return None

    def download_from_remote_url_folder(self, remote_url_str, output_dp):
        """Download the Firefox package and its json file from a remote folder.

        :returns: ``(package path, json path)`` on success and
            ``(None, None)`` on any failure, so callers can always unpack the
            result.
        """
        failure = (None, None)

        # get latest nightly build list from remote url folder
        remote_file_dict = self.get_files_from_remote_url_folder(remote_url_str)
        if not remote_file_dict:
            print("ERROR: can't get remote file list, could be the network error, or url path[%s] wrong!!" % remote_url_str)
            return failure

        if self.platform not in self.PLATFORM_FN_MAPPING:
            print("ERROR: we are currently not support the platform[%s] you specified!" % self.platform)
            print("We are currently support the platform tag: [%s]" % sorted(self.PLATFORM_FN_MAPPING))
            return failure

        # filter with platform, and return file name with extension
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        matched_keyword = platform_info['key'] + "." + platform_info['ext']
        matched_file_list = sorted(
            fn for fn in remote_file_dict
            if matched_keyword in fn and 'firefox' in fn and not fn.endswith('.asc'))
        if len(matched_file_list) != 1:
            print("WARN: the possible match file list is not equal 1, list as below: [%s]" % matched_file_list)
            if not matched_file_list:
                return failure
            # Several candidates: take the alphabetically last one.
            matched_file_list = matched_file_list[-1:]
            print("WARN: select following file [%s]" % matched_file_list)

        # combine file name with json
        matched_file_name = matched_file_list[0]
        json_file_name = matched_file_name.replace(matched_keyword, platform_info['key'] + ".json")
        if json_file_name not in remote_file_dict:
            print("ERROR: can't find the json file[%s] in remote file list[%s]!" % (json_file_name, remote_file_dict))
            return failure
        print("DEBUG: matched file name: [%s], json_file_name: [%s]" % (matched_file_name, json_file_name))

        # download files
        download_fx_fp = self.download_file(output_dp, self.ARCHIVE_URL + remote_file_dict[matched_file_name])
        download_json_fp = self.download_file(output_dp, self.ARCHIVE_URL + remote_file_dict[json_file_name])

        # check download status
        if download_fx_fp and download_json_fp:
            print("SUCCESS: build files download in [%s], [%s] " % (download_fx_fp, download_json_fp))
            return (download_fx_fp, download_json_fp)
        print("ERROR: build files download in [%s,%s] " % (download_fx_fp, download_json_fp))
        return failure

    def get_try_build(self, user_email, build_hash, output_dp):
        """Download a try build pushed by ``user_email``.

        Unless ``skip_status_check`` is set, the build is only downloaded when
        the matching Treeherder job reports ``success``.

        :returns: ``(package path, json path)`` or ``(None, None)``.
        """
        resultset = self.fetch_resultset(user_email, build_hash)
        if not resultset:
            print("ERROR: can't get result set! skip download build from try server, [%s, %s]" % (user_email, build_hash))
            return (None, None)

        # if build hash is not provided, use the latest revision as build hash value
        if build_hash is None:
            build_hash = resultset['revision']
        print("Resultset is found, and build hash is [%s]" % build_hash)

        # compose remote folder url
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        build_folder_url_template = "%s/pub/firefox/%s-builds/%s-%s/%s-%s/"
        build_folder_url = build_folder_url_template % (self.ARCHIVE_URL,
                                                        self.repo, user_email, build_hash,
                                                        self.repo, platform_info['trydl'])

        # skip status check will retrieve the files list from remote folder url
        if self.skip_status_check:
            return self.download_from_remote_url_folder(build_folder_url, output_dp)

        job = self.get_job(resultset, platform_info['job'])
        if not job:
            print("ERROR: can't find the job!")
            return (None, None)

        job_result = job['result'].lower()
        if job_result != "success":
            print("WARNING: Current job status is [%s] !! Your build will download when job status is success" % job_result)
            return (None, None)
        return self.download_from_remote_url_folder(build_folder_url, output_dp)

    def get_nightly_build(self, output_dp):
        """Download the latest mozilla-central nightly build.

        :returns: ``(package path, json path)`` or ``(None, None)``.
        """
        remote_url_str = self.ARCHIVE_URL + self.NIGHTLY_LATEST_URL_FOLDER
        return self.download_from_remote_url_folder(remote_url_str, output_dp)
Example #13
0
class Treeherder(object):
    """Convenience layer on top of TreeherderClient for locating builds."""

    def __init__(self, application, branch, platform, host=TREEHERDER_HOST, protocol="https"):
        """Set up the client and remember what kind of build is wanted.

        :param application: The name of the application to download.
        :param branch: Name of the branch.
        :param platform: Platform of the application.
        :param host: The Treeherder host to make use of.
        :param protocol: The protocol for the Treeherder host.
        """
        self.application = application
        self.branch = branch
        self.platform = platform

        self.client = TreeherderClient(host=host, protocol=protocol)
        self.logger = logging.getLogger(__name__)

    def get_treeherder_platform(self, platform):
        """Map a platform name to Treeherder's own platform identifier.

        :param platform: Platform of the application.
        :raises NotSupportedError: If the platform has no known mapping.
        """
        if platform not in PLATFORM_MAP:
            raise NotSupportedError('Platform "{}" is not supported.'.format(platform))
        return PLATFORM_MAP[platform]

    def query_builds_by_revision(self, revision, job_type_name="Build", debug_build=False):
        """Collect the build folder URLs Treeherder knows for a revision.

        Any failure while talking to Treeherder is logged and the URLs
        gathered up to that point are returned.

        :param revision: Revision of the build to download.
        :param job_type_name: Name of the job to look for. For builds it should be
            'Build', 'Nightly', and 'L10n Nightly'. Defaults to `Build`.
        :param debug_build: Download a debug build.
        :returns: List of unique build folder URLs (unordered).
        """
        found_urls = set()
        wanted_option = "debug" if debug_build else "opt"

        try:
            self.logger.info(
                "Querying {host} for list of builds for revision: {revision}".format(
                    host=self.client.host, revision=revision
                )
            )

            # Find the option collection hash for the wanted build type
            # (opt, and debug for now) so jobs can be filtered by it.
            option_hash = None
            for candidate_hash, options in self.client.get_option_collection_hash().iteritems():
                if any(option["name"] == wanted_option for option in options):
                    option_hash = candidate_hash
                if option_hash:
                    break

            resultsets = self.client.get_resultsets(self.branch, revision=revision)

            # Filters shared by every job query; they keep the queries fast.
            job_filters = {
                "option_collection_hash": option_hash,
                "job_type_name": job_type_name,
                "exclusion_profile": False,
            }
            job_filters.update(self.get_treeherder_platform(self.platform))

            for resultset in resultsets:
                job_filters["result_set_id"] = resultset["id"]
                for job in self.client.get_jobs(self.branch, **job_filters):
                    for log_url in self.client.get_job_log_url(self.branch, job_id=job["id"]):
                        url = log_url["url"]
                        if self.application in url:
                            self.logger.debug("Found build folder: {}".format(url))
                            found_urls.add(url)

        except Exception:
            self.logger.exception("Failure occurred when querying Treeherder for builds")

        return list(found_urls)
Example #14
0
class TreeWatcher(object):
    """Class to keep track of test jobs starting and finishing, known
    revisions and builders, and re-trigger jobs in either when a job
    fails or a when requested by a user.

    Redundant triggers are prevented by keeping track of each buildername,
    tree, revision we've already triggered. The invariant is that for
    any (buildername, tree, revision) combination, we will only issue triggers
    once. Old revisions are purged after a certain interval, so care must
    be taken that enough revisions are stored at a time to prevent issuing
    redundant triggers.
    """
    # Allow at least this many failures for a revision.
    # If we re-trigger for each orange and per-push orange
    # factor is approximately fixed, we shouldn't need to trigger
    # much more than that for any push that would be suitable to land.
    default_retry = 1
    per_push_failures = 4
    # We may trigger more than this as long as the total is below this
    # proportion of all builds for a push (~3% of jobs for now).
    failure_tolerance_factor = 33

    # See the comment below about pruning old revisions.
    revmap_threshold = 2000
    # If someone asks for more than 20 rebuilds on a push, only give them 20.
    requested_limit = 20

    def __init__(self, ldap_auth, is_triggerbot_user=lambda _: True):
        """Create a watcher with empty revision and builder bookkeeping.

        :param ldap_auth: Stored as ``self.auth``.
        :param is_triggerbot_user: Callable stored for user filtering; the
            default accepts everyone.
        """
        # Collaborators.
        self.auth = ldap_auth
        self.is_triggerbot_user = is_triggerbot_user
        self.log = logging.getLogger('trigger-bot')
        self.treeherder_client = TreeherderClient()

        # Revision bookkeeping, seeded from the class-level defaults.
        self.revmap = defaultdict(dict)
        self.revmap_threshold = TreeWatcher.revmap_threshold
        self.lower_trigger_limit = (
            TreeWatcher.default_retry * TreeWatcher.per_push_failures)

        # Counters and builder state start out empty.
        self.global_trigger_count = 0
        self.refresh_builder_counter = 0
        self.hidden_builders = set()

    def _prune_revmap(self):
        # After a certain point we'll need to prune our revmap so it doesn't grow
        # infinitely.
        # We only need to keep an entry around from when we last see it
        # as an incoming revision and the next time it's finished and potentially
        # failed, but it could be pending for a while so we don't know how long that
        # will be.
        target_count = int(TreeWatcher.revmap_threshold * 2/3)
        prune_count = len(self.revmap.keys()) - target_count
        self.log.info('Pruning %d entries from the revmap' % prune_count)

        # Could/should use an LRU cache here, but assuming any job will go
        # from pending to complete in 24 hrs and we have up to 528 pushes a
        # day (like we had last April fool's day), that's still just 528
        # entries to sort.
        for rev, data in sorted(self.revmap.items(), key=lambda (k, v): v['time_seen']):
            if not prune_count:
                self.log.info('Finished pruning, oldest rev is now: %s' % rev)
                return

            del self.revmap[rev]
            prune_count -= 1

    def known_rev(self, branch, rev):
        return rev in self.revmap


    def _get_jobs(self, branch, rev, hidden):
        results = self.treeherder_client.get_resultsets(branch, revision=rev)
        jobs = []
        if results:
            result_set_id = results[0]['id']
            kwargs = {
                'count': 2000,
                'result_set_id': result_set_id,
            }
            if hidden:
                kwargs['visibility'] = 'excluded'
            jobs = self.treeherder_client.get_jobs(branch, **kwargs)
        return [job['ref_data_name'] for job in jobs
                if not re.match('[a-z0-9]{12}', job['ref_data_name'])]


    def get_hidden_jobs(self, branch, rev):
        return self._get_jobs(branch, rev, True)


    def get_visible_jobs(self, branch, rev):
        return self._get_jobs(branch, rev, False)


    def update_hidden_builders(self, branch, rev):
        hidden_builders = set(self.get_hidden_jobs(branch, rev))
        visible_builders = set(self.get_visible_jobs(branch, rev))
        self.hidden_builders -= visible_builders
        self.hidden_builders |= hidden_builders
        self.log.info('Updating hidden builders')
        self.log.info('There are %d hidden builders on try' %
                      len(self.hidden_builders))


    def failure_trigger(self, branch, rev, builder):

        if rev in self.revmap:

            if 'fail_retrigger' not in self.revmap[rev]:
                self.log.info('Found no request to retrigger %s on failure' %
                              rev)
                return

            seen_builders = self.revmap[rev]['seen_builders']

            if builder in seen_builders:
                self.log.info('We\'ve already seen "%s" at %s and don\'t'
                              ' need to trigger it' % (builder, rev))
                return

            if builder in self.hidden_builders:
                self.log.info('Would have triggered "%s" at %s due to failures,'
                              ' but that builder is hidden.' % (builder, rev))
                return

            seen_builders.add(builder)

            count = self.revmap[rev]['fail_retrigger']
            seen = self.revmap[rev]['rev_trigger_count']

            triggered = self.attempt_triggers(branch, rev, builder, count, seen)
            if triggered:
                self.revmap[rev]['rev_trigger_count'] += triggered
                self.log.info('Triggered %d of "%s" at %s' % (triggered, builder, rev))


    def requested_trigger(self, branch, rev, builder):
        if rev in self.revmap and 'requested_trigger' in self.revmap[rev]:

            self.log.info('Found a request to trigger %s and may retrigger' % rev)
            seen_builders = self.revmap[rev]['seen_builders']

            if builder in seen_builders:
                self.log.info('We already triggered "%s" at %s don\'t need'
                            ' to do it again' % (builder, rev))
                return

            seen_builders.add(builder)
            count, talos_count = self.revmap[rev]['requested_trigger']
            if talos_count and 'talos' in builder:
                count = talos_count

            self.log.info('May trigger %d requested jobs for "%s" at %s' %
                        (count, builder, rev))
            self.attempt_triggers(branch, rev, builder, count)


    def add_rev(self, branch, rev, comments, user):
        """Start tracking ``rev`` and record which triggers apply to it.

        ``comments`` is handed to ``triggers_from_msg``; the resulting counts
        decide whether the revision gets a ``'requested_trigger'`` entry, a
        ``'fail_retrigger'`` entry, or neither. Bookkeeping fields are always
        (re)initialised. Once the map holds more than ``revmap_threshold``
        revisions, ``_prune_revmap`` is called.
        """
        requested, requested_talos, retry_on_failure = self.triggers_from_msg(comments)

        fields = {}

        # Only trigger based on a request or a failure, not both.
        if requested or requested_talos:
            self.log.info('Added %d triggers for %s' % (requested, rev))
            fields['requested_trigger'] = (requested, requested_talos)

        if retry_on_failure and not requested:
            fields['fail_retrigger'] = TreeWatcher.default_retry

        fields['rev_trigger_count'] = 0
        # Pruning removes the oldest revisions first, so remember when this
        # one was first seen.
        fields['time_seen'] = time.time()
        # Guards against an infinite retrigger loop: a trigger action is
        # taken only once per builder on a given revision.
        fields['seen_builders'] = set()
        # Triggering activity is filtered by user.
        fields['user'] = user

        self.revmap[rev].update(fields)

        if len(self.revmap) > self.revmap_threshold:
            self._prune_revmap()


    def triggers_from_msg(self, msg):
        """Extract retrigger instructions from a push's commit message.

        Only the first line containing ``'try: '`` is examined. The text
        after that marker is tokenised and the options ``--rebuild N``,
        ``--rebuild-talos N`` and ``--no-retry`` are read from it; every
        other token is ignored.

        Returns a ``(rebuilds, rebuild_talos, retry)`` tuple. Both counts are
        capped at ``TreeWatcher.requested_limit``. ``retry`` is ``False`` only
        when ``--no-retry`` was given. A message without try syntax, or whose
        options cannot be parsed, yields ``(0, 0, False)`` so that no
        triggering of any kind is scheduled for it.
        """
        try_args = None
        all_try_args = None

        for line in msg.splitlines():
            if 'try: ' in line:
                # Autoland adds quotes to try strings that will confuse our
                # args later on.
                if line.startswith('"') and line.endswith('"'):
                    line = line[1:-1]
                try_args = line.strip().split('try: ', 1)[1]
                # Allow spaces inside of [filter expressions]
                all_try_args = re.findall(r'(?:\[.*?\]|\S)+', try_args)
                break

        if all_try_args is None:
            # Callers unpack three values; a bare ``0`` here used to raise
            # TypeError for every push without try syntax.
            return 0, 0, False

        parser = argparse.ArgumentParser()
        parser.add_argument('--rebuild', type=int, default=0)
        parser.add_argument('--rebuild-talos', type=int, dest='rebuild_talos',
                            default=0)
        parser.add_argument('--no-retry', action='store_false', dest='retry',
                            default=True)
        try:
            (args, _) = parser.parse_known_args(all_try_args)
        except SystemExit:
            # argparse exits the interpreter on malformed values (for example
            # ``--rebuild lots``). The text comes from a commit message, so
            # treat it as "nothing requested" instead of stopping the service.
            self.log.warning('Could not parse try syntax "%s", ignoring it' %
                             try_args)
            return 0, 0, False

        limit = TreeWatcher.requested_limit
        rebuilds = min(args.rebuild, limit)
        rebuild_talos = min(args.rebuild_talos, limit)
        return rebuilds, rebuild_talos, args.retry


    def handle_message(self, key, branch, rev, builder, status, comments, user):
        """Process one job notification and dispatch any resulting triggers.

        A revision seen for the first time is registered through ``add_rev``,
        provided ``comments`` is non-empty. Keys ending in ``'started'`` go to
        ``requested_trigger``; a ``status`` of 1 or 2 goes to
        ``failure_trigger``. The hidden-builder list is refreshed whenever
        ``refresh_builder_counter`` has counted down to zero, after which the
        counter restarts at 300.
        """
        first_sighting = not self.known_rev(branch, rev)
        if first_sighting and comments:
            # Add the revision to the known revs and mark required triggers.
            self.add_rev(branch, rev, comments, user)

        # If the job is starting and a user requested unconditional
        # retriggers, process them right away.
        if key.endswith('started'):
            self.requested_trigger(branch, rev, builder)

        # A failing job is a candidate to retrigger.
        if status in (1, 2):
            self.failure_trigger(branch, rev, builder)

        if self.refresh_builder_counter != 0:
            self.refresh_builder_counter -= 1
            return

        self.update_hidden_builders(branch, rev)
        self.refresh_builder_counter = 300


    def attempt_triggers(self, branch, rev, builder, count, seen=0, attempt=0):
        """Try to rebuild ``builder`` at ``rev`` ``count`` times via buildapi.

        Returns ``count`` when the triggers were sent, were only simulated
        (user not accepted by ``is_triggerbot_user``) or were deferred to a
        timer-driven re-attempt. Returns ``None`` when nothing was or will be
        triggered: invalid revision, no data from buildapi, more existing
        requests than ``count``, too many failures already, or no build found
        after the final re-attempt.

        ``seen`` is the number of triggers already accounted to the revision
        and ``attempt`` counts re-attempts made through the timer.
        """
        if re.match('[a-z0-9]{12}', rev) is None:
            self.log.error('%s doesn\'t look like a valid revision, can\'t trigger it' %
                           rev)
            return

        ids = self._get_ids_for_rev(branch, rev, builder)
        if ids is None:
            return
        build_id, request_id, jobs_for_builder, jobs_for_rev = ids

        if jobs_for_builder > count:
            self.log.warning('Would have triggered %d of "%s" at %s, but we\'ve already'
                             ' found more requests than that for this builder/rev.' %
                             (count, builder, rev))
            return

        self.log.info("Found %s jobs total for %s" % (jobs_for_rev, rev))
        over_tolerance = seen * self.failure_tolerance_factor > jobs_for_rev
        if over_tolerance and seen > self.lower_trigger_limit:
            self.log.warning('Would have triggered "%s" at %s but there are already '
                             'too many failures.' % (builder, rev))
            return

        # From here on the request is counted, whether or not it is sent.
        self.global_trigger_count += count
        self.log.warning('Up to %d total triggers have been performed by this service.' %
                         self.global_trigger_count)

        requester = self.revmap[rev]['user']
        if not self.is_triggerbot_user(requester):
            self.log.warning('Would have triggered "%s" at %s %d times.' %
                             (builder, rev, count))
            self.log.warning('But %s is not a triggerbot user.' % requester)
            # Pretend we did these triggers, just for accounting purposes.
            return count

        self.log.info('attempt_triggers, attempt %d' % attempt)

        api_root = 'https://secure.pub.build.mozilla.org/buildapi/self-serve'

        if not build_id and not request_id:
            # For a short time after a job starts it seems there might not be
            # any info associated with this job/builder.
            self.log.warning('Could not trigger "%s" at %s because there were '
                             'no builds found with that buildername to rebuild.' %
                             (builder, rev))

            if attempt > 4:
                self.log.warning('Already tried to find something to rebuild '
                                 'for "%s" at %s, giving up' % (builder, rev))
                return

            self.log.warning('Will re-attempt')
            retry_timer = Timer(90, self.attempt_triggers,
                                args=[branch, rev, builder, count, seen, attempt + 1])
            retry_timer.start()
            # Assume some subsequent attempt will be successful for
            # accounting purposes.
            return count

        # A build id is preferred; a request id is the fallback.
        if build_id:
            target_url = '%s/%s/build' % (api_root, branch)
            payload = {'count': count, 'build_id': build_id}
        else:
            target_url = '%s/%s/request' % (api_root, branch)
            payload = {'count': count, 'request_id': request_id}

        self._rebuild(target_url, payload)
        return count


    def _get_ids_for_rev(self, branch, rev, builder):
        """Look up what buildapi knows about ``builder`` at ``branch``/``rev``.

        Returns ``(build_id, request_id, builder_total, rev_total)``: the
        first truthy build id and request id found among the entries for
        ``builder`` (``None`` when absent), the number of entries for that
        builder and the number of entries for the whole revision. Returns
        ``None`` when the response body is not valid JSON.
        """
        api_root = 'https://secure.pub.build.mozilla.org/buildapi/self-serve'
        rev_info_url = '%s/%s/rev/%s?format=json' % (api_root, branch, rev)

        response = requests.get(rev_info_url,
                                headers={'Accept': 'application/json'},
                                auth=self.auth)

        try:
            entries = response.json()
        except ValueError:
            self.log.error('Received an unexpected ValueError when retrieving '
                           'information about %s from buildapi.' % rev)
            self.log.error('Request status: %d' % response.status_code)
            return None

        build_id = None
        request_id = None
        matching = 0
        total = 0

        for entry in entries:
            total += 1
            if entry['buildername'] != builder:
                continue

            matching += 1
            # Keep the first id of each kind; later entries never replace it.
            if not build_id and 'build_id' in entry:
                build_id = entry['build_id']
            if not request_id and 'request_id' in entry:
                request_id = entry['request_id']

        return build_id, request_id, matching, total

    def _rebuild(self, build_url, payload):
        """POST ``payload`` to ``build_url`` and log the resulting HTTP status.

        The request is authenticated with ``self.auth``. The response status
        is only logged; it is neither checked nor returned.
        """
        self.log.info('Triggering url: %s' % build_url)
        self.log.debug('Triggering payload:\n\t%s' % payload)

        json_headers = {'Accept': 'application/json'}
        response = requests.post(build_url, headers=json_headers,
                                 data=payload, auth=self.auth)

        self.log.info('Requested job, return: %s' % response.status_code)
def on_event(data, message, dry_run, treeherder_server_url, acknowledge,
             **kwargs):
    """Act upon Treeherder job events.

    Only the "backfill" action is processed; any other action is reported
    as an error.

    :param data: event payload; ``action``, ``job_id``, ``project`` and
        ``requester`` are read from it.
    :param message: queue message, acked when ``acknowledge`` is true.
    :param dry_run: forwarded to ``manual_backfill``.
    :param treeherder_server_url: Treeherder instance to query and to link to.
    :param acknowledge: whether the message should be acked once handled.
    :returns: 0 when the event was ignored or the job could not be found,
        -1 for an invalid builder name or an unknown action, otherwise
        whatever ``manual_backfill`` returns.
    """
    LOG.info('Acknowledge value: {}'.format(acknowledge))

    exit_code = 0  # SUCCESS

    if ignored(data):
        _acknowledge_message(message, acknowledge)
        return exit_code

    # Cleaning mozci caches
    buildjson.BUILDS_CACHE = {}
    query_jobs.JOBS_CACHE = {}

    treeherder_client = TreeherderClient(server_url=treeherder_server_url)

    action = data['action'].capitalize()
    job_id = data['job_id']
    repo_name = data['project']
    status = None

    # We want to know the status of the job we're processing
    try:
        job_info = treeherder_client.get_jobs(repo_name, id=job_id)[0]
    except IndexError:
        LOG.info("We could not find any job_info for repo_name: %s and "
                 "job_id: %s" % (repo_name, job_id))
        # The event is finished with at this point, so ack it like every
        # other exit path does; otherwise it is never removed from the queue.
        _acknowledge_message(message, acknowledge)
        return exit_code

    buildername = job_info["ref_data_name"]

    # We want to know the revision associated for this job
    result_sets = treeherder_client.get_resultsets(
        repo_name, id=job_info["result_set_id"])
    revision = result_sets[0]["revision"]

    link_to_job = '{}/#/jobs?repo={}&revision={}&selectedJob={}'.format(
        treeherder_server_url, repo_name, revision, job_id)

    LOG.info("{} action requested by {} for '{}'".format(
        action,
        data['requester'],
        buildername,
    ))
    LOG.info('Request for {}'.format(link_to_job))

    buildername = filter_invalid_builders(buildername)

    if buildername is None:
        LOG.info('Treeherder can send us invalid builder names.')
        LOG.info('See https://bugzilla.mozilla.org/show_bug.cgi?id=1242038.')
        # buildername is None here, so report the name Treeherder gave us.
        LOG.warning('Requested job name "%s" is invalid.' %
                    job_info['ref_data_name'])
        exit_code = -1  # FAILURE

    # There are various actions that can be taken on a job, however, we currently
    # only process the backfill one
    elif action == "Backfill":
        exit_code = manual_backfill(
            revision=revision,
            buildername=buildername,
            dry_run=dry_run,
        )
        if not dry_run:
            status = 'Backfill request sent'
        else:
            status = 'Dry-run mode, nothing was backfilled.'
        LOG.debug(status)

    else:
        LOG.error('We were not aware of the "{}" action. Please file an issue'.
                  format(action))
        exit_code = -1  # FAILURE

    _acknowledge_message(message, acknowledge)

    return exit_code


def _acknowledge_message(message, acknowledge):
    """Ack ``message`` to remove it from our queue, if ``acknowledge`` is set."""
    if acknowledge:
        LOG.info('Message acknowledged')
        message.ack()
Example #16
0
class TreeherderApi(QueryApi):
    """Query jobs and their scheduling status through a ``TreeherderClient``."""

    def __init__(self):
        self.treeherder_client = TreeherderClient()

    def get_all_jobs(self, repo_name, revision, **params):
        """
        Return all jobs for a given revision.
        If we can't query about this revision in treeherder api, we return an empty list.

        Extra keyword arguments are passed to both the resultset query and
        the jobs query.
        """
        # We query treeherder for its internal revision_id, and then get the jobs from them.
        # We cannot get jobs directly from revision and repo_name in TH api.
        # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1165401
        results = self.treeherder_client.get_resultsets(repo_name, revision=revision, **params)
        all_jobs = []
        if results:
            revision_id = results[0]["id"]
            all_jobs = self.treeherder_client.get_jobs(repo_name, count=2000,
                                                       result_set_id=revision_id, **params)
        return all_jobs

    def get_buildapi_request_id(self, repo_name, job):
        """ Method to return buildapi's request_id.

        The id is read from the first 'buildapi' artifact of ``job``.
        """
        job_id = job["id"]
        query_params = {'job_id': job_id,
                        'name': 'buildapi'}
        LOG.debug("We are fetching request_id from treeherder artifacts api")
        artifact_content = self.treeherder_client.get_artifacts(repo_name,
                                                                **query_params)
        return artifact_content[0]["blob"]["request_id"]

    def get_hidden_jobs(self, repo_name, revision):
        """ Return all hidden jobs on Treeherder """
        return self.get_all_jobs(repo_name, revision=revision, visibility='excluded')

    def get_matching_jobs(self, repo_name, revision, buildername):
        """
        Return all jobs that matched the criteria.

        A job matches when its ``ref_data_name`` equals ``buildername``.
        """
        LOG.debug("Find jobs matching '%s'" % buildername)
        all_jobs = self.get_all_jobs(repo_name, revision)
        matching_jobs = [j for j in all_jobs
                         if j["ref_data_name"] == buildername]

        LOG.debug("We have found %d job(s) of '%s'." %
                  (len(matching_jobs), buildername))
        return matching_jobs

    def get_job_status(self, job):
        """
        Helper to determine the scheduling status of a job from treeherder.

        Raises a TreeherderError if the job is neither coalesced, pending,
        running nor completed with a result we know how to map.
        """
        if job["job_coalesced_to_guid"] is not None:
            return COALESCED

        if job["result"] == "unknown":
            if job["state"] == "pending":
                return PENDING
            elif job["state"] == "running":
                return RUNNING
            else:
                return UNKNOWN

        # If the job 'state' is completed, we can have the following possible statuses:
        # https://github.com/mozilla/treeherder/blob/master/treeherder/etl/buildbot.py#L7
        status_dict = {
            "success": SUCCESS,
            "busted": FAILURE,
            "testfailed": FAILURE,
            "skipped": SKIPPED,
            "exception": EXCEPTION,
            "retry": RETRY,
            "usercancel": CANCELLED
            }

        # A completed job with a result missing from the table used to escape
        # as a KeyError, which find_all_jobs_by_status does not catch. Report
        # it as the documented TreeherderError instead.
        if job["state"] == "completed" and job["result"] in status_dict:
            return status_dict[job["result"]]

        LOG.debug(job)
        raise TreeherderError("Unexpected status")

    def find_all_jobs_by_status(self, repo_name, revision, status):
        """
        Return the names of the jobs of a revision whose status is ``status``.

        Jobs whose status cannot be determined are skipped. Taskcluster jobs
        are named by ``job_type_name``, all others by ``ref_data_name``.
        """
        builder_names = []
        jobs = self.get_all_jobs(repo_name, revision)
        # filter out those jobs whose machine name is unknown
        jobs = [job for job in jobs if job['machine_name'] != 'unknown']
        for job in jobs:
            try:
                job_status = self.get_job_status(job)
            except TreeherderError:
                continue
            if job_status == status:
                if job['build_system_type'] == 'taskcluster':
                    job_name = job['job_type_name']
                else:
                    job_name = job['ref_data_name']
                builder_names.append(job_name)
        return builder_names
Example #17
0
    def get_test_packages_url(self, properties):
        """Return the URL of the test packages JSON file.

        In case of localized daily builds we can query the en-US build to get
        the URL, but for candidate builds we need the tinderbox build
        of the first parent changeset which was not checked-in by the release
        automation process (necessary until bug 1242035 is fixed).

        If ``properties`` already carries a ``test_packages_url`` it is
        returned as is. The result is ``None`` when only the prefix-less
        fallback URL could be built and a HEAD request for it does not
        answer with status 200.
        """
        if properties.get('test_packages_url'):
            url = properties['test_packages_url']
        else:
            overrides = {
                'locale': 'en-US',
                'extension': 'test_packages.json',
            }

            # Use Treeherder to query for the next revision which has Tinderbox builds
            # available. We can use this revision to retrieve the test-packages URL.
            if properties['tree'].startswith('release-'):
                platform_map = {
                    'linux': {'build_platform': 'linux32'},
                    'linux64': {'build_platform': 'linux64'},
                    'macosx': {'build_os': 'mac', 'build_architecture': 'x86_64'},
                    'macosx64': {'build_os': 'mac', 'build_architecture': 'x86_64'},
                    'win32': {'build_os': 'win', 'build_architecture': 'x86'},
                    'win64': {'build_os': 'win', 'build_architecture': 'x86_64'},
                }

                self.logger.info('Querying tinderbox revision for {} build...'.format(
                                 properties['tree']))
                revision = properties['revision'][:12]

                client = TreeherderClient(server_url='https://treeherder.mozilla.org')
                resultsets = client.get_resultsets(properties['branch'],
                                                   tochange=revision,
                                                   count=50)

                # Retrieve the option hashes to filter for opt builds.
                # items() instead of the Python-2-only iteritems() keeps this
                # working on both major Python versions.
                option_hash = None
                for key, values in client.get_option_collection_hash().items():
                    for value in values:
                        if value['name'] == 'opt':
                            option_hash = key
                            break
                    if option_hash:
                        break

                # Set filters to speed-up querying jobs
                kwargs = {
                    'job_type_name': 'Build',
                    'exclusion_profile': False,
                    'option_collection_hash': option_hash,
                    'result': 'success',
                }
                kwargs.update(platform_map[properties['platform']])

                # Take the first resultset that has a matching successful
                # build; keep the original revision when none has.
                for resultset in resultsets:
                    kwargs.update({'result_set_id': resultset['id']})
                    jobs = client.get_jobs(properties['branch'], **kwargs)
                    if jobs:
                        revision = resultset['revision']
                        break

                self.logger.info('Found revision for tinderbox build: {}'.format(revision))

                overrides['build_type'] = 'tinderbox'
                overrides['revision'] = revision

            # For update tests we need the test package of the target build. That allows
            # us to add fallback code in case major parts of the ui are changing in Firefox.
            if properties.get('target_buildid'):
                overrides['build_id'] = properties['target_buildid']

            # The test package json file has a prefix with bug 1239808 fixed. Older builds need
            # a fallback to a prefix-less filename.
            try:
                self.logger.info('Querying test packages URL...')
                url = self.query_file_url(properties, property_overrides=overrides)
            except download_errors.NotFoundError:
                self.logger.info('URL not found. Querying not-prefixed test packages URL...')
                extension = overrides.pop('extension')
                build_url = self.query_file_url(properties, property_overrides=overrides)
                # Swap the build's file name for the bare extension name.
                url = '{}/{}'.format(build_url[:build_url.rfind('/')], extension)
                r = requests.head(url)
                if r.status_code != 200:
                    url = None

            self.logger.info('Found test package URL at: {}'.format(url))

        return url
Example #18
0
class TreeWatcher(object):
    """Class to keep track of test jobs starting and finishing, known
    revisions and builders, and re-trigger jobs either when a job
    fails or when requested by a user.

    Redundant triggers are prevented by keeping track of each buildername,
    tree, revision we've already triggered. The invariant is that for
    any (buildername, tree, revision) combination, we will only issue triggers
    once. Old revisions are purged after a certain interval, so care must
    be taken that enough revisions are stored at a time to prevent issuing
    redundant triggers.
    """
    # Allow at least this many failures for a revision.
    # If we re-trigger for each orange and per-push orange
    # factor is approximately fixed, we shouldn't need to trigger
    # much more than that for any push that would be suitable to land.
    default_retry = 1
    per_push_failures = 4
    # We may trigger more than this as long as the total is below this
    # proportion of all builds for a push (~3% of jobs for now).
    failure_tolerance_factor = 33

    # See the comment below about pruning old revisions.
    revmap_threshold = 2000
    # If someone asks for more than 20 rebuilds on a push, only give them 20.
    requested_limit = 20

    def __init__(self, ldap_auth, is_triggerbot_user=lambda _: True):
        """Set up an empty watcher.

        ``ldap_auth`` is passed as ``auth`` to every buildapi request.
        ``is_triggerbot_user`` is called with a push's user and decides
        whether triggers for that user are really sent.
        """
        self.revmap = defaultdict(dict)
        self.revmap_threshold = TreeWatcher.revmap_threshold
        self.auth = ldap_auth
        self.lower_trigger_limit = TreeWatcher.default_retry * TreeWatcher.per_push_failures
        self.log = logging.getLogger('trigger-bot')
        self.is_triggerbot_user = is_triggerbot_user
        self.global_trigger_count = 0
        self.treeherder_client = TreeherderClient()
        self.hidden_builders = set()
        self.refresh_builder_counter = 0

    def _prune_revmap(self):
        """Drop the oldest revisions until two thirds of the threshold remain."""
        # After a certain point we'll need to prune our revmap so it doesn't grow
        # infinitely.
        # We only need to keep an entry around from when we last see it
        # as an incoming revision and the next time it's finished and potentially
        # failed, but it could be pending for a while so we don't know how long that
        # will be.
        #
        # The target is derived from the instance threshold, the same value
        # add_rev compares against. With the class constant, an instance
        # using a lower threshold got a negative prune count, which never
        # reaches zero, so the loop below deleted every revision.
        target_count = int(self.revmap_threshold * 2 / 3)
        prune_count = len(self.revmap) - target_count
        if prune_count <= 0:
            return
        self.log.info('Pruning %d entries from the revmap' % prune_count)

        # Could/should use an LRU cache here, but assuming any job will go
        # from pending to complete in 24 hrs and we have up to 528 pushes a
        # day (like we had last April fool's day), that's still just 528
        # entries to sort.
        # The key avoids tuple-parameter unpacking, which Python 3 rejects.
        oldest_first = sorted(self.revmap.items(),
                              key=lambda item: item[1]['time_seen'])
        for rev, _ in oldest_first:
            if not prune_count:
                self.log.info('Finished pruning, oldest rev is now: %s' % rev)
                return

            del self.revmap[rev]
            prune_count -= 1

    def known_rev(self, branch, rev):
        """Return whether ``rev`` is tracked; ``branch`` is not consulted."""
        return rev in self.revmap

    def _get_jobs(self, branch, rev, hidden):
        """Return the ``ref_data_name`` of the Treeherder jobs for ``rev``.

        With ``hidden`` set, the query asks for jobs with
        ``visibility='excluded'``. Names that start with twelve characters
        from ``[a-z0-9]`` are left out.
        """
        results = self.treeherder_client.get_resultsets(branch, revision=rev)
        jobs = []
        if results:
            result_set_id = results[0]['id']
            kwargs = {
                'count': 2000,
                'result_set_id': result_set_id,
            }
            if hidden:
                kwargs['visibility'] = 'excluded'
            jobs = self.treeherder_client.get_jobs(branch, **kwargs)
        return [
            job['ref_data_name'] for job in jobs
            if not re.match('[a-z0-9]{12}', job['ref_data_name'])
        ]

    def get_hidden_jobs(self, branch, rev):
        """Return the names of the hidden jobs for ``rev``."""
        return self._get_jobs(branch, rev, True)

    def get_visible_jobs(self, branch, rev):
        """Return the names of the jobs for ``rev`` from the default query."""
        return self._get_jobs(branch, rev, False)

    def update_hidden_builders(self, branch, rev):
        """Refresh ``self.hidden_builders`` from the jobs of ``rev``."""
        hidden_builders = set(self.get_hidden_jobs(branch, rev))
        visible_builders = set(self.get_visible_jobs(branch, rev))
        # Visible ones are removed first, so a builder reported by both
        # queries ends up hidden.
        self.hidden_builders -= visible_builders
        self.hidden_builders |= hidden_builders
        self.log.info('Updating hidden builders')
        self.log.info('There are %d hidden builders on try' %
                      len(self.hidden_builders))

    def failure_trigger(self, branch, rev, builder):
        """Retrigger a failed ``builder`` at ``rev`` if the push allows it.

        Nothing is done for untracked revisions, revisions without a
        ``'fail_retrigger'`` entry, builders already handled for this
        revision, or hidden builders.
        """
        if rev in self.revmap:

            if 'fail_retrigger' not in self.revmap[rev]:
                self.log.info('Found no request to retrigger %s on failure' %
                              rev)
                return

            seen_builders = self.revmap[rev]['seen_builders']

            if builder in seen_builders:
                self.log.info('We\'ve already seen "%s" at %s and don\'t'
                              ' need to trigger it' % (builder, rev))
                return

            if builder in self.hidden_builders:
                self.log.info(
                    'Would have triggered "%s" at %s due to failures,'
                    ' but that builder is hidden.' % (builder, rev))
                return

            seen_builders.add(builder)

            count = self.revmap[rev]['fail_retrigger']
            seen = self.revmap[rev]['rev_trigger_count']

            triggered = self.attempt_triggers(branch, rev, builder, count,
                                              seen)
            if triggered:
                self.revmap[rev]['rev_trigger_count'] += triggered
                self.log.info('Triggered %d of "%s" at %s' %
                              (triggered, builder, rev))

    def requested_trigger(self, branch, rev, builder):
        """Issue the retriggers a user requested for ``builder`` at ``rev``.

        Nothing is done unless the revision is tracked with a
        ``'requested_trigger'`` entry and the builder has not been handled
        for this revision yet.
        """
        if rev in self.revmap and 'requested_trigger' in self.revmap[rev]:

            self.log.info('Found a request to trigger %s and may retrigger' %
                          rev)
            seen_builders = self.revmap[rev]['seen_builders']

            if builder in seen_builders:
                self.log.info('We already triggered "%s" at %s don\'t need'
                              ' to do it again' % (builder, rev))
                return

            seen_builders.add(builder)
            count, talos_count = self.revmap[rev]['requested_trigger']
            # A non-zero talos count takes precedence for talos builders.
            if talos_count and 'talos' in builder:
                count = talos_count

            self.log.info('May trigger %d requested jobs for "%s" at %s' %
                          (count, builder, rev))
            self.attempt_triggers(branch, rev, builder, count)

    def add_rev(self, branch, rev, comments, user):
        """Start tracking ``rev`` and record which triggers apply to it.

        ``comments`` is parsed by ``triggers_from_msg``. The revision map is
        pruned once it holds more than ``revmap_threshold`` revisions.
        """
        req_count, req_talos_count, should_retry = self.triggers_from_msg(
            comments)

        # Only trigger based on a request or a failure, not both.
        if req_count or req_talos_count:
            self.log.info('Added %d triggers for %s' % (req_count, rev))
            self.revmap[rev]['requested_trigger'] = (req_count,
                                                     req_talos_count)

        if should_retry and not req_count:
            # self.log.info('Adding default failure retries for %s' % rev)
            self.revmap[rev]['fail_retrigger'] = TreeWatcher.default_retry

        self.revmap[rev]['rev_trigger_count'] = 0

        # When we need to purge old revisions, we need to purge the
        # oldest first.
        self.revmap[rev]['time_seen'] = time.time()

        # Prevent an infinite retrigger loop - if we take a trigger action,
        # ensure we only take it once for a builder on a particular revision.
        self.revmap[rev]['seen_builders'] = set()

        # Filter triggering activity based on users.
        self.revmap[rev]['user'] = user

        if len(self.revmap) > self.revmap_threshold:
            self._prune_revmap()

    def triggers_from_msg(self, msg):
        """Extract retrigger instructions from a push's commit message.

        Only the first line containing ``'try: '`` is examined. The text
        after that marker is tokenised and the options ``--rebuild N``,
        ``--rebuild-talos N`` and ``--no-retry`` are read from it; every
        other token is ignored.

        Returns a ``(rebuilds, rebuild_talos, retry)`` tuple. Both counts are
        capped at ``TreeWatcher.requested_limit``. ``retry`` is ``False`` only
        when ``--no-retry`` was given. A message without try syntax, or whose
        options cannot be parsed, yields ``(0, 0, False)`` so that no
        triggering of any kind is scheduled for it.
        """
        try_args = None
        all_try_args = None

        for line in msg.splitlines():
            if 'try: ' in line:
                # Autoland adds quotes to try strings that will confuse our
                # args later on.
                if line.startswith('"') and line.endswith('"'):
                    line = line[1:-1]
                try_args = line.strip().split('try: ', 1)[1]
                # Allow spaces inside of [filter expressions]
                all_try_args = re.findall(r'(?:\[.*?\]|\S)+', try_args)
                break

        if all_try_args is None:
            # add_rev unpacks three values; a bare ``0`` here used to raise
            # TypeError for every push without try syntax.
            return 0, 0, False

        parser = argparse.ArgumentParser()
        parser.add_argument('--rebuild', type=int, default=0)
        parser.add_argument('--rebuild-talos',
                            type=int,
                            dest='rebuild_talos',
                            default=0)
        parser.add_argument('--no-retry',
                            action='store_false',
                            dest='retry',
                            default=True)
        try:
            (args, _) = parser.parse_known_args(all_try_args)
        except SystemExit:
            # argparse exits the interpreter on malformed values (for example
            # ``--rebuild lots``). The text comes from a commit message, so
            # treat it as "nothing requested" instead of stopping the service.
            self.log.warning('Could not parse try syntax "%s", ignoring it' %
                             try_args)
            return 0, 0, False

        limit = TreeWatcher.requested_limit
        rebuilds = min(args.rebuild, limit)
        rebuild_talos = min(args.rebuild_talos, limit)
        return rebuilds, rebuild_talos, args.retry

    def handle_message(self, key, branch, rev, builder, status, comments,
                       user):
        """Process one job notification and dispatch any resulting triggers."""
        if not self.known_rev(branch, rev) and comments:
            # First time we've seen this revision? Add it to known
            # revs and mark required triggers,
            self.add_rev(branch, rev, comments, user)

        if key.endswith('started'):
            # If the job is starting and a user requested unconditional
            # retriggers, process them right away.
            self.requested_trigger(branch, rev, builder)

        if status in (1, 2):
            # A failing job is a candidate to retrigger.
            self.failure_trigger(branch, rev, builder)

        # The hidden-builder list is refreshed on the first message and then
        # again each time the counter has run down from 300.
        if self.refresh_builder_counter == 0:
            self.update_hidden_builders(branch, rev)
            self.refresh_builder_counter = 300
        else:
            self.refresh_builder_counter -= 1

    def attempt_triggers(self, branch, rev, builder, count, seen=0, attempt=0):
        """Try to rebuild ``builder`` at ``rev`` ``count`` times via buildapi.

        Returns ``count`` when the triggers were sent, were only simulated
        (user not accepted by ``is_triggerbot_user``) or were deferred to a
        timer-driven re-attempt. Returns ``None`` when nothing was or will be
        triggered.
        """
        if not re.match('[a-z0-9]{12}', rev):
            self.log.error(
                '%s doesn\'t look like a valid revision, can\'t trigger it' %
                rev)
            return

        build_data = self._get_ids_for_rev(branch, rev, builder)

        if build_data is None:
            return

        found_buildid, found_requestid, builder_total, rev_total = build_data

        if builder_total > count:
            self.log.warning(
                'Would have triggered %d of "%s" at %s, but we\'ve already'
                ' found more requests than that for this builder/rev.' %
                (count, builder, rev))
            return

        self.log.info("Found %s jobs total for %s" % (rev_total, rev))
        if (seen * self.failure_tolerance_factor > rev_total
                and seen > self.lower_trigger_limit):
            self.log.warning(
                'Would have triggered "%s" at %s but there are already '
                'too many failures.' % (builder, rev))
            return

        self.global_trigger_count += count
        self.log.warning(
            'Up to %d total triggers have been performed by this service.' %
            self.global_trigger_count)

        if not self.is_triggerbot_user(self.revmap[rev]['user']):
            self.log.warning('Would have triggered "%s" at %s %d times.' %
                             (builder, rev, count))
            self.log.warning('But %s is not a triggerbot user.' %
                             self.revmap[rev]['user'])
            # Pretend we did these triggers, just for accounting purposes.
            return count

        self.log.info('attempt_triggers, attempt %d' % attempt)

        root_url = 'https://secure.pub.build.mozilla.org/buildapi/self-serve'
        payload = {
            'count': count,
        }

        if found_buildid:
            build_url = '%s/%s/build' % (root_url, branch)
            payload['build_id'] = found_buildid
        elif found_requestid:
            build_url = '%s/%s/request' % (root_url, branch)
            payload['request_id'] = found_requestid
        else:
            # For a short time after a job starts it seems there might not be
            # any info associated with this job/builder.
            self.log.warning(
                'Could not trigger "%s" at %s because there were '
                'no builds found with that buildername to rebuild.' %
                (builder, rev))

            if attempt > 4:
                self.log.warning('Already tried to find something to rebuild '
                                 'for "%s" at %s, giving up' % (builder, rev))
                return

            self.log.warning('Will re-attempt')
            tm = Timer(90,
                       self.attempt_triggers,
                       args=[branch, rev, builder, count, seen, attempt + 1])
            tm.start()
            # Assume some subsequent attempt will be successful for accounting
            # purposes.
            return count

        self._rebuild(build_url, payload)
        return count

    def _get_ids_for_rev(self, branch, rev, builder):
        """Look up what buildapi knows about ``builder`` at ``branch``/``rev``.

        Returns ``(build_id, request_id, builder_total, rev_total)``, or
        ``None`` when the response body is not valid JSON.
        """
        root_url = 'https://secure.pub.build.mozilla.org/buildapi/self-serve'

        # First find the build_id for the job to rebuild
        build_info_url = '%s/%s/rev/%s?format=json' % (root_url, branch, rev)
        info_req = requests.get(build_info_url,
                                headers={'Accept': 'application/json'},
                                auth=self.auth)
        found_buildid = None
        found_requestid = None
        builder_total, rev_total = 0, 0

        try:
            results = info_req.json()
        except ValueError:
            self.log.error('Received an unexpected ValueError when retrieving '
                           'information about %s from buildapi.' % rev)
            self.log.error('Request status: %d' % info_req.status_code)
            return None

        for res in results:
            rev_total += 1
            if res['buildername'] == builder:
                builder_total += 1
                # Keep the first id of each kind that we come across.
                if 'build_id' in res and not found_buildid:
                    found_buildid = res['build_id']
                if 'request_id' in res and not found_requestid:
                    found_requestid = res['request_id']

        return found_buildid, found_requestid, builder_total, rev_total

    def _rebuild(self, build_url, payload):
        """POST ``payload`` to ``build_url`` and log the resulting HTTP status."""
        self.log.info('Triggering url: %s' % build_url)
        self.log.debug('Triggering payload:\n\t%s' % payload)
        req = requests.post(build_url,
                            headers={'Accept': 'application/json'},
                            data=payload,
                            auth=self.auth)
        self.log.info('Requested job, return: %s' % req.status_code)
class GetBuild(object):
    """Download a Firefox build (latest nightly or a try push) from the archive.

    The public download methods return ``True`` when both the build package
    and its companion ``.json`` metadata file were downloaded, ``False``
    otherwise.  Progress and errors are reported on stdout.
    """

    ARCHIVE_URL = "https://archive.mozilla.org"
    NIGHTLY_LATEST_URL_FOLDER = "/pub/firefox/nightly/latest-mozilla-central/"
    # Size in bytes of each chunk streamed to disk by download_file().
    DOWNLOAD_CHUNK_SIZE = 512 * 1024
    # Per-platform naming: 'key' + '.' + 'ext' is the package file suffix,
    # 'trydl' is the platform part of the try folder name, and 'job' lists
    # the keywords that must all appear in a Treeherder job's platform.
    PLATFORM_FN_MAPPING = {
        'linux32': {
            'key': 'linux-i686',
            'ext': 'tar.bz2',
            'trydl': 'linux',
            'job': ['linux32']
        },
        'linux64': {
            'key': 'linux-x86_64',
            'ext': 'tar.bz2',
            'trydl': 'linux64',
            'job': ['linux64']
        },
        'mac': {
            'key': 'mac',
            'ext': 'dmg',
            'trydl': 'macosx64',
            'job': ['osx']
        },
        'win32': {
            'key': 'win32',
            'ext': 'zip',
            'trydl': 'win32',
            'job': ['windows', '32']
        },
        'win64': {
            'key': 'win64',
            'ext': 'zip',
            'trydl': 'win64',
            'job': ['windows', '64']
        }
    }

    def __init__(self, repo, platform, status_check):
        """Store the target repo/platform and create a Treeherder client.

        :param repo: Treeherder repository name to query (e.g. ``try``).
        :param platform: A key of ``PLATFORM_FN_MAPPING``.
        :param status_check: When truthy, the Treeherder job-status check
            is skipped before downloading a try build.
        """
        self.repo = repo
        self.platform = platform
        self.platform_option = 'opt'
        self.pushes = []
        self.skip_status_check = status_check
        self.thclient = TreeherderClient()

    def fetch_push(self, user_email, build_hash, default_count=500):
        """Return the push authored by ``user_email`` matching ``build_hash``.

        The most recent ``default_count`` pushes of the repo are scanned in
        the order Treeherder returns them.  When ``build_hash`` is ``None``
        the first push of the author is returned.  Every author push that
        is inspected is also appended to ``self.pushes``.  Returns ``None``
        when nothing matches.
        """
        wanted_author = user_email.lower()
        for push in self.thclient.get_pushes(self.repo, count=default_count):
            if push['author'].lower() != wanted_author:
                continue
            self.pushes.append(push)
            if build_hash is None or push['revision'] == build_hash:
                return push
        print("Can't find the specify build hash [%s] in resultsets!!" % build_hash)
        return None

    def get_job(self, resultset, platform_keyword_list):
        """Return the first job of ``resultset`` built for this platform.

        A job matches when its ``platform_option`` equals
        ``self.platform_option`` and every keyword of
        ``platform_keyword_list`` occurs in its ``platform``.  Returns
        ``None`` when no job matches.
        """
        jobs = self.thclient.get_jobs(self.repo, result_set_id=resultset['id'])
        for job in jobs:
            platform_matches = all(keyword in job['platform']
                                   for keyword in platform_keyword_list)
            if platform_matches and job['platform_option'] == self.platform_option:
                return job
        print("Can't find the specify platform [%s] and platform_options [%s] in jobs!!!" % (
            self.platform, self.platform_option))
        return None

    def get_files_from_remote_url_folder(self, remote_url_str):
        """Return ``{file name: href}`` for the links on a folder listing page.

        Any error (including a non-200 response) is printed and results in
        whatever was collected so far, usually an empty dict.
        """
        return_dict = {}
        try:
            response_obj = urllib2.urlopen(remote_url_str)
            if response_obj.getcode() == 200:
                for line in response_obj.readlines():
                    match = re.search(r'(?<=href=").*?(?=")', line)
                    if match:
                        href_link = match.group(0)
                        f_name = href_link.split("/")[-1]
                        return_dict[f_name] = href_link
            else:
                print("ERROR: fetch remote file list error with code [%s]" % str(
                    response_obj.getcode()))
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # urllib2.URLError and would hide the actual reason.
            print("ERROR: [%s]" % e)
        return return_dict

    def download_file(self, output_dp, download_link):
        """Stream ``download_link`` into ``output_dp`` and return the file path.

        ``output_dp`` is created when missing.  Returns ``None`` when the
        transfer or the file write fails.
        """
        print("Prepare to download the build from link [%s]" % download_link)
        # NOTE(review): verify=False disables TLS certificate verification
        # for the download; kept as-is, confirm whether this is still needed.
        response = requests.get(download_link, verify=False, stream=True)
        download_fn = download_link.split("/")[-1]
        if not os.path.exists(output_dp):
            os.makedirs(output_dp)
        download_fp = os.path.join(output_dp, download_fn)
        chunk_size = self.DOWNLOAD_CHUNK_SIZE
        try:
            # The progress total is optional: without a usable
            # content-length header tqdm simply shows no total instead of
            # the download being aborted.
            try:
                total_chunks = int(response.headers['content-length']) // chunk_size
            except (KeyError, TypeError, ValueError):
                total_chunks = None
            with open(download_fp, 'wb') as fh:
                for data in tqdm(response.iter_content(chunk_size=chunk_size),
                                 total=total_chunks):
                    fh.write(data)
            return download_fp
        except Exception as e:
            print("ERROR: [%s]" % e)
            return None

    def download_from_remote_url_folder(self, remote_url_str, output_dp):
        """Download the platform build and its json file from a folder URL.

        The folder listing is filtered for the firefox package of
        ``self.platform``; detached ``.asc`` signature files are ignored.
        When several packages match, the last one in sorted order is used.

        :returns: ``True`` when both files were downloaded, else ``False``.
        """
        # get latest nightly build list from remote url folder
        remote_file_dict = self.get_files_from_remote_url_folder(
            remote_url_str)

        if not remote_file_dict:
            print("ERROR: can't get remote file list, could be the network error, or url path[%s] wrong!!" % remote_url_str)
            return False

        if self.platform not in self.PLATFORM_FN_MAPPING:
            print("ERROR: we are currently not support the platform[%s] you specified!" % self.platform)
            print("We are currently support the platform tag: [%s]" % list(
                self.PLATFORM_FN_MAPPING.keys()))
            return False

        # filter with platform, and return file name with extension
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        matched_keyword = platform_info['key'] + "." + platform_info['ext']
        matched_file_list = [
            fn for fn in remote_file_dict
            if matched_keyword in fn and "firefox" in fn and
            not fn.endswith('.asc')
        ]
        if len(matched_file_list) != 1:
            print("WARN: the possible match file list is not equal 1, list as below: [%s]" % matched_file_list)
            if not matched_file_list:
                return False
            matched_file_list = sorted(matched_file_list)[-1:]
            print("WARN: select following file [%s]" % matched_file_list)

        # combine file name with json
        matched_file_name = matched_file_list[0]
        json_file_name = matched_file_name.replace(
            matched_keyword, platform_info['key'] + ".json")
        if json_file_name not in remote_file_dict:
            print("ERROR: can't find the json file[%s] in remote file list[%s]!" % (
                json_file_name, remote_file_dict))
            return False
        print("DEBUG: matched file name: [%s], json_file_name: [%s]" % (
            matched_file_name, json_file_name))

        # download files
        download_fx_url = self.ARCHIVE_URL + remote_file_dict[matched_file_name]
        download_fx_fp = self.download_file(output_dp, download_fx_url)
        download_json_url = self.ARCHIVE_URL + remote_file_dict[json_file_name]
        download_json_fp = self.download_file(output_dp, download_json_url)

        # check download status
        if download_fx_fp and download_json_fp:
            print("SUCCESS: build files download in [%s], [%s] " % (
                download_fx_fp, download_json_fp))
            return True
        print("ERROR: build files download in [%s,%s] " % (
            download_fx_fp, download_json_fp))
        return False

    def get_try_build(self, user_email, build_hash, output_dp):
        """Download the try build pushed by ``user_email``.

        When ``build_hash`` is ``None`` the first push found for the author
        is used.  Unless the status check is skipped, the build is only
        downloaded when the matching Treeherder job result is ``success``.

        :returns: ``True`` on a successful download, else ``False``.
        """
        resultset = self.fetch_push(user_email, build_hash)

        # check result set
        if not resultset:
            print("ERROR: can't get result set! skip download build from try server, [%s, %s]" % (
                user_email, build_hash))
            return False

        # if build hash is not provided, use the found push's revision
        if build_hash is None:
            build_hash = resultset['revision']
        print("Resultset is found, and build hash is [%s]" % build_hash)

        # compose remote folder url
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        build_folder_url_template = "%s/pub/firefox/%s-builds/%s-%s/%s-%s/"
        build_folder_url = build_folder_url_template % (
            self.ARCHIVE_URL, self.repo, user_email, build_hash, self.repo,
            platform_info['trydl'])

        # skip status check will retrieve the files list from remote folder url
        if self.skip_status_check:
            return self.download_from_remote_url_folder(
                build_folder_url, output_dp)

        job = self.get_job(resultset, platform_info['job'])
        if not job:
            print("ERROR: can't find the job!")
            return False

        job_result = job['result'].lower()
        if job_result == "success":
            return self.download_from_remote_url_folder(
                build_folder_url, output_dp)

        # This message used to be a bare string expression and was never
        # actually printed.
        print("Current job status is [%s] !!" % job_result)
        return False

    def get_nightly_build(self, output_dp):
        """Download the latest mozilla-central nightly into ``output_dp``."""
        remote_url_str = self.ARCHIVE_URL + self.NIGHTLY_LATEST_URL_FOLDER
        return self.download_from_remote_url_folder(remote_url_str, output_dp)
Example #20
0
    def get_test_packages_url(self, properties):
        """Return the URL of the test packages JSON file.

        A truthy ``test_packages_url`` entry in ``properties`` is returned
        unchanged.  Otherwise the URL is queried via ``query_file_url``.

        In case of localized daily builds we can query the en-US build to get
        the URL, but for candidate builds we need the tinderbox build
        of the first parent changeset which was not checked-in by the release
        automation process (necessary until bug 1242035 is not fixed).
        """
        preset_url = properties.get('test_packages_url')
        if preset_url:
            return preset_url

        overrides = {
            'locale': 'en-US',
            'extension': 'test_packages.json',
        }

        # Use Treeherder to query for the next revision which has Tinderbox builds
        # available. We can use this revision to retrieve the test-packages URL.
        if properties['tree'].startswith('release-'):
            platform_map = {
                'linux': {'build_platform': 'linux32'},
                'linux64': {'build_platform': 'linux64'},
                'macosx': {'build_os': 'mac', 'build_architecture': 'x86_64'},
                'macosx64': {'build_os': 'mac', 'build_architecture': 'x86_64'},
                'win32': {'build_os': 'win', 'build_architecture': 'x86'},
                'win64': {'build_os': 'win', 'build_architecture': 'x86_64'},
            }

            self.logger.info(
                'Querying tinderbox revision for {} build...'.format(
                    properties['tree']))
            revision = properties['revision'][:12]

            client = TreeherderClient(
                server_url='https://treeherder.mozilla.org')
            resultsets = client.get_resultsets(properties['branch'],
                                               tochange=revision,
                                               count=50)

            # Retrieve the option hash to filter for opt builds: the first
            # collection that contains an option named 'opt' wins.
            option_hash = None
            option_collections = client.get_option_collection_hash()
            for key, values in option_collections.iteritems():
                if any(value['name'] == 'opt' for value in values):
                    option_hash = key
                if option_hash:
                    break

            # Set filters to speed-up querying jobs
            kwargs = {
                'job_type_name': 'Build',
                'exclusion_profile': False,
                'option_collection_hash': option_hash,
                'result': 'success',
            }
            kwargs.update(platform_map[properties['platform']])

            # Take the revision of the first result set that has a
            # matching successful build job.
            for resultset in resultsets:
                kwargs['result_set_id'] = resultset['id']
                jobs = client.get_jobs(properties['branch'], **kwargs)
                if len(jobs) > 0:
                    revision = resultset['revision']
                    break

            self.logger.info(
                'Found revision for tinderbox build: {}'.format(revision))

            overrides['build_type'] = 'tinderbox'
            overrides['revision'] = revision

        # For update tests we need the test package of the target build. That allows
        # us to add fallback code in case major parts of the ui are changing in Firefox.
        target_buildid = properties.get('target_buildid')
        if target_buildid:
            overrides['build_id'] = properties['target_buildid']

        # The test package json file has a prefix with bug 1239808 fixed. Older builds need
        # a fallback to a prefix-less filename.
        try:
            self.logger.info('Querying test packages URL...')
            url = self.query_file_url(properties,
                                      property_overrides=overrides)
        except download_errors.NotFoundError:
            self.logger.info(
                'URL not found. Querying not-prefixed test packages URL...'
            )
            extension = overrides.pop('extension')
            build_url = self.query_file_url(properties,
                                            property_overrides=overrides)
            folder_url = build_url[:build_url.rfind('/')]
            url = '{}/{}'.format(folder_url, extension)
            # Only keep the fallback URL when the file really exists.
            if requests.head(url).status_code != 200:
                url = None

        self.logger.info('Found test package URL at: {}'.format(url))
        return url
Example #21
0
class Treeherder(object):
    """Wrapper class for TreeherderClient to ease the use of its API."""

    def __init__(self, application, branch, platform, server_url=TREEHERDER_URL):
        """Create a new instance of the Treeherder class.

        :param application: The name of the application to download.
        :param branch: Name of the branch.
        :param platform: Platform of the application.
        :param server_url: The URL of the Treeherder instance to access.
        """
        self.logger = logging.getLogger(__name__)

        self.client = TreeherderClient(server_url=server_url)
        self.application = application
        self.branch = branch
        self.platform = platform

    def get_treeherder_platform(self, platform):
        """Return the internal Treeherder platform identifier.

        :param platform: Platform of the application.
        :raises NotSupportedError: If ``platform`` has no PLATFORM_MAP entry.
        """
        try:
            treeherder_platform = PLATFORM_MAP[platform]
        except KeyError:
            raise NotSupportedError('Platform "{}" is not supported.'.format(platform))
        return treeherder_platform

    def query_builds_by_revision(self, revision, job_type_name='Build', debug_build=False):
        """Retrieve build folders for a given revision with the help of Treeherder.

        Any failure while querying is logged with its traceback and the
        folders collected up to that point are returned.

        :param revision: Revision of the build to download.
        :param job_type_name: Name of the job to look for. For builds it should be
            'Build', 'Nightly', and 'L10n Nightly'. Defaults to `Build`.
        :param debug_build: Download a debug build.
        :returns: List of the distinct log URLs which contain the application name.
        """
        found_folders = set()

        try:
            self.logger.info('Querying {url} for list of builds for revision: {revision}'.format(
                             url=self.client.server_url, revision=revision))

            # Retrieve the option hash to filter for type of build (opt, and debug for now)
            wanted_option = 'debug' if debug_build else 'opt'
            option_hash = None
            for key, values in self.client.get_option_collection_hash().iteritems():
                if any(value['name'] == wanted_option for value in values):
                    option_hash = key
                if option_hash:
                    break

            resultsets = self.client.get_pushes(self.branch, revision=revision)

            # Set filters to speed-up querying jobs
            kwargs = {
                'option_collection_hash': option_hash,
                'job_type_name': job_type_name,
                'exclusion_profile': False,
            }
            kwargs.update(self.get_treeherder_platform(self.platform))

            for resultset in resultsets:
                kwargs['result_set_id'] = resultset['id']
                for job in self.client.get_jobs(self.branch, **kwargs):
                    log_urls = self.client.get_job_log_url(self.branch, job_id=job['id'])
                    for log_url in log_urls:
                        folder_url = log_url['url']
                        if self.application not in folder_url:
                            continue
                        self.logger.debug('Found build folder: {}'.format(folder_url))
                        found_folders.add(folder_url)

        except Exception:
            self.logger.exception('Failure occurred when querying Treeherder for builds')

        return list(found_folders)
Example #22
0
class TriggerBuild(object):
    """Download a Firefox build and hand a job description to the Hasal agent.

    The instance is configured from a dict of environment-style values (see
    the ``ENV_KEY_*`` constants).  ``trigger()`` downloads either a try
    build or the latest nightly, writes a ``<build number>.json`` job file
    and moves it into the platform's agent configuration folder.
    """

    ARCHIVE_URL = "https://archive.mozilla.org"
    NIGHTLY_LATEST_URL_FOLDER = "/pub/firefox/nightly/latest-mozilla-central/"
    # Size in bytes of each chunk streamed to disk by download_file().
    DOWNLOAD_CHUNK_SIZE = 512 * 1024
    # Per-platform naming: 'key' + '.' + 'ext' is the package file suffix,
    # 'trydl' is the platform part of the try folder name, and 'job' lists
    # the keywords that must all appear in a Treeherder job's platform.
    PLATFORM_FN_MAPPING = {
        'linux32': {
            'key': 'linux-i686',
            'ext': 'tar.bz2',
            'trydl': 'linux',
            'job': ['linux32']
        },
        'linux64': {
            'key': 'linux-x86_64',
            'ext': 'tar.bz2',
            'trydl': 'linux64',
            'job': ['linux64']
        },
        'mac': {
            'key': 'mac',
            'ext': 'dmg',
            'trydl': 'macosx64',
            'job': ['osx']
        },
        'win32': {
            'key': 'win32',
            'ext': 'zip',
            'trydl': 'win32',
            'job': ['windows', '32']
        },
        'win64': {
            'key': 'win64',
            'ext': 'zip',
            'trydl': 'win64',
            'job': ['windows', '64']
        }
    }
    ENV_KEY_TRY_REPO_USER_EMAIL = "EMAIL"
    ENV_KEY_ENABLE_WIN32 = "WIN32_FLAG"
    ENV_KEY_SKIP_STATUS_CHECK = "SKIP_STATUS_CHECK"
    ENV_KEY_OUTPUT_DP = "OUTPUT_DP"
    ENV_KEY_BUILD_HASH = "BUILD_HASH"
    ENV_KEY_BUILD_NO = "BUILD_NUMBER"
    REPO_NAME = {'TRY': "try", "NIGHTLY": "nightly"}
    DEFAULT_AGENT_CONF_DIR_LINUX = "/home/hasal/Hasal/agent"
    DEFAULT_AGENT_CONF_DIR_MAC = "/Users/hasal/Hasal/agent"
    DEFAULT_AGENT_CONF_DIR_WIN = "C:\\Users\\user\\Hasal\\agent"
    DEFAULT_AGENT_STATUS_DIR = "agent_status"
    DEFAULT_AGENT_JOB_STATUS = {
        'BEGIN': 'begin',
        'FINISH': 'finish',
        'EXCEPTION': 'exception'
    }
    # Number of polls done by check_agent_status(); polls are 10 s apart.
    DEFAULT_AGENT_JOB_WACTH_TIMEOUT = 180

    def __init__(self, input_env_data):
        """Create the Treeherder client and read the configuration.

        :param input_env_data: Mapping of configuration values; keys are
            upper-cased before being interpreted.
        """
        self.platform_option = 'opt'
        self.thclient = TreeherderClient()
        self.resultsets = []
        self.env_data = {
            key.upper(): value
            for key, value in input_env_data.items()
        }
        self.dispatch_variables(self.env_data)

    def dispatch_variables(self, input_env_data):
        """Populate the instance attributes from ``input_env_data``.

        Sets ``repo`` (and ``user_email`` for try), ``platform``,
        ``skip_status_check``, ``build_hash``, ``output_dp``,
        ``jenkins_build_no`` and ``HASAL_JSON_FN``.
        """
        # if user email not in environment data, repo will be the nightly
        if self.ENV_KEY_TRY_REPO_USER_EMAIL in input_env_data:
            self.user_email = input_env_data[self.ENV_KEY_TRY_REPO_USER_EMAIL]
            self.repo = self.REPO_NAME['TRY']
        else:
            self.repo = self.REPO_NAME['NIGHTLY']

        # check current platform, windows will double check the --win32 flag
        # enabled or not.  sys.platform is "linux2" on Python 2 and "linux"
        # on Python 3, hence the prefix test.
        if sys.platform.startswith("linux"):
            self.platform = "linux64"
        elif sys.platform == "darwin":
            self.platform = "mac"
        elif input_env_data.get(self.ENV_KEY_ENABLE_WIN32) == 'true':
            self.platform = "win32"
        else:
            self.platform = "win64"

        # assign skip status check to variable
        self.skip_status_check = (
            input_env_data.get(self.ENV_KEY_SKIP_STATUS_CHECK) == 'true')

        # assign build hash to variable
        self.build_hash = input_env_data.get(self.ENV_KEY_BUILD_HASH)

        # assign output dp to variable
        if self.ENV_KEY_OUTPUT_DP in input_env_data:
            self.output_dp = input_env_data[self.ENV_KEY_OUTPUT_DP]
        else:
            self.output_dp = os.getcwd()

        # assign build number to variable
        self.jenkins_build_no = input_env_data.get(self.ENV_KEY_BUILD_NO, 0)
        self.HASAL_JSON_FN = str(self.jenkins_build_no) + ".json"

    def check_agent_status(self):
        """Wait until the agent has no unfinished job.

        Status files in ``<cwd>/agent_status`` are named
        ``<job id>.<status>``; entries that do not follow that pattern are
        ignored.  Returns ``True`` when there is no status entry, or when
        the latest status of the highest job id is ``finish`` (all status
        files are then removed).  Otherwise the folder is polled again
        after 10 seconds; ``False`` is returned once
        ``DEFAULT_AGENT_JOB_WACTH_TIMEOUT`` polls are exhausted.
        """
        agent_status_dir_path = os.path.join(os.getcwd(),
                                             self.DEFAULT_AGENT_STATUS_DIR)
        for _ in range(self.DEFAULT_AGENT_JOB_WACTH_TIMEOUT):
            print("INFO: housekeeping the agent status folder [%s]" % agent_status_dir_path)
            if not os.path.exists(agent_status_dir_path):
                os.mkdir(agent_status_dir_path)
            agent_status_file_list = os.listdir(agent_status_dir_path)
            print("DEBUG: current agent status file list [%s]" % agent_status_file_list)

            # collect (job id, status) pairs, skipping stray files such as
            # ".DS_Store" which used to crash the int() conversion
            job_entries = []
            for file_name in agent_status_file_list:
                name_parts = file_name.split(".")
                if len(name_parts) >= 2 and name_parts[0].isdigit():
                    job_entries.append((int(name_parts[0]), name_parts[1]))

            if not job_entries:
                return True

            # get latest agent id and its latest status
            # agent status will sort by alphabetical, so the last one will be the latest status
            current_id = max(job_id for job_id, _status in job_entries)
            current_job_status = sorted(
                status for job_id, status in job_entries
                if job_id == current_id)[-1]

            if current_job_status == self.DEFAULT_AGENT_JOB_STATUS['FINISH']:
                for target_name in agent_status_file_list:
                    os.remove(os.path.join(agent_status_dir_path, target_name))
                return True
            time.sleep(10)
        return False

    def trigger(self):
        """Download the build, write the Hasal job file and exit the process.

        Exits with status 1 when the agent stays busy or the download
        fails, and with status 0 after the job file has been moved to the
        agent configuration folder.
        """
        # check agent status folder
        if self.check_agent_status() is False:
            sys.exit(1)

        # download build
        if self.repo == self.REPO_NAME['TRY']:
            download_fx_fp, download_json_fp = self.get_try_build(
                self.user_email, self.build_hash, self.output_dp)
        else:
            download_fx_fp, download_json_fp = self.get_nightly_build(
                self.output_dp)

        if download_fx_fp is None or download_json_fp is None:
            print("ERROR: something wrong with your build download process, please check the setting and job status.")
            sys.exit(1)

        # generate hasal.json data
        with open(download_json_fp) as dl_json_fh:
            dl_json_data = json.load(dl_json_fh)
        perfherder_revision = dl_json_data['moz_source_stamp']
        build_pkg_platform = dl_json_data['moz_pkg_platform']
        # mapping the perfherder pkg platform to nomenclature of builddot
        builddot_mapping_platform = {
            "linux-i686": "linux32",
            "linux-x86_64": "linux64",
            "mac": "osx-10-10",
            "win32": "windows7-32",
            "win64": "windows8-64"
        }
        write_data = copy.deepcopy(self.env_data)
        write_data['FX-DL-PACKAGE-PATH'] = download_fx_fp
        write_data['FX-DL-JSON-PATH'] = download_json_fp
        write_data['--PERFHERDER-REVISION'] = perfherder_revision
        write_data['--PERFHERDER-PKG-PLATFORM'] = builddot_mapping_platform[
            build_pkg_platform]
        with open(self.HASAL_JSON_FN, "w") as write_fh:
            json.dump(write_data, write_fh)

        created_json_fp = os.path.join(os.getcwd(), self.HASAL_JSON_FN)
        if os.path.exists(created_json_fp):
            print("INFO: current json file created at [%s]" % created_json_fp)
        else:
            print("ERROR: json file not exist in expected path [%s]" % created_json_fp)

        # create agent status folder
        agent_status_dir_path = os.path.join(os.getcwd(),
                                             self.DEFAULT_AGENT_STATUS_DIR)
        if not os.path.exists(agent_status_dir_path):
            os.mkdir(agent_status_dir_path)

        # move to agent config folder
        if sys.platform.startswith("linux"):
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_LINUX
        elif sys.platform == "darwin":
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_MAC
        else:
            agent_conf_dir = self.DEFAULT_AGENT_CONF_DIR_WIN
        new_hasal_json_fp = os.path.join(agent_conf_dir, self.HASAL_JSON_FN)
        os.rename(self.HASAL_JSON_FN, new_hasal_json_fp)

        if os.path.exists(new_hasal_json_fp):
            print("INFO: hasal json file move to new location [%s]" % new_hasal_json_fp)
        else:
            print("ERROR: hasal json file in not in new location [%s]" % new_hasal_json_fp)
        sys.exit(0)

    def fetch_resultset(self, user_email, build_hash, default_count=500):
        """Return the result set authored by ``user_email`` for ``build_hash``.

        The most recent ``default_count`` result sets of the repo are
        scanned in the order Treeherder returns them.  When ``build_hash``
        is ``None`` the first result set of the author is returned.  Every
        author result set that is inspected is also appended to
        ``self.resultsets``.  Returns ``None`` when nothing matches.
        """
        wanted_author = user_email.lower()
        tmp_resultsets = self.thclient.get_resultsets(self.repo,
                                                      count=default_count)
        for resultset in tmp_resultsets:
            if resultset['author'].lower() != wanted_author:
                continue
            self.resultsets.append(resultset)
            if build_hash is None or resultset['revision'] == build_hash:
                return resultset
        print("Can't find the specify build hash [%s] in resultsets!!" % build_hash)
        return None

    def get_job(self, resultset, platform_keyword_list):
        """Return the first job of ``resultset`` built for this platform.

        A job matches when its ``platform_option`` equals
        ``self.platform_option`` and every keyword of
        ``platform_keyword_list`` occurs in its ``platform``.  Returns
        ``None`` when no job matches.
        """
        jobs = self.thclient.get_jobs(self.repo, result_set_id=resultset['id'])
        for job in jobs:
            platform_matches = all(keyword in job['platform']
                                   for keyword in platform_keyword_list)
            if platform_matches and job['platform_option'] == self.platform_option:
                return job
        print("Can't find the specify platform [%s] and platform_options [%s] in jobs!!!" % (
            self.platform, self.platform_option))
        return None

    def get_files_from_remote_url_folder(self, remote_url_str):
        """Return ``{file name: href}`` for the links on a folder listing page.

        Any error (including a non-200 response) is printed and results in
        whatever was collected so far, usually an empty dict.
        """
        return_dict = {}
        try:
            response_obj = urllib2.urlopen(remote_url_str)
            if response_obj.getcode() == 200:
                for line in response_obj.readlines():
                    match = re.search(r'(?<=href=").*?(?=")', line)
                    if match:
                        href_link = match.group(0)
                        f_name = href_link.split("/")[-1]
                        return_dict[f_name] = href_link
            else:
                print("ERROR: fetch remote file list error with code [%s]" % str(
                    response_obj.getcode()))
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # urllib2.URLError and would hide the actual reason.
            print("ERROR: [%s]" % e)
        return return_dict

    def download_file(self, output_dp, download_link):
        """Stream ``download_link`` into ``output_dp`` and return the file path.

        ``output_dp`` is created when missing.  Returns ``None`` when the
        transfer or the file write fails.
        """
        print("Prepare to download the build from link [%s]" % download_link)
        # NOTE(review): verify=False disables TLS certificate verification
        # for the download; kept as-is, confirm whether this is still needed.
        response = requests.get(download_link, verify=False, stream=True)
        download_fn = download_link.split("/")[-1]
        if not os.path.exists(output_dp):
            os.makedirs(output_dp)
        download_fp = os.path.join(output_dp, download_fn)
        chunk_size = self.DOWNLOAD_CHUNK_SIZE
        try:
            # The progress total is optional: without a usable
            # content-length header tqdm simply shows no total instead of
            # the download being aborted.
            try:
                total_chunks = int(response.headers['content-length']) // chunk_size
            except (KeyError, TypeError, ValueError):
                total_chunks = None
            with open(download_fp, 'wb') as fh:
                for data in tqdm(response.iter_content(chunk_size=chunk_size),
                                 total=total_chunks):
                    fh.write(data)
            return download_fp
        except Exception as e:
            print("ERROR: [%s]" % e)
            return None

    def download_from_remote_url_folder(self, remote_url_str, output_dp):
        """Download the platform build and its json file from a folder URL.

        The folder listing is filtered for the firefox package of
        ``self.platform``; detached ``.asc`` signature files are ignored.
        When several packages match, the last one in sorted order is used.

        :returns: ``(package path, json path)`` on success.  On failure a
            falsy value (``False`` or ``None``) is returned, so callers that
            unpack the result should go through ``get_try_build`` or
            ``get_nightly_build``.
        """
        # get latest nightly build list from remote url folder
        remote_file_dict = self.get_files_from_remote_url_folder(
            remote_url_str)

        if not remote_file_dict:
            print("ERROR: can't get remote file list, could be the network error, or url path[%s] wrong!!" % remote_url_str)
            return False

        if self.platform not in self.PLATFORM_FN_MAPPING:
            print("ERROR: we are currently not support the platform[%s] you specified!" % self.platform)
            print("We are currently support the platform tag: [%s]" % list(
                self.PLATFORM_FN_MAPPING.keys()))
            return False

        # filter with platform, and return file name with extension
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        matched_keyword = platform_info['key'] + "." + platform_info['ext']
        matched_file_list = [
            fn for fn in remote_file_dict
            if matched_keyword in fn and 'firefox' in fn and
            not fn.endswith('.asc')
        ]
        if len(matched_file_list) != 1:
            print("WARN: the possible match file list is not equal 1, list as below: [%s]" % matched_file_list)
            if not matched_file_list:
                return False
            matched_file_list = sorted(matched_file_list)[-1:]
            print("WARN: select following file [%s]" % matched_file_list)

        # combine file name with json
        matched_file_name = matched_file_list[0]
        json_file_name = matched_file_name.replace(
            matched_keyword, platform_info['key'] + ".json")
        if json_file_name not in remote_file_dict:
            print("ERROR: can't find the json file[%s] in remote file list[%s]!" % (
                json_file_name, remote_file_dict))
            return False
        print("DEBUG: matched file name: [%s], json_file_name: [%s]" % (
            matched_file_name, json_file_name))

        # download files
        download_fx_url = self.ARCHIVE_URL + remote_file_dict[matched_file_name]
        download_fx_fp = self.download_file(output_dp, download_fx_url)
        download_json_url = self.ARCHIVE_URL + remote_file_dict[json_file_name]
        download_json_fp = self.download_file(output_dp, download_json_url)

        # check download status
        if download_fx_fp and download_json_fp:
            print("SUCCESS: build files download in [%s], [%s] " % (
                download_fx_fp, download_json_fp))
            return (download_fx_fp, download_json_fp)
        print("ERROR: build files download in [%s,%s] " % (
            download_fx_fp, download_json_fp))
        return None

    def _download_path_pair(self, remote_url_str, output_dp):
        """Download from a folder URL, always returning a 2-tuple.

        Maps the falsy failure values of download_from_remote_url_folder to
        ``(None, None)`` so that trigger() can unpack the result safely.
        """
        downloaded = self.download_from_remote_url_folder(remote_url_str,
                                                          output_dp)
        if not downloaded:
            return (None, None)
        return downloaded

    def get_try_build(self, user_email, build_hash, output_dp):
        """Download the try build pushed by ``user_email``.

        When ``build_hash`` is ``None`` the first result set found for the
        author is used.  Unless the status check is skipped, the build is
        only downloaded when the matching Treeherder job result is
        ``success``.

        :returns: ``(package path, json path)``, or ``(None, None)`` when
            nothing was downloaded.
        """
        resultset = self.fetch_resultset(user_email, build_hash)

        # check result set
        if not resultset:
            print("ERROR: can't get result set! skip download build from try server, [%s, %s]" % (
                user_email, build_hash))
            return (None, None)

        # if build hash is not provided, use the found result set's revision
        if build_hash is None:
            build_hash = resultset['revision']
        print("Resultset is found, and build hash is [%s]" % build_hash)

        # compose remote folder url
        platform_info = self.PLATFORM_FN_MAPPING[self.platform]
        build_folder_url_template = "%s/pub/firefox/%s-builds/%s-%s/%s-%s/"
        build_folder_url = build_folder_url_template % (
            self.ARCHIVE_URL, self.repo, user_email, build_hash, self.repo,
            platform_info['trydl'])

        # skip status check will retrieve the files list from remote folder url
        if self.skip_status_check:
            return self._download_path_pair(build_folder_url, output_dp)

        job = self.get_job(resultset, platform_info['job'])
        if not job:
            print("ERROR: can't find the job!")
            return (None, None)

        job_result = job['result'].lower()
        if job_result == "success":
            return self._download_path_pair(build_folder_url, output_dp)

        print("WARNING: Current job status is [%s] !! Your build will download when job status is success" % job_result)
        return (None, None)

    def get_nightly_build(self, output_dp):
        """Download the latest mozilla-central nightly into ``output_dp``.

        :returns: ``(package path, json path)``, or ``(None, None)`` when
            nothing was downloaded.
        """
        remote_url_str = self.ARCHIVE_URL + self.NIGHTLY_LATEST_URL_FOLDER
        return self._download_path_pair(remote_url_str, output_dp)
Example #23
0
class Treeherder(object):
    """Wrapper class for TreeherderClient to ease the use of its API."""

    def __init__(self,
                 application,
                 branch,
                 platform,
                 server_url=TREEHERDER_URL):
        """Create a new instance of the Treeherder class.

        :param application: The name of the application to download.
        :param branch: Name of the branch.
        :param platform: Platform of the application.
        :param server_url: The URL of the Treeherder instance to access.
        """
        self.logger = logging.getLogger(__name__)

        self.client = TreeherderClient(server_url=server_url)
        self.application = application
        self.branch = branch
        self.platform = platform

    def get_treeherder_platform(self, platform):
        """Return the internal Treeherder platform identifier.

        :param platform: Platform of the application.
        :raises NotSupportedError: If `platform` is not a key of PLATFORM_MAP.
        """
        try:
            return PLATFORM_MAP[platform]
        except KeyError:
            raise NotSupportedError(
                'Platform "{}" is not supported.'.format(platform))

    def _find_option_collection_hash(self, debug_build):
        """Return the option collection hash for the wanted build type.

        :param debug_build: Look for the 'debug' option if true, else 'opt'.
        :returns: The first matching hash key, or None if nothing matches.
        """
        wanted_name = 'debug' if debug_build else 'opt'
        # items() rather than iteritems(): the latter does not exist on
        # Python 3, where the resulting AttributeError would be swallowed by
        # the caller's exception handler and no builds would be returned.
        option_collections = self.client.get_option_collection_hash()
        for key, values in option_collections.items():
            if any(value['name'] == wanted_name for value in values):
                return key
        return None

    def query_builds_by_revision(self,
                                 revision,
                                 job_type_name='Build',
                                 debug_build=False):
        """Retrieve build folders for a given revision with the help of Treeherder.

        Any exception raised while querying is logged and not propagated; the
        URLs collected up to that point (possibly none) are still returned.

        :param revision: Revision of the build to download.
        :param job_type_name: Name of the job to look for. For builds it should be
            'Build', 'Nightly', and 'L10n Nightly'. Defaults to `Build`.
        :param debug_build: Download a debug build.
        :returns: List of unique log URLs that contain the application name.
            The order of the entries is not defined.
        """
        builds = set()

        try:
            self.logger.info(
                'Querying {url} for list of builds for revision: {revision}'.
                format(url=self.client.server_url, revision=revision))

            # Retrieve the option hash to filter for type of build (opt, and debug for now)
            option_hash = self._find_option_collection_hash(debug_build)

            resultsets = self.client.get_resultsets(self.branch,
                                                    revision=revision)

            # Set filters to speed-up querying jobs
            kwargs = {
                'option_collection_hash': option_hash,
                'job_type_name': job_type_name,
                'exclusion_profile': False,
            }
            kwargs.update(self.get_treeherder_platform(self.platform))

            for resultset in resultsets:
                kwargs['result_set_id'] = resultset['id']
                jobs = self.client.get_jobs(self.branch, **kwargs)
                for job in jobs:
                    log_urls = self.client.get_job_log_url(self.branch,
                                                           job_id=job['id'])
                    for log_url in log_urls:
                        url = log_url['url']
                        # Only URLs mentioning the application are kept.
                        if self.application in url:
                            self.logger.debug(
                                'Found build folder: {}'.format(url))
                            builds.add(url)

        except Exception:
            # Deliberate best effort: report the failure, return what we have.
            self.logger.exception(
                'Failure occurred when querying Treeherder for builds')

        return list(builds)
Example #24
0
    parser.add_argument(
        "--host", default="localhost", help="Host to compare. It defaults to localhost"
    )
    parser.add_argument("--revision", required=True, help="Revision to compare")
    parser.add_argument(
        "--project",
        default="mozilla-central",
        help="Project to compare. It defaults to mozilla-central",
    )

    args = parser.parse_args()

    th_instance = TreeherderClient(server_url=HOSTS[args.host])
    th_instance_pushid = th_instance.get_pushes(args.project, revision=args.revision)[0]["id"]
    th_instance_jobs = (
        th_instance.get_jobs(args.project, push_id=th_instance_pushid, count=None) or []
    )

    production = TreeherderClient(server_url=HOSTS["production"])
    production_pushid = production.get_pushes(args.project, revision=args.revision)[0]["id"]
    production_jobs = production.get_jobs(args.project, push_id=production_pushid, count=None)

    production_dict = {}
    for job in production_jobs:
        production_dict[job["job_guid"]] = job

    th_instance_dict = {}
    th_instance_not_found = []
    for job in th_instance_jobs:
        production_job = production_dict.get(job["job_guid"])
        if production_job is None:
Example #25
0
def retrieve_test_logs(repo, revision, platform='linux64',
                       cache_dir=None, use_cache=True,
                       warning_re=WARNING_RE):
    """
    Retrieves and processes the test logs for the given revision.

    Returns list of processed files.
    """
    if not cache_dir:
        cache_dir = "%s-%s-%s" % (repo, revision, platform)

    cache = logspam.cache.Cache(cache_dir, warning_re)

    cache_dir_exists = os.path.isdir(cache_dir)
    if cache_dir_exists and use_cache:
        # We already have logs for this revision.
        print "Using cached data"
        try:
            return cache.read_results()
        except logspam.cache.CacheFileNotFoundException as e:
            print "Cache file for %s not found" % warning_re
            print e

    client = TreeherderClient()
    print "getting result set"
    pushes = client.get_pushes(repo, revision=revision)
    print "pushes = client.get_pushes('%s', revision='%s')" % (repo, revision)
    print "got pushes"
    if not pushes:
        print "Failed to find %s in %s" % (revision, repo)
        return None

    print "getting jobs"
    for x in range(5):
        try:
            # option_collection_hash is just the convoluted way of specifying
            # we want a debug build.
            print "jobs = client.get_jobs('%s',result_set_id=%d, count=5000, platform='%s', option_collection_hash='%s')" % (
                    repo, pushes[0]['id'], platform, DEBUG_OPTIONHASH)
            jobs = client.get_jobs(repo,
                                   result_set_id=pushes[0]['id'],
                                   count=5000, # Just make this really large to avoid pagination
                                   platform=platform,
                                   option_collection_hash=DEBUG_OPTIONHASH,
                                   state='completed')
            break
        except requests.exceptions.ConnectionError:
            pass

    if not jobs:
        print "No jobs found for %s %s" % (revision, platform)
        import traceback
        traceback.print_exc()
        return None

    print "got jobs"

    print "getting %d job log urls" % len(jobs)
    job_ids = [ job['id'] for job in jobs ]
    print job_ids
    for x in range(5):
        logs = []
        try:
            for y in range(0, len(job_ids), 100):
                logs = logs + client.get_job_log_url(repo, job_id=job_ids[y:y+100])
            job_logs = logs
            break
        except requests.exceptions.ConnectionError, e:
            pass