Example #1
    def handle(self, *args, **options):
        """ Store pushlog data in the database. """

        repo_host = options.get("repo_host")
        enddate = options.get("enddate")
        numdays = options.get("numdays")
        hours = options.get("hours")
        branch = options.get("branch")
        verbosity = options.get("verbosity")
        project = options.get("project")

        if not repo_host:
            raise CommandError("You must supply a host name for the repo pushlogs " +
                         "to store: --repo_host hostname")

        if not numdays and not hours:
            raise CommandError("You must supply the number of days or hours of data.")

        if numdays:
            try:
                numdays = int(numdays)
            except ValueError:
                raise CommandError("numdays must be an integer.")

        if hours:
            try:
                hours = int(hours)
            except ValueError:
                raise CommandError("hours must be an integer.")

        lock = FileLock(self.LOCK_FILE)
        try:
            lock.acquire(timeout=0)
            try:
                plm = PushLogModel(project=project, out=self.stdout, verbosity=verbosity)

                # store the pushlogs for the branch specified, or all branches
                summary = plm.store_pushlogs(repo_host, numdays, hours, enddate, branch)
                self.println(("Branches: {0}\nPushlogs stored: {1}, skipped: {2}\n" +
                              "Changesets stored: {3}, skipped: {4}").format(
                        summary["branches"],
                        summary["pushlogs_stored"],
                        summary["pushlogs_skipped"],
                        summary["changesets_stored"],
                        summary["changesets_skipped"],
                        ))
                plm.disconnect()

            finally:
                lock.release()

        except AlreadyLocked:
            self.println("This command is already being run elsewhere.  Please try again later.")
Example #2
    def handle_project(self, project, **options):
        def to_seconds(td):
            # Converts a timedelta to seconds; cf. timedelta.total_seconds().
            return (td.microseconds +
                    (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

        numdays = int(options.get("numdays", 1))
        now = int(time.time())
        time_constraint = now - to_seconds(timedelta(numdays))

        mtm = MetricsTestModel(project)

        test_run_ids = mtm.get_test_runs_not_in_all_dimensions(time_constraint)
        self.stdout.write("test run ids {0}\n".format(str(len(test_run_ids))))

        #Make a list of test_run_id chunks to iterate over
        chunk_size = 20
        test_run_id_chunks = [
            test_run_ids[i:i + chunk_size]
            for i in range(0, len(test_run_ids), chunk_size)
        ]

        plm = PushLogModel()

        for ids in test_run_id_chunks:

            self.stdout.write("Processing ids {0}\n".format(str(ids)))

            revisions_without_push_data = mtm.load_test_data_all_dimensions(
                ids)

            if revisions_without_push_data:

                revision_nodes = {}

                for revision in revisions_without_push_data:

                    node = plm.get_node_from_revision(
                        revision, revisions_without_push_data[revision])

                    revision_nodes[revision] = node

                mtm.set_push_data_all_dimensions(revision_nodes)

        plm.disconnect()
        mtm.disconnect()
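
The fixed-size slicing of test_run_ids above is a small reusable idiom; a minimal generator version of the same chunking (helper name hypothetical):

def chunked(items, chunk_size=20):
    """Yield successive chunk_size-length slices of a list."""
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]

# Equivalent to building test_run_id_chunks up front:
# for ids in chunked(test_run_ids, 20): ...
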
Example #3
    def handle_project(self, project, **options):

        self.stdout.write("Processing project {0}\n".format(project))

        pushlog_project = options.get("pushlog_project", 'pushlog')
        loadlimit = int(options.get("loadlimit", 1))
        debug = options.get("debug", None)

        test_run_ids = []
        ptm = PerformanceTestModel(project)
        test_run_ids = ptm.process_objects(loadlimit)
        ptm.disconnect()

        """
        metrics_exclude_projects = set(['b2g', 'games', 'jetperf', 'marketapps', 'microperf', 'stoneridge', 'test', 'webpagetest'])
        if project not in metrics_exclude_projects:
            #minimum required number of replicates for
            #metrics processing
            replicate_min = 5
            compute_test_run_metrics(
                project, pushlog_project, debug, replicate_min, test_run_ids
                )
        """

        mtm = MetricsTestModel(project)
        revisions_without_push_data = mtm.load_test_data_all_dimensions(
            test_run_ids)

        if revisions_without_push_data:

            revision_nodes = {}
            plm = PushLogModel(pushlog_project)

            for revision in revisions_without_push_data:

                node = plm.get_node_from_revision(
                    revision, revisions_without_push_data[revision])

                revision_nodes[revision] = node

            plm.disconnect()
            mtm.set_push_data_all_dimensions(revision_nodes)

        mtm.disconnect()
Example #4
    def handle_project(self, project, **options):

        def to_seconds(td):
            # Converts a timedelta to seconds; cf. timedelta.total_seconds().
            return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

        numdays = int(options.get("numdays", 1))
        now = int(time.time())
        time_constraint = now - to_seconds(timedelta(numdays))

        mtm = MetricsTestModel(project)

        test_run_ids = mtm.get_test_runs_not_in_all_dimensions(time_constraint)
        self.stdout.write("test run ids {0}\n".format(str(len(test_run_ids))))

        #Make a list of test_run_id chunks to iterate over
        chunk_size = 20
        test_run_id_chunks = [
            test_run_ids[i:i + chunk_size] for i in range(0, len(test_run_ids), chunk_size)
            ]

        plm = PushLogModel()

        for ids in test_run_id_chunks:

            self.stdout.write("Processing ids {0}\n".format(str(ids)))

            revisions_without_push_data = mtm.load_test_data_all_dimensions(ids)

            if revisions_without_push_data:

                revision_nodes = {}

                for revision in revisions_without_push_data:

                    node = plm.get_node_from_revision(
                        revision, revisions_without_push_data[revision])

                    revision_nodes[revision] = node

                mtm.set_push_data_all_dimensions(revision_nodes)

        plm.disconnect()
        mtm.disconnect()
Example #5
def compute_test_run_metrics(project,
                             pushlog_project,
                             debug,
                             replicate_min,
                             test_run_ids=None):
    """
    Runs all metric tests and associated summaries on a list of test run ids
    """
    ###
    #Ensure that test_run_ids is iterable; if process_objects generates
    #an error, test_run_ids may be explicitly set to None.
    ###
    test_run_ids = test_run_ids or []

    ##
    #Get test data for test run ids
    ##
    plm = PushLogModel(pushlog_project)
    mtm = MetricsTestModel(project)

    #####
    #We don't know if we need the pushlog, or for what branches
    #it will be required.  Make sure to only retrieve once for each
    #branch encountered and only when we need it.
    ####
    pushlog = {}

    #####
    #This data structure is used to look up the index position
    #of a revision in the push log to start walking from
    #####
    pushlog_lookup = {}

    for test_run_id in test_run_ids:

        child_test_data = mtm.get_test_values_by_test_run_id(test_run_id)

        if not child_test_data:
            msg = u"No test data available for test run id {0}".format(
                test_run_id)
            println(msg, debug)
            continue

        first_key = _get_first_mkey(child_test_data)

        rep_count = len(child_test_data[first_key]['values'])

        test_name = child_test_data[first_key]['ref_data']['test_name']

        child_revision, push_node, branch = _get_revision_and_push_node(
            plm, child_test_data, first_key)

        base_message = u"{0} {1}".format(child_revision, str(test_run_id))

        if not check_run_conditions(test_name, rep_count, push_node, branch,
                                    replicate_min, debug):
            println(u"Not able to run {0}\n".format(base_message), debug)
            continue

        #The test and its replicates pass the run conditions
        println(u"Running {0}".format(base_message), debug)

        stored_metric_keys = []

        try:

            stored_metric_keys = _run_metrics(test_run_id, mtm, plm,
                                              child_test_data, pushlog,
                                              pushlog_lookup, child_revision,
                                              push_node, branch, test_name,
                                              debug)

        except Exception as e:

            _handle_exception(mtm, e, test_name, child_revision, test_run_id,
                              compute_test_run_metrics.__name__, debug)

        try:

            _run_summary(test_run_id, mtm, plm, child_revision,
                         child_test_data, stored_metric_keys, push_node, debug)

        except Exception as e:

            _handle_exception(mtm, e, test_name, child_revision, test_run_id,
                              compute_test_run_metrics.__name__, debug)

        println(u"\tProcessing complete for {0}\n".format(base_message), debug)

    plm.disconnect()
    mtm.disconnect()
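
The pushlog and pushlog_lookup dictionaries above form a lazy, per-branch cache, so each branch's push log is fetched at most once and only when a metric test actually needs it. A rough sketch of that caching pattern, where fetch and truncate stand in for the pushlog and revision helpers used above:

def get_cached_pushlog(branch, pushlog, pushlog_lookup, fetch, truncate):
    """Fetch a branch's push log once and index its revisions by position."""
    if branch not in pushlog:
        nodes = fetch(branch)
        pushlog[branch] = nodes
        # Map truncated revision -> index so a test can start walking
        # the push log from the position of a known revision.
        pushlog_lookup[branch] = dict(
            (truncate(node['node']), index)
            for index, node in enumerate(nodes)
        )
    return pushlog[branch], pushlog_lookup[branch]
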
Example #6
def compute_test_run_metrics(
    project, pushlog_project, debug, replicate_min, test_run_ids=None
    ):
    """
    Runs all metric tests and associated summaries on a list of test run ids
    """
    ###
    #Ensure that test_run_ids is iterable; if process_objects generates
    #an error, test_run_ids may be explicitly set to None.
    ###
    test_run_ids = test_run_ids or []

    ##
    #Get test data for test run ids
    ##
    plm = PushLogModel(pushlog_project)
    mtm = MetricsTestModel(project)

    #####
    #We don't know if we need the pushlog, or for what branches
    #it will be required.  Make sure to only retrieve once for each
    #branch encountered and only when we need it.
    ####
    pushlog = {}

    #####
    #This data structure is used to look up the index position
    #of a revision in the push log to start walking from
    #####
    pushlog_lookup = {}

    for test_run_id in test_run_ids:

        child_test_data = mtm.get_test_values_by_test_run_id(test_run_id)

        if not child_test_data:
            msg = u"No test data available for test run id {0}".format(
                test_run_id
                )
            println(msg, debug)
            continue

        first_key = _get_first_mkey(child_test_data)

        rep_count = len(child_test_data[first_key]['values'])

        test_name = child_test_data[first_key]['ref_data']['test_name']

        child_revision, push_node, branch = _get_revision_and_push_node(
            plm, child_test_data, first_key
            )

        base_message = u"{0} {1}".format(child_revision, str(test_run_id))

        if not check_run_conditions(
            test_name, rep_count, push_node, branch, replicate_min, debug
            ):
            println(u"Not able to run {0}\n".format(base_message), debug)
            continue

        #The test and its replicates pass the run conditions
        println(u"Running {0}".format(base_message), debug)

        stored_metric_keys = []

        try:

            stored_metric_keys = _run_metrics(
                test_run_id, mtm, plm, child_test_data, pushlog,
                pushlog_lookup, child_revision, push_node, branch,
                test_name, debug
                )

        except Exception as e:

            _handle_exception(
                mtm, e, test_name, child_revision, test_run_id,
                compute_test_run_metrics.__name__, debug
                )

        try:

            _run_summary(
                test_run_id, mtm, plm, child_revision, child_test_data,
                stored_metric_keys, push_node, debug
                )

        except Exception as e:

            _handle_exception(
                mtm, e, test_name, child_revision, test_run_id,
                compute_test_run_metrics.__name__, debug
                )

        println(
            u"\tProcessing complete for {0}\n".format(base_message),
            debug
            )

    plm.disconnect()
    mtm.disconnect()
Example #7
def run_metrics(project, pushlog_project, numdays, daysago):
    """
        This function retrieves the push log for a given branch and
    iterates over each push in ascending order implementing the following
    rule set:

    1.) If a revision associated with a push node has no data in the
        perftest schema skip it.

    2.) If a revision associated with a push node already has metrics
        data associated with it in the perftest schema skip the metrics
        datums that already have data.

    3.) If test data is present for a revision associated with a push
        node, implement the following for all test data metric datums
        that have no associated metric data:

        3a.) If a threshold is present for a given metric datum,
             use the test data associated with it to compute the
             results of the associated metric method.  Store the
             metric test results.

             If the metric method test succeeds and the push date
             associated with the revision is greater than or equal
             to the threshold push date, update the threshold.

        3b.) If no threshold is present for a given metric datum,
             walk through consecutive pushes in the push log until
             a parent is found that passes the metric test with the
             child provided.  Store the test results and the threshold
             associated with the metric datum.

        If the immediate parent push does not have data in datazilla this
    could be due to the asynchronous build/test environment sending data in
    a different order than the pushlog push order.  How do we distinguish
    between when this occurs and when the data has never been sent to
    datazilla for a particular push?  These two scenarios are
    indistinguishable given the information this system has access to.
        The algorithm implemented uses test run data associated with a
    metric threshold if it's available for a particular metric datum, even
    if that threshold is not associated with the parent push.  There are
    several edge cases that can occur in the build environment that cause a
    push found in the push log to never have performance test data
    generated.  Because of this we cannot assume every push will have a
    parent with test data.
        If the child push is from a date before the metric threshold its
    test results will not be used to update the threshold so the stored
    threshold data is always moving forward in time.
    """

    plm = PushLogModel(pushlog_project)

    mtm = MetricsTestModel(project)

    branches = plm.get_branch_list()

    for b in branches:

        if b['name'] in SPECIAL_HANDLING_BRANCHES:
            continue

        pushlog = plm.get_branch_pushlog(
            b['id'], numdays, daysago
            )

        for index, node in enumerate(pushlog):

            revision = mtm.truncate_revision(node['node'])

            #Get the test value data for this revision
            child_test_data = mtm.get_test_values_by_revision(revision)
            test_data_set = set(child_test_data.keys())

            ###
            #CASE: No test data for the push, move on to the next push
            ###
            if not child_test_data:
                """
                Keep track of pushes with no data so we can skip them
                when looking for parents
                """
                mtm.add_skip_revision(revision)
                continue

            #Get the computed metrics for this revision
            computed_metrics_data = mtm.get_metrics_data(revision)
            computed_metrics_set = set(computed_metrics_data.keys())

            ###
            #CASE: Revision could already have metrics associated with it.
            #   Use computed_metrics_data to exclude datums that have
            #   already had their metrics data calculated.
            ###
            data_without_metrics = test_data_set.difference(
                computed_metrics_set
                )

            for child_key in data_without_metrics:

                threshold_data = mtm.get_threshold_data(
                    child_test_data[child_key]['ref_data']
                    )


                extend_ref_data(child_test_data, child_key, node)

                if threshold_data:

                    ###
                    #CASE: Threshold data exists for the metric datum.
                    #   Use it to run the test.
                    ###
                    test_result = mtm.run_metric_method(
                        child_test_data[child_key]['ref_data'],
                        child_test_data[child_key]['values'],
                        threshold_data[child_key]['values'],
                        threshold_data[child_key]['metric_values'],
                        )

                    mtm.store_metric_results(
                        revision,
                        child_test_data[child_key]['ref_data'],
                        test_result,
                        threshold_data[child_key]['ref_data']['test_run_id']
                        )
                else:

                    ###
                    # CASE: No threshold data exists for the metric datum
                    #   get the first parent with data.
                    #
                    # ASSUMPTION: The first parent with data is a viable
                    #   place to bootstrap the threshold value for the
                    #   metric datum.
                    ###

                    parent_data, test_result = mtm.get_parent_test_data(
                        pushlog, index, child_key,
                        child_test_data[child_key]['ref_data'],
                        child_test_data[child_key]['values']
                        )

                    if parent_data and test_result:
                        mtm.store_metric_results(
                            revision,
                            child_test_data[child_key]['ref_data'],
                            test_result,
                            parent_data['ref_data']['test_run_id']
                            )

    plm.disconnect()
    mtm.disconnect()
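
Rule 3b's parent walk (get_parent_test_data above) can be pictured as a backwards scan through the push log from the child's position until an earlier push with usable data passes the metric test. A rough sketch, under the assumption that get_parent_values and run_metric_test stand in for the model lookups and return falsy values when no data is available or the test does not pass:

def find_parent_with_data(pushlog, child_index, child_values,
                          get_parent_values, run_metric_test):
    """Walk backwards from the child's push until a parent passes the test."""
    for parent in reversed(pushlog[:child_index]):
        parent_values = get_parent_values(parent)
        if not parent_values:
            # Some pushes never produce performance data; keep walking.
            continue
        test_result = run_metric_test(child_values, parent_values)
        if test_result:
            return parent, test_result
    return None, None
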
Example #8
def summary(project, pushlog_project, numdays, daysago):
    """
        This function retrieves the push log for a given branch and
    iterates over each push in ascending order implementing the following
    ruleset:

    1.) If no metrics data is associated with the revision skip it.

    2.) For tests associated with a given revision, retrieve metric datums
        that do not have metric method summary data associated with them.

    3.) Run the metric method summary and store the results.
    """
    mtm = MetricsTestModel(project)
    plm = PushLogModel(pushlog_project)

    branches = plm.get_branch_list()

    for b in branches:

        if b['name'] in SPECIAL_HANDLING_BRANCHES:
            continue

        pushlog = plm.get_branch_pushlog(
            b['id'], numdays, daysago
            )

        for node in pushlog:

            revision = mtm.truncate_revision(node['node'])

            #Get the metric value data for this revision
            metrics_data = mtm.get_metrics_data(revision)

            #If there's no metric data a summary cannot be computed
            if not metrics_data:
                continue

            #Filter out tests that have had their summary computed
            store_list = get_test_keys_for_storage(mtm, metrics_data)

            cached_parent_data = {}

            for test_key in store_list:

                extend_ref_data(metrics_data, test_key, node)

                t_test_run_id = \
                    metrics_data[test_key]['ref_data']['threshold_test_run_id']

                test_id = metrics_data[test_key]['ref_data']['test_id']

                lookup_key = '{0}-{1}'.format(
                    str(t_test_run_id), str(test_id)
                    )

                if lookup_key in cached_parent_data:
                    parent_metrics_data = cached_parent_data[lookup_key]
                else:
                    parent_metrics_data = cached_parent_data.setdefault(
                        lookup_key,
                        mtm.get_metrics_data_from_ref_data(
                            metrics_data[test_key]['ref_data'],
                            t_test_run_id
                            )
                        )

                ############
                # ASSUMPTION: All of the metric values for each
                # page in the test are computed.  This is currently
                # true due to the requirements of the incoming JSON data
                # for a given test run.
                ###########
                results = mtm.run_metric_summary(
                    metrics_data[test_key]['ref_data'],
                    metrics_data[test_key]['values']
                    )

                if test_key in parent_metrics_data:

                    mtm.store_metric_summary_results(
                        revision,
                        metrics_data[test_key]['ref_data'],
                        results,
                        metrics_data[test_key]['values'],
                        metrics_data[test_key]['ref_data']['threshold_test_run_id'],
                        parent_metrics_data[test_key]['values']
                        )

                else:
                    mtm.store_metric_summary_results(
                        revision,
                        metrics_data[test_key]['ref_data'],
                        results,
                        metrics_data[test_key]['values'],
                        metrics_data[test_key]['ref_data']['threshold_test_run_id']
                        )

    plm.disconnect()
    mtm.disconnect()
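
The cached_parent_data lookup in summary() memoizes the per-threshold fetch so each (threshold_test_run_id, test_id) pair is loaded at most once per push. A minimal sketch of the same memoization, with fetch_parent_metrics standing in for mtm.get_metrics_data_from_ref_data:

def get_parent_metrics(cache, ref_data, t_test_run_id, fetch_parent_metrics):
    """Return cached parent metrics data, fetching it at most once per key."""
    lookup_key = '{0}-{1}'.format(t_test_run_id, ref_data['test_id'])
    if lookup_key not in cache:
        # setdefault would evaluate the fetch unconditionally, so check
        # membership first and only hit the database on a cache miss.
        cache[lookup_key] = fetch_parent_metrics(ref_data, t_test_run_id)
    return cache[lookup_key]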