Example #1
0
def pytest_sessionstart(session):
    """
    Set up the test environment.

    Sets DJANGO_SETTINGS_MODULE, adds the vendor lib, and sets up a test
    database.

    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "datazilla.settings.base")
    add_vendor_lib()

    from django.conf import settings
    from django.test.simple import DjangoTestSuiteRunner
    # we don't actually let Django run the tests, but we need to use some
    # methods of its runner for setup/teardown of dbs and some other things
    session.django_runner = DjangoTestSuiteRunner()
    # this provides template-rendering debugging info and locmem mail storage
    session.django_runner.setup_test_environment()
    # support a custom db prefix for the main datazilla test datasource
    # as well as for the testproj and testpushlog dbs
    prefix = getattr(settings, "TEST_DB_PREFIX", "")
    settings.DATABASES["default"]["TEST_NAME"] = "{0}test_datazilla".format(prefix)
    # this sets up a clean test-only database
    session.django_db_config = session.django_runner.setup_databases()
    # store the name of the test project/pushlog based on user custom settings
    session.perftest_name = "{0}testproj".format(prefix)
    session.pushlog_name = "{0}testpushlog".format(prefix)

    increment_cache_key_prefix()

    from datazilla.model import PerformanceTestModel, PushLogModel
    ptm = PerformanceTestModel.create(
        session.perftest_name,
        cron_batch="small",
        )
    PushLogModel.create(project=session.pushlog_name)

    # patch in additional test-only procs on the datasources
    objstore = ptm.sources["objectstore"]
    del objstore.dhub.procs[objstore.datasource.key]
    objstore.dhub.data_sources[objstore.datasource.key]["procs"].append(
        os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "objectstore_test.json",
            )
        )
    objstore.dhub.load_procs(objstore.datasource.key)

    perftest = ptm.sources["perftest"]
    del perftest.dhub.procs[perftest.datasource.key]
    perftest.dhub.data_sources[perftest.datasource.key]["procs"].append(
        os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "perftest_test.json",
            )
        )
    perftest.dhub.load_procs(perftest.datasource.key)
Example #2
0
def pytest_sessionstart(session):
    """
    Set up the test environment.

    Sets DJANGO_SETTINGS_MODULE, adds the vendor lib, and sets up a test
    database.

    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "datazilla.settings.base")
    add_vendor_lib()

    from django.conf import settings
    from django.test.simple import DjangoTestSuiteRunner
    # we don't actually let Django run the tests, but we need to use some
    # methods of its runner for setup/teardown of dbs and some other things
    session.django_runner = DjangoTestSuiteRunner()
    # this provides template-rendering debugging info and locmem mail storage
    session.django_runner.setup_test_environment()
    # support a custom db prefix for the main datazilla test datasource
    # as well as for the testproj and testpushlog dbs
    prefix = getattr(settings, "TEST_DB_PREFIX", "")
    settings.DATABASES["default"]["TEST_NAME"] = "{0}test_datazilla".format(
        prefix)
    # this sets up a clean test-only database
    session.django_db_config = session.django_runner.setup_databases()
    # store the name of the test project/pushlog based on user custom settings
    session.perftest_name = "{0}testproj".format(prefix)
    session.pushlog_name = "{0}testpushlog".format(prefix)

    increment_cache_key_prefix()

    from datazilla.model import PerformanceTestModel, PushLogModel
    ptm = PerformanceTestModel.create(
        session.perftest_name,
        cron_batch="small",
    )
    PushLogModel.create(project=session.pushlog_name)

    # patch in additional test-only procs on the datasources
    objstore = ptm.sources["objectstore"]
    del objstore.dhub.procs[objstore.datasource.key]
    objstore.dhub.data_sources[objstore.datasource.key]["procs"].append(
        os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "objectstore_test.json",
        ))
    objstore.dhub.load_procs(objstore.datasource.key)

    perftest = ptm.sources["perftest"]
    del perftest.dhub.procs[perftest.datasource.key]
    perftest.dhub.data_sources[perftest.datasource.key]["procs"].append(
        os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "perftest_test.json",
        ))
    perftest.dhub.load_procs(perftest.datasource.key)
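Both pytest_sessionstart variants above (Examples #1 and #2) derive every test database and project name from an optional TEST_DB_PREFIX setting. A minimal illustration of that convention, assuming a hypothetical developer prefix of "jdoe_" (the prefix value is illustrative, not taken from the project):

# Hypothetical local settings override; "jdoe_" is only an example value.
TEST_DB_PREFIX = "jdoe_"

# Given the format strings in pytest_sessionstart above, the resulting
# test database/project names would be:
#   settings.DATABASES["default"]["TEST_NAME"]  ->  "jdoe_test_datazilla"
#   session.perftest_name                       ->  "jdoe_testproj"
#   session.pushlog_name                        ->  "jdoe_testpushlog"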
Example #3
0
    def handle(self, *args, **options):
        """ Store pushlog data in the database. """

        repo_host = options.get("repo_host")
        enddate = options.get("enddate")
        numdays = options.get("numdays")
        hours = options.get("hours")
        branch = options.get("branch")
        verbosity = options.get("verbosity")
        project = options.get("project")

        if not repo_host:
            raise CommandError("You must supply a host name for the repo pushlogs " +
                         "to store: --repo_host hostname")

        if not numdays and not hours:
            raise CommandError("You must supply the number of days or hours of data.")
        else:
            if numdays:
                try:
                    numdays = int(numdays)
                except ValueError:
                    raise CommandError("numdays must be an integer.")

            if hours:

                try:
                    hours = int(hours)
                except ValueError:
                    raise CommandError("hours must be an integer.")

        lock = FileLock(self.LOCK_FILE)
        try:
            lock.acquire(timeout=0)
            try:
                plm = PushLogModel(project=project, out=self.stdout, verbosity=verbosity)

                # store the pushlogs for the branch specified, or all branches
                summary = plm.store_pushlogs(repo_host, numdays, hours, enddate, branch)
                self.println(("Branches: {0}\nPushlogs stored: {1}, skipped: {2}\n" +
                              "Changesets stored: {3}, skipped: {4}").format(
                        summary["branches"],
                        summary["pushlogs_stored"],
                        summary["pushlogs_skipped"],
                        summary["changesets_stored"],
                        summary["changesets_skipped"],
                        ))
                plm.disconnect()

            finally:
                lock.release()

        except AlreadyLocked:
            self.println("This command is already being run elsewhere.  Please try again later.")
Example #4
0
    def handle_project(self, project, **options):
        def to_seconds(td):
            return (td.microseconds +
                    (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

        numdays = int(options.get("numdays", 1))
        now = int(time.time())
        time_constraint = now - to_seconds(timedelta(numdays))

        mtm = MetricsTestModel(project)

        test_run_ids = mtm.get_test_runs_not_in_all_dimensions(time_constraint)
        self.stdout.write("test run ids {0}\n".format(str(len(test_run_ids))))

        #Make a list of test_run_id chunks to iterate over
        chunk_size = 20
        test_run_id_chunks = [
            test_run_ids[i:i + chunk_size]
            for i in range(0, len(test_run_ids), chunk_size)
        ]

        plm = PushLogModel()

        for ids in test_run_id_chunks:

            self.stdout.write("Processing ids {0}\n".format(str(ids)))

            revisions_without_push_data = mtm.load_test_data_all_dimensions(
                ids)

            if revisions_without_push_data:

                revision_nodes = {}

                for revision in revisions_without_push_data:

                    node = plm.get_node_from_revision(
                        revision, revisions_without_push_data[revision])

                    revision_nodes[revision] = node

                mtm.set_push_data_all_dimensions(revision_nodes)

        plm.disconnect()
        mtm.disconnect()
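For reference, the to_seconds helper above reproduces the standard pre-Python-2.7 formula for timedelta.total_seconds(), so time_constraint is simply a Unix timestamp numdays in the past. A small standalone check (hypothetical snippet, not part of the command):

from datetime import timedelta

def to_seconds(td):
    # same formula as the helper above
    return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

assert to_seconds(timedelta(1)) == 86400                   # one day
assert to_seconds(timedelta(days=2, hours=12)) == 216000   # 2.5 days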
Example #5
0
    def handle_project(self, project, **options):

        self.stdout.write("Processing project {0}\n".format(project))

        pushlog_project = options.get("pushlog_project", 'pushlog')
        loadlimit = int(options.get("loadlimit", 1))
        debug = options.get("debug", None)

        test_run_ids = []
        ptm = PerformanceTestModel(project)
        test_run_ids = ptm.process_objects(loadlimit)
        ptm.disconnect()

        """
        metrics_exclude_projects = set(['b2g', 'games', 'jetperf', 'marketapps', 'microperf', 'stoneridge', 'test', 'webpagetest'])
        if project not in metrics_exclude_projects:
            #minimum required number of replicates for
            #metrics processing
            replicate_min = 5
            compute_test_run_metrics(
                project, pushlog_project, debug, replicate_min, test_run_ids
                )
        """

        mtm = MetricsTestModel(project)
        revisions_without_push_data = mtm.load_test_data_all_dimensions(
            test_run_ids)

        if revisions_without_push_data:

            revision_nodes = {}
            plm = PushLogModel(pushlog_project)

            for revision in revisions_without_push_data:

                node = plm.get_node_from_revision(
                    revision, revisions_without_push_data[revision])

                revision_nodes[revision] = node

            plm.disconnect()
            mtm.set_push_data_all_dimensions(revision_nodes)

        mtm.disconnect()
Example #6
0
    def handle_project(self, project, **options):

        def to_seconds(td):
            return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

        numdays = int(options.get("numdays", 1))
        now = int(time.time())
        time_constraint = now - to_seconds(timedelta(numdays))

        mtm = MetricsTestModel(project)

        test_run_ids = mtm.get_test_runs_not_in_all_dimensions(time_constraint)
        self.stdout.write("test run ids {0}\n".format(str(len(test_run_ids))))

        #Make a list of test_run_id chunks to iterate over
        chunk_size = 20
        test_run_id_chunks = [
            test_run_ids[i:i + chunk_size] for i in range(0, len(test_run_ids), chunk_size)
            ]

        plm = PushLogModel()

        for ids in test_run_id_chunks:

            self.stdout.write("Processing ids {0}\n".format(str(ids)))

            revisions_without_push_data = mtm.load_test_data_all_dimensions(ids)

            if revisions_without_push_data:

                revision_nodes = {}

                for revision in revisions_without_push_data:

                    node = plm.get_node_from_revision(
                        revision, revisions_without_push_data[revision])

                    revision_nodes[revision] = node

                mtm.set_push_data_all_dimensions(revision_nodes)

        plm.disconnect()
        mtm.disconnect()
Example #7
0
def pytest_funcarg__plm(request):
    """
    Give a test access to a PushLogModel instance.

    Truncate all project tables between tests in order to provide isolation.

    """
    from datazilla.model import PushLogModel

    plm = PushLogModel(request._pyfuncitem.session.pushlog_name,
                       out=sys.stdout,
                       verbosity=2)

    request.addfinalizer(partial(truncate, plm, ["branches", "branch_map"]))
    return plm
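The finalizer above calls a truncate helper with the model and a list of tables to leave untouched; the helper itself is not shown in this corpus. Below is a minimal sketch of what such a helper could look like, reusing the raw MySQLdb connection pattern from pytest_sessionfinish (Example #10). Only the call signature is taken from the code above; the body is an assumption, not the project's actual implementation.

def truncate(model, skip_list=None):
    # Hypothetical sketch: empty every table in the model's datasources
    # except those named in skip_list, so each test starts from a clean slate.
    from django.conf import settings
    import MySQLdb

    skip_list = skip_list or []

    for sds in model.sources.values():
        conn = MySQLdb.connect(
            host=sds.datasource.host,
            user=settings.DATAZILLA_DATABASE_USER,
            passwd=settings.DATAZILLA_DATABASE_PASSWORD,
            db=sds.datasource.name,
        )
        cur = conn.cursor()
        # disable FK checks so tables can be truncated in any order
        cur.execute("SET FOREIGN_KEY_CHECKS = 0")
        cur.execute("SHOW TABLES")
        for (table_name,) in cur.fetchall():
            if table_name not in skip_list:
                cur.execute("TRUNCATE TABLE {0}".format(table_name))
        cur.execute("SET FOREIGN_KEY_CHECKS = 1")
        conn.close()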
Example #8
0
    def handle(self, *args, **options):
        """ Create databases for a new project based on the args value. """

        host = options.get("host")
        project = options.get("project")

        if not host:
            self.println("You must supply a host name for the pushlog " +
                     "database: --host hostname")
            return

        pl = PushLogModel.create(
            project=project,
            host=host,
            type=options.get("type"),
            )
        self.println("Pushlog database created on {0}".format(host))
        pl.disconnect()
Example #9
0
    def handle(self, *args, **options):
        """ Create databases for a new project based on the args value. """

        host = options.get("host")
        project = options.get("project")

        if not host:
            self.println("You must supply a host name for the pushlog " +
                         "database: --host hostname")
            return

        pl = PushLogModel.create(
            project=project,
            host=host,
            type=options.get("type"),
        )
        self.println("Pushlog database created on {0}".format(host))
        pl.disconnect()
Example #10
0
def pytest_sessionfinish(session):
    """Tear down the test environment, including databases."""
    print("\n")

    from django.conf import settings
    from datazilla.model import PerformanceTestModel, PushLogModel
    import MySQLdb

    source_list = PerformanceTestModel(session.perftest_name).sources.values()
    source_list.extend(
        PushLogModel(project=session.pushlog_name).sources.values())

    for sds in source_list:
        conn = MySQLdb.connect(
            host=sds.datasource.host,
            user=settings.DATAZILLA_DATABASE_USER,
            passwd=settings.DATAZILLA_DATABASE_PASSWORD,
        )
        cur = conn.cursor()
        cur.execute("DROP DATABASE {0}".format(sds.datasource.name))
        conn.close()

    session.django_runner.teardown_databases(session.django_db_config)
    session.django_runner.teardown_test_environment()
Example #11
0
def compute_test_run_metrics(project,
                             pushlog_project,
                             debug,
                             replicate_min,
                             test_run_ids=[]):
    """
    Runs all metric tests and associated summaries on a list of test run ids
    """
    ###
    #Ensure that test_run_ids is iterable; if process_objects generates
    #an error it's possible that test_run_ids will be explicitly set to
    #None
    ###
    test_run_ids = test_run_ids or []

    ##
    #Get test data for test run ids
    ##
    plm = PushLogModel(pushlog_project)
    mtm = MetricsTestModel(project)

    #####
    #We don't know if we need the pushlog, or for what branches
    #it will be required.  Make sure to only retrieve once for each
    #branch encountered and only when we need it.
    ####
    pushlog = {}

    #####
    #This data structure is used to look up the index position
    #of a revision in the push log to start walking from
    #####
    pushlog_lookup = {}

    for test_run_id in test_run_ids:

        child_test_data = mtm.get_test_values_by_test_run_id(test_run_id)

        if not child_test_data:
            msg = u"No test data available for test run id {0}".format(
                test_run_id)
            println(msg, debug)
            continue

        first_key = _get_first_mkey(child_test_data)

        rep_count = len(child_test_data[first_key]['values'])

        test_name = child_test_data[first_key]['ref_data']['test_name']

        child_revision, push_node, branch = _get_revision_and_push_node(
            plm, child_test_data, first_key)

        base_message = u"{0} {1}".format(child_revision, str(test_run_id))

        if not check_run_conditions(test_name, rep_count, push_node, branch,
                                    replicate_min, debug):
            println(u"Not able to run {0}\n".format(base_message), debug)
            continue

        #The test and its replicates pass the run conditions
        println(u"Running {0}".format(base_message), debug)

        stored_metric_keys = []

        try:

            stored_metric_keys = _run_metrics(test_run_id, mtm, plm,
                                              child_test_data, pushlog,
                                              pushlog_lookup, child_revision,
                                              push_node, branch, test_name,
                                              debug)

        except Exception as e:

            _handle_exception(mtm, e, test_name, child_revision, test_run_id,
                              compute_test_run_metrics.__name__, debug)

        try:

            _run_summary(test_run_id, mtm, plm, child_revision,
                         child_test_data, stored_metric_keys, push_node, debug)

        except Exception as e:

            _handle_exception(mtm, e, test_name, child_revision, test_run_id,
                              compute_test_run_metrics.__name__, debug)

        println(u"\tProcessing complete for {0}\n".format(base_message), debug)

    plm.disconnect()
    mtm.disconnect()
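The test_run_ids guard commented on at the top of compute_test_run_metrics matters because callers, such as the commented-out call in Example #5, pass test_run_ids straight from process_objects, which may be None after an error. A tiny standalone illustration of the guard (the _normalize name is hypothetical):

def _normalize(test_run_ids=None):
    # mirrors the guard above: None degrades to an empty list instead of
    # raising TypeError when iterated
    return test_run_ids or []

assert _normalize(None) == []
assert _normalize([101, 102]) == [101, 102]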
Example #12
0
def compute_test_run_metrics(
    project, pushlog_project, debug, replicate_min, test_run_ids=[]
    ):
    """
    Runs all metric tests and associated summaries on a list of test run ids
    """
    ###
    #Ensure that test_run_ids is iterable; if process_objects generates
    #an error it's possible that test_run_ids will be explicitly set to
    #None
    ###
    test_run_ids = test_run_ids or []

    ##
    #Get test data for test run ids
    ##
    plm = PushLogModel(pushlog_project)
    mtm = MetricsTestModel(project)

    #####
    #We don't know if we need the pushlog, or for what branches
    #it will be required.  Make sure to only retrieve once for each
    #branch encountered and only when we need it.
    ####
    pushlog = {}

    #####
    #This data structure is used to look up the index position
    #of a revision in the push log to start walking from
    #####
    pushlog_lookup = {}

    for test_run_id in test_run_ids:

        child_test_data = mtm.get_test_values_by_test_run_id(test_run_id)

        if not child_test_data:
            msg = u"No test data available for test run id {0}".format(
                test_run_id
                )
            println(msg, debug)
            continue

        first_key = _get_first_mkey(child_test_data)

        rep_count = len(child_test_data[first_key]['values'])

        test_name = child_test_data[first_key]['ref_data']['test_name']

        child_revision, push_node, branch = _get_revision_and_push_node(
            plm, child_test_data, first_key
            )

        base_message = u"{0} {1}".format(child_revision, str(test_run_id))

        if not check_run_conditions(
            test_name, rep_count, push_node, branch, replicate_min, debug
            ):
            println(u"Not able to run {0}\n".format(base_message), debug)
            continue

        #The test and its replicates pass the run conditions
        println(u"Running {0}".format(base_message), debug)

        stored_metric_keys = []

        try:

            stored_metric_keys = _run_metrics(
                test_run_id, mtm, plm, child_test_data, pushlog,
                pushlog_lookup, child_revision, push_node, branch,
                test_name, debug
                )

        except Exception as e:

            _handle_exception(
                mtm, e, test_name, child_revision, test_run_id,
                compute_test_run_metrics.__name__, debug
                )

        try:

            _run_summary(
                test_run_id, mtm, plm, child_revision, child_test_data,
                stored_metric_keys, push_node, debug
                )

        except Exception as e:

            _handle_exception(
                mtm, e, test_name, child_revision, test_run_id,
                compute_test_run_metrics.__name__, debug
                )

        println(
            u"\tProcessing complete for {0}\n".format(base_message),
            debug
            )

    plm.disconnect()
    mtm.disconnect()
Example #13
0
class Command(BaseCommand):
    """
    Management command to update the pushlog table with the latest pushes.

    example resulting url:
        https://hg.mozilla.org/integration/mozilla-inbound/json-pushes?full=1&startdate=06/04/2012&enddate=06/07/2012


    """
    LOCK_FILE = "update_pushlog"

    help = "Update the repo pushlog table."

    option_list = BaseCommand.option_list + (
        make_option("--repo_host",
                    action="store",
                    dest="repo_host",
                    default=None,
                    help="The host name for the repo (e.g. hg.mozilla.org)"),
        make_option("--enddate",
                    action="store",
                    dest="enddate",
                    default=None,
                    help="(optional) The ending date range for pushlogs in " +
                    "the format: MM/DD/YYYY.  Default to today."),
        make_option("--numdays",
                    action="store",
                    dest="numdays",
                    default=None,
                    help="Number of days worth of pushlogs to return."),
        make_option("--hours",
                    action="store",
                    dest="hours",
                    default=None,
                    help="Number of hours worth of pushlogs to return."),

        # probably mostly for testing purposes, but could be otherwise useful.
        make_option("--branch",
                    action="store",
                    dest="branch",
                    default=None,
                    help="The branch to import pushlogs for (default to all)"),

        # probably mostly for testing purposes, but could be otherwise useful.
        make_option("--project",
                    action="store",
                    dest="project",
                    default=None,
                    help=("The project name for the the pushlog database " +
                          "storage (default to 'pushlog')")),
    )

    def println(self, val):
        self.stdout.write("{0}\n".format(str(val)))

    def handle(self, *args, **options):
        """ Store pushlog data in the database. """

        repo_host = options.get("repo_host")
        enddate = options.get("enddate")
        numdays = options.get("numdays")
        hours = options.get("hours")
        branch = options.get("branch")
        verbosity = options.get("verbosity")
        project = options.get("project")

        if not repo_host:
            raise CommandError(
                "You must supply a host name for the repo pushlogs " +
                "to store: --repo_host hostname")

        if not numdays and not hours:
            raise CommandError(
                "You must supply the number of days or hours of data.")
        else:
            if numdays:
                try:
                    numdays = int(numdays)
                except ValueError:
                    raise CommandError("numdays must be an integer.")

            if hours:

                try:
                    hours = int(hours)
                except ValueError:
                    raise CommandError("hours must be an integer.")

        pidfile = "{0}.pid".format(self.LOCK_FILE)

        if os.path.isfile(pidfile):

            pid = ""
            with open(pidfile) as f:
                pid = f.readline().strip()

            ####
            #If we have a pid file, assume the update_pushlog command is
            #hanging on an intermittent urllib timeout from the call to the
            #json-pushes web service method, and kill the hanging program.
            ####
            if pid:

                logfile_name = "{0}.log".format(self.LOCK_FILE)
                time_stamp = str(time.time()).split('.')[0]

                try:

                    os.kill(int(pid), signal.SIGKILL)

                except OSError as err:

                    log_file = open(logfile_name, 'a+')

                    msg = ""
                    if err.errno == errno.ESRCH:
                        msg = "pid:{0} time:{1}, Not running\n".format(
                            pid, time_stamp)
                    elif err.errno == errno.EPERM:
                        msg = "pid:{0} time:{1}, No permission to signal process\n".format(
                            pid, time_stamp)
                    else:
                        msg = "pid:{0} time:{1}, Generated unknown error {2}\n".format(
                            pid, time_stamp, str(err))

                    log_file.write(msg)
                    log_file.close()

                    #make sure we get rid of any pid file on error
                    os.unlink(pidfile)

                else:

                    #log the kill
                    log_file = open(logfile_name, 'a+')
                    log_file.write("pid:{0} time:{1}, Killed\n".format(
                        pid, time_stamp))
                    log_file.close()

                    #remove any existing pidfile
                    os.unlink(pidfile)

        #Write pid file
        pid = str(os.getpid())
        with open(pidfile, 'w') as pidfile_handle:
            pidfile_handle.write(pid)

        plm = PushLogModel(project=project,
                           out=self.stdout,
                           verbosity=verbosity)

        # store the pushlogs for the branch specified, or all branches
        summary = plm.store_pushlogs(repo_host, numdays, hours, enddate,
                                     branch)
        self.println(("Branches: {0}\nPushlogs stored: {1}, skipped: {2}\n" +
                      "Changesets stored: {3}, skipped: {4}").format(
                          summary["branches"],
                          summary["pushlogs_stored"],
                          summary["pushlogs_skipped"],
                          summary["changesets_stored"],
                          summary["changesets_skipped"],
                      ))

        plm.disconnect()

        os.unlink(pidfile)
Example #14
0
def run_metrics(project, pushlog_project, numdays, daysago):
    """
        This function retrieves the push log for a given branch and
    iterates over each push in ascending order implementing the following
    rule set:

    1.) If a revision associated with a push node has no data in the
        perftest schema, skip it.

    2.) If a revision associated with a push node already has metrics
        data associated with it in the perftest schema, skip the metric
        datums that already have data.

    3.) If test data is present for a revision associated with a push
        node, implement the following for all test data metric datums
        that have no associated metric data:

        3a.) If a threshold is present for a given metric datum,
             use the test data associated with it to compute the
             results of the associated metric method.  Store the
             metric test results.

             If the metric method test succeeds and the push date
             associated with the revision is greater than or equal
             to the threshold push date, update the threshold.

        3b.) If no threshold is present for a given metric datum,
             walk through consecutive pushes in the push log until
             a parent is found that passes the metric test with the
             child provided.  Store the test results and the threshold
             associated with the metric datum.

        If the immediate parent push does not have data in datazilla, this
    could be due to the asynchronous build/test environment sending data in
    a different order than the pushlog push order.  How do we distinguish
    between when this occurs and when the data has never been sent to
    datazilla for a particular push?  These two scenarios are
    indistinguishable given the information this system has access to.
        The algorithm implemented uses test run data associated with a
    metric threshold if it's available for a particular metric datum, even
    if that threshold is not associated with the parent push.  There are
    several edge cases that can occur in the build environment that cause a
    push found in the push log to never have performance test data
    generated.  Because of this we cannot assume every push will have a
    parent with test data.
        If the child push is from a date before the metric threshold, its
    test results will not be used to update the threshold so the stored
    threshold data is always moving forward in time.
    """

    plm = PushLogModel(pushlog_project)

    mtm = MetricsTestModel(project)

    branches = plm.get_branch_list()

    for b in branches:

        if b['name'] in SPECIAL_HANDLING_BRANCHES:
            continue

        pushlog = plm.get_branch_pushlog(
            b['id'], numdays, daysago
            )

        for index, node in enumerate(pushlog):

            revision = mtm.truncate_revision(node['node'])

            #Get the test value data for this revision
            child_test_data = mtm.get_test_values_by_revision(revision)
            test_data_set = set(child_test_data.keys())

            ###
            #CASE: No test data for the push, move on to the next push
            ###
            if not child_test_data:
                """
                Keep track of pushes with no data so we can skip them
                when looking for parents
                """
                mtm.add_skip_revision(revision)
                continue

            #Get the computed metrics for this revision
            computed_metrics_data = mtm.get_metrics_data(revision)
            computed_metrics_set = set(computed_metrics_data.keys())

            ###
            #CASE: Revision could already have metrics associated with it.
            #   Use computed_metrics_data to exclude datums that have
            #   already had their metrics data calculated.
            ###
            data_without_metrics = test_data_set.difference(
                computed_metrics_set
                )

            for child_key in data_without_metrics:

                threshold_data = mtm.get_threshold_data(
                    child_test_data[child_key]['ref_data']
                    )


                extend_ref_data(child_test_data, child_key, node)

                if threshold_data:

                    ###
                    #CASE: Threshold data exists for the metric datum.
                    #   Use it to run the test.
                    ###
                    test_result = mtm.run_metric_method(
                        child_test_data[child_key]['ref_data'],
                        child_test_data[child_key]['values'],
                        threshold_data[child_key]['values'],
                        threshold_data[child_key]['metric_values'],
                        )

                    mtm.store_metric_results(
                        revision,
                        child_test_data[child_key]['ref_data'],
                        test_result,
                        threshold_data[child_key]['ref_data']['test_run_id']
                        )
                else:

                    ###
                    # CASE: No threshold data exists for the metric datum
                    #   get the first parent with data.
                    #
                    # ASSUMPTION: The first parent with data is a viable
                    #   place to bootstrap the threshold value for the
                    #   metric datum.
                    ###

                    parent_data, test_result = mtm.get_parent_test_data(
                        pushlog, index, child_key,
                        child_test_data[child_key]['ref_data'],
                        child_test_data[child_key]['values']
                        )

                    if parent_data and test_result:
                        mtm.store_metric_results(
                            revision,
                            child_test_data[child_key]['ref_data'],
                            test_result,
                            parent_data['ref_data']['test_run_id']
                            )

    plm.disconnect()
    mtm.disconnect()
Example #15
0
def summary(project, pushlog_project, numdays, daysago):
    """
        This function retrieves the push log for a given branch and
    iterates over each push in ascending order implementing the following
    ruleset:

    1.) If no metrics data is associated with the revision skip it.

    2.) For tests associated with a given revision, retrieve metric datums
        that do not have metric method summary data associated with them.

    3.) Run the metric method summary and store the results.
    """
    mtm = MetricsTestModel(project)
    plm = PushLogModel(pushlog_project)

    branches = plm.get_branch_list()

    for b in branches:

        if b['name'] in SPECIAL_HANDLING_BRANCHES:
            continue

        pushlog = plm.get_branch_pushlog(
            b['id'], numdays, daysago
            )

        for node in pushlog:

            revision = mtm.truncate_revision(node['node'])

            #Get the metric value data for this revision
            metrics_data = mtm.get_metrics_data(revision)

            #If there's no metric data a summary cannot be computed
            if not metrics_data:
                continue

            #Filter out tests that have had their summary computed
            store_list = get_test_keys_for_storage(mtm, metrics_data)

            cached_parent_data = {}

            for test_key in store_list:

                extend_ref_data(metrics_data, test_key, node)

                t_test_run_id = \
                    metrics_data[test_key]['ref_data']['threshold_test_run_id']

                test_id = metrics_data[test_key]['ref_data']['test_id']

                lookup_key = '{0}-{1}'.format(
                    str(t_test_run_id), str(test_id)
                    )

                if lookup_key in cached_parent_data:
                    parent_metrics_data = cached_parent_data[lookup_key]
                else:
                    parent_metrics_data = cached_parent_data.setdefault(
                        lookup_key,
                        mtm.get_metrics_data_from_ref_data(
                            metrics_data[test_key]['ref_data'],
                            t_test_run_id
                            )
                        )

                ############
                # ASSUMPTION: All of the metric values for each
                # page in the test are computed.  This is currently
                # true due to the requirements of the incoming JSON data
                # for a given test run.
                ###########
                results = mtm.run_metric_summary(
                    metrics_data[test_key]['ref_data'],
                    metrics_data[test_key]['values']
                    )

                if test_key in parent_metrics_data:

                    mtm.store_metric_summary_results(
                        revision,
                        metrics_data[test_key]['ref_data'],
                        results,
                        metrics_data[test_key]['values'],
                        metrics_data[test_key]['ref_data']['threshold_test_run_id'],
                        parent_metrics_data[test_key]['values']
                        )

                else:
                    mtm.store_metric_summary_results(
                        revision,
                        metrics_data[test_key]['ref_data'],
                        results,
                        metrics_data[test_key]['values'],
                        metrics_data[test_key]['ref_data']['threshold_test_run_id']
                        )

    plm.disconnect()
    mtm.disconnect()