Example #1
0
    def _get_manifests_in_db(self, db_obj, trace_id):
        """Returns a list of present workflow types in a trace in the DB.
        """
        histogram_names = _filter_non_man(
            Histogram().get_list_of_results(db_obj, trace_id))
        stats_names = _filter_non_man(
            NumericStats().get_list_of_results(db_obj, trace_id))

        # Result names look like "<prefix>_<manifest>_..."; keep the manifest
        # token and de-duplicate across both result sources.
        manifests = set(name.split("_")[1] for name in histogram_names)
        manifests.update(name.split("_")[1] for name in stats_names)
        return list(manifests)
Example #2
0
    def test_load_job_results_per_manifest(self):
        """Stores per-manifest workflow results and checks they reload intact."""
        db_obj = self._db
        self.addCleanup(self._del_table, "histograms")
        self.addCleanup(self._del_table, "numericStats")
        Histogram().create_table(db_obj)
        NumericStats().create_table(db_obj)

        extractor = WorkflowsExtractor()
        # Two workflows of manifest "manifest", one of "manifest2", plus one
        # non-workflow job ("sim_job").
        job_list = {
            "job_name": [
                "wf_manifest-2_S0", "wf_manifest-2_S1_dS0",
                "wf_manifest-2_S2_dS0", "wf_manifest-2_S3_dS2",
                "wf_manifest-2_S4_dS3", "wf_manifest-2_S5_dS4-dS1",
                "wf_manifest-2_S6_dS0", "sim_job", "wf_manifest-3_S0",
                "wf_manifest-3_S1_dS0", "wf_manifest-3_S2_dS0",
                "wf_manifest-3_S3_dS1-dS2", "wf_manifest2-4_S0"
            ],
            "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22, 30],
            "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25, 35],
            "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2, 3],
            "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4, 33]
        }

        extractor.extract(job_list)
        extractor.do_processing()

        # Calculate and store (store=True) under trace_id 1, then load back
        # through a fresh extractor and compare field by field.
        old_results = extractor.calculate_per_manifest_results(True, db_obj, 1)
        new_results = WorkflowsExtractor().load_per_manifest_results(db_obj, 1)

        expected_manifests = ["manifest2", "manifest"]
        self.assertEqual(sorted(list(new_results.keys())),
                         sorted(expected_manifests))
        result_kinds = [
            "wf_runtime_cdf", "wf_runtime_stats", "wf_waittime_cdf",
            "wf_waittime_stats", "wf_turnaround_cdf",
            "wf_turnaround_stats", "wf_stretch_factor_cdf",
            "wf_stretch_factor_stats", "wf_jobs_runtime_cdf",
            "wf_jobs_runtime_stats", "wf_jobs_cores_cdf",
            "wf_jobs_cores_stats"
        ]
        for manifest in expected_manifests:
            for kind in result_kinds:
                key = "m_" + manifest + "_" + kind
                assertEqualResult(self, old_results[manifest][key],
                                  new_results[manifest][key], key)
Example #3
0
 def del_results(self, db_obj):
     """Deletes all analysis results associated with this experiment"""
     # Every result table keys its rows on trace_id; purge them all.
     result_tables = [
         Histogram()._table_name,
         ResultTrace()._get_utilization_result()._table_name,
         NumericStats()._table_name,
     ]
     for table_name in result_tables:
         db_obj.delete_rows(table_name, "trace_id", self._trace_id)
Example #4
0
    def test_load_job_results_grouped_core_seconds(self):
        """Stores job results grouped by core-seconds and checks they can be
        loaded back from the DB under every group-edge prefix.
        """
        db_obj = self._db
        hist = Histogram()
        stat = NumericStats()
        self.addCleanup(self._del_table, "histograms")
        self.addCleanup(self._del_table, "numericStats")
        hist.create_table(db_obj)
        stat.create_table(db_obj)

        rt = ResultTrace()
        # Three synthetic PBS jobs: every column must have one entry per job.
        # BUG FIX: "account" previously had only two entries, leaving it
        # shorter than the other three-element columns.
        pbs_list = {
            "account": ["account1", "account2", "account3"],
            "cores_per_node": [24, 24, 24],
            "numnodes": [1, 1, 1],
            "wallclock_requested": [120, 368, 400],
            "class": ["queue1", "queue2", "queue3"],
            "created": [1000, 2000, 3000],
            "start": [1100, 2200, 3300],
            "completion": [1500, 2700, 4000],
            "jobname": ["name1", "name2", "name3"]
        }
        rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)

        # Calculate and store (store=True) under trace_id 1.
        rt.calculate_job_results_grouped_core_seconds([0, 24 * 450, 24 * 550],
                                                      True, db_obj, 1)

        db_obj = self._db
        new_rt = ResultTrace()
        new_rt.load_job_results_grouped_core_seconds([0, 24 * 450, 24 * 550],
                                                     db_obj, 1)

        fields = [
            "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
            "jobs_waittime_stats", "jobs_turnaround_cdf",
            "jobs_turnaround_stats", "jobs_requested_wc_cdf",
            "jobs_requested_wc_stats", "jobs_cpus_alloc_cdf",
            "jobs_cpus_alloc_stats", "jobs_slowdown_cdf", "jobs_slowdown_stats"
        ]
        # Each result field is stored once per core-seconds group, prefixed
        # with "g<edge>_".
        new_fields = []
        for edge in [0, 24 * 450, 24 * 550]:
            for field in fields:
                new_fields.append("g" + str(edge) + "_" + field)

        for field in new_fields:
            self.assertNotEqual(new_rt.jobs_results[field], None)
Example #5
0
    def test_store_load(self):
        """Stores workflow-delta results and checks they reload identically."""
        db_obj = self._db
        self.addCleanup(self._del_table, "histograms")
        self.addCleanup(self._del_table, "numericStats")
        Histogram().create_table(db_obj)
        NumericStats().create_table(db_obj)

        # Same workflows in both traces, but with different timings so the
        # deltas are non-trivial.
        job_list_1 = {
            "job_name": [
                "wf_manifest-2_S0", "wf_manifest-2_S1_dS0", "wf_manifest-3_S0",
                "wf_manifest-3_S1_dS0"
            ],
            "id_job": [0, 1, 2, 3],
            "time_submit": [100, 100, 1100, 1100],
            "time_start": [110, 215, 1200, 1400],
            "time_end": [200, 250, 1300, 1500]
        }

        job_list_2 = {
            "job_name": ["wf_manifest-2_S0", "wf_manifest-3_S0"],
            "id_job": [0, 1],
            "time_submit": [100, 1100],
            "time_start": [110, 1200],
            "time_end": [615, 2000]
        }

        deltas = WorkflowDeltas()
        deltas._first_trace = ResultTrace()
        deltas._second_trace = ResultTrace()
        deltas._first_trace._lists_submit = job_list_1
        deltas._second_trace._lists_submit = job_list_2
        deltas._first_workflows = (
            deltas._first_trace.do_workflow_pre_processing())
        deltas._second_workflows = (
            deltas._second_trace.do_workflow_pre_processing())

        deltas.produce_deltas()
        # Calculate and store (store=True) under trace_id 1, then reload
        # through a fresh WorkflowDeltas and compare every field.
        stored_results = deltas.calculate_delta_results(True, db_obj, 1)
        loaded_results = WorkflowDeltas().load_delta_results(db_obj, 1)

        for field in list(stored_results.keys()):
            assertEqualResult(self, stored_results[field],
                              loaded_results[field], field)
Example #6
0
	def __init__(self, propStdDev, min, binWidth, values):
		"""Sampler driven by a histogram target distribution.

		propStdDev: std deviation of the Gaussian proposal distribution
		min: lower edge of the first histogram bin (parameter name kept
		     for backward compatibility even though it shadows the builtin)
		binWidth: width of each histogram bin
		values: bin values used to initialize the target histogram
		"""
		self.targetDistr = Histogram.createInitialized(min, binWidth, values)
		# NOTE(review): attribute name "propsalDistr" looks like a typo of
		# "proposalDistr" but is kept — external code may reference it.
		self.propsalDistr = GaussianRejectSampler(0, propStdDev)
		self.proposalMixture = False

		# bootstrap sample: start at a random point inside the histogram's
		# support (locals renamed so builtins min/max are not shadowed)
		(lo, hi) = self.targetDistr.getMinMax()
		self.curSample = random.randint(lo, hi)
		self.curDistr = self.targetDistr.value(self.curSample)
		self.transCount = 0
Example #7
0
	def __init__(self, propStdDev, min, binWidth, values):
		"""Sampler driven by a histogram target distribution.

		propStdDev: std deviation of the Gaussian proposal distribution
		min: lower edge of the first histogram bin (parameter name kept
		     for backward compatibility even though it shadows the builtin)
		binWidth: width of each histogram bin
		values: bin values used to initialize the target histogram
		"""
		self.targetDistr = Histogram.createInitialized(min, binWidth, values)
		# NOTE(review): attribute name "propsalDistr" looks like a typo of
		# "proposalDistr" but is kept — external code may reference it.
		self.propsalDistr = GaussianRejectSampler(0, propStdDev)
		self.proposalMixture = False

		# bootstrap sample: start at a random point inside the histogram's
		# support (locals renamed so builtins min/max are not shadowed)
		(lo, hi) = self.targetDistr.getMinMax()
		self.curSample = random.randint(lo, hi)
		self.curDistr = self.targetDistr.value(self.curSample)
		self.transCount = 0
Example #8
0
    def test(self, examples, print_level=1):
        """Computes the "area under the ROC curve". This is a way to measure the
        precision/recall WITHOUT choosing a cutoff-threshold.  It is mathematically
        equivalent to:
           "the probability that a random positive example has a higher
            prob_output1 than a random negative case"
        (This equivalence is non-obvious).

        The algorithm below computes this average probability by effectively trying
        all combinations of positive-vs-negative examples, but does this in O(NlgN)
        instead of O(N^2)"""
        if type(examples) is TrainingExamples:
            examples = examples.examples

        prob_stats = SummaryStats()
        prob_hist = Histogram()
        output1_scores = list()
        output0_scores = list()
        for example in examples:
            assert example["_OUTPUT"] in [0, 1]
            prob = self.prob_output1(example)
            prob_stats.add(prob)
            prob_key = "%1.1f-%1.1f" % (int(prob * 10) / 10.0, (int(prob * 10) + 1) / 10.0)
            if prob == 1:
                prob_key = "0.9-1.0"  # don't create a 1.0-1.1 bucket
            prob_hist.add(prob_key)
            real_output = example["_OUTPUT"] == 1
            if real_output:
                output1_scores.append(prob)
            else:
                output0_scores.append(prob)

        output1_scores.sort()
        output0_scores.sort()

        if print_level >= 2:
            print "%d output1 scores:" % len(output1_scores),
            print ["%2.2f" % i for i in output1_scores[0:5]],
            print " ... ",
Example #9
0
    def test_load_job_results(self):
        """Stores job results in the DB and checks they can be loaded back.
        """
        db_obj = self._db
        hist = Histogram()
        stat = NumericStats()
        self.addCleanup(self._del_table, "histograms")
        self.addCleanup(self._del_table, "numericStats")
        hist.create_table(db_obj)
        stat.create_table(db_obj)

        rt = ResultTrace()
        # Two synthetic PBS jobs: every column has one entry per job.
        pbs_list = {
            "account": ["account1", "account2"],
            "cores_per_node": [24, 48],
            "numnodes": [100, 200],
            "class": ["queue1", "queue2"],
            "wallclock_requested": [120, 368],
            "created": [1000, 2000],
            "start": [1100, 2200],
            "completion": [1500, 2700],
            "jobname": ["name1", "name2"]
        }
        rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)

        # Calculate and store (store=True) under trace_id 1.
        rt.calculate_job_results(True, db_obj, 1)

        db_obj = self._db
        new_rt = ResultTrace()
        new_rt.load_job_results(db_obj, 1)

        for field in [
                "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
                "jobs_waittime_stats", "jobs_turnaround_cdf",
                "jobs_turnaround_stats", "jobs_requested_wc_cdf",
                "jobs_requested_wc_stats", "jobs_cpus_alloc_cdf",
                "jobs_cpus_alloc_stats", "jobs_slowdown_cdf",
                "jobs_slowdown_stats"
        ]:
            # BUG FIX: previously asserted on rt (the object that calculated
            # the results) instead of new_rt (the object that loaded them),
            # so the load path was never actually verified.
            self.assertNotEqual(new_rt.jobs_results[field], None)
Example #10
0
"""
Creates the SQL schema for the workload databases.
 
Env vars:
- ANALYSIS_DB_HOST: hostname of the system hosting the database.
- ANALYSIS_DB_NAME: database name to read from.
- ANALYSIS_DB_USER: user to be used to access the database.
- ANALYSIS_DB_PASS: password to be used to access the database.
- ANALYSIS_DB_PORT: port on which the database runs. 
"""

from orchestration import get_central_db

from orchestration.definition import ExperimentDefinition
from stats.trace import ResultTrace
from stats import Histogram, NumericStats

db_obj = get_central_db()

# Create every table the analysis pipeline writes to, issuing the DDL
# against the central database connection obtained above.
ExperimentDefinition().create_table(db_obj)
ResultTrace().create_trace_table(db_obj, ResultTrace()._table_name)
Histogram().create_table(db_obj)
# Utilization results live in their own table, derived from ResultTrace.
ResultTrace()._get_utilization_result().create_table(db_obj)

NumericStats().create_table(db_obj)
    def setUp(self):
        """Connects to the test DB, creates all result tables (with cleanup
        hooks), and stores a two-job trace under trace_id 1 as self._rt."""
        self._db = DB(os.getenv("TEST_DB_HOST", "127.0.0.1"),
                      os.getenv("TEST_DB_NAME", "test"),
                      os.getenv("TEST_DB_USER", "root"),
                      os.getenv("TEST_DB_PASS", ""))

        # Result tables; each is dropped again when the test finishes.
        histogram = Histogram()
        histogram.create_table(self._db)
        self.addCleanup(self._del_table, histogram._table_name)

        numeric_stats = NumericStats()
        numeric_stats.create_table(self._db)
        self.addCleanup(self._del_table, numeric_stats._table_name)

        usage_list = NumericList("usage_values", ["utilization", "waste"])
        usage_list.create_table(self._db)
        self.addCleanup(self._del_table, "usage_values")

        table_maker = ResultTrace()
        self.addCleanup(self._del_table, "import_table")
        table_maker.create_import_table(self._db, "import_table")

        self.addCleanup(self._del_table, "traces")
        table_maker.create_trace_table(self._db, "traces")

        # A fresh trace holding two jobs, stored under trace_id 1. The
        # "start" lists carry the same jobs in reverse order.
        trace = ResultTrace()
        trace._lists_submit = {
            "job_db_inx": [1, 2],
            "account": ["account1", "account2"],
            "cpus_req": [48, 96],
            "cpus_alloc": [48, 96],
            "job_name": ["jobName1", "jbname2"],
            "id_job": [1, 2],
            "id_qos": [2, 3],
            "id_resv": [3, 4],
            "id_user": [4, 5],
            "nodes_alloc": [2, 4],
            "partition": ["partition1", "partition2"],
            "priority": [99, 199],
            "state": [3, 2],
            "timelimit": [100, 200],
            "time_submit": [3000, 3001],
            "time_start": [3002, 3001],
            "time_end": [3002, 3005]
        }
        trace._lists_start = {
            "job_db_inx": [2, 1],
            "account": ["account2", "account1"],
            "cpus_req": [96, 48],
            "cpus_alloc": [96, 48],
            "job_name": ["jobName2", "jobName1"],
            "id_job": [2, 1],
            "id_qos": [3, 2],
            "id_resv": [4, 3],
            "id_user": [5, 4],
            "nodes_alloc": [4, 2],
            "partition": ["partition2", "partition1"],
            "priority": [199, 99],
            "state": [2, 3],
            "timelimit": [200, 100],
            "time_submit": [3003, 3000],
            "time_start": [3001, 3002],
            "time_end": [3005, 3002]
        }
        trace.store_trace(self._db, 1)
        self._rt = trace
Example #12
0
    def test_calculate(self):
        """Checks histogram bin/edge computation for several configurations."""
        histogram = Histogram()

        # Default binning over the raw data (bin width 1).
        histogram.calculate([1, 2, 3, 3, 5], 1)
        computed_bins, computed_edges = histogram.get_data()
        self.assertEqual(computed_edges, [1, 2, 3, 4, 5, 6])
        self.assertEqual(list(computed_bins), [0.2, 0.2, 0.4, 0, 0.2])

        # Restricting the range with minmax: the sample at 5 is excluded,
        # so frequencies are normalized over the remaining four samples.
        histogram.calculate([1, 2, 3, 3, 5], 1, minmax=(1, 3))
        self.assertEqual(histogram._get("edges"), [1, 2, 3, 4])
        self.assertEqual(list(histogram._get("bins")), [0.25, 0.25, 0.5])

        # Explicit input_bins takes precedence over the bin width.
        histogram.calculate([1, 2, 3, 3, 5], 1, minmax=(1, 3),
                            input_bins=[1, 6])
        self.assertEqual(histogram._get("edges"), [1, 6])
        self.assertEqual(list(histogram._get("bins")), [1.0])
Example #13
0
    def test_save_load(self):
        """Stores a histogram in the DB and checks it loads back unchanged."""
        hist = Histogram()
        self.addCleanup(self._del_table, "histograms")
        hist.create_table(self._db)

        hist.calculate([1, 2, 3, 3, 5], 1)

        # Store under trace_id 1. The returned id was previously bound to an
        # unused local (data_id); the dead assignment is removed.
        hist.store(self._db, 1, "MyHist")
        # Drop the original object so the assertions below can only pass if
        # the data really came back from the DB.
        hist = None
        hist_new = Histogram()
        hist_new.load(self._db, 1, "MyHist")
        self.assertEqual(hist_new._get("edges"), [1, 2, 3, 4, 5, 6])
        self.assertEqual(list(hist_new._get("bins")), [0.2, 0.2, 0.4, 0, 0.2])