def test_is_valid(self):
    """
    Certain arguments are required for a ModelJob to be valid. Check them here.
    """
    job = ModelJob()
    self.assertFalse(job.is_valid())

    # If all of the required arguments are supplied, this should result in a valid job
    ts_complete_set = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
                       for tsk in time_signal_names}

    valid_args = {
        'time_start': 0,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': ts_complete_set
    }
    self.assertTrue(ModelJob(**valid_args).is_valid())

    # If any of the required arguments is missing, the job should be invalid
    for k in valid_args.keys():
        invalid_args = valid_args.copy()
        del invalid_args[k]
        self.assertFalse(ModelJob(**invalid_args).is_valid())

def test_merge(self):
    """
    Can we merge multiple ModelJobs?

    n.b. Currently this only supports time signals!
    TODO: Merge the non-time-signal data.
    """
    # n.b. non-zero values. Zero time signals are ignored.
    kb_read1 = TimeSignal.from_values('kb_read', [0.0], [1.0], priority=8)
    kb_read2 = TimeSignal.from_values('kb_read', [0.0], [1.0], priority=10)
    kb_write1 = TimeSignal.from_values('kb_write', [0.0], [1.0], priority=8)

    # Test that we take the union of the available time series
    job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read1})
    job2 = ModelJob(label="label1", timesignals={'kb_write': kb_write1})
    job1.merge(job2)

    self.assertEqual(len(job1.timesignals), len(signal_types))
    self.assertEqual(job1.timesignals['kb_read'], kb_read1)
    self.assertEqual(job1.timesignals['kb_write'], kb_write1)

    # (The other time signals should still be None)
    for ts_name in signal_types:
        if ts_name in ['kb_read', 'kb_write']:
            continue
        self.assertIn(ts_name, job1.timesignals)
        self.assertIsNone(job1.timesignals[ts_name])

    # Check that when merging we take the signal with the highest priority index
    job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read1})
    job2 = ModelJob(label="label1", timesignals={'kb_read': kb_read2})
    job1.merge(job2)
    self.assertEqual(job1.timesignals['kb_read'], kb_read2)

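# A minimal usage sketch building on the merge behaviour exercised above: when
# several ingested records describe the same logical job, a fold over the list
# gives one consolidated ModelJob. "consolidate" is a hypothetical helper (not
# part of the package) and assumes the ModelJob import already used in this module.
import copy

def consolidate(jobs):
    """Merge a list of ModelJobs sharing one label into a single job."""
    assert jobs, "need at least one job"
    merged = copy.deepcopy(jobs[0])  # leave the originals untouched
    for other in jobs[1:]:
        merged.merge(other)          # raises AssertionError if the labels differ
    return merged
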
def test_merge_fails_different_label(self):
    """
    We should not be able to merge two jobs with differing labels, as these
    don't correspond to the same overall job.
    """
    job1 = ModelJob(label="a-label-1")
    job2 = ModelJob(label="a-label-2")

    self.assertRaises(AssertionError, lambda: job1.merge(job2))

def model_job(self):
    """
    Return a ModelJob from the supplied information.
    """
    assert len(self.tasks) > 0

    # Combine the task metadata. We take the first observed task time as the
    # start time, and the last observed time as the end time.
    time_start = self.time_start
    time_end = max(t.time_end for t in self.tasks)
    duration = time_end - time_start

    # The number of tasks and hosts may also vary, as there may be multiple
    # aprun/mpirun/mpiexec calls profiled
    ntasks = max(t.ntasks for t in self.tasks)
    nhosts = max(t.nhosts for t in self.tasks)

    # TODO: We want to capture multi-threading as well as multi-processing somewhere

    return ModelJob(
        job_name=self.jobname,
        user_name=self.user,
        queue_name=self.queue_type,
        time_queued=self.time_queued,
        time_start=self.time_start,
        duration=duration,
        ncpus=ntasks,
        nnodes=nhosts,
        stdout=self.stdout,
        label=self.label,
        timesignals=self.model_time_series(),
    )

def model_jobs(self):
    for job in self.joblist:
        assert isinstance(job, IngestedJob)
        assert not job.timesignals

        # yield ModelJob(
        #     time_start=job.time_start - self.global_start_time,
        #     duration=job.time_end - job.time_start,
        #     ncpus=job.ncpus,
        #     nnodes=job.nnodes,
        #     scheduler_timing=job.time_queued,
        #     stdout=job.stdout
        # )
        yield ModelJob(
            job_name=job.jobname,
            user_name=job.user,
            cmd_str=job.cmd_str,
            queue_name=job.queue_type,
            time_queued=job.time_queued,
            time_start=job.time_start,
            duration=job.time_end - job.time_start,
            ncpus=job.ncpus,
            nnodes=job.nnodes,
            scheduler_timing=job.time_queued,
            stdout=job.stdout,
            label=None,
        )

def test_merge_ignores_empty_timesignals(self):
    """
    When merging in time signals from another job, if there is no data in the
    "other" time signal, then it should be ignored for merging purposes.
    """
    kb_read = TimeSignal.from_values('kb_read', [0.0], [1.0])
    kb_write = TimeSignal.from_values('kb_write', [0.0], [0.0])  # n.b. zero data

    job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
    job2 = ModelJob(label="label1", timesignals={'kb_write': kb_write})

    self.assertIsNone(job1.timesignals['kb_write'])
    self.assertIsNotNone(job2.timesignals['kb_write'])

    job1.merge(job2)
    self.assertIsNone(job1.timesignals['kb_write'])

def test_reject_mislabelled_time_signals(self):
    """
    The initialisation routine should reject invalid time signals in a model job.
    """
    self.assertRaises(
        ModellingError,
        lambda: ModelJob(timesignals={
            'kb_write': TimeSignal.from_values('kb_read', [0.0], [0.0]),
        }))

def test_reanimation_kprofile(self):
    """
    The purpose of the KProfile is to be able to (re-)animate ModelJobs from the input data.
    """
    valid = {
        "version": 1,
        "tag": "KRONOS-KPROFILE-MAGIC",
        "created": "2016-12-14T09:57:35Z",  # Timestamp in strict rfc3339 format.
        "uid": 1234,
        "workload_tag": "A-tag",
        "profiled_jobs": [{
            "time_start": 537700,
            "time_queued": 99,
            "duration": 147,
            "ncpus": 72,
            "nnodes": 2,
            "time_series": {
                "kb_read": {
                    "times": [0.01, 0.02, 0.03, 0.04],
                    "values": [15, 16, 17, 18],
                    "priority": 10
                }
            }
        }]
    }

    pf = ProfileFormat.from_file(StringIO(json.dumps(valid)))

    jobs = [ModelJob.from_json(j) for j in pf.profiled_jobs]
    self.assertEqual(len(jobs), 1)
    self.assertIsInstance(jobs[0], ModelJob)

    self.assertEqual(jobs[0].time_start, 537700)
    self.assertEqual(jobs[0].time_queued, 99)
    self.assertEqual(jobs[0].duration, 147)
    self.assertEqual(jobs[0].ncpus, 72)
    self.assertEqual(jobs[0].nnodes, 2)

    self.assertEqual(len(jobs[0].timesignals), len(signal_types))
    self.assertIn('kb_read', jobs[0].timesignals)
    for name, signal in jobs[0].timesignals.items():
        if name == 'kb_read':
            self.assertIsInstance(signal, TimeSignal)
            self.assertTrue(all(x1 == x2 for x1, x2 in zip(signal.xvalues, [0.01, 0.02, 0.03, 0.04])))
            self.assertTrue(all(y1 == y2 for y1, y2 in zip(signal.yvalues, [15, 16, 17, 18])))
        else:
            self.assertIsNone(signal)

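# A minimal sketch of re-animating ModelJobs from a KProfile file on disk, using
# only the calls exercised in the test above (ProfileFormat.from_file and
# ModelJob.from_json). The helper name and the path are hypothetical.
def load_model_jobs(kprofile_path="profile.kprofile"):
    with open(kprofile_path, "r") as f:
        pf = ProfileFormat.from_file(f)
    return [ModelJob.from_json(j) for j in pf.profiled_jobs]
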
def test_initialisation(self):
    # Test some defaults
    job = ModelJob()

    for attr in ['time_start', 'ncpus', 'nnodes', 'duration', 'label']:
        self.assertTrue(hasattr(job, attr))
        self.assertIsNone(getattr(job, attr))

    for ts_name in signal_types:
        self.assertIn(ts_name, job.timesignals)
        self.assertIsNone(job.timesignals[ts_name])

    # Test that we can override specified fields
    job = ModelJob(
        timesignals={
            'kb_read': TimeSignal.from_values('kb_read', [0.0], [0.0]),
            'kb_write': TimeSignal.from_values('kb_write', [0.0], [0.0]),
        },
        time_start=123,
        ncpus=4,
        nnodes=5,
        duration=678,
        label="a-label")

    self.assertEqual(job.time_start, 123)
    self.assertEqual(job.ncpus, 4)
    self.assertEqual(job.nnodes, 5)
    self.assertEqual(job.duration, 678)
    self.assertEqual(job.label, "a-label")

    self.assertIsInstance(job.timesignals['kb_read'], TimeSignal)
    self.assertIsInstance(job.timesignals['kb_write'], TimeSignal)
    for ts_name in signal_types:
        if ts_name in ['kb_read', 'kb_write']:
            continue
        self.assertIn(ts_name, job.timesignals)
        self.assertIsNone(job.timesignals[ts_name])

    # Test that we cannot set unexpected (non-specified) fields
    self.assertRaises(ModellingError, lambda: ModelJob(invalid=123))

def model_job(self):
    """
    Return a model job from this job.
    """
    return ModelJob(
        time_start=0,
        duration=self.max_duration(),
        ncpus=self.n_procs,
        nnodes=self.n_nodes,
        timesignals=self.timesignals,
        label=self.name)

def model_jobs(self):
    for job in self.joblist:
        assert isinstance(job, IngestedJob)

        yield ModelJob(
            job_name=job.jobname,
            user_name=job.user,
            queue_name=job.queue_type,
            cmd_str=job.cmd_str,
            time_queued=job.time_queued,
            time_start=job.time_start,
            duration=job.time_end - job.time_start,
            ncpus=job.ncpus,
            nnodes=job.nnodes,
            stdout=job.stdout,
            label=self.json_label_map[os.path.basename(job.jobname)] if self.json_label_map else None,
            timesignals=job.timesignals,
        )

def model_job(self):
    """
    Return a ModelJob from the supplied information.
    """
    if float(self.log_version) <= 2.0:
        raise DarshanLogReaderError("Darshan log version unsupported")

    return ModelJob(
        job_name=self.filename,
        user_name=None,    # not provided
        cmd_str=self.exe_cmd,
        queue_name=None,   # not provided
        time_queued=None,  # not provided
        time_start=self.time_start,
        duration=self.time_end - self.time_start,
        ncpus=self.nprocs,
        nnodes=None,       # not provided
        stdout=None,       # not provided
        label=self.label,
        timesignals=self.model_time_series(),
    )

def model_jobs(self):
    for job in self.joblist:
        # assert isinstance(job, IngestedJob)
        assert not job.timesignals

        # if job.time_created >= 0:
        #     submit_time = job.time_created - self.global_created_time
        # else:
        #     submit_time = job.time_start - self.global_start_time

        yield ModelJob(
            job_name=job.jobname,
            user_name=job.user,
            cmd_str=job.cmd_str,
            queue_name=job.queue_type,
            time_queued=job.time_queued,
            time_start=job.time_start,
            duration=job.time_end - job.time_start,
            ncpus=job.ncpus,
            nnodes=job.nnodes,
            stdout=job.stdout,
            label=None,
        )

def test_workload_fillin_default(self):
    """
    Test the correct assignment of user-defined time series.
    """
    io_metrics = ['kb_read', 'kb_write', 'n_read', 'n_write']

    # Create 2 random jobs (with ONLY the I/O metrics)
    valid_args_1 = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.random.rand(10))
                        for tsk in io_metrics}
    }
    job1 = ModelJob(**valid_args_1)

    valid_args_2 = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.random.rand(10))
                        for tsk in io_metrics}
    }
    job2 = ModelJob(**valid_args_2)

    test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

    # ---------------------- fill-in config -----------------------
    filling_funct_config = [
        {
            "type": "step",
            "name": "step-1",
            "x_step": 0.5
        },
        {
            "type": "custom",
            "name": "custom-1",
            "x_values": [0, 0.1, 0.15, 0.3333, 0.5, 0.8, 0.9, 1.0],
            "y_values": [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
        }
    ]

    # Values to assign to all the unspecified metrics
    default_config = {
        "type": "fill_missing_entries",
        "apply_to": ["wl_2jobs"],
        "priority": 0,
        "metrics": {
            "kb_collective": [100, 101],
            "n_collective": [100, 101],
            "kb_pairwise": {
                "function": "step-1",
                "scaling": 1000.0
            },
            "n_pairwise": {
                "function": "custom-1",
                "scaling": 1000.0
            },
            "flops": [100, 101],
        }
    }

    # Update the filling config with the user-defined functions
    default_config.update({"user_functions": filling_funct_config})

    # Apply the user defaults to the workloads
    workloads = [test_workload]
    filler = StrategyUserDefaults(workloads)
    filler.apply(default_config)

    # Test that the I/O metrics are within the random range used: [0, 1]
    for j in workloads[0].jobs:
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['n_write'].xvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['n_write'].yvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['kb_write'].xvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['kb_write'].yvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['n_read'].xvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['n_read'].yvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['kb_read'].xvalues]))
        self.assertTrue(all([0.0 < x < 1.0 for x in j.timesignals['kb_read'].yvalues]))

    # Test that the defaulted metrics are within the configured range: [100, 101]
    for j in workloads[0].jobs:
        self.assertTrue(all([100 < x < 101 for x in j.timesignals['flops'].yvalues]))
        self.assertTrue(all([100 < x < 101 for x in j.timesignals['n_collective'].yvalues]))
        self.assertTrue(all([100 < x < 101 for x in j.timesignals['kb_collective'].yvalues]))

    # Test that the user-defined functions are applied as expected
    for j in workloads[0].jobs:
        # values vs expected (custom function, scaled by 1000)
        val_exp = zip(j.timesignals['n_pairwise'].yvalues, [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0])
        self.assertTrue(all([x == y * 1000. for x, y in val_exp]))

        # and the step function
        self.assertTrue(all([(x == 0 or x == 1000.) for x in j.timesignals['kb_pairwise'].yvalues]))

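# A hypothetical illustration of the "step" filler semantics that the last
# assertion relies on: below x_step the signal is 0, at or above it the value is
# the scaling factor. This is only a sketch of the assumed behaviour, not the
# actual StrategyUserDefaults implementation.
def step_values(xvalues, x_step=0.5, scaling=1000.0):
    return [0.0 if x < x_step else scaling for x in xvalues]

# e.g. step_values([0.0, 0.25, 0.5, 0.75, 1.0]) -> [0.0, 0.0, 1000.0, 1000.0, 1000.0]
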
def test_generator(self):
    """
    End-to-end test of the cluster-and-spawn workload generator.
    """
    # If all of the required arguments are supplied, this should result in a valid job
    ts_complete_set = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
                       for tsk in time_signal_names}

    valid_args = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': ts_complete_set
    }

    ts_complete_set_2 = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 444.])
                         for tsk in time_signal_names}

    valid_args_2 = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': ts_complete_set_2
    }

    # Build a small set of valid jobs
    job1 = ModelJob(**valid_args)
    job1.label = "job1"
    job2 = ModelJob(**valid_args_2)
    job2.label = "job2"
    job3 = ModelJob(**valid_args)
    job3.label = "job3"
    job4 = ModelJob(**valid_args_2)
    job4.label = "job4"
    job5 = ModelJob(**valid_args)
    job5.label = "job5"

    input_jobs = [job1, job2, job3, job4, job5]

    # Diversify the start times..
    for jj, job in enumerate(input_jobs):
        job.time_start += jj * 0.1

    for job in input_jobs:
        self.assertTrue(job.is_valid())

    config_generator = {
        "type": "cluster_and_spawn",
        "job_clustering": {
            "type": "Kmeans",
            "rseed": 0,
            "apply_to": ["test_wl_0"],
            "ok_if_low_rank": True,
            "max_iter": 100,
            "max_num_clusters": 3,
            "delta_num_clusters": 1,
            "num_timesignal_bins": 1,
            "user_does_not_check": True
        },
        "job_submission_strategy": {
            "type": "match_job_pdf_exact",
            "n_bins_for_pdf": 20,
            "submit_rate_factor": 8,
            "total_submit_interval": 60,
            "random_seed": 0
        }
    }

    # Select the appropriate workload modelling strategy
    workloads = [
        Workload(jobs=input_jobs, tag='test_wl_0'),
        Workload(jobs=input_jobs, tag='test_wl_1'),
        Workload(jobs=input_jobs, tag='test_wl_2')
    ]

    workload_modeller = workload_modelling_types[config_generator["type"]](workloads)
    workload_modeller.apply(config_generator)

    # Get the newly created set of (modelled) workloads
    workload_set = workload_modeller.get_workload_set()

    # Make sure that we are creating only one workload
    self.assertEqual(len(workload_set.workloads), 1)

    # ---- check that all the jobs are correctly formed.. ----

    # Check that each job has time signals, as expected..
    for job in workload_set.workloads[0].jobs:
        self.assertTrue(hasattr(job, "timesignals"))

    # Check that each job has all the time signals at this point..
    for job in workload_set.workloads[0].jobs:
        self.assertTrue(all([k in job.timesignals.keys() for k in time_signal_names]))

def generate_jobs(self):

    logger.info("====> Generating jobs from sub-workload: {}, "
                "that has {} jobs".format(self.wl_clusters['source-workload'],
                                          len(self.wl_clusters['jobs_for_clustering'])))

    start_times_vec_sa, _, _ = self.schedule_strategy.create_schedule()

    np.random.seed(self.config["job_submission_strategy"]['random_seed'])

    n_modelled_jobs = len(start_times_vec_sa)
    n_clusters = self.wl_clusters['cluster_matrix'].shape[0]
    clustered_jobs_all = self.wl_clusters['jobs_for_clustering']
    clustered_jobs_labels = self.wl_clusters['labels']

    # This is the fraction of jobs that needs to be taken from each cluster..
    n_job_cluster = len(self.wl_clusters['jobs_for_clustering'])
    model_job_fraction = min(1.0, n_modelled_jobs / float(n_job_cluster))

    chosen_model_jobs = []
    vec_clust_indexes = np.asarray([], dtype=int)
    for ic in range(n_clusters):

        # Jobs in this cluster
        jobs_in_cluster = np.asarray(clustered_jobs_all)[clustered_jobs_labels == ic]
        jobs_in_cluster_idxs = np.arange(len(jobs_in_cluster))
        n_jobs_to_generate_from_cluster = max(1, int(len(jobs_in_cluster) * model_job_fraction))

        if n_jobs_to_generate_from_cluster > len(jobs_in_cluster):
            n_jobs_to_generate_from_cluster = len(jobs_in_cluster)

        # Take a random sample of jobs in the cluster (according to the job fraction)
        jobs_in_cluster_sampled_idx = np.random.choice(jobs_in_cluster_idxs,
                                                       n_jobs_to_generate_from_cluster,
                                                       replace=False)
        jobs_in_cluster_for_generation = []
        for idxj in jobs_in_cluster_sampled_idx:
            jobs_in_cluster_for_generation.append(jobs_in_cluster[idxj])

        # chosen_model_jobs.extend(jobs_in_cluster[jobs_in_cluster_sampled_idx])
        chosen_model_jobs.extend(jobs_in_cluster_for_generation)

        vec_clust_indexes = np.append(vec_clust_indexes,
                                      np.ones(n_jobs_to_generate_from_cluster, dtype=int) * ic)

    # Generate model jobs as needed
    generated_model_jobs = []
    for cc, job in enumerate(chosen_model_jobs):

        job_copy = copy.deepcopy(job)

        # Assign this job a start time (if more jobs are created than scheduled
        # start times, the start time is chosen randomly among the available ones..)
        if cc < len(start_times_vec_sa):
            start_time = start_times_vec_sa[cc]
        else:
            start_time = start_times_vec_sa[np.random.randint(0, len(start_times_vec_sa), 1)[0]]

        # Choose a number of cpus for this job
        if not job.ncpus:
            logger.warning("job ID {} has no ncpus specified, "
                           "defaulting to ncpus=1 instead".format(cc))
            job_ncpus = 1
        else:
            job_ncpus = job.ncpus

        # Choose a number of nodes for this job
        if not job.nnodes:
            job_nnodes = 1
        else:
            job_nnodes = job.nnodes

        # Spawn and append the model job
        job = ModelJob(
            time_start=start_time,
            duration=None,
            ncpus=job_ncpus,
            nnodes=job_nnodes,
            timesignals=job_copy.timesignals,
            label="job-{}".format(cc)
        )
        generated_model_jobs.append(job)

    n_cluster_jobs = len(self.wl_clusters['jobs_for_clustering'])
    n_job_ratio = len(generated_model_jobs) / float(n_cluster_jobs) * 100.

    logger.info("<==== Generated {} jobs (#job ratio = {:.2f}%)".format(
        len(generated_model_jobs), n_job_ratio))

    return generated_model_jobs, vec_clust_indexes.tolist()

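# A standalone sketch of the per-cluster sampling rule used above: each cluster
# contributes a share proportional to the number of scheduled start times, with
# at least one job and never more than the cluster contains. The helper name is
# hypothetical and the function is illustrative only.
import numpy as np

def sample_per_cluster(jobs, labels, n_modelled_jobs, seed=0):
    rng = np.random.RandomState(seed)
    jobs, labels = np.asarray(jobs), np.asarray(labels)
    fraction = min(1.0, n_modelled_jobs / float(len(jobs)))
    chosen, chosen_labels = [], []
    for cluster_id in sorted(set(labels.tolist())):
        cluster_jobs = jobs[labels == cluster_id]
        n_take = min(len(cluster_jobs), max(1, int(len(cluster_jobs) * fraction)))
        for i in rng.choice(len(cluster_jobs), n_take, replace=False):
            chosen.append(cluster_jobs[i])
        chosen_labels.extend([cluster_id] * n_take)
    return chosen, chosen_labels
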
def generate_jobs(self):

    logger.info("Generating jobs from cluster: {}, "
                "that has {} jobs".format(self.wl_clusters['source-workload'],
                                          len(self.wl_clusters['jobs_for_clustering'])))

    start_times_vec_sa, _, _ = self.schedule_strategy.create_schedule()

    # Random vector of cluster indexes
    n_modelled_jobs = len(start_times_vec_sa)
    np.random.seed(self.config["job_submission_strategy"].get('random_seed', 0))
    vec_clust_indexes = np.random.randint(self.wl_clusters['cluster_matrix'].shape[0],
                                          size=n_modelled_jobs)

    # Mean NCPU in cluster (considering jobs in cluster)
    jobs_all = self.wl_clusters['jobs_for_clustering']
    lab_all = np.asarray(self.wl_clusters['labels'])

    # Jobs in each cluster
    jobs_in_each_cluster = {cl: np.asarray(jobs_all)[lab_all == cl] for cl in set(lab_all)}

    # Mean #CPUS in each cluster (from jobs for which ncpus is available, otherwise 1)
    mean_cpus = {cl_id: np.mean([job.ncpus if job.ncpus else 1 for job in cl_jobs])
                 for cl_id, cl_jobs in jobs_in_each_cluster.items()}

    # Mean #NODES in each cluster (from jobs for which nnodes is available, otherwise 1)
    mean_nodes = {cl_id: np.mean([job.nnodes if job.nnodes else 1 for job in cl_jobs])
                  for cl_id, cl_jobs in jobs_in_each_cluster.items()}

    # Loop over the clusters and generate jobs as needed
    generated_model_jobs = []
    for cc, cl_idx in enumerate(vec_clust_indexes):

        ts_dict = {}
        row = self.wl_clusters['cluster_matrix'][cl_idx, :]
        ts_yvalues = np.split(row, len(time_signal_names))
        for tt, ts_vv in enumerate(ts_yvalues):
            ts_name = time_signal_names[tt]
            ts_dict[ts_name] = TimeSignal.from_values(ts_name, np.arange(len(ts_vv)), ts_vv)

        job = ModelJob(
            time_start=start_times_vec_sa[cc],
            job_name="job-{}-cl-{}".format(cc, cl_idx),
            duration=None,
            ncpus=mean_cpus[cl_idx],
            nnodes=mean_nodes[cl_idx],
            timesignals=ts_dict,
            label="job-{}-cl-{}".format(cc, cl_idx))

        generated_model_jobs.append(job)

    n_sa = len(generated_model_jobs)
    n_job_ratio = n_sa / float(len(self.wl_clusters['jobs_for_clustering'])) * 100.

    logger.info("====> Generated {} jobs from cluster (#job ratio = {:.2f}%)".format(
        n_sa, n_job_ratio))

    return generated_model_jobs, vec_clust_indexes

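# A small worked example of the cluster-row layout assumed above: each row of
# cluster_matrix concatenates the binned values of every metric, in
# time_signal_names order, so np.split recovers one series per metric.
# The names and numbers below are illustrative only.
import numpy as np

example_signal_names = ['kb_read', 'kb_write', 'flops']
example_row = np.array([1., 2., 3., 4., 5., 6.])  # 2 bins per metric
per_metric = dict(zip(example_signal_names, np.split(example_row, len(example_signal_names))))
# per_metric['kb_write'] -> array([3., 4.])
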
def test_workload_fillin_match(self):
    """
    Test the metrics assignment through job name (label) matching.
    """
    # ------------ target jobs -------------------
    valid_args_1 = {
        'job_name': "blabla_weird_name",
        'time_start': 0.1,
        'duration': 0.222,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.arange(10) * 2)
                        for tsk in time_signal_names}
    }
    job1 = ModelJob(**valid_args_1)

    valid_args_2 = {
        'job_name': "job_match",
        'time_start': 0.1,
        'duration': 0.333,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {}
    }
    job2 = ModelJob(**valid_args_2)

    # ------ target workload (that will receive the time metrics..)
    target_wl = Workload(jobs=[job1, job2], tag='target_workload')

    # ---------- source workload
    valid_args_3 = {
        'job_name': "job_match",
        'time_start': 0.1,
        'duration': 0.333,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.random.rand(10))
                        for tsk in time_signal_names}
    }
    job3 = ModelJob(**valid_args_3)
    source_wl = Workload(jobs=[job3], tag='wl_match_source')

    # Filler config
    filler_config = {
        "type": "match_by_keyword",
        "priority": 0,
        "keywords": ["job_name"],
        "similarity_threshold": 0.3,
        "source_workloads": ["wl_match_source"],
        "apply_to": ["target_workload"]
    }

    # Apply the user defaults to the workloads
    workloads = [target_wl, source_wl]
    filler = StrategyMatchKeyword(workloads)
    filler.apply(filler_config)

    # for ts_k, ts_v in job3.timesignals.iteritems():
    #     print "JOB3:{}:{}".format(ts_k, ts_v.yvalues)
    #
    # for ts_k, ts_v in target_wl.jobs[1].timesignals.iteritems():
    #     print "TRG_J1:{}:{}".format(ts_k, ts_v.yvalues)

    # The matched target job should have received job3's time signals
    self.assertTrue(all([all(ys == yt for ys, yt in zip(job3.timesignals[ts_k].yvalues, ts_v.yvalues))
                         for ts_k, ts_v in target_wl.jobs[1].timesignals.items()]))

def test_workload_data(self):

    # If all of the required arguments are supplied, this should result in a valid job
    ts_complete_set = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
                       for tsk in time_signal_names}

    valid_args = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': ts_complete_set
    }

    # Build a set of valid jobs
    job1 = ModelJob(**valid_args)
    job2 = ModelJob(**valid_args)
    job3 = ModelJob(**valid_args)
    job4 = ModelJob(**valid_args)
    job5 = ModelJob(**valid_args)

    input_jobs = [job1, job2, job3, job4, job5]

    # Diversify the start times..
    for jj, job in enumerate(input_jobs):
        job.time_start += jj * 0.1

    for job in input_jobs:
        self.assertTrue(job.is_valid())

    # Create a workload with 5 model jobs
    test_workload = Workload(jobs=input_jobs, tag='test_wl')

    # -- verify that all the jobs in the workload are actually the initial jobs provided --
    self.assertTrue(all(job is input_jobs[jj] for jj, job in enumerate(test_workload.jobs)))

    # ------------ verify sums of time signals -------------------
    for ts_name in signal_types:
        ts_sum = 0
        for j in input_jobs:
            ts_sum += sum(j.timesignals[ts_name].yvalues)

        # Verify the sums..
        self.assertEqual(ts_sum, test_workload.total_metrics_sum_dict[ts_name])

    # ------------ verify global time signals -------------------
    valid_args_1 = {
        'time_start': 0.1,
        'duration': 0.222,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.random.rand(10))
                        for tsk in time_signal_names}
    }
    job1 = ModelJob(**valid_args_1)

    valid_args_2 = {
        'time_start': 0.1,
        'duration': 0.333,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': {tsk: TimeSignal.from_values(tsk, np.random.rand(10), np.random.rand(10))
                        for tsk in time_signal_names}
    }
    job2 = ModelJob(**valid_args_2)

    test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

    for job in [job1, job2]:
        for ts in signal_types:
            self.assertTrue(all(v + job.time_start in test_workload.total_metrics_timesignals[ts].xvalues
                                for v in job.timesignals[ts].xvalues))
            self.assertTrue(all(v in test_workload.total_metrics_timesignals[ts].yvalues
                                for v in job.timesignals[ts].yvalues))

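# A hedged sketch of the aggregation the assertions above imply (not necessarily
# the actual Workload implementation): the global signal pools every job's
# values, with each job's xvalues shifted onto the workload timeline by its
# time_start. The helper name is hypothetical.
def global_signal_points(jobs, ts_name):
    xg, yg = [], []
    for job in jobs:
        ts = job.timesignals[ts_name]
        xg.extend(x + job.time_start for x in ts.xvalues)
        yg.extend(ts.yvalues)
    return xg, yg
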
def test_merge_rejects_mislabelled_time_signals(self):
    """
    Test that the merging routine checks the labelling validity. Both ways around.
    """
    kb_read = TimeSignal.from_values('kb_read', [0.0], [1.0])
    kb_write = TimeSignal.from_values('kb_read', [0.0], [1.0])  # n.b. mislabelled

    job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
    job2 = ModelJob(label="label1")
    job2.timesignals['kb_write'] = kb_write

    self.assertRaises(ModellingError, lambda: job1.merge(job2))

    # And the other way around
    job2 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
    job1 = ModelJob(label="label1")
    job1.timesignals['kb_write'] = kb_write

    self.assertRaises(ModellingError, lambda: job1.merge(job2))

def test_splitter(self):

    # -------------- prepare a couple of dummy jobs ---------------
    # If all of the required arguments are supplied, this should result in a valid job
    ts_complete_set = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
                       for tsk in time_signal_names}

    ts_complete_set_2 = {tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 444.])
                         for tsk in time_signal_names}

    valid_args = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': ts_complete_set,
        'job_name': "job_name_1"
    }

    valid_args_2 = {
        'time_start': 0.2,
        'duration': 0.4,
        'ncpus': 2,
        'nnodes': 2,
        'timesignals': ts_complete_set_2,
        'job_name': "job_name_2"
    }

    # A model job that will NOT be picked by the algorithm..
    job1 = ModelJob(**valid_args)
    job1.label = "label_nottobepicked"

    # A model job that WILL be picked by the algorithm..
    job2 = ModelJob(**valid_args_2)
    job2.label = "label_includeme"

    # Dummy workload with 20 jobs
    np.random.seed(0)
    jobs_all = []
    for i in range(20):

        # Spawn a new job from either job1 or job2
        if np.random.rand() < 0.5:
            new_job = copy.deepcopy(job1)
        else:
            new_job = copy.deepcopy(job2)

        # Collect it (the copy keeps the label of the job it was spawned from)
        jobs_all.append(new_job)

    # Create a workload out of all the jobs..
    workload = Workload(jobs=jobs_all, tag="testing_workload")

    # Configure the splitter from the user config
    config_splitting = {
        "type": "split",
        "keywords_in": ["includeme"],
        "keywords_out": ["excludeme"],
        "split_by": "label",
        "apply_to": ["testing_workload"],
        "create_workload": "spawn_workload"
    }

    workloads = [workload]
    splitter = WorkloadSplit(workloads)
    splitter.apply(config_splitting)

    wl_out = None
    for wl in workloads:
        if wl.tag == config_splitting["create_workload"]:
            wl_out = wl
            break

    # Make sure that we have created a workload as expected
    self.assertTrue(wl_out is not None)
    self.assertEqual(wl_out.tag, config_splitting["create_workload"])

    # Make sure that all the jobs have a label consistent with the filter
    for j in wl_out.jobs:
        self.assertTrue("includeme" in j.label and "excludeme" not in j.label)

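# A hypothetical sketch of the label filter implied by the split config above:
# a job is selected when every "keywords_in" term appears in its label and no
# "keywords_out" term does. This mirrors the final assertion, not the actual
# WorkloadSplit implementation.
def label_selected(label, keywords_in, keywords_out):
    label = label or ""
    return (all(k in label for k in keywords_in)
            and not any(k in label for k in keywords_out))

# label_selected("label_includeme", ["includeme"], ["excludeme"]) -> True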