Example #1
File: test_jobs.py Project: ecmwf/kronos
    def test_merge(self):
        """
        Can we merge multiple ModelJobs.

        n.b. Currently this only supports time signals!
        TODO: Merge the non-time-signal data.
        """
        # n.b. non-zero values. Zero time signals are ignored.
        kb_read1 = TimeSignal.from_values('kb_read', [0.0], [1.0], priority=8)
        kb_read2 = TimeSignal.from_values('kb_read', [0.0], [1.0], priority=10)
        kb_write1 = TimeSignal.from_values('kb_write', [0.0], [1.0],
                                           priority=8)

        # Test that we take the union of the available time series
        job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read1})
        job2 = ModelJob(label="label1", timesignals={'kb_write': kb_write1})
        job1.merge(job2)

        self.assertEqual(len(job1.timesignals), len(signal_types))
        self.assertEqual(job1.timesignals['kb_read'], kb_read1)
        self.assertEqual(job1.timesignals['kb_write'], kb_write1)

        # (The other time signals should still be None)
        for ts_name in signal_types:
            if ts_name in ['kb_read', 'kb_write']:
                continue
            self.assertIn(ts_name, job1.timesignals)
            self.assertIsNone(job1.timesignals[ts_name])

        # check that when merging we take the signal with the highest priority
        job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read1})
        job2 = ModelJob(label="label1", timesignals={'kb_read': kb_read2})
        job1.merge(job2)
        self.assertEqual(job1.timesignals['kb_read'], kb_read2)
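
A note on the merge semantics exercised above: the union of the available signals is kept, and when both jobs carry the same signal the higher-priority one wins. A minimal standalone sketch of that rule (the Sig namedtuple and merge_signals helper below are hypothetical stand-ins for TimeSignal and ModelJob.merge, not the kronos implementation):

from collections import namedtuple

# Hypothetical stand-in for TimeSignal, keeping only what the merge rule needs.
Sig = namedtuple('Sig', ['name', 'priority'])

def merge_signals(mine, other):
    """Union of two signal dicts; on a clash the higher-priority signal wins."""
    merged = dict(mine)
    for name, sig in other.items():
        if merged.get(name) is None or sig.priority >= merged[name].priority:
            merged[name] = sig
    return merged

job1_signals = {'kb_read': Sig('kb_read', 8)}
job2_signals = {'kb_read': Sig('kb_read', 10), 'kb_write': Sig('kb_write', 8)}
print(merge_signals(job1_signals, job2_signals))
# {'kb_read': Sig(name='kb_read', priority=10), 'kb_write': Sig(name='kb_write', priority=8)}
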
Example #2
File: jobs.py Project: ecmwf/kronos
    def vector_to_ts(self, ts_vector, priority, idx_map=None, n_bins=None):
        """
        Apply a vector of values to the time-signals of this job
        :param ts_vector: vector of y-values covering all (or a mapped subset
                          of) the time signals
        :param priority: priority assigned to the created time signals
        :param idx_map: optional mapping from positions in ts_vector into the
                        full vector
        :param n_bins: number of bins per time signal (required only when
                       idx_map is given)
        :return:
        """

        # if the vector does not contain values for all the time signals, n_bins must also be provided
        if idx_map and not n_bins:
            raise ConfigurationError(
                "the RS results are mapped to N_columns < N_columns_tot => n_bins is needed!"
            )

        if not idx_map and n_bins:
            raise ConfigurationError(
                "n_bins needs to be specified only for mapped cases")

        if self.time_start is None:
            raise ConfigurationError(
                "job start-time is needed to accept time-series")

        if self.duration is None:
            raise ConfigurationError(
                "job duration is needed to accept time-series")

        if not idx_map:

            # case in which all the elements are filled up
            split_values = np.split(ts_vector, len(time_signal_names))
            for tt, ts in enumerate(time_signal_names):
                y_values = split_values[tt]
                x_values = np.linspace(0.0, self.duration, len(y_values))

                if not self.timesignals[ts] or self.timesignals[ts].priority <= priority:
                    self.timesignals[ts] = TimeSignal(ts).from_values(
                        ts, x_values, y_values, priority=priority)

        else:

            # case for which only some columns are filled up (therefore the mapping)
            row = np.zeros(len(time_signal_names) * n_bins)
            for tt, ts in enumerate(ts_vector):
                row[idx_map[tt]] = ts

            # then re-call the same function without mapping
            self.vector_to_ts(row, priority)
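
The mapped branch above scatters a short vector into a zero-filled full-length row before recursing; the scatter itself is plain index assignment, as in this small numpy sketch (the sizes and values are invented for illustration):

import numpy as np

n_signals, n_bins = 3, 2          # invented sizes; kronos uses len(time_signal_names)
ts_vector = np.array([5.0, 7.0])  # only two of the six slots carry values
idx_map = [1, 4]                  # where those values belong in the full row

row = np.zeros(n_signals * n_bins)
for tt, ts in enumerate(ts_vector):
    row[idx_map[tt]] = ts         # row is now [0., 5., 0., 0., 7., 0.]
print(row)
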
Example #3
    def from_random_proto_signals(job_name=None, ts_scales=None, ts_len=10):
        """
        Generate a job from randomly chosen signals (from the signal prototypes)
        :param job_name: name of the generated job
        :param ts_scales: optional dict of per-signal scaling factors (keyed by
                          signal name)
        :param ts_len: number of points in each generated time signal
        :return: a UserGeneratedJob
        """

        ts_scales = ts_scales if ts_scales else {}

        proto_signals = UserGeneratedJob.proto_signals(ts_len=ts_len)

        timesignals = {}

        # Add all the time-signals
        for ts_name in time_signal_names:

            xvalues = np.arange(ts_len)

            # randomly pick one of the prototype signals
            proto_signal_idx = np.random.randint(len(proto_signals))
            yvalues = np.asarray(proto_signals[proto_signal_idx])

            if ts_scales:
                yvalues = yvalues * ts_scales[ts_name]

            timesignals[ts_name] = TimeSignal.from_values(ts_name,
                                                          xvalues,
                                                          yvalues,
                                                          priority=10)

        return UserGeneratedJob(name=job_name,
                                timesignals=timesignals,
                                ts_scales=ts_scales)
Example #4
File: test_jobs.py Project: ecmwf/kronos
    def test_is_valid(self):
        """
        Some arguments are required for a job to be valid; check them here.
        """
        job = ModelJob()
        self.assertFalse(job.is_valid())

        # If all of the required arguments are supplied, this should result in a valid job
        ts_complete_set = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
            for tsk in time_signal_names
        }

        valid_args = {
            'time_start': 0,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': ts_complete_set
        }

        self.assertTrue(ModelJob(**valid_args).is_valid())

        # If any of the supplied arguments are missing, this should invalidate things
        for k in valid_args.keys():
            invalid_args = valid_args.copy()
            del invalid_args[k]
            self.assertFalse(ModelJob(**invalid_args).is_valid())
Example #5
File: workload.py Project: ecmwf/kronos
    def total_metrics_timesignals(self):
        """
        Return a dictionary with the workload-total time signal for each metric
        :return:
        """

        # Concatenate all the available time series data for each of the jobs
        total_metrics = {}
        for signal_name, signal_details in signal_types.items():

            try:
                times_vec = np.concatenate([
                    job.timesignals[signal_name].xvalues + job.time_start
                    for job in self.jobs
                    if job.timesignals[signal_name] is not None
                ])

                data_vec = np.concatenate([
                    job.timesignals[signal_name].yvalues for job in self.jobs
                    if job.timesignals[signal_name] is not None
                ])

                ts = TimeSignal.from_values(signal_name,
                                            times_vec,
                                            data_vec,
                                            base_signal_name=signal_name)
                total_metrics[signal_name] = ts

            except ValueError:
                # logger.info( "======= No jobs found with time series for {}".format(signal_name))
                pass

        return total_metrics
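
The totals above are built by shifting each job's x-values by that job's start time and concatenating them, together with the corresponding y-values; the shift-and-concatenate step in isolation, with invented numbers:

import numpy as np

# (time_start, xvalues, yvalues) for two hypothetical jobs
jobs = [(0.0, np.array([0.0, 1.0]), np.array([1.0, 2.0])),
        (10.0, np.array([0.0, 1.0]), np.array([3.0, 4.0]))]

times_vec = np.concatenate([x + t0 for t0, x, _ in jobs])  # [ 0.  1. 10. 11.]
data_vec = np.concatenate([y for _, _, y in jobs])         # [1. 2. 3. 4.]
print(times_vec, data_vec)
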
Example #6
File: test_jobs.py Project: ecmwf/kronos
    def test_merge_ignores_empty_timesignals(self):
        """
        When merging in time signals from another job, if there is no data in the "other" time signal, then it
        should be ignored for merging purposes.
        :return:
        """
        kb_read = TimeSignal.from_values('kb_read', [0.0], [1.0])
        kb_write = TimeSignal.from_values('kb_write', [0.0],
                                          [0.0])  # n.b. zero data

        job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
        job2 = ModelJob(label="label1", timesignals={'kb_write': kb_write})

        self.assertIsNone(job1.timesignals['kb_write'])
        self.assertIsNotNone(job2.timesignals['kb_write'])
        job1.merge(job2)
        self.assertIsNone(job1.timesignals['kb_write'])
Example #7
File: test_jobs.py Project: ecmwf/kronos
    def test_reject_mislabelled_time_signals(self):
        """
        The initialisation routine should reject invalid time signals in a model job.
        """
        self.assertRaises(
            ModellingError, lambda: ModelJob(timesignals={
                'kb_write':
                TimeSignal.from_values('kb_read', [0.0], [0.0]),
            }))
Example #8
File: test_jobs.py Project: ecmwf/kronos
    def test_initialisation(self):

        # Test some defaults
        job = ModelJob()

        for attr in ['time_start', 'ncpus', 'nnodes', 'duration', 'label']:
            self.assertTrue(hasattr(job, attr))
            self.assertIsNone(getattr(job, attr))

        for ts_name in signal_types:
            self.assertIn(ts_name, job.timesignals)
            self.assertIsNone(job.timesignals[ts_name])

        # Test that we can override specified fields
        job = ModelJob(timesignals={
            'kb_read':
            TimeSignal.from_values('kb_read', [0.0], [0.0]),
            'kb_write':
            TimeSignal.from_values('kb_write', [0.0], [0.0]),
        },
                       time_start=123,
                       ncpus=4,
                       nnodes=5,
                       duration=678,
                       label="a-label")

        self.assertEqual(job.time_start, 123)
        self.assertEqual(job.ncpus, 4)
        self.assertEqual(job.nnodes, 5)
        self.assertEqual(job.duration, 678)
        self.assertEqual(job.label, "a-label")
        self.assertIsInstance(job.timesignals['kb_read'], TimeSignal)
        self.assertIsInstance(job.timesignals['kb_write'], TimeSignal)

        for ts_name in signal_types:
            if ts_name in ['kb_read', 'kb_write']:
                continue
            self.assertIn(ts_name, job.timesignals)
            self.assertIsNone(job.timesignals[ts_name])

        # Test that we cannot set unrecognised fields
        self.assertRaises(ModellingError, lambda: ModelJob(invalid=123))
Example #9
File: test_jobs.py Project: ecmwf/kronos
    def test_merge_rejects_mislabelled_time_signals(self):
        """
        Test that the merging routine checks the labelling validity. Both ways around.
        :return:
        """
        kb_read = TimeSignal.from_values('kb_read', [0.0], [1.0])
        kb_write = TimeSignal.from_values('kb_read', [0.0],
                                          [1.0])  # n.b. mislabelled

        job1 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
        job2 = ModelJob(label="label1")
        job2.timesignals['kb_write'] = kb_write

        self.assertRaises(ModellingError, lambda: job1.merge(job2))

        # And the other way around
        job2 = ModelJob(label="label1", timesignals={'kb_read': kb_read})
        job1 = ModelJob(label="label1")
        job1.timesignals['kb_write'] = kb_write

        self.assertRaises(ModellingError, lambda: job1.merge(job2))
Example #10
File: jobs.py Project: ecmwf/kronos
    def from_json(js):
        """
        (Re)animate a ModelJob from json extracted from a KProfile (kronos.io_formats.profile_format.ProfileFormat)
        """
        return ModelJob(timesignals={
            n: TimeSignal.from_values(n,
                                      xvals=t['times'],
                                      yvals=t['values'],
                                      priority=t['priority'],
                                      base_signal_name=n)
            for n, t in js.get('time_series', {}).items()
        },
                        **{
                            k: v
                            for k, v in js.items()
                            if hasattr(ModelJob, k) and v is not None
                        })
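
from_json only reads a handful of keys, so the shape it expects can be inferred from the code: a 'time_series' dict whose entries carry 'times', 'values' and 'priority', plus any top-level keys that match ModelJob attributes. The dict below is an assumed example consistent with that reading, not an actual KProfile payload:

# Assumed payload shape, reconstructed from the keys that from_json reads.
js = {
    "time_start": 123.0,
    "duration": 678.0,
    "ncpus": 4,
    "nnodes": 1,
    "time_series": {
        "kb_read": {"times": [0.0, 1.0], "values": [10.0, 20.0], "priority": 8}
    }
}

# model_job = ModelJob.from_json(js)  # would yield a ModelJob with one kb_read TimeSignal
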
Example #11
def read_allinea_log(filename, jobs_n_bins=None, cfg=None):
    """ Collect info from Allinea logs """

    # The time signal map has a number of options for each element in the profile:
    #
    # 'name':     What is the name of this signal mapped into Kronos-land (i.e. mapping onto time_signal.signal_types)
    # 'is_rate':  True if the data is recorded as x-per-second rates, rather than accumulatable values.
    #             (default False)
    # 'per_task': Is the value presented per-task, or global. If per-task it needs to be multiplied up.
    #             (default False)

    logger.info(
        "NOTE: FLOPS not available for allinea Dataset: it will be estimated from %CPU and clock rate"
    )

    # check if the clock_rate is passed in the config
    if cfg and cfg.get("clock_rate", None):
        clock_rate = cfg["clock_rate"]
    else:
        logger.info(
            "WARNING: clock rate not provided! arbitrarily set to 2.5GHz")
        clock_rate = 2.5e9

    # read the data from the json file
    with open(filename) as json_file:
        json_data = json.load(json_file)

    # Detect the proper io_keys (lustre or not) as they have a different name in the MAP logs
    _samples = json_data['profile']['samples']

    if _samples.get("lustre_bytes_read") and _samples.get(
            "lustre_bytes_written"):

        io_key_write = "lustre_bytes_written"
        io_key_read = "lustre_bytes_read"

    elif _samples.get("bytes_read") and _samples.get("bytes_written"):

        io_key_write = "bytes_written"
        io_key_read = "bytes_read"

    else:
        print(
            "The allinea map file does not seem to contain IO traces: i.e. [lustre_]bytes_[written|read]"
        )
        sys.exit(1)

    allinea_time_signal_map = {
        'instr_fp': {
            'name': 'flops',
            'scale_factor': clock_rate,
            'is_time_percent': True
        },
        io_key_read: {
            'name': 'kb_read',
            'is_rate': True,
            'scale_factor': 1. / 1024.
        },
        io_key_write: {
            'name': 'kb_write',
            'is_rate': True,
            'scale_factor': 1. / 1024.
        },
        'mpi_p2p': {
            'name': 'n_pairwise',
            'is_rate': True
        },
        'mpi_p2p_bytes': {
            'name': 'kb_pairwise',
            'is_rate': True,
            'scale_factor': 1. / 1024.
        },
        'mpi_collect': {
            'name': 'n_collective',
            'is_rate': True
        },
        'mpi_collect_bytes': {
            'name': 'kb_collective',
            'is_rate': True,
            'scale_factor': 1. / 1024.
        }
    }

    # A quick sanity check
    for value in allinea_time_signal_map.values():
        assert value['name'] in signal_types

    # # fill in the workload structure
    # i_job = IngestedJob()

    # time_start = json_data_stats['profile']['timestamp']
    # runtime = float(json_data_stats['profile']['runtime_ms']) / 1000.
    # time_start_epoch = (datetime.strptime(time_start, "%a %b %d %H:%M:%S %Y") -
    #                     datetime(1970, 1, 1)).total_seconds()

    # fill in the workload structure
    i_job = IngestedJob()

    time_start = json_data['profile']['timestamp']
    runtime = float(json_data['profile']['runtime_ms']) / 1000.

    try_formats = [
        "%a %b %d %H:%M:%S %Y", "%Y-%m-%dT%H:%M:%S+00", "%Y-%m-%dT%H:%M:%S"
    ]
    time_start_epoch = None
    for fmt in try_formats:
        try:
            time_start_epoch = datetime.strptime(time_start, fmt).timestamp()
            break
        except ValueError:
            continue
    if time_start_epoch is None:
        raise ValueError(f"cannot parse timestamp {time_start!r}")

    # this job might not necessarily have been queued
    i_job.time_created = time_start_epoch - 3
    i_job.time_queued = time_start_epoch - 2
    i_job.time_eligible = time_start_epoch - 1
    i_job.time_start = time_start_epoch
    i_job.runtime = runtime

    i_job.time_end = time_start_epoch + runtime
    i_job.time_in_queue = i_job.time_start - i_job.time_queued

    # Threads are not considered for now..
    i_job.nnodes = int(json_data['profile']["nodes"])
    i_job.ncpus = int(json_data['profile']['targetProcs'])

    # average memory used is taken from sample average of "node_mem_percent"
    mem_val_bk = json_data['profile']['samples']['node_mem_percent']
    # values inside the blocks are: min, max, mean, var
    mem_val = [v[2] for v in mem_val_bk]
    mem_val_mean = sum(mem_val) / float(len(mem_val)) / 100.
    mem_node_kb = json_data['profile']["memory_per_node"][2] / 1024.
    i_job.memory_kb = mem_node_kb * mem_val_mean

    i_job.cpu_percent = 0

    i_job.jobname = os.path.basename(filename)
    i_job.user = "******"
    i_job.group = ""
    i_job.queue_type = None

    # times relative to start of log:
    # profiler jobs are considered as if they were started at T0
    # TODO: find a more sensible solution to this..
    i_job.time_start_0 = 0.0

    # Obtain the timestamps for the (end of) each sampling window, converted into seconds.
    sample_times = np.array(json_data['profile']['sample_times']) / 1000.
    sample_interval = json_data['profile']['sample_interval'] / 1000.

    for ts_name_allinea, ts_config in allinea_time_signal_map.items():

        scale_factor = ts_config.get('scale_factor', 1.0)

        # The Allinea time-series data is a sequence of tuples of the form: (min, max, mean, variance)
        # Extract the mean value for each sampling interval.
        y_vals = np.array([
            v[2] * scale_factor
            for v in json_data['profile']['samples'][ts_name_allinea]
        ])

        # If the data is recorded as a rate (a per-second value), then adjust it to record absolute data volumes
        # per time interval.
        if ts_config.get('is_rate', False):
            y_vals = np.array([
                v * (sample_times[i] - (sample_times[i - 1] if i > 0 else 0))
                for i, v in enumerate(y_vals)
            ])

        if ts_config.get('per_task', False):
            y_vals *= i_job.ncpus

        if ts_config.get('is_time_percent', False):
            y_vals *= sample_interval / 100.

        # special case: flops are estimated by (cpu_percent*fp_percent*FREQ*Dt/100)
        if ts_name_allinea == 'instr_fp':
            y_vals *= np.array([
                v[2] / 100.
                for v in json_data['profile']['samples']['cpu_time_percentage']
            ])

        ts = TimeSignal.from_values(
            ts_config['name'],
            sample_times,
            y_vals,
            priority=allinea_signal_priorities[ts_config['name']])
        # if jobs_n_bins is not None:
        #     ts.digitized(nbins=jobs_n_bins)
        i_job.append_time_signal(ts)

    return i_job
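
One detail of read_allinea_log worth isolating is the rate handling: per-second samples are turned into per-interval volumes by multiplying each value by the width of its sampling window. The same arithmetic on a toy array (numbers invented for illustration):

import numpy as np

sample_times = np.array([2.0, 4.0, 6.0])  # end of each sampling window, in seconds
rates = np.array([10.0, 10.0, 5.0])       # x-per-second values

# width of each window; the first window is assumed to start at t=0
widths = np.diff(np.concatenate(([0.0], sample_times)))
volumes = rates * widths                  # [20. 20. 10.]
print(volumes)
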
Example #12
    def test_workload_data(self):

        # If all of the required arguments are supplied, this should result in a valid job
        ts_complete_set = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
            for tsk in time_signal_names
        }

        valid_args = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': ts_complete_set
        }

        # check that it is a valid job
        job1 = ModelJob(**valid_args)
        job2 = ModelJob(**valid_args)
        job3 = ModelJob(**valid_args)
        job4 = ModelJob(**valid_args)
        job5 = ModelJob(**valid_args)

        input_jobs = [job1, job2, job3, job4, job5]

        # diversify the time start..
        for jj, job in enumerate(input_jobs):
            job.time_start += jj * 0.1

        for job in input_jobs:
            self.assertTrue(job.is_valid())

        # create a workload with 5 model jobs
        test_workload = Workload(jobs=input_jobs, tag='test_wl')

        # -- verify that all the jobs in workload are actually the initial jobs provided --
        self.assertTrue(
            all(job is input_jobs[jj]
                for jj, job in enumerate(test_workload.jobs)))

        # ------------ verify sums of timesignals -------------------
        for ts_name in signal_types:
            ts_sum = 0
            for j in input_jobs:
                ts_sum += sum(j.timesignals[ts_name].yvalues)

            # verify the sums..
            self.assertEqual(ts_sum,
                             test_workload.total_metrics_sum_dict[ts_name])

        # ------------ verify global time signals -------------------
        valid_args_1 = {
            'time_start': 0.1,
            'duration': 0.222,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.random.rand(10))
                for tsk in time_signal_names
            }
        }
        job1 = ModelJob(**valid_args_1)

        valid_args_2 = {
            'time_start': 0.1,
            'duration': 0.333,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.random.rand(10))
                for tsk in time_signal_names
            }
        }
        job2 = ModelJob(**valid_args_2)

        test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

        for job in [job1, job2]:
            for ts in signal_types:
                self.assertTrue(
                    all(v + job.time_start in
                        test_workload.total_metrics_timesignals[ts].xvalues
                        for v in job.timesignals[ts].xvalues))
                self.assertTrue(
                    all(v in
                        test_workload.total_metrics_timesignals[ts].yvalues
                        for v in job.timesignals[ts].yvalues))
Example #13
    def model_time_series(self):

        total_mpi_pairwise_count_send = 0
        total_mpi_pairwise_bytes_send = 0
        total_mpi_pairwise_count_recv = 0
        total_mpi_pairwise_bytes_recv = 0

        total_mpi_collective_count = 0
        total_mpi_collective_bytes = 0

        total_read_count = 0
        total_write_count = 0
        total_bytes_read = 0
        total_bytes_written = 0

        for task in self.tasks:
            total_mpi_pairwise_count_send += task.mpi_pairwise_count_send
            total_mpi_pairwise_bytes_send += task.mpi_pairwise_bytes_send
            total_mpi_pairwise_count_recv += task.mpi_pairwise_count_recv
            total_mpi_pairwise_bytes_recv += task.mpi_pairwise_bytes_recv

            total_mpi_collective_count += task.mpi_collective_count
            total_mpi_collective_bytes += task.mpi_collective_bytes

            total_read_count += task.read_count
            total_write_count += task.write_count
            total_bytes_read += task.bytes_read
            total_bytes_written += task.bytes_written

        # divide the totals of MPI ops by the number of tasks (if specified..)
        tasks_list = [t.ntasks for t in self.tasks]
        if tasks_list:
            ntasks = max(tasks_list)
            total_mpi_collective_count = int(total_mpi_collective_count/float(ntasks))
            total_mpi_collective_bytes /= float(ntasks)
            total_mpi_pairwise_count_send = int(total_mpi_pairwise_count_send/float(ntasks))
            total_mpi_pairwise_bytes_send /= float(ntasks)

        # n.b. only using the pairwise send data. Recv should be largely a duplicate, but slightly smaller
        #      as MPI_Sendrecv is only being counted under send for now. If we used both send and recv data
        #      from _all_ tasks we would double count the transfers.

        return {
            'n_collective': TimeSignal.from_values('n_collective', [0.0], [total_mpi_collective_count],
                                                   priority=ipm_signal_priorities['n_collective']),

            'kb_collective': TimeSignal.from_values('kb_collective', [0.0],
                                                    [float(total_mpi_collective_bytes) / 1024.0],
                                                    priority=ipm_signal_priorities['kb_collective']),

            'n_pairwise': TimeSignal.from_values('n_pairwise', [0.0], [total_mpi_pairwise_count_send],
                                                 priority=ipm_signal_priorities['n_pairwise']),

            'kb_pairwise': TimeSignal.from_values('kb_pairwise', [0.0],
                                                  [float(total_mpi_pairwise_bytes_send) / 1024.0],
                                                  priority=ipm_signal_priorities['kb_pairwise']),

            'kb_read': TimeSignal.from_values('kb_read', [0.0], [float(total_bytes_read) / 1024.0],
                                              priority=ipm_signal_priorities['kb_read']),

            'kb_write': TimeSignal.from_values('kb_write', [0.0], [float(total_bytes_written) / 1024.0],
                                               priority=ipm_signal_priorities['kb_write']),

            'n_read': TimeSignal.from_values('n_read', [0.0], [float(total_read_count)],
                                             priority=ipm_signal_priorities['n_read']),

            'n_write': TimeSignal.from_values('n_write', [0.0], [float(total_write_count)],
                                              priority=ipm_signal_priorities['n_write'])
        }
Example #14
File: darshan.py Project: ecmwf/kronos
    def model_time_series(self):
        """
        We want to model the time series here.

        TODO: Actually introduce time dependence. For now, it only considers totals!
        """
        read_data = []
        read_counts = []
        write_data = []
        write_counts = []

        if self.time_end and self.time_start:
            duration = self.time_end - self.time_start
        else:
            duration = None

        for model_file in self.file_details.values():

            if duration:
                if model_file.read_time_start > duration:
                    model_file.read_time_start = duration - 1
                if model_file.read_time_end > duration:
                    model_file.read_time_end = duration

            if model_file.read_time_start is not None and (
                    model_file.read_count != 0 or model_file.bytes_read != 0):
                read_data.append(
                    (model_file.read_time_start,
                     model_file.bytes_read / 1024.0,
                     model_file.read_time_end - model_file.read_time_start))
                read_counts.append(
                    (model_file.read_time_start, model_file.read_count,
                     model_file.read_time_end - model_file.read_time_start))

            if model_file.write_time_start is not None and (
                    model_file.write_count != 0
                    or model_file.bytes_written != 0):
                write_data.append(
                    (model_file.write_time_start,
                     model_file.bytes_written / 1024.0,
                     model_file.write_time_end - model_file.write_time_start))
                write_counts.append(
                    (model_file.write_time_start, model_file.write_count,
                     model_file.write_time_end - model_file.write_time_start))

        times_read, read_data, read_durations = zip(
            *read_data) if read_data else (None, None, None)
        times_read2, read_counts, read_durations2 = zip(
            *read_counts) if read_counts else (None, None, None)
        times_write, write_data, write_durations = zip(
            *write_data) if write_data else (None, None, None)
        times_write2, write_counts, write_durations2 = zip(
            *write_counts) if write_counts else (None, None, None)

        time_series = {}
        if read_data:
            time_series['kb_read'] = TimeSignal.from_values(
                'kb_read',
                times_read,
                read_data,
                durations=read_durations,
                priority=darshan_signal_priorities['kb_read'])
        if write_data:
            time_series['kb_write'] = TimeSignal.from_values(
                'kb_write',
                times_write,
                write_data,
                durations=write_durations,
                priority=darshan_signal_priorities['kb_write'])
        if read_counts:
            time_series['n_read'] = TimeSignal.from_values(
                'n_read',
                times_read,
                read_counts,
                durations=read_durations,
                priority=darshan_signal_priorities['n_read'])
        if write_counts:
            time_series['n_write'] = TimeSignal.from_values(
                'n_write',
                times_write,
                write_counts,
                durations=write_durations,
                priority=darshan_signal_priorities['n_write'])

        return time_series
Example #15
    def test_generator(self):
        """
        Test end-to-end job generation driven by a cluster_and_spawn workload modelling config
        """

        # If all of the required arguments are supplied, this should result in a valid job
        ts_complete_set = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
            for tsk in time_signal_names
        }

        valid_args = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': ts_complete_set
        }

        ts_complete_set_2 = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 444.])
            for tsk in time_signal_names
        }

        valid_args_2 = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': ts_complete_set_2
        }

        # check that it is a valid job
        job1 = ModelJob(**valid_args)
        job1.label = "job1"

        job2 = ModelJob(**valid_args_2)
        job2.label = "job2"

        job3 = ModelJob(**valid_args)
        job3.label = "job3"

        job4 = ModelJob(**valid_args_2)
        job4.label = "job4"

        job5 = ModelJob(**valid_args)
        job5.label = "job5"

        input_jobs = [job1, job2, job3, job4, job5]

        # diversify the time start..
        for jj, job in enumerate(input_jobs):
            job.time_start += jj * 0.1

        for job in input_jobs:
            self.assertTrue(job.is_valid())

        config_generator = {
            "type": "cluster_and_spawn",
            "job_clustering": {
                "type": "Kmeans",
                "rseed": 0,
                "apply_to": ["test_wl_0"],
                "ok_if_low_rank": True,
                "max_iter": 100,
                "max_num_clusters": 3,
                "delta_num_clusters": 1,
                "num_timesignal_bins": 1,
                "user_does_not_check": True
            },
            "job_submission_strategy": {
                "type": "match_job_pdf_exact",
                "n_bins_for_pdf": 20,
                "submit_rate_factor": 8,
                "total_submit_interval": 60,
                "random_seed": 0
            }
        }

        # select the appropriate workload_filling strategy
        workloads = [
            Workload(jobs=input_jobs, tag='test_wl_0'),
            Workload(jobs=input_jobs, tag='test_wl_1'),
            Workload(jobs=input_jobs, tag='test_wl_2')
        ]

        workload_modeller = workload_modelling_types[config_generator["type"]](
            workloads)
        workload_modeller.apply(config_generator)

        # get the newly created set of (modelled) workloads
        workload_set = workload_modeller.get_workload_set()

        # make sure that we are creating only one workload
        self.assertEqual(len(workload_set.workloads), 1)

        # ---- check that all the jobs are correctly formed.. ----

        # check that each job has time-signals as expected..
        for job in workload_set.workloads[0].jobs:
            self.assertTrue(hasattr(job, "timesignals"))

        # check that each job has all the time-signals at this point..
        for job in workload_set.workloads[0].jobs:
            self.assertTrue(
                all([k in job.timesignals.keys() for k in time_signal_names]))
Example #16
File: jobs.py Project: ecmwf/kronos
def concatenate_modeljobs(cat_job_label, job_list):
    """ Interlaces (or concatenates) a list of jobs into one single job
    The job time series are interlaced according to their respective timestamps.

    :param   cat_job_label: name of concatenated job
        job_list: list of jobs to concatenate

    :return: A ModelJob

    """

    # 1) find start-time and end-time
    cat_start_time = min([job.time_start for job in job_list])
    cat_end_time = max([job.time_start + job.duration for job in job_list])

    # 2) find overall duration
    cat_duration = cat_end_time - cat_start_time

    # 3) interlace time-series
    cat_time_series = {}
    for ts_type in time_signal_names:

        cat_xvalues = []
        cat_yvalues = []
        cat_durations = []

        # loop over jobs
        for job in job_list:
            if ts_type in job.timesignals and job.timesignals[ts_type] is not None:

                # add xvalues (in absolute value) and yvalues
                cat_xvalues.extend(
                    np.asarray(job.timesignals[ts_type].xvalues) +
                    job.time_start)
                cat_yvalues.extend(np.asarray(
                    job.timesignals[ts_type].yvalues))

                # add durations only if available, otherwise set them to zero..
                if job.timesignals[ts_type].durations is not None:
                    cat_durations.extend(job.timesignals[ts_type].durations)
                else:
                    cat_durations.extend(
                        np.zeros(len(job.timesignals[ts_type].xvalues)))

        if cat_xvalues:
            # reset the initial time to zero..
            cat_xvalues = [x - cat_start_time for x in cat_xvalues]

            # sort values as time sequence..
            cat_vals = sorted(zip(cat_xvalues, cat_yvalues, cat_durations),
                              key=lambda x: x[0])
            xvalues, yvalues, durations = zip(*cat_vals)

            # build the concatenated time signal..
            cat_time_series[ts_type] = TimeSignal(ts_type,
                                                  base_signal_name=ts_type,
                                                  durations=durations,
                                                  xvalues=xvalues,
                                                  yvalues=yvalues)
    print("job {} created".format(cat_job_label))

    # TODO: make a decision on what nnodes and nproc to choose..
    return ModelJob(time_start=cat_start_time,
                    duration=cat_duration,
                    ncpus=2,
                    nnodes=1,
                    timesignals=cat_time_series,
                    label=cat_job_label)
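
The interlacing step reduces to merging the per-job (time, value, duration) triples and re-sorting them by time; a self-contained sketch of just that step, with invented samples:

# Two jobs' samples, already shifted to absolute time: (time, value, duration)
job_a = [(0.0, 1.0, 0.0), (2.0, 3.0, 0.0)]
job_b = [(1.0, 5.0, 0.0)]

merged = sorted(job_a + job_b, key=lambda v: v[0])
xvalues, yvalues, durations = zip(*merged)
print(xvalues)  # (0.0, 1.0, 2.0)
print(yvalues)  # (1.0, 5.0, 3.0)
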
Example #17
    def test_workload_fillin_match(self):
        """
        Test the metrics assignment through job name (label) matching
        :return:
        """

        # ------------ verify global time signals -------------------
        valid_args_1 = {
            'job_name': "blabla_weird_name",
            'time_start': 0.1,
            'duration': 0.222,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.arange(10) * 2)
                for tsk in time_signal_names
            }
        }
        job1 = ModelJob(**valid_args_1)

        valid_args_2 = {
            'job_name': "job_match",
            'time_start': 0.1,
            'duration': 0.333,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {}
        }
        job2 = ModelJob(**valid_args_2)

        # ------ target workload (that will receive the time metrics..)
        target_wl = Workload(jobs=[job1, job2], tag='target_workload')

        # ---------- source workload
        valid_args_3 = {
            'job_name': "job_match",
            'time_start': 0.1,
            'duration': 0.333,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.random.rand(10))
                for tsk in time_signal_names
            }
        }

        job3 = ModelJob(**valid_args_3)
        source_wl = Workload(jobs=[job3], tag='wl_match_source')

        # filler config
        filler_config = {
            "type": "match_by_keyword",
            "priority": 0,
            "keywords": ["job_name"],
            "similarity_threshold": 0.3,
            "source_workloads": ["wl_match_source"],
            "apply_to": ["target_workload"]
        }

        # Apply the user defaults to the workloads
        workloads = [target_wl, source_wl]
        filler = StrategyMatchKeyword(workloads)
        filler.apply(filler_config)

        # for ts_k, ts_v in job3.timesignals.iteritems():
        #     print "JOB3:{}:{}".format(ts_k, ts_v.yvalues)
        #
        # for ts_k, ts_v in target_wl.jobs[1].timesignals.iteritems():
        #     print "TRG_J1:{}:{}".format(ts_k, ts_v.yvalues)

        self.assertTrue(
            all([
                all(ys == yt for ys, yt in zip(job3.timesignals[ts_k].yvalues,
                                               ts_v.yvalues))
                for ts_k, ts_v in target_wl.jobs[1].timesignals.items()
            ]))
Example #18
    def generate_jobs(self):

        logger.info("Generating jobs from cluster: {}, "
                    "that has {} jobs".format(
                        self.wl_clusters['source-workload'],
                        len(self.wl_clusters['jobs_for_clustering'])))

        start_times_vec_sa, _, _ = self.schedule_strategy.create_schedule()

        # Random vector of cluster indexes
        n_modelled_jobs = len(start_times_vec_sa)
        np.random.seed(self.config["job_submission_strategy"].get(
            'random_seed', 0))
        vec_clust_indexes = np.random.randint(
            self.wl_clusters['cluster_matrix'].shape[0], size=n_modelled_jobs)

        # Mean NCPU in cluster (considering jobs in cluster)
        jobs_all = self.wl_clusters['jobs_for_clustering']
        lab_all = np.asarray(self.wl_clusters['labels'])

        # jobs in each cluster
        jobs_in_each_cluster = {
            cl: np.asarray(jobs_all)[lab_all == cl]
            for cl in set(lab_all)
        }

        # mean #CPUS in each cluster (from jobs for which ncpus is available, otherwise 1)
        mean_cpus = {
            cl_id: np.mean([job.ncpus if job.ncpus else 1 for job in cl_jobs])
            for cl_id, cl_jobs in jobs_in_each_cluster.items()
        }

        # mean #NODES in each cluster (from jobs for which nnodes is available, otherwise 1)
        mean_nodes = {
            cl_id:
            np.mean([job.nnodes if job.nnodes else 1 for job in cl_jobs])
            for cl_id, cl_jobs in jobs_in_each_cluster.items()
        }

        # loop over the clusters and generates jobs as needed
        generated_model_jobs = []
        for cc, cl_idx in enumerate(vec_clust_indexes):

            ts_dict = {}
            row = self.wl_clusters['cluster_matrix'][cl_idx, :]
            ts_yvalues = np.split(row, len(time_signal_names))
            for tt, ts_vv in enumerate(ts_yvalues):
                ts_name = time_signal_names[tt]
                ts = TimeSignal(ts_name).from_values(ts_name,
                                                     np.arange(len(ts_vv)),
                                                     ts_vv)
                ts_dict[ts_name] = ts

            job = ModelJob(time_start=start_times_vec_sa[cc],
                           job_name="job-{}-cl-{}".format(cc, cl_idx),
                           duration=None,
                           ncpus=mean_cpus[cl_idx],
                           nnodes=mean_nodes[cl_idx],
                           timesignals=ts_dict,
                           label="job-{}-cl-{}".format(cc, cl_idx))
            generated_model_jobs.append(job)

        n_sa = len(generated_model_jobs)
        n_job_ratio = n_sa / float(len(
            self.wl_clusters['jobs_for_clustering'])) * 100.
        logger.info(
            "====> Generated {} jobs from cluster (#job ratio = {:.2f}%)".
            format(n_sa, n_job_ratio))

        return generated_model_jobs, vec_clust_indexes
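
Each generated job's time signals come from one row of the cluster matrix, split into equal-length chunks, one chunk per metric; the split in isolation (sizes invented, kronos uses len(time_signal_names)):

import numpy as np

n_signals = 3                             # invented; two bins per signal below
row = np.array([1., 2., 3., 4., 5., 6.])  # one cluster-matrix row

ts_yvalues = np.split(row, n_signals)
print(ts_yvalues)  # [array([1., 2.]), array([3., 4.]), array([5., 6.])]
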
Example #19
    def test_workload_fillin_default(self):
        """
        Test the correct assignment of user-defined time-series
        :return:
        """

        io_metrics = ['kb_read', 'kb_write', 'n_read', 'n_write']

        # create 2 random jobs (with ONLY io metrics)
        valid_args_1 = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.random.rand(10))
                for tsk in io_metrics
            }
        }
        job1 = ModelJob(**valid_args_1)

        valid_args_2 = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': {
                tsk: TimeSignal.from_values(tsk, np.random.rand(10),
                                            np.random.rand(10))
                for tsk in io_metrics
            }
        }
        job2 = ModelJob(**valid_args_2)

        test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

        # ---------------------- fill in config -----------------------
        filling_funct_config = [{
            "type": "step",
            "name": "step-1",
            "x_step": 0.5
        }, {
            "type": "custom",
            "name": "custom-1",
            "x_values": [0, 0.1, 0.15, 0.3333, 0.5, 0.8, 0.9, 1.0],
            "y_values": [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
        }]

        # Values to assign to all the unspecified metrics
        default_config = {
            "type": "fill_missing_entries",
            "apply_to": ["wl_2jobs"],
            "priority": 0,
            "metrics": {
                "kb_collective": [100, 101],
                "n_collective": [100, 101],
                "kb_pairwise": {
                    "function": "step-1",
                    "scaling": 1000.0
                },
                "n_pairwise": {
                    "function": "custom-1",
                    "scaling": 1000.0
                },
                "flops": [100, 101],
            }
        }

        # update the filling config with the user-defined functions
        default_config.update({"user_functions": filling_funct_config})

        # Apply the user defaults to the workloads
        workloads = [test_workload]
        filler = StrategyUserDefaults(workloads)
        filler.apply(default_config)

        # test that the IO metrics are within the random range used [0,1]
        for j in workloads[0].jobs:
            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['n_write'].xvalues]))
            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['n_write'].yvalues]))

            self.assertTrue(
                all([0.0 < x < 1.0
                     for x in j.timesignals['kb_write'].xvalues]))
            self.assertTrue(
                all([0.0 < x < 1.0
                     for x in j.timesignals['kb_write'].yvalues]))

            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['n_read'].xvalues]))
            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['n_read'].yvalues]))

            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['kb_read'].xvalues]))
            self.assertTrue(
                all([0.0 < x < 1.0 for x in j.timesignals['kb_read'].yvalues]))

        # test that the user-defined metrics are within the configured random range [100, 101]
        for j in workloads[0].jobs:
            self.assertTrue(
                all([100 < x < 101 for x in j.timesignals['flops'].yvalues]))
            self.assertTrue(
                all([
                    100 < x < 101
                    for x in j.timesignals['n_collective'].yvalues
                ]))
            self.assertTrue(
                all([
                    100 < x < 101
                    for x in j.timesignals['kb_collective'].yvalues
                ]))

        # test that the user-defined functions are being applied as expected

        for j in workloads[0].jobs:

            # values vs expected
            val_exp = zip(j.timesignals['n_pairwise'].yvalues,
                          [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0])
            self.assertTrue(all([x == y * 1000. for x, y in val_exp]))

            # and the step function
            self.assertTrue(
                all([(x == 0 or x == 1000.)
                     for x in j.timesignals['kb_pairwise'].yvalues]))
Example #20
class UserJobTests(unittest.TestCase):

    xvals = list(range(10))
    yvals = [y**2 for y in range(10)]

    dummy_time_signals = {
        tsname: TimeSignal.from_values(tsname,
                                       xvals=list(range(10)),
                                       yvals=[y**2 for y in range(10)],
                                       priority=10)
        for tsname in time_signal_names
    }

    def test_user_job_init(self):
        """
        Test initialisation of user-generated jobs
        :return:
        """

        # instantiate user job
        user_job = UserGeneratedJob("dummy_job",
                                    timesignals=self.dummy_time_signals,
                                    ts_scales=None)

        self.assertEqual(user_job.name, "dummy_job")
        self.assertEqual(user_job.timesignals, self.dummy_time_signals)

        # from its proto-signals
        job = UserGeneratedJob.from_random_proto_signals(
            "from_proto_signals_job", ts_len=25)

        self.assertEqual(job.name, "from_proto_signals_job")

        first_ts_len = len(next(iter(job.timesignals.values())).xvalues)
        self.assertEqual(first_ts_len, 25)

    def test_timesignal_probability(self):

        # from its proto-signals
        job = UserGeneratedJob.from_random_proto_signals(
            "from_proto_signals_job", ts_len=25)

        # check length of all the ts..
        first_ts_len = len(next(iter(job.timesignals.values())).xvalues)
        self.assertEqual(first_ts_len, 25)

        # check that all the lengths
        for tsv in job.timesignals.values():
            self.assertEqual(len(tsv.xvalues), first_ts_len)

        # probability 0 means that all the signals will be removed
        job_no_ts = copy.deepcopy(job)
        job_no_ts.apply_ts_probability(0.0)
        for tsv in job_no_ts.timesignals.values():
            self.assertTrue(all([y == -1 for y in tsv.yvalues]))

        # probability 1 means that all the signals are retained
        job_no_ts = copy.deepcopy(job)
        job_no_ts.apply_ts_probability(1.0)
        for tsv in job_no_ts.timesignals.values():
            self.assertTrue(all([y != -1 for y in tsv.yvalues]))
Example #21
File: test_split.py Project: ecmwf/kronos
    def test_splitter(self):

        # -------------- prepare a couple of dummy jobs ---------------

        # If all of the required arguments are supplied, this should result in a valid job
        ts_complete_set = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 999.])
            for tsk in time_signal_names
        }

        ts_complete_set_2 = {
            tsk: TimeSignal.from_values(tsk, [0., 0.1], [1., 444.])
            for tsk in time_signal_names
        }

        valid_args = {
            'time_start': 0.1,
            'duration': 0.2,
            'ncpus': 1,
            'nnodes': 1,
            'timesignals': ts_complete_set,
            'job_name': "job_name_1"
        }

        valid_args_2 = {
            'time_start': 0.2,
            'duration': 0.4,
            'ncpus': 2,
            'nnodes': 2,
            'timesignals': ts_complete_set_2,
            'job_name': "job_name_2"
        }

        # a model job that WILL NOT be picked by the algorithm..
        job1 = ModelJob(**valid_args)
        job1.label = "label_nottobepicked"

        # a model job that WILL be picked by the algorithm..
        job2 = ModelJob(**valid_args_2)
        job2.label = "label_includeme"

        # dummy workload with 20 jobs
        np.random.seed(0)
        jobs_all = []
        for i in range(20):

            # spawn a new job from either job1 or job2
            if np.random.rand() < 0.5:
                new_job = copy.deepcopy(job1)
            else:
                new_job = copy.deepcopy(job2)

            # add it to the pool of jobs
            jobs_all.append(new_job)

        # create a workload out of all the jobs..
        workload = Workload(jobs=jobs_all, tag="testing_workload")

        # configure the splitter from user config
        config_splitting = {
            "type": "split",
            "keywords_in": ["includeme"],
            "keywords_out": ["excludeme"],
            "split_by": "label",
            "apply_to": ["testing_workload"],
            "create_workload": "spawn_workload"
        }

        workloads = [workload]
        splitter = WorkloadSplit(workloads)
        splitter.apply(config_splitting)

        wl_out = None
        for wl in workloads:
            if wl.tag == config_splitting["create_workload"]:
                wl_out = wl
                break

        # make sure that we have created a workload as expected
        self.assertTrue(wl_out is not None)
        self.assertEqual(wl_out.tag, config_splitting["create_workload"])

        # make sure that all the jobs have a label consistent with the filter
        for j in wl_out.jobs:
            self.assertTrue("includeme" in j.label
                            and "excludeme" not in j.label)