Exemple #1
0
 def test_name(self):
     """A job exposes an explicit name, or ``id(job)`` when none is given."""
     explicit_name = "test"
     named_job = Job(
         resources={},
         used_resources={"walltime": 100},
         name=explicit_name,
     )
     assert named_job.name == explicit_name
     assert repr(named_job) == "<Job: %s>" % explicit_name

     # without a ``name`` argument the object's id is expected as the name
     anonymous_job = Job(resources={}, used_resources={"walltime": 100})
     fallback_name = id(anonymous_job)
     assert anonymous_job.name == fallback_name
     assert repr(anonymous_job) == "<Job: %s>" % fallback_name
Exemple #2
0
 async def test_run_job(self):
     """Run one job on a dummy drone and check timing, waiting time and success."""
     dummy_drone = DummyDrone()
     job = Job(resources={"walltime": 50}, used_resources={"walltime": 10})
     # before the job has been run its waiting time is reported as infinite
     assert job.waiting_time == float("inf")
     async with Scope() as scope:
         scope.do(job.run(dummy_drone))
     # the scope only closes once the job is done: 10 time units have passed
     assert 10 == time
     assert job.waiting_time == 0
     assert job.successful
Exemple #3
0
 def test_init(self):
     """Check which argument combinations ``Job`` accepts on construction."""
     # empty ``used_resources`` raise KeyError, with or without a request
     for requested in ({}, {"walltime": 100}):
         with pytest.raises(KeyError):
             Job(resources=requested, used_resources={})

     # a used walltime alone is sufficient to build a (truthy) job
     assert Job(resources={}, used_resources={"walltime": 100})

     # a negative ``in_queue_since`` is rejected
     with pytest.raises(AssertionError):
         Job(
             resources={},
             used_resources={"walltime": 100},
             in_queue_since=-5,
         )
Exemple #4
0
    async def test_calculation_time(self):
        """Check how long ``Job._calculate`` takes for several job setups."""
        # each scenario: (keyword arguments for ``Job``, expected elapsed time)
        scenarios = [
            # no input files requested
            (
                {
                    "resources": {"walltime": 60},
                    "used_resources": {"walltime": 10, "cores": 0.7},
                },
                10,
            ),
            # input files requested, used cores recorded
            (
                {
                    "resources": {"walltime": 60, "inputfiles": {"file"}},
                    "used_resources": {"walltime": 10, "cores": 0.7},
                },
                7,
            ),
            # as before, with an explicit calculation efficiency
            (
                {
                    "resources": {"walltime": 60, "inputfiles": {"file"}},
                    "used_resources": {"walltime": 10, "cores": 0.7},
                    "calculation_efficiency": 0.5,
                },
                14,
            ),
            # input files requested, but no used cores recorded
            (
                {
                    "resources": {"walltime": 60, "inputfiles": {"file"}},
                    "used_resources": {"walltime": 10},
                    "calculation_efficiency": 0.5,
                },
                10,
            ),
        ]
        for job_kwargs, expected_duration in scenarios:
            self.job = Job(**job_kwargs)
            self.job.drone = DummyDrone(1)
            started_at = time.now
            await self.job._calculate()
            assert time.now - started_at == expected_duration
Exemple #5
0
 async def test_nonmatching_job_in_drone(self):
     """A job requesting 2 cores on a 1-core drone fails without time passing."""
     scheduler = DummyScheduler()
     requested = {"walltime": 50, "cores": 2, "memory": 1}
     consumed = {"walltime": 10, "cores": 1, "memory": 1}
     job = Job(resources=requested, used_resources=consumed)
     drone = Drone(
         scheduler=scheduler,
         pool_resources={"cores": 1, "memory": 1},
         scheduling_duration=0,
     )
     async with Scope() as scope:
         scope.do(drone.run(), volatile=True)
         scope.do(drone.schedule_job(job=job))
         # wait until the scheduler statistics report exactly one failed job
         available = scheduler.statistics._available
         one_failed_job = scheduler.statistics.resource_type(job_failed=1)
         await (available == one_failed_job)
     assert 0 == time
     assert not job.successful
     assert job.waiting_time == 0
Exemple #6
0
def htcondor_job_reader(
    iterable,
    calculation_efficiency: Optional[float] = None,
    resource_name_mapping={  # noqa: B006
        "cores": "RequestCpus",
        "walltime": "RequestWalltime",  # s
        "memory": "RequestMemory",  # MiB
        "disk": "RequestDisk",  # KiB
    },
    used_resource_name_mapping={  # noqa: B006
        "queuetime": "QDate",
        "walltime": "RemoteWallClockTime",  # s
        "memory": "MemoryUsage",  # MB
        "disk": "DiskUsage_RAW",  # KiB
    },
    unit_conversion_mapping={  # noqa: B006
        "RequestCpus": 1,
        "RequestWalltime": 1,
        "RequestMemory": 1024 * 1024,
        "RequestDisk": 1024,  # KBytes
        "queuetime": 1,
        "RemoteWallClockTime": 1,
        "MemoryUsage": 1000 * 1000,  # MB
        "DiskUsage_RAW": 1024,  # KBytes
        "filesize": 1000 * 1000 * 1000,  # GB
        "usedsize": 1000 * 1000 * 1000,  # GB
    },
):
    """
    Read HTCondor job records from a JSON or CSV file and yield ``Job`` objects.

    The input format is chosen from the extension of ``iterable.name``:
    ``json`` files are loaded with :func:`json.load`, ``csv`` files are read
    with :class:`csv.DictReader` using a space delimiter and ``'`` as quote
    character. For any other extension an error is logged and no job is
    yielded.

    Entries whose used walltime is not positive are skipped with a warning.

    :param iterable: open file-like object; must provide a ``name`` attribute
    :param calculation_efficiency: default efficiency passed to every job
        whose entry has no ``"calculation_efficiency"`` field of its own
    :param resource_name_mapping: maps requested resource names of the job to
        the corresponding field names of an entry
    :param used_resource_name_mapping: maps used resource names of the job to
        the corresponding field names of an entry
    :param unit_conversion_mapping: multiplicative factors, looked up by entry
        field name (or by input file attribute name); missing keys count as 1
    :return: generator of ``Job`` objects, one per accepted entry
    """
    logger = logging.getLogger("implementation")
    input_file_type = iterable.name.split(".")[-1].lower()
    if input_file_type == "json":
        htcondor_reader = json.load(iterable)
    elif input_file_type == "csv":
        htcondor_reader = csv.DictReader(iterable,
                                         delimiter=" ",
                                         quotechar="'")
    else:
        logger.error(
            "Invalid input file %s. Job input file can not be read.",
            iterable.name)
        # there is no reader to iterate over: end the generator cleanly
        return

    used_walltime_key = used_resource_name_mapping["walltime"]
    for entry in htcondor_reader:
        used_walltime = float(entry[used_walltime_key])
        if used_walltime <= 0:
            logger.warning("removed job from htcondor import (%s)", entry)
            continue

        resources = {}
        for key, original_key in resource_name_mapping.items():
            try:
                resources[key] = int(
                    float(entry[original_key]) *
                    unit_conversion_mapping.get(original_key, 1))
            except ValueError:
                # values that do not parse as a number are left out of the
                # job's request
                pass

        # used cores: consumed CPU time (system + user) per used walltime
        used_resources = {
            "cores":
            ((float(entry["RemoteSysCpu"]) + float(entry["RemoteUserCpu"])) /
             used_walltime) *
            unit_conversion_mapping.get(resource_name_mapping["cores"], 1)
        }
        for key in ["memory", "walltime", "disk"]:
            original_key = used_resource_name_mapping[key]
            used_resources[key] = int(
                float(entry[original_key]) *
                unit_conversion_mapping.get(original_key, 1))

        # Kept in a per-entry local: rebinding the parameter would carry one
        # entry's efficiency over to all following entries lacking the field.
        job_efficiency = entry.get("calculation_efficiency",
                                   calculation_efficiency)

        input_files = entry.get("Inputfiles")
        if input_files:
            # requested and used views start as independent copies
            resources["inputfiles"] = deepcopy(input_files)
            used_resources["inputfiles"] = deepcopy(input_files)
            for filename, filespecs in input_files.items():
                requested_specs = resources["inputfiles"][filename]
                used_specs = used_resources["inputfiles"][filename]
                for key in filespecs:
                    if key == "hitrates":
                        continue
                    converted = filespecs[key] * unit_conversion_mapping.get(
                        key, 1)
                    requested_specs[key] = converted
                    used_specs[key] = converted

                # "usedsize" is only kept on the used side
                if "usedsize" in filespecs:
                    del requested_specs["usedsize"]

                # "filesize" is only kept on the requested side; it serves as
                # used size when no explicit "usedsize" is given
                if "filesize" in filespecs:
                    if "usedsize" not in filespecs:
                        used_specs["usedsize"] = requested_specs["filesize"]
                    del used_specs["filesize"]

        yield Job(
            resources=resources,
            used_resources=used_resources,
            queue_date=float(entry[used_resource_name_mapping["queuetime"]]),
            calculation_efficiency=job_efficiency,
            name=entry.get("name", None),
        )
Exemple #7
0
    async def _run_job(self, job: Job, kill: bool):
        """
        Execute ``job`` on this drone and report its end to the scheduler.

        The job is launched first; only afterwards are its requested resources
        claimed from the drone. If that claim fails with
        ``ResourcesUnavailable`` (or an ``AssertionError`` is raised), the
        running job is cancelled. With ``kill`` set, the job is also cancelled
        as soon as one of its used resources is found to exceed the matching
        requested resource. Once the job has ended, the drone's job counters
        are restored, the scheduler is notified and monitoring samples are
        emitted.

        :param job: the job to start
        :param kill: if True, a job is killed when used resources exceed
                     requested resources
        """
        job.drone = self
        async with Scope() as scope:
            from lapis.monitor import sampling_required

            def caching_status():
                # snapshot of this drone's caching state for monitoring
                return DroneStatusCaching(
                    repr(self),
                    self.pool_resources["cores"],
                    self.theoretical_available_resources["cores"],
                    self.jobs_with_cached_data,
                )

            # reset the cached values before the job count changes
            self._utilisation = self._allocation = None

            running_job = scope.do(job.run(self))
            self.jobs += 1
            if job._cached_data:
                self.jobs_with_cached_data += 1
            try:
                async with self.resources.claim(**job.resources):
                    await sampling_required.put(caching_status())
                    await sampling_required.put(self)
                    if kill:
                        for resource_key in job.resources:
                            try:
                                requested = job.resources[resource_key]
                                if requested < job.used_resources[resource_key]:
                                    await instant
                                    running_job.cancel()
                                    await instant
                            except KeyError:
                                # no usage recorded for this resource, so
                                # there is nothing to compare against
                                pass
                    await running_job.done
            except (ResourcesUnavailable, AssertionError):
                # the job cannot be hosted by this drone: abort its execution
                await instant
                running_job.cancel()
                await instant
            self.jobs -= 1
            if job._cached_data:
                self.jobs_with_cached_data -= 1

            await self.scheduler.job_finished(job)
            self._utilisation = self._allocation = None
            self.scheduler.update_drone(self)
            await sampling_required.put(self)
            await sampling_required.put(caching_status())
Exemple #8
0
    async def test_transfer_time(self):
        """Check how long ``Job._transfer_inputfiles`` takes for several setups."""
        conversion_GB_to_B = 1000 * 1000 * 1000
        drone = DummyDrone(1)
        file_size = 20 * conversion_GB_to_B

        def requested_with_file():
            # request naming one input file of 20 GB
            return {
                "walltime": 60,
                "inputfiles": {"file": {"usedsize": file_size}},
            }

        def used_with_file():
            # usage record for the same file, without any hitrates
            return {
                "walltime": 10,
                "inputfiles": {
                    "file": {"usedsize": file_size, "hitrates": {}},
                },
            }

        # each scenario: (requested resources, used resources, expected time)
        scenarios = [
            # file both requested and recorded as used
            (requested_with_file(), used_with_file(), 20),
            # no input files at all
            ({"walltime": 60}, {"walltime": 10}, 0),
            # file requested, but no input files in the used resources
            (requested_with_file(), {"walltime": 10}, 0),
            # file both requested and recorded as used (again)
            (requested_with_file(), used_with_file(), 20),
        ]
        for requested, used, expected_duration in scenarios:
            self.job = Job(
                resources=requested,
                used_resources=used,
                calculation_efficiency=1.0,
            )
            self.job.drone = drone
            started_at = time.now
            await self.job._transfer_inputfiles()
            assert time.now - started_at == expected_duration
Exemple #9
0
def swf_job_reader(
    iterable,
    calculation_efficiency: Optional[float] = None,
    resource_name_mapping={  # noqa: B006
        "cores": "Requested Number of Processors",
        "walltime": "Requested Time",  # s
        "memory": "Requested Memory",  # KiB
    },
    used_resource_name_mapping={  # noqa: B006
        "walltime": "Run Time",  # s
        "cores": "Number of Allocated Processors",
        "memory": "Used Memory",  # KiB
        "queuetime": "Submit Time",
    },
    unit_conversion_mapping={  # noqa: B006
        "Used Memory": 1024,
        "Requested Memory": 1024,
    },
):
    """
    Read job records in SWF column layout and yield ``Job`` objects.

    Every data line is split on spaces into positional columns, see
    ``header`` for the assumed order. Lines starting with ``;`` are treated
    as comments; these as well as blank lines are skipped.

    Negative requested values for cores, walltime and memory are replaced by
    0. Memory is given per processor in the input and is multiplied by the
    respective number of processors.

    :param iterable: iterable of text lines, e.g. an open file
    :param calculation_efficiency: efficiency passed on to every job
    :param resource_name_mapping: maps requested resource names of the job to
        column names
    :param used_resource_name_mapping: maps used resource names of the job to
        column names
    :param unit_conversion_mapping: multiplicative factors, looked up by
        column name; missing keys count as 1
    :return: generator of ``Job`` objects, one per data line
    """
    header = {
        "Job Number": 0,
        "Submit Time": 1,
        "Wait Time": 2,  # s
        "Run Time": 3,  # s
        "Number of Allocated Processors": 4,
        "Average CPU Time Used": 5,  # s
        "Used Memory": 6,  # average kB per processor
        "Requested Number of Processors": 7,
        "Requested Time": 8,
        "Requested Memory": 9,  # kB per processor
        "Status": 10,
        "User ID": 11,
        "Group ID": 12,
        "Executable (Application) Number": 13,
        "Queue Number": 14,
        "Partition Number": 15,
        "Preceding Job Number": 16,
        "Think Time from Preceding Job": 17,  # s
    }
    reader = csv.reader(
        # Drop comments and blank lines up front: an empty string has no
        # first character to inspect and a blank line has no columns.
        (line for line in iterable
         if line.strip() and not line.startswith(";")),
        delimiter=" ",
        skipinitialspace=True,
    )
    for row in reader:
        resources = {}
        used_resources = {}
        # correct request parameters
        for key in ["cores", "walltime", "memory"]:
            requested_column = header[resource_name_mapping[key]]
            if float(row[requested_column]) < 0:
                row[requested_column] = 0
        for key in ["cores", "walltime"]:
            value = float(row[header[resource_name_mapping[key]]])
            used_value = float(row[header[used_resource_name_mapping[key]]])
            if value >= 0:
                resources[key] = value * unit_conversion_mapping.get(
                    resource_name_mapping[key], 1)
            # negative used values are left out of the used resources
            if used_value >= 0:
                used_resources[key] = used_value * unit_conversion_mapping.get(
                    used_resource_name_mapping[key], 1)
        # handle memory: per-processor value times the number of processors
        key = "memory"
        resources[key] = int(
            (float(row[header[resource_name_mapping[key]]]) *
             float(row[header[resource_name_mapping["cores"]]])) *
            unit_conversion_mapping.get(resource_name_mapping[key], 1))
        used_resources[key] = int(
            (float(row[header[used_resource_name_mapping[key]]]) *
             float(row[header[used_resource_name_mapping["cores"]]])) *
            unit_conversion_mapping.get(used_resource_name_mapping[key], 1))
        yield Job(
            resources=resources,
            used_resources=used_resources,
            queue_date=float(
                row[header[used_resource_name_mapping["queuetime"]]]),
            name=row[header["Job Number"]],
            calculation_efficiency=calculation_efficiency,
        )