Ejemplo n.º 1
0
    def batch_resources(self, spec, **kwargs):
        """Return the ``#PBS`` batch-card lines for a job resource request.

        spec is a JobResourceSpec, or any value the JobResourceSpec
        constructor accepts.  Keyword arguments, if any, are merged over
        spec with ``dict(spec, **kwargs)`` before that conversion.

        Walltime, output files and job name are read from the first
        rank block only (spec[0]).  The return value is a string of
        newline-terminated ``#PBS`` lines.
        """
        if kwargs:
            spec = dict(spec, **kwargs)
        sio = StringIO()
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)
        rank0 = spec[0]

        if rank0.get('walltime', ''):
            dt = tools.to_timedelta(rank0['walltime']).total_seconds()
            hours = int(dt // 3600)
            minutes = int((dt % 3600) // 60)
            seconds = int(math.floor(dt % 60))
            sio.write(f'#PBS -l walltime={hours:d}:{minutes:02d}'
                      f':{seconds:02d}\n')

        megabytes = self.get_memory_from_resource_spec(spec)
        if megabytes is not None:
            sio.write(f'#PBS -l vmem={megabytes:d}M\n')

        # A joined stream takes precedence over separate stdout/stderr.
        if rank0.get('outerr', ''):
            sio.write(f'#PBS -j oe -o {rank0["outerr"]}\n')
        else:
            # These writes must be f-strings; without the prefix the
            # placeholder text itself ended up in the batch card.
            if rank0.get('stdout', ''):
                sio.write(f'#PBS -o {rank0["stdout"]}\n')
            if rank0.get('stderr', ''):
                sio.write(f'#PBS -e {rank0["stderr"]}\n')
        if rank0.get('jobname'):
            # NOTE(review): the job name flag is usually -N in
            # PBS/Torque; -J is kept from the original -- confirm.
            sio.write(f'#PBS -J {rank0["jobname"]}\n')

        # --------------------------------------------------------------
        # Request processors.
        if spec.is_pure_serial():
            # An unspecified (None) exclusivity is treated as exclusive.
            if rank0.is_exclusive() in [True, None]:
                sio.write('#PBS -l nodes=1:ppn=2\n')
            else:
                sio.write('#PBS -l procs=1\n')
        elif spec.is_pure_openmp():
            # Pure threaded.  Treat as exclusive serial.
            sio.write('#PBS -l nodes=1:ppn=2\n')
        else:
            # This is an MPI program.

            # Split into (nodes,ranks_per_node) pairs.  Ignore
            # differing executables between ranks while merging them
            # (same_except_exe):
            nodes_ranks = self.nodes.to_nodes_ppn(
                spec, can_merge_ranks=self.nodes.same_except_exe)
            sio.write('#PBS -l nodes=')
            sio.write('+'.join(f'{n}:ppn={p}' for n, p in nodes_ranks))
            sio.write('\n')
        ret = sio.getvalue()
        sio.close()
        return ret
Ejemplo n.º 2
0
    def rocoto_resources(self, *args, indent=0, **kwargs):
        """Return the Rocoto XML resource tags for a job resource request.

        The positional and keyword arguments are combined into a spec
        by tools.make_dict_from(args, kwargs); the result is converted
        to a JobResourceSpec if it is not one already.  indent is the
        number of copies of self.indent_text placed before each tag.

        Walltime and output files are read from the first rank block
        only (spec[0]).  The return value is a string of
        newline-terminated XML tags.
        """
        spec = tools.make_dict_from(args, kwargs)
        sio = StringIO()
        pad = indent * self.indent_text
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)
        rank0 = spec[0]

        if rank0.get('walltime', ''):
            dt = tools.to_timedelta(rank0['walltime']).total_seconds()
            hours = int(dt // 3600)
            minutes = int((dt % 3600) // 60)
            seconds = int(math.floor(dt % 60))
            sio.write(
                f'{pad}<walltime>{hours}:{minutes:02d}:{seconds:02d}'
                f'</walltime>\n')

        megabytes = self.get_memory_from_resource_spec(spec)
        if megabytes is not None:
            sio.write(f'{pad}<memory>{megabytes:d}M</memory>\n')

        # Output files are looked up in the first rank block, like
        # walltime above; the placeholders need f-strings so they are
        # actually substituted.
        if rank0.get('outerr', ''):
            sio.write(f'{pad}<join>{rank0["outerr"]}</join>\n')
        else:
            if rank0.get('stdout', ''):
                sio.write(f'{pad}<stdout>{rank0["stdout"]}</stdout>\n')
            if rank0.get('stderr', ''):
                sio.write(f'{pad}<stderr>{rank0["stderr"]}</stderr>\n')

        if spec.is_pure_serial():
            # An unspecified (None) exclusivity is treated as exclusive.
            if rank0.is_exclusive() in [True, None]:
                sio.write(pad + '<nodes>1:ppn=2</nodes>\n')
            else:
                sio.write(pad + '<cores>1</cores>\n')
        elif spec.is_pure_openmp():
            # Pure threaded.  Treat as exclusive serial.
            sio.write(pad + '<nodes>1:ppn=2</nodes>\n')
        else:
            # This is an MPI program.

            # Split into (nodes,ranks_per_node) pairs.  Ignore differing
            # executables between ranks while merging them
            # (same_except_exe):
            nodes_ranks = self.nodes.to_nodes_ppn(
                spec, can_merge_ranks=self.nodes.same_except_exe)

            sio.write(pad + '<nodes>'
                      + '+'.join(f'{n}:ppn={p}' for n, p in nodes_ranks)
                      + '</nodes>\n')
        ret = sio.getvalue()
        sio.close()
        return ret
Ejemplo n.º 3
0
def test():
    """Check Scheduler.rocoto_resources against known resource requests.

    Covers a compound MPI+OpenMP job, a shared serial job, an exclusive
    serial job, a pure OpenMP job, and a request that must be rejected
    with MachineTooSmallError.
    """
    sched = Scheduler({
        'physical_cores_per_node': 24,
        'logical_cpus_per_core': 2,
        'hyperthreading_allowed': True
    })

    def render(rank_blocks):
        # Build the spec and return the Rocoto XML text for it.
        return sched.rocoto_resources(JobResourceSpec(rank_blocks))

    # MPI + OpenMP program test
    mpi_openmp = [
        {'mpi_ranks': 5, 'OMP_NUM_THREADS': 12},
        {'mpi_ranks': 7, 'OMP_NUM_THREADS': 12},
        {'mpi_ranks': 7},
    ]
    assert render(mpi_openmp) == '<nodes>6:ppn=2+1:ppn=7</nodes>\n'

    # Serial program test
    shared_serial = [
        {'exe': 'echo', 'args': ['hello', 'world'], 'exclusive': False}]
    assert render(shared_serial) == '<cores>1</cores>\n'

    # Exclusive serial program test
    exclusive_serial = [
        {'exe': 'echo', 'args': ['hello', 'world 2'], 'exclusive': True}]
    assert render(exclusive_serial) == '<nodes>1:ppn=2</nodes>\n'

    # Pure openmp test
    pure_openmp = [{'OMP_NUM_THREADS': 20}]
    assert render(pure_openmp) == '<nodes>1:ppn=2</nodes>\n'

    # Too big for node: the request must be refused.
    try:
        render([{'OMP_NUM_THREADS': 200, 'mpi_ranks': 3}])
    except MachineTooSmallError:
        pass  # success!
    else:
        assert False
Ejemplo n.º 4
0
    def rocoto_resources(self, spec, indent=0):
        """Return the Rocoto XML resource tags for a job resource request.

        spec is a JobResourceSpec or any value its constructor accepts.
        indent is the number of copies of self.indent_text placed
        before each tag written here.

        Writes, in order: walltime (when spec[0] has one), memory (when
        self.specify_memory is set), the stdout/stderr tags produced by
        self._rocoto_stdout_stderr, the nodes request, and a native
        affinity request.  The return value is the accumulated text.
        """
        out = StringIO()
        space = self.indent_text
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)
        pad = indent * space

        if spec[0].get('walltime', ''):
            total = tools.to_timedelta(spec[0]['walltime']).total_seconds()
            whole_hours, leftover = divmod(total, 3600)
            hours = int(whole_hours)
            minutes = int(leftover // 60)
            seconds = int(math.floor(total % 60))
            out.write(
                f'{pad}<walltime>{hours}:{minutes:02d}:{seconds:02d}'
                '</walltime>\n')

        # Handle memory; the byte count is rounded up to whole megabytes.
        if self.specify_memory:
            nbytes = self._decide_memory_for(spec)
            megabytes = int(math.ceil(nbytes / 1048576.))
            out.write(f'{pad}<memory>{megabytes:d}M</memory>\n')

        # Stdout and stderr if specified:
        self._rocoto_stdout_stderr(spec[0], indent, space, out)

        # Write nodes=x:ppn=y
        # Split into (nodes,ranks_per_node) pairs.  Ignore differing
        # executables between ranks while merging them (same_except_exe):
        nodes_ranks = self.nodes.to_nodes_ppn(
            spec, can_merge_ranks=self.nodes.same_except_exe)
        # Node and ppn counts are clamped to at least 1.
        requests = [f'{max(n,1)}:ppn={max(p,1)}' for n, p in nodes_ranks]
        out.write(pad + '<nodes>' + '+'.join(requests) + '</nodes>\n')

        # Write out affinity.
        hyperthreads = max(self.nodes.hyperthreads_for(r) for r in spec)
        uses_hyperthreads = hyperthreads > 1
        node_size = self.nodes.cores_per_node
        if uses_hyperthreads:
            node_size *= self.nodes.cpus_per_core
        max_ppn = min(self.nodes.max_ranks_per_node(r) for r in spec)
        affinity_count = node_size // max_ppn
        if uses_hyperthreads:
            affinity_type = 'cpu'
        else:
            affinity_type = 'core'

        out.write(
            f"{pad}<native>-R 'affinity[{affinity_type}({affinity_count})]'"
            "</native>\n")
        text = out.getvalue()
        out.close()
        return text
Ejemplo n.º 5
0
def test():
    """Print the Rocoto and batch cards for a set of sample requests.

    For each sample the rocoto_resources and batch_resources outputs
    are printed for inspection; their exact text is not asserted.  The
    final case checks that an oversized request raises
    MachineTooSmallError.
    """
    sched = Scheduler({
        'physical_cores_per_node': 28,
        'logical_cpus_per_core': 2,
        'specify_memory': True,
        'use_task_geometry': False,
        'hyperthreading_allowed': True
    })

    samples = (
        # MPI + OpenMP program test
        [{'mpi_ranks': 5, 'OMP_NUM_THREADS': 12}],
        # Compound MPI + OpenMP program test
        [
            {'mpi_ranks': 5, 'OMP_NUM_THREADS': 12},
            {'mpi_ranks': 7, 'OMP_NUM_THREADS': 12},
            {'mpi_ranks': 7},
        ],
        # Serial program test
        [{'exe': 'echo', 'args': ['hello', 'world'], 'exclusive': False}],
        # Exclusive serial program test
        [{'exe': 'echo', 'args': ['hello', 'world 2'], 'exclusive': True}],
        # Pure openmp test
        [{'OMP_NUM_THREADS': 20}],
    )

    for rank_blocks in samples:
        job = JobResourceSpec(rank_blocks)
        rocoto_text = sched.rocoto_resources(job)
        batch_text = sched.batch_resources(job)
        print(f'{rank_blocks} => \n{rocoto_text}')
        print(f'{rank_blocks} => \n{batch_text}')

    # Too big for node: the request must be refused.
    try:
        oversized = JobResourceSpec([{'OMP_NUM_THREADS': 200, 'mpi_ranks': 3}])
        sched.rocoto_resources(oversized)
    except MachineTooSmallError:
        pass  # success!
    else:
        assert False
Ejemplo n.º 6
0
    def batch_resources(self, spec, **kwargs):
        """Return the ``#BSUB`` batch-card lines for a job resource request.

        spec is a JobResourceSpec, or any value the JobResourceSpec
        constructor accepts.  Keyword arguments, if any, are merged over
        spec with ``dict(spec, **kwargs)`` before that conversion.

        The card contains the walltime (hours:minutes only), the
        stdout/stderr lines written by self._batch_stdout_stderr, an
        exclusivity flag, and the rank/affinity/span request.  The form
        of that last part depends on the number of rank blocks and on
        self.use_task_geometry.  The return value is the card text.
        """
        if kwargs:
            spec = dict(spec, **kwargs)
        out = StringIO()
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)

        if spec[0].get('walltime', ''):
            total = tools.to_timedelta(spec[0]['walltime']).total_seconds()
            whole_hours, leftover = divmod(total, 3600)
            hours = int(whole_hours)
            minutes = int(leftover // 60)
            # Only hours and minutes are written; seconds are dropped.
            out.write(f'#BSUB -W {hours}:{minutes:02d}\n')

        # ------------------------------------------------------------
        # Handle memory: an empty rusage means "no memory request".

        rusage = ''
        if self.specify_memory:
            nbytes = self._decide_memory_for(spec)
            megabytes = int(math.ceil(nbytes / 1048576.))
            rusage = f'rusage[mem={megabytes:d}]'

        # ------------------------------------------------------------
        # stdout/stderr locations

        self._batch_stdout_stderr(spec[0], out)

        # ------------------------------------------------------------
        # ranks, affinity, and span

        if spec[0].exclusive:
            out.write('#BSUB -x\n')

        if len(spec) == 1:
            # Special case: only one block.  Affinity and span go on
            # their own lines and "-n" gives the number of ranks.  This
            # also covers pure OpenMP and pure serial jobs.
            only = self._ranks_affinity_and_span_for(spec)[0]

            if rusage:
                out.write(f"#BSUB -R '{rusage}'\n")

            # Affinity is mandatory:
            out.write(f"#BSUB -R 'affinity[{only['affinity']}]'\n")

            # Span is only used when OpenMP or MPI are in use:
            if not spec.is_pure_serial():
                out.write(f"#BSUB -R 'span[{only['span']}]'\n")

            # -n is used except in shared, non-mpi jobs
            if spec[0].exclusive or spec.total_ranks() > 2:
                out.write(f"#BSUB -n {only['ranks']}\n")

        elif not self.use_task_geometry:
            # General case: more than one block.  Task geometry is
            # disabled, so one affinity/span pair covers every block.
            hyperthreads = max(self.nodes.hyperthreads_for(r) for r in spec)
            uses_hyperthreads = hyperthreads > 1
            node_size = self.nodes.cores_per_node
            if uses_hyperthreads:
                node_size *= self.nodes.cpus_per_core
            max_ppn = min(self.nodes.max_ranks_per_node(r) for r in spec)
            affinity_count = node_size // max_ppn
            if uses_hyperthreads:
                affinity_type = 'cpu'
            else:
                affinity_type = 'core'
            affinity = f'{affinity_type}({affinity_count})'

            if rusage:
                out.write(f"#BSUB -R '{rusage}'\n")
            out.write(f"#BSUB -R 'affinity[{affinity}]'\n")
            span_count = min(spec.total_ranks(), max_ppn)
            out.write(f"#BSUB -R 'span[{span_count}]'\n")
            out.write(f"#BSUB -n {spec.total_ranks()}\n")

        else:
            # General case: more than one block.  Task geometry is
            # enabled, so each block gets its own term in a single -R
            # string, with the terms joined by " + ".
            terms = [
                f'{block["ranks"]}*{{span[{block["span"]}]'
                f'affinity[{block["affinity"]}]{rusage}}}'
                for block in self._ranks_affinity_and_span_for(spec)
            ]
            out.write("#BSUB -R '" + ' + '.join(terms) + "'\n")

        text = out.getvalue()
        out.close()
        return text
Ejemplo n.º 7
0
    def batch_resources(self, spec, **kwargs):
        """Return the ``#BSUB`` batch-card lines for a job resource request.

        spec is a JobResourceSpec, or any value the JobResourceSpec
        constructor accepts.  Keyword arguments, if any, are merged over
        spec with ``dict(spec, **kwargs)`` before that conversion.

        Walltime, memory and output files are read from the first rank
        block only (spec[0]).  When no memory setting applies, 2000 MB
        is requested.  The return value is the card text.
        """
        if kwargs:
            spec = dict(spec, **kwargs)
        sio = StringIO()
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)
        rank0 = spec[0]

        if rank0.get('walltime', ''):
            dt = tools.to_timedelta(rank0['walltime']).total_seconds()
            hours = int(dt // 3600)
            minutes = int((dt % 3600) // 60)
            # Only hours and minutes are written; seconds are dropped.
            sio.write(f'#BSUB -W {hours}:{minutes:02d}\n')

        # Handle memory.  Exclusive jobs prefer 'batch_memory', other
        # jobs prefer 'compute_memory'; both fall back to 'memory' and
        # then to a fixed default.
        if rank0.is_exclusive() and rank0.get('batch_memory', ''):
            nbytes = tools.memory_in_bytes(rank0['batch_memory'])
        elif not rank0.is_exclusive() and rank0.get('compute_memory', ''):
            nbytes = tools.memory_in_bytes(rank0['compute_memory'])
        elif rank0.get('memory', ''):
            nbytes = tools.memory_in_bytes(rank0['memory'])
        else:
            nbytes = 2000 * 1048576.

        # Round up to whole megabytes.
        megabytes = int(math.ceil(nbytes / 1048576.))

        sio.write(f'#BSUB -R rusage[mem={megabytes:d}]\n')

        if rank0.get('outerr', ''):
            sio.write(f'#BSUB -o {rank0["outerr"]}\n')
        else:
            # These writes must be f-strings; without the prefix the
            # placeholder text itself ended up in the batch card.
            if rank0.get('stdout', ''):
                sio.write(f'#BSUB -o {rank0["stdout"]}\n')
            if rank0.get('stderr', ''):
                sio.write(f'#BSUB -e {rank0["stderr"]}\n')
        # --------------------------------------------------------------

        # With LSF+ALPS on WCOSS Cray, to my knowledge, you can only
        # request one node size for all ranks.  This code calculates
        # the largest node size required (hyperthreading vs. non)

        requested_nodes = 1

        nodesize = max(self.nodes.node_size(r) for r in spec)

        if rank0.is_exclusive() is False:
            # Shared program.  This requires a different batch card syntax
            nranks = max(1, spec.total_ranks())
            sio.write(f'#BSUB -n {nranks}\n')
        else:
            if not spec.is_pure_serial() and not spec.is_pure_openmp():
                # This is an MPI program.
                nodes_ranks = self.nodes.to_nodes_ppn(spec)
                requested_nodes = sum(n for n, p in nodes_ranks)
            sio.write('#BSUB -extsched CRAYLINUX[]\n')
            # NOTE(review): unlike every other line of the card, the
            # two final writes below carry no trailing newline.  Kept
            # as in the original -- confirm what callers expect.
            if self.settings.get('use_export_nodes', True):
                sio.write(f'export NODES={requested_nodes}')
            else:
                sio.write("#BSUB -R '1*{select[craylinux && !vnode]} + ")
                sio.write('%d' % requested_nodes)
                sio.write("*{select[craylinux && vnode]span[")
                sio.write(f"ptile={nodesize}] cu[type=cabinet]}}'")

        ret = sio.getvalue()
        sio.close()
        return ret
Ejemplo n.º 8
0
    def rocoto_resources(self, spec, indent=0):
        """Return the Rocoto XML resource tags for a job resource request.

        spec is a JobResourceSpec or any value its constructor accepts.
        indent is the number of copies of self.indent_text placed
        before each tag.

        Walltime, memory and output files are read from the first rank
        block only (spec[0]).  When no memory setting applies, 2000 MB
        is requested.  The return value is a string of
        newline-terminated XML tags.
        """
        sio = StringIO()
        pad = indent * self.indent_text
        if not isinstance(spec, JobResourceSpec):
            spec = JobResourceSpec(spec)
        rank0 = spec[0]

        if rank0.get('walltime', ''):
            dt = tools.to_timedelta(rank0['walltime']).total_seconds()
            hours = int(dt // 3600)
            minutes = int((dt % 3600) // 60)
            seconds = int(math.floor(dt % 60))
            sio.write(
                f'{pad}<walltime>{hours}:{minutes:02d}:{seconds:02d}'
                f'</walltime>\n')

        # Handle memory.  Exclusive jobs prefer 'batch_memory', other
        # jobs prefer 'compute_memory'; both fall back to 'memory' and
        # then to a fixed default.
        if rank0.is_exclusive() and rank0.get('batch_memory', ''):
            nbytes = tools.memory_in_bytes(rank0['batch_memory'])
        elif not rank0.is_exclusive() and rank0.get('compute_memory', ''):
            nbytes = tools.memory_in_bytes(rank0['compute_memory'])
        elif rank0.get('memory', ''):
            nbytes = tools.memory_in_bytes(rank0['memory'])
        else:
            nbytes = 2000 * 1048576.

        # Round up to whole megabytes.
        megabytes = int(math.ceil(nbytes / 1048576.))

        sio.write(f'{pad}<memory>{megabytes:d}M</memory>\n')

        # Output files are looked up in the first rank block, like
        # walltime above; the placeholders need f-strings so they are
        # actually substituted.
        if rank0.get('outerr', ''):
            sio.write(f'{pad}<join>{rank0["outerr"]}</join>\n')
        else:
            if rank0.get('stdout', ''):
                sio.write(f'{pad}<stdout>{rank0["stdout"]}</stdout>\n')
            if rank0.get('stderr', ''):
                sio.write(f'{pad}<stderr>{rank0["stderr"]}</stderr>\n')

        # NOTE(review): nodesize and requested_nodes are computed but
        # never written out.  The helper calls are kept in case they
        # reject invalid specs -- confirm and remove if they do not.
        nodesize = max(self.nodes.node_size(r) for r in spec)
        requested_nodes = 1

        if rank0.is_exclusive() is False:
            # Shared program.  This requires a different batch card syntax
            nranks = max(1, spec.total_ranks())
            sio.write(
                f'{pad}<cores>{nranks}</cores>\n'
                f'{pad}<shared></shared>\n')
        else:
            if not spec.is_pure_serial() and not spec.is_pure_openmp():
                # This is an MPI program.
                nodes_ranks = self.nodes.to_nodes_ppn(spec)
                requested_nodes = sum(n for n, p in nodes_ranks)

            # Rank blocks are never merged here (the merge test always
            # answers False).
            nodes_ranks = self.nodes.to_nodes_ppn(
                spec, can_merge_ranks=lambda x, y: False)

            # Node and ppn counts are clamped to at least 1.
            sio.write(pad + '<nodes>'
                      + '+'.join(f'{max(n,1)}:ppn={max(p,1)}'
                                 for n, p in nodes_ranks)
                      + '</nodes>\n')

        ret = sio.getvalue()
        sio.close()
        return ret