Example #1
import contextlib
import logging
import os
import sys
import time

from desispec.log import get_logger


@contextlib.contextmanager
def stdouterr_redirected(to=os.devnull, comm=None):
    '''
    Redirect stdout and stderr (both the Python-level objects and the
    underlying file descriptors, so output from C code and subprocesses
    is captured too) to a file.

    Based on http://stackoverflow.com/questions/5081657

    import os

    with stdouterr_redirected(to=filename):
        print("from Python")
        os.system("echo non-Python applications are also supported")
    '''
    fd = sys.stdout.fileno()
    fde = sys.stderr.fileno()

    # Sanity check kept from the original recipe: Python and C stdio are
    # assumed to write through the same file descriptor, i.e.
    #   assert libc.fileno(ctypes.c_void_p.in_dll(libc, "stdout")) == fd == 1

    def _redirect_stdout(to):
        sys.stdout.close()  # + implicit flush()
        os.dup2(to.fileno(), fd)  # fd writes to 'to' file
        sys.stdout = os.fdopen(fd, 'w')  # Python writes to fd
        sys.stderr.close()  # + implicit flush()
        os.dup2(to.fileno(), fde)  # fd writes to 'to' file
        sys.stderr = os.fdopen(fde, 'w')  # Python writes to fd
        # update desi logging to use new stdout
        log = get_logger()
        while len(log.handlers) > 0:
            h = log.handlers[0]
            log.removeHandler(h)
        # Add the current stdout.
        ch = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            '%(levelname)s:%(filename)s:%(lineno)s:%(funcName)s: %(message)s')
        ch.setFormatter(formatter)
        log.addHandler(ch)

    with os.fdopen(os.dup(fd), 'w') as old_stdout:
        if comm is None:
            with open(to, 'w') as file:
                _redirect_stdout(to=file)
        else:
            # Serialize the redirection across MPI ranks: each rank opens
            # the target file in turn, with a barrier in between.
            for p in range(comm.size):
                if p == comm.rank:
                    with open(to, 'w') as file:
                        _redirect_stdout(to=file)
                comm.barrier()
        # Re-fetch the logger; _redirect_stdout() re-pointed its handlers
        # at the new stdout.
        log = get_logger()
        try:
            if (comm is None) or (comm.rank == 0):
                log.info("Begin log redirection to {} at {}".format(
                    to, time.asctime()))
            sys.stdout.flush()
            yield  # allow code to be run with the redirected stdout
        finally:
            if (comm is None) or (comm.rank == 0):
                log.info("End log redirection to {} at {}".format(
                    to, time.asctime()))
            sys.stdout.flush()
            _redirect_stdout(to=old_stdout)  # restore stdout.
            # buffering and flags such as
            # CLOEXEC may be different
    return
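
A minimal usage sketch (the per-rank filename scheme is hypothetical; assumes mpi4py is installed and the imports above are in scope). Each rank redirects its output to its own file while the context is active:

from mpi4py import MPI

comm = MPI.COMM_WORLD
logfile = "task_rank{:03d}.log".format(comm.rank)  # hypothetical naming
with stdouterr_redirected(to=logfile, comm=comm):
    print("rank {} reporting".format(comm.rank))   # captured in logfile
    os.system("echo C-level and subprocess output is captured too")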
Example #2
import os
import time

import desispec.log


def runcmd(cmd, args=None, inputs=[], outputs=[], clobber=False):
    """
    Runs a command, checking for inputs and outputs

    Args:
        cmd : command string to run with os.system(), or a callable to invoke
        args : optional argument tuple; only allowed when cmd is callable
        inputs : list of filename inputs that must exist before running
        outputs : list of output filenames that should be created
        clobber : if True, run even if outputs already exist

    Returns:
        error code from command or input/output checking; 0 is good

    TODO:
        Should it raise an exception instead?

    Notes:
        If any inputs are missing, don't run cmd.
        If outputs exist and have timestamps after all inputs, don't run cmd.

    """
    log = desispec.log.get_logger()
    #- Check that inputs exist
    err = 0
    input_time = 0  #- timestamp of latest input file
    for x in inputs:
        if not os.path.exists(x):
            log.error("missing input " + x)
            err = 1
        else:
            input_time = max(input_time, os.stat(x).st_mtime)

    if err > 0:
        return err

    #- Check if outputs already exist and that their timestamp is after
    #- the last input timestamp
    already_done = (not clobber) and (len(outputs) > 0)
    if not clobber:
        for x in outputs:
            if not os.path.exists(x):
                already_done = False
                break
            if len(inputs) > 0 and os.stat(x).st_mtime < input_time:
                already_done = False
                break

    if already_done:
        log.info("SKIPPING: {}".format(cmd))
        return 0

    #- Green light to go; print input/output info
    #- Use log.level to decide verbosity, but avoid long prefixes
    log.info(time.asctime())
    log.info("RUNNING: {}".format(cmd))
    if log.level <= desispec.log.INFO:
        if len(inputs) > 0:
            print("  Inputs")
            for x in inputs:
                print("   ", x)
        if len(outputs) > 0:
            print("  Outputs")
            for x in outputs:
                print("   ", x)

    #- Run the command; a callable's return value is passed straight back
    #- to the caller, skipping the output checks below
    if callable(cmd):
        if args is None:
            return cmd()
        else:
            return cmd(*args)
    else:
        if args is None:
            err = os.system(cmd)
        else:
            raise ValueError("Don't provide args unless cmd is a function")

    log.info(time.asctime())
    if err > 0:
        log.critical("FAILED {}".format(cmd))
        return err

    #- Check for outputs
    err = 0
    for x in outputs:
        if not os.path.exists(x):
            log.error("missing output " + x)
            err = 2
    if err > 0:
        return err

    log.info("SUCCESS: {}".format(cmd))
    return 0
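
A short usage sketch (hypothetical filenames and command). Because outputs that are newer than all inputs are skipped, runcmd lends itself to simple make-style scripting:

err = runcmd("gzip -k raw.dat", inputs=["raw.dat"], outputs=["raw.dat.gz"])
if err != 0:
    raise RuntimeError("command failed with error code {}".format(err))

#- A callable works too; note its return value comes back unchecked
def make_summary(path):
    with open(path, 'w') as fx:
        fx.write("done\n")
    return 0

err = runcmd(make_summary, args=("summary.txt",), outputs=["summary.txt"])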
Example #3
import os
import time

# get_logger, graph_read_prod, prod_state, read_options, run_step_types,
# step_file_types, graph_mark, graph_write, graph_dot and run_step are
# assumed to come from the surrounding desispec pipeline module.
def run_steps(first, last, rawdir, proddir, spectrographs=None, nightstr=None, comm=None):
    '''
    Run multiple sequential pipeline steps.

    For each step, this function takes the communicator and the requested
    processes per task and splits the communicator to form groups of
    processes of the desired size.  It then takes the full dependency graph,
    extracts all the tasks for that step, and distributes those tasks among
    the groups of processes.

    Each process group loops over its assigned tasks.  For each task, it
    redirects stdout/stderr to a per-task file and calls run_task().  If
    any process in the group throws an exception, then the traceback and
    all information (graph and options) needed to re-run the task are written
    to disk.

    After all process groups have finished, the state of the full graph is
    merged from all processes.  This way a failure of one process on one task
    will be propagated as a failed task to all processes.

    Args:
        first (str): the first pipeline step to run, or None to start at
            the first step.
        last (str): the last pipeline step to run, or None to run through
            the final step.
        rawdir (str): the path to the raw data directory.
        proddir (str): the path to the production directory.
        spectrographs (list): optional subset of spectrographs to process.
        nightstr (str): optional night selection string.
        comm (mpi4py.Comm): the full communicator to use for all steps.

    Returns:
        Nothing.
    '''
    log = get_logger()

    rank = 0
    nproc = 1
    if comm is not None:
        rank = comm.rank
        nproc = comm.size

    # get the full graph

    grph = None
    if rank == 0:
        grph = graph_read_prod(proddir, nightstr=nightstr, spectrographs=spectrographs)
        prod_state(rawdir, proddir, grph)
    if comm is not None:
        grph = comm.bcast(grph, root=0)

    # read run options from disk

    rundir = os.path.join(proddir, "run")
    optfile = os.path.join(rundir, "options.yaml")
    opts = None
    if rank == 0:
        opts = read_options(optfile)
    if comm is not None:
        opts = comm.bcast(opts, root=0)

    # compute the ordered list of steps to run

    firststep = None
    if first is None:
        firststep = 0
    else:
        s = 0
        for st in run_step_types:
            if st == first:
                firststep = s
            s += 1

    laststep = None
    if last is None:
        laststep = len(run_step_types)
    else:
        s = 1
        for st in run_step_types:
            if st == last:
                laststep = s
            s += 1

    if rank == 0:
        log.info("running steps {} to {}".format(run_step_types[firststep], run_step_types[laststep-1]))

    # Assign the desired number of processes per task

    steptaskproc = {}
    for st in run_step_types:
        steptaskproc[st] = 1

    steptaskproc['bootcalib'] = 1
    steptaskproc['specex'] = 20
    steptaskproc['psfcombine'] = 1
    steptaskproc['extract'] = 20
    steptaskproc['fiberflat'] = 1
    steptaskproc['sky'] = 1
    steptaskproc['stdstars'] = 1
    steptaskproc['fluxcal'] = 1
    steptaskproc['procexp'] = 1
    steptaskproc['zfind'] = 48

    jobid = None
    if rank == 0:
        if 'SLURM_JOBID' in os.environ:
            jobid = "slurm-{}".format(os.environ['SLURM_JOBID'])
        else:
            jobid = os.getpid()

    statefile = None
    statedot = None
    if rank == 0:
        stateroot = "state_{}-{}_{}".format(run_step_types[firststep], run_step_types[laststep-1], jobid)
        statefile = os.path.join(rundir, "{}.yaml".format(stateroot))
        statedot = os.path.join(rundir, "{}.dot".format(stateroot))

    # Mark our steps as in progress

    for st in range(firststep, laststep):
        for name, nd in grph.items():
            if nd['type'] in step_file_types[run_step_types[st]]:
                if 'state' in nd:
                    if nd['state'] != 'done':
                        graph_mark(grph, name, 'wait')
                else:
                    graph_mark(grph, name, 'wait')

    if rank == 0:
        graph_write(statefile, grph)
        with open(statedot, 'w') as f:
            graph_dot(grph, f)

    # Run the steps.  Each step updates the graph in place to track
    # the state of all nodes.

    for st in range(firststep, laststep):
        runfile = None
        if rank == 0:
            log.info("starting step {} at {}".format(run_step_types[st], time.asctime()))
        taskproc = steptaskproc[run_step_types[st]]
        if taskproc > nproc:
            taskproc = nproc
        grph = run_step(run_step_types[st], rawdir, proddir, grph, opts, comm=comm, taskproc=taskproc)
        if comm is not None:
            comm.barrier()
        if rank == 0:
            log.info("completed step {} at {}".format(run_step_types[st], time.asctime()))
        if rank == 0:
            graph_write(statefile, grph)
            with open(statedot, 'w') as f:
                graph_dot(grph, f)

    if rank == 0:
        log.info("finished steps {} to {}".format(run_step_types[firststep], run_step_types[laststep-1]))

    return
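
A minimal driver sketch (hypothetical paths and night string; assumes mpi4py and a production directory prepared by the desispec pipeline). Passing None for first and last runs every step in run_step_types:

from mpi4py import MPI

comm = MPI.COMM_WORLD
run_steps(None, None, "/path/to/raw", "/path/to/prod",
          nightstr="20160101", comm=comm)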
Example #4
# Variant of the function above: run_step is assumed here to return a
# (graph, ntask, failtask) tuple, and the loop exits early if every task
# in a step fails.
def run_steps(first,
              last,
              rawdir,
              proddir,
              spectrographs=None,
              nightstr=None,
              comm=None):
    '''
    Run multiple sequential pipeline steps.

    For each step, this function takes the communicator and the requested
    processes per task and splits the communicator to form groups of
    processes of the desired size.  It then takes the full dependency graph,
    extracts all the tasks for that step, and distributes those tasks among
    the groups of processes.

    Each process group loops over its assigned tasks.  For each task, it
    redirects stdout/stderr to a per-task file and calls run_task().  If
    any process in the group throws an exception, then the traceback and
    all information (graph and options) needed to re-run the task are written
    to disk.

    After all process groups have finished, the state of the full graph is
    merged from all processes.  This way a failure of one process on one task
    will be propagated as a failed task to all processes.

    Args:
        first (str): the first pipeline step to run, or None to start at
            the first step.
        last (str): the last pipeline step to run, or None to run through
            the final step.
        rawdir (str): the path to the raw data directory.
        proddir (str): the path to the production directory.
        spectrographs (list): optional subset of spectrographs to process.
        nightstr (str): optional night selection string.
        comm (mpi4py.Comm): the full communicator to use for all steps.

    Returns:
        Nothing.
    '''
    log = get_logger()

    rank = 0
    nproc = 1
    if comm is not None:
        rank = comm.rank
        nproc = comm.size

    # get the full graph

    grph = None
    if rank == 0:
        grph = graph_read_prod(proddir,
                               nightstr=nightstr,
                               spectrographs=spectrographs)
        prod_state(rawdir, proddir, grph)
    if comm is not None:
        grph = comm.bcast(grph, root=0)

    # read run options from disk

    rundir = os.path.join(proddir, "run")
    optfile = os.path.join(rundir, "options.yaml")
    opts = None
    if rank == 0:
        opts = read_options(optfile)
    if comm is not None:
        opts = comm.bcast(opts, root=0)

    # compute the ordered list of steps to run

    firststep = None
    if first is None:
        firststep = 0
    else:
        s = 0
        for st in run_step_types:
            if st == first:
                firststep = s
            s += 1

    laststep = None
    if last is None:
        laststep = len(run_step_types)
    else:
        s = 1
        for st in run_step_types:
            if st == last:
                laststep = s
            s += 1

    if rank == 0:
        log.info("running steps {} to {}".format(run_step_types[firststep],
                                                 run_step_types[laststep - 1]))

    # Assign the desired number of processes per task

    steptaskproc = {}
    for st in run_step_types:
        steptaskproc[st] = 1

    steptaskproc['bootcalib'] = 1
    steptaskproc['specex'] = 20
    steptaskproc['psfcombine'] = 1
    steptaskproc['extract'] = 20
    steptaskproc['fiberflat'] = 1
    steptaskproc['sky'] = 1
    steptaskproc['stdstars'] = 1
    steptaskproc['fluxcal'] = 1
    steptaskproc['procexp'] = 1
    steptaskproc['zfind'] = 48

    jobid = None
    if rank == 0:
        if 'SLURM_JOBID' in os.environ:
            jobid = "slurm-{}".format(os.environ['SLURM_JOBID'])
        else:
            jobid = os.getpid()

    statefile = None
    statedot = None
    if rank == 0:
        stateroot = "state_{}-{}_{}".format(run_step_types[firststep],
                                            run_step_types[laststep - 1],
                                            jobid)
        statefile = os.path.join(rundir, "{}.yaml".format(stateroot))
        statedot = os.path.join(rundir, "{}.dot".format(stateroot))

    # Mark our steps as in progress

    for st in range(firststep, laststep):
        for name, nd in grph.items():
            if nd['type'] in step_file_types[run_step_types[st]]:
                if 'state' in nd:
                    if nd['state'] != 'done':
                        graph_mark(grph, name, 'wait')
                else:
                    graph_mark(grph, name, 'wait')

    if rank == 0:
        graph_write(statefile, grph)
        with open(statedot, 'w') as f:
            graph_dot(grph, f)

    # Run the steps.  Each step updates the graph in place to track
    # the state of all nodes.

    for st in range(firststep, laststep):
        runfile = None
        if rank == 0:
            log.info("starting step {} at {}".format(run_step_types[st],
                                                     time.asctime()))
        taskproc = steptaskproc[run_step_types[st]]
        if taskproc > nproc:
            taskproc = nproc

        grph, ntask, failtask = run_step(run_step_types[st],
                                         rawdir,
                                         proddir,
                                         grph,
                                         opts,
                                         comm=comm,
                                         taskproc=taskproc)

        if rank == 0:
            log.info("completed step {} at {}".format(run_step_types[st],
                                                      time.asctime()))
            log.info("  {} total tasks, {} failures".format(ntask, failtask))
            graph_write(statefile, grph)
            with open(statedot, 'w') as f:
                graph_dot(grph, f)

        if ntask == failtask:
            if rank == 0:
                log.info("step {}: all tasks failed, quiting at {}".format(
                    run_step_types[st], time.asctime()))
            break

        if comm is not None:
            comm.barrier()

    if rank == 0:
        log.info("finished steps {} to {}".format(
            run_step_types[firststep], run_step_types[laststep - 1]))

    return
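
Because first and last are matched against run_step_types, a later job can rerun just a sub-range of the pipeline. A short sketch (step names taken from the steptaskproc table above; paths hypothetical):

run_steps('extract', 'fiberflat', "/path/to/raw", "/path/to/prod", comm=None)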