Exemple #1
0
  def _restore ():
    """Reassign the saved value to the loop variable.

    Emits a level-2 ``_verbose`` message and then calls ``assign`` with
    interpolation disabled.  ``varname``, ``saveval`` and ``namespace`` are
    free variables supplied by the enclosing scope (not visible in this
    fragment).  The value is restored unconditionally, even when it is None.
    """
    message = "restoring %s=%s" % (varname, saveval)
    _verbose(2, message, sync=True)
    assign(varname, saveval, namespace=namespace, interpolate=False)
Exemple #2
0
  def _restore ():
    """Write the remembered value back into the loop variable.

    ``varname``, ``saveval`` and ``namespace`` come from the enclosing scope,
    which is outside this fragment.  A level-2 ``_verbose`` message is logged
    first; the assignment is done without interpolation and is not skipped
    when the saved value is None.
    """
    restore_note = "restoring %s=%s" % (varname, saveval)
    _verbose(2, restore_note, sync=True)
    assign(varname, saveval, namespace=namespace, interpolate=False)
Exemple #3
0
def _per (varname,parallel,*commands):
  """Run ``commands`` once for every value listed in ``<varname>_List``.

  The variable is resolved with ``Pyxis.Internals._resolve_namespace`` relative
  to the caller of this function's caller, defaulting to ``Pyxis.Context``.
  For each value the variable is assigned (without interpolation) and
  ``Pyxis.Internals.run(*commands)`` is called.  The variable's pre-loop value
  is reassigned when the loop ends, successfully or not.

  The loop runs in forked subprocesses only when ``parallel`` is true, the
  JOBS context setting is at least 2, the list holds at least two values and
  this process is not itself a forked job.  In that case the values are split
  into contiguous, nearly equal chunks, one per child, and children are
  launched JOB_STAGGER seconds apart.  Otherwise the values are processed
  sequentially in this process.

  When the PERSIST context setting is true, a failure for one value is logged
  and the loop carries on; the failed values are then reported through
  ``_abort`` (presumably raising -- its definition is not visible here).
  Without PERSIST the first exception propagates immediately.

  Args:
    varname: name of the loop variable; values are read from ``<name>_List``,
      which may be a list, a tuple or a comma-separated string.
    parallel: if true, allow the loop to be split across forked subprocesses.
    *commands: passed through unchanged to ``Pyxis.Internals.run``.
  """
  global _subprocess_id
  # default frame to look for vars is caller of caller
  frame = inspect.currentframe().f_back.f_back
  namespace, vname = Pyxis.Internals._resolve_namespace(
      varname, frame=frame, default_namespace=Pyxis.Context)
  # NOTE(review): the name parts are passed as extra positional arguments
  # instead of being %-formatted into the message -- confirm that this is what
  # _verbose expects.
  _verbose(2, "per(%s.%s)",
           namespace.get('__name__', "???") if namespace is not Pyxis.Context else "v",
           vname)
  saveval = namespace.get(vname, None)

  def _restore ():
    # put the loop variable back to its pre-loop value (even if that is None)
    _verbose(2, "restoring %s=%s" % (varname, saveval), sync=True)
    assign(varname, saveval, namespace=namespace, interpolate=False)

  def _exit_code (status):
    # Decode an os.waitpid() status: the exit code for a normal exit, or the
    # negated signal number when the child was killed by a signal.
    if os.WIFEXITED(status):
      return os.WEXITSTATUS(status)
    return -os.WTERMSIG(status)

  varlist = namespace.get(vname + "_List", None)
  cmdlist = ",".join([x if isinstance(x, str) else getattr(x, "__name__", "?") for x in commands])
  persist = Pyxis.Context.get("PERSIST")
  fail_list = []
  if varlist is None:
    _verbose(1, "per(%s,%s): %s_List is empty" % (varname, cmdlist, varname))
    return
  try:
    if isinstance(varlist, str):
      # materialize the list: it is measured, sliced and iterated repeatedly
      varlist = list(map(_int_or_str, varlist.split(",")))
    elif not isinstance(varlist, (list, tuple)):
      # four placeholders need four arguments (the list name was missing)
      _abort("PYXIS: per(%s,%s): %s_List has invalid type %s"
             % (varname, cmdlist, varname, str(type(varlist))))
    nforks = Pyxis.Context.get("JOBS", 0)
    stagger = Pyxis.Context.get("JOB_STAGGER", 0)
    _verbose(1, "per(%s,%s,persist=%d): iterating over %s=%s"
             % (varname, cmdlist, 1 if persist else 0, varname, " ".join(map(str, varlist))))
    # unforked case
    if not parallel or nforks < 2 or len(varlist) < 2 or _subprocess_id is not None:
      # do the actual iteration
      for value in varlist:
        _verbose(1, "per-loop, setting %s=%s" % (varname, value))
        assign(vname, value, namespace=namespace, interpolate=False)
        try:
          Pyxis.Internals.run(*commands)
        except (Exception, SystemExit, KeyboardInterrupt) as exc:
          if persist:
            _warn("exception raised for %s=%s:\n" % (vname, value),
                  *traceback.format_exception(*sys.exc_info()))
            _warn("persistent mode is on (PERSIST=1), so continuing to end of %s_List" % vname)
            fail_list.append((value, str(exc)))
          else:
            raise
      # any fails?
      if fail_list:
        _restore()
        # values may be ints, so convert before joining
        _abort("per-loop failed for %s" % (",".join([str(f[0]) for f in fail_list])))
    else:
      # else split varlist into forked subprocesses
      nforks = min(nforks, len(varlist))
      # floor division: vpf is used as a slice bound and must stay an int
      vpf = len(varlist) // nforks
      # distribute N values per each fork; copy each slice into a list so that
      # leftovers can be appended even when varlist is a tuple
      subvals_list = [list(varlist[i*vpf:(i+1)*vpf]) for i in range(nforks)]
      # if something is left over, assign to first few forks
      for i in range(nforks*vpf, len(varlist)):
        subvals_list[i - nforks*vpf].append(varlist[i])
      _verbose(1, "splitting into %d jobs, up to %d %s's per job, staggered by %ds"
               % (len(subvals_list), len(subvals_list[0]), varname, stagger))
      Pyxis.Internals.flush_log()
      forked_pids = {}
      try:
        for job_id, subvals in enumerate(subvals_list):
          if job_id and stagger:
            time.sleep(stagger)
          # subvals is range of values to be iterated over by this subjob
          subval_str = ",".join(map(str, subvals))
          pid = os.fork()
          if not pid:
            # child fork: run commands
            _subprocess_id = job_id
            _verbose(1, "started job %d for %s" % (job_id, ", ".join(map(str, subvals))), sync=True)
            try:
              fail_list = []
              for value in subvals:
                _verbose(1, "per-loop, setting %s=%s" % (varname, value), sync=True)
                assign(vname, value, namespace=namespace, interpolate=False)
                try:
                  Pyxis.Internals.run(*commands)
                except (Exception, SystemExit, KeyboardInterrupt) as exc:
                  if persist:
                    _warn("exception raised for %s=%s:\n" % (vname, value),
                          sync=True, *traceback.format_exception(*sys.exc_info()))
                    _warn("persistent mode is on (PERSIST=1), so continuing to end of %s_List" % vname, sync=True)
                    fail_list.append((value, str(exc)))
                  else:
                    raise
              # any fails?
              if fail_list:
                _restore()
                _abort("per-loop failed for %s" % (", ".join([str(f[0]) for f in fail_list])), sync=True)
            except BaseException:
              # anything escaping the child's loop turns into exit status 1
              traceback.print_exc()
              _verbose(2, "job #%d (pid %d: %s=%s) exiting with error code 1"
                       % (_subprocess_id, os.getpid(), varname, value), sync=True)
              _restore()
              # .get(): a missing LOG entry must not keep the child from exiting
              _verbose(2, "log is", Pyxis.Context.get('LOG'), sync=True)
              _error("per-loop failed for %s" % value, sync=True)
              sys.exit(1)
            _verbose(2, "job #%d (pid %d) exiting normally" % (_subprocess_id, os.getpid()), sync=True)
            sys.exit(0)
          else: # parent pid: append to list
            _verbose(2, "launched job #%d (%s=%s) with pid %d" % (job_id, varname, subval_str, pid), sync=True)
            forked_pids[pid] = job_id, subval_str
        njobs = len(forked_pids)
        _verbose(1, "%d jobs launched, waiting for finish" % len(forked_pids), sync=True)
        failed = []
        while forked_pids:
          pid, status = os.waitpid(-1, 0)
          if pid in forked_pids:
            job_id, subval_str = forked_pids.pop(pid)
            # test the raw status: a child killed by a signal has a zero exit
            # byte but must still be counted as failed
            if status:
              failed.append((job_id, subval_str))
              _error("job #%d (%s=%s) exited with error status %d, waiting for %d more jobs to complete"
                     % (job_id, varname, subval_str, _exit_code(status), len(forked_pids)), sync=True)
            else:
              _verbose(1, "job #%d (%s=%s) finished, waiting for %d more jobs to complete"
                       % (job_id, varname, subval_str, len(forked_pids)), sync=True)
        if failed:
          _abort("%d of %d jobs have failed" % (len(failed), njobs), sync=True)
        else:
          _verbose(1, "all jobs finished ok", sync=True)
      except KeyboardInterrupt:
        # only the parent forwards the interrupt and reaps the children
        if _subprocess_id is None:
          _restore()
          _error("Caught Ctrl+C, waiting for %d jobs to exit" % len(forked_pids), sync=True)
          import signal
          for pid in list(forked_pids):
            os.kill(pid, signal.SIGINT)
          while forked_pids:
            pid, status = os.waitpid(-1, 0)
            if pid in forked_pids:
              job_id, subval_str = forked_pids.pop(pid)
              _verbose(1, "job #%d (%s=%s) exited with error status %d, waiting for %d more"
                       % (job_id, varname, subval_str, _exit_code(status), len(forked_pids)), sync=True)
        raise
  finally:
    # note that children also execute this block with sys.exit()
    if _subprocess_id is None:
      _restore()
    Pyxis.Internals.flush_log()
Exemple #4
0
def _per (varname,parallel,*commands):
  """Run ``commands`` once for every value listed in ``<varname>_List``.

  The variable is resolved with ``Pyxis.Internals._resolve_namespace`` relative
  to the caller of this function's caller, defaulting to ``Pyxis.Context``.
  For each value the variable is assigned (without interpolation) and
  ``Pyxis.Internals.run(*commands)`` is called.  The variable's pre-loop value
  is reassigned when the loop ends, successfully or not.

  The loop runs in forked subprocesses only when ``parallel`` is true, the
  JOBS context setting is at least 2, the list holds at least two values and
  this process is not itself a forked job.  In that case up to JOBS children
  are forked JOB_STAGGER seconds apart, and each child repeatedly claims the
  next unprocessed value until none are left.  Otherwise the values are
  processed sequentially in this process.

  When the PERSIST context setting is true, a failure for one value is logged
  and the loop carries on; the failed values are then reported through
  ``_abort`` (presumably raising -- its definition is not visible here).
  Without PERSIST the first exception propagates immediately.

  Args:
    varname: name of the loop variable; values are read from ``<name>_List``,
      which may be a list, a tuple or a comma-separated string.
    parallel: if true, allow the loop to be split across forked subprocesses.
    *commands: passed through unchanged to ``Pyxis.Internals.run``.
  """
  global _subprocess_id
  # default frame to look for vars is caller of caller
  frame = inspect.currentframe().f_back.f_back
  namespace, vname = Pyxis.Internals._resolve_namespace(
      varname, frame=frame, default_namespace=Pyxis.Context)
  # NOTE(review): the name parts are passed as extra positional arguments
  # instead of being %-formatted into the message -- confirm that this is what
  # _verbose expects.
  _verbose(2, "per(%s.%s)",
           namespace.get('__name__', "???") if namespace is not Pyxis.Context else "v",
           vname)
  saveval = namespace.get(vname, None)

  def _restore ():
    # put the loop variable back to its pre-loop value (even if that is None)
    _verbose(2, "restoring %s=%s" % (varname, saveval), sync=True)
    assign(varname, saveval, namespace=namespace, interpolate=False)

  def _exit_code (status):
    # Decode an os.waitpid() status: the exit code for a normal exit, or the
    # negated signal number when the child was killed by a signal.
    if os.WIFEXITED(status):
      return os.WEXITSTATUS(status)
    return -os.WTERMSIG(status)

  varlist = namespace.get(vname + "_List", None)
  cmdlist = ",".join([x if isinstance(x, str) else getattr(x, "__name__", "?") for x in commands])
  persist = Pyxis.Context.get("PERSIST")
  fail_list = []
  if varlist is None:
    _verbose(1, "per(%s,%s): %s_List is empty" % (varname, cmdlist, varname))
    return
  try:
    if isinstance(varlist, str):
      varlist = list(map(_int_or_str, varlist.split(",")))
    elif not isinstance(varlist, (list, tuple)):
      # four placeholders need four arguments (the list name was missing)
      _abort("PYXIS: per(%s,%s): %s_List has invalid type %s"
             % (varname, cmdlist, varname, str(type(varlist))))
    nforks = Pyxis.Context.get("JOBS", 0)
    stagger = Pyxis.Context.get("JOB_STAGGER", 0)
    _verbose(1, "per(%s,%s,persist=%d): iterating over %s=%s"
             % (varname, cmdlist, 1 if persist else 0, varname, " ".join(map(str, varlist))))
    # unforked case
    if not parallel or nforks < 2 or len(varlist) < 2 or _subprocess_id is not None:
      # do the actual iteration
      for value in varlist:
        _verbose(1, "per-loop, setting %s=%s" % (varname, value))
        assign(vname, value, namespace=namespace, interpolate=False)
        try:
          Pyxis.Internals.run(*commands)
        except (Exception, SystemExit, KeyboardInterrupt) as exc:
          if persist:
            _warn("exception raised for %s=%s:\n" % (vname, value),
                  *traceback.format_exception(*sys.exc_info()))
            _warn("persistent mode is on (PERSIST=1), so continuing to end of %s_List" % vname)
            fail_list.append((value, str(exc)))
          else:
            raise
      # any fails?
      if fail_list:
        _restore()
        # values may be ints, so convert before joining
        _abort("per-loop failed for %s" % (",".join([str(f[0]) for f in fail_list])))
    else:
      # else split varlist into forked subprocesses
      nforks = min(nforks, len(varlist))
      # Shared cursor into varlist: each child atomically claims the next
      # index.  This replaces a multiprocessing.Queue drained with
      # get(False), which may report Empty while items are still in flight
      # and so let children quit with values left unprocessed.
      next_index = multiprocessing.Value('i', 0)
      _verbose(1, "splitting into %d jobs by %s, staggered by %ds" % (nforks, varname, stagger))
      Pyxis.Internals.flush_log()
      forked_pids = {}
      try:
        for job_id in range(nforks):
          if job_id and stagger:
            time.sleep(stagger)
          pid = os.fork()
          if not pid:
            # child fork: run commands while unclaimed values remain
            _subprocess_id = job_id
            # os.fork() returned 0 here, so ask the OS for our real pid
            mypid = os.getpid()
            _verbose(1, "started job %d" % job_id, sync=True)
            # pre-bind so the error handler can always report a value
            value = None
            try:
              fail_list = []
              success_list = []
              while True:
                with next_index.get_lock():
                  index = next_index.value
                  next_index.value = index + 1
                if index >= len(varlist):
                  break
                value = varlist[index]
                _verbose(1, "per-loop, setting %s=%s" % (varname, value), sync=True)
                assign(vname, value, namespace=namespace, interpolate=False)
                try:
                  Pyxis.Internals.run(*commands)
                  success_list.append(value)
                except (Exception, SystemExit, KeyboardInterrupt) as exc:
                  if persist:
                    _warn("exception raised for %s=%s:\n" % (vname, value),
                          sync=True, *traceback.format_exception(*sys.exc_info()))
                    _warn("persistent mode is on (PERSIST=1), so continuing to end of %s_List" % vname, sync=True)
                    fail_list.append((value, str(exc)))
                  else:
                    raise
              # any successes?
              if success_list:
                _verbose(1, "job #%d (pid %d): per-loop succeeded for %s"
                         % (_subprocess_id, mypid, ", ".join([str(f) for f in success_list])), sync=True)
              # any fails?
              if fail_list:
                _restore()
                _abort("job #%d (pid %d): per-loop failed for %s"
                       % (_subprocess_id, mypid, ", ".join([str(f[0]) for f in fail_list])), sync=True)
            except BaseException:
              # anything escaping the child's loop turns into exit status 1
              traceback.print_exc()
              _verbose(1, "job #%d (pid %d) aborted at %s=%s, exiting with error code 1"
                       % (_subprocess_id, mypid, varname, value), sync=True)
              _restore()
              _verbose(2, "logfile is", Pyxis.Context.get('LOG'), sync=True)
              _error("per-loop failed for %s" % value, sync=True)
              sys.exit(1)
            _verbose(2, "job #%d (pid %d) exiting normally" % (_subprocess_id, mypid), sync=True)
            sys.exit(0)
          else: # parent pid: append to list
            _verbose(2, "launched job #%d with pid %d" % (job_id, pid), sync=True)
            forked_pids[pid] = job_id
        njobs = len(forked_pids)
        _verbose(1, "%d jobs launched, waiting for finish" % len(forked_pids), sync=True)
        failed = []
        while forked_pids:
          pid, status = os.waitpid(-1, 0)
          if pid in forked_pids:
            job_id = forked_pids.pop(pid)
            # test the raw status: a child killed by a signal has a zero exit
            # byte but must still be counted as failed
            if status:
              failed.append(job_id)
              _error("job #%d exited with error status %d, waiting for %d more jobs to complete"
                     % (job_id, _exit_code(status), len(forked_pids)), sync=True)
            else:
              _verbose(1, "job #%d finished, waiting for %d more jobs to complete"
                       % (job_id, len(forked_pids)), sync=True)
        if failed:
          _abort("%d of %d jobs have failed" % (len(failed), njobs), sync=True)
        else:
          _verbose(1, "all jobs finished ok", sync=True)
      except KeyboardInterrupt:
        # only the parent forwards the interrupt and reaps the children
        if _subprocess_id is None:
          _restore()
          _error("Caught Ctrl+C, waiting for %d jobs to exit" % len(forked_pids), sync=True)
          import signal
          for pid in list(forked_pids):
            os.kill(pid, signal.SIGINT)
          while forked_pids:
            pid, status = os.waitpid(-1, 0)
            if pid in forked_pids:
              job_id = forked_pids.pop(pid)
              _verbose(1, "job #%d exited with error status %d, waiting for %d more"
                       % (job_id, _exit_code(status), len(forked_pids)), sync=True)
        raise
  finally:
    # note that children also execute this block with sys.exit()
    if _subprocess_id is None:
      _restore()
    Pyxis.Internals.flush_log()