def check_rst(date,cf,wait=900):
    '''Check for the restart (rst) file needed to run `date`, waiting if needed.

    The rst and rout file names are built by opt.nameof for the date returned
    by dateu.next_date(date,-1), always with FA='a'.

    The rst is considered ready when the file exists and is_roms_out_ok
    accepts the corresponding rout (the rst may exist before the run has
    written its last record).

    If not ready at once, the check is repeated every `wait` seconds while
    dateu.date_diff(date, dateu.currday()).days is below 1.5.

    Returns True as soon as the rst is ready; once the waiting loop ends,
    returns whether the rst file exists (the rout is not re-checked then).
    '''
    FA = 'a'
    prev_date = dateu.next_date(date, -1)
    rst = opt.nameof('out', 'rst', date=prev_date, FA=FA, cf=cf)
    rout = opt.nameof('out', 'rout', date=prev_date, FA=FA, cf=cf)

    def _ready():
        # file on disk is not enough: the run that writes it must have ended ok
        return os.path.isfile(rst) and is_roms_out_ok(rout)

    if _ready():
        return True

    now = dateu.currday()
    tdiff = dateu.date_diff(date, now)
    print("waiting for rst")
    while tdiff.days < 1.5:
        time.sleep(wait)
        sys.stdout.write('.')
        now = dateu.currday()
        tdiff = dateu.date_diff(date, now)
        cond = _ready()
        # single space-joined string: same text the multi-item print produced
        print(' '.join([" rst file ready = ", str(cond), ' at ', str(now), str(tdiff)]))
        if cond:
            return True

    # gave up waiting: report only whether the file is there
    return os.path.isfile(rst)
def status(self,quiet=False):
    '''Return the current status of the model run described by this log.

    The log is reloaded with self.load() first. Unless `quiet` is true, a
    one-line summary is printed.

    Returns one of:
      'unavailable' - self.load() failed (log file not found)
      'running'     - local process still listed by ps, or queue state 'R'
      'submitted'   - job known to qstat but not in state 'R'
      'terminated'  - no longer running and no elapsed time was recorded
      'finnished'   - elapsed time recorded in self.dt (spelling kept, it is
                      part of the return contract)
    '''
    # reload:
    if not self.load():
        if not quiet:
            print('log file not found for %s %s' % (self.date, self.FA))
        return 'unavailable'

    def _terminated_msg():
        # check model stdout to tell a clean end from a crash
        if rt.is_roms_out_ok(self.rout):
            return 'Model terminated with SUCCESS (??)'
        return 'Model terminated with ERROR'

    if self.dt == -1:  # model did not finish (no elapsed time recorded)
        if self.job_type == 'local':
            res = cb.run0('ps ' + str(self.pid))
            # presumably header line + process line when the pid is alive
            if len(res) == 2:
                dt = time.time() - self.tstart
                msg = 'Model running for %.2f min' % (dt / 60.)
                Status = 'running'
            else:
                Status = 'terminated'
                msg = _terminated_msg()
        else:  # job type is queue
            # Popen arguments must be strings; pid may have been stored as a
            # number (the local branch above also converts it)
            c = Popen(('qstat', str(self.pid)), stdout=PIPE, stderr=PIPE)
            o, e = c.communicate()
            fields = o.split()
            if e or len(fields) < 2:
                # qstat complained, or printed nothing usable: the job is no
                # longer known to the queue
                Status = 'terminated'
                msg = _terminated_msg()
            else:
                # second to last column of the qstat output is the job state
                if fields[-2] == 'R':
                    Status = 'running'
                else:
                    Status = 'submitted'
                dt = time.time() - self.tstart
                msg = 'Model is ' + Status + ' for %.2f min' % (dt / 60.)
    else:
        Status = 'finnished'
        msg = 'Model finnished in %.2f min' % self.dt
        if rt.is_roms_out_ok(self.rout):
            msg += ' with SUCCESS'
        else:
            msg += ' with ERROR'

    if not quiet:
        print(msg)
    return Status
def status_out(self,quiet=False):
    '''Return (first_day, last_day, expected_days) for the run, from its log.

    first_day is the first time value returned by rt.roms_read_out.
    expected_days is ntimes*dt/86400, with ntimes and dt parsed from the log
    file self.rout (assumes dt is in seconds - TODO confirm).
    last_day is first_day+expected_days when rt.is_roms_out_ok accepts the
    log, otherwise the last time value found in the log.

    Returns (0, 0, 0) when self.rout is not set.
    Raises ValueError when the ntimes/dt lines cannot be found in the log.
    Unless `quiet` is true, a one-line summary is printed.
    '''
    if not self.rout:
        if not quiet:
            print('log file not found for %s %s' % (self.date, self.FA))
        return 0, 0, 0

    data = rt.roms_read_out(self.rout)
    tdays = data[0]

    # check model ended:
    # the times in the log may not reach the end of the run, since printing to
    # roms.out may not occur at every time step; so get ntimes and dt to
    # compute the time actually simulated:
    ntimes = dt = None
    with open(self.rout) as f:
        for line in f:
            if ' ntimes ' in line:
                ntimes = int(line.split()[0])
            if ' dt ' in line:
                dt = float(line.split()[0])
                break  # stop at dt, as ntimes is expected before it

    if ntimes is None or dt is None:
        raise ValueError('ntimes and/or dt not found in %s' % self.rout)

    expected = ntimes * dt / 86400.
    if rt.is_roms_out_ok(self.rout):
        time1 = tdays[0] + expected
    else:
        time1 = tdays[-1]

    if not quiet:
        print('Model runned from day %.2f to %.2f' % (tdays[0], time1))
    return tdays[0], time1, expected
def oof(cf,plconf,date=False,last_date=False,FA='a',env=False):
    '''Run the model day after day, checking inputs and reporting problems.

    cf        - configuration, passed on to the opt.* helpers, run, check_*
    plconf    - plots configuration, passed to op_plot.op_plt
    date      - first date to run; if not given, the run continues from the
                last rst file returned by find_last
    last_date - last date to run; if not given, the end date from
                opt.dates_info(cf) is used
    FA        - run type flag ('a' or 'f'), used in file names and messages
    env       - passed to env_vars

    For each date inside the configured period: if an rst file already exists
    the date is skipped (stopping if its roms out is not ok); otherwise the
    atm and rst inputs are checked, the model is run, plots are made if
    enabled and, if email is enabled, a message is sent.

    Returns None. Errors are reported through on_error and stop the loop.
    '''
    # start email notifications service:
    emailInfo = opt.email_info(cf=cf)
    sendEmail = emailInfo['send']

    env_vars(env)
    flags = opt.flags_info(cf)
    if date:
        date = dateu.parse_date(date)
    if last_date:
        last_date = dateu.parse_date(last_date)

    if not date:
        # find date-1 for prediction:
        date, fname = find_last(type='rst', cf=cf)
        if not date:
            on_error(sendEmail, 'ERROR (%s): Cannot find previous file' % FA, emailInfo)
            return

        print('Last date = %s from file %s' % (date, fname))
        rout = opt.nameof('out', 'rout', date=date, FA='a', cf=cf)
        if is_roms_out_ok(rout):
            print('Previous roms out is ok: %s' % rout)
        else:
            on_error(sendEmail, 'ERROR (%s): Last run is not ok %s : %s' % (FA, date, rout), emailInfo)
            return
    else:
        # the loop below advances one day before doing anything
        date = dateu.next_date(date, -1)

    # read dates:
    start_date, end_date = opt.dates_info(cf)
    if last_date:
        end_date = dateu.next_date(last_date, +1)

    while date >= start_date and date < end_date:
        # read dates again, possible update may occur.
        start_date, end_date = opt.dates_info(cf)
        if last_date:
            end_date = dateu.next_date(last_date, +1)

        date = dateu.next_date(date)

        # check if already runned for that date:
        # ie, check for rst and check if roms_out is ok:
        rst = opt.nameof('out', 'rst', date=date, FA=FA, cf=cf)
        rout = opt.nameof('out', 'rout', date=date, FA=FA, cf=cf)

        if os.path.isfile(rst):
            print('Found rst file for %s: %s' % (date, rst))
            if os.path.isfile(rout):
                if is_roms_out_ok(rout):
                    print(' previous roms out is ok: %s' % rout)
                else:
                    on_error(sendEmail, 'ERROR (%s): Previous roms out is NOT ok: %s' % (FA, rout), emailInfo)
                    break
            else:
                print(' roms out for %s not found: NOT CHECKED' % date)
        else:
            print('\nModel will start from %s' % date)

            # check for atm data for current simulation:
            if flags['atmfrc'] or flags['atmblk']:
                atmStat = check_atm(date, FA, cf=cf)
            else:
                atmStat = True

            # wait for rst (done for every FA, not only for forecasts):
            rstStat = check_rst(date, cf=cf)

            # check for boundary data for current simulation:
            # this step may take forever !! just let us believe parent model
            # is available (check_bc is not called), so bcStat is always True
            bcStat = True

            now = time.strftime("%Y-%m-%d %H:%M:%S +0", time.gmtime())
            if (atmStat is not False) and (rstStat is not False) and (bcStat is not False):
                rout, dt, runErr = run(date, FA, cf=cf)
                now = time.strftime("%Y-%m-%d %H:%M:%S +0", time.gmtime())

                # check if run was ok:
                if is_roms_out_ok(rout):
                    msg = 'NO error %s %s' % (date, FA)
                    Msg = ' Run %s %s finished ok [%s] dt=%6.2f' % (date, FA, now, dt)
                    print(Msg)

                    # make plots:
                    if flags['plots']:
                        err, savenames = op_plot.op_plt(cf, plconf, date, FA)
                        if not all(e == '' for e in err):
                            msg += ' --> ERROR plotting'
                            # header and first error share a line, then one
                            # error per line
                            print(' ERROR plotting : ' + ' ' + '\n'.join(['%s' % e for e in err]))

                        if not all(e == '' for e in savenames):
                            for sv in savenames:
                                print(' Saved plot ' + sv)

                elif runErr:
                    on_error(sendEmail, 'ERROR (%s): Run %s returned the error msg: %s' % (FA, date, runErr), emailInfo)
                    break
                else:
                    on_error(sendEmail, 'ERROR (%s): Run %s finished with ERROR [%s] dt=%6.2f' % (FA, date, now, dt), emailInfo)
                    break

            elif atmStat is False:
                # three placeholders, three values (an extra FA here used to
                # raise TypeError instead of reporting the problem)
                Msg = 'ERROR (%s): Run %s cannot run (atm data missing) ERROR [%s]' % (FA, date, now)
                if FA == 'a':
                    on_error(sendEmail, Msg, emailInfo)
                    break
                else:
                    msg = 'ERROR: atm data missing'
                    print(Msg)

            elif rstStat is False:
                msg = 'ERROR: rst data missing'
                Msg = 'ERROR (%s): Run %s cannot run (rst data missing) ERROR [%s]' % (FA, date, now)
                print(Msg)

            print('\n')
            if sendEmail:
                send_email.send(emailInfo['dest'], Msg, msg)
else: s='started' print ' %s %s %s %s [%s]' % (title,date,FA,s,now) # maybe no seed to send emails on every submission... #if sendEmail: send_email.send(emailInfo['dest'],sys.stdout.content,s) nAttempts=10 nAt=0 dtLim=2 # if run took less then dtLim min and was not successful, retry. # there is a bug here: job may be waiting more than 5 min in queue !! # then if not is_roms_out_ok the oof will stop. # The solution is to calc dt as the time since model execution ti=time.time() dtexc=0 rout='' while not is_roms_out_ok(rout) and nAt<nAttempts and dtexc<dtLim: nAt+=1 print 'Attempt ',nAt if flags['qsub']: status,rout,dtexc=__run_queue(fsub,date,FA,cf) # if forecast and job is in queue for too long (status 9), just dont run it! if status==9 and FA=='f': break else: status,rout,dtexc=__run_local(date,FA,cf) runErr='' if status!=0: print ' :: ERROR ',status runErr='status %d'% status