def ncrcat(input, output, kwargs=None):
    '''Call `ncrcat` via task_manager.

    Writes the input file list to a temporary file and submits
    ``cat <filelist> | ncrcat -o <output>`` as a batch job.

    Parameters
    ----------
    input : list
        list of files to concatenate
    output : str
        output file
    kwargs : dict, optional
        dictionary of keyword arguments to task_manager.submit

    Returns
    -------
    jid
        job id returned by tm.submit
    '''
    # BUG FIX: mutable default argument ({}) was shared across calls;
    # also copy so the caller's dict is not mutated as a side effect.
    kwargs = {} if kwargs is None else dict(kwargs)
    kwargs['modules'] = ['nco']
    kwargs['module_purge'] = False
    kwargs.setdefault('memory', '100GB')

    fd, tmpfile = tempfile.mkstemp('.filelist')
    # BUG FIX: mkstemp returns an open OS-level descriptor; the original
    # rebound the name to the file object and leaked the descriptor.
    os.close(fd)
    with open(tmpfile, 'w') as f:
        for file_name in input:
            f.write('%s\n' % file_name)

    jid = tm.submit(['cat', tmpfile, '|', 'ncrcat', '-o', output], **kwargs)
    return jid
def ncop_chunktime(script, kwargs, chunk_size, start=0, stop=None,
                   clobber=False, cleanup=True):
    '''Run `script` on time segments within a file and concatenate results.

    Parameters
    ----------
    script : str
        executable submitted via task_manager once per time chunk
    kwargs : dict
        argument dict passed to `script` as a JSON string; must contain
        'file_in' and 'file_out'.
        NOTE(review): this dict is mutated in place per chunk
        ('dimsub', 'file_out' keys) -- callers should not reuse it.
    chunk_size : int
        number of time levels per chunk
    start : int, optional
        first time index (default 0)
    stop : int, optional
        one-past-last time index; defaults to the length of the time
        dimension of kwargs['file_in']
    clobber : bool, optional
        if True, re-run even when output files already exist
    cleanup : bool, optional
        if True, remove intermediate chunk files after concatenation

    Returns
    -------
    list
        job ids of the submitted per-chunk jobs
    '''
    jid_list = []

    def op_one_chunk(tnx):
        # tnx is assumed to be a (start, stop) index pair -- TODO confirm
        # against gen_time_chunks; the %d-%d format requires a 2-tuple.
        file_out_i = file_out + '.tnx.%d-%d' % tnx
        kwargs.update({'dimsub': {'time': tnx},
                       'file_out': file_out_i})
        # BUG FIX: Python-2-only print statement -> call form
        # (single-argument print(...) works under both Python 2 and 3)
        print("'{0}'".format(json.dumps(kwargs)))
        if not os.path.exists(file_out_i) or clobber:
            jid = tm.submit([script, "'{0}'".format(json.dumps(kwargs))])
            jid_list.extend(jid)
        return file_out_i

    file_out = copy.copy(kwargs['file_out'])
    if not os.path.exists(file_out) or clobber:
        #-- get time chunks; read the time-axis length only when needed
        if stop is None:
            ds = xr.open_dataset(kwargs['file_in'],
                                 decode_times=False,
                                 decode_coords=False)
            stop = len(ds.time)
        time_chunks = gen_time_chunks(start, stop, chunk_size)

        #-- operate on each chunk
        file_cat = [op_one_chunk(tnx) for tnx in time_chunks]

        #-- concatenate intermediate files, dependent on all chunk jobs
        jid = ncrcat(file_cat, file_out, depjob=jid_list)
        if cleanup:
            tm.submit(['rm', '-f', ' '.join(file_cat)], depjob=jid)

    return jid_list
def ncrcat(input, output, depjob=None):
    '''Call `ncrcat`, optionally dependent on earlier jobs.

    Parameters
    ----------
    input : list
        list of files to concatenate
    output : str
        output file
    depjob : list, optional
        job ids this concatenation must wait on

    Returns
    -------
    jid
        job id returned by tm.submit
    '''
    # BUG FIX: mutable default argument ([]) was shared across calls
    depjob = [] if depjob is None else depjob

    fd, tmpfile = tempfile.mkstemp('.filelist')
    # BUG FIX: close the OS-level descriptor from mkstemp; the original
    # rebound the name to the file object and leaked the descriptor.
    os.close(fd)
    with open(tmpfile, 'w') as f:
        for file_name in input:
            f.write('%s\n' % file_name)

    jid = tm.submit(['cat', tmpfile, '|', 'ncrcat', '-o', output],
                    depjob=depjob)
    return jid
def op_one_chunk(tnx):
    '''Submit `script` on one time chunk; return the chunk's output file.

    NOTE(review): relies on `file_out`, `kwargs`, `script`, `clobber`,
    and `jid_list` as free variables -- this def is only valid nested
    inside a driver function (cf. ncop_chunktime); standalone it raises
    NameError. Presumably a duplicated copy of that nested helper.
    '''
    #-- intermediate output file
    # tnx is assumed to be a (start, stop) index pair -- TODO confirm;
    # the %d-%d format requires a 2-tuple.
    file_out_i = file_out + '.tnx.%d-%d' % (tnx)

    #-- update input arguments (mutates the shared kwargs dict)
    kwargs.update({'dimsub': {'time': tnx},
                   'file_out': file_out_i})

    #-- submit
    # BUG FIX: Python-2-only print statement -> call form
    # (single-argument print(...) works under both Python 2 and 3)
    print("'{0}'".format(json.dumps(kwargs)))
    if not os.path.exists(file_out_i) or clobber:
        jid = tm.submit([script, "'{0}'".format(json.dumps(kwargs))])
        jid_list.extend(jid)
    return file_out_i
file_out_i = file_out+'.tnx.%d-%d'%(tnx) if os.path.exists(file_out_i) and not clobber: return file_out_i #-- update input arguments if isel in kwargs: kwargs['isel'] = kwargs['isel'].update({'time':tnx}) else: kwargs['isel'] = {'time':tnx} kwargs.update('file_out': file_out_i}) #-- submit print(json_cmd(kwargs)) jid = tm.submit([script,json_cmd(kwargs)],**submit_kwargs_i) jid_list.append(jid) return file_out_i #-- get stopping index if stop is None: stop = len(xr.open_dataset(kwargs['file_in'], decode_times=False, decode_coords=False).time) time_chunks = gen_time_chunks(start,stop,chunk_size) #-- operate on each chunk file_cat = [_apply_one_chunk(tnx) for tnx in time_chunks]
# NOTE(review): fragment -- opens mid-way through a settings dict whose
# name and opening brace are outside this view; presumably it is the
# `settings` dict copied per-file below. Verify against the full file.
'output_dir': odir,
'max_depth': max_depth,
'clobber': clobber
}

# script submitted once per input file
script = 'assemble_wod_database_nc.py'

files_out = []
for file_in in files:
    # one output netCDF per input file, named after the input's basename
    file_out = os.path.join(odir, os.path.basename(file_in) + '.nc')
    files_out.append(file_out)
    if not os.path.exists(file_out) or clobber:
        # per-file argument dict, serialized to a quoted JSON string
        # on the script's command line
        kwargs = settings.copy()
        kwargs.update({'file_in': file_in, 'file_out': file_out})
        jid = tm.submit([script, '\'{0}\''.format(json.dumps(kwargs))])

# block until all submitted jobs complete
tm.wait()

#-- if 2 phases, resubmit this script and exit
# presumably total_elapsed_time() is in hours -- TODO confirm units
if split_phase and tm.total_elapsed_time() > 1:
    print('resubmitting myself')
    call('bsub < run_global_req_temp_salt_o2.py', shell=True)
    sys.exit(0)

#-----------------------------------------------------------------
#-- make gridded
#-----------------------------------------------------------------
clobber = False
'q': 'caldera', 'm': '12000' } bset_lg = { 'q': 'geyser', 'm': '300000' } # stop script (graceful exit) and wait on jobs after: tm.QUEUE_MAX_HOURS = 1. / 3600. # max number of jobs in queue tm.MAXJOBS = 50 # loop and submit jobs = [] for i in range(1, 3): jid = tm.submit(['sleep', '10']) jobs.extend(jid) time.sleep(2) # submit dependent job print('dep job') jid = tm.submit(['ls', '-1'], depjob=jobs) # wait on all jobs ok = tm.wait() # report status print 'OK (False indicates an error): ' + str(ok)