Example #1
0
def ncrcat(input,output,kwargs=None):
    '''Call `ncrcat` via task_manager

    Write the list of input files to a temporary file list and submit a
    job that pipes it to ``ncrcat`` to concatenate along the record
    dimension.

    Parameters
    ----------

    input : list
      list of files to concatenate
    output : str
      output file
    kwargs : dict, optional
      dictionary of keyword arguments to task_manager.submit

    Returns
    -------
    jid
      job id(s) returned by ``tm.submit``
    '''

    #-- avoid the mutable-default pitfall: the original shared one dict
    #   across all calls and mutated it below; copy so the caller's dict
    #   is not modified either
    if kwargs is None:
        kwargs = {}
    else:
        kwargs = dict(kwargs)

    kwargs['modules'] = ['nco']
    kwargs['module_purge'] = False
    if 'memory' not in kwargs:
        kwargs['memory'] = '100GB'

    #-- mkstemp returns an open OS-level descriptor: wrap it with fdopen
    #   so it is closed by the with-block instead of being leaked
    (fd,tmpfile) = tempfile.mkstemp('.filelist')
    with os.fdopen(fd,'w') as filelist:
        for f in input:
            filelist.write('%s\n'%f)

    jid = tm.submit(['cat',tmpfile,'|','ncrcat','-o',output],**kwargs)
    return jid
Example #2
0
def ncop_chunktime(script,
                   kwargs,
                   chunk_size,
                   start=0,
                   stop=None,
                   clobber=False,
                   cleanup=True):
    '''
    run script on time segments within a file and concatenate results
    '''
    jid_list = []

    def op_one_chunk(tnx):
        #-- intermediate output file
        file_out_i = file_out + '.tnx.%d-%d' % (tnx)

        #-- update input arguments
        kwargs.update({'dimsub': {'time': tnx}, 'file_out': file_out_i})

        #-- submit
        print '\'{0}\''.format(json.dumps(kwargs))
        if not os.path.exists(file_out_i) or clobber:
            jid = tm.submit([script, '\'{0}\''.format(json.dumps(kwargs))])
            jid_list.extend(jid)
        return file_out_i

    file_out = copy.copy(kwargs['file_out'])

    if not os.path.exists(file_out) or clobber:
        #-- get time chunks
        if stop is None:
            ds = xr.open_dataset(kwargs['file_in'],
                                 decode_times=False,
                                 decode_coords=False)
            stop = len(ds.time)
        time_chunks = gen_time_chunks(start, stop, chunk_size)

        #-- operate on each chunk
        file_cat = [op_one_chunk(tnx) for tnx in time_chunks]

        #-- concatenate files
        jid = ncrcat(file_cat, file_out, depjob=jid_list)
        if cleanup:
            tm.submit(['rm', '-f', ' '.join(file_cat)], depjob=jid)

    return jid_list
Example #3
0
def ncrcat(input, output, depjob=None):
    '''
    call ncrcat

    Write the list of input files to a temporary file list and submit a
    job that pipes it to ``ncrcat``.

    Parameters
    ----------
    input : list
      list of files to concatenate
    output : str
      output file
    depjob : list, optional
      job ids the concatenation job must wait on

    Returns
    -------
    jid
      job id(s) returned by ``tm.submit``
    '''
    #-- avoid a mutable default argument
    if depjob is None:
        depjob = []

    #-- wrap the OS-level descriptor from mkstemp so it is closed by the
    #   with-block instead of being leaked
    (fd, tmpfile) = tempfile.mkstemp('.filelist')
    with os.fdopen(fd, 'w') as filelist:
        for f in input:
            filelist.write('%s\n' % f)

    #-- submit only AFTER the with-block has flushed and closed the file
    #   list; the original submitted while the file was still open, so
    #   the job could read an unflushed (empty/partial) list
    jid = tm.submit(['cat', tmpfile, '|', 'ncrcat', '-o', output],
                    depjob=depjob)
    return jid
Example #4
0
    def op_one_chunk(tnx):
        '''Submit `script` on one time chunk; return the chunk's output file.

        NOTE(review): closure over `file_out`, `kwargs`, `clobber`,
        `script`, and `jid_list` from the enclosing (not visible) scope.
        '''
        #-- intermediate output file; tnx must be a (start, stop) pair
        #   for the two-field '%d-%d' format to succeed
        file_out_i = file_out + '.tnx.%d-%d' % (tnx)

        #-- update input arguments (mutates the shared `kwargs` dict;
        #   each call overwrites 'dimsub' and 'file_out')
        kwargs.update({'dimsub': {'time': tnx}, 'file_out': file_out_i})

        #-- submit (Python 2 print; the command is printed even when the
        #   job is skipped because the output already exists)
        print '\'{0}\''.format(json.dumps(kwargs))
        if not os.path.exists(file_out_i) or clobber:
            jid = tm.submit([script, '\'{0}\''.format(json.dumps(kwargs))])
            jid_list.extend(jid)
        return file_out_i
Example #5
0
        file_out_i = file_out+'.tnx.%d-%d'%(tnx)

        if os.path.exists(file_out_i) and not clobber:
            return file_out_i

        #-- update input arguments
        if isel in kwargs:
            kwargs['isel'] = kwargs['isel'].update({'time':tnx})
        else:
            kwargs['isel'] = {'time':tnx}

        kwargs.update('file_out': file_out_i})

        #-- submit
        print(json_cmd(kwargs))
        jid = tm.submit([script,json_cmd(kwargs)],**submit_kwargs_i)
        jid_list.append(jid)

        return file_out_i


    #-- get stopping index
    if stop is None:
        stop = len(xr.open_dataset(kwargs['file_in'],
                                   decode_times=False,
                                   decode_coords=False).time)
    time_chunks = gen_time_chunks(start,stop,chunk_size)

    #-- operate on each chunk
    file_cat = [_apply_one_chunk(tnx) for tnx in time_chunks]
Example #6
0
    'output_dir': odir,
    'max_depth': max_depth,
    'clobber': clobber
}

#-- submit one assembly job per input file; each job receives its
#   settings as a single JSON-encoded command-line argument
script = 'assemble_wod_database_nc.py'
files_out = []
for file_in in files:

    #-- output path: same basename as the input, '.nc' appended, in odir
    file_out = os.path.join(odir, os.path.basename(file_in) + '.nc')
    files_out.append(file_out)

    #-- skip files that already exist unless clobber is set
    if not os.path.exists(file_out) or clobber:
        kwargs = settings.copy()
        kwargs.update({'file_in': file_in, 'file_out': file_out})
        jid = tm.submit([script, '\'{0}\''.format(json.dumps(kwargs))])

#-- block until all submitted jobs finish
tm.wait()

#-- if 2 phases, resubmit this script and exit
#   NOTE(review): `tm.total_elapsed_time() > 1` presumably means "more
#   than 1 hour spent in phase 1" -- confirm the unit with task_manager
if split_phase and tm.total_elapsed_time() > 1:
    print('resubmitting myself')
    call('bsub < run_global_req_temp_salt_o2.py', shell=True)
    sys.exit(0)

#-----------------------------------------------------------------
#-- make gridded
#-----------------------------------------------------------------

#-- gridding phase reuses existing outputs regardless of the earlier flag
clobber = False
Example #7
0
    'q': 'caldera',
    'm': '12000'
}
#-- "large" batch settings: queue name and memory request (strings as
#   expected by the scheduler)
bset_lg = {
    'q': 'geyser',
    'm': '300000'
}

# stop script (graceful exit) and wait on jobs after:
# (1 second expressed in hours)
tm.QUEUE_MAX_HOURS = 1. / 3600.

# max number of jobs in queue
tm.MAXJOBS = 50

# loop and submit
jobs = []
for i in range(1, 3):
    jid = tm.submit(['sleep', '10'])
    # tm.submit returns a list of job ids; accumulate them for the
    # dependent job below
    jobs.extend(jid)
    time.sleep(2)

# submit dependent job (runs only after every job in `jobs` completes)
print('dep job')
jid = tm.submit(['ls', '-1'], depjob=jobs)

# wait on all jobs
ok = tm.wait()

# report status (Python 2 print statement)
print 'OK (False indicates an error): ' + str(ok)