def get_time(base_time=None, delay=None, round=None):
    """ Returns base_time minus a delay in hours, rounded down to the nearest hour given in round.

    Warning - works by rounding back to the beginning of the day, and does not currently work
    for the case where you request a cycle time which is later than the base_time.

    Arguments:
        base_time -- base time to calculate from
        delay     -- optional delay in hours to apply
        round     -- a list of integer hours to restrict the return value to e.g. [0,6,12,18]"""

    logger = loghelper.get(LOGGER)

    hour = datetime.timedelta(0, 60*60)
    base_time = base_time if base_time else datetime.datetime.today()
    delay = delay if delay else 0
    delayed_time = base_time - delay * hour

    start = delayed_time
    if round:
        # throw away all time parts, leaving the start of the day
        start_day = datetime.datetime(delayed_time.year, delayed_time.month, delayed_time.day, 0, 0)
        start_hour = delayed_time.hour
        past_hours = [h for h in round if h <= start_hour]
        recent_hour = past_hours[-1]
        start = start_day + recent_hour * hour

    return start

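# Hypothetical usage sketch (not part of the original module): illustrates how
# get_time() resolves a delayed, cycle-restricted time. The values are assumed
# for illustration only.
#
#   base = datetime.datetime(2016, 3, 2, 14, 30)
#   get_time(base_time=base, delay=3, round=[0, 6, 12, 18])
#   # delayed time is 11:30; the most recent cycle hour <= 11 is 6,
#   # so this returns datetime.datetime(2016, 3, 2, 6, 0)
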
def filter(frame, rules):
    """ Filters a data frame according to a set of rules specified in rules, i.e.
    'column name' : [list of values] | 'column name' : string of numeric limits parsed
    by _parse_filter into (min, max)

    Arguments:
        frame -- the DataFrame to filter
        rules -- rules to apply"""

    logger = loghelper.get(LOGGER)
    logger.debug("%d rows before filtering" % len(frame))
    logger.debug(json.dumps(rules, indent=4))

    for column, rule in rules.items():
        logger.debug("filtering on %s" % column)
        # if a list of values is provided, filter on those
        if type(rule) == list:
            frame = frame[frame[column].isin(rule)]
        # else if a string is provided, parse it into (min, max) limits
        elif type(rule) == str:
            vmin, vmax = _parse_filter(rule)
            frame = frame[(frame[column] >= vmin) & (frame[column] <= vmax)]
        else:
            raise ConfigError("filter type not understood, please give a list of values or a min/max string")

    logger.debug("%d rows after filtering" % len(frame))
    return frame

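# Hypothetical usage sketch: a rules dictionary mixing a list filter with a
# min/max string. Column names are illustrative and the exact string format
# expected by _parse_filter is assumed.
#
#   rules = {"location": ["site_A", "site_B"],
#            "leadtime": "0,48"}            # assumed to parse to (0, 48)
#   frame = filter(frame, rules)
#   # keeps only rows for the two locations with leadtime between 0 and 48
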
def _concat_cols(frame, cols, delim='.', formatter=None):
    """ Mashes two columns into one by concatenating their values. By default applies
    str to each column and separates the values with a dot.

    Arguments:
        frame     -- data frame to operate on
        cols      -- list of columns to join
        delim     -- delimiter to use to separate values
        formatter -- optional list or dict of formatting functions, one per column"""

    logger = loghelper.get(LOGGER)
    logger.warn("Performance warning, string concatenation of columns not done very efficiently")

    if len(cols) != 2:
        raise NotYetImplemented("concatenating other than two columns is not yet implemented")

    if formatter is not None:
        # a list of formatters is applied positionally, a dict is keyed by column name
        if type(formatter) == type([]):
            result = frame[cols[0]].apply(formatter[0]) + delim + frame[cols[1]].apply(formatter[1])
        elif type(formatter) == type({}):
            result = frame[cols[0]].apply(formatter[cols[0]]) + delim + frame[cols[1]].apply(formatter[cols[1]])
    else:
        result = frame[cols[0]].map(str) + delim + frame[cols[1]].map(str)

    return result

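# Hypothetical usage sketch: joining a variable name and a height column into a
# single label column, using a dict of per-column formatters. Column names are
# assumptions for illustration.
#
#   frame['variable'] = _concat_cols(frame, ['variable', 'height'],
#                                    formatter={'variable': str,
#                                               'height': lambda h: "%03d" % h})
#   # e.g. "SPEED" and 80 --> "SPEED.080"
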
def _char_array_to_str(chars):
    """Converts a NetCDF masked character array into an array of strings"""

    logger = loghelper.get(LOGGER)

    # assert we have two dimensions
    assert len(chars.shape) == 2
    dim0 = chars.shape[0]
    dim1 = chars.shape[1]

    # if it is a masked array, first fill in masked elements with blanks
    if hasattr(chars[:], 'mask'):
        filled = chars[:].filled(' ')
    else:
        filled = chars

    # join character arrays across the last dimension
    strs = [''.join(filled[n, :]) for n in range(dim0)]

    # then strip away the blanks
    strs = map(string.strip, strs)

    # return as an array of strings
    return np.array(strs)

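# Hypothetical usage sketch: converting a NetCDF character variable of shape
# (nlocations, strlen) into a 1D numpy array of stripped strings. The file and
# variable names are assumptions.
#
#   dataset = Dataset('example.nc')                 # netCDF4.Dataset, assumed import
#   names = _char_array_to_str(dataset.variables['location'])
#   # e.g. array(['site_A', 'site_B'])
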
def create_directory_structure(expand, remove=None, create=None, copy=None, link=None, dry_run=False):
    """Creates a subdirectory structure, and removes, copies, and links in files

    Arguments:
        expand  -- a single-argument function to perform any string substitutions on any of the input arguments
        remove  -- a list of file patterns to remove
        create  -- a list of subdirectories to create if they don't already exist
        copy    -- a list of file patterns to copy
        link    -- a list of file patterns to link
        dry_run -- log rather than execute commands"""

    # pass initial time as an argument, to leave a one-argument function which will expand strings
    logger = loghelper.get(LOGGER)

    if create:
        for d in create:
            subdir = expand(d)
            shared.create(subdir, dry_run=dry_run)

    if remove:
        for pattern in remove:
            shared.remove(expand(pattern), dry_run=dry_run)

    if copy:
        for pattern in copy:
            shared.copy(expand(pattern), dry_run=dry_run)

    if link:
        for pattern in link:
            shared.link(expand(pattern), dry_run=dry_run)

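# Hypothetical usage sketch: preparing a run directory for one initial time.
# The expand helper and the patterns below are assumptions for illustration;
# with dry_run=True the shared.* helpers only log what they would do.
#
#   expand = lambda s: substitute.sub_date(s, init_time=init_time)   # assumed helper
#   create_directory_structure(expand,
#                              create=["run/wps", "run/wrf"],
#                              remove=["run/rsl.*"],
#                              dry_run=True)
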
def submit(jobs, expand, after_job=None, array_job=None, dry_run=False):
    """Submits specified jobs to a scheduling engine e.g. SGE

    Arguments:
        jobs      : list of job entry specifications (dictionaries)
        expand    : function to expand any placeholders in strings
        after_job : initial job id to specify as a dependency

    Returns:
        The final job_id submitted"""

    logger = loghelper.get(LOGGER)
    logger.debug("submitting jobs")

    job_ids = {}
    first = True
    for entry in jobs:
        name = expand(entry['name'])
        script = expand(entry['script'])
        run_dir = os.path.split(script)[0]

        # job dependency can either come from the entry, or from the previous submission
        if not first and entry.get('after'):
            after_job = entry['after']

        job_id = queue.qsub(script, name=entry.get('name'), queue=entry.get('queue'),
                            pe=entry.get('pe'), nslots=entry.get('nprocs'),
                            after_job=after_job, cwd=run_dir, array=entry.get('array'),
                            merge=True, log=entry.get('log'), dry_run=dry_run)

        job_ids[name] = job_id
        first = False

    return job_id

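# Hypothetical usage sketch: a minimal jobs specification. The keys mirror those
# read by submit() (name, script, queue, pe, nprocs, after, array, log); the
# values themselves are illustrative.
#
#   jobs = [{'name': 'wps', 'script': '/path/to/run_wps.sh', 'queue': 'all.q', 'nprocs': 1},
#           {'name': 'wrf', 'script': '/path/to/run_wrf.sh', 'queue': 'all.q',
#            'pe': 'ompi', 'nprocs': 24, 'after': 'wps'}]
#   last_id = submit(jobs, expand=lambda s: s, dry_run=True)
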
def _to_json(frame, out_name, float_format="%0.3f"):
    """ Writes each variable and init_time series into one json file."""

    logger = loghelper.get(LOGGER)
    logger.info("*** outputting data as json ***")

    # Bit of a hack to ease output formatting, convert init_time to string
    frame['reftime'] = frame['reftime'].apply(str)

    # we need to group by everything except valid time, lead time and value
    group_by = [c for c in frame.columns if c not in ["valid_time", "leadtime", "value"]]
    gb = frame.groupby(group_by)

    # convert time to milliseconds since epoch
    convert = lambda t: time.mktime(t.timetuple()) * 1000

    series = []
    for name, group in gb:
        # create a dictionary from all the fields except valid time and value
        d = dict(zip(group_by, list(name)))

        timestamp = map(convert, group['valid_time'])
        values = group['value']
        mvals = np.ma.masked_invalid(np.array(values))
        data = [(timestamp[n], mvals[n]) for n in range(len(timestamp))]
        ldata = map(list, data)
        d['data'] = ldata
        s = str(d)

        # this is an ugly hack which could potentially lead to errors if " u'" occurs at the end of a string
        s = s.replace(" u'", " '")
        # change single quotes to double
        s = s.replace("'", '"')
        # replace masked values. Again, ugly
        s = s.replace('masked', 'null')
        series.append(s)

    json_str = ','.join(series)

    fout = open(out_name, 'w')
    fout.write('[')
    fout.write(json_str)
    fout.write(']')
    fout.close()

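# Hypothetical sketch of the resulting file: a JSON array with one object per
# group, each carrying the group fields plus a 'data' list of
# [timestamp_ms, value] pairs. Field names and numbers are illustrative.
#
#   [{"location": "site_A", "variable": "SPEED", "reftime": "2016-01-01 00:00:00",
#     "data": [[1451606400000, 7.3], [1451610000000, null]]}, ...]
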
def qsub(script, name=None, queue=None, pe=None, nslots=1, after_job=None,
         cwd=None, array=None, merge=True, log=None, dry_run=False):
    """Submits a SGE job via qsub

    Arguments:
        script    -- full path to job script file
        name      -- job name
        queue     -- name of queue to submit to
        pe        -- parallel environment to use
        nslots    -- number of slots (usually processor cores)
        after_job -- job id or name to supply as dependency
        cwd       -- change working directory to this before submitting
        array     -- if integer N supplied, will submit array jobs 1:N
        merge     -- merge stderr into stdout (default True)
        log       -- file to write job output to
        dry_run   -- log but don't submit commands (default False)

    Returns:
        job_id -- the job id returned by the scheduling system"""

    logger = loghelper.get(LOGGER)

    if not os.path.exists(script):
        raise IOError("%s not found" % script)

    # build up the argument string, leaving out any options not supplied
    name_arg = '-N %s' % name if name else ''
    q_arg = '-q %s' % queue if queue else ''
    pe_arg = '-pe %s' % pe if pe else ''
    nslots_arg = ' %d' % nslots if nslots else ''
    pe_slots_arg = pe_arg + nslots_arg
    after_arg = '-hold_jid %s' % after_job if after_job else ''
    cwd_arg = '-cwd' if cwd else ''
    array_arg = '-t 1-%s' % array if array else ''
    merge_arg = '-j y' if merge else ''
    log_arg = '-o %s' % log if log else ''

    all_args = ' '.join([name_arg, q_arg, pe_slots_arg, after_arg, cwd_arg, array_arg, merge_arg, log_arg])
    cmd = 'qsub %s %s ' % (all_args, script)

    if dry_run:
        job_id = str(random.randint(1, 100))
    else:
        proc = subprocess.Popen([cmd], stdout=subprocess.PIPE, shell=True, cwd=cwd, executable='/bin/bash')
        output = proc.stdout.read()
        job_id = job_id_from_reponse(output)

    logger.debug("%s \t--->\t %s" % (cmd.ljust(150), job_id))
    return job_id

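# Hypothetical usage sketch: submitting a 24-slot parallel job that waits for a
# previous job. Queue and parallel-environment names are assumptions.
#
#   job_id = qsub('/path/to/run_wrf.sh', name='wrf', queue='all.q', pe='ompi',
#                 nslots=24, after_job='1234', log='wrf.log', dry_run=True)
#   # builds roughly:
#   # qsub -N wrf -q all.q -pe ompi 24 -hold_jid 1234 -j y -o wrf.log /path/to/run_wrf.sh
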
def safe_remove(path, dry_run=False):
    logger = loghelper.get(LOGGER)

    # try and prevent removing something unsafe: root path, unexpanded wildcards,
    # or paths which are just too short
    cnd1 = path == "/"
    cnd2 = "*" in path
    cnd3 = len(path.split("/")) < 3

    if cnd1 or cnd2 or cnd3:
        raise UnsafeDeletion("Unsafe deletion detected with path %s" % path)

    logger.warn("removing path %s" % path)
    if not dry_run and os.path.exists(path):
        shutil.rmtree(path)

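# Hypothetical usage sketch: paths that trip the safety checks raise
# UnsafeDeletion rather than being deleted. Paths are illustrative.
#
#   safe_remove('/tmp/forecast/2016-03-02_00')   # ok: deep enough, no wildcards
#   safe_remove('/tmp/*')                        # raises UnsafeDeletion (wildcard)
#   safe_remove('/tmp')                          # raises UnsafeDeletion (too short)
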
def _merge_name(filename, key):
    """Merges a groupby key into a filename, by inserting it before the file extension

    Arguments:
        filename -- the base filename to insert into
        key      -- the groupby key (string or tuple)"""

    logger = loghelper.get(LOGGER)

    path, name = os.path.split(filename)
    tokens = name.split('.')
    flatkey = _to_str(key)
    tokens.insert(-1, flatkey)
    newname = '.'.join(tokens)
    newpath = os.path.join(path, newname)
    return newpath

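# Hypothetical usage sketch, assuming _to_str returns a plain string key
# unchanged; the file name is illustrative.
#
#   _merge_name('/data/output/forecast.csv', 'site_A')
#   # --> '/data/output/forecast.site_A.csv'
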
def save(frame, out, format, float_format=None):
    logger = loghelper.get(LOGGER)

    if format not in SUPPORTED_FORMATS:
        raise UnknownFormat("%s output format not supported" % format)

    # prepend a % sign to the float format, e.g. "0.3f" becomes "%0.3f"
    if float_format:
        float_format = "%" + float_format

    if format == "hdf":
        frame.to_hdf(out, 'w')
    elif format == "csv":
        frame.to_csv(out, float_format=float_format, index=False)
    elif format == "json":
        _to_json(frame, out, float_format=float_format)

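# Hypothetical usage sketch: note float_format is passed without the leading '%',
# which save() prepends itself. The file name is illustrative.
#
#   save(frame, 'forecast.csv', 'csv', float_format='0.3f')
#   # writes CSV with floats formatted as %0.3f
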
def _recursive_replace(source_dir, target_dir, replacements):
    """Recursively make replacements to files in source_dir, writing the results to target_dir"""

    logger = loghelper.get(LOGGER)

    # from os.walk:
    # dirpath is a string, the path to the directory.
    # dirnames is a list of the names of the subdirectories in dirpath (excluding '.' and '..').
    # filenames is a list of the names of the non-directory files in dirpath.
    # Note that the names in the lists contain no path components. To get a full path (which begins
    # with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
    logger.debug('_recursive_replace(%s, %s, replacements)' % (source_dir, target_dir))

    for dirpath, dirnames, filenames in os.walk(source_dir):
        for name in filenames:
            source = os.path.join(dirpath, name)
            target = source.replace(source_dir, target_dir)
            target_path = os.path.split(target)[0]
            if not os.path.exists(target_path):
                os.makedirs(target_path)
            logger.debug("%s ---> %s" % (source, target))
            fill_template(source, target, replacements)
            assert os.path.exists(target)

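# Hypothetical usage sketch: copy a template directory tree, filling placeholders
# in every file via fill_template. The directory names and the replacement keys
# are assumptions.
#
#   _recursive_replace('templates/wrf', 'run/2016-01-01_00',
#                      {'<start_date>': '2016-01-01_00:00:00'})
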
def get_coordinate_vars(dataset, coords=None):
    """ Gets coordinate variables associated with dimensions, doing some conversion
    to character array and time units

    Arguments:
        dataset -- a NetCDF4 Dataset object
        coords  -- a list of variable names to treat as coordinates. If None, then
                   coordinate variables are selected based on dimension names"""

    logger = loghelper.get(LOGGER)
    logger.debug("get_coordinate_vars()")

    dims = dataset.dimensions
    vars = dataset.variables
    ndims = len(dims)

    # if coordinate variables are not specified, fetch all variables
    # with the same name as dimensions (if they exist)
    if not coords:
        logger.debug("no coordinate variables given, finding automatically")
        coords = [d for d in dims if vars.get(d)]

    # package the result as a dictionary
    result = {}
    for c in coords:
        cvar = vars[c]
        if str(cvar.dtype) == "|S1":
            result[c] = _char_array_to_str(cvar)
        elif _is_time(cvar):
            result[c] = num2date(cvar[:], units=cvar.units, calendar=cvar.calendar)
        else:
            result[c] = cvar[:]

    return result

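# Hypothetical usage sketch: pulling coordinate variables from a dataset whose
# dimensions include time and location (file and dimension names are assumptions).
#
#   dataset = Dataset('forecast.nc')         # netCDF4.Dataset, assumed import
#   coords = get_coordinate_vars(dataset)
#   coords['time']        # array of datetimes decoded via num2date
#   coords['location']    # array of strings decoded from a character array
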
def get_init_times(start, end, interval):
    """ Returns a list of datetimes representing initial times in a forecast test case.

    Start and end can be lists of start and end times, in which case they must be the
    same length; each pair of start and end times will define a simulation block.

    Arguments:
        start    -- single start time or list of start times
        end      -- single end time or list of end times, same length as start
        interval -- integer interval in hours between initialisation times

    Returns:
        a list of initial times"""

    logger = loghelper.get(LOGGER)
    freq = rrule.HOURLY

    # even if start and end are single elements, package them into lists
    # to make the following code generic
    start = _listify(start)
    end = _listify(end)

    if len(start) != len(end):
        raise IOError('different number of start and end times specified')

    init_times = []
    hour = datetime.timedelta(0, 60*60)
    for s, e in zip(start, end):
        rec = rrule.rrule(freq, dtstart=s, until=e, interval=interval)
        init_times.extend(list(rec))

    logger.debug("get_init_times done")
    return init_times

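# Hypothetical usage sketch: two blocks of 6-hourly initial times; dates are
# illustrative.
#
#   start = [datetime.datetime(2016, 1, 1), datetime.datetime(2016, 2, 1)]
#   end   = [datetime.datetime(2016, 1, 2), datetime.datetime(2016, 2, 2)]
#   get_init_times(start, end, 6)
#   # --> [2016-01-01 00:00, 06:00, 12:00, 18:00, 2016-01-02 00:00,
#   #      2016-02-01 00:00, 06:00, 12:00, 18:00, 2016-02-02 00:00]
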
def ncdump(config):
    logger = loghelper.get(LOGGER)

    # _listify ensures arguments are enclosed within a list
    # to simplify treatment in the following code
    files = nctools._listify(config['<files>'])
    vars = nctools._listify(config.get('vars'))
    global_atts = nctools._listify(config.get('global-atts'))
    var_atts = nctools._listify(config.get('var-atts'))
    coord_vars = nctools._listify(config.get('coords'))
    sort_by = nctools._listify(config.get('sort-by'))
    order_by = nctools._listify(config.get('order-by'))
    out = config.get('out')
    pivot = config.get('pivot')
    valid_time = config.get('valid-time')
    format = config.get('format')
    filter = config.get('filter')
    split_by = config.get('split-by')
    concat = config.get('concat')
    start = config.get('start')
    delay = config.get('delay')
    cycles = nctools._listify(config.get('cycles'))

    basetime = start if start else datetime.datetime.today()
    prior = _prior_time(basetime, delay=delay, hours=cycles)
    logger.debug("using %s as a start time" % prior)

    if files == []:
        logger.info("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise nctools.ConfigError('either supply files or specify file-pattern')
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)
        if files == []:
            raise IOError("no files found")

    frame = nctools.melt(files, vars, global_atts, var_atts, coord_vars, missing=MISSING_ATTS)

    if valid_time:
        logger.debug("adding valid time into frame")
        frame['valid_time'] = frame['reftime'] + frame['leadtime'] * datetime.timedelta(0, 60*60)

    if filter:
        frame = nctools.filter(frame, filter)

    if concat:
        nctools.concat(frame, concat, name='variable', inplace=True)

    if pivot:
        frame = pd.pivot_table(frame, index=['reftime', 'leadtime', 'location'],
                               columns='variable', values='value')
        frame.reset_index(inplace=True)

    if sort_by:
        frame.sort(sort_by, inplace=True)

    if order_by:
        frame = frame[order_by]

    if out:
        out = substitute.sub_date(out, init_time=prior)

    if split_by:
        gb = frame.groupby(split_by)
        for key, group in gb:
            if out:
                new_name = _merge_name(out, key)
                save(group, new_name, config['format'], float_format=config.get('float-format'))
            else:
                print group.to_string()
                print '\n\n\n'
    elif out:
        save(frame, out, config['format'], float_format=config.get('float-format'))
    else:
        print frame.to_string()

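# Hypothetical usage sketch: a config dictionary as it might arrive from a
# docopt-style command line. Keys mirror those read by ncdump(); values are
# illustrative, and in practice every option read above would typically be
# present in the parsed config (possibly as None).
#
#   config = {'<files>': ['forecast.nc'],
#             'vars': ['SPEED', 'DIRECTION'],
#             'coords': ['time', 'location', 'height'],
#             'valid-time': True,
#             'format': 'csv',
#             'out': 'forecast.csv',
#             'float-format': '0.3f'}
#   ncdump(config)
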
def get_logger():
    return loghelper.get(LOGGER)