Example #1
def get_time(base_time=None, delay=None, round=None):
    """ Returns base_time minus a delay in hours, rounded down to the nearest hour given in round.
    Warning - works by rounding back to the beginning of the day, and does not currently work for the 
    case where you request a cycle time which is later than the base_time
    
    Arguments:
        base_time  -- base time to calculate from
        delay  -- optional delay in hours to apply 
        round  -- a list of integer hours to restrict the return value to e.g. [0,6,12,18]"""
    
    logger = loghelper.get(LOGGER)

    hour  = datetime.timedelta(0, 60*60)

    base_time = base_time if base_time else datetime.datetime.today()
    
    delay = delay if delay else 0
    delayed_time  = base_time - delay * hour

    start = delayed_time
    if round:
        start_day   = datetime.datetime(delayed_time.year, delayed_time.month, delayed_time.day, 0, 0)           # throw away all time parts
        start_hour  = delayed_time.hour
        past_hours  = [ h for h in round if (h <= start_hour)]
        recent_hour = past_hours[-1]
        start       = start_day + recent_hour * hour
    

    return start
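
A minimal usage sketch (hypothetical times; assumes the module-level datetime import used above). With a 2-hour delay and the standard 6-hourly cycles, a mid-afternoon base time rounds back to the 12:00 cycle:

import datetime

base = datetime.datetime(2014, 3, 10, 14, 30)
# apply a 2-hour delay, then round down to the nearest listed cycle hour
cycle_time = get_time(base_time=base, delay=2, round=[0, 6, 12, 18])
# cycle_time --> datetime.datetime(2014, 3, 10, 12, 0)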
Example #2
def filter(frame, rules):
    """ Filters a data frame according to a set of rules specified in rules i.e. 
    'column name' : [list of values] |
    'column name' : tuple of numeric limits (min, max)
    
        Arguments:
        frame   -- the DataFrame to filter
        rules   -- rules to apply """        
    
    
    
    logger = loghelper.get(LOGGER)
    
    logger.debug("%d rows before filtering" % len(frame))
    logger.debug(json.dumps(rules, indent=4))
    for column, rule in rules.items():
        logger.debug("filtering on %s" % column)
        
        # if a list of values is provided, filter on those
        if type(rule)==list:
            frame = frame[frame[column].isin(rule)]
        
        # else if a string is provided, parse it into (min, max) numeric limits
        elif type(rule)==str:
            min, max = _parse_filter(rule)
            frame = frame[(frame[column]>=min) & (frame[column]<=max)]
    
        else:
            raise ConfigError("filter type not understood, please give list or tuple")

        logger.debug("%d rows" % len(frame))            
    
    return frame
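
A minimal usage sketch (hypothetical column names; assumes pandas imported as pd, and note the function shadows the built-in filter, so it is normally called as nctools.filter from outside the module):

import pandas as pd

frame = pd.DataFrame({"location": ["UK1", "UK2", "DE1"], "value": [4.2, 5.1, 7.3]})
rules = {"location": ["UK1", "UK2"]}   # keep only these locations
subset = filter(frame, rules)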
Example #3
def filter(frame, rules):
    """ Filters a data frame according to a set of rules specified in rules i.e. 
    'column name' : [list of values] |
    'column name' : tuple of numeric limits (min, max)
    
        Arguments:
        frame   -- the DataFrame to filter
        rules   -- rules to apply """

    logger = loghelper.get(LOGGER)

    logger.debug("%d rows before filtering" % len(frame))
    logger.debug(json.dumps(rules, indent=4))
    for column, rule in rules.items():
        logger.debug("filtering on %s" % column)

        # if a list of values is provided, filter on those
        if type(rule) == list:
            frame = frame[frame[column].isin(rule)]

        # else if a string is provided, parse it into (min, max) numeric limits
        elif type(rule) == str:
            min, max = _parse_filter(rule)
            frame = frame[(frame[column] >= min) & (frame[column] <= max)]

        else:
            raise ConfigError(
                "filter type not understood, please give list or tuple")

        logger.debug("%d rows" % len(frame))

    return frame
Example #4
def _concat_cols(frame, cols, delim='.', formatter=None):
    """ mashes two columns into one by concantenating their columns. Default applies 
        str to each column and separates with a dot
        
        Arguments:
          frame     -- data frame to operate on
          cols      -- list of columns to join
          delimiter -- delimiter to use to sepearate values 
        """
    logger = loghelper.get(LOGGER)
    logger.warn(
        "Performance warning: string concatenation of columns is not done very efficiently"
    )

    if len(cols) != 2:
        raise NotYetImplemented(
            "concatenating other than two columns is not yet implemented")

    if formatter is not None:
        if isinstance(formatter, list):
            result = frame[cols[0]].apply(
                formatter[0]) + delim + frame[cols[1]].apply(formatter[1])

        elif isinstance(formatter, dict):
            result = frame[cols[0]].apply(
                formatter[cols[0]]) + delim + frame[cols[1]].apply(
                    formatter[cols[1]])
    else:
        result = frame[cols[0]].map(str) + delim + frame[cols[1]].map(str)

    return result
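
A minimal usage sketch of the default (no formatter) path, with a hypothetical frame and pandas imported as pd:

import pandas as pd

frame = pd.DataFrame({"lat": [52.1, 53.4], "lon": [-1.5, 0.2]})
ids = _concat_cols(frame, ["lat", "lon"], delim=".")
# ids --> Series of strings such as "52.1.-1.5"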
Example #5
def _char_array_to_str(chars):
    """Converts a NetCDF masked character array into an array of strings"""
    logger = loghelper.get(LOGGER)
    # assert we have two dimensions
    assert(len(chars.shape)==2)
    dim0 = chars.shape[0]
    dim1 = chars.shape[1]
    

    # if it is a masked array, replace masked with blanks
    if hasattr(chars[:], 'mask'):
        # first fill in masked elements with blanks
        
        filled = chars[:].filled(' ')
    else: 
        filled = chars

    # join character arrays across last dimension
    strs = [''.join(filled[n,:]) for n  in range(dim0) ]        
    
    # then strip away the blanks
    strs = map(string.strip, strs)
    
    # return as an array of strings 
    return np.array(strs)
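
A minimal usage sketch with a plain (unmasked) character array; hypothetical values, assuming numpy as np and the Python 2 string module used above:

import numpy as np

chars = np.array([list("UK1 "), list("DE2 ")])
names = _char_array_to_str(chars)
# names --> array(['UK1', 'DE2'])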
Example #6
def _concat_cols(frame, cols, delim='.', formatter=None):
        """ mashes two columns into one by concantenating their columns. Default applies 
        str to each column and separates with a dot
        
        Arguments:
          frame     -- data frame to operate on
          cols      -- list of columns to join
          delimiter -- delimiter to use to sepearate values 
        """
        logger = loghelper.get(LOGGER)
        logger.warn("Peformance warning, string concatenation of columns not done very efficiently")
        
        if len(cols)!=2:
            raise NotYetImplemented("concatenating other than two columns is not yet implemented")
        
        if formatter is not None:
            if isinstance(formatter, list):
                result = frame[cols[0]].apply(formatter[0]) + delim +  frame[cols[1]].apply(formatter[1])
            
            elif isinstance(formatter, dict):
                result = frame[cols[0]].apply(formatter[cols[0]]) + delim +  frame[cols[1]].apply(formatter[cols[1]])        
        else:
            result = frame[cols[0]].map(str) + delim +  frame[cols[1]].map(str)
        
        return result
Example #7
def create_directory_structure(expand, remove=None, create=None, copy=None, link=None, dry_run=False):
    """Creates a subdirectory structure, and copies, moves, and links in files
    
    Arguments:
        expand  -- a single-argument function to perform any string substitutions on any of the input arguments
        remove  -- a list of file patterns to remove
        create  -- a list of subdirectories to create if they don't already exist
        copy    -- a list of file patterns to copy
        link    -- a list of file patterns to link
        dry_run -- log rather than execute commands 

    """        
    # pass initial time as an argument, to leave a one-argument function which will expand strings

    logger = loghelper.get(LOGGER)
    if create:
        for d in create:
            subdir = expand(d)
            shared.create(subdir, dry_run=dry_run)

    if remove:
        for pattern in remove:
            shared.remove(expand(pattern), dry_run=dry_run)

    if copy:
        for pattern in copy:
            shared.copy(expand(pattern), dry_run=dry_run)
            
    if link:
        for pattern in link:
            shared.link(expand(pattern), dry_run=dry_run)
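
A minimal usage sketch with hypothetical paths; the expand function here is just a placeholder for whatever string substitution the caller builds, and dry_run=True is passed through to the shared helpers:

expand = lambda s: s.replace("%(run_dir)s", "/tmp/forecast/run")

create_directory_structure(expand,
                           create=["%(run_dir)s/wrf", "%(run_dir)s/logs"],
                           remove=["%(run_dir)s/wrf/rsl.*"],
                           dry_run=True)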
Example #8
def submit(jobs, expand, after_job=None, array_job=None, dry_run=False):    
    """Submits specicfied jobs to a scheduling engine e.g. SGE
    
    Arguments:
        jobs     : job entry specification as a dictionary
        expand   : function to expand any placeholders in strings
        after_job : initial job id to specify as dependency
        
    Returns:
        The final job_id submitted"""
        
    logger = loghelper.get(LOGGER)
    logger.debug("submitting jobs")
    job_ids = {}
    first=True
    for entry in jobs:
        name = expand(entry['name'])
        script = expand(entry['script'])
        run_dir = os.path.split(script)[0]

        
        # job dependency can either come from entry, or from previous simulation
        if not first and entry.get('after'):
            after_job = entry['after']

        job_id = queue.qsub(script, name=entry.get('name'), queue=entry.get('queue'), pe=entry.get('pe'), nslots=entry.get('nprocs'), 
                            after_job=after_job, cwd=run_dir, array=entry.get('array'), merge=True, log=entry.get('log'),
                            dry_run=dry_run)

        
        job_ids[name] = job_id
        first=False
    
    return job_id
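
A minimal usage sketch with hypothetical job entries; note that the underlying queue.qsub call checks that each script exists on disk even when dry_run=True:

jobs = [{"name": "prepare", "script": "/path/to/prepare.sh", "queue": "all.q"},
        {"name": "wrf", "script": "/path/to/run_wrf.sh", "after": "prepare",
         "pe": "mpi", "nprocs": 16}]

last_id = submit(jobs, expand=lambda s: s, dry_run=True)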
Example #9
def _to_json(frame, out_name, float_format="%0.3f" ):
    """ Writes each variable and init_time series into one json file. If vars is None, then all export all variables"""

    logger = loghelper.get(LOGGER)

    logger.info("*** outputting data as json ***")

        
    # Bit of a hack to ease output formatting, convert init_time to string
    frame['reftime'] = frame['reftime'].apply(str)
    
    
    # we need to group by everything except valid time, lead time and value
    group_by = [c for c in frame.columns if c not in ["valid_time","leadtime", "value"]]
    gb = frame.groupby(group_by)

        
    # Convert time to milliseconds since epoch
    convert = lambda t: time.mktime(t.timetuple())*1000        
    
    series = []
    for name, group in gb:

        # create a dictionary from all the fields except valid time and value
        d = dict(zip(group_by,list(name)))
        
        timestamp = map(convert, group['valid_time'])
        values  = group['value']
        mvals   = np.ma.masked_invalid(np.array(values))
        data    = [ (timestamp[n],mvals[n]) for n in range(len(timestamp))]
        ldata   = map(list, data)
        d['data'] = ldata
        s = str(d)
    
        # this is an ugly hack which could potentially lead to errors if " u'" occurs at the end of a string
        s =  s.replace(" u'", " '")
                
        # change single quotes to double
        s = s.replace("'", '"')
        
        # replace masked values. Again, ugly
        s = s.replace('masked', 'null')
        
        series.append(s)

    json_str = ','.join(series)
    
    
    # if not os.path.exists(out_dir):
        # os.makedirs(out_dir)
    
    fout = open(out_name, 'w')
    fout.write('[')
    fout.write(json_str)
    fout.write(']')
    fout.close()
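
A minimal usage sketch with a hypothetical long-format frame whose columns mirror those the function groups on; assumes pandas as pd and the Python 2 map semantics the code relies on:

import datetime
import pandas as pd

frame = pd.DataFrame({
    "reftime":    [datetime.datetime(2014, 3, 10, 0)] * 2,
    "valid_time": [datetime.datetime(2014, 3, 10, 1), datetime.datetime(2014, 3, 10, 2)],
    "leadtime":   [1, 2],
    "location":   ["UK1", "UK1"],
    "variable":   ["SPEED", "SPEED"],
    "value":      [5.2, 6.1]})

_to_json(frame, "/tmp/speed_series.json")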
Example #10
def _to_json(frame, out_name, float_format="%0.3f"):
    """ Writes each variable and init_time series into one json file. If vars is None, then all export all variables"""

    logger = loghelper.get(LOGGER)

    logger.info("*** outputting data as json ***")

    # Bit of a hack to ease output formatting, convert init_time to string
    frame['reftime'] = frame['reftime'].apply(str)

    # we need to group by everything except valid time, lead time and value
    group_by = [
        c for c in frame.columns
        if c not in ["valid_time", "leadtime", "value"]
    ]
    gb = frame.groupby(group_by)

    # Convert time to milliseconds since epoch
    convert = lambda t: time.mktime(t.timetuple()) * 1000

    series = []
    for name, group in gb:

        # create a dictionary from all the fields except valid time and value
        d = dict(zip(group_by, list(name)))

        timestamp = map(convert, group['valid_time'])
        values = group['value']
        mvals = np.ma.masked_invalid(np.array(values))
        data = [(timestamp[n], mvals[n]) for n in range(len(timestamp))]
        ldata = map(list, data)
        d['data'] = ldata
        s = str(d)

        # this is an ugly hack which could potentially lead to errors if " u'" occurs at the end of a string
        s = s.replace(" u'", " '")

        # change single quotes to double
        s = s.replace("'", '"')

        # replace masked values. Again, ugly
        s = s.replace('masked', 'null')

        series.append(s)

    json_str = ','.join(series)

    # if not os.path.exists(out_dir):
    # os.makedirs(out_dir)

    fout = open(out_name, 'w')
    fout.write('[')
    fout.write(json_str)
    fout.write(']')
    fout.close()
Example #11
def qsub(script, name=None, queue=None, pe=None, nslots=1, after_job=None, cwd=None, array=None, merge=True, log=None, dry_run=False):
    """Submits a SGE job via qsub
    
    Arguments:
        script -- full path to job script file
        name -- job name
        queue -- name of queue to submit to
        pe -- parallel environment to use
        nslots -- number of slots (usually processor cores)
        after_job -- job id or name to supply as dependency
        cwd -- change working directory to this before submitting
        array -- if integer N supplied, will submit array jobs 1:N
        dry_run -- log but don't submit commands (default False)
    Returns:
        job_id -- the job id returned by the scheduling system """
 
    logger = loghelper.get(LOGGER)
    
    if not os.path.exists(script): 
        raise IOError("%s not found" % script)
    
    
    name_arg = '-N %s' % name if name else ''
    q_arg = '-q %s' % queue if queue else ''
    pe_arg = '-pe %s' % pe if pe else ''    
    nslots_arg = ' %d' % nslots if nslots else ''
    pe_slots_arg = pe_arg + nslots_arg
    after_arg = '-hold_jid %s' % after_job if after_job else ''
    cwd_arg = '-cwd' if cwd else ''
    array_arg = '-t 1-%s' % array if array else ''
    merge_arg = '-j y' if merge else ''
    log_arg = '-o %s' % log if log else ''
    
    all_args = ' '.join([name_arg, q_arg, pe_slots_arg, after_arg, cwd_arg, array_arg, merge_arg, log_arg])

    cmd = 'qsub %s %s ' % (all_args, script)

    if dry_run:
        job_id = str(random.randint(1,100))
    else:
        proc = subprocess.Popen([cmd], stdout=subprocess.PIPE, shell=True, cwd=cwd, executable='/bin/bash')
        output = proc.stdout.read()
        job_id = job_id_from_reponse(output)

    logger.debug("%s \t--->\t %s" % (cmd.ljust(150), job_id))
    return job_id
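
A minimal usage sketch (hypothetical script path; qsub checks that the script exists even when dry_run=True, which otherwise only logs the command and returns a random job id):

job_id = qsub("/home/forecaster/jobs/run_wrf.sh",
              name="run_wrf", queue="all.q", pe="mpi", nslots=16,
              log="/home/forecaster/logs/run_wrf.log", dry_run=True)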
Example #12
def safe_remove(path, dry_run=False):
    
    logger = loghelper.get(LOGGER)
    
    # try and prevent removing something unsafe: root path, unexpanded wildcards, or paths which are just too short
    cnd1 = path == "/" 
    cnd2 = "*" in path
    cnd3 = len(path.split("/"))<3
    
    if cnd1 or cnd2 or cnd3:
        raise UnsafeDeletion("Unsafe deletion detected with path %s") % path
    
    logger.warn("removing path %s" % path)
    if not dry_run and os.path.exists(path):
        shutil.rmtree(path)        
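
A minimal usage sketch (hypothetical path); the safety checks reject the root path, unexpanded wildcards, and very short paths:

safe_remove("/tmp/forecast/2014-03-10_00", dry_run=True)   # logged only
# safe_remove("/")       --> raises UnsafeDeletion
# safe_remove("/tmp/*")  --> raises UnsafeDeletion (unexpanded wildcard)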
Example #13
def _merge_name(filename, key):
    """Merges a groupby key into an filename, by inserting it before the file extension
    Arguments:
        filename -- the base filename to insert into
        key      -- the groupby key (string or tuple)"""
    
    path,name = os.path.split(filename)
    tokens = name.split('.')
    
    logger = loghelper.get(LOGGER)
    flatkey = _to_str(key)

    tokens.insert(-1,flatkey)
    newname = '.'.join(tokens)
    newpath = os.path.join(path, newname)
    return newpath
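
A minimal usage sketch showing how the key is spliced in before the file extension (assuming _to_str returns a simple string key unchanged):

_merge_name("/data/output/forecast.csv", "UK1")
# --> "/data/output/forecast.UK1.csv"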
Example #14
def _merge_name(filename, key):
    """Merges a groupby key into an filename, by inserting it before the file extension
    Arguments:
        filename -- the base filename to insert into
        key      -- the groupby key (string or tuple)"""

    path, name = os.path.split(filename)
    tokens = name.split('.')

    logger = loghelper.get(LOGGER)
    flatkey = _to_str(key)

    tokens.insert(-1, flatkey)
    newname = '.'.join(tokens)
    newpath = os.path.join(path, newname)
    return newpath
Example #15
def save(frame, out, format, float_format=None):
    
    logger = loghelper.get(LOGGER)
    
    if format not in SUPPORTED_FORMATS: raise UnknownFormat("%s output format not supported" % format)
    
    # prepend a % sign to the float format
    if float_format: float_format = "%" + float_format
    
    if format=="hdf":
        frame.to_hdf(out, 'w')
        
    elif format=="csv":
        frame.to_csv(out, float_format=float_format, index=False)        

    elif format=="json":
        _to_json(frame, out, float_format=float_format) 
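
A minimal usage sketch (hypothetical frame and output path; assumes pandas as pd and that "csv" is listed in the module-level SUPPORTED_FORMATS):

import pandas as pd

frame = pd.DataFrame({"location": ["UK1", "DE2"], "value": [1.234, 5.678]})
save(frame, "/tmp/output.csv", "csv", float_format="0.2f")   # becomes "%0.2f"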
Example #16
def save(frame, out, format, float_format=None):

    logger = loghelper.get(LOGGER)

    if format not in SUPPORTED_FORMATS:
        raise UnknownFormat("%s output format not supported" % format)

    # prepend a % sign to the float format
    if float_format: float_format = "%" + float_format

    if format == "hdf":
        frame.to_hdf(out, 'w')

    elif format == "csv":
        frame.to_csv(out, float_format=float_format, index=False)

    elif format == "json":
        _to_json(frame, out, float_format=float_format)
Example #17
def _recursive_replace(source_dir, target_dir, replacements):
    """recursively make replacements to files in source_dir to target_dir"""
    logger = loghelper.get(LOGGER)
    # from os.walk
    # dirpath is a string, the path to the directory. 
    # dirnames is a list of the names of the subdirectories in dirpath (excluding '.' and '..'). 
    # filenames is a list of the names of the non-directory files in dirpath. 
    # Note that the names in the lists contain no path components. To get a full path (which begins with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
    logger.debug('_recursive_replace(%s, %s, replacements)' %(source_dir, target_dir))
    for dirpath, dirnames, filenames in os.walk(source_dir):

        for name in filenames:
            source = os.path.join(dirpath, name)
            target = source.replace(source_dir, target_dir)
            target_path = os.path.split(target)[0]
            if not os.path.exists(target_path):
                os.makedirs(target_path)
            logger.debug("%s ---> %s" %(source, target))
            fill_template(source,target,replacements)
            assert(os.path.exists(target))
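
A minimal usage sketch with hypothetical directories; replacements maps placeholder strings to values, which fill_template applies to each file copied from source_dir into target_dir:

replacements = {"<init_time>": "2014-03-10_00", "<domain>": "d01"}
_recursive_replace("/templates/wrf", "/runs/2014-03-10_00", replacements)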
Example #18
def _recursive_replace(source_dir, target_dir, replacements):
    """recursively make replacements to files in source_dir to target_dir"""
    logger = loghelper.get(LOGGER)
    # from os.walk
    # dirpath is a string, the path to the directory.
    # dirnames is a list of the names of the subdirectories in dirpath (excluding '.' and '..').
    # filenames is a list of the names of the non-directory files in dirpath.
    # Note that the names in the lists contain no path components. To get a full path (which begins with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
    logger.debug('_recursive_replace(%s, %s, replacements)' %
                 (source_dir, target_dir))
    for dirpath, dirnames, filenames in os.walk(source_dir):

        for name in filenames:
            source = os.path.join(dirpath, name)
            target = source.replace(source_dir, target_dir)
            target_path = os.path.split(target)[0]
            if not os.path.exists(target_path):
                os.makedirs(target_path)
            logger.debug("%s ---> %s" % (source, target))
            fill_template(source, target, replacements)
            assert (os.path.exists(target))
Example #19
def submit(jobs, expand, after_job=None, array_job=None, dry_run=False):
    """Submits specicfied jobs to a scheduling engine e.g. SGE
    
    Arguments:
        jobs     : job entry specification as a dictionary
        expand   : function to expand any placeholders in strings
        after_job : initial job id to specify as dependency
        
    Returns:
        The final job_id submitted"""

    logger = loghelper.get(LOGGER)
    logger.debug("submitting jobs")
    job_ids = {}
    first = True
    for entry in jobs:
        name = expand(entry['name'])
        script = expand(entry['script'])
        run_dir = os.path.split(script)[0]

        # job dependency can either come from entry, or from previous simulation
        if not first and entry.get('after'):
            after_job = entry['after']

        job_id = queue.qsub(script,
                            name=entry.get('name'),
                            queue=entry.get('queue'),
                            pe=entry.get('pe'),
                            nslots=entry.get('nprocs'),
                            after_job=after_job,
                            cwd=run_dir,
                            array=entry.get('array'),
                            merge=True,
                            log=entry.get('log'),
                            dry_run=dry_run)

        job_ids[name] = job_id
        first = False

    return job_id
Example #20
def get_coordinate_vars(dataset, coords=None):
    """ Gets coordinate variables associated with dimensions,
    doing some conversion to character array and time units
    
    Arguments:
      dataset -- a NetCDF4 Dataset object
      coords  -- a list of variable names to treat as coordinates. If None, then 
                 coordinate variables are selected based on dimension names"""
    
    logger = loghelper.get(LOGGER)
    logger.debug("get_coordinate_vars()")
    
    dims = dataset.dimensions
    vars = dataset.variables
    ndims = len(dims)
    
    
    
    # if coordinate variables are not specified, fetch all variables 
    # with the same name as dimensions (if they exist)
    if not coords:
        logger.debug("no coordinate variables given, finding automatically")
        coords = [ d for d in dims if vars.get(d) ] 


    # package the result as a dictionary 
    result = {}
        
    for c in coords:
        cvar = vars[c]
        
        if str(cvar.dtype)=="|S1":
            result[c] = _char_array_to_str(cvar)
        elif _is_time(cvar):
            result[c] = num2date(cvar[:], units=cvar.units,calendar=cvar.calendar)
        else:
            result[c] = cvar[:]
    return result
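
A minimal usage sketch (hypothetical file and coordinate names; assumes the netCDF4 Dataset class):

from netCDF4 import Dataset

dataset = Dataset("/data/forecast/tseries.nc")
coords = get_coordinate_vars(dataset, coords=["time", "location", "height"])
# coords["time"]     --> array of datetimes (converted via num2date)
# coords["location"] --> array of strings (converted from a character array)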
Example #21
def get_coordinate_vars(dataset, coords=None):
    """ Gets coordinate variables associated with dimensions,
    doing some conversion to character array and time units
    
    Arguments:
      dataset -- a NetCDF4 Dataset object
      coords  -- a list of variable names to treat as coordinates. If None, then 
                 coordinate variables are selected based on dimension names"""

    logger = loghelper.get(LOGGER)
    logger.debug("get_coordinate_vars()")

    dims = dataset.dimensions
    vars = dataset.variables
    ndims = len(dims)

    # if coordinate variables are not specified, fetch all variables
    # with the same name as dimensions (if they exist)
    if not coords:
        logger.debug("no coordinate variables given, finding automatically")
        coords = [d for d in dims if vars.get(d)]

    # package the result as a dictionary
    result = {}

    for c in coords:
        cvar = vars[c]

        if str(cvar.dtype) == "|S1":
            result[c] = _char_array_to_str(cvar)
        elif _is_time(cvar):
            result[c] = num2date(cvar[:],
                                 units=cvar.units,
                                 calendar=cvar.calendar)
        else:
            result[c] = cvar[:]
    return result
Example #22
def get_init_times(start, end, interval):
    """ Returns a list of datetimes representing initial times in a forecast test case
    
    Start and end can be lists of start and end times, in which case they must be the same length,
    each pair of start and end times will define a simulation block.
    
    Arguments:
        start -- single start time or list of start times
        end -- single end time or list of end times same length as start
        interval -- integer interval in hours between initialisation times
    
    Returns:
        a list of intital times"""

    logger = loghelper.get(LOGGER)
    
    freq = rrule.HOURLY

    # even if start and end are single elements, package them into lists 
    # to make the following code generic
    start = _listify(start)
    end = _listify(end) 
    
    if len(start)!=len(end):
        raise IOError('different start and end times specified')

    init_times = []
    hour = datetime.timedelta(0,60*60)
    
    
    for s, e in zip(start, end):
        rec  = rrule.rrule(freq, dtstart=s, until=e, interval=interval)
        init_times.extend(list(rec))


    logger.debug("get_init_times done")
    return init_times
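
A minimal usage sketch (assumes the module-level datetime import); a single start/end pair with a 12-hour interval yields three initial times:

import datetime

start = datetime.datetime(2014, 3, 10, 0)
end = datetime.datetime(2014, 3, 11, 0)
init_times = get_init_times(start, end, interval=12)
# init_times --> [2014-03-10 00:00, 2014-03-10 12:00, 2014-03-11 00:00]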
Example #23
def _char_array_to_str(chars):
    """Converts a NetCDF masked character array into an array of strings"""
    logger = loghelper.get(LOGGER)
    # assert we have two dimensions
    assert (len(chars.shape) == 2)
    dim0 = chars.shape[0]
    dim1 = chars.shape[1]

    # if it is a masked array, replace masked with blanks
    if hasattr(chars[:], 'mask'):
        # first fill in masked elements with blanks

        filled = chars[:].filled(' ')
    else:
        filled = chars

    # join character arrays across last dimension
    strs = [''.join(filled[n, :]) for n in range(dim0)]

    # then strip away the blanks
    strs = map(string.strip, strs)

    # return as an array of strings
    return np.array(strs)
Example #24
def ncdump(config):
    
    logger = loghelper.get(LOGGER)
    
    # _listify ensures arguments are enclosed within a list
    # to simplify treatment in the following code
    files = nctools._listify(config['<files>'])
    vars        = nctools._listify(config.get('vars'))
    global_atts = nctools._listify(config.get('global-atts'))
    var_atts    = nctools._listify(config.get('var-atts'))
    coord_vars  = nctools._listify(config.get('coords'))
    sort_by     = nctools._listify(config.get('sort-by')) 
    order_by    = nctools._listify(config.get('order-by'))
    out         = config.get('out')
    pivot       = config.get('pivot')
    valid_time  = config.get('valid-time')
    format      = config.get('format')
    filter      = config.get('filter')
    split_by    = config.get('split-by')
    concat      = config.get('concat')
    start       = config.get('start')
    delay       = config.get('delay')
    cycles      = nctools._listify(config.get('cycles'))
        
    basetime = start if start else datetime.datetime.today()
    
    prior = _prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)
    
    if files==[]:
        logger.info("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern: raise nctools.ConfigError('either supply files or specify file-pattern')
        
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)


    if files==[]: raise IOError("no files found")

     
    frame = nctools.melt(files, vars, global_atts, var_atts, coord_vars, missing=MISSING_ATTS)
    
    if valid_time:
        logger.debug("adding valid time into frame")
        frame['valid_time'] = frame['reftime'] + frame['leadtime']*datetime.timedelta(0,60*60)
    
    if filter:
        frame = nctools.filter(frame, filter)
    
    if concat:
        nctools.concat(frame, concat, name='variable', inplace=True)
    
    if pivot: 
        frame = pd.pivot_table(frame, index=['reftime','leadtime','location'], columns='variable', values='value')
        frame.reset_index(inplace=True)
        
    if sort_by: frame.sort(sort_by, inplace=True)

    
    if order_by:
        frame = frame[order_by]
    
    if out:
        out = substitute.sub_date(out, init_time=prior)
   
    if split_by:
        gb = frame.groupby(split_by)
        for key,group in gb:
            if out:
                new_name = _merge_name(out,key)
                save(gb.get_group(key), new_name, config['format'], float_format=config.get('float-format')) 
            else:
                print gb.get_group(key).to_string()
                print '\n\n\n'
    elif out: 
        save(frame, out, config['format'], float_format=config.get('float-format')) 
    
    else: 
        print frame.to_string()
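
A minimal sketch of the shape of the docopt-style config dictionary (hypothetical keys and paths, not an exhaustive set of options; keys not supplied are simply read back as None by config.get above):

config = {"<files>": ["/data/forecast/tseries_d01.nc"],
          "vars": ["SPEED", "DIRECTION"],
          "coords": ["time", "location", "height"],
          "format": "csv",
          "out": "/data/forecast/tseries.csv"}

ncdump(config)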
Example #25
def ncdump(config):

    logger = loghelper.get(LOGGER)

    # _listify ensures arguments are enclosed within a list
    # to simplify treatment in the following code
    files = nctools._listify(config['<files>'])
    vars = nctools._listify(config.get('vars'))
    global_atts = nctools._listify(config.get('global-atts'))
    var_atts = nctools._listify(config.get('var-atts'))
    coord_vars = nctools._listify(config.get('coords'))
    sort_by = nctools._listify(config.get('sort-by'))
    order_by = nctools._listify(config.get('order-by'))
    out = config.get('out')
    pivot = config.get('pivot')
    valid_time = config.get('valid-time')
    format = config.get('format')
    filter = config.get('filter')
    split_by = config.get('split-by')
    concat = config.get('concat')
    start = config.get('start')
    delay = config.get('delay')
    cycles = nctools._listify(config.get('cycles'))

    basetime = start if start else datetime.datetime.today()

    prior = _prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)

    if files == []:
        logger.info("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise nctools.ConfigError(
                'either supply files or specify file-pattern')

        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)

    if files == []: raise IOError("no files found")

    frame = nctools.melt(files,
                         vars,
                         global_atts,
                         var_atts,
                         coord_vars,
                         missing=MISSING_ATTS)

    if valid_time:
        logger.debug("adding valid time into frame")
        frame['valid_time'] = frame[
            'reftime'] + frame['leadtime'] * datetime.timedelta(0, 60 * 60)

    if filter:
        frame = nctools.filter(frame, filter)

    if concat:
        nctools.concat(frame, concat, name='variable', inplace=True)

    if pivot:
        frame = pd.pivot_table(frame,
                               index=['reftime', 'leadtime', 'location'],
                               columns='variable',
                               values='value')
        frame.reset_index(inplace=True)

    if sort_by: frame.sort(sort_by, inplace=True)

    if order_by:
        frame = frame[order_by]

    if out:
        out = substitute.sub_date(out, init_time=prior)

    if split_by:
        gb = frame.groupby(split_by)
        for key, group in gb:
            if out:
                new_name = _merge_name(out, key)
                save(gb.get_group(key),
                     new_name,
                     config['format'],
                     float_format=config.get('float-format'))
            else:
                print gb.get_group(key).to_string()
                print '\n\n\n'
    elif out:
        save(frame,
             out,
             config['format'],
             float_format=config.get('float-format'))

    else:
        print frame.to_string()
Example #26
def qsub(script,
         name=None,
         queue=None,
         pe=None,
         nslots=1,
         after_job=None,
         cwd=None,
         array=None,
         merge=True,
         log=None,
         dry_run=False):
    """Submits a SGE job via qsub
    
    Arguments:
        script -- full path to job script file
        name -- job name
        queue -- name of queue to submit to
        pe -- parallel environment to use
        nslots -- number of slots (usually processor cores)
        after_job -- job id or name to supply as dependency
        cwd -- change working directory to this before submitting
        array -- if integer N supplied, will submit array jobs 1:N
        dry_run -- log but don't submit commands (default False)
    Returns:
        job_id -- the job id returned by the scheduling system """

    logger = loghelper.get(LOGGER)

    if not os.path.exists(script):
        raise IOError("%s not found" % script)

    name_arg = '-N %s' % name if name else ''
    q_arg = '-q %s' % queue if queue else ''
    pe_arg = '-pe %s' % pe if pe else ''
    nslots_arg = ' %d' % nslots if nslots else ''
    pe_slots_arg = pe_arg + nslots_arg
    after_arg = '-hold_jid %s' % after_job if after_job else ''
    cwd_arg = '-cwd' if cwd else ''
    array_arg = '-t 1-%s' % array if array else ''
    merge_arg = '-j y' if merge else ''
    log_arg = '-o %s' % log if log else ''

    all_args = ' '.join([
        name_arg, q_arg, pe_slots_arg, after_arg, cwd_arg, array_arg,
        merge_arg, log_arg
    ])

    cmd = 'qsub %s %s ' % (all_args, script)

    if dry_run:
        job_id = str(random.randint(1, 100))
    else:
        proc = subprocess.Popen([cmd],
                                stdout=subprocess.PIPE,
                                shell=True,
                                cwd=cwd,
                                executable='/bin/bash')
        output = proc.stdout.read()
        job_id = job_id_from_reponse(output)

    logger.debug("%s \t--->\t %s" % (cmd.ljust(150), job_id))
    return job_id
Example #27
def get_logger():
    return loghelper.get(LOGGER)