def test_which():
    """
    Check that ``anc.which`` finds an existing file on the PATH and returns None for an unknown program.

    The directory containing the ``os`` module file is temporarily appended to the
    PATH environment variable so that this file can be looked up by ``which``.
    """
    env = os.environ['PATH']
    os.environ['PATH'] = '{}{}{}'.format(env, os.path.pathsep, os.path.dirname(os.__file__))
    try:
        program = anc.which(os.__file__, os.F_OK)
        assert os.path.isfile(program)
        assert anc.which(program, os.F_OK) == program
        assert anc.which('foobar') is None
    finally:
        # always restore the original PATH, even if one of the assertions fails,
        # so that the modification cannot leak into other tests
        os.environ['PATH'] = env
def parse_command(command, indent='    '):
    """
    Parse the help text of a Gamma command to a Python function including a docstring.
    The docstring is in rst format and can thus be parsed by e.g. sphinx.
    This function is not intended to be used by itself, but rather within function :func:`parse_module`.

    Parameters
    ----------
    command: str
        the name of the gamma command
    indent: str
        the Python function indentation string; default: four spaces

    Returns
    -------
    str
        the full Python function text

    Raises
    ------
    OSError
        if the command cannot be found by ``which``
    DeprecationWarning
        if the command output states that the command was renamed or replaced
    RuntimeError
        if the output indicates a Perl script which cannot be run, if the usage contains
        multi-args for which no replacement is defined, if a parameter occurs twice
        or if a parameter is missing in the created docstring
    """
    # run the command without passing arguments to just catch its usage description
    command = which(command)
    if command is None:
        raise OSError('command does not exist')
    command_base = os.path.basename(command)
    proc = sp.Popen(command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
    out, err = proc.communicate()
    # sometimes the description string is split between stdout and stderr
    # for the following commands stderr contains the usage description line, which is inserted into stdout
    if command_base in ['ras_pt', 'ras_data_pt', 'rasdt_cmap_pt']:
        out = out.replace(' ***\n ', ' ***\n ' + err)
    else:
        # for all other commands stderr is just appended to stdout
        out += err

    # commands which only print a note that they were renamed/replaced cannot be parsed
    pattern = r'([\w\.]+ (?:has been|was) re(?:named to|placed(?: that [ \*\n]*|) by)(?: the ISP program|) [\w\.]+)'
    match = re.search(pattern, out)
    if match:
        raise DeprecationWarning('\n' + out)

    if re.search(r"Can't locate FILE/Path\.pm in @INC", out):
        raise RuntimeError('unable to parse Perl script')
    ###########################################
    # fix command-specific inconsistencies in parameter naming
    # in several commands the parameter naming in the usage description line does not match that of the docstring
    # each entry is a list of (old, new) string pairs which are applied to the whole help text via str.replace
    parnames_lookup = {
        '2PASS_INT': [('OFF_PAR', 'OFF_par')],
        'adapt_filt': [('low_snr_thr', 'low_SNR_thr')],
        'atm_mod2': [('rpt', 'report'),
                     ('[mode]', '[model_atm]'),
                     ('[model]', '[model_atm]'),
                     ('model atm', 'model_atm atm')],
        'atm_mod_2d': [('xref', 'rref'),
                       ('yref', 'azref')],
        'atm_mod_2d_pt': [('[sigma_min]', '[sigma_max]')],
        'cc_monitoring': [('...', '<...>')],
        'cct_sp_pt': [('pcct_sp_pt', 'pcct_sp')],
        'comb_interfs': [('combi_out', 'combi_int')],
        'coord_to_sarpix': [('north/lat', 'north_lat'),
                            ('east/lon', 'east_lon'),
                            ('SLC_par', '<SLC_MLI_par>'),
                            ('SLC/MLI_par', 'SLC_MLI_par')],
        'base_calc': [('plt_flg', 'plt_flag'),
                      ('pltflg', 'plt_flag')],
        'base_init': [('<base>', '<baseline>')],
        'base_plot': [('plt_flg', 'plt_flag'),
                      ('pltflg', 'plt_flag')],
        'dis2hgt': [('m/cycle', 'm_cycle')],
        'discc': [('min_corr', 'cmin'),
                  ('max_corr', 'cmax')],
        'disp2ras': [('<list>', '<DISP_tab>')],
        'dis_data': [('...', '<...>')],
        'dispwr': [('data_type', 'dtype')],
        'DORIS_vec': [('SLC_PAR', 'SLC_par')],
        'gc_map_fd': [('fdtab', 'fd_tab')],
        'gc_map_grd': [('<MLI_par>', '<GRD_par>')],
        'geocode_back': [('<gc_map>', '<lookup_table>'),
                         ('\n gc_map ', '\n lookup_table ')],
        'GRD_to_SR': [('SLC_par', 'MLI_par')],
        'haalpha': [('<alpha> <entropy>', '<alpha2> <entropy>'),
                    ('alpha (output)', 'alpha2 (output)')],
        'histogram_ras': [('mean/stdev', 'mean_stdev')],
        'hsi_color_scale': [('[chip]', '[chip_width]')],
        'HUYNEN_DEC': [('T11_0', 'T11'),
                       ('<T12> <T13> <T11>', '<T11> <T12> <T13>'),
                       ('HUYNEN_DEC:', '***')],
        'interf_SLC': [(' SLC2_pa ', ' SLC2_par ')],
        'ionosphere_mitigation': [('<SLC1> <ID1>', '<ID1>')],
        'landsat2dem': [('<DEM>', '<image>')],
        'line_interp': [('input file', 'data_in'),
                        ('output file', 'data_out')],
        'm-alpha': [('<c2 ', '<c2> ')],
        'm-chi': [('<c2 ', '<c2> ')],
        'm-delta': [('<c2 ', '<c2> ')],
        'map_section': [('n1', 'north1'),
                        ('e1', 'east1'),
                        ('n2', 'north2'),
                        ('e2', 'east2'),
                        ('[coord]', '[coords]')],
        'mask_class': [('...', '<...>')],
        'mcf_pt': [('<azlks>', '[azlks]'),
                   ('<rlks>', '[rlks]')],
        'mk_2d_im_geo': [('exponent', 'exp')],
        'mk_adf2_2d': [('[alpha_max [', '[alpha_max] ['),
                       ('-m MLI_dir', 'mli_dir'),
                       ('-s scale', 'scale'),
                       ('-e exp', 'exponent'),
                       ('-u', 'update')],
        'mk_base_calc': [('<RSLC_tab>', '<SLC_tab>')],
        'mk_cpd_all': [('dtab', 'data_tab')],
        'mk_cpx_ref_2d': [('diff_tab', 'cpx_tab')],
        'mk_dispmap2_2d': [('RMLI_image', 'MLI'),
                           ('RMLI_par', 'MLI_par'),
                           ('MLI_image', 'MLI'),
                           ('DISP_tab', 'disp_tab')],
        'mk_dispmap_2d': [('RMLI_image', 'MLI'),
                          ('RMLI_par', 'MLI_par'),
                          ('MLI_image', 'MLI'),
                          ('DISP_tab', 'disp_tab')],
        'mk_geo_data_all': [('data_geo_dir', 'geo_dir')],
        'mk_itab': [('<offset>', '<start>')],
        'mk_hgt_2d': [('m/cycle', 'm_cycle')],
        'mk_pol2rec_2d': [('data_tab', 'DIFF_tab'),
                          ('<type> <rmli>', '<dtype>'),
                          ('<dtype> <rmli>', '<dtype>'),
                          ('type input', 'dtype input'),
                          ('\n Options:\n', ''),
                          ('-s scale', 'scale'),
                          ('-e exp', 'exponent'),
                          ('-a min', 'min'),
                          ('-b max', 'max'),
                          ('-R rmax', 'rmax'),
                          ('-m mode', 'mode'),
                          ('-u', 'update')],
        'mk_rasdt_all': [('RMLI_image', 'MLI'),
                         ('MLI_image', 'MLI')],
        'mk_rasmph_all': [('RMLI_image', 'MLI'),
                          ('MLI_image', 'MLI')],
        'mk_unw_2d': [('unw_mask1', 'unw_mask')],
        'mk_unw_ref_2d': [('diff_tab', 'DIFF_tab')],
        'MLI2pt': [('MLI_TAB', 'MLI_tab'),
                   ('pSLC_par', 'pMLI_par')],
        'mosaic': [('<..>', '<...>'),
                   ('DEM_parout', 'DEM_par_out')],
        'multi_class_mapping': [('...', '<...>')],
        'multi_look_geo': [('geo_SLC', 'SLC'),
                           ('SLC/MLI', 'SLC_MLI')],
        'multi_look_MLI': [('MLI in_par', 'MLI_in_par')],
        'offset_fit': [('interact_flag', 'interact_mode')],
        'offset_plot_az': [('rmin', 'r_min'),
                           ('rmax', 'r_max')],
        'par_ASF_SLC': [('CEOS_SAR_leader', 'CEOS_leader')],
        'par_ASAR': [('ASAR/ERS_file', 'ASAR_ERS_file')],
        'par_EORC_JERS_SLC': [('slc', 'SLC')],
        'par_ERSDAC_PALSAR': [('VEXCEL_SLC_par', 'ERSDAC_SLC_par')],
        'par_ESA_JERS_SEASAT_SLC': [('[slc]', '[SLC]')],
        'par_ICEYE_GRD': [('<GeoTIFF>', '<GeoTIFF> <XML>'),
                          ('[mli]', '[MLI]')],
        'par_ICEYE_SLC': [('[slc]', '[SLC]')],
        'par_MSP': [('SLC/MLI_par', 'SLC_MLI_par')],
        'par_SIRC': [('UTC/MET', 'UTC_MET')],
        'par_TX_GRD': [('COSAR', 'GeoTIFF')],
        'par_UAVSAR_SLC': [('SLC/MLC_in', 'SLC_MLC_in'),
                           ('SLC/MLI_par', 'SLC_MLI_par'),
                           ('SLC/MLI_out', 'SLC_MLI_out')],
        'par_UAVSAR_geo': [('SLC/MLI_par', 'SLC_MLI_par')],
        'phase_sim': [('sim (', 'sim_unw (')],
        'product': [('wgt_flg', 'wgt_flag')],
        'radcal_MLI': [('MLI_PAR', 'MLI_par')],
        'radcal_PRI': [('GRD_PAR', 'GRD_par'),
                       ('PRI_PAR', 'PRI_par')],
        'radcal_SLC': [('SLC_PAR', 'SLC_par')],
        # curly braces need to be doubled because the description is later passed through str.format
        'ras2jpg': [('{', '{{'),
                    ('}', '}}')],
        'ras_data_pt': [('pdata1', 'pdata')],
        'ras_to_rgb': [('red channel', 'red_channel'),
                       ('green channel', 'green_channel'),
                       ('blue channel', 'blue_channel')],
        'rascc_mask_thinning': [('...', '[...]')],
        'rashgt': [('m/cycle', 'm_cycle')],
        'rashgt_shd': [('m/cycle', 'm_cycle'),
                       ('\n cycle ', '\n m_cycle ')],
        'rasdt_cmap_pt': [('pdata1', 'pdata')],
        'raspwr': [('hdrz', 'hdrsz')],
        'ras_ras': [('r_lin/log', 'r_lin_log'),
                    ('g_lin/log', 'g_lin_log'),
                    ('b_lin/log', 'b_lin_log')],
        'ras_ratio_dB': [('[min_cc] [max_cc] [scale] [exp]',
                          '[min_value] [max_value] [dB_offset]')],
        'rasSLC': [('[header]', '[hdrsz]')],
        'ratio': [('wgt_flg', 'wgt_flag')],
        'restore_float': [('input file', 'data_in'),
                          ('output file', 'data_out'),
                          ('interpolation_limit', 'interp_limit')],
        'S1_coreg_TOPS_no_refinement': [('RLK', 'rlks'),
                                        ('AZLK', 'azlks')],
        'S1_OPOD_vec': [('SLC_PAR', 'SLC_par')],
        'single_class_mapping': [('>...', '> <...>')],
        'ScanSAR_burst_cc_ad': [('bx', 'box_min'),
                                ('by', 'box_max')],
        'ScanSAR_burst_to_mosaic': [('DATA_tab_ref', 'data_tab_ref'),
                                    ('[mflg] [dtype]', '[mflg]')],
        'ScanSAR_full_aperture_SLC': [('SLCR_dir', 'SLC2_dir')],
        'scale_base': [('SLC-1_par-2', 'SLC1_par-2')],
        'SLC_interp_lt': [('SLC-2', 'SLC2'),
                          ('blksz', 'blk_size')],
        'SLC_intf': [('SLC1s_par', 'SLC-1s_par'),
                     ('SLC2Rs_par', 'SLC-2Rs_par')],
        'SLC_intf_geo2': [('cc (', 'CC (')],
        'SLC_interp_map': [('coffs2_sm', 'coffs_sm')],
        'srtm_mosaic': [('<lon>', '<lon2>')],
        'SSI_INT_S1': [('<SLC2> <par2>', '<SLC_tab2>')],
        'texture': [('weights_flag', 'wgt_flag')],
        'ts_rate': [('sim_flg', 'sim_flag')],
        'TX_SLC_preproc': [('TX_list', 'TSX_list')],
        'uchar2float': [('infile', 'data_in'),
                        ('outfile', 'data_out')],
        'validate': [('ras1', 'ras_map'),
                     ('rasf_map', 'ras_map'),
                     ('ras2', 'ras_inv'),
                     ('rasf_inventory', 'ras_inv'),
                     ('class1[1]', 'class1_1'),
                     ('class1[2]', 'class1_2'),
                     ('class1[n]', 'class1_n'),
                     ('class2[1]', 'class2_1'),
                     ('class2[2]', 'class2_2'),
                     ('class2[n]', 'class2_n')]
    }
    if command_base in parnames_lookup:
        for replacement in parnames_lookup[command_base]:
            out = out.replace(*replacement)
    ###########################################
    # filter header (general command description) and usage description string
    header = '\n'.join([x.strip('* ') for x in re.findall('[*]{3}.*(?:[*]{3}|)', out)])
    header = '| ' + header.replace('\n', '\n| ')
    usage = re.search('usage:.*(?=\n)', out).group()

    # filter required (<...>) and optional ([...]) arguments from usage description text
    arg_req_raw = [re.sub(r'[^\w.-]*', '', x) for x in re.findall('[^<]*<([^>]*)>', usage)]
    arg_opt_raw = [re.sub(r'[^\w.-]*', '', x) for x in re.findall(r'[^[]*\[([^]]*)\]', usage)]

    ###########################################
    # add parameters missing in the usage argument lists
    appends = {
        'mk_adf2_2d': ['cc_min', 'cc_max', 'mli_dir', 'scale', 'exponent', 'update'],
        'mk_pol2rec_2d': ['scale', 'exponent', 'min', 'max', 'rmax', 'mode', 'update'],
        'SLC_interp_S1_TOPS': ['mode', 'order'],
        'SLC_interp_map': ['mode', 'order']
    }
    if command_base in appends:
        arg_opt_raw.extend(appends[command_base])
    ###########################################
    # define parameter replacements; this is intended for parameters which are to be aggregated into a list parameter
    # each entry is a list of tuples (old parameter names, new parameter name(s), description(s) of the new names)
    replacements = {
        'cc_monitoring': [(['nfiles', 'f1', 'f2', '...'],
                           ['files'],
                           ['a list of input data files (float)'])],
        'dis_data': [(['nstack', 'pdata1', '...'],
                      ['pdata'],
                      ['a list of point data stack files'])],
        'lin_comb': [(['nfiles', 'f1', 'f2', '...'],
                      ['files'],
                      ['a list of input data files (float)']),
                     (['factor1', 'factor2', '...'],
                      ['factors'],
                      ['a list of factors to multiply the input files with'])],
        'lin_comb_cpx': [(['nfiles', 'f1', 'f2', '...'],
                          ['files'],
                          ['a list of input data files (float)']),
                         (['factor1_r', 'factor2_r', '...'],
                          ['factors_r'],
                          ['a list of real part factors to multiply the input files with']),
                         (['factor1_i', 'factor2_i'],
                          ['factors_i'],
                          ['a list of imaginary part factors to multiply the input files with'])],
        'mask_class': [(['n_class', 'class_1', '...', 'class_n'],
                        ['class_values'],
                        ['a list of class map values'])],
        'mosaic': [(['nfiles', 'data_in1', 'DEM_par1', 'data_in2', 'DEM_par2', '...', '...'],
                    ['data_in_list', 'DEM_par_list'],
                    ['a list of input data files',
                     'a list of DEM/MAP parameter files for each data file'])],
        'multi_class_mapping': [(['nfiles', 'f1', 'f2', '...', 'fn'],
                                 ['files'],
                                 ['a list of input data files (float)'])],
        'rascc_mask_thinning': [(['thresh_1', '...', 'thresh_nmax'],
                                 ['thresholds'],
                                 ['a list of thresholds sorted from smallest to '
                                  'largest scale sampling reduction'])],
        'single_class_mapping': [(['nfiles', 'f1', '...', 'fn'],
                                  ['files'],
                                  ['a list of point data stack files']),
                                 (['lt1', 'ltn'],
                                  ['thres_lower'],
                                  ['a list of lower thresholds for the files']),
                                 (['ut1', 'utn'],
                                  ['thres_upper'],
                                  ['a list of upper thresholds for the files'])],
        'validate': [(['nclass1', 'class1_1', 'class1_2', '...', 'class1_n'],
                      ['classes_map'],
                      ['a list of class values for the map data file (max. 16), 0 for all']),
                     (['nclass2', 'class2_1', 'class2_2', '...', 'class2_n'],
                      ['classes_inv'],
                      ['a list of class values for the inventory data file (max. 16), 0 for all'])]
    }

    if '..' in usage and command_base not in replacements:
        raise RuntimeError('the command contains multi-args which were not properly parsed')

    def replace(inlist, replacement):
        # substitute each group of old parameter names by the new list parameter name(s);
        # the first old name is replaced in place and all others of the group are removed
        outlist = list(inlist)
        for old, new, description in replacement:
            if old[0] not in outlist:
                return outlist
            outlist[outlist.index(old[0])] = new
            for i in range(1, len(old)):
                if old[i] in outlist:
                    outlist.remove(old[i])
        # `new` is inserted as a list; dissolve is expected to flatten the nested result
        return dissolve(outlist)

    arg_req = list(arg_req_raw)
    arg_opt = list(arg_opt_raw)

    if command_base in replacements:
        arg_req = replace(arg_req, replacements[command_base])
        arg_opt = replace(arg_opt, replacements[command_base])

    if command_base in ['par_CS_geo', 'par_KS_geo']:
        # cut off everything starting at 'trunk'
        out = re.sub('[ ]*trunk.*', '', out, flags=re.DOTALL)
    ###########################################
    # check if there are any double parameters
    double = [k for k, v in Counter(arg_req + arg_opt).items() if v > 1]
    if len(double) > 0:
        raise RuntimeError('double parameter{0}: {1}'.format('s' if len(double) > 1 else '', ', '.join(double)))
    ###########################################
    # add a parameter inlist for commands which take interactive input via stdin
    # the list of commands, which are interactive is hard to assess and thus likely a source of future errors
    inlist = ['create_dem_par', 'par_ESA_ERS']

    if command_base in inlist:
        arg_req.append('inlist')
    ######################################################################################
    # create the function argument string for the Python function
    # optional arguments are parametrized with '-' as default value, e.g. arg_opt='-'
    # a '-' in the parameter name is replaced with '_'
    # example: "arg1, arg2, arg3='-'"
    argstr_function = re.sub(r'([^\'])-([^\'])', r'\1_\2',
                             ', '.join(arg_req + [x + "='-'" for x in arg_opt])) \
        .replace(', def=', ', drm=')

    # create the function definition string
    fun_def = 'def {name}({args_fun}, logpath=None, outdir=None, shellscript=None):' \
        .format(name=command_base.replace('-', '_'),
                args_fun=argstr_function)

    # a Python function name cannot start with a digit
    if command_base == '2PASS_INT':
        fun_def = fun_def.replace(command_base, 'TWO_PASS_INT')
    ######################################################################################
    # special handling of flag args
    # each entry is a tuple (parameter name, command line flag, default value);
    # the parameter names must match those defined in `parnames_lookup`/`appends` above
    flag_args = {
        'mk_adf2_2d': [('mli_dir', '-m', None),
                       ('scale', '-s', None),
                       ('exponent', '-e', None),
                       ('update', '-u', False)],
        'mk_pol2rec_2d': [('scale', '-s', None),
                          ('exponent', '-e', None),
                          ('min', '-a', None),
                          ('max', '-b', None),
                          ('rmax', '-R', None),
                          ('mode', '-m', None),
                          ('update', '-u', False)]
    }

    # replace arg default like arg='-' with arg=None or arg=False
    if command_base in flag_args:
        for arg in flag_args[command_base]:
            fun_def = re.sub('{}=\'-\''.format(arg[0]),
                             '{0}={1}'.format(arg[0], arg[2]),
                             fun_def)
    ######################################################################################
    # create the process call argument string

    # a '-' in the parameter name is replaced with '_'
    # e.g. 'arg1, arg2, arg3'
    # if a parameter is named 'def' (not allowed in Python) it is renamed to 'drm'

    # inlist is not a proc arg but a parameter passed to function process
    proc_args = arg_req + arg_opt
    if command_base in inlist:
        proc_args.remove('inlist')
    proc_args_tmp = list(proc_args)
    # insert the length of a list argument as a proc arg
    if command_base in replacements and command_base != 'rascc_mask_thinning':
        key = replacements[command_base][0][1]
        if isinstance(key, list):
            key = key[0]
        proc_args_tmp.insert(proc_args_tmp.index(key), 'len({})'.format(key))

    if command_base == 'validate':
        index = proc_args_tmp.index('classes_inv')
        proc_args_tmp.insert(index, 'len(classes_inv)')

    argstr_process = ', '.join(proc_args_tmp) \
        .replace('-', '_') \
        .replace(', def,', ', drm,')

    # create the process argument list string
    cmd_str = "cmd = ['{command}', {args_cmd}]".format(command=command, args_cmd=argstr_process)

    # special handling of optional flag args
    # the args are removed from the cmd list and flags (plus values) added if not None or True
    # e.g. '-u' if update=True or '-m /path' if mli_dir='/path'
    if command_base in flag_args:
        for arg in flag_args[command_base]:
            cmd_str = cmd_str.replace(', {}'.format(arg[0]), '')
            cmd_str += "\nif {a} is not {d}:\n{i}cmd.append('{k}')" \
                .format(i=indent, d=arg[2], k=arg[1], a=arg[0])
            # flags with a None default take a value, which is appended after the flag
            if arg[2] is None:
                cmd_str += '\n{i}cmd.append({a})'.format(i=indent, a=arg[0])

    # create the process call string
    proc_str = "process(cmd, logpath=logpath, outdir=outdir{inlist}, shellscript=shellscript)" \
        .format(inlist=', inlist=inlist' if command_base in inlist else '')
    fun_proc = '{0}\n{1}'.format(cmd_str, proc_str)

    if command_base == 'lin_comb_cpx':
        fun_proc = fun_proc.replace('factors_r, factors_i', 'zip(factors_r, factors_i)')
    elif command_base == 'mosaic':
        fun_proc = fun_proc.replace('data_in_list, DEM_par_list', 'zip(data_in_list, DEM_par_list)')
    elif command_base == 'single_class_mapping':
        fun_proc = fun_proc.replace('files, thres_lower, thres_upper', 'zip(files, thres_lower, thres_upper)')

    ######################################################################################
    # create the function docstring

    # find the start of the docstring and filter the result
    doc_start = 'input parameters:[ ]*\n' if re.search('input parameters', out) else 'usage:.*(?=\n)'
    doc = '\n' + out[re.search(doc_start, out).end():]

    # define a pattern containing individual parameter documentations
    pattern = r'\n[ ]*[<\[]*(?P<par>{0})[>\]]*[\t ]+(?P<doc>.*)'.format(
        '|'.join(arg_req_raw + arg_opt_raw).replace('.', r'\.'))

    # identify the start indices of all pattern matches; the end of the text closes the last item
    starts = [m.start(0) for m in re.finditer(pattern, doc)] + [len(doc)]

    # filter out all individual (parameter, description) docstring tuples
    doc_items = []
    # j is the index of the next proc arg which is expected to be documented
    j = 0
    done = []
    for i in range(0, len(starts) - 1):
        doc_raw = doc[starts[i]:starts[i + 1]]
        doc_list = list(re.search(pattern, doc_raw, flags=re.DOTALL).groups())

        if doc_list[0] not in proc_args:
            # the parameter was aggregated into a list parameter;
            # add the documentation of the list parameter(s) instead (only once)
            if command_base in replacements:
                repl = replacements[command_base][0]
                for k, item in enumerate(repl[1]):
                    if item not in done:
                        doc_items.append([item, repl[2][k]])
                        done.append(item)
                        j += 1
            continue

        if doc_list[0] in done:
            # the parameter name occurs again; append the text to the last documentation item
            doc_items[-1][1] += doc_raw
            continue

        # fill up parameters which are listed in the usage but not documented
        while doc_list[0] != proc_args[j]:
            doc_list_sub = [proc_args[j], 'not documented']
            doc_items.append(doc_list_sub)
            j += 1

        doc_items.append(doc_list)
        done.append(doc_items[-1][0])
        j += 1

    for k in range(j, len(proc_args)):
        doc_items.append([proc_args[k], 'not documented'])

    # add a parameter inlist to the docstring tuples
    if command_base in inlist:
        pos = [x[0] for x in doc_items].index(arg_opt[0])
        doc_items.insert(pos, ('inlist', 'a list of arguments to be passed to stdin'))

    # remove the replaced parameters from the argument lists
    doc_items = [x for x in doc_items if x[0] in arg_req + arg_opt]

    # replace parameter names which are not possible in Python syntax, i.e. containing '-' or named 'def'
    for i, item in enumerate(doc_items):
        par = item[0].replace('-', '_').replace(', def,', ', drm,')
        description = item[1]
        doc_items[i] = (par, description)

    if command_base in ['par_CS_geo', 'par_KS_geo']:
        doc_items.append(('MLI_par', '(output) ISP SLC/MLI parameter file (example: yyyymmdd.mli.par)'))
        doc_items.append(('DEM_par', '(output) DIFF/GEO DEM parameter file (example: yyyymmdd.dem_par)'))
        doc_items.append(('GEO', '(output) Geocoded image data file (example: yyyymmdd.geo)'))

    # check if all parameters are documented:
    proc_args = [x.replace('-', '_').replace(', def,', ', drm,') for x in arg_req + arg_opt]
    mismatch = [x for x in proc_args if x not in [y[0] for y in doc_items]]
    if len(mismatch) > 0:
        raise RuntimeError('parameters missing in docsring: {}'.format(', '.join(mismatch)))
    ###########################################
    # format the docstring parameter descriptions

    docstring_elements = ['Parameters\n----------']
    # do some extra formatting
    for par, description in doc_items:
        description = re.split(r'\n+\s*', description.strip('\n'))
        # escape * characters (which are treated as special characters for bullet lists by sphinx)
        description = [x.replace('*', r'\*') for x in description]

        # convert all lines starting with an integer number or 'NOTE' to bullet list items
        latest = None
        for idx, line in enumerate(description):
            if re.search('^(?:(?:-|)[-0-9]+|NOTE):', line):
                latest = idx
                # prepend '* ' and replace missing spaces after a colon: 'x:x' -> 'x: x'
                description[idx] = '* ' + re.sub(r'((?:-|)[-0-9]+:)(\w+)', r'\1 \2', line)

        # format documentation lines coming after the last bullet list item
        # sphinx expects lines after the last bullet item to be indented by two spaces if
        # they belong to the bullet item or otherwise a blank line to mark the end of the bullet list
        # (explicit None check so that a bullet item in the very first line is handled as well)
        if latest is not None:
            if latest + 1 < len(description):
                # case if there are still lines coming after the last bullet item,
                # prepend an extra two spaces to these lines so that they are properly
                # aligned with the text of the bullet item
                for idx in range(latest + 1, len(description)):
                    description[idx] = '  ' + description[idx]
            else:
                # if not, then insert an extra blank line
                description[-1] = description[-1] + '\n'

        # parse the final documentation string for the current parameter
        description = '\n{0}{0}'.join(description).format(indent)
        doc = '{0}:\n{1}{2}'.format(par, indent, description)
        docstring_elements.append(doc)
    ###########################################
    # add docstrings of general parameters and combine the result
    # create docstring for parameter logpath
    doc = 'logpath: str or None\n{0}a directory to write command logfiles to'.format(indent)
    docstring_elements.append(doc)

    # create docstring for parameter outdir
    doc = 'outdir: str or None\n{0}the directory to execute the command in'.format(indent)
    docstring_elements.append(doc)

    # create docstring for parameter shellscript
    doc = 'shellscript: str or None\n{0}a file to write the Gamma commands to in shell format'.format(indent)
    docstring_elements.append(doc)

    # combine the complete docstring
    fun_doc = '\n{header}\n\n{doc}\n' \
        .format(header=header,
                doc='\n'.join(docstring_elements))
    ######################################################################################

    # combine the elements to a complete Python function string
    fun = '''{defn}\n"""{doc}"""\n{proc}'''.format(defn=fun_def, doc=fun_doc, proc=fun_proc)

    # indent all lines and add an extra empty line at the end
    fun = fun.replace('\n', '\n{}'.format(indent)) + '\n'

    return fun
def parse_command(command):
    """
    Parse the help text of a Gamma command to a Python function including a docstring.
    The docstring is in rst format and can thus be parsed by e.g. sphinx.
    This function is not intended to be used by itself, but rather within function :func:`parse_module`.

    Parameters
    ----------
    command: str
        the name of the gamma command

    Returns
    -------
    str
        the full Python function text

    Raises
    ------
    OSError
        if the command cannot be found by ``which``
    RuntimeError
        if a parameter occurs twice in the usage description or
        if the documentation of a parameter cannot be found in the help text
    """
    command = which(command)
    if command is None:
        raise OSError('command does not exist')
    # run the command without passing arguments to just catch its usage description
    proc = sp.Popen(command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
    out, err = proc.communicate()
    out += err

    # filter header command description and usage description text
    header = '\n'.join([x.strip('* ') for x in re.findall('[*]{3}.*[*]{3}', out)])
    header = '| ' + header.replace('\n', '\n| ')
    usage = re.search('usage:.*(?=\n)', out).group()

    # filter required (<...>) and optional ([...]) arguments from usage description text
    arg_req = [re.sub(r'[^\w.-]*', '', x) for x in re.findall('[^<]*<([^>]*)>', usage)]
    arg_opt = [re.sub(r'[^\w.-]*', '', x) for x in re.findall(r'[^[]*\[([^]]*)\]', usage)]
    arg_all = arg_req + arg_opt

    # fix inconsistencies in parameter naming related to case differences,
    # e.g. ISP_PAR in the usage text vs. ISP_Par in the parameter description
    # the names are escaped so that characters like '.' are matched literally
    for arg in arg_all:
        for item in re.findall(re.escape(arg), out, re.IGNORECASE):
            if item != arg:
                out = out.replace(item, arg)

    double = [k for k, v in Counter(arg_all).items() if v > 1]
    if len(double) > 0:
        raise RuntimeError('double parameter{0}: {1}'.format('s' if len(double) > 1 else '', ', '.join(double)))

    # create the function argument string for the Python function
    # optional arguments are parametrized with '-' as default value, e.g. arg_opt='-'
    # a '-' in the parameter name is replaced with '_'
    # example: "arg1, arg2, arg3='-'"
    argstr_function = re.sub(r'([^\'])-([^\'])', r'\1_\2',
                             ', '.join(arg_req + [x + "='-'" for x in arg_opt])) \
        .replace(', def=', ', drm=')

    # create the process call argument string
    # a '-' in the parameter name is replaced with '_'
    # e.g. 'arg1, arg2, arg3'
    # if a parameter is named 'def' (not allowed in Python) it is renamed to 'drm'
    argstr_process = ', '.join(arg_all) \
        .replace('-', '_') \
        .replace(', def,', ', drm,')

    # parse the parameter documentation to a Python docstring format

    # define the number of spaces to indent
    indent = ' ' * 4

    docstring_elements = ['Parameters\n----------']

    # gather the indices, which mark the documentation start of the respective parameters within
    # the raw documentation text
    starts = []
    for x in arg_all:
        found = re.search(r'\n[ ]*{0} .*'.format(re.escape(x)), out)
        if found is None:
            raise RuntimeError('cannot find parameter {}'.format(x))
        starts.append(found.start())
    starts.append(len(out))

    # define a pattern for parsing individual parameter documentations
    pattern = r'\n[ ]*(?P<par>{0})[ ]+(?P<doc>.*)'.format('|'.join(re.escape(x) for x in arg_all))

    for start, stop in zip(starts[:-1], starts[1:]):
        # draw a subset from the Gamma docstring containing only the doc of a single parameter
        doc_raw = out[start:stop]

        # parse the docstring
        match = re.match(pattern, doc_raw, flags=re.DOTALL)
        if not match:
            continue

        # retrieve the parameter name and the documentation lines
        par = match.group('par')
        doc_items = re.split(r'\n+\s*', match.group('doc').strip('\n'))

        # escape * characters (which are treated as special characters for bullet lists by sphinx)
        doc_items = [x.replace('*', r'\*') for x in doc_items]

        # convert all lines starting with an integer number or 'NOTE' to bullet list items
        latest = None
        for idx, line in enumerate(doc_items):
            if re.search('^(?:(?:-|)[-0-9]+|NOTE):', line):
                latest = idx
                # prepend '* ' and replace missing spaces after a colon: 'x:x' -> 'x: x'
                doc_items[idx] = '* ' + re.sub(r'((?:-|)[-0-9]+:)(\w+)', r'\1 \2', line)

        # format documentation lines coming after the last bullet list item
        # sphinx expects lines after the last bullet item to be indented by two spaces if
        # they belong to the bullet item or otherwise a blank line to mark the end of the bullet list
        # (explicit None check so that a bullet item in the very first line is handled as well)
        if latest is not None:
            if latest + 1 < len(doc_items):
                # case if there are still lines coming after the last bullet item,
                # prepend an extra two spaces to these lines so that they are properly
                # aligned with the text of the bullet item
                for idx in range(latest + 1, len(doc_items)):
                    doc_items[idx] = '  ' + doc_items[idx]
            else:
                # if not, then insert an extra blank line
                doc_items[-1] = doc_items[-1] + '\n'

        # parse the final documentation string for the current parameter
        description = '\n{0}{0}'.join(doc_items).format(indent)
        doc = '{0}:\n{1}{2}'.format(par, indent, description)
        docstring_elements.append(doc)

    # create docstring for parameter logpath
    doc = 'logpath: str or None\n{0}a directory to write command logfiles to'.format(indent)
    docstring_elements.append(doc)

    # create the function definition string
    fun_def = 'def {name}({args_fun}, logpath=None):' \
        .format(name=os.path.basename(command).replace('-', '_'),
                args_fun=argstr_function)

    # create the complete docstring
    fun_doc = '\n{header}\n\n{doc}\n' \
        .format(header=header,
                doc='\n'.join(docstring_elements))

    # create the process call string
    fun_proc = "process(['{command}', {args_cmd}], logpath=logpath)" \
        .format(command=command,
                args_cmd=argstr_process)

    # combine the elements to a complete Python function string
    fun = '''{defn}\n"""{doc}"""\n{proc}'''.format(defn=fun_def, doc=fun_doc, proc=fun_proc)

    # indent all lines and add an extra empty line at the end
    fun = fun.replace('\n', '\n{}'.format(indent)) + '\n'

    return fun