def main(argv=None):
    """ Parse the command line, read the data file, and report per-column statistics.

    :param argv: command-line argument list, or `None` to use ``sys.argv[1:]``
    :return: GOOD_RET on success, otherwise an error return code
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    len_buffer = None
    try:
        # The buffer option arrives as a string; validate/convert it before any file work.
        if args.buffer is not None:
            try:
                len_buffer = float(args.buffer)
            except ValueError:
                raise InvalidDataError("Input for buffer ({}) could not be converted to a float.".format(args.buffer))

        # Default the output directory to the directory of the input file.
        if args.out_dir is None:
            args.out_dir = os.path.dirname(args.file)

        min_max_dict = None
        if args.min_max_file is not None:
            min_max_dict = read_csv(args.min_max_file, quote_style=csv.QUOTE_NONNUMERIC)

        process_file(args.file, args.out_dir, len_buffer, args.delimiter, min_max_dict,
                     header=args.names, make_hist=args.histogram)
    except IOError as err:
        warning("Problems reading file:", err)
        return IO_ERROR
    except InvalidDataError as err:
        warning("Problems reading data:", err)
        return INVALID_DATA

    return GOOD_RET  # success
def parse_cmdline(argv):
    """ Build the argument parser for the line-amending script and parse `argv`.

    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``
    :return: tuple of (parsed namespace or None, return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = ThrowingArgumentParser(description='Reads in a file and adds a beginning and/or end to each line. '
                                                'The first argument must be the name of the file to be read.')
    # The file to amend is a required positional argument.
    parser.add_argument("file", help="The location of the file to be amended (required).", )
    parser.add_argument("-b", "--begin", default=DEF_BEGIN_STR,
                        help="String to add to the beginning of a line.")
    parser.add_argument("-e", "--end", default=DEF_END_STR,
                        help="String to add to the end of a line.")
    parser.add_argument("-n", "--new_name", default=DEF_NEW_FNAME,
                        help="Name of amended file.")

    args = None
    try:
        args = parser.parse_args(argv)
        # With neither a begin nor end string, the output would equal the input; warn the user.
        if args.begin == DEF_BEGIN_STR and args.end == DEF_END_STR:
            warning("Return file will be the same as the input, as no begin or end strings were passed. "
                    "Use -h for help.")
    except ArgumentParserError as err:
        warning("Argument Parser Error:", err)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def main(argv=None):
    """ Entry point: parse arguments, then amend each line of the input file.

    :param argv: command-line argument list, or `None` to use ``sys.argv[1:]``
    :return: GOOD_RET on success, otherwise an error return code
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    try:
        process_file(args.file, args.begin, args.end, args.new_name)
    except IOError as err:
        warning("Problems reading file:", err)
        return IO_ERROR

    return GOOD_RET  # success
def parse_cmdline(argv):
    """ Returns the parsed argument list and return code.

    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``
    :return: tuple of (parsed namespace or None, return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(description='Reads in space-separated columns and returns the min, max, avg, and '
                                                 'std dev for each column. It can optionally prepare histograms of '
                                                 'non-numerical data.')
    parser.add_argument("-f", "--file", help="The location of the file with the dimensions with one line per vector, "
                                             "space-separated, containing at least two lines. The default file is {}, "
                                             "located in the current directory".format(DEF_ARRAY_FILE),
                        default=DEF_ARRAY_FILE)
    # Fixed: the concatenated help string was missing a space ("dimensionin").
    parser.add_argument("-b", "--buffer", help="If specified, the program will output only the max dimension "
                                               "in each column plus an additional buffer amount (float).",
                        default=None)
    parser.add_argument("-d", "--delimiter", help="Delimiter. Default is '{}'".format(DEF_DELIMITER),
                        default=DEF_DELIMITER)
    parser.add_argument("-m", "--min_max_file", help="CSV file with column names (first line), "
                                                     "initial values (second line), min values "
                                                     "(third line), and max values (fourth line), used to further "
                                                     "analyze the data file.",
                        default=None)
    parser.add_argument("-n", "--names", help="File contains column names (header) (default is false). "
                                              "Note: lines beginning with '#' are ignored.",
                        action='store_true')
    parser.add_argument("-o", "--out_dir", help="Output folder. Default is the directory of the file to be processed.",
                        default=None)
    parser.add_argument("-s", "--histogram", help="Create histograms of the non-numerical data (default is false).",
                        action='store_true')

    args = None
    try:
        args = parser.parse_args(argv)
    except SystemExit as e:
        # argparse exits with code 0 for '-h'/'--help'; treat that as success, not an input error.
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def parse_cmdline(argv):
    """ Returns the parsed argument list and return code.

    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``
    :return: tuple of (parsed namespace or None, return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(description='Adds a word to a Hunspell-type dictionary file.')
    # Fixed: help text previously had an unclosed parenthesis ("(and after a '/'.").
    parser.add_argument("-s", "--sfx", help="Suffix to be added after word (and after a '/'). For example, 'SM' will "
                                            "allow the word to be made plural and possessive. See hunspell "
                                            "documentation for more documentation on codes.",
                        default='')
    parser.add_argument("-d", "--dict_loc", help="Location of the dictionary file to be modified. "
                                                 "The default is: '{}'".format(DEF_DICT),
                        default=DEF_DICT)
    parser.add_argument("new_word", help="The word to add to the dictionary", type=str)

    args = None
    try:
        args = parser.parse_args(argv)
    except (InvalidDataError, IOError, DuplicateOptionError, SystemExit) as e:
        # SystemExit with code 0 comes from '-h'/'--help' and is not an error.
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def main(argv=None):
    """ Entry point: read the configuration, then create the amended pdb file.

    :param argv: command-line argument list, or `None` to use ``sys.argv[1:]``
    :return: GOOD_RET on success, otherwise an error return code
    """
    args, ret = parse_cmdline(argv)
    # TODO: did not show the expected behavior when I didn't have a required cfg in the ini file
    if ret != GOOD_RET or args is None:
        return ret

    cfg = args.config

    # Read and process pdb files
    try:
        atom_num_dict = read_csv_dict(cfg[ATOM_REORDER_FILE])
        mol_num_dict = read_csv_dict(cfg[MOL_RENUM_FILE], one_to_one=False)
        element_dict = create_element_dict(cfg[ELEMENT_DICT_FILE])
        process_pdb(cfg, atom_num_dict, mol_num_dict, element_dict)
    except IOError as err:
        warning("Problems reading file:", err)
        return IO_ERROR
    except (InvalidDataError, ValueError) as err:
        warning("Problems with input:", err)
        return INVALID_DATA

    return GOOD_RET  # success
def main(argv=None):
    """ Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    cfg = args.config
    try:
        make_tpl(cfg, cfg[TPL_FNAME], cfg[FILLED_TPL_FNAME])
    except (TemplateNotReadableError, IOError) as e:
        warning("Problems reading file: {}".format(e))
        return IO_ERROR
    except (KeyError, InvalidDataError) as e:
        warning(e)
        # Fixed: these are data/configuration problems, not I/O problems; return
        # INVALID_DATA to match the convention used by the other entry points.
        # NOTE(review): confirm INVALID_DATA is imported in this module.
        return INVALID_DATA

    return GOOD_RET  # success
def parse_cmdline(argv):
    """ Returns the parsed argument list and return code.

    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``
    :return: tuple of (parsed namespace or None, return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(description='Creates a new version of a pdb file. Atoms will be numbered '
                                                 'starting from one. Options include renumbering molecules.')
    # Fixed typo in help text: "where the program as run" -> "where the program was run".
    parser.add_argument("-c", "--config", help="The location of the configuration file in ini format. "
                                               "The default file name is {}, located in the "
                                               "base directory where the program was run.".format(DEF_CFG_FILE),
                        default=DEF_CFG_FILE, type=read_cfg)

    args = None
    try:
        args = parser.parse_args(argv)
    except IOError as e:
        # read_cfg (the type= converter) raises IOError when the config file cannot be read.
        warning(e)
        parser.print_help()
        return args, IO_ERROR
    except (KeyError, InvalidDataError, SystemExit) as e:
        # SystemExit with code 0 comes from '-h'/'--help' and is not an error.
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning("Input data missing:", e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def process_pdb(cfg, atom_num_dict, mol_num_dict, element_dict):
    """ Read the configured pdb file, renumber/reorder atoms and molecules, and write output files.

    Writes the amended pdb, and, when QMMM residues are configured, an 'amino_id.dat'
    (qm_kind/qm_link sections) and 'vmd_protein_atoms.dat' (zero-based atom indices).

    :param cfg: configuration dict (file locations, column boundaries, processing flags)
    :param atom_num_dict: mapping of old atom number -> new atom number (empty dict = no reordering)
    :param mol_num_dict: mapping of old molecule number -> new molecule number
    :param element_dict: mapping of stripped atom type -> element symbol
    :raises InvalidDataError: if a QMMM residue atom type is missing from element_dict
    """
    # NOTE(review): this block was recovered from whitespace-mangled source; the nesting of the
    # QMMM element-classification branch and the exact spacing inside atom-type literals
    # (' OH2 ', ' H1 ', ' H2 ', 'ATOM ') should be confirmed against the original file.
    pdb_loc = cfg[PDB_FILE]
    pdb_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
    # to allow warning to be printed once and only once
    missing_types = []
    qmmm_elem_id_dict = {}
    ca_res_atom_id_dict = {}
    cb_res_atom_id_dict = {}
    atoms_for_vmd = []

    with open(pdb_loc) as f:
        wat_count = 0
        atom_count = 0
        mol_count = 1

        current_mol = None
        last_mol_num = None
        atoms_content = []

        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            line_head = line[:cfg[PDB_LINE_TYPE_LAST_CHAR]]
            # head_content to contain Everything before 'Atoms' section
            # also capture the number of atoms
            if line_head == 'REMARK' or line_head == 'CRYST1':
                pdb_data[HEAD_CONTENT].append(line)
            # atoms_content to contain everything but the xyz
            elif line_head == 'ATOM ':
                # My template PDB has ***** after atom_count 99999. Thus, I'm renumbering. Otherwise, this this:
                # atom_num = line[cfg[PDB_LINE_TYPE_LAST_CHAR]:cfg[PDB_ATOM_NUM_LAST_CHAR]]
                # For renumbering, making sure prints in the correct format, including num of characters:
                atom_count += 1

                # For reordering atoms
                if atom_count in atom_num_dict:
                    atom_id = atom_num_dict[atom_count]
                else:
                    atom_id = atom_count
                if atom_id > 99999:
                    # PDB atom serial field is 5 characters; fall back to hex above 99999.
                    atom_num = format(atom_id, 'x')
                    if len(atom_num) > 5:
                        # Fixed: missing space between concatenated string pieces ("Thiswill").
                        warning("Hex representation of {} is {}, which is greater than 5 characters. This "
                                "will affect the PDB output formatting.".format(atom_id, atom_num))
                else:
                    atom_num = '{:5d}'.format(atom_id)

                # Slice the fixed-width pdb columns using the configured boundaries.
                atom_type = line[cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_TYPE_LAST_CHAR]]
                atom_type_stripped = atom_type.strip()
                res_type = line[cfg[PDB_ATOM_TYPE_LAST_CHAR]:cfg[PDB_RES_TYPE_LAST_CHAR]]
                mol_num = int(line[cfg[PDB_RES_TYPE_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                pdb_x = float(line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                occ_t = line[cfg[PDB_Z_LAST_CHAR]:cfg[PDB_LAST_T_CHAR]]
                element = line[cfg[PDB_LAST_T_CHAR]:cfg[PDB_LAST_ELEM_CHAR]]
                last_cols = line[cfg[PDB_LAST_ELEM_CHAR]:]

                # For user-specified changing of molecule number
                if mol_num in mol_num_dict:
                    mol_num = mol_num_dict[mol_num]

                # If doing water molecule checking...
                if cfg[FIRST_WAT_ID] <= atom_count <= cfg[LAST_WAT_ID]:
                    if (wat_count % 3) == 0:
                        current_mol = mol_num
                        if atom_type != ' OH2 ':
                            warning('Expected an OH2 atom to be the first atom of a water molecule. '
                                    'Check line: {}'.format(line))
                        # last_cols = ' 0.00 0.00 S2 O'
                    else:
                        if current_mol != mol_num:
                            warning('Water not in order on line:', line)
                        if (wat_count % 3) == 1:
                            if atom_type != ' H1 ':
                                warning('Expected an H1 atom to be the second atom of a water molecule. '
                                        'Check line: {}'.format(line))
                        else:
                            if atom_type != ' H2 ':
                                warning('Expected an H2 atom to be the second atom of a water molecule. '
                                        'Check line: {}'.format(line))
                    wat_count += 1

                if mol_num in cfg[RESID_QMMM] and atom_type not in SKIP_ATOM_TYPES:
                    if atom_type == C_ALPHA:
                        # C-alpha atoms are recorded separately for the qm_link output.
                        ca_res_atom_id_dict[mol_num] = atom_id
                    else:
                        if atom_type == C_BETA:
                            cb_res_atom_id_dict[mol_num] = atom_id
                        if atom_type_stripped in element_dict:
                            element = element_dict[atom_type_stripped]
                        else:
                            # Fixed: missing space between concatenated string pieces.
                            raise InvalidDataError("Did not find atom type '{}' in the element dictionary. Please "
                                                   "provide a new atom type, element dictionary (using keyword {} "
                                                   "in the configuration file) that includes all atom types in the "
                                                   "residues identified with the '{}' key."
                                                   "".format(atom_type_stripped, ELEMENT_DICT_FILE, RESID_QMMM))
                        if element in qmmm_elem_id_dict:
                            qmmm_elem_id_dict[element].append(atom_id)
                        else:
                            qmmm_elem_id_dict[element] = [atom_id]
                        # VMD uses zero-based atom indices.
                        atoms_for_vmd.append(atom_id - 1)

                if cfg[ADD_ELEMENTS] and atom_count <= cfg[LAST_ADD_ELEM]:
                    if atom_type_stripped in element_dict:
                        element = element_dict[atom_type_stripped]
                    else:
                        # Warn once per unknown atom type.
                        if atom_type_stripped not in missing_types:
                            warning("Please add atom type '{}' to dictionary of elements. Will not write/overwrite "
                                    "element type in the pdb output.".format(atom_type_stripped))
                            missing_types.append(atom_type_stripped)

                # For numbering molecules from 1 to end
                if cfg[RENUM_MOL]:
                    if last_mol_num is None:
                        last_mol_num = mol_num
                    if mol_num != last_mol_num:
                        last_mol_num = mol_num
                        mol_count += 1
                        if mol_count == 10000:
                            warning("Molecule numbers greater than 9999 will be printed in hex")
                    # Due to PDB format constraints, need to print in hex starting at 9999 molecules.
                    if mol_count > 9999:
                        mol_num = format(mol_count, 'x')
                        if len(mol_num) > 4:
                            # Fixed: warning previously formatted with (atom_id, atom_num) instead of
                            # the molecule values it describes, and was missing a space ("Thiswill").
                            warning("Hex representation of {} is {}, which is greater than 4 characters. This "
                                    "will affect the PDB output formatting.".format(mol_count, mol_num))
                    else:
                        mol_num = '{:4d}'.format(mol_count)

                line_struct = [line_head, atom_num, atom_type, res_type, mol_num,
                               pdb_x, pdb_y, pdb_z, occ_t, element, last_cols]
                atoms_content.append(line_struct)

            # tail_content to contain everything after the 'Atoms' section
            else:
                pdb_data[TAIL_CONTENT].append(line)

    # Only sort if there is renumbering
    if len(atom_num_dict) > 0:
        pdb_data[ATOMS_CONTENT] = sorted(atoms_content, key=lambda entry: entry[1])
    else:
        pdb_data[ATOMS_CONTENT] = atoms_content

    if cfg[PDB_NEW_FILE] is None:
        f_name = create_out_fname(cfg[PDB_FILE], suffix="_new", base_dir=cfg[OUT_BASE_DIR])
    else:
        f_name = create_out_fname(cfg[PDB_NEW_FILE], base_dir=cfg[OUT_BASE_DIR])
    print_pdb(pdb_data[HEAD_CONTENT], pdb_data[ATOMS_CONTENT], pdb_data[TAIL_CONTENT],
              f_name, cfg[PDB_FORMAT])

    if len(cfg[RESID_QMMM]) > 0:
        f_name = create_out_fname('amino_id.dat', base_dir=cfg[OUT_BASE_DIR])
        # Write the first qm_kind section fresh, then append the rest.
        print_mode = "w"
        for elem in sorted(qmmm_elem_id_dict):
            print_qm_kind(qmmm_elem_id_dict[elem], elem, f_name, mode=print_mode)
            print_mode = 'a'
        print_qm_links(ca_res_atom_id_dict, cb_res_atom_id_dict, f_name, mode=print_mode)
        f_name = create_out_fname('vmd_protein_atoms.dat', base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=' ')
def parse_cmdline(argv=None):
    """ Returns the parsed argument list and return code.

    :param argv: A list of arguments, or `None` for ``sys.argv[1:]``.
    :return: tuple of (parsed namespace or None, return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(description='Fills in a template file with parameter values.')
    # Fixed typo in help text: "where the program as run" -> "where the program was run".
    parser.add_argument("-c", "--config",
                        help="The location of the configuration file in ini format. "
                             "The default file name is {}, located in the "
                             "base directory where the program was run. "
                             "Note: 1) a [{}] section is required. 2) optional sections are [{}] and "
                             "[{}], which allows key values to be calculated based on other tpl "
                             "values. 3) Equations will be evaluated in the order provided, so if "
                             "an equation depends on the value computed from another equation, list "
                             "the dependent equation after its inputs. 4) Multiple values and "
                             "equations may be listed for any keys. In that case, the program will "
                             "create multiple output files. If a static '{}' is provided, the "
                             "file will be overwritten, leaving only one filled file at the end. "
                             "The '{}' can include keys (i.e. filled_tpl_name = {{key1}}.txt), so "
                             "if multiple values are listed for key1 (i.e. key1 = A,B,C), multiple "
                             "output files will be created (A.txt, B.txt, C.txt)."
                             "".format(DEF_CFG_FILE, MAIN_SEC, TPL_VALS_SEC, TPL_EQS_SEC,
                                       FILLED_TPL_FNAME, FILLED_TPL_FNAME),
                        default=DEF_CFG_FILE, type=read_cfg)
    parser.add_argument("-f", "--filled_tpl_name",
                        help="File name for new file to be created by filling the template "
                             "file. It can also be specified in the configuration file. "
                             "If specified in both places, the command line option will "
                             "take precedence.",
                        default=None)

    args = None
    try:
        args = parser.parse_args(argv)
        # The template file must exist; give a tailored message for the default vs. a user value.
        if not os.path.isfile(args.config[TPL_FNAME]):
            if args.config[TPL_FNAME] == DEF_TPL:
                error_message = "Check input for the configuration key '{}'; " \
                                "could not find the default template file: {}"
            else:
                error_message = "Could not find the template file specified with " \
                                "the configuration key '{}': {}"
            raise IOError(error_message.format(TPL_FNAME, args.config[TPL_FNAME]))
        # The command-line value takes precedence over the configuration file.
        if args.filled_tpl_name is not None:
            args.config[FILLED_TPL_FNAME] = args.filled_tpl_name
        if args.config[FILLED_TPL_FNAME] is None:
            # Fixed: error message was garbled ("specified in the required either in the
            # command line for configuration file").
            raise InvalidDataError("Missing required key '{}', which can be specified "
                                   "either in the command line or the configuration file."
                                   "".format(FILLED_TPL_FNAME))
    except (KeyError, InvalidDataError, IOError, SystemExit) as e:
        # SystemExit with code 0 comes from '-h'/'--help' and is not an error.
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def process_file(data_file, out_dir, len_buffer, delimiter, min_max_dict, header=False, make_hist=False):
    """ Compute per-column statistics for a numeric data file and write them to a csv.

    Optionally compares the stats to user-supplied initial/min/max values and
    creates histograms of non-numeric columns.

    :param data_file: location of the file with one vector per line
    :param out_dir: directory for output files
    :param len_buffer: if not None, also report max + this buffer per column
    :param delimiter: column delimiter in data_file
    :param min_max_dict: None, or rows of {column name: value} for initial (0), min (1),
        and max (2) values used for comparison; assumes a header row is present when used
    :param header: whether data_file has a header row of column names
    :param make_hist: whether to create histograms of non-numeric columns
    :raises InvalidDataError: if the file cannot be parsed as a float array
    """
    try:
        dim_vectors, header_row, hist_data = np_float_array_from_file(data_file, delimiter=delimiter,
                                                                      header=header, gather_hist=make_hist)
    except InvalidDataError as e:
        raise InvalidDataError("{}\n"
                               "Run program with '-h' to see options, such as specifying header row (-n) "
                               "and/or delimiter (-d)".format(e))

    if header:
        # Pad with an empty cell so header names align with the stats columns below.
        to_print = [[''] + header_row]
    else:
        to_print = []

    max_vector = dim_vectors.max(axis=0)
    min_vector = dim_vectors.min(axis=0)
    avg_vector = dim_vectors.mean(axis=0)
    med_vector = np.percentile(dim_vectors, 50, axis=0)

    # Percentiles chosen to correspond to 2-sigma/1-sigma bounds of a normal distribution.
    # noinspection PyTypeChecker
    to_print += [['Min values:'] + min_vector.tolist(),
                 ['Max values:'] + max_vector.tolist(),
                 ['Avg values:'] + avg_vector.tolist(),
                 ['Std dev:'] + dim_vectors.std(axis=0, ddof=1).tolist(),
                 ['5% percentile:'] + np.percentile(dim_vectors, 4.55, axis=0).tolist(),
                 ['32% percentile:'] + np.percentile(dim_vectors, 31.73, axis=0).tolist(),
                 ['50% percentile:'] + med_vector.tolist(),
                 ['68% percentile:'] + np.percentile(dim_vectors, 68.27, axis=0).tolist(),
                 ['95% percentile:'] + np.percentile(dim_vectors, 95.45, axis=0).tolist(),
                 ]
    if len_buffer is not None:
        to_print.append(['Max plus {} buffer:'.format(len_buffer)] + (max_vector + len_buffer).tolist())

    if min_max_dict is not None:
        # NOTE(review): this branch reads header_row, so it assumes header=True — confirm with callers.
        nan_list = [np.nan] * len(header_row)
        avg_ini_diff = ['Avg % Diff:'] + nan_list
        med_ini_diff = ['Med % Diff:'] + nan_list
        med_is_min = ['Median is Min:'] + nan_list
        med_is_max = ['Median is Max:'] + nan_list
        # Fixed: loop variable was named 'header', shadowing the function parameter and
        # corrupting the 'if index == 0 and header' check in the printing loop below.
        for col_num, col_name in enumerate(to_print[0]):
            if col_name in min_max_dict[0]:
                ini_val = min_max_dict[0][col_name]
                low_val = min_max_dict[1][col_name]
                upp_val = min_max_dict[2][col_name]
                # col_num - 1 because to_print rows carry a label in column 0.
                avg_val = avg_vector[col_num - 1]
                med_val = med_vector[col_num - 1]
                min_val = min_vector[col_num - 1]
                max_val = max_vector[col_num - 1]
                # Relative tolerances with a TOL floor for values near zero.
                min_tol = max(TOL * max(abs(min_val), abs(low_val)), TOL)
                med_tol = max(TOL * abs(med_val), TOL)
                max_tol = max(TOL * max(abs(max_val), abs(upp_val)), TOL)
                if (low_val - min_val) > min_tol:
                    warning("Minimum value found for header '{}' ({}) is less than lower bound ({})"
                            "".format(col_name, min_val, low_val))
                if (max_val - upp_val) > max_tol:
                    warning("Maximum value found for header '{}' ({}) is greater than upper bound ({})"
                            "".format(col_name, max_val, upp_val))
                avg_ini_diff[col_num] = (avg_val - ini_val) / ini_val * 100
                med_ini_diff[col_num] = (med_val - ini_val) / ini_val * 100
                # Flag (1/0) whether the median sits at the lower/upper bound within tolerance.
                if abs(med_val - low_val) > med_tol:
                    med_is_min[col_num] = 0
                else:
                    med_is_min[col_num] = 1
                if abs(med_val - upp_val) > med_tol:
                    med_is_max[col_num] = 0
                else:
                    med_is_max[col_num] = 1
        for min_max_list in [avg_ini_diff, med_ini_diff, med_is_min, med_is_max]:
            to_print.append(min_max_list)

    # Printing to standard out: do not print quotes around strings because using csv writer
    # Only echo to stdout when the table is narrow enough to be readable.
    if len(dim_vectors[0]) < 12:
        for index, row in enumerate(to_print):
            # formatting for header
            if index == 0 and header:
                print("{:>20s} {}".format(row[0],
                                          ' '.join(['{:>16s}'.format(x.strip()) for x in row[1:]])))
            # formatting for vals
            else:
                print("{:>20s} {}".format(row[0],
                                          ' '.join(['{:16.6f}'.format(x) for x in row[1:]])))

    f_name = create_out_fname(data_file, prefix='stats_', ext='.csv', base_dir=out_dir)
    list_to_csv(to_print, f_name)

    if make_hist:
        create_hists(data_file, header_row, hist_data, out_dir)