예제 #1
0
파일: pdb_edit.py 프로젝트: abb58/md_utils
def process_pdb(cfg, atom_num_dict, mol_num_dict, element_dict):
    """
    Read the PDB file named by cfg[PDB_FILE], edit its ATOM records, and write the results.

    Non-blank lines are sorted into three groups: 'REMARK'/'CRYST1' lines (head), 'ATOM  ' records
    (parsed by the fixed column boundaries stored in cfg), and everything else (tail). Each ATOM record is
    renumbered by its position in the file, optionally remapped through atom_num_dict, and may have its
    molecule number and element replaced. The edited PDB is passed to print_pdb; when cfg[RESID_QMMM] is
    non-empty, 'amino_id.dat' and 'vmd_protein_atoms.dat' are written as well.

    :param cfg: configuration mapping; read for the file locations, column boundaries, water id range,
                QMMM residue ids, and the element/renumbering switches used below
    :param atom_num_dict: maps an atom's 1-based position among the ATOM records to the id it should get
    :param mol_num_dict: maps a molecule number read from the file to its replacement
    :param element_dict: maps the atom-type column text to the element to record for that atom
    :raises InvalidDataError: if an atom in a cfg[RESID_QMMM] residue has a type missing from element_dict
    """
    pdb_loc = cfg[PDB_FILE]
    pdb_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
    # remembers atom types already warned about, so each warning is printed once and only once
    missing_types = set()
    qmmm_elem_id_dict = {}
    ca_res_atom_id_dict = {}
    cb_res_atom_id_dict = {}
    atoms_for_vmd = []
    atoms_content = []
    # numeric ids kept parallel to atoms_content; the formatted id strings cannot be sorted reliably
    atom_ids = []

    with open(pdb_loc) as f:
        wat_count = 0
        atom_count = 0
        mol_count = 1

        current_mol = None
        last_mol_num = None

        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            line_head = line[:cfg[PDB_LINE_TYPE_LAST_CHAR]]

            # head_content: the REMARK and CRYST1 records
            if line_head == 'REMARK' or line_head == 'CRYST1':
                pdb_data[HEAD_CONTENT].append(line)

            elif line_head == 'ATOM  ':
                # The atom number column of the input is ignored (the template PDB has ***** after 99999);
                # atoms are renumbered by their position among the ATOM records instead.
                atom_count += 1

                # For reordering atoms
                if atom_count in atom_num_dict:
                    atom_id = atom_num_dict[atom_count]
                else:
                    atom_id = atom_count

                # Ids that do not fit in 5 decimal digits are written in hex
                if atom_id > 99999:
                    atom_num = format(atom_id, 'x')
                    if len(atom_num) > 5:
                        warning("Hex representation of {} is {}, which is greater than 5 characters. This "
                                "will affect the PDB output formatting.".format(atom_id, atom_num))
                else:
                    atom_num = '{:5d}'.format(atom_id)

                atom_type = line[cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_TYPE_LAST_CHAR]]
                res_type = line[cfg[PDB_ATOM_TYPE_LAST_CHAR]:cfg[PDB_RES_TYPE_LAST_CHAR]]
                mol_num = int(line[cfg[PDB_RES_TYPE_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                pdb_x = float(line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                occ_t = line[cfg[PDB_Z_LAST_CHAR]:cfg[PDB_LAST_T_CHAR]]
                element = line[cfg[PDB_LAST_T_CHAR]:cfg[PDB_LAST_ELEM_CHAR]]
                last_cols = line[cfg[PDB_LAST_ELEM_CHAR]:]

                # For user-specified changing of molecule number
                if mol_num in mol_num_dict:
                    mol_num = mol_num_dict[mol_num]

                # Water checking: within the configured atom range, atoms are expected in OH2, H1, H2
                # triplets that all share one molecule number
                if cfg[FIRST_WAT_ID] <= atom_count <= cfg[LAST_WAT_ID]:
                    wat_position = wat_count % 3
                    if wat_position == 0:
                        current_mol = mol_num
                        if atom_type != '  OH2 ':
                            warning('Expected an OH2 atom to be the first atom of a water molecule. '
                                    'Check line: {}'.format(line))
                    else:
                        if current_mol != mol_num:
                            warning('Water not in order on line:', line)
                        if wat_position == 1:
                            if atom_type != '  H1  ':
                                warning('Expected an H1 atom to be the second atom of a water molecule. '
                                        'Check line: {}'.format(line))
                        else:
                            if atom_type != '  H2  ':
                                warning('Expected an H2 atom to be the third atom of a water molecule. '
                                        'Check line: {}'.format(line))
                    wat_count += 1

                # Collect ids for the QMMM output files
                if mol_num in cfg[RESID_QMMM] and atom_type not in SKIP_ATOM_TYPES:
                    if atom_type == C_ALPHA:
                        # the alpha carbon is only recorded for print_qm_links, not listed by element
                        ca_res_atom_id_dict[mol_num] = atom_id
                    else:
                        if atom_type == C_BETA:
                            cb_res_atom_id_dict[mol_num] = atom_id
                        if atom_type in element_dict:
                            element = element_dict[atom_type]
                        else:
                            raise InvalidDataError("Did not find atom type '{}' in the element dictionary. Please "
                                                   "provide a new atom type, element dictionary (using keyword {} "
                                                   "in the configuration file) that includes all atom types in the "
                                                   "residues identified with the '{}' key."
                                                   "".format(atom_type, ELEMENT_DICT_FILE, RESID_QMMM))
                        qmmm_elem_id_dict.setdefault(element, []).append(atom_id)
                        # stored one less than the atom id (zero-based) for the VMD list
                        atoms_for_vmd.append(atom_id - 1)

                if cfg[ADD_ELEMENTS] and atom_count <= cfg[LAST_ADD_ELEM]:
                    if atom_type in element_dict:
                        element = element_dict[atom_type]
                    elif atom_type not in missing_types:
                        warning("Please add atom type '{}' to dictionary of elements. Will not write/overwrite "
                                "element type in the pdb output.".format(atom_type))
                        missing_types.add(atom_type)

                # For numbering molecules from 1 to end
                if cfg[RENUM_MOL]:
                    if last_mol_num is None:
                        last_mol_num = mol_num

                    if mol_num != last_mol_num:
                        last_mol_num = mol_num
                        mol_count += 1
                        if mol_count == 10000:
                            warning("Molecule numbers greater than 9999 will be printed in hex")

                    # Due to PDB format constraints, molecule numbers above 9999 are printed in hex.
                    if mol_count > 9999:
                        mol_num = format(mol_count, 'x')
                        if len(mol_num) > 4:
                            warning("Hex representation of {} is {}, which is greater than 4 characters. This "
                                    "will affect the PDB output formatting.".format(mol_count, mol_num))
                    else:
                        mol_num = '{:4d}'.format(mol_count)

                atoms_content.append([line_head, atom_num, atom_type, res_type, mol_num, pdb_x, pdb_y, pdb_z,
                                      occ_t, element, last_cols])
                atom_ids.append(atom_id)

            # tail_content: every other non-blank line
            else:
                pdb_data[TAIL_CONTENT].append(line)

    # Only sort if there is renumbering. Sort on the numeric id: the formatted strings switch to hex above
    # 99999 and would not compare in numeric order.
    if len(atom_num_dict) > 0:
        paired = sorted(zip(atom_ids, atoms_content), key=lambda pair: pair[0])
        pdb_data[ATOMS_CONTENT] = [entry for _, entry in paired]
    else:
        pdb_data[ATOMS_CONTENT] = atoms_content

    if cfg[PDB_NEW_FILE] is None:
        f_name = create_out_fname(cfg[PDB_FILE], suffix="_new", base_dir=cfg[OUT_BASE_DIR])
    else:
        f_name = create_out_fname(cfg[PDB_NEW_FILE], base_dir=cfg[OUT_BASE_DIR])
    print_pdb(pdb_data[HEAD_CONTENT], pdb_data[ATOMS_CONTENT], pdb_data[TAIL_CONTENT],
              f_name, cfg[PDB_FORMAT])

    if len(cfg[RESID_QMMM]) > 0:
        f_name = create_out_fname('amino_id.dat', base_dir=cfg[OUT_BASE_DIR])
        # the first call is made with mode "w" and every later call with 'a'
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem], elem, f_name, mode=print_mode)
            print_mode = 'a'
        print_qm_links(ca_res_atom_id_dict, cb_res_atom_id_dict, f_name, mode=print_mode)
        f_name = create_out_fname('vmd_protein_atoms.dat', base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=' ')
예제 #2
0
def process_pdb(cfg, atom_num_dict, mol_num_dict, element_dict):
    """
    Read the PDB file named by cfg[PDB_FILE], edit its ATOM records, and write the results.

    Non-blank lines are sorted into three groups: 'REMARK'/'CRYST1' lines (head), 'ATOM  ' records
    (parsed by the fixed column boundaries stored in cfg), and everything else (tail). Each ATOM record is
    renumbered by its position in the file, optionally remapped through atom_num_dict, and may have its
    molecule number and element replaced. The edited PDB is passed to print_pdb; when cfg[RESID_QMMM] is
    non-empty, 'amino_id.dat' and 'vmd_protein_atoms.dat' are written as well.

    :param cfg: configuration mapping; read for the file locations, column boundaries, water id range,
                QMMM residue ids, and the element/renumbering switches used below
    :param atom_num_dict: maps an atom's 1-based position among the ATOM records to the id it should get
    :param mol_num_dict: maps a molecule number read from the file to its replacement
    :param element_dict: maps the atom-type column text to the element to record for that atom
    :raises InvalidDataError: if an atom in a cfg[RESID_QMMM] residue has a type missing from element_dict
    """
    pdb_loc = cfg[PDB_FILE]
    pdb_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
    # remembers atom types already warned about, so each warning is printed once and only once
    missing_types = set()
    qmmm_elem_id_dict = {}
    ca_res_atom_id_dict = {}
    cb_res_atom_id_dict = {}
    atoms_for_vmd = []
    atoms_content = []
    # numeric ids kept parallel to atoms_content; the formatted id strings cannot be sorted reliably
    atom_ids = []

    with open(pdb_loc) as f:
        wat_count = 0
        atom_count = 0
        mol_count = 1

        current_mol = None
        last_mol_num = None

        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            line_head = line[:cfg[PDB_LINE_TYPE_LAST_CHAR]]

            # head_content: the REMARK and CRYST1 records
            if line_head == 'REMARK' or line_head == 'CRYST1':
                pdb_data[HEAD_CONTENT].append(line)

            elif line_head == 'ATOM  ':
                # The atom number column of the input is ignored (the template PDB has ***** after 99999);
                # atoms are renumbered by their position among the ATOM records instead.
                atom_count += 1

                # For reordering atoms
                if atom_count in atom_num_dict:
                    atom_id = atom_num_dict[atom_count]
                else:
                    atom_id = atom_count

                # Ids that do not fit in 5 decimal digits are written in hex
                if atom_id > 99999:
                    atom_num = format(atom_id, 'x')
                    if len(atom_num) > 5:
                        warning("Hex representation of {} is {}, which is greater than 5 characters. This "
                                "will affect the PDB output formatting.".format(atom_id, atom_num))
                else:
                    atom_num = '{:5d}'.format(atom_id)

                atom_type = line[cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_TYPE_LAST_CHAR]]
                res_type = line[cfg[PDB_ATOM_TYPE_LAST_CHAR]:cfg[PDB_RES_TYPE_LAST_CHAR]]
                mol_num = int(line[cfg[PDB_RES_TYPE_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                pdb_x = float(line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                occ_t = line[cfg[PDB_Z_LAST_CHAR]:cfg[PDB_LAST_T_CHAR]]
                element = line[cfg[PDB_LAST_T_CHAR]:cfg[PDB_LAST_ELEM_CHAR]]
                last_cols = line[cfg[PDB_LAST_ELEM_CHAR]:]

                # For user-specified changing of molecule number
                if mol_num in mol_num_dict:
                    mol_num = mol_num_dict[mol_num]

                # Water checking: within the configured atom range, atoms are expected in OH2, H1, H2
                # triplets that all share one molecule number
                if cfg[FIRST_WAT_ID] <= atom_count <= cfg[LAST_WAT_ID]:
                    wat_position = wat_count % 3
                    if wat_position == 0:
                        current_mol = mol_num
                        if atom_type != '  OH2 ':
                            warning('Expected an OH2 atom to be the first atom of a water molecule. '
                                    'Check line: {}'.format(line))
                    else:
                        if current_mol != mol_num:
                            warning('Water not in order on line:', line)
                        if wat_position == 1:
                            if atom_type != '  H1  ':
                                warning('Expected an H1 atom to be the second atom of a water molecule. '
                                        'Check line: {}'.format(line))
                        else:
                            if atom_type != '  H2  ':
                                warning('Expected an H2 atom to be the third atom of a water molecule. '
                                        'Check line: {}'.format(line))
                    wat_count += 1

                # Collect ids for the QMMM output files
                if mol_num in cfg[RESID_QMMM] and atom_type not in SKIP_ATOM_TYPES:
                    if atom_type == C_ALPHA:
                        # the alpha carbon is only recorded for print_qm_links, not listed by element
                        ca_res_atom_id_dict[mol_num] = atom_id
                    else:
                        if atom_type == C_BETA:
                            cb_res_atom_id_dict[mol_num] = atom_id
                        if atom_type in element_dict:
                            element = element_dict[atom_type]
                        else:
                            raise InvalidDataError("Did not find atom type '{}' in the element dictionary. Please "
                                                   "provide a new atom type, element dictionary (using keyword {} "
                                                   "in the configuration file) that includes all atom types in the "
                                                   "residues identified with the '{}' key."
                                                   "".format(atom_type, ELEMENT_DICT_FILE, RESID_QMMM))
                        qmmm_elem_id_dict.setdefault(element, []).append(atom_id)
                        # stored one less than the atom id (zero-based) for the VMD list
                        atoms_for_vmd.append(atom_id - 1)

                if cfg[ADD_ELEMENTS] and atom_count <= cfg[LAST_ADD_ELEM]:
                    if atom_type in element_dict:
                        element = element_dict[atom_type]
                    elif atom_type not in missing_types:
                        warning("Please add atom type '{}' to dictionary of elements. Will not write/overwrite "
                                "element type in the pdb output.".format(atom_type))
                        missing_types.add(atom_type)

                # For numbering molecules from 1 to end
                if cfg[RENUM_MOL]:
                    if last_mol_num is None:
                        last_mol_num = mol_num

                    if mol_num != last_mol_num:
                        last_mol_num = mol_num
                        mol_count += 1
                        if mol_count == 10000:
                            warning("Molecule numbers greater than 9999 will be printed in hex")

                    # Due to PDB format constraints, molecule numbers above 9999 are printed in hex.
                    if mol_count > 9999:
                        mol_num = format(mol_count, 'x')
                        if len(mol_num) > 4:
                            warning("Hex representation of {} is {}, which is greater than 4 characters. This "
                                    "will affect the PDB output formatting.".format(mol_count, mol_num))
                    else:
                        mol_num = '{:4d}'.format(mol_count)

                atoms_content.append([line_head, atom_num, atom_type, res_type, mol_num, pdb_x, pdb_y, pdb_z,
                                      occ_t, element, last_cols])
                atom_ids.append(atom_id)

            # tail_content: every other non-blank line
            else:
                pdb_data[TAIL_CONTENT].append(line)

    # Only sort if there is renumbering. Sort on the numeric id: the formatted strings switch to hex above
    # 99999 and would not compare in numeric order.
    if len(atom_num_dict) > 0:
        paired = sorted(zip(atom_ids, atoms_content), key=lambda pair: pair[0])
        pdb_data[ATOMS_CONTENT] = [entry for _, entry in paired]
    else:
        pdb_data[ATOMS_CONTENT] = atoms_content

    if cfg[PDB_NEW_FILE] is None:
        f_name = create_out_fname(cfg[PDB_FILE], suffix="_new", base_dir=cfg[OUT_BASE_DIR])
    else:
        f_name = create_out_fname(cfg[PDB_NEW_FILE], base_dir=cfg[OUT_BASE_DIR])
    print_pdb(pdb_data[HEAD_CONTENT], pdb_data[ATOMS_CONTENT], pdb_data[TAIL_CONTENT],
              f_name, cfg[PDB_FORMAT])

    if len(cfg[RESID_QMMM]) > 0:
        f_name = create_out_fname('amino_id.dat', base_dir=cfg[OUT_BASE_DIR])
        # the first call is made with mode "w" and every later call with 'a'
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem], elem, f_name, mode=print_mode)
            print_mode = 'a'
        print_qm_links(ca_res_atom_id_dict, cb_res_atom_id_dict, f_name, mode=print_mode)
        f_name = create_out_fname('vmd_protein_atoms.dat', base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=' ')
예제 #3
0
def get_evb_atoms(cfg, chk_file):
    """
    Collect the oxygen and hydrogen atom ids from one checkpoint file and write them out.

    The file is read in stages: header lines until NUM_ATOMS_PAT matches (its first group is the atom
    count), then that many non-blank atom lines, after which reading stops. Atoms numbered above
    cfg[LAST_EXCLUDE_ID] must be of type 'O' or 'H'. The net charge, computed as
    (number of H) - 2 * (number of O), is either printed or checked against cfg[EXPECTED_CHARGE].
    Two output files, prefixed 'water_' and 'vmd_water_', are then written.

    :param cfg: configuration mapping; LAST_EXCLUDE_ID, EXPECTED_CHARGE and OUT_BASE_DIR are read
    :param chk_file: path of the checkpoint file to read
    :raises InvalidDataError: on an atom type other than 'O'/'H' past the excluded range, or when the
                              computed charge differs from a non-None cfg[EXPECTED_CHARGE]
    """
    oxygen_ids = []
    hydrogen_ids = []
    # routes each accepted atom type to the list that collects its ids
    ids_by_type = {'O': oxygen_ids, 'H': hydrogen_ids}

    header_lines = []
    atom_records = []
    expected_atoms = None
    stage = SEC_HEAD

    with open(chk_file) as chk:
        for raw_line in chk:
            text = raw_line.strip()

            if stage == SEC_HEAD:
                # everything up to and including the atom-count line is header
                header_lines.append(text)
                count_match = NUM_ATOMS_PAT.match(text)
                if count_match:
                    expected_atoms = int(count_match.group(1))
                    stage = SEC_ATOMS

            elif stage == SEC_ATOMS:
                if not text:
                    continue
                fields = text.split()
                index = int(fields[0])
                atom_num = int(fields[1])
                x, y, z = (float(token) for token in fields[2:5])
                atom_type = fields[5]
                atom_records.append([index, atom_num, x, y, z, atom_type])

                if atom_num > cfg[LAST_EXCLUDE_ID]:
                    bucket = ids_by_type.get(atom_type)
                    if bucket is None:
                        raise InvalidDataError("Expected atom types are 'O' and 'H' (looking for water "
                                               "molecules only). Found type '{}' for line:\n {}\n"
                                               "Use the '{}' keyword to specify the last atom to exclude (i.e. "
                                               "the last protein atom)."
                                               "".format(atom_type, text, LAST_EXCLUDE_ID))
                    bucket.append(atom_num)

                if len(atom_records) == expected_atoms:
                    stage = SEC_TAIL

            elif stage == SEC_TAIL:
                # nothing after the atoms section is needed
                break

    # Data validation: checking total charge
    oxygen_total = len(oxygen_ids)
    hydrogen_total = len(hydrogen_ids)
    net_charge = hydrogen_total - 2 * oxygen_total
    if cfg[EXPECTED_CHARGE] is None:
        print("Found {} oxygen atoms and {} hydrogen atoms for a total charge of {}."
              "".format(oxygen_total, hydrogen_total, add_sign(net_charge)))
    elif net_charge != cfg[EXPECTED_CHARGE]:
        raise InvalidDataError("Expected a total charge of {} but found {} for file: {}"
                               "".format(add_sign(cfg[EXPECTED_CHARGE]), add_sign(net_charge), chk_file))

    # hydrogens are written first, then oxygens are added to the same file with mode 'a'
    qm_out = create_out_fname(chk_file, prefix='water_', ext='.dat', base_dir=cfg[OUT_BASE_DIR],
                              remove_prefix='CHK_')
    print_qm_kind(hydrogen_ids, 'H', qm_out)
    print_qm_kind(oxygen_ids, 'O', qm_out, mode='a')

    vmd_out = create_out_fname(chk_file, prefix='vmd_water_', ext='.dat', base_dir=cfg[OUT_BASE_DIR],
                               remove_prefix='CHK_')
    print_vmd_list(oxygen_ids + hydrogen_ids, vmd_out)
예제 #4
0
파일: psf_edit.py 프로젝트: abb58/md_utils
def process_psf(cfg, atom_num_dict, mol_num_dict, element_dict, radii_dict):
    """
    Read the PSF file named by cfg[PSF_FILE], edit its atoms section, and write the requested outputs.

    Lines up to and including the one ending in 'NATOM' are kept as head content; the following
    non-blank lines, up to the atom count read from that line, are parsed as atoms; the rest is kept
    unchanged as tail content. Atom numbers and residue ids may be remapped or renumbered. An edited PSF
    is written when any renumbering was requested, and the CP2K/VMD helper files are written when
    cfg[PRINT_FOR_CP2K] is set.

    :param cfg: configuration mapping; read for file locations, the QM/QMMM residue ids, the atom types
                to skip, and the output switches used below
    :param atom_num_dict: maps an atom number read from the file to its replacement
    :param mol_num_dict: maps a residue id read from the file to its replacement
    :param element_dict: maps an atom type to the element it is listed under
    :param radii_dict: maps an atom type to the radius passed to print_mm_kind
    :raises InvalidDataError: if a selected QM atom has a type missing from element_dict
    """
    psf_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
    num_atoms_pat = re.compile(r"(\d+).*NATOM$")

    num_atoms = 1
    section = SEC_HEAD

    # for printing qmmm info
    qmmm_elem_id_dict = {}
    ca_res_atom_id_dict = {}
    cb_res_atom_id_dict = {}
    atoms_for_vmd = []
    types_for_mm_kind = set()
    qmmm_charge = 0

    # for RENUM_MOL
    last_resid = None
    cur_mol_num = 0

    with open(cfg[PSF_FILE]) as f:
        for line in f:
            s_line = line.strip()
            # head_content holds everything before the atoms section, including the atom-count line
            if section == SEC_HEAD:
                psf_data[HEAD_CONTENT].append(line.rstrip())

                atoms_match = num_atoms_pat.match(s_line)
                if atoms_match:
                    num_atoms = int(atoms_match.group(1))
                    section = SEC_ATOMS

            elif section == SEC_ATOMS:
                if len(s_line) == 0:
                    continue
                split_line = s_line.split()
                atom_num = int(split_line[0])
                segid = split_line[1]
                resid = int(split_line[2])
                resname = split_line[3]
                atom_type = split_line[4]
                charmm_type = split_line[5]
                charge = float(split_line[6])
                atom_wt = float(split_line[7])
                zero = split_line[8]

                # For reordering atoms
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                # For user-specified changing of molecule number
                if resid in mol_num_dict:
                    resid = mol_num_dict[resid]

                # Number residues consecutively from 1, advancing each time the resid changes
                if cfg[RENUM_MOL]:
                    if resid != last_resid:
                        last_resid = resid
                        cur_mol_num += 1
                    resid = cur_mol_num

                psf_data[ATOMS_CONTENT].append([
                    atom_num, segid, resid, resname, atom_type, charmm_type,
                    charge, atom_wt, zero
                ])

                in_qmmm = resid in cfg[RESID_QMMM]
                # Parenthesised to show the existing precedence: every atom of a RESID_QM residue is
                # selected, while RESID_QMMM residues drop the atom types listed in cfg[SKIP_ATOM_TYPES].
                # NOTE(review): confirm that RESID_QM residues are meant to bypass the skip list.
                if resid in cfg[RESID_QM] or (in_qmmm and atom_type not in cfg[SKIP_ATOM_TYPES]):
                    if in_qmmm and atom_type == C_ALPHA:
                        # the alpha carbon of a QMMM residue is only recorded for print_qm_links
                        ca_res_atom_id_dict[resid] = atom_num
                    else:
                        if in_qmmm and atom_type == C_BETA:
                            cb_res_atom_id_dict[resid] = atom_num
                        if atom_type in element_dict:
                            element = element_dict[atom_type]
                        else:
                            raise InvalidDataError(
                                "Did not find atom type '{}' in the element dictionary. Please "
                                "provide a new atom type, element dictionary (using keyword {} "
                                "in the configuration file) that includes all atom types in the "
                                "residues identified with the '{}' key."
                                "".format(atom_type, ELEMENT_DICT_FILE,
                                          RESID_QMMM))
                        qmmm_elem_id_dict.setdefault(element, []).append(atom_num)
                        qmmm_charge += charge
                        # stored one less than the atom number (zero-based) for the VMD list
                        atoms_for_vmd.append(atom_num - 1)

                if cfg[PRINT_FOR_CP2K]:
                    types_for_mm_kind.add(atom_type)

                if len(psf_data[ATOMS_CONTENT]) == num_atoms:
                    section = SEC_TAIL
            # tail_content holds everything after the atoms section, unchanged
            elif section == SEC_TAIL:
                psf_data[TAIL_CONTENT].append(line.rstrip())

    if len(atom_num_dict) > 0:
        warning(
            "This program does not yet edit any sections other than the atoms section. "
            "If you are renumbering atoms, the bonds, angles, dihedrals, impropers, and "
            "cross-terms sections will not match.")
        psf_data[ATOMS_CONTENT] = sorted(psf_data[ATOMS_CONTENT],
                                         key=lambda entry: entry[0])

    # An edited PSF is only written when some renumbering was requested
    if cfg[RENUM_MOL] or len(atom_num_dict) + len(mol_num_dict) > 0:
        if cfg[PSF_NEW_FILE] is None:
            f_name = create_out_fname(cfg[PSF_FILE],
                                      suffix="_new",
                                      base_dir=cfg[OUT_BASE_DIR])
        else:
            f_name = cfg[PSF_NEW_FILE]
        list_to_file(psf_data[HEAD_CONTENT] + psf_data[ATOMS_CONTENT] +
                     psf_data[TAIL_CONTENT],
                     f_name,
                     list_format=cfg[PSF_FORMAT])

    if cfg[PRINT_FOR_CP2K]:
        print("Total charge from QM atoms: {:.2f}".format(qmmm_charge))
        # create CP2K input listing amino atom ids; first call uses mode "w", later calls 'a'
        f_name = create_out_fname('amino_id.dat', base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem],
                          elem,
                          f_name,
                          mode=print_mode)
            print_mode = 'a'
        print_qm_links(ca_res_atom_id_dict,
                       cb_res_atom_id_dict,
                       f_name,
                       mode=print_mode)
        # create CP2K input listing MM atom type radii
        f_name = create_out_fname('mm_kinds.dat', base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"

        for atom_type in types_for_mm_kind:
            try:
                print_mm_kind(atom_type,
                              radii_dict[atom_type],
                              f_name,
                              mode=print_mode)
                # only switch to 'a' once a type has actually been printed
                print_mode = 'a'
            except KeyError:
                warning(
                    "Did not find atom type '{}' in the atom_type to radius dictionary: {}\n"
                    "    '{}' printed without this type; user may manually add its radius specification.\n"
                    "    To print this file with all MM types, use the keyword '{}' in the configuration file \n"
                    "    to identify a file with atom_type,radius (one per line, comma-separated) with all "
                    "MM types in the psf".format(atom_type,
                                                 cfg[RADII_DICT_FILE],
                                                 'mm_kinds.dat',
                                                 RADII_DICT_FILE))

        # create VMD input listing amino atom indexes (base-zero counting)
        f_name = create_out_fname('vmd_protein_atoms.dat',
                                  base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=' ')
예제 #5
0
def process_file(cfg):
    """
    Process every checkpoint file named in the list file at cfg[CHK_FILE_LIST].

    The list file holds one checkpoint path per line; blank lines are skipped. Each checkpoint file is
    read in stages: header lines until one matching 'ATOMS <count>', then that many non-blank atom
    lines, after which reading stops. Atoms numbered above cfg[LAST_EXCLUDE_ID] must be of type 'O' or
    'H'. The net charge, computed as (number of H) - 2 * (number of O), is either printed or checked
    against cfg[EXPECTED_CHARGE]. Two output files per checkpoint, prefixed 'water_' and 'vmd_water_',
    are then written.

    :param cfg: configuration mapping; CHK_FILE_LIST, LAST_EXCLUDE_ID, EXPECTED_CHARGE and OUT_BASE_DIR
                are read
    :raises InvalidDataError: on an atom type other than 'O'/'H' past the excluded range, or when the
                              computed charge differs from a non-None cfg[EXPECTED_CHARGE]
    """
    chk_list_loc = cfg[CHK_FILE_LIST]
    num_atoms_pat = re.compile(r"^ATOMS (\d+).*")
    last_exclude_id = cfg[LAST_EXCLUDE_ID]

    with open(chk_list_loc) as f:
        for chk_file in f:
            chk_file = chk_file.strip()
            # A blank entry (e.g. a trailing newline in the list file) is not a file name; skip it
            # instead of failing on open('').
            if not chk_file:
                continue

            with open(chk_file) as d:
                chk_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}

                section = SEC_HEAD
                o_ids = []
                h_ids = []

                for line in d:
                    line = line.strip()
                    # head_content holds everything before the atoms section, including the
                    # line that carries the number of atoms
                    if section == SEC_HEAD:
                        chk_data[HEAD_CONTENT].append(line)

                        atoms_match = num_atoms_pat.match(line)
                        if atoms_match:
                            chk_data[NUM_ATOMS] = int(atoms_match.group(1))
                            section = SEC_ATOMS

                    elif section == SEC_ATOMS:
                        if len(line) == 0:
                            continue
                        split_line = line.split()
                        index = int(split_line[0])
                        atom_num = int(split_line[1])
                        x, y, z = map(float, split_line[2:5])
                        atom_type = split_line[5]
                        atom_struct = [index, atom_num, x, y, z, atom_type]
                        chk_data[ATOMS_CONTENT].append(atom_struct)
                        if atom_num > last_exclude_id:
                            if atom_type == 'O':
                                o_ids.append(atom_num)
                            elif atom_type == 'H':
                                h_ids.append(atom_num)
                            else:
                                raise InvalidDataError("Expected atom types are 'O' and 'H' (looking for water "
                                                       "molecules only). Found type '{}' for line:\n {}\n"
                                                       "Use the '{}' keyword to specify the last atom to exclude (i.e. "
                                                       "the last protein atom)."
                                                       "".format(atom_type, line, LAST_EXCLUDE_ID))

                        if len(chk_data[ATOMS_CONTENT]) == chk_data[NUM_ATOMS]:
                            section = SEC_TAIL
                    # nothing after the atoms section is needed
                    elif section == SEC_TAIL:
                        break

            # Data validation: checking total charge
            num_o = len(o_ids)
            num_h = len(h_ids)
            total_charge = num_h - 2 * num_o
            if cfg[EXPECTED_CHARGE] is None:
                print("Found {} oxygen atoms and {} hydrogen atoms for a total charge of {}."
                      "".format(num_o, num_h, add_sign(total_charge)))
            else:
                if total_charge != cfg[EXPECTED_CHARGE]:
                    raise InvalidDataError("Expected a total charge of {} but found {} for file: {}"
                                           "".format(add_sign(cfg[EXPECTED_CHARGE]), add_sign(total_charge), chk_file))

            # hydrogens are written first, then oxygens are added to the same file with mode 'a'
            f_name = create_out_fname(chk_file, prefix='water_', ext='.dat', base_dir=cfg[OUT_BASE_DIR],
                                      remove_prefix='CHK_')
            print_qm_kind(h_ids, 'H', f_name)
            print_qm_kind(o_ids, 'O', f_name, mode='a')
            f_name = create_out_fname(chk_file, prefix='vmd_water_', ext='.dat', base_dir=cfg[OUT_BASE_DIR],
                                      remove_prefix='CHK_')
            print_vmd_list(o_ids+h_ids, f_name)
예제 #6
0
def process_psf(cfg, atom_num_dict, mol_num_dict, element_dict, radii_dict):
    """Read a PSF file, optionally renumber its atoms section, and write the requested outputs.

    The file named by ``cfg[PSF_FILE]`` is split into three parts: everything up to
    and including the ``NATOM`` line (head), the atom records (atoms), and everything
    after the last atom record (tail). Only the atoms section is edited.

    Depending on the configuration, this function writes:

    * a new PSF file, when ``cfg[RENUM_MOL]`` is set or either renumbering
      dictionary is non-empty;
    * ``amino_id.dat``, ``mm_kinds.dat`` and ``vmd_protein_atoms.dat`` when
      ``cfg[PRINT_FOR_CP2K]`` is set.

    :param cfg: configuration mapping; the keys read here are PSF_FILE, RENUM_MOL,
        RESID_QM, RESID_QMMM, SKIP_ATOM_TYPES, PRINT_FOR_CP2K, PSF_NEW_FILE,
        OUT_BASE_DIR, PSF_FORMAT and RADII_DICT_FILE
    :param atom_num_dict: maps an atom number read from the file to its replacement
    :param mol_num_dict: maps a residue id read from the file to its replacement
    :param element_dict: maps an atom type to its element
    :param radii_dict: maps an atom type to the radius passed to ``print_mm_kind``
    :raises InvalidDataError: if an atom selected for the QM region has an atom type
        that is missing from ``element_dict``
    :return: None
    """
    psf_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
    num_atoms_pat = re.compile(r"(\d+).*NATOM$")

    num_atoms = 1
    section = SEC_HEAD

    # for printing qmmm info
    qmmm_elem_id_dict = {}
    ca_res_atom_id_dict = {}
    cb_res_atom_id_dict = {}
    atoms_for_vmd = []
    types_for_mm_kind = set()
    qmmm_charge = 0

    # for RENUM_MOL
    last_resid = None
    cur_mol_num = 0

    with open(cfg[PSF_FILE]) as f:
        # Iterate the file lazily; there is no need to hold every line in memory at once
        for line in f:
            s_line = line.strip()

            # head_content to contain everything before the 'Atoms' section;
            # also capture the number of atoms
            if section == SEC_HEAD:
                psf_data[HEAD_CONTENT].append(line.rstrip())

                atoms_match = num_atoms_pat.match(s_line)
                if atoms_match:
                    # regex groups are 1-based
                    num_atoms = int(atoms_match.group(1))
                    # An empty atoms section has no records to parse, so go straight to the tail
                    section = SEC_ATOMS if num_atoms > 0 else SEC_TAIL

            elif section == SEC_ATOMS:
                if not s_line:
                    continue
                split_line = s_line.split()
                atom_num = int(split_line[0])
                segid = split_line[1]
                resid = int(split_line[2])
                resname = split_line[3]
                atom_type = split_line[4]
                charmm_type = split_line[5]
                charge = float(split_line[6])
                atom_wt = float(split_line[7])
                zero = split_line[8]

                # For reordering atoms
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                # For user-specified changing of molecule number
                if resid in mol_num_dict:
                    resid = mol_num_dict[resid]

                # Sequential renumbering: bump the counter each time the residue id changes
                if cfg[RENUM_MOL]:
                    if resid != last_resid:
                        last_resid = resid
                        cur_mol_num += 1
                    resid = cur_mol_num

                atom_struct = [atom_num, segid, resid, resname, atom_type, charmm_type, charge, atom_wt, zero]
                psf_data[ATOMS_CONTENT].append(atom_struct)

                in_qm_resid = resid in cfg[RESID_QM]
                in_qmmm_resid = resid in cfg[RESID_QMMM]
                # Parentheses spell out the operator precedence the condition always had:
                # the SKIP_ATOM_TYPES filter applies only to RESID_QMMM residues, while
                # every atom of a RESID_QM residue is selected.
                if in_qm_resid or (in_qmmm_resid and atom_type not in cfg[SKIP_ATOM_TYPES]):
                    if in_qmmm_resid and atom_type == C_ALPHA:
                        # Recorded for the QM link output only; not added to the
                        # element lists, charge total, or VMD list
                        ca_res_atom_id_dict[resid] = atom_num
                    else:
                        if in_qmmm_resid and atom_type == C_BETA:
                            cb_res_atom_id_dict[resid] = atom_num
                        if atom_type not in element_dict:
                            raise InvalidDataError(
                                "Did not find atom type '{}' in the element dictionary. Please "
                                "provide a new atom type, element dictionary (using keyword {} "
                                "in the configuration file) that includes all atom types in the "
                                "residues identified with the '{}' key."
                                "".format(atom_type, ELEMENT_DICT_FILE, RESID_QMMM)
                            )
                        element = element_dict[atom_type]
                        qmmm_elem_id_dict.setdefault(element, []).append(atom_num)
                        qmmm_charge += charge
                        # VMD indexes are zero-based
                        atoms_for_vmd.append(atom_num - 1)

                if cfg[PRINT_FOR_CP2K]:
                    types_for_mm_kind.add(atom_type)

                if len(psf_data[ATOMS_CONTENT]) == num_atoms:
                    section = SEC_TAIL

            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                psf_data[TAIL_CONTENT].append(line.rstrip())

    if atom_num_dict:
        # Adjacent string literals are concatenated as-is, so each piece carries its own
        # trailing space to keep the sentences from running together.
        warning(
            "This program does not yet edit any sections other than the atoms section. "
            "If you are renumbering atoms, the bonds, angles, dihedrals, impropers, and "
            "cross-terms sections will not match."
        )
        psf_data[ATOMS_CONTENT] = sorted(psf_data[ATOMS_CONTENT], key=lambda entry: entry[0])

    if cfg[RENUM_MOL] or atom_num_dict or mol_num_dict:
        if cfg[PSF_NEW_FILE] is None:
            f_name = create_out_fname(cfg[PSF_FILE], suffix="_new", base_dir=cfg[OUT_BASE_DIR])
        else:
            f_name = cfg[PSF_NEW_FILE]
        list_to_file(
            psf_data[HEAD_CONTENT] + psf_data[ATOMS_CONTENT] + psf_data[TAIL_CONTENT],
            f_name,
            list_format=cfg[PSF_FORMAT],
        )

    if cfg[PRINT_FOR_CP2K]:
        print("Total charge from QM atoms: {:.2f}".format(qmmm_charge))
        # create CP2K input listing amino atom ids
        f_name = create_out_fname("amino_id.dat", base_dir=cfg[OUT_BASE_DIR])
        # The first write creates/overwrites the file; every later write appends to it
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem], elem, f_name, mode=print_mode)
            print_mode = "a"
        print_qm_links(ca_res_atom_id_dict, cb_res_atom_id_dict, f_name, mode=print_mode)

        # create CP2K input listing MM atom type radii
        f_name = create_out_fname("mm_kinds.dat", base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"

        for atom_type in types_for_mm_kind:
            try:
                print_mm_kind(atom_type, radii_dict[atom_type], f_name, mode=print_mode)
                print_mode = "a"
            except KeyError:
                # A missing radius is not fatal: warn and leave this type out of the file
                warning(
                    "Did not find atom type '{}' in the atom_type to radius dictionary: {}\n"
                    "    '{}' printed without this type; user may manually add its radius specification.\n"
                    "    To print this file with all MM types, use the keyword '{}' in the configuration file \n"
                    "    to identify a file with atom_type,radius (one per line, comma-separated) with all "
                    "MM types in the psf".format(atom_type, cfg[RADII_DICT_FILE], "mm_kinds.dat", RADII_DICT_FILE)
                )

        # create VMD input listing amino atom indexes (base-zero counting)
        f_name = create_out_fname("vmd_protein_atoms.dat", base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=" ")