Example #1
File: cp2k2data.py  Project: abb58/md_utils
def process_cp2k_file(cp2k_file, data_tpl_content, data_template_fname):
    new_atoms_section = None
    qmmm_energy = None
    with open(cp2k_file) as f:
        data_tpl_content[HEAD_CONTENT][0] = "Created on {} by {} version {} from template file {} and " \
                                            "cp2k output file {}".format(datetime.now(), __name__, __version__,
                                                                         data_template_fname, cp2k_file
                                                                         )
        for line in f:
            line = line.strip()
            if ENERGY_PAT.match(line):
                qmmm_energy = line.split()[-1]
            if COORD_PAT.match(line):
                # Now advance to first line of coordinates
                for _ in range(3):
                    next(f)
                new_atoms_section = process_coords(f, data_tpl_content)

    # If we successfully returned the new_atoms_section, make new file
    if new_atoms_section is None:
        raise InvalidDataError(
            "Did not file atoms coordinates in file: {}".format(cp2k_file))
    print("{} energy: {}".format(cp2k_file, qmmm_energy))
    f_name = create_out_fname(cp2k_file, ext='.data')
    list_to_file(data_tpl_content[HEAD_CONTENT] + new_atoms_section +
                 data_tpl_content[TAIL_CONTENT],
                 f_name,
                 print_message=False)
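Every example on this page funnels its output through the md_utils helpers create_out_fname and list_to_file. The sketch below is only a rough stand-in inferred from how they are called in these examples; the signatures, defaults, and behavior are assumptions, not the library's actual API.

import os


def create_out_fname(src_file, prefix='', suffix='', base_dir=None, ext=None):
    # Build an output path next to src_file (or under base_dir), adding an optional
    # prefix/suffix to the base name and optionally swapping the extension.
    if base_dir is None:
        base_dir = os.path.dirname(src_file)
    base_name, old_ext = os.path.splitext(os.path.basename(src_file))
    if ext is None:
        ext = old_ext
    return os.path.abspath(os.path.join(base_dir, prefix + base_name + suffix + ext))


def list_to_file(line_list, f_name, delimiter=' ', print_message=True):
    # Write one entry per line; list/tuple entries are joined with the delimiter.
    # (The real helper also accepts a list_format argument used by the PDB examples below.)
    with open(f_name, 'w') as w_file:
        for line in line_list:
            if isinstance(line, (list, tuple)):
                line = delimiter.join(map(str, line))
            w_file.write(str(line) + '\n')
    if print_message:
        print('Wrote file: {}'.format(f_name))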
Example #2
def print_content(atom_id_dict, cfg, content, data_file, highlight_content, section_order, type_dict):
    data_content = content[SEC_HEAD]
    select_data_content = []
    for section in section_order:
        # empty list will become an empty line
        data_content += [''] + [section, '']
        select_data_content += [section]
        sec_format = SEC_FORMAT_DICT[section][0]
        comment_col = SEC_FORMAT_DICT[section][1]
        for line in content[section]:
            data_content.append(sec_format.format(*line[:comment_col]) + " ".join(line[comment_col:]))
        for line in highlight_content[section]:
            select_data_content.append(sec_format.format(*line[:comment_col]) + " ".join(line[comment_col:]))

    # Only print a "new" data file if something is changed
    dict_lens = len(atom_id_dict)
    for name, t_dict in type_dict.items():
        dict_lens += len(t_dict)
    if dict_lens > 0 or cfg[SORT_ME]:
        f_name = create_out_fname(data_file, suffix='_new', ext='.data')
        list_to_file(data_content, f_name)
        print('Completed writing {}'.format(f_name))
    if (len(cfg[PRINT_DATA_ATOMS]) + len(cfg[PRINT_OWN_ATOMS])) > 0:
        f_name = create_out_fname(data_file, suffix='_selected', ext='.txt')
        list_to_file(select_data_content, f_name)
        print('Completed writing {}'.format(f_name))
Example #3
def main(argv=None):
    """ Runs the main program.

    @param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != 0:
        return ret

    if args.src_file is not None:
        proc_data = calc_for_wham(args.src_file)
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX), COLVAR_WHAM_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '%d' dirs with files to process", len(found_files))
        # noinspection PyCompatibility
        for f_dir, files in found_files.iteritems():
            if not files:
                logger.warn("No files found for dir '%s'", f_dir)
                continue
            for colvar_path in ([os.path.join(f_dir, tgt) for tgt in files]):
                proc_data = calc_for_wham(colvar_path)
                f_name = create_out_fname(colvar_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    list_to_file([str(d['r']) for d in proc_data if 'r' in d], f_name)
                    # write_csv(proc_data, f_name, COLVAR_WHAM_KEY_SEQ, extrasaction="ignore")
    return 0  # success
Example #4
def main(argv=None):
    """ Runs the main program.

    @param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != 0:
        return ret

    if args.src_file is not None:
        proc_data = calc_for_wham(args.src_file)
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX),
                  COLVAR_WHAM_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '%d' dirs with files to process", len(found_files))
        # noinspection PyCompatibility
        for f_dir, files in found_files.iteritems():
            if not files:
                logger.warn("No files found for dir '%s'", f_dir)
                continue
            for colvar_path in ([os.path.join(f_dir, tgt) for tgt in files]):
                proc_data = calc_for_wham(colvar_path)
                f_name = create_out_fname(colvar_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    list_to_file([str(d['r']) for d in proc_data if 'r' in d],
                                 f_name)
                    # write_csv(proc_data, f_name, COLVAR_WHAM_KEY_SEQ, extrasaction="ignore")
    return 0  # success
Example #5
def print_content(atom_id_dict, cfg, content, data_file, highlight_content,
                  section_order, type_dict):
    data_content = content[SEC_HEAD]
    select_data_content = []
    for section in section_order:
        # empty list will become an empty line
        data_content += [''] + [section, '']
        select_data_content += [section]
        sec_format = SEC_FORMAT_DICT[section][0]
        comment_col = SEC_FORMAT_DICT[section][1]
        for line in content[section]:
            data_content.append(
                sec_format.format(*line[:comment_col]) +
                " ".join(line[comment_col:]))
        for line in highlight_content[section]:
            select_data_content.append(
                sec_format.format(*line[:comment_col]) +
                " ".join(line[comment_col:]))

    # Only print a "new" data file if something is changed
    dict_lens = len(atom_id_dict)
    for name, t_dict in type_dict.items():
        dict_lens += len(t_dict)
    if dict_lens > 0 or cfg[SORT_ME]:
        f_name = create_out_fname(data_file, suffix='_new', ext='.data')
        list_to_file(data_content, f_name)
        print('Completed writing {}'.format(f_name))
    if (len(cfg[PRINT_DATA_ATOMS]) + len(cfg[PRINT_OWN_ATOMS])) > 0:
        f_name = create_out_fname(data_file, suffix='_selected', ext='.txt')
        list_to_file(select_data_content, f_name)
        print('Completed writing {}'.format(f_name))
Example #6
def adjust_atom_xyz(cfg, data_tpl_content):
    """
    If this option is selected, adjust the xyz coordinates as specified
    @param cfg: configuration for the run
    @param data_tpl_content: processed data from the template
    @return: will print new data files or raise InvalidDataError
    """
    if cfg[ADJUST_ATOM] > data_tpl_content[NUM_ATOMS]:
        raise InvalidDataError(
            "Keyword '{}' specified atom index {} to have its XYZ coordinates adjusted, "
            "but found only "
            "{} atoms in the data template file: {}".format(
                ADJUST_ATOM, cfg[ADJUST_ATOM], data_tpl_content[NUM_ATOMS],
                cfg[DATA_TPL_FILE]))
    diff_vector = np.asarray((np.subtract(cfg[XYZ2], cfg[XYZ1])))
    inc_vector = np.divide(diff_vector, cfg[XYZ_STEPS])
    head_content = data_tpl_content[HEAD_CONTENT]
    atoms_content = data_tpl_content[ATOMS_CONTENT]
    tail_content = data_tpl_content[TAIL_CONTENT]
    # since python is zero-based, must subtract 1
    adjust_atom_num = cfg[ADJUST_ATOM] - 1
    for multiplier in range(-cfg[XYZ_STEPS_EXTEND],
                            cfg[XYZ_STEPS] + cfg[XYZ_STEPS_EXTEND]):
        f_name = create_out_fname(cfg[DATA_TPL_FILE],
                                  suffix='_' + str(multiplier),
                                  ext='.data')
        atoms_content[adjust_atom_num][4:7] = np.round(
            multiplier * inc_vector + cfg[XYZ1], 6)
        list_to_file(head_content + atoms_content + tail_content, f_name)
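The loop above is a straight linear sweep between cfg[XYZ1] and cfg[XYZ2], optionally extended past both endpoints. A tiny standalone illustration of the same arithmetic, using made-up stand-in values for the configuration keywords:

import numpy as np

xyz1 = np.array([0.0, 0.0, 0.0])        # stand-in for cfg[XYZ1]
xyz2 = np.array([3.0, 0.0, 4.0])        # stand-in for cfg[XYZ2]
xyz_steps, xyz_steps_extend = 4, 1      # stand-ins for cfg[XYZ_STEPS], cfg[XYZ_STEPS_EXTEND]

inc_vector = (xyz2 - xyz1) / xyz_steps  # same as np.divide(diff_vector, cfg[XYZ_STEPS])
for multiplier in range(-xyz_steps_extend, xyz_steps + xyz_steps_extend):
    # multiplier 0 reproduces xyz1; multiplier == xyz_steps reproduces xyz2
    print(multiplier, np.round(multiplier * inc_vector + xyz1, 6))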
Example #7
def process_pdb_tpl(cfg):
    tpl_loc = cfg[PDB_TPL_FILE]
    tpl_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}

    atom_id = 0

    with open(tpl_loc) as f:
        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            line_head = line[:cfg[PDB_LINE_TYPE_LAST_CHAR]]
            # head_content to contain everything before the 'Atoms' section
            # also capture the number of atoms
            # match 5 letters so don't need to set up regex for the ones that have numbers following the letters
            # noinspection SpellCheckingInspection
            if line_head[:-1] in ['HEADE', 'TITLE', 'REMAR', 'CRYST', 'MODEL', 'COMPN',
                                  'NUMMD', 'ORIGX', 'SCALE', 'SOURC', 'AUTHO', 'CAVEA',
                                  'EXPDT', 'MDLTY', 'KEYWD', 'OBSLT', 'SPLIT', 'SPRSD',
                                  'REVDA', 'JRNL ', 'DBREF', 'SEQRE', 'HET  ', 'HETNA',
                                  'HETSY', 'FORMU', 'HELIX', 'SHEET', 'SSBON', 'LINK ',
                                  'CISPE', 'SITE ', ]:
                tpl_data[HEAD_CONTENT].append(line)

            # atoms_content to contain everything but the xyz
            elif line_head == 'ATOM  ':

                # Renumbering handles the case when a PDB template has ***** after atom_id 99999.
                # When renumbering, make sure to print in the correct format, including the number of characters:
                atom_id += 1
                if atom_id > 99999:
                    atom_num = format(atom_id, 'x')
                else:
                    atom_num = '{:5d}'.format(atom_id)
                # Alternately, use this:
                # atom_num = line[cfg[PDB_LINE_TYPE_LAST_CHAR]:cfg[PDB_ATOM_NUM_LAST_CHAR]]

                atom_type = line[cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_TYPE_LAST_CHAR]]
                res_type = line[cfg[PDB_ATOM_TYPE_LAST_CHAR]:cfg[PDB_RES_TYPE_LAST_CHAR]]
                # There is already a try when calling the subroutine, so maybe I don't need to?
                mol_num = int(line[cfg[PDB_RES_TYPE_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                pdb_x = float(line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                last_cols = line[cfg[PDB_Z_LAST_CHAR]:]

                line_struct = [line_head, atom_num, atom_type, res_type, mol_num, pdb_x, pdb_y, pdb_z, last_cols]
                tpl_data[ATOMS_CONTENT].append(line_struct)

            # tail_content to contain everything after the 'Atoms' section
            else:
                tpl_data[TAIL_CONTENT].append(line)

    if logger.isEnabledFor(logging.DEBUG):
        f_name = create_out_fname('reproduced_tpl', ext='.pdb', base_dir=cfg[OUT_BASE_DIR])
        list_to_file(tpl_data[HEAD_CONTENT] + tpl_data[ATOMS_CONTENT] + tpl_data[TAIL_CONTENT],
                     f_name, list_format=cfg[PDB_FORMAT])
    return tpl_data
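The slicing above depends on the *_LAST_CHAR values from the run's configuration. The boundaries below are only the conventional PDB ATOM record columns, shown to illustrate the slicing; they are assumptions and may differ from the project's actual defaults.

# Hypothetical column boundaries following the standard PDB ATOM record layout
# (the real values are read from the .ini file and may differ).
cfg = {
    'PDB_LINE_TYPE_LAST_CHAR': 6,    # record name, e.g. 'ATOM  '
    'PDB_ATOM_NUM_LAST_CHAR': 11,    # atom serial number
    'PDB_ATOM_TYPE_LAST_CHAR': 17,   # atom name (+ altLoc)
    'PDB_RES_TYPE_LAST_CHAR': 22,    # residue name (+ chain id)
    'PDB_MOL_NUM_LAST_CHAR': 26,     # residue sequence number
    'PDB_X_LAST_CHAR': 38,
    'PDB_Y_LAST_CHAR': 46,
    'PDB_Z_LAST_CHAR': 54,
}

line = "ATOM      1  N   ALA A   1      11.104   6.134  -6.504  1.00  0.00           N"
print(repr(line[:cfg['PDB_LINE_TYPE_LAST_CHAR']]))                          # 'ATOM  '
print(float(line[cfg['PDB_MOL_NUM_LAST_CHAR']:cfg['PDB_X_LAST_CHAR']]))     # 11.104
print(float(line[cfg['PDB_X_LAST_CHAR']:cfg['PDB_Y_LAST_CHAR']]))           # 6.134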
Example #8
def comp_files(cfg, atom_id_dict, type_dicts):
    """
    Compares each section of data files
    @param cfg: configuration information for current run
    @param atom_id_dict: dictionary for changing the atom id
    @param type_dicts: dictionary for changing atom and interaction types
    @return:
    """
    first_content, first_section_order = proc_data_file(
        cfg,
        cfg[DATA_FILE],
        atom_id_dict,
        type_dicts,
    )
    second_content, second_section_order = proc_data_file(
        cfg,
        cfg[DATA_COMP],
        atom_id_dict,
        type_dicts,
    )

    for section in second_section_order:
        if section not in first_section_order:
            warning("Skipping section '{}'; section found in the file: {}\n"
                    "   but not in file: {}".format(section, cfg[DATA_COMP],
                                                    cfg[DATA_FILE]))

    diffs = ["Differences in head section:"]
    compare_heads(first_content[SEC_HEAD], second_content[SEC_HEAD], diffs)

    for section in first_section_order:
        if section not in second_section_order:
            warning("Skipping section '{}'; section found in the file: {}\n"
                    "   but not in file: {}".format(section, cfg[DATA_FILE],
                                                    cfg[DATA_COMP]))
        elif section in [SEC_VELOS]:
            diffs.append("\nSkipping section '{}'".format(section))
        elif section in COMP_ORD_SEC_COL_DICT:
            diffs.append("\nDifferences in section '{}':".format(section))
            num_col_to_compare = COMP_ORD_SEC_COL_DICT[section]
            compare_lists(first_content[section], second_content[section], 0,
                          num_col_to_compare, diffs,
                          SEC_FORMAT_DICT[section][0],
                          SEC_FORMAT_DICT[section][1])
        elif section in NUM_SEC_DICT:
            diffs.append("\nDifferences in section '{}':".format(section))
            num_col_to_compare = NUM_SEC_DICT[section][1]
            compare_lists(first_content[section], second_content[section], 1,
                          num_col_to_compare, diffs,
                          SEC_FORMAT_DICT[section][0],
                          SEC_FORMAT_DICT[section][1])
        else:
            print("Encountered unexpected section '{}'".format(section))

    f_name = create_out_fname(cfg[DATA_COMP], prefix='diffs_', ext='.txt')
    list_to_file(diffs, f_name)
    print('Completed writing {}'.format(f_name))
Example #9
def proc_file(file_name):
    with open(file_name) as d:
        nodups_lines = ['']
        for line in d:
            line = line.strip()
            if len(line) == 0:
                continue
            elif line == nodups_lines[-1]:
                continue
            else:
                nodups_lines.append(line)
    print('Completed reading {}.\n'.format(file_name))
    f_out_name = create_out_fname(file_name, suffix='_nodups')
    list_to_file(nodups_lines[1:], f_out_name)
    print('Wrote {}.\n'.format(f_out_name))
Example #10
def proc_file(file_name):
    with open(file_name) as d:
        nodups_lines = ['']
        for line in d:
            line = line.strip()
            if len(line) == 0:
                continue
            elif line == nodups_lines[-1]:
                continue
            else:
                nodups_lines.append(line)
    print('Completed reading {}.\n'.format(file_name))
    f_out_name = create_out_fname(file_name, suffix='_nodups')
    list_to_file(nodups_lines[1:], f_out_name)
    print('Wrote {}.\n'.format(f_out_name))
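The loop above drops blank lines and keeps only the first of any run of identical consecutive lines. The same idea in standalone form with itertools.groupby, for comparison:

from itertools import groupby


def drop_consecutive_dups(lines):
    # Strip each line, drop blanks, and keep only the first of any run of identical lines.
    stripped = (line.strip() for line in lines)
    non_blank = (line for line in stripped if line)
    return [key for key, _ in groupby(non_blank)]


print(drop_consecutive_dups(['a\n', 'a\n', '\n', 'b\n', 'a\n']))  # ['a', 'b', 'a']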
Example #11
def read_file_list(file_list, out_dir):
    """
    @param file_list: the list of files to be read
    @param out_dir: user-specified output directory
    """
    summary_header = ['num_atoms', 'sum_x', 'sum_y', 'sum_z', 'total']
    summary_array = None

    with open(file_list) as f:
        for f_file in f:
            f_file = f_file.strip()
            if len(f_file) == 0:
                continue
            elif os.path.isfile(f_file):
                summary = process_cp2k_force_file(f_file, out_dir)
                if summary is not None:
                    if summary_array is None:
                        summary_array = summary
                    else:
                        summary_array = np.vstack((summary, summary_array))
            else:
                warning('Could not read file {} in file list {}. '
                        'Continuing to the next line in file list.'.format(f_file, file_list))
    # print(np.amax(summary_array, axis=1))
    if summary_array is None:
        warning("No valid cp2k force output files were read.")
    elif summary_array.size == 5:
        print('For the one CP2K force file read:')
        print(' ' + '      '.join(summary_header))
        print(' '.join(['%10.0f' % summary_array[0]] + ['%10.3f' % F for F in summary_array[1:]]))
    else:
        f_out = create_out_fname(file_list, prefix='force_sums_', base_dir=out_dir, ext='.csv')
        list_to_file(summary_array, f_out)
        with open(f_out, 'w') as logfile:
            logfile.write(','.join(summary_header) + "\n")
            # noinspection PyTypeChecker
            for line in summary_array:
                logfile.write(','.join(['%d' % line[0]] + ['%f' % F for F in line[1:]]) + "\n")
        print('Finished reading all cp2k force files. Printed each atomic force sum to: {}'.format(f_out))

        min_vals = np.amin(summary_array, axis=0)
        max_vals = np.amax(summary_array, axis=0)

        print('           ' + '      '.join(summary_header))
        print('min_vals: ' + ' '.join(['%10.0f' % min_vals[0]] + ['%10.3f' % F for F in min_vals[1:]]))
        print('max_vals: ' + ' '.join(['%10.0f' % max_vals[0]] + ['%10.3f' % F for F in max_vals[1:]]))
Example #12
def comp_files(cfg, atom_id_dict, type_dicts):
    """
    Compares each section of data files
    @param cfg: configuration information for current run
    @param atom_id_dict: dictionary for changing the atom id
    @param type_dicts: dictionary for changing atom and interaction types
    @return:
    """
    first_content, first_section_order = proc_data_file(cfg, cfg[DATA_FILE], atom_id_dict, type_dicts,)
    second_content, second_section_order = proc_data_file(cfg, cfg[DATA_COMP], atom_id_dict, type_dicts,)

    for section in second_section_order:
        if section not in first_section_order:
            warning("Skipping section '{}'; section found in the file: {}\n"
                    "   but not in file: {}".format(section, cfg[DATA_COMP], cfg[DATA_FILE]))

    diffs = ["Differences in head section:"]
    compare_heads(first_content[SEC_HEAD], second_content[SEC_HEAD], diffs)

    for section in first_section_order:
        if section not in second_section_order:
            warning("Skipping section '{}'; section found in the file: {}\n"
                    "   but not in file: {}".format(section, cfg[DATA_FILE], cfg[DATA_COMP]))
        elif section in [SEC_VELOS]:
            diffs.append("\nSkipping section '{}'".format(section))
        elif section in COMP_ORD_SEC_COL_DICT:
            diffs.append("\nDifferences in section '{}':".format(section))
            num_col_to_compare = COMP_ORD_SEC_COL_DICT[section]
            compare_lists(first_content[section], second_content[section], 0, num_col_to_compare, diffs,
                          SEC_FORMAT_DICT[section][0], SEC_FORMAT_DICT[section][1])
        elif section in NUM_SEC_DICT:
            diffs.append("\nDifferences in section '{}':".format(section))
            num_col_to_compare = NUM_SEC_DICT[section][1]
            compare_lists(first_content[section], second_content[section], 1, num_col_to_compare, diffs,
                          SEC_FORMAT_DICT[section][0], SEC_FORMAT_DICT[section][1])
        else:
            print("Encountered unexpected section '{}'".format(section))

    f_name = create_out_fname(cfg[DATA_COMP], prefix='diffs_', ext='.txt')
    list_to_file(diffs, f_name)
    print('Completed writing {}'.format(f_name))
Example #13
def adjust_atom_dist(cfg, data_tpl_content):
    """
    If this option is selected, adjust the xyz coordinates to the specified distances
    @param cfg: configuration for the run
    @param data_tpl_content: processed data from the template
    @return: will print new data files or raise InvalidDataError
    """
    for atom_num in cfg[ATOMS_DIST]:
        if atom_num > data_tpl_content[NUM_ATOMS]:
            raise InvalidDataError(
                "Keyword '{}' specified atom indexes {} but found only "
                "{} atoms in the data template file: {}".format(
                    ATOMS_DIST, cfg[ATOMS_DIST], data_tpl_content[NUM_ATOMS],
                    cfg[DATA_TPL_FILE]))
    # since python is zero-based, must subtract 1
    pivot_atom_num = cfg[ATOMS_DIST][0] - 1
    pivot_atom = data_tpl_content[ATOMS_CONTENT][pivot_atom_num]
    pivot_xyz = np.array(pivot_atom[4:7])

    moving_atom_num = cfg[ATOMS_DIST][1] - 1
    moving_atom = data_tpl_content[ATOMS_CONTENT][moving_atom_num]
    moving_xyz = np.array(moving_atom[4:7])

    diff_vector = pbc_calc_vector(moving_xyz, pivot_xyz,
                                  data_tpl_content[BOX_SIZE])
    base_dist = np.linalg.norm(diff_vector)

    head_content = data_tpl_content[HEAD_CONTENT]
    atoms_content = data_tpl_content[ATOMS_CONTENT]
    tail_content = data_tpl_content[TAIL_CONTENT]

    for new_dist in cfg[NEW_DIST_LIST]:
        multiplier = new_dist / base_dist
        f_name = create_out_fname(cfg[DATA_TPL_FILE],
                                  suffix='_' + str(new_dist),
                                  ext='.data')
        atoms_content[moving_atom_num][4:7] = np.round(
            multiplier * diff_vector + pivot_xyz, 6)
        list_to_file(head_content + atoms_content + tail_content, f_name)
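pbc_calc_vector comes from md_utils; the sketch below shows the textbook minimum-image calculation that such a helper typically performs for an orthorhombic box. It illustrates the convention only and is not the project's actual implementation.

import numpy as np


def min_image_vector(a_xyz, b_xyz, box):
    # Vector from b_xyz to a_xyz with each component wrapped back into one box
    # length (minimum-image convention; orthorhombic box assumed).
    diff = np.asarray(a_xyz) - np.asarray(b_xyz)
    box = np.asarray(box)
    return diff - box * np.round(diff / box)


box = np.array([10.0, 10.0, 10.0])
print(min_image_vector([9.5, 0.0, 0.0], [0.5, 0.0, 0.0], box))  # [-1.  0.  0.]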
Example #14
def process_cv_file(cv_file, time_col, cv_col, row_index, time_conv):
    data_to_print = []
    with open(cv_file) as f:
        for line in f:
            if row_index == 0:
                row_index = 1
            else:
                data = [x.strip() for x in line.split()]
                try:
                    timestep = int(float(data[time_col]) * time_conv)
                    cv = float(data[cv_col])
                    data_to_print.append([timestep, cv])
                except ValueError as e:
                    warning("Excepted a number for the time_column ({}) and cv column({}). Found {} and {}."
                            "".format(time_col, cv_col, data[time_col], data[cv_col]), e)
                    return INVALID_DATA
    d_out = create_out_fname(cv_file, suffix='_converted', ext='.txt')
    list_to_file(data_to_print, d_out)
    print('Wrote file: {}'.format(d_out))

    d_out = create_out_fname(cv_file, suffix='_converted', ext='.csv')
    list_to_file(data_to_print, d_out, delimiter=',')
    print('Wrote file: {}'.format(d_out))
Example #15
def process_pdb_files(cfg, data_tpl_content):
    # # For printing a dictionary
    # new_atom_type_dict = {}
    with open(cfg[PDBS_FILE]) as f:
        for pdb_file in f.readlines():
            pdb_atom_line = []
            pdb_file = pdb_file.strip()
            with open(pdb_file) as d:
                atom_num = 0
                for line in d.readlines():
                    pdb_section = line[:cfg[PDB_SECTION_LAST_CHAR]]
                    if pdb_section == 'ATOM  ':
                        # atom_nums = line[cfg[PDB_SECTION_LAST_CHAR]:cfg[PDB_ATOM_NUM_LAST_CHAR]]
                        # atom_type = line[cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_INFO_LAST_CHAR]]
                        # There is already a try when calling the subroutine, so maybe I don't need to?
                        # mol_num = int(line[cfg[PDB_ATOM_INFO_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                        pdb_x = float(line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                        pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                        pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                        # last_cols = line[cfg[PDB_Z_LAST_CHAR]:]
                        # if data_tpl_content[ATOMS_CONTENT][atom_num][2] !=data_tpl_content[ATOM_TYPE_DICT][atom_type]:
                        #     print(atom_num,atom_type, data_tpl_content[ATOMS_CONTENT][atom_num][2],
                        # data_tpl_content[ATOM_TYPE_DICT][atom_type])
                        # # For printing a dictionary
                        # new_atom_type_dict[atom_type] = data_tpl_content[ATOMS_CONTENT][atom_num][2]
                        pdb_atom_line.append(data_tpl_content[ATOMS_CONTENT][atom_num][:4] +
                                             [pdb_x, pdb_y, pdb_z] + data_tpl_content[ATOMS_CONTENT][atom_num][4:])
                        atom_num += 1
            if atom_num != data_tpl_content[NUM_ATOMS]:
                raise InvalidDataError('The length of the "Atoms" section ({}) in the pdb does not equal '
                                       'the number of atoms in the data template file ({}).'
                                       ''.format(atom_num,
                                                 data_tpl_content[NUM_ATOMS]))
            d_out = create_out_fname(pdb_file, suffix='_from_py', ext='.data')
            list_to_file(data_tpl_content[HEAD_CONTENT] + pdb_atom_line + data_tpl_content[TAIL_CONTENT],
                         d_out)
            print('Wrote file: {}'.format(d_out))
Example #16
def process_data_tpl(cfg):
    dict_loc = cfg[ATOM_DICT_FILE]
    tpl_loc = cfg[DATA_TPL_FILE]
    tpl_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: [], ATOM_TYPE_DICT: {}}
    section = SEC_HEAD
    num_atoms_pat = re.compile(r"(\d+).*atoms$")
    atoms_pat = re.compile(r"^Atoms.*")
    bond_pat = re.compile(r"^Bond.*")

    with open(dict_loc) as csv_file:
        for line in csv.reader(csv_file):
            try:
                tpl_data[ATOM_TYPE_DICT][line[0]] = int(line[1])
            except ValueError as e:
                logger.debug("{}: Could not convert value {} to int.".format(e, line[1]))

    with open(tpl_loc) as f:
        for line in f.readlines():
            line = line.strip()
            # head_content to contain everything before the 'Atoms' section
            # also capture the number of atoms
            if section == SEC_HEAD:
                tpl_data[HEAD_CONTENT].append(line)
                if NUM_ATOMS not in tpl_data:
                    atoms_match = num_atoms_pat.match(line)
                    if atoms_match:
                        # regex is 1-based
                        tpl_data[NUM_ATOMS] = int(atoms_match.group(1))
                elif atoms_pat.match(line):
                    section = SEC_ATOMS
                    tpl_data[HEAD_CONTENT].append('')

            # atoms_content to contain everything but the xyz: atom_num, mol_num, atom_type, charge'
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                if bond_pat.match(line):
                    section = SEC_TAIL
                    # Append one new line
                    tpl_data[TAIL_CONTENT].append('')
                    tpl_data[TAIL_CONTENT].append(line)
                    continue
                split_line = line.split()
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                end = ' '.join(split_line[7:])
                # atom_struct = [atom_num, mol_num, atom_type, charge,end]
                # tpl_data[ATOMS_CONTENT].append(atom_struct)
                tpl_data[ATOMS_CONTENT].append([atom_num, mol_num, atom_type, charge, end])
            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                tpl_data[TAIL_CONTENT].append(line)

    # Validate data section
    if len(tpl_data[ATOMS_CONTENT]) != tpl_data[NUM_ATOMS]:
        raise InvalidDataError('The length of the "Atoms" section ({}) does not equal '
                               'the number of atoms ({}).'.format(len(tpl_data[ATOMS_CONTENT]), tpl_data[NUM_ATOMS]))

    if logger.isEnabledFor(logging.DEBUG):
        list_to_file(tpl_data[HEAD_CONTENT], 'head.txt')
        list_to_file(tpl_data[ATOMS_CONTENT], 'atoms.txt')
        list_to_file(tpl_data[TAIL_CONTENT], 'tail.txt')

    return tpl_data
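The three regexes drive the head/atoms/tail state machine and assume LAMMPS-style data-file headers: a line like '2544 atoms' in the header, then an 'Atoms' section, then a 'Bonds' section. A quick check of how they classify representative lines (the sample lines are illustrative only):

import re

num_atoms_pat = re.compile(r"(\d+).*atoms$")
atoms_pat = re.compile(r"^Atoms.*")
bond_pat = re.compile(r"^Bond.*")

for sample in ['2544 atoms', '12 bond types', 'Atoms  # full', 'Bonds']:
    print('{:15s} num_atoms={!s:5} atoms={!s:5} bond={!s}'.format(
        sample,
        bool(num_atoms_pat.match(sample)),
        bool(atoms_pat.match(sample)),
        bool(bond_pat.match(sample))))
# '2544 atoms'    -> only num_atoms_pat matches (captures '2544')
# '12 bond types' -> no pattern matches
# 'Atoms  # full' -> only atoms_pat matches (switches to the atoms section)
# 'Bonds'         -> only bond_pat matches (switches to the tail section)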
Example #17
def process_cp2k_force_file(f_file, out_dir, req_atom_num):
    """
    Gathers and prints a list of data for the converted force file (length = num atoms)
    Gathers summary data for the file; not as a dict but as a list because that helps data analysis
    Also checks how many times the summary section was found; ideally, there are exactly 3. However, if the program
        did not complete a QMMM force calculation, it will not print 3 sections (MM, QM, and then QMMM).
        If the job is attempted more than once, CP2K will keep appending forces each time. It is possible that the
        MM section (and others) is repeated multiple times. There may be 3 sections that are all MM. There
        may be some combination of MM, QM, and QMMM outputs. CP2K does not identify the force calculation type.
        The logic here attempts to catch multiple MM outputs (it assumes that the QM and QMMM force summaries will
        not exactly match the MM force summary).

    @param f_file: cp2k force output file to read and process (convert last section to kcal/(mol-Angstrom) )
    @param out_dir: where to create the new output file (last section, converted)
    @param req_atom_num: an integer if specified by the user; otherwise None. If not None, it is used to validate
      that the expected number of atoms was found
    @return: if a valid cp2k file, return a np array with converted force summary.
       Otherwise, None
    """
    forces_pat = re.compile(r"^ATOMIC FORCES in .*")
    comment_pat = re.compile(r"^#.*")
    sum_pat = re.compile(r"^SUM.*")
    sum_pat_num = 0

    last_line = None
    md_sum = None
    qm_sum = None
    qmmm_sum = None
    ready_to_read = False

    keep_lines = False
    to_print = []

    with open(f_file) as f:
        atom_num = None
        qm_atom_num = None
        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            if sum_pat.match(line):
                sum_pat_num += 1
                if md_sum is None:
                    md_sum = line
                    check_atom_num(req_atom_num, last_line, f_file)
                elif line == md_sum:
                    warning("Line matching the read MM summary encountered.")
                    if keep_lines:
                        # this means that saved converted MM (not QMMM) forces. Reset.
                        to_print = []
                        ready_to_read = False
                        keep_lines = False
                        qm_sum = None
                else:
                    if qm_sum is None:
                        qm_sum = line
                        qm_atom_num = int(last_line.split()[0])
                        ready_to_read = True
                    elif line == qm_sum:
                        raise InvalidDataError(
                            "Did not expect to read QM summary twice infile: {}"
                            .format(f_file))
                    else:
                        # ideally, the third and last sum section; either way, exit now
                        qmmm_sum = line
                        break

            elif forces_pat.match(line):
                if ready_to_read:
                    keep_lines = True
            elif keep_lines:
                split_line = line.split()
                if comment_pat.match(split_line[0]):
                    continue
                try:
                    if len(split_line) != 6:
                        raise InvalidDataError(
                            "Did not find six expected values (Atom Kind Element X Y Z)"
                        )
                    atom_num = int(split_line[0])
                    xyz = np.asarray(list(map(float,
                                              split_line[3:]))) * au_to_N
                    # noinspection PyTypeChecker
                    to_print.append([atom_num] + xyz.tolist())
                except (ValueError, InvalidDataError) as e:
                    warning(
                        "{}\n"
                        "Check file: {}\n"
                        "  Problem reading line as atomic forces: {}\n"
                        "Continuing to the next line in the file list".format(
                            e, f_file, line))
                    return None
            last_line = line
    if qmmm_sum is None:
        warning(
            "Invalid file: {}\n"
            "Reached end of file without encountering the expected QMMM 'SUM OF ATOMIC FORCES' section "
            "(read {} summary force sections, checking for likely duplicate MM force output). "
            "Continuing to the next line in the file list.".format(
                f_file, sum_pat_num))
        return None
    try:
        check_atom_num(req_atom_num, last_line, f_file)
        split_line = qmmm_sum.split()
        sums = np.asarray(list(map(float, split_line[4:]))) * au_to_N
        if len(sums) != 4:
            raise InvalidDataError(
                "Did not find the expected four force values (x, y, z, total)")
        f_out = create_out_fname(f_file,
                                 prefix=OUT_FILE_PREFIX,
                                 base_dir=out_dir,
                                 ext='')
        list_to_file(to_print, f_out)
        return np.append([atom_num, qm_atom_num], sums)
    except (ValueError, InvalidDataError) as e:
        warning("{}\nCheck file: {}\n"
                "   Problem converting values in line: {}\n"
                "Continuing to the next line in the file list".format(
                    e, f_file, qmmm_sum))
    return None
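The QMMM summary conversion above hinges on the shape of CP2K's 'SUM OF ATOMIC FORCES' line. The snippet below shows why split_line[4:] yields exactly the four expected values (x, y, z, total); the sample line and the unit factor are placeholders, not output from a real run.

import numpy as np

au_to_N = 1.0  # placeholder; the real module defines the atomic-unit force conversion factor
qmmm_sum = "SUM OF ATOMIC FORCES      0.00000001    -0.00000002     0.00000003     0.00000004"

split_line = qmmm_sum.split()
# tokens 0-3 are 'SUM', 'OF', 'ATOMIC', 'FORCES'; tokens 4-7 are x, y, z, |F|
sums = np.asarray(list(map(float, split_line[4:]))) * au_to_N
print(len(sums), sums)  # 4 [ 1.e-08 -2.e-08  3.e-08  4.e-08]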
Example #18
def process_pdb_tpl(cfg):
    tpl_loc = cfg[PDB_TPL_FILE]
    tpl_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}

    atom_id = 0

    with open(tpl_loc) as f:
        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            line_head = line[:cfg[PDB_LINE_TYPE_LAST_CHAR]]
            # head_content to contain everything before the 'Atoms' section
            # also capture the number of atoms
            # match 5 letters so don't need to set up regex for the ones that have numbers following the letters
            # noinspection SpellCheckingInspection
            if line_head[:-1] in [
                    'HEADE',
                    'TITLE',
                    'REMAR',
                    'CRYST',
                    'MODEL',
                    'COMPN',
                    'NUMMD',
                    'ORIGX',
                    'SCALE',
                    'SOURC',
                    'AUTHO',
                    'CAVEA',
                    'EXPDT',
                    'MDLTY',
                    'KEYWD',
                    'OBSLT',
                    'SPLIT',
                    'SPRSD',
                    'REVDA',
                    'JRNL ',
                    'DBREF',
                    'SEQRE',
                    'HET  ',
                    'HETNA',
                    'HETSY',
                    'FORMU',
                    'HELIX',
                    'SHEET',
                    'SSBON',
                    'LINK ',
                    'CISPE',
                    'SITE ',
            ]:
                tpl_data[HEAD_CONTENT].append(line)

            # atoms_content to contain everything but the xyz
            elif line_head == 'ATOM  ':

                # Renumbering handles the case when a PDB template has ***** after atom_id 99999.
                # When renumbering, make sure to print in the correct format, including the number of characters:
                atom_id += 1
                if atom_id > 99999:
                    atom_num = format(atom_id, 'x')
                else:
                    atom_num = '{:5d}'.format(atom_id)
                # Alternately, use this:
                # atom_num = line[cfg[PDB_LINE_TYPE_LAST_CHAR]:cfg[PDB_ATOM_NUM_LAST_CHAR]]

                atom_type = line[
                    cfg[PDB_ATOM_NUM_LAST_CHAR]:cfg[PDB_ATOM_TYPE_LAST_CHAR]]
                res_type = line[
                    cfg[PDB_ATOM_TYPE_LAST_CHAR]:cfg[PDB_RES_TYPE_LAST_CHAR]]
                # There is already a try when calling the subroutine, so maybe I don't need to?
                mol_num = int(line[
                    cfg[PDB_RES_TYPE_LAST_CHAR]:cfg[PDB_MOL_NUM_LAST_CHAR]])
                pdb_x = float(
                    line[cfg[PDB_MOL_NUM_LAST_CHAR]:cfg[PDB_X_LAST_CHAR]])
                pdb_y = float(line[cfg[PDB_X_LAST_CHAR]:cfg[PDB_Y_LAST_CHAR]])
                pdb_z = float(line[cfg[PDB_Y_LAST_CHAR]:cfg[PDB_Z_LAST_CHAR]])
                last_cols = line[cfg[PDB_Z_LAST_CHAR]:]

                line_struct = [
                    line_head, atom_num, atom_type, res_type, mol_num, pdb_x,
                    pdb_y, pdb_z, last_cols
                ]
                tpl_data[ATOMS_CONTENT].append(line_struct)

            # tail_content to contain everything after the 'Atoms' section
            else:
                tpl_data[TAIL_CONTENT].append(line)

    if logger.isEnabledFor(logging.DEBUG):
        f_name = create_out_fname('reproduced_tpl',
                                  ext='.pdb',
                                  base_dir=cfg[OUT_BASE_DIR])
        list_to_file(tpl_data[HEAD_CONTENT] + tpl_data[ATOMS_CONTENT] +
                     tpl_data[TAIL_CONTENT],
                     f_name,
                     list_format=cfg[PDB_FORMAT])
    return tpl_data
Example #19
def process_data_file(cfg, chk_atom_type, data_dict, data_file,
                      data_tpl_content):
    with open(data_file) as d:
        pdb_data_section = copy.deepcopy(data_tpl_content[ATOMS_CONTENT])
        pdb_atom_num = len(pdb_data_section)
        section = SEC_HEAD
        atom_id = 0
        num_atoms = None
        atom_types = []

        for line in d:
            line = line.strip()
            # not currently keeping anything from the header; just check num atoms
            if section == SEC_HEAD:
                if ATOMS_PAT.match(line):
                    section = SEC_ATOMS
                elif num_atoms is None:
                    atoms_match = NUM_ATOMS_PAT.match(line)
                    if atoms_match:
                        # regex is 1-based
                        num_atoms = int(atoms_match.group(1))
                        if num_atoms != pdb_atom_num:
                            raise InvalidDataError(
                                "Mismatched numbers of atoms: \n"
                                "  Found {} atoms in file: {}\n"
                                "    and {} atoms in file: {}\n"
                                "".format(pdb_atom_num, cfg[PDB_TPL_FILE],
                                          num_atoms, data_file))

            # atoms_content to contain only xyz; also perform some checking
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()

                # Not currently checking molecule number
                # If decide to do so, should make a count from 1 as the PDB is read; the PDB does not
                # have to start from 1, but the data file counts molecules from 1. For now, decided
                # checking atom type is a sufficient check
                # mol_num = int(split_line[1])

                # Keep as string; json save as string and this helps compare
                atom_types.append(split_line[2])
                pdb_data_section[atom_id][5:8] = map(float, split_line[4:7])
                atom_id += 1
                # Check after increment because the counter started at 0
                if atom_id == num_atoms:
                    # Since the tail will come only from the template, nothing more is needed.
                    break

    # Now that finished reading the file...
    if atom_id != num_atoms:
        raise InvalidDataError(
            'In data file: {}\n'
            '  header section lists {} atoms, but found {} atoms'.format(
                data_file, num_atoms, atom_id))
    if chk_atom_type:
        for data_type, atom in zip(atom_types, pdb_data_section):
            try:
                pdb_type = atom[2] + atom[3]
                if pdb_type not in data_dict[data_type]:
                    warning(
                        'Did not find type {} in dictionary of values for atom_type {}: ({})'
                        ''.format(pdb_type, data_type, data_dict[data_type]))
                    # print("atom", atom_type, data_dict[atom_type])
            except KeyError:
                warning(
                    'Did not find data file atom type {} in the atom type dictionary {}'
                    ''.format(data_type, cfg[ATOM_TYPE_DICT_FILE]))
    f_name = create_out_fname(data_file,
                              ext='.pdb',
                              base_dir=cfg[OUT_BASE_DIR])
    list_to_file(data_tpl_content[HEAD_CONTENT] + pdb_data_section +
                 data_tpl_content[TAIL_CONTENT],
                 f_name,
                 list_format=cfg[PDB_FORMAT])
Example #20
def process_data_file(atom_type_dict, data_file, data_tpl_content,
                      new_data_section):
    with open(data_file) as d:
        section = SEC_HEAD
        atom_id = 0
        num_atoms = None
        for line in d.readlines():
            line = line.strip()
            # not keeping anything from the header
            if section == SEC_HEAD:
                if ATOMS_PAT.match(line):
                    section = SEC_ATOMS
                elif num_atoms is None:
                    atoms_match = NUM_ATOMS_PAT.match(line)
                    if atoms_match:
                        # regex is 1-based
                        num_atoms = int(atoms_match.group(1))
                        if num_atoms != len(data_tpl_content[ATOMS_CONTENT]):
                            raise InvalidDataError(
                                'The number of atoms in the template file ({}) does '
                                'not equal the number of atoms ({}) in the data file: {}.'
                                ''.format(data_tpl_content[NUM_ATOMS],
                                          num_atoms, data_file))
            # atoms_content to grab xyz and pbc rep; also perform some checking
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()

                # Not currently checking molecule number; the number may be wrong and the data still correct,
                # because of the reordering I did to match the template ordering.
                # Thus, I don't need:
                # mol_num = int(split_line[1])

                # Perform checking that the atom type in the corresponding line of the template file matches
                # the current file
                try:
                    old_atom_type = int(split_line[2])
                    # Add in the xyz coordinates
                    new_data_section[atom_id][4:7] = [
                        "{:16.10f}".format(element)
                        for element in list(map(float, split_line[4:7]))
                    ]
                except (IndexError, ValueError):
                    raise InvalidDataError(
                        "In attempting to read {} atoms from file: {}\n  "
                        "expected, but did not find, three ints followed by four floats on"
                        "line: {}\n  "
                        "Check input".format(data_tpl_content[NUM_ATOMS],
                                             data_file, line))

                # If there is an atom_type_dict, and the read atom type is in it....
                if old_atom_type in atom_type_dict:
                    new_atom_type = data_tpl_content[ATOMS_CONTENT][atom_id][2]
                    matching_new_atom_type = atom_type_dict[old_atom_type]

                    if new_atom_type != matching_new_atom_type:
                        print(
                            'Data mismatch on atom_id {:3d}, line: {}\n  Expected type {} but found type {}'
                            ''.format(atom_id + 1, line,
                                      matching_new_atom_type, new_atom_type))

                # and pbc ids, if they are there, before comments
                try:
                    new_data_section[atom_id][7] = ' '.join(
                        [str(int(x)) for x in split_line[8:10]] +
                        [new_data_section[atom_id][7]])
                except (ValueError, IndexError):
                    # if there is no pdb id info and/or comment info, no problem. Keep on.
                    pass
                atom_id += 1
                # Check after increment because the counter started at 0
                if atom_id == num_atoms:
                    # Since the tail will come only from the template, nothing more is needed.
                    break

    # Now that finished reading the file...
    # Check total length
    # (will be wrong if got to tail before reaching num_atoms)
    if atom_id != num_atoms:
        raise InvalidDataError(
            'The number of atoms read from the file {} ({}) does not equal '
            'the listed number of atoms ({}).'.format(data_file, atom_id,
                                                      num_atoms))
        # Now make new file
    f_name = create_out_fname(data_file, suffix='_new', ext='.data')
    list_to_file(
        data_tpl_content[HEAD_CONTENT] + new_data_section +
        data_tpl_content[TAIL_CONTENT], f_name)
Example #21
def process_dump_file(cfg, data_tpl_content, dump_file):
    section = None
    box = np.zeros((3, ))
    counter = 1
    atom_list_order = [PRE_RES, PROT_RES, POST_RES, HYD_MOL, WAT_MOL, POST_WAT]
    dump_atom_data = []
    atom_lists = {
        PRE_RES: [],
        PROT_RES: [],
        POST_RES: [],
        HYD_MOL: [],
        WAT_MOL: [],
        POST_WAT: []
    }

    with open(dump_file) as d:
        for line in d:
            line = line.strip()
            if section is None:
                section = find_dump_section_state(line)
                if section is None:
                    raise InvalidDataError(
                        'Unexpected line in file {}: {}'.format(
                            dump_file, line))
            elif section == SEC_TIMESTEP:
                timestep = line
                # Reset variables
                water_dict = defaultdict(list)
                dump_atom_data = []
                excess_proton = None
                hydronium = []
                for a_list in atom_lists:
                    atom_lists[a_list] = []
                section = None
            elif section == SEC_NUM_ATOMS:
                if data_tpl_content[NUM_ATOMS] != int(line):
                    raise InvalidDataError(
                        'At timestep {} in file {}, the listed number of atoms ({}) does '
                        'not equal the number of atoms in the template data file '
                        '({}).'.format(timestep, dump_file, line,
                                       data_tpl_content[NUM_ATOMS]))
                section = None
            elif section == SEC_BOX_SIZE:
                split_line = line.split()
                diff = float(split_line[1]) - float(split_line[0])
                box[counter - 1] = diff
                if counter == 3:
                    counter = 0
                    section = None
                counter += 1
            elif section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in the dump file, skip it and move on to the next line
                if len(split_line) < 7:
                    continue
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                description = ''
                atom_struct = [
                    atom_num, mol_num, atom_type, charge, x, y, z, description
                ]

                # Keep track of separate portions of the system to allow sorting and processing
                if mol_num == cfg[PROT_RES_MOL_ID]:
                    if atom_type == cfg[PROT_H_TYPE] and atom_num not in cfg[
                            PROT_H_IGNORE]:
                        excess_proton = atom_struct
                    else:
                        atom_lists[PROT_RES].append(atom_struct)
                elif atom_type == cfg[H3O_O_TYPE] or atom_type == cfg[
                        H3O_H_TYPE]:
                    hydronium.append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[
                        WAT_H_TYPE]:
                    water_dict[mol_num].append(atom_struct)
                # Save everything else in three chunks for recombining sections post-processing
                elif len(atom_lists[PROT_RES]) == 0:
                    atom_lists[PRE_RES].append(atom_struct)
                elif len(water_dict) == 0:
                    atom_lists[POST_RES].append(atom_struct)
                else:
                    atom_lists[POST_WAT].append(atom_struct)

                if counter == data_tpl_content[NUM_ATOMS]:
                    counter = 0
                    section = None

                    # Now that finished reading all atom lines...
                    # Check and process!
                    if len(water_dict) == 0:
                        raise InvalidDataError(
                            'Found no water molecules. Check that the input types {} = {} '
                            'and {} = {} are in the dump '
                            'file.'.format(WAT_O_TYPE, cfg[WAT_O_TYPE],
                                           WAT_H_TYPE, cfg[WAT_H_TYPE]))
                    if excess_proton is None:
                        if len(hydronium) != 4:
                            raise InvalidDataError(
                                'Did not find an excess proton or one hydronium ion. Check dump '
                                'file and input types: {} = {}; {} = {}; {} = {}'
                                .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                        H3O_O_TYPE, cfg[H3O_O_TYPE],
                                        H3O_H_TYPE, cfg[H3O_H_TYPE]))
                    else:
                        if len(hydronium) != 0:
                            raise InvalidDataError(
                                'Found both an excess proton and hydronium atoms. Check dump file '
                                'and input types: {} = {}; {} = {}; {} = {}'.
                                format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                       H3O_O_TYPE, cfg[H3O_O_TYPE], H3O_H_TYPE,
                                       cfg[H3O_H_TYPE]))
                        deprotonate(cfg, atom_lists[PROT_RES], excess_proton,
                                    hydronium, water_dict, box,
                                    data_tpl_content)

                    # Ensure in correct order for printing
                    atom_lists[HYD_MOL] = assign_hyd_mol(cfg, hydronium)
                    atom_lists[WAT_MOL] = sort_wat_mols(cfg, water_dict)

                    for a_list in atom_list_order:
                        dump_atom_data += atom_lists[a_list]

                    # overwrite atom_num, mol_num, atom_type, charge, then description
                    for index in range(len(dump_atom_data)):
                        if dump_atom_data[index][3] == data_tpl_content[ATOMS_CONTENT][index][3] or \
                                dump_atom_data[index][0] in cfg[PROT_TYPE_IGNORE_ATOMS]:
                            dump_atom_data[index][0:4] = data_tpl_content[
                                ATOMS_CONTENT][index][0:4]
                            dump_atom_data[index][7] = ' '.join(
                                data_tpl_content[ATOMS_CONTENT][index][7:])
                        else:
                            raise InvalidDataError(
                                "In reading file: {}\n found atom index {} with charge {} which "
                                "does not match the charge in the data template ({}). \n"
                                "To ignore this mis-match, list "
                                "the atom's index number in the keyword '{}' in the ini file."
                                "".format(
                                    dump_file, dump_atom_data[index][0],
                                    dump_atom_data[index][3],
                                    data_tpl_content[ATOMS_CONTENT][index][3],
                                    PROT_TYPE_IGNORE_ATOMS))

                    d_out = create_out_fname(dump_file,
                                             suffix='_' + str(timestep),
                                             ext='.data',
                                             base_dir=cfg[OUT_BASE_DIR])
                    data_tpl_content[HEAD_CONTENT][0] = "Created by evbdump2data from {} " \
                                                        "timestep {}".format(dump_file, timestep)
                    list_to_file(
                        data_tpl_content[HEAD_CONTENT] + dump_atom_data +
                        data_tpl_content[TAIL_CONTENT], d_out)
                counter += 1
    if counter == 1:
        print("Completed reading dumpfile {}".format(dump_file))
    else:
        warning(
            "Dump file {} step {} did not have the full list of atom numbers. "
            "Continuing program.".format(dump_file, timestep))
Example #22
def process_data_file(atom_type_dict, data_file, data_tpl_content, new_data_section):
    with open(data_file) as d:
        section = SEC_HEAD
        atom_id = 0
        num_atoms = None
        for line in d.readlines():
            line = line.strip()
            # not keeping anything from the header
            if section == SEC_HEAD:
                if ATOMS_PAT.match(line):
                    section = SEC_ATOMS
                elif num_atoms is None:
                    atoms_match = NUM_ATOMS_PAT.match(line)
                    if atoms_match:
                        # regex is 1-based
                        num_atoms = int(atoms_match.group(1))
                        if num_atoms != len(data_tpl_content[ATOMS_CONTENT]):
                            raise InvalidDataError('The number of atoms in the template file ({}) does '
                                                   'not equal the number of atoms ({}) in the data file: {}.'
                                                   ''.format(data_tpl_content[NUM_ATOMS], num_atoms, data_file))
            # atoms_content to grab xyz and pbc rep; also perform some checking
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()

                # Not currently checking molecule number; the number may be wrong and the data still correct,
                # because of the reordering I did to match the template ordering.
                # Thus, I don't need:
                # mol_num = int(split_line[1])

                # Perform checking that the atom type in the corresponding line of the template file matches
                # the current file
                try:
                    old_atom_type = int(split_line[2])
                    # Add in the xyz coordinates
                    new_data_section[atom_id][4:7] = map(float, split_line[4:7])
                except (IndexError, ValueError):
                    raise InvalidDataError("In attempting to read {} atoms from file: {}\n  "
                                           "expected, but did not find, three ints followed by four floats on"
                                           "line: {}\n  "
                                           "Check input".format(data_tpl_content[NUM_ATOMS], data_file, line))

                # If there is an atom_type_dict, and the read atom type is in it....
                if old_atom_type in atom_type_dict:
                    new_atom_type = data_tpl_content[ATOMS_CONTENT][atom_id][2]
                    matching_new_atom_type = atom_type_dict[old_atom_type]

                    if new_atom_type != matching_new_atom_type:
                        print('Data mismatch on atom_id {:3d}, line: {}\n  Expected type {} but found type {}'
                              ''.format(atom_id + 1, line, matching_new_atom_type, new_atom_type))

                # and pbc ids, if they are there, before comments
                try:
                    new_data_section[atom_id][7] = ' '.join([str(int(pbc)) for pbc in split_line[8:10]] + [new_data_section[atom_id][7]])
                except (ValueError, IndexError):
                    # if there is no pdb id info and/or comment info, no problem. Keep on.
                    pass
                atom_id += 1
                # Check after increment because the counter started at 0
                if atom_id == num_atoms:
                    # Since the tail will come only from the template, nothing more is needed.
                    break

    # Now that finished reading the file...
    # Check total length
    # (will be wrong if got to tail before reaching num_atoms)
    if atom_id != num_atoms:
        raise InvalidDataError('The number of atoms read from the file {} ({}) does not equal '
                               'the listed number of atoms ({}).'.format(data_file, atom_id, num_atoms))
    # Now make new file
    f_name = create_out_fname(data_file, suffix='_new', ext='.data')
    list_to_file(data_tpl_content[HEAD_CONTENT] + new_data_section + data_tpl_content[TAIL_CONTENT],
                 f_name)
    print('Completed writing {}'.format(f_name))
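A note on the header parsing above: the atom count comes from a capturing regex applied to the LAMMPS data-file header (lines such as "1880 atoms"), and the "regex is 1-based" comment refers to group(1) being the first capture group rather than the whole match. A minimal sketch, assuming a pattern equivalent to the NUM_ATOMS_PAT used here:

import re

# A stand-in equivalent to the assumed NUM_ATOMS_PAT: capture the leading
# integer on a LAMMPS data-file header line such as "1880 atoms".
num_atoms_pat = re.compile(r"(\d+).*atoms$")

for header_line in ["1880 atoms", "12 atom types", "Atoms"]:
    match = num_atoms_pat.match(header_line.strip())
    if match:
        # group(0) is the whole match; group(1) is the first capture group
        print("found atom count:", int(match.group(1)))
    else:
        print("no atom count on line:", repr(header_line))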
Example #23
0
def process_psf(cfg, atom_num_dict, mol_num_dict, element_dict, radii_dict):

    with open(cfg[PSF_FILE]) as f:
        psf_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
        num_atoms_pat = re.compile(r"(\d+).*NATOM$")

        num_atoms = 1
        section = SEC_HEAD

        # for printing qmmm info
        qmmm_elem_id_dict = {}
        ca_res_atom_id_dict = {}
        cb_res_atom_id_dict = {}
        atoms_for_vmd = []
        types_for_mm_kind = set()
        qmmm_charge = 0

        # for RENUM_MOL
        last_resid = None
        cur_mol_num = 0

        for line in f.readlines():
            s_line = line.strip()
            # head_content to contain Everything before 'Atoms' section
            # also capture the number of atoms
            if section == SEC_HEAD:
                psf_data[HEAD_CONTENT].append(line.rstrip())

                atoms_match = num_atoms_pat.match(s_line)
                if atoms_match:
                    # regex is 1-based
                    num_atoms = int(atoms_match.group(1))
                    section = SEC_ATOMS

            elif section == SEC_ATOMS:
                if len(s_line) == 0:
                    continue
                split_line = s_line.split()
                atom_num = int(split_line[0])
                segid = split_line[1]
                resid = int(split_line[2])
                resname = split_line[3]
                atom_type = split_line[4]
                charmm_type = split_line[5]
                charge = float(split_line[6])
                atom_wt = float(split_line[7])
                zero = split_line[8]

                # For reordering atoms
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                # For user-specified changing of molecule number
                if resid in mol_num_dict:
                    resid = mol_num_dict[resid]

                if cfg[RENUM_MOL]:
                    if resid != last_resid:
                        last_resid = resid
                        cur_mol_num += 1
                    resid = cur_mol_num

                atom_struct = [atom_num, segid, resid, resname, atom_type, charmm_type, charge, atom_wt, zero]
                psf_data[ATOMS_CONTENT].append(atom_struct)

                if resid in cfg[RESID_QM] or resid in cfg[RESID_QMMM] and atom_type not in cfg[SKIP_ATOM_TYPES]:
                    if resid in cfg[RESID_QMMM] and atom_type == C_ALPHA:
                        ca_res_atom_id_dict[resid] = atom_num
                    else:
                        if resid in cfg[RESID_QMMM] and atom_type == C_BETA:
                            cb_res_atom_id_dict[resid] = atom_num
                        if atom_type in element_dict:
                            element = element_dict[atom_type]
                        else:
                            raise InvalidDataError(
                                "Did not find atom type '{}' in the element dictionary. Please "
                                "provide a new atom type, element dictionary (using keyword {} "
                                "in the configuration file) that includes all atom types in the "
                                "residues identified with the '{}' key."
                                "".format(atom_type, ELEMENT_DICT_FILE, RESID_QMMM)
                            )
                        if element in qmmm_elem_id_dict:
                            qmmm_elem_id_dict[element].append(atom_num)
                        else:
                            qmmm_elem_id_dict[element] = [atom_num]
                        qmmm_charge += charge
                        atoms_for_vmd.append(atom_num - 1)

                if cfg[PRINT_FOR_CP2K]:
                    types_for_mm_kind.add(atom_type)

                if len(psf_data[ATOMS_CONTENT]) == num_atoms:
                    section = SEC_TAIL
            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                psf_data[TAIL_CONTENT].append(line.rstrip())

    if len(atom_num_dict) > 0:
        warning(
            "This program does not yet edit any sections other than the atoms section."
            "If you are renumbering atoms, the bonds, angles, dihedrals, impropers, and"
            "cross-terms sections will not match."
        )
        psf_data[ATOMS_CONTENT] = sorted(psf_data[ATOMS_CONTENT], key=lambda entry: entry[0])

    if cfg[RENUM_MOL] or len(atom_num_dict) + len(mol_num_dict) > 0:
        if cfg[PSF_NEW_FILE] is None:
            f_name = create_out_fname(cfg[PSF_FILE], suffix="_new", base_dir=cfg[OUT_BASE_DIR])
        else:
            f_name = cfg[PSF_NEW_FILE]
        list_to_file(
            psf_data[HEAD_CONTENT] + psf_data[ATOMS_CONTENT] + psf_data[TAIL_CONTENT],
            f_name,
            list_format=cfg[PSF_FORMAT],
        )

    if cfg[PRINT_FOR_CP2K]:
        print("Total charge from QM atoms: {:.2f}".format(qmmm_charge))
        # create CP2K input listing amino atom ids
        f_name = create_out_fname("amino_id.dat", base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem], elem, f_name, mode=print_mode)
            print_mode = "a"
        print_qm_links(ca_res_atom_id_dict, cb_res_atom_id_dict, f_name, mode=print_mode)
        # create CP2K input listing MM atom type radii
        f_name = create_out_fname("mm_kinds.dat", base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"

        for atom_type in types_for_mm_kind:
            try:
                print_mm_kind(atom_type, radii_dict[atom_type], f_name, mode=print_mode)
                print_mode = "a"
            except KeyError:
                warning(
                    "Did not find atom type '{}' in the atom_type to radius dictionary: {}\n"
                    "    '{}' printed without this type; user may manually add its radius specification.\n"
                    "    To print this file with all MM types, use the keyword '{}' in the configuration file \n"
                    "    to identify a file with atom_type,radius (one per line, comma-separated) with all "
                    "MM types in the psf".format(atom_type, cfg[RADII_DICT_FILE], "mm_kinds.dat", RADII_DICT_FILE)
                )

        # create VMD input listing amino atom indexes (base-zero counting)
        f_name = create_out_fname("vmd_protein_atoms.dat", base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=" ")
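For reference, the atoms section of a PSF is parsed above by whitespace-splitting each line into the nine fields unpacked as atom_num, segid, resid, resname, atom_type, charmm_type, charge, atom_wt, and zero. A minimal sketch of that column layout; the sample line below is illustrative, not taken from a real PSF:

# Illustrative PSF atom line (made up for this sketch); whitespace-splitting
# recovers the same nine fields that process_psf unpacks above.
sample = "       1 PROA     1        GLU      N        NH3     -0.300000      14.0070           0"
(atom_num, segid, resid, resname, atom_type,
 charmm_type, charge, atom_wt, zero) = sample.split()
print(int(atom_num), segid, int(resid), resname, atom_type,
      charmm_type, float(charge), float(atom_wt), zero)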
Example #24
0
def process_data_tpl(cfg):
    tpl_loc = cfg[DATA_TPL_FILE]
    tpl_data = {
        HEAD_CONTENT: [],
        ATOMS_CONTENT: [],
        TAIL_CONTENT: [],
        PROT_RES_MOL: [],
        H3O_MOL: [],
        WATER_MOLS: defaultdict(list),
        FIRST_H3O_H_INDEX: None
    }
    section = SEC_HEAD
    num_atoms_pat = re.compile(r"(\d+).*atoms$")
    atoms_pat = re.compile(r"^Atoms.*")
    # put in dummy x y z
    x = 0.0
    y = 0.0
    z = 0.0

    total_charge = 0.0

    # For debugging total charge
    calc_charge_atom_nums = {}
    for name in CALC_CHARGE_NAMES:
        calc_charge_atom_nums[cfg[name]] = name

    with open(tpl_loc) as f:
        for line in f:
            line = line.strip()
            # head_content to contain Everything before 'Atoms' section
            # also capture the number of atoms
            if section == SEC_HEAD:
                tpl_data[HEAD_CONTENT].append(line)
                if NUM_ATOMS not in tpl_data:
                    atoms_match = num_atoms_pat.match(line)
                    if atoms_match:
                        # regex is 1-based
                        tpl_data[NUM_ATOMS] = int(atoms_match.group(1))
                if atoms_pat.match(line):
                    section = SEC_ATOMS
                    tpl_data[HEAD_CONTENT].append('')
            # atoms_content to contain everything but the xyz: atom_num, mol_num, atom_type, charge, type'
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                description = ' '.join(split_line[7:])
                atom_struct = [
                    atom_num, mol_num, atom_type, charge, x, y, z, description
                ]
                tpl_data[ATOMS_CONTENT].append(atom_struct)
                total_charge += charge

                if atom_type == cfg[H3O_O_TYPE]:
                    tpl_data[H3O_MOL].append(atom_struct)
                    tpl_data[H3O_O_CHARGE] = charge
                elif atom_type == cfg[H3O_H_TYPE]:
                    if tpl_data[FIRST_H3O_H_INDEX] is None:
                        tpl_data[FIRST_H3O_H_INDEX] = len(tpl_data[H3O_MOL])
                    tpl_data[H3O_MOL].append(atom_struct)
                    tpl_data[H3O_H_CHARGE] = charge
                elif mol_num == cfg[PROT_RES_MOL_ID]:
                    tpl_data[PROT_RES_MOL].append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[
                        WAT_H_TYPE]:
                    tpl_data[WATER_MOLS][mol_num].append(atom_struct)
                if atom_num == tpl_data[NUM_ATOMS]:
                    section = SEC_TAIL
                    # Perform checks total charge
                    if abs(total_charge) < TOL:
                        print(
                            'The data file system is neutral (total charge {:.2e})'
                            .format(total_charge))
                    else:
                        warning(
                            'The data file system is not neutral. Total charge {0:.6f}'
                            .format(total_charge))
                    if len(tpl_data[PROT_RES_MOL]) == 0:
                        raise InvalidDataError(
                            'Did not find the input {} ({}).'.format(
                                PROT_RES_MOL, cfg[PROT_RES_MOL]))
                    for mol_list in [H3O_MOL, WATER_MOLS]:
                        if len(tpl_data[mol_list]) == 0:
                            raise InvalidDataError(
                                'In reading the data file, found no {}. Check the data file and '
                                'the input atom types: \n{} = {}\n{} = {}\n{} = {}\n'
                                '{} = {}\n{} = {}.'
                                ''.format(mol_list, PROT_H_TYPE,
                                          cfg[PROT_H_TYPE], H3O_O_TYPE,
                                          cfg[H3O_O_TYPE], H3O_H_TYPE,
                                          cfg[H3O_H_TYPE], WAT_O_TYPE,
                                          cfg[WAT_O_TYPE], WAT_H_TYPE,
                                          cfg[WAT_H_TYPE]))

                elif atom_num in calc_charge_atom_nums:
                    print('After atom {0} ({1}), the total charge is: {2:.3f}'.
                          format(atom_num, calc_charge_atom_nums[atom_num],
                                 total_charge))

            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                tpl_data[TAIL_CONTENT].append(line)

    # Validate data section
    if len(tpl_data[ATOMS_CONTENT]) != tpl_data[NUM_ATOMS]:
        raise InvalidDataError(
            'In the file {}, the length of the "Atoms" section ({}) does not equal '
            'the number of atoms ({}).'.format(tpl_loc,
                                               len(tpl_data[ATOMS_CONTENT]),
                                               tpl_data[NUM_ATOMS]))

    if cfg[REPROD_TPL]:
        f_out = create_out_fname('reproduced_tpl',
                                 base_dir=cfg[OUT_BASE_DIR],
                                 ext='.data')
        list_to_file(
            tpl_data[HEAD_CONTENT] + tpl_data[ATOMS_CONTENT][:] +
            tpl_data[TAIL_CONTENT], f_out)

    return tpl_data
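The water bookkeeping above relies on WATER_MOLS being a defaultdict(list) keyed by molecule number, so atoms belonging to the same water molecule accumulate together as the Atoms section is read. A minimal sketch of that grouping with toy rows; the type numbers are assumed placeholders for the configured water O and H types:

from collections import defaultdict

# Toy (atom_num, mol_num, atom_type) rows; types 3 and 4 stand in for the
# configured water O and H types, purely for illustration.
atoms = [(10, 5, 3), (11, 5, 4), (12, 5, 4), (13, 6, 3), (14, 6, 4), (15, 6, 4)]
wat_o_type, wat_h_type = 3, 4

water_mols = defaultdict(list)
for atom_num, mol_num, atom_type in atoms:
    if atom_type in (wat_o_type, wat_h_type):
        water_mols[mol_num].append(atom_num)

for mol_num in sorted(water_mols):
    print("water molecule", mol_num, "atoms:", water_mols[mol_num])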
Example #25
0
def print_pdb(head_data, atoms_data, tail_data, file_name, file_format):
    list_to_file(head_data, file_name)
    pdb_atoms_to_file(file_format, atoms_data, file_name, mode='a')
    list_to_file(tail_data, file_name, mode='a', print_message=False)
Example #26
0
def process_cp2k_file(cfg, cp2k_file, data_tpl_content, pdb_tpl_content,
                      element_dict):
    """
    Gather info from CP2K output file and update xyz data if needed
    @param cfg: configuration for the run
    @param cp2k_file: the file to open
    @param data_tpl_content: list of lists
    @param pdb_tpl_content: list of lists
    @param element_dict: element dictionary for making xyz files
    @return: xyz coordinates info in data, pdb, and xyz formats (as needed)
    """
    data_atoms_section = []
    pdb_atoms_section = []

    if data_tpl_content is None:
        make_data_file = False
    else:
        make_data_file = True

    if pdb_tpl_content is None:
        make_pdb_file = False
    else:
        make_pdb_file = True

    result_dict = {
        FILE_NAME: cp2k_file,
        QMMM_ENERGY: np.inf,
        OPT_GEOM: 'NA',
        COMPLETED_JOB: False
    }
    atoms_xyz = None
    with open(cp2k_file) as f:
        pkg_version = md_utils.__version__
        # print(temp1)
        # pkg_version = pkg_resources.parse_version(md_utils.__version__)
        if make_pdb_file:
            pdb_tpl_content[HEAD_CONTENT][0] = "REMARK 450 Created on {} by {} version {}" \
                                               "".format(datetime.now(), __name__, pkg_version)
            pdb_tpl_content[HEAD_CONTENT][
                1] = "REMARK 450 from template {}".format(cfg[PDB_TPL_FILE])
            pdb_tpl_content[HEAD_CONTENT][
                2] = "REMARK 450 and coordinates from {}".format(cp2k_file)

        if make_data_file:
            data_tpl_content[HEAD_CONTENT][0] = "Created on {} by {} version {} from template file {} and " \
                                                "cp2k output file {}".format(datetime.now(), __name__,
                                                                             pkg_version,
                                                                             cfg[DATA_TPL_FILE], cp2k_file)
        for line in f:
            line = line.strip()
            if COORD_PAT.match(line):
                # Now advance to first line of coordinates
                for _ in range(3):
                    next(f)
                data_atoms_section, pdb_atoms_section, atoms_xyz = process_coords(
                    f, data_tpl_content, pdb_tpl_content, cfg[PRINT_XYZ_FLAG],
                    element_dict)
            elif ENERGY_PAT.match(line):
                # skip steps that take further from the min energy
                qmmm_energy = float(line.split()[-1])
                if qmmm_energy < result_dict[QMMM_ENERGY]:
                    result_dict[QMMM_ENERGY] = qmmm_energy
            elif GEOM_OPT_RUN_PAT.match(line):
                # set to false because not optimized, overwriting "NA"
                result_dict[OPT_GEOM] = False
            elif GEOM_OPT_COMPLETE_PAT.match(line):
                result_dict[OPT_GEOM] = True
            elif REF_PAT.match(line):
                result_dict[COMPLETED_JOB] = True
                break

    # If we successfully returned the data_atoms_section, make new file
    if (make_data_file and len(data_atoms_section)
            == 0) or (make_pdb_file and len(pdb_atoms_section)
                      == 0) or (cfg[PRINT_XYZ_FLAG] and len(atoms_xyz) == 0):
        raise InvalidDataError(
            "Did not file atoms coordinates in file: {}".format(cp2k_file))
    print(
        '"{file_name}",{qmmm_energy:f},"{opt_geom}","{completed_job}"'.format(
            **result_dict))
    if make_data_file:
        f_name = create_out_fname(cp2k_file, ext='.data')
        list_to_file(data_tpl_content[HEAD_CONTENT] + data_atoms_section +
                     data_tpl_content[TAIL_CONTENT],
                     f_name,
                     print_message=False)
    if make_pdb_file:
        f_name = create_out_fname(cp2k_file, ext='.pdb')
        list_to_file(pdb_tpl_content[HEAD_CONTENT] + pdb_atoms_section +
                     pdb_tpl_content[TAIL_CONTENT],
                     f_name,
                     list_format=PDB_FORMAT,
                     print_message=False)
    if cfg[PRINT_XYZ_FLAG]:
        f_name = create_out_fname(cp2k_file, ext=cfg[XYZ_FILE_SUF])
        list_to_file(atoms_xyz, f_name, print_message=False)
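One detail worth noting in the energy handling above: result_dict[QMMM_ENERGY] starts at np.inf and each ENERGY_PAT match only replaces it if the new value is lower, so after a geometry optimization the minimum energy is what gets reported. A minimal sketch of the same scan, with a stand-in regex and sample lines that only approximate CP2K output:

import re

# Stand-in for ENERGY_PAT; the real pattern is defined in the calling module
# and may differ. The sample lines below are illustrative only.
energy_pat = re.compile(r".*ENERGY\|.*")

lines = [
    "ENERGY| Total FORCE_EVAL ( QMMM ) energy (a.u.):   -1234.567",
    "ENERGY| Total FORCE_EVAL ( QMMM ) energy (a.u.):   -1234.789",
    "ENERGY| Total FORCE_EVAL ( QMMM ) energy (a.u.):   -1234.654",
]

min_energy = float("inf")
for line in lines:
    if energy_pat.match(line.strip()):
        qmmm_energy = float(line.split()[-1])
        if qmmm_energy < min_energy:
            min_energy = qmmm_energy
print("lowest energy found:", min_energy)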
Example #27
0
File: pdb_edit.py Project: abb58/md_utils
def print_pdb(head_data, atoms_data, tail_data, file_name, file_format):
    list_to_file(head_data, file_name)
    pdb_atoms_to_file(file_format, atoms_data, file_name, mode='a')
    list_to_file(tail_data, file_name, mode='a', print_message=False)
Example #28
0
File: psf_edit.py Project: abb58/md_utils
def process_psf(cfg, atom_num_dict, mol_num_dict, element_dict, radii_dict):

    with open(cfg[PSF_FILE]) as f:
        psf_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: []}
        num_atoms_pat = re.compile(r"(\d+).*NATOM$")

        num_atoms = 1
        section = SEC_HEAD

        # for printing qmmm info
        qmmm_elem_id_dict = {}
        ca_res_atom_id_dict = {}
        cb_res_atom_id_dict = {}
        atoms_for_vmd = []
        types_for_mm_kind = set()
        qmmm_charge = 0

        # for RENUM_MOL
        last_resid = None
        cur_mol_num = 0

        for line in f.readlines():
            s_line = line.strip()
            # head_content to contain Everything before 'Atoms' section
            # also capture the number of atoms
            if section == SEC_HEAD:
                psf_data[HEAD_CONTENT].append(line.rstrip())

                atoms_match = num_atoms_pat.match(s_line)
                if atoms_match:
                    # regex is 1-based
                    num_atoms = int(atoms_match.group(1))
                    section = SEC_ATOMS

            elif section == SEC_ATOMS:
                if len(s_line) == 0:
                    continue
                split_line = s_line.split()
                atom_num = int(split_line[0])
                segid = split_line[1]
                resid = int(split_line[2])
                resname = split_line[3]
                atom_type = split_line[4]
                charmm_type = split_line[5]
                charge = float(split_line[6])
                atom_wt = float(split_line[7])
                zero = split_line[8]

                # For reordering atoms
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                # For user-specified changing of molecule number
                if resid in mol_num_dict:
                    resid = mol_num_dict[resid]

                if cfg[RENUM_MOL]:
                    if resid != last_resid:
                        last_resid = resid
                        cur_mol_num += 1
                    resid = cur_mol_num

                atom_struct = [
                    atom_num, segid, resid, resname, atom_type, charmm_type,
                    charge, atom_wt, zero
                ]
                psf_data[ATOMS_CONTENT].append(atom_struct)

                if resid in cfg[RESID_QM] or resid in cfg[
                        RESID_QMMM] and atom_type not in cfg[SKIP_ATOM_TYPES]:
                    if resid in cfg[RESID_QMMM] and atom_type == C_ALPHA:
                        ca_res_atom_id_dict[resid] = atom_num
                    else:
                        if resid in cfg[RESID_QMMM] and atom_type == C_BETA:
                            cb_res_atom_id_dict[resid] = atom_num
                        if atom_type in element_dict:
                            element = element_dict[atom_type]
                        else:
                            raise InvalidDataError(
                                "Did not find atom type '{}' in the element dictionary. Please "
                                "provide a new atom type, element dictionary (using keyword {} "
                                "in the configuration file) that includes all atom types in the "
                                "residues identified with the '{}' key."
                                "".format(atom_type, ELEMENT_DICT_FILE,
                                          RESID_QMMM))
                        if element in qmmm_elem_id_dict:
                            qmmm_elem_id_dict[element].append(atom_num)
                        else:
                            qmmm_elem_id_dict[element] = [atom_num]
                        qmmm_charge += charge
                        atoms_for_vmd.append(atom_num - 1)

                if cfg[PRINT_FOR_CP2K]:
                    types_for_mm_kind.add(atom_type)

                if len(psf_data[ATOMS_CONTENT]) == num_atoms:
                    section = SEC_TAIL
            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                psf_data[TAIL_CONTENT].append(line.rstrip())

    if len(atom_num_dict) > 0:
        warning(
            "This program does not yet edit any sections other than the atoms section."
            "If you are renumbering atoms, the bonds, angles, dihedrals, impropers, and"
            "cross-terms sections will not match.")
        psf_data[ATOMS_CONTENT] = sorted(psf_data[ATOMS_CONTENT],
                                         key=lambda entry: entry[0])

    if cfg[RENUM_MOL] or len(atom_num_dict) + len(mol_num_dict) > 0:
        if cfg[PSF_NEW_FILE] is None:
            f_name = create_out_fname(cfg[PSF_FILE],
                                      suffix="_new",
                                      base_dir=cfg[OUT_BASE_DIR])
        else:
            f_name = cfg[PSF_NEW_FILE]
        list_to_file(psf_data[HEAD_CONTENT] + psf_data[ATOMS_CONTENT] +
                     psf_data[TAIL_CONTENT],
                     f_name,
                     list_format=cfg[PSF_FORMAT])

    if cfg[PRINT_FOR_CP2K]:
        print("Total charge from QM atoms: {:.2f}".format(qmmm_charge))
        # create CP2K input listing amino atom ids
        f_name = create_out_fname('amino_id.dat', base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"
        for elem in qmmm_elem_id_dict:
            print_qm_kind(qmmm_elem_id_dict[elem],
                          elem,
                          f_name,
                          mode=print_mode)
            print_mode = 'a'
        print_qm_links(ca_res_atom_id_dict,
                       cb_res_atom_id_dict,
                       f_name,
                       mode=print_mode)
        # create CP2K input listing MM atom type radii
        f_name = create_out_fname('mm_kinds.dat', base_dir=cfg[OUT_BASE_DIR])
        print_mode = "w"

        for atom_type in types_for_mm_kind:
            try:
                print_mm_kind(atom_type,
                              radii_dict[atom_type],
                              f_name,
                              mode=print_mode)
                print_mode = 'a'
            except KeyError:
                warning(
                    "Did not find atom type '{}' in the atom_type to radius dictionary: {}\n"
                    "    '{}' printed without this type; user may manually add its radius specification.\n"
                    "    To print this file with all MM types, use the keyword '{}' in the configuration file \n"
                    "    to identify a file with atom_type,radius (one per line, comma-separated) with all "
                    "MM types in the psf".format(atom_type,
                                                 cfg[RADII_DICT_FILE],
                                                 'mm_kinds.dat',
                                                 RADII_DICT_FILE))

        # create VMD input listing amino atom indexes (base-zero counting)
        f_name = create_out_fname('vmd_protein_atoms.dat',
                                  base_dir=cfg[OUT_BASE_DIR])
        list_to_csv([atoms_for_vmd], f_name, delimiter=' ')
Example #29
0
def process_cp2k_force_file(f_file, out_dir):
    """

    @param f_file: cp2k force output file to read and process (convert last section to kcal/(mol-Angstrom) )
    @param out_dir: where to create the new output file (last section, converted)
    @return: if a valid cp2k file, return a string with the total number of atoms and converted total forces
    """
    forces_pat = re.compile(r"^ATOMIC FORCES in .*")
    comment_pat = re.compile(r"^#.*")
    sum_pat = re.compile(r"^SUM.*")

    force_count = 0
    keep_lines = False
    line_count = 0
    to_print = []

    with open(f_file) as f:
        print('Reading file {}'.format(f_file))
        atom_num = None
        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            if forces_pat.match(line):
                force_count += 1
                if force_count == 3:
                    keep_lines = True
            elif keep_lines:
                line_count += 1
                split_line = line.split()
                if comment_pat.match(split_line[0]):
                    continue
                try:
                    if sum_pat.match(split_line[0]):
                        sums = np.asarray(list(map(float, split_line[4:]))) * au_to_N
                        if len(sums) != 4:
                            raise InvalidDataError("Did not find the expected four force values (x, y, z, total)")
                        # sums_str = ' '.join([str(atom_num)] + ['%8.3f'%F for F in sums])
                        f_out = create_out_fname(f_file, prefix=OUT_FILE_PREFIX, base_dir=out_dir, ext='')
                        list_to_file(to_print, f_out)
                        return np.append([atom_num], sums)
                except (ValueError, InvalidDataError) as e:
                    warning("{}\n"
                            "Check file: {}\n"
                            "   Line 'SUM OF ATOMIC FORCES' in the third 'ATOMIC FORCES' section: {}\n"
                            "Continuing to the next line in the file list".format(e, f_file, line))
                    return None

                try:
                    if len(split_line) != 6:
                        raise InvalidDataError("Did not find six expected values (Atom Kind Element X Y Z)")
                    atom_num = int(split_line[0])
                    # kind = int(split_line[1])
                    # element = split_line[2]
                    xyz = np.asarray(list(map(float, split_line[3:]))) * au_to_N
                    # noinspection PyTypeChecker
                    to_print.append([atom_num] + xyz.tolist())
                except (ValueError, InvalidDataError) as e:
                    warning("{}\n"
                            "Check file: {}\n"
                            "  line {} in the third 'ATOMIC FORCES' section: {}\n"
                            "Continuing to the next line in the file list".format(e, f_file, line_count, line))
                    return None
    warning("Invalid file: {}\n"
            "Reached end of file without encountering a third 'SUM OF ATOMIC FORCES' section. "
            "Continuing to the next line in the file list.".format(f_file))
    return None
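The au_to_N factor above converts CP2K's atomic-unit forces (hartree/bohr) to the kcal/(mol-Angstrom) named in the docstring; the numeric value below is reconstructed from standard constants and is an assumption, not read from md_utils. A minimal sketch:

import numpy as np

# Assumed conversion constants (not taken from md_utils):
HARTREE_TO_KCAL_MOL = 627.509474   # 1 hartree in kcal/mol
BOHR_TO_ANGSTROM = 0.52917721      # 1 bohr in Angstrom
au_to_kcal_mol_ang = HARTREE_TO_KCAL_MOL / BOHR_TO_ANGSTROM  # roughly 1185.8

# Convert a toy force vector given in hartree/bohr:
force_au = np.array([0.001, -0.002, 0.0005])
print(force_au * au_to_kcal_mol_ang)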
Example #30
0
def process_data_tpl(cfg):
    tpl_loc = cfg[DATA_TPL_FILE]
    tpl_data = {HEAD_CONTENT: [], ATOMS_CONTENT: [], TAIL_CONTENT: [], PROT_RES_MOL: [], H3O_MOL: [],
                WATER_MOLS: defaultdict(list), FIRST_H3O_H_INDEX: None}
    section = SEC_HEAD
    num_atoms_pat = re.compile(r"(\d+).*atoms$")
    atoms_pat = re.compile(r"^Atoms.*")
    # put in dummy x y z
    x = 0.0
    y = 0.0
    z = 0.0

    total_charge = 0.0

    # For debugging total charge
    calc_charge_atom_nums = {}
    for name in CALC_CHARGE_NAMES:
        calc_charge_atom_nums[cfg[name]] = name

    with open(tpl_loc) as f:
        for line in f:
            line = line.strip()
            # head_content to contain Everything before 'Atoms' section
            # also capture the number of atoms
            if section == SEC_HEAD:
                tpl_data[HEAD_CONTENT].append(line)
                if NUM_ATOMS not in tpl_data:
                    atoms_match = num_atoms_pat.match(line)
                    if atoms_match:
                        # regex is 1-based
                        tpl_data[NUM_ATOMS] = int(atoms_match.group(1))
                if atoms_pat.match(line):
                    section = SEC_ATOMS
                    tpl_data[HEAD_CONTENT].append('')
            # atoms_content to contain everything but the xyz: atom_num, mol_num, atom_type, charge, type'
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                description = ' '.join(split_line[7:])
                atom_struct = [atom_num, mol_num, atom_type, charge, x, y, z, description]
                tpl_data[ATOMS_CONTENT].append(atom_struct)
                total_charge += charge

                if atom_type == cfg[H3O_O_TYPE]:
                    tpl_data[H3O_MOL].append(atom_struct)
                    tpl_data[H3O_O_CHARGE] = charge
                elif atom_type == cfg[H3O_H_TYPE]:
                    if tpl_data[FIRST_H3O_H_INDEX] is None:
                        tpl_data[FIRST_H3O_H_INDEX] = len(tpl_data[H3O_MOL])
                    tpl_data[H3O_MOL].append(atom_struct)
                    tpl_data[H3O_H_CHARGE] = charge
                elif mol_num == cfg[PROT_RES_MOL_ID]:
                    tpl_data[PROT_RES_MOL].append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[WAT_H_TYPE]:
                    tpl_data[WATER_MOLS][mol_num].append(atom_struct)
                if atom_num == tpl_data[NUM_ATOMS]:
                    section = SEC_TAIL
                    # Perform checks total charge
                    if abs(total_charge) < TOL:
                        print('The data file system is neutral (total charge {:.2e})'.format(total_charge))
                    else:
                        warning('The data file system is not neutral. Total charge {0:.6f}'.format(total_charge))
                    if len(tpl_data[PROT_RES_MOL]) == 0:
                        raise InvalidDataError('Did not find the input {} ({}).'.format(PROT_RES_MOL,
                                                                                        cfg[PROT_RES_MOL]))
                    for mol_list in [H3O_MOL, WATER_MOLS]:
                        if len(tpl_data[mol_list]) == 0:
                            raise InvalidDataError('In reading the data file, found no {}. Check the data file and '
                                                   'the input atom types: \n{} = {}\n{} = {}\n{} = {}\n'
                                                   '{} = {}\n{} = {}.'
                                                   ''.format(mol_list,
                                                             PROT_H_TYPE, cfg[PROT_H_TYPE],
                                                             H3O_O_TYPE, cfg[H3O_O_TYPE],
                                                             H3O_H_TYPE, cfg[H3O_H_TYPE],
                                                             WAT_O_TYPE, cfg[WAT_O_TYPE],
                                                             WAT_H_TYPE, cfg[WAT_H_TYPE]))

                elif atom_num in calc_charge_atom_nums:
                    print('After atom {0} ({1}), the total charge is: {2:.3f}'.format(atom_num,
                                                                                      calc_charge_atom_nums[atom_num],
                                                                                      total_charge))

            # tail_content to contain everything after the 'Atoms' section
            elif section == SEC_TAIL:
                tpl_data[TAIL_CONTENT].append(line)

    # Validate data section
    if len(tpl_data[ATOMS_CONTENT]) != tpl_data[NUM_ATOMS]:
        raise InvalidDataError('In the file {}, the length of the "Atoms" section ({}) does not equal '
                               'the number of atoms ({}).'.format(tpl_loc,
                                                                  len(tpl_data[ATOMS_CONTENT]),
                                                                  tpl_data[NUM_ATOMS]))

    if cfg[REPROD_TPL]:
        f_out = create_out_fname('reproduced_tpl', base_dir=cfg[OUT_BASE_DIR], ext='.data')
        list_to_file(tpl_data[HEAD_CONTENT] + tpl_data[ATOMS_CONTENT][:] + tpl_data[TAIL_CONTENT],
                     f_out)

    return tpl_data
Example #31
0
def process_data_file(cfg, chk_atom_type, data_dict, data_file, data_tpl_content):
    with open(data_file) as d:
        pdb_data_section = copy.deepcopy(data_tpl_content[ATOMS_CONTENT])
        pdb_atom_num = len(pdb_data_section)
        section = SEC_HEAD
        atom_id = 0
        num_atoms = None
        atom_types = []

        for line in d:
            line = line.strip()
            # not currently keeping anything from the header; just check num atoms
            if section == SEC_HEAD:
                if ATOMS_PAT.match(line):
                    section = SEC_ATOMS
                elif num_atoms is None:
                    atoms_match = NUM_ATOMS_PAT.match(line)
                    if atoms_match:
                        # regex is 1-based
                        num_atoms = int(atoms_match.group(1))
                        if num_atoms != pdb_atom_num:
                            raise InvalidDataError("Mismatched numbers of atoms: \n"
                                                   "  Found {} atoms in file: {}\n"
                                                   "    and {} atoms in file: {}\n"
                                                   "".format(pdb_atom_num, cfg[PDB_TPL_FILE],
                                                             num_atoms, data_file))

            # atoms_content to contain only xyz; also perform some checking
            elif section == SEC_ATOMS:
                if len(line) == 0:
                    continue
                split_line = line.split()

                # Not currently checking molecule number
                # If decide to do so, should make a count from 1 as the PDB is read; the PDB does not
                # have to start from 1, but the data file counts molecules from 1. For now, decided
                # checking atom type is a sufficient check
                # mol_num = int(split_line[1])

                # Keep as string; json save as string and this helps compare
                atom_types.append(split_line[2])
                pdb_data_section[atom_id][5:8] = map(float, split_line[4:7])
                atom_id += 1
                # Check after increment because the counter started at 0
                if atom_id == num_atoms:
                    # Since the tail will come only from the template, nothing more is needed.
                    break

    # Now that finished reading the file...
    if atom_id != num_atoms:
        raise InvalidDataError('In data file: {}\n'
                               '  header section lists {} atoms, but found {} atoms'.format(data_file,
                                                                                            num_atoms, atom_id))
    if chk_atom_type:
        for data_type, atom in zip(atom_types, pdb_data_section):
            try:
                pdb_type = atom[2] + atom[3]
                if pdb_type not in data_dict[data_type]:
                    warning('Did not find type {} in dictionary of values for atom_type {}: ({})'
                            ''.format(pdb_type, data_type, data_dict[data_type]))
                    # print("atom", atom_type, data_dict[atom_type])
            except KeyError:
                warning('Did not find data file atom type {} in the atom type dictionary {}'
                        ''.format(data_type, cfg[ATOM_TYPE_DICT_FILE]))
    f_name = create_out_fname(data_file, ext='.pdb', base_dir=cfg[OUT_BASE_DIR])
    list_to_file(data_tpl_content[HEAD_CONTENT] + pdb_data_section + data_tpl_content[TAIL_CONTENT],
                 f_name,
                 list_format=cfg[PDB_FORMAT])
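The chk_atom_type pass above zips the data-file atom types against the template's PDB entries and warns on any pdb_type (atom name plus residue name) not listed for that data type. A minimal sketch of that lookup, with toy stand-ins for the json-loaded dictionary; none of these values come from md_utils:

# Toy stand-ins for the atom-type dictionary and the parsed rows.
atom_types = ["1", "2", "2"]
data_dict = {"1": {" N  ALA"}, "2": {" CA ALA", " CB ALA"}}
pdb_rows = [(" N  ", "ALA"), (" CA ", "ALA"), (" OXT", "ALA")]

for data_type, (pdb_name, pdb_res) in zip(atom_types, pdb_rows):
    pdb_type = pdb_name + pdb_res
    if pdb_type not in data_dict.get(data_type, set()):
        print("warning: type {} not listed for data type {}".format(pdb_type, data_type))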
Example #32
0
def process_dump_file(cfg, data_tpl_content, dump_file):
    section = None
    box = np.zeros((3,))
    counter = 1
    atom_list_order = [PRE_RES, PROT_RES, POST_RES, HYD_MOL, WAT_MOL, POST_WAT]
    dump_atom_data = []
    atom_lists = {PRE_RES: [],
                  PROT_RES: [],
                  POST_RES: [],
                  HYD_MOL: [],
                  WAT_MOL: [],
                  POST_WAT: []
                  }

    with open(dump_file) as d:
        for line in d:
            line = line.strip()
            if section is None:
                section = find_dump_section_state(line)
                if section is None:
                    raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, line))
            elif section == SEC_TIMESTEP:
                timestep = line
                # Reset variables
                water_dict = defaultdict(list)
                dump_atom_data = []
                excess_proton = None
                hydronium = []
                for a_list in atom_lists:
                    atom_lists[a_list] = []
                section = None
            elif section == SEC_NUM_ATOMS:
                if data_tpl_content[NUM_ATOMS] != int(line):
                    raise InvalidDataError('At timestep {} in file {}, the listed number of atoms ({}) does '
                                           'not equal the number of atoms in the template data file '
                                           '({}).'.format(timestep, dump_file, line, data_tpl_content[NUM_ATOMS]))
                section = None
            elif section == SEC_BOX_SIZE:
                split_line = line.split()
                diff = float(split_line[1]) - float(split_line[0])
                box[counter - 1] = diff
                if counter == 3:
                    counter = 0
                    section = None
                counter += 1
            elif section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in a dump file, move on to the next file
                if len(split_line) < 7:
                    continue
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                description = ''
                atom_struct = [atom_num, mol_num, atom_type, charge, x, y, z, description]

                # Keep track of separate portions of the system to allow sorting and processing
                if mol_num == cfg[PROT_RES_MOL_ID]:
                    if atom_type == cfg[PROT_H_TYPE] and atom_num not in cfg[PROT_H_IGNORE]:
                        excess_proton = atom_struct
                    else:
                        atom_lists[PROT_RES].append(atom_struct)
                elif atom_type == cfg[H3O_O_TYPE] or atom_type == cfg[H3O_H_TYPE]:
                    hydronium.append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[WAT_H_TYPE]:
                    water_dict[mol_num].append(atom_struct)
                # Save everything else in three chunks for recombining sections post-processing
                elif len(atom_lists[PROT_RES]) == 0:
                    atom_lists[PRE_RES].append(atom_struct)
                elif len(water_dict) == 0:
                    atom_lists[POST_RES].append(atom_struct)
                else:
                    atom_lists[POST_WAT].append(atom_struct)

                if counter == data_tpl_content[NUM_ATOMS]:
                    counter = 0
                    section = None

                    # Now that finished reading all atom lines...
                    # Check and process!
                    if len(water_dict) == 0:
                        raise InvalidDataError('Found no water molecules. Check that the input types {} = {} '
                                               'and {} = {} are in the dump '
                                               'file.'.format(WAT_O_TYPE, cfg[WAT_O_TYPE],
                                                              WAT_H_TYPE, cfg[WAT_H_TYPE]))
                    if excess_proton is None:
                        if len(hydronium) != 4:
                            raise InvalidDataError('Did not find an excess proton or one hydronium ion. Check dump '
                                                   'file and input types: {} = {}; {} = {}; {} = {}'
                                                   .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                                           H3O_O_TYPE, cfg[H3O_O_TYPE],
                                                           H3O_H_TYPE, cfg[H3O_H_TYPE]))
                    else:
                        if len(hydronium) != 0:
                            raise InvalidDataError('Found both an excess proton and hydronium atoms. Check dump file '
                                                   'and input types: {} = {}; {} = {}; {} = {}'
                                                   .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                                           H3O_O_TYPE, cfg[H3O_O_TYPE],
                                                           H3O_H_TYPE, cfg[H3O_H_TYPE]))
                        deprotonate(cfg, atom_lists[PROT_RES], excess_proton, hydronium,
                                    water_dict, box, data_tpl_content)

                    # Ensure in correct order for printing
                    atom_lists[HYD_MOL] = assign_hyd_mol(cfg, hydronium)
                    atom_lists[WAT_MOL] = sort_wat_mols(cfg, water_dict)

                    for a_list in atom_list_order:
                        dump_atom_data += atom_lists[a_list]

                    # overwrite atom_num, mol_num, atom_type, charge, then description
                    for index in range(len(dump_atom_data)):
                        if dump_atom_data[index][3] == data_tpl_content[ATOMS_CONTENT][index][3] or \
                                dump_atom_data[index][0] in cfg[PROT_TYPE_IGNORE_ATOMS]:
                            dump_atom_data[index][0:4] = data_tpl_content[ATOMS_CONTENT][index][0:4]
                            dump_atom_data[index][7] = ' '.join(data_tpl_content[ATOMS_CONTENT][index][7:])
                        else:
                            raise InvalidDataError("In reading file: {}\n found atom index {} with charge {} which "
                                                   "does not match the charge in the data template ({}). \n"
                                                   "To ignore this mis-match, list "
                                                   "the atom's index number in the keyword '{}' in the ini file."
                                                   "".format(dump_file,
                                                             dump_atom_data[index][0], dump_atom_data[index][3],
                                                             data_tpl_content[ATOMS_CONTENT][index][3],
                                                             PROT_TYPE_IGNORE_ATOMS))

                    d_out = create_out_fname(dump_file, suffix='_' + str(timestep),
                                             ext='.data', base_dir=cfg[OUT_BASE_DIR])
                    data_tpl_content[HEAD_CONTENT][0] = "Created by evbdump2data from {} " \
                                                        "timestep {}".format(dump_file, timestep)
                    list_to_file(data_tpl_content[HEAD_CONTENT] + dump_atom_data + data_tpl_content[TAIL_CONTENT],
                                 d_out)
                counter += 1
    if counter == 1:
        print("Completed reading dumpfile {}".format(dump_file))
    else:
        warning("Dump file {} step {} did not have the full list of atom numbers. "
                "Continuing program.".format(dump_file, timestep))