def read_dump_file(dump_file, cfg, data_to_print, gofr_data, out_fieldnames, write_mode, evb_dict):
    """
    Parse a dump file section by section and collect one result dict per complete timestep.

    Every line that follows a section header is handled according to the section most recently
    reported by ``find_dump_section_state`` (timestep, number of atoms, box size, or atoms).
    When the last atom line of a timestep has been read, the collected atoms are handed to
    ``process_atom_data`` and the resulting dict is appended to ``data_to_print``.

    :param dump_file: path of the dump file to read
    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param data_to_print: list collecting the per-timestep result dicts
    :param gofr_data: passed through to ``process_atom_data`` and ``print_gofr``
    :param out_fieldnames: passed through to ``print_per_frame``
    :param write_mode: file mode handed to ``print_per_frame``; switched locally to 'a' after the
        first intermediate per-frame write
    :param evb_dict: mapping from the alignment value (derived from the file name or the timestep)
        to a dict of EVB summary values
    :raises InvalidDataError: on an unrecognized non-empty line between sections, or when the
        timestep line cannot be read as an integer
    """
    with open(dump_file) as dump_handle:
        # spaces here allow file name to line up with the "completed reading" print line
        if cfg[PRINT_PROGRESS]:
            print("{:>17}: {}".format('Reading', dump_file))

        box = np.zeros((3, ))
        section = None
        timestep = None
        num_atoms = 0
        timesteps_read = 0
        # Both counters are 1-based and are back at 1 whenever their section has been fully read
        box_counter = 1
        atom_counter = 1

        for raw_line in dump_handle:
            text = raw_line.strip()

            if section is None:
                # Between sections: expect a section header; empty lines are tolerated
                section = find_dump_section_state(text)
                if section is None and len(text) > 0:
                    raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, text))
                continue

            if section == SEC_TIMESTEP:
                # Reset variables
                section = None
                dump_atom_data = []
                try:
                    timestep = int(text)
                except ValueError as e:
                    raise InvalidDataError(
                        "In attempting to read an integer timestep, encountered error: {}".format(e))
                timesteps_read += 1
                if timesteps_read > cfg[MAX_TIMESTEPS]:
                    print("Reached the maximum timesteps per dumpfile ({}). "
                          "To increase this number, set a larger value for {}. "
                          "Continuing program.".format(cfg[MAX_TIMESTEPS], MAX_TIMESTEPS))
                    break
                if timesteps_read % cfg[PRINT_TIMESTEPS] == 0:
                    # Periodic flush of what has been gathered so far
                    if cfg[PER_FRAME_OUTPUT]:
                        print_per_frame(dump_file, cfg, data_to_print, out_fieldnames, write_mode)
                        # NOTE(review): this rebinds the local names only; the caller's list and
                        # write mode are not modified -- confirm that is what the caller expects
                        data_to_print = []
                        write_mode = 'a'
                    if cfg[GOFR_OUTPUT]:
                        print_gofr(cfg, gofr_data)

                result = {FILE_NAME: os.path.basename(dump_file), TIMESTEP: timestep}
                if cfg[EVB_SUM_FILE] is not None:
                    # Build the key used to look up this frame in the EVB summary data
                    if cfg[ALIGN_COL] == FILE_NAME:
                        file_root = os.path.splitext(result[cfg[ALIGN_COL]])[0]
                        align_val = file_root + cfg[EVB_FILE_EXT]
                    else:
                        align_val = result[cfg[ALIGN_COL]]
                    if align_val in evb_dict:
                        step_dict = evb_dict[align_val]
                        for evb_header in cfg[EVB_SUM_HEADERS]:
                            result[evb_header] = step_dict[evb_header]
                        if cfg[CALC_CEC_DIST]:
                            result[CEC_XYZ] = np.asarray([step_dict[CEC_X],
                                                          step_dict[CEC_Y],
                                                          step_dict[CEC_Z]])
                    else:
                        warning("Did not find '{}' value {} in the data read from: {}"
                                .format(cfg[ALIGN_COL], result[cfg[ALIGN_COL]], cfg[EVB_SUM_FILE]))
                continue

            if section == SEC_NUM_ATOMS:
                num_atoms = int(text)
                section = None
                continue

            if section == SEC_BOX_SIZE:
                # One line per dimension: the box length is the second value minus the first
                bounds = text.split()
                box[box_counter - 1] = float(bounds[1]) - float(bounds[0])
                if box_counter == 3:
                    box_counter = 0
                    section = None
                box_counter += 1
                continue

            if section == SEC_ATOMS:
                fields = text.split()
                # If there is an incomplete line in a dump file, move on to the next file
                if len(fields) < 7:
                    break
                atom_num = int(fields[0])
                mol_num = int(fields[1])
                atom_type = int(fields[2])
                charge = float(fields[3])
                coords = [float(value) for value in fields[4:7]]
                # Here, the atoms counting starts at 1. However, the template counted from zero
                dump_atom_data.append({ATOM_NUM: atom_num,
                                       MOL_NUM: mol_num,
                                       ATOM_TYPE: atom_type,
                                       CHARGE: charge,
                                       XYZ_COORDS: coords,
                                       })
                if atom_counter == num_atoms:
                    # Last atom of this timestep: process it unless the step is filtered out
                    if len(cfg[ONLY_STEPS]) == 0 or timestep in cfg[ONLY_STEPS]:
                        result.update(process_atom_data(cfg, dump_atom_data, box, timestep,
                                                        gofr_data, result))
                        data_to_print.append(result)
                    atom_counter = 0
                    section = None
                atom_counter += 1

    # atom_counter is 1 only when reading did not stop part-way through an atom list
    if atom_counter == 1:
        if cfg[PRINT_PROGRESS]:
            print("Completed reading: {}".format(dump_file))
    else:
        warning("FYI: dump file {} step {} did not have the full list of atom numbers. "
                "Continuing to next dump file.".format(dump_file, timestep))
def process_dump_file(cfg, dump_file, atom_num_dict, atom_type_dict, mol_num_dict):
    """
    Read a dump file and write a renumbered (and, if atoms are renumbered, re-sorted) copy.

    Header lines of each timestep are collected verbatim; atom lines are parsed, remapped
    through the three dictionaries, optionally shifted in molecule number, and written with
    ``print_to_dump_file`` to an output file named after ``dump_file`` with the suffix
    '_reorder'.

    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param dump_file: path of the dump file to read
    :param atom_num_dict: mapping old atom number -> new atom number; atoms not listed are kept
    :param atom_type_dict: mapping old atom type -> new atom type; types not listed are kept
    :param mol_num_dict: mapping old molecule number -> new molecule number; others are kept
    :raises InvalidDataError: when a line between sections is not a recognized section header
    """
    section = None
    box = np.zeros((3, ))
    # 1-based position within the box or atoms section; it is back at 1 once a section is complete
    counter = 1
    num_atoms = 0
    head_content = []
    steps_count = 0
    # Number of timesteps to read so that cfg[MAX_STEPS] of them are written
    step_stop = cfg[MAX_STEPS] * cfg[OUT_FREQ]
    timestep = None
    with open(dump_file) as d:
        d_out = create_out_fname(dump_file, suffix='_reorder', base_dir=cfg[OUT_BASE_DIR])
        write_mode = 'w'
        for line in d:
            line = line.strip()
            if section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in a dump file, move on to the next file
                if len(split_line) < 7:
                    break

                atom_num = int(split_line[0])
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                mol_num = int(split_line[1])
                if mol_num in mol_num_dict:
                    mol_num = mol_num_dict[mol_num]
                # Default RENUM_START_MOL is neg 1; if still less than zero, user did not specify renumbering
                if 0 <= cfg[RENUM_START_MOL] <= mol_num:
                    mol_num += cfg[RENUM_SHIFT]

                atom_type = int(split_line[2])
                if atom_type in atom_type_dict:
                    atom_type = atom_type_dict[atom_type]

                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                atom_struct = [atom_num, mol_num, atom_type, charge, x, y, z]
                atom_data.append(atom_struct)

                if counter == num_atoms:
                    # Whole timestep read; sorting is only needed when atom numbers were remapped
                    if len(atom_num_dict) > 0:
                        atom_data = sorted(atom_data, key=lambda atom: atom[0])
                    steps_count += 1
                    if steps_count % cfg[OUT_FREQ] == 0:
                        print_to_dump_file(head_content, atom_data, d_out, mode=write_mode)
                        if write_mode == 'w':
                            write_mode = 'a'
                    if steps_count == step_stop:
                        print("Reached the maximum number of steps ({})".format(cfg[MAX_STEPS]))
                        # Leave counter at 1 so the stop is reported below as a completed read
                        counter = 1
                        break
                    # reset for next timestep
                    head_content = []
                    counter = 0
                    section = None
                counter += 1
            else:
                # Everything that is not an atom line is kept verbatim as header content
                head_content.append(line)
                if section is None:
                    section = find_dump_section_state(line)
                    if section is None:
                        # Report the file path, not the open file object
                        raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, line))
                elif section == SEC_TIMESTEP:
                    timestep = line
                    # Reset variables
                    atom_data = []
                    section = None
                elif section == SEC_NUM_ATOMS:
                    num_atoms = int(line)
                    section = None
                elif section == SEC_BOX_SIZE:
                    # One line per dimension: the box length is the second value minus the first
                    split_line = line.split()
                    diff = float(split_line[1]) - float(split_line[0])
                    box[counter - 1] = diff
                    if counter == 3:
                        counter = 0
                        section = None
                    counter += 1

    # counter is 1 only when reading did not stop part-way through a box or atoms section
    if counter == 1:
        print("Completed reading: {}".format(dump_file))
    else:
        warning("Dump file {} step {} did not have the full list of atom numbers. "
                "Continuing program.".format(dump_file, timestep))
def process_dump_file(cfg, dump_file, atom_num_dict, atom_type_dict, mol_num_dict):
    """
    Read a dump file and write a renumbered (and, if atoms are renumbered, re-sorted) copy.

    Header lines of each timestep are collected verbatim; atom lines are parsed, remapped
    through the three dictionaries, optionally shifted in molecule number, and written with
    ``print_to_dump_file`` to an output file named after ``dump_file`` with the suffix
    '_reorder'.

    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param dump_file: path of the dump file to read
    :param atom_num_dict: mapping old atom number -> new atom number; atoms not listed are kept
    :param atom_type_dict: mapping old atom type -> new atom type; types not listed are kept
    :param mol_num_dict: mapping old molecule number -> new molecule number; others are kept
    :raises InvalidDataError: when a line between sections is not a recognized section header
    """
    section = None
    box = np.zeros((3,))
    # 1-based position within the box or atoms section; it is back at 1 once a section is complete
    counter = 1
    num_atoms = 0
    head_content = []
    steps_count = 0
    # Number of timesteps to read so that cfg[MAX_STEPS] of them are written
    step_stop = cfg[MAX_STEPS] * cfg[OUT_FREQ]
    timestep = None
    with open(dump_file) as d:
        d_out = create_out_fname(dump_file, suffix='_reorder', base_dir=cfg[OUT_BASE_DIR])
        write_mode = 'w'
        for line in d:
            line = line.strip()
            if section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in a dump file, move on to the next file
                if len(split_line) < 7:
                    break

                atom_num = int(split_line[0])
                if atom_num in atom_num_dict:
                    atom_num = atom_num_dict[atom_num]

                mol_num = int(split_line[1])
                if mol_num in mol_num_dict:
                    mol_num = mol_num_dict[mol_num]
                # Default RENUM_START_MOL is neg 1; if still less than zero, user did not specify renumbering
                if 0 <= cfg[RENUM_START_MOL] <= mol_num:
                    mol_num += cfg[RENUM_SHIFT]

                atom_type = int(split_line[2])
                if atom_type in atom_type_dict:
                    atom_type = atom_type_dict[atom_type]

                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                atom_struct = [atom_num, mol_num, atom_type, charge, x, y, z]
                atom_data.append(atom_struct)

                if counter == num_atoms:
                    # Whole timestep read; sorting is only needed when atom numbers were remapped
                    if len(atom_num_dict) > 0:
                        atom_data = sorted(atom_data, key=lambda atom: atom[0])
                    steps_count += 1
                    if steps_count % cfg[OUT_FREQ] == 0:
                        print_to_dump_file(head_content, atom_data, d_out, mode=write_mode)
                        if write_mode == 'w':
                            write_mode = 'a'
                    if steps_count == step_stop:
                        print("Reached the maximum number of steps ({})".format(cfg[MAX_STEPS]))
                        # Leave counter at 1 so the stop is reported below as a completed read
                        counter = 1
                        break
                    # reset for next timestep
                    head_content = []
                    counter = 0
                    section = None
                counter += 1
            else:
                # Everything that is not an atom line is kept verbatim as header content
                head_content.append(line)
                if section is None:
                    section = find_dump_section_state(line)
                    if section is None:
                        # Report the file path, not the open file object
                        raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, line))
                elif section == SEC_TIMESTEP:
                    timestep = line
                    # Reset variables
                    atom_data = []
                    section = None
                elif section == SEC_NUM_ATOMS:
                    num_atoms = int(line)
                    section = None
                elif section == SEC_BOX_SIZE:
                    # One line per dimension: the box length is the second value minus the first
                    split_line = line.split()
                    diff = float(split_line[1]) - float(split_line[0])
                    box[counter - 1] = diff
                    if counter == 3:
                        counter = 0
                        section = None
                    counter += 1

    # counter is 1 only when reading did not stop part-way through a box or atoms section
    if counter == 1:
        print("Completed reading: {}".format(dump_file))
    else:
        warning("Dump file {} step {} did not have the full list of atom numbers. "
                "Continuing program.".format(dump_file, timestep))
def read_dump_file(dump_file, cfg, data_to_print, gofr_data, out_fieldnames, write_mode):
    """
    Walk through a dump file and gather one result dict for every complete timestep.

    The section reported by ``find_dump_section_state`` decides how the following lines are
    interpreted (timestep, number of atoms, box size, or atoms). Once the final atom line of a
    timestep is read, ``process_atom_data`` is called and its output, merged into a dict that
    already holds the timestep, is appended to ``data_to_print``.

    :param dump_file: path of the dump file to read
    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param data_to_print: list collecting the per-timestep result dicts
    :param gofr_data: passed through to ``process_atom_data`` and ``print_gofr``
    :param out_fieldnames: passed through to ``print_per_frame``
    :param write_mode: file mode handed to ``print_per_frame``; switched locally to "a" after the
        first intermediate per-frame write
    :raises InvalidDataError: on an unrecognized non-empty line between sections, or when the
        timestep line cannot be read as an integer
    """
    with open(dump_file) as dump_stream:
        # spaces here allow file name to line up with the "completed reading" print line
        print("{:>17}: {}".format("Reading", dump_file))

        box = np.zeros((3,))
        section = None
        timestep = None
        num_atoms = 0
        timesteps_read = 0
        # Both counters are 1-based and are back at 1 whenever their section has been fully read
        box_counter = 1
        atom_counter = 1

        for current in dump_stream:
            current = current.strip()

            if section is None:
                # Between sections: expect a section header; empty lines are tolerated
                section = find_dump_section_state(current)
                if section is None and len(current) > 0:
                    raise InvalidDataError("Unexpected line in file {}: {}".format(dump_file, current))

            elif section == SEC_TIMESTEP:
                # Reset variables
                section = None
                dump_atom_data = []
                try:
                    timestep = int(current)
                except ValueError as e:
                    raise InvalidDataError(
                        "In attempting to read an integer timestep, encountered error: {}".format(e))
                timesteps_read += 1
                step_limit = cfg[MAX_TIMESTEPS]
                if timesteps_read > step_limit:
                    print("Reached the maximum timesteps per dumpfile ({}). "
                          "To increase this number, set a larger value for {}. "
                          "Continuing program.".format(cfg[MAX_TIMESTEPS], MAX_TIMESTEPS))
                    break
                if timesteps_read % cfg[PRINT_TIMESTEPS] == 0:
                    # Periodic flush of what has been gathered so far
                    if cfg[PER_FRAME_OUTPUT]:
                        print_per_frame(dump_file, cfg, data_to_print, out_fieldnames, write_mode)
                        # NOTE(review): this rebinds the local names only; the caller's list and
                        # write mode are not modified -- confirm that is what the caller expects
                        data_to_print = []
                        write_mode = "a"
                    if cfg[GOFR_OUTPUT]:
                        print_gofr(cfg, gofr_data)
                result = {TIMESTEP: timestep}

            elif section == SEC_NUM_ATOMS:
                num_atoms = int(current)
                section = None

            elif section == SEC_BOX_SIZE:
                # One line per dimension: the box length is the second value minus the first
                limits = current.split()
                box[box_counter - 1] = float(limits[1]) - float(limits[0])
                if box_counter == 3:
                    box_counter = 0
                    section = None
                box_counter += 1

            elif section == SEC_ATOMS:
                tokens = current.split()
                # If there is an incomplete line in a dump file, move on to the next file
                if len(tokens) < 7:
                    break
                atom_num = int(tokens[0])
                mol_num = int(tokens[1])
                atom_type = int(tokens[2])
                charge = float(tokens[3])
                xyz = [float(token) for token in tokens[4:7]]
                # Here, the atoms counting starts at 1. However, the template counted from zero
                dump_atom_data.append({
                    ATOM_NUM: atom_num,
                    MOL_NUM: mol_num,
                    ATOM_TYPE: atom_type,
                    CHARGE: charge,
                    XYZ_COORDS: xyz,
                })
                if atom_counter == num_atoms:
                    # Last atom of this timestep: analyse the frame and store the outcome
                    frame_values = process_atom_data(cfg, dump_atom_data, box, timestep, gofr_data)
                    result.update(frame_values)
                    data_to_print.append(result)
                    atom_counter = 0
                    section = None
                atom_counter += 1

    # atom_counter is 1 only when reading did not stop part-way through an atom list
    if atom_counter != 1:
        warning("FYI: dump file {} step {} did not have the full list of atom numbers. "
                "Continuing to next dump file.".format(dump_file, timestep))
    else:
        print("Completed reading: {}".format(dump_file))
def process_dump_file(cfg, data_tpl_content, dump_file):
    """
    Convert every complete timestep of a dump file into a data file based on a template.

    Atoms of each timestep are sorted into groups (before/at/after the protonatable residue,
    hydronium, water, after water). After validation (and, when an excess proton is found on the
    protonatable residue, a call to ``deprotonate``) the groups are concatenated in template
    order, the first four fields and the description of every atom are overwritten from the
    template, and the result is written with ``list_to_file`` to a file named after
    ``dump_file`` with the suffix '_<timestep>' and extension '.data'.

    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param data_tpl_content: template data; read via NUM_ATOMS, ATOMS_CONTENT, HEAD_CONTENT and
        TAIL_CONTENT. The first head line is overwritten for each timestep written.
    :param dump_file: path of the dump file to read
    :raises InvalidDataError: on an unrecognized line between sections, an atom count that
        differs from the template, missing water molecules, an inconsistent excess
        proton / hydronium count, or a charge that does not match the template
    """
    section = None
    box = np.zeros((3,))
    # 1-based position within the box or atoms section; it is back at 1 once a section is complete
    counter = 1
    # Defined up front so the messages below can be formatted even if no TIMESTEP section was read
    timestep = None
    atom_list_order = [PRE_RES, PROT_RES, POST_RES, HYD_MOL, WAT_MOL, POST_WAT]
    dump_atom_data = []
    atom_lists = {PRE_RES: [],
                  PROT_RES: [],
                  POST_RES: [],
                  HYD_MOL: [],
                  WAT_MOL: [],
                  POST_WAT: []
                  }

    with open(dump_file) as d:
        for line in d:
            line = line.strip()
            if section is None:
                section = find_dump_section_state(line)
                if section is None:
                    raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, line))
            elif section == SEC_TIMESTEP:
                timestep = line
                # Reset variables
                water_dict = defaultdict(list)
                dump_atom_data = []
                excess_proton = None
                hydronium = []
                for a_list in atom_lists:
                    atom_lists[a_list] = []
                section = None
            elif section == SEC_NUM_ATOMS:
                if data_tpl_content[NUM_ATOMS] != int(line):
                    raise InvalidDataError('At timestep {} in file {}, the listed number of atoms ({}) does '
                                           'not equal the number of atoms in the template data file '
                                           '({}).'.format(timestep, dump_file, line, data_tpl_content[NUM_ATOMS]))
                section = None
            elif section == SEC_BOX_SIZE:
                # One line per dimension: the box length is the second value minus the first
                split_line = line.split()
                diff = float(split_line[1]) - float(split_line[0])
                box[counter - 1] = diff
                if counter == 3:
                    counter = 0
                    section = None
                counter += 1
            elif section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in a dump file, move on to the next file.
                # Stop reading here: continuing would treat the following lines as atom lines too.
                if len(split_line) < 7:
                    break
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                description = ''
                atom_struct = [atom_num, mol_num, atom_type, charge, x, y, z, description]

                # Keep track of separate portions of the system to allow sorting and processing
                if mol_num == cfg[PROT_RES_MOL_ID]:
                    if atom_type == cfg[PROT_H_TYPE] and atom_num not in cfg[PROT_H_IGNORE]:
                        excess_proton = atom_struct
                    else:
                        atom_lists[PROT_RES].append(atom_struct)
                elif atom_type == cfg[H3O_O_TYPE] or atom_type == cfg[H3O_H_TYPE]:
                    hydronium.append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[WAT_H_TYPE]:
                    water_dict[mol_num].append(atom_struct)
                # Save everything else in three chunks for recombining sections post-processing
                elif len(atom_lists[PROT_RES]) == 0:
                    atom_lists[PRE_RES].append(atom_struct)
                elif len(water_dict) == 0:
                    atom_lists[POST_RES].append(atom_struct)
                else:
                    atom_lists[POST_WAT].append(atom_struct)

                if counter == data_tpl_content[NUM_ATOMS]:
                    counter = 0
                    section = None

                    # Now that finished reading all atom lines...
                    # Check and process!
                    if len(water_dict) == 0:
                        raise InvalidDataError('Found no water molecules. Check that the input types {} = {} '
                                               'and {} = {} are in the dump '
                                               'file.'.format(WAT_O_TYPE, cfg[WAT_O_TYPE],
                                                              WAT_H_TYPE, cfg[WAT_H_TYPE]))
                    if excess_proton is None:
                        # Without an excess proton on the residue, exactly four hydronium atoms are expected
                        if len(hydronium) != 4:
                            raise InvalidDataError('Did not find an excess proton or one hydronium ion. Check dump '
                                                   'file and input types: {} = {}; {} = {}; {} = {}'
                                                   .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                                           H3O_O_TYPE, cfg[H3O_O_TYPE],
                                                           H3O_H_TYPE, cfg[H3O_H_TYPE]))
                    else:
                        if len(hydronium) != 0:
                            raise InvalidDataError('Found an excess proton and a hydronium atoms. Check dump file '
                                                   'and input types: {} = {}; {} = {}; {} = {}'
                                                   .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                                           H3O_O_TYPE, cfg[H3O_O_TYPE],
                                                           H3O_H_TYPE, cfg[H3O_H_TYPE]))
                        deprotonate(cfg, atom_lists[PROT_RES], excess_proton, hydronium,
                                    water_dict, box, data_tpl_content)

                    # Ensure in correct order for printing
                    atom_lists[HYD_MOL] = assign_hyd_mol(cfg, hydronium)
                    atom_lists[WAT_MOL] = sort_wat_mols(cfg, water_dict)

                    for a_list in atom_list_order:
                        dump_atom_data += atom_lists[a_list]

                    # overwrite atom_num, mol_num, atom_type, charge, then description
                    for index, atom in enumerate(dump_atom_data):
                        tpl_atom = data_tpl_content[ATOMS_CONTENT][index]
                        if atom[3] == tpl_atom[3] or atom[0] in cfg[PROT_TYPE_IGNORE_ATOMS]:
                            atom[0:4] = tpl_atom[0:4]
                            atom[7] = ' '.join(tpl_atom[7:])
                        else:
                            raise InvalidDataError("In reading file: {}\n found atom index {} with charge {} which "
                                                   "does not match the charge in the data template ({}). \n"
                                                   "To ignore this mis-match, list "
                                                   "the atom's index number in the keyword '{}' in the ini file."
                                                   "".format(dump_file, atom[0], atom[3], tpl_atom[3],
                                                             PROT_TYPE_IGNORE_ATOMS))

                    d_out = create_out_fname(dump_file, suffix='_' + str(timestep),
                                             ext='.data', base_dir=cfg[OUT_BASE_DIR])
                    data_tpl_content[HEAD_CONTENT][0] = "Created by evbdump2data from {} " \
                                                        "timestep {}".format(dump_file, timestep)
                    list_to_file(data_tpl_content[HEAD_CONTENT] + dump_atom_data + data_tpl_content[TAIL_CONTENT],
                                 d_out)
                counter += 1

    # counter is 1 only when reading did not stop part-way through a box or atoms section
    if counter == 1:
        print("Completed reading dumpfile {}".format(dump_file))
    else:
        warning("Dump file {} step {} did not have the full list of atom numbers. "
                "Continuing program.".format(dump_file, timestep))
def process_dump_file(cfg, data_tpl_content, dump_file):
    """
    Convert every complete timestep of a dump file into a data file based on a template.

    Atoms of each timestep are sorted into groups (before/at/after the protonatable residue,
    hydronium, water, after water). After validation (and, when an excess proton is found on the
    protonatable residue, a call to ``deprotonate``) the groups are concatenated in template
    order, the first four fields and the description of every atom are overwritten from the
    template, and the result is written with ``list_to_file`` to a file named after
    ``dump_file`` with the suffix '_<timestep>' and extension '.data'.

    :param cfg: configuration mapping, indexed with the module's configuration keys
    :param data_tpl_content: template data; read via NUM_ATOMS, ATOMS_CONTENT, HEAD_CONTENT and
        TAIL_CONTENT. The first head line is overwritten for each timestep written.
    :param dump_file: path of the dump file to read
    :raises InvalidDataError: on an unrecognized line between sections, an atom count that
        differs from the template, missing water molecules, an inconsistent excess
        proton / hydronium count, or a charge that does not match the template
    """
    section = None
    box = np.zeros((3, ))
    # 1-based position within the box or atoms section; it is back at 1 once a section is complete
    counter = 1
    # Defined up front so the messages below can be formatted even if no TIMESTEP section was read
    timestep = None
    atom_list_order = [PRE_RES, PROT_RES, POST_RES, HYD_MOL, WAT_MOL, POST_WAT]
    dump_atom_data = []
    atom_lists = {
        PRE_RES: [],
        PROT_RES: [],
        POST_RES: [],
        HYD_MOL: [],
        WAT_MOL: [],
        POST_WAT: []
    }

    with open(dump_file) as d:
        for line in d:
            line = line.strip()
            if section is None:
                section = find_dump_section_state(line)
                if section is None:
                    raise InvalidDataError('Unexpected line in file {}: {}'.format(dump_file, line))
            elif section == SEC_TIMESTEP:
                timestep = line
                # Reset variables
                water_dict = defaultdict(list)
                dump_atom_data = []
                excess_proton = None
                hydronium = []
                for a_list in atom_lists:
                    atom_lists[a_list] = []
                section = None
            elif section == SEC_NUM_ATOMS:
                if data_tpl_content[NUM_ATOMS] != int(line):
                    raise InvalidDataError(
                        'At timestep {} in file {}, the listed number of atoms ({}) does '
                        'not equal the number of atoms in the template data file '
                        '({}).'.format(timestep, dump_file, line, data_tpl_content[NUM_ATOMS]))
                section = None
            elif section == SEC_BOX_SIZE:
                # One line per dimension: the box length is the second value minus the first
                split_line = line.split()
                diff = float(split_line[1]) - float(split_line[0])
                box[counter - 1] = diff
                if counter == 3:
                    counter = 0
                    section = None
                counter += 1
            elif section == SEC_ATOMS:
                split_line = line.split()
                # If there is an incomplete line in a dump file, move on to the next file.
                # Stop reading here: continuing would treat the following lines as atom lines too.
                if len(split_line) < 7:
                    break
                atom_num = int(split_line[0])
                mol_num = int(split_line[1])
                atom_type = int(split_line[2])
                charge = float(split_line[3])
                x, y, z = map(float, split_line[4:7])
                description = ''
                atom_struct = [
                    atom_num, mol_num, atom_type, charge, x, y, z, description
                ]

                # Keep track of separate portions of the system to allow sorting and processing
                if mol_num == cfg[PROT_RES_MOL_ID]:
                    if atom_type == cfg[PROT_H_TYPE] and atom_num not in cfg[PROT_H_IGNORE]:
                        excess_proton = atom_struct
                    else:
                        atom_lists[PROT_RES].append(atom_struct)
                elif atom_type == cfg[H3O_O_TYPE] or atom_type == cfg[H3O_H_TYPE]:
                    hydronium.append(atom_struct)
                elif atom_type == cfg[WAT_O_TYPE] or atom_type == cfg[WAT_H_TYPE]:
                    water_dict[mol_num].append(atom_struct)
                # Save everything else in three chunks for recombining sections post-processing
                elif len(atom_lists[PROT_RES]) == 0:
                    atom_lists[PRE_RES].append(atom_struct)
                elif len(water_dict) == 0:
                    atom_lists[POST_RES].append(atom_struct)
                else:
                    atom_lists[POST_WAT].append(atom_struct)

                if counter == data_tpl_content[NUM_ATOMS]:
                    counter = 0
                    section = None

                    # Now that finished reading all atom lines...
                    # Check and process!
                    if len(water_dict) == 0:
                        raise InvalidDataError(
                            'Found no water molecules. Check that the input types {} = {} '
                            'and {} = {} are in the dump '
                            'file.'.format(WAT_O_TYPE, cfg[WAT_O_TYPE],
                                           WAT_H_TYPE, cfg[WAT_H_TYPE]))
                    if excess_proton is None:
                        # Without an excess proton on the residue, exactly four hydronium atoms are expected
                        if len(hydronium) != 4:
                            raise InvalidDataError(
                                'Did not find an excess proton or one hydronium ion. Check dump '
                                'file and input types: {} = {}; {} = {}; {} = {}'
                                .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                        H3O_O_TYPE, cfg[H3O_O_TYPE],
                                        H3O_H_TYPE, cfg[H3O_H_TYPE]))
                    else:
                        if len(hydronium) != 0:
                            raise InvalidDataError(
                                'Found an excess proton and a hydronium atoms. Check dump file '
                                'and input types: {} = {}; {} = {}; {} = {}'
                                .format(PROT_H_TYPE, cfg[PROT_H_TYPE],
                                        H3O_O_TYPE, cfg[H3O_O_TYPE],
                                        H3O_H_TYPE, cfg[H3O_H_TYPE]))
                        deprotonate(cfg, atom_lists[PROT_RES], excess_proton, hydronium,
                                    water_dict, box, data_tpl_content)

                    # Ensure in correct order for printing
                    atom_lists[HYD_MOL] = assign_hyd_mol(cfg, hydronium)
                    atom_lists[WAT_MOL] = sort_wat_mols(cfg, water_dict)

                    for a_list in atom_list_order:
                        dump_atom_data += atom_lists[a_list]

                    # overwrite atom_num, mol_num, atom_type, charge, then description
                    for index, atom in enumerate(dump_atom_data):
                        tpl_atom = data_tpl_content[ATOMS_CONTENT][index]
                        if atom[3] == tpl_atom[3] or atom[0] in cfg[PROT_TYPE_IGNORE_ATOMS]:
                            atom[0:4] = tpl_atom[0:4]
                            atom[7] = ' '.join(tpl_atom[7:])
                        else:
                            raise InvalidDataError(
                                "In reading file: {}\n found atom index {} with charge {} which "
                                "does not match the charge in the data template ({}). \n"
                                "To ignore this mis-match, list "
                                "the atom's index number in the keyword '{}' in the ini file."
                                "".format(dump_file, atom[0], atom[3], tpl_atom[3],
                                          PROT_TYPE_IGNORE_ATOMS))

                    d_out = create_out_fname(dump_file,
                                             suffix='_' + str(timestep),
                                             ext='.data',
                                             base_dir=cfg[OUT_BASE_DIR])
                    data_tpl_content[HEAD_CONTENT][0] = "Created by evbdump2data from {} " \
                                                        "timestep {}".format(dump_file, timestep)
                    list_to_file(
                        data_tpl_content[HEAD_CONTENT] + dump_atom_data + data_tpl_content[TAIL_CONTENT],
                        d_out)
                counter += 1

    # counter is 1 only when reading did not stop part-way through a box or atoms section
    if counter == 1:
        print("Completed reading dumpfile {}".format(dump_file))
    else:
        warning(
            "Dump file {} step {} did not have the full list of atom numbers. "
            "Continuing program.".format(dump_file, timestep))