def atomistic_non_protein_non_solvent(cg_residue_type,cg_residues):
    """Fit the atomistic fragments of one residue type onto each CG residue.

    For every entry of ``cg_residues`` the fragment database is queried via
    ``at_mod``, each fragment group is rigid-fitted onto the CG residue and
    then rotated using the rotation derived from the atomistic/CG
    connection points.

    Parameters:
        cg_residue_type: residue name used to look up the fragments.
        cg_residues: mapping of residue key -> CG bead data for this type.

    Returns:
        dict mapping the enumeration index of each residue to a dict of
        ``atom key -> atom record`` (a shallow copy of the fitted record).

    Raises:
        SystemExit: when the number of atomistic connection points differs
            from the number of CG connection points for a group.
    """
    atomistic_fragments={}  #### residue dictionary
    #### run through every residue in a particular residue type
    residue_type={}
    residue_type_mass={}
    for cg_resid, cg_residue in enumerate(cg_residues):
        atomistic_fragments[cg_resid]={}
        frag_location=at_mod.fragment_location(cg_residue_type) ### get fragment location from database
        residue_type[cg_residue_type], residue_type_mass[cg_residue_type] = at_mod.get_atomistic(cg_residue_type, frag_location)
        connect = at_mod.connection(residue_type[cg_residue_type])
        for group in residue_type[cg_residue_type]:
            center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(residue_type[cg_residue_type][group], residue_type_mass[cg_residue_type], cg_residue, cg_residues[cg_residue])
            at_connect, cg_connect = at_mod.connectivity(cg_residues[cg_residue], connect, at_frag_centers, cg_frag_centers, group_fit, group)
            if len(at_connect) == len(cg_connect):
                xyz_rot_apply=at_mod.rotate(np.array(at_connect)-center, np.array(cg_connect)-center, False)
            else:
                # The diagnostic previously referenced names that do not exist in
                # this function (at_connections, cg_connections, residue_number,
                # resname) and therefore died with NameError instead of
                # reporting the mismatch.  Use the variables actually in scope.
                print('atom connections: '+str(len(at_connect))+' does not equal CG connections: '+str(len(cg_connect)))
                sys.exit('residue number: '+str(cg_resid)+', residue type: '+str(cg_residue_type)+', group: '+str(group))
            for bead in group_fit:
                for atom in group_fit[bead]:
                    group_fit[bead][atom]['coord'] = at_mod.rotate_atom(group_fit[bead][atom]['coord'], center, xyz_rot_apply)
                    # store a copy so later edits to group_fit do not alias the result
                    atom_new = group_fit[bead][atom].copy()
                    atomistic_fragments[cg_resid][atom] = atom_new
    return atomistic_fragments
def hybridise_protein_inputs(final_coordinates_atomistic, atomistic_protein_centered, cg_com, xyz_rot_apply, chain):
    """Merge de novo residues with user-supplied atomistic residues.

    Residues whose type is listed in ``g_var.mod_residues`` always keep the
    de novo coordinates.  Any other residue is taken from every part of
    ``atomistic_protein_centered`` that contains it (the last matching part
    wins), after rotating each atom about ``cg_com[part index]`` with
    ``xyz_rot_apply[part index]``.  Residues found in no part fall back to
    the de novo coordinates.

    Note: the 'coord' entries of ``atomistic_protein_centered`` are updated
    in place.  ``chain`` is accepted but not used here.

    Returns:
        dict mapping residue key -> atom dictionary.

    Raises:
        SystemExit: if a user-supplied atom has a different residue type
            than the de novo residue with the same key.
    """
    merged = {}
    for residue, de_novo_atoms in final_coordinates_atomistic.items():
        # residue type is read from the first atom of the de novo residue
        resname = de_novo_atoms[next(iter(de_novo_atoms))]['res_type']

        if resname in g_var.mod_residues:
            merged[residue] = de_novo_atoms
            continue

        found = False
        for part_index, part in enumerate(atomistic_protein_centered):
            segment = atomistic_protein_centered[part]
            if residue not in segment:
                continue
            found = True
            user_atoms = segment[residue]
            for atom in user_atoms:
                record = user_atoms[atom]
                if record['res_type'] != resname:
                    print('de_novo', resname, 'at_user', record['res_type'])
                    sys.exit('de novo and at user supplied don\'t match')
                record['coord'] = at_mod.rotate_atom(
                    record['coord'], cg_com[part_index], xyz_rot_apply[part_index])
            merged[residue] = user_atoms

        if not found:
            merged[residue] = de_novo_atoms
    return merged
def atomistic_non_protein_solvent(cg_residue_type, cg_residues):
    """Place randomly oriented atomistic solvent fragments on CG residues.

    For each CG residue the name of its first bead selects which fragment
    entries apply; every matching entry is rigid-fitted and then rotated
    about the fit centre by three angles drawn uniformly from [0, 2*pi).

    Parameters:
        cg_residue_type: residue name used to look up the fragments.
        cg_residues: mapping of residue key -> CG bead data.

    Returns:
        dict mapping the enumeration index of each residue to a dict of
        ``atom key -> atom record`` (shallow copies).
    """
    placed = {}
    full_turn = math.pi * 2
    for index, residue_key in enumerate(cg_residues):
        # the first bead name identifies the fragment; left untouched when
        # the residue holds no beads
        for first_bead in cg_residues[residue_key]:
            fragment = first_bead
            break
        placed[index] = {}
        location = at_mod.fragment_location(cg_residue_type)  ### get fragment location from database
        templates, masses = at_mod.get_atomistic(location)
        for res_type in templates:
            if fragment not in templates[res_type]:
                continue
            center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(
                templates[res_type], masses, residue_key, cg_residues[residue_key])
            # three independent random angles, drawn in order
            xyz_rot_apply = [np.random.uniform(0, full_turn) for _ in range(3)]
            for bead_atoms in group_fit.values():
                for atom, record in bead_atoms.items():
                    record['coord'] = at_mod.rotate_atom(record['coord'], center, xyz_rot_apply)
                    placed[index][atom] = record.copy()
    return placed
def RMSD_align(coord_set_1, coord_set_2):
    """Rotate ``coord_set_1`` onto ``coord_set_2``.

    Both sets are shifted by the mean of ``coord_set_2`` before the rotation
    is obtained from ``at_mod.kabsch_rotate``; each point of ``coord_set_1``
    is then passed through ``at_mod.rotate_atom`` with that same centre.

    Returns:
        numpy array holding the rotated points of ``coord_set_1``.
    """
    centroid = np.mean(coord_set_2, axis=0)
    rotation = at_mod.kabsch_rotate(coord_set_1 - centroid, coord_set_2 - centroid)
    rotated = [at_mod.rotate_atom(point, centroid, rotation) for point in coord_set_1]
    return np.array(rotated)
def RMSD_measure(structure_atoms):
    """Return the per-chain RMSD between atomistic residue centres and CG backbone beads.

    For every chain index in ``range(g_var.system['PROTEIN'])`` the
    'frag_mass'-weighted centre of each residue is computed, the set of
    centres is translated onto the centroid of ``g_var.backbone_coords[chain]``,
    rotated with ``at_mod.kabsch_rotate`` / ``at_mod.rotate_atom`` and compared
    with the CG backbone coordinates.

    Parameters:
        structure_atoms: ``chain -> residue -> atom -> record`` mapping; each
            record provides 'coord' and 'frag_mass'.

    Returns:
        dict mapping chain index -> RMSD rounded to 3 decimals.

    Raises:
        SystemExit: if a residue centre cannot be computed (e.g. all masses
            are zero) or if the residue counts of AT and CG differ.
    """
    RMSD_dict = {}
    for chain in range(g_var.system['PROTEIN']):
        at_centers = []
        #### runs through every residue and atom
        for residue in structure_atoms[chain]:
            #### gets center of mass of each residue (note only backbone heavy atoms have a mass)
            at_centers_iter = []
            for atom in structure_atoms[chain][residue]:
                at_centers_iter.append(
                    np.append(structure_atoms[chain][residue][atom]['coord'],
                              structure_atoms[chain][residue][atom]['frag_mass']))
            try:
                coord_mass = np.array(at_centers_iter)
                at_centers.append(
                    np.average(coord_mass[:, :3], axis=0, weights=coord_mass[:, 3]))
            except Exception:
                # Exception (not BaseException) so Ctrl-C / SystemExit are not
                # misreported as a mass problem.
                print('The fragment probably has no mass\n')
                for atom in structure_atoms[chain][residue]:
                    print(structure_atoms[chain][residue][atom])
                # non-zero status: this is a fatal error, not a clean exit
                sys.exit(1)
        #### checks that the number of residues in the chain are the same between CG and AT
        if len(at_centers) != len(g_var.backbone_coords[chain]):
            sys.exit('In chain ' + str(chain) + ' the atomistic input does not match the CG. \n'
                     'number of CG residues ' + str(len(g_var.backbone_coords[chain])) +
                     '\nnumber of AT residues ' + str(len(at_centers)))
        cg_backbone = np.array(g_var.backbone_coords[chain])[:, :3]
        cg_center = np.mean(cg_backbone, axis=0)
        # translate the atomistic centres so both sets share the same centroid
        at_align = np.array(at_centers) - (np.mean(np.array(at_centers), axis=0) - cg_center)
        xyz_rot_apply = at_mod.kabsch_rotate(at_align - cg_center, cg_backbone - cg_center)
        for at_val, atom in enumerate(at_align):
            at_align[at_val] = at_mod.rotate_atom(atom, cg_center, xyz_rot_apply)
        #### squared distance between each residue centre and its CG backbone bead
        # Sum over x, y, z per residue first; averaging the individual
        # components instead under-reports the RMSD by a factor of sqrt(3).
        sq_dist = np.sum((at_align - cg_backbone)**2, axis=1)
        RMSD_val = np.sqrt(np.mean(sq_dist))  #### RMSD calculation
        RMSD_dict[chain] = np.round(RMSD_val, 3)  #### stores RMSD in dictionary
    return RMSD_dict
def atomistic_non_protein_non_solvent(cg_residue_type, cg_residues):
    """Fit atomistic fragments onto each CG residue unless a merged PDB already exists.

    When ``<working_dir><type>/<type>_all.pdb`` is present nothing is built
    and the number of CG residues is returned alongside an empty dict.
    Otherwise every fragment group is rigid-fitted onto each residue and
    rotated with the rotation returned by ``at_mod.kabsch_rotate`` for the
    atomistic/CG connection points.

    Returns:
        tuple ``(fragments, count)``: ``(built fragments, 0)`` after a
        build, or ``({}, len(cg_residues))`` when the PDB already exists.

    Raises:
        SystemExit: if the connection counts differ or are zero, or if
            computing the rotation fails.
    """
    built = {}
    merged_pdb = g_var.working_dir + cg_residue_type + '/' + cg_residue_type + '_all.pdb'
    if os.path.exists(merged_pdb):
        return built, len(cg_residues)

    for index, residue_key in enumerate(cg_residues):
        built[index] = {}
        location = gen.fragment_location(cg_residue_type)  ### get fragment location from database
        groups, masses = at_mod.get_atomistic(location)
        for group in groups:
            center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(
                groups[group], masses, residue_key, cg_residues[residue_key])
            at_connect, cg_connect = at_mod.connectivity(
                cg_residues[residue_key], at_frag_centers, cg_frag_centers, group_fit, group)

            n_at, n_cg = len(at_connect), len(cg_connect)
            if n_at != n_cg or n_cg <= 0:
                print('atom connections: ' + str(n_at) +
                      ' does not match CG connections: ' + str(n_cg))
                sys.exit('residue number: ' + str(index) + ', residue type: ' +
                         str(cg_residue_type) + ', group: ' + group)
            try:
                rotation = at_mod.kabsch_rotate(
                    np.array(at_connect) - center, np.array(cg_connect) - center)
            except BaseException:
                sys.exit('There is a issue with residue: ' + cg_residue_type +
                         ' in group: ' + str(group))

            for bead_atoms in group_fit.values():
                for atom, record in bead_atoms.items():
                    record['coord'] = at_mod.rotate_atom(record['coord'], center, rotation)
                    built[index][atom] = record.copy()
    return built, 0
def atomistic_non_protein_solvent(cg_residue_type, cg_residues):
    """Place randomly oriented solvent fragments, or just count SOL atoms.

    If ``<working_dir>SOL/SOL_all.pdb`` exists and the type is 'SOL', the
    function returns early on the first residue with the number of entries
    in the ``g_var.water`` mass list whose fourth element exceeds 1, and
    that number multiplied by ``len(cg_residues)``.  Otherwise each residue
    gets the fragments matching its first bead, rigid-fitted and rotated by
    a matrix built by ``gen.AnglesToRotMat`` from three uniform random
    angles in [0, 2*pi).

    Returns:
        ``(atoms per bead, total atoms)`` on the SOL shortcut, otherwise
        ``(fragment dictionary, 0)``.
    """
    placed = {}
    full_turn = math.pi * 2
    for index, residue_key in enumerate(cg_residues):
        # the first bead name identifies the fragment; left untouched when
        # the residue holds no beads
        for first_bead in cg_residues[residue_key]:
            fragment = first_bead
            break
        placed[index] = {}
        location = gen.fragment_location(cg_residue_type)  ### get fragment location from database
        templates, masses = at_mod.get_atomistic(location)

        if os.path.exists(g_var.working_dir + 'SOL' + '/SOL_all.pdb') and cg_residue_type == 'SOL':
            sol_p_bead = 0
            for mass_entry in masses[g_var.water]:
                if mass_entry[3] > 1:
                    sol_p_bead += 1
            return sol_p_bead, sol_p_bead * len(cg_residues)

        for res_type in templates:
            if fragment not in templates[res_type]:
                continue
            center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(
                templates[res_type], masses, residue_key, cg_residues[residue_key])
            # three independent random angles, drawn in order
            angles = [np.random.uniform(0, full_turn) for _ in range(3)]
            rotation = gen.AnglesToRotMat(angles)
            for bead_atoms in group_fit.values():
                for atom, record in bead_atoms.items():
                    record['coord'] = at_mod.rotate_atom(record['coord'], center, rotation)
                    placed[index][atom] = record.copy()
    return placed, 0
def build_multi_residue_atomistic_system(cg_residues, sys_type):
    """Build de novo atomistic coordinates for every residue of ``sys_type``.

    Walks ``cg_residues[sys_type]`` in iteration order, fits the atomistic
    fragment groups of each residue onto its CG beads and groups the result
    by chain.  A chain ends when ``at_mod.BB_connectivity`` reports
    ``new_chain`` for a residue.

    Side effects on ``g_var`` (all visible below): resets ``seq_cg`` and
    ``ter_res`` for ``sys_type``, fills ``backbone_coords`` when
    ``sys_type == 'PROTEIN'``, stores the chain count in
    ``system[sys_type]`` and sets ``skip_disul[chain] = False`` for protein
    chains.

    Parameters:
        cg_residues: mapping ``sys_type -> residue number -> bead -> record``;
            records provide at least 'residue_name' and 'coord'.
        sys_type: key of the system type to convert.

    Returns:
        dict ``chain index -> residue number -> atom key -> atom record``.

    Raises:
        SystemExit: when the atomistic and CG connection counts of a group
            differ and are not both zero.
    """
    #### initialisation of counters
    chain_count = 0
    coord_atomistic = {}
    g_var.seq_cg = {sys_type: {}}
    g_var.ter_res = {sys_type: {}}
    gen.mkdir_directory(g_var.working_dir + sys_type)  ### presumably creates (and enters) the output directory - confirm in gen
    #### for each residue in the system
    residue_type = {}
    residue_type_mass = {}
    new_chain = True
    for cg_residue_id, residue_number in enumerate(cg_residues[sys_type]):
        # progress is printed only when the percentage (2 d.p.) is a whole number
        if np.round((cg_residue_id / len(cg_residues[sys_type])) * 100, 2).is_integer():
            print('Converting de_novo ' + sys_type + ': ', np.round((cg_residue_id / len(cg_residues[sys_type])) * 100, 2), '%', end='\r')
        # residue name is taken from the first bead of the residue
        resname = cg_residues[sys_type][residue_number][next(iter(cg_residues[sys_type][residue_number]))]['residue_name']
        if new_chain:
            # first residue of a chain: create the per-chain containers once
            if chain_count not in coord_atomistic:
                if sys_type == 'PROTEIN':
                    g_var.backbone_coords[chain_count] = []
                coord_atomistic[chain_count] = {}
                g_var.seq_cg[sys_type][chain_count] = []
                # [first residue name, last residue name]; the second slot is filled when the chain ends
                g_var.ter_res[sys_type][chain_count] = [resname, False]
            new_chain = False
        coord_atomistic[chain_count][residue_number] = {}
        frag_location = gen.fragment_location(resname)  ### get fragment location from database
        residue_type[resname], residue_type_mass[resname] = at_mod.get_atomistic(frag_location)
        g_var.seq_cg[sys_type] = add_to_sequence(g_var.seq_cg[sys_type], resname, chain_count)
        new_chain = False
        for group in residue_type[resname]:
            # drop fragment entries whose bead is absent from this CG residue
            for key in list(residue_type[resname][group].keys()):
                if key not in cg_residues[sys_type][residue_number]:
                    del residue_type[resname][group][key]
            if len(residue_type[resname][group]) > 0:
                center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(residue_type[resname][group], residue_type_mass[resname], residue_number, cg_residues[sys_type][residue_number])
                at_connect, cg_connect = at_mod.connectivity(cg_residues[sys_type][residue_number], at_frag_centers, cg_frag_centers, group_fit, group)
                for group_bead in group_fit:
                    # beads listed under 'CONNECT' in the residue topology link to neighbouring residues
                    if group_bead in g_var.res_top[resname]['CONNECT']:
                        at_connect, cg_connect, new_chain = at_mod.BB_connectivity(at_connect, cg_connect, cg_residues[sys_type], group_fit[group_bead], residue_number, group_bead)
                        if sys_type == 'PROTEIN':
                            # CG bead coordinate with a trailing 1 appended
                            g_var.backbone_coords[chain_count].append(np.append(cg_residues[sys_type][residue_number][group_bead]['coord'], 1))
                if len(at_connect) == len(cg_connect) and len(at_connect) != 0:
                    xyz_rot_apply = at_mod.kabsch_rotate(np.array(at_connect) - center, np.array(cg_connect) - center)
                elif len(at_connect) == 0:
                    # NOTE(review): False is handed to at_mod.rotate_atom below; presumably
                    # that means "no rotation" - confirm against at_mod.rotate_atom.
                    xyz_rot_apply = False
                    print('Cannot find any connectivity for residue number: ' + str(residue_number) + ', residue type: ' + str(resname) + ', group: ' + str(group))
                else:
                    print('atom connections: ' + str(len(at_connect)) + ' does not equal CG connections: ' + str(len(cg_connect)))
                    sys.exit('residue number: ' + str(residue_number) + ', residue type: ' + str(resname) + ', group: ' + group)
                for bead in group_fit:
                    for atom in group_fit[bead]:
                        group_fit[bead][atom]['coord'] = at_mod.rotate_atom(group_fit[bead][atom]['coord'], center, xyz_rot_apply)
                        # store a shallow copy of the fitted atom record
                        atom_new = group_fit[bead][atom].copy()
                        coord_atomistic[chain_count][residue_number][atom] = atom_new
        if new_chain:
            # chain finished: record its last residue name and move to the next chain index
            g_var.ter_res[sys_type][chain_count][1] = resname
            chain_count += 1
    print('Completed initial conversion of ' + sys_type + '\n')
    g_var.system[sys_type] = chain_count
    if sys_type == 'PROTEIN':
        for chain in range(chain_count):
            g_var.skip_disul[chain] = False
    return coord_atomistic
def hybridise_protein_inputs(final_coordinates_atomistic, atomistic_protein_centered, cg_com, xyz_rot_apply, chain, box_vec):
    """Write a PDB that mixes user-supplied and de novo atoms for one chain.

    Residues whose type (read from their first atom) is in
    ``f_loc.mod_residues`` are written from the de novo coordinates.  Other
    residues are written from every part of ``atomistic_protein_centered``
    that contains them, after rotating each atom about
    ``cg_com[part index]`` with ``xyz_rot_apply[part index]``; residues found
    in no part fall back to the de novo atoms.  All collected coordinates are
    passed through ``at_mod.check_atom_overlap`` before being written.

    Note: the 'coord' entries of ``atomistic_protein_centered`` are updated
    in place.  The function returns nothing; its result is the file
    ``PROTEIN/PROTEIN_at_rep_user_supplied_<chain>.pdb`` in the working
    directory.

    Raises:
        SystemExit: if a user-supplied atom has a different residue type
            than the de novo residue with the same key.
    """
    pdb_output = gen.create_pdb(g_var.working_dir+'PROTEIN/PROTEIN_at_rep_user_supplied_'+str(chain)+'.pdb', box_vec)
    final_atom = {}
    all_coords = []

    def _store(record, residue):
        # keys are consecutive integers, so the next id equals the current size
        final_atom[len(final_atom)] = {
            'atom': record['atom'],
            'res_type': record['res_type'],
            'chain': ascii_uppercase[chain],
            'residue': residue,
            'x': record['coord'][0],
            'y': record['coord'][1],
            'z': record['coord'][2],
        }
        all_coords.append(record['coord'])

    for residue, de_novo_atoms in final_coordinates_atomistic.items():
        if not de_novo_atoms:
            # nothing to inspect or write for an empty residue
            continue
        de_novo_type = de_novo_atoms[next(iter(de_novo_atoms))]['res_type']

        if de_novo_type in f_loc.mod_residues:
            for atom in de_novo_atoms:
                _store(de_novo_atoms[atom], residue)
            continue

        exists = False
        for part_val, part in enumerate(atomistic_protein_centered):
            segment = atomistic_protein_centered[part]
            if residue not in segment:
                continue
            exists = True
            for atom in segment[residue]:
                record = segment[residue][atom]
                if record['res_type'] != de_novo_type:
                    print('de_novo' , de_novo_type,'at_user', record['res_type'])
                    sys.exit('de novo and at user supplied don\'t match')
                record['coord'] = at_mod.rotate_atom(record['coord'], cg_com[part_val], xyz_rot_apply[part_val])
                _store(record, residue)
        if not exists:
            for atom in de_novo_atoms:
                _store(de_novo_atoms[atom], residue)

    merge_coords = at_mod.check_atom_overlap(all_coords)
    for index, xyz in enumerate(merge_coords):
        entry = final_atom[index]
        pdb_output.write(g_var.pdbline%((index+1, entry['atom'], entry['res_type'], entry['chain'], entry['residue'],
                                         xyz[0], xyz[1], xyz[2], 1, 0))+'\n')
def build_protein_atomistic_system(cg_residues, box_vec):
    """Build de novo atomistic protein coordinates from CG residues.

    Each residue's fragment groups are rigid-fitted onto its CG beads and
    rotated using the connection points; backbone ('BB') groups also extend
    the chain sequence and backbone coordinate list.  A chain ends when
    ``BB_connectivity`` reports ``new_chain``.

    Parameters:
        cg_residues: mapping ``residue number -> bead -> record``; records
            provide at least 'residue_name' and 'coord'.
        box_vec: passed through unchanged to ``finalise_novo_atomistic``.

    Returns:
        tuple ``(system, backbone_coords, final_coordinates_atomistic, sequence)``
        where ``system`` holds 'terminal_residue' and 'PROTEIN' (chain count).

    Raises:
        SystemExit: when the atomistic and CG connection counts of a group
            differ.
    """
    #### initialisation of counters
    chain_count=0
    system={}
    backbone_coords={}
    backbone_coords[chain_count]=[]
    terminal={}
    terminal[chain_count]=[]
    coordinates_atomistic={}
    coordinates_atomistic[chain_count]={}
    sequence={}
    sequence[chain_count]=[]
    print('Converting Protein')
    gen.mkdir_directory(g_var.working_dir+'PROTEIN')  ### presumably creates (and enters) the protein directory - confirm in gen
    #### for each residue in protein
    initial=True  # NOTE(review): never read in this function
    residue_type={}
    residue_type_mass={}
    for cg_residue_id, residue_number in enumerate(cg_residues):
        # residue name is taken from the first bead of the residue
        resname = cg_residues[residue_number][next(iter(cg_residues[residue_number]))]['residue_name']
        if cg_residue_id == 0:
            # terminal entry for the very first residue of the first chain
            terminal[chain_count].append(f_loc.backbone[resname]['ter'])
        coordinates_atomistic[chain_count][residue_number]={}
        frag_location=at_mod.fragment_location(resname) ### get fragment location from database
        residue_type[resname], residue_type_mass[resname] = at_mod.get_atomistic(frag_location)
        for group in residue_type[resname]:
            center, at_frag_centers, cg_frag_centers, group_fit = at_mod.rigid_fit(residue_type[resname][group], residue_type_mass[resname], residue_number, cg_residues[residue_number])
            at_connect, cg_connect = at_mod.connectivity(cg_residues[residue_number], at_frag_centers, cg_frag_centers, group_fit, group)
            if 'BB' in group_fit:
                BB_connect = []  # NOTE(review): never read in this function
                # locate the backbone atoms named as N_ter / C_ter for this residue type
                # NOTE(review): if either name is absent, N_ter / C_ter keep the value
                # from a previous residue (or are unbound on the first one).
                for atom in group_fit['BB']:
                    if group_fit['BB'][atom]['atom'] == f_loc.backbone[resname]['N_ter']:
                        N_ter=atom
                    if group_fit['BB'][atom]['atom'] == f_loc.backbone[resname]['C_ter']:
                        C_ter=atom
                at_connect, cg_connect, new_chain = BB_connectivity(at_connect,cg_connect, cg_residues, group_fit['BB'], residue_number, N_ter, C_ter)
                sequence = at_mod.add_to_sequence(sequence, resname, chain_count)
                # CG backbone bead coordinate with a trailing 1 appended
                backbone_coords[chain_count].append(np.append(cg_residues[residue_number]['BB']['coord'], 1))
            if resname == 'CYS' and 'BB' not in group_fit:
                at_connect, cg_connect, disulphide, disul_at_info, disul_cg_info= find_closest_cysteine(at_connect, cg_connect, cg_residues, group_fit, residue_number)
            if len(at_connect) == len(cg_connect):
                xyz_rot_apply=at_mod.rotate(np.array(at_connect)-center, np.array(cg_connect)-center, False)
            else:
                print('atom connections: '+str(len(at_connect))+' does not equal CG connections: '+str(len(cg_connect)))
                sys.exit('residue number: '+str(residue_number)+', residue type: '+str(resname)+', group: '+group)
            for bead in group_fit:
                for atom in group_fit[bead]:
                    group_fit[bead][atom]['coord'] = at_mod.rotate_atom(group_fit[bead][atom]['coord'], center, xyz_rot_apply)
                    # store a shallow copy of the fitted atom record
                    atom_new = group_fit[bead][atom].copy()
                    coordinates_atomistic[chain_count][residue_number][atom] = atom_new
        #### if disulphide bond found move the S atoms to within 2 A of each other
        # 'disulphide' only exists once a CYS side-chain group has been processed,
        # hence the locals() check; it is reset after use so it is applied once.
        if 'disulphide' in locals():
            if disulphide:
                coordinates_atomistic[chain_count][residue_number] = shift_sulphur(residue_number, disul_at_info, disul_cg_info, coordinates_atomistic[chain_count], cg_residues)
                disulphide = False
        # NOTE(review): new_chain is only assigned inside the 'BB' branch above;
        # a first residue without a 'BB' group would leave it unbound here.
        if new_chain:
            terminal[chain_count].append(f_loc.backbone[resname]['ter'])
            chain_count+=1
            if cg_residue_id+1 != len(cg_residues):
                # more residues follow: prepare the containers for the next chain
                backbone_coords[chain_count]=[]
                coordinates_atomistic[chain_count]={}
                terminal[chain_count]=[]
                # NOTE(review): assumes residue keys are consecutive integers
                terminal[chain_count].append(f_loc.backbone[cg_residues[residue_number+1]['BB']['residue_name']]['ter'])
                sequence[chain_count]=[]
    if g_var.v >=1:
        # verbose summary table: chain index and number of residues in its sequence
        print('\n{0:^15}{1:^12}'.format('chain number', 'length of chain'))
        print('\n{0:^15}{1:^12}'.format('------------', '---------------'))
        for chain in sequence:
            print('{0:^15}{1:^12}'.format(chain, len(sequence[chain])))
        print()
    final_coordinates_atomistic = finalise_novo_atomistic(coordinates_atomistic, cg_residues, box_vec)
    system['terminal_residue']=terminal
    system['PROTEIN']=chain_count
    return system, backbone_coords, final_coordinates_atomistic, sequence