def write_posres(chain):
    """Write the graded position restraint files for one de novo protein chain.

    Seven ``.itp`` files are started with ``posres_header`` inside
    ``<working_dir>/PROTEIN/``: six restraint strengths that receive one line
    per non-hydrogen atom, and a ``ca`` file that only receives atoms named
    ``CA``. The atom index written is the 1-based position of the ``ATOM``
    record inside ``PROTEIN_de_novo_<chain>_gmx.pdb``.

    Args:
        chain: chain identifier used to build the input and output file names.
    """
    chain_stem = g_var.working_dir+'PROTEIN/PROTEIN_'+str(chain)
    #### file tag and the restraint text appended after the atom index
    heavy_atom_levels = (
        ('very_low', ' 1 200 200 200\n'),
        ('low', ' 1 750 750 750\n'),
        ('mid', ' 1 1500 1500 1500\n'),
        ('high', ' 1 3000 3000 3000\n'),
        ('very_high', ' 1 6000 6000 6000\n'),
        ('ultra', ' 1 10000 10000 10000\n'),
    )
    opened = []
    try:
        #### if not posres file exist create one
        heavy_outputs = []
        for tag, restraint in heavy_atom_levels:
            handle = posres_header(chain_stem+'_'+tag+'_posre.itp')
            opened.append(handle)
            heavy_outputs.append((handle, restraint))
        ca_posres = posres_header(chain_stem+'_ca_posre.itp')
        opened.append(ca_posres)
        #### read in each chain from after pdb2gmx
        with open(g_var.working_dir+'PROTEIN/PROTEIN_de_novo_'+str(chain)+'_gmx.pdb', 'r') as pdb_input:
            at_counter = 0
            for line in pdb_input:
                if not line.startswith('ATOM'):
                    continue
                line_sep = gen.pdbatom(line)
                at_counter += 1
                #### CA atoms get their own restraint file
                if line_sep['atom_name'] == 'CA':
                    ca_posres.write(str(at_counter)+' 1 1000 1000 1000\n')
                #### every non-hydrogen atom is restrained at each strength
                if not gen.is_hydrogen(line_sep['atom_name']):
                    for handle, restraint in heavy_outputs:
                        handle.write(str(at_counter)+restraint)
    finally:
        #### NOTE(review): assumes posres_header returns an open file-like object - confirm
        for handle in opened:
            handle.close()
def merge_minimised(residue_type, np_system, box_vec):
    """Merge the individually minimised residues of one type into a single pdb.

    Changes directory to ``<working_dir>/<residue_type>/min``, reads
    ``<residue_type>_<resid>.pdb`` for every resid, accumulates the atoms
    through ``at_mod.fix_chirality`` and writes
    ``<residue_type>_merged.pdb`` in the same folder.

    Args:
        residue_type: residue type name, also used as the folder name.
        np_system: mapping whose ``residue_type`` entry gives the number of
            residue files to read (not consulted for ``'SOL'``, where a single
            file with index 0 is read).
        box_vec: box information forwarded to ``gen.create_pdb``.

    Raises:
        SystemExit: if one of the expected minimised residue files is missing.
    """
    min_dir = g_var.working_dir+residue_type+'/min/'
    os.chdir(g_var.working_dir+residue_type+'/min')
    print('Merging individual residues : '+residue_type)
    #### create merged pdb in min folder
    pdb_output = gen.create_pdb(min_dir+residue_type+'_merged.pdb', box_vec)
    try:
        resid_range = 1 if residue_type == 'SOL' else np_system[residue_type]
        merge, merge_coords = [], []
        #### run through every resid
        for resid in range(resid_range):
            residue_file = min_dir+residue_type+'_'+str(resid)+'.pdb'
            #### check if it exists
            if not os.path.exists(residue_file):
                #### report the file that is actually missing, not the merged output
                sys.exit('cannot find minimised residue: \n'+residue_file)
            #### read in resid
            with open(residue_file, 'r') as pdb_input:
                merge_temp = [gen.pdbatom(line) for line in pdb_input if line.startswith('ATOM')]
            merge, merge_coords = at_mod.fix_chirality(merge, merge_temp, merge_coords)
        #### the overlap check is not applied to solvent
        if residue_type != 'SOL':
            merge_coords = at_mod.check_atom_overlap(merge_coords)
        for line_val, line in enumerate(merge):
            pdb_output.write(g_var.pdbline%((int(line['atom_number']), line['atom_name'], line['residue_name'],' ',line['residue_id'],\
                merge_coords[line_val][0],merge_coords[line_val][1],merge_coords[line_val][2],1,0))+'\n')
        pdb_output.write('TER\nENDMDL')
    finally:
        #### close the merged pdb even when a residue file is missing
        pdb_output.close()
def filter_input(pdb_lines_raw, CG=True):
    """Extract the parsed atom records, and optionally the box lines, from raw pdb lines.

    Args:
        pdb_lines_raw: the lines of a pdb file.
        CG: when true the lines starting with ``CRYST`` are collected and
            returned as well.

    Returns:
        The list of ``gen.pdbatom`` results for the atom lines; when ``CG`` is
        true a tuple of that list and the list of ``CRYST`` lines.

    Raises:
        SystemExit: if no atom line is found, or if ``CG`` is true and no
            ``CRYST`` line is found.
    """
    atoms = []
    for raw in pdb_lines_raw:
        if raw.startswith('ATOM '):
            atoms.append(gen.pdbatom(raw))
    if not atoms:
        sys.exit('input coarsegrain structure seems to contain no beads')
    if not CG:
        return atoms
    cryst_lines = [raw for raw in pdb_lines_raw if raw.startswith('CRYST')]
    if not cryst_lines:
        sys.exit('The input file is missing the Box vectors')
    return atoms, cryst_lines
def get_atomistic(frag_location, resname=False):
    """Read an atomistic fragment file into per-bead atom dictionaries.

    Args:
        frag_location: path of the fragment file.
        resname: residue name used for the mass lookup in ``g_var.res_top``;
            when falsy it is taken from the file name with its last four
            characters removed.

    Returns:
        tuple: ``(residue, fragment_mass)``. ``residue[group][bead][atom number]``
        holds the scaled coordinates, atom name, residue ids, residue type and
        mass of each atom; ``fragment_mass[bead]`` is a list of
        ``[x, y, z, mass]`` entries for the non-hydrogen atoms of that bead.
    """
    if not resname:
        resname = frag_location.split('/')[-1][:-4]
    #### read in atomistic fragments into dictionary
    residue = {}
    fragment_mass = {}
    with open(frag_location, 'r') as pdb_input:
        for line in pdb_input.readlines():
            #### a '[' line starts a new bead section
            if line.startswith('['):
                residue, group, bead = split_fragment_names(line, residue, resname)
                fragment_mass[bead] = []
            if not line.startswith('ATOM'):
                continue
            atom = gen.pdbatom(line)  ## splits up pdb line
            name = atom['atom_name']
            scaled = [atom['x'] * g_var.sf, atom['y'] * g_var.sf, atom['z'] * g_var.sf]
            entry = {
                'coord': np.array(scaled),
                'atom': name,
                'resid': 1,
                'resid_ori': atom['residue_id'],
                'res_type': atom['residue_name'],
                'frag_mass': 1,
            }
            residue[group][bead][atom['atom_number']] = entry
            #### updates fragment mass, hydrogens are left out
            if gen.is_hydrogen(name):
                continue
            masses = g_var.res_top[resname]['atom_masses']
            if name in masses:
                #### updates atom masses with crude approximations
                entry['frag_mass'] = masses[name]
                fragment_mass[bead].append(scaled + [masses[name]])
            else:
                #### NOTE(review): fallback weight of 1 is read as belonging to the mass lookup - confirm nesting
                fragment_mass[bead].append(scaled + [1])
    return residue, fragment_mass
def write_posres(chain):
    """Write the steered position restraint file for one protein chain.

    Nothing is done when ``PROTEIN_<chain>_steered_posre.itp`` already exists.
    Otherwise the file is created with its header and one restraint line is
    written for every ``ATOM`` record of ``PROTEIN_novo_<chain>_gmx.pdb`` whose
    atom name is listed under ``'restraint'`` for its residue in
    ``f_loc.backbone``. The atom index written is the 1-based position of the
    ``ATOM`` record in that pdb.

    Args:
        chain: chain identifier used to build the input and output file names.
    """
    posres_path = g_var.working_dir+'PROTEIN/PROTEIN_'+str(chain)+'_steered_posre.itp'
    #### if not posres file exist create one
    if os.path.exists(posres_path):
        return
    #### the context manager closes the restraint file, which was previously left open
    with open(posres_path, 'w') as posres_output:
        posres_output.write('[ position_restraints ]\n; atom type fx fy fz\n')
        #### read in each chain from after pdb2gmx
        with open(g_var.working_dir+'PROTEIN/PROTEIN_novo_'+str(chain)+'_gmx.pdb', 'r') as pdb_input:
            at_counter = 0
            for line in pdb_input:
                if not line.startswith('ATOM'):
                    continue
                line_sep = gen.pdbatom(line)
                at_counter += 1
                #### if atom is in the restraint list for that residue add to position restraint file
                if line_sep['atom_name'] in f_loc.backbone[line_sep['residue_name']]['restraint']:
                    posres_output.write(str(at_counter)+' 1 1000 1000 1000\n')
def read_in_merged_pdbs(merge, merge_coords, location):
    """Append the atoms of a merged pdb file to the running merge lists.

    Args:
        merge: list that receives the ``gen.pdbatom`` result of every ``ATOM`` line.
        merge_coords: list that receives the matching ``[x, y, z]`` coordinates.
        location: path of the pdb file to read.

    Returns:
        tuple: the same ``merge`` and ``merge_coords`` lists, extended in place.

    Raises:
        SystemExit: if ``location`` does not exist.
    """
    if not os.path.exists(location):
        #### the message is built from the path that was checked; the previous
        #### message used names that are not defined inside this function
        sys.exit('cannot find minimised residue: \n' + location)
    #### opens pdb file and collects every atom record
    with open(location, 'r') as pdb_input:
        for line in pdb_input:
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                merge.append(line_sep)
                merge_coords.append(
                    [line_sep['x'], line_sep['y'], line_sep['z']])
    return merge, merge_coords
def read_in_protein_pdbs(no_chains, file, end):
    """Read every protein chain pdb and return the merged, formatted pdb lines.

    Chain ``n`` is read from ``file + '_' + str(n) + end``. Atoms are
    accumulated through ``at_mod.fix_chirality`` and the accumulated
    coordinates are passed through ``at_mod.check_atom_overlap`` before the
    lines are formatted with ``g_var.pdbline``.

    Args:
        no_chains: number of chain files to read, indexed from 0.
        file: path prefix shared by the chain files.
        end: suffix of the chain files, including the extension.

    Returns:
        list: one formatted pdb line, ending in a newline, per merged atom.

    Raises:
        SystemExit: if a chain file does not exist.
    """
    #### reads in each chain into merge list
    merge, merged_coords = [], []
    for chain in range(0, no_chains):
        chain_file = file+'_'+str(chain)+end
        if not os.path.exists(chain_file):
            sys.exit('cannot find minimised protein chain: '+str(chain))
        with open(chain_file, 'r') as pdb_input:
            merge_temp = [gen.pdbatom(line) for line in pdb_input if line.startswith('ATOM')]
        #### one accumulator is threaded through every chain; previously the result
        #### was bound to a second name that stayed undefined when no chain was read
        merge, merged_coords = at_mod.fix_chirality(merge, merge_temp, merged_coords)
    merged_coords = at_mod.check_atom_overlap(merged_coords)
    merged = []
    for line_val, line in enumerate(merge):
        merged.append(g_var.pdbline%((int(line['atom_number']), line['atom_name'], line['residue_name'],' ',line['residue_id'],\
            merged_coords[line_val][0],merged_coords[line_val][1],merged_coords[line_val][2],1,0))+'\n')
    return merged
def read_initial_cg_pdb():
    """Read ``conversion_input.pdb`` and group its coarse-grained beads by residue.

    Every ``ATOM`` line is parsed with ``gen.pdbatom`` and renamed through
    ``swap``; a record is dropped only when both its atom name and residue name
    come back as ``SKIP``. Consecutive records sharing a residue id form one
    residue. Residues whose name is in ``f_loc.p_residues`` are stored under
    ``'PROTEIN'``, all others under their own residue name, and an ``ION``
    residue is additionally stored under ``'SOL'`` as a ``f_loc.water`` bead.

    Returns:
        tuple: ``(cg_residues, box_vec)`` where
        ``cg_residues[residue type][count][bead name]`` is a dictionary with
        ``'residue_name'`` and ``'coord'``, and ``box_vec`` is the last line of
        the file starting with ``CRYST``.

    Raises:
        SystemExit: if the file holds no usable bead or no ``CRYST`` line.
    """
    #### initialisation of dictionaries etc
    cg_residues = {}  ## cg_residues[residue type(POPE)][resid(1)][bead name(BB)][residue_name(PO4)/coordinates(coord)]
    residue_list = {}  ## beads of the residue being read, residue_list[bead name(BB)][residue_name(PO4)/coordinates(coord)]
    count = 0  ### residue counter initialisation
    residue_prev = None  ### resid of the residue being read, None until the first bead
    line_sep_prev = None  ### last bead that was kept, None until the first bead
    box_vec = None
    with open(g_var.input_directory + 'conversion_input.pdb', 'r') as pdb_input:
        for line in pdb_input:
            #### separates lines
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                line_sep['atom_name'], line_sep['residue_name'] = swap(
                    line_sep['atom_name'], line_sep['residue_name'], line_sep['residue_id'])
                if line_sep['atom_name'].upper() != 'SKIP' or line_sep['residue_name'].upper() != 'SKIP':
                    #### set up resnames in dictionaries
                    cg_residues = add_residue_to_dictionary(cg_residues, line_sep)
                    #### sets up previous resid id
                    if residue_prev is None:
                        residue_prev = line_sep['residue_id']
                    #### if resids are different then the finished residue list is added to cg_residues
                    if residue_prev != line_sep['residue_id']:
                        prev_name = line_sep_prev['residue_name']
                        if prev_name not in f_loc.p_residues:
                            cg_residues[prev_name][count] = residue_list
                            #### ions are also stored as a water bead under SOL
                            if prev_name == 'ION':
                                sol_res_list = {f_loc.water: residue_list[line_sep_prev['atom_name']].copy()}
                                sol_res_list[f_loc.water]['residue_name'] = 'SOL'
                                cg_residues['SOL'][count] = sol_res_list
                        else:
                            #### beads starting with B are stored as BB; the keys are copied
                            #### first because the dictionary is changed inside the loop
                            for bead in list(residue_list):
                                if bead.startswith('B'):
                                    residue_list['BB'] = residue_list.pop(bead)
                            cg_residues['PROTEIN'][count] = residue_list
                        #### updates dictionaries and counters
                        residue_list = {}  ### resets residue list
                        count += 1  ### moves counter along to next residue
                        residue_prev = line_sep['residue_id']  ### updates residue_prev with new resid
                    #### adds resname and coordinates to the atom name key in residue_list
                    residue_list[line_sep['atom_name']] = {
                        'residue_name': line_sep['residue_name'],
                        'coord': np.array([line_sep['x'], line_sep['y'], line_sep['z']])
                    }
                    line_sep_prev = line_sep.copy()
            #### finds box vectors
            if line.startswith('CRYST'):
                box_vec = line  ### collects box vectors from pdb
    if line_sep_prev is None:
        sys.exit('input coarsegrain structure seems to contain no beads')
    #### adds final residue to cg_residues, keyed on the last bead that was kept
    #### NOTE(review): the B to BB renaming above is not applied to the final residue - confirm intended
    last_name = line_sep_prev['residue_name']
    if last_name in f_loc.p_residues:
        cg_residues['PROTEIN'][count] = residue_list
    else:
        cg_residues[last_name][count] = residue_list
        if last_name == 'ION':
            sol_res_list = {f_loc.water: residue_list[line_sep_prev['atom_name']].copy()}
            sol_res_list[f_loc.water]['residue_name'] = 'SOL'
            cg_residues['SOL'][count] = sol_res_list
    #### checks if box vectors exist
    if box_vec is None:
        sys.exit('missing box vectors')  ### stops script if it cannot find box vectors
    return cg_residues, box_vec
def read_initial_at_pdb():
    """Read ``conversion_input.pdb`` and group its atoms by residue.

    Every ``ATOM`` line is parsed with ``gen.pdbatom`` and renamed through
    ``swap``; a record is dropped only when both its atom name and residue name
    come back as ``SKIP``. Consecutive records sharing a residue id form one
    residue. Residues whose name is in ``f_loc.p_residues`` are stored under
    ``'PROTEIN'``, all others under their own residue name.

    Returns:
        tuple: ``(at_residues, box_vec)`` where
        ``at_residues[residue type][count][atom name]`` is a dictionary with
        ``'residue_name'`` and ``'coord'``, and ``box_vec`` is the last line of
        the file starting with ``CRYST``.

    Raises:
        SystemExit: if the file holds no usable atom or no ``CRYST`` line.
    """
    at_residues = {}  ## at_residues[residue type(POPE)][resid(1)][atom name][residue_name/coordinates(coord)]
    residue_list = {}  ## atoms of the residue being read, residue_list[atom name][residue_name/coordinates(coord)]
    count = 0  ### residue counter initialisation
    residue_prev = None  ### resid of the residue being read, None until the first atom
    line_sep_prev = None  ### last atom that was kept, None until the first atom
    box_vec = None
    with open(g_var.input_directory + 'conversion_input.pdb', 'r') as pdb_input:
        for line in pdb_input:
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                line_sep['atom_name'], line_sep['residue_name'] = swap(
                    line_sep['atom_name'], line_sep['residue_name'], line_sep['residue_id'])
                if line_sep['atom_name'].upper() != 'SKIP' or line_sep['residue_name'].upper() != 'SKIP':
                    #### makes sure the residue type key exists
                    if line_sep['residue_name'] in f_loc.p_residues:
                        if 'PROTEIN' not in at_residues:  ## if protein does not exist add to dict
                            at_residues['PROTEIN'] = {}
                    elif line_sep['residue_name'] not in at_residues:
                        at_residues[line_sep['residue_name']] = {}
                    if residue_prev is None:
                        residue_prev = line_sep['residue_id']
                    #### if resids are different then the finished residue list is added to at_residues
                    if residue_prev != line_sep['residue_id']:
                        prev_name = line_sep_prev['residue_name']
                        if prev_name not in f_loc.p_residues:
                            at_residues[prev_name][count] = residue_list
                        else:
                            at_residues['PROTEIN'][count] = residue_list
                        #### updates dictionaries and counters
                        residue_list = {}  ### resets residue list
                        count += 1  ### moves counter along to next residue
                        residue_prev = line_sep['residue_id']  ### updates residue_prev with new resid
                    #### adds resname and coordinates to the atom name key in residue_list
                    residue_list[line_sep['atom_name']] = {
                        'residue_name': line_sep['residue_name'],
                        'coord': np.array([line_sep['x'], line_sep['y'], line_sep['z']])
                    }
                    line_sep_prev = line_sep.copy()
            #### finds box vectors
            if line.startswith('CRYST'):
                box_vec = line  ### collects box vectors from pdb
    if line_sep_prev is None:
        sys.exit('input atomistic structure seems to contain no atoms')
    #### adds final residue to at_residues, keyed on the last atom that was kept
    last_name = line_sep_prev['residue_name']
    if last_name in f_loc.p_residues:
        at_residues['PROTEIN'][count] = residue_list
    else:
        at_residues[last_name][count] = residue_list
    if box_vec is None:
        sys.exit('missing box vectors')  ### stops script if it cannot find box vectors
    return at_residues, box_vec
def read_in_atomistic(protein, cg_chain_count, sequence, check_alignment):
    """Read a user supplied atomistic protein pdb into per-chain dictionaries.

    Atoms are grouped as ``atomistic_protein_input[chain][residue id][atom number]``.
    A new chain is started when the atom matching the residue's ``'N_ter'``
    entry in ``f_loc.backbone`` lies more than 3.5 away from the last atom that
    matched a ``'C_ter'`` entry and the two atoms belong to different residue ids.

    Args:
        protein: path of the atomistic pdb file; checked after changing to
            ``g_var.start_dir``.
        cg_chain_count: number of chains expected; only compared when
            ``check_alignment`` is false.
        sequence: forwarded to ``align_chains`` when ``check_alignment`` is true.
        check_alignment: when true the chains are passed through
            ``check_sequence`` and ``align_chains``, and a residue that is in
            neither ``f_loc.p_residues`` nor ``f_loc.mod_residues`` stops the script.

    Returns:
        dict: the per-chain atom dictionary (the result of ``align_chains``
        when ``check_alignment`` is true).

    Raises:
        SystemExit: if the file is missing, if a residue is unknown while
            ``check_alignment`` is true, or if the chain count differs from
            ``cg_chain_count`` while ``check_alignment`` is false.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened source - confirm against the repository
    #### reset location and check if pdb exists
    os.chdir(g_var.start_dir)
    if not os.path.exists(protein):
        sys.exit('cannot find atomistic protein : '+protein)
    #### read in atomistic fragments into dictionary residue_list[0]=x,y,z,atom_name
    atomistic_protein_input={}
    chain_count=0
    #### read in pdb
    ter_residues=[]  ## residue ids at which a new chain was started; only appended to in this function
    with open(protein, 'r') as pdb_input:
        atomistic_protein_input[chain_count]={}
        for line_nr, line in enumerate(pdb_input.readlines()):
            #### separate line
            run=False ## turns to true is line is a bead/atom
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                # print(line_sep['atom_name'])
                #### modified residues keep every atom, otherwise names that start with H
                #### or with a digit followed by H are dropped
                if line_sep['residue_name'] in f_loc.mod_residues:
                    run=True
                elif str.isdigit(line_sep['atom_name'][0]) and line_sep['atom_name'][1] != 'H':
                    run=True
                elif not str.isdigit(line_sep['atom_name'][0]) and not line_sep['atom_name'].startswith('H'):
                    run=True
            #### if line is correct
            if run:
                if line_sep['residue_name'] in f_loc.p_residues or line_sep['residue_name'] in f_loc.mod_residues:
                    if not line_sep['atom_name'].startswith('H') or line_sep['residue_name'] in f_loc.mod_residues:
                        #### sorts out wrong atoms in terminal residues
                        if line_sep['atom_name'] in ['OT', 'O1', 'O2']:
                            line_sep['atom_name']='O'
                        #### makes C_terminal connecting atom variable
                        if line_sep['atom_name'] == f_loc.backbone[line_sep['residue_name']]['C_ter']:
                            C_ter=[line_sep['x'],line_sep['y'],line_sep['z']]
                            C_resid=line_sep['residue_id']
                            C=True
                        #### the bare except below also covers the atoms read before both N_ter and
                        #### C_ter have been assigned, where the names are still unbound
                        try:
                            #### tries to make a N_terminal connecting atom variable
                            if line_sep['atom_name'] == f_loc.backbone[line_sep['residue_name']]['N_ter']:
                                N_resid=line_sep['residue_id']
                                N_ter=[line_sep['x'],line_sep['y'],line_sep['z']]
                                N=True
                            #### measures distance between N and C atoms. if the distance is over 3.5 it counts as a new protein
                            dist=gen.calculate_distance(N_ter, C_ter)
                            if N and C and C_resid != N_resid and dist > 3.5:# and aas[line_sep['residue_name']] != sequence[chain_count][line_sep['residue_id']]:
                                #### NOTE(review): both anchors are reset to False here; how calculate_distance treats False is not visible - confirm
                                N_ter, C_ter=False, False
                                ter_residues.append(line_sep['residue_id'])
                                chain_count+=1
                                atomistic_protein_input[chain_count]={} ### new chain key
                        except:
                            pass
                        if line_sep['residue_id'] not in atomistic_protein_input[chain_count]: ## if residue does not exist add to dict
                            atomistic_protein_input[chain_count][line_sep['residue_id']]={}
                        #### adds atom to dictionary, every atom is given a initial mass of zero
                        atomistic_protein_input[chain_count][line_sep['residue_id']][line_sep['atom_number']]={'coord':np.array([line_sep['x'],line_sep['y'],line_sep['z']]),'atom':line_sep['atom_name'], 'res_type':line_sep['residue_name'],'frag_mass':0, 'resid':line_sep['residue_id']}
                        #### if atom is in the backbone list then its mass is updated to the correct one
                        if line_sep['atom_name'] in f_loc.backbone[line_sep['residue_name']]['atoms']:
                            #### looks up each character of the atom name in g_var.mass, the last match is kept
                            for atom in line_sep['atom_name']:
                                if atom in g_var.mass:
                                    atomistic_protein_input[chain_count][line_sep['residue_id']][line_sep['atom_number']]['frag_mass']=g_var.mass[atom]
                else:
                    #### unknown residues are only fatal when the alignment check is requested
                    if check_alignment:
                        sys.exit('The residue '+line_sep['residue_name']+' does not exist in the fragment database')
    if check_alignment:
        seq_user = check_sequence(atomistic_protein_input, chain_count+1)
        atomistic_protein_input = align_chains(atomistic_protein_input, seq_user, sequence)
    #### check if number of monomers is the same
    elif chain_count+1 != cg_chain_count:
        sys.exit('number of chains in atomistic protein input ('+str(chain_count+1)+') does not match CG representation ('+str(cg_chain_count)+')')
    return atomistic_protein_input