Example #1
0
File: gro.py Project: IAlibay/cg2at
def write_posres(chain):
    """Write the graded position-restraint files for one protein chain.

    Reads ``PROTEIN/PROTEIN_de_novo_<chain>_gmx.pdb`` from the working
    directory and numbers every ATOM record from 1.  Every non-hydrogen atom
    (as judged by ``gen.is_hydrogen``) is written to six restraint files of
    increasing force constant (very_low ... ultra); atoms named ``CA`` are
    additionally written to the ``_ca_posre.itp`` file with a force constant
    of 1000.

    The handles returned by ``posres_header`` are closed before returning
    (also on error) so that buffered restraint lines are flushed to disk.
    NOTE(review): assumes ``posres_header`` returns an open file-like object
    exposing ``write`` and ``close`` -- confirm against its definition.

    Args:
        chain: chain identifier used to build the input and output file names.
    """
    prefix = g_var.working_dir+'PROTEIN/PROTEIN_'+str(chain)
    #### restraint level -> force constant applied to every heavy atom
    heavy_atom_levels = (
        ('very_low', '200'),
        ('low', '750'),
        ('mid', '1500'),
        ('high', '3000'),
        ('very_high', '6000'),
        ('ultra', '10000'),
    )
    opened = []
    try:
        #### create one restraint file per level, in the same order as before
        heavy_posres = []
        for level, force in heavy_atom_levels:
            handle = posres_header(prefix+'_'+level+'_posre.itp')
            opened.append(handle)
            heavy_posres.append((handle, '     1  '+force+'  '+force+'  '+force+'\n'))
        ca_posres = posres_header(prefix+'_ca_posre.itp')
        opened.append(ca_posres)
        #### read in each chain from after pdb2gmx
        with open(g_var.working_dir+'PROTEIN/PROTEIN_de_novo_'+str(chain)+'_gmx.pdb', 'r') as pdb_input:
            at_counter = 0
            for line in pdb_input:
                if not line.startswith('ATOM'):
                    continue
                line_sep = gen.pdbatom(line)
                at_counter += 1
                #### C-alpha atoms get their own restraint file
                if line_sep['atom_name'] == 'CA':
                    ca_posres.write(str(at_counter)+'     1  1000  1000  1000\n')
                #### every heavy atom is restrained at each strength level
                if not gen.is_hydrogen(line_sep['atom_name']):
                    for handle, restraint in heavy_posres:
                        handle.write(str(at_counter)+restraint)
    finally:
        #### always release the restraint files, even if parsing failed part way
        for handle in opened:
            handle.close()
Example #2
0
def merge_minimised(residue_type, np_system, box_vec):
    """Merge the individually minimised residues of one type into a single pdb.

    Changes directory to ``<working_dir>/<residue_type>/min``, reads every
    ``<residue_type>_<resid>.pdb`` found there, passes each residue through
    ``at_mod.fix_chirality`` and writes the result to
    ``<residue_type>_merged.pdb``.  For every type except ``SOL`` the merged
    coordinates are also passed through ``at_mod.check_atom_overlap``.

    Args:
        residue_type: residue type name; also the name of its sub-directory.
        np_system: mapping of residue type to number of residues (not
            consulted for ``SOL``, which is read as a single file).
        box_vec: box vector information forwarded to ``gen.create_pdb``.

    Raises:
        SystemExit: if one of the per-residue minimised files is missing.  The
            message now names the file that was actually looked for.
    """
    min_dir = g_var.working_dir+residue_type+'/min/'
    os.chdir(g_var.working_dir+residue_type+'/min')
    print('Merging individual residues : '+residue_type)
    #### create merged pdb in min folder
    pdb_output = gen.create_pdb(min_dir+residue_type+'_merged.pdb', box_vec)
    try:
        #### solvent is stored as one file, everything else as one file per residue
        resid_range = 1 if residue_type == 'SOL' else np_system[residue_type]
        merge, merge_coords = [], []
        #### run through every resid
        for resid in range(resid_range):
            residue_pdb = min_dir+residue_type+'_'+str(resid)+'.pdb'
            #### check if it exists
            if not os.path.exists(residue_pdb):
                sys.exit('cannot find minimised residue: \n'+residue_pdb)
            #### read in the atoms of this resid
            with open(residue_pdb, 'r') as pdb_input:
                merge_temp = [gen.pdbatom(line) for line in pdb_input if line.startswith('ATOM')]
            merge, merge_coords = at_mod.fix_chirality(merge, merge_temp, merge_coords)
        if residue_type != 'SOL':
            merge_coords = at_mod.check_atom_overlap(merge_coords)
        for line_val, line in enumerate(merge):
            x, y, z = merge_coords[line_val][0], merge_coords[line_val][1], merge_coords[line_val][2]
            pdb_output.write(g_var.pdbline%((int(line['atom_number']), line['atom_name'], line['residue_name'],' ',line['residue_id'],\
                x,y,z,1,0))+'\n')
        pdb_output.write('TER\nENDMDL')
    finally:
        #### release the merged pdb handle on every exit path
        pdb_output.close()
Example #3
0
def filter_input(pdb_lines_raw, CG=True):
    """Extract the parsed ATOM records (and optionally the box lines) from raw pdb lines.

    Args:
        pdb_lines_raw: raw pdb lines.
        CG: when true, the lines starting with ``CRYST`` are also collected
            and returned.

    Returns:
        ``(atoms, box_lines)`` when ``CG`` is true, otherwise just ``atoms``,
        where ``atoms`` holds the ``gen.pdbatom`` result of every line that
        starts with ``'ATOM '``.

    Raises:
        SystemExit: if there are no ATOM lines, or if ``CG`` is true and no
            ``CRYST`` line is present.
    """
    atoms = []
    for raw_line in pdb_lines_raw:
        if raw_line.startswith('ATOM '):
            atoms.append(gen.pdbatom(raw_line))
    if not atoms:
        sys.exit('input coarsegrain structure seems to contain no beads')
    if not CG:
        return atoms
    #### the coarse-grained input must also carry its box definition
    cryst_lines = [raw_line for raw_line in pdb_lines_raw if raw_line.startswith('CRYST')]
    if not cryst_lines:
        sys.exit('The input file is missing the Box vectors')
    return atoms, cryst_lines
Example #4
0
def get_atomistic(frag_location, resname=False):
    """Read an atomistic fragment file into per-bead atom and mass tables.

    Lines starting with ``[`` are handed to ``split_fragment_names``, which
    supplies the current group and bead; following ATOM lines are stored
    under that group/bead with coordinates scaled by ``g_var.sf``.

    Args:
        frag_location: path of the fragment file.
        resname: residue name used to look up ``g_var.res_top``; when falsy
            it is taken from the file name with its last four characters
            removed.

    Returns:
        ``(residue, fragment_mass)`` where ``residue[group][bead][atom_number]``
        is a dict with keys ``coord``, ``atom``, ``resid``, ``resid_ori``,
        ``res_type`` and ``frag_mass``, and ``fragment_mass[bead]`` is a list
        of ``[x, y, z, mass]`` entries.
    """
    if not resname:
        resname = frag_location.split('/')[-1][:-4]

    residue = {}        ## residue[group][bead][atom number] -> atom record
    fragment_mass = {}  ## fragment_mass[bead] -> list of [x, y, z, mass]
    with open(frag_location, 'r') as pdb_input:
        for line in pdb_input.readlines():
            #### a header line switches the current group/bead
            if line.startswith('['):
                residue, group, bead = split_fragment_names(line, residue, resname)
                fragment_mass[bead] = []
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)  ## splits up pdb line
                scaled = [
                    line_sep['x'] * g_var.sf,
                    line_sep['y'] * g_var.sf,
                    line_sep['z'] * g_var.sf,
                ]
                atom_entry = {
                    'coord': np.array(scaled),
                    'atom': line_sep['atom_name'],
                    'resid': 1,
                    'resid_ori': line_sep['residue_id'],
                    'res_type': line_sep['residue_name'],
                    'frag_mass': 1,
                }
                residue[group][bead][line_sep['atom_number']] = atom_entry
                #### updates fragment mass
                if gen.is_hydrogen(line_sep['atom_name']):
                    #### hydrogens always contribute with a mass of 1
                    fragment_mass[bead].append(scaled + [1])
                else:
                    known_masses = g_var.res_top[resname]['atom_masses']
                    #### heavy atoms only contribute when a mass is tabulated for them
                    if line_sep['atom_name'] in known_masses:
                        atom_entry['frag_mass'] = known_masses[line_sep['atom_name']]
                        fragment_mass[bead].append(scaled + [known_masses[line_sep['atom_name']]])
    return residue, fragment_mass
Example #5
0
def write_posres(chain):
    """Create the steered position-restraint file for one protein chain.

    Does nothing when ``PROTEIN/PROTEIN_<chain>_steered_posre.itp`` already
    exists in the working directory.  Otherwise the file is created and every
    ATOM record of ``PROTEIN/PROTEIN_novo_<chain>_gmx.pdb`` is numbered from
    1; atoms whose name appears in
    ``f_loc.backbone[<residue name>]['restraint']`` get a restraint line with
    a force constant of 1000 in x, y and z.

    The output file is written inside a ``with`` block so it is flushed and
    closed on every exit path.

    Args:
        chain: chain identifier used to build the input and output file names.
    """
    posres_path = g_var.working_dir+'PROTEIN/PROTEIN_'+str(chain)+'_steered_posre.itp'
    #### if not posres file exist create one
    if os.path.exists(posres_path):
        return
    with open(posres_path, 'w') as posres_output:
        posres_output.write('[ position_restraints ]\n; atom  type      fx      fy      fz\n')
        #### read in each chain from after pdb2gmx
        with open(g_var.working_dir+'PROTEIN/PROTEIN_novo_'+str(chain)+'_gmx.pdb', 'r') as pdb_input:
            at_counter = 0
            for line in pdb_input:
                if not line.startswith('ATOM'):
                    continue
                line_sep = gen.pdbatom(line)
                at_counter += 1
                #### if atom is in the restraint list for that residue add to position restraint file
                if line_sep['atom_name'] in f_loc.backbone[line_sep['residue_name']]['restraint']:
                    posres_output.write(str(at_counter)+'     1  1000  1000  1000\n')
Example #6
0
def read_in_merged_pdbs(merge, merge_coords, location):
    """Append the ATOM records of a merged pdb file to the running merge lists.

    Args:
        merge: list extended in place with the ``gen.pdbatom`` result of every
            ATOM line.
        merge_coords: list extended in place with the matching ``[x, y, z]``.
        location: path of the pdb file to read.

    Returns:
        The same ``merge`` and ``merge_coords`` lists.

    Raises:
        SystemExit: if ``location`` does not exist.  The message reports
            ``location`` itself; the previous message was built from names
            that are not defined in this function.
    """
    if not os.path.exists(location):
        sys.exit('cannot find minimised residue: \n' + location)
    #### opens pdb files and writes straight to merged_cg2at pdb
    with open(location, 'r') as pdb_input:
        for line in pdb_input:
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                merge.append(line_sep)
                merge_coords.append(
                    [line_sep['x'], line_sep['y'], line_sep['z']])
    return merge, merge_coords
Example #7
0
def read_in_protein_pdbs(no_chains, file, end):
    """Read every protein chain pdb and return the merged, formatted pdb lines.

    Chain ``i`` is read from ``file + '_' + str(i) + end``.  Each chain is
    passed through ``at_mod.fix_chirality``, the accumulated coordinates go
    through ``at_mod.check_atom_overlap`` and the atoms are formatted with
    ``g_var.pdbline``.

    A single ``merge_coords`` accumulator is both passed to and received from
    ``at_mod.fix_chirality``, so accumulation across chains no longer depends
    on that helper mutating its argument in place, and the name is always
    bound even when ``no_chains`` is 0.

    Args:
        no_chains: number of chains to read (indices ``0 .. no_chains-1``).
        file: path prefix of the chain files.
        end: path suffix of the chain files.

    Returns:
        List of formatted pdb lines, each ending in a newline.

    Raises:
        SystemExit: if a chain file is missing.
    """
    #### reads in each chain into merge list
    merge, merge_coords = [], []
    for chain in range(0, no_chains):
        chain_pdb = file+'_'+str(chain)+end
        if not os.path.exists(chain_pdb):
            sys.exit('cannot find minimised protein chain: '+str(chain))
        with open(chain_pdb, 'r') as pdb_input:
            merge_temp = [gen.pdbatom(line) for line in pdb_input if line.startswith('ATOM')]
        merge, merge_coords = at_mod.fix_chirality(merge, merge_temp, merge_coords)
    merged_coords = at_mod.check_atom_overlap(merge_coords)
    merged = []
    for line_val, line in enumerate(merge):
        x, y, z = merged_coords[line_val][0], merged_coords[line_val][1], merged_coords[line_val][2]
        merged.append(g_var.pdbline%((int(line['atom_number']), line['atom_name'], line['residue_name'],' ',line['residue_id'],\
            x,y,z,1,0))+'\n')
    return merged
Example #8
0
def read_initial_cg_pdb():
    """Read the coarse-grained ``conversion_input.pdb`` into a nested dictionary.

    ATOM lines are parsed with ``gen.pdbatom`` and their atom/residue names
    are remapped through ``swap``.  Consecutive lines sharing a residue id are
    collected into ``residue_list``; when the residue id changes the collected
    beads are stored under ``cg_residues[<residue name>][count]`` (or
    ``cg_residues['PROTEIN'][count]`` for residues listed in
    ``f_loc.p_residues``) and ``count`` is incremented.  ``ION`` residues are
    additionally mirrored into ``cg_residues['SOL'][count]`` under the
    ``f_loc.water`` key.  The last line starting with ``CRYST`` is kept as the
    box vectors.

    Returns:
        ``(cg_residues, box_vec)`` where ``box_vec`` is the raw CRYST line.

    Raises:
        SystemExit: if no line starting with ``CRYST`` was found.

    NOTE(review): ``line_sep`` is only assigned inside the ATOM branch, so a
    file without ATOM lines would reach the final-residue code with it unbound.
    """
    #### initialisation of dictionaries etc
    cg_residues = {
    }  ## dictionary of CG beads eg cg_residues[residue type(POPE)][resid(1)][bead name(BB)][residue_name(PO4)/coordinates(coord)]
    residue_list = {
    }  ## a dictionary of bead in each residue eg residue_list[bead name(BB)][residue_name(PO4)/coordinates(coord)]
    count = 0  ### residue counter initialisation
    with open(g_var.input_directory + 'conversion_input.pdb',
              'r') as pdb_input:
        for line in pdb_input.readlines():
            #### separates lines
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                line_sep['atom_name'], line_sep['residue_name'] = swap(
                    line_sep['atom_name'], line_sep['residue_name'],
                    line_sep['residue_id'])
                #### the line is dropped only when BOTH names come back as 'SKIP'
                #### NOTE(review): with `or`, a line where just one of the two is 'SKIP' is still processed -- confirm this is intended
                if line_sep['atom_name'].upper(
                ) != 'SKIP' or line_sep['residue_name'].upper() != 'SKIP':
                    #### set up resnames in dictionaries
                    cg_residues = add_residue_to_dictionary(
                        cg_residues, line_sep)
                    #### sets up previous resid id (first accepted ATOM line only)
                    if 'residue_prev' not in locals():
                        residue_prev = line_sep['residue_id']
    #### if resid the same as previous line
                    if residue_prev == line_sep[
                            'residue_id']:  ### if resid is the same as the previous line, it adds resname and coordinates to the atom name key in residue_list
                        residue_list[line_sep['atom_name']] = {
                            'residue_name':
                            line_sep['residue_name'],
                            'coord':
                            np.array(
                                [line_sep['x'], line_sep['y'], line_sep['z']])
                        }
                        line_sep_prev = line_sep.copy()
    #### if resids are different then the residue list is added to cg_residues
                    else:
                        if line_sep_prev[
                                'residue_name'] not in f_loc.p_residues:
                            cg_residues[line_sep_prev['residue_name']][count] = {
                            }  ### then create sub dictionary cg_residues[resname][count]
                            cg_residues[line_sep_prev['residue_name']][
                                count] = residue_list  ### adds residue list to dictionary key cg_residues[resname][count]
                            #### ions are also stored as a solvent entry under the water bead name
                            if line_sep_prev['residue_name'] == 'ION':
                                cg_residues['SOL'][count] = {}
                                sol_res_list = {}

                                sol_res_list[f_loc.water] = residue_list[
                                    line_sep_prev['atom_name']].copy()
                                sol_res_list[
                                    f_loc.water]['residue_name'] = 'SOL'
                                cg_residues['SOL'][count] = sol_res_list
                        else:
                            #### protein beads whose name starts with 'B' are stored under the key 'BB'
                            #### NOTE(review): residue_list is modified while it is being iterated -- fragile, verify on the supported Python versions
                            for bead in residue_list:
                                if bead.startswith('B'):
                                    residue_list['BB'] = residue_list.pop(bead)
                            cg_residues['PROTEIN'][count] = {
                            }  ### then create sub dictionary cg_residues['PROTEIN'][count]
                            cg_residues['PROTEIN'][
                                count] = residue_list  ### adds residue list to dictionary key cg_residues['PROTEIN'][count]
    #### updates dictionaries and counters
                        residue_list = {}  ### resets residue list
                        count += 1  ### moves counter along to next residue
                        residue_list[line_sep['atom_name']] = {
                            'residue_name':
                            line_sep['residue_name'],
                            'coord':
                            np.array(
                                [line_sep['x'], line_sep['y'], line_sep['z']])
                        }  ### it adds resname and coordinates to the atom name key in residue_list
                        residue_prev = line_sep[
                            'residue_id']  ### updates residue_prev with new resid
                        line_sep_prev = line_sep.copy()
#### finds box vectors
            if line.startswith('CRYST'):  ### collects box vectors from pdb
                box_vec = line
#### adds final residue to cg_residues in the same manner as above
#### NOTE(review): unlike the in-loop branch, the 'B' -> 'BB' bead rename is not applied to a final protein residue -- confirm
#### NOTE(review): this uses line_sep (the last ATOM line read, even if it was skipped) rather than line_sep_prev -- confirm
    if line_sep['residue_name'] in f_loc.p_residues:
        if count not in cg_residues['PROTEIN']:
            cg_residues['PROTEIN'][count] = {}
        cg_residues['PROTEIN'][count] = residue_list
    else:
        if count not in cg_residues[line_sep['residue_name']]:
            cg_residues[line_sep['residue_name']][count] = {}
        cg_residues[line_sep['residue_name']][count] = residue_list
        if line_sep['residue_name'] == 'ION':
            cg_residues['SOL'][count] = {}
            sol_res_list = {}
            sol_res_list[f_loc.water] = residue_list[
                line_sep['atom_name']].copy()
            sol_res_list[f_loc.water]['residue_name'] = 'SOL'
            cg_residues['SOL'][count] = sol_res_list


#### checks if box vectors exist
    if 'box_vec' not in locals(
    ):  ### stops script if it cannot find box vectors
        sys.exit('missing box vectors')

    return cg_residues, box_vec
Example #9
0
def read_initial_at_pdb():
    """Read ``conversion_input.pdb`` into a nested dictionary of residues.

    ATOM lines are parsed with ``gen.pdbatom`` and their atom/residue names
    are remapped through ``swap``.  Consecutive lines sharing a residue id are
    collected into ``residue_list``; when the residue id changes the collected
    atoms are stored under ``at_residues[<residue name>][count]`` (or
    ``at_residues['PROTEIN'][count]`` for residues listed in
    ``f_loc.p_residues``) and ``count`` is incremented.  The last line
    starting with ``CRYST`` is kept as the box vectors.

    Returns:
        ``(at_residues, box_vec)`` where ``box_vec`` is the raw CRYST line.

    Raises:
        SystemExit: if no line starting with ``CRYST`` was found.

    NOTE(review): ``line_sep`` is only assigned inside the ATOM branch, so a
    file without ATOM lines would reach the final-residue code with it unbound.
    """
    at_residues = {
    }  ## dictionary of atoms eg at_residues[residue type][count][atom name][residue_name/coordinates(coord)]
    residue_list = {
    }  ## a dictionary of the atoms in the current residue eg residue_list[atom name][residue_name/coordinates(coord)]
    count = 0  ### residue counter initialisation
    with open(g_var.input_directory + 'conversion_input.pdb',
              'r') as pdb_input:
        for line in pdb_input.readlines():
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                line_sep['atom_name'], line_sep['residue_name'] = swap(
                    line_sep['atom_name'], line_sep['residue_name'],
                    line_sep['residue_id'])
                #### the line is dropped only when BOTH names come back as 'SKIP'
                #### NOTE(review): with `or`, a line where just one of the two is 'SKIP' is still processed -- confirm this is intended
                if line_sep['atom_name'].upper(
                ) != 'SKIP' or line_sep['residue_name'].upper() != 'SKIP':
                    #### make sure the top-level key for this residue type exists
                    if line_sep['residue_name'] in f_loc.p_residues:
                        if 'PROTEIN' not in at_residues:  ## if protein does not exist add to dict
                            at_residues['PROTEIN'] = {}
                    else:
                        if line_sep['residue_name'] not in at_residues:
                            at_residues[line_sep['residue_name']] = {}
                    #### sets up previous resid id (first accepted ATOM line only)
                    if 'residue_prev' not in locals():
                        residue_prev = line_sep['residue_id']
        #### if resid the same as previous line
                    if residue_prev == line_sep[
                            'residue_id']:  ### if resid is the same as the previous line, it adds resname and coordinates to the atom name key in residue_list
                        residue_list[line_sep['atom_name']] = {
                            'residue_name':
                            line_sep['residue_name'],
                            'coord':
                            np.array(
                                [line_sep['x'], line_sep['y'], line_sep['z']])
                        }
                        line_sep_prev = line_sep.copy()
                    else:
                        #### resid changed: store the finished residue under its type
                        if line_sep_prev[
                                'residue_name'] not in f_loc.p_residues:
                            at_residues[line_sep_prev['residue_name']][count] = {
                            }  ### then create sub dictionary at_residues[resname][count]
                            at_residues[line_sep_prev['residue_name']][
                                count] = residue_list
                        else:
                            at_residues['PROTEIN'][count] = {
                            }  ### then create sub dictionary at_residues['PROTEIN'][count]
                            at_residues['PROTEIN'][
                                count] = residue_list  ### adds residue list to dictionary key at_residues['PROTEIN'][count]
    #### updates dictionaries and counters
                        residue_list = {}  ### resets residue list
                        count += 1  ### moves counter along to next residue
                        residue_list[line_sep['atom_name']] = {
                            'residue_name':
                            line_sep['residue_name'],
                            'coord':
                            np.array(
                                [line_sep['x'], line_sep['y'], line_sep['z']])
                        }  ### it adds resname and coordinates to the atom name key in residue_list
                        residue_prev = line_sep[
                            'residue_id']  ### updates residue_prev with new resid
                        line_sep_prev = line_sep.copy()
#### finds box vectors
            if line.startswith('CRYST'):  ### collects box vectors from pdb
                box_vec = line
#### adds final residue to at_residues in the same manner as above
#### NOTE(review): this uses line_sep (the last ATOM line read, even if it was skipped) rather than line_sep_prev -- confirm
    if line_sep['residue_name'] in f_loc.p_residues:
        if count not in at_residues['PROTEIN']:
            at_residues['PROTEIN'][count] = {}
        at_residues['PROTEIN'][count] = residue_list
    else:
        if count not in at_residues[line_sep['residue_name']]:
            at_residues[line_sep['residue_name']][count] = {}
        at_residues[line_sep['residue_name']][count] = residue_list
    if 'box_vec' not in locals(
    ):  ### stops script if it cannot find box vectors
        sys.exit('missing box vectors')
    return at_residues, box_vec
Example #10
0
def read_in_atomistic(protein, cg_chain_count, sequence, check_alignment):
    """Read a user-supplied atomistic protein pdb and split it into chains.

    Changes directory to ``g_var.start_dir`` and reads the ATOM lines of
    ``protein``.  Atoms are stored as
    ``atomistic_protein_input[chain][residue_id][atom_number]`` with keys
    ``coord``, ``atom``, ``res_type``, ``frag_mass`` and ``resid``.  A new
    chain is started when the stored N- and C-terminal connecting atoms belong
    to different residues and are more than 3.5 apart.

    Args:
        protein: path of the atomistic pdb file.
        cg_chain_count: number of chains expected; only compared when
            ``check_alignment`` is false.
        sequence: passed to ``align_chains`` when ``check_alignment`` is true.
        check_alignment: when true, unknown residues abort the run and the
            chains are passed through ``check_sequence`` / ``align_chains``.

    Returns:
        The ``atomistic_protein_input`` dictionary (as returned by
        ``align_chains`` when ``check_alignment`` is true).

    Raises:
        SystemExit: if ``protein`` does not exist, if a residue is unknown
            while ``check_alignment`` is true, or if the chain count differs
            from ``cg_chain_count`` while ``check_alignment`` is false.
    """
#### reset location and check if pdb exists  
    os.chdir(g_var.start_dir)
    if not os.path.exists(protein):
        sys.exit('cannot find atomistic protein : '+protein)
#### read in atomistic fragments into dictionary residue_list[0]=x,y,z,atom_name    
    atomistic_protein_input={}
    chain_count=0
#### read in pdb
    ter_residues=[]  ## residue ids at which a new chain was started; not used again in this function
    with open(protein, 'r') as pdb_input:
        atomistic_protein_input[chain_count]={}
        for line_nr, line in enumerate(pdb_input.readlines()):
            #### separate line 
            run=False ## turns to True if the line is an atom that should be kept
            if line.startswith('ATOM'):
                line_sep = gen.pdbatom(line)
                # print(line_sep['atom_name'])
                #### keep every atom of modified residues; otherwise drop atoms named like hydrogens ('H...' or '<digit>H...')
                if line_sep['residue_name'] in f_loc.mod_residues:
                    run=True
                elif str.isdigit(line_sep['atom_name'][0]) and line_sep['atom_name'][1] != 'H':
                    run=True
                elif not str.isdigit(line_sep['atom_name'][0]) and not line_sep['atom_name'].startswith('H'):
                    run=True
               
            #### if line is correct
            if run:
                if line_sep['residue_name'] in f_loc.p_residues or line_sep['residue_name'] in f_loc.mod_residues:
                    
                    if not line_sep['atom_name'].startswith('H') or line_sep['residue_name'] in f_loc.mod_residues:  
                    #### sorts out wrong atoms in terminal residues
                        if line_sep['atom_name'] in ['OT', 'O1', 'O2']:
                            line_sep['atom_name']='O'
                    #### makes C_terminal connecting atom variable  
                        if line_sep['atom_name'] == f_loc.backbone[line_sep['residue_name']]['C_ter']:
                            C_ter=[line_sep['x'],line_sep['y'],line_sep['z']]
                            C_resid=line_sep['residue_id']
                            C=True
                        #### NOTE(review): the bare except below silently swallows every error raised in this block,
                        #### including the ones raised while N_ter/C_ter/N/C have not been set yet -- chain detection relies on that
                        try:
                        #### tries to make a N_terminal connecting atom variable
                            if line_sep['atom_name'] == f_loc.backbone[line_sep['residue_name']]['N_ter']:
                                N_resid=line_sep['residue_id']
                                N_ter=[line_sep['x'],line_sep['y'],line_sep['z']]
                                N=True
                        #### measures distance between N and C atoms. if they are in different residues and over 3.5 apart it counts as a new protein chain
                            dist=gen.calculate_distance(N_ter, C_ter)
                            if N and C and C_resid != N_resid and dist > 3.5:# and aas[line_sep['residue_name']] != sequence[chain_count][line_sep['residue_id']]:
                                N_ter, C_ter=False, False
                                ter_residues.append(line_sep['residue_id'])
                                chain_count+=1
                                atomistic_protein_input[chain_count]={} ### new chain key
                        except:
                            pass
                        if line_sep['residue_id'] not in atomistic_protein_input[chain_count]:  ## if residue does not exist in this chain add to dict
                            atomistic_protein_input[chain_count][line_sep['residue_id']]={}
                    #### adds atom to dictionary, every atom is given a initial mass of zero 
                        atomistic_protein_input[chain_count][line_sep['residue_id']][line_sep['atom_number']]={'coord':np.array([line_sep['x'],line_sep['y'],line_sep['z']]),'atom':line_sep['atom_name'], 'res_type':line_sep['residue_name'],'frag_mass':0, 'resid':line_sep['residue_id']}
                    #### if atom is in the backbone list then its mass is updated to the correct one
                        if line_sep['atom_name'] in f_loc.backbone[line_sep['residue_name']]['atoms']:
                            #### walks the characters of the atom name; the last character found in g_var.mass sets the mass
                            for atom in line_sep['atom_name']:
                                if atom in g_var.mass:
                                    atomistic_protein_input[chain_count][line_sep['residue_id']][line_sep['atom_number']]['frag_mass']=g_var.mass[atom]
                else:
                    #### residue is neither a known protein residue nor a modified residue
                    if check_alignment:
                        sys.exit('The residue '+line_sep['residue_name']+' does not exist in the fragment database')
    if check_alignment:
        seq_user = check_sequence(atomistic_protein_input, chain_count+1)
        atomistic_protein_input = align_chains(atomistic_protein_input, seq_user, sequence)
#### check if number of monomers is the same
    elif chain_count+1 != cg_chain_count:
        sys.exit('number of chains in atomistic protein input ('+str(chain_count+1)+') does not match CG representation ('+str(cg_chain_count)+')')
    return atomistic_protein_input