Beispiel #1
0
    def get_structure(self):
        """Build freesasa structures for the whole complex and for each chain.

        Sets ``self.complex`` / ``self.result_complex`` for the full molecule
        and fills ``self.chains`` / ``self.result_chains`` (both keyed by chain
        label) for every label in ``self.chains_label``.
        """
        columns = 'name,resName,resSeq,chainID,x,y,z'

        def fill(target, rows):
            # Only the first character of each atom name is handed to
            # freesasa, right-aligned in a two-character field.
            for name, res_name, res_number, chain, x, y, z in rows:
                target.addAtom('{:>2}'.format(name[0]), res_name, res_number,
                               chain, x, y, z)

        # A str pdb_data is passed straight to freesasa; for anything else
        # the atoms are pulled from self.sql.
        if isinstance(self.pdb_data, str):
            self.complex = freesasa.Structure(self.pdb_data)
        else:
            self.complex = freesasa.Structure()
            fill(self.complex, self.sql.get(columns))
        self.result_complex = freesasa.calc(self.complex)

        self.chains = {}
        self.result_chains = {}
        for label in self.chains_label:
            chain_structure = freesasa.Structure()
            self.chains[label] = chain_structure
            fill(chain_structure, self.sql.get(columns, chainID=label))
            self.result_chains[label] = freesasa.calc(chain_structure)
Beispiel #2
0
def featurize(structure: Structure) -> list[Any]:
    """
    Calculates 3D ML features from the `structure`.

    Returns ``[total_area, polar_apolar, protein_length, seq_length, angle]``;
    ``total_area``, ``polar_apolar`` and ``seq_length`` are lists.
    """
    # NOTE(review): `pdbpath` is neither a parameter nor a local here; it is
    # presumably a module-level path to the same molecule -- confirm.
    sasa_structure = freesasa.Structure(pdbpath)
    sasa_result = freesasa.calc(sasa_structure)
    area_by_class = freesasa.classifyResults(sasa_result, sasa_structure)

    total_area = [sasa_result.totalArea()]
    # one entry per area class, in the order the classifier reports them
    polar_apolar = [area_by_class[name] for name in area_by_class]

    residues = list(structure.get_residues())
    seq_length = [len(residues)]

    # The first and last residues are skipped: the second and the
    # second-to-last CA atoms serve as the end points.
    start_ca = residues[1]["CA"]
    end_ca = residues[-2]["CA"]
    protein_length = start_ca - end_ca
    angle = calc_dihedral(
        start_ca.get_vector(),
        residues[2]["CA"].get_vector(),
        residues[-3]["CA"].get_vector(),
        end_ca.get_vector(),
    )

    return [total_area, polar_apolar, protein_length, seq_length, angle]
Beispiel #3
0
def get_area(this_run, basename):
    """Store the total freesasa surface area of a run's initial geometry.

    The xyz file at ``this_run.init_geopath`` is converted to PDB with
    Open Babel and written to ``<pdb_path><basename>.pdb``; the total area
    computed from that file is assigned to ``this_run.area``.
    """
    pdb_file = setup_paths()["pdb_path"] + basename + '.pdb'
    print('getting area')

    # xyz -> pdb conversion
    converter = openbabel.OBConversion()
    converter.SetInFormat("xyz")
    converter.SetOutFormat("pdb")
    molecule = openbabel.OBMol()
    converter.ReadFile(molecule, this_run.init_geopath)
    converter.WriteFile(molecule, pdb_file)

    # surface area with the project's own radius classifier
    radius_classifier = DerivedClassifierT()
    parse_options = {
        'hetatm': True,
        'hydrogen': True,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False,
    }
    sasa_structure = freesasa.Structure(pdb_file,
                                        classifier=radius_classifier,
                                        options=parse_options)
    sasa_structure.setRadiiWithClassifier(radius_classifier)

    this_run.area = freesasa.calc(sasa_structure).totalArea()
Beispiel #4
0
    def run(self, pdb):
        """Compute per-atom SASA for every model of a PDB file.

        Parameters
        ----------

        pdb: str
            Path to input PDB file

        Returns
        -------
        list
            One list per model, holding the SASA value of each atom.

        """
        models = freesasa.structureArray(bytes(pdb, 'utf-8'),
                                         options=self.options,
                                         classifier=self.classifier)

        per_model_areas = []
        for model in models:
            print('Computing SASA for each model/frame')
            sasa = freesasa.calc(model)
            per_model_areas.append(
                list(map(sasa.atomArea, range(model.nAtoms()))))

        return per_model_areas
Beispiel #5
0
def sa_calc(polymer_pdb, radius):
    """Report the atom-normalized solvent accessible surface area of a polymer.

    The mol file ``polymer_pdb`` is read with RDKit, hydrogens (with
    coordinates) are added and the result is written to
    ``<out_dir><NAME>_new.pdb``. freesasa is then run on that file with probe
    radius ``radius``. The total area is divided by the atom count of the
    molecule as read from the mol file, printed, and appended to
    ``<out_dir>Average surface area.txt``.
    """
    pdb_path = out_dir+NAME+'_new.pdb'

    # mol file -> molecule with explicit hydrogens -> pdb file
    molecule = Chem.MolFromMolFile(polymer_pdb)
    with_hydrogens = Chem.AddHs(molecule, addCoords = True)
    Chem.MolToPDBFile(with_hydrogens, pdb_path)

    # ask freesasa to keep hydrogens and HETATM records when parsing
    parse_options = {
        'hetatm': True,
        'hydrogen': True,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False,
    }

    # probe radius as requested by the caller (e.g. 1.4 Å or 3.6 Å)
    parameters = freesasa.Parameters()
    parameters.setProbeRadius(radius)

    sasa_structure = freesasa.Structure(pdb_path, options = parse_options)
    total_rounded = round(freesasa.calc(sasa_structure, parameters).totalArea(), 4)
    print (f'Total SASA is {total_rounded} Å^2 when probe radius is {radius} Å.')

    # normalize by the atom count of the molecule as loaded from the mol file
    normalized_sa = round(total_rounded / molecule.GetNumAtoms(), 4)
    tail = str(normalized_sa) + ' Å^2 with the probe size of ' + str(radius) + 'Å.\n'

    # append the result to the shared report file
    with open (out_dir + 'Average surface area.txt', 'a+') as report:
        report.write(f'The normalized surface area of {NAME} is ' + tail)
    print ('Nomalized solvent accessible surface area is '+ tail)
def calculate_sasa(pdb_file):
    """Compute the total SASA of a PDB file and time the calculation.

    The file is parsed into a ``Complex``, copied atom by atom into a
    freesasa ``Structure``, and given per-atom radii looked up in
    ``DesolvationParameters().radius_per_atom`` under the key
    ``"<residue name>-<atom name>"``.

    Returns
    -------
    tuple
        ``(total_area, elapsed)`` where ``elapsed`` is the wall-clock time of
        the ``freesasa.calc`` call only.
    """
    # Read PDB structure
    atoms, residues, chains = parse_complex_from_file(pdb_file)
    molecule = Complex(chains, atoms, structure_file_name=pdb_file)

    parameters = DesolvationParameters()

    # Lightdock structure to freesasa structure
    structure = Structure()
    for atom in molecule.atoms:
        structure.addAtom(atom.name, atom.residue_name, atom.residue_number,
                          atom.chain_id, atom.x, atom.y, atom.z)

    atom_radius = []
    for atom in molecule.atoms:
        # CYX residues are renamed so they use the CYS radii
        if atom.residue_name == 'CYX':
            atom.residue_name = 'CYS'
        atom_radius.append(
            parameters.radius_per_atom[atom.residue_name + "-" + atom.name])

    structure.setRadii(atom_radius)

    start_time = timeit.default_timer()
    result = freesasa.calc(structure)
    elapsed = timeit.default_timer() - start_time

    return result.totalArea(), elapsed
def get_area_classes(file):
    """Return ``[first class area, second class area, total area]`` for a file.

    The first two entries are the first two values of the dict returned by
    ``freesasa.classifyResults``.
    """
    structure = freesasa.Structure(file)
    sasa = freesasa.calc(structure)
    # NOTE(review): relies on the iteration order of the classifier's result
    # dict; presumably these are the polar and apolar areas -- confirm.
    class_areas = list(freesasa.classifyResults(sasa, structure).values())
    return [class_areas[0], class_areas[1], sasa.totalArea()]
Beispiel #8
0
def execute_freesasa_api(structure):
    """
    Run freesasa through its Python API on ``structure``.

    Returns ``(asa_data, rsa_data)``: absolute areas keyed by
    ``(chain, resname, resid, atname)`` and relative areas keyed by
    ``(chain, resname, resid)``.
    """
    try:
        from freesasa import Classifier, structureFromBioPDB, calc
    except ImportError as err:
        print(
            "[!] The binding affinity prediction tools require the 'freesasa' Python API",
            file=sys.stderr)
        raise ImportError(err)

    asa_data, rsa_data = {}, {}
    _rsa = rel_asa['total']

    # FREESASA_PAR overrides the classifier configuration shipped with prodigy
    bundled_config = pkg_resources.resource_filename('prodigy', 'naccess.config')
    classifier = Classifier(os.environ.get('FREESASA_PAR', bundled_config))
    pkg_resources.cleanup_resources()

    # stderr is redirected to the null device while freesasa runs
    with stdchannel_redirected(sys.stderr, os.devnull):
        try:
            struct = structureFromBioPDB(
                structure,
                classifier,
            )
            result = calc(struct)
        except AssertionError as e:
            error_message = '\n[!] Error when running freesasa: \n[!] {}'.format(
                e)
            print(error_message)
            raise Exception(error_message)

    # collect the area of every atom and accumulate it per residue
    for idx in range(struct.nAtoms()):
        atom_name = struct.atomName(idx)
        residue_name = struct.residueName(idx)
        residue_number = struct.residueNumber(idx)
        chain_label = struct.chainLabel(idx)
        residue_key = (chain_label, residue_name, residue_number)

        atom_area = result.atomArea(idx)
        asa_data[residue_key + (atom_name,)] = atom_area
        rsa_data.setdefault(residue_key, 0)
        rsa_data[residue_key] += atom_area

    # turn each residue total into a fraction of the reference area for
    # its residue name
    for residue_key in list(rsa_data):
        rsa_data[residue_key] = rsa_data[residue_key] / _rsa[residue_key[1]]
    return asa_data, rsa_data
def calculate_SAS(temp_dict, pdb_path, seq_len):
    """Add polar/apolar SASA values of a PDB file to ``temp_dict``.

    Writes the keys ``"Polar"``, ``"Apolar"`` and ``"SASA Fraction"``; the
    latter is the sum of both areas divided by ``seq_len``.
    """
    structure = freesasa.Structure(str(pdb_path))
    class_areas = freesasa.classifyResults(freesasa.calc(structure), structure)
    polar_area = class_areas['Polar']
    apolar_area = class_areas['Apolar']
    # computed before touching temp_dict so that a failure leaves it unchanged
    fraction = (polar_area + apolar_area) / seq_len
    temp_dict.update({
        "Polar": polar_area,
        "Apolar": apolar_area,
        "SASA Fraction": fraction,
    })
Beispiel #10
0
def calcSASA(Latm, selection):
    """Compute the solvent accessible surface (SAS) of a selection.

    Parameters
    ----------
    Latm : iterable
        Atoms exposing ``ty``, ``resname``, ``resN``, ``chain`` and ``traj``;
        the first three entries of ``traj`` are used as x, y, z.
    selection : str
        A freesasa selection command of the form ``"<name>, <expression>"``.

    Returns
    -------
    The area freesasa reports for the named selection.
    """
    freesasa.setVerbosity(1)
    structure = freesasa.Structure()
    for atom in Latm:
        structure.addAtom(atom.ty, atom.resname, atom.resN, atom.chain,
                          atom.traj[0], atom.traj[1], atom.traj[2])
    result = freesasa.calc(structure)
    areas = freesasa.selectArea((selection, 'all, resn ala'), structure,
                                result)
    # The selection name is everything before the first comma. Splitting on
    # the comma (instead of taking the first whitespace-separated token and
    # dropping its last character) also handles "name,expr" and "name , expr".
    selection_name = selection.split(',', 1)[0].strip()
    return areas[selection_name]
Beispiel #11
0
def cal_sasa(prot, resilist):
    """Annotate residue entries with their solvent accessible surface area.

    ``prot`` is passed to ``freesasa.Structure``. Every entry of ``resilist``
    must provide ``'resi_seq'`` and ``'chain'``; its ``'SASA'`` key is set to
    the area of that residue. The same (mutated) list is returned.
    """
    structure = freesasa.Structure(prot)
    result = freesasa.calc(structure)

    for entry in resilist:
        own_selection = ('we, resi ' + str(entry['resi_seq']) +
                         ' and chain ' + entry['chain'])
        areas = freesasa.selectArea(('alanine, resn ala', own_selection),
                                    structure, result)
        entry['SASA'] = areas['we']
    return resilist
Beispiel #12
0
def run_freesasa_biopython(pdb_path):
    """Run freesasa on a PDB file, importing the module on first use.

    Returns ``(result, structure)`` as produced by ``freesasa.calc`` and
    ``freesasa.Structure``. Raises ``RuntimeError`` when freesasa cannot be
    imported.
    """
    global freesasa
    if freesasa is None:
        # the module-level placeholder is replaced by the real module
        try:
            import freesasa
        except ImportError:
            raise RuntimeError("Cannot use this method. Please save the pdb file and rerun with docker")

    with silence_stdout():
        with silence_stderr():
            # hydrogens are dropped automatically when the file is parsed
            structure = freesasa.Structure(pdb_path)
            result = freesasa.calc(structure)

    return result, structure
Beispiel #13
0
def _compute_asa(df):
    """Compute the total solvent-accessible surface area of a structure.

    ``df`` must provide the columns ``name``, ``resname``, ``residue``,
    ``chain``, ``element``, ``x``, ``y`` and ``z``. Atoms of ``UNK`` residues
    and hydrogen atoms are left out.
    """
    # NOTE(review): the converted structure is never used below; the call is
    # kept in case the conversion validates ``df`` -- confirm it can go.
    dt.df_to_bp(df)

    naccess = freesasa.Classifier.getStandardClassifier('naccess')
    structure = freesasa.Structure(
        classifier=naccess,
        options={'hydrogen': True, 'skip-unknown': True})

    for _, atom in df.iterrows():
        if atom['resname'] == 'UNK' or atom['element'] == 'H':
            continue
        structure.addAtom(atom['name'], atom['resname'], atom['residue'],
                          atom['chain'], atom['x'], atom['y'], atom['z'])

    return freesasa.calc(structure).totalArea()
Beispiel #14
0
    def _get_sasa(self):
        """Return the cached ``(sasa result, sasa structure)`` pair.

        On first use ``self.structure`` is written to a temporary PDB file,
        which freesasa parses and evaluates; the outcome is cached on
        ``self.sasa`` / ``self.sasa_struct``.

        Returns ``(None, None)`` when the freesasa module is unavailable.
        """
        if freesasa is None:
            # single-argument print() behaves the same on Python 2 and 3
            print("SASA not installed! SASA will be 0")
            return None, None
        if self.sasa is None:
            pdbfd, tmp_pdb_path = tempfile.mkstemp()
            try:
                with os.fdopen(pdbfd, 'w') as tmp:
                    writePDBStream(tmp, self.structure)

                with silence_stdout(), silence_stderr():
                    self.sasa_struct = freesasa.Structure(tmp_pdb_path)
                    self.sasa = freesasa.calc(self.sasa_struct)
            finally:
                # remove the temporary file even when writing or freesasa fails
                os.remove(tmp_pdb_path)

        return self.sasa, self.sasa_struct
Beispiel #15
0
 def get_attributes(self):
     """Load the PDB file and derive all attributes of this object.

     Stores the raw lines on ``self.data`` and the freesasa result on
     ``self.solvent_access``, then runs the cleaning, CA-attribute,
     distance and range steps in that order. Exceptions raised by any
     step propagate to the caller unchanged.
     """
     # raw text of the pdb file
     with open(self.file_path, "r") as handle:
         self.data = handle.readlines()

     # solvent accessibility from the same file
     sasa_structure = fs.Structure(self.file_path)
     self.solvent_access = fs.calc(sasa_structure)

     self._clean_data()
     self._ca_attributes()
     self._distance_to_others()
     self._find_in_range()
    def run(self, residues: SetOfResidues) -> float:
        """Return the total SASA of ``residues`` as computed by freesasa.

        freesasa's BioPython binding calls ``get_atoms`` on the object it is
        given, so such a method is attached to ``residues`` first.
        """

        def get_atoms(self):
            # Hydrogens are skipped; with them freesasa fails with
            # "AssertionError: Error: Radius array is <= 0 for the residue: PHE ,atom: H"
            for residue in self:
                yield from (atom for atom in residue.get_atoms()
                            if atom.element != 'H')

        # SetOfResidues is a frozen dataclass, hence object.__setattr__
        object.__setattr__(residues, 'get_atoms', get_atoms.__get__(residues))

        sasa_result = freesasa.calc(freesasa.structureFromBioPDB(residues))
        return sasa_result.totalArea()
def sasa_from_file(file: Union[str, pathlib.Path]) -> Sasa:
    """Return the per-residue areas of a PDB file.

    The file is parsed into a freesasa.Structure, freesasa.calc() is run on
    it and the freesasa.Result.residueAreas() dictionary is returned wrapped
    in an ObjDict.

    Raises TypeError when ``file`` is neither ``str`` nor ``pathlib.Path``
    and FileNotFoundError when the path does not exist.
    """
    if not isinstance(file, (str, pathlib.Path)):
        raise TypeError(
            "Invalid argument type. File should be 'str' or pathlib.Path")

    path = file if isinstance(file, pathlib.Path) else pathlib.Path(file)
    if not path.exists():
        raise FileNotFoundError(
            f"File {path.absolute().as_posix()} does not exist.")

    structure = freesasa.Structure(path.absolute().as_posix())
    return ObjDict(freesasa.calc(structure).residueAreas())
Beispiel #18
0
def getAtomSASA(structure, classifier=None, probe_radius=1.4, mi=0, **kwargs):
    """Store the SASA of every atom in the atom's ``xtra["sasa"]`` entry.

    When no ``classifier`` is given a ``Radius(**kwargs)`` is created.
    ``probe_radius`` is forwarded to freesasa and ``mi`` selects the model
    of ``structure`` whose atoms are annotated. Nothing is returned.
    """
    if classifier is None:
        classifier = Radius(**kwargs)

    freesasa_structure = getFreeSASAStructureFromModel(structure, classifier=classifier)
    parameters = freesasa.Parameters({"probe-radius": probe_radius})
    SASA = freesasa.calc(freesasa_structure, parameters)

    # NOTE(review): the atom count and atom names are read from `structure`
    # while residue numbers and chain labels come from `freesasa_structure`
    # -- confirm both objects expose the same atom indexing.
    for i in range(structure.nAtoms()):
        area = SASA.atomArea(i)
        res_number = freesasa_structure.residueNumber(i).strip()
        chain_id = freesasa_structure.chainLabel(i).strip()
        # a trailing non-digit on the residue number is used as the
        # insertion code of the residue id
        if res_number[-1].isdigit():
            insertion = " "
        else:
            res_number, insertion = res_number[:-1], res_number[-1]
        atom_name = structure.atomName(i).strip()
        residue = structure[mi][chain_id][(' ', int(res_number), insertion)]
        residue[atom_name].xtra["sasa"] = area
Beispiel #19
0
    def _get_scores(self, df, pdb_id, pdb_chain):
        """Return one freesasa score per row of ``df`` for the given chain.

        ``df`` is left-merged with the SIFTS alignment of the chain on
        ``residue`` / ``uniprot position``. The value of ``self.metric`` is
        then read from the freesasa residue areas for each ``pdb position``.
        Rows without a PDB position, or whose residue is missing from the
        freesasa result, stay NaN.

        Returns ``None`` when no SIFTS alignment is available. Raises
        ``LookupError`` when the PDB file is absent and downloading is off.
        """
        sifts = get_sifts_alignment_for_chain(pdb_id, pdb_chain,
                                              self.sifts_directory,
                                              self.download_sifts)
        if sifts is None:
            return None

        df = pd.merge(df,
                      sifts,
                      left_on='residue',
                      right_on='uniprot position',
                      how='left')

        pdb_file_path = os.path.join(self.pdb_directory, pdb_id + '.pdb')
        if not os.path.isfile(pdb_file_path):
            # PDB file not already downloaded.
            if not self.download_pdb_file:
                raise LookupError(
                    "PDB file {} is not in the pdb_directory {}".format(
                        pdb_id, self.pdb_directory))
            download_pdb_file(pdb_id, self.pdb_directory)

        sasa_structure = freesasa.Structure(pdb_file_path)
        sasa_result = freesasa.calc(sasa_structure, self.freesasa_parameters)
        chain_areas = sasa_result.residueAreas()[pdb_chain]

        scores = np.full(len(df), np.nan)
        for row, position in enumerate(df['pdb position']):
            if np.isnan(position):
                continue
            try:
                scores[row] = getattr(chain_areas[str(int(position))],
                                      self.metric)
            except KeyError:
                # residue not present in the freesasa result: keep NaN
                continue

        return scores
Beispiel #20
0
    def handle(self, *args, **options):
        """Analyse the helices of one structure and write B-factor colored PDB files.

        Reads ``options['pdb_code']``, reduces the structure to model 0 and
        the first preferred chain, fits PCA axes through the residues whose
        generic number label starts with ``1x`` .. ``7x`` and writes, relative
        to the ``pymol_output`` directory (which becomes the working
        directory):

        * helper output through ``self.save_pseudo`` and several axis sets
          through ``self.write_cgo_arrow_pml``;
        * ``<code>angle_colored_ca_cb.pdb`` and ``<code>angle_colored_axes.pdb``
          with angles stored as B-factors;
        * ``<code>asa_colored.pdb`` with per-residue freesasa areas as B-factors;
        * ``<code>hsea_colored.pdb`` with values from ``HSExposureCA`` as
          B-factors.
        """
        # grab PDB
        pdb_code = options.get('pdb_code', None).upper()

        reference = Structure.objects.get(pdb_code__index=pdb_code) #.prefetch_related('pdb_data')
        # only the first of the comma-separated preferred chains is used
        preferred_chain = reference.preferred_chain.split(',')[0]
        # turn the stored PDB data into a structure object
        structure = self.load_pdb_var(pdb_code,reference.pdb_data.pdb)

        # grab residues with the generic numbering for this structure
        db_reslist = list(Residue.objects.exclude(generic_number__isnull=True).filter(protein_conformation__protein=reference.protein_conformation.protein).prefetch_related('generic_number'))

        #######################################################################
        ############################# filter  pdb #############################

        # NOTE: changes the working directory of the whole process
        os.chdir("pymol_output")

        db_tmlist = [[] for i in range(7)]  # sequence numbers per helix
        db_set    = set()                   # residue ids inside the helices
        db_set_p  = set()                   # same, plus the residues flanking each helix
        oldr = False
        # Initialised up front: the else-branch below reads `lastin`, which
        # raised UnboundLocalError when the first residue was outside 1x-7x.
        lastin = False
        for r in db_reslist:
            if r.generic_number.label[:2] in ["1x","2x","3x","4x","5x","6x","7x"]:
                db_tmlist[int(r.generic_number.label[0])-1].append(r.sequence_number)
                db_set.add((' ',r.sequence_number,' '))
                db_set_p.add((' ',r.sequence_number,' '))
                lastin = True

                # the residue right before this helix residue
                if oldr:
                    db_set_p.add((' ',oldr.sequence_number,' '))
                    oldr = False
            else:
                oldr = r
                # the residue right after the last helix residue
                if lastin:
                    db_set_p.add((' ',oldr.sequence_number,' '))
                    lastin=False

        def recurse(entity,slist):
            # Detach every child whose id is not in slist[0]; descend into
            # the kept children with the remaining levels of slist.
            for subenty in entity.get_list():
                if not subenty.id in slist[0]: entity.detach_child(subenty.id)
                elif slist[1:]: recurse(subenty, slist[1:])

        # keep model 0 / preferred chain; the copy taken here still holds all
        # residues of that chain and is used for the HSE calculation
        recurse(structure,[[0], preferred_chain])
        hse_struct = deepcopy(structure)
        recurse(structure, [[0], preferred_chain, db_set])

        pchain = structure[0][preferred_chain]

        #######################################################################
        ############### Calculate the axes through the helices ################
        #######################################################################
        N = 3

        # CA coordinates per helix, and CB coordinates (NaN rows where a
        # residue has no CB)
        hres_list = [np.asarray([pchain[r]["CA"].get_coord() for r in sl], dtype=float) for sl in db_tmlist]
        h_cb_list = [np.asarray([pchain[r]["CB"].get_coord() if "CB" in pchain[r] else np.array([None,None,None]) for r in sl], dtype=float) for sl in db_tmlist]

        # fast and fancy way to take the average of N consecutive elements
        hres_three = np.asarray([sum([h[i:-(len(h) % N) or None:N] for i in range(N)])/N for h in hres_list])
        helices_mn = np.asarray([np.mean(h, axis=0) for h in hres_three ])
        self.save_pseudo(hres_three, pdb_code+"helper")

        #######################################################################
        ################################# PCA #################################
        #######################################################################

        def pca_line(pca,h, r=0):
            # Fit `pca` on h and return two end points on its first component
            # (+-20 in PCA space); the sign of the first projected point,
            # flipped by `r`, decides the order of the end points.
            if ((not r) if pca.fit_transform(h)[0][0] < 0 else r):
                return pca.inverse_transform(np.asarray([[-20,0,0],[20,0,0]]))
            else:return pca.inverse_transform(np.asarray([[20,0,0],[-20,0,0]]))

        # one axis per helix; every second helix gets the flipped orientation
        helix_pcas = [PCA() for i in range(7)]
        pos_list = np.asarray([pca_line(helix_pcas[i], h,i%2) for i,h in enumerate(hres_three)])
        self.write_cgo_arrow_pml(pdb_code, "pca",pos_list)

        pos_list = np.mean(pos_list,axis=0)
        self.write_cgo_arrow_pml(pdb_code, "pca_mean",[pos_list])

        # a single axis through the averaged points of all helices
        pca = PCA()
        pos_list = pca_line(pca, np.vstack(hres_three))
        self.write_cgo_arrow_pml(pdb_code, "pca_all",[pos_list])

        # axes fitted on a slice of each helix, shifted so that each axis is
        # centred on the mean of its helix
        pos_list = np.asarray([pca_line(PCA(), h[:len(h)//2:(-(i%2) or 1)]) for i,h in enumerate(hres_three)])
        pos_list = pos_list - (np.mean(pos_list,axis=1)-helices_mn).reshape(-1,1,3)
        self.write_cgo_arrow_pml(pdb_code, "pca_extra",pos_list)
        self.write_cgo_arrow_pml(pdb_code, "pca_extra_mean",[np.mean(pos_list,axis=0)])

        pca_extra = PCA()
        pos_list = pca_line(pca_extra, np.vstack(pos_list))
        self.write_cgo_arrow_pml(pdb_code, "pca_extra_pca",[pos_list])

        #######################################################################
        ################################ Angles ###############################
        #######################################################################

        def calc_angle(b,c):
            # Row-wise angle (degrees) between ba and bc, where ba is -b with
            # its first component zeroed and bc is c - b.
            # NOTE(review): `inner1d` is not defined in this block --
            # presumably the row-wise inner product imported from numpy; confirm.
            ba = -b
            bc = c + ba
            ba[:,0] = 0
            return np.degrees(np.arccos(inner1d(ba, bc) / (np.linalg.norm(ba,axis=1) * np.linalg.norm(bc,axis=1))))

        def ca_cb_calc(i,pca):
            # angles for helix i, restricted to residues that have a CB
            fin = np.isfinite(h_cb_list[i][:,0])
            return calc_angle(pca.transform(hres_list[i][fin]),pca.transform(h_cb_list[i][fin]))

        def axes_calc(i,pca_list,pca):
            # For helix i: replace the off-axis components of each CA (in the
            # helix's own PCA frame) by those of a 3-point running average,
            # then measure the angle to the CA in the frame of `pca`.
            p = pca_list[i]
            h = hres_list[i]
            a = (np.roll(np.vstack((h,h[0])),1,axis=0)[:-1] + h + np.roll(np.vstack((h,h[-1])),-1,axis=0)[:-1])/3
            b = p.transform(h)
            b[:,1:] = p.transform(a)[:,1:]
            b = p.inverse_transform(b)
            return calc_angle(pca.transform(b),pca.transform(h))

        def set_bfactor(structure,angles):
            # store one value per residue on all atoms of that residue
            for r,an in zip(structure[0][preferred_chain].get_list(),angles):
                for a in r: a.set_bfactor(an)

        centerpca = pca

        ########################### Axis to CA to CB ##########################

        # NOTE(review): TMNUM is not defined in this block -- presumably a
        # module-level constant equal to the number of helices (7); confirm.
        tv = np.isfinite(np.concatenate(h_cb_list)[:,0])
        angle = np.full_like(tv,-1,dtype=float)  # -1 marks residues without CB
        angle[tv] = np.concatenate([ca_cb_calc(i,centerpca) for i in range(TMNUM)])
        set_bfactor(structure,angle)

        self.save_pdb(structure, pdb_code+'angle_colored_ca_cb.pdb')

        ######################### Axis to Axis to CA ##########################

        angle2 = np.concatenate([axes_calc(i,helix_pcas,centerpca) for i in range(TMNUM)])

        set_bfactor(structure,angle2)

        self.save_pdb(structure, pdb_code+'angle_colored_axes.pdb')

        ########################### HSE and ASA ###############################

        # freesasa is run on the file that was just written
        pdbstruct = freesasa.Structure(pdb_code+'angle_colored_axes.pdb')
        res = freesasa.calc(pdbstruct)

        # sum atom areas per residue: consecutive atoms that share a residue
        # number are treated as one residue
        asa_list = []
        oldnum = -1
        for i in range(res.nAtoms()):
            resnum = pdbstruct.residueNumber(i)
            if resnum == oldnum:
                asa_list[-1] += res.atomArea(i)
            else:
                asa_list.append(res.atomArea(i))
                oldnum = resnum

        set_bfactor(structure,asa_list)
        self.save_pdb(structure, pdb_code+'asa_colored.pdb')

        # Calculate HSEalpha on the chain copy that still has all residues
        model = hse_struct[0]
        exp_ca = pdb.HSExposure.HSExposureCA(model)
        print(len(exp_ca))
        [[a.set_bfactor(x[1][1]) for a in x[0]] for x in exp_ca]
        recurse(hse_struct, [[0], preferred_chain, db_set])
        r = [x[0] for x in exp_ca]
        #x = model["A"].get_list()
        x = pchain.get_list()
        # residues without an HSE entry get -1
        # NOTE(review): `x` holds residues of `structure` while `r` holds
        # residues of the copy `hse_struct`; whether the set difference
        # matches them depends on how residues compare/hash -- confirm.
        for r in (set(x) - set(r)):
            for a in r:
                a.set_bfactor(-1)

        exp_ca = [a["CA"].get_bfactor() for a in hse_struct[0][preferred_chain].get_list()]

        self.save_pdb(hse_struct, pdb_code+'hsea_colored.pdb')

        
        
        
        
        
        
        
        
        
        
        
        
        
        
def _exposed_residue_ids(chain, chain_label, structure, result, maximum_area):
    """Return the ids of the residues of ``chain`` with relative area above 0.05.

    ``chain_label`` is the chain name used in the freesasa selection.
    Residues for which any step fails (for example a missing CA atom or an
    unlisted residue name) are skipped silently.
    """
    exposed = []
    for residue in chain.get_residues():
        try:
            res_id = residue["CA"].get_full_id()[3][1]
            command = (str(res_id) + ", " + "chain " + chain_label +
                       " and resi " + str(res_id))
            selections = freesasa.selectArea((command, ), structure, result)
            for area in selections.values():
                resname = chain[residue.get_full_id()[3][1]].get_resname()
                relative = float('%.3f' % (area / maximum_area[resname]))
                if relative > 0.05:
                    exposed.append(res_id)
        except Exception:
            # best effort: residues that cannot be evaluated are left out
            pass
    return exposed


def surface_list(file1):
    """List the surface residues of the module-level chains A and B.

    freesasa is run on ``file1``. A residue counts as surface residue when
    its area divided by the maximum area of its residue type (rounded to
    three decimals) exceeds 0.05. ``chain_A`` is selected as chain ``H`` and
    ``chain_B`` as chain ``L``.

    Returns ``(ids of chain_A, ids of chain_B)``.
    """
    # NOTE(review): the ILE value is far below those of comparable residues
    # (LEU 206.32, VAL 169.82) -- possibly a typo; confirm against the source
    # of this table.
    maximum_area = {
        'ALA': 120.56, 'CYS': 143.79, 'ASP': 157.04, 'GLU': 188.42,
        'PHE': 227.46, 'GLY': 89.41, 'HIS': 200.14, 'ILE': 96.42,
        'LYS': 213.74, 'LEU': 206.32, 'MET': 216.63, 'ASN': 149.85,
        'PRO': 155.07, 'GLN': 186.83, 'ARG': 229.51, 'SER': 128.27,
        'THR': 138.58, 'VAL': 169.82, 'TRP': 269.35, 'TYR': 241.54,
    }

    structure = freesasa.Structure(file1)
    result = freesasa.calc(structure)

    surface_a = _exposed_residue_ids(chain_A, "H", structure, result,
                                     maximum_area)
    surface_b = _exposed_residue_ids(chain_B, "L", structure, result,
                                     maximum_area)
    return surface_a, surface_b
def sa_conformers(file_1, func_1, file_2, func_2, units, radius):
    """Build a linear AB polymer, embed conformers and average their SASA.

    Steps performed:

    1. Two ``stk.StructUnit2`` building blocks are created from the ``.mol``
       files and joined into a linear ``AB`` polymer with ``units`` repeats.
    2. The polymer is optimised with ``stk.UFF`` and written to disk, then
       ``N`` (10) conformers are embedded with RDKit's ETKDGv2 parameters.
    3. Every conformer is written as ``.mol`` and ``.pdb``; the ``.mol``
       files are re-exported through RDKit as ``*_new.pdb`` because the PDB
       files written by stk cannot be read by FreeSASA.
    4. FreeSASA computes the total area of every ``*_new.pdb`` with probe
       radius ``radius``; the mean, normalised by the atom count of the
       unoptimised polymer mol file, is appended to a summary text file and
       the per-conformer values are written to a CSV file.

    :param file_1: path of the first monomer ``.mol`` file.
    :param func_1: functional groups of the first monomer, passed to
        ``stk.StructUnit2``.
    :param file_2: path of the second monomer ``.mol`` file.
    :param func_2: functional groups of the second monomer.
    :param units: number of repeat units, passed as ``n`` to ``stk.Linear``.
    :param radius: probe radius handed to ``freesasa.Parameters``.
    :raises RuntimeError: if no ``*_new.pdb`` file is available to measure.
    """
    # turn off cache
    stk.OPTIONS['cache'] = False

    # number of conformers
    N = 10

    # Functional groups expected in func_1 / func_2:
    #    ['diol'] and ['dibromine']/['difluorene']
    #    or
    #    ['bromine'] and ['bromine']/['iodine']
    name_1 = file_1.replace('.mol', '')
    unit_1 = stk.StructUnit2(file_1, func_1)

    name_2 = file_2.replace('.mol', '')
    unit_2 = stk.StructUnit2(file_2, func_2)

    # make polymer
    NAME = name_1+'_'+name_2+'_AB_poly'
    print(f'Creating polymer: {NAME}')
    polymer = stk.Polymer([unit_1, unit_2], stk.Linear('AB', [0, 0], n=units, ends='h'))
    # write unoptimized structure
    polymer.write(NAME+'.mol')
    mol_polymer = rdkit.MolFromMolFile(NAME + '.mol')
    print(f'Optimizing polymer {NAME} and saving {N} conformers')
    # optimise the polymer geometry with stk's UFF optimiser
    embedder = stk.UFF(use_cache=False)
    embedder.optimize(polymer, conformer=-1)
    # write optimized polymer to json
    polymer.dump(NAME+'_opt.json')
    polymer.write(NAME+'_opt.mol')
    # make N conformers of the polymer molecule
    etkdg = rdkit.ETKDGv2()
    etkdg.randomSeed = 1000
    etkdg.verbose = True
    etkdg.maxIterations = 200000
    cids = rdkit.EmbedMultipleConfs(
        mol=polymer.mol,
        numConfs=N,
        params=etkdg
    )
    print(f'Made {len(cids)} conformers...')
    print('Warning! I have not implemented an optimization of the ETKDG cleaned polymers!')

    # iterate over conformers and save structure
    # NOTE(review): the output root is a hard-coded, user-specific path.
    file_dir = '/home/fanyuzhao/Monomers/OH+F/dimer/conformers/'
    new_dir = file_dir+NAME+'_'+str(units)+'_'+str(radius)+'/'
    # Create the directory once, up front, so the directory listings below
    # work even when no conformer was embedded.
    os.makedirs(new_dir, exist_ok=True)
    for cid in cids:
        # write optimized polymer to mol
        polymer.write(new_dir+NAME+'_'+str(cid)+'_opt.mol', conformer=cid)
        # write optimized polymer to pdb
        polymer.write(new_dir+NAME+'_'+str(cid)+'_opt.pdb', conformer=cid)
    # report once, after all conformers have been written
    print(f'Done! {N} ETKDG conformers of polymer written to {NAME}_{N}_opt.mol/pdb')

    # pdb file from stk can not be read in freesasa
    # save the new pdb file in rdkit from mol files
    for item in os.listdir(new_dir):
        if item.endswith('.mol'):
            file_pdb = item.replace('.mol', '')
            a = rdkit.MolFromMolFile(os.path.join(new_dir, item))
            # hydrogens are removed when converting the file in rdkit
            b = rdkit.AddHs(a, addCoords = True)
            rdkit.MolToPDBFile(b, new_dir + file_pdb + '_new.pdb')

    # calculate solvent accessible surface area with the requested probe
    # hydrogens are removed in the default option
    # hetatm are ignored in the default option
    options_with_Hs =  {    'hetatm' : True,
                            'hydrogen' : True,
                            'join-models' : False,
                            'skip-unknown' : False,
                            'halt-at-unknown' : False    }

    sa_list = []
    pdb_list = []
    # loop all new pdb files
    for pdb in os.listdir(new_dir):
        if pdb.endswith("_new.pdb"):
            # use freesasa to calculate SASA
            para = freesasa.Parameters()
            para.setProbeRadius(radius)
            free_struct = freesasa.Structure(os.path.join(new_dir, pdb), options = options_with_Hs)
            free_calc = freesasa.calc(free_struct, para)
            total = free_calc.totalArea()
            # keep 4 decimals
            sa_list.append(round(total, 4))
            pdb_list.append(pdb.replace('.pdb', ''))

    if not sa_list:
        # fail with a clear message instead of a bare ZeroDivisionError
        raise RuntimeError('No "_new.pdb" conformer files found in ' + new_dir)

    # calculate average SASA over all conformers
    sa_average = round(sum(sa_list) / len(sa_list), 4)
    atom_number = mol_polymer.GetNumAtoms()
    normalized_sa = round(sa_average / atom_number, 4)
    with open (file_dir + 'Average surface area of conformers.txt', 'a+') as Asa:
        Asa.write(f'The normalized surface area of {NAME}_{units} is ' + str(normalized_sa) + ' Å^2 with the probe size of ' + str(radius) + f'Å and chain length of {units}.\n')
    print ('The avarage surface area of the conformers is ' + str(sa_average) + ' Å^2 with the probe size of ' + str(radius) + 'Å.')

    # save data to a csv table
    # save pdb file and surface area to a directory
    dic = dict(zip(pdb_list, sa_list))
    download_dict = new_dir + 'Solvent accessible surface area of ' + NAME +'.csv'
    # the context manager guarantees the table is flushed and closed
    with open(download_dict, 'w') as csv_file:
        csv_file.write("Polymer_name, SASA\n")
        for polymer_name, sasa_value in dic.items():
            csv_file.write(polymer_name + "," + str(sasa_value) + "\n")
    print ('Nomalized solvent accessible surface area is '+ str(normalized_sa) + ' Å^2 with the probe size of ' + str(radius) + 'Å.')
Beispiel #23
0
    def __init__(self, comb, pdb_acc_code, chain, **kwargs):
        """Load a PDB entry and pre-compute contact, SASA and DSSP data.

        The structure file is resolved in this order: a file in
        ``comb.input_dir_pdb`` whose name contains ``pdb_acc_code``; the
        ``path_to_pdb`` keyword; otherwise the entry is downloaded with
        ``pr.fetchPDB`` into ``raw/`` and passed through the external
        ``reduce`` program, whose output in ``reduce/`` is parsed.

        The derived data (contacts, bonds, FreeSASA results, DSSP, possible
        iFGs) is only computed when every atom selects under ``'icode _'``
        and the requested protein chain exists; otherwise
        ``self.possible_ifgs`` is set to ``None``.

        :param comb: object providing ``input_dir_pdb``, ``input_dir_dssp``,
            ``path_to_reduce`` and ``reduce``; also passed to
            ``find_possible_ifgs``.
        :param pdb_acc_code: PDB accession code (str).
        :param chain: chain identifier used in the ``protein and chain``
            selection.
        :param kwargs: optional ``path_to_pdb`` and ``path_to_dssp``.
        """
        # search for acc code in input_dir_pdb from comb object.
        assert isinstance(pdb_acc_code,
                          str), 'PDB accession code needs to be a string'
        pdb_file = [
            file.name for file in os.scandir(comb.input_dir_pdb)
            if pdb_acc_code in file.name
        ]
        try:
            if pdb_file:
                pdb_file = pdb_file[0]
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + pdb_file,
                                             altloc='A',
                                             model=1)
            elif 'path_to_pdb' in kwargs:
                self.prody_pdb = pr.parsePDB(kwargs.get('path_to_pdb'),
                                             altloc='A',
                                             model=1)
            else:  # NEED TO UPDATE: note if going to fetch pdb, it should be sent through Reduce first...
                # Create each directory independently: an already existing
                # 'raw' must not prevent 'reduce' from being created.
                os.makedirs(comb.input_dir_pdb + 'raw', exist_ok=True)
                os.makedirs(comb.input_dir_pdb + 'reduce', exist_ok=True)
                pr.fetchPDB(pdb_acc_code,
                            compressed=False,
                            folder=comb.input_dir_pdb + 'raw')
                # NOTE(review): the command is assembled as a shell string;
                # pdb_acc_code and the comb paths must be trusted values.
                os.system(comb.path_to_reduce + comb.reduce +
                          ' -FLIP -Quiet -DB ' + comb.path_to_reduce +
                          'reduce_wwPDB_het_dict.txt ' + comb.input_dir_pdb +
                          'raw/' + pdb_acc_code.lower() + '.pdb > ' +
                          comb.input_dir_pdb + 'reduce/' +
                          pdb_acc_code.lower() + 'H.pdb')
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + 'reduce/' +
                                             pdb_acc_code.lower() + 'H.pdb',
                                             altloc='A',
                                             model=1)
        except NameError:
            raise NameError(
                'ParsePDB instance needs a pdb file path or a valid pdb accession code.'
            )

        self.pdb_acc_code = pdb_acc_code.lower()
        self.pdb_chain = chain
        # Only continue when all atoms match 'icode _' and the requested
        # protein chain is present in the structure.
        if len(self.prody_pdb) == len(self.prody_pdb.select('icode _')) \
                and self.prody_pdb.select('protein and chain ' + self.pdb_chain) is not None:
            self.contacts = pr.Contacts(self.prody_pdb)
            self.set_bonds()

            # FreeSASA reads the same file that was parsed above.
            if pdb_file:
                self.fs_struct = freesasa.Structure(comb.input_dir_pdb +
                                                    pdb_file)
            elif 'path_to_pdb' in kwargs:
                self.fs_struct = freesasa.Structure(kwargs.get('path_to_pdb'))
            else:
                path = comb.input_dir_pdb + 'reduce/'
                self.fs_struct = freesasa.Structure(path + next(
                    file.name for file in os.scandir(path)
                    if self.pdb_acc_code in file.name))

            self.fs_result = freesasa.calc(self.fs_struct)

            # additional results from freesasa_cb with probe radii 3, 4, 5
            self.fs_result_cb_3A = self.freesasa_cb(probe_radius=3)
            self.fs_result_cb_4A = self.freesasa_cb(probe_radius=4)
            self.fs_result_cb_5A = self.freesasa_cb(probe_radius=5)
            self.prody_pdb_bb_cb_atom_ind = self.prody_pdb.select(
                'protein and (backbone or name CB) '
                'and not element H D').getIndices()

            # DSSP: use an existing file, an explicit path, or run DSSP.
            dssp_file = [
                file.name for file in os.scandir(comb.input_dir_dssp)
                if pdb_acc_code in file.name
            ]
            if dssp_file:
                dssp_file = dssp_file[0]
                self.dssp = pr.parseDSSP(comb.input_dir_dssp + dssp_file,
                                         self.prody_pdb)
            elif 'path_to_dssp' in kwargs:
                self.dssp = pr.parseDSSP(kwargs.get('path_to_dssp'),
                                         self.prody_pdb)
            else:
                if pdb_file:
                    pr.execDSSP(comb.input_dir_pdb + pdb_file,
                                outputdir=comb.input_dir_dssp)
                elif 'path_to_pdb' in kwargs:
                    pr.execDSSP(kwargs.get('path_to_pdb'),
                                outputdir=comb.input_dir_dssp)
                else:
                    path = comb.input_dir_pdb + 'reduce/' + next(
                        file.name
                        for file in os.scandir(comb.input_dir_pdb + 'reduce')
                        if pdb_acc_code in file.name)
                    pr.execDSSP(path, outputdir=comb.input_dir_dssp)

                # parse the DSSP output that was just produced
                self.dssp = pr.parseDSSP(
                    comb.input_dir_dssp +
                    next(file.name for file in os.scandir(comb.input_dir_dssp)
                         if pdb_acc_code in file.name), self.prody_pdb)
            self.possible_ifgs = self.find_possible_ifgs(comb)
        else:
            self.possible_ifgs = None
        # valence and hydrogen bond data for vandermers and iFGs of ParsedPDB protein instance
        # iFG specific:
        self._ifg_pdb_info = []
        self._ifg_atom_density = []
        self._ifg_contact_water = []
        self._ifg_contact_ligand = []
        self._ifg_contact_metal = []
        # vdM specific:
        self._vdm_pdb_info = []
        self._vdm_sasa_info = []
        self._ifg_contact_vdm = []
        self._ifg_hbond_vdm = []
        self._ifg_hbond_water = []
        self._ifg_hbond_ligand = []
        self._ifg_ca_hbond_vdm = []
Beispiel #24
0
def openfile():
    """Ask for a PDB file and fill the module-level analysis globals.

    A Tk file dialog selects a ``.pdb`` file, which is parsed with
    ``PDBParser``.  The sequence of the last peptide built by ``PPBuilder``
    is analysed with ``ProteinAnalysis`` (flexibility and a ``kd``-based
    protein scale), a 10-residue sliding window collects secondary-structure
    fractions into ``sec``, and FreeSASA selections over 10-residue ranges
    fill ``acc``, ``access`` and ``logacc``.

    All results are published through ``global`` names; nothing is returned.
    """
    global prob, probab, te
    global my_seq
    global anti
    global structure, structure_id, filename
    global antigenicity, hydro, flex, sec
    global m, a, c, b, length, j, k
    global hydroph, flexi, access
    anti = []
    sec = []
    probab = []
    from tkinter import filedialog
    root = Tk()
    root.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("pdb files", "*.pdb"), ("pdb files", "*.pdb")))
    filename = root.filename
    print(filename)
    # the structure id is fixed, independent of the selected file
    structure_id = "1e6j"
    structure = PDBParser().get_structure(structure_id, root.filename)
    ppb = PPBuilder()
    # my_seq is overwritten on every pass, so only the last peptide is kept
    for pp in ppb.build_peptides(structure):
        my_seq = pp.get_sequence()  # type: Seq
        print(my_seq)
    for model in structure:
        for chain in model:
            print(chain)
    sequence = list(my_seq)
    m = ''.join(sequence)
    print(m)
    length = len(m)  # type: int
    print("Sequence consist of", length, "Amino Acids")
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    analysed_seq = ProteinAnalysis(m)
    print("Molecular weight = ", analysed_seq.molecular_weight())
    print("Amino Acid Count = ", analysed_seq.count_amino_acids())
    print("Secondary structure fraction =",
          analysed_seq.secondary_structure_fraction())
    # Per-residue scale handed to protein_scale below.
    # NOTE(review): values look like the Kyte-Doolittle hydropathy scale,
    # while the printout calls the result "Hydrophilicity" -- confirm.
    kd = {
        'A': 1.8,
        'R': -4.5,
        'N': -3.5,
        'D': -3.5,
        'C': 2.5,
        'Q': -3.5,
        'E': -3.5,
        'G': -0.4,
        'H': -3.2,
        'I': 4.5,
        'L': 3.8,
        'K': -3.9,
        'M': 1.9,
        'F': 2.8,
        'P': -1.6,
        'S': -0.8,
        'T': -0.7,
        'W': -0.9,
        'Y': -1.3,
        'V': 4.2
    }
    # The same two profiles are stored under three global names each.
    c = list(analysed_seq.flexibility())
    b = list(analysed_seq.protein_scale(kd, 10, 1.0))
    hydro = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flex = list(analysed_seq.flexibility())
    hydroph = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flexi = list(analysed_seq.flexibility())

    # Sliding window of 10 residues: m[j + 1:k + 1] starts as m[0:10] and
    # moves one residue per iteration, for (length - 10) iterations.
    i = 1
    j = -1  # type: int
    k = 9
    while i <= (length - 10):
        print("Sequence is = ", m[j + 1:k + 1])
        print("Flexibility value = ", c[j + 1])
        print("Hydrophilicity value = ", b[j + 1])
        ana_seq = ''.join(m[j + 1:k + 1])
        analyze_seq = ProteinAnalysis(ana_seq)
        # For Secondary structure Analysis
        print("Secondary structure fraction =",
              analyze_seq.secondary_structure_fraction())
        a = list(analyze_seq.secondary_structure_fraction())
        # keep only the first of the returned fractions
        a = a[0]
        sec.append(a)
        i += 1
        j += 1
        k += 1
    f = length
    r = 1
    y = 10
    global acc, logacc
    acc = []
    # One pass per residue: select the residue range r..y (10 residues wide).
    for i in range(0, f):
        str1 = "accessibility, resi "
        str2 = str(r) + "-" + str(y)
        saving = str1 + str2
        print(saving)
        r = r + 1
        y = y + 1
        # NOTE(review): the structure is re-read from the literal file
        # "1e6j.pdb" on every pass, not from the file chosen in the dialog,
        # and it replaces the global `structure` parsed above -- confirm
        # that this is intended.
        structure = freesasa.Structure("1e6j.pdb")
        resulta = freesasa.calc(structure)
        area_classes = freesasa.classifyResults(resulta, structure)
        print("Total : %.2f A2" % resulta.totalArea())
        for key in area_classes:
            print(key, ": %.2f A2" % area_classes[key])
        # recompute with the Lee-Richards algorithm; this result is the one
        # used for the selections
        resulta = freesasa.calc(
            structure,
            freesasa.Parameters({
                'algorithm': freesasa.LeeRichards,
                'n-slices': 10
            }))
        selections = freesasa.selectArea(('alanine, resn ala', saving),
                                         structure, resulta)
        # both selection areas are appended, so acc grows per pass by the
        # number of entries in `selections`
        for key in selections:
            print(key, ": %.2f A2" % selections[key])
            a = selections[key]
            acc.append(a)

    # Keep every other entry of acc.
    # NOTE(review): which of the two selections this picks depends on the
    # iteration order of the dict returned by selectArea -- verify that the
    # "accessibility" values (not the "alanine" ones) end up here.
    l = acc[0::2]
    access = l
    print(acc)
    print(l)
    # NOTE(review): math.log raises ValueError for an area of 0 -- confirm
    # that zero areas cannot occur.
    logacc = [math.log(y, 10) for y in l]

    print(logacc)
Beispiel #25
0
    def _get_item_src(self, decoy):
        """Build the padded per-residue feature matrix and label of a decoy.

        The decoy PDB file is read line by line.  Non-hydrogen ``ATOM``
        records are folded into per-residue feature rows via
        ``self._put_atom_src``; parsing stops at the first ``TER`` record, at
        the first empty line, or once 400 residues have been collected.  The
        x/y/z columns of every atom slot are centred on the mean of columns
        1, 2 and 3, and the rows are copied into a ``-inf``-filled array of
        shape ``(self.npoints, self.num_channel())``.

        The label is read from ``list.dat`` in the decoy's directory: the
        column ``CASPDataset.list_dat["gdt_ts"]`` of the line starting with
        the decoy's file name, or ``0.0`` when no such line exists.

        :param decoy: str, path to the decoy PDB file.
        :return: tuple ``(pc, gdt_ts)``.
        :raises ValueError: if the file contains no ``ATOM`` record.
        """
        atom_to_num = {
            "C": 1,
            "N": 2,
            "O": 3,
            "S": 4
        }
        residues = []
        atom_positions = self.create_atom_positions()
        residue = self.build_residue()
        structure = fs.Structure(decoy)
        solvent_access = fs.calc(structure)
        with open(decoy, "r") as f:
            # Skip everything before the first ATOM record.  The raw line is
            # tested so that end of file ('') can be told apart from a blank
            # line ('\n'); otherwise a file without ATOM records would loop
            # forever.
            raw_line = f.readline()
            while raw_line and not raw_line.startswith("ATOM"):
                raw_line = f.readline()
            if not raw_line:
                raise ValueError(
                    "no ATOM records found in {}".format(decoy))
            line = raw_line.rstrip()
            cur_resi = int(line[22:26])

            # PDB file stardard format
            # COLUMNS   DATA  TYPE    FIELD
            # -------------------------------------------
            #  1 -  6   Record name   "ATOM  "
            #  7 - 11   Integer       Atom serial #
            # 13 - 16   Atom          Atom name
            # 17        Character     Alternate location
            # 18 - 20   Residue name  resName
            # 22        Character     chainID
            # 23 - 26   Integer       resSeq
            # 27        AChar         Code for insertion of residues
            # 31 - 38   Real(8.3)     x
            # 39 - 46   Real(8.3)     y
            # 47 - 54   Real(8.3)     z
            # 55 - 60   Real(6.2)     occupancy
            # 61 - 66   Real(6.2)     tempFactor
            # 77 - 78   LString(2)    element
            # 79 - 80   LString(2)    Charge  on the atom

            while line:
                if line.startswith("TER"):
                    break
                if not line.startswith("ATOM"):
                    line = f.readline().rstrip()
                    continue

                # ignore hydrogens; the last character of the stripped line
                # is taken as the element
                # NOTE(review): this assumes the charge columns are blank --
                # confirm for the decoy files in use.
                atom_type = line[-1]
                if atom_type == "H":
                    line = f.readline().rstrip()
                    continue

                resi_num = int(line[22:26])
                if resi_num > cur_resi:
                    # a new residue starts: store the finished one
                    residues.append(residue)
                    if len(residues) == 400:
                        break
                    residue = self.build_residue()
                    cur_resi = resi_num
                residue = self._put_atom_src(
                    line, residue, solvent_access, atom_positions, atom_to_num)
                line = f.readline().rstrip()
            # NOTE(review): the residue being built when the loop ends is
            # never appended to `residues` -- confirm whether dropping the
            # last residue is intended.

        # normalize residues
        pc = np.ones((self.npoints, self.num_channel())) * float("-inf")
        residues = np.array(residues)
        logging.debug("decoy shape: {}".format(residues.shape))
        x_mean = np.mean(residues[:, 1])
        y_mean = np.mean(residues[:, 2])
        z_mean = np.mean(residues[:, 3])
        # subtract the means from the x/y/z columns of every atom slot
        for i in range(self.num_channel() // self.ATTRIBUTES_EACH_ATOM):
            residues[:, self.ATTRIBUTES_EACH_ATOM*i+1] -= x_mean
            residues[:, self.ATTRIBUTES_EACH_ATOM*i+2] -= y_mean
            residues[:, self.ATTRIBUTES_EACH_ATOM*i+3] -= z_mean
        pc[0:residues.shape[0], :] = residues

        target_path = os.path.dirname(decoy)
        gdt_ts = 0.0
        with open(os.path.join(target_path, "list.dat"), "r") as lst:
            info = lst.readline()
            while info:
                if info.startswith(os.path.basename(decoy)):
                    gdt_ts = float(
                        info.split()[CASPDataset.list_dat["gdt_ts"]])
                    break
                info = lst.readline()

        return pc, gdt_ts
Beispiel #26
0
def get_surface_resids(structure,
                       cutoff=15,
                       config_path=os.environ.get('FREESASA_CONFIG')):
    """
    Calls freesasa using its Python API and returns the residue numbers
    whose relative main-chain or side-chain accessibility reaches `cutoff`.

    Atom areas are summed per residue, split into main chain (atoms named
    C, N, O) and side chain (every other atom), divided by the reference
    values in the module-level `rel_asa` tables and multiplied by 100.

    structure: Bio.PDB structure handed to `structureFromBioPDB`.
    cutoff: threshold applied to the relative accessibilities.
    config_path: classifier configuration file passed to `Classifier`.

    Returns a list of residue numbers. Only residues with at least one
    main-chain atom are considered; a residue without side-chain atoms
    counts as side-chain accessibility 0.
    """
    try:
        from freesasa import Classifier, structureFromBioPDB, calc
    except ImportError as err:
        print(
            '[!] The binding affinity prediction tools require the \'freesasa\' Python API',
            file=sys.stderr)
        raise ImportError(err)
    import pkg_resources

    asa_data, rsa_data, rel_main_chain, rel_side_chain = {}, {}, {}, {}
    _rsa = rel_asa['total']
    _rsa_bb = rel_asa['bb']
    _rsa_sc = rel_asa['sc']

    classifier = Classifier(config_path)
    pkg_resources.cleanup_resources()

    # stderr is redirected to os.devnull while freesasa runs
    with stdchannel_redirected(sys.stderr, os.devnull):
        struct = structureFromBioPDB(
            structure,
            classifier,
        )
        result = calc(struct)

    # iterate over all atoms to get SASA and residue name
    for idx in range(struct.nAtoms()):
        atname = struct.atomName(idx).strip()
        resname = struct.residueName(idx)
        resid = int(struct.residueNumber(idx))
        chain = struct.chainLabel(idx)
        at_uid = (chain, resname, resid, atname)
        res_uid = (chain, resname, resid)

        asa = result.atomArea(idx)
        asa_data[at_uid] = asa
        # add asa to residue
        rsa_data[res_uid] = rsa_data.get(res_uid, 0) + asa

        if atname in ('C', 'N', 'O'):
            rel_main_chain[res_uid] = rel_main_chain.get(res_uid, 0) + asa
        else:
            rel_side_chain[res_uid] = rel_side_chain.get(res_uid, 0) + asa

    # convert total asa to relative asa
    rsa_data = {
        res_uid: asa / _rsa[res_uid[1]]
        for res_uid, asa in rsa_data.items()
    }
    rel_main_chain = {
        res_uid: asa / _rsa_bb[res_uid[1]] * 100
        for res_uid, asa in rel_main_chain.items()
    }
    rel_side_chain = {
        res_uid: asa / _rsa_sc[res_uid[1]] * 100
        for res_uid, asa in rel_side_chain.items()
    }

    # We format to fit the pipeline
    # (keyed by residue number only, so equal numbers in different chains
    # share one entry)
    resid_access = {}
    for res_uid, access in rel_main_chain.items():
        resid_access[res_uid[2]] = {
            # default to 0.0: comparing None with the cutoff would raise a
            # TypeError for residues that have no side-chain atoms
            'side_chain_rel': rel_side_chain.get(res_uid, 0.0),
            'main_chain_rel': access
        }
    surface_resids = [
        r for r, v in resid_access.items()
        if v['side_chain_rel'] >= cutoff or v['main_chain_rel'] >= cutoff
    ]
    return surface_resids
Beispiel #27
0
def parse_pdb_coordinates(pdb_path: str,
                          start_position: int,
                          end_position: int,
                          position_correction: int,
                          chain: str,
                          sasa: bool = False) -> DataFrame:
    """
    Parse coordinate of CA atoms. Will also return the bfactor and SASA using freesasa.
    If PDB is missing atoms, it can handle it.

    Positions ``start_position + position_correction`` up to (excluding)
    ``end_position + position_correction`` of ``chain`` in the first model
    are visited. A position whose CA atom cannot be found gets a row of
    NaN coordinates and is left out of the SASA / B-factor columns.

    Returns a DataFrame with the columns ``x``, ``y``, ``z``, ``Position``,
    ``x_cent``, ``y_cent``, ``z_cent`` and ``Distance`` (sum of the squared
    offsets from the centroid). With ``sasa=True`` the columns ``SASA`` and
    ``log B-factor`` are merged in on ``Position``.
    """

    # Get structure from PDB
    structure = PDBParser().get_structure('pdb', pdb_path)

    coordinates = []
    commands = []
    bfactors = []
    positions_worked = []  # positions present in pdb

    # Iterate over each CA atom and get coordinates
    for i in np.arange(start_position + position_correction,
                       end_position + position_correction):
        # Only the lookup is guarded, so a position contributes exactly one
        # row to `coordinates` and the other lists stay aligned.
        try:
            ca_atom = structure[0][chain][int(i)]["CA"]
        except KeyError:
            # Bio.PDB raises KeyError for a missing model/chain/residue/atom
            print("residue {} not found".format(str(i)))
            coordinates.append([np.nan, np.nan, np.nan, i])
            continue

        coordinates.append(list(ca_atom.get_vector()) + [i])
        # Get SASA command for each residue and bfactor
        commands.append("s{}, chain {} and resi {}".format(
            str(i), chain, str(i)))
        bfactors.append(np.log10(ca_atom.get_bfactor()))
        positions_worked.append(i)

    # Convert to df
    df_coordinates = DataFrame(columns=['x', 'y', 'z', 'Position'],
                               data=coordinates)

    # Center data
    x, y, z = centroid(df_coordinates)
    df_coordinates['x_cent'] = (df_coordinates['x'] - x).abs()**2
    df_coordinates['y_cent'] = (df_coordinates['y'] - y).abs()**2
    df_coordinates['z_cent'] = (df_coordinates['z'] - z).abs()**2
    df_coordinates['Distance'] = df_coordinates['x_cent'] + df_coordinates[
        'y_cent'] + df_coordinates['z_cent']

    # Add sasa values
    if sasa:
        # Get structure for SASA
        structure_sasa = freesasa.Structure(pdb_path)
        result = freesasa.calc(structure_sasa)
        # Calculate sasa
        sasa_area = freesasa.selectArea(commands, structure_sasa, result)
        # presumably the returned dict keeps the order of `commands`, which
        # matches `positions_worked` -- verify against the freesasa version
        df_sasa: DataFrame = DataFrame(columns=['SASA'],
                                       data=list(sasa_area.values()))
        df_sasa['log B-factor'] = bfactors
        df_sasa['Position'] = positions_worked

        # Merge
        df_coordinates = df_coordinates.merge(df_sasa,
                                              how='outer',
                                              on='Position')

    return df_coordinates
Beispiel #28
0
    def _get_docking_model(self, molecule, restraints):
        """Assemble a ``CPyDockModel`` for ``molecule``.

        Every atom receives its AMBER type, charge, mass and van der Waals
        parameters as attributes.  Atom indices of residues listed in
        ``restraints`` are grouped by ``chain.residue_name.residue_number``.
        A FreeSASA calculation restricted to non-hydrogen atoms, using the
        desolvation radii, yields one area per atom; hydrogens get ``-1.0``.

        :param molecule: object exposing ``atoms``, ``copy_coordinates()``
            and, optionally, ``n_modes``.
        :param restraints: container of residue identifiers, may be empty
            or ``None``.
        :return: the ``CPyDockModel``; built without ``n_modes`` when the
            first construction attempt raises ``AttributeError``.
        """
        atoms = molecule.atoms
        parsed_restraints = {}
        # Assign force-field properties and collect restrained atom indices
        for index, atom in enumerate(atoms):
            residue_key = "%s.%s.%s" % (atom.chain_id, atom.residue_name,
                                        str(atom.residue_number))
            if restraints and residue_key in restraints:
                parsed_restraints.setdefault(residue_key, []).append(index)

            # HIS is looked up under the name HID
            amber_res = 'HID' if atom.residue_name == "HIS" else atom.residue_name
            if atom.name in amber.translate:
                amber_atom = amber.translate[atom.name]
            else:
                amber_atom = atom.name
            lookup_key = "%s-%s" % (amber_res, amber_atom)

            atom.amber_type = amber.amber_types[lookup_key]
            atom.charge = amber.charges[lookup_key]
            atom.mass = amber.masses[atom.amber_type]
            atom.vdw_energy = vdw.vdw_energy[atom.amber_type]
            atom.vdw_radius = vdw.vdw_radii[atom.amber_type]

        # Per-atom arrays shared by the model
        elec_charges = np.array([atom.charge for atom in atoms])
        vdw_energies = np.array([atom.vdw_energy for atom in atoms])
        vdw_radii = np.array([atom.vdw_radius for atom in atoms])
        coordinates = molecule.copy_coordinates()
        des_energy, des_radii = solvation.get_solvation(molecule)

        # Reference SASA: only heavy atoms enter the FreeSASA structure
        log.info('Calculating reference SASA...')
        structure = Structure()
        heavy_radii = []
        for index, atom in enumerate(atoms):
            if atom.is_hydrogen():
                continue
            structure.addAtom(atom.name, atom.residue_name,
                              atom.residue_number, atom.chain_id, atom.x,
                              atom.y, atom.z)
            heavy_radii.append(des_radii[index])
        structure.setRadii(list(heavy_radii))
        sasa_result = freesasa.calc(structure)

        # Map the heavy-atom areas back onto the full atom list
        per_atom_area = []
        heavy_index = 0
        for atom in atoms:
            if atom.is_hydrogen():
                per_atom_area.append(-1.0)
                continue
            per_atom_area.append(sasa_result.atomArea(heavy_index))
            heavy_index += 1
        sasa = np.array(per_atom_area)
        # mask: 0 for hydrogens, 1 for every other atom
        hydrogens = np.array(
            [0 if atom.is_hydrogen() else 1 for atom in atoms])
        log.info('Done.')

        reference_points = ModelAdapter.load_reference_points(molecule)
        model_args = (atoms, coordinates, parsed_restraints, elec_charges,
                      vdw_energies, vdw_radii, des_energy, des_radii, sasa,
                      hydrogens)
        try:
            return CPyDockModel(*model_args,
                                reference_points=reference_points,
                                n_modes=molecule.n_modes.copy())
        except AttributeError:
            # retry without normal modes
            return CPyDockModel(*model_args,
                                reference_points=reference_points)
Beispiel #29
0
 def CalCSASA(self, sasaStruct):
     """Run ``freesasa.calc`` on ``sasaStruct`` and return its result."""
     return freesasa.calc(sasaStruct)
char_at_base = []

# Command line: zip archive with the test PDB files and the model file.
parser = argparse.ArgumentParser()
parser.add_argument("--infile", type=str, default="data/test.zip")
parser.add_argument("--model", type=str, default="model.pkl")
args = parser.parse_args()

#protein_parser = PDBParser()

with temppathlib.TemporaryDirectory() as tmpdir:
    # unzip the file with all the test PDBs
    with zipfile.ZipFile(args.infile, "r") as zip_:
        zip_.extractall(tmpdir.path)
        for test_pdb in tmpdir.path.glob("*.pdb"):
            struct = freesasa.Structure(str(test_pdb))
            result = freesasa.calc(struct)
            areas_classes = freesasa.classifyResults(result, struct)

            # Read the classes by name: the order of the returned dict
            # depends on which class is met first in the structure, so
            # positional indexing can swap polar and apolar areas.
            polar_area.append(areas_classes.get('Polar', 0.0))
            apolar_area.append(areas_classes.get('Apolar', 0.0))
            total_area.append(result.totalArea())

print('done')
with temppathlib.TemporaryDirectory() as tmpdir:
    # unzip the file with all the test PDBs
    with zipfile.ZipFile(args.infile, "r") as zip_:
        zip_.extractall(tmpdir.path)
Beispiel #31
0
    def handle(self, *args, **options):
        """
        Compute per-residue helix measures for every reference structure.

        For each selected ``Structure`` the preferred chain is reduced to the
        residues carrying a generic number in helices 1-7, a PCA axis is
        fitted per helix and for the whole bundle, and two angle sets are
        derived (axis-CA-CB and axis-axis-CA).  Depending on the flags
        ``SASA``, ``HSE``, ``extra_pca`` and ``print_pdb`` additional measures
        are calculated, PDB files are saved and a pickle with all measures is
        written to ``pymol_output/``.

        NOTE(review): ``SASA``, ``HSE``, ``extra_pca`` and ``print_pdb`` are
        not defined inside this method -- presumably module-level switches.

        args, options: accepted but not read anywhere in this method.

        Any exception raised while processing one structure is printed, the
        pdb code is recorded as failed and the loop continues with the next
        structure.
        """
        def recurse(entity, slist):
            """
            filter a pdb structure in a recursive way (in place)

            entity: the pdb entity, a structure should be given on the top level

            slist: the list of filter criteria, one entry per level; children
            whose id is not in slist[0] are detached, the remaining children
            are filtered against slist[1:] as long as further levels are left
            """
            for subenty in entity.get_list():
                if not subenty.id in slist[0]: entity.detach_child(subenty.id)
                elif slist[1:]: recurse(subenty, slist[1:])

        def cal_pseudo_CB(r):
            """
            Calculate pseudo CB for glycine
            from Bio pdb faq

            The N-CA vector is rotated by -120 degrees around the C-CA vector
            and shifted back to CA; the result is returned as an array.
            """
            a = r['CA'].get_vector()
            n = r['N'].get_vector() - a
            c = r['C'].get_vector() - a
            rot = pdb.rotaxis(-np.pi * 120.0 / 180.0, c)
            b = n.left_multiply(rot) + a
            return b.get_array()

        def pca_line(pca, h, r=0):
            """
            Calculate the pca for h and return the first pc transformed back to
            the original coordinate system

            pca is fitted on h as a side effect (some callers only want that).
            The returned line consists of the two points at -20 and +20 on the
            first component; their order depends on the sign of the first
            projected point's first component and on the flag r.
            """
            if ((not r) if pca.fit_transform(h)[0][0] < 0 else r):
                return pca.inverse_transform(
                    np.asarray([[-20, 0, 0], [20, 0, 0]]))
            else:
                return pca.inverse_transform(
                    np.asarray([[20, 0, 0], [-20, 0, 0]]))

        def calc_angle(b, c):
            """
            Calculate the angle between c, b and the orthogonal projection of b
            to the x axis.

            Works row by row and returns the angles in degrees.
            NOTE(review): inner1d is not defined here -- presumably numpy's
            row-wise inner product; confirm the import.
            """
            ba = -b
            bc = c + ba
            # dropping the x component leaves the part of -b orthogonal to x
            ba[:, 0] = 0
            return np.degrees(
                np.arccos(
                    inner1d(ba, bc) /
                    (np.linalg.norm(ba, axis=1) * np.linalg.norm(bc, axis=1))))

        def ca_cb_calc(ca, cb, pca):
            """
            Calculate the angles between ca, cb and center axis

            Both coordinate sets are transformed with pca before the angle is
            taken.
            """
            return calc_angle(pca.transform(ca), pca.transform(cb))

        def axes_calc(h, p, pca):
            """
            Calculate the orthogonal projection of the CA to the helix axis
            which is moved to the mean of three consecutive amino acids

            h: CA coordinates of one helix
            p: the pca fitted for this helix
            pca: the pca used as center axis
            """
            # mean of previous, current and next row; the first and last row
            # are repeated at the borders
            a = (np.roll(np.vstack((h, h[0])), 1, axis=0)[:-1] + h +
                 np.roll(np.vstack((h, h[-1])), -1, axis=0)[:-1]) / 3
            # keep the first component of h, take the others from the means
            b = p.transform(h)
            b[:, 1:] = p.transform(a)[:, 1:]
            b = p.inverse_transform(b)
            return calc_angle(pca.transform(b), pca.transform(h))

        def set_bfactor(chain, angles):
            """
            simple helper to set the bfactor of all residues by some value of a
            list

            Residues and values are paired with zip, so surplus entries on
            either side are ignored.
            """
            for r, an in zip(chain.get_list(), angles):
                for a in r:
                    a.set_bfactor(an)

        def qgen(x):
            """
            Helper function to slice a list of all residues of a protein of the
            list of the residues of all proteins

            Reads and shrinks the list qset of the enclosing method: qset is
            scanned from the back (index 0 is not examined), start marks the
            first index found for protein x, and as soon as a different
            protein follows, everything behind start is deleted from qset and
            the part after that position is returned.  If no different
            protein is found, qset is trimmed behind start and returned
            itself.
            """
            start = False
            for i in range(len(qset) - 1, 0, -1):
                if not start and qset[i].protein_conformation.protein == x:
                    start = i
                if start and qset[i].protein_conformation.protein != x:
                    if start != len(qset) - 1:
                        del qset[start + 1:]
                        return qset[i + 1:]
                    return qset[i + 1:]
            del qset[start + 1:]
            return qset

        # pdb codes of all structures whose processing raised an exception
        failed = []

        # get preferred chain for PDB-code
        # (non-refined structures whose family slug starts with "001",
        # ordered by protein)
        references = Structure.objects.filter(
            protein_conformation__protein__family__slug__startswith="001"
        ).exclude(refined=True).prefetch_related(
            'pdb_code', 'pdb_data',
            'protein_conformation').order_by('protein_conformation__protein')
        references = list(references)

        pids = [ref.protein_conformation.protein.id for ref in references]

        # all residues of these proteins with a generic number label of the
        # form <1-7>x<digits>, in descending protein / label order; qgen
        # consumes this list from the back
        qset = Residue.objects.filter(
            protein_conformation__protein__id__in=pids)
        qset = qset.filter(
            generic_number__label__regex=r'^[1-7]x[0-9]+').order_by(
                '-protein_conformation__protein', '-generic_number__label')
        qset = list(
            qset.prefetch_related('generic_number', 'protein_conformation'))

        # pdb code -> residue list of the structure's protein
        res_dict = {
            ref.pdb_code.index: qgen(ref.protein_conformation.protein)
            for ref in references
        }

        #######################################################################
        ######################### Start of main loop ##########################
        #######################################################################

        for reference in references:

            # only the first entry of the comma separated chain list is used
            preferred_chain = reference.preferred_chain.split(',')[0]
            pdb_code = reference.pdb_code.index
            state_id = reference.protein_conformation.state.id

            try:

                print(pdb_code)

                structure = self.load_pdb_var(pdb_code, reference.pdb_data.pdb)
                pchain = structure[0][preferred_chain]

                #######################################################################
                ###################### prepare and evaluate query #####################

                db_reslist = res_dict[pdb_code]

                #######################################################################
                ######################### filter data from db #########################

                def reslist_gen(x):
                    """
                    Pop residues from the end of db_reslist as long as the
                    generic number label of the last one starts with x; an
                    empty list ends the generator.
                    """
                    try:
                        while db_reslist[-1].generic_number.label[0] == x:
                            yield db_reslist.pop()
                    except IndexError:
                        pass

                # when gdict is not needed the helper can be removed
                #db_tmlist = [[(' ',r.sequence_number,' ') for r in reslist_gen(x) if r.sequence_number in pchain and r.sequence_number < 1000] for x in ["1","2","3","4","5","6","7"]]
                # one list of (generic label, sequence number) per helix,
                # limited to residues present in the chain and numbered
                # below 1000
                db_helper = [[
                    (r.generic_number.label, r.sequence_number)
                    for r in reslist_gen(x)
                    if r.sequence_number in pchain and r.sequence_number < 1000
                ] for x in ["1", "2", "3", "4", "5", "6", "7"]]
                # sequence number -> generic label
                gdict = {r[1]: r[0] for hlist in db_helper for r in hlist}
                # the same residues as (' ', number, ' ') id tuples, per helix
                db_tmlist = [[(' ', r[1], ' ') for r in sl]
                             for sl in db_helper]
                db_set = set(db_tmlist[0] + db_tmlist[1] + db_tmlist[2] +
                             db_tmlist[3] + db_tmlist[4] + db_tmlist[5] +
                             db_tmlist[6])

                #######################################################################
                ############################# filter  pdb #############################

                # keep model 0, the preferred chain and the residues in db_set
                recurse(structure, [[0], preferred_chain, db_set])

                #######################################################################
                ############### Calculate the axes through the helices ################
                #######################################################################
                # group size for the averaged CA positions below
                N = 3

                # CA coordinates, one array per helix and one row per residue
                hres_list = [
                    np.asarray([pchain[r]["CA"].get_coord() for r in sl],
                               dtype=float) for sl in db_tmlist
                ]
                # CB coordinates; residues without CB get a pseudo CB
                h_cb_list = [
                    np.asarray([
                        pchain[r]["CB"].get_coord()
                        if "CB" in pchain[r] else cal_pseudo_CB(pchain[r])
                        for r in sl
                    ],
                               dtype=float) for sl in db_tmlist
                ]

                # fast and fancy way to take the average of N consecutive elements
                # (non-overlapping groups; a remainder of len(h) % N rows at
                # the end of a helix is dropped)
                hres_three = np.asarray([
                    sum([h[i:-(len(h) % N) or None:N] for i in range(N)]) / N
                    for h in hres_list
                ])

                #######################################################################
                ################################# PCA #################################
                #######################################################################

                helix_pcas = [PCA() for i in range(7)]
                # pca_line is only called to fit each helix pca here, the
                # returned lines are discarded
                [
                    pca_line(helix_pcas[i], h, i % 2)
                    for i, h in enumerate(hres_three)
                ]

                # extracellular part
                if extra_pca:
                    helices_mn = np.asarray(
                        [np.mean(h, axis=0) for h in hres_three])
                    # even helices: first half; odd helices: the slice runs
                    # backwards from the end to just above the midpoint
                    pos_list = np.asarray([
                        pca_line(PCA(), h[:len(h) // 2:(-(i % 2) or 1)])
                        for i, h in enumerate(hres_three)
                    ])
                    # move every line so that its center is the helix mean
                    pos_list = pos_list - (np.mean(pos_list, axis=1) -
                                           helices_mn).reshape(-1, 1, 3)

                    pca = PCA()
                    pca_line(pca, np.vstack(pos_list))
                else:
                    # center axis fitted on the averaged points of all helices
                    pca = PCA()
                    pca_line(pca, np.vstack(hres_three))

                #######################################################################
                ################################ Angles ###############################
                #######################################################################

                ########################### Axis to CA to CB ##########################

                angle = np.concatenate([
                    ca_cb_calc(ca, cb, pca)
                    for ca, cb in zip(hres_list, h_cb_list)
                ])

                set_bfactor(pchain, angle)

                if print_pdb:
                    self.save_pdb(structure,
                                  pdb_code + 'angle_colored_ca_cb.pdb')

                ######################### Axis to Axis to CA ##########################

                angle2 = np.concatenate([
                    axes_calc(h, p, pca)
                    for h, p in zip(hres_list, helix_pcas)
                ])

                set_bfactor(pchain, angle2)
                if print_pdb:
                    self.save_pdb(structure,
                                  pdb_code + 'angle_colored_axes.pdb')

                ################################ SASA #################################
                if SASA:
                    # NOTE(review): this file is read from disk; presumably
                    # it is the one written by save_pdb above, which only
                    # happens when print_pdb is set -- confirm
                    pdbstruct = freesasa.Structure("pymol_output/" + pdb_code +
                                                   'angle_colored_axes.pdb')
                    res = freesasa.calc(pdbstruct)

                    # sum the atom areas of consecutive atoms sharing the
                    # same residue number
                    asa_list = []
                    oldnum = -1
                    for i in range(res.nAtoms()):
                        resnum = pdbstruct.residueNumber(i)
                        if resnum == oldnum:
                            asa_list[-1] += res.atomArea(i)
                        else:
                            asa_list.append(res.atomArea(i))
                            oldnum = resnum

                    set_bfactor(pchain, asa_list)
                    if print_pdb:
                        self.save_pdb(structure, pdb_code + 'asa_colored.pdb')

                ################################# HSE #################################
                if HSE:
                    hse = pdb.HSExposure.HSExposureCB(structure[0])
                    # every atom of an entry gets the value x[1][1]
                    [[a.set_bfactor(x[1][1]) for a in x[0]] for x in hse]

                    if print_pdb:
                        self.save_pdb(structure, pdb_code + 'hsea_colored.pdb')

                ############################### pickle ################################
                if HSE and SASA:
                    reslist = []
                    grslist = []
                    # hse is rebound to a plain list; the CA bfactors read
                    # here are the ones last written above
                    hse = []
                    for r in pchain:
                        reslist.append(r.id[1])
                        grslist.append(gdict[r.id[1]])
                        hse.append(r["CA"].get_bfactor())
                    # stored tuple: residue numbers, generic labels, asa,
                    # CA bfactors, angle, angle2, state id
                    with open('pymol_output/' + pdb_code + '_measures.pickle',
                              'wb') as handle:
                        pickle.dump(
                            (np.array(reslist), grslist, np.array(asa_list),
                             np.array(hse), angle, angle2, state_id), handle)

                #Angle.objects.bulk_create([Angle(residue=gdict[res.id[1]], angle=res["CA"].get_bfactor(), structure=reference) for res in pchain])

            except Exception as e:
                # a failing structure must not stop the run: report it,
                # remember the pdb code and go on
                print("ERROR!!", pdb_code, e)
                failed.append(pdb_code)
                continue

        print(len(failed), "of", len(references), "failed:", failed)
Beispiel #32
0
    def _get_docking_model(self, molecule, restraints):
        """Build the CPyDockModel for ``molecule``.

        Every atom is annotated in place with its AMBER type, charge, mass and
        van der Waals parameters, the desolvation parameters are fetched, and
        a reference SASA is computed with freesasa over the non-hydrogen atoms.

        Args:
            molecule: object providing ``atoms``, ``copy_coordinates()`` and,
                optionally, ``n_modes``.
            restraints: container of residue ids (``chain.resname.resnum``)
                to track, or a falsy value when there are none.

        Returns:
            A ``CPyDockModel``; ``n_modes`` is only passed on when
            ``molecule.n_modes.copy()`` is available.

        Raises:
            KeyError: if an atom is missing from the ``amber`` / ``vdw``
                lookup tables.
        """
        atoms = molecule.atoms
        parsed_restraints = {}
        # Assign properties to atoms
        for atom_index, atom in enumerate(atoms):
            res_id = "%s.%s.%s" % (atom.chain_id, atom.residue_name, str(atom.residue_number))
            if restraints and res_id in restraints:
                # setdefault replaces the former bare ``except:``, which would
                # also have swallowed unrelated errors
                parsed_restraints.setdefault(res_id, []).append(atom_index)
            res_name = atom.residue_name
            atom_name = atom.name
            if res_name == "HIS":
                res_name = 'HID'
            if atom_name in amber.translate:
                atom_name = amber.translate[atom_name]
            atom_id = "%s-%s" % (res_name, atom_name)
            atom.amber_type = amber.amber_types[atom_id]
            atom.charge = amber.charges[atom_id]
            atom.mass = amber.masses[atom.amber_type]
            atom.vdw_energy = vdw.vdw_energy[atom.amber_type]
            atom.vdw_radius = vdw.vdw_radii[atom.amber_type]

        # Prepare common model information
        elec_charges = np.array([atom.charge for atom in atoms])
        vdw_energies = np.array([atom.vdw_energy for atom in atoms])
        vdw_radii = np.array([atom.vdw_radius for atom in atoms])
        coordinates = molecule.copy_coordinates()
        des_energy, des_radii = solvation.get_solvation(molecule)

        # Calculate desolvation reference energy
        log.info('Calculating reference SASA...')
        # Only heavy atoms enter the freesasa structure; their positions in
        # ``atoms`` are kept so the areas can be mapped back afterwards.
        heavy_indices = [index for index, atom in enumerate(atoms)
                         if not atom.is_hydrogen()]
        structure = Structure()
        for index in heavy_indices:
            atom = atoms[index]
            structure.addAtom(atom.name, atom.residue_name, atom.residue_number, atom.chain_id,
                              atom.x, atom.y, atom.z)
        structure.setRadii([des_radii[index] for index in heavy_indices])
        sasa_result = freesasa.calc(structure)

        # Hydrogens keep the sentinel -1.0; heavy atoms get their area, where
        # the freesasa atom number is the position within heavy_indices.
        sasa = np.full(len(atoms), -1.0)
        for sasa_index, index in enumerate(heavy_indices):
            sasa[index] = sasa_result.atomArea(sasa_index)
        # 1 marks a heavy atom, 0 a hydrogen
        hydrogens = np.array([0 if atom.is_hydrogen() else 1 for atom in atoms])
        log.info('Done.')

        reference_points = ModelAdapter.load_reference_points(molecule)
        model_kwargs = {'reference_points': reference_points}
        # Only the n_modes lookup is guarded: an AttributeError raised inside
        # the CPyDockModel constructor must not be mistaken for a molecule
        # that simply has no n_modes.
        try:
            model_kwargs['n_modes'] = molecule.n_modes.copy()
        except AttributeError:
            # no usable n_modes on this molecule: build the model without it
            pass
        return CPyDockModel(atoms, coordinates, parsed_restraints, elec_charges, vdw_energies, vdw_radii,
                            des_energy, des_radii, sasa, hydrogens, **model_kwargs)