Ejemplo n.º 1
0
def sql_cg(pdbname):
    """Generate coarse-grained ball sets for a structure at four levels of detail.

    For each of the first four entries of the module-level ``LOD_LEVELES``
    the external program ``sbl_binary_U`` is run on the structure, the table
    it writes is read back, column 3 is replaced by its square root and the
    table is saved as ``<pdbname>_<level>.txt``.  ``*.vmd`` files found in
    the working directory are then moved into ``<pdbname>_sph/``.

    :param pdbname: a 4-character PDB identifier (downloaded with ProDy and
        suffixed with ``.pdb``) or the name of a PDB file.
    """
    if len(pdbname) == 4:  # bare PDB id: download it first
        prody.fetchPDB(pdbname, compressed=False)
        pdbname += ".pdb"
    mol = prody.parsePDB(pdbname)
    na = mol.numAtoms()
    print ("num atoms is ",na)
    # assumes LOD_LEVELES is a NumPy array of fractions -- TODO confirm
    LOD_NATOMS = (na*LOD_LEVELES).astype(int)
    filename = pdb_directory+pdbname
    binary = sbl_binary_U
    sblname = "sbl-ballcovor-pdb-U__inner_approximation.txt"
    outdir = pdbname+"_sph"
    for i in range(4):
        os.system(binary+sbl_arg_file+filename+sbl_arg_nballs+str(LOD_NATOMS[i])+sbl_arg_verbose)
        data = np.loadtxt(sblname)
        if data.size == 0:
            print ("********************************")
            print (pdbname)
            print ("********************************")
            continue
        # loadtxt returns a 1-D array for a single-row file; force 2-D so
        # the column indexing below works for any number of rows.
        data = np.atleast_2d(data)
        # sqrt the radius and save
        data[:,3] = np.sqrt(data[:,3])
        ofilename = pdbname+"_"+str(LOD_LEVELES[i])+'.txt'
        np.savetxt(ofilename, data, fmt='%g')
        print ("success ", ofilename)
        os.remove(sblname)
        # Create the output directory once instead of re-running mkdir
        # (and failing) on every iteration.
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        # The glob has to be expanded by the shell, hence os.system.
        os.system("mv *.vmd "+outdir+"/.")
Ejemplo n.º 2
0
def get_PDB(args):
    """Return the PDB file to use, downloading it when no local file is given.

    :param args: object with ``PDB_file`` and ``PDB_fetch`` attributes.
    :return: ``args.PDB_file`` when it is truthy, otherwise the value
        returned by ``prody.fetchPDB`` for ``args.PDB_fetch``.
    """
    local_file = args.PDB_file
    if local_file:
        return local_file
    fetched = prody.fetchPDB(args.PDB_fetch, compressed=False)
    # Emit an empty line after the download.
    print()
    return fetched
Ejemplo n.º 3
0
def voxelize(pdbname,spacing=10.0,padding=0.0):
    """Write the occupied grid cells of a structure to ``<pdbname>.vox``.

    Atom coordinates are centred on their mean, divided by ``spacing`` and
    truncated to integers; the distinct integer triples (via the file's
    ``unique_rows`` helper) are scaled back by ``spacing`` and saved as
    text, one point per line.

    :param pdbname: a 4-character PDB identifier (downloaded with ProDy and
        suffixed with ``.pdb``) or the name of a PDB file.
    :param spacing: edge length of a grid cell.
    :param padding: accepted for interface compatibility; it does not
        influence the output.
    """
    if len(pdbname) == 4:  # bare PDB id: download it first
        prody.fetchPDB(pdbname, compressed=False)
        pdbname += ".pdb"
    mol = prody.parsePDB(pdbname)
    coords = mol.getCoords()
    centered = coords - np.average(coords, 0)
    # NOTE: astype(int) truncates toward zero, so the cell containing the
    # origin covers (-spacing, spacing) along each axis.
    ijk = (1/spacing * centered).astype(int)
    out_coords = unique_rows(ijk)*spacing
    np.savetxt(pdbname+".vox", out_coords, delimiter=' ', fmt='%f')
    print ("success ", pdbname+".vox")
Ejemplo n.º 4
0
def voxelize_avg(pdbname,spacing=20.0,padding=20.0):
    """Voxelize a structure and also write the mean atom position per cell.

    Two text files are produced:

    * ``<pdbname>.xyz`` -- the distinct grid cells, converted back to
      coordinates (``cell * spacing + bot``);
    * ``<pdbname>_avg.xyz`` -- for every occupied cell, in order of first
      appearance, the average of the centred coordinates of its atoms.

    :param pdbname: a 4-character PDB identifier (downloaded with ProDy and
        suffixed with ``.pdb``) or the name of a PDB file.
    :param spacing: edge length of a grid cell.
    :param padding: offset added to the per-axis minimum of the centred
        coordinates to obtain the grid origin ``bot``.
    """
    if len(pdbname) == 4:  # bare PDB id: download it first
        prody.fetchPDB(pdbname, compressed=False)
        pdbname += ".pdb"
    mol = prody.parsePDB(pdbname)
    c = mol.getCoords()
    center_c = c - np.average(c,0)
    bot = np.min(center_c,0)+padding

    # NOTE: astype(int) truncates toward zero and ``center_c - bot`` can be
    # negative when padding > 0, so cells next to the origin are merged.
    ijk = (1/spacing * (center_c-bot)).astype(int)
    ijku = unique_rows(ijk)
    out_coords = (ijku*spacing)+bot
    np.savetxt(pdbname+".xyz", out_coords, delimiter=' ', fmt='%f')

    # Group atoms by cell with a dict lookup (one pass) while keeping the
    # cells in order of first appearance.
    slot_of_cell = {}
    members = []
    for cell, xyz in zip(ijk.tolist(), center_c.tolist()):
        key = tuple(cell)
        slot = slot_of_cell.get(key)
        if slot is None:
            slot_of_cell[key] = len(members)
            members.append([xyz])
        else:
            members[slot].append(xyz)

    cavg = [np.average(group, 0).tolist() for group in members]

    np.savetxt(pdbname+"_avg.xyz", cavg, delimiter=' ', fmt='%f')
    print ("success ", pdbname+".xyz")
Ejemplo n.º 5
0
def prody_fetch(opt):
    """Fetch PDB files from PDB FTP server.

    Identifiers come from ``opt.pdb`` and, when ``opt.listfn`` names an
    existing file, from that file as well: every comma- or
    whitespace-separated token of length 4 is appended to ``opt.pdb``.
    A ``listfn`` that is not a file is reported through
    ``opt.subparser.error``.  Files are fetched into ``opt.folder``,
    compressed according to ``opt.gzip``.
    """

    import prody
    pdblist = opt.pdb
    listfn = opt.listfn
    if listfn:
        if os.path.isfile(listfn):
            inp = prody.openFile(listfn)
            try:
                for line in inp:
                    # Commas and whitespace are both separators.
                    for pdb in line.replace(',', ' ').split():
                        if len(pdb) == 4:
                            pdblist.append(pdb)
            finally:
                # Close the list file even if reading it fails.
                inp.close()
        else:
            opt.subparser.error("No such file: '{0:s}'".format(listfn))
    prody.fetchPDB(pdblist, opt.folder, compressed=opt.gzip, copy=True)
Ejemplo n.º 6
0
def load_or_parse_residues(
        pdb1,
        chain_id1,
        repo_path=REPO_PATH,
        allowed_solving_methods=('SOLUTION NMR', 'X-RAY DIFFRACTION')):
    """Return the residues of one chain, from the cache or by parsing the PDB.

    If ``tostr(pdb1, chain_id1)`` is already in ``COORDS`` the stored
    residues are loaded.  Otherwise ``<repo_path>/<pdb1>.pdb.gz`` is parsed
    (after trying to fetch it when missing) and the result is stored with
    ``store_residues``.

    :param pdb1: PDB identifier.
    :param chain_id1: chain identifier passed to ``parsePDB``.
    :param repo_path: directory holding the ``.pdb.gz`` files.
    :param allowed_solving_methods: accepted values of the header's
        ``'experiment'`` entry.
    :return: the residues, or ``None`` when the file cannot be obtained or
        parsed, or when its experiment type is missing or not allowed.
    """
    if tostr(pdb1, chain_id1) in COORDS:
        return load_residues(pdb1, chain_id1)
    src_path = os.path.join(repo_path, '%s.pdb.gz' % pdb1)
    if not os.path.exists(src_path):
        fetchPDB(pdb1, folder=os.path.dirname(src_path))
    # The download may have failed; re-check before parsing.
    if not os.path.exists(src_path):
        return None
    st1, h1 = parsePDB(src_path, header=True, chain=chain_id1)
    if (st1 is None) or (h1 is None):
        return None
    # .get(): a header without an 'experiment' entry is treated as
    # "method not allowed" instead of raising KeyError.
    if h1.get('experiment') not in allowed_solving_methods:
        return None
    return store_residues(st1, pdb1, chain_id1)
Ejemplo n.º 7
0
def prodyLoad(pdbname,biomt=False):
    """Load a structure and return its coordinates centred on their mean.

    The centred coordinates are also written to
    ``<pdb_directory>/<pdbname>_cl.txt``.

    :param pdbname: a 4-character PDB identifier (lower-cased, downloaded
        with ProDy and suffixed with ``.pdb``) or a file name, to which
        ``.pdb`` is appended when missing.
    :param biomt: when true, parse the header as well and, if it carries
        biomolecular transformations, build the biomolecule from them.
    :return: array of centred coordinates.
    """
    if len(pdbname) == 4:  # bare PDB id: download it first
        pdbname = pdbname.lower()
        prody.fetchPDB(pdbname, compressed=False)
        pdbname += ".pdb"
    elif pdbname[-4:] != ".pdb":
        pdbname += ".pdb"

    if biomt:
        mol, header = prody.parsePDB(pdbname, header=True)
        # .get(): headers without a 'biomoltrans' entry are skipped
        # instead of raising KeyError.
        # NOTE(review): buildBiomolecules may return a list when several
        # biomolecules are defined -- confirm against the ProDy version used.
        if header.get('biomoltrans'):
            mol = prody.buildBiomolecules(header, mol)
    else:
        mol = prody.parsePDB(pdbname, header=False)

    c = mol.getCoords()
    center_c = c - np.average(c,0)
    np.savetxt(pdb_directory+os.sep+pdbname+"_cl.txt",
               center_c, fmt='%f')
    return center_c
Ejemplo n.º 8
0
def doKmeans(pdbname,spacing=20.0,padding=20.0, percentile=0.001):
    """Cluster the atoms of a structure with k-means and save the result.

    Writes the cluster centres to ``<pdbname>_kmeans2.txt`` and the centred
    atom coordinates to ``<pdbname>_cl.txt``.

    The number of clusters starts as ``round(n_atoms * percentile)``; it is
    multiplied by 5 when every atom matches the ``'ca'`` selection, falls
    back to ``n_atoms / 10`` when zero, is doubled when it is 6 or less,
    and is finally capped at the number of atoms.  If it is still zero the
    function prints a message and returns without writing anything.

    :param pdbname: a 4-character PDB identifier (lower-cased, downloaded
        with ProDy and suffixed with ``.pdb``) or a file name, to which
        ``.pdb`` is appended when missing.
    :param spacing: unused; kept for interface compatibility.
    :param padding: unused; kept for interface compatibility.
    :param percentile: fraction of the atom count used as cluster count.
    """
    if len(pdbname) == 4:  # bare PDB id: download it first
        pdbname = pdbname.lower()
        prody.fetchPDB(pdbname, compressed=False)
        pdbname += ".pdb"
    elif pdbname[-4:] != ".pdb":
        pdbname += ".pdb"
    mol = prody.parsePDB(pdbname, header=False)
    na = mol.numAtoms()

    c = mol.getCoords()
    center_c = c - np.average(c,0)
    npoints = len(center_c)
    ncluster = int(round(npoints*percentile))
    if mol.numAtoms('ca') == na:
        ncluster *= 5
    if ncluster == 0:
        ncluster = int(na/10.0)
    if ncluster <= 6:
        ncluster *= 2
    if ncluster == 0:
        print (pdbname,"no cluster")
        return
    # KMeans cannot produce more clusters than there are points.
    ncluster = min(ncluster, npoints)
    print (ncluster)

    k_means = KMeans(init='k-means++', n_clusters=ncluster, n_init=10)
    k_means.fit(center_c)
    np.savetxt(pdbname+"_kmeans2.txt",
               k_means.cluster_centers_, fmt='%f')
    np.savetxt(pdbname+"_cl.txt",
               center_c, fmt='%f')
Ejemplo n.º 9
0
def prody_fetch(*pdb, **kwargs):
    """Fetch PDB files from PDB FTP server.

    :arg pdb: PDB identifier(s), or a single name of an existing file that
        lists identifiers separated by whitespace and/or commas

    :arg folder: target directory for saving PDB file(s), default is ``'.'``

    :arg gzip: gzip fetched files or not, default is ``False``"""

    import prody

    # *args arrive as an immutable tuple; work on a list.
    pdblist = list(pdb)
    if len(pdblist) == 1 and os.path.isfile(pdblist[0]):
        from prody.utilities import openFile
        listfile = pdblist[0]
        # The single argument is a list file, not an identifier: fetch the
        # identifiers it contains rather than the file name itself.
        pdblist = []
        with openFile(listfile) as inp:
            for item in inp.read().strip().split():
                for ident in item.split(','):
                    if len(ident) == 4 and ident.isalnum():
                        pdblist.append(ident)

    prody.fetchPDB(*pdblist,
                   folder=kwargs.get('folder', '.'),
                   compressed=kwargs.get('gzip', False),
                   copy=True)
Ejemplo n.º 10
0
def parse_pdb_files(input_pdb):
    """Normalise the supported ways of naming PDB inputs into a list.

    :param input_pdb: one of

        * a directory (``str`` or ``Path``): its ``*.pdb`` files are returned;
        * a file (``str`` or ``Path``): its stripped content is split on
          newlines and returned;
        * any other string/path: the string itself is split on newlines;
        * an iterable: returned as a list; when its first element is not an
          existing file every element is passed to ``pd.fetchPDB`` and the
          results are returned instead.
    :return: list of file paths / names; empty for an empty iterable.
    """
    if isinstance(input_pdb, (str, Path)):
        input_pdb = Path(input_pdb)
        if input_pdb.is_dir():
            return list(input_pdb.glob("*.pdb"))
        if input_pdb.is_file():
            with open(input_pdb) as f:
                return f.read().strip().split("\n")
        return str(input_pdb).split("\n")

    pdb_files = list(input_pdb)
    # Guard against an empty iterable before peeking at the first element.
    if pdb_files and not Path(pdb_files[0]).is_file():
        pdb_files = [pd.fetchPDB(pdb_name) for pdb_name in pdb_files]
    return pdb_files
Ejemplo n.º 11
0
def prody_fetch(*pdb, **kwargs):
    """Fetch PDB files from PDB FTP server.

    :arg pdb: PDB identifier(s), or a single name of an existing file that
        lists identifiers separated by whitespace and/or commas

    :arg folder: target directory for saving PDB file(s), default is ``'.'``

    :arg gzip: gzip fetched files or not, default is ``False``"""

    import prody

    # *args arrive as an immutable tuple; work on a list.
    pdblist = list(pdb)
    if len(pdblist) == 1 and os.path.isfile(pdblist[0]):
        from prody.utilities import openFile
        listfile = pdblist[0]
        # The single argument is a list file, not an identifier: fetch the
        # identifiers it contains rather than the file name itself.
        pdblist = []
        with openFile(listfile) as inp:
            for item in inp.read().strip().split():
                for ident in item.split(','):
                    if len(ident) == 4 and ident.isalnum():
                        pdblist.append(ident)

    prody.fetchPDB(*pdblist, folder=kwargs.get('folder', '.'),
                   compressed=kwargs.get('gzip', False),
                   copy=True)
def getPDB(pdbId):
    """
    Downloads a pdb from the Protein Data Bank (if necessary) and removes all models so that it only has one
    model.

    @param pdbId: A 4 letter pdb id

    @return: The downloaded pdb data structure.
    """
    # Download pdb
    path = prody.fetchPDB(pdbId, compressed = False)
    # Get pdb data structure
    pdb = prody.parsePDB(path)
    # Ask the structure itself for its model count: going through
    # select("protein") fails when the structure contains no protein atoms.
    number_of_models = pdb.numCoordsets()
    # Delete all coordsets but coordset 0.  Index 1 is removed each time
    # because the remaining coordsets shift down after every deletion.
    for _ in range(1, number_of_models):
        pdb.delCoordset(1)
    return pdb
Ejemplo n.º 13
0
def prody_blast(opt):
    """Blast search PDB based on command line arguments.

    Reads ``opt.seq`` (a sequence, or a FASTA file whose first sequence is
    used), validates ``opt.identity`` and ``opt.coverage``, runs the blast
    search and prints one line per hit (identifier, chain, percent
    identity, title) in order of decreasing percent identity.  When
    ``opt.folder`` is set the hits are also downloaded into it, compressed
    according to ``opt.gzip``.  Invalid input is reported through
    ``opt.subparser.error``.
    """

    import prody
    LOGGER = prody.LOGGER
    seq = opt.seq
    if os.path.isfile(seq):
        # Keep the file name: the log message must name the file, not the
        # sequence that was read from it.
        fasta_path = seq
        title, seq = readFirstSequenceFasta(fasta_path)
        LOGGER.info("First sequence ({0:s}) is parsed from {1:s}."
                    .format(title, repr(fasta_path)))
    if not seq.isalpha() or not seq.isupper():
        opt.subparser.error("{0:s} is not a valid sequence or a file"
                            .format(repr(seq)))

    identity, coverage = opt.identity, opt.coverage
    if not 0 < identity < 100:
        opt.subparser.error('identity must be between 0 and 100')
    if not 0 < coverage < 100:
        opt.subparser.error('overlap must be between 0 and 100')

    blast_results = prody.blastPDB(seq)
    hits = blast_results.getHits(percent_identity=identity,
                                 percent_coverage=coverage)

    # Sort hits by decreasing percent identity, ties by identifier.
    ranked = sorted(
        hits, key=lambda code: (-hits[code]['percent_identity'], code))

    for code in ranked:
        hit = hits[code]
        print(code + ' ' + hit['chain_id'] + ' ' +
              ('%5.1f%%' % (hit['percent_identity'])) +
              ' ' + hit['title'])

    # download hits if --folder is given
    if opt.folder:
        LOGGER.info('Downloading hits to ' + opt.folder)
        prody.fetchPDB(ranked, opt.folder,
                       compressed=opt.gzip, copy=True)
def get_pdb(pdb_id, selection):
    """
    Downloads a pdb from the Protein Data Bank (if necessary), keeps only the selected atoms and removes all
    models so that it only has one model.

    :param pdb_id: A 4 letter pdb id
    :param selection: ProDy selection string applied to the parsed structure

    :return: The downloaded pdb prody data structure and the path to the downloaded file.

    :raises ValueError: if the selection matches no atoms.
    """
    # Download pdb
    path = prody.fetchPDB(pdb_id, compressed=False)

    # Get pdb data structure
    structure = prody.parsePDB(path)
    selected = structure.select(selection)
    if selected is None:
        # select() returns None for an empty match; fail with a clear
        # message instead of an AttributeError on None.
        raise ValueError("selection {0!r} matches no atoms in {1}"
                         .format(selection, pdb_id))
    pdb = selected.copy()
    number_of_models = pdb.numCoordsets()

    # Delete all coordsets but coordset 0.  Index 1 is removed each time
    # because the remaining coordsets shift down after every deletion.
    for _ in range(1, number_of_models):
        pdb.delCoordset(1)
    return pdb, path
Ejemplo n.º 15
0
def get_pdb(pdb_id, selection):
    """
    Downloads a pdb from the Protein Data Bank (if necessary), keeps only the selected atoms and removes all
    models so that it only has one model.

    :param pdb_id: A 4 letter pdb id
    :param selection: ProDy selection string applied to the parsed structure

    :return: The downloaded pdb prody data structure and the path to the downloaded file.

    :raises ValueError: if the selection matches no atoms.
    """
    # Download pdb
    path = prody.fetchPDB(pdb_id, compressed=False)

    # Get pdb data structure
    structure = prody.parsePDB(path)
    selected = structure.select(selection)
    if selected is None:
        # select() returns None for an empty match; fail with a clear
        # message instead of an AttributeError on None.
        raise ValueError("selection {0!r} matches no atoms in {1}"
                         .format(selection, pdb_id))
    pdb = selected.copy()
    number_of_models = pdb.numCoordsets()

    # Delete all coordsets but coordset 0.  Index 1 is removed each time
    # because the remaining coordsets shift down after every deletion.
    for _ in range(1, number_of_models):
        pdb.delCoordset(1)
    return pdb, path
Ejemplo n.º 16
0
	def compare():
		"""Download a PDB file for every protein directory under challengedata.

		Every directory name found while walking ``wd + '/challengedata'``
		is treated as a PDB identifier, except names containing ``'celpp'``
		and the bookkeeping names listed in ``skipped``.  Each identifier
		is fetched once with ProDy.
		"""
		challenge_dir = wd + '/challengedata'

		# Point ProDy's local PDB folder at challengedata/PDBfiles.
		prody.pathPDBFolder(challenge_dir + '/PDBfiles')

		skipped = ('latest.txt', 'answers', 'rdkit-scripts', 'PDBfiles')

		# os.walk yields a *list* of directory names per level, so the
		# names have to be tested one by one.
		proteins = []
		for (_, dirnames, _) in os.walk(challenge_dir):
			for name in dirnames:
				if name in skipped or 'celpp' in name or name in proteins:
					continue
				proteins.append(name)

		# download pdb using prody
		for x in proteins:
			prody.fetchPDB(x)
Ejemplo n.º 17
0
def worker(args):
    """Download one PDB entry into its own directory under ``datadir``.

    :param args: ``(count, pdbid)`` pair.
    :return: ``dict(count=count, pdbid=pdbid)`` so the caller can match the
        result to its request.
    """
    count, pdbid = args
    target = f'{datadir}/{pdbid}'
    os.makedirs(target, mode=0o755, exist_ok=True)
    # Download straight into the target folder instead of chdir-ing there:
    # changing the working directory leaks into later calls in the same
    # process and breaks a relative ``datadir``.
    prody.fetchPDB(pdbid, folder=target)
    return dict(count=count, pdbid=pdbid)
Ejemplo n.º 18
0
    def __init__(self, comb, pdb_acc_code, chain, **kwargs):
        """Parse a PDB entry and precompute the data used for iFG/vdM analysis.

        The structure is taken from, in order of preference: a file in
        ``comb.input_dir_pdb`` whose name contains ``pdb_acc_code``; the
        ``path_to_pdb`` keyword; or a fresh download that is run through
        the external Reduce program.

        :comb: arg: instance of cls Comb with attributes pdbchain_dict, ifg_selection_info
        :pdb_acc_code: type: str: 4 character pdb accession code
        :chain: chain identifier used in the ``'protein and chain ...'`` selection
        :param kwargs:
            path_to_pdb
            path_to_dssp
        """
        #search for acc code in input_dir_pdb from comb object.
        assert isinstance(pdb_acc_code,
                          str), 'PDB accession code needs to be a string'
        pdb_file = [
            file.name for file in os.scandir(comb.input_dir_pdb)
            if pdb_acc_code in file.name
        ]
        try:
            if pdb_file:
                pdb_file = pdb_file[0]
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + pdb_file,
                                             altloc='A',
                                             model=1)
            elif 'path_to_pdb' in kwargs:
                self.prody_pdb = pr.parsePDB(kwargs.get('path_to_pdb'),
                                             altloc='A',
                                             model=1)
            else:  # NEED TO UPDATE: note if going to fetch pdb, it should be sent through Reduce first...
                # Create both work directories independently, so that an
                # existing 'raw' does not prevent 'reduce' from being made.
                for subdir in ('raw', 'reduce'):
                    os.makedirs(comb.input_dir_pdb + subdir, exist_ok=True)
                pr.fetchPDB(pdb_acc_code,
                            compressed=False,
                            folder=comb.input_dir_pdb + 'raw')
                # Run Reduce on the raw download; its output is redirected
                # to reduce/<code>H.pdb.
                os.system(comb.path_to_reduce + comb.reduce +
                          ' -FLIP -Quiet -DB ' + comb.path_to_reduce +
                          'reduce_wwPDB_het_dict.txt ' + comb.input_dir_pdb +
                          'raw/' + pdb_acc_code.lower() + '.pdb > ' +
                          comb.input_dir_pdb + 'reduce/' +
                          pdb_acc_code.lower() + 'H.pdb')
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + 'reduce/' +
                                             pdb_acc_code.lower() + 'H.pdb',
                                             altloc='A',
                                             model=1)
        except NameError:
            raise NameError(
                'ParsePDB instance needs a pdb file path or a valid pdb accession code.'
            )

        self.pdb_acc_code = pdb_acc_code.lower()
        self.pdb_chain = chain
        # Continue only when every atom matches 'icode _' and the requested
        # chain contains protein atoms.
        if len(self.prody_pdb) == len(self.prody_pdb.select('icode _')) \
                and self.prody_pdb.select('protein and chain ' + self.pdb_chain) is not None:
            self.contacts = pr.Contacts(self.prody_pdb)
            self.set_bonds()

            # Build the freesasa structure from the same file that was parsed.
            if pdb_file:
                self.fs_struct = freesasa.Structure(comb.input_dir_pdb +
                                                    pdb_file)
            elif 'path_to_pdb' in kwargs:
                self.fs_struct = freesasa.Structure(kwargs.get('path_to_pdb'))
            else:
                path = comb.input_dir_pdb + 'reduce/'
                self.fs_struct = freesasa.Structure(path + next(
                    file.name for file in os.scandir(path)
                    if self.pdb_acc_code in file.name))

            self.fs_result = freesasa.calc(self.fs_struct)

            self.fs_result_cb_3A = self.freesasa_cb(probe_radius=3)
            self.fs_result_cb_4A = self.freesasa_cb(probe_radius=4)
            self.fs_result_cb_5A = self.freesasa_cb(probe_radius=5)
            self.prody_pdb_bb_cb_atom_ind = self.prody_pdb.select(
                'protein and (backbone or name CB) '
                'and not element H D').getIndices()

            # DSSP data: existing file, explicit path, or run DSSP now.
            dssp_file = [
                file.name for file in os.scandir(comb.input_dir_dssp)
                if pdb_acc_code in file.name
            ]
            if dssp_file:
                dssp_file = dssp_file[0]
                self.dssp = pr.parseDSSP(comb.input_dir_dssp + dssp_file,
                                         self.prody_pdb)
            elif 'path_to_dssp' in kwargs:
                self.dssp = pr.parseDSSP(kwargs.get('path_to_dssp'),
                                         self.prody_pdb)
            else:
                if pdb_file:
                    pr.execDSSP(comb.input_dir_pdb + pdb_file,
                                outputdir=comb.input_dir_dssp)
                elif 'path_to_pdb' in kwargs:
                    pr.execDSSP(kwargs.get('path_to_pdb'),
                                outputdir=comb.input_dir_dssp)
                else:
                    path = comb.input_dir_pdb + 'reduce/' + next(
                        file.name
                        for file in os.scandir(comb.input_dir_pdb + 'reduce')
                        if pdb_acc_code in file.name)
                    pr.execDSSP(path, outputdir=comb.input_dir_dssp)

                self.dssp = pr.parseDSSP(
                    comb.input_dir_dssp +
                    next(file.name for file in os.scandir(comb.input_dir_dssp)
                         if pdb_acc_code in file.name), self.prody_pdb)
            self.possible_ifgs = self.find_possible_ifgs(comb)
        else:
            self.possible_ifgs = None
        # valence and hydrogen bond data for vandermers and iFGs of ParsedPDB protein instance
        # iFG specific:
        self._ifg_pdb_info = []
        self._ifg_atom_density = []
        self._ifg_contact_water = []
        self._ifg_contact_ligand = []
        self._ifg_contact_metal = []
        # vdM specific:
        self._vdm_pdb_info = []
        self._vdm_sasa_info = []
        self._ifg_contact_vdm = []
        self._ifg_hbond_vdm = []
        self._ifg_hbond_water = []
        self._ifg_hbond_ligand = []
        self._ifg_ca_hbond_vdm = []
    temp[0].setChids('A')
    temp[1].setChids('A')
    # build chain
    for i in range(len(sequence) - 1):
        neighborAA = None
        if (isNeighborDependent):
            if (isRightNeighbor):
                if (i + 2 < len(sequence)):
                    neighborAA = sequence[i + 2]
            else:
                neighborAA = sequence[i]
        aa = sequence[i + 1]
        diamid = thedb.query(aa, neighborAA)
        chain = chainer.appendDiamid2Chain(chain, diamid, i + 2)
    chain = chain.select('not resnum 0').copy()
    chain = chain.select('not resnum ' + str(len(sequence) + 1)).copy()
    return chain


if __name__ == '__main__':
    # Demo: rebuild the structure of PDB entry 1d3z from its sequence.
    # Parse the path returned by fetchPDB rather than a hard-coded file
    # name, so the demo does not depend on where ProDy stores downloads.
    pdb_path = prody.fetchPDB('1d3z')
    pdb = prody.parsePDB(pdb_path)
    sequence = pdb.select('name CA').getSequence()
    thedb = db.DB('samples/TDRD_R_TCBIG.json', 'data/diamides',
                  'samples/NDRD_R_TCBIG_pretty.json')
    structure = structure_builder(sequence, thedb, True, True)
    prody.writePDB('1d3z_test_ndrd_out.pdb', structure)
    print(structure)
    print(structure.getCoords())
Ejemplo n.º 20
0
def prody_blast(sequence, **kwargs):
    """Blast search PDB and download hits.

    :arg sequence: sequence or file in fasta format

    :arg identity: percent sequence identity for blast search, default is 90.0
    :type identity: float

    :arg overlap: percent sequence overlap between sequences, default is 90.0
    :type overlap: float

    :arg outdir: download uncompressed PDB files to given directory
    :type outdir: str

    :arg gzip: write compressed PDB file

    :arg stdout: when true, write one line per hit to standard output and
        return nothing; otherwise the hits are returned, default is ``False``

    *Blast Parameters*

    :arg filename: a *filename* to save the results in XML format
    :type filename: str

    :arg hitlist_size: search parameters, default is 250
    :type hitlist_size: int

    :arg expect: search parameters, default is 1e-10
    :type expect: float

    :arg sleep: how long to wait to reconnect for results, default is 2
                sleep time is doubled when results are not ready.
    :type sleep: int

    :arg timeout: when to give up waiting for results. default is 30
    :type timeout: int

    :return: unless *stdout* is true, a list of
        ``(pdb, chain, percent identity string, title)`` tuples sorted by
        decreasing percent identity

    :raises ValueError: for an invalid sequence, identity or overlap
    :raises IOError: when the blast search did not succeed"""

    import prody
    LOGGER = prody.LOGGER
    if os.path.isfile(sequence):
        # Keep the file name: the log message must name the file, not the
        # sequence that was read from it.
        fasta_path = sequence
        title, sequence = readFirstSequenceFasta(fasta_path)
        LOGGER.info("First sequence ({0}) is parsed from {1}.".format(
            title, repr(fasta_path)))
    if not sequence.isalpha() or not sequence.isupper():
        raise ValueError("{0} is not a valid sequence or a file".format(
            repr(sequence)))

    outdir = kwargs.get('outdir')
    identity, overlap = kwargs.get('identity', 90), kwargs.get('overlap', 90)
    if not 0 < identity < 100:
        raise ValueError('identity must be between 0 and 100')
    if not 0 < overlap < 100:
        raise ValueError('overlap must be between 0 and 100')

    filename = kwargs.get('filename', None)
    hitlist_size = kwargs.get('hitlist_size', 250)
    expect = kwargs.get('expect', 1e-10)
    sleep, timeout = kwargs.get('sleep', 2), kwargs.get('timeout', 30)

    blast_results = prody.blastPDB(sequence,
                                   filename=filename,
                                   hitlist_size=hitlist_size,
                                   expect=expect,
                                   sleep=sleep,
                                   timeout=timeout)

    if not blast_results.isSuccess:
        raise IOError('blast search timed out, please try again')

    hits = blast_results.getHits(percent_identity=identity,
                                 percent_overlap=overlap)

    # Sort hits by decreasing percent identity, ties by identifier.
    ranked = sorted(
        hits, key=lambda code: (-hits[code]['percent_identity'], code))

    # Keep the flag separate from the stream so neither shadows the other.
    to_stdout = kwargs.get('stdout', False)
    if to_stdout:
        from sys import stdout
    else:
        finalHits = []

    for code in ranked:
        hit = hits[code]
        percent = '%5.1f%%' % (hit['percent_identity'])
        if to_stdout:
            # One hit per line; without the newline all hits run together.
            stdout.write(code + ' ' + hit['chain_id'] + ' ' +
                         percent + ' ' + hit['title'] + '\n')
        else:
            finalHits.append((code, hit['chain_id'], percent, hit['title']))

    # download hits if --outdir is given
    if outdir:
        LOGGER.info('Downloading hits to ' + outdir)
        prody.fetchPDB(ranked,
                       outdir,
                       compressed=kwargs.get('gzip'),
                       copy=True)

    if not to_stdout:
        return finalHits
Ejemplo n.º 21
0
def prody_blast(sequence, **kwargs):
    """Blast search PDB and download hits.

    :arg sequence: sequence or file in fasta format

    :arg identity: percent sequence identity for blast search, default is 90.0
    :type identity: float

    :arg overlap: percent sequence overlap between sequences, default is 90.0
    :type overlap: float

    :arg outdir: download uncompressed PDB files to given directory
    :type outdir: str

    :arg gzip: write compressed PDB file

    :arg stdout: when true, write one line per hit to standard output and
        return nothing; otherwise the hits are returned, default is ``False``

    *Blast Parameters*

    :arg filename: a *filename* to save the results in XML format
    :type filename: str

    :arg hitlist_size: search parameters, default is 250
    :type hitlist_size: int

    :arg expect: search parameters, default is 1e-10
    :type expect: float

    :arg sleep: how long to wait to reconnect for results, default is 2
                sleep time is doubled when results are not ready.
    :type sleep: int

    :arg timeout: when to give up waiting for results. default is 30
    :type timeout: int

    :return: unless *stdout* is true, a list of
        ``(pdb, chain, percent identity string, title)`` tuples sorted by
        decreasing percent identity

    :raises ValueError: for an invalid sequence, identity or overlap
    :raises IOError: when the blast search returned no results"""

    import prody
    LOGGER = prody.LOGGER
    if os.path.isfile(sequence):
        # Keep the file name: the log message must name the file, not the
        # sequence that was read from it.
        fasta_path = sequence
        title, sequence = readFirstSequenceFasta(fasta_path)
        LOGGER.info("First sequence ({0}) is parsed from {1}."
                    .format(title, repr(fasta_path)))
    if not sequence.isalpha() or not sequence.isupper():
        raise ValueError("{0} is not a valid sequence or a file"
                        .format(repr(sequence)))

    outdir = kwargs.get('outdir')
    identity, overlap = kwargs.get('identity', 90), kwargs.get('overlap', 90)
    if not 0 < identity < 100:
        raise ValueError('identity must be between 0 and 100')
    if not 0 < overlap < 100:
        raise ValueError('overlap must be between 0 and 100')

    filename = kwargs.get('filename', None)
    hitlist_size = kwargs.get('hitlist_size', 250)
    expect = kwargs.get('expect', 1e-10)
    sleep, timeout = kwargs.get('sleep', 2), kwargs.get('timeout', 30)

    blast_results = prody.blastPDB(sequence,filename=filename,
                                   hitlist_size=hitlist_size, expect=expect,
                                   sleep=sleep, timeout=timeout)

    if blast_results is None:
        raise IOError('blast search timed out, please try again')

    hits = blast_results.getHits(percent_identity=identity,
                                 percent_overlap=overlap)

    # Sort hits by decreasing percent identity, ties by identifier.
    ranked = sorted(
        hits, key=lambda code: (-hits[code]['percent_identity'], code))

    # Keep the flag separate from the stream so neither shadows the other.
    to_stdout = kwargs.get('stdout', False)
    if to_stdout:
        from sys import stdout
    else:
        finalHits = []

    for code in ranked:
        hit = hits[code]
        percent = '%5.1f%%' % (hit['percent_identity'])
        if to_stdout:
            # One hit per line; without the newline all hits run together.
            stdout.write(code + ' ' + hit['chain_id'] + ' ' +
                         percent + ' ' + hit['title'] + '\n')
        else:
            finalHits.append((code, hit['chain_id'], percent, hit['title']))

    # download hits if --output-dir is given
    if outdir:
        LOGGER.info('Downloading hits to ' + outdir)
        prody.fetchPDB(ranked, outdir,
                       compressed=kwargs.get('gzip'), copy=True)

    if not to_stdout:
        return finalHits
Ejemplo n.º 22
0
    def __init__(self, comb, pdb_acc_code, chain, **kwargs):
        """Parse a PDB entry and precompute the data used for iFG/vdM analysis.

        The structure is taken from, in order of preference: a file in
        ``comb.input_dir_pdb`` whose name contains ``pdb_acc_code``; the
        ``path_to_pdb`` keyword; or a fresh download that is run through
        the external Reduce program.

        :comb: arg: instance of cls Comb with attributes pdbchain_dict, ifg_selection_info
        :pdb_acc_code: type: str: 4 character pdb accession code
        :chain: chain identifier used in the ``'protein and chain ...'`` selection
        :param kwargs:
            path_to_pdb
            path_to_dssp
        """
        #search for acc code in input_dir_pdb from comb object.
        assert isinstance(pdb_acc_code,
                          str), 'PDB accession code needs to be a string'
        pdb_file = [
            file.name for file in os.scandir(comb.input_dir_pdb)
            if pdb_acc_code in file.name
        ]
        try:
            if pdb_file:
                pdb_file = pdb_file[0]
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + pdb_file,
                                             altloc='A',
                                             model=1)
            elif 'path_to_pdb' in kwargs:
                self.prody_pdb = pr.parsePDB(kwargs.get('path_to_pdb'),
                                             altloc='A',
                                             model=1)
            else:
                # Create both work directories independently, so that an
                # existing 'raw' does not prevent 'reduce' from being made.
                for subdir in ('raw', 'reduce'):
                    os.makedirs(comb.input_dir_pdb + subdir, exist_ok=True)
                pr.fetchPDB(pdb_acc_code,
                            compressed=False,
                            folder=comb.input_dir_pdb + 'raw')
                # Run Reduce on the raw download; its output is redirected
                # to reduce/<code>H.pdb.
                os.system(comb.path_to_reduce + comb.reduce +
                          ' -FLIP -Quiet -DB ' + comb.path_to_reduce +
                          'reduce_wwPDB_het_dict.txt ' + comb.input_dir_pdb +
                          'raw/' + pdb_acc_code.lower() + '.pdb > ' +
                          comb.input_dir_pdb + 'reduce/' +
                          pdb_acc_code.lower() + 'H.pdb')
                self.prody_pdb = pr.parsePDB(comb.input_dir_pdb + 'reduce/' +
                                             pdb_acc_code.lower() + 'H.pdb',
                                             altloc='A',
                                             model=1)
        except NameError:
            raise NameError(
                'ParsePDB instance needs a pdb file path or a valid pdb accession code.'
            )

        self.pdb_acc_code = pdb_acc_code.lower()
        self.pdb_chain = chain
        # Continue only when every atom matches 'icode _' and the requested
        # chain contains protein atoms.
        if len(self.prody_pdb) == len(self.prody_pdb.select('icode _')) \
                and self.prody_pdb.select('protein and chain ' + self.pdb_chain) is not None:
            self.contacts = pr.Contacts(self.prody_pdb)
            self.set_bonds()

            # Build the freesasa structure from the same file that was parsed.
            if pdb_file:
                self.fs_struct = freesasa.Structure(comb.input_dir_pdb +
                                                    pdb_file)
            elif 'path_to_pdb' in kwargs:
                self.fs_struct = freesasa.Structure(kwargs.get('path_to_pdb'))
            else:
                path = comb.input_dir_pdb + 'reduce/'
                self.fs_struct = freesasa.Structure(path + next(
                    file.name for file in os.scandir(path)
                    if self.pdb_acc_code in file.name))

            self.fs_result = freesasa.calc(self.fs_struct)

            self.fs_result_cb_3A = self.freesasa_cb(probe_radius=3)
            self.fs_result_cb_4A = self.freesasa_cb(probe_radius=4)
            self.fs_result_cb_5A = self.freesasa_cb(probe_radius=5)
            self.prody_pdb_bb_cb_atom_ind = self.prody_pdb.select(
                'protein and (backbone or name CB) '
                'and not element H D').getIndices()

            # DSSP data is parsed only when a matching file already exists.
            dssp_file = [
                file.name for file in os.scandir(comb.input_dir_dssp)
                if pdb_acc_code in file.name
            ]
            if dssp_file:
                dssp_file = dssp_file[0]
                self.parse_dssp(dssp_file, comb)

            # With a query path the iFG search goes through the RMSD-based
            # finder, otherwise through the default one.
            if comb.query_path:
                self.possible_ifgs = self.find_possible_ifgs_rmsd(
                    comb, rmsd_threshold=comb.rmsd_threshold)
            else:
                self.possible_ifgs = self.find_possible_ifgs(comb)
        else:
            self.possible_ifgs = None

        self.alphahull = self.set_alphahull()
        self.segnames = sorted(np.unique(self.prody_pdb.getSegnames()))
        # iFG specific:
        self._ifg_pdb_info = []
        self._ifg_atom_density = []
        self._ifg_contact_water = []
        self._ifg_contact_ligand = []
        self._ifg_contact_metal = []
        # vdM specific:
        self._vdm_pdb_info = []
        self._vdm_sasa_info = []
        self._ifg_contact_vdm = []
        self._ifg_hbond_vdm = []
        self._ifg_hbond_water = []
        self._ifg_hbond_ligand = []
        self._ifg_ca_hbond_vdm = []
Ejemplo n.º 23
0
def fetch_pdb(data=csv):
    """Fetch every distinct PDB entry referenced in a tab-separated table.

    The table at *data* is read with pandas using a tab separator. The first
    four characters of each value in its ``IDs`` column are taken as the PDB
    identifier, duplicates are dropped, and the resulting list is handed to
    ``pdy.fetchPDB``. Whatever that call returns is returned unchanged.
    """
    table = pd.read_csv(data, sep='\t')
    # A set removes repeated identifiers; the order of the list is arbitrary.
    unique_ids = {entry[:4] for entry in table['IDs'].values}
    return pdy.fetchPDB(list(unique_ids))
Ejemplo n.º 24
0
def modelLoops(pdbid, chids, alnfile='temp.ali'):
    """Model the selected chains of a PDB entry with MODELLER's loop modelling.

    All work happens inside a temporary directory that is removed before the
    function returns. For each chain the structure is aligned against the
    sequence returned by ``getSeqres(pdbid, chid)[0]``, a ``loopmodel`` is
    built, and the first output model is renumbered after the original
    structure and parsed with ProDy. If anything in that pipeline raises, the
    original chain is written out and parsed instead.

    Args:
        pdbid: PDB identifier; passed to ``prody.fetchPDB`` and used as the
            structure file name for MODELLER.
        chids: Iterable of chain identifiers to process.
        alnfile: Name of the PIR alignment file written in the temporary
            directory.

    Returns:
        The per-chain ProDy structures added together in the order of
        *chids*, or an empty list when *chids* yields nothing.
    """
    from modeller import environ, model, alignment
    from modeller.automodel import loopmodel
    import os
    import tempfile
    import prody

    prevdir = os.getcwd()
    pdb = []
    with tempfile.TemporaryDirectory() as tmpdir:
        os.chdir(tmpdir)
        try:
            prody.fetchPDB(pdbid)
            e = environ()
            for chid in chids:
                knowns = pdbid + '_' + chid
                sequence = knowns + '_full'

                try:    # Try to model structure
                    aln = alignment(e)
                    m = model(e, file=pdbid,
                              model_segment=('FIRST:' + chid, 'LAST:' + chid))
                    aln.append_model(m, atom_files=pdbid, align_codes=knowns)
                    aln.append_sequence(getSeqres(pdbid, chid)[0])
                    aln[-1].code = sequence

                    aln.malign()
                    aln.write(file=alnfile, alignment_format='PIR')

                    a = loopmodel(e, alnfile=alnfile, knowns=knowns,
                                  sequence=sequence)
                    a.make()
                    pdbfile = a.outputs[0]['name']

                    h = model(e, file=pdbfile)
                    aln = alignment(e)
                    aln.append_model(m, atom_files=pdbid, align_codes=knowns)
                    aln.append_model(h, atom_files=pdbfile,
                                     align_codes=sequence)
                    # Restore old residue numbering and chain indexing
                    h.res_num_from(m, aln)
                    h.write(file=pdbfile)

                    if not pdb:
                        pdb = prody.parsePDB(pdbfile)
                    else:
                        pdb = pdb + prody.parsePDB(pdbfile)

                except Exception:
                    # If it fails, fall back to the original chain. Likely the
                    # original file has no gaps (i.e. NOT EFFICIENT).
                    # `Exception` rather than a bare except so that Ctrl-C and
                    # SystemExit are not mistaken for a modelling failure.
                    print('PDB %s chain %s could not be modelled' % (pdbid, chid))
                    ref = prody.parsePDB(pdbid)
                    sel = 'chain %s' % chid
                    atom = ref.select(sel)
                    reffile = knowns + '.pdb'
                    prody.writePDB(reffile, atom)

                    if not pdb:
                        pdb = prody.parsePDB(reffile)
                    else:
                        pdb = pdb + prody.parsePDB(reffile)
        finally:
            # Leave the temporary directory before it is deleted, and restore
            # the caller's working directory even when an error propagates.
            os.chdir(prevdir)

    return pdb
Ejemplo n.º 25
0
    def align():
        """Dock the LMCSS ligand of every target under ``wd/challengedata``.

        Walks ``challengedata/<week>/<target>/``. For each ``LMCSS*.pdb`` file
        that does not end in ``lig.pdb`` the code shells out to ``grep`` to
        write ``lmcss_rec.pdb``, runs ``rdkit-scripts/rdconf.py`` on each
        ``.smi`` file to produce ``lig.sdf``, runs ``smina`` for each
        ``*lig.pdb`` file, then runs ``obrms``, appends a line to
        ``visual.txt`` and copies ``lmcss_docked.sdf`` into
        ``challengedata/answers/<week>/<target>/``.

        Uses the module-level ``wd`` and changes the process working
        directory many times.

        NOTE(review): several statements below look unfinished (an undefined
        ``array``, file names passed to ``prody.superpose``, ``f.close``
        without a call); see the inline notes before relying on this function.
        """

        global wd
        ans = wd + '/challengedata/answers'
        if os.path.isdir(
                ans) == False:  #if the answers directory isnt formed make it
            os.mkdir(wd + '/challengedata/answers')
        rddir = wd + '/challengedata/rdkit-scripts'
        if os.path.isdir(rddir) == False:
            # NOTE(review): the clone lands in the current working directory,
            # which is only `rddir` if the caller is already in challengedata.
            a = 'git clone https://github.com/dkoes/rdkit-scripts'
            os.system(a)
        data = os.listdir(wd + '/challengedata')
        for x in (data):  #for each weeks data
            if x == "readme.txt" or x == "latest.txt" or x == "answers" or x == "rdkit-scripts" or x == 'PDBfiles' or x == 'visual.txt':
                pass
            else:
                toDir = wd + '/challengedata/answers/' + x
                if os.path.isdir(
                        toDir
                ) == False:  #if the path to answers dir doesnt exist
                    os.mkdir(toDir)  #make directory
                dock = os.listdir(wd + '/challengedata/' + x)
                for y in (dock):
                    # Path of a previously produced result; it is built from the
                    # current working directory, not from `wd`.
                    a = str(os.getcwd() + '/answers/' + x + '/' + y +
                            '/lmcss_docked.sdf')
                    if y == 'readme.txt' or y == 'new_release_structure_sequence_canonical.tsv' or y == 'new_release_structure_nonpolymer.tsv' or y == 'new_release_crystallization_pH.tsv' or y == 'new_release_structure_sequence.tsv':
                        pass
                    elif (os.path.isfile(a) == True):
                        # Result already present: skip this target.
                        pass
                    else:
                        input = os.listdir(wd + '/challengedata/' + x + '/' +
                                           y)
                        for z in (input):
                            # Only LMCSS*.pdb files that are not the *lig.pdb
                            # ligand file are processed.
                            if z.startswith("LMCSS") and z.endswith(".pdb"):
                                if (z.endswith("lig.pdb")):
                                    pass
                                else:
                                    # NOTE(review): str.strip removes any of the
                                    # characters '.', 'p', 'd', 'b' from both
                                    # ends, not the '.pdb' suffix; `id` also
                                    # shadows the builtin.
                                    id = z.strip('.pdb')

                                    sts = str("grep ATOM " + z +
                                              " > lmcss_rec.pdb"
                                              )  #creates receptor .pdb file
                                    cd = wd + '/challengedata'
                                    os.chdir(
                                        cd + '/' + x + '/' +
                                        y)  #change directory to week/ligand
                                    os.system(
                                        sts
                                    )  #runs and creates receptor .pdb file
                                    os.chdir(cd)  #back to challenge directory
                                    # `input` and `z` are rebound from here on;
                                    # the enclosing `for z` loop keeps iterating
                                    # over the listing it started with.
                                    input = os.listdir(
                                        cd + '/' + x + '/' + y
                                    )  #lists files inside ligand in certain week
                                    for z in (input):
                                        if z.endswith(
                                                ".smi"
                                        ):  # changes .smi -> lig.sdf
                                            cd = str(os.getcwd())
                                            sts = str(" " + cd + '/' + x +
                                                      '/' + y + '/' + z +
                                                      " lig.sdf --maxconfs 1")
                                            os.chdir(cd + '/' + x + '/' + y)
                                            os.system(
                                                cd +
                                                '/rdkit-scripts/rdconf.py' +
                                                sts)
                                            os.chdir(cd)

                                    for z in (input):  # runs smina
                                        if z.endswith("lig.pdb"):
                                            # This first command (output named
                                            # after `id`) is built but never
                                            # run: its os.system call below is
                                            # commented out.
                                            sts = str(
                                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                                + z + " -o " + id +
                                                "_docked.sdf")
                                            cd = str(
                                                os.getcwd())  #ligand directory
                                            os.chdir(cd + '/' + x + '/' + y)
                                            #os.system(sts)

                                            sts = str(
                                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                                + z + " -o lmcss_docked.sdf")
                                            # NOTE(review): there was no chdir
                                            # back after the chdir above, so
                                            # os.getcwd() is now the target
                                            # directory and the next chdir goes
                                            # to <target>/<x>/<y>, which
                                            # presumably does not exist - verify.
                                            cd = str(
                                                os.getcwd())  #ligand directory
                                            os.chdir(cd + '/' + x + '/' + y)
                                            os.system(sts)
                                            os.chdir(cd)

                                    cur = str(os.getcwd() + '/answers/' + x +
                                              '/' + y)
                                    if (os.path.isdir(cur) == True):
                                        os.chdir(cd + '/' + x + '/' + y)
                                        os.getcwd()  # result is discarded
                                        input = os.listdir(cd + '/' + x + '/' +
                                                           y)

                                        for i in (input):
                                            if i.endswith(
                                                    ".txt"
                                            ) and i != "center.txt" and i != "visual.txt":
                                                # NOTE(review): this handle is
                                                # never closed, and
                                                # strip('ligand, ') removes a set
                                                # of characters from both ends
                                                # rather than a prefix.
                                                f = open(i)
                                                lines = f.readlines()
                                                ligand = lines[2].strip(
                                                    'ligand, ')
                                                ligand = ligand.replace(
                                                    '\n', '')
                                                ligand = str(ligand)
                                                #gets the ligand from txt file
                                            if i.endswith("lig.pdb"):
                                                #see if pdb exists
                                                # NOTE(review): `ligand` is only
                                                # bound if a matching .txt file
                                                # was seen earlier in the listing.
                                                prody.fetchPDB(y)
                                                proteinPDB = prody.parsePDB(y)
                                                ourPDB = prody.parsePDB(
                                                    'lmcss_rec.pdb')
                                                a, b, seqid, overlap = prody.matchChains(
                                                    proteinPDB, ourPDB)[0]
                                                b, protein_sp = prody.superpose(
                                                    b, a, weights=None)
                                                # The result of this select()
                                                # call is not stored or used.
                                                b.select(ligand +
                                                         '_ligand.pdb')
                                                sts = str("obrms -f " + i +
                                                          ' ' + id +
                                                          "_docked.sdf")
                                                #run obrms
                                                # parse results and output to the visualization txt file
                                                os.system(sts)
                                                # NOTE(review): the file is
                                                # opened in binary mode but a str
                                                # is written (TypeError on
                                                # Python 3), and `f.close` lacks
                                                # parentheses so close() is
                                                # never called.
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                f.close
                                                curdir = str(cd + '/' + x +
                                                             '/' + y + '/' +
                                                             id +
                                                             '_docked.sdf')
                                        print(input)  # prints the directory listing
                                        for i in (input):
                                            if i.endswith("lig.pdb"):
                                                #see if pdb exists
                                                protein = prody.fetchPDB(y)
                                                #NEED NUMPY ARRAY
                                                # NOTE(review): `array` is not
                                                # defined anywhere in this
                                                # function; unless a module-level
                                                # name exists this raises
                                                # NameError.
                                                prody.writeArray(
                                                    'lmcss_docked_array.sdf',
                                                    array)
                                                # NOTE(review): a file-name
                                                # string is passed here, while
                                                # the call above passes matched
                                                # chain objects - confirm what
                                                # superpose accepts.
                                                prody.superpose(
                                                    'lmcss_docked.sdf',
                                                    protein,
                                                    weights=None)
                                                sts = str("obrms -f " + i +
                                                          " lmcss_docked.sdf")
                                                #run obrms
                                                # parse results and output to the visualization txt file
                                                os.system(sts)
                                                # Unlike the loop above, this
                                                # visual.txt is opened after
                                                # moving to challengedata.
                                                os.chdir(wd +
                                                         '/challengedata/')
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                f.close
                                                curdir = str(
                                                    cd + '/' + x + '/' + y +
                                                    '/lmcss_docked.sdf')
                                                todir = str(cd + '/answers/' +
                                                            x + '/' + y + '/')
                                                # Copy the docked pose into the
                                                # answers tree, then stop after
                                                # the first *lig.pdb file.
                                                shutil.copy(curdir, todir)
                                                print(curdir)
                                                break
                                        os.chdir(wd)
                                    else:
                                        # Answers directory for this target does
                                        # not exist yet: create it, then repeat
                                        # the same steps in a single loop.
                                        os.mkdir(cur)
                                        os.chdir(cd + '/' + x + '/' + y)
                                        input = os.listdir(cd + '/' + x + '/' +
                                                           y)
                                        for i in (input):
                                            if i.endswith(
                                                    ".txt"
                                            ) and i != "center.txt" and i != "visual.txt":
                                                f = open(i)
                                                lines = f.readlines()
                                                ligand = lines[2].strip(
                                                    "ligand, ")
                                                ligand = ligand.replace(
                                                    '\n', '')
                                                ligand = str(ligand)
                                                #gets ligand from txt file

                                            if i.endswith("lig.pdb"):
                                                prody.fetchPDB(y)
                                                proteinPDB = prody.parsePDB(y)
                                                ourPDB = prody.parsePDB(
                                                    'lmcss_rec.pdb')
                                                # The matchChains result is
                                                # discarded here.
                                                prody.matchChains(
                                                    proteinPDB, ourPDB)
                                                # NOTE(review): the other branch
                                                # unpacks two values from
                                                # superpose; if it returns a
                                                # tuple, the .select call below
                                                # fails - confirm.
                                                protein_sp = prody.superpose(
                                                    ourPDB,
                                                    proteinPDB,
                                                    weights=None)
                                                protein_sp.select(
                                                    ligand + '_ligand.pdb')
                                                sts = str("obrms -f " + i +
                                                          ' ' + id +
                                                          "_docked.sdf")
                                                os.system(sts)
                                                # NOTE(review): same binary-mode
                                                # write and uncalled `f.close`
                                                # as in the branch above.
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                f.close
                                                curdir = str(cd + '/' + x +
                                                             '/' + y + '/' +
                                                             id +
                                                             '_docked.sdf')
                                            if i.endswith("lig.pdb"):
                                                protein = prody.fetchPDB(y)
                                                # NOTE(review): `array` is not
                                                # defined in this function.
                                                prody.writeArray(
                                                    'lmcss_docked_array.sdf',
                                                    array)
                                                prody.superpose(
                                                    'lmcss_docked.sdf',
                                                    protein,
                                                    weights=None)

                                                sts = str("obrms -f " + i +
                                                          " lmcss_docked.sdf")
                                                os.system(sts)
                                                os.chdir(wd +
                                                         '/challengedata/')
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                f.close
                                                curdir = str(
                                                    cd + '/' + x + '/' + y +
                                                    '/lmcss_docked.sdf')
                                                todir = str(cd + '/answers/' +
                                                            x + '/' + y + '/')
                                                shutil.copy(curdir, todir)
                                                print(curdir)
                                                break
                                # Runs for every LMCSS*.pdb file, including the
                                # skipped *lig.pdb one.
                                os.chdir(wd)
Ejemplo n.º 26
0
    def align():
        """Dock and score the LMCSS ligand of every target under ``wd/challengedata``.

        Walks ``challengedata/<week>/<target>/``. Targets that already have
        ``challengedata/answers/<week>/<target>/lmcss_docked.sdf`` are skipped.
        For every ``LMCSS*.pdb`` file that does not end in ``lig.pdb`` the
        function works inside the target directory and:

        1. writes the ATOM records to ``lmcss_rec.pdb`` with ``grep``;
        2. runs ``rdkit-scripts/rdconf.py`` on each ``.smi`` file to produce
           ``lig.sdf``;
        3. runs ``smina`` for each ``*lig.pdb`` file, writing
           ``lmcss_docked.sdf``;
        4. creates the target's answers directory if it is missing;
        5. for the first ``*lig.pdb`` file, calls ``prody.fetchPDB``, runs
           ``sdsorter`` and ``obrms`` (outputs appended to ``sdsorted.txt``
           and ``rmsd.txt``) and appends a tab-separated line with the week,
           ``smina``, the target and the two parsed values to ``visual.txt``
           in the target directory.

        Uses the module-level ``wd`` (expected to be an absolute path) and
        leaves the working directory at ``wd`` after each LMCSS file.

        Raises:
            subprocess.CalledProcessError: if ``sdsorter`` or ``obrms`` exits
                with a non-zero status.
            IndexError: if either log file has fewer than two lines.
        """
        global wd

        def _run_and_log(command, logname):
            """Run *command* in a shell, append its output to *logname* and
            return every line currently stored in that file."""
            with open(logname, 'ab+') as log:
                log.write(subprocess.check_output(command, shell=True))
            with open(logname) as log:
                return log.readlines()

        def _dock_and_score(x, y, receptor, target_dir, target_answers):
            """Run the grep / rdconf / smina / scoring steps for one target."""
            os.chdir(target_dir)
            os.system("grep ATOM " + receptor + " > lmcss_rec.pdb")

            for name in os.listdir(target_dir):
                if name.endswith(".smi"):
                    os.system(rddir + '/rdconf.py' + " " + target_dir + '/' +
                              name + " lig.sdf --maxconfs 1")

            for name in os.listdir(target_dir):
                if name.endswith("lig.pdb"):
                    os.system(
                        "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                        + name + " -o lmcss_docked.sdf")

            if not os.path.isdir(target_answers):
                os.mkdir(target_answers)

            for name in os.listdir(target_dir):
                if not name.endswith("lig.pdb"):
                    continue
                prody.fetchPDB(y)
                # NOTE(review): the logs are appended to, so on a re-run
                # lines[1] is still the value from the first run - confirm
                # whether the files should be truncated instead.
                lines = _run_and_log('sdsorter lmcss_docked.sdf -print',
                                     'sdsorted.txt')
                # The whitespace inside these two literals is tab characters.
                bind = lines[1].strip('1		').split("	", 1)
                print(bind[0])
                lines = _run_and_log("obrms -f " + name + " lmcss_docked.sdf",
                                     'rmsd.txt')
                top = lines[1].strip('RMSD : ').replace('\n', '')
                print(top)
                # Text mode: the line is a str, which a binary-mode file
                # rejects on Python 3. The `with` block also guarantees the
                # file is closed.
                with open('visual.txt', 'a') as out:
                    out.write(x + '	smina	' + y + '	' + top + '	' +
                              bind[0] + '\n')
                print(x + '    ' + y)
                break  # only the first *lig.pdb file is scored

        challenge_dir = wd + '/challengedata'
        ans = challenge_dir + '/answers'
        if not os.path.isdir(ans):
            os.mkdir(ans)
        rddir = challenge_dir + '/rdkit-scripts'
        if not os.path.isdir(rddir):
            # Clone into the directory that is tested above and used by the
            # rdconf.py call, instead of whatever the current directory is.
            os.system('git clone https://github.com/dkoes/rdkit-scripts ' +
                      rddir)

        skipped_weeks = ("readme.txt", "latest.txt", "answers",
                         "rdkit-scripts", 'PDBfiles', 'visual.txt')
        skipped_targets = (
            'readme.txt',
            'new_release_structure_sequence_canonical.tsv',
            'new_release_structure_nonpolymer.tsv',
            'new_release_crystallization_pH.tsv',
            'new_release_structure_sequence.tsv',
        )

        for x in os.listdir(challenge_dir):  # for each week's data
            if x in skipped_weeks:
                continue
            week_answers = ans + '/' + x
            if not os.path.isdir(week_answers):
                os.mkdir(week_answers)

            for y in os.listdir(challenge_dir + '/' + x):
                if y in skipped_targets:
                    continue
                target_answers = week_answers + '/' + y
                # Anchored at the answers directory rather than os.getcwd(),
                # which no longer points at challengedata once a target has
                # been processed.
                if os.path.isfile(target_answers + '/lmcss_docked.sdf'):
                    continue

                target_dir = challenge_dir + '/' + x + '/' + y
                for z in os.listdir(target_dir):
                    if not (z.startswith("LMCSS") and z.endswith(".pdb")):
                        continue
                    if not z.endswith("lig.pdb"):
                        _dock_and_score(x, y, z, target_dir, target_answers)
                    os.chdir(wd)