コード例 #1
0
def load_from_one_cath_pml_file(pml_file, scratch_path, superfamilies,
                                dssp_path):
    '''Load data from a .pml file of superposed
    homologous superfamilies from the CATH database.

    A new, initially empty, list is appended to ``superfamilies``. Every
    structure embedded in a ``cmd.read_pdbstr`` block of ``pml_file`` that
    has no chain breaks is written to ``scratch_path`` as ``<pdb_id>.pdb``
    and then passed to ``find_secondary_structures`` together with
    ``dssp_path``. Proteins for which that call succeeds are added to the
    new superfamily list.

    A record that is cut off by the end of the file (no closing ``"""``
    line) is discarded instead of being read forever.
    '''
    superfamilies.append([])
    candidate_proteins = []

    with open(pml_file, 'r') as f:
        for line in f:
            # Only lines opening an embedded structure are of interest
            if not line.strip().startswith('cmd.read_pdbstr'):
                continue

            # Read one structure; the first PDB record shares the line
            # with the 'cmd.read_pdbstr("""' prefix (19 characters)
            pdb_lines = [line.strip()[19:].strip('\\')]
            pdb_id = None

            for line in f:
                stripped = line.strip()
                if stripped.startswith('"""'):
                    # The closing line carries the structure id
                    pdb_id = stripped[5:12]
                    break

                pdb_line = stripped.strip('\\')
                if len(pdb_line) > 17:
                    # Remove all altLoc flags (column 17 of the record)
                    pdb_line = pdb_line[0:16] + ' ' + pdb_line[17:]

                pdb_lines.append(pdb_line)

            if pdb_id is None:
                # End of file reached inside a record: the structure is
                # incomplete, so stop instead of looping on empty reads
                break

            structure = structure_from_pdb_string('\n'.join(pdb_lines),
                                                  pdb_id)

            # Store structures without chain breaks

            if len(topology.find_structure_chain_breaks(structure)) == 0:
                # Make a pdb file of the structure for DSSP analysis
                structure_path = os.path.join(scratch_path,
                                              pdb_id + '.pdb')

                writer = PDB.PDBIO()
                writer.set_structure(structure)
                writer.save(structure_path)

                candidate_proteins.append({
                    'structure': structure,
                    'path': structure_path
                })

    for p in candidate_proteins:
        try:
            find_secondary_structures(p, dssp_path)
        except Exception:
            # Best effort: skip proteins whose secondary structure
            # assignment fails, but let KeyboardInterrupt/SystemExit through
            continue
        superfamilies[-1].append(
            p)  # Add a protein to a superfamily if there's no exception
コード例 #2
0
def preparePdb(pdb_fname, out_pdb_fname):
    ''' Prepare the PDB file with only first model and redundancies cut out.

    The input (optionally gzip-compressed when its name ends in ``.gz``) is
    copied to a scratch file, parsed with ``Bio.PDB.PDBParser``, reduced to
    its first model and written to ``out_pdb_fname``.

    Returns the absolute path of the written file.
    Raises IOError when ``pdb_fname`` does not exist.
    '''
    # 'Absolutize' the path names - rest is done in the temporary dir
    pdb_fname = os.path.abspath(pdb_fname)
    if not os.path.exists(pdb_fname):
        raise IOError('%s does not exist' % pdb_fname)
    out_pdb_fname = os.path.abspath(out_pdb_fname)
    # Inside the temporary dir
    # NOTE(review): relative names below assume tempDir() changes the
    # working directory to a scratch location - confirm against tempDir.
    with tempDir():
        # Temporary name for the curated copy of the input
        new_pdb_fname = 'query.pdb'
        # If the original PDB is packed with gzip - unpack it into a new file.
        # Text mode keeps the gzip stream compatible with the text-mode copy.
        if pdb_fname.endswith('.gz'):
            rfh = gzip.open(pdb_fname, 'rt')
        else:
            rfh = open(pdb_fname, 'r')
        try:
            with open(new_pdb_fname, 'w') as wfh:
                wfh.write(rfh.read())
        finally:
            rfh.close()
        # Parse structure
        # Redirect standard output/error to a StringIO,
        # so that PDBParser stops messing the output
        parser = Bio.PDB.PDBParser()
        err_fh = io.StringIO()
        sys.stdout = err_fh
        sys.stderr = err_fh
        try:
            struct = parser.get_structure('query', new_pdb_fname)
        finally:
            # Always restore the streams, even when parsing fails
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
        # Output formatted info about PDBParser's work to a log
        s = err_fh.getvalue()
        if s.strip():
            logging.info(
                "Structure parsing generated following error message(s): \n%s\n%s\n%s"
                % ('-' * 120, s, '-' * 120))
        # By default use only first model
        model = struct[0]
        del struct.child_list[1:]
        # Check for discontinuities greater than 5 residues - warn about this _specifically_
        for chain in model:
            chid = chain.id
            last_rid = None
            for residue in chain:
                rid = residue.id[1]
                if last_rid is not None and rid > last_rid + 5:
                    logging.warning(
                        "Residues %s:%s-%s:%s. Results might be inaccurate, as the break in a protein chain numbers more than 5 residues."
                        % (last_rid, chid, rid, chid))
                # Track every residue so consecutive numbers are compared
                last_rid = rid
        # Save the first-model structure. The writer is not called `io`,
        # which would shadow the io module used above.
        pdb_io = Bio.PDB.PDBIO()
        pdb_io.set_structure(struct)
        pdb_io.save(new_pdb_fname)
        shutil.move(new_pdb_fname, out_pdb_fname)
        return out_pdb_fname
コード例 #3
0
ファイル: entry.py プロジェクト: madieragold/nmrexcneiman7
def save_structure(struct, name):
    """Write ``struct`` to ``<name>.pdb`` and prepend ``header()`` to it.

    The structure is first saved through ``PDBIO``; the resulting file is
    then read back and rewritten with the text returned by ``header()``
    placed in front of the atom records.
    """
    pdb_path = '{}.pdb'.format(name)

    writer = PDBIO()
    writer.set_structure(struct)
    writer.save(pdb_path)
    del writer

    with open(pdb_path, 'r') as source:
        atom_records = source.read()

    contents = header() + atom_records

    with open(pdb_path, 'w') as target:
        target.write(contents)
コード例 #4
0
ファイル: hamster_pile.py プロジェクト: carinaj/pyrocko
 def _fixate(self, buf):
     """Save the first trace of ``buf`` to ``self._path`` and drop the buffer.

     Nothing happens when ``self._path`` is falsy. Otherwise the buffer is
     removed via ``remove_file`` and, unless ``self._forget_fixed`` is set,
     the files returned by ``io.save`` are loaded back with ``load_files``.
     """
     if not self._path:
         return

     first_trace = buf.get_traces()[0]
     saved_files = io.save([first_trace], self._path, format=self._format)

     self.remove_file(buf)
     if self._forget_fixed:
         return

     self.load_files(saved_files, show_progress=False, fileformat=self._format)
コード例 #5
0
ファイル: shadow_pile.py プロジェクト: carinaj/pyrocko
 def _insert(self, iblock, traces):
     """Add ``traces`` either as saved mseed files or as an in-memory file.

     Empty ``traces`` are ignored. With a store path configured the traces
     are saved (tagged with ``iblock``) and the resulting files loaded;
     without one they are wrapped in a ``pile.MemTracesFile``.
     """
     if not traces:
         return

     if self._storepath is None:
         self.add_file(pile.MemTracesFile(None, traces))
         return

     stored = io.save(traces, self._storepath, format='mseed',
                      additional={'iblock': iblock})
     self.load_files(stored, fileformat='mseed', show_progress=False)
コード例 #6
0
ファイル: plot.py プロジェクト: erwindl0/python-rpc
 def volume(v, name=_REMOTEVOLNAME):
     '''Plot a volume dataset in remote volume view

     The dataset is written to a temporary ``.dsr`` file in binary format,
     handed to ``_plot_volume`` under ``name`` and the file is deleted
     afterwards, also when casting, saving or plotting raises.
     '''
     import tempfile
     import os #@Reimport
     fd, vdatafile = tempfile.mkstemp('.dsr') # '/tmp/blah.dsr'
     # Only the path is needed; _io.save opens the file itself
     os.close(fd)
     try:
         # convert to byte, int or float as volume viewer cannot cope with boolean, long or double datasets
         if v.dtype == _core.bool:
             v = _core.cast(v, _core.int8)
         elif v.dtype == _core.int64:
             v = _core.cast(v, _core.int32)
         elif v.dtype == _core.float64 or v.dtype == _core.complex64 or v.dtype == _core.complex128:
             v = _core.cast(v, _core.float32)
         _io.save(vdatafile, v, format='binary')
         _plot_volume(name, vdatafile)
     finally:
         # mkstemp leaves cleanup to the caller; do not leak the file on errors
         os.remove(vdatafile)
コード例 #7
0
ファイル: get_fragment.py プロジェクト: clarewest/scripts
def get_sequence(pdb, chain):
    """Write a contiguous segment of one chain to ``<sys.argv[5]>_segment.pdb``.

    ``<pdb>.pdb`` is parsed and ``chain`` of its first model is selected.
    Residues are counted from 1 in chain order; those whose position lies
    outside ``[int(sys.argv[3]), int(sys.argv[4])]`` are detached before
    the chain is saved.
    """
    # PERMISSIVE=0 makes the parser strict, so errors in the PDB file are
    # raised rather than tolerated.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb + ".pdb")
    pdb_chain = pdb_structure[0][chain]

    # Parse the bounds once instead of on every iteration
    first = int(sys.argv[3])
    last = int(sys.argv[4])

    # Collect ids first: detaching while iterating would skip residues
    outside = [
        residue.get_id()
        for position, residue in enumerate(pdb_chain, start=1)
        if position < first or position > last
    ]
    for residue_id in outside:
        pdb_chain.detach_child(residue_id)

    writer = PDBIO()
    writer.set_structure(pdb_chain)
    output = sys.argv[5] + "_segment.pdb"
    writer.save(output)
コード例 #8
0
 def volume(v, name=_REMOTEVOLNAME):
     '''Plot a volume dataset in remote volume view

     The dataset is written to a temporary ``.dsr`` file in binary format,
     handed to ``_plot_volume`` under ``name`` and the file is deleted
     afterwards, also when casting, saving or plotting raises.
     '''
     import tempfile
     import os  #@Reimport
     fd, vdatafile = tempfile.mkstemp('.dsr')  # '/tmp/blah.dsr'
     # Only the path is needed; _io.save opens the file itself
     os.close(fd)
     try:
         # convert to byte, int or float as volume viewer cannot cope with boolean, long or double datasets
         if v.dtype == _core.bool:
             v = _core.cast(v, _core.int8)
         elif v.dtype == _core.int64:
             v = _core.cast(v, _core.int32)
         elif v.dtype == _core.float64 or v.dtype == _core.complex64 or v.dtype == _core.complex128:
             v = _core.cast(v, _core.float32)
         _io.save(vdatafile, v, format='binary')
         _plot_volume(name, vdatafile)
     finally:
         # mkstemp leaves cleanup to the caller; do not leak the file on errors
         os.remove(vdatafile)
コード例 #9
0
ファイル: pile.py プロジェクト: trokia/pyrocko
    def _fixate(self, buf):
        """Persist the first trace of ``buf`` and forget its state entry.

        When ``self._path`` is set, the trace is saved there, the buffer is
        removed from ``self._pile`` and, unless ``self._forget_fixed`` is
        set, the saved files are loaded into the pile. The entry of
        ``self._states`` keyed by the trace's ``nslc_id`` is always deleted.
        """
        head = buf.get_traces()[0]
        target = self._path
        if target:
            filenames = io.save([head], target, format=self._format)

            owner = self._pile
            owner.remove_file(buf)
            if not self._forget_fixed:
                owner.load_files(filenames,
                                 show_progress=False,
                                 fileformat=self._format)

        state_key = head.nslc_id
        del self._states[state_key]
コード例 #10
0
    def _fixate(self, buf):
        """Save the first trace of ``buf`` to ``self._path`` and drop the buffer.

        Does nothing when ``self._path`` is falsy. After saving, the buffer
        is removed with ``remove_file``; the saved files are loaded again
        unless ``self._forget_fixed`` is set.
        """
        if not self._path:
            return

        first_trace = buf.get_traces()[0]
        written = io.save([first_trace], self._path, format=self._format)

        self.remove_file(buf)
        if self._forget_fixed:
            return

        self.load_files(written, show_progress=False, fileformat=self._format)
コード例 #11
0
ファイル: shadow_pile.py プロジェクト: megies/pyrocko
 def _insert(self, iblock, traces):
     """Register ``traces``, on disk when a store path is set, else in memory.

     Falsy ``traces`` are ignored. The ``iblock`` value is passed to
     ``io.save`` as additional information for the saved mseed files.
     """
     if not traces:
         return

     store = self._storepath
     if store is None:
         # No store path: keep the traces in a memory-backed file
         mem_file = pile.MemTracesFile(None, traces)
         self.add_file(mem_file)
     else:
         extra = {'iblock': iblock}
         filenames = io.save(traces, store, format='mseed', additional=extra)
         self.load_files(filenames, fileformat='mseed', show_progress=False)
コード例 #12
0
ファイル: lig_pdb2smiles.py プロジェクト: picodase/cbpcode
def extract_ligands(path):
    """Save each hetero residue of the ``.pdb`` files in ``path + 'pdbs/'``.

    Files whose name starts with ``lig_`` or does not end in ``.pdb`` are
    skipped. For every residue of the first model accepted by ``is_het``,
    a file ``lig_<pdb_code>_<n>.pdb`` (``n`` counting from 1 per input
    file) is written relative to the current working directory.
    """
    pdb_dir = path + 'pdbs/'

    for entry in os.listdir(pdb_dir):
        if not entry.endswith('.pdb') or entry.startswith("lig_"):
            continue

        pdb_code = entry[:-4]
        structure = PDBParser().get_structure(pdb_code, pdb_dir + entry)
        writer = PDBIO()
        writer.set_structure(structure)
        first_model = structure[0]

        # Lazily walk the model so is_het is evaluated right before each save
        hetero = ((chain, residue)
                  for chain in first_model
                  for residue in chain
                  if is_het(residue))

        for counter, (chain, residue) in enumerate(hetero, start=1):
            print(f"saving {chain} {residue}")
            writer.save(f"lig_{pdb_code}_{counter}.pdb",
                        ResidueSelect(chain, residue))
コード例 #13
0
    def _fixate(self, buf, complete=True):
        """Save the first trace of ``buf`` to disk, optionally cut into windows.

        Without ``self._path`` only the state entry for the trace is
        deleted. With a path and no ``self._fixation_length`` the whole
        trace is saved. With a fixation length the trace is chopped into
        consecutive windows of that length, starting at a window boundary
        counted from ``util.year_start`` of the trace start. If the last
        window is not full and ``complete`` is false, that last piece is put
        back into ``buf`` (which is re-added to the pile) and the state
        entry is kept.
        """
        trbuf = buf.get_traces()[0]
        forget_state = True

        if self._path:
            window = self._fixation_length

            if window is None:
                pieces = [trbuf]
                self._pile.remove_file(buf)

            else:
                start = trbuf.tmin
                year_begin = util.year_start(start)
                nwindows = int(math.floor((start - year_begin) / window))

                pieces = []
                t = year_begin + nwindows * window
                while t <= trbuf.tmax:
                    try:
                        piece = trbuf.chop(t,
                                           t + window,
                                           inplace=False,
                                           snap=(math.ceil, math.ceil))
                    except trace.NoData:
                        pass
                    else:
                        pieces.append(piece)
                    t += window

                # The last piece is full when it ends one sample before t
                last_is_full = abs(pieces[-1].tmax -
                                   (t - trbuf.deltat)) < trbuf.deltat / 100.

                if last_is_full or complete:
                    self._pile.remove_file(buf)

                else:  # reinsert incomplete last part
                    leftover = pieces.pop()
                    self._pile.remove_file(buf)
                    buf.remove(trbuf)
                    buf.add(leftover)
                    self._pile.add_file(buf)
                    forget_state = False

            filenames = io.save(pieces, self._path, format=self._format)

            if not self._forget_fixed:
                self._pile.load_files(filenames,
                                      show_progress=False,
                                      fileformat=self._format)

        if forget_state:
            del self._states[trbuf.nslc_id]
コード例 #14
0
ファイル: pile.py プロジェクト: qingkaikong/mtpy
    def _fixate(self, buf, complete=True):
        """Save the first trace of ``buf`` to disk, optionally cut into windows.

        If ``self._path`` is falsy nothing is saved and only the state entry
        keyed by the trace's ``nslc_id`` is deleted. Otherwise the trace (or
        its window pieces, when ``self._fixation_length`` is set) is saved
        with ``io.save`` and, unless ``self._forget_fixed`` is set, the saved
        files are loaded into ``self._pile``.

        ``complete`` -- when false and the last window is not full, that
        last piece is kept in ``buf`` instead of being saved, and the state
        entry is preserved.
        """
        trbuf = buf.get_traces()[0]
        del_state = True
        if self._path:
            if self._fixation_length is not None:
                # Start chopping at the last window boundary at or before the
                # trace start; boundaries are counted from util.year_start(ttmin)
                # (presumably the start of the year containing ttmin - confirm).
                ttmin = trbuf.tmin
                ytmin = util.year_start(ttmin)
                n = int(math.floor((ttmin - ytmin) / self._fixation_length))
                tmin = ytmin + n * self._fixation_length
                traces = []
                t = tmin
                while t <= trbuf.tmax:
                    try:
                        traces.append(
                            trbuf.chop(t, t + self._fixation_length, inplace=False, snap=(math.ceil, math.ceil))
                        )
                    except trace.NoData:
                        # Window without data: skip it
                        pass
                    t += self._fixation_length

                # The last piece counts as full when it ends one sample
                # before the end of the last window (within deltat / 100).
                # NOTE(review): traces[-1] raises IndexError if every chop
                # raised NoData - confirm that cannot happen.
                if abs(traces[-1].tmax - (t - trbuf.deltat)) < trbuf.deltat / 100.0 or complete:
                    self._pile.remove_file(buf)

                else:  # reinsert incomplete last part
                    new_trbuf = traces.pop()
                    self._pile.remove_file(buf)
                    buf.remove(trbuf)
                    buf.add(new_trbuf)
                    self._pile.add_file(buf)
                    # The buffer is still in use, so its state must survive
                    del_state = False

            else:
                # No windowing: save the trace as a whole
                traces = [trbuf]
                self._pile.remove_file(buf)

            fns = io.save(traces, self._path, format=self._format)

            if not self._forget_fixed:
                self._pile.load_files(fns, show_progress=False, fileformat=self._format)

        if del_state:
            del self._states[trbuf.nslc_id]
コード例 #15
0
    def download_pdb(self, info):
        """Download a PDB entry and save one of its chains.

        ``info`` is a ``(pdb_id, chain_id)`` pair. The entry is fetched into
        ``self.work_dir`` unless the modified copy ``pdb<id>.mod.ent`` is
        already there, passed through ``ReplacePDBModifiedAA``, and the
        requested chain of the first model is written to
        ``<work_dir>/<pdb_id>_<chain_id>.pdb`` with the REMARK lines of the
        modified file appended.

        Returns the path of the chain file, or ``None`` when the download
        or the parsing fails.
        """
        pdb_id, chain_id = info

        ## Check if atom has alternative position, if so, keep 'A' position and remove the flag
        ## but somehow this class doesn't seem to function well
        class NotDisordered(Select):
            def accept_atom(self, atom):
                if not atom.is_disordered() or atom.get_altloc() == 'A':
                    atom.set_altloc(' ')
                    return True
                else:
                    return False

        ## BioPython downloads PDB but it gives a lowercase name in pdb{}.ent format
        biopdb_name = '{0}/pdb{1}.ent'.format(self.work_dir, pdb_id.lower())
        biopdb_modf = '{0}/pdb{1}.mod.ent'.format(self.work_dir,
                                                  pdb_id.lower())
        chain_path = '{0}/{1}_{2}.pdb'.format(self.work_dir, pdb_id, chain_id)

        if not os.path.isfile(biopdb_modf):
            try:
                PDB.PDBList(verbose=False).retrieve_pdb_file(
                    pdb_id,
                    pdir=self.work_dir,
                    obsolete=False,
                    file_format='pdb')
            except FileNotFoundError:
                print(
                    '  \033[31m> ERROR: BioPython cannot download PDB: \033[0m'
                    + pdb_id)
                return None

        ## Replace modified AA to avoid mis-recognition in biopython readin
        ## Replace disordered atoms and keep only the "A" variant
        ReplacePDBModifiedAA(biopdb_name, biopdb_modf)

        # Collect the REMARK lines in Python instead of shelling out to
        # grep, which was unchecked and not portable. The .remark file is
        # still written in case other code relies on it.
        with open(biopdb_modf, 'r') as fi:
            remark_lines = [l for l in fi if 'REMARK  ' in l]
        with open('{0}.remark'.format(biopdb_modf), 'w') as fo:
            fo.writelines(remark_lines)

        ## Read the PDB file and extract the chain from structure[0]
        try:
            model = PDB.PDBParser(PERMISSIVE=1,
                                  QUIET=1).get_structure(pdb_id,
                                                         biopdb_modf)[0]
        except KeyError:
            print('  \033[31m> ERROR: BioPython cannot read in PDB: \033[0m' +
                  biopdb_modf)
            return None
        except ValueError:
            print('  \033[31m> ERROR: PDB file is empty: \033[0m' +
                  biopdb_modf)
            return None

        ### Bug alert: as of 20.02.18, Biopython dev hasn't come up with good
        ### strategy to fix the 'atom.disordered_get_list()' issue with alternative
        ### position of residue side chains. To go around this, will physically
        ### remove "B" variant and keep only "A" variant in
        writer = PDB.PDBIO()
        writer.set_structure(model[chain_id])
        writer.save(chain_path, select=NotDisordered())

        # Attach REMARK to end of PDB as safekeeping
        with open(chain_path, 'a') as fo:
            fo.writelines(remark_lines)

        return chain_path
コード例 #16
0
def prepareWithHydrogens(pdb_fname, out_pdb_fname="wth_hydro.pdb"):
    ''' Prepare the PDB file with hydrogen data (clean up and create a new one).

    The input (gzip-compressed when its name ends in ``.gz``) is parsed with
    ``Bio.PDB.PDBParser``, reduced to its first model, passed to
    ``remakeHydrogens`` and written to ``out_pdb_fname`` (gzip-compressed
    when that name ends in ``.gz``).

    Returns the absolute path of the written file.
    Raises IOError when ``pdb_fname`` does not exist or is not a file.
    '''
    # 'Absolutize' the path names
    pdb_fname = os.path.abspath(pdb_fname)
    if not os.path.exists(pdb_fname) or not os.path.isfile(pdb_fname):
        raise IOError('%s does not exist or is not a file.' % pdb_fname)
    out_pdb_fname = os.path.abspath(out_pdb_fname)
    # Text mode: the parser reads str lines, not bytes
    if pdb_fname.endswith('.gz'):
        rfh = gzip.open(pdb_fname, 'rt')
    else:
        rfh = open(pdb_fname, 'r')
    # Created before the try block so it is always defined afterwards
    err_fh = io.StringIO()
    try:
        # Parse structure
        parser = Bio.PDB.PDBParser()
        # Redirect standard output/error to a StringIO,
        # so that PDBParser stops messing the output
        sys.stdout = err_fh
        sys.stderr = err_fh
        struct = parser.get_structure('query', rfh)
    finally:
        # Restore streams
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        # ... and close up
        rfh.close()
    # Output formatted info about PDBParser's work to a logger
    s = err_fh.getvalue()
    if s.strip():
        logging.info(
            "Structure parsing generated following error message(s): \n%s\n%s\n%s"
            % ('-' * 120, s, '-' * 120))
    # By default use only first model
    # ... delete the rest
    model = struct[0]
    del struct.child_list[1:]
    # Check for discontinuities greater than 5 residues - warn about this _specifically_ (into the logger, again)
    for chain in model:
        chid = chain.id
        last_rid = None
        for residue in chain:
            rid = residue.id[1]
            if last_rid is not None and rid > last_rid + 5:
                logging.warning(
                    "Residues %s:%s-%s:%s. Results might be inaccurate, as the break in a protein chain numbers more than 5 residues."
                    % (last_rid, chid, rid, chid))
            # Track every residue so consecutive numbers are compared
            last_rid = rid
    # Prepare the remade hydrogens
    remakeHydrogens(struct)
    # Save structure. The writer is not called `io`, which would shadow
    # the io module used above.
    pdb_io = Bio.PDB.PDBIO()
    pdb_io.set_structure(struct)
    if out_pdb_fname.endswith('.gz'):
        # Text mode: PDBIO writes str records
        with gzip.open(out_pdb_fname, 'wt') as wfh:
            pdb_io.save(wfh)
    else:
        pdb_io.save(out_pdb_fname)
    return out_pdb_fname
コード例 #17
0
def prepareWithHydrogensPrep23(pdb_fname, out_pdb_fname="wth_hydro.pdb"):
    ''' Prepare the PDB file with hydrogen data (clean up and create a new one).

    The input (gzip-compressed when its name ends in ``.gz``) is copied to
    a scratch file, parsed, reduced to its first model, saved through
    ``NoHydroSelect`` and run through the executable returned by
    ``_preparePrepExec``. ATOM lines of that tool's output get fixed
    occupancy/B-factor/element columns appended before the result is moved
    to ``out_pdb_fname``.

    Returns the absolute path of the written file.
    Raises IOError when ``pdb_fname`` does not exist or is not a file, and
    RuntimeError when the preparation executable exits with a non-zero code.
    '''
    # 'Absolutize' the path names - rest is done in the temporary dir
    pdb_fname = os.path.abspath(pdb_fname)
    if not os.path.exists(pdb_fname) or not os.path.isfile(pdb_fname):
        raise IOError('%s does not exist or is not a file.' % pdb_fname)
    out_pdb_fname = os.path.abspath(out_pdb_fname)
    # Inside the temporary dir
    # NOTE(review): relative names below assume tempDir() changes the
    # working directory to a scratch location - confirm against tempDir.
    with tempDir():
        # Temporary names for curated input and output files
        new_pdb_fname = 'query.pdb'
        out_tmp_fname = 'out.pdb'
        # Prepare the sources
        prep_exec = _preparePrepExec()
        # Copy the original file into our temporary directory
        # If the original PDB is packed with gzip - unpack it into a new file.
        # Text mode keeps the gzip stream compatible with the text-mode copy.
        if pdb_fname.endswith('.gz'):
            rfh = gzip.open(pdb_fname, 'rt')
        else:
            rfh = open(pdb_fname, 'r')
        try:
            with open(new_pdb_fname, 'w') as wfh:
                wfh.write(rfh.read())
        finally:
            rfh.close()
        # Parse structure
        # Redirect standard output/error to a StringIO,
        # so that PDBParser stops messing the output
        parser = Bio.PDB.PDBParser()
        err_fh = io.StringIO()
        sys.stdout = err_fh
        sys.stderr = err_fh
        try:
            with open(new_pdb_fname, 'r') as rfh:
                struct = parser.get_structure('query', rfh)
        finally:
            # Always restore the streams, even when parsing fails
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
        # Output formatted info about PDBParser's work to a log
        s = err_fh.getvalue()
        if s.strip():
            logging.info(
                "Structure parsing generated following error message(s): \n%s\n%s\n%s"
                % ('-' * 120, s, '-' * 120))
        # By default use only first model
        model = struct[0]
        del struct.child_list[1:]
        # Check for discontinuities greater than 5 residues - warn about this _specifically_
        for chain in model:
            chid = chain.id
            last_rid = None
            # Curate disordered residues keeping only the last
            chain.child_list = [residue for residue in chain]
            chain.child_dict = dict((residue.id, residue) for residue in chain)
            for residue in chain:
                # Curate disordered atoms keeping only the last
                residue.child_list = [a for a in residue]
                residue.child_dict = dict((a.id, a) for a in residue)
                rid = residue.id[1]
                if last_rid is not None and rid > last_rid + 5:
                    logging.warning(
                        "Residues %s:%s-%s:%s. Results might be inaccurate, as the break in a protein chain numbers more than 5 residues."
                        % (last_rid, chid, rid, chid))
                # Track every residue so consecutive numbers are compared
                last_rid = rid
        # Save structure without hydrogens. The writer is not called `io`,
        # which would shadow the io module used above.
        pdb_io = Bio.PDB.PDBIO()
        pdb_io.set_structure(struct)
        pdb_io.save(new_pdb_fname, NoHydroSelect())
        # Run the preparation executable on the newly created PDB file.
        # The file names are fixed constants; only prep_exec is interpolated.
        if (subprocess.call("%s %s %s 1>tmp.out 2>tmp.err" %
                            (prep_exec, new_pdb_fname, out_tmp_fname),
                            shell=True) != 0):
            raise RuntimeError(
                'Could not prepare corrected structure file for %s' %
                pdb_fname)
        # Fix the occupancies (creating the last and final temporary PDB file)
        final_fn = "final.pdb"
        with open(out_tmp_fname, 'r') as rfh:
            with open(final_fn, 'w') as wfh:
                for line in rfh:
                    if line.startswith('ATOM'):
                        # Drop the line ending, append the fixed columns
                        wfh.write(line[:-1] + "  0.00  0.00           C\n")
                    else:
                        wfh.write(line)
        # Move the output file to the desired location
        shutil.move(final_fn, out_pdb_fname)
        return out_pdb_fname