Beispiel #1
0
    def add_backbone_atoms_linearly_from_loop_filepaths(self, loop_json_filepath, fasta_filepath, residue_ids):
        '''A utility wrapper around add_backbone_atoms_linearly. Adds backbone atoms in a straight line from the first to
           the last residue of residue_ids.

           loop_json_filepath is a path to a JSON file using the JSON format for Rosetta loops files. This file identifies
           the insertion points of the sequence. It must contain a 'LoopSet' list with exactly one entry, whose 'start'
           and 'stop' records each provide 'chainID', 'resSeq', and 'iCode'.

           fasta_filepath is a path to a FASTA file with one sequence. This sequence will be used as the sequence for
           the inserted residues (between the start and stop residues defined in loop_json_filepath).

           residue_ids is a list of PDB chain residues (columns 22-27 of ATOM lines in the PDB format). It is assumed that
           they are sequential although the logic does not depend on that. This list should have the same length as the
           sequence identified in the FASTA file. Both the start and the stop residue of the loop must occur in it.

           Raises AssertionError if the loop file does not define exactly one loop, if the start or stop residue is not
           in residue_ids, or if the FASTA file does not contain exactly one sequence. Raises Exception if the sequence
           and residue_ids differ in length, if the stop residue is not reached after the start residue, or if the
           start or stop residue cannot be looked up in self.residues.
        '''

        # Parse the loop file
        loop_def = json.loads(read_file(loop_json_filepath))
        assert(len(loop_def['LoopSet']) == 1)
        loop = loop_def['LoopSet'][0]
        start_res = loop['start']
        end_res = loop['stop']
        # Convert both records into the string form of residue ids used by residue_ids (resSeq followed by iCode,
        # with surrounding whitespace stripped before it is handed to PDB.ChainResidueID2String).
        start_res = PDB.ChainResidueID2String(start_res['chainID'], (str(start_res['resSeq']) + start_res['iCode']).strip())
        end_res = PDB.ChainResidueID2String(end_res['chainID'], (str(end_res['resSeq']) + end_res['iCode']).strip())
        assert(start_res in residue_ids)
        assert(end_res in residue_ids)

        # Parse the FASTA file and extract the sequence
        f = FASTA(read_file(fasta_filepath), strict = False)
        assert(len(f.get_sequences()) == 1)
        insertion_sequence = f.sequences[0][2]
        if len(residue_ids) != len(insertion_sequence):
            raise Exception('The sequence in the FASTA file must have the same length as the list of residues.')

        # Create the insertion sequence (a sub-sequence of the FASTA sequence)
        # The post-condition is that the start and end residues are the first and last elements of kept_residues respectively
        kept_residues = []
        insertion_residue_map = {}
        in_section = False
        found_end = False
        for index, residue_id in enumerate(residue_ids):
            if residue_id == start_res:
                in_section = True
            if in_section:
                kept_residues.append(residue_id)
                # residue_ids and insertion_sequence are index-aligned (their lengths were checked above)
                insertion_residue_map[residue_id] = insertion_sequence[index]
                if residue_id == end_res:
                    found_end = True
                    break
        if not kept_residues:
            raise Exception('The insertion sequence is empty (check the start and end residue ids).')
        if not found_end:
            # This also covers the case where the end residue precedes the start residue in residue_ids
            raise Exception('The end residue was not encountered when iterating over the insertion sequence (check the start and end residue ids).')

        # Identify the start and end Residue objects
        # The first character of the residue id string is used as the chain key and the remainder as the residue key.
        try:
            start_res = self.residues[start_res[0]][start_res[1:]]
            end_res = self.residues[end_res[0]][end_res[1:]]
        except Exception:
            raise Exception('The start or end residue could not be found in the PDB file.')
        # NOTE(review): the visible body ends here without using kept_residues / insertion_residue_map or calling
        # add_backbone_atoms_linearly - the remainder of this method appears to be missing; confirm against the full source.
Beispiel #2
0
 def get_fasta_object(self, pdb_id, acceptable_sequence_percentage_match = 90.0):
     '''Return the FASTA object stored for pdb_id, creating and storing it first if necessary.

        The lookup is logged with pdb_id as passed in; all cache accesses use the upper-cased id. If no FASTA
        object is stored yet, the FASTA contents are fetched when they are missing too (via download_fasta when
        self.cache_dir is set, otherwise via retrieve_fasta) and a new object is obtained from FASTA.retrieve.

        acceptable_sequence_percentage_match is accepted but not used by this method.
     '''
     self.log_lookup('FASTA object {0}'.format(pdb_id))
     key = pdb_id.upper()

     object_missing = not self.fasta_objects.get(key)
     if object_missing and not self.fasta_contents.get(key):
         # Fetch the raw FASTA contents, going through the cache directory when one is configured
         contents = (
             download_fasta(key, self.cache_dir, silent = True)
             if self.cache_dir
             else retrieve_fasta(key, silent = True)
         )
         self.add_fasta_contents(key, contents)

     if object_missing:
         fasta_object = FASTA.retrieve(key, cache_dir = self.cache_dir, bio_cache = self)
         self.add_fasta_object(key, fasta_object)

     return self.fasta_objects[key]