Python align Examples, hommod.controllers.kmad.kmad_aligner.align Python Examples

Example #1

0

Show file

File: model.py Project: cbaakman/hommod

    def _choose_best_target_alignment(self, context,
                                      interacting_chain_alignments,
                                      potential_target_sequences, chain_id):
        """Pick the candidate target that aligns to a template chain with the highest identity.

        Every candidate sequence is aligned to the template chain with kmad.
        When that alignment has a coverage below 90 %, domain alignments are
        requested from the domain aligner for that template chain. Those for
        which `_preserves_interactions` holds are preferred, otherwise all of
        them are used. They are joined with
        `_join_alignments_to_best_template_coverage` and the joined target
        sequence is aligned with kmad again.

        :param context: provides the sequence, secondary structure and pdbid
                        of the template
        :param interacting_chain_alignments: mapping of chain id to the
                        alignment of a chain that was aligned before
        :param potential_target_sequences: mapping of target id to sequence
        :param chain_id: id of the template chain to align the targets to
        :return: the alignment with the highest percentage identity, with its
                 `target_id` attribute set, or None when no candidate
                 produced an alignment
        """

        # The template chain is the same for every candidate target,
        # so look up its sequence and secondary structure only once.
        template_chain_sequence = context.get_sequence(chain_id)
        template_chain_secstr = context.get_secondary_structure(chain_id)

        best_alignment = None

        for target_id, target_sequence in potential_target_sequences.items():
            alignment = kmad_aligner.align(
                template_chain_sequence, template_chain_secstr,
                target_sequence)

            coverage = alignment.get_percentage_coverage()
            _log.debug("alignment {} has coverage {} %".format(
                alignment, coverage))

            if coverage < 90.0:
                # If the coverage is too low, we need to bother interpro.
                domain_alignments = domain_aligner.get_domain_alignments(
                    target_sequence, None,
                    TemplateID(context.template_pdbid, chain_id))

                interacting_alignments = [
                    ali for ali in domain_alignments
                    if self._preserves_interactions(
                        context, ali.target_alignment.replace('-', ''),
                        chain_id, interacting_chain_alignments)
                ]

                _log.debug(
                    "preserve interactions with chains {}: filtered {} alignments out of {}"
                    .format(interacting_chain_alignments.keys(),
                            len(interacting_alignments),
                            len(domain_alignments)))

                # Prefer the domain alignments that keep the interactions,
                # fall back to all domain alignments and skip this target
                # when there are none at all.
                if len(interacting_alignments) > 0:
                    candidates = interacting_alignments
                elif len(domain_alignments) > 0:
                    candidates = domain_alignments
                else:
                    continue

                domain_alignment = self._join_alignments_to_best_template_coverage(
                    candidates)

                alignment = kmad_aligner.align(
                    template_chain_sequence, template_chain_secstr,
                    domain_alignment.get_target_sequence())

            alignment.target_id = target_id

            # Strict comparison: on equal identity the earlier candidate wins.
            if best_alignment is None or \
                    best_alignment.get_percentage_identity() < alignment.get_percentage_identity():
                best_alignment = alignment

        return best_alignment

Example #2

0

Show file

File: model.py Project: cmbi/hommod-rest

    def _choose_best_target_alignment(self, context,
                                      interacting_chain_alignments,
                                      potential_target_sequences,
                                      chain_id):
        """Return the best kmad alignment of any candidate target to one template chain.

        Each candidate is aligned to the template chain. If the coverage of
        that alignment is below 90 %, the domain aligner is asked for domain
        alignments on this template chain; the ones accepted by
        `_preserves_interactions` are used if there are any, otherwise all of
        them. After joining them with
        `_join_alignments_to_best_template_coverage`, the resulting target
        sequence is aligned to the template chain once more.

        :param context: provides the sequence, secondary structure and pdbid
                        of the template
        :param interacting_chain_alignments: mapping of chain id to the
                        alignment of a previously aligned chain
        :param potential_target_sequences: mapping of target id to sequence
        :param chain_id: id of the template chain
        :return: the alignment with the highest percentage identity
                 (`target_id` is set on it), or None if there is none
        """

        # Loop-invariant: the template chain does not depend on the target.
        template_chain_sequence = context.get_sequence(chain_id)
        template_chain_secstr = context.get_secondary_structure(chain_id)

        best_alignment = None

        for target_id, target_sequence in potential_target_sequences.items():
            alignment = kmad_aligner.align(template_chain_sequence,
                                           template_chain_secstr,
                                           target_sequence)

            coverage = alignment.get_percentage_coverage()
            _log.debug("alignment {} has coverage {} %".format(alignment, coverage))

            if coverage < 90.0:
                # If the coverage is too low, we need to bother interpro.
                domain_alignments = domain_aligner.get_domain_alignments(
                    target_sequence,
                    None,
                    TemplateID(context.template_pdbid, chain_id))

                interacting_alignments = [
                    ali for ali in domain_alignments
                    if self._preserves_interactions(context,
                                                    ali.target_alignment.replace('-', ''),
                                                    chain_id, interacting_chain_alignments)
                ]

                _log.debug("preserve interactions with chains {}: filtered {} alignments out of {}"
                           .format(interacting_chain_alignments.keys(),
                                   len(interacting_alignments), len(domain_alignments)))

                # Interaction-preserving alignments take precedence; without
                # any domain alignment this target cannot be used.
                if len(interacting_alignments) > 0:
                    usable_alignments = interacting_alignments
                elif len(domain_alignments) > 0:
                    usable_alignments = domain_alignments
                else:
                    continue

                domain_alignment = self._join_alignments_to_best_template_coverage(usable_alignments)

                alignment = kmad_aligner.align(template_chain_sequence,
                                               template_chain_secstr,
                                               domain_alignment.get_target_sequence())

            alignment.target_id = target_id

            # Only a strictly higher identity replaces the current best.
            if best_alignment is None or \
                    best_alignment.get_percentage_identity() < alignment.get_percentage_identity():
                best_alignment = alignment

        return best_alignment

Example #3

0

Show file

File: test_kmad.py Project: cmbi/hommod

def test_kmad_X():
    """The ungapped template row of the alignment must equal the template, 'X' residues included."""
    template = "XAXRXLXKXGDAFNR"
    secstr = "               "
    target = "AAAAAAAAAAAAAAA"

    alignment = kmad_aligner.align(template, secstr, target)

    ungapped_template = alignment.template_alignment.replace('-', '')
    eq_(ungapped_template, template)

Example #4

0

Show file

File: domain.py Project: cmbi/hommod-rest

    def _clean_search_space(self, checked_ranges, sample_ranges, ok_ranges_alignments):
        """Build the list of merged ranges that should be sampled next.

        The checked and sample ranges are combined, without duplicates, into
        the collection of ranges that have been handled already. Then, for
        every template in `_find_shared_hits_ranges(ok_ranges_alignments)`,
        each pair of overlapping ranges is merged. A merged range is kept when
        it has not been handled yet and when its kmad alignment places the
        intersection of the two ranges on the same template sequence as the
        alignments of both original ranges do.

        :param checked_ranges: list of ranges that were examined before
        :param sample_ranges: list of ranges that are currently sampled
        :param ok_ranges_alignments: mapping of range to its alignment
        :return: list of merged ranges
        """

        # See if we can merge ranges that have
        # the same template in their blast hits:
        checked_ranges = self._remove_duplicate_ranges(checked_ranges + sample_ranges)
        sample_ranges = []
        shared_hits_ranges = self._find_shared_hits_ranges(ok_ranges_alignments)
        for template_id in shared_hits_ranges:

            ranges = shared_hits_ranges[template_id]

            for i, range_i in enumerate(ranges):
                overlapping_ranges = [range_j for j, range_j in enumerate(ranges)
                                      if j != i and range_j.overlaps_with(range_i)]

                for range_j in overlapping_ranges:
                    merged = range_i.merge_with(range_j)

                    # Merge only if:
                    # - the ranges overlap (guaranteed by the selection above)
                    # - the merge has not already been done
                    # - the intersecting parts of the ranges align to
                    #   the template in exactly the same way
                    # NOTE(review): no overlap-percentage or length-difference
                    # threshold is applied here; confirm that is intended.
                    if merged in checked_ranges:
                        continue

                    alignment_i = ok_ranges_alignments[range_i]
                    alignment_j = ok_ranges_alignments[range_j]
                    template_secstr = dssp.get_secondary_structure(template_id)
                    template_sequence = dssp.get_sequence(template_id)
                    try:
                        alignment_m = kmad_aligner.align(template_sequence, template_secstr,
                                                         merged.get_sub_sequence())
                    except Exception:
                        # If kmad fails, then skip this one :(
                        # (KeyboardInterrupt and SystemExit are not swallowed.)
                        _log.warning(traceback.format_exc())
                        continue

                    intersected = range_i.get_intersection(range_j)

                    # Express the intersection relative to the start of each
                    # range, because every alignment covers only its own range.
                    intersect_template_sequence_i = \
                        self._get_template_sequence_in_target_range(alignment_i, intersected - range_i.start)

                    intersect_template_sequence_j = \
                        self._get_template_sequence_in_target_range(alignment_j, intersected - range_j.start)

                    intersect_template_sequence_m = \
                        self._get_template_sequence_in_target_range(alignment_m, intersected - merged.start)

                    if intersect_template_sequence_i == intersect_template_sequence_m and \
                            intersect_template_sequence_j == intersect_template_sequence_m:
                        sample_ranges.append(merged)

        return sample_ranges

Example #5

0

Show file

    def _clean_search_space(self, checked_ranges, sample_ranges, ok_ranges_alignments):
        """Return merged ranges that are worth sampling in the next round.

        `checked_ranges` and `sample_ranges` are concatenated and
        de-duplicated to form the ranges that count as handled. For each
        template in `_find_shared_hits_ranges(ok_ranges_alignments)`, every
        pair of overlapping ranges is merged. The merged range is returned
        when it is not handled yet and the kmad alignment of the merged range
        agrees with the alignments of both original ranges on the template
        sequence in their intersection.

        :param checked_ranges: list of ranges that were examined before
        :param sample_ranges: list of ranges that are currently sampled
        :param ok_ranges_alignments: mapping of range to its alignment
        :return: list of merged ranges
        """

        # See if we can merge ranges that have
        # the same template in their blast hits:
        checked_ranges = self._remove_duplicate_ranges(checked_ranges + sample_ranges)
        sample_ranges = []
        shared_hits_ranges = self._find_shared_hits_ranges(ok_ranges_alignments)
        for template_id in shared_hits_ranges:

            ranges = shared_hits_ranges[template_id]

            for i, first in enumerate(ranges):
                partners = [second for j, second in enumerate(ranges)
                            if j != i and second.overlaps_with(first)]

                for second in partners:
                    merged = first.merge_with(second)

                    # Merge only if:
                    # - the ranges overlap (guaranteed by the selection above)
                    # - the merge has not already been done
                    # - the intersecting parts of the ranges align to
                    #   the template in exactly the same way
                    # NOTE(review): no overlap-percentage or length-difference
                    # threshold is applied here; confirm that is intended.
                    if merged in checked_ranges:
                        continue

                    alignment_i = ok_ranges_alignments[first]
                    alignment_j = ok_ranges_alignments[second]
                    template_secstr = dssp.get_secondary_structure(template_id)
                    template_sequence = dssp.get_sequence(template_id)
                    try:
                        alignment_m = kmad_aligner.align(template_sequence, template_secstr,
                                                         merged.get_sub_sequence())
                    except Exception:
                        # If kmad fails, then skip this one :(
                        # (KeyboardInterrupt and SystemExit are not swallowed.)
                        _log.warning(traceback.format_exc())
                        continue

                    intersected = first.get_intersection(second)

                    # The intersection is shifted by the start of each range,
                    # because every alignment covers only its own range.
                    intersect_template_sequence_i = \
                        self._get_template_sequence_in_target_range(alignment_i, intersected - first.start)

                    intersect_template_sequence_j = \
                        self._get_template_sequence_in_target_range(alignment_j, intersected - second.start)

                    intersect_template_sequence_m = \
                        self._get_template_sequence_in_target_range(alignment_m, intersected - merged.start)

                    if intersect_template_sequence_i == intersect_template_sequence_m and \
                            intersect_template_sequence_j == intersect_template_sequence_m:
                        sample_ranges.append(merged)

        return sample_ranges

Example #6

0

Show file

File: test_kmad.py Project: cmbi/hommod-rest

def test_kmad_X():
    """Aligning a template with 'X' residues must not drop any template residue."""
    template = "XAXRXLXKXGDAFNR"
    secstr = "               "
    target = "AAAAAAAAAAAAAAA"

    result = kmad_aligner.align(template, secstr, target)

    template_without_gaps = result.template_alignment.replace('-', '')
    eq_(template_without_gaps, template)

Example #7

0

Show file

File: test_kmad.py Project: cmbi/hommod

def test_kmad():
    """kmad must return a non-empty alignment whose two rows have equal length."""
    template = "TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN"
    secstr = " EE SSHHHHHHHHHHHTTT  HHHHHHHHS EE SSS   GGG  "
    target = "AAACCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGSDYAN"

    alignment = kmad_aligner.align(template, secstr, target)

    target_row_length = len(alignment.target_alignment)
    ok_(target_row_length > 0)
    eq_(len(alignment.target_alignment), len(alignment.template_alignment))

Example #8

0

Show file

File: test_kmad.py Project: cmbi/hommod-rest

def test_kmad():
    """The target and template rows of a kmad alignment are non-empty and equally long."""
    template = "TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN"
    secstr = " EE SSHHHHHHHHHHHTTT  HHHHHHHHS EE SSS   GGG  "
    target = "AAACCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGSDYAN"

    result = kmad_aligner.align(template, secstr, target)

    length_of_target_row = len(result.target_alignment)
    ok_(length_of_target_row > 0)
    eq_(len(result.target_alignment), len(result.template_alignment))

Example #9

0

Show file

    def _get_hits(self, range_, template_id):
        """Blast the sub-sequence of a range and return the hits that pass the identity filter.

        Every blast hit has its alignment replaced by a kmad alignment of the
        range's sub-sequence against the full template sequence. A hit is kept
        when its percentage identity is at least
        `get_min_identity(alignment.count_aligned_residues())`.

        :param range_: the range whose sub-sequence is used as query
        :param template_id: if not None, only hits on this template are
                            considered; if None, blacklisted hits are skipped
        :return: list of accepted blast alignments
        :raises InitError: if no template blast databank has been set
        """
        if self.template_blast_databank is None:
            raise InitError("blast databank is not set")

        # The query does not change while filtering, so fetch it once.
        query_sequence = range_.get_sub_sequence()

        blast_hits = blaster.blastp(query_sequence, self.template_blast_databank)
        _log.debug("{} blast hits to filter".format(len(blast_hits)))

        count_template_hits = 0
        good_hits = []
        for hit_id in blast_hits:
            for alignment in blast_hits[hit_id]:
                hit_template_id = TemplateID(alignment.get_hit_accession_code(),
                                             alignment.get_hit_chain_id())
                if template_id is not None and hit_template_id != template_id:
                    continue

                count_template_hits += 1

                if template_id is None and blacklister.is_blacklisted(alignment.get_hit_accession_code()):
                    continue

                if not dssp.has_secondary_structure(hit_template_id):
                    continue

                # Replace the blast hit's alignment with the kmad alignment.
                template_secstr = dssp.get_secondary_structure(hit_template_id)
                template_sequence = dssp.get_sequence(hit_template_id)
                try:
                    kmad_alignment = kmad_aligner.align(template_sequence, template_secstr,
                                                        query_sequence)
                except Exception:
                    # If kmad fails, then skip this one :(
                    # (KeyboardInterrupt and SystemExit are not swallowed.)
                    _log.warning(traceback.format_exc())
                    continue

                # The kmad alignment spans the whole template sequence, so the
                # subject coordinates run from 1 to its length.
                alignment.full_query_sequence = range_.sequence
                alignment.query_start = range_.start + 1
                alignment.query_end = range_.end
                alignment.subject_start = 1
                alignment.subject_end = len(template_sequence)
                alignment.query_alignment = kmad_alignment.target_alignment
                alignment.subject_alignment = kmad_alignment.template_alignment

                if alignment.get_percentage_identity() >= get_min_identity(alignment.count_aligned_residues()):
                    good_hits.append(alignment)

        if count_template_hits == 0 and template_id is not None:
            _log.warning("domain sequence {} has no suitable hits with {}".format(query_sequence, template_id))
            return []

        return good_hits

Example #10

0

Show file

File: domain.py Project: cmbi/hommod-rest

    def _get_hits(self, range_, template_id):
        """Blast the sub-sequence of a range and return the hits that pass the identity filter.

        The alignment of every blast hit is replaced by a kmad alignment of
        the range's sub-sequence against the full template sequence. A hit is
        accepted when its percentage identity is at least
        `get_min_identity(alignment.count_aligned_residues())`.

        :param range_: the range whose sub-sequence is used as query
        :param template_id: if not None, only hits on this template are
                            considered; if None, blacklisted hits are skipped
        :return: list of accepted blast alignments
        :raises InitError: if no template blast databank has been set
        """
        if self.template_blast_databank is None:
            raise InitError("blast databank is not set")

        # The query does not change while filtering, so fetch it once.
        query_sequence = range_.get_sub_sequence()

        blast_hits = blaster.blastp(query_sequence, self.template_blast_databank)
        _log.debug("{} blast hits to filter".format(len(blast_hits)))

        good_hits = []
        for hit_id in blast_hits:
            for alignment in blast_hits[hit_id]:
                hit_template_id = TemplateID(alignment.get_hit_accession_code(),
                                             alignment.get_hit_chain_id())
                if template_id is not None and hit_template_id != template_id:
                    continue

                if template_id is None and blacklister.is_blacklisted(alignment.get_hit_accession_code()):
                    continue

                if not dssp.has_secondary_structure(hit_template_id):
                    continue

                # Replace the blast hit's alignment with the kmad alignment.
                template_secstr = dssp.get_secondary_structure(hit_template_id)
                template_sequence = dssp.get_sequence(hit_template_id)
                try:
                    kmad_alignment = kmad_aligner.align(template_sequence, template_secstr,
                                                        query_sequence)
                except Exception:
                    # If kmad fails, then skip this one :(
                    # (KeyboardInterrupt and SystemExit are not swallowed.)
                    _log.warning(traceback.format_exc())
                    continue

                # The kmad alignment spans the whole template sequence, so the
                # subject coordinates run from 1 to its length.
                alignment.full_query_sequence = range_.sequence
                alignment.query_start = range_.start + 1
                alignment.query_end = range_.end
                alignment.subject_start = 1
                alignment.subject_end = len(template_sequence)
                alignment.query_alignment = kmad_alignment.target_alignment
                alignment.subject_alignment = kmad_alignment.template_alignment

                if alignment.get_percentage_identity() >= get_min_identity(alignment.count_aligned_residues()):
                    good_hits.append(alignment)

        return good_hits

Example #11

0

Show file

File: model.py Project: cbaakman/hommod

    def _preserves_interactions(self, context, candidate_target_segment,
                                candidate_chain_id,
                                interacting_chain_alignments):
        """Tell whether a candidate target segment keeps an interaction with an aligned chain.

        The segment is aligned to the candidate template chain with kmad. The
        template residues covered by that alignment are then tested against
        the covered template residues of each chain in
        `interacting_chain_alignments`.

        :param context: provides sequences, secondary structures, residues
                        and the interaction test for the template
        :param candidate_target_segment: target sequence to test
        :param candidate_chain_id: id of the template chain to align it to
        :param interacting_chain_alignments: mapping of chain id to alignment
        :return: True if at least one covered candidate residue interacts
                 with the covered residues of one of the chains, else False
        """

        # The pdb file in the soup can be different from the blast hit
        # So make an alignment first!
        candidate_alignment = kmad_aligner.align(
            context.get_sequence(candidate_chain_id),
            context.get_secondary_structure(candidate_chain_id),
            candidate_target_segment)

        covered_indices = candidate_alignment.get_covered_template_residues_indices()
        all_candidate_residues = context.get_residues(candidate_chain_id)
        covered_candidate_residues = []
        for index in covered_indices:
            covered_candidate_residues.append(all_candidate_residues[index])

        for chain_id, chain_alignment in interacting_chain_alignments.items():

            partner_indices = chain_alignment.get_covered_template_residues_indices()
            partner_chain_residues = context.get_residues(chain_id)
            partner_residues = []
            for index in partner_indices:
                partner_residues.append(partner_chain_residues[index])

            message = "checking chain {} {} residues against chain {} {} residues for interaction"
            _log.debug(message.format(candidate_chain_id,
                                      len(covered_candidate_residues),
                                      chain_id, len(partner_residues)))

            # Check every target-covered residue.
            # A single interacting residue pair is enough:
            if any(context.residue_interacts_with(residue, partner_residues)
                   for residue in covered_candidate_residues):
                return True

        return False

Example #12

0

Show file

File: model.py Project: cmbi/hommod-rest

    def _preserves_interactions(self, context,
                                candidate_target_segment, candidate_chain_id,
                                interacting_chain_alignments):
        """Check that a candidate target segment still interacts with one of the aligned chains.

        After aligning the segment to the candidate template chain with kmad,
        the template residues covered by that alignment are compared with the
        covered template residues of every chain in
        `interacting_chain_alignments`.

        :param context: provides sequences, secondary structures, residues
                        and the interaction test for the template
        :param candidate_target_segment: target sequence to test
        :param candidate_chain_id: id of the template chain to align it to
        :param interacting_chain_alignments: mapping of chain id to alignment
        :return: True as soon as one interacting residue pair is found,
                 False if there is none
        """

        # The pdb file in the soup can be different from the blast hit
        # So make an alignment first!
        candidate_alignment = kmad_aligner.align(
            context.get_sequence(candidate_chain_id),
            context.get_secondary_structure(candidate_chain_id),
            candidate_target_segment)

        indices_on_candidate = candidate_alignment.get_covered_template_residues_indices()
        residues_of_candidate = context.get_residues(candidate_chain_id)
        covered_candidate_residues = []
        for position in indices_on_candidate:
            covered_candidate_residues.append(residues_of_candidate[position])

        for chain_id, aligned_chain in interacting_chain_alignments.items():

            indices_on_chain = aligned_chain.get_covered_template_residues_indices()
            residues_of_chain = context.get_residues(chain_id)
            covered_residues = []
            for position in indices_on_chain:
                covered_residues.append(residues_of_chain[position])

            log_template = "checking chain {} {} residues against chain {} {} residues for interaction"
            _log.debug(log_template.format(candidate_chain_id, len(covered_candidate_residues),
                                           chain_id, len(covered_residues)))

            # Check every target-covered residue.
            # One interacting residue pair settles it:
            if any(context.residue_interacts_with(candidate_residue, covered_residues)
                   for candidate_residue in covered_candidate_residues):
                return True

        return False

Example #13

0

Show file


def pick_random_sequences(n):
    """Return `n` randomly chosen entries of the mapping parsed from SPROT_FASTA.

    :param n: number of entries to pick, without replacement
    :return: dict holding the picked keys with their values
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample requires a sequence: a dict keys view is rejected
    # with a TypeError by Python >= 3.11, so make a list of the keys first.
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}


# Compare, for ten random sequences, the identity of each domain alignment
# with the identity of a kmad alignment of the full sequence to the same template.
sequences = pick_random_sequences(10)
for key, sequence in sequences.items():

    # Repeat the request for as long as it fails with an HTTPError.
    while True:
        try:
            domain_alignments = domain_aligner.get_domain_alignments(sequence)
        except HTTPError:
            continue
        else:
            break

    for domain_alignment in domain_alignments:
        template_id = domain_alignment.template_id
        full_alignment = kmad_aligner.align(dssp.get_sequence(template_id),
                                            dssp.get_secondary_structure(template_id),
                                            sequence)

        print(key, template_id,
              domain_alignment.get_percentage_identity(),
              full_alignment.get_percentage_identity())

Example #14

0

Show file

File: model.py Project: cmbi/hommod

    def _make_alignments(self, main_target_sequence, target_species_id,
                         main_domain_alignment, context, require_resnum):
        """Align the main target, and targets for the interacting chains, to the template chains.

        First the target sequence of the main domain alignment is aligned with
        kmad to every chain returned by `_pick_identical_chains`. After that,
        as long as unaligned template chains interact with aligned ones,
        target sequences are searched for each such chain and the best
        alignment is taken from `_choose_best_target_alignment`. A chain
        without a usable target gets a poly-A alignment.

        :param main_target_sequence: the main target sequence, used to look up
                                     the target id in the model storage
        :param target_species_id: passed on to `_find_target_sequences`
        :param main_domain_alignment: domain alignment that provides the target
                                      sequence, the range and the template id
        :param context: provides sequences, secondary structures, chain ids
                        and interacting chains of the template
        :param require_resnum: if not None, a target residue that the alignment
                               on the main template chain must cover
        :return: dict that maps template chain id to alignment
        :raises RuntimeError: if `require_resnum` is given and not covered
        """
        alignments = {}

        # Choose what chains to align the main_target_on
        main_target_chain_ids = self._pick_identical_chains(main_domain_alignment.template_id.chain_id,
                                                            context)

        ModelLogger.get_current().add("using template chains {} for the main target sequence".format(main_target_chain_ids))

        for chain_id in main_target_chain_ids:

            template_chain_sequence = context.get_sequence(chain_id)
            template_chain_secstr = context.get_secondary_structure(chain_id)

            # Re-align the domain's target sequence to this particular chain,
            # but keep the range and template id of the main domain alignment.
            local_alignment = kmad_aligner.align(template_chain_sequence, template_chain_secstr,
                                                 main_domain_alignment.get_target_sequence())
            alignments[chain_id] = DomainAlignment(local_alignment.target_alignment,
                                                   local_alignment.template_alignment,
                                                   main_domain_alignment.range,
                                                   main_domain_alignment.template_id)

            alignments[chain_id].target_id = model_storage.get_sequence_id(main_target_sequence)

        # NOTE(review): the lookup below presumes that the main template chain
        # is among main_target_chain_ids - confirm in _pick_identical_chains.
        if require_resnum is not None and \
                not alignments[main_domain_alignment.template_id.chain_id].is_target_residue_covered(require_resnum):
            raise RuntimeError("Cannot align to chain {} so that residue {} is covered"
                               .format(main_domain_alignment.template_id.chain_id, require_resnum))


        # Try to find and align target sequences for interacting chains in the template,
        # while keeping in mind which residues interact and must thus be covered by the alignment.
        # We expand the set of involved template chains with every iteration,
        # until all template chains have been added.
        while len(alignments) < len(context.get_chain_ids()):

            # First, make python remember to which chains the candidate chains interact:
            candidate_chains_interacts_with = {}
            for aligned_chain_id in alignments:
                for interacting_chain_id in context.list_interacting_chains(aligned_chain_id):

                    ModelLogger.get_current().add("template chain {} interacts with {}"
                                                  .format(aligned_chain_id, interacting_chain_id))

                    # Skip those that we've already aligned, to prevent infinite loops:
                    if interacting_chain_id in alignments:
                        continue

                    if interacting_chain_id not in candidate_chains_interacts_with:
                        candidate_chains_interacts_with[interacting_chain_id] = []
                    candidate_chains_interacts_with[interacting_chain_id].append(aligned_chain_id)

            if len(candidate_chains_interacts_with) <= 0:
                break  # Nothing more to add

            # iterate over chains that might interact with the chains that are already in the set:
            for candidate_chain_id in candidate_chains_interacts_with:

                # Only the alignments of the chains that this candidate interacts with are passed on.
                interacting_chain_alignments = {interacting_chain_id: alignments[interacting_chain_id]
                                                for interacting_chain_id in candidate_chains_interacts_with[candidate_chain_id]}

                template_chain_sequence = context.get_sequence(candidate_chain_id)
                # NOTE(review): template_chain_secstr is not used further down
                # in this loop - confirm whether this lookup is still needed.
                template_chain_secstr = context.get_secondary_structure(candidate_chain_id)

                potential_target_sequences = self._find_target_sequences(template_chain_sequence,
                                                                         target_species_id)

                ModelLogger.get_current().add("choosing target sequence for template chain {} from {}"
                                              .format(candidate_chain_id, potential_target_sequences.keys()))

                alignments[candidate_chain_id] = self._choose_best_target_alignment(context,
                                                                                    interacting_chain_alignments,
                                                                                    potential_target_sequences,
                                                                                    candidate_chain_id)
                # No usable target was found: fall back to a poly-A alignment.
                if alignments[candidate_chain_id] is None:
                    alignments[candidate_chain_id] = self._make_poly_A_alignment(context, candidate_chain_id)
                    alignments[candidate_chain_id].target_id = "poly-A"

                    ModelLogger.get_current().add("found no target for template chain {}, placing poly-A"
                                                  .format(candidate_chain_id))

        return alignments

Example #15

0

Show file

    s = ""
    j = 0
    for i in range(len(ref)):
        if ref[i].isalpha():
            s += ins[j]
            j += 1
        else:
            s += ref[i]
    return s


sequence = "CWAVAVAVGNDGAVAVAVWC"
secstr = "EEEEEEEE    EEEEEEEE"
target = "CWAVAVAVAVAVGGGGGGVAVAVAVAVWC"

# Align the same template/target pair with both aligners,
# so that the two results can be compared by eye.
kmad_alignment = kmad_aligner.align(sequence, secstr, target)
clustal_alignment = clustal_aligner.align({
    'template': sequence,
    'target': target
})

# print(...) with a single argument gives the same output under Python 2 and
# Python 3, whereas the print statement is a syntax error under Python 3.
# For each aligner: the target row, the secondary structure laid out like
# the template row, and the template row itself.
print('kmad')
print(kmad_alignment.target_alignment)
print(gap_equally(kmad_alignment.template_alignment, secstr))
print(kmad_alignment.template_alignment)

print('clustal')
print(clustal_alignment.aligned_sequences['target'])
print(gap_equally(clustal_alignment.aligned_sequences['template'], secstr))
print(clustal_alignment.aligned_sequences['template'])

Example #16

0

Show file

File: compare_domain_alignments.py Project: cmbi/hommod-rest

# Configure the domain aligner thresholds and the kmad / blastp executables
# from the module-level constants, before the alignments below are requested.
domain_aligner.similar_ranges_min_overlap_percentage = SIMILAR_RANGES_MIN_OVERLAP_PERCENTAGE
domain_aligner.similar_ranges_max_length_difference_percentage = SIMILAR_RANGES_MAX_LENGTH_DIFFERENCE_PERCENTAGE
kmad_aligner.kmad_exe = KMAD_EXE
blaster.blastp_exe = BLASTP_EXE


def pick_random_sequences(n):
    """Pick `n` random entries from the mapping that parse_fasta returns for SPROT_FASTA.

    :param n: number of entries to pick, without replacement
    :return: dict holding the picked keys with their values
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # A dict keys view is not a sequence: random.sample raises a TypeError
    # for it on Python >= 3.11, so sample from a list of the keys.
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}


# For ten random sequences: print the identity of every domain alignment next
# to the identity of a kmad alignment of the whole sequence to the same template.
sequences = pick_random_sequences(10)
for key, sequence in sequences.items():

    # Keep retrying while the request fails with an HTTPError.
    while True:
        try:
            domain_alignments = domain_aligner.get_domain_alignments(sequence)
        except HTTPError:
            continue
        else:
            break

    for domain_alignment in domain_alignments:
        template_id = domain_alignment.template_id
        full_alignment = kmad_aligner.align(dssp.get_sequence(template_id),
                                            dssp.get_secondary_structure(template_id),
                                            sequence)

        print(key, template_id, domain_alignment.get_percentage_identity(),
              full_alignment.get_percentage_identity())

Example #17

0

Show file

File: show_kmad_alignment.py Project: cmbi/hommod-rest

def gap_equally(ref, ins):
    """Spread the characters of `ins` over the alphabetic positions of `ref`.

    Walks through `ref`: every alphabetic character is replaced by the next
    unused character of `ins`, every other character (such as a gap symbol)
    is copied through unchanged.

    Args:
        ref: string whose non-alphabetic characters mark where gaps go.
        ins: string to be gapped; needs at least as many characters as
            `ref` has alphabetic ones.

    Returns:
        A string as long as `ref`.

    Raises:
        IndexError: if `ins` runs out before all alphabetic positions of
            `ref` have been filled.
    """
    pieces = []
    consumed = 0
    for symbol in ref:
        if not symbol.isalpha():
            # Not a residue letter: keep the reference character as it is.
            pieces.append(symbol)
            continue

        pieces.append(ins[consumed])
        consumed += 1

    return "".join(pieces)


# Toy input for comparing the two aligners. The three assignments are kept
# column-aligned on purpose: `secstr` holds one character for every character
# of `sequence` (both are 20 long), `target` is the longer sequence that gets
# aligned against it.
sequence = "CWAVAVAVGNDGAVAVAVWC"
secstr   = "EEEEEEEE    EEEEEEEE"
target   = "CWAVAVAVAVAVGGGGGGVAVAVAVAVWC"


kmad_alignment = kmad_aligner.align(sequence, secstr, target)
clustal_alignment = clustal_aligner.align({'template': sequence, 'target': target})

# Single-argument print(...) calls produce the same output under Python 2
# and Python 3, whereas the bare print statement is a SyntaxError on Python 3.
# For each aligner, show three rows: the aligned target, the secondary
# structure string with gaps copied over from the aligned template, and the
# aligned template itself.
print('kmad')
print(kmad_alignment.target_alignment)
print(gap_equally(kmad_alignment.template_alignment, secstr))
print(kmad_alignment.template_alignment)

print('clustal')
print(clustal_alignment.aligned_sequences['target'])
print(gap_equally(clustal_alignment.aligned_sequences['template'], secstr))
print(clustal_alignment.aligned_sequences['template'])

Example #18

0

Show file

File: model.py Project: cmbi/hommod-rest

    def _make_alignments(self, main_target_sequence, target_species_id,
                         main_domain_alignment, context, require_resnum):
        """Build one alignment per template chain, starting from the main target.

        The main target sequence is first aligned (with kmad) to every chain
        returned by `_pick_identical_chains` for the main template chain.
        After that, the set of aligned chains is grown round by round with
        the chains that interact with already aligned ones; each of those
        gets the alignment picked by `_choose_best_target_alignment`, or a
        poly-A alignment when that returns None.

        Args:
            main_target_sequence: passed to `model_storage.get_sequence_id`
                to label the main alignments.
            target_species_id: passed on to `_find_target_sequences`.
            main_domain_alignment: supplies the template id, the range and
                the target sequence for the main alignments.
            context: supplies the chain ids, sequences, secondary structures
                and chain interactions of the template.
            require_resnum: when not None, a target residue that the
                alignment on the main template chain must cover.

        Returns:
            A dict mapping template chain id to its alignment. Chains that
            are never reached through interactions are not included.

        Raises:
            RuntimeError: if `require_resnum` is given but not covered by
                the alignment on the main template chain.
        """
        main_chain_id = main_domain_alignment.template_id.chain_id
        chain_alignments = {}

        # Choose what chains to align the main target on
        identical_chain_ids = self._pick_identical_chains(main_chain_id, context)

        ModelLogger.get_current().add(
            "using template chains {} for the main target sequence".format(identical_chain_ids))

        for chain_id in identical_chain_ids:
            kmad_result = kmad_aligner.align(context.get_sequence(chain_id),
                                             context.get_secondary_structure(chain_id),
                                             main_domain_alignment.get_target_sequence())

            # Keep the range and template id of the main domain alignment,
            # but take the aligned strings from kmad.
            main_alignment = DomainAlignment(kmad_result.target_alignment,
                                             kmad_result.template_alignment,
                                             main_domain_alignment.range,
                                             main_domain_alignment.template_id)
            main_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)
            chain_alignments[chain_id] = main_alignment

        if require_resnum is not None:
            if not chain_alignments[main_chain_id].is_target_residue_covered(require_resnum):
                raise RuntimeError("Cannot align to chain {} so that residue {} is covered"
                                   .format(main_chain_id, require_resnum))

        # Try to find and align target sequences for interacting chains in the template,
        # while keeping in mind which residues interact and must thus be covered by the alignment.
        # The set of involved template chains grows with every round,
        # until all template chains have been added.
        while len(chain_alignments) < len(context.get_chain_ids()):

            # Map every not yet aligned chain to the aligned chains it interacts with:
            partners_of_candidate = {}
            for aligned_chain_id in chain_alignments:
                for interacting_chain_id in context.list_interacting_chains(aligned_chain_id):

                    ModelLogger.get_current().add("template chain {} interacts with {}"
                                                  .format(aligned_chain_id, interacting_chain_id))

                    # Only chains that are not aligned yet are candidates,
                    # this prevents infinite loops:
                    if interacting_chain_id not in chain_alignments:
                        partners_of_candidate.setdefault(interacting_chain_id, []) \
                                             .append(aligned_chain_id)

            if not partners_of_candidate:
                break  # Nothing more to add

            # Go over the chains that interact with the chains that are already in the set:
            for candidate_chain_id, partner_ids in partners_of_candidate.items():

                interacting_chain_alignments = {partner_id: chain_alignments[partner_id]
                                                for partner_id in partner_ids}

                template_chain_sequence = context.get_sequence(candidate_chain_id)

                # NOTE(review): the result of this lookup is not used below;
                # confirm whether the call is still needed.
                context.get_secondary_structure(candidate_chain_id)

                potential_target_sequences = self._find_target_sequences(template_chain_sequence,
                                                                         target_species_id)

                ModelLogger.get_current().add("choosing target sequence for template chain {} from {}"
                                              .format(candidate_chain_id, potential_target_sequences.keys()))

                chosen_alignment = self._choose_best_target_alignment(context,
                                                                      interacting_chain_alignments,
                                                                      potential_target_sequences,
                                                                      candidate_chain_id)
                if chosen_alignment is None:
                    # No usable target sequence: fall back to a poly-A alignment.
                    chosen_alignment = self._make_poly_A_alignment(context, candidate_chain_id)
                    chosen_alignment.target_id = "poly-A"

                    ModelLogger.get_current().add("found no target for template chain {}, placing poly-A"
                                                  .format(candidate_chain_id))

                chain_alignments[candidate_chain_id] = chosen_alignment

        return chain_alignments