Esempio n. 1
0
    def __call__(self, seqs):
        """Annotate every sequence in seqs with the ORFs found by estscan.

        The sequences are handed to _run_estscan, which writes its peptide
        and DNA output into two temporary files.  Those files are parsed
        with _read_estcan_results and each reported ORF is appended to the
        matching sequence as a SeqFeature of type 'ORF'.

        seqs is returned (with its items modified in place) so the call can
        be chained; an empty seqs is returned untouched without running
        anything.
        """
        if not seqs:
            return seqs
        # The context managers guarantee that the temporary files are closed
        # (and therefore removed) and that the result handles are released
        # even if running estscan or parsing its output raises.
        with NamedTemporaryFile() as pep_fhand, \
                NamedTemporaryFile() as dna_fhand:
            _run_estscan(seqs, pep_fhand.name, dna_fhand.name,
                         self._usage_matrix)
            # now we read the result files
            with open(pep_fhand.name) as pep_result, \
                    open(dna_fhand.name) as dna_result:
                estscan_result = _read_estcan_results(pep_result, dna_result)

            for seq in seqs:
                orfs = estscan_result.get(get_name(seq), {})
                feats = []
                # items() iterates the same pairs as viewitems() and exists
                # on both Python 2 and Python 3 dicts.
                for (start, end, strand), str_seqs in orfs.items():
                    # Only the start is shifted; the end is used as given
                    # (presumably 1-based inclusive coordinates turned into
                    # a 0-based, end-exclusive location -- TODO confirm).
                    feats.append(
                        SeqFeature(location=FeatureLocation(start - 1, end,
                                                            strand),
                                   type='ORF',
                                   qualifiers=str_seqs))
                if feats:
                    # We're assuming that the seq has a SeqRecord in it
                    seq.object.features.extend(feats)
        return seqs
Esempio n. 2
0
def _annotate_polya(seq, min_len, max_cont_mismatches):
    """Add a 'polyA_sequence' feature for the longest tail found in seq.

    A tail is searched at the THREE_PRIME end (poly-A, strand 1) and at the
    FIVE_PRIME end (poly-T, strand -1) with _detect_polya_tail.  The longer
    one is annotated; when both have the same non-zero length one of them is
    picked at random.  Nothing is added when no tail is detected.

    The original docstring describes the detection as the EMBOSS trimest
    method.
    """
    sequence = get_str_seq(seq)
    three_prime_tail = _detect_polya_tail(sequence, THREE_PRIME, min_len,
                                          max_cont_mismatches)
    five_prime_tail = _detect_polya_tail(sequence, FIVE_PRIME, min_len,
                                         max_cont_mismatches)

    len_a = 0
    if three_prime_tail:
        len_a = three_prime_tail[1] - three_prime_tail[0]
    len_t = 0
    if five_prime_tail:
        len_t = five_prime_tail[1] - five_prime_tail[0]

    if len_a == len_t:
        if not len_a:
            # Neither end has a tail: nothing to annotate.
            return
        # Equal lengths: break the tie with a coin flip.
        use_polya = bool(randint(0, 1))
    else:
        use_polya = len_a > len_t

    if use_polya:
        strand = 1
        start, end = three_prime_tail
    else:
        strand = -1
        start, end = five_prime_tail

    # We're assuming that the seq has a SeqRecord in it
    seq.object.features.append(
        SeqFeature(location=FeatureLocation(start, end, strand),
                   type='polyA_sequence'))
Esempio n. 3
0
    def __call__(self, seqrecords):
        """Annotate the sequences with their blast match parts.

        The sequences are blasted with Blaster against self.blastdb.  For
        each match part of each match a SeqFeature of type 'match_part' is
        appended to the query sequence.  Its location is the query range of
        the part, its id is 'matchNNN' (NNN being the 1-based position of
        the match within the result) and its qualifiers hold the subject
        range and name ('Target'), the expect value ('score'), the identity
        and the base name of the blast database.

        seqrecords is returned with its items modified in place; an empty
        seqrecords is returned untouched.
        """
        if not seqrecords:
            return seqrecords

        blaster = Blaster(seqrecords, self.blastdb, self._program,
                          self._dbtype, filters=self._filters,
                          params=self._params, remote=self._remote)
        results_by_name = blaster.blasts
        db_name = os.path.basename(self.blastdb)

        for record in seqrecords:
            alignment = results_by_name.get(get_name(record), None)
            if not alignment:
                continue
            for match_number, match in enumerate(alignment['matches'], 1):
                subject_name = match['subject']['name']
                feature_id = 'match{0:03d}'.format(match_number)
                for part in match['match_parts']:
                    hit_end = part['subject_end']
                    hit_start = part['subject_start']
                    if hit_end < hit_start:
                        # Subject coordinates run backwards: record the hit
                        # on the minus strand with start <= end.
                        strand = -1
                        hit_start, hit_end = hit_end, hit_start
                    else:
                        strand = 1

                    location = FeatureLocation(part['query_start'],
                                               part['query_end'], strand)
                    scores = part['scores']
                    qualifiers = {
                        'Target': {'start': hit_start,
                                   'end': hit_end,
                                   'name': subject_name},
                        'score': scores['expect'],
                        'identity': scores['identity'],
                        'blastdb': db_name,
                    }
                    record.object.features.append(
                        SeqFeature(location=location,
                                   type='match_part',
                                   qualifiers=qualifiers,
                                   id=feature_id))
        return seqrecords