Python SequenceCollection Examples

Programming Language: Python

Namespace/Package Name: cogent3.core.alignment

Examples at hotexamples.com: 6

Python SequenceCollection - 6 examples found. These are the top-rated real-world Python examples of cogent3.core.alignment.SequenceCollection, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

SequenceCollection(6)

is_ragged(1)

rc(1)

trim_stop_codons(1)

Example #1

Show file

File: test_core_standalone.py Project: rahulghangas/cogent3

    def test_trim_stop_codons_info(self):
        """the info attribute must survive a call to trim_stop_codons"""
        # unaligned collection
        unaligned_data = dict(seq1="ACGTAA", seq2="ACGACG", seq3="ACGCGT")
        collection = SequenceCollection(
            data=unaligned_data, moltype=DNA, info=dict(key="value")
        )
        trimmed_collection = collection.trim_stop_codons()
        self.assertEqual(trimmed_collection.info["key"], "value")

        # aligned
        aligned_data = dict(seq1="ACGTAA", seq2="ACGTGA", seq3="ACGTAA")
        alignment = ArrayAlignment(
            data=aligned_data, moltype=DNA, info=dict(key="value")
        )
        trimmed_alignment = alignment.trim_stop_codons()
        self.assertEqual(trimmed_alignment.info["key"], "value")

Example #2

Show file

def clustal_from_alignment(aln, interleave_len=None):
    """Returns a string in Clustal format.

    Parameters
    ----------
    aln
        can be an Alignment object or a dict.
    interleave_len
        sequence line width. When None (the default) each sequence is
        written on a single line. Only available if sequences are aligned.

    Returns
    -------
    The Clustal formatted text, or an empty string if ``aln`` is empty.

    Raises
    ------
    ValueError
        if the sequences are not all the same length, or if
        ``interleave_len`` is given but is not positive.
    """
    if not aln:
        return ""

    # a zero or negative width would never advance the interleaving loop
    if interleave_len is not None and interleave_len <= 0:
        raise ValueError("interleave_len must be a positive integer")

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # objects without a RowOrder attribute (e.g. a dict) are written
        # in sorted name order
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)
    clustal_list = ["CLUSTAL\n"]

    if seqs.is_ragged():
        raise ValueError(
            "Sequences in alignment are not all the same length. "
            "Cannot generate Clustal format."
        )

    aln_len = seqs.seq_len

    # Every sequence starts in the same column: the longest label plus
    # four spaces.
    label_max = max(len(label) for label in seqs.names)
    max_spaces = label_max + 4

    # Get ordered seqs
    ordered_seqs = [seqs.named_seqs[label] for label in order]

    if interleave_len is not None:
        # one group of lines per window, each followed by a blank line
        curr_ix = 0
        while curr_ix < aln_len:
            clustal_list.extend(
                "%s%s%s"
                % (
                    x,
                    " " * (max_spaces - len(x)),
                    y[curr_ix:curr_ix + interleave_len],
                )
                for x, y in zip(order, ordered_seqs)
            )
            clustal_list.append("")
            curr_ix += interleave_len
    else:
        clustal_list.extend(
            "%s%s%s" % (x, " " * (max_spaces - len(x)), y)
            for x, y in zip(order, ordered_seqs)
        )
        clustal_list.append("")

    return "\n".join(clustal_list)

Example #3

Show file

 def test_reverse_complement_info(self):
     """the info attribute must survive reverse complementing"""
     gapped = dict(
         seq1="--ACGT--GT---",
         seq2="TTACGTA-GT---",
         seq3="--ACGTA-GCC--",
     )
     # first the alignment with gaps, then the collection with gaps
     for constructor in (ArrayAlignment, SequenceCollection):
         original = constructor(data=gapped, moltype=DNA, info={"key": "value"})
         reverse_complemented = original.rc()
         self.assertEqual(reverse_complemented.info["key"], "value")

Example #4

Show file

File: __init__.py Project: rahulghangas/cogent3

def make_unaligned_seqs(
    data, moltype=None, label_to_name=None, info=None, source=None, **kw
):
    """Build a SequenceCollection from unaligned sequence data.

    Parameters
    ----------
    data
        sequences
    moltype
        the moltype, eg DNA, PROTEIN, 'dna', 'protein'. When not None it
        is resolved with get_moltype.
    label_to_name
        function for converting original name into another name.
    info
        a dict from which to make an info object. Its 'source' key is
        set on the dict that is passed in.
    source
        origins of this data, defaults to 'unknown'
    **kw
        other keyword arguments passed to SequenceCollection. Dicts given
        under the 'constructor_kw' or 'kw' keys are merged into these.
    """
    moltype = None if moltype is None else get_moltype(moltype)

    if not info:
        info = {}

    # lift nested keyword dicts up into the top-level keyword arguments
    for nested_key in ("constructor_kw", "kw"):
        nested = kw.pop(nested_key, None)
        if nested:
            kw.update(nested)

    assert isinstance(info, dict), "info must be a dict"
    info["source"] = source if source else "unknown"

    return SequenceCollection(
        data=data,
        moltype=moltype,
        label_to_name=label_to_name,
        info=info,
        **kw
    )

Example #5

Show file

    def get_translatable(self, seqs):
        """returns the translatable sequences from seqs.

        translation errors are stored in the info object

        Parameters
        ----------
        seqs
            a sequence collection; it is degapped and, if this instance
            has a moltype that differs, converted to that moltype

        Returns
        -------
        A SequenceCollection of the sequences for which a frame was found,
        with the [name, message] pairs of the failures stored under
        info["translation_errors"]. If no sequence could be processed, a
        NotCompleted instance whose message lists the failures.
        """
        seqs = seqs.degap()
        if self._moltype and self._moltype != seqs.moltype:
            seqs = seqs.to_moltype(self._moltype)

        translatable = []
        error_log = []
        for seq in seqs.seqs:
            try:
                frame = best_frame(seq, self._gc, allow_rc=self._allow_rc)
                if frame < 0:
                    # negative frame means the reverse complement is best
                    seq = seq.rc()
                    frame *= -1
                frame -= 1  # returned from best frame as 1, 2, 3
                # keep only the complete codons from the frame start
                num_codons = (len(seq) - frame) // 3
                seq = seq[frame:frame + (num_codons * 3)]
                if self._trim_terminal_stop:
                    seq = seq.trim_stop_codon(gc=self._gc)
                translatable.append([seq.name, seq])
            except ValueError as msg:
                # TODO handle case where incomplete at end OR beginning
                # plus case where is divisible by 3 but not in frame
                # if not divisible by 3, then calc remainder as len(seq) % 3
                # try translating new[remainder:] and new[:-remainder]
                error_log.append([seq.name, msg.args[0]])

        if translatable:
            translatable = SequenceCollection(data=translatable,
                                              moltype=self._moltype,
                                              info=seqs.info)
            translatable.info["translation_errors"] = error_log
        else:
            # error_log holds [name, message] pairs, so each pair must be
            # rendered as text before joining (str.join rejects lists)
            errors = " ".join(
                "%s: %s" % (name, message) for name, message in error_log
            )
            translatable = NotCompleted("FALSE",
                                        self,
                                        errors,
                                        source=seqs)

        return translatable

Example #6

Show file

 def get_seq_collection(self, feature_types=None, where_feature=None):
     """builds a SequenceCollection of the members' unaligned sequences

     When feature_types is given, each member's annotated sequence is
     used; otherwise its plain seq attribute. Members whose sequence is
     None are left out.
     """
     named_seqs = []
     for member in self.members:
         seq = (
             member.get_annotated_seq(feature_types, where_feature)
             if feature_types
             else member.seq
         )
         if seq is not None:
             named_seqs.append((seq.name, seq))
     return SequenceCollection(data=named_seqs, moltype=DNA)