Python FASTA Examples, paleomix.common.formats.fasta.FASTA Python Examples

Example #1

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_from_lines__two_entries_with_meta():
    """MSA.from_lines keeps the text after the first space as record meta."""
    without_meta = FASTA("seq1", None, "ACG")
    with_meta = FASTA("seq2", "Second meta", "TGA")

    observed = MSA.from_lines([">seq1", "ACG", ">seq2 Second meta", "TGA"])

    assert_equal(observed, MSA([without_meta, with_meta]))

Example #2

0

Show file

def test_msa_select__remove_one():
    """Selecting only "A" from a two-record MSA yields an MSA with just "A"."""
    kept = FASTA("A", None, "ACGT")
    dropped = FASTA("B", None, "GCTA")

    selected = MSA([kept, dropped]).select(["A"])

    assert selected == MSA([kept])

Example #3

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_from_file__compressed_bz2():
    """MSA.from_file returns the expected records for the .bz2 fixture."""
    path = test_file("fasta_file.fasta.bz2")
    records = [
        FASTA("This_is_BZ_FASTA!", None, "CGTNA"),
        FASTA("This_is_ALSO_BZ_FASTA!", None, "ACGTN"),
    ]

    assert_equal(MSA.from_file(path), MSA(records))

Example #4

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_exclude__remove_one():
    """Excluding "B" from a two-record MSA yields an MSA with just "A"."""
    kept = FASTA("A", None, "ACGT")
    dropped = FASTA("B", None, "GCTA")

    remaining = MSA([kept, dropped]).exclude(["B"])

    assert_equal(remaining, MSA([kept]))

Example #5

0

Show file

def test_fasta__from_lines__multiple_records():
    """Multi-line sequences are concatenated and header meta is split off."""
    lines = [
        ">first\n",
        "TGTTCTCCACCGTGCACAAC\n",
        "CCTTCATCCA\n",
        ">Second XT:1:0\n",
        "GAGAGCTCAGCTAAC\n",
        ">Third\n",
        "CGCTGACCAAAAACGGACAG\n",
        "GGCATTCGGC\n",
    ]
    first = FASTA("first", None, "TGTTCTCCACCGTGCACAACCCTTCATCCA")
    second = FASTA("Second", "XT:1:0", "GAGAGCTCAGCTAAC")
    third = FASTA("Third", None, "CGCTGACCAAAAACGGACAGGGCATTCGGC")

    assert_list_equal(FASTA.from_lines(lines), [first, second, third])

Example #6

0

Show file

def test_msa_join__three_msa():
    """Joining the three module-level fixture MSAs gives 9-column records."""
    joined = MSA.join(_JOIN_MSA_1, _JOIN_MSA_2, _JOIN_MSA_3)

    records = (
        FASTA("nc", None, "ACGTGAAAG"),
        FASTA("nm", None, "TGACTTGAG"),
        FASTA("miRNA", None, "UCAGACCAU"),
    )
    assert joined == MSA(records)

Example #7

0

Show file

def test_msa_join__two_msa():
    """Joining the first two module-level fixture MSAs gives 6-column records."""
    joined = MSA.join(_JOIN_MSA_1, _JOIN_MSA_2)

    records = (
        FASTA("nc", None, "ACGTGA"),
        FASTA("nm", None, "TGACTT"),
        FASTA("miRNA", None, "UCAGAC"),
    )
    assert joined == MSA(records)

Example #8

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_reduce__multiple_empty_column__all_empty_column_are_removed():
    """reduce() drops every column that holds only N/n/- in all records."""
    original = MSA([
        FASTA("Name_A", "Meta_A", "-AnTN"),
        FASTA("Name_B", "Meta_B", "NC-NN"),
    ])
    reduced = MSA([
        FASTA("Name_A", "Meta_A", "AT"),
        FASTA("Name_B", "Meta_B", "CN"),
    ])

    assert_equal(original.reduce(), reduced)

Example #9

0

Show file

def test_msa_filter_singletons__filter_by_third():
    """Filtering "Seq1" against "Seq3" rewrites only the "Seq1" sequence."""
    filtered = _FILTER_MSA_1.filter_singletons("Seq1", ["Seq3"])

    records = (
        FASTA("Seq1", "Meta1", "nCGNTYCgTn"),
        FASTA("Seq2", "Meta2", "ACTA-WCCTG"),
        FASTA("Seq3", "Meta3", "NCGGTYCGTC"),
    )
    assert filtered == MSA(records)

Example #10

0

Show file

def test_msa_reduce__one_empty_column__column_are_removed():
    """reduce() drops the single column that holds only n/- in all records."""
    original = MSA([
        FASTA("Name_A", "Meta_A", "AnT"),
        FASTA("Name_B", "Meta_B", "C-N"),
    ])
    reduced = MSA([
        FASTA("Name_A", "Meta_A", "AT"),
        FASTA("Name_B", "Meta_B", "CN"),
    ])

    assert original.reduce() == reduced

Example #11

0

Show file

def test_msa_from_file(func, tmp_path):
    """MSA.from_file reads back a two-record FASTA file written via `func`.

    NOTE(review): `func` is called like `open(path, "wt")`; presumably a
    fixture supplying plain/compressed openers -- confirm against the fixture.
    """
    path = tmp_path / "test.fasta"
    with func(path, "wt") as handle:
        handle.write(">This_is_FASTA!\nACGTN\n>This_is_ALSO_FASTA!\nCGTNA\n")

    records = [
        FASTA("This_is_FASTA!", None, "ACGTN"),
        FASTA("This_is_ALSO_FASTA!", None, "CGTNA"),
    ]
    assert MSA.from_file(path) == MSA(records)

Example #12

0

Show file

def test_sequentual_phy__different_length_names_2():
    """interleaved_phy pads both short names to the same column width."""
    records = [
        FASTA("Burchelli_4", None, "ACGTTGATAACCAGG"),
        FASTA("Donkey", None, "TGCAGAGTACGACGT"),
    ]
    # The literal below must stay flush-left; its whitespace is significant.
    expected = """2 15

Burchelli_4             ACGTTGATAA  CCAGG
Donkey                  TGCAGAGTAC  GACGT"""

    observed = interleaved_phy(MSA(records))

    assert observed == expected

Example #13

0

Show file

def test_msa_repr():
    """str(MSA) lists records sorted by name and shows missing meta as ''."""
    records = (
        FASTA("nc", None, "ACGTA"),
        FASTA("nm", "META", "TGAGT"),
        FASTA("miRNA", None, "UCAGA"),
    )
    expected = (
        "MSA(FASTA('miRNA', '', 'UCAGA'), "
        "FASTA('nc', '', 'ACGTA'), "
        "FASTA('nm', 'META', 'TGAGT'))"
    )

    assert str(MSA(records)) == expected

Example #14

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_to_file__complete_line_test():
    """MSA.to_file writes each 90 bp sequence as a 60 bp and a 30 bp line."""
    # Each tuple holds the two output lines expected for one record.
    barfoo_lines = ("ACGATA" * 10, "CGATAG" * 5)
    foobar_lines = ("CGAATG" * 10, "TGTCAT" * 5)
    msa = MSA([
        FASTA("barfoo", None, "".join(barfoo_lines)),
        FASTA("foobar", None, "".join(foobar_lines)),
    ])

    stringf = StringIO.StringIO()
    MSA.to_file(msa, stringf)

    expected = "".join([
        ">barfoo\n%s\n%s\n" % barfoo_lines,
        ">foobar\n%s\n%s\n" % foobar_lines,
    ])
    assert_equal(stringf.getvalue(), expected)

Example #15

0

Show file

File: database.py Project: jelber2/paleomix

    def _read_mitochondria(self, tar_handle, filename):
        """Read and validate mitochondrial FASTA records from a tar archive.

        Every sequence is upper-cased before validation. Sequences may only
        contain A, C, G, T, N and '-', and record names must be unique. All
        sequences must have the same length, or exactly two lengths that
        differ by self.settings["MitoPadding"].

        Returns a dict mapping record names to (upper-cased) FASTA records,
        or None if `filename` is not a member of `tar_handle`.

        Raises ZonkeyDBError if any of the checks above fail, or if the file
        contains no records.
        """
        try:
            tar_handle.getmember(filename)
        except KeyError:
            # Missing MT file is allowed
            return None

        handle = tar_handle.extractfile(filename)

        results = {}
        for record in FASTA.from_lines(handle):
            record = FASTA(name=record.name,
                           meta=record.meta,
                           sequence=record.sequence.upper())

            unexpected = set(record.sequence) - set("ACGTN-")
            if unexpected:
                unexpected = ", ".join(map(repr, sorted(unexpected)))
                # Argument order must match the message: filename first,
                # then the offending characters.
                raise ZonkeyDBError("Unexpected nucleotide in %s; only A, C, "
                                    "G, T, N, and - are allowed, not %s" %
                                    (filename, unexpected))
            elif record.name in results:
                raise ZonkeyDBError("Duplicate sequence name in %s: %r" %
                                    (filename, record.name))

            results[record.name] = record

        # `results` is a plain dict, so values() works on Python 2 and 3
        lengths = frozenset(
            len(record.sequence) for record in results.values())

        if not lengths:
            raise ZonkeyDBError("No mitochondrial sequences found in %r" %
                                (filename, ))
        elif len(lengths) > 2:
            lengths = tuple(sorted(lengths))
            lengths_s = "%s, and %s" % (", ".join(map(
                str, lengths[:-1])), lengths[-1])

            raise ZonkeyDBError("At most two different sequence lengths "
                                "expected for mitochondrial sequences, but "
                                "found %i different lengths in %r: %s" %
                                (len(lengths), filename, lengths_s))
        elif len(lengths) != 1:
            # Unpadded sequences are allowed
            delta_len = max(lengths) - min(lengths)
            mito_padding = self.settings["MitoPadding"]

            if delta_len != mito_padding:
                raise ZonkeyDBError("Length difference between mitochondrial "
                                    "sequences in %r does not match the "
                                    "padding; expected a difference of %i bp, "
                                    "but found a %i bp difference." %
                                    (filename, mito_padding, delta_len))

        return results

Example #16

0

Show file

File: fasta_test.py Project: tmancill/paleomix

def test_fasta__from_file(func, tmp_path):
    """Records written via `func` are read back unchanged by FASTA.from_file.

    NOTE(review): `func` is called like `open(path, "wt")`; presumably a
    fixture supplying plain/compressed openers -- confirm against the fixture.
    """
    path = tmp_path / "file"
    records = [
        FASTA("This_is_FASTA!", None, "ACGTN"),
        FASTA("This_is_ALSO_FASTA!", None, "CGTNA"),
    ]

    with func(path, "wt") as handle:
        for record in records:
            record.write(handle)

    observed = list(FASTA.from_file(path))
    assert observed == records

Example #17

0

Show file

File: paml.py Project: jelber2/paleomix

    def _setup(self, _config, temp):
        """Populate `temp` with the control, tree and sequence template files.

        The control file is written via self._update_ctl_file, the trees file
        is symlinked, and the sequence file is rewritten with upper-case
        sequences, no meta information, and without any record whose name is
        in self._exclude_groups.
        """
        ctl_path = os.path.join(temp, "template.ctl")
        trees_path = os.path.join(temp, "template.trees")
        seqs_path = os.path.join(temp, "template.seqs")

        self._update_ctl_file(source=self._control_file, destination=ctl_path)
        os.symlink(os.path.abspath(self._trees_file), trees_path)

        with open(seqs_path, "w") as handle:
            for record in FASTA.from_file(self._sequence_file):
                if record.name in self._exclude_groups:
                    continue

                cleaned = FASTA(record.name, None, record.sequence.upper())
                handle.write("%s\n" % (cleaned, ))

Example #18

0

Show file

File: sequences.py Project: tmancill/paleomix

    def _run(self, _config, temp):
        """Write one FASTA file per sequence name, with one record per sample.

        For every name in self._sequences, "<temp>/<name>.fasta" is created
        containing that sequence fetched from each input FASTA file in
        self._infiles (sorted by sample name). Each record is named after the
        sample and carries the sequence name as its meta field.
        """
        fasta_files = []
        try:
            for (name, filename) in sorted(self._infiles.items()):
                fasta_files.append((name, pysam.FastaFile(filename)))

            for sequence_name in sorted(self._sequences):
                filename = os.path.join(temp, sequence_name + ".fasta")
                with open(filename, "w") as out_handle:
                    for (sample, fasta_file) in fasta_files:
                        sequence = fasta_file.fetch(sequence_name)
                        fasta = FASTA(sample, sequence_name, sequence)
                        fasta.write(out_handle)
        finally:
            # Release the indexed FASTA handles even if a fetch/write fails
            for (_, fasta_file) in fasta_files:
                fasta_file.close()

Example #19

0

Show file

File: phylip_test.py Project: jelber2/paleomix

def test_sequentual_phy__different_length_names_1():
    """interleaved_phy pads short names and truncates long ones.

    The expected output shows the 50-character name cut to its first 30
    characters, with the short name padded to the same column.
    """
    msa = MSA([
        FASTA("A_short_name", None, "ACGTTGATAACCAGG"),
        FASTA("Another_really_long_sequence_name_that_is_too_long", None,
              "TGCAGAGTACGACGT")
    ])
    # The literal below must stay flush-left; its whitespace is significant.
    expected = """2 15

A_short_name                        ACGTTGATAA  CCAGG
Another_really_long_sequence_n      TGCAGAGTAC  GACGT"""
    assert_equal(interleaved_phy(msa), expected)

Example #20

0

Show file

File: fasta_test.py Project: tmancill/paleomix

def test_index_and_collect_contigs__fai_files(tmp_path):
    """Indexing the same FASTA twice must leave the .fai file untouched."""
    fasta_path = tmp_path / "test.fasta"
    index_path = tmp_path / "test.fasta.fai"
    with fasta_path.open("wt") as handle:
        _TEST_FASTA_1_A.write(handle)

    # The first call is expected to create the index; the second must not
    # modify it, so both stat() snapshots have to be identical.
    snapshots = []
    for _ in range(2):
        FASTA.index_and_collect_contigs(fasta_path)
        snapshots.append(index_path.stat())

    assert snapshots[0] == snapshots[1]

Example #21

0

Show file

    def filter_singletons(self, to_filter, filter_using):
        """Return a new MSA where `to_filter` is restricted by other records.

        At each column, the nucleotides encoded by the `to_filter` base are
        intersected with those encoded by the (non-N, non-gap) bases of the
        records selected by `filter_using`. If the resulting genotype differs
        from the original base it is written in lower-case; an empty
        intersection becomes "n". Columns where `to_filter` is N or '-' are
        left untouched.

        NOTE(review): record selection is delegated to self._group, which is
        not visible here -- presumably it resolves names to records; confirm.
        """
        included, excluded, target = self._group(filter_using, to_filter)

        bases = list(target.sequence)
        references = [record.sequence.upper() for record in included]
        for (position, column) in enumerate(zip(*references)):
            observed = bases[position].upper()
            if observed in "N-":
                continue

            # Union of all nucleotides supported by the reference records
            supported = set()
            for reference_nt in column:
                if reference_nt not in "N-":
                    supported.update(NT_CODES[reference_nt])

            shared = frozenset(NT_CODES[observed]) & supported
            genotype = encode_genotype(shared if shared else "N")
            if genotype != observed:
                bases[position] = genotype.lower()

        filtered = FASTA(target.name, target.meta, "".join(bases))
        return MSA([filtered] + included + excluded)

Example #22

0

Show file

File: msa.py Project: muslih14/paleomix

 def from_lines(cls, lines):
     """Parses a MSA from a file/list of lines, and returns a MSA object
     built from the FASTA records found. Meta information given after the
     first space in the header of each sequence is kept with the record:
       >NAME META-INFORMATION
       SEQUENCE
     As suggested above, sequences are expected to be in FASTA format;
     the actual parsing is carried out by FASTA.from_lines."""
     return MSA(FASTA.from_lines(lines))

Example #23

0

Show file

 def from_lines(cls, lines):
     """Parses FASTA formatted lines (a file handle or a list of strings)
     and returns the records as a MSA object. Each record is expected to
     have the following layout, where the optional text following the
     first space of the header is stored as the record's meta information:
       >NAME META-INFORMATION
       SEQUENCE
     Parsing of the individual records is delegated to FASTA.from_lines."""
     return MSA(FASTA.from_lines(lines))

Example #24

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__multiple_records():
    """Three records are parsed; wrapped sequence lines are joined together."""
    first_lines = [">first\n", "TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n"]
    second_lines = [">Second XT:1:0\n", "GAGAGCTCAGCTAAC\n"]
    third_lines = [">Third\n", "CGCTGACCAAAAACGGACAG\n", "GGCATTCGGC\n"]

    observed = FASTA.from_lines(first_lines + second_lines + third_lines)

    assert_list_equal(observed, [
        FASTA("first", None, "TGTTCTCCACCGTGCACAACCCTTCATCCA"),
        FASTA("Second", "XT:1:0", "GAGAGCTCAGCTAAC"),
        FASTA("Third", None, "CGCTGACCAAAAACGGACAGGGCATTCGGC"),
    ])

Example #25

0

Show file

File: build_mito.py Project: tmancill/paleomix

def truncate_sequences(sequences, name):
    """Return copies of all records cut to the length of record `name`.

    `sequences` maps keys to FASTA records; the result uses the same keys.
    Records shorter than the reference record are returned at full length,
    since slicing never extends a sequence.
    """
    limit = len(sequences[name].sequence)

    return {
        key: FASTA(name=record.name,
                   meta=record.meta,
                   sequence=record.sequence[:limit])
        for key, record in sequences.items()
    }

Example #26

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_split__partial_group():
    """With 5 columns and key "123", group "3" receives a single column."""
    records = [FASTA("seq1", None, "ACGCA"), FASTA("seq2", None, "GAGTG")]
    first = MSA([FASTA("seq1", None, "AC"), FASTA("seq2", None, "GT")])
    second = MSA([FASTA("seq1", None, "CA"), FASTA("seq2", None, "AG")])
    third = MSA([FASTA("seq1", None, "G"), FASTA("seq2", None, "G")])

    observed = MSA(records).split("123")

    assert_equal(observed, {"1": first, "2": second, "3": third})

Example #27

0

Show file

def test_msa_split__three_groups():
    """With 6 columns and key "123", every group receives two columns."""
    records = [FASTA("seq1", None, "ACGCAT"), FASTA("seq2", None, "GAGTGA")]
    first = MSA([FASTA("seq1", None, "AC"), FASTA("seq2", None, "GT")])
    second = MSA([FASTA("seq1", None, "CA"), FASTA("seq2", None, "AG")])
    third = MSA([FASTA("seq1", None, "GT"), FASTA("seq2", None, "GA")])

    observed = MSA(records).split("123")

    assert observed == {"1": first, "2": second, "3": third}

Example #28

0

Show file

File: msa_test.py Project: jelber2/paleomix

def test_msa_split__empty_group():
    """With 2 columns and key "123", group "3" holds empty sequences."""
    records = [FASTA("seq1", None, "AC"), FASTA("seq2", None, "GA")]
    first = MSA([FASTA("seq1", None, "A"), FASTA("seq2", None, "G")])
    second = MSA([FASTA("seq1", None, "C"), FASTA("seq2", None, "A")])
    third = MSA([FASTA("seq1", None, ""), FASTA("seq2", None, "")])

    observed = MSA(records).split("123")

    assert_equal(observed, {"1": first, "2": second, "3": third})

Example #29

0

Show file

def _collect_fasta_contigs(filename, cache={}):
    """Return the contigs of `filename`, indexing the FASTA file if needed.

    Results are memoized in `cache`. The mutable default is intentional: it
    is shared between calls, so each file is only processed once per process
    unless the caller supplies its own mapping.
    """
    if filename not in cache:
        if not os.path.exists(filename + ".fai"):
            # Only announce the (potentially slow) indexing step when the
            # index file is actually missing
            logging.getLogger(__name__).info(
                "Indexing %r; this may take a while", filename
            )

        cache[filename] = FASTA.index_and_collect_contigs(filename)

    return cache[filename]

Example #30

0

Show file

File: synthesize_reads.py Project: jelber2/paleomix

    def __init__(self, options, filename):
        """Load the single sequence in `filename` and mutate it.

        The FASTA file must contain exactly one record; its sequence is
        stored upper-cased. The remaining attributes start as None before
        self._mutate(options) is called.
        """
        records = list(FASTA.from_file(filename))
        assert len(records) == 1, len(records)
        record = records[0]

        self._genome = record.sequence.upper()
        self._sequence = self._positions = self._annotations = None

        self._mutate(options)

Example #31

0

Show file

File: makefile.py Project: muslih14/paleomix

def _collect_fasta_contigs(filename, cache={}):
    """Return a dict of the contigs in `filename`, indexing it if needed.

    Results are memoized in `cache`. The mutable default is intentional: it
    is shared between calls, so each file is only processed once per process
    unless the caller supplies its own mapping.
    """
    if filename not in cache:
        index_exists = os.path.exists(filename + ".fai")
        if not index_exists:
            print_info("      - Index does not exist for %r; this may "
                       "take a while ..." % (filename,))

        collected = FASTA.index_and_collect_contigs(filename)
        cache[filename] = dict(collected)

    return cache[filename]

Example #32

0

Show file

File: makefile.py Project: jelber2/paleomix

def _collect_fasta_contigs(filename, cache={}):
    """Collect the contigs of a FASTA file as a dict, with memoization.

    `cache` deliberately defaults to a shared mutable dict, which acts as a
    process-wide memo keyed by filename; pass an explicit mapping to use a
    separate cache.
    """
    if filename not in cache:
        # Warn before indexing, since building a missing .fai may be slow
        if not os.path.exists(filename + ".fai"):
            print_info("      - Index does not exist for %r; this may "
                       "take a while ..." % (filename,))

        cache[filename] = dict(FASTA.index_and_collect_contigs(filename))

    return cache[filename]

Example #33

0

Show file

File: synthesize_reads.py Project: muslih14/paleomix

    def __init__(self, options, filename):
        """Read a single-record FASTA file and apply self._mutate(options).

        Asserts that `filename` holds exactly one record, keeps its sequence
        in upper-case, and resets the remaining attributes to None before
        mutating.
        """
        genome_records = list(FASTA.from_file(filename))
        n_records = len(genome_records)
        assert n_records == 1, n_records

        (only_record,) = genome_records
        self._genome = only_record.sequence.upper()
        self._sequence = None
        self._positions = None
        self._annotations = None

        self._mutate(options)

Example #34

0

Show file

File: makefile.py Project: MikkelSchubert/paleomix

def _validate_prefixes(makefiles):
    """Validates prefixes and regions-of-interest, including an implementation
    of the checks included in GATK, which require that the FASTA for the human
    genome is ordered 1 .. 23. This is required since GATK will not run with
    human genomes in a different order.

    Each prefix is updated in place: "IndexFormat" is set to ".bai", or to
    ".csi" if any contig exceeds _BAM_MAX_SEQUENCE_LENGTH. Prefixes sharing a
    path are validated once. Prefixes with a missing FASTA file only trigger a
    warning; since they are not cached, the warning repeats for every prefix
    using that path.

    Raises MakefileError if the FASTA file cannot be indexed or if a
    regions-of-interest BED file cannot be read.
    """
    already_validated = {}
    print_info("  - Validating prefixes ...")
    for makefile in makefiles:
        uses_gatk = makefile["Options"]["Features"]["RealignedBAM"]
        for prefix in makefile["Prefixes"].values():
            path = prefix["Path"]
            if path in already_validated:
                prefix["IndexFormat"] = already_validated[path]["IndexFormat"]
                continue

            # Must be set to a valid value, even if FASTA file does not exist
            prefix["IndexFormat"] = ".bai"

            if not os.path.exists(path):
                print_warn("    - Reference FASTA file does not exist:\n"
                           "      %r" % (path,))
                continue
            elif not os.path.exists(path + ".fai"):
                print_info("    - Index does not exist for %r; this may "
                           "take a while ..." % (path,))

            try:
                contigs = FASTA.index_and_collect_contigs(path)
            except FASTAError as error:
                raise MakefileError("Error indexing FASTA:\n %s" % (error,))

            # Implementation of GATK checks for the human genome
            _do_validate_hg_prefix(makefile, prefix, contigs, fatal=uses_gatk)

            contigs = dict(contigs)
            regions_of_interest = prefix.get("RegionsOfInterest", {})
            for (name, fpath) in regions_of_interest.items():
                try:
                    # read_bed_file returns iterator; it is consumed purely
                    # to trigger any parsing/validation errors
                    for _ in bedtools.read_bed_file(fpath, contigs=contigs):
                        pass
                except (bedtools.BEDError, IOError) as error:
                    raise MakefileError("Error reading regions-of-"
                                        "interest %r for prefix %r:\n%s"
                                        % (name, prefix["Name"], error))

            if max(contigs.values()) > _BAM_MAX_SEQUENCE_LENGTH:
                print_warn("    - FASTA file %r contains sequences longer "
                           "than %i! CSI index files will be used instead "
                           "of BAI index files."
                           % (path, _BAM_MAX_SEQUENCE_LENGTH))
                prefix["IndexFormat"] = ".csi"

            already_validated[path] = prefix

Example #35

0

Show file

File: makefile.py Project: jelber2/paleomix

def _validate_prefixes(makefiles):
    """Validates prefixes and regions-of-interest, including an implementation
    of the checks included in GATK, which require that the FASTA for the human
    genome is ordered 1 .. 23. This is required since GATK will not run with
    human genomes in a different order.

    Side effects: every prefix gets an "IndexFormat" entry, which is ".bai"
    unless a contig is longer than _BAM_MAX_SEQUENCE_LENGTH, in which case it
    is ".csi". Results are reused for prefixes with an identical path. A
    missing FASTA file is reported with a warning and otherwise skipped.

    Raises MakefileError on FASTA indexing failures and on unreadable
    regions-of-interest BED files.
    """
    already_validated = {}
    print_info("  - Validating prefixes ...")
    for makefile in makefiles:
        uses_gatk = makefile["Options"]["Features"]["RealignedBAM"]
        for prefix in makefile["Prefixes"].values():
            path = prefix["Path"]
            if path in already_validated:
                prefix["IndexFormat"] = already_validated[path]["IndexFormat"]
                continue

            # Must be set to a valid value, even if FASTA file does not exist
            prefix["IndexFormat"] = ".bai"

            if not os.path.exists(path):
                print_warn("    - Reference FASTA file does not exist:\n"
                           "      %r" % (path, ))
                continue
            elif not os.path.exists(path + ".fai"):
                print_info("    - Index does not exist for %r; this may "
                           "take a while ..." % (path, ))

            try:
                contigs = FASTA.index_and_collect_contigs(path)
            except FASTAError as error:
                raise MakefileError("Error indexing FASTA:\n %s" % (error, ))

            # Implementation of GATK checks for the human genome
            _do_validate_hg_prefix(makefile, prefix, contigs, fatal=uses_gatk)

            contigs = dict(contigs)
            regions_of_interest = prefix.get("RegionsOfInterest", {})
            for (name, fpath) in regions_of_interest.items():
                try:
                    # read_bed_file returns iterator; exhausting it surfaces
                    # any errors in the BED file
                    for _ in bedtools.read_bed_file(fpath, contigs=contigs):
                        pass
                except (bedtools.BEDError, IOError) as error:
                    raise MakefileError("Error reading regions-of-"
                                        "interest %r for prefix %r:\n%s" %
                                        (name, prefix["Name"], error))

            if max(contigs.values()) > _BAM_MAX_SEQUENCE_LENGTH:
                print_warn("    - FASTA file %r contains sequences longer "
                           "than %i! CSI index files will be used instead "
                           "of BAI index files." %
                           (path, _BAM_MAX_SEQUENCE_LENGTH))
                prefix["IndexFormat"] = ".csi"

            already_validated[path] = prefix

Example #36

0

Show file

File: database.py Project: MikkelSchubert/paleomix

    def _read_mitochondria(self, tar_handle, filename):
        """Load the mitochondrial FASTA records stored in `tar_handle`.

        Sequences are converted to upper-case and validated: only the
        characters A, C, G, T, N and '-' are permitted, names may not repeat,
        and the sequences must either share one length or have two lengths
        whose difference equals self.settings["MitoPadding"].

        Returns a dict of record name -> FASTA record, or None when
        `filename` is not present in the archive.

        Raises ZonkeyDBError when validation fails or no records are found.
        """
        try:
            tar_handle.getmember(filename)
        except KeyError:
            # Missing MT file is allowed
            return None

        handle = tar_handle.extractfile(filename)

        results = {}
        for record in FASTA.from_lines(handle):
            record = FASTA(name=record.name,
                           meta=record.meta,
                           sequence=record.sequence.upper())

            unexpected = set(record.sequence) - set("ACGTN-")
            if unexpected:
                unexpected = ", ".join(map(repr, sorted(unexpected)))
                # The first placeholder names the file, the second lists the
                # offending characters; the arguments must follow that order.
                raise ZonkeyDBError("Unexpected nucleotide in %s; only A, C, "
                                    "G, T, N, and - are allowed, not %s"
                                    % (filename, unexpected))
            elif record.name in results:
                raise ZonkeyDBError("Duplicate sequence name in %s: %r"
                                    % (filename, record.name))

            results[record.name] = record

        # `results` is a plain dict, so values() works on Python 2 and 3
        lengths = frozenset(len(record.sequence)
                            for record in results.values())

        if not lengths:
            raise ZonkeyDBError("No mitochondrial sequences found in %r"
                                % (filename,))
        elif len(lengths) > 2:
            lengths = tuple(sorted(lengths))
            lengths_s = "%s, and %s" % (", ".join(map(str, lengths[:-1])),
                                        lengths[-1])

            raise ZonkeyDBError("At most two different sequence lengths "
                                "expected for mitochondrial sequences, but "
                                "found %i different lengths in %r: %s"
                                % (len(lengths), filename, lengths_s))
        elif len(lengths) != 1:
            # Unpadded sequences are allowed
            delta_len = max(lengths) - min(lengths)
            mito_padding = self.settings["MitoPadding"]

            if delta_len != mito_padding:
                raise ZonkeyDBError("Length difference between mitochondrial "
                                    "sequences in %r does not match the "
                                    "padding; expected a difference of %i bp, "
                                    "but found a %i bp difference."
                                    % (filename, mito_padding, delta_len))

        return results

Example #37

0

Show file

File: paml.py Project: muslih14/paleomix

    def _setup(self, _config, temp):
        """Create the template control, trees and sequence files in `temp`.

        The control file is produced by self._update_ctl_file, the trees file
        is symlinked into place, and the sequences are copied with upper-case
        bases and no meta information, skipping records whose names are
        listed in self._exclude_groups.
        """
        def _in_temp(name):
            # Path of a template file inside the temporary folder
            return os.path.join(temp, name)

        self._update_ctl_file(source=self._control_file,
                              destination=_in_temp("template.ctl"))

        trees_source = os.path.abspath(self._trees_file)
        os.symlink(trees_source, _in_temp("template.trees"))

        with open(_in_temp("template.seqs"), "w") as handle:
            for record in FASTA.from_file(self._sequence_file):
                if record.name in self._exclude_groups:
                    continue

                upper_cased = FASTA(record.name, None, record.sequence.upper())
                handle.write("%s\n" % (upper_cased,))

Example #38

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_record_name_only__first():
    """Consume FASTA.from_lines where the first header has no sequence.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    lines = [">fasta1\n", ">fasta2\n", "AGTC\n"]
    records = FASTA.from_lines(lines)
    list(records)

Example #39

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines_single_record():
    """A single record with a wrapped sequence is parsed as one FASTA."""
    header = ">single\n"
    sequence_lines = ["TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n"]

    observed = FASTA.from_lines([header] + sequence_lines)

    assert_list_equal(
        observed,
        [FASTA("single", None, "TGTTCTCCACCGTGCACAACCCTTCATCCA")],
    )

Example #40

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__no_records():
    """Parsing an empty list of lines yields no records."""
    no_lines = []
    no_records = []
    assert_list_equal(FASTA.from_lines(no_lines), no_records)

Example #41

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_file__compressed_bz2():
    """FASTA.from_file yields the expected records for the .bz2 fixture."""
    path = test_file("fasta_file.fasta.bz2")

    observed = list(FASTA.from_file(path))

    assert_equal(observed, [
        FASTA("This_is_BZ_FASTA!", None, "CGTNA"),
        FASTA("This_is_ALSO_BZ_FASTA!", None, "ACGTN"),
    ])

Example #42

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_record_name_only__nothing_else():
    """Consume FASTA.from_lines for input consisting of a lone header.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    lines = [">fasta1\n"]
    records = FASTA.from_lines(lines)
    list(records)

Example #43

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_name__with_others():
    """Consume FASTA.from_lines where the first of two headers is unnamed.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    unnamed_record = [">\n", "ACGT\n"]
    named_record = [">Foo\n", "ACGGTA\n"]
    list(FASTA.from_lines(unnamed_record + named_record))

Example #44

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_name__alone():
    """Consume FASTA.from_lines for a single record with an empty name.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    records = FASTA.from_lines([">\n", "ACGT\n"])
    list(records)

Example #45

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__missing_name__alone():
    """Consume FASTA.from_lines for a sequence line without any header.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    records = FASTA.from_lines(["ACGT\n"])
    list(records)

Example #46

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_record_last():
    """Consume FASTA.from_lines where the final header has no sequence.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    complete_record = [">fasta1\n", "ACGT\n"]
    empty_record = [">fasta2\n"]
    list(FASTA.from_lines(complete_record + empty_record))

Example #47

0

Show file

File: fasta_test.py Project: muslih14/paleomix

def test_fasta__from_lines__empty_record__middle():
    """Consume FASTA.from_lines where a middle header has no sequence.

    NOTE(review): nothing is asserted in the body; presumably the expected
    error is declared by a decorator outside this excerpt -- confirm.
    """
    lines = [
        ">fasta0\n",
        "ACGT\n",
        ">fasta1\n",
        ">fasta2\n",
        "AGTC\n",
    ]
    list(FASTA.from_lines(lines))