def test_rna_conversion():
    """
    Check that :func:`set_sequence()` stores a nucleotide sequence
    unchanged by default and with ``U`` instead of ``T``, if `as_rna`
    is true.
    """
    dna = seq.NucleotideSequence("ACGT")
    out_file = fasta.FastaFile()
    # Store the same sequence twice: once as DNA and once as RNA
    for header, as_rna in (("seq1", False), ("seq2", True)):
        fasta.set_sequence(out_file, dna, header, as_rna=as_rna)

    assert out_file["seq1"] == "ACGT"
    assert out_file["seq2"] == "ACGU"
def test_sequence_conversion():
    """
    Test the conversion between FASTA file entries and sequence objects.

    The test covers

    - reading a single nucleotide sequence,
    - a round trip of all sequences of a file through
      :func:`set_sequences()` and :func:`get_sequences()`,
    - the default header used by :func:`set_sequence()`,
    - reading a protein sequence, which is expected to emit a warning,
    - the :class:`ValueError` raised for an invalid sequence.
    """
    path = os.path.join(data_dir("sequence"), "nuc.fasta")
    file = fasta.FastaFile.read(path)
    assert seq.NucleotideSequence("ACGCTACGT") == fasta.get_sequence(file)

    seq_dict = fasta.get_sequences(file)
    file2 = fasta.FastaFile()
    fasta.set_sequences(file2, seq_dict)
    seq_dict2 = fasta.get_sequences(file2)
    # 'zip()' silently stops at the shorter input:
    # Make sure that no entry got lost in the round trip, otherwise the
    # loop below could pass without comparing anything
    assert list(seq_dict.keys()) == list(seq_dict2.keys())
    # Cannot compare dicts directly, since the original RNA sequence is
    # now guessed as protein sequence
    for seq1, seq2 in zip(seq_dict.values(), seq_dict2.values()):
        assert str(seq1) == str(seq2)

    # Without an explicit header the entry is stored as 'sequence'
    file3 = fasta.FastaFile()
    fasta.set_sequence(file3, seq.NucleotideSequence("AACCTTGG"))
    assert file3["sequence"] == "AACCTTGG"

    path = os.path.join(data_dir("sequence"), "prot.fasta")
    file4 = fasta.FastaFile.read(path)
    # Expect a warning for selenocysteine conversion
    with pytest.warns(UserWarning):
        assert seq.ProteinSequence("YAHCGFRTGS") == fasta.get_sequence(file4)

    path = os.path.join(data_dir("sequence"), "invalid.fasta")
    file5 = fasta.FastaFile.read(path)
    with pytest.raises(ValueError):
        seq.NucleotideSequence(fasta.get_sequence(file5))
def test_sequence_conversion():
    """
    Test the conversion between FASTA file entries and sequence objects:
    reading a nucleotide sequence, a round trip of all sequences of a
    file, the default header of :func:`set_sequence()`, reading a
    protein sequence and the error raised for an invalid sequence.
    """
    # Single nucleotide sequence
    nuc_path = os.path.join(data_dir, "nuc.fasta")
    nuc_file = fasta.FastaFile()
    nuc_file.read(nuc_path)
    expected_nuc = seq.NucleotideSequence("ACGCTACGT")
    assert expected_nuc == fasta.get_sequence(nuc_file)

    # Round trip: sequences -> new file -> sequences
    original_seqs = fasta.get_sequences(nuc_file)
    copy_file = fasta.FastaFile()
    fasta.set_sequences(copy_file, original_seqs)
    restored_seqs = fasta.get_sequences(copy_file)
    assert original_seqs == restored_seqs

    # Without an explicit header the entry is stored as 'sequence'
    default_header_file = fasta.FastaFile()
    fasta.set_sequence(
        default_header_file, seq.NucleotideSequence("AACCTTGG")
    )
    assert default_header_file["sequence"] == "AACCTTGG"

    # Protein sequence
    prot_path = os.path.join(data_dir, "prot.fasta")
    prot_file = fasta.FastaFile()
    prot_file.read(prot_path)
    expected_prot = seq.ProteinSequence("YAHGFRTGS")
    assert expected_prot == fasta.get_sequence(prot_file)

    # Invalid symbols must not be accepted as nucleotide sequence
    invalid_path = os.path.join(data_dir, "invalid.fasta")
    invalid_file = fasta.FastaFile()
    invalid_file.read(invalid_path)
    with pytest.raises(ValueError):
        seq.NucleotideSequence(fasta.get_sequence(invalid_file))
# :class:`ProteinSequence`, instance respectively. # Actually, it just tries to create a :class:`NucleotideSequence`, # and if this fails, a :class:`ProteinSequence` is created instead. # # Sequences can be written into FASTA files in a similar way: either via # dictionary-like access or using the :func:`set_sequence()` # convenience function. # Create new empty FASTA file file = fasta.FastaFile() # PROTIP: Let your cat walk over the keyboard dna_seq1 = seq.NucleotideSequence("ATCGGATCTATCGATGCTAGCTACAGCTAT") dna_seq2 = seq.NucleotideSequence("ACGATCTACTAGCTGATGTCGTGCATGTACG") # Append entries to file... # ... via set_sequence() fasta.set_sequence(file, dna_seq1, header="gibberish") # .. or dictionary style file["more gibberish"] = str(dna_seq2) print(file) file.write(biotite.temp_file("fa")) ######################################################################## # As you see, our file contains our new ``'gibberish'`` and # ``'more gibberish'`` sequences now. # # In a similar manner sequences and sequence quality scores can be read # from FASTQ files. For further reference, have a look at the # :mod:`biotite.sequence.io.fastq` subpackage. # # Alternatively, a sequence can also be loaded from GenBank or GenPept # files, using the :class:`GenBankFile` class (more on this later).
# At least 60 % of all reads covering a certain location must call a
# deletion for this location, otherwise the deletion is rejected
DELETION_THRESHOLD = 0.6

# Build the sequence directly from the symbol codes
var_genome = seq.NucleotideSequence()
var_genome.code = most_probable_symbol_codes
# A deletion is called, if either enough reads include this deletion
# or the sequence position is not covered by any read at all
deletion_mask = (
    (deletion_number > sequencing_depth * DELETION_THRESHOLD)
    | (sequencing_depth == 0)
)
# Keep only the positions that are not called as deletion
var_genome = var_genome[~deletion_mask]
# Write the assembled genome into a FASTA file
out_file = fasta.FastaFile()
fasta.set_sequence(
    out_file, var_genome, header="SARS-CoV-2 B.1.1.7", as_rna=True
)
# The context manager closes the temporary file deterministically
# instead of leaving the open handle to the garbage collector
with tempfile.NamedTemporaryFile("w") as temp_file:
    out_file.write(temp_file)

########################################################################
# We have done it, the genome of the B.1.1.7 variant is assembled!
# Now we would like to have a closer look at the differences between the
# original and the B.1.1.7 genome.
#
# Mutations in the B.1.1.7 variant
# --------------------------------
#
# To get a rough overview of the overall sequence identity between
# the genomes and the locations of mutations in the B.1.1.7 variant,
# we need to align the original genome to our assembled one.
# As both genomes are expected to be highly similar, we can use a banded