def test_load_unaligned_seqs_from_json(self):
    """check an unaligned collection survives being written to and read from json"""
    with TemporaryDirectory(dir=".") as tmp_dir:
        source = os.path.join(data_path, "brca1_5.paml")
        plain_json = os.path.join(tmp_dir, "unaligned.json")
        expected = load_unaligned_seqs(source)
        expected.write(plain_json)

        def assert_matches_expected(loaded):
            # the same three checks apply to every successful load below
            self.assertIsInstance(loaded, SequenceCollection)
            self.assertEqual(loaded.to_dict(), expected.to_dict())
            self.assertEqual(loaded.info["source"], source)

        reloaded = load_unaligned_seqs(plain_json)
        assert_matches_expected(reloaded)

        # wrap the reloaded collection in records built by make_record_for_json,
        # one flagged as not completed and one flagged as completed
        records = [
            ("uncompleted_record.json", make_record_for_json("delme", reloaded, False)),
            ("completed_record.json", make_record_for_json("delme", reloaded, True)),
        ]
        record_paths = []
        for file_name, record in records:
            record_path = os.path.join(tmp_dir, file_name)
            with open(record_path, "w") as out:
                out.write(json.dumps(record))
            record_paths.append(record_path)
        uncompleted_path, completed_path = record_paths

        # a record that is not completed must be rejected
        with self.assertRaises(TypeError):
            load_unaligned_seqs(uncompleted_path)

        # a completed record loads like the plain json file
        assert_matches_expected(load_unaligned_seqs(completed_path))
def test_load_aligned_seqs_from_json(self):
    """tests loading an aligned object from json file

    Covers json written from an Alignment and from an ArrayAlignment,
    records produced by make_record_for_json (completed and not completed),
    and json holding an unaligned collection, which must be rejected.
    """
    with TemporaryDirectory(dir=".") as dirname:
        path = os.path.join(data_path, "brca1_5.paml")
        alignment = load_aligned_seqs(path, array_align=False, moltype="dna")
        alignment_json_path = os.path.join(dirname, "alignment.json")
        alignment.write(alignment_json_path)
        array_alignment = load_aligned_seqs(path, moltype="dna")
        array_alignment_json_path = os.path.join(dirname, "array_alignment.json")
        array_alignment.write(array_alignment_json_path)

        # tests case Alignment
        got = load_aligned_seqs(alignment_json_path)
        self.assertIsInstance(got, Alignment)
        self.assertEqual(got.moltype.label, "dna")
        self.assertEqual(got.to_dict(), alignment.to_dict())
        self.assertEqual(got.info["source"], path)

        # tests case ArrayAlignment
        got = load_aligned_seqs(array_alignment_json_path)
        self.assertIsInstance(got, ArrayAlignment)
        self.assertEqual(got.moltype.label, "dna")
        self.assertEqual(got.to_dict(), array_alignment.to_dict())
        self.assertEqual(got.info["source"], path)

        # tests json generated by make_record_for_json; ``got`` is the
        # ArrayAlignment loaded just above
        uncompleted_record = make_record_for_json("delme", got, False)
        completed_record = make_record_for_json("delme", got, True)
        uncompleted_record_path = os.path.join(dirname, "uncompleted_record.json")
        completed_record_path = os.path.join(dirname, "completed_record.json")
        with open(uncompleted_record_path, "w") as out:
            out.write(json.dumps(uncompleted_record))
        with open(completed_record_path, "w") as out:
            out.write(json.dumps(completed_record))

        # tests when provided record json file is uncompleted; this test is
        # about the aligned loader, so that is the loader exercised here
        with self.assertRaises(TypeError):
            load_aligned_seqs(uncompleted_record_path)

        # tests when provided record json is completed
        got = load_aligned_seqs(completed_record_path)
        self.assertIsInstance(got, ArrayAlignment)
        self.assertEqual(got.to_dict(), array_alignment.to_dict())
        self.assertEqual(got.info["source"], path)

        # tests wrong input json file: json holding an unaligned collection
        # must not load as an alignment
        json_path = os.path.join(dirname, "unaligned.json")
        unaligned = load_unaligned_seqs(path)
        unaligned.write(json_path)
        with self.assertRaises(TypeError):
            load_aligned_seqs(json_path)
def test_load_unaligned_seqs(self):
    """test loading unaligned sequences from a file

    Checks the returned type, that the "Human" sequence is present, and
    that the source path is stored under ``info["source"]``.
    """
    path = os.path.join(data_path, "brca1_5.paml")
    got = load_unaligned_seqs(path)
    self.assertIsInstance(got, SequenceCollection)
    # assertIn names the missing member and the container when it fails
    self.assertIn("Human", got.to_dict())
    self.assertEqual(got.info["source"], path)
def pep2nuc(id, pep, nuc):
    """Write a nucleotide alignment derived from a protein alignment.

    Loads the aligned protein sequences from ``pep`` and the unaligned DNA
    sequences from ``nuc`` (both read as fasta), trims stop codons from the
    DNA, calls ``replace_seqs(..., aa_to_codon=True)`` on the protein
    alignment and writes the result to ``<id>_translated.fasta``.

    Parameters
    ----------
    id
        sample identifier; used in messages and as the output file prefix.
        (The name shadows the builtin ``id`` but is kept because it is part
        of the call signature.)
    pep
        path to the protein alignment, fasta format
    nuc
        path to the nucleotide sequences, fasta format

    Raises
    ------
    SystemExit
        with status 1 when trimming stop codons raises ``ValueError``; the
        sample, both file paths and the error are printed first.
    """
    # local import keeps this block self-contained
    import sys

    pro = load_aligned_seqs(pep, format="fasta", moltype="protein")
    dna = load_unaligned_seqs(nuc, format="fasta", moltype="dna")

    try:
        dna_no_stop = dna.trim_stop_codons()
    except ValueError as er:
        print(f"\nERROR: Couldn't trim stop codons for sample {id}")
        print(f"Protein file: {pep}\nNucleotide file: {nuc}")
        print(er)
        # sys.exit is always available (bare exit() comes from the site
        # module) and a non-zero status tells the caller the run failed
        sys.exit(1)
    else:
        print(f"Trim stop complete: {pep}")

    # Replace sequences
    aln_dna = pro.replace_seqs(dna_no_stop, aa_to_codon=True)

    # Write to file
    aln_dna.write(f"{id}_translated.fasta", format="fasta")
""" Dotplot ======= A technique (`Gibbs and McIntyre <https://www.ncbi.nlm.nih.gov/pubmed/5456129>`_) for comparing sequences. All ``cogent3`` sequence collections classes (``SequenceCollection``, ``Alignment`` and ``ArrayAlignment``) have a dotplot method. The method returns a drawable, as demonstrated below between unaligned sequences. """ # %% import os from cogent3 import load_unaligned_seqs seqs = load_unaligned_seqs("../../data/SCA1-cds.fasta", moltype="dna") draw = seqs.dotplot() draw.show() #%% # If sequence names are not provided, two randomly chosen sequences are selected (see below). The plot title reflects the parameter values for defining a match. ``window`` is the size of the sequence segments being compared. ``threshold`` is the number of exact matches within ``window`` required for the two sequence segments to be considered a match. ``gap`` is the size of a gap between adjacent matches before merging. # # Modifying the matching parameters # ################################# # # If we set window and threshold to be equal, this is equivalent to an exact match approach. draw = seqs.dotplot(name1="Human", name2="Mouse", window=8, threshold=8) draw.show() #%% # Displaying dotplot for the reverse complement
def test_load_unaligned_seqs_no_format(self):
    """test loading unaligned seqs from a path with no suffix and no format raises ValueError"""
    with self.assertRaises(ValueError):
        # the return value is irrelevant, only the raised exception is checked
        load_unaligned_seqs("somepath")