def test_readFASTA(self):
    """Check readFASTA on the example files and on two rejected inputs.

    The example file list must parse to ``expected_fasta_input``.  A list
    containing 'path/sample2.fasta' and a list containing 'sample2.txt'
    are each expected to make readFASTA raise SystemExit.
    """
    parsed = bioio.readFASTA(example_fasta_files)
    self.assertDictEqual(parsed, expected_fasta_input)

    # File-name lists that readFASTA is expected to refuse by exiting.
    rejected_inputs = (
        ['sample1.fasta', 'path/sample2.fasta'],
        ['sample1.fasta', 'sample2.txt'],
    )
    for file_names in rejected_inputs:
        with self.assertRaises(SystemExit):
            bioio.readFASTA(file_names)
def test_writeFASTA(self):
    """Round-trip 'sample1.fasta' through writeFASTA and readFASTA.

    The data read from 'sample1.fasta' is split into sequence ids and
    sequences, written to 'rewritten_sample1.fasta', and read back; the
    re-read data must equal the data originally read.  The rewritten file
    is removed afterwards, including when the write or the comparison
    fails, so a failing run does not leave the file behind.
    """
    rewritten_file = 'rewritten_sample1.fasta'
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']

    # Split once and reuse the result for both the ids and the sequences.
    split_data = bioio.splitFASTA(read_fasta_data)
    read_fasta_data_seqids = split_data['output_seq_ids']
    read_fasta_data_seqs = split_data['output_seqs']

    try:
        bioio.writeFASTA(rewritten_file, read_fasta_data_seqids,
                         read_fasta_data_seqs)
        self.assertEqual(
            bioio.readFASTA([rewritten_file])['rewritten_sample1'],
            read_fasta_data)
    finally:
        # The file may not exist if writeFASTA itself failed.
        if os.path.exists(rewritten_file):
            os.remove(rewritten_file)
def test_writeFASTA(self):
    """Verify that writeFASTA output reads back equal to its input.

    Reads 'sample1.fasta', splits the data into sequence ids and
    sequences, writes them to 'rewritten_sample1.fasta', and asserts that
    reading that file returns the original data.  The rewritten file is
    deleted in a ``finally`` clause so it is cleaned up even when the
    write or the assertion fails.
    """
    rewritten_file = 'rewritten_sample1.fasta'
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']

    # One splitFASTA call supplies both pieces; no need to split twice.
    split_data = bioio.splitFASTA(read_fasta_data)
    read_fasta_data_seqids = split_data['output_seq_ids']
    read_fasta_data_seqs = split_data['output_seqs']

    try:
        bioio.writeFASTA(rewritten_file,
                         read_fasta_data_seqids,
                         read_fasta_data_seqs)
        self.assertEqual(
            bioio.readFASTA([rewritten_file])['rewritten_sample1'],
            read_fasta_data)
    finally:
        # Guard the removal: the file is absent if writeFASTA failed early.
        if os.path.exists(rewritten_file):
            os.remove(rewritten_file)
import sys
import os

# Add the 'lib' and 'test' directories (relative to sys.path[0]) to the
# import path before importing the project modules below.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# The FASTA file to process is the last command-line argument.
fasta_path = sys.argv[-1]
read_fasta = bioio.readFASTA([fasta_path])

# The parsed data is looked up under the file name minus its last six
# characters (the length of a '.fasta' suffix).
input_fasta_name = fasta_path[:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Sequence-id prefixes passed, one at a time, to splitOnIDPrefix.
prefixList = [
    "ARC", "DRY", "LRE", "PER", "LRU", "PHO",
    "SIC", "LSP", "PLE", "SCY", "LAZ",
]

for prefix in prefixList:
    bioio.splitOnIDPrefix(input_fasta_data, prefix)
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""
import sys
import os

# Add the 'lib' and 'test' directories (relative to sys.path[0]) to the
# import path before importing the project modules below.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# The last two command-line arguments are the FASTA file and the text file.
fasta_path = sys.argv[-2]
txt_path = sys.argv[-1]

# Read the FASTA file; its data is keyed by the file name minus the last
# six characters (the length of a '.fasta' suffix).
read_fasta = bioio.readFASTA([fasta_path])
input_fasta_name = fasta_path[:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Separate the FASTA data into sequence ids and sequences.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Read the text file; its data is keyed by the file name minus the last
# four characters (the length of a '.txt' suffix).
read_txt = bioio.readTXT([txt_path])
input_txt_name = txt_path[:-4]
input_txt_data = read_txt[input_txt_name]

# find seqs from database using seqids list
output_fasta_data = biomath.reduceNames(
    input_txt_data,
    input_fasta_seq_ids,
    input_fasta_seqs,
)
output_seq_ids = output_fasta_data['output_seq_ids']
output_seqs = output_fasta_data['output_seqs']

# write the seqs to the file
output_fasta_name = input_fasta_name + "_concatenated.fasta"
bioio.writeFASTA(output_fasta_name, output_seq_ids, output_seqs)
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""
import sys
import os

# Add the 'lib' and 'test' directories (relative to sys.path[0]) to the
# import path before importing the project modules below.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# strip file extensions and read files
# The first argument is the FASTA file.  It is wrapped in a list because the
# result is indexed by file name below, which matches readFASTA being given
# a list of file names rather than a bare string.
# NOTE(review): confirm against bioio.readFASTA's signature.
read_fasta = bioio.readFASTA([sys.argv[1]])
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]

input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Every remaining argument is a text file of sequence ids.
read_txt = bioio.readTXT(sys.argv[2:])
output_combined_data = []

# reformat and combine seqid lists
# items() iterates the same pairs as iteritems() and also exists on Python 3.
for filename, data in read_txt.items():
    # split on greaterthans
    output_data = bioio.splitLinearSeqids(data)
    # add venom codes based on filename
    output_data = bioio.addVenomCodes(output_data, filename)
    # NOTE(review): output_data is not added to output_combined_data in the
    # code visible here -- confirm whether that happens further on.
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""
import sys
import os

# Add the 'lib' and 'test' directories (relative to sys.path[0]) to the
# import path before importing the project modules below.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# strip file extensions and read files
# The last two command-line arguments are the proteomes FASTA file and the
# homologs FASTA file, in that order.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]

# Homologs: the parsed data is keyed by the file name minus its last six
# characters (the length of a '.fasta' suffix).
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(
    input_homologs_fasta_data)['output_seq_ids']

# Proteomes: read and keyed the same way.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(
    input_proteomes_fasta_data)['output_seq_ids']

# strip venom codes from proteomes
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(
    input_proteomes_fasta_seq_ids)

# Parenthesised single-argument form: same output as the Python 2 print
# statement, and also valid syntax on Python 3.
print(input_homologs_fasta_seq_ids)
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""
import sys
import os

# Add the 'lib' and 'test' directories (relative to sys.path[0]) to the
# import path before importing the project modules below.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# strip file extensions and read files
# The last two command-line arguments are the proteomes FASTA file and the
# homologs FASTA file, in that order.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]

# Homologs: the parsed data is keyed by the file name minus its last six
# characters (the length of a '.fasta' suffix).
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(
    input_homologs_fasta_data)['output_seq_ids']

# Proteomes: read and keyed the same way.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(
    input_proteomes_fasta_data)['output_seq_ids']

# strip venom codes from proteomes
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(
    input_proteomes_fasta_seq_ids)

# Parenthesised single-argument form: same output as the Python 2 print
# statement, and also valid syntax on Python 3.
print(input_homologs_fasta_seq_ids)

# compare input files to find missing and matching lines
output_seq_ids_match = biomath.findMatchingSeqs(
    input_homologs_fasta_seq_ids, input_proteomes_fasta_seq_ids)
output_seq_ids_miss = biomath.findMissingSeqs(
    input_homologs_fasta_seq_ids, input_proteomes_fasta_seq_ids)
def test_splitFASTA(self):
    """Check that splitting the 'sample1' data gives expected_split_data."""
    # Read the sample file, then split only its 'sample1' entry.
    fasta_by_name = bioio.readFASTA(['sample1.fasta'])
    split_result = bioio.splitFASTA(fasta_by_name['sample1'])
    self.assertDictEqual(split_result, expected_split_data)
def test_splitFASTA(self):
    """splitFASTA applied to the 'sample1' entry must equal expected_split_data."""
    sample1_data = bioio.readFASTA(['sample1.fasta'])['sample1']
    self.assertDictEqual(
        bioio.splitFASTA(sample1_data),
        expected_split_data,
    )
def test_readFASTA(self):
    """readFASTA must parse the example files and exit on two bad inputs.

    Parsing ``example_fasta_files`` must give ``expected_fasta_input``.
    SystemExit is expected both for a list that includes
    'path/sample2.fasta' and for a list that includes 'sample2.txt'.
    """
    self.assertDictEqual(
        bioio.readFASTA(example_fasta_files),
        expected_fasta_input,
    )

    # Second entry of each list is the one the test expects to be refused.
    for second_name in ('path/sample2.fasta', 'sample2.txt'):
        with self.assertRaises(SystemExit):
            bioio.readFASTA(['sample1.fasta', second_name])