def test_readTXT(self):
    # Happy path: the example files must load into the expected dict.
    self.assertDictEqual(bioio.readTXT(example_txt_files), expected_txt_input)

    # Each of these file lists must make readTXT exit: the first contains
    # a name with a directory component, the second a non-.txt file.
    rejected_inputs = (
        ['sample1.txt', 'path/sample2.txt'],
        ['sample1.csv', 'sample2.txt'],
    )
    for file_list in rejected_inputs:
        with self.assertRaises(SystemExit):
            bioio.readTXT(file_list)
def test_writeTXT(self):
    # Round-trip check: data read from sample1.txt, written back out and
    # read again must compare equal to what was originally read.
    read_txt_data = bioio.readTXT(['sample1.txt'])['sample1']
    bioio.writeTXT('rewritten_sample1.txt', read_txt_data)
    try:
        self.assertEqual(
            bioio.readTXT(['rewritten_sample1.txt'])['rewritten_sample1'],
            read_txt_data)
    finally:
        # Remove the scratch file even when the assertion (or the re-read)
        # fails, so a failing run does not leave it in the working directory.
        os.remove('rewritten_sample1.txt')
# -*- coding: utf-8 -*-
"""
Break every line of a TXT file at its first dash and write the result to
"<input name>_clean.txt".

Usage: the input file is taken from the last command-line argument.

@author: thatbudakguy
"""

import sys
import os

# make the bundled 'lib' and 'test' directories importable before bioio
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))

import bioio

# strip file extension, read file, reset '>'
read_txt = bioio.readTXT([sys.argv[-1]])
# drops the last four characters -- assumes a ".txt" extension
input_txt_name = sys.argv[-1][:-4]
input_txt_data = bioio.addGreaterThans(read_txt[input_txt_name])
output_txt_data = []

# break lines on first instance of a dash
for line in input_txt_data:
    first_dash_index = line.find('-')
    if first_dash_index == -1:
        # str.find returns -1 when there is no dash; slicing with -1 would
        # split the line before its last character, so keep it unchanged.
        output_txt_data.append(line)
    else:
        output_txt_data.append(line[:first_dash_index] + '\n' + line[first_dash_index:])

# write the seqs to the file
output_txt_name = input_txt_name + "_clean.txt"
bioio.writeTXT(output_txt_name, output_txt_data)
# -*- coding: utf-8 -*-
"""
Break every line of a TXT file at its first dash and write the result to
"<input name>_clean.txt".

Usage: the input file is taken from the last command-line argument.

@author: thatbudakguy
"""

import sys
import os

# make the bundled 'lib' and 'test' directories importable before bioio
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio

# strip file extension, read file, reset '>'
read_txt = bioio.readTXT([sys.argv[-1]])
# drops the last four characters -- assumes a ".txt" extension
input_txt_name = sys.argv[-1][:-4]
input_txt_data = bioio.addGreaterThans(read_txt[input_txt_name])
output_txt_data = []

# break lines on first instance of a dash
for line in input_txt_data:
    first_dash_index = line.find('-')
    if first_dash_index < 0:
        # No dash on this line: str.find reports -1, and using that as a
        # slice bound would cut the line before its final character.
        # Pass the line through untouched instead.
        output_txt_data.append(line)
        continue
    output_txt_data.append(
        line[:first_dash_index] + '\n' + line[first_dash_index:])

# write the seqs to the file
output_txt_name = input_txt_name + "_clean.txt"
bioio.writeTXT(output_txt_name, output_txt_data)
# Script: reads one FASTA file (first command-line argument) and any number
# of TXT files (remaining arguments), then post-processes each TXT file's
# contents through several bioio helpers.
# NOTE(review): the visible part of this script ends inside the loop below;
# the code that writes or combines the results is not shown here.
import sys
import os

# make the bundled 'lib' and 'test' directories importable before bioio/biomath
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))

import bioio
import biomath  # not referenced in the visible portion of the script

# strip file extensions and read files
read_fasta = bioio.readFASTA(sys.argv[1])
# drops the last six characters -- presumably a ".fasta" extension
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']
# every argument after the FASTA path is passed to readTXT as a list
read_txt = bioio.readTXT(sys.argv[2:])
# initialised here; nothing is appended to it in the visible portion
output_combined_data = []

# reformat and combine seqid lists
# NOTE(review): dict.iteritems() exists only on Python 2; this loop raises
# AttributeError on Python 3 if read_txt is a plain dict.
for filename,data in read_txt.iteritems():
    # split on greaterthans
    output_data = bioio.splitLinearSeqids(data)
    # add venom codes based on filename
    output_data = bioio.addVenomCodes(output_data,filename)
    # replace s??? codes with sample info code
    output_data = bioio.replaceSCodes(output_data)
    # write the 'fixed' version of each file
# -*- coding: utf-8 -*-
"""
Compare a TXT file against the seq IDs of a FASTA file and write the
result of biomath.findMissingSeqs to "<txt name>_missing.txt".

Usage: the FASTA file is the second-to-last command-line argument and the
TXT file is the last one.

@author: thatbudakguy
"""

import sys
import os

# make the bundled 'lib' and 'test' directories importable before bioio/biomath
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))

import bioio
import biomath

# strip file extensions and read files
# readTXT is given a list of file names everywhere else it is called, so
# wrap the single argument rather than passing a bare string (which would
# otherwise be walked character by character if readTXT iterates its input).
read_txt = bioio.readTXT([sys.argv[-1]])
# drops the last four characters -- assumes a ".txt" extension
input_txt_name = sys.argv[-1][:-4]
input_txt_data = read_txt[input_txt_name]
read_fasta = bioio.readFASTA(sys.argv[-2])
# drops the last six characters -- presumably a ".fasta" extension
input_fasta_name = sys.argv[-2][:-6]
input_fasta_data = read_fasta[input_fasta_name]
input_fasta_seq_ids = bioio.splitFASTA(input_fasta_data)['output_seq_ids']

# compare input files to find missing lines
output_seq_ids = biomath.findMissingSeqs(input_txt_data,input_fasta_seq_ids)

# define names of the resulting files
output_txt_name = input_txt_name+"_missing.txt"

# write the missing lines to a file
bioio.writeTXT(output_txt_name,output_seq_ids)
# -*- coding: utf-8 -*-
"""
Compare a TXT file against the seq IDs of a FASTA file and write the
result of biomath.findMissingSeqs to "<txt name>_missing.txt".

Usage: the FASTA file is the second-to-last command-line argument and the
TXT file is the last one.

@author: thatbudakguy
"""

import sys
import os

# make the bundled 'lib' and 'test' directories importable before bioio/biomath
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))

import bioio
import biomath

# strip file extensions and read files
# readTXT takes a list of file names at every other call site, so the
# single TXT argument is wrapped in a list instead of being passed as a
# bare string.
read_txt = bioio.readTXT([sys.argv[-1]])
# drops the last four characters -- assumes a ".txt" extension
input_txt_name = sys.argv[-1][:-4]
input_txt_data = read_txt[input_txt_name]

read_fasta = bioio.readFASTA(sys.argv[-2])
# drops the last six characters -- presumably a ".fasta" extension
input_fasta_name = sys.argv[-2][:-6]
input_fasta_data = read_fasta[input_fasta_name]
input_fasta_seq_ids = bioio.splitFASTA(input_fasta_data)['output_seq_ids']

# compare input files to find missing lines
output_seq_ids = biomath.findMissingSeqs(input_txt_data, input_fasta_seq_ids)

# define names of the resulting files
output_txt_name = input_txt_name + "_missing.txt"

# write the missing lines to a file
bioio.writeTXT(output_txt_name, output_seq_ids)
def test_writeTXT(self):
    # Write the contents of sample1.txt to a scratch file, read the scratch
    # file back, and require the two reads to be equal.
    read_txt_data = bioio.readTXT(['sample1.txt'])['sample1']
    bioio.writeTXT('rewritten_sample1.txt',read_txt_data)
    try:
        reread_txt_data = bioio.readTXT(['rewritten_sample1.txt'])['rewritten_sample1']
        self.assertEqual(reread_txt_data,read_txt_data)
    finally:
        # Always delete the scratch file; previously a failed assertion
        # skipped the cleanup and left the file on disk.
        os.remove('rewritten_sample1.txt')
def test_readTXT(self):
    # Valid input: the parsed result must match the expected fixture.
    parsed_txt = bioio.readTXT(example_txt_files)
    self.assertDictEqual(parsed_txt,expected_txt_input)

    # A file name containing a directory component must make readTXT exit.
    self.assertRaises(SystemExit,bioio.readTXT,['sample1.txt','path/sample2.txt'])

    # A file list containing a non-.txt name must make readTXT exit.
    self.assertRaises(SystemExit,bioio.readTXT,['sample1.csv','sample2.txt'])