def test_writeFASTA(self):
    # Round-trip check: the 'sample1' FASTA data is split, written to a new
    # file with bioio.writeFASTA, read back, and compared with the data that
    # was read originally.
    rewritten_path = 'rewritten_sample1.fasta'
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']

    # Split once and reuse the result for both the ids and the sequences.
    split_data = bioio.splitFASTA(read_fasta_data)
    read_fasta_data_seqids = split_data['output_seq_ids']
    read_fasta_data_seqs = split_data['output_seqs']

    try:
        bioio.writeFASTA(rewritten_path,
                         read_fasta_data_seqids,
                         read_fasta_data_seqs)
        rewritten_data = bioio.readFASTA([rewritten_path])['rewritten_sample1']
        self.assertEqual(rewritten_data, read_fasta_data)
    finally:
        # Remove the temporary file even when the write, the re-read or the
        # assertion fails, so a failing run does not leave it behind.
        if os.path.exists(rewritten_path):
            os.remove(rewritten_path)
def test_writeFASTA(self):
    # Round-trip check: the 'sample1' FASTA data is split, written to a new
    # file with bioio.writeFASTA, read back, and compared with the data that
    # was read originally.
    rewritten_path = 'rewritten_sample1.fasta'
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']

    # Split once and reuse the result for both the ids and the sequences.
    split_data = bioio.splitFASTA(read_fasta_data)
    read_fasta_data_seqids = split_data['output_seq_ids']
    read_fasta_data_seqs = split_data['output_seqs']

    try:
        bioio.writeFASTA(rewritten_path,
                         read_fasta_data_seqids,
                         read_fasta_data_seqs)
        rewritten_data = bioio.readFASTA([rewritten_path])['rewritten_sample1']
        self.assertEqual(rewritten_data, read_fasta_data)
    finally:
        # Remove the temporary file even when the write, the re-read or the
        # assertion fails, so a failing run does not leave it behind.
        if os.path.exists(rewritten_path):
            os.remove(rewritten_path)
# -*- coding: utf-8 -*-
"""
Read a FASTA file and hand its sequence ids and sequences to bioio.writeCSV
as a list of [seq_id, seq] rows.

@author: thatbudakguy
"""
import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# Read the file named by the first command-line argument.  The result is
# indexed by that argument minus its last six characters (presumably the
# ".fasta" extension -- confirm against bioio.readFASTA).
read_fasta = bioio.readFASTA([sys.argv[1]])
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Split once and reuse the result rather than splitting the same data twice.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Reformat as a list of [seq_id, seq] rows.  The sequences are looked up by
# index so that a missing sequence still raises IndexError, as before.
output_csv_data = [
    [seq_id, input_fasta_seqs[index]]
    for index, seq_id in enumerate(input_fasta_seq_ids)
]

# Write the rows to "<input name>.csv".
output_csv_name = input_fasta_name + ".csv"
bioio.writeCSV(output_csv_name, output_csv_data)
@author: thatbudakguy """
import sys
import os
# Make the sibling 'lib' and 'test' directories importable before the
# project modules are imported.
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath
# Read both inputs: the last command-line argument is passed to readCSV and
# the second-to-last one to readFASTA.
read_csv = bioio.readCSV([sys.argv[-1]])
read_fasta = bioio.readFASTA([sys.argv[-2]])
# The FASTA result is indexed by the argument minus its last six characters
# (presumably the ".fasta" extension -- confirm against bioio.readFASTA).
input_fasta_name = sys.argv[-2][:-6]
input_fasta_data = read_fasta[input_fasta_name]
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']
# The CSV result is indexed by the argument minus its last four characters
# (presumably the ".csv" extension -- confirm against bioio.readCSV).
input_csv_name = sys.argv[-1][:-4]
input_csv_data = read_csv[input_csv_name]
# Build the list of sequence ids to keep from the CSV data.
# NOTE(review): the original comment says this finds the longest sequences;
# biomath.removeDuplicateSequences is not visible here -- confirm.
name_list = biomath.removeDuplicateSequences(input_csv_data)
# Check the name list against the FASTA database; the result is read back
# through its 'output_seq_ids' and 'output_seqs' entries.
output_fasta_data = biomath.reduceNames(name_list,input_fasta_seq_ids,input_fasta_seqs)
output_seq_ids = output_fasta_data['output_seq_ids']
output_seqs = output_fasta_data['output_seqs']
# Name of the output file: "<csv name>_homologs.fasta".
output_fasta_name = input_csv_name + "_homologs.fasta"
"""
@author: thatbudakguy
"""
import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# The FASTA database is the second-to-last command-line argument.  Its data
# is stored under the argument minus its last six characters.
fasta_path = sys.argv[-2]
fasta_key = fasta_path[:-6]
split_database = bioio.splitFASTA(bioio.readFASTA([fasta_path])[fasta_key])
database_seq_ids = split_database['output_seq_ids']
database_seqs = split_database['output_seqs']

# The text file is the last command-line argument.  Its data is stored under
# the argument minus its last four characters.
txt_path = sys.argv[-1]
wanted_seq_ids = bioio.readTXT([txt_path])[txt_path[:-4]]

# Look the listed ids up in the database.
selected = biomath.reduceNames(wanted_seq_ids, database_seq_ids, database_seqs)

# Write the selected ids and sequences next to the input database.
bioio.writeFASTA(fasta_key + "_concatenated.fasta",
                 selected['output_seq_ids'],
                 selected['output_seqs'])
# -*- coding: utf-8 -*-
"""
Read a FASTA file, pass its sequence ids through bioio.addVenomCodes and
write the result to "<input name>_vCodes.fasta".

@author: thatbudakguy
"""
import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# Read the file named by the first command-line argument.  The result is
# indexed by that argument minus its last six characters (presumably the
# ".fasta" extension -- confirm against bioio.readFASTA).
read_fasta = bioio.readFASTA([sys.argv[1]])
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Split once and reuse the result rather than splitting the same data twice.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Add venom codes; the input name is passed along with the sequence ids.
output_fasta_seq_ids = bioio.addVenomCodes(input_fasta_seq_ids,
                                           input_fasta_name)

# Write the new ids together with the unchanged sequences.
output_fasta_name = input_fasta_name + "_vCodes.fasta"
bioio.writeFASTA(output_fasta_name, output_fasta_seq_ids, input_fasta_seqs)
"""
import sys
import os
# Make the sibling 'lib' and 'test' directories importable before the
# project modules are imported.
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath
# The last two command-line arguments are the inputs: input_1 (second to
# last) is read below as the proteomes file, input_2 (last) as the homologs
# file.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]
# Homologs: read the file, index the result by the argument minus its last
# six characters (presumably ".fasta" -- confirm), and keep only the ids.
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(
    input_homologs_fasta_data)['output_seq_ids']
# Proteomes: same steps as for the homologs file.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(
    input_proteomes_fasta_data)['output_seq_ids']
# Strip venom codes from the proteome ids before comparing.
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(
    input_proteomes_fasta_seq_ids)
# Python 2 print statement: echoes the homolog ids to stdout.
print input_homologs_fasta_seq_ids
# Compare the homolog ids with the proteome ids to find matching and
# missing entries.
output_seq_ids_match = biomath.findMatchingSeqs(input_homologs_fasta_seq_ids,
                                                input_proteomes_fasta_seq_ids)
output_seq_ids_miss = biomath.findMissingSeqs(input_homologs_fasta_seq_ids,
"""
import sys
import os
# Make the sibling 'lib' and 'test' directories importable before the
# project modules are imported.
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath
# The last two command-line arguments are the inputs: input_1 (second to
# last) is read below as the proteomes file, input_2 (last) as the homologs
# file.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]
# Homologs: read the file, index the result by the argument minus its last
# six characters (presumably ".fasta" -- confirm), and keep only the ids.
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(input_homologs_fasta_data)['output_seq_ids']
# Proteomes: same steps as for the homologs file.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(input_proteomes_fasta_data)['output_seq_ids']
# Strip venom codes from the proteome ids before comparing.
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(input_proteomes_fasta_seq_ids)
# Python 2 print statement: echoes the homolog ids to stdout.
print input_homologs_fasta_seq_ids
# Compare the homolog ids with the proteome ids to find matching and
# missing entries.
output_seq_ids_match = biomath.findMatchingSeqs(input_homologs_fasta_seq_ids,input_proteomes_fasta_seq_ids)
output_seq_ids_miss = biomath.findMissingSeqs(input_homologs_fasta_seq_ids,input_proteomes_fasta_seq_ids)
# Name of the output file for the matching ids: "<homologs name>_matching.txt".
output_txt_name_match = input_homologs_fasta_name+"_matching.txt"
def test_splitFASTA(self):
    # Split the 'sample1' entry read from sample1.fasta and compare the
    # resulting dict with the expected_split_data fixture.
    fasta_by_name = bioio.readFASTA(['sample1.fasta'])
    split_result = bioio.splitFASTA(fasta_by_name['sample1'])
    self.assertDictEqual(split_result, expected_split_data)
def test_splitFASTA(self):
    # Split the 'sample1' entry read from sample1.fasta and compare the
    # resulting dict with the expected_split_data fixture.
    fasta_by_name = bioio.readFASTA(['sample1.fasta'])
    split_result = bioio.splitFASTA(fasta_by_name['sample1'])
    self.assertDictEqual(split_result, expected_split_data)