def test_writeFASTA(self):
    """Round-trip check for ``bioio.writeFASTA``.

    Reads ``sample1.fasta``, splits the data into its ``output_seq_ids`` and
    ``output_seqs`` parts, writes them to a scratch file, and asserts that
    reading the scratch file back yields the same data as the original read.
    """
    rewritten_path = 'rewritten_sample1.fasta'
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']
    # Split once and reuse the result instead of splitting the same data twice.
    split_data = bioio.splitFASTA(read_fasta_data)
    try:
        bioio.writeFASTA(rewritten_path,
                         split_data['output_seq_ids'],
                         split_data['output_seqs'])
        self.assertEqual(
            bioio.readFASTA([rewritten_path])['rewritten_sample1'],
            read_fasta_data)
    finally:
        # Clean up even when the write or the assertion fails, so a failing
        # run does not leave the scratch file behind for the next run.
        if os.path.exists(rewritten_path):
            os.remove(rewritten_path)
Ejemplo n.º 2
0
 def test_writeFASTA(self):
     """Round-trip check for ``bioio.writeFASTA``.

     Reads ``sample1.fasta``, splits the data into its ``output_seq_ids`` and
     ``output_seqs`` parts, writes them to a scratch file, and asserts that
     reading the scratch file back yields the same data as the original read.
     """
     rewritten_path = 'rewritten_sample1.fasta'
     read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']
     # Split once and reuse the result instead of splitting the same data
     # twice.
     split_data = bioio.splitFASTA(read_fasta_data)
     try:
         bioio.writeFASTA(rewritten_path,
                          split_data['output_seq_ids'],
                          split_data['output_seqs'])
         self.assertEqual(
             bioio.readFASTA([rewritten_path])['rewritten_sample1'],
             read_fasta_data)
     finally:
         # Clean up even when the write or the assertion fails, so a failing
         # run does not leave the scratch file behind for the next run.
         if os.path.exists(rewritten_path):
             os.remove(rewritten_path)
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Read the FASTA file named by the first command-line argument. The lookup key
# is that argument minus its last six characters (the length of ".fasta").
read_fasta = bioio.readFASTA([sys.argv[1]])
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Split once and pull both parts from the same result instead of splitting the
# same data twice.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Reformat as a list of [seq_id, sequence] rows. Sequences are looked up by
# index so that a missing sequence still raises IndexError rather than being
# silently dropped.
output_csv_data = [
    [seq_id, input_fasta_seqs[i]]
    for i, seq_id in enumerate(input_fasta_seq_ids)
]

# Write the resulting csv file next to the input, named after it.
output_csv_name = input_fasta_name + ".csv"
bioio.writeCSV(output_csv_name, output_csv_data)
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Read both inputs: the CSV path is the last command-line argument and the
# FASTA path is the second-to-last one.
read_csv = bioio.readCSV([sys.argv[-1]])
read_fasta = bioio.readFASTA([sys.argv[-2]])
# Lookup key for the FASTA data: the argument minus its last six characters
# (the length of ".fasta").
input_fasta_name = sys.argv[-2][:-6]
input_fasta_data = read_fasta[input_fasta_name]
# Split once and take both parts from the same result.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']
# Lookup key for the CSV data: the argument minus its last four characters
# (the length of ".csv").
input_csv_name = sys.argv[-1][:-4]
input_csv_data = read_csv[input_csv_name]

# Build the list of sequence ids to keep from the CSV rows.
# NOTE(review): the original comment says this keeps the longest sequences;
# confirm against biomath.removeDuplicateSequences.
name_list = biomath.removeDuplicateSequences(input_csv_data)

# Check the name list against the FASTA database; the result is indexed by
# the same 'output_seq_ids' / 'output_seqs' keys used for splitFASTA above.
output_fasta_data = biomath.reduceNames(name_list,input_fasta_seq_ids,input_fasta_seqs)
output_seq_ids = output_fasta_data['output_seq_ids']
output_seqs = output_fasta_data['output_seqs']

# Name of the output file, derived from the CSV input name. The write itself
# is not visible in this excerpt.
output_fasta_name = input_csv_name + "_homologs.fasta"
Ejemplo n.º 5
0
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Command-line inputs: the FASTA database is the second-to-last argument and
# the text file of sequence ids is the last one.
fasta_path = sys.argv[-2]
txt_path = sys.argv[-1]

# Load the database; its lookup key is the path minus the trailing six
# characters (the length of ".fasta").
fasta_key = fasta_path[:-6]
database = bioio.splitFASTA(bioio.readFASTA([fasta_path])[fasta_key])
database_ids = database['output_seq_ids']
database_seqs = database['output_seqs']

# Load the id list; its lookup key is the path minus the trailing four
# characters (the length of ".txt").
wanted_ids = bioio.readTXT([txt_path])[txt_path[:-4]]

# Look the listed ids up in the database.
selected = biomath.reduceNames(wanted_ids, database_ids, database_seqs)

# Write the selected records to a file named after the database.
bioio.writeFASTA(fasta_key + "_concatenated.fasta",
                 selected['output_seq_ids'],
                 selected['output_seqs'])
Ejemplo n.º 6
0
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# Read the FASTA file named by the first command-line argument. The lookup key
# is that argument minus its last six characters (the length of ".fasta").
read_fasta = bioio.readFASTA([sys.argv[1]])
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]

# Split once and pull both parts from the same result instead of splitting the
# same data twice.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# Add venom codes to the sequence ids, passing the input file's name.
output_fasta_seq_ids = bioio.addVenomCodes(input_fasta_seq_ids,
                                           input_fasta_name)

# Write the re-labelled ids with the unchanged sequences.
output_fasta_name = input_fasta_name + "_vCodes.fasta"
bioio.writeFASTA(output_fasta_name, output_fasta_seq_ids, input_fasta_seqs)
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# Command-line inputs: the proteomes FASTA is the second-to-last argument and
# the homologs FASTA is the last one.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]

# Read the homologs file. Lookup keys are the paths minus their last six
# characters (the length of ".fasta").
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(
    input_homologs_fasta_data)['output_seq_ids']

# Read the proteomes file the same way; only the sequence ids are needed.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(
    input_proteomes_fasta_data)['output_seq_ids']

# Strip venom codes from the proteome ids.
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(
    input_proteomes_fasta_seq_ids)
# Parenthesised form prints the same single value on Python 2 and is also
# valid syntax on Python 3, where the bare print statement is a SyntaxError.
print(input_homologs_fasta_seq_ids)

# compare input files to find missing and matching lines
output_seq_ids_match = biomath.findMatchingSeqs(input_homologs_fasta_seq_ids,
                                                input_proteomes_fasta_seq_ids)
output_seq_ids_miss = biomath.findMissingSeqs(input_homologs_fasta_seq_ids,
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Command-line inputs: the proteomes FASTA is the second-to-last argument and
# the homologs FASTA is the last one.
input_1 = sys.argv[-2]
input_2 = sys.argv[-1]

# Read the homologs file. Lookup keys are the paths minus their last six
# characters (the length of ".fasta").
read_fasta_homologs = bioio.readFASTA([input_2])
input_homologs_fasta_name = input_2[:-6]
input_homologs_fasta_data = read_fasta_homologs[input_homologs_fasta_name]
input_homologs_fasta_seq_ids = bioio.splitFASTA(
    input_homologs_fasta_data)['output_seq_ids']

# Read the proteomes file the same way; only the sequence ids are needed.
read_fasta_proteomes = bioio.readFASTA([input_1])
input_proteomes_fasta_name = input_1[:-6]
input_proteomes_fasta_data = read_fasta_proteomes[input_proteomes_fasta_name]
input_proteomes_fasta_seq_ids = bioio.splitFASTA(
    input_proteomes_fasta_data)['output_seq_ids']

# Strip venom codes from the proteome ids.
input_proteomes_fasta_seq_ids = bioio.trimVenomCodes(
    input_proteomes_fasta_seq_ids)
# Parenthesised form prints the same single value on Python 2 and is also
# valid syntax on Python 3, where the bare print statement is a SyntaxError.
print(input_homologs_fasta_seq_ids)

# Compare the two id lists to find the matching and the missing entries.
output_seq_ids_match = biomath.findMatchingSeqs(input_homologs_fasta_seq_ids,
                                                input_proteomes_fasta_seq_ids)
output_seq_ids_miss = biomath.findMissingSeqs(input_homologs_fasta_seq_ids,
                                              input_proteomes_fasta_seq_ids)

# Define names of the resulting files, derived from the homologs input name.
output_txt_name_match = input_homologs_fasta_name + "_matching.txt"
Ejemplo n.º 9
0
 def test_splitFASTA(self):
     """Splitting the ``sample1`` data must equal ``expected_split_data``."""
     fasta_by_name = bioio.readFASTA(['sample1.fasta'])
     split_result = bioio.splitFASTA(fasta_by_name['sample1'])
     self.assertDictEqual(split_result, expected_split_data)
 def test_splitFASTA(self):
     """Read ``sample1.fasta``, split it, and compare with the expected dict."""
     sample_data = bioio.readFASTA(['sample1.fasta'])['sample1']
     actual = bioio.splitFASTA(sample_data)
     self.assertDictEqual(actual, expected_split_data)