Esempio n. 1
0
 def write(f):
     """Write the free variable ``sequences`` to ``f`` with a FastaWriter.

     Before writing, every record whose id equals the placeholder
     "<unknown id>" gets its zero-based position in ``sequences`` as id.

     :param f: output handle; it is passed to ``FastaWriter`` and flushed
     """
     for position, record in enumerate(sequences):
         if record.id == "<unknown id>":
             record.id = str(position)
     FastaWriter(f).write_file(sequences)
     # The original author flagged this flush as important: push the
     # written data out of the handle's buffer before returning.
     f.flush()
Esempio n. 2
0
def write_fasta(sequence, file_handle, wrap=60):
    """
    Write ``sequence`` to ``file_handle`` through a ``FastaWriter``.

    :param sequence: sequence to write in the file
    :type sequence: :class:`Bio.SeqRecord.SeqRecord` object
    :param file_handle: output file handler
    :type file_handle: writable file-like object
    :param wrap: value forwarded as ``FastaWriter``'s ``wrap`` argument
                 (default 60)
    """
    # NOTE(review): ``sequence`` is handed to ``write_file`` as-is; confirm
    # whether callers pass a single record or an iterable of records.
    # Not every handle has a string ``name`` (in-memory streams have none,
    # fd-based files report an int), so fall back to the handle's repr and
    # let logging do the formatting instead of concatenating strings.
    target = getattr(file_handle, "name", repr(file_handle))
    _LOGGER.info("Writing output to %s...", target)
    writer = FastaWriter(file_handle, wrap=wrap)
    writer.write_file(sequence)
 def handle_noargs(self, **options):
     """Write all ``Protein`` records to the file named by ``options['outfile']``.

     The records come from ``Protein.objects.all()``, are converted with
     ``self._records_to_seqs`` and written with a ``FastaWriter`` that uses
     each record's ``id`` as its title. Progress is printed to stdout.

     :param options: keyword options; ``options['outfile']`` is the path
                     of the output file, which is opened in write mode
     """
     outfilename = options['outfile']
     # Open the output first (as before) so a bad path fails before any
     # records are fetched; ``with`` closes the handle even on errors.
     with open(outfilename, 'w') as outfileh:
         # Parenthesised single-argument prints behave the same on
         # Python 2 and Python 3.
         print("Fetching records.")
         records = Protein.objects.all()
         seqs = self._records_to_seqs(records)
         print("Writing records to %s" % outfilename)
         writer = FastaWriter(outfileh, record2title=lambda x: x.id)
         writer.write_file(seqs)
     print("Done.")
Esempio n. 4
0
 def _write(self, file, value):
     """
     Write output to fasta file
     :param file: path of the output file; it is opened in write mode
     :param value: records passed to ``FastaWriter.write_file``
     :return: None
     """
     # ``with`` closes the handle even when writing raises.
     with open(file, "w") as handle:
         writer = FastaWriter(handle, wrap=None)
         writer.write_file(value)
Esempio n. 5
0
def split_files(fasta_file):
    """Rewrite ``fasta_file`` without line wraps, then split the copy.

    The records are parsed as FASTA and written with ``wrap=0`` to
    ``nowrap.fasta`` in the current working directory, so the file can be
    split by line count without interrupting a gene. Afterwards
    ``split -l 200000 nowrap.fasta`` is run.

    :param fasta_file: path of the FASTA file to read
    :raises subprocess.CalledProcessError: if ``split`` exits non-zero
    """
    from Bio.SeqIO.FastaIO import FastaWriter

    # Both handles are closed on exit, including when parsing or writing
    # fails; the records are streamed instead of collected in a list.
    with open(fasta_file) as input_handle, \
            open("nowrap.fasta", "w") as output_handle:
        writer = FastaWriter(output_handle, wrap=0)
        writer.write_file(SeqIO.parse(input_handle, "fasta"))

    # I can always make the number of lines an alterable field.
    # An argument list needs no shell to run this fixed command.
    subprocess.check_call(["split", "-l", "200000", "nowrap.fasta"])
Esempio n. 6
0
def split_files(fasta_file):
    """Rewrite ``fasta_file`` without line wraps, then split the copy.

    The records are parsed as FASTA and written with ``wrap=0`` to
    ``nowrap.fasta`` in the current working directory, so the file can be
    split by line count without interrupting a gene. Afterwards
    ``split -l 200000 nowrap.fasta`` is run.

    :param fasta_file: path of the FASTA file to read
    :raises subprocess.CalledProcessError: if ``split`` exits non-zero
    """
    from Bio.SeqIO.FastaIO import FastaWriter

    # Both handles are closed on exit, including when parsing or writing
    # fails; the records are streamed instead of collected in a list.
    with open(fasta_file) as input_handle, \
            open("nowrap.fasta", "w") as output_handle:
        writer = FastaWriter(output_handle, wrap=0)
        writer.write_file(SeqIO.parse(input_handle, "fasta"))

    # I can always make the number of lines an alterable field.
    # An argument list needs no shell to run this fixed command.
    subprocess.check_call(["split", "-l", "200000", "nowrap.fasta"])
Esempio n. 7
0
 def write_by_og(self, output_folder):
     '''
     Write for each og all the mapped sequences into separate fasta files to a specified folder

     One file named ``mapped_<key>.fa`` is written per entry of
     ``self.og_records``; the folder is created when it does not exist.

     :param output_folder: folder where files should be stored
     '''
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
     for key, value in tqdm(self.og_records.items(),
                            desc="Writing DNA seq sorted by OG",
                            unit=" OG"):
         out_path = os.path.join(output_folder, 'mapped_' + key + '.fa')
         # ``with`` closes each file even if writing its records raises.
         with open(out_path, "w") as handle:
             writer = FastaWriter(handle, wrap=None)
             writer.write_file(value)
Esempio n. 8
0
 def write_select_og_dna(self):
     '''
     Write for each species all the DNA sequences into separate fasta files

     The files go to ``<self.args.output_path>/reference_ogs_dna``, one
     ``<key>.fa`` per entry of ``self.ogs``, and are only written when that
     folder does not exist yet. If the folder exists and already holds as
     many ``*.fa`` files as ``self.ogs_dna_by_species`` has entries, a
     notice is printed instead.
     '''
     output_folder = os.path.join(self.args.output_path, "reference_ogs_dna")
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
         for key, value in tqdm(self.ogs.items(), desc="Writing OGs sorted by species",
                                unit=" species"):
             # ``with`` closes each file even if writing its records raises.
             with open(os.path.join(output_folder, key + '.fa'), "w") as handle:
                 writer = FastaWriter(handle, wrap=None)
                 writer.write_file(value.dna)
     elif len(self.ogs_dna_by_species) == len(glob.glob(os.path.join(output_folder, '*.fa'))):
         print('Folder with files already exists and will not be overwritten.')
     # NOTE(review): when the folder exists but the file count differs,
     # nothing is written and nothing is reported - confirm this is intended.
"""
Remove unpaired reads from a fasta file.

This script can be used when a pair of fasta files from paired-end
sequencing contains unpaired reads (e.g. because their mates were
removed during quality trimming) that need to be removed.

"""

import argparse
from Bio import SeqIO
from Bio.SeqIO.FastaIO import FastaWriter

# Command line: the fasta file to filter, the reference fasta file and an
# optional output path (defaults to "output.fa").
parser = argparse.ArgumentParser()
parser.add_argument("fasta_file_to_filter")
parser.add_argument("reference_fasta_file")
parser.add_argument("--output_fasta", default="output.fa")
args = parser.parse_args()

# Map the first whitespace-separated token of every reference record id
# to 1; the mapping is only used for membership tests below.
reference_headers = dict.fromkeys(
    (record.id.split()[0]
     for record in SeqIO.parse(args.reference_fasta_file, "fasta")),
    1)

# Keep only the records whose id token also occurs in the reference file
# and write them to the output file.
with open(args.output_fasta, 'w') as output_fh:
    writer = FastaWriter(output_fh, wrap=0)
    kept_records = (
        record
        for record in SeqIO.parse(args.fasta_file_to_filter, "fasta")
        if record.id.split()[0] in reference_headers)
    writer.write_file(kept_records)
def write_fasta_output(fasta_output_file, filtered_seqs):
    """Write ``filtered_seqs`` to the file at ``fasta_output_file``.

    :param fasta_output_file: path of the output file; opened in write mode
    :param filtered_seqs: records passed to ``FastaWriter.write_file``
    """
    # ``with`` closes the handle even when writing raises.
    with open(fasta_output_file, "w") as handle:
        writer = FastaWriter(handle)
        writer.write_file(filtered_seqs)
Esempio n. 11
0
 def write_dna(self, species, output_folder):
     """Write ``self.dna`` to ``<output_folder>/<species>_OGs.fa``.

     :param species: string used as the file name prefix
     :param output_folder: folder in which the file is created
     """
     out_path = os.path.join(output_folder, species + '_OGs.fa')
     # ``with`` closes the handle even when writing raises.
     with open(out_path, "w") as handle:
         writer = FastaWriter(handle, wrap=None)
         writer.write_file(self.dna)
Esempio n. 12
0
    z=[x.description for x in fa if i in x.description]
    if len(z)>0:
        new_name=df2[i]
        full_name=z[0]
        master_dict.update({full_name : new_name})

# Give every record whose description has an entry in master_dict its new
# id and blank the description.
for i in fa:
    if i.description in master_dict:
        i.id = master_dict[i.description]
        i.description = ""


## Write temporary file
with open('temp.fa', "w") as handle:
    writer = FastaWriter(handle, wrap=0)
    writer.write_file(fa)

## Read in temporary file and print properly formatted fasta
# The handle is closed before the file is removed below; deleting a file
# that is still open fails on Windows.
with open("temp.fa", "r") as x:
    z = x.read()
# Drop one trailing newline; endswith() is also safe when the file is
# empty, where indexing z[-1] would raise IndexError.
if z.endswith('\n'):
    z = z[:-1]
print(z)
os.remove("temp.fa")




########## BIN
def select_from_small_file(args):
    inp_file, db_inp_file, db_out_file, out_file, num = args
    inp = list(SeqIO.parse(open(inp_file), 'fasta'))
    shuffle(inp)
    writer = FastaWriter(open(out_file, 'w'), wrap=0)
    writer.write_file(inp[:num])