def unwrap_fasta(infile, outfile, strip_comment=False):
    """
    Copy the FASTA records of *infile* into *outfile* without line wrapping.

    :param str infile: The path to the input FASTA file.
    :param str outfile: The path to the output file.
    :param bool strip_comment: When true, record titles are produced by
        ``Fastq2Fasta.just_name`` instead of the writer's default title.
    """
    with open(outfile, "w") as fasta_out:
        # The two modes differ only in the title callback given to the writer.
        writer_options = {"wrap": None}
        if strip_comment:
            writer_options["record2title"] = Fastq2Fasta.just_name
        writer = FastaIO.FastaWriter(fasta_out, **writer_options)
        writer.write_file(SeqIO.parse(infile, 'fasta'))
def hashing(unhashed_otu_table_list, unhashed_rep_seqs_list, sample_metadata_list):
    """Combine per-run OTU tables, representative sequences and sample metadata.

    Writes three files into the current working directory:
    ``sample_metadata.tsv``, ``otu_table.biom`` and ``rep_seqs.fasta``.

    :param unhashed_otu_table_list: inputs passed one by one to
        ``hash_otu_table``.
    :param unhashed_rep_seqs_list: inputs passed one by one to
        ``hash_rep_seqs``.
    :param sample_metadata_list: delimited files read with ``pd.read_csv``.
    :raises AssertionError: if the OTU table index and the collected
        representative sequence ids differ.
    """
    # Create OTU table
    hashed_tables = [hash_otu_table(table) for table in unhashed_otu_table_list]
    otu_df = pd.concat(hashed_tables, join="outer", axis=1)
    otu_df.fillna(0.0, inplace=True)
    otu_table = Table(otu_df.values, list(otu_df.index), list(otu_df.columns))

    # Create rep seqs
    # NOTE(review): rep_seq_ids is handed to hash_rep_seqs and then compared
    # with the table index, so the helper presumably fills it in - confirm.
    rep_seq_ids = set()
    seqs = []
    for rep_seqs_source in unhashed_rep_seqs_list:
        seqs.extend(hash_rep_seqs(rep_seqs_source, rep_seq_ids))
    assert set(otu_df.index) == rep_seq_ids
    assert len(otu_df.index) == len(rep_seq_ids)

    # Merge sample metadata
    # NOTE(review): the separator literal is kept exactly as written
    # (an escaped backslash followed by "t") - confirm it is intended.
    metadata_frames = [
        pd.read_csv(metadata_path, sep="\\t")
        for metadata_path in sample_metadata_list
    ]
    sample_metadata = pd.concat(metadata_frames)

    # Write files
    sample_metadata.to_csv("sample_metadata.tsv", sep="\\t", index=False)
    with biom_open("otu_table.biom", "w") as biom_handle:
        otu_table.to_hdf5(biom_handle, "Constructed by micone in dada2/deblur pipeline")
    with open("rep_seqs.fasta", "w") as fasta_handle:
        FastaIO.FastaWriter(fasta_handle, wrap=None).write_file(seqs)
def first_occurrences_only(fasta_in):
    """Write a copy of *fasta_in* that keeps only the first copy of each sequence.

    The output is placed next to the input and named
    ``<input basename>_first_occurrences_only.fasta``. Two records count as
    duplicates when the SHA-256 digests of their sequence strings are equal;
    ids and descriptions are not compared. A short summary is printed.

    :param str fasta_in: path to the input FASTA file.
    :raises IOError: if the output file already exists.
    """
    in_fasta_basename = os.path.splitext(os.path.basename(fasta_in))[0]
    out_basedir = os.path.realpath(os.path.dirname(fasta_in))
    out_filepath = os.path.join(
        out_basedir, in_fasta_basename + "_first_occurrences_only.fasta")

    if os.path.exists(out_filepath):
        raise IOError("%s already exists; skipping..." % out_filepath)

    total_seq_count = 0
    # Digests seen so far in THIS file. Kept local so the counts printed
    # below describe this call only.
    hashes_seen_before = set()

    with open(out_filepath, "w") as handle:
        fasta_out = FastaIO.FastaWriter(handle, wrap=None)
        fasta_out.write_header()

        for record in SeqIO.parse(fasta_in, "fasta"):
            total_seq_count += 1
            record_hash = hashlib.sha256(
                str(record.seq).encode("UTF-8")).hexdigest()
            if record_hash not in hashes_seen_before:
                hashes_seen_before.add(record_hash)
                fasta_out.write_record(record)

    print("{} seqs seen".format(total_seq_count))
    print("{} unique seqs found".format(len(hashes_seen_before)))
    print("{} identical duplicates removed".format(
        total_seq_count - len(hashes_seen_before)))
def remap_tax_id(fasta_in, remap_table):
    """Replace FASTA descriptions with annotations from a tab-separated table.

    The output is written next to the input as
    ``<input basename>_annotated_for_beast.fasta``. The first column of
    *remap_table* is the sequence id; the remaining columns are joined with
    ``|`` to form the new description, with empty fields shown as ``?``.
    Records whose id is missing from the table are skipped with a warning.

    :param str fasta_in: path to the input FASTA file.
    :param str remap_table: path to the tab-separated annotation table.
    :raises IOError: if the output file already exists.
    :raises LookupError: if an id occurs more than once in the table.
    """
    in_fasta_basename = os.path.splitext(os.path.basename(fasta_in))[0]
    out_basedir = os.path.realpath(os.path.dirname(fasta_in))
    out_filepath = os.path.join(
        out_basedir, in_fasta_basename + "_annotated_for_beast.fasta")

    if os.path.exists(out_filepath):
        raise IOError("%s already exists; skipping..." % out_filepath)

    id_map = dict()
    with open(remap_table, "r") as map_handle:
        for line in map_handle:
            # Strip the line terminator first: otherwise it ends up inside
            # the last field (and so in the description) and makes an
            # all-empty row look non-empty.
            seqid, *other_fields = line.rstrip("\r\n").split("\t")
            if seqid in id_map:
                raise LookupError("%s already found in map" % seqid)
            if seqid == "taxa":
                # if this is a figtree-formatted annotation file, skip the
                # header row that has nothing but column labels
                continue
            id_map[seqid] = other_fields

    with open(out_filepath, "w") as handle:
        fasta_out = FastaIO.FastaWriter(handle, wrap=None)
        fasta_out.write_header()

        for record in SeqIO.parse(fasta_in, "fasta"):
            if record.id not in id_map:
                print("Warning: '{}' not found in {}".format(
                    record.id, os.path.basename(remap_table)))
                continue

            annotations = id_map[record.id]
            # Only replace the description when a field carries content.
            if any(annotations):
                # Two passes are needed because replacements cannot overlap:
                # "|||" becomes "|?||" and then "|?|?|".
                record.description = (
                    "|".join(annotations)
                    .replace("||", "|?|")
                    .replace("||", "|?|"))
            fasta_out.write_record(record)
def write_fasta_1line(input_fasta_file, output_fasta_file):
    """Rewrite a FASTA file with every sequence on a single line.

    All records are loaded into memory; the input path and the number of
    records are printed before the output is written.

    :param str input_fasta_file: path of the FASTA file to read.
    :param str output_fasta_file: path of the FASTA file to create.
    """
    with open(input_fasta_file, "r") as source:
        records = list(SeqIO.parse(source, "fasta"))

    print(input_fasta_file)
    print("Number of protein sequences: ", len(records))

    with open(output_fasta_file, "w") as sink:
        FastaIO.FastaWriter(sink, wrap=None).write_file(records)
def Main():
    """Generate synthetic transposition reads, write them as FASTA, then gzip.

    Command-line options select the number of transpositions, the reads per
    transposition, the read length and the output file name; further options
    are registered by ``TranspositionParams.AddArgs``. After the FASTA file
    is written, a compressed copy is created at ``<output_fname>.gz``.
    """
    # Imported locally so the fix below needs no new module-level import.
    import shutil

    parser = argparse.ArgumentParser(description='Generate synthetic reads.',
                                     fromfile_prefix_chars='@')
    parser.add_argument("-t", "--num_transpositions", type=int, default=50,
                        help="Number of transpositions to generate")
    parser.add_argument("-r", "--num_reads", type=int, default=10000,
                        help="Number of reads to generate per transposition.")
    parser.add_argument("-l", "--read_length", type=int, default=100,
                        help="Length of each generated read, in nucleotides.")
    parser.add_argument("-o", "--output_fname",
                        default='generated_transposition_reads.fa',
                        help="Where to write FASTA output to.")
    TranspositionParams.AddArgs(parser)
    args = parser.parse_args()

    tn_params = TranspositionParams.FromArgs(args)
    insert_gen = InsertGenerator.FromTranspositionParams(tn_params)

    n_trans = args.num_transpositions
    n_reads = args.num_reads
    read_len = args.read_length

    # Single-argument parenthesised prints behave identically as a Python 2
    # print statement and as a Python 3 function call.
    print('Generating %d random transpositions with %d random %d NT reads each' % (
        n_trans, n_reads, read_len))
    print('Writing generated reads to FASTA')

    x = 0  # running count of reads written, used for progress reporting
    with open(args.output_fname, 'w') as fh:
        writer = FastaIO.FastaWriter(fh)
        writer.write_header()

        for construct_num in xrange(n_trans):
            trans = Transposition(construct_num, insert_gen,
                                  tn_params.backbone_seq,
                                  tn_params.backbone_start_offset)
            for read_num in xrange(n_reads):
                frag = trans.Shear(read_num, read_len)
                record = frag.ToSeqRecord()
                writer.write_record(record)
                x += 1
                if x % 1000000 == 0:
                    print("Created %d reads" % x)

        writer.write_footer()

    print('Zipping generated reads.')
    gzip_fname = '%s.gz' % args.output_fname
    # Compress the file CONTENTS. The previous code passed the file name to
    # gzipf.write(), so the archive held only that short string.
    with open(args.output_fname, 'rb') as plain_fh:
        with GzipFile(gzip_fname, mode='w') as gzipf:
            shutil.copyfileobj(plain_fh, gzipf)
def export_dna_record(gene_seq, gene_id, gene_description, output_handle):
    """Write one DNA sequence to *output_handle* as an unwrapped FASTA record.

    :param gene_seq: sequence data, wrapped in a ``Seq`` with the
        ``IUPAC.unambiguous_dna`` alphabet.
    :param gene_id: value assigned to the record's ``id``.
    :param gene_description: value assigned to the record's ``description``.
    :param output_handle: open, writable handle that receives the record.
    """
    record = SeqRecord(Seq(gene_seq, IUPAC.unambiguous_dna))
    # Assigned as attributes after construction, as before.
    record.id = gene_id
    record.description = gene_description

    writer = FastaIO.FastaWriter(output_handle, wrap=None)
    writer.write_header()
    writer.write_record(record)
    writer.write_footer()
def write(data, filename):
    """Write *data* to *filename* as unwrapped FASTA.

    :param data: iterable of mappings, each providing the keys ``'seq'``,
        ``'id'`` and ``'description'``.
    :param str filename: path of the FASTA file to create.
    """
    with open(filename, "w") as handle:
        fasta_out = FastaIO.FastaWriter(handle, wrap=None)
        records = [
            SeqRecord(Seq(entry['seq']),
                      id=entry['id'],
                      description=entry['description'])
            for entry in data
        ]
        fasta_out.write_file(records)
def recomp(input, output, both=False):
    """Write the reverse complement of every record in a FASTA input.

    Each reverse-complemented record gets the id ``<original id>_RC`` and an
    empty description.

    :param input: FASTA source accepted by ``SeqIO.parse``.
    :param output: target handed to ``FastaIO.FastaWriter``.
    :param both: when equal to ``True``, the original record is written
        immediately before its reverse complement.
    """
    writer = FastaIO.FastaWriter(output, wrap=None)
    writer.write_header()
    for original in SeqIO.parse(input, "fasta"):
        reverse_complemented = original.reverse_complement(
            id=original.id + "_RC", description="")
        # Equality test kept on purpose: a truthiness test would also
        # accept values such as non-empty strings.
        if both == True:  # noqa: E712
            writer.write_record(original)
        writer.write_record(reverse_complemented)
    writer.write_footer()
def get_fasta(pdb_file, fasta_file, transfer_ids=None):
    """Write the records yielded by ``PdbIO.PdbSeqresIterator`` as FASTA.

    Records with an empty sequence are skipped. Each written record is also
    printed (id, sequence, length).

    :param pdb_file: input passed to ``PdbIO.PdbSeqresIterator``.
    :param fasta_file: target handed to ``FastaIO.FastaWriter``.
    :param transfer_ids: optional container of record ids; when given, only
        records whose id is in it are written.
    """
    writer = FastaIO.FastaWriter(fasta_file)
    writer.write_header()
    for entry in PdbIO.PdbSeqresIterator(pdb_file):
        has_no_sequence = len(entry.seq) == 0
        if has_no_sequence or (
                transfer_ids is not None and entry.id not in transfer_ids):
            continue
        print(entry.id, entry.seq, len(entry.seq))
        writer.write_record(entry)
def perform_mapping(mapping_dir, og_files, threshold=1, exclude_species=None):
    """Extend orthologous groups (OGs) with the best mapped sequence and save them.

    :param str mapping_dir: directory prefix (expected to end with a path
        separator) containing ``*.fa`` mapping files; the output directory
        is created underneath it.
    :param og_files: FASTA source with the OG sequences; the OG name is the
        last `` | ``-separated field of each record description.
    :param threshold: an OG is kept when ``get_coverage(og) <= threshold``.
    :param exclude_species: list forwarded to
        ``find_best_translation_by_similarity``; defaults to ``["none"]``.
    :return: dict mapping OG name to its list of records.
    """
    # A fresh list per call avoids sharing one mutable default object.
    if exclude_species is None:
        exclude_species = ["none"]

    # read in og with aa seq, grouped by OG name without duplicate ids
    og_dict = {}
    for record in SeqIO.parse(og_files, "fasta"):
        key = record.description.split(" | ")[-1]
        group = og_dict.setdefault(key, [])
        if all(existing.id != record.id for existing in group):
            group.append(record)

    # parse the mapped reads to ogs to dictionary
    all_dict = {}
    for file in glob.glob(mapping_dir + "*.fa"):
        og_name = file.split("_")[-1].split(".")[0]
        og = og_dict[og_name]

        # change ids to species names (first five characters of the id)
        for record in og:
            record.id = record.id[0:5]

        # find the best representative seq based by mapping
        mapping = list(SeqIO.parse(file, "fasta"))
        best_translated_seq = find_best_translation_by_similarity(
            mapping, og[0], exclude_species=exclude_species)
        if best_translated_seq is not None:
            og.append(best_translated_seq)
        if get_coverage(og) <= threshold:
            all_dict[og_name] = og

    # Compare by value: "is not" against a literal tests object identity
    # and is unreliable for numbers and strings.
    if threshold != 1:
        OG_OUT = mapping_dir + 'og' + str(threshold) + "/"
    elif exclude_species[0] != "none":
        OG_OUT = mapping_dir + 'og_without' + "_".join(exclude_species) + "/"
    else:
        OG_OUT = mapping_dir + 'og/'

    if not os.path.exists(OG_OUT):
        os.makedirs(OG_OUT)

    for key, item in all_dict.items():
        file_name = OG_OUT + key + ".fa"
        # Close each output so its contents are flushed to disk.
        with open(file_name, "w") as out_handle:
            FastaIO.FastaWriter(out_handle, wrap=None).write_file(item)

    print("FINISHED OG RECONSTRUCTION!")
    return all_dict
def get_seq_from_ids(inputfile=None, outputfile=None, id_file=None):
    """Print the FASTA records whose ids are listed in *id_file*.

    :param inputfile: object with a ``name`` attribute naming the FASTA file.
    :param outputfile: target handed to ``FastaIO.FastaWriter``.
    :param id_file: open handle with one id per line; ``>`` characters and
        the trailing newline are removed from each line.
    :raises KeyError: if a listed id is not present in the input.
    """
    wanted_ids = [
        raw_line.replace(">", "").rstrip("\n")
        for raw_line in id_file.readlines()
    ]
    record_dict = SeqIO.to_dict(SeqIO.parse(inputfile.name, "fasta"))
    # NOTE(review): this writer is created but never written to; the records
    # go to stdout instead. Confirm whether *outputfile* should receive them.
    fasta_out = FastaIO.FastaWriter(outputfile, wrap=None)
    for wanted_id in wanted_ids:
        entry = record_dict[wanted_id]
        print(">" + entry.id)
        print(entry.seq)
def hash_rep_seqs(unhashed_rep_seqs, output_file):
    """Rename every record to the hash of its sequence and write unwrapped FASTA.

    Each record's id becomes ``hash_function(str(record.seq))``; its name and
    description are cleared.

    :param unhashed_rep_seqs: FASTA source accepted by ``SeqIO.parse``.
    :param str output_file: path of the FASTA file to create.
    :return: list of the new ids, in input order.
    """
    records = list(SeqIO.parse(unhashed_rep_seqs, "fasta"))
    for record in records:
        record.id = hash_function(str(record.seq))
        record.description = ""
        record.name = ""
    seq_ids = [record.id for record in records]

    with open(output_file, "w") as out_handle:
        FastaIO.FastaWriter(out_handle, wrap=None).write_file(records)
    return seq_ids
def write_fasta_with_sanitized_ids(fasta_in, out_filepath):
    """Copy *fasta_in* to *out_filepath* with sanitized record ids.

    Every record id is passed through ``sanitize_id_for_sam_rname``. After
    writing, the output path and an ``ls -lah`` listing of its directory are
    printed for diagnostics.

    :param fasta_in: FASTA source accepted by ``SeqIO.parse``.
    :param str out_filepath: path of the FASTA file to create.
    :return: *out_filepath*.
    """
    with open(out_filepath, "w") as handle:
        fasta_out = FastaIO.FastaWriter(handle, wrap=None)
        fasta_out.write_header()
        for record in SeqIO.parse(fasta_in, "fasta"):
            record.id = sanitize_id_for_sam_rname(record.id)
            fasta_out.write_record(record)

    out_dir = os.path.dirname(out_filepath)
    print("out_filepath", out_filepath)
    print("os.path.dirname(out_filepath)", out_dir)
    print("ls -lah")
    # A bare file name has an empty dirname, and `ls -lah ""` exits non-zero,
    # which makes check_output raise. List the current directory instead.
    listing = subprocess.check_output(["ls", "-lah", out_dir or "."])
    for line in listing.decode("utf-8").split("\n"):
        print(line)
    return out_filepath
def MakeConcensusAlignment(pair):
    """Align *pair* with mafft via temporary files in the working directory.

    The records are written unwrapped to ``tmp.fasta``; mafft's standard
    output is redirected to ``tmp_2.fasta``.

    :param pair: records accepted by ``FastaWriter.write_file``.
    :return: an empty tuple.
    """
    write_tmpfasta = "tmp.fasta"
    # The context manager closes (and so flushes) the file before mafft reads
    # it. The previous `handle1.close` named an undefined variable and was
    # not a call, so the file was never closed.
    with open(write_tmpfasta, "w") as handle:
        writer = FastaIO.FastaWriter(handle, wrap=None)
        writer.write_file(pair)

    # Retained from the original: wait until the file is visible on disk.
    while not os.path.exists(write_tmpfasta):
        time.sleep(1)

    # The command is built from constants only, so running it through the
    # shell (needed for the redirection) exposes no untrusted input.
    command = "mafft " + write_tmpfasta + " > tmp_2.fasta"
    print(command)
    subprocess.call(command, shell=True)
    return ()
def write_records(records, output_file):
    """
    Save BioPython SeqRecord objects to a FASTA file, one line per sequence.

    Parameters
    ----------
    records : list
        List with BioPython SeqRecord objects.
    output_file : str
        Path to the output file.
    """
    with open(output_file, 'w') as fasta_handle:
        # wrap=None disables line wrapping of the sequence
        writer = FastaIO.FastaWriter(fasta_handle, wrap=None)
        writer.write_file(records)
def convert_multiline_to_single_line_FASTA():
    """Rewrite ``preads4falcon.fasta`` unwrapped and record read lengths.

    Reads ``preads4falcon.fasta`` from the working directory and writes
    ``formatted_preads4falcon.fasta`` with one line per sequence. For every
    record, the module-level ``read_len_dict`` receives the sequence length
    under both the record id and the id followed by ``'``.
    """
    global read_len_dict

    sequences = []
    # Plain "r": the "U" flag was removed in Python 3.11 and Python 3 text
    # mode already translates newlines. The context manager also closes the
    # input, which the previous code never did.
    with open("preads4falcon.fasta", "r") as input_handle:
        for record in SeqIO.parse(input_handle, "fasta"):
            sequences.append(record)
            read_length = len(record.seq)
            read_len_dict[record.id] = read_length
            # second entry keyed by the id with a trailing apostrophe
            read_len_dict[record.id + "'"] = read_length

    with open("formatted_preads4falcon.fasta", "w") as output_handle:
        fasta_out = FastaIO.FastaWriter(output_handle, wrap=None)
        fasta_out.write_file(sequences)
def single_line_records(fasta_in):
    """Write an unwrapped copy of *fasta_in* next to the input file.

    The copy is named ``<input basename>_single_lines.fasta``.

    :param str fasta_in: path to the input FASTA file.
    :raises IOError: if the output file already exists.
    """
    stem, _extension = os.path.splitext(os.path.basename(fasta_in))
    target_dir = os.path.realpath(os.path.dirname(fasta_in))
    out_filepath = os.path.join(target_dir, stem + "_single_lines.fasta")

    # Refuse to overwrite an earlier result.
    if os.path.exists(out_filepath):
        raise IOError("%s already exists; skipping..." % out_filepath)

    with open(out_filepath, "w") as handle:
        writer = FastaIO.FastaWriter(handle, wrap=None)
        writer.write_header()
        for record in SeqIO.parse(fasta_in, "fasta"):
            writer.write_record(record)
def main(args):
    """Merge sequences that translate to the same protein and write ``output.fa``.

    Record ids may carry a count suffix of the form ``<id>_x<count>``; ids
    without a suffix count as 1. When two sequences translate identically,
    the later one is removed and its count is added to the earlier one. The
    survivors are written unwrapped as ``<id>_x<count>``.

    :param args: object with an ``input_fasta`` attribute accepted by
        ``SeqIO.parse``.
    """
    COUNT_SEPARATOR = '_x'

    seqs = {}
    for seq_record in SeqIO.parse(args.input_fasta, "fasta"):
        split_seqid = seq_record.id.split(COUNT_SEPARATOR)
        if len(split_seqid) == 1:
            count = 1
        elif len(split_seqid) == 2:
            count = int(split_seqid[1])
        else:
            # The placeholder was missing, so logging could not format the
            # message and the id was never reported.
            logging.error("Error parsing: %s", seq_record.id)
            continue
        seqs[split_seqid[0]] = {'seq': seq_record.seq, 'count': count}

    # combinations('ABCD', 2) gives:
    # AB AC AD BC BD CD
    # ie. we don't need to compare AB and BA
    # combinations() copies the keys up front, so deleting from `seqs`
    # inside the loop is safe.
    for seq1, seq2 in combinations(seqs, 2):
        # Need to skip over sequences that have been removed on previous
        # iterations
        if seq1 not in seqs or seq2 not in seqs:
            continue

        # Translate each pair of seqs. If either fails, skip the pair:
        # otherwise the comparison below would use a translation left over
        # from an earlier pair (or an unbound name on the first pair).
        try:
            seq1_translated = seqs[seq1]['seq'].translate()
        except TranslationError:
            print("Error translating: " + seq1, file=sys.stderr)
            continue
        try:
            seq2_translated = seqs[seq2]['seq'].translate()
        except TranslationError:
            print("Error translating: " + seq2, file=sys.stderr)
            continue

        # Remove seq2 from collection of seqs if it translates to the
        # same amino acid sequence as seq1. Add the counts for seq2 to seq1
        # before removing.
        if seq1_translated == seq2_translated:
            print(seq1, " translates identicaly to ", seq2, ". Deleting ", seq2)
            seqs[seq1]['count'] += seqs[seq2]['count']
            del seqs[seq2]

    # Close the output so its contents are flushed to disk.
    with open('output.fa', 'w') as out_handle:
        fasta_out = FastaIO.FastaWriter(out_handle, wrap=None)
        fasta_out.write_file(
            SeqRecord(entry['seq'],
                      id=seq + COUNT_SEPARATOR + str(entry['count']),
                      description="")
            for seq, entry in seqs.items()
        )
def split_records(fasta_in):
    """Write every record of *fasta_in* to its own unwrapped FASTA file.

    Files are created in a directory named after the input file (without its
    extension) beside the input, as ``<record id>.fasta``. Existing files are
    left untouched and reported.

    :param str fasta_in: path to the input FASTA file.
    """
    for record in SeqIO.parse(fasta_in, "fasta"):
        stem = os.path.splitext(os.path.basename(fasta_in))[0]
        out_basedir = os.path.realpath(
            os.path.join(os.path.dirname(fasta_in), stem))
        # The directory is only created once a record has been read.
        if not os.path.isdir(out_basedir):
            os.makedirs(out_basedir, exist_ok=True)

        out_filepath = os.path.join(out_basedir, record.id + ".fasta")
        print("%s %i -> %s" % (record.id, len(record), out_filepath))

        if os.path.exists(out_filepath):
            print("%s already exists; skipping..." % out_filepath)
            continue

        with open(out_filepath, "w") as handle:
            writer = FastaIO.FastaWriter(handle, wrap=None)
            writer.write_header()
            writer.write_record(record)
def main():
    """Command-line entry point: write one FASTA file per orthologous group.

    Options: ``-r/--mapped_reads`` (FASTA of mapped reads),
    ``-d/--ref_data`` (FASTA of reference data), ``-o/--out_folder``
    (directory for the per-group files) and ``-h`` (print usage). Groups are
    obtained from ``get_ogs(read_mappings, og_data)``.
    """
    usage = 'concat_alignments.py -r <mapped_reads> -d <ref_data> -o <out_folder>'
    try:
        # "h" is registered so the -h branch below is reachable.
        opts, _ = getopt.getopt(
            sys.argv[1:], "hr:d:o:",
            ["mapped_reads=", "ref_data=", "out_folder="])
    except getopt.GetoptError as e:
        print(str(e))
        print(usage)
        sys.exit(2)

    mapped_reads = None
    ref_data = None
    out_folder = None

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        # getopt reports the registered long name, which is --mapped_reads.
        # The old test for "--reads" never matched, so that option fell
        # through to the assertion.
        elif opt in ("-r", "--mapped_reads"):
            mapped_reads = arg
        elif opt in ("-d", "--ref_data"):
            ref_data = arg
        elif opt in ("-o", "--out_folder"):
            out_folder = arg
        else:
            assert False, "unhandled option"

    # All three options are needed; report usage instead of failing later
    # on a None or empty value.
    if not mapped_reads or not ref_data or not out_folder:
        print(usage)
        sys.exit(2)

    read_mappings = list(SeqIO.parse(mapped_reads, "fasta"))
    og_data = list(SeqIO.parse(ref_data, "fasta"))

    # Compare by value; "is not" against a string literal tests identity.
    if not out_folder.endswith("/"):
        out_folder += "/"

    list_of_ogs = get_ogs(read_mappings, og_data)

    if list_of_ogs is not None:
        for og in list_of_ogs:
            file_name = out_folder + og + ".fasta"
            # Close each output so its contents are flushed to disk.
            with open(file_name, "w") as out_handle:
                fasta_out = FastaIO.FastaWriter(out_handle, wrap=None)
                fasta_out.write_file(list_of_ogs[og])
def subset_to_ids_not_in_file(fasta_in, ids_file, fasta_out):
    """Copy the records of *fasta_in* whose id is NOT listed in *ids_file*.

    :param fasta_in: FASTA source accepted by ``SeqIO.parse``.
    :param str ids_file: path to a text file with one id per line; trailing
        whitespace is ignored.
    :param str fasta_out: path of the unwrapped FASTA file to create.
    """
    # Named for what it does: these ids are the ones left out. Distinct
    # local names also avoid rebinding the ids_file / fasta_out parameters.
    with open(ids_file) as ids_handle:
        ids_to_exclude = {line.rstrip() for line in ids_handle}

    with open(fasta_out, "w") as handle:
        writer = FastaIO.FastaWriter(handle, wrap=None)
        writer.write_header()
        for record in SeqIO.parse(fasta_in, "fasta"):
            if record.id not in ids_to_exclude:
                writer.write_record(record)
def write_and_drop_seqs(fasta_in, fasta_out, gap_threshold=None, ambig_threshold=None):
    """Copy a FASTA file, dropping records with too many ``N`` or ``-`` characters.

    A record is omitted (and reported) when its fraction of ``N`` exceeds
    *ambig_threshold* or its fraction of ``-`` exceeds *gap_threshold*. A
    threshold of ``None`` disables that check. Output is wrapped at 80
    columns.

    :param fasta_in: object whose ``name`` attribute is the input FASTA path.
    :param str fasta_out: path of the FASTA file to create.
    :param gap_threshold: maximum allowed gap fraction, or ``None``.
    :param ambig_threshold: maximum allowed ``N`` fraction, or ``None``.
    """
    print("ambig_threshold", ambig_threshold)
    print("gap_threshold", gap_threshold)

    with open(fasta_out, "w") as handle:
        writer = FastaIO.FastaWriter(handle, wrap=80)
        writer.write_header()

        for record in SeqIO.parse(fasta_in.name, "fasta"):
            seq_len = len(record)
            if seq_len:
                ambig_fraction = record.seq.count("N") / float(seq_len)
                gap_fraction = record.seq.count("-") / float(seq_len)
            else:
                # An empty sequence holds neither character; this avoids
                # dividing by zero.
                ambig_fraction = gap_fraction = 0.0

            too_ambiguous = (ambig_threshold is not None
                             and ambig_fraction > ambig_threshold)
            too_gappy = (gap_threshold is not None
                         and gap_fraction > gap_threshold)
            if too_ambiguous or too_gappy:
                print("omitting", record.id, "ambig:", ambig_fraction,
                      "gap:", gap_fraction)
                continue
            writer.write_record(record)
def testSerialize(self):
    """Fragments written as FASTA parse back with the expected info keys.

    Builds 10 transpositions with 100 sheared fragments each, writes them to
    an in-memory buffer, parses the buffer again and checks the keys returned
    by ``Fragment.ParseInfoDict`` for every record.
    """
    insert_gen = InsertGenerator(self.INSERT_SEQ,
                                 self.FIXED_5P,
                                 self.FIXED_3P,
                                 extra_bp_5p='T',
                                 linker_gen=self.LINKER_GEN)

    records = []
    for tn_id in xrange(10):
        tn_gen = Transposition(tn_id, insert_gen, self.TARGET, self.ORF_START)
        for frag_id in xrange(100):
            records.append(tn_gen.Shear(frag_id).ToSeqRecord())

    out_buffer = StringIO()
    writer = FastaIO.FastaWriter(out_buffer)
    writer.write_header()
    writer.write_records(records)

    # Parse the generated output.
    serialized = StringIO(out_buffer.getvalue())
    expected_keys = sorted(Fragment.INFO_DICT_KEYS)
    for record in SeqIO.parse(serialized, 'fasta'):
        info_dict = Fragment.ParseInfoDict(record)
        self.assertListEqual(sorted(info_dict.keys()), expected_keys)
def perform_mapping(DIR_MAPPING, FILE_OGS):
    """Write the original orthologous groups (OGs) that have a mapping file.

    :param str DIR_MAPPING: directory prefix (expected to end with a path
        separator) containing ``*.fa`` mapping files; results are written to
        ``<DIR_MAPPING>origin_og/``.
    :param FILE_OGS: FASTA source with the OG sequences; the OG name is the
        last `` | ``-separated field of each record description.
    :return: dict mapping OG name to its list of records.
    """
    # read in og with aa seq, grouped by OG name without duplicate ids
    og_dict = {}
    for record in SeqIO.parse(FILE_OGS, "fasta"):
        key = record.description.split(" | ")[-1]
        group = og_dict.setdefault(key, [])
        if all(existing.id != record.id for existing in group):
            group.append(record)

    # parse the mapped reads to ogs to dictionary
    all_dict = {}
    for file in glob.glob(DIR_MAPPING + "*.fa"):
        og_name = file.split("_")[-1].split(".")[0]
        og = og_dict[og_name]

        # change ids to species names (first five characters of the id)
        for record in og:
            record.id = record.id[0:5]

        all_dict[og_name] = og

    OG_OUT = DIR_MAPPING + 'origin_og/'
    if not os.path.exists(OG_OUT):
        os.makedirs(OG_OUT)

    for key, item in all_dict.items():
        file_name = OG_OUT + key + ".fa"
        # Close each output so its contents are flushed to disk; the
        # handles were previously left open.
        with open(file_name, "w") as out_handle:
            FastaIO.FastaWriter(out_handle, wrap=None).write_file(item)

    print("FINISHED PARSING OGs!")
    return all_dict
def regenerate():
    """Command-line entry point: rewrite contig ids as ``PREFIX_C_<element>``.

    ``<element>`` is the value returned by
    ``extract_unique_element_from_contigid`` for the old id. Descriptions are
    cleared and sequences are written unwrapped.
    """
    from Bio import SeqIO
    from Bio.SeqIO import FastaIO
    import argparse

    parser = argparse.ArgumentParser(description=''' Change the IDs of all fasta entries (contigs) to prefixed, sanitized IDs. The output format of each new ID is: PREFIX _ 'C' _ OLD-ID ''')
    parser.add_argument("--prefix", dest='dataset_id', required=True,
                        help='ID prefix')
    parser.add_argument("--input", dest='input_fasta', required=True,
                        help='fasta input file')
    parser.add_argument("--output", dest='output_fasta', required=True,
                        help='fasta output file')
    args = parser.parse_args()

    with open(args.input_fasta, 'r') as source_handle, \
            open(args.output_fasta, 'w') as dest_handle:
        writer = FastaIO.FastaWriter(dest_handle, wrap=None)
        writer.write_header()
        for record in SeqIO.parse(source_handle, "fasta"):
            unique_element = extract_unique_element_from_contigid(record.id)
            record.id = args.dataset_id + '_C_' + unique_element
            record.description = ""  # any comments after contig id are removed
            writer.write_record(record)
def remap_tax_id(fasta_in, remap_table):
    """Rename FASTA records using a two-column, tab-separated id table.

    The output is written next to the input as
    ``<input basename>_remapped_taxids.fasta``.

    :param str fasta_in: path to the input FASTA file.
    :param str remap_table: path to a table with ``old<TAB>new`` on each line.
    :raises IOError: if the output file already exists.
    :raises LookupError: if an old id occurs more than once in the table.
    :raises ValueError: if a table line does not have exactly two fields.
    :raises KeyError: if a record id is missing from the table.
    """
    in_fasta_basename = os.path.splitext(os.path.basename(fasta_in))[0]
    out_basedir = os.path.realpath(os.path.dirname(fasta_in))
    out_filepath = os.path.join(
        out_basedir, in_fasta_basename + "_remapped_taxids.fasta")

    if os.path.exists(out_filepath):
        raise IOError("%s already exists; skipping..." % out_filepath)

    id_map = dict()
    with open(remap_table, "r") as map_handle:
        for line in map_handle:
            # Strip the line terminator first; otherwise every new id would
            # end with a newline character.
            old, new = line.rstrip("\r\n").split("\t")
            if old in id_map:
                raise LookupError("%s already found in map" % old)
            id_map[old] = new

    with open(out_filepath, "w") as handle:
        fasta_out = FastaIO.FastaWriter(handle, wrap=None)
        fasta_out.write_header()

        for record in SeqIO.parse(fasta_in, "fasta"):
            record.id = id_map[record.id]
            fasta_out.write_record(record)
#!/usr/bin/env python
# (c) Christian Henke
# Licensed under Apache License 2.0
# https://www.apache.org/licenses/LICENSE-2.0

from Bio import SeqIO
from Bio.SeqIO import FastaIO
import argparse

# Command-line interface: input file, output file and the length cut-off.
parser = argparse.ArgumentParser(description=''' Remove sequences from fasta that are shorter than the desired minimum length. ''')
parser.add_argument("--input", dest='input_fasta', required=True,
                    help='fasta input file')
parser.add_argument("--output", dest='output_fasta', required=True,
                    help='fasta output file')
parser.add_argument("--min-seq-length", dest='min_seq_len', type=int,
                    required=True, help='minimum sequence length (integer)')
args = parser.parse_args()

# Stream the records through an unwrapped FASTA writer, keeping only those
# that reach the minimum length.
with open(args.input_fasta, 'r') as sourceFile, \
        open(args.output_fasta, 'w') as destFile:
    dest = FastaIO.FastaWriter(destFile, wrap=None)
    dest.write_header()
    for record in SeqIO.parse(sourceFile, "fasta"):
        if len(record.seq) < args.min_seq_len:
            continue
        dest.write_record(record)
# Ids of the selected proteins, in their original order.
names = [seq.id for seq in proteins]

# Index the nucleotide records by id once, instead of rescanning the whole
# collection for every protein name. Records sharing an id keep their
# relative order.
nucleotides_by_id = {}
for j in nucleotides:
    nucleotides_by_id.setdefault(j.id, []).append(j)

# ORFs whose id matches a protein id, ordered by protein then by nucleotide.
orfs = []
for i in names:
    orfs.extend(nucleotides_by_id.get(i, []))

# The context manager closes the database file even if writing fails.
with open(output + '_QMS_DB.fasta', "w") as handle:
    writer = FastaIO.FastaWriter(handle, wrap=None)
    writer.write_file(orfs)

###############################################

print(
    "\n::::::: BLASTing ORFs and appending hit to description :::::::\n\n v Ignore this warning v \n"
)

orfs_name = output + "_QMS_DB.fasta"
blast_xml = output + "_blast.xml"

# NOTE(review): the command is a single string run through the shell, so
# paths with spaces or shell metacharacters will misbehave. Consider an
# argument list with shell=False once the callers' inputs are confirmed.
command = blast_path + blast_type + ' -query ' + orfs_name + ' -db ' + blast_database + ' -outfmt 5 -num_threads ' + str(
    num_threads) + ' -max_target_seqs 1 -evalue 0.0001 -out ' + blast_xml

subprocess.call(command, shell=True)
def transform_file(source_file, destination_file, arguments):
    """Read records from *source_file*, apply the requested steps, and write them.

    The record iterator is built (optionally sorted), wrapped by each
    function in ``arguments.transforms``, optionally de-duplicated, wrapped
    by each function in ``arguments.apply_function``, and finally written to
    *destination_file*.

    :param source_file: input handle; its format comes from
        ``arguments.input_format`` or, failing that, ``from_handle``.
    :param destination_file: output handle; its format comes from
        ``arguments.output_format`` or, failing that, ``from_handle``.
    :param arguments: options object; the attributes read here are
        ``input_format``, ``output_format``, ``sort``, ``alphabet``,
        ``transforms``, ``cut_relative``, ``deduplicate_sequences``,
        ``apply_function`` and ``line_wrap``.
    """
    # Determine the file formats: an explicit option wins, otherwise the
    # format is derived from the handle.
    source_file_type = (arguments.input_format or from_handle(source_file))
    destination_file_type = (arguments.output_format or
                             from_handle(destination_file))

    # Get an iterator.
    sorters = {'length': transform.sort_length,
               'name': transform.sort_name,}
    directions = {'asc': 1, 'desc': 0}
    if arguments.sort:
        # Sorted iterator; arguments.sort has the form "<key>-<direction>"
        key, direction = arguments.sort.split('-')
        records = sorters[key](source_file=source_file,
                               source_file_type=source_file_type,
                               direction=directions[direction])
    else:
        # Unsorted iterator.
        records = SeqIO.parse(source_file, source_file_type,
                              alphabet=ALPHABETS.get(arguments.alphabet))

    #########################################
    # Apply generator functions to iterator.#
    #########################################

    # Apply all the transform functions in transforms
    if arguments.transforms:

        # Special case handling for --cut and --relative-to
        if arguments.cut_relative:
            # (original function, relative replacement) pairs
            for o, n in ((transform.multi_cut_sequences,
                          transform.cut_sequences_relative),
                         (transform.multi_mask_sequences,
                          transform.mask_sequences_relative)):
                # Find the first transform that wraps the original function;
                # when there is none, move on to the next pair.
                try:
                    f = next(f for f in arguments.transforms
                             if f.func == o)
                except StopIteration:
                    continue
                # Swap it in place for the relative variant, keeping its
                # position and keyword arguments and adding record_id.
                i = arguments.transforms.index(f)
                arguments.transforms.pop(i)
                arguments.transforms.insert(i,
                        functools.partial(n,
                            record_id=arguments.cut_relative, **f.keywords))

        for function in arguments.transforms:
            records = function(records)

    # De-duplication runs when the option is truthy or is None; any other
    # falsy value skips it.
    if (arguments.deduplicate_sequences or
            arguments.deduplicate_sequences is None):
        records = transform.deduplicate_sequences(
            records, arguments.deduplicate_sequences)

    # Apply all the partial functions
    if arguments.apply_function:
        for apply_function in arguments.apply_function:
            records = apply_function(records)

    # Only the fasta format is supported, as SeqIO.write does not have a 'wrap'
    # parameter.
    if (arguments.line_wrap is not None and destination_file_type == 'fasta'):
        logging.info("Attempting to write fasta with %d line breaks.",
                     arguments.line_wrap)

        # The destination handle is closed when this block exits.
        with destination_file:
            writer = FastaIO.FastaWriter(
                destination_file, wrap=arguments.line_wrap)
            writer.write_file(records)
    else:
        # Mogrify requires writing all changes to a temporary file by default,
        # but convert uses a destination file instead if one was specified. Get
        # sequences from an iterator that has generator functions wrapping it.
        # After creation, it is then copied back over the original file if all
        # tasks finish up without an exception being thrown. This avoids
        # loading the entire sequence file up into memory.
        logging.info("Applying transformations, writing to %s",
                     destination_file)

        SeqIO.write(records, destination_file, destination_file_type)