def output_dinuc_shuffled_fasta(fasta_fname, shuffled_fasta_fname,
                                num_shuffles=1):
    """
    Given a FASTA file, output a dinucleotide shuffled version of it.

    Every entry yielded by ``fastx_utils.get_fastx_entries(fasta_fname)``
    is unpacked as a (name, sequence) pair.  For each entry,
    ``num_shuffles`` records are written to ``shuffled_fasta_fname``;
    each record keeps the entry's original name and carries the first
    element returned by a fresh ``get_dinuc_shuffles(sequence)`` call.

    Args:
        fasta_fname: Path of the input FASTA file.
        shuffled_fasta_fname: Path of the output file, opened with
            ``fastx_utils.write_open_fastx``.
        num_shuffles: Number of shuffled records to emit per input
            entry (default 1).
    """
    fasta_out = fastx_utils.write_open_fastx(shuffled_fasta_fname)
    try:
        for fastx_name, fastx_seq in fastx_utils.get_fastx_entries(fasta_fname):
            # One independent shuffle per requested copy; only the first
            # sequence returned by get_dinuc_shuffles is used each time.
            shuffled_recs = [
                (fastx_name, get_dinuc_shuffles(fastx_seq)[0])
                for _ in range(num_shuffles)
            ]
            fasta_utils.write_fasta(fasta_out, shuffled_recs)
    finally:
        # Close the output even if reading, shuffling or writing fails,
        # so the handle is not leaked and buffered data is flushed.
        fasta_out.close()
# NOTE(review): bam_to_fastx is defined twice in a row in this file with the
# same body; the later definition replaces this one when the module loads.
# Consider removing one of them.
def bam_to_fastx(logger, in_file, out_file,
                 record_type="fasta",
                 make_unique_recs=False):
    """
    BAM to FASTX converter, based on code by Brad Chapman.

    By default converts to FASTA record. If 'make_unique_recs' is set
    to True, then make each FASTA record unique (append a number to it)
    so that reads with multiple alignments can be considered.

    Args:
        logger: Object with an ``info`` method used for progress messages.
        in_file: Path of the input BAM file, passed to ``bam_to_rec``.
        out_file: Path of the output file, opened with
            ``fastx_utils.write_open_fastx``.
        record_type: Format name handed to ``SeqIO.write``
            (default "fasta").
        make_unique_recs: Forwarded unchanged to ``bam_to_rec``.
    """
    # Imported lazily so Biopython is only required when this converter runs.
    from Bio import SeqIO

    logger.info("Converting %s to FASTA" %(in_file))
    out_handle = fastx_utils.write_open_fastx(out_file)
    try:
        logger.info("  - Output file: %s" %(out_file))
        SeqIO.write(bam_to_rec(in_file,
                               make_unique_recs=make_unique_recs),
                    out_handle, record_type)
        logger.info("Finished FASTA conversion.")
    finally:
        # Always release the output handle, even when reading the BAM or
        # writing records raises part-way through.
        out_handle.close()
# NOTE(review): an identical definition of bam_to_fastx appears immediately
# before this one in the file; this later definition is the one in effect.
# Consider removing the duplicate.
def bam_to_fastx(logger, in_file, out_file,
                 record_type="fasta",
                 make_unique_recs=False):
    """
    BAM to FASTX converter, based on code by Brad Chapman.

    By default converts to FASTA record. If 'make_unique_recs' is set
    to True, then make each FASTA record unique (append a number to it)
    so that reads with multiple alignments can be considered.

    Args:
        logger: Object with an ``info`` method used for progress messages.
        in_file: Path of the input BAM file, passed to ``bam_to_rec``.
        out_file: Path of the output file, opened with
            ``fastx_utils.write_open_fastx``.
        record_type: Format name handed to ``SeqIO.write``
            (default "fasta").
        make_unique_recs: Forwarded unchanged to ``bam_to_rec``.
    """
    # Imported lazily so Biopython is only required when this converter runs.
    from Bio import SeqIO

    logger.info("Converting %s to FASTA" % (in_file))
    out_handle = fastx_utils.write_open_fastx(out_file)
    try:
        logger.info("  - Output file: %s" % (out_file))
        SeqIO.write(bam_to_rec(in_file,
                               make_unique_recs=make_unique_recs),
                    out_handle, record_type)
        logger.info("Finished FASTA conversion.")
    finally:
        # Always release the output handle, even when reading the BAM or
        # writing records raises part-way through.
        out_handle.close()