def main(args):
    """Write per-transcript RiboSeq read counts as CSV plus an HTML report.

    After validating the supplied arguments, every reference in the
    transcriptome FASTA is looked up in the Ribo-Seq BAM file. For each
    transcript that has reads (and, when 5' or 3' counts are requested,
    an ORF and counts in that region) a ``RiboCounts<N>.csv`` file is
    written. When at least one transcript was written, an ``index.html``
    table and the packaged ``css``/``js`` assets are added, the
    ``ribocount_output`` directory is archived as
    ``<output_path>/ribocount_output.zip`` and removed, and a summary is
    written to ``html_file``.

    Args:
        args: object exposing the attributes ``ribo_file``,
            ``transcriptome_fasta``, ``read_lengths``, ``read_offsets``,
            ``count_five``, ``count_three``, ``output_path`` and
            ``html_file``. It must work with ``vars()``
            (presumably an ``argparse.Namespace`` -- confirm with caller).

    Raises:
        Whatever the ``ribocore`` validation helpers raise on invalid
        input (not visible from here -- confirm in ``ribocore``).
    """
    (ribo_file, transcriptome_fasta, read_lengths, read_offsets,
     count_five, count_three, output_path, html_file) = (
        args.ribo_file, args.transcriptome_fasta, args.read_lengths,
        args.read_offsets, args.count_five, args.count_three,
        args.output_path, args.html_file)

    log.debug('Supplied arguments\n{}'.format(
        '\n'.join(['{:<20}: {}'.format(k, v) for k, v in vars(args).items()])))

    # error messages (simple format) are written to html file
    fh = logging.FileHandler(html_file)
    fh.setLevel(logging.ERROR)
    fh.setFormatter(ErrorLogFormatter('%(message)s'))
    log.addHandler(fh)

    log.info('Checking if required arguments are valid...')
    ribocore.check_required_arguments(ribo_file=ribo_file, transcriptome_fasta=transcriptome_fasta)

    log.info('Checking read lengths...')
    ribocore.check_read_lengths(ribo_file=ribo_file, read_lengths=read_lengths)
    log.info('Done')

    log.info('Checking read offsets...')
    ribocore.check_read_offsets(read_offsets=read_offsets)
    log.info('Done')

    log.info('Checking if each read length has a corresponding offset...')
    ribocore.check_read_lengths_offsets(read_lengths=read_lengths, read_offsets=read_offsets)
    log.info('Done')

    with ribocore.open_pysam_file(fname=ribo_file, ftype='bam') as bam_file, \
            ribocore.open_pysam_file(fname=transcriptome_fasta, ftype='fasta') as fasta_file:
        # Total valid transcript count (ones with reads)
        count = 0
        prime = None
        table_rows = []  # HTML table body content, one entry per transcript
        if count_five:
            log.info('Only 5\' read counts requested')
            prime = '5'
        elif count_three:
            log.info('Only 3\' read counts requested')
            prime = '3'

        # Create output directories. zip_dir contents will be written here
        # and a zip archive will be created from this directory.
        # makedirs also creates output_path and zip_dir, including any
        # missing parent directories of a nested output_path.
        zip_dir = os.path.join(output_path, 'ribocount_output')
        csv_dir = os.path.join(zip_dir, 'csv')
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)

        log.info('Get RiboSeq read counts for all transcripts in FASTA')
        for transcript in fasta_file.references:
            ribo_counts, ribo_reads = ribocore.get_ribo_counts(
                ribo_fileobj=bam_file, transcript_name=transcript,
                read_lengths=read_lengths, read_offsets=read_offsets)
            if not ribo_reads:  # no reads for this transcript. skip.
                continue

            transcript_sequence = fasta_file[transcript]
            # By default, all counts will be written (ribo_counts)
            # If 5' or 3' counts requested, filter and use
            # those counts for printing instead
            write_counts = ribo_counts
            region_reads = None
            log.debug('Total read counts {}'.format(ribo_reads))

            # find longest ORF and filter counts based on whether 5' or 3' is
            # requested
            if count_five or count_three:
                # use default start and stop codons and find ORFs in all 3
                # frames (+)
                orfs = ribocore.get_three_frame_orfs(sequence=transcript_sequence)
                if not len(orfs):
                    log.debug('No ORFs for transcript {0}'.format(transcript))
                    continue
                longest_orf = ribocore.get_longest_orf(orfs=orfs)
                orf_start, orf_stop = longest_orf['start'], longest_orf['stop']
                log.info('Transcript: {0} Longest ORF Start: {1}, Stop: {2}'.format(
                    transcript, orf_start, orf_stop))

                if count_five:
                    write_counts, region_reads = ribocore.filter_ribo_counts(
                        counts=ribo_counts, orf_start=orf_start)
                    log.debug('5\' region read counts: {}'.format(region_reads))
                else:
                    write_counts, region_reads = ribocore.filter_ribo_counts(
                        counts=ribo_counts, orf_stop=orf_stop)
                    log.debug('3\' region read counts: {}'.format(region_reads))

            if not len(write_counts):  # no counts for transcript
                continue

            log.debug('Writing counts to CSV file for transcript {}'.format(transcript))
            count += 1
            csv_file = 'RiboCounts{}.csv'.format(count)
            with open(os.path.join(csv_dir, csv_file), 'w') as cw:
                cw.write('"Position","Nucleotide","Frame 1","Frame 2","Frame 3"\n')
                # positions are 1-based in the CSV, 0-based in the sequence
                for pos in range(1, len(transcript_sequence) + 1):
                    nucleotide = transcript_sequence[pos - 1]
                    if pos in write_counts:
                        frames = write_counts[pos]
                        cw.write('{0},{1},{2},{3},{4}\n'.format(
                            pos, nucleotide, frames[1], frames[2], frames[3]))
                    else:
                        cw.write('{0},{1},{2},{3},{4}\n'.format(pos, nucleotide, 0, 0, 0))

            # HTML table
            row = '<tr><td>{0}</td><td>{1}</td>'.format(transcript, ribo_reads)
            if count_five or count_three:
                row += '<td>{0}</td>'.format(region_reads)
            row += '<td><a href="csv/{0}">{0}</a></td></tr>'.format(csv_file)
            table_rows.append(row)
        table_body = ''.join(table_rows) + '</tbody>'

        # only for display in HTML
        valid_lengths = ['{}'.format(item) for item in read_lengths]
        if len(valid_lengths) == 1 and valid_lengths[0] == '0':
            valid_lengths = ['All']
        lengths_label = ', '.join(valid_lengths)

        if not count:
            if valid_lengths:
                log.info('No transcripts found for read lengths: {}'.format(lengths_label))
            else:
                log.info('No transcripts found')
        else:
            # NOTE(review): packaging and the HTML report are assumed to run
            # only when at least one transcript produced counts -- confirm
            # that this is the intended behaviour for empty runs.
            template = 'ribocount_prime.html' if prime else 'ribocount.html'
            with open(os.path.join(CONFIG.PKG_DATA_DIR, template)) as g, \
                    open(os.path.join(zip_dir, 'index.html'), 'w') as h:
                h.write(g.read().format(count=count, length=lengths_label,
                                        prime=prime, table_body=table_body))

            for asset in ('css', 'js'):
                asset_dir = os.path.join(zip_dir, asset)
                if not os.path.exists(asset_dir):
                    os.mkdir(asset_dir)
                asset_data_dir = os.path.join(CONFIG.PKG_DATA_DIR, asset)
                for fname in os.listdir(asset_data_dir):
                    shutil.copy(os.path.join(asset_data_dir, fname),
                                os.path.join(asset_dir, fname))

            log.info('Creating zip file')
            # Archive members are named relative to output_path so they keep
            # the 'ribocount_output/...' prefix without changing the working
            # directory. Changing it and returning via '../' would resolve a
            # relative html_file against the wrong directory whenever
            # output_path is nested or absolute.
            zip_path = os.path.join(output_path, 'ribocount_output.zip')
            with zipfile.ZipFile(zip_path, 'w') as zipf:
                for root, _dirs, files in os.walk(zip_dir):
                    for name in files:
                        file_path = os.path.join(root, name)
                        zipf.write(file_path,
                                   arcname=os.path.relpath(file_path, output_path))
            shutil.rmtree(zip_dir)

            log.debug('Writing HTML report')
            with open(os.path.join(CONFIG.PKG_DATA_DIR, 'ribocount_index.html')) as j, \
                    open(html_file, 'w') as k:
                k.write(j.read().format(count=count, read_length=lengths_label))
    log.info('Finished')
def main(args):
    """Write read counts for one transcript and generate its RiboPlot.

    Validates the supplied arguments, reads the transcript sequence from
    the FASTA file, collects Ribo-Seq counts (and RNA-Seq counts when an
    RNA file is given), writes ``RiboCounts.csv`` into ``output_path`` and
    calls ``plot_profile`` to produce the plot.

    Args:
        args: object exposing the attributes ``ribo_file``, ``rna_file``,
            ``transcript_name``, ``transcriptome_fasta``, ``read_lengths``,
            ``read_offsets``, ``output_path``, ``html_file`` and
            ``color_scheme``. It must work with ``vars()``
            (presumably an ``argparse.Namespace`` -- confirm with caller).

    Raises:
        ribocore.RiboPlotError: if there are no Ribo-Seq counts for the
            transcript.
        OSError: re-raised after logging when ``get_rna_counts`` fails.
    """
    (
        ribo_file,
        rna_file,
        transcript_name,
        transcriptome_fasta,
        read_lengths,
        read_offsets,
        output_path,
        html_file,
    ) = (
        args.ribo_file,
        args.rna_file,
        args.transcript_name,
        args.transcriptome_fasta,
        args.read_lengths,
        args.read_offsets,
        args.output_path,
        args.html_file,
    )

    # error messages (simple format) are written to html file
    fh = logging.FileHandler(html_file)
    fh.setLevel(logging.ERROR)
    fh.setFormatter(ErrorLogFormatter("%(message)s"))
    log.addHandler(fh)

    log.debug(
        "Supplied arguments\n{}".format(
            "\n".join(["{:<20}: {}".format(k, v) for k, v in vars(args).items()])
        )
    )

    log.info("Checking if required arguments are valid...")
    ribocore.check_required_arguments(
        ribo_file=ribo_file, transcriptome_fasta=transcriptome_fasta, transcript_name=transcript_name
    )
    log.info("Done")

    if rna_file:
        log.info("Checking if RNA-Seq file is valid...")
        ribocore.check_rna_file(rna_file=rna_file)
        log.info("Done")

    log.info("Checking read lengths...")
    ribocore.check_read_lengths(ribo_file=ribo_file, read_lengths=read_lengths)
    log.info("Done")

    log.info("Checking read offsets...")
    ribocore.check_read_offsets(read_offsets=read_offsets)
    log.info("Done")

    log.info("Checking if each read length has a corresponding offset")
    ribocore.check_read_lengths_offsets(read_lengths=read_lengths, read_offsets=read_offsets)
    log.info("Done")

    log.info("Get sequence and length of the given transcript from FASTA file...")
    record = ribocore.get_fasta_record(transcriptome_fasta, transcript_name)
    transcript_sequence = record[transcript_name]
    transcript_length = len(transcript_sequence)

    log.info("Get ribo-seq read counts and total reads in Ribo-Seq...")
    with ribocore.open_pysam_file(fname=ribo_file, ftype="bam") as bam_fileobj:
        # the total read count returned alongside the counts is not used here
        ribo_counts, _ = ribocore.get_ribo_counts(
            ribo_fileobj=bam_fileobj,
            transcript_name=transcript_name,
            read_lengths=read_lengths,
            read_offsets=read_offsets,
        )

    # Nothing to plot without Ribo-Seq counts; the error is also logged so
    # that it reaches the html file through the handler added above.
    if not ribo_counts:
        msg = "No RiboSeq read counts for transcript {}. No plot will be generated!".format(
            transcript_name
        )
        log.error(msg)
        raise ribocore.RiboPlotError(msg)

    log.info("Get RNA counts for the given transcript...")
    mrna_counts = {}
    if rna_file:
        try:
            mrna_counts = get_rna_counts(rna_file, transcript_name)
        except OSError as e:
            log.error(e)
            raise

        if not mrna_counts:
            log.warning(
                "No RNA counts for this transcript from the given RNA Seq file. "
                "RNA-Seq coverage will not be generated"
            )
    else:
        log.debug("No RNA-Seq data provided. Not generating coverage")

    log.info("Get start/stop positions in transcript sequence (3 frames)...")
    codon_positions = get_start_stops(transcript_sequence)

    # makedirs also creates missing parent directories of a nested output_path
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    log.info("Writing RiboSeq read counts for {}".format(transcript_name))
    with open(os.path.join(output_path, "RiboCounts.csv"), "w") as f:
        f.write('"Position","Nucleotide","Frame 1","Frame 2","Frame 3"\n')
        # positions are 1-based in the CSV, 0-based in the sequence
        for pos in range(1, transcript_length + 1):
            nucleotide = transcript_sequence[pos - 1]
            if pos in ribo_counts:
                frames = ribo_counts[pos]
                f.write("{0},{1},{2},{3},{4}\n".format(pos, nucleotide, frames[1], frames[2], frames[3]))
            else:
                f.write("{0},{1},{2},{3},{4}\n".format(pos, nucleotide, 0, 0, 0))

    log.info("Generating RiboPlot...")
    plot_profile(
        ribo_counts,
        transcript_name,
        transcript_length,
        codon_positions,
        read_lengths,
        read_offsets,
        mrna_counts,
        color_scheme=args.color_scheme,
        html_file=html_file,
        output_path=output_path,
    )
    log.info("Finished!")
def main(args):
    """Write read counts for one transcript and generate its RiboPlot.

    Validates the supplied arguments, reads the transcript sequence from
    the FASTA file, collects Ribo-Seq counts (and RNA-Seq counts when an
    RNA file is given), writes ``RiboCounts.csv`` into ``output_path`` and
    calls ``plot_profile`` to produce the plot.

    Args:
        args: object exposing the attributes ``ribo_file``, ``rna_file``,
            ``transcript_name``, ``transcriptome_fasta``, ``read_lengths``,
            ``read_offsets``, ``output_path``, ``html_file`` and
            ``color_scheme``. It must work with ``vars()``
            (presumably an ``argparse.Namespace`` -- confirm with caller).

    Raises:
        ribocore.RiboPlotError: if there are no Ribo-Seq counts for the
            transcript.
        OSError: re-raised after logging when ``get_rna_counts`` fails.
    """
    (ribo_file, rna_file, transcript_name, transcriptome_fasta,
     read_lengths, read_offsets, output_path, html_file) = (
        args.ribo_file, args.rna_file, args.transcript_name,
        args.transcriptome_fasta, args.read_lengths, args.read_offsets,
        args.output_path, args.html_file)

    # error messages (simple format) are written to html file
    fh = logging.FileHandler(html_file)
    fh.setLevel(logging.ERROR)
    fh.setFormatter(ErrorLogFormatter('%(message)s'))
    log.addHandler(fh)

    log.debug('Supplied arguments\n{}'.format('\n'.join(
        ['{:<20}: {}'.format(k, v) for k, v in vars(args).items()])))

    log.info('Checking if required arguments are valid...')
    ribocore.check_required_arguments(ribo_file=ribo_file,
                                      transcriptome_fasta=transcriptome_fasta,
                                      transcript_name=transcript_name)
    log.info('Done')

    if rna_file:
        log.info('Checking if RNA-Seq file is valid...')
        ribocore.check_rna_file(rna_file=rna_file)
        log.info('Done')

    log.info('Checking read lengths...')
    ribocore.check_read_lengths(ribo_file=ribo_file, read_lengths=read_lengths)
    log.info('Done')

    log.info('Checking read offsets...')
    ribocore.check_read_offsets(read_offsets=read_offsets)
    log.info('Done')

    log.info('Checking if each read length has a corresponding offset')
    ribocore.check_read_lengths_offsets(read_lengths=read_lengths,
                                        read_offsets=read_offsets)
    log.info('Done')

    log.info(
        'Get sequence and length of the given transcript from FASTA file...')
    record = ribocore.get_fasta_record(transcriptome_fasta, transcript_name)
    transcript_sequence = record[transcript_name]
    transcript_length = len(transcript_sequence)

    log.info('Get ribo-seq read counts and total reads in Ribo-Seq...')
    with ribocore.open_pysam_file(fname=ribo_file, ftype='bam') as bam_fileobj:
        # the total read count returned alongside the counts is not used here
        ribo_counts, _ = ribocore.get_ribo_counts(
            ribo_fileobj=bam_fileobj, transcript_name=transcript_name,
            read_lengths=read_lengths, read_offsets=read_offsets)

    # Nothing to plot without Ribo-Seq counts; the error is also logged so
    # that it reaches the html file through the handler added above.
    if not ribo_counts:
        msg = ('No RiboSeq read counts for transcript {}. No plot will be '
               'generated!'.format(transcript_name))
        log.error(msg)
        raise ribocore.RiboPlotError(msg)

    log.info('Get RNA counts for the given transcript...')
    mrna_counts = {}
    if rna_file:
        try:
            mrna_counts = get_rna_counts(rna_file, transcript_name)
        except OSError as e:
            log.error(e)
            raise

        if not mrna_counts:
            log.warning(
                'No RNA counts for this transcript from the given RNA Seq file. '
                'RNA-Seq coverage will not be generated')
    else:
        log.debug('No RNA-Seq data provided. Not generating coverage')

    log.info(
        'Get start/stop positions in transcript sequence (3 frames)...')
    codon_positions = get_start_stops(transcript_sequence)

    # makedirs also creates missing parent directories of a nested output_path
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    log.info('Writing RiboSeq read counts for {}'.format(transcript_name))
    with open(os.path.join(output_path, 'RiboCounts.csv'), 'w') as f:
        f.write('"Position","Nucleotide","Frame 1","Frame 2","Frame 3"\n')
        # positions are 1-based in the CSV, 0-based in the sequence
        for pos in range(1, transcript_length + 1):
            nucleotide = transcript_sequence[pos - 1]
            if pos in ribo_counts:
                frames = ribo_counts[pos]
                f.write('{0},{1},{2},{3},{4}\n'.format(
                    pos, nucleotide, frames[1], frames[2], frames[3]))
            else:
                f.write('{0},{1},{2},{3},{4}\n'.format(
                    pos, nucleotide, 0, 0, 0))

    log.info('Generating RiboPlot...')
    plot_profile(ribo_counts, transcript_name, transcript_length,
                 codon_positions, read_lengths, read_offsets, mrna_counts,
                 color_scheme=args.color_scheme, html_file=html_file,
                 output_path=output_path)
    log.info('Finished!')