def combine_alignment_summary(input, output):
    """Merge the formatted alignment logs into a single summary file.

    `input`: Formatted alignment stat log files (*fastqLog.final.txt)
    `output`: Combined alignment stat csv file (DATA_alignment_summary.csv)

    Prints the stage banner framed by the output of `tasks.comment()` and
    returns whatever `tasks.combineAlignmentSummary` returns.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 9: Aggrigate alignment summary ....", "green"))
    print(tasks.comment())
    return tasks.combineAlignmentSummary(input, output)
def format_count(input, output):
    """Reformat a count csv file.

    `input`: csv file
    `output`: Formatted *.csv file

    Prints the stage banner plus both arguments (debug output), framed by
    the output of `tasks.comment()`, then returns the result of
    `tasks.formatCount`.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 8: Formatting count file ... ", "green"))
    print(input)
    print(output)
    print(tasks.comment())
    return tasks.formatCount(input, output)
def plot_alignment_summary(input, output):
    """Draw the alignment summary as a bar plot.

    `input`: Alignment summary csv file
    `output`: Output png file (bar plot)

    Prints the stage banner plus both arguments (debug output), framed by
    the output of `tasks.comment()`, then returns the result of
    `tasks.plotAlignmentStat`.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 10: Plot alignment summary ...", "green"))
    print(input)
    print(output)
    print(tasks.comment())
    return tasks.plotAlignmentStat(input, output)
def combine_count_data(input, output):
    """Merge the per-sample count files into one summary csv.

    `input`: Formatted *.out.txt count files
    `output`: A single summary count csv file named
        'DATA_COUNT_countcombined.csv' under the project dir

    Prints both arguments (debug output) and the stage banner, framed by
    the output of `tasks.comment()`, then returns the result of
    `tasks.combineCount`.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(input)
    print(output)
    print(colored("Stage 7: Combining count data ...", "green"))
    print(tasks.comment())
    return tasks.combineCount(input, output)
def format_count(input, output):
    """Reformat a count csv file.

    `input`: csv file
    `output`: Formatted *.csv file

    Prints the stage banner framed by the output of `tasks.comment()` and
    returns the result of `tasks.formatCount`.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 8: Formatting count file ... ", "green"))
    print(tasks.comment())
    return tasks.formatCount(input, output)
def combine_count_data(input, output):
    """Merge the per-sample count files into one summary csv.

    `input`: Formatted *.out.txt count files
    `output`: A single summary count csv file named
        'DATA_COUNT_countcombined.csv' under the project dir

    Prints the stage banner framed by the output of `tasks.comment()` and
    returns the result of `tasks.combineCount`.
    """
    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 7: Combining count data ...", "green"))
    print(tasks.comment())
    return tasks.combineCount(input, output)
def main():
    """Console script for puticr.

    When `options.indexed` is "yes", the reference genome is indexed first
    (`indexGenome`) and the full stage list, starting at `prepare_analysis`,
    is requested. Otherwise the run starts at `mergeBamSameTissue`, and the
    result directory and elapsed wall-clock time are echoed at the end.
    Both branches write the pipeline flowchart to "flowchart.pdf".
    """
    if options.indexed == "yes":
        click.echo(
            "Indexing the reference genome {}, be patient, it takes longer time"
            .format(genomeDir))
        pipeline_run(["indexGenome"], verbose=1, multiprocess=cpuNum)
        pipeline_run([
            "prepare_analysis", "cleanFastq", "bsAlign", "mergeBamSameTissue",
            "bamSort", "bamIndex", "createCGmap", "extractCG_Context",
            "mergeConCGcall", "icrHotSpot", "convertToBed", "unionBed",
            "mergeBed", "countICR"
        ], verbose=1, multiprocess=cpuNum)
        # Flowcharts can be printed in a large number of formats including
        # jpg, svg, png and pdf
        pipeline_printout_graph("flowchart.pdf", "pdf", [countICR],
                                user_colour_scheme={"colour_scheme_index": 6},
                                pipeline_name="Putative ICR pipeline",
                                no_key_legend=False)
    else:
        click.echo(tasks.comment())
        t0 = time.time()
        click.echo("Starting the process .....")
        # NOTE: this branch does not request prepare_analysis, cleanFastq or
        # bsAlign by name; the list starts at mergeBamSameTissue.
        pipeline_run([
            "mergeBamSameTissue", "bamSort", "bamIndex", "createCGmap",
            "extractCG_Context", "mergeConCGcall", "icrHotSpot",
            "convertToBed", "unionBed", "mergeBed", "countICR"
        ], verbose=1, multiprocess=cpuNum)
        # Flowcharts can be printed in a large number of formats including
        # jpg, svg, png and pdf
        pipeline_printout_graph("flowchart.pdf", "pdf", [countICR],
                                user_colour_scheme={"colour_scheme_index": 6},
                                pipeline_name="Putative ICR pipeline",
                                no_key_legend=False)
        click.echo(".................. \n{}".format(resultDir))
        # Truncate to whole seconds so timedelta renders as H:MM:SS.
        elapsedTime = int((time.time()) - t0)
        elapsedTime = str(datetime.timedelta(seconds=elapsedTime))
        click.echo("Time to complete the task .....{}".format(
            colored(elapsedTime, "red")))
        click.echo(tasks.comment())
def alignment_summary(input, output):
    """Extract the alignment summary from one alignment log.

    `input`: *fastqLog.final.out file
    `output`: Suffix appended to the input's base name (extension removed);
        the resulting file name is placed under `tempDir`

    Prints the stage banner framed by the output of `tasks.comment()` and
    returns the result of `tasks.alignmentSummary`.
    """
    # <tempDir>/<input base name without extension><output>
    stem = splitext(basename(input))[0]
    summary_path = join(tempDir, stem + output)

    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 8: Generate Alingmnet summary ....", "green"))
    print(tasks.comment())
    return tasks.alignmentSummary(input, summary_path)
def count_mapped_reads(bamFile, outfile):
    """Count the mapped sequences that overlap genome features.

    `bamFile`: A bam alignment file
    `outfile`: Count txt file

    The GTF annotation path is derived from the global `probFile`: the text
    preceding `_manifest.csv` plus ".gtf", joined onto `resultDir`.

    Raises ValueError when `probFile` does not match `*_manifest.csv`.
    Returns the result of `tasks.count_mapped`.
    """
    import re

    # re.match returns None on a non-match; fail with a clear message instead
    # of an AttributeError on `.group`. The dot is escaped so that only a
    # literal ".csv" is accepted.
    manifest = re.match(r'(.*)_manifest\.csv', probFile, re.M | re.I)
    if manifest is None:
        raise ValueError(
            "probFile %r does not match '<name>_manifest.csv'" % (probFile,))
    gtfFile = join(resultDir, manifest.group(1) + ".gtf")

    print(tasks.comment())
    print(colored("Stage 6: Count Mapped file that overlap with genome feature ... ", "green"))
    print(bamFile)
    print(gtfFile)
    print(tasks.comment())
    return tasks.count_mapped(bamFile, outfile, gtfFile)
def count_mapped_reads(bamFile, outfile):
    """Count the mapped sequences that overlap genome features.

    `bamFile`: A bam alignment file
    `outfile`: Count txt file

    The GTF annotation path is derived from the global `probFile`: the text
    preceding `_manifest.csv` plus ".gtf", joined onto `resultDir`.

    Raises ValueError when `probFile` does not match `*_manifest.csv`.
    Returns the result of `tasks.count_mapped`.
    """
    import re

    # re.match returns None on a non-match; fail with a clear message instead
    # of an AttributeError on `.group`. The dot is escaped so that only a
    # literal ".csv" is accepted.
    manifest = re.match(r'(.*)_manifest\.csv', probFile, re.M | re.I)
    if manifest is None:
        raise ValueError(
            "probFile %r does not match '<name>_manifest.csv'" % (probFile,))
    gtfFile = join(resultDir, manifest.group(1) + ".gtf")

    print(tasks.comment())
    print(colored("Stage 6: Count Mapped file that overlap with genome feature ... ", "green"))
    print(tasks.comment())
    return tasks.count_mapped(bamFile, outfile, gtfFile)
def map_to_probes(fastq, output):
    """Map the fastq file to the indexed probe sequences.

    The fastq must be gzipped with the extension *.fastq.gz.

    `fastq`: a dir that contains all *.fastq.gz files for the experiment
    `output`: output .bam files and '*fastqReadPrepGene.out.tab' count files;
        only its base name is used here, to build the output prefix

    Prints the stage banner and the paths involved (debug output), framed by
    the output of `tasks.comment()`, then returns the result of
    `tasks.map_seq_to_probes`.
    """
    # Absolute path of <tempDir>/<base name of output>, last extension removed.
    relocated = join(tempDir, basename(output))
    outPrefix = os.path.abspath(splitext(relocated)[0])

    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 5: Map sequence fastq file to the indexed genome file ... ", "green"))
    print(fastq)
    print(output)
    print(genomeDir)
    print(outPrefix)
    print(tasks.comment())
    return tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)
def map_to_probes(fastq, output):
    """Map the fastq file to the indexed probe sequences.

    The fastq must be gzipped with the extension *.fastq.gz.

    `fastq`: a dir that contains all *.fastq.gz files for the experiment
    `output`: output .bam files and '*fastqReadPrepGene.out.tab' count files;
        only its base name is used here, to build the output prefix

    Prints the stage banner framed by the output of `tasks.comment()` and
    returns the result of `tasks.map_seq_to_probes`.
    """
    # Absolute path of <tempDir>/<base name of output>, last extension removed.
    relocated = join(tempDir, basename(output))
    outPrefix = os.path.abspath(splitext(relocated)[0])

    # print(...) with one argument behaves exactly like the print statement.
    print(tasks.comment())
    print(colored("Stage 5: Map sequence fastq file to the indexed genome file ... ", "green"))
    print(tasks.comment())
    return tasks.map_seq_to_probes(fastq, genomeDir, cpuNum, outPrefix)
def main():
    """Run the TempO-seq analysis pipeline and report the elapsed time.

    Writes the stage flowchart to 'summary_pipeline_stages_to_run.ps', runs
    the stages by name through `pipeline_run`, passes `options.flowchart`
    to `convertPs`, and prints the wall-clock time taken.
    """
    t0 = time.time()
    # The whole message is parenthesized: `print (...) + str(t0)` only worked
    # as a print statement and breaks if print is a function.
    print(" Starting time ..... :" + str(t0))

    tasks_torun = [
        prepare_analysis, prepareDB_file, create_gtf_file, indexGenomeFile,
        map_to_probes, format_count, combine_count_data, alignment_summary,
        combine_alignment_summary, plot_alignment_summary,
    ]
    pipeline_printout_graph('summary_pipeline_stages_to_run.ps', 'ps',
                            tasks_torun,
                            user_colour_scheme={"colour_scheme_index": 6},
                            no_key_legend=False,
                            pipeline_name="TempO-seq Analysis",
                            size=(11, 8), dpi=30,
                            forcedtorun_tasks=[indexGenomeFile,
                                               combine_count_data],
                            draw_vertically=True,
                            ignore_upstream_of_target=False)
    pipeline_run(["prepare_analysis", "prepareDB_file", 'create_gtf_file',
                  'indexGenomeFile', 'map_to_probes', 'count_mapped_reads',
                  'combine_count_data', 'format_count', 'alignment_summary',
                  'combine_alignment_summary'],
                 verbose=1, multiprocess=cpuNum)
    print("....................." + resultDir)

    # The value returned by tasks.comment() used to be discarded here; the
    # stage functions print it, so it is printed here as well.
    print(tasks.comment())
    psfile = options.flowchart
    convertPs(psfile)
    print(tasks.comment())

    # Truncate to whole seconds so timedelta renders as H:MM:SS.
    elapsedTime = str(datetime.timedelta(seconds=int(time.time() - t0)))
    print("Time to complete the task ....." + colored(elapsedTime, "red"))
def main():
    """Run the TempO-seq analysis pipeline and report the elapsed time.

    Writes the stage flowchart to 'summary_pipeline_stages_to_run.ps', runs
    the stages by name through `pipeline_run` (ending with
    'plot_alignment_summary'), passes `options.flowchart` to `convertPs`,
    and prints the wall-clock time taken.
    """
    t0 = time.time()
    # The whole message is parenthesized: `print (...) + str(t0)` only worked
    # as a print statement and breaks if print is a function.
    print(" Starting time ..... :" + str(t0))

    tasks_torun = [
        prepare_analysis, prepareDB_file, create_gtf_file, indexGenomeFile,
        map_to_probes, format_count, combine_count_data, alignment_summary,
        combine_alignment_summary, plot_alignment_summary,
    ]
    pipeline_printout_graph('summary_pipeline_stages_to_run.ps', 'ps',
                            tasks_torun,
                            user_colour_scheme={"colour_scheme_index": 6},
                            no_key_legend=False,
                            pipeline_name="TempO-seq Analysis",
                            size=(11, 8), dpi=30,
                            forcedtorun_tasks=[indexGenomeFile,
                                               combine_count_data],
                            draw_vertically=True,
                            ignore_upstream_of_target=False)
    pipeline_run(["prepare_analysis", "prepareDB_file", 'create_gtf_file',
                  'indexGenomeFile', 'map_to_probes', 'count_mapped_reads',
                  'combine_count_data', 'format_count', 'alignment_summary',
                  'combine_alignment_summary', 'plot_alignment_summary'],
                 verbose=1, multiprocess=cpuNum)
    print("....................." + resultDir)

    # The value returned by tasks.comment() used to be discarded here; the
    # stage functions print it, so it is printed here as well.
    print(tasks.comment())
    psfile = options.flowchart
    convertPs(psfile)
    print(tasks.comment())

    # Truncate to whole seconds so timedelta renders as H:MM:SS.
    elapsedTime = str(datetime.timedelta(seconds=int(time.time() - t0)))
    print("Time to complete the task ....." + colored(elapsedTime, "red"))
import sys
import tasks
import users
import log

# Command-line entry point: comment <path-to-task> <comments>
# Everything after the task path is joined with spaces into one comment.

if len(sys.argv) < 3:
    print('error: expected 2 arguments')
    print('usage: comment <path-to-task> <comments>')
    # Exit non-zero so callers can detect the usage error; a bare
    # sys.exit() reports success (status 0).
    sys.exit(1)

# Normalize the program name so the logged command line starts with 'comment'.
sys.argv[0] = 'comment'
taskPath = sys.argv[1]
comments = ' '.join(sys.argv[2:])

print("adding comment: " + comments)
tasks.comment(taskPath, users.current, comments)
# Record the full command line as it was invoked.
log.add(' '.join(sys.argv))
print('comment added.')