def makefile_local(name2sample, outdir, paired):
    """Build a Makefile that collapses raw fastq files, one rule per sample.

    Arguments:
        name2sample: mapping of sample name -> input fastq path(s)
        outdir: directory the collapsed fastq files are written to
        paired: truthy for paired-end input (collapse_paired.py, two output
            fastq files per sample); falsy for single-end (collapse_single.py,
            one output fastq per sample)

    Returns the Makefile text (rules joined by blank lines).
    """
    final_files = []
    mlist = []
    # Single- and paired-end handling differ only in the collapsing script
    # and in whether one or two fastq files are produced per sample, so the
    # per-sample loop is shared.
    script_name = 'collapse_paired.py' if paired else 'collapse_single.py'
    for name, pair in name2sample.items():
        input_files = pair
        if paired:
            output_files = [os.path.join(outdir, "%s.%d.fastq" % (name, x)) for x in (1, 2)]
        else:
            output_files = os.path.join(outdir, "%s.fastq" % name)
        script = get_script(script_name, seq_package,
                            arguments={'--output': os.path.join(outdir, name), '--log': args.log},
                            inp=input_files)
        mlist.append(dependence(input_files, output_files, script))
        if paired:
            final_files.extend(output_files)
        else:
            final_files.append(output_files)
    # Makefile header (all targets) goes first, the no-op cleaner last.
    mlist.insert(0, get_header(final_files))
    mlist.append('clean:\n\techo "nothing to clean."\n')
    return "\n\n".join(mlist)
def makefile_local(m_input, control):
    """Build a per-sample Makefile: bowtie2 mapping -> per-strand coverage
    -> TPM-assigned, annotated transcripts.

    Arguments:
        m_input: input reads, a single path or a list of paths (paired)
        control: not used in this variant; kept for interface compatibility
            with the other pipeline variants

    Returns (makefile_text, sample_name, [final_output_path]).
    """
    mlist = []
    # Sample name = basename of the (first) input file up to the first dot.
    if isinstance(m_input, str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]
    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Bowtie2 call; stderr is tee'd into the per-sample log.
    bs_list = get_bowtie_call(bowtie_settings, args.bowtie_args, args.index, m_input, name,
                              threads=args.threads, reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    script = bs_list
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into per-strand coverage tracks
    input_files = output_files
    output_files = [os.path.join('coverage', '%s.%s.bed' % (name, x)) for x in ['plus', 'minus']]
    script = get_script('get_sam_stat_paired.py', mapping_package,
                        arguments={'--genome': args.genome, '--outstat': log_dir,
                                   '--outcoverage': 'coverage', '--ambiguous': args.ambiguous},
                        inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # Assign TPM and annotate the mappings
    input_files = output_files
    output_files = os.path.join('transcripts', '%s.gff' % name)
    script = get_script('assign_mappings.py', mapping_package, inp=input_files, out=output_files,
                        arguments={'--transcripts': args.annotation, '--logdir': log_dir})
    mlist.append(dependence(input_files, output_files, script))

    # Header (targets) first, no-op cleaner last.
    mlist.insert(0, get_header(output_files))
    mlist.append('clean:\n\techo "nothing to clean."\n')
    return "\n\n".join(mlist), name, [output_files]
# NOTE(review): fragment -- the enclosing function's header lies outside this
# chunk, so the code below is kept byte-identical. Visible tail: finishes a
# get_script(...) call, appends its rule, then a deliberately disabled
# region-annotation rule (`if (False and args.annotation)` -- the ###CHANGE
# marker suggests it is parked, not dead by accident), inserts the Makefile
# header with phony targets, builds a `clean` target that recurses into each
# sub-makefile in mf_multipath, and writes the top-level Makefile to disk.
inp='log', out=output_files) mlist.append(dependence(input_files, output_files, script)) #python /home/a_filipchyk/afp/chap/annotate.py regions/regions.gff --maxshift 50 --flen 50 --genes /home/a_filipchyk/genomic_data/coryne/annotation/improved_annotation_2017.gff ###CHANGE if (False and args.annotation): input_files = os.path.join('regions', 'regions.gff') output_files = os.path.join('regions', 'regions.annotated.gff') final_files.append(output_files) script = get_script('annotate.py', chap_package, arguments={ '--maxshift': region_settings['maxshift'], '--flen': region_settings['flank'], '--genes': args.annotation }, inp=input_files, out=output_files) mlist.append(dependence(input_files, output_files, script)) #makefile header mlist.insert(0, get_header(final_files, phonyfiles=mf_names)) # makefie cleaner mlist.append('clean:\n%s' % ("\n".join(["\t$(MAKE) -f %s clean" % x for x in mf_multipath]))) with open(os.path.join(project_path, 'Makefile'), 'w') as mf: mf.write("\n\n".join(mlist))
def makefile_local(m_input, control):
    """Build the per-sample CHAP-seq Makefile: bowtie2 mapping, strand
    coverage, optional control adjustment, peak detection and filtering,
    normalized coverage, a UCSC track, peak annotation and html reports.

    Arguments:
        m_input: sample reads, a single path or a list of paths (paired)
        control: optional control reads; when given, sample coverage is
            adjusted against the control before peak detection

    Returns (makefile_text, sample_name,
             [normalized_coverage_path, filtered_peaks_path]).
    """
    todel = []        # intermediate files removed by the 'clean' target
    final_files = []  # targets listed in the Makefile header
    mlist = []
    # Sample name = basename of the (first) input file up to the first dot.
    if isinstance(m_input, str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]
    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Bowtie2 call; stderr is tee'd into the per-sample log.
    bs_list = get_bowtie_call(bowtie_settings, args.bowtie_args, args.index, m_input, name,
                              threads=args.threads, reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into per-strand coverage
    input_files = output_files
    output_files = [os.path.join('coverage', '%s.%s.bed' % (name, x)) for x in ['minus', 'plus']]
    arguments = {'--genome': args.genome, '--outstat': log_dir,
                 '--outcoverage': 'coverage', '--ambiguous': args.ambiguous}
    if args.paired:
        arguments['--paired'] = True
    script = get_script('get_sam_stat_paired.py', mapping_package, arguments=arguments, inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # Merge coverages coming from different strands
    input_files = output_files
    output_files = os.path.join('coverage', '%s.bed' % name)
    covpath = output_files
    script = get_script('merge_coverages.py', chap_package, inp=input_files, out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    if control:
        log_dir_control = os.path.join('log', name + "_control")
        os.makedirs(os.path.join(project_path, log_dir_control), exist_ok=True)
        # Bowtie2 settings for the control sample.
        # NOTE(review): unlike the sample call above, reads_format is not
        # forwarded here -- confirm this is intentional.
        bs_list = get_bowtie_call(bowtie_settings, args.bowtie_args, args.index, control,
                                  "%s.control" % name, threads=args.threads)

        # Map control reads with bowtie2 (appends to the same sample log)
        input_files = control
        output_files = os.path.join('sam', '%s.control.sam' % name)
        todel.append(output_files)
        bs_list = ['echo', '\'###bowtie_control\'', '>>', log_file, ';'] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]
        script = bs_list
        mlist.append(dependence(input_files, output_files, script))

        # Convert control mappings into per-strand coverage
        input_files = output_files
        output_files = [os.path.join('coverage', '%s.control.%s.bed' % (name, x)) for x in ['minus', 'plus']]
        arguments = {'--genome': args.genome, '--outstat': log_dir_control, '--outcoverage': 'coverage'}
        if args.paired:
            arguments['--paired'] = True
        script = get_script('get_sam_stat_paired.py', mapping_package, arguments=arguments, inp=input_files)
        mlist.append(dependence(input_files, output_files, script))

        # Merge control coverages coming from different strands
        input_files = output_files
        output_files = os.path.join('coverage', '%s.control.bed' % name)
        script = get_script('merge_coverages.py', chap_package, inp=input_files, out=output_files)
        mlist.append(dependence(input_files, output_files, script))

        # Adjust sample coverage against the control; downstream steps use
        # the adjusted track via covpath.
        input_files = [covpath, output_files]
        output_files = os.path.join('coverage', '%s.adjusted.bed' % name)
        covpath = output_files
        script = get_script('adjust_coverage_to_control.py', chap_package, inp=input_files[0],
                            arguments={'--control': input_files[1], '--outdir': log_dir}, out=output_files)
        mlist.append(dependence(input_files, output_files, script))

    # Detect peaks (input is the merged -- or control-adjusted -- coverage)
    input_files = output_files
    output_files = [os.path.join('peaks', '%s.raw.bed' % name),
                    os.path.join(log_dir, 'convolution.bed'),
                    os.path.join(log_dir, 'convolution.png')]
    script = get_script('detect_peaks.py', chap_package,
                        arguments={'--threads': args.threads,
                                   '--widthfactor': peak_detection_settings['widthfactor'],
                                   '--meanmult': peak_detection_settings['meanmult'],
                                   '--convolution': output_files[1],
                                   '--plot': output_files[2]},
                        inp=input_files, out=output_files[0], log=log_file)
    mlist.append(dependence(input_files, output_files, script))

    # Filter peaks
    input_files = output_files[0]
    output_files = [os.path.join('peaks', '%s.filtered.bed' % name),
                    os.path.join("peaks", '%s.assigned.tsv' % name),
                    os.path.join(log_dir, 'filtering.png')]
    filtered_path = output_files[0]
    script = get_script('filter_peaks.py', chap_package,
                        arguments={'--zscore': peak_filtering_settings['zscore'],
                                   '--minmedian': peak_filtering_settings['minmedian'],
                                   '-ap': output_files[1],
                                   '--plot': output_files[2],
                                   '--coverage': covpath},
                        inp=input_files, out=output_files[0], log=log_file)
    mlist.append(dependence(input_files, output_files, script))

    # Normalize coverage
    input_files = [output_files[1], covpath]
    output_files = os.path.join('coverage', '%s.normalized.bed' % name)
    normed_covpath = output_files
    script = get_script('normalize_coverage.py', chap_package,
                        arguments={'--zscore': coverage_settings['zscore'],
                                   '--mode': coverage_settings['mode'],
                                   '--coverage': input_files[1]},
                        inp=input_files[0], out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Create UCSC tracks from the normalized coverage
    trackopts = "\'track name=%s description=\"CHAP seq genomic coverage for sample %s\" %s\'" % (
        name, name, " ".join(["%s=%s" % x for x in coverage_settings['trackopts'].items()]))
    input_files = output_files
    output_files = os.path.join('ucsc', '%s.bedgraph' % name)
    final_files.append(output_files)
    script = get_script('coverage2bedgraph.py', mapping_package,
                        arguments={'--multiplier': coverage_settings['multiplier'],
                                   '--convert': True, '--trackopts': trackopts},
                        inp=input_files, out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Annotate peaks (transcripts only when an annotation was supplied)
    input_files = [filtered_path, normed_covpath]
    output_files = os.path.join('peaks', '%s.annotated.gff' % name)
    local_arguments = {'--coverage': input_files[1], '--outdir': log_dir}
    if args.annotation:
        local_arguments['--transcripts'] = args.annotation
    script = get_script('annotate.py', chap_package, arguments=local_arguments,
                        inp=input_files[0], out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Create html report from the log directory
    input_files = [log_dir, output_files]
    output_files = os.path.join(log_dir, 'report.html')
    final_files.append(output_files)
    arguments = {'--css': os.path.join(html_lib, 'table.css')}
    if args.paired:
        arguments['--paired'] = True
    script = get_script('log_html.py', chap_package, arguments=arguments,
                        inp=input_files[0], out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Optional html/tsv tables for the annotated peaks
    if args.annotation:
        input_files = os.path.join('peaks', '%s.annotated.gff' % name)
        output_files = os.path.join(log_dir, 'peaks.html'), os.path.join(log_dir, 'peaks.tsv')
        final_files.extend(output_files)
        script = get_script('html_annotated_peaks.py', chap_package,
                            arguments={'--css': os.path.join(html_lib, 'table.css'),
                                       '--js': os.path.join(html_lib, 'table.js'),
                                       '--ucsc': args.ucsc, '--name': name,
                                       '--outdir': log_dir, '--genome': args.genome},
                            inp=input_files)
        mlist.append(dependence(input_files, output_files, script))

    # Header (targets) first, then a cleaner removing intermediate sam files.
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)
    return "\n\n".join(mlist), name, [normed_covpath] + [filtered_path]
def makefile_local(m_input):
    """Build the per-sample chimera Makefile: bowtie2 mapping, demultiplexing
    of hits into single/chimeric reads, chimera annotation, sorting, and
    collapsing into interactions.

    m_input: sample reads, a single path or a list of paths.
    Returns (makefile_text, sample_name, final_files).
    """
    todel = []        # intermediate files removed by the 'clean' target
    final_files = []
    mlist = []
    # Sample name = basename of the (first) input file up to the first dot.
    if isinstance(m_input, str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]
    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Bowtie2 call; stderr is tee'd into the per-sample log.
    bs_list = get_bowtie_call(bowtie_settings, args.bowtie_args, args.index, m_input, name,
                              threads=args.threads, reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    mlist.append(dependence(input_files, output_files, script))

    # Demultiplex mapping hits into single and chimeric reads
    input_files = output_files  # sam file
    output_files = os.path.join('chimeras', '%s.bed' % name)
    script = get_script('demultiplex_chimera.py',
                        arguments={'--maxgap': chimera_settings['maxgap'],
                                   '--s_distance': chimera_settings['s_distance'],
                                   '--ch_distance': chimera_settings['ch_distance'],
                                   '--splice_distance': chimera_settings['splice_distance'],
                                   '--maxoverlap': chimera_settings['maxoverlap']},
                        inp=input_files, out=output_files, package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))

    # Annotate chimeras with their types
    input_files = output_files
    output_files = os.path.join('chimeras', '%s.annotated.gff' % name)
    script = get_script('annotate_novel.py',
                        arguments={'--reverse': True, '--reference': args.genome},
                        inp=input_files, out=output_files, package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))

    # Sort the annotated chimeras
    input_files = output_files
    output_files = os.path.join('chimeras', '%s.sorted.gff' % name)
    script = get_script('sort.py', arguments={}, inp=input_files, out=output_files, package=bin_package)
    mlist.append(dependence(input_files, output_files, script))

    # Merge sam hits into chimeras in doublebed format
    input_files = output_files
    output_files = os.path.join('interactions', '%s.gff' % name)
    script = get_script('collapse2interaction.py',
                        arguments={'--name': name,
                                   '--dictionary': os.path.join('interactions', '%s.dict.tsv' % name)},
                        inp=input_files, out=output_files, package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))
    final_files.append(output_files)

    # Header (targets) first, cleaner removing intermediate sam files last.
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)
    return "\n\n".join(mlist), name, final_files
def makefile_local(m_input, control):
    """Build the per-sample Makefile: bowtie2 mapping, per-strand coverage,
    one UCSC bedgraph track per strand, and TPM-assigned, annotated
    transcripts.

    Arguments:
        m_input: sample reads, a single path or a list of paths
        control: not used in this variant; kept for interface compatibility

    Returns (makefile_text, sample_name, final_files).
    """
    todel = []        # intermediate files removed by the 'clean' target
    final_files = []
    mlist = []
    # Sample name = basename of the (first) input file up to the first dot.
    if isinstance(m_input, str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]
    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Bowtie2 call; stderr is tee'd into the per-sample log.
    bs_list = get_bowtie_call(bowtie_settings, args.bowtie_args, args.index, m_input, name,
                              threads=args.threads, reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into per-strand coverage
    input_files = output_files
    output_files = [os.path.join('coverage', '%s.%s.bed' % (name, x)) for x in ['plus', 'minus']]
    arguments = {'--genome': args.genome, '--outstat': log_dir,
                 '--outcoverage': 'coverage', '--ambiguous': args.ambiguous}
    if args.collapsed:
        arguments['--collapsed'] = 'True'
    script = get_script('get_sam_stat_paired.py', mapping_package, arguments=arguments, inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # One UCSC bedgraph track per strand. Loop-local names are used so the
    # per-strand coverage list is not clobbered while being iterated.
    for cov_bed, strand in zip(output_files, ['plus', 'minus']):
        trackopts = "\'track name=%s_%s description=\"CHAP seq genomic coverage for sample %s\" %s\'" % (
            name, strand, name,
            " ".join(["%s=%s" % x for x in coverage_settings['trackopts'].items()]))
        track_file = os.path.join('ucsc', '%s_%s.bedgraph' % (name, strand))
        final_files.append(track_file)
        script = get_script('coverage2bedgraph.py', mapping_package,
                            arguments={'--multiplier': coverage_settings['multiplier'],
                                       '--convert': True, '--trackopts': trackopts},
                            inp=cov_bed, out=track_file)
        mlist.append(dependence(cov_bed, track_file, script))

    # Assign TPM and annotate the mappings
    input_files = [os.path.join('coverage', '%s.%s.bed' % (name, x)) for x in ['plus', 'minus']]
    output_files = os.path.join('transcripts', '%s.gff' % name)
    script = get_script('assign_mappings.py', mapping_package, inp=input_files, out=output_files,
                        arguments={'--transcripts': args.annotation, '--logdir': log_dir})
    mlist.append(dependence(input_files, output_files, script))
    final_files.append(output_files)

    # Header (targets) first, cleaner removing intermediate sam files last.
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)
    return "\n\n".join(mlist), name, final_files