Code Example #1
def makefile_local(name2sample, outdir, paired):
    final_files = []
    mlist = []

    if (paired):
        for name, pair in name2sample.items():
            input_files = pair
            output_files = [
                os.path.join(outdir, "%s.%d.fastq" % (name, x)) for x in (1, 2)
            ]
            script = get_script('collapse_paired.py',
                                seq_package,
                                arguments={
                                    '--output': os.path.join(outdir, name),
                                    '--log': args.log
                                },
                                inp=input_files)
            mlist.append(dependence(input_files, output_files, script))
            final_files.extend(output_files)

    else:
        for name, pair in name2sample.items():
            input_files = pair
            output_files = os.path.join(outdir, "%s.fastq" % name)
            script = get_script('collapse_single.py',
                                seq_package,
                                arguments={
                                    '--output': os.path.join(outdir, name),
                                    '--log': args.log
                                },
                                inp=input_files)
            mlist.append(dependence(input_files, output_files, script))
            final_files.append(output_files)

    mlist.insert(0, get_header(final_files))
    mlist.append('clean:\n\techo "nothing to clean."\n')

    return "\n\n".join(mlist)
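These examples rely on a few helpers that are not shown in the snippets. `dependence(input_files, output_files, script)` is called for every pipeline step to turn it into one Makefile rule; a minimal sketch of what it presumably does is below (this is a guess for illustration, not the afp implementation, which may differ):

def dependence(input_files, output_files, script):
    # Hypothetical sketch: render one Make rule of the form
    # "targets: prerequisites\n\t<command>", where inputs/outputs may be a
    # single path or a list and `script` is a list of command tokens.
    def as_list(x):
        return [x] if isinstance(x, str) else list(x)

    targets = " ".join(as_list(output_files))
    prerequisites = " ".join(as_list(input_files))
    command = " ".join(str(token) for token in script)
    return "%s: %s\n\t%s" % (targets, prerequisites, command)

With that reading, the final `"\n\n".join(mlist)` simply concatenates the header, the per-step rules, and the clean target into one makefile text.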
Code Example #2
File: rnaseq.py    Project: xjyx/afp
def makefile_local(m_input, control):
    #print(m_input)
    #print(control)
    final_files = []
    mlist = []
    if (type(m_input) == str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]

    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Processing bowtie2 settings
    bs_list = get_bowtie_call(bowtie_settings,
                              args.bowtie_args,
                              args.index,
                              m_input,
                              name,
                              threads=args.threads,
                              reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'
               ] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    script = bs_list
    #print(script)
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into coverage
    input_files = output_files
    output_files = [
        os.path.join('coverage', '%s.%s.bed' % (name, x))
        for x in ['plus', 'minus']
    ]
    script = get_script('get_sam_stat_paired.py',
                        mapping_package,
                        arguments={
                            '--genome': args.genome,
                            '--outstat': log_dir,
                            '--outcoverage': 'coverage',
                            '--ambiguous': args.ambiguous
                        },
                        inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # Assign TPM and annotate the mappings

    input_files = output_files
    output_files = os.path.join('transcripts', '%s.gff' % name)
    covpath = output_files
    script = get_script('assign_mappings.py',
                        mapping_package,
                        inp=input_files,
                        out=output_files,
                        arguments={
                            '--transcripts': args.annotation,
                            '--logdir': log_dir
                        })
    mlist.append(dependence(input_files, output_files, script))

    #Get header and cleaner for the makefile
    mlist.insert(0, get_header(output_files))
    mlist.append('clean:\n\techo "nothing to clean."\n')

    return "\n\n".join(mlist), name, [output_files]
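`get_header` is inserted at position 0 of every generated makefile. Judging by how it is used, it presumably emits the default `all` target over the final output files; a sketch consistent with that (again an assumption, not the afp code) could be:

def get_header(final_files):
    # Hypothetical sketch: an 'all' target depending on the final outputs,
    # so that a bare `make` on the generated file builds the whole
    # per-sample pipeline.
    if isinstance(final_files, str):
        final_files = [final_files]
    return "all: %s\n.PHONY: all clean" % " ".join(final_files)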
Code Example #3
File: rnaseq.py    Project: xjyx/afp
        mf.write(local_makefile)

#######################################################################################################################
# Create Master makefile

mlist = []
final_files = []
mf_multipath = [os.path.join(project_path, 'makefiles', x) for x in mf_names]

for mf_name, mf_path, input_names in zip(mf_names, mf_multipath, input_list):
    if (type(input_names) == str):
        input_names = [input_names]
    input_files = [mf_path] + list(input_names)
    output_files = mf_name
    script = ["$(MAKE)", '-f', '$<']
    mlist.append(dependence(input_files, output_files, script))

if (args.multi):
    all_coverages = [x[0] for x in all_outputs]
    all_peaks = [x[1] for x in all_outputs]

    input_files = mf_names
    output_files = os.path.join('regions', 'regions.gff')
    script = get_script('merge_peaks.py',
                        chap_package,
                        arguments={
                            '--coverage': all_coverages,
                            '--zscore': region_settings['zscore'],
                            '--flank': region_settings['flank']
                        },
                        inp=all_peaks,
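The snippet above is cut off inside the `get_script('merge_peaks.py', ...)` call, but the loop before the `if (args.multi)` branch already shows how the master makefile dispatches to the per-sample makefiles. Assuming `dependence` behaves as sketched under Code Example #1, the rule it generates for one sample would look roughly like this (sample names and paths are invented for illustration):

# Hypothetical illustration only; file names are made up.
rule = dependence(
    ['/project/makefiles/sample1.mk', 'sample1_1.fastq', 'sample1_2.fastq'],
    'sample1.mk',
    ["$(MAKE)", '-f', '$<'])
# rule is now roughly:
#   sample1.mk: /project/makefiles/sample1.mk sample1_1.fastq sample1_2.fastq
#   <TAB>$(MAKE) -f $<
# '$<' expands to the first prerequisite, i.e. the generated per-sample
# makefile, so building the master makefile re-runs make on every sub-makefile.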
Code Example #4
File: execute.py    Project: afilipch/afp
def makefile_local(m_input, control):
    #print(m_input)
    #print(control)
    todel = []
    final_files = []
    mlist = []
    if (type(m_input) == str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]

    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Processing bowtie2 settings
    bs_list = get_bowtie_call(bowtie_settings,
                              args.bowtie_args,
                              args.index,
                              m_input,
                              name,
                              threads=args.threads,
                              reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'
               ] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    #print(script)
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into coverage
    input_files = output_files
    output_files = [
        os.path.join('coverage', '%s.%s.bed' % (name, x))
        for x in ['minus', 'plus']
    ]
    arguments = {
        '--genome': args.genome,
        '--outstat': log_dir,
        '--outcoverage': 'coverage',
        '--ambiguous': args.ambiguous
    }
    if (args.paired):
        arguments['--paired'] = True
    script = get_script('get_sam_stat_paired.py',
                        mapping_package,
                        arguments=arguments,
                        inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # Merge coverages coming from different strands
    input_files = output_files
    output_files = os.path.join('coverage', '%s.bed' % name)
    covpath = output_files
    script = get_script('merge_coverages.py',
                        chap_package,
                        inp=input_files,
                        out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    if (control):
        log_dir_control = os.path.join('log', name + "_control")
        os.makedirs(os.path.join(project_path, log_dir_control), exist_ok=True)

        # Processing bowtie2 settings for the control sample
        bs_list = get_bowtie_call(bowtie_settings,
                                  args.bowtie_args,
                                  args.index,
                                  control,
                                  "%s.control" % name,
                                  threads=args.threads)

        # Map control reads with bowtie2
        input_files = control
        output_files = os.path.join('sam', '%s.control.sam' % name)
        todel.append(output_files)
        bs_list = ['echo', '\'###bowtie_control\'', '>>', log_file, ';'
                   ] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]
        script = bs_list
        mlist.append(dependence(input_files, output_files, script))

        # Convert control mappings into coverage
        input_files = output_files
        output_files = [
            os.path.join('coverage', '%s.control.%s.bed' % (name, x))
            for x in ['minus', 'plus']
        ]
        arguments = {
            '--genome': args.genome,
            '--outstat': log_dir_control,
            '--outcoverage': 'coverage'
        }
        if (args.paired):
            arguments['--paired'] = True
        script = get_script('get_sam_stat_paired.py',
                            mapping_package,
                            arguments=arguments,
                            inp=input_files)
        mlist.append(dependence(input_files, output_files, script))

        # Merge control coverages coming from different strands
        input_files = output_files
        output_files = os.path.join('coverage', '%s.control.bed' % name)
        script = get_script('merge_coverages.py',
                            chap_package,
                            inp=input_files,
                            out=output_files)
        mlist.append(dependence(input_files, output_files, script))

        # Adjust sample coverage to the control coverage
        input_files = [covpath, output_files]
        output_files = os.path.join('coverage', '%s.adjusted.bed' % name)
        covpath = output_files
        script = get_script('adjust_coverage_to_control.py',
                            chap_package,
                            inp=input_files[0],
                            arguments={
                                '--control': input_files[1],
                                '--outdir': log_dir
                            },
                            out=output_files)
        mlist.append(dependence(input_files, output_files, script))

    # Detect peaks
    input_files = output_files
    output_files = [
        os.path.join('peaks', '%s.raw.bed' % name),
        os.path.join(log_dir, 'convolution.bed'),
        os.path.join(log_dir, 'convolution.png')
    ]
    script = get_script('detect_peaks.py',
                        chap_package,
                        arguments={
                            '--threads': args.threads,
                            '--widthfactor': peak_detection_settings['widthfactor'],
                            '--meanmult': peak_detection_settings['meanmult'],
                            '--convolution': output_files[1],
                            '--plot': output_files[2]
                        },
                        inp=input_files,
                        out=output_files[0],
                        log=log_file)
    mlist.append(dependence(input_files, output_files, script))

    # Filter peaks
    input_files = output_files[0]
    output_files = [
        os.path.join('peaks', '%s.filtered.bed' % name),
        os.path.join("peaks", '%s.assigned.tsv' % name),
        os.path.join(log_dir, 'filtering.png')
    ]
    filtered_path = output_files[0]
    script = get_script('filter_peaks.py',
                        chap_package,
                        arguments={
                            '--zscore': peak_filtering_settings['zscore'],
                            '--minmedian': peak_filtering_settings['minmedian'],
                            '-ap': output_files[1],
                            '--plot': output_files[2],
                            '--coverage': covpath
                        },
                        inp=input_files,
                        out=output_files[0],
                        log=log_file)
    mlist.append(dependence(input_files, output_files, script))

    # Normalize coverage
    input_files = [output_files[1], covpath]
    output_files = os.path.join('coverage', '%s.normalized.bed' % name)
    normed_covpath = output_files
    script = get_script('normalize_coverage.py',
                        chap_package,
                        arguments={
                            '--zscore': coverage_settings['zscore'],
                            '--mode': coverage_settings['mode'],
                            '--coverage': input_files[1]
                        },
                        inp=input_files[0],
                        out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Create UCSC tracks
    trackopts = "\'track name=%s description=\"CHAP seq genomic coverage for sample %s\" %s\'" % (
        name, name,
        " ".join(["%s=%s" % x for x in coverage_settings['trackopts'].items()]))
    input_files = output_files
    output_files = os.path.join('ucsc', '%s.bedgraph' % name)
    final_files.append(output_files)
    script = get_script('coverage2bedgraph.py',
                        mapping_package,
                        arguments={
                            '--multiplier': coverage_settings['multiplier'],
                            '--convert': True,
                            '--trackopts': trackopts
                        },
                        inp=input_files,
                        out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    # Annotate peaks
    #if(not multi):
    input_files = [filtered_path, normed_covpath]
    output_files = os.path.join('peaks', '%s.annotated.gff' % name)
    local_arguments = {'--coverage': input_files[1], '--outdir': log_dir}
    if (args.annotation):
        local_arguments['--transcripts'] = args.annotation
    script = get_script('annotate.py',
                        chap_package,
                        arguments=local_arguments,
                        inp=input_files[0],
                        out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    #Create html report
    #python ~/afp/chap/log_html.py log/sven3_18h --css ~/afp/afbio/html/table.css > test.html
    input_files = [log_dir, output_files]
    output_files = os.path.join(log_dir, 'report.html')
    final_files.append(output_files)
    arguments = {'--css': os.path.join(html_lib, 'table.css')}
    if (args.paired):
        arguments['--paired'] = True
    script = get_script('log_html.py',
                        chap_package,
                        arguments=arguments,
                        inp=input_files[0],
                        out=output_files)
    mlist.append(dependence(input_files, output_files, script))

    if (args.annotation):
        input_files = os.path.join('peaks', '%s.annotated.gff' % name)
        output_files = (os.path.join(log_dir, 'peaks.html'),
                        os.path.join(log_dir, 'peaks.tsv'))
        final_files.extend(output_files)
        script = get_script('html_annotated_peaks.py',
                            chap_package,
                            arguments={
                                '--css': os.path.join(html_lib, 'table.css'),
                                '--js': os.path.join(html_lib, 'table.js'),
                                '--ucsc': args.ucsc,
                                '--name': name,
                                '--outdir': log_dir,
                                '--genome': args.genome
                            },
                            inp=input_files)
        mlist.append(dependence(input_files, output_files, script))

    #Get header and cleaner for the makefile
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)

    return "\n\n".join(mlist), name, [normed_covpath] + [filtered_path]
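One detail worth noting in this and the other bowtie2 steps: the recipe ends with `2>> >(tee -a %s>&2)`, which duplicates bowtie2's stderr into the log file via bash process substitution. `>(...)` is not POSIX sh, so the generated makefile has to run its recipes under bash rather than GNU make's default /bin/sh. If `get_header` does not already take care of that, one way to handle it would be the sketch below (the input variable names are made up):

# Sketch only: prepend a SHELL assignment before writing the makefile, since
# the '2>> >(tee -a ...)' redirection in the bowtie2 recipes is a bashism
# that the default /bin/sh would reject.
local_makefile, name, outputs = makefile_local(sample_fastq, control_fastq)  # made-up inputs
local_makefile = "SHELL := /bin/bash\n\n" + local_makefile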
Code Example #5
File: execute_chimera.py    Project: afilipch/afp
def makefile_local(m_input):
    todel = []
    final_files = []
    mlist = []
    if (type(m_input) == str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]

    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Processing bowtie2 settings
    bs_list = get_bowtie_call(bowtie_settings,
                              args.bowtie_args,
                              args.index,
                              m_input,
                              name,
                              threads=args.threads,
                              reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'
               ] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    #print(script)
    mlist.append(dependence(input_files, output_files, script))

    # Demultiplex mapping hits into single and chimeric reads
    input_files = output_files  # SAM FILE
    output_files = os.path.join('chimeras', '%s.bed' % name)
    script = get_script('demultiplex_chimera.py',
                        arguments={
                            '--maxgap':
                            chimera_settings['maxgap'],
                            '--s_distance':
                            chimera_settings['s_distance'],
                            '--ch_distance':
                            chimera_settings['ch_distance'],
                            '--splice_distance':
                            chimera_settings['splice_distance'],
                            '--maxoverlap':
                            chimera_settings['maxoverlap']
                        },
                        inp=input_files,
                        out=output_files,
                        package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))

    #Annotate chimeras with their types
    input_files = output_files
    output_files = os.path.join('chimeras', '%s.annotated.gff' % name)
    script = get_script('annotate_novel.py',
                        arguments={
                            '--reverse': True,
                            '--reference': args.genome
                        },
                        inp=input_files,
                        out=output_files,
                        package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))

    #Sort the annotated chimeras
    input_files = output_files
    output_files = os.path.join('chimeras', '%s.sorted.gff' % name)
    script = get_script('sort.py',
                        arguments={},
                        inp=input_files,
                        out=output_files,
                        package=bin_package)
    mlist.append(dependence(input_files, output_files, script))

    #Merge sam hits into chimeras in doublebed format
    input_files = output_files
    output_files = os.path.join('interactions', '%s.gff' % name)
    script = get_script('collapse2interaction.py',
                        arguments={
                            '--name':
                            name,
                            '--dictionary':
                            os.path.join('interactions', '%s.dict.tsv' % name)
                        },
                        inp=input_files,
                        out=output_files,
                        package=chimera_package)
    mlist.append(dependence(input_files, output_files, script))

    final_files.append(output_files)
    #Get header and cleaner for the makefile
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)

    return "\n\n".join(mlist), name, final_files
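`get_script` appears in every example with slightly different keyword arguments (`arguments`, `inp`, `out`, `log`, and `package` passed either positionally or by keyword). Its implementation is not shown here; the sketch below is merely consistent with these call sites (flag handling, log handling and output redirection are guesses, not the afp code):

import os
import sys


def get_script(script, package='', arguments=None, inp=None, out=None, log=None):
    # Hypothetical sketch: build the command tokens
    #   python <package>/<script> <inputs> --flag value ... [--log ...] [> out]
    def as_tokens(x):
        if x is None:
            return []
        return [str(x)] if isinstance(x, (str, int, float)) else [str(i) for i in x]

    tokens = [sys.executable, os.path.join(package, script)] + as_tokens(inp)
    for flag, value in (arguments or {}).items():
        tokens.append(flag)
        if value is not True:  # store_true-style flags get no value
            tokens.extend(as_tokens(value))
    if log:
        tokens.extend(['--log', log])  # guess at how log= is passed through
    if out:
        tokens.extend(['>', out])  # outputs appear to be captured by redirection
    return tokens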
Code Example #6
def makefile_local(m_input, control):
    #print(m_input)
    #print(control)
    todel = []
    final_files = []
    mlist = []
    if (type(m_input) == str):
        name = os.path.basename(m_input).split(".")[0]
    else:
        name = os.path.basename(m_input[0]).split(".")[0]

    log_dir = os.path.join('log', name)
    os.makedirs(os.path.join(project_path, log_dir), exist_ok=True)
    log_file = os.path.join(log_dir, "log.txt")

    # Processing bowtie2 settings
    bs_list = get_bowtie_call(bowtie_settings,
                              args.bowtie_args,
                              args.index,
                              m_input,
                              name,
                              threads=args.threads,
                              reads_format=args.reads_format)
    bs_list = ['echo', '\'###bowtie\'', '>', log_file, ';'
               ] + bs_list + ['2>> >(tee -a %s>&2)' % log_file]

    # Map reads with bowtie2
    input_files = m_input
    output_files = os.path.join('sam', '%s.sam' % name)
    todel.append(output_files)
    script = bs_list
    #print(script)
    mlist.append(dependence(input_files, output_files, script))

    # Convert mappings into coverage
    input_files = output_files
    output_files = [
        os.path.join('coverage', '%s.%s.bed' % (name, x))
        for x in ['plus', 'minus']
    ]
    arguments = {
        '--genome': args.genome,
        '--outstat': log_dir,
        '--outcoverage': 'coverage',
        '--ambiguous': args.ambiguous
    }
    if (args.collapsed):
        arguments['--collapsed'] = 'True'
    script = get_script('get_sam_stat_paired.py',
                        mapping_package,
                        arguments=arguments,
                        inp=input_files)
    mlist.append(dependence(input_files, output_files, script))

    # UCSC coverage
    for of, strand in zip(output_files, ['plus', 'minus']):
        trackopts = "\'track name=%s_%s description=\"CHAP seq genomic coverage for sample %s\" %s\'" % (
            name, strand, name,
            " ".join(["%s=%s" % x for x in coverage_settings['trackopts'].items()]))
        input_files = of
        output_files = os.path.join('ucsc', '%s_%s.bedgraph' % (name, strand))
        final_files.append(output_files)
        script = get_script('coverage2bedgraph.py',
                            mapping_package,
                            arguments={
                                '--multiplier': coverage_settings['multiplier'],
                                '--convert': True,
                                '--trackopts': trackopts
                            },
                            inp=input_files,
                            out=output_files)
        mlist.append(dependence(input_files, output_files, script))

    # Assign TPM and annotate the mappings
    input_files = [
        os.path.join('coverage', '%s.%s.bed' % (name, x))
        for x in ['plus', 'minus']
    ]
    output_files = os.path.join('transcripts', '%s.gff' % name)
    covpath = output_files
    script = get_script('assign_mappings.py',
                        mapping_package,
                        inp=input_files,
                        out=output_files,
                        arguments={
                            '--transcripts': args.annotation,
                            '--logdir': log_dir
                        })
    mlist.append(dependence(input_files, output_files, script))

    final_files.append(output_files)
    #Get header and cleaner for the makefile
    mlist.insert(0, get_header(final_files))
    todel = "\n\t".join(['rm %s' % x for x in todel])
    mlist.append('clean:\n\t%s\n' % todel)

    return "\n\n".join(mlist), name, final_files
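For completeness, this is roughly how the returned makefile text gets used, mirroring `mf.write(local_makefile)` in Code Example #3 and the master makefile's `$(MAKE) -f $<` recipe. The paths, the `.mk` naming, and the input variable are assumptions for illustration, not taken from afp:

import os
import subprocess

# Usage sketch with made-up inputs: write the generated rules to a per-sample
# makefile and build it directly instead of going through the master makefile.
local_makefile, name, final_files = makefile_local(sample_fastq, None)
mf_path = os.path.join(project_path, 'makefiles', '%s.mk' % name)
with open(mf_path, 'w') as mf:
    mf.write(local_makefile)
subprocess.run(['make', '-f', mf_path], cwd=project_path, check=True)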