Code example #1
def align_pe(fastq, sai, reference, fastq_basename):
    '''Use BWA to align PE data.'''

    sam_filename = "%s.sam" % (fastq_basename)
    badcigar_filename = "%s.badReads" % (fastq_basename)
    bam_filename = '%s.bam' % (fastq_basename)

    # Find read pairs with bad CIGAR strings; they are removed and the
    # remainder position-sorted in the second pipeline below
    steps = [
        "bwa sampe -P %s %s %s %s %s" %
        (reference, sai[0], sai[1], fastq[0], fastq[1]),
        "tee %s" % (sam_filename),
        r"""awk 'BEGIN {FS="\t" ; OFS="\t"} ! /^@/ && $6!="*" { cigar=$6; gsub("[0-9]+D","",cigar); n = split(cigar,vals,"[A-Z]"); s = 0; for (i=1;i<=n;i++) s=s+vals[i]; seqlen=length($10) ; if (s!=seqlen) print $1"\t" ; }'""",
        "sort", "uniq"
    ]

    out, err = utils.run_pipe(steps, badcigar_filename)
    if err:
        logger.error("sampe error: %s", err)

    steps = [
        "cat %s" % (sam_filename),
        "grep -v -F -f %s" % (badcigar_filename),
        "samtools view -@%d -Su -" % (cpu_count()),
        "samtools sort -@%d -o %s" % (cpu_count(), bam_filename)
    ]

    out, err = utils.run_pipe(steps)
    if err:
        logger.error("samtools error: %s", err)

    return bam_filename
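
utils.run_pipe is not defined in any of the examples on this page; from the call sites it evidently takes a list of shell command strings, chains them as a pipeline, optionally redirects the final stdout to an output file, and returns an (out, err) tuple. A minimal sketch of a helper with that contract (an assumption based on usage, not the project's actual implementation):

import shlex
import subprocess

def run_pipe_sketch(steps, outfile=None):
    """Hypothetical stand-in for utils.run_pipe: chain commands as a shell pipeline."""
    prev = None
    for step in steps:
        proc = subprocess.Popen(shlex.split(step),
                                stdin=prev.stdout if prev else None,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        if prev:
            prev.stdout.close()  # let SIGPIPE propagate to upstream commands
        prev = proc
    out, err = prev.communicate()  # stdout/stderr of the last command
    if outfile:
        with open(outfile, 'wb') as fh:
            fh.write(out)
    return out, err
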
Code example #2
File: xcor.py Project: JAMKuttan/ChIPseq_Analysis
def xcor(tag, paired):
    '''Use spp to calculate cross-correlation stats.'''

    tag_basename = os.path.basename(
        utils.strip_extensions(tag, STRIP_EXTENSIONS))
    uncompressed_tag_filename = tag_basename

    # Subsample tagAlign file
    number_reads = 20000000
    subsampled_tag_filename = \
        tag_basename + ".%d.tagAlign.gz" % (number_reads // 1000000)

    tag_extended = 'cat.tagAlign.gz'
    out, err = utils.run_pipe(["zcat %s %s %s" % (tag, tag, tag)],
                              outfile=tag_extended)

    steps = [
        'zcat %s' % (tag), 'grep -v "chrM"',
        'shuf -n %d --random-source=%s' % (number_reads, tag_extended)
    ]

    if paired:
        steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])

    steps.extend(['gzip -nc'])

    out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename)

    # Calculate Cross-correlation QC scores
    cc_scores_filename = tag_basename + ".cc.qc"
    cc_plot_filename = tag_basename + ".cc.plot.pdf"

    # CC_SCORE FILE format
    # Filename <tab>
    # numReads <tab>
    # estFragLen <tab>
    # corr_estFragLen <tab>
    # PhantomPeak <tab>
    # corr_phantomPeak <tab>
    # argmin_corr <tab>
    # min_corr <tab>
    # phantomPeakCoef <tab>
    # relPhantomPeakCoef <tab>
    # QualityTag

    run_spp_command = shutil.which("run_spp.R")
    out, err = utils.run_pipe([
        "Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s" %
        (run_spp_command, subsampled_tag_filename, cpu_count(),
         cc_plot_filename, cc_scores_filename)
    ])

    return cc_scores_filename
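
The CC_SCORE comment block above documents the layout of the .cc.qc file as a single tab-delimited line. A small sketch that reads it back into a dict, using that column order (the field names are taken from the comment, not from any project API):

CC_FIELDS = ['Filename', 'numReads', 'estFragLen', 'corr_estFragLen',
             'PhantomPeak', 'corr_phantomPeak', 'argmin_corr', 'min_corr',
             'phantomPeakCoef', 'relPhantomPeakCoef', 'QualityTag']

def read_cc_scores(cc_scores_filename):
    """Parse the one-line tab-delimited .cc.qc file into a dict."""
    with open(cc_scores_filename) as fh:
        values = fh.readline().rstrip('\n').split('\t')
    return dict(zip(CC_FIELDS, values))
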
Code example #3
def test_run_one_step(steps_1, capsys):
    check_output = 'ENCLB144FDT\tENCSR238SGC\tlimb\tH3K4me1\tNone\t1\tENCLB304SBJ\tENCFF833BLU.fastq.gz'.encode(
        'UTF-8')
    out, err = utils.run_pipe(steps_1)
    output, errors = capsys.readouterr()
    assert "first step shlex to stdout" in output
    assert check_output in out
Code example #4
def test_run_last_step_file(steps_2, capsys, tmpdir):
    check_output = 'ENCFF833BLU.fastq.gz\nENCFF646LXU.fastq.gz'
    tmp_outfile = tmpdir.join('output.txt')
    out, err = utils.run_pipe(steps_2, tmp_outfile.strpath)
    output, errors = capsys.readouterr()
    assert "last step shlex" in output
    assert check_output in tmp_outfile.read()
Code example #5
def convert_mapped(bam, tag_filename):
    '''Use bedtools to convert to tagAlign.'''

    out, err = utils.run_pipe([
        "bamToBed -i %s" % (bam),
        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
        "gzip -nc"
    ], outfile=tag_filename)
Code example #6
def compute_complexity(bam, paired, bam_basename):
    '''Calculate library complexity.'''

    pbc_file_qc_filename = bam_basename + ".pbc.qc"
    tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)

    # Sort by name
    # convert to bedPE and obtain fragment coordinates
    # sort by position and strand
    # Obtain unique count statistics

    # PBC File output
    # Sample Name[tab]
    # TotalReadPairs [tab]
    # DistinctReadPairs [tab]
    # OneReadPair [tab]
    # TwoReadPairs [tab]
    # NRF=Distinct/Total [tab]
    # PBC1=OnePair/Distinct [tab]
    # PBC2=OnePair/TwoPair
    pbc_headers = ['TotalReadPairs',
                   'DistinctReadPairs',
                   'OneReadPair',
                   'TwoReadPairs',
                   'NRF',
                   'PBC1',
                   'PBC2']

    if paired:
        steps = [
            "samtools sort -@%d -n %s" % (cpu_count(), bam),
            "bamToBed -bedpe -i stdin",
            r"""awk 'BEGIN{OFS="\t"}{print $1,$2,$4,$6,$9,$10}'"""]
    else:
        steps = [
            "bamToBed -i %s" % (bam),
            r"""awk 'BEGIN{OFS="\t"}{print $1,$2,$3,$6}'"""]
    steps.extend([
        "grep -v 'chrM'",
        "sort",
        "uniq -c",
        r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'"""
        ])
    out, err = utils.run_pipe(steps, tmp_pbc_file_qc_filename)
    if err:
        logger.error("PBC file error: %s", err)

    # Add Sample Name and headers
    pbc_file = pd.read_csv(tmp_pbc_file_qc_filename, sep='\t', header=None,
                           names=pbc_headers)
    pbc_file['Sample'] = bam_basename
    pbc_headers_new = list(pbc_file)
    pbc_headers_new.insert(0, pbc_headers_new.pop(pbc_headers_new.index('Sample')))
    pbc_file = pbc_file[pbc_headers_new]
    pbc_file.to_csv(pbc_file_qc_filename, header=True, sep='\t', index=False)
    os.remove(bam)
    os.remove(bam + '.bai')
    os.remove(tmp_pbc_file_qc_filename)
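
The final awk in this example computes the PBC columns in one pass over the `uniq -c` output. The same arithmetic in plain Python, as a sketch only (in practice the duplicate counts would come from something like collections.Counter over fragment coordinates):

def pbc_metrics(dup_counts):
    """Compute PBC columns from per-position duplicate counts (mirrors the awk).

    dup_counts: list of ints, one per distinct fragment position
    (the first column of the `uniq -c` output above).
    """
    mt = sum(dup_counts)                       # TotalReadPairs
    m0 = len(dup_counts)                       # DistinctReadPairs
    m1 = sum(1 for c in dup_counts if c == 1)  # OneReadPair
    m2 = sum(1 for c in dup_counts if c == 2)  # TwoReadPairs
    # NRF = Distinct/Total, PBC1 = OnePair/Distinct, PBC2 = OnePair/TwoPair
    # (same division-by-zero exposure as the awk when mt, m0, or m2 is 0)
    return mt, m0, m1, m2, m0 / mt, m1 / m0, m1 / m2
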
Code example #7
def filter_mapped_pe(bam, bam_basename):
    '''Use samtools to filter unmapped reads for PE data.'''

    filt_bam_prefix = bam_basename + ".filt.srt"
    filt_bam_filename = filt_bam_prefix + ".bam"
    tmp_filt_bam_prefix = "tmp.%s" % (filt_bam_prefix)
    tmp_filt_bam_filename = tmp_filt_bam_prefix + ".bam"

    # Remove unmapped reads, reads with unmapped mates,
    # non-primary alignments, and reads failing platform QC
    # Remove low-MAPQ reads
    # Keep only properly paired reads
    # Produce a name-sorted BAM file
    out, err = utils.run_pipe([
        # filter: -F 1804 FLAG bits to exclude; -f 2 FLAG bits to require;
        # -q 30 excludes MAPQ < 30; -u emits uncompressed output
        # excluded FLAG 1804: unmapped, mate unmapped, secondary alignments,
        # not passing platform QC, PCR or optical duplicates
        # required FLAG 2: properly paired
        "samtools view -F 1804 -f 2 -q 30 -u %s" % (bam),
        # sort: -n sorts by name; input comes from stdin;
        # -o writes to the specified filename,
        # producing a name-sorted BAM
        "samtools sort -n -@ %d -o %s" % (cpu_count(), tmp_filt_bam_filename)
    ])
    if err:
        logger.error("samtools filter error: %s", err)

    # Remove orphan reads (pair was removed)
    # and read pairs mapping to different chromosomes
    # Obtain position sorted BAM
    out, err = utils.run_pipe([
        # fill in mate coordinates, ISIZE and mate-related flags;
        # fixmate requires name-sorted input; -r drops secondary and
        # unmapped reads (largely redundant with the filter above)
        # trailing - sends output to stdout
        "samtools fixmate -r %s -" % (tmp_filt_bam_filename),
        # repeat filtering after mate repair
        "samtools view -F 1804 -f 2 -u -",
        # produce the coordinate-sorted BAM
        "samtools sort -@ %d -o %s" % (cpu_count(), filt_bam_filename)
    ])

    os.remove(tmp_filt_bam_filename)
    return filt_bam_filename
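
The magic numbers in `-F 1804 -f 2` are sums of SAM FLAG bits. To make the filter explicit, here is the same predicate spelled out in Python (bit values are from the SAM specification; the function is illustrative only):

# SAM FLAG bits
PROPER_PAIR   = 0x2    # each segment properly aligned
UNMAPPED      = 0x4
MATE_UNMAPPED = 0x8
SECONDARY     = 0x100
QC_FAIL       = 0x200
DUPLICATE     = 0x400

EXCLUDE = UNMAPPED | MATE_UNMAPPED | SECONDARY | QC_FAIL | DUPLICATE  # == 1804
REQUIRE = PROPER_PAIR                                                 # == 2

def passes_filter(flag, mapq, min_mapq=30):
    """Python equivalent of `samtools view -F 1804 -f 2 -q 30`."""
    return (flag & EXCLUDE) == 0 and (flag & REQUIRE) == REQUIRE and mapq >= min_mapq
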
Code example #8
def bedpe_to_tagalign(tag_file, outfile):
    '''Convert read pairs into a standard single-end tagAlign file.'''

    se_tag_filename = outfile + ".tagAlign.gz"

    # Split each read pair into two single-end tagAlign records
    tag_steps = ["zcat -f %s" % (tag_file)]
    tag_steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
    tag_steps.extend(['gzip -cn'])
    out, err = utils.run_pipe(tag_steps, outfile=se_tag_filename)

    return se_tag_filename
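
The awk printf in this example turns one BEDPE record into two single-end tagAlign records: fields 1-3 with strand $9 for mate 1, fields 4-6 with strand $10 for mate 2, with name and score fixed at N and 1000. The same transform as a Python sketch:

def bedpe_line_to_tagalign(line):
    """Split one BEDPE record into two tagAlign records (mirrors the awk above)."""
    f = line.rstrip('\n').split('\t')
    mate1 = (f[0], f[1], f[2], 'N', '1000', f[8])  # chrom1, start1, end1, strand1
    mate2 = (f[3], f[4], f[5], 'N', '1000', f[9])  # chrom2, start2, end2, strand2
    return '\t'.join(mate1) + '\n' + '\t'.join(mate2) + '\n'
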
Code example #9
def motif_search(filename, genome, experiment, peak):
    '''Run motif search on peaks.'''

    file_basename = os.path.basename(
        utils.strip_extensions(filename, STRIP_EXTENSIONS))

    out_fa = '%s.fa' % (experiment)
    out_motif = '%s_memechip' % (experiment)

    # Sort Bed file and limit number of peaks
    if peak == -1:
        peak = utils.count_lines(filename)
        peak_no = 'all'
    else:
        peak_no = peak

    sorted_fn = '%s.%s.narrowPeak' % (file_basename, peak_no)

    out, err = utils.run_pipe(
        ['sort -k 5gr,5gr %s' % (filename),
         'head -n %s' % (peak)],
        outfile=sorted_fn)

    # Get fasta file
    out, err = utils.run_pipe([
        'bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)
    ])

    if err:
        logger.error("bedtools error: %s", err)

    # Call memechip
    out, err = utils.run_pipe([
        'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand'
        % (out_motif, out_fa)
    ])
    if err:
        logger.error("meme-chip error: %s", err)
Code example #10
def align_se(fastq, sai, reference, fastq_basename):
    '''Use BWA to align SE data.'''

    bam_filename = '%s.bam' % (fastq_basename)

    steps = [
        "bwa samse %s %s %s" % (reference, sai[0], fastq[0]),
        "samtools view -@%d -Su -" % (cpu_count()),
        "samtools sort -@%d -o %s" % (cpu_count(), bam_filename)
    ]

    out, err = utils.run_pipe(steps)
    if err:
        logger.error("samse/samtools error: %s", err)

    return bam_filename
Code example #11
File: map_reads.py Project: JAMKuttan/CutNRun
def align_pe(fastq, reference, fastq_basename):
    '''Use bowtie2 to align PE data.'''

    bam_filename = '%s.bam' % (fastq_basename)

    steps = [
        "bowtie2 -p %d --very-sensitive -x %s -1 %s -2 %s"
        % (cpu_count(), reference, fastq[0], fastq[1]),
        "samtools view -@%d -Su -" % (cpu_count()),
        "samtools sort -@%d -o %s" % (cpu_count(), bam_filename)
    ]

    out, err = utils.run_pipe(steps)
    if err:
        logger.error("samtools error: %s", err)

    return bam_filename
Code example #12
def convert_mapped_pe(bam, bam_basename):
    '''Use bedtools to convert PE BAM data to tagAlign.'''

    bedpe_filename = bam_basename + ".bedpe.gz"

    # Name sort bam to make BEDPE
    nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
    samtools_sort_command = \
        "samtools sort -n -@%d -o %s %s" \
        % (cpu_count(), nmsrt_bam_filename, bam)

    logger.info(samtools_sort_command)
    subprocess.check_output(shlex.split(samtools_sort_command))

    out, err = utils.run_pipe(
        ["bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename), "gzip -nc"],
        outfile=bedpe_filename)
Code example #13
def filter_mapped_pe(bam, bam_basename):
    '''Use samtools to filter unmapped reads for PE data.'''

    filt_bam_prefix = bam_basename + ".filt.srt"
    filt_bam_filename = filt_bam_prefix + ".bam"

    out, err = utils.run_pipe([
        "samtools view -F 1804 -f 3 -q 30 -hu %s" % (bam),
        "samtools sort -n -@ %d -o %s" % (cpu_count(), filt_bam_filename)])
    if err:
        logger.error("samtools filter error: %s", err)

    filter_index_command = \
        "samtools index -@ %d %s" % (cpu_count(), filt_bam_filename)
    logger.info(filter_index_command)
    subprocess.check_output(shlex.split(filter_index_command))

    return filt_bam_filename
Code example #14
def pool(tag_files, outfile, paired):
    '''Pool files together.'''

    if paired:
        file_extension = '.bedpe.gz'
    else:
        file_extension = '.bedse.gz'

    pool_basename = os.path.basename(
        utils.strip_extensions(outfile, STRIP_EXTENSIONS))

    pooled_filename = pool_basename + file_extension

    # Merge files
    out, err = utils.run_pipe([
        'gzip -dc %s' % (' '.join(tag_files)),
        'gzip -cn'], outfile=pooled_filename)

    return pooled_filename
Code example #15
def self_psuedoreplication(tag_file, prefix, paired):
    '''Make 2 self-pseudoreplicates.'''

    # Get total number of reads
    no_lines = utils.count_lines(tag_file)

    # Number of lines per split
    lines_per_rep = (no_lines + 1) // 2

    # Map pseudoreplicate index to output file name
    pseudoreplicate_dict = {r: prefix + '.pr' + str(r) + '.tagAlign.gz'
                            for r in [0, 1]}

    # Shuffle and split file into equal parts
    # by using the input to seed shuf we ensure multiple runs with the same
    # input will produce the same output
    # Produces two files named splits_prefix0n, n=1,2

    splits_prefix = 'temp_split'

    psuedo_command = 'bash -c "zcat {} | shuf --random-source=<(openssl enc -aes-256-ctr -pass pass:$(zcat -f {} | wc -c) -nosalt </dev/zero 2>/dev/null) | '
    psuedo_command += 'split -d -l {} - {}."'
    psuedo_command = psuedo_command.format(
        tag_file,
        tag_file,
        int(lines_per_rep),
        splits_prefix)
    logger.info("Running psuedo with %s", psuedo_command)
    subprocess.check_call(shlex.split(psuedo_command))

    # Convert read pairs to reads into standard tagAlign file

    for index in [0, 1]:
        string_index = '.0' + str(index)
        steps = ['cat %s' % (splits_prefix + string_index)]
        if paired:
            steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
        steps.extend(['gzip -cn'])
        out, err = utils.run_pipe(steps, outfile=pseudoreplicate_dict[index])

    return pseudoreplicate_dict
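
The openssl construct in psuedo_command exists only to give shuf a random stream that is deterministic in the input, so repeated runs split the same file identically. A sketch of the same idea in pure Python, seeding the shuffle with a digest of the file contents (illustrative, not the project's code):

import gzip
import hashlib
import random

def deterministic_split(tag_file):
    """Shuffle lines reproducibly (seeded by content) and split into two halves."""
    with gzip.open(tag_file, 'rt') as fh:
        lines = fh.readlines()
    seed = hashlib.sha256(''.join(lines).encode()).hexdigest()
    random.Random(seed).shuffle(lines)  # same input always yields the same order
    half = (len(lines) + 1) // 2
    return lines[:half], lines[half:]
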
Code example #16
def call_peaks_macs(experiment, xcor, control, prefix, genome_size,
                    chrom_sizes):
    '''Call peaks and generate signal tracks.'''

    # Extract the fragment length estimate from column 3 of the
    # cross-correlation scores file
    with open(xcor, 'r') as xcor_fh:
        firstline = xcor_fh.readline()
        frag_lengths = firstline.split()[2]  # third column
        fragment_length = frag_lengths.split(',')[0]  # grab first value
        logger.info("Fraglen %s", fragment_length)

    # Generate narrow peaks and preliminary signal tracks

    command = 'macs2 callpeak ' + \
              '-t %s -c %s ' % (experiment, control) + \
              '-f BED -n %s ' % (prefix) + \
              '-g %s -p 1e-2 --nomodel --shift 0 --extsize %s --keep-dup all -B --SPMR' % (genome_size, fragment_length)

    logger.info(command)
    returncode = utils.block_on(command)
    logger.info("MACS2 exited with returncode %d", returncode)
    assert returncode == 0, "MACS2 non-zero return"

    # MACS2 sometimes calls features off the end of chromosomes.
    # Remove coordinates outside chromosome sizes

    int_narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix)
    narrowpeak_fn = '%s.narrowPeak' % (prefix)
    clipped_narrowpeak_fn = 'clipped-%s' % (narrowpeak_fn)

    steps = [
        'slopBed -i %s -g %s -b 0' % (int_narrowpeak_fn, chrom_sizes),
        'bedClip stdin %s %s' % (chrom_sizes, clipped_narrowpeak_fn)
    ]

    out, err = utils.run_pipe(steps)

    # Rescale Col5 scores to range 10-1000 to conform to narrowPeak.as format
    # (score must be <1000)
    rescaled_narrowpeak_fn = utils.rescale_scores(clipped_narrowpeak_fn,
                                                  scores_col=5)

    # Sort by Col8 in descending order and replace long peak names in Column 4
    # with Peak_<peakRank>
    steps = [
        'sort -k 8gr,8gr %s' % (rescaled_narrowpeak_fn),
        r"""awk 'BEGIN{OFS="\t"}{$4="Peak_"NR ; print $0}'"""
    ]

    out, err = utils.run_pipe(steps, '%s' % (narrowpeak_fn))

    # For fold-enrichment signal tracks

    # chrom_sizes is a tab-delimited file with two columns:
    # Col1 (chromosome name), Col2 (chromosome size in bp).

    command = 'macs2 bdgcmp ' + \
          '-t %s_treat_pileup.bdg ' % (prefix) + \
          '-c %s_control_lambda.bdg ' % (prefix) + \
          '-o %s_FE.bdg ' % (prefix) + \
          '-m FE'

    logger.info(command)
    returncode = utils.block_on(command)
    logger.info("MACS2 exited with returncode %d", returncode)
    assert returncode == 0, "MACS2 non-zero return"

    # Remove coordinates outside chromosome sizes (MACS2 bug)
    fc_bedgraph_fn = '%s.fc.signal.bedgraph' % (prefix)
    fc_bedgraph_sorted_fn = 'sorted-%s' % (fc_bedgraph_fn)
    fc_signal_fn = "%s.fc_signal.bw" % (prefix)
    steps = [
        'slopBed -i %s_FE.bdg -g %s -b 0' % (prefix, chrom_sizes),
        'bedClip stdin %s %s' % (chrom_sizes, fc_bedgraph_fn)
    ]

    out, err = utils.run_pipe(steps)

    # Sort file
    out, err = utils.run_pipe(
        ['bedSort %s %s' % (fc_bedgraph_fn, fc_bedgraph_sorted_fn)])

    # Convert bedgraph to bigwig
    command = 'bedGraphToBigWig ' + \
          '%s ' % (fc_bedgraph_sorted_fn) + \
          '%s ' % (chrom_sizes) + \
          '%s' % (fc_signal_fn)

    logger.info(command)
    returncode = utils.block_on(command)
    logger.info("bedGraphToBigWig exited with returncode %d", returncode)
    assert returncode == 0, "bedGraphToBigWig non-zero return"

    # For -log10(p-value) signal tracks

    # Compute sval =
    # min(no. of reads in ChIP, no. of reads in control) / 1,000,000
    out, err = utils.run_pipe(['gzip -dc %s' % (experiment), 'wc -l'])
    chip_reads = out.strip()
    out, err = utils.run_pipe(['gzip -dc %s' % (control), 'wc -l'])
    control_reads = out.strip()
    sval = str(min(float(chip_reads), float(control_reads)) / 1000000)

    logger.info("chip_reads = %s, control_reads = %s, sval = %s" %
                (chip_reads, control_reads, sval))

    command = 'macs2 bdgcmp ' + \
          '-t %s_treat_pileup.bdg ' % (prefix) + \
          '-c %s_control_lambda.bdg ' % (prefix) + \
          '-o %s_ppois.bdg ' % (prefix) + \
          '-m ppois -S %s' % (sval)

    logger.info(command)
    returncode = utils.block_on(command)
    assert returncode == 0, "MACS2 non-zero return"

    # Remove coordinates outside chromosome sizes (MACS2 bug)
    pvalue_bedgraph_fn = '%s.pval.signal.bedgraph' % (prefix)
    pvalue_bedgraph_sorted_fn = 'sort-%s' % (pvalue_bedgraph_fn)
    pvalue_signal_fn = "%s.pvalue_signal.bw" % (prefix)
    steps = [
        'slopBed -i %s_ppois.bdg -g %s -b 0' % (prefix, chrom_sizes),
        'bedClip stdin %s %s' % (chrom_sizes, pvalue_bedgraph_fn)
    ]

    out, err = utils.run_pipe(steps)

    # Sort file
    out, err = utils.run_pipe(
        ['bedSort %s %s' % (pvalue_bedgraph_fn, pvalue_bedgraph_sorted_fn)])

    # Convert bedgraph to bigwig
    command = 'bedGraphToBigWig ' + \
          '%s ' % (pvalue_bedgraph_sorted_fn) + \
          '%s ' % (chrom_sizes) + \
          '%s' % (pvalue_signal_fn)

    logger.info(command)
    returncode = utils.block_on(command)
    logger.info("bedGraphToBigWig exited with returncode %d", returncode)
    assert returncode == 0, "bedGraphToBigWig non-zero return"

    # Remove temporary files
    os.remove(clipped_narrowpeak_fn)
    os.remove(rescaled_narrowpeak_fn)
    os.remove(int_narrowpeak_fn)
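
utils.rescale_scores is not shown on this page; per the comment above it maps column 5 into 10-1000 so the scores conform to the narrowPeak.as limit. One plausible reading is a linear min-max rescale, sketched below (an assumption; the project's exact formula may differ):

def rescale(scores, lo=10, hi=1000):
    """Linearly map numeric scores onto [lo, hi]."""
    smin, smax = min(scores), max(scores)
    if smax == smin:
        return [lo] * len(scores)  # degenerate case: all scores identical
    return [lo + (hi - lo) * (s - smin) / (smax - smin) for s in scores]
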
Code example #17
def test_run_two_step(steps_2, capsys):
    check_output = 'ENCFF833BLU.fastq.gz\nENCFF646LXU.fastq.gz'.encode('UTF-8')
    out, err = utils.run_pipe(steps_2)
    output, errors = capsys.readouterr()
    assert "intermediate step 2 shlex to stdout" in output
    assert check_output in out
Code example #18
def overlap(experiment, design):
    '''Calculate the overlap of peaks'''

    logger.info("Determining consenus peaks for experiment %s.", experiment)

    # Output File names
    peak_type = 'narrowPeak'
    overlapping_peaks_fn = '%s.replicated.%s' % (experiment, peak_type)
    rejected_peaks_fn = '%s.rejected.%s' % (experiment, peak_type)

    # Intermediate File names
    overlap_tr_fn = 'replicated_tr.%s' % (peak_type)
    overlap_pr_fn = 'replicated_pr.%s' % (peak_type)

    # Assign pooled and pseudoreplicate peaks
    pool_peaks = design.loc[design.replicate == 'pooled', 'peaks'].values[0]
    pr1_peaks = design.loc[design.replicate == '1_pr', 'peaks'].values[0]
    pr2_peaks = design.loc[design.replicate == '2_pr', 'peaks'].values[0]

    # Remove rows that are not true replicates
    not_replicates = ['1_pr', '2_pr', 'pooled']
    design_true_reps = design[~design['replicate'].isin(not_replicates)]
    true_rep_peaks = design_true_reps.peaks.unique()

    # Find overlaps
    awk_command = r"""awk 'BEGIN{FS="\t";OFS="\t"}{s1=$3-$2; s2=$13-$12; if (($21/s1 >= 0.5) || ($21/s2 >= 0.5)) {print $0}}'"""
    cut_command = 'cut -f 1-10'

    # Find pooled peaks that overlap Rep1 and Rep2
    # where overlap is defined as the fractional overlap
    # with any one of the overlapping peak pairs  >= 0.5

    steps_true = [
        'intersectBed -wo -a %s -b %s' % (pool_peaks, true_rep_peaks[0]),
        awk_command, cut_command, 'sort -u'
    ]


    if len(true_rep_peaks) > 1:
        for true_peak in true_rep_peaks[1:]:
            steps_true.extend([
                'intersectBed -wo -a stdin -b %s' % (true_peak), awk_command,
                cut_command, 'sort -u'
            ])

    out, err = utils.run_pipe(steps_true, outfile=overlap_tr_fn)
    print("%d peaks overlap with both true replicates" %
          (utils.count_lines(overlap_tr_fn)))

    # Find pooled peaks that overlap PseudoRep1 and PseudoRep2
    # where overlap is defined as the fractional overlap
    # with any one of the overlapping peak pairs  >= 0.5

    steps_pseudo = [
        'intersectBed -wo -a %s -b %s' % (pool_peaks, pr1_peaks), awk_command,
        cut_command, 'sort -u',
        'intersectBed -wo -a stdin -b %s' % (pr2_peaks), awk_command,
        cut_command, 'sort -u'
    ]

    out, err = utils.run_pipe(steps_pseudo, outfile=overlap_pr_fn)
    print("%d peaks overlap with both pooled pseudoreplicates" %
          (utils.count_lines(overlap_pr_fn)))

    # Make union of peak lists
    out, err = utils.run_pipe(
        ['cat %s %s' % (overlap_tr_fn, overlap_pr_fn), 'sort -u'],
        overlapping_peaks_fn)
    print(
        "%d peaks overlap with true replicates or with pooled pseudorepliates"
        % (utils.count_lines(overlapping_peaks_fn)))

    # Make rejected peak list
    out, err = utils.run_pipe([
        'intersectBed -wa -v -a %s -b %s' % (pool_peaks, overlapping_peaks_fn)
    ], rejected_peaks_fn)
    print("%d peaks were rejected" % (utils.count_lines(rejected_peaks_fn)))

    # Remove temporary files
    os.remove(overlap_tr_fn)
    os.remove(overlap_pr_fn)

    return os.path.abspath(overlapping_peaks_fn)
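
The awk_command in this example keeps an intersectBed -wo pair when the reported overlap ($21, in bp) covers at least half of either peak ($3-$2 or $13-$12). The same predicate as a Python sketch:

def replicated(a_start, a_end, b_start, b_end, overlap_bp):
    """True if overlap_bp is at least 50% of either peak's length (mirrors the awk)."""
    s1 = a_end - a_start    # $3 - $2
    s2 = b_end - b_start    # $13 - $12
    return overlap_bp / s1 >= 0.5 or overlap_bp / s2 >= 0.5
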