Python PipelineChipseq.getPeakShiftFromMacs 예제들

프로그래밍 언어: Python

클래스/타입: PipelineChipseq

메소드/함수: getPeakShiftFromMacs

hotexamples.com에서의 예제들: 4

Python PipelineChipseq.getPeakShiftFromMacs - 4개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 PipelineChipseq.getPeakShiftFromMacs의 실제 Python 사용 예제들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

exportMacsIntervalsAsBed(2)

getPeakShiftFromMacs(2)

loadMACS(2)

buildSimpleNormalizedBAM(1)

countPeaks(1)

mergeIntervalsWithScores(1)

summarizeMACS(1)

summarizeMACSsolo(1)

예제 #1

파일 보기

def getMergedBigWigPeakShift(infiles, outfile):
    '''Build and run a bam2wiggle.py command producing one bigwig from several BAM files.

    For every BAM file in *infiles* whose matching
    ``macs/with_input/<track>.macs`` file exists, the peak shift reported
    by ``PIntervals.getPeakShiftFromMacs`` is added to the command line.
    The standard output of the command is redirected to *outfile*.

    NOTE(review): the command template below refers to ``in_list``,
    ``shifts`` and ``outfile`` by name; P.run() presumably resolves them
    from this function's local variables, so those names must not be
    changed -- confirm against the Pipeline module.
    '''
    # Not used further down. Kept because P.snip may validate the suffix
    # of outfile -- TODO confirm before removing.
    expt = P.snip(os.path.basename(outfile), ".merge.bw").replace("-agg", "")

    # NOTE(review): join() puts the separator only *between* items, so the
    # first BAM file (and the first shift below) carries no option prefix.
    in_list = " --bamfile=".join(infiles)

    peak_shifts = []
    for bam_path in infiles:
        macs_path = "macs/with_input/%s.macs" % P.snip(
            os.path.basename(bam_path), ".norm.bam")
        if not os.path.exists(macs_path):
            # BAM files without a MACS result contribute no shift.
            continue
        peak_shifts.append(str(PIntervals.getPeakShiftFromMacs(macs_path)))
    shifts = " --shift=".join(peak_shifts)

    statement = '''python %(scriptsdir)s/bam2wiggle.py 
                      --output-format=bigwig
                      %(in_list)s
                      %(shifts)s > %(outfile)s'''
    P.run()

예제 #2

파일 보기

파일: pipeline_proj012_chipseq.py 프로젝트: BioinformaticsArchive/cgat

def getMergedBigWigPeakShift(infiles, outfile):
    '''Build and run a bam2wiggle.py command producing one bigwig from several BAM files.

    For every BAM file in *infiles* whose matching
    ``macs/with_input/<track>.macs`` file exists, the peak shift reported
    by ``PIntervals.getPeakShiftFromMacs`` is added to the command line.
    The standard output of the command is redirected to *outfile*.

    NOTE(review): the command template below refers to ``in_list``,
    ``shifts`` and ``outfile`` by name; P.run() presumably resolves them
    from this function's local variables, so those names must not be
    changed -- confirm against the Pipeline module.
    '''
    # Not used further down. Kept because P.snip may validate the suffix
    # of outfile -- TODO confirm before removing.
    expt = P.snip(os.path.basename(outfile), ".merge.bw").replace("-agg", "")

    # NOTE(review): join() puts the separator only *between* items, so the
    # first BAM file (and the first shift below) carries no option prefix.
    in_list = " --bamfile=".join(infiles)

    peak_shifts = []
    for bam_path in infiles:
        macs_path = "macs/with_input/%s.macs" % P.snip(
            os.path.basename(bam_path), ".norm.bam")
        if not os.path.exists(macs_path):
            # BAM files without a MACS result contribute no shift.
            continue
        peak_shifts.append(str(PIntervals.getPeakShiftFromMacs(macs_path)))
    shifts = " --shift=".join(peak_shifts)

    statement = '''python %(scriptsdir)s/bam2wiggle.py 
                      --output-format=bigwig
                      %(in_list)s
                      %(shifts)s > %(outfile)s'''
    P.run()

예제 #3

파일 보기

def loadMergedIntervals(infile, outfile):
    '''Load merged intervals into a database table with re-counted read statistics.

    Every interval in the BED file *infile* is re-evaluated by counting
    reads within the interval (``PIntervals.countPeaks``). In contrast to
    the initial pipeline, the genome is not binned. In particular, the
    meaning of the columns in the table changes to:

    nProbes: number of reads in interval
    PeakCenter: position with maximum number of reads in interval
    AvgVal: average coverage within interval

    The rows are written to a temporary tab-separated file which is then
    fed to ``csv2db.py``; the target table is named
    ``<track>_macs_merged_intervals``. The temporary file is removed
    afterwards, also when an error occurs.

    Arguments
    ---------
    infile : str
        BED file whose basename is ``<track>.merged.cleaned.bed``. Each
        line needs at least five whitespace-separated fields: contig,
        start, end, interval id and the value stored in the ``Fold``
        column.
    outfile : str
        File receiving the standard output of ``csv2db.py``.

    Raises
    ------
    AssertionError
        If ``bam/<track>.norm.bam`` does not exist.

    NOTE(review): the command template refers to ``tablename``,
    ``tmpfilename`` and ``outfile`` by name; P.run() presumably resolves
    them from this function's local variables -- keep those names.
    '''

    headers = ("contig", "start", "end", "interval_id", "nPeaks", "PeakCenter",
               "Length", "AvgVal", "PeakVal", "nProbes", "Fold")

    # Resolve and validate all input paths before any resource is created,
    # so that a missing BAM file does not leave a stray temporary file.
    track = P.snip(os.path.basename(infile), ".merged.cleaned.bed")

    bam_fn = "bam/%s.norm.bam" % track
    assert os.path.exists(
        bam_fn), "could not find bamfile %s for track %s" % (bam_fn, track)

    # Tracks with "solo" in their name take the MACS run without input.
    if "solo" in track:
        macs_fn = "macs/no_input/%s.macs" % track
    else:
        macs_fn = "macs/with_input/%s.macs" % track

    samfiles, offsets = [], []
    c = E.Counter()

    # Text mode: the rows written below are str, not bytes.
    tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
    tmpfilename = tmpfile.name
    try:
        try:
            tmpfile.write("\t".join(headers) + "\n")

            samfiles.append(pysam.Samfile(bam_fn, "rb"))
            # The offset list stays empty if there is no MACS output.
            if os.path.exists(macs_fn):
                offsets.append(PIntervals.getPeakShiftFromMacs(macs_fn))

            # Loop over input Bed file and calculate stats for merged
            # intervals.
            with open(infile, "r") as bedfile:
                for line in bedfile:
                    c.input += 1
                    # split() without arguments already discards the line
                    # terminator, so a final line lacking a newline is
                    # parsed intact.
                    contig, start, end, int_id, fc = line.split()[:5]
                    start, end = int(start), int(end)

                    (npeaks, peakcenter, length,
                     avgval, peakval, nprobes) = PIntervals.countPeaks(
                        contig, start, end, samfiles, offsets)

                    # nprobes can be 0 if the intervals overlap only
                    # slightly and, due to the binning, no reads are
                    # actually in the overlap region. Most of these
                    # intervals should be small and have already been
                    # deleted via the merge_min_interval_length cutoff.
                    # Such intervals are counted here but are still
                    # written to the table.
                    # NOTE(review): the counter name suggests they were
                    # meant to be skipped -- confirm before adding a
                    # `continue`.
                    if nprobes == 0:
                        c.skipped_reads += 1

                    c.output += 1
                    # The interval_id column holds the identifier read
                    # from the fourth field of the BED line.
                    tmpfile.write("\t".join(
                        map(str, (contig, start, end, int_id, npeaks,
                                  peakcenter, length, avgval, peakval,
                                  nprobes, fc))) + "\n")
        finally:
            # Flush the table to disk before csv2db.py reads it and
            # release the BAM handle(s).
            tmpfile.close()
            for samfile in samfiles:
                samfile.close()

        tablename = "%s_macs_merged_intervals" % track

        statement = '''python %(scriptsdir)s/csv2db.py %(csv2db_options)s
                       --index=interval_id
                       --index=contig,start 
                       --table=%(tablename)s
                   < %(tmpfilename)s > %(outfile)s '''
        P.run()
    finally:
        # delete=False above means the file has to be removed explicitly.
        os.unlink(tmpfilename)

    L.info("%s\n" % str(c))

예제 #4

파일 보기

파일: pipeline_proj012_chipseq.py 프로젝트: BioinformaticsArchive/cgat

def loadMergedIntervals(infile, outfile):
    '''Load merged intervals into a database table with re-counted read statistics.

    Every interval in the BED file *infile* is re-evaluated by counting
    reads within the interval (``PIntervals.countPeaks``). In contrast to
    the initial pipeline, the genome is not binned. In particular, the
    meaning of the columns in the table changes to:

    nProbes: number of reads in interval
    PeakCenter: position with maximum number of reads in interval
    AvgVal: average coverage within interval

    The rows are written to a temporary tab-separated file which is then
    fed to ``csv2db.py``; the target table is named
    ``<track>_macs_merged_intervals``. The temporary file is removed
    afterwards, also when an error occurs.

    Arguments
    ---------
    infile : str
        BED file whose basename is ``<track>.merged.cleaned.bed``. Each
        line needs at least five whitespace-separated fields: contig,
        start, end, interval id and the value stored in the ``Fold``
        column.
    outfile : str
        File receiving the standard output of ``csv2db.py``.

    Raises
    ------
    AssertionError
        If ``bam/<track>.norm.bam`` does not exist.

    NOTE(review): the command template refers to ``tablename``,
    ``tmpfilename`` and ``outfile`` by name; P.run() presumably resolves
    them from this function's local variables -- keep those names.
    '''

    headers = ("contig", "start", "end", "interval_id", "nPeaks", "PeakCenter",
               "Length", "AvgVal", "PeakVal", "nProbes", "Fold")

    # Resolve and validate all input paths before any resource is created,
    # so that a missing BAM file does not leave a stray temporary file.
    track = P.snip(os.path.basename(infile), ".merged.cleaned.bed")

    bam_fn = "bam/%s.norm.bam" % track
    assert os.path.exists(
        bam_fn), "could not find bamfile %s for track %s" % (bam_fn, track)

    # Tracks with "solo" in their name take the MACS run without input.
    if "solo" in track:
        macs_fn = "macs/no_input/%s.macs" % track
    else:
        macs_fn = "macs/with_input/%s.macs" % track

    samfiles, offsets = [], []
    c = E.Counter()

    # Text mode: the rows written below are str, not bytes.
    tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
    tmpfilename = tmpfile.name
    try:
        try:
            tmpfile.write("\t".join(headers) + "\n")

            samfiles.append(pysam.Samfile(bam_fn, "rb"))
            # The offset list stays empty if there is no MACS output.
            if os.path.exists(macs_fn):
                offsets.append(PIntervals.getPeakShiftFromMacs(macs_fn))

            # Loop over input Bed file and calculate stats for merged
            # intervals.
            with open(infile, "r") as bedfile:
                for line in bedfile:
                    c.input += 1
                    # split() without arguments already discards the line
                    # terminator, so a final line lacking a newline is
                    # parsed intact.
                    contig, start, end, int_id, fc = line.split()[:5]
                    start, end = int(start), int(end)

                    (npeaks, peakcenter, length,
                     avgval, peakval, nprobes) = PIntervals.countPeaks(
                        contig, start, end, samfiles, offsets)

                    # nprobes can be 0 if the intervals overlap only
                    # slightly and, due to the binning, no reads are
                    # actually in the overlap region. Most of these
                    # intervals should be small and have already been
                    # deleted via the merge_min_interval_length cutoff.
                    # Such intervals are counted here but are still
                    # written to the table.
                    # NOTE(review): the counter name suggests they were
                    # meant to be skipped -- confirm before adding a
                    # `continue`.
                    if nprobes == 0:
                        c.skipped_reads += 1

                    c.output += 1
                    # The interval_id column holds the identifier read
                    # from the fourth field of the BED line.
                    tmpfile.write("\t".join(
                        map(str, (contig, start, end, int_id, npeaks,
                                  peakcenter, length, avgval, peakval,
                                  nprobes, fc))) + "\n")
        finally:
            # Flush the table to disk before csv2db.py reads it and
            # release the BAM handle(s).
            tmpfile.close()
            for samfile in samfiles:
                samfile.close()

        tablename = "%s_macs_merged_intervals" % track

        statement = '''python %(scriptsdir)s/csv2db.py %(csv2db_options)s
                       --index=interval_id
                       --index=contig,start 
                       --table=%(tablename)s
                   < %(tmpfilename)s > %(outfile)s '''
        P.run()
    finally:
        # delete=False above means the file has to be removed explicitly.
        os.unlink(tmpfilename)

    L.info("%s\n" % str(c))