def print_read_to_bed12(key, reads):
    """Merge the reads by blocks and print a single BED12 record on stdout.

    The reads are expected to come from the same TSS and to share the same
    barcode and fingerprint. The barcode and fingerprint written in the name
    field are taken from ``key``, not parsed from the reads.

    The printed record is built as follows:
        * chromosome: taken from ``key``;
        * start: start of the leftmost read;
        * end: start + last merged block start + last merged block size;
        * name: ``BC:<barcode>;FP:<fingerprint>``;
        * score: number of reads that were merged;
        * strand: strand of the leftmost read;
        * thick region: the first merged block on the "+" strand, the last
          merged block otherwise;
        * color: always ``255,0,0``.

    Args:
        key: A tuple that contains the chromosome, barcode and fingerprint
            information, in that order.

        reads: A list of reads (each one a list of BED12 fields) from the
            same TSS, that have similar barcode and fingerprint. Nothing is
            printed when the list is empty.

    >>> reads = []
    >>> reads.append(['chrX', '100', '200', 'BC:AAA;FP:0012', '12', '+', '100', '110', '255,0,0', '2', '20,25', '0,75'])
    >>> reads.append(['chrX', '100', '300', 'BC:AAA;FP:0012', '12', '+', '100', '110', '255,0,0', '3', '20,25', '0,175'])
    >>> print_read_to_bed12(('chrX', 'AAA', '0012'), reads) #doctest: +NORMALIZE_WHITESPACE
    chrX 100 300 BC:AAA;FP:0012 2 + 100 120 255,0,0 3 20,25,25 0,75,175
    """
    if not reads:
        # Without reads there is no record to merge, hence nothing to print.
        return

    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)

    # min() yields the first read having the smallest start, i.e. the same
    # read a stable sort followed by [0] would give, without sorting.
    first_read = min(reads, key=bed12.get_start)
    chrom, barcode, fingerprint = key
    start = bed12.get_start(first_read)
    # Assumes the merged block starts are relative to `start` (as in the
    # BED12 blockStarts column), so the last block gives the record's end.
    end = start + block_starts[-1] + block_sizes[-1]
    strand = bed12.get_strand(first_read)

    if strand == "+":
        thick_start, thick_end = start, start + block_sizes[0]
    else:
        thick_start, thick_end = end - block_sizes[-1], end

    fields = [
        chrom,
        start,
        end,
        "BC:{0};FP:{1}".format(barcode, fingerprint),  # name
        len(reads),  # score
        strand,
        thick_start,
        thick_end,
        "255,0,0",  # color
        len(block_sizes),  # block count
        ",".join(map(str, block_sizes)),
        ",".join(map(str, block_starts)),
    ]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("\t".join(map(str, fields)))
Example #2
0
def print_read_to_bed12(key, reads):
    """Merge the reads by blocks and print a single BED12 record on stdout.

    The reads are expected to come from the same TSS and to share the same
    fingerprint. The fingerprint written in the name field is taken from
    ``key``, not parsed from the reads.

    The printed record is built as follows:
        * chromosome: taken from ``key``;
        * start: start of the leftmost read;
        * end: start + last merged block start + last merged block size;
        * name: ``FP:<fingerprint>``;
        * score: number of reads that were merged;
        * strand: strand of the leftmost read;
        * thick region: the first merged block on the "+" strand, the last
          merged block otherwise;
        * color: always ``255,0,0``.

    Args:
        key: A tuple that contains the chromosome and fingerprint
            information, in that order.

        reads: A list of reads (each one a list of BED12 fields) from the
            same TSS, that have similar fingerprint. Nothing is printed
            when the list is empty.

    >>> reads = []
    >>> reads.append(['chrX', '100', '200', 'FP:0012', '12', '+', '100', '110', '255,0,0', '2', '20,25', '0,75'])
    >>> reads.append(['chrX', '100', '300', 'FP:0012', '12', '+', '100', '110', '255,0,0', '3', '20,25', '0,175'])
    >>> print_read_to_bed12(('chrX', '0012'), reads) #doctest: +NORMALIZE_WHITESPACE
    chrX 100 300 FP:0012 2 + 100 120 255,0,0 3 20,25,25 0,75,175
    """
    if not reads:
        # Without reads there is no record to merge, hence nothing to print.
        return

    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)

    # min() yields the first read having the smallest start, i.e. the same
    # read a stable sort followed by [0] would give, without sorting.
    first_read = min(reads, key=bed12.get_start)
    chrom, fingerprint = key
    start = bed12.get_start(first_read)
    # Assumes the merged block starts are relative to `start` (as in the
    # BED12 blockStarts column), so the last block gives the record's end.
    end = start + block_starts[-1] + block_sizes[-1]
    strand = bed12.get_strand(first_read)

    if strand == '+':
        thick_start, thick_end = start, start + block_sizes[0]
    else:
        thick_start, thick_end = end - block_sizes[-1], end

    fields = [
        chrom,
        start,
        end,
        "FP:{0}".format(fingerprint),  # name
        len(reads),  # score
        strand,
        thick_start,
        thick_end,
        "255,0,0",  # color
        len(block_sizes),  # block count
        ','.join(map(str, block_sizes)),
        ','.join(map(str, block_starts)),
    ]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('\t'.join(map(str, fields)))
def print_read_to_bed12(reads):
    """Merge the reads by blocks and print a single BED12 record on stdout.

    The reads are expected to come from the same TSS. No barcode or
    fingerprint is used here: the name field is generated from the merged
    record's own coordinates.

    The printed record is built as follows:
        * chromosome: chromosome of the leftmost read;
        * start: start of the leftmost read;
        * end: start + last merged block start + last merged block size;
        * name: ``<chrom>:<start>:<end>:<strand>``;
        * score: number of reads that were merged;
        * strand: strand of the leftmost read;
        * thick region: the first merged block on the "+" strand, the last
          merged block otherwise;
        * color: always ``255,0,0``.

    Args:
        reads: A list of reads (presumably each one a list of BED12 fields,
            as consumed by the ``bed12`` helpers). Nothing is printed when
            the list is empty.

    """
    if not reads:
        # Without reads there is no record to merge, hence nothing to print.
        return

    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)

    # min() yields the first read having the smallest start, i.e. the same
    # read a stable sort followed by [0] would give, without sorting.
    first_read = min(reads, key=bed12.get_start)
    chrom = bed12.get_chrom(first_read)
    start = bed12.get_start(first_read)
    # Assumes the merged block starts are relative to `start` (as in the
    # BED12 blockStarts column), so the last block gives the record's end.
    end = start + block_starts[-1] + block_sizes[-1]
    strand = bed12.get_strand(first_read)

    if strand == '+':
        thick_start, thick_end = start, start + block_sizes[0]
    else:
        thick_start, thick_end = end - block_sizes[-1], end

    fields = [
        chrom,
        start,
        end,
        ":".join(map(str, [chrom, start, end, strand])),  # name
        len(reads),  # score
        strand,
        thick_start,
        thick_end,
        "255,0,0",  # color
        len(block_sizes),  # block count
        ','.join(map(str, block_sizes)),
        ','.join(map(str, block_starts)),
    ]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('\t'.join(map(str, fields)))
def print_read_to_bed12(reads):
    """Merge the reads by blocks and print a single BED12 record on stdout.

    The reads are expected to come from the same TSS. No barcode or
    fingerprint is used here: the name field is generated from the merged
    record's own coordinates.

    The printed record is built as follows:
        * chromosome: chromosome of the leftmost read;
        * start: start of the leftmost read;
        * end: start + last merged block start + last merged block size;
        * name: ``<chrom>:<start>:<end>:<strand>``;
        * score: number of reads that were merged;
        * strand: strand of the leftmost read;
        * thick region: the first merged block on the "+" strand, the last
          merged block otherwise;
        * color: always ``255,0,0``.

    Args:
        reads: A list of reads (presumably each one a list of BED12 fields,
            as consumed by the ``bed12`` helpers). Nothing is printed when
            the list is empty.

    """
    if not reads:
        # Without reads there is no record to merge, hence nothing to print.
        return

    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)

    # min() yields the first read having the smallest start, i.e. the same
    # read a stable sort followed by [0] would give, without sorting.
    first_read = min(reads, key=bed12.get_start)
    chrom = bed12.get_chrom(first_read)
    start = bed12.get_start(first_read)
    # Assumes the merged block starts are relative to `start` (as in the
    # BED12 blockStarts column), so the last block gives the record's end.
    end = start + block_starts[-1] + block_sizes[-1]
    strand = bed12.get_strand(first_read)

    if strand == '+':
        thick_start, thick_end = start, start + block_sizes[0]
    else:
        thick_start, thick_end = end - block_sizes[-1], end

    name = ":".join(map(str, [chrom, start, end, strand]))
    score = len(reads)
    color = "255,0,0"
    block_count = len(block_sizes)
    sizes_field = ','.join(map(str, block_sizes))
    starts_field = ','.join(map(str, block_starts))

    fields = [chrom, start, end, name, score, strand, thick_start, thick_end,
              color, block_count, sizes_field, starts_field]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('\t'.join(map(str, fields)))