예제 #1
0
파일: bam2wig.py 프로젝트: bdgp/grit
def generate_wiggle(reads, ofps, num_threads=1, contig=None ):
    """Run populate_cvg_array_for_contig for every contig/strand in `reads`.

    Args:
        reads: object exposing parallel `lengths` and `references`
            sequences (one entry per contig).
        ofps: mapping from strand to an open output stream. With exactly
            two entries the keys '+' and '-' are used; otherwise the
            single key None is used. Every stream in `ofps` is closed
            before this function returns, even if a job raises.
        num_threads: number of worker processes to keep busy. Values of
            1 or less run every job sequentially in this process.
        contig: if not None, only contigs whose clean_chr_name matches
            clean_chr_name(contig) are processed.

    Returns:
        None.
    """
    # two streams means stranded output; anything else is a single
    # unstranded stream stored under the key None
    strands = ['+', '-'] if len(ofps) == 2 else [None]

    try:
        all_args = []
        # sorted() materialises the pairs anyway, so the builtin zip is
        # equivalent to a lazy zip here
        for chrm_length, chrm in sorted(zip(reads.lengths, reads.references)):
            # skip regions not in the specified contig, if requested
            if (contig is not None
                    and clean_chr_name(chrm) != clean_chr_name(contig)):
                continue
            for strand in strands:
                job = (ofps[strand], reads, chrm, chrm_length, strand)
                assert job not in all_args
                all_args.append(job)

        # jobs are sorted by ascending contig length; both branches below
        # consume the list from the end, so the longest contigs go first
        if num_threads <= 1:
            # also covers num_threads <= 0, which would otherwise leave
            # the scheduling loop below with no slots and spin forever
            for args in reversed(all_args):
                populate_cvg_array_for_contig(*args)
        else:
            ps = [None] * num_threads
            while all_args:
                # hand the next job to the first idle slot, if any
                for i in range(num_threads):
                    if ps[i] is None or not ps[i].is_alive():
                        ps[i] = multiprocessing.Process(
                            target=populate_cvg_array_for_contig,
                            args=all_args.pop())
                        ps[i].start()
                        break
                time.sleep(0.1)

            for p in ps:
                if p is not None:
                    p.join()
    finally:
        # release the output streams on every exit path
        for fp in ofps.values():
            fp.close()

    return
예제 #2
0
def write_array_to_opstream(ofp, buffer, buff_start, chrm, chrm_length,
                            strand):
    """Write runs of equal values in `buffer` to `ofp` as bedGraph lines.

    Each emitted line is "chrom<TAB>start<TAB>end<TAB>value" with the value
    formatted to two decimals. Runs whose value is not greater than 1e-12
    are not written, and values are negated when `strand` is '-'.

    Args:
        ofp: writable text stream.
        buffer: indexable, sliceable sequence of numeric values.
        buff_start: genomic coordinate of buffer[0].
        chrm: contig name; passed through clean_chr_name and
            fix_chrm_name before being written.
        chrm_length: contig length; output is truncated so that no
            interval ends past this coordinate.
        strand: '-' to negate the written values; any other value leaves
            them unchanged.

    Returns:
        None.
    """
    # len() rather than truthiness so array-like buffers work too; an
    # empty buffer has nothing to write (and buffer[0] would raise)
    if len(buffer) == 0:
        return

    chrm = fix_chrm_name(clean_chr_name(chrm))

    def write_run(start, stop, val):
        # skip runs without signal and runs of zero or negative length,
        # which are not valid bedGraph intervals
        if val > 1e-12 and stop > start:
            write_val = -val if strand == '-' else val
            line = "%s\t%i\t%i\t%.2f" % (chrm, buff_start + start,
                                         buff_start + stop, write_val)
            ofp.write(line + "\n")

    run_start = 0
    run_val = buffer[0]
    # pre-bind pos so a one-element buffer (loop body never runs) yields
    # an empty final run instead of an unbound-name error
    pos = -1
    for pos, val in enumerate(buffer[1:]):
        # make sure this doesn't extend past the end of the chromosome
        # bedGraphs are 0-based, so use chrm_length-1
        if buff_start + pos + 1 >= chrm_length:
            pos = chrm_length - buff_start - 1
            break
        if val != run_val:
            write_run(run_start, pos + 1, run_val)
            run_start, run_val = pos + 1, val

    # NOTE(review): unless truncated above, the final run ends at the last
    # buffer index (exclusive), so the last element is never emitted --
    # presumably callers pass buffers that overlap by one; confirm.
    write_run(run_start, pos + 1, run_val)

    return
예제 #3
0
def parse_arguments():
    """Parse the command line into a genomic region and an input file.

    Expects two positional arguments: a region string of the form
    contig:strand:start-stop (commas in the coordinates are ignored) and
    the path of the input file.

    Returns:
        A tuple (GenomicInterval, file object): the interval is built from
        clean_chr_name(contig), the strand, and the integer start/stop;
        the file object is opened for reading by argparse.

    Raises:
        ValueError: if the region does not have exactly three
            colon-separated fields, or the coordinate field is not two
            integers separated by '-'.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Extract elements from a bed or gtf file.')
    parser.add_argument( 'region', 
                         help='A region string of the form contig:strand:start-stop')
    # argparse.FileType instead of the Python 2-only `file` builtin: it
    # exists on Python 2.7 and 3, and reports unreadable paths as a normal
    # argparse usage error
    parser.add_argument( 'input_file', type=argparse.FileType('r'),
                         help='An bed file to extract elements from')
    args = parser.parse_args()
    
    chrm, strand, poss = args.region.strip().split(":")
    # drop thousands separators before splitting the coordinates
    start, stop = [int(x) for x in poss.replace(",", "").split('-')]
    
    return ( GenomicInterval(clean_chr_name(chrm), strand, start, stop), 
            args.input_file )
예제 #4
0
def parse_arguments():
    """Read the region and input file given on the command line.

    The first positional argument is a region string of the form
    contig:strand:start-stop; commas inside the coordinates are removed
    before conversion. The second is the path of the file to read.

    Returns:
        (interval, input_file) where `interval` is
        GenomicInterval(clean_chr_name(contig), strand, start, stop) with
        integer coordinates and `input_file` is the file object argparse
        opened for reading.

    Raises:
        ValueError: if the region string does not split into three fields
            on ':' or its last field is not two integers joined by '-'.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Extract elements from a bed or gtf file.')
    parser.add_argument(
        'region', help='A region string of the form contig:strand:start-stop')
    # `file` only exists on Python 2; argparse.FileType works on both
    # major versions and turns open() failures into argparse errors
    parser.add_argument('input_file',
                        type=argparse.FileType('r'),
                        help='An bed file to extract elements from')
    args = parser.parse_args()

    chrm, strand, poss = args.region.strip().split(":")
    # coordinates may be written with thousands separators, e.g. 1,000-2,000
    start, stop = [int(x) for x in poss.replace(",", "").split('-')]

    return (GenomicInterval(clean_chr_name(chrm), strand, start,
                            stop), args.input_file)
예제 #5
0
def fix_chr_name(x):
    """Return clean_chr_name(x) with the literal prefix "chr" prepended."""
    # presumably clean_chr_name strips any existing prefix -- verify
    bare_name = clean_chr_name(x)
    return "chr" + bare_name
예제 #6
0
def fix_chr_name(x):
    """Prefix the cleaned form of contig name `x` with "chr".

    The cleaning itself is delegated to clean_chr_name.
    """
    cleaned = clean_chr_name(x)
    prefixed = "chr" + cleaned
    return prefixed