コード例 #1
0
def read_scores(f):
    """Load wiggle scores from *f*, grouped by chromosome.

    Every ``(chrom, pos, val)`` triple produced by ``bx.wiggle.Reader(f)``
    is stored at index ``pos`` of a ``BinnedArray`` dedicated to ``chrom``;
    the array is created the first time a chromosome is encountered.

    Returns a dict mapping chromosome name to its ``BinnedArray``.
    """
    per_chrom = {}
    for chrom, pos, val in bx.wiggle.Reader(f):
        bucket = per_chrom.get(chrom)
        if bucket is None:
            # First score seen for this chromosome: allocate its array.
            bucket = per_chrom[chrom] = BinnedArray()
        bucket[pos] = val
    return per_chrom
コード例 #2
0
def load_scores_wiggle(fname, chrom_buffer_size=3):
    """
    Read a wiggle file and return a dict of score containers keyed
    by chromosome.

    The first ``chrom_buffer_size`` distinct chromosomes encountered are
    stored in ``BinnedArray`` objects; every later chromosome is stored in
    a ``PositionalScoresOnDisk`` object instead. Passing ``0`` therefore
    puts every chromosome in a ``PositionalScoresOnDisk``.

    Errors raised while reading are handled as follows:

    * ``UCSCLimitException`` -- a truncation warning is printed and the
      scores read so far are returned.
    * ``IndexError`` / ``ValueError`` -- reported through ``stop_err``
      (defined elsewhere; presumably it terminates the program -- confirm).

    The input file is closed before returning, including on error.
    """
    scores_by_chrom = dict()
    try:
        # The context manager guarantees the handle is released even when
        # the reader raises part-way through the file.
        with open(fname) as handle:
            for chrom, pos, val in bx.wiggle.Reader(UCSCOutWrapper(handle)):
                if chrom not in scores_by_chrom:
                    if chrom_buffer_size:
                        scores_by_chrom[chrom] = BinnedArray()
                        chrom_buffer_size -= 1
                    else:
                        # Buffer budget exhausted: switch container type.
                        scores_by_chrom[chrom] = PositionalScoresOnDisk()
                scores_by_chrom[chrom][pos] = val
    except UCSCLimitException:
        # Wiggle data was truncated, at the very least need to warn the user.
        print(
            'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.'
        )
    except IndexError:
        stop_err(
            'Data error: one or more column data values is missing in "%s"' %
            fname)
    except ValueError:
        stop_err(
            'Data error: invalid data type for one or more values in "%s".' %
            fname)
    return scores_by_chrom
コード例 #3
0
def load_scores_wiggle( fname ):
    """
    Load the scores of a wiggle file into per-chromosome BinnedArrays.

    `fname` is opened with `misc.open_compressed` and parsed by
    `bx.wiggle.Reader`. Each score is written at its position in the
    BinnedArray belonging to its chromosome; the resulting dict maps
    chromosome name -> BinnedArray.
    """
    by_chrom = {}
    reader = bx.wiggle.Reader( misc.open_compressed( fname ) )
    for chrom, pos, val in reader:
        try:
            target = by_chrom[chrom]
        except KeyError:
            # First score for this chromosome: create its array.
            target = by_chrom[chrom] = BinnedArray()
        target[pos] = val
    return by_chrom
コード例 #4
0
def main():
    """Convert a wiggle score file into a single BinnedArray file.

    Command line: two positional arguments, the input score file and the
    output file, plus an optional ``comp`` option that is forwarded to
    ``BinnedArray.to_file`` as ``comp_type``. When the arguments cannot be
    read, ``doc_optparse.exit()`` is called.

    Scores from every chromosome in the input are written into the same
    array, indexed by position only.
    """
    # Parse command line
    options, args = doc_optparse.parse( __doc__ )
    try:
        comp_type = options.comp if options.comp else None
        score_fname = args[0]
        out_fname = args[1]
    except Exception:
        # Not a bare ``except``: SystemExit and KeyboardInterrupt must not
        # be turned into a usage message.
        doc_optparse.exit()

    scores = BinnedArray()

    reader = bx.wiggle.Reader( misc.open_compressed( score_fname ) )
    for i, ( chrom, pos, val ) in enumerate( reader ):
        scores[pos] = val
        # Status (formatted so the output is identical on Python 2 and 3)
        if i % 10000 == 0:
            print( "%s scores processed" % i )

    # The context manager closes the output even if serialisation fails.
    with open( out_fname, "w" ) as out:
        if comp_type:
            scores.to_file( out, comp_type=comp_type )
        else:
            scores.to_file( out )
コード例 #5
0
ファイル: wiggle.py プロジェクト: timothyjamesbecker/FusorSV
 def __init__(self, wigFile):
     """Read a wig file and build the per-chromosome score table.

     ``self.scores`` maps the upper-cased chromosome name to a
     ``BinnedArray`` holding the value at each position, as produced by
     ``bx.wiggle.Reader``. ``self.num_re`` is a compiled pattern matching
     runs of digits, dots and sign characters.

     Progress is printed every 100000 data points, followed by the total
     number of points loaded (0 for an empty file).
     """
     self.scores = {}
     self.num_re = re.compile(r'[\d\.\-\+]+')
     # Tracked separately from the loop index so that an empty file does
     # not leave the final report referencing an unbound variable, and so
     # that the total is a count rather than the last zero-based index.
     total = 0
     with open(wigFile) as fh:
         for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(fh)):
             chrom = chrom.upper()
             if chrom not in self.scores:
                 self.scores[chrom] = BinnedArray()
             self.scores[chrom][pos] = val
             if i % 100000 == 0:
                 print("%i datapoints loaded \r" % i)
             total = i + 1
     print("total " + str(total) + " points loaded")
コード例 #6
0
def main():
    """Split a wiggle file into one BinnedArray file per chromosome.

    The input path is taken from ``sys.argv[1]``. Each chromosome's scores
    are collected in a ``BinnedArray`` and then written with ``to_file`` to
    a file in the current directory named after the chromosome.
    """
    # Called for its command-line handling; the parsed values are not used.
    doc_optparse.parse(__doc__)

    scores = {}
    # Context manager so the input handle is released once reading ends.
    with open(sys.argv[1]) as score_file:
        for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(score_file)):
            if chrom not in scores:
                scores[chrom] = BinnedArray()
            scores[chrom][pos] = val

            # Status
            if i % 10000 == 0:
                print(i, "scores processed")

    # ``chrom_name`` rather than ``chr`` to avoid shadowing the builtin.
    for chrom_name, chrom_scores in scores.items():
        with open(chrom_name, "w") as out:
            chrom_scores.to_file(out)
コード例 #7
0
def read_WIG(wig_file):
    """Parse a variableStep wiggle file into per-chromosome BinnedArrays.

    Each ``variableStep`` declaration line selects the chromosome (its
    ``chrom=`` field) that the following ``<position> <value>`` lines
    belong to. Several declarations for the same chromosome accumulate
    into one array instead of discarding the earlier section. Blank lines,
    ``#`` comments and ``track`` / ``browser`` header lines are skipped.
    Any ``span=`` field is ignored.

    Returns a dict mapping chromosome name to a ``BinnedArray`` of values
    indexed by position.

    Raises:
        KeyError: a ``variableStep`` line has no ``chrom`` field.
        ValueError: a position or value is not numeric, or a data line
            appears before the first ``variableStep`` declaration.
        IndexError: a data line has fewer than two columns.
    """
    ribo_cov = {}
    chrom = None  # chromosome of the section currently being read
    with open(wig_file, "r") as wig:
        for line in wig:
            if line.startswith('variableStep'):  # specific for Shoelaces wig files
                declarations = {}
                for field in line.split()[1:]:
                    key, _, value = field.partition("=")
                    declarations[key] = value.strip('"')
                chrom = declarations['chrom']
                if chrom not in ribo_cov:
                    ribo_cov[chrom] = BinnedArray()
                continue

            tmp = line.split()
            if not tmp or tmp[0].startswith('#') or tmp[0] in ('track', 'browser'):
                # Nothing to store on blank, comment or header lines.
                continue

            pos = int(tmp[0])
            val = float(tmp[1])
            if chrom is None:
                raise ValueError(
                    'data line found before any variableStep declaration in "%s"'
                    % wig_file)
            ribo_cov[chrom][pos] = val
    return ribo_cov
コード例 #8
0
def main():
    """Split a wiggle file into one BinnedArray file per chromosome.

    Command line: one positional argument, the input score file. When it
    is missing, ``doc_optparse.exit()`` is called. Each chromosome's scores
    are collected in a ``BinnedArray`` and written with ``to_file`` to a
    file in the current directory named after the chromosome.
    """
    # Parse command line
    options, args = doc_optparse.parse( __doc__ )
    try:
        score_fname = args[0]
    except Exception:
        # Not a bare ``except``: SystemExit and KeyboardInterrupt must not
        # be turned into a usage message.
        doc_optparse.exit()

    scores = {}
    # Read the path that was validated above rather than sys.argv[1],
    # which is an option flag whenever options precede the file name.
    with open( score_fname ) as score_file:
        for i, ( chrom, pos, val ) in enumerate( bx.wiggle.Reader( score_file ) ):
            if chrom not in scores:
                scores[ chrom ] = BinnedArray()
            scores[chrom][pos] = val

            # Status (formatted so the output is identical on Python 2 and 3)
            if i % 10000 == 0:
                print( "%s scores processed" % i )

    # ``chrom_name`` rather than ``chr`` to avoid shadowing the builtin.
    for chrom_name in scores:
        with open( chrom_name, "w" ) as out:
            scores[chrom_name].to_file( out )
コード例 #9
0
#!/usr/bin/env python
from bx.binned_array import BinnedArray
import os

# Module-level settings; COMPTYPE is not used within the visible lines.
COMPTYPE = 'zlib'
# Typecode handed to every BinnedArray built by newarray().
TYPECODE = 'I'


def newarray():
    """Return a new BinnedArray with default value 0 and typecode TYPECODE."""
    return BinnedArray(default=0, typecode=TYPECODE)


def open_with_newdir(filename):
    """Open *filename* for writing, creating its parent directory if needed.

    A path with no directory component (for example ``"out.txt"``) is
    opened directly in the current working directory.

    Returns the file object opened in text write mode (``'w'``); the
    caller is responsible for closing it.
    """
    dname = os.path.dirname(filename)
    # ``dirname`` is '' for a bare file name, and os.makedirs('') raises,
    # so only create a directory when there is one to create.
    if dname and not os.path.isdir(dname):
        os.makedirs(dname)
    return open(filename, 'w')


# Ordered channel names: 'depth' followed by the base letters.
BASESPACE = ['depth', 'A', 'C', 'G', 'T', 'N']
# Reverse lookup: channel name -> its index in BASESPACE.
BASESPACE2i = {name: index for index, name in enumerate(BASESPACE)}


def process(inpf, prefix):
    arrchrom = None
    arrtype = None
    arr = []

    def flush():
        if arrchrom is None:
            return

        filenamefmt = '%s/%%s/%s.%s' % (prefix, arrchrom[0], arrchrom[1])
        if arrtype != 'M':