def __init__(self, line, percent_slop=0, fixed_slop=0):
    '''
    Build the left and right breakpoint intervals from a variant line.

    line         -- the raw variant line; stored unchanged on self.l and
                    parsed with l_bp.split_v
    percent_slop -- forwarded to pad_slop on both intervals
    fixed_slop   -- forwarded to pad_slop on both intervals

    Padding is applied only when at least one slop argument is positive.
    A RuntimeError raised while building either interval is re-raised as
    MissingProbabilitiesException carrying the same message.
    '''
    self.l = line

    parsed = l_bp.split_v(line)
    (sv_type, left_chrom, right_chrom, strands,
     left_start, left_end, right_start, right_end, info) = parsed
    self.sv_type = sv_type
    self.strands = strands

    try:
        # Left is built first; its probabilities come from the PRPOS tag.
        left_probs = self.floats_from_tag(info, 'PRPOS')
        self.left = BreakpointInterval(left_chrom, left_start, left_end,
                                       left_probs)
        # Right uses the PREND tag.
        right_probs = self.floats_from_tag(info, 'PREND')
        self.right = BreakpointInterval(right_chrom, right_start, right_end,
                                        right_probs)
    except RuntimeError as err:
        raise MissingProbabilitiesException(str(err))

    if percent_slop > 0 or fixed_slop > 0:
        for interval in (self.left, self.right):
            interval.pad_slop(percent_slop, fixed_slop)
def print_var_line(l):
    '''
    Print a variant line; for a breakend also print its mate record.

    l -- one tab-separated variant line.

    Lines whose ALT column is <DEL>, <DUP> or <INV> are printed as given.
    Any other line is treated as a breakend: the original record is printed
    (first eight columns, ID suffixed with "_1", MATEID appended to INFO)
    followed by a secondary record located at the other end (ID suffixed
    with "_2"). In the secondary record the POS/END flavoured INFO values
    (CIPOS/CIEND, CIPOS95/CIEND95, PRPOS/PREND) are swapped.
    '''
    A = l.rstrip().split('\t')

    if A[4] in ('<DEL>', '<DUP>', '<INV>'):
        # NOTE(review): the whole input line is echoed here (all columns and
        # whatever trailing whitespace the caller passed in), unlike the
        # eight-column output of the breakend branch.
        print(l)
        return

    # Pick the needed values by position rather than unpacking a fixed
    # number of names, so the call works whether split_v returns its result
    # with or without the extra "strands" element; chr_l, chr_r and the
    # INFO mapping sit at positions 1, 2 and last in both layouts.
    fields = tuple(l_bp.split_v(l))
    chr_l, chr_r, m = fields[1], fields[2], fields[-1]

    CHROM = chr_r
    POS = m['END']
    ID = A[2] + '_2'
    REF = 'N'

    # The mate's ALT must describe the same junction seen from the other
    # side. Only the t[p[ / ]p]t pair changes shape; [p[t and t]p] keep
    # their own shape on the mate.
    primary_alt = A[4]
    mate_locus = chr_l + ':' + A[1]
    if primary_alt.startswith('['):
        ALT = '[' + mate_locus + '[N'
    elif primary_alt.startswith(']'):
        ALT = 'N[' + mate_locus + '['
    elif primary_alt.endswith('['):
        ALT = ']' + mate_locus + ']N'
    else:
        # t]p] form; also the historical fallback for anything unrecognised.
        ALT = 'N]' + mate_locus + ']'

    QUAL = A[5]
    FILTER = '.'
    SVTYPE = 'BND'
    STRANDS = m['STRANDS']
    SVLEN = '0'
    # Left/right flavoured values trade places on the mate record.
    CIPOS = m['CIEND']
    CIEND = m['CIPOS']
    CIPOS95 = m['CIEND95']
    CIEND95 = m['CIPOS95']
    IMPRECISE = 'IMPRECISE'
    SU = m['SU']
    PE = m['PE']
    SR = m['SR']
    PRPOS = m['PREND']
    PREND = m['PRPOS']
    SNAME = m['SNAME']
    EVENT = m['EVENT']
    SECONDARY = 'SECONDARY'
    MATEID = A[2] + '_1'

    INFO = ';'.join([
        'SVTYPE=' + str(SVTYPE),
        'STRANDS=' + str(STRANDS),
        'SVLEN=' + str(SVLEN),
        'CIPOS=' + str(CIPOS),
        'CIEND=' + str(CIEND),
        'CIPOS95=' + str(CIPOS95),
        'CIEND95=' + str(CIEND95),
        str(IMPRECISE),
        str(SECONDARY),
        'SU=' + str(SU),
        'PE=' + str(PE),
        'SR=' + str(SR),
        'PRPOS=' + str(PRPOS),
        'PREND=' + str(PREND),
        'SNAME=' + str(SNAME),
        'EVENT=' + str(EVENT),
        'MATEID=' + str(MATEID),
    ])
    O = [CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO]

    # Tag the primary record only after the mate's ID/MATEID were derived
    # from the unsuffixed ID.
    A[7] += ';MATEID=' + A[2] + '_2'
    A[2] += '_1'

    print('\t'.join(A[:8]))
    print('\t'.join([str(o) for o in O]))
def __init__(self, line, percent_slop=0, fixed_slop=0):
    '''
    Parse a variant line and store its two breakpoint probability curves.

    line         -- raw variant line; kept on self.l and parsed with
                    l_bp.split_v
    percent_slop -- fraction of an interval's width to add on each side
    fixed_slop   -- minimum number of positions to add on each side

    When either slop argument is positive, each interval is widened by
    int(max(percent_slop * width, fixed_slop)) positions per side, the new
    positions get a tiny probability, any part that falls below coordinate
    0 is dropped, and the curve is rescaled to sum to 1. With no slop the
    parsed PRPOS / PREND values are stored as they are.
    '''
    self.l = line
    (self.sv_type,
     self.chr_l,
     self.chr_r,
     self.strands,
     self.start_l,
     self.end_l,
     self.start_r,
     self.end_r,
     info) = l_bp.split_v(line)

    # TODO Handle missing PRPOS and PREND with intelligent message.
    self.p_l = [float(tok) for tok in info['PRPOS'].split(',')]
    self.p_r = [float(tok) for tok in info['PREND'].split(',')]

    # FIXME This is a constant. Pull out to make more obvious
    floor_prob = 1e-100

    def widen(start, end, probs, pad):
        # Pad both sides, trim what falls below 0, then renormalise.
        start = start - pad
        end = end + pad
        padded = [floor_prob] * pad + probs + [floor_prob] * pad
        if start < 0:
            padded = padded[-start:]
            start = 0
        total = sum(padded)
        return start, end, [float(p) / total for p in padded]

    if percent_slop > 0 or fixed_slop > 0:
        pad_l = int(max(percent_slop * (self.end_l - self.start_l + 1),
                        fixed_slop))
        pad_r = int(max(percent_slop * (self.end_r - self.start_r + 1),
                        fixed_slop))
        self.start_l, self.end_l, self.p_l = widen(
            self.start_l, self.end_l, self.p_l, pad_l)
        self.start_r, self.end_r, self.p_r = widen(
            self.start_r, self.end_r, self.p_r, pad_r)
def __init__(self, line, percent_slop=0, fixed_slop=0):
    '''
    Initialise breakpoint coordinates and probabilities from a variant line.

    The line is stored on self.l and parsed with l_bp.split_v; the PRPOS
    and PREND tags are read as comma-separated floats into self.p_l and
    self.p_r. If percent_slop or fixed_slop is positive, both intervals are
    extended on each side, the added positions receive a near-zero
    probability, anything left of coordinate 0 is cut off, and each curve
    is rescaled so that it sums to 1.
    '''
    self.l = line
    parsed = l_bp.split_v(line)
    (self.sv_type, self.chr_l, self.chr_r, self.strands,
     self.start_l, self.end_l, self.start_r, self.end_r, tags) = parsed

    # TODO Handle missing PRPOS and PREND with intelligent message.
    self.p_l = list(map(float, tags['PRPOS'].split(',')))
    self.p_r = list(map(float, tags['PREND'].split(',')))

    if percent_slop > 0 or fixed_slop > 0:
        # FIXME This is a constant. Pull out to make more obvious
        tiny = 1e-100

        width_l = self.end_l - self.start_l + 1
        width_r = self.end_r - self.start_r + 1
        extra_l = int(max(percent_slop * width_l, fixed_slop))
        extra_r = int(max(percent_slop * width_r, fixed_slop))

        # Extend each interval and give the new positions the tiny value.
        self.start_l -= extra_l
        self.end_l += extra_l
        curve_l = [tiny] * extra_l + self.p_l + [tiny] * extra_l

        self.start_r -= extra_r
        self.end_r += extra_r
        curve_r = [tiny] * extra_r + self.p_r + [tiny] * extra_r

        # Drop whatever now hangs off the left edge (coordinate < 0).
        if self.start_l < 0:
            curve_l = curve_l[-self.start_l:]
            self.start_l = 0
        if self.start_r < 0:
            curve_r = curve_r[-self.start_r:]
            self.start_r = 0

        # Rescale so each curve sums to 1.
        total_l = sum(curve_l)
        self.p_l = [float(v) / total_l for v in curve_l]
        total_r = sum(curve_r)
        self.p_r = [float(v) / total_r for v in curve_r]
def print_var_line(l):
    '''
    Print a variant line, expanding breakends into a mated pair of records.

    l -- one tab-separated variant line.

    Step 1: an <INV> record whose INFO contains "++:0" or "--:0" (only one
    of the two inversion orientations has support) is rewritten as a BND
    record: STRANDS keeps only the entries with a non-zero count, ALT
    becomes a breakend string pointing at END, and the POS/END flavoured
    INFO values are swapped. The rewrite is skipped, leaving the record as
    <INV>, when the first surviving strand entry is neither "++" nor "--"
    (or nothing survives), because no breakend ALT can be derived then.

    Step 2: a record whose ALT is <DEL>, <DUP> or <INV> is printed (first
    eight columns). Any other record is printed with "_1" appended to its
    ID and a MATEID added to INFO, followed by a SECONDARY mate record with
    ID suffix "_2" placed at the other end of the junction.
    '''
    A = l.rstrip().split('\t')

    if A[4] == '<INV>' and ('--:0' in A[7] or '++:0' in A[7]):
        (_, chr_l, _, _, _, _, _, _, m) = l_bp.split_v(l)

        # Keep supported orientations in input order. A list is used
        # instead of deleting from a dict while iterating over it, which
        # fails on Python 3 and leaves the output order arbitrary on
        # Python 2.
        supported = []
        for entry in m['STRANDS'].split(','):
            orientation, count = entry.split(':')
            if count != '0':
                supported.append((orientation, count))
        STRANDS = ','.join(['%s:%s' % pair for pair in supported])

        ALT = None
        if STRANDS[:2] == '++':
            ALT = 'N]' + chr_l + ':' + str(m['END']) + ']'
        elif STRANDS[:2] == '--':
            ALT = '[' + chr_l + ':' + str(m['END']) + '[N'

        # Only rewrite when an ALT could be derived; previously ALT was
        # left unbound in the other cases and the function crashed.
        if ALT is not None:
            A[4] = ALT
            A[7] = ';'.join([
                'SVTYPE=BND',
                'STRANDS=' + str(STRANDS),
                'CIPOS=' + str(m['CIEND']),
                'CIEND=' + str(m['CIPOS']),
                'CIPOS95=' + str(m['CIEND95']),
                'CIEND95=' + str(m['CIPOS95']),
                'IMPRECISE',
                'SU=' + str(m['SU']),
                'PE=' + str(m['PE']),
                'SR=' + str(m['SR']),
                'PRPOS=' + str(m['PREND']),
                'PREND=' + str(m['PRPOS']),
                'SNAME=' + str(m['SNAME']),
                'EVENT=' + str(A[2]),
            ])
            # reconstruct the line so it is re-parsed as a BND below
            l = '\t'.join(A)

    if A[4] in ('<DEL>', '<DUP>', '<INV>'):
        print('\t'.join(A[:8]))
        return

    (_, chr_l, chr_r, _, _, _, _, _, m) = l_bp.split_v(l)

    # Mirror the primary ALT for the mate. startswith/endswith are used so
    # an empty ALT falls through to '' instead of raising IndexError.
    primary_alt = A[4]
    mate_locus = chr_l + ':' + A[1]
    ALT = ''
    if primary_alt.startswith('['):
        ALT = '[' + mate_locus + '[N'
    elif primary_alt.startswith(']'):
        ALT = 'N[' + mate_locus + '['
    elif primary_alt.endswith('['):
        ALT = ']' + mate_locus + ']N'
    elif primary_alt.endswith(']'):
        ALT = 'N]' + mate_locus + ']'

    # Left/right flavoured values trade places on the mate record.
    INFO = ';'.join([
        'SVTYPE=BND',
        'STRANDS=' + str(m['STRANDS']),
        'CIPOS=' + str(m['CIEND']),
        'CIEND=' + str(m['CIPOS']),
        'CIPOS95=' + str(m['CIEND95']),
        'CIEND95=' + str(m['CIPOS95']),
        'IMPRECISE',
        'SECONDARY',
        'SU=' + str(m['SU']),
        'PE=' + str(m['PE']),
        'SR=' + str(m['SR']),
        'PRPOS=' + str(m['PREND']),
        'PREND=' + str(m['PRPOS']),
        'SNAME=' + str(m['SNAME']),
        'EVENT=' + str(A[2]),
        'MATEID=' + str(A[2] + '_1'),
    ])
    # CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
    O = [chr_r, m['END'], A[2] + '_2', 'N', ALT, A[5], '.', INFO]

    # Tag the primary record only after the mate's fields were derived from
    # the unsuffixed ID.
    A[7] += ';MATEID=' + A[2] + '_2'
    A[2] += '_1'

    print('\t'.join(A[:8]))
    print('\t'.join([str(o) for o in O]))
def print_var_line(l):
    '''
    Emit one variant line, adding a mate record when it is a breakend.

    l -- one tab-separated variant line.

    <DEL>, <DUP> and <INV> lines are printed as given. Every other line is
    handled as a breakend: its first eight columns are printed with "_1"
    appended to the ID and a MATEID entry appended to INFO, then a
    SECONDARY record with ID suffix "_2" is printed for the other end, with
    the CIPOS/CIEND, CIPOS95/CIEND95 and PRPOS/PREND values exchanged.
    '''
    A = l.rstrip().split('\t')

    if A[4] not in ('<DEL>', '<DUP>', '<INV>'):
        # Read chr_l, chr_r and the INFO mapping by position (1, 2, last)
        # so this works whether or not split_v includes a "strands" element
        # in its result; a fixed-length unpack would raise ValueError on a
        # length mismatch.
        parsed = tuple(l_bp.split_v(l))
        chr_l = parsed[1]
        chr_r = parsed[2]
        m = parsed[-1]

        CHROM = chr_r
        POS = m['END']
        ID = A[2] + '_2'
        REF = 'N'

        # Build the mate ALT for the same junction viewed from the far
        # end: [p[t and t]p] keep their shape, ]p]t and t[p[ swap.
        here = chr_l + ':' + A[1]
        alt_in = A[4]
        if alt_in[:1] == '[':
            ALT = '[' + here + '[N'
        elif alt_in[:1] == ']':
            ALT = 'N[' + here + '['
        elif alt_in[-1:] == '[':
            ALT = ']' + here + ']N'
        else:
            # t]p] form; also kept as the fallback used historically for
            # any ALT without a recognised bracket pattern.
            ALT = 'N]' + here + ']'

        QUAL = A[5]
        FILTER = '.'
        SVTYPE = 'BND'
        STRANDS = m['STRANDS']
        SVLEN = '0'
        # Left/right flavoured values trade places on the mate record.
        CIPOS = m['CIEND']
        CIEND = m['CIPOS']
        CIPOS95 = m['CIEND95']
        CIEND95 = m['CIPOS95']
        IMPRECISE = 'IMPRECISE'
        SU = m['SU']
        PE = m['PE']
        SR = m['SR']
        PRPOS = m['PREND']
        PREND = m['PRPOS']
        SNAME = m['SNAME']
        EVENT = m['EVENT']
        SECONDARY = 'SECONDARY'
        MATEID = A[2] + '_1'

        INFO = ';'.join(['SVTYPE=' + str(SVTYPE),
                         'STRANDS=' + str(STRANDS),
                         'SVLEN=' + str(SVLEN),
                         'CIPOS=' + str(CIPOS),
                         'CIEND=' + str(CIEND),
                         'CIPOS95=' + str(CIPOS95),
                         'CIEND95=' + str(CIEND95),
                         str(IMPRECISE),
                         str(SECONDARY),
                         'SU=' + str(SU),
                         'PE=' + str(PE),
                         'SR=' + str(SR),
                         'PRPOS=' + str(PRPOS),
                         'PREND=' + str(PREND),
                         'SNAME=' + str(SNAME),
                         'EVENT=' + str(EVENT),
                         'MATEID=' + str(MATEID)])
        O = [CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO]

        # The primary record is tagged only after the mate's ID/MATEID were
        # derived from the unsuffixed ID.
        A[7] += ';MATEID=' + A[2] + '_2'
        A[2] += '_1'

        print('\t'.join(A[:8]))
        print('\t'.join([str(o) for o in O]))
    else:
        # NOTE(review): echoes the input line verbatim (all columns),
        # unlike the eight-column output above.
        print(l)
def print_var_line(l):
    """
    Print a variant line, expanding breakends into a mated pair of records.

    l -- one tab-separated variant line.

    An <INV> record whose INFO contains "++:0" or "--:0" is first rewritten
    as a BND record: zero-count STRANDS entries are dropped, ALT becomes a
    breakend string pointing at END, and the POS/END flavoured INFO values
    are swapped. If the first remaining strand entry is neither "++" nor
    "--" (or none remains) the record is left as <INV>, since no breakend
    ALT can be derived.

    Afterwards <DEL>, <DUP> and <INV> records are printed as their first
    eight columns. Any other record is printed with "_1" appended to its ID
    and MATEID added to INFO, followed by a SECONDARY mate record ("_2").
    """
    A = l.rstrip().split("\t")

    if A[4] == "<INV>" and ("--:0" in A[7] or "++:0" in A[7]):
        (_, chr_l, _, _, _, _, _, _, m) = l_bp.split_v(l)

        # Collect the supported orientations in input order instead of
        # deleting keys from a dict during iteration (an error on Python 3,
        # and an arbitrary output order on Python 2).
        kept = []
        for item in m["STRANDS"].split(","):
            orient, n_reads = item.split(":")
            if n_reads != "0":
                kept.append("%s:%s" % (orient, n_reads))
        STRANDS = ",".join(kept)

        if STRANDS[:2] == "++":
            new_alt = "N]" + chr_l + ":" + str(m["END"]) + "]"
        elif STRANDS[:2] == "--":
            new_alt = "[" + chr_l + ":" + str(m["END"]) + "[N"
        else:
            # Previously ALT stayed unbound here and the function crashed;
            # leave the record untouched instead.
            new_alt = None

        if new_alt is not None:
            A[4] = new_alt
            A[7] = ";".join(
                [
                    "SVTYPE=BND",
                    "STRANDS=" + str(STRANDS),
                    "CIPOS=" + str(m["CIEND"]),
                    "CIEND=" + str(m["CIPOS"]),
                    "CIPOS95=" + str(m["CIEND95"]),
                    "CIEND95=" + str(m["CIPOS95"]),
                    "IMPRECISE",
                    "SU=" + str(m["SU"]),
                    "PE=" + str(m["PE"]),
                    "SR=" + str(m["SR"]),
                    "PRPOS=" + str(m["PREND"]),
                    "PREND=" + str(m["PRPOS"]),
                    "SNAME=" + str(m["SNAME"]),
                    "EVENT=" + str(A[2]),
                ]
            )
            # reconstruct the line so it is re-parsed as a BND below
            l = "\t".join(A)

    if A[4] not in ["<DEL>", "<DUP>", "<INV>"]:
        (_, chr_l, chr_r, _, _, _, _, _, m) = l_bp.split_v(l)

        CHROM = chr_r
        POS = m["END"]
        ID = A[2] + "_2"
        REF = "N"

        # Mirror the primary ALT for the mate. startswith/endswith are used
        # so an empty ALT yields "" instead of raising IndexError.
        ALT = ""
        if A[4].startswith("["):
            ALT = "[" + chr_l + ":" + A[1] + "[N"
        elif A[4].startswith("]"):
            ALT = "N[" + chr_l + ":" + A[1] + "["
        elif A[4].endswith("["):
            ALT = "]" + chr_l + ":" + A[1] + "]N"
        elif A[4].endswith("]"):
            ALT = "N]" + chr_l + ":" + A[1] + "]"

        QUAL = A[5]
        FILTER = "."
        EVENT = A[2]
        MATEID = A[2] + "_1"

        # Left/right flavoured values trade places on the mate record.
        INFO = ";".join(
            [
                "SVTYPE=BND",
                "STRANDS=" + str(m["STRANDS"]),
                "CIPOS=" + str(m["CIEND"]),
                "CIEND=" + str(m["CIPOS"]),
                "CIPOS95=" + str(m["CIEND95"]),
                "CIEND95=" + str(m["CIPOS95"]),
                "IMPRECISE",
                "SECONDARY",
                "SU=" + str(m["SU"]),
                "PE=" + str(m["PE"]),
                "SR=" + str(m["SR"]),
                "PRPOS=" + str(m["PREND"]),
                "PREND=" + str(m["PRPOS"]),
                "SNAME=" + str(m["SNAME"]),
                "EVENT=" + str(EVENT),
                "MATEID=" + str(MATEID),
            ]
        )
        O = [CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO]

        # Tag the primary record only after the mate's fields were derived
        # from the unsuffixed ID.
        A[7] += ";MATEID=" + A[2] + "_2"
        A[2] += "_1"

        print("\t".join(A[:8]))
        print("\t".join([str(o) for o in O]))
    else:
        print("\t".join(A[:8]))