def SNP_MIP_Gap(chrome, start, end, min_hom_length, max_hom_length,
                tm_min, tm_max, gc_threshold_min, gc_threshold_max,
                ref, alt):
    """Build probe records with the SNP placed inside a multi-base gap fill.

    For every gap length ``n`` in ``1..gap_num`` (``gap_num`` is resolved
    outside this function) and every shift ``j`` in ``0..n-1``, the arm
    windows are moved so that the variant occupies index ``n - j - 1`` of
    the ``n``-base gap fill.  Writing ``+`` for the variant base:

    * 2-base gap fill -> ``-+``, ``+-``
    * 3-base gap fill -> ``--+``, ``-+-``, ``+--``

    Args:
        chrome: Chromosome name; the ``"chr"`` substring is removed before
            it is handed to ``nibFragger``, the full name is used in the
            emitted position columns.
        start, end: Variant coordinates used to place the arm windows.
        min_hom_length, max_hom_length: Arm length bounds; ``max_hom_length``
            is the window size fetched for each arm.
        tm_min, tm_max: Forwarded to ``get_seq`` as ``MIN_TM`` / ``MAX_TM``.
        gc_threshold_min, gc_threshold_max: Forwarded to ``get_seq`` as
            ``GC_MIN`` / ``GC_MAX``.
        ref: Not used by this function.
        alt: Value written into the gap fill at the variant index.

    Returns:
        The concatenated ``make_Hom_pairs`` output for the ``+`` and ``-``
        strands of every (n, j) combination, each followed by tab-separated
        upstream position, downstream position and ``getName(n, j)``.
    """
    # Selection thresholds shared by the upstream and downstream get_seq calls.
    criteria = dict(
        MIN_LENGTH=min_hom_length,
        MIN_TM=tm_min,
        MAX_TM=tm_max,
        GC_MIN=gc_threshold_min,
        GC_MAX=gc_threshold_max,
    )

    returnStr = ""
    for n in range(1, gap_num + 1):
        for j in range(n):
            contig = chrome.replace("chr", "")
            snp_index = n - j - 1          # index of the variant inside the gap fill
            gap_start = start - snp_index  # first coordinate of the gap fill
            down_start = end + 1 + j       # first coordinate of the downstream arm

            ## Fetch sequences
            upstream_seq = nibFragger(
                contig, start - max_hom_length - snp_index,
                max_hom_length).lower()
            downstream_seq = nibFragger(
                contig, down_start, max_hom_length).lower()
            upstream_list = get_seq(
                upstream_seq, right2left=False, **criteria)
            downstream_list = get_seq(
                downstream_seq, right2left=True, **criteria)

            # Wild-type gap fill from the reference; mutant copy carries alt
            # at the variant index.
            gapfill_W = nibFragger(contig, gap_start, n)
            bases = list(gapfill_W)
            bases[snp_index] = alt
            gapfill_M = "".join(bases)

            # Arm spans are derived from element [3] of the first candidate.
            upstream_pos = "%s:%s-%s" % (
                chrome, gap_start - upstream_list[0][3], gap_start - 1)
            downstream_pos = "%s:%s-%s" % (
                chrome, down_start, down_start + downstream_list[0][3] - 1)

            for strand in ("+", "-"):
                if strand == "+":
                    fill_M, fill_W = gapfill_M, gapfill_W
                else:
                    fill_M, fill_W = revcomp(gapfill_M), revcomp(gapfill_W)
                pairs = make_Hom_pairs(
                    upstream_list, downstream_list,
                    hom_strand=strand,
                    MIP_Mut_Alignment="SNP_on_GF",
                    GapFillBase_M=fill_M,
                    GapFillBase_W=fill_W)
                # NOTE(review): the strip acts on the whole accumulated text,
                # so only its final line receives the extra columns -- confirm
                # make_Hom_pairs always yields exactly one line here.
                returnStr = (returnStr + pairs).rstrip("\n")
                returnStr += "\t".join(
                    ("", upstream_pos, downstream_pos, getName(n, j))) + "\n"
    return returnStr
def MNP_MIP(chrome, start, end, min_hom_length, max_hom_length,
            tm_min, tm_max, gc_threshold_min, gc_threshold_max, ref, alt):
    """Build probe records for a multi-nucleotide variant in three layouts.

    1. The variant is written onto the tail of the upstream arm; the gap
       fill is the single base at ``end + 1``.
    2. The variant is written onto the head of the downstream arm; the gap
       fill is the single base at ``start - 1``.
    3. For each position of ``ref`` the gap fill is that position
       (``ref[k]`` wild type, ``alt[k]`` mutant); the part of ``alt`` before
       it goes onto the upstream arm and the part after it onto the
       downstream arm.

    Every layout is emitted for the ``+`` and the ``-`` strand, with the
    H1/H2 labels swapped and the gap fill passed through ``revcomp`` on
    ``-``.

    Args:
        chrome: Chromosome name; ``"chr"`` is removed before sequence fetch.
        start, end: Variant coordinates used to place the arm windows.
        min_hom_length, max_hom_length: Arm length bounds; ``max_hom_length``
            is the window size fetched for each arm.
        tm_min, tm_max: Forwarded to ``get_seq`` as ``MIN_TM`` / ``MAX_TM``.
        gc_threshold_min, gc_threshold_max: Forwarded to ``get_seq`` as
            ``GC_MIN`` / ``GC_MAX``.
        ref: Reference allele; its length drives layout 3.
        alt: Alternate allele; indexed with positions of ``ref``, so it must
            be at least as long as ``ref``.

    Returns:
        The concatenation of every ``make_Hom_pairs`` result, in call order.
    """
    contig = chrome.replace("chr", "")
    criteria = dict(
        MIN_LENGTH=min_hom_length,
        MIN_TM=tm_min,
        MAX_TM=tm_max,
        GC_MIN=gc_threshold_min,
        GC_MAX=gc_threshold_max,
    )

    def fetch_arm(position):
        # Lower-cased reference window of max_hom_length bases.
        return nibFragger(contig, position, max_hom_length).lower()

    def pick_arms(upstream, downstream):
        # Candidate arms; upstream selection is evaluated first.
        ups = get_seq(upstream, right2left=False, **criteria)
        downs = get_seq(downstream, right2left=True, **criteria)
        return ups, downs

    chunks = []

    # --- Layout 1: MNP on the H2 forward and H1 reverse -------------------
    up_seq = fetch_arm(end - max_hom_length + 1)
    down_seq = fetch_arm(end + 2)
    gap_base = nibFragger(contig, end + 1, 1)
    up_seq = replaceString(up_seq, alt, first=False)
    ups, downs = pick_arms(up_seq, down_seq)
    chunks.append(make_Hom_pairs(
        ups, downs, hom_strand="+", MIP_Mut_Alignment="MNP_on_H2",
        GapFillBase_M=gap_base, GapFillBase_W=gap_base))
    chunks.append(make_Hom_pairs(
        ups, downs, hom_strand="-", MIP_Mut_Alignment="MNP_on_H1",
        GapFillBase_M=revcomp(gap_base), GapFillBase_W=revcomp(gap_base)))

    # --- Layout 2: MNP on the H2 reverse strand or H1 forward strand ------
    up_seq = fetch_arm(start - max_hom_length - 1)
    down_seq = fetch_arm(start)
    gap_base = nibFragger(contig, start - 1, 1)
    # The variant goes onto the head of the downstream arm.
    down_seq_mut = replaceString(down_seq, alt, first=True)
    ups, downs = pick_arms(up_seq, down_seq_mut)
    chunks.append(make_Hom_pairs(
        ups, downs, hom_strand="-", MIP_Mut_Alignment="MNP_on_H2",
        GapFillBase_M=revcomp(gap_base), GapFillBase_W=revcomp(gap_base)))
    chunks.append(make_Hom_pairs(
        ups, downs, hom_strand="+", MIP_Mut_Alignment="MNP_on_H1",
        GapFillBase_M=gap_base, GapFillBase_W=gap_base))

    # --- Layout 3: one variant base in the gap fill, the rest on the arms -
    for k, wild_base in enumerate(ref):
        up_seq = fetch_arm(start - max_hom_length + k)
        down_seq = fetch_arm(start + k + 1)
        mut_base = alt[k]
        bases_on_up = k                    # variant bases before the gap fill
        bases_on_down = len(ref) - k - 1   # variant bases after the gap fill

        if bases_on_up > 0:
            up_seq = replaceString(up_seq, alt[:k], first=False)
        if bases_on_down > 0:
            down_seq = replaceString(down_seq, alt[k + 1:], first=True)
        ups, downs = pick_arms(up_seq, down_seq)

        # On "+" the upstream arm is labelled H2 and the downstream arm H1.
        plus_label = ("MNP_on_"
                      + ("H2" if bases_on_up > 0 else "")
                      + "Gap"
                      + ("H1" if bases_on_down > 0 else ""))
        chunks.append(make_Hom_pairs(
            ups, downs, hom_strand="+", MIP_Mut_Alignment=plus_label,
            GapFillBase_M=mut_base, GapFillBase_W=wild_base))

        # On "-" the labels swap and the gap-fill bases are reverse-complemented.
        minus_label = ("MNP_on_"
                       + ("H2" if bases_on_down > 0 else "")
                       + "Gap"
                       + ("H1" if bases_on_up > 0 else ""))
        chunks.append(make_Hom_pairs(
            ups, downs, hom_strand="-", MIP_Mut_Alignment=minus_label,
            GapFillBase_M=revcomp(mut_base), GapFillBase_W=revcomp(wild_base)))

    return "".join(chunks)
def SNP_MIP_original(chrome, start, end, min_hom_length, max_hom_length,
                     tm_min, tm_max, gc_threshold_min, gc_threshold_max,
                     ref, alt):
    """Build probe records for a SNP in three layouts, on both strands.

    1. ``SNP_on_GF`` -- the SNP is the gap-fill base.  Emitted only when one
       allele matches ``[AT]`` and the other matches ``[CG]``.
    2. The SNP is written onto the tail of the upstream arm (``SNP_on_H2_*``
       on ``+``, ``SNP_on_H1_*`` on ``-``); the gap fill is the base at
       ``end + 1``.
    3. The SNP is written onto the head of the downstream arm
       (``SNP_on_H2_*`` on ``-``, ``SNP_on_H1_*`` on ``+``); the gap fill is
       the base at ``end - 1``.

    Layouts 2 and 3 are produced twice: once with ``alt`` on the arm
    (suffix ``_M``) and once with ``ref`` (suffix ``_W``).

    Layout 3 reports the gap-fill base fetched at ``end - 1``.  Previously
    that base was fetched into an unused variable and the base at
    ``end + 1`` left over from layout 2 was reported instead.

    Args:
        chrome: Chromosome name; ``"chr"`` is removed before sequence fetch.
        start, end: SNP coordinates used to place the arm windows.
        min_hom_length, max_hom_length: Arm length bounds; ``max_hom_length``
            is the window size fetched for each arm.
        tm_min, tm_max: Forwarded to ``get_seq`` as ``MIN_TM`` / ``MAX_TM``.
        gc_threshold_min, gc_threshold_max: Forwarded to ``get_seq`` as
            ``GC_MIN`` / ``GC_MAX``.
        ref: Reference allele.
        alt: Alternate allele.

    Returns:
        The concatenation of every ``make_Hom_pairs`` result, in the order
        layout 1, layout 2 (M then W), layout 3 (M then W).
    """
    contig = chrome.replace("chr", "")
    criteria = dict(
        MIN_LENGTH=min_hom_length,
        MIN_TM=tm_min,
        MAX_TM=tm_max,
        GC_MIN=gc_threshold_min,
        GC_MAX=gc_threshold_max,
    )
    # (allele written onto the arm, label suffix): mutant first, then wild type.
    allele_passes = ((alt, "M"), (ref, "W"))
    chunks = []

    # --- Layout 1: SNP on the gap fill ------------------------------------
    at_to_cg = re.search(r"[AT]", ref) and re.search(r"[CG]", alt)
    if at_to_cg or (re.search(r"[AT]", alt) and re.search(r"[CG]", ref)):
        upstream_seq = nibFragger(
            contig, start - max_hom_length, max_hom_length).lower()
        downstream_seq = nibFragger(
            contig, end + 1, max_hom_length).lower()
        upstream_list = get_seq(upstream_seq, right2left=False, **criteria)
        downstream_list = get_seq(downstream_seq, right2left=True, **criteria)
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="+",
            MIP_Mut_Alignment="SNP_on_GF",
            GapFillBase_M=alt, GapFillBase_W=ref))
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="-",
            MIP_Mut_Alignment="SNP_on_GF",
            GapFillBase_M=revcomp(alt), GapFillBase_W=revcomp(ref)))

    # --- Layout 2: SNP on the H2 forward strand and H1 reverse ------------
    # The reference windows are identical for the M and W passes, so they
    # are fetched once and only the allele on the arm changes.
    upstream_seq = nibFragger(
        contig, start - max_hom_length + 1, max_hom_length).lower()
    downstream_seq = nibFragger(contig, end + 2, max_hom_length).lower()
    gapfill = nibFragger(contig, end + 1, 1)
    for allele, suffix in allele_passes:
        # The allele goes onto the tail of the upstream arm.
        upstream_list = get_seq(
            replaceString(upstream_seq, allele, first=False),
            right2left=False, **criteria)
        downstream_list = get_seq(downstream_seq, right2left=True, **criteria)
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="+",
            MIP_Mut_Alignment="SNP_on_H2_" + suffix,
            GapFillBase_M=gapfill, GapFillBase_W=gapfill))
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="-",
            MIP_Mut_Alignment="SNP_on_H1_" + suffix,
            GapFillBase_M=revcomp(gapfill), GapFillBase_W=revcomp(gapfill)))

    # --- Layout 3: SNP on the H2 reverse strand or H1 forward strand ------
    upstream_seq = nibFragger(
        contig, start - max_hom_length - 1, max_hom_length).lower()
    downstream_seq = nibFragger(contig, end, max_hom_length).lower()
    # Gap fill for this layout is the base between the two arms (end - 1).
    gapfill = nibFragger(contig, end - 1, 1)
    for allele, suffix in allele_passes:
        upstream_list = get_seq(upstream_seq, right2left=False, **criteria)
        # The allele goes onto the head of the downstream arm.
        downstream_list = get_seq(
            replaceString(downstream_seq, allele, first=True),
            right2left=True, **criteria)
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="-",
            MIP_Mut_Alignment="SNP_on_H2_" + suffix,
            GapFillBase_M=revcomp(gapfill), GapFillBase_W=revcomp(gapfill)))
        chunks.append(make_Hom_pairs(
            upstream_list, downstream_list, hom_strand="+",
            MIP_Mut_Alignment="SNP_on_H1_" + suffix,
            GapFillBase_M=gapfill, GapFillBase_W=gapfill))

    return "".join(chunks)