def p_g_1D(k, g, l):
    """Return the probability that k marked tbs form a maxgap(g)-cluster in a sequence of l tbs.

    The value is computed as ``num / den`` with::

        w   = (k - 1) * g + k
        num = max(0, l - w + 1) * (g + 1) ** (k - 1) + d_0(k, g, min(l, w - 1))
        den = combinations(l, k)

    ``d_0`` and ``combinations`` are helpers defined elsewhere in this file.

    Parameters:
        k: number of marked tbs.
        g: maximum gap allowed between consecutive marked tbs.
        l: length of the sequence (number of tbs).

    Returns:
        The probability as a float, or ``None`` (after writing a warning to
        stderr) when ``k > l``.
    """
    if k > l:
        # sys.stderr.write (instead of the Python-2-only ``print >>`` statement)
        # emits the same text on Python 2 and does not raise on Python 3.
        sys.stderr.write(
            "Warning: there are not %s tbs in the window of size %s\n" % (k, l)
        )
        return None

    # k elements plus (k - 1) gaps of g each -- presumably the widest span a
    # maxgap(g)-cluster of k elements can occupy.
    w = (k - 1) * g + k

    # First term: only non-zero when the sequence is at least w long.
    # Second term: delegated to d_0 on a length capped at w - 1.
    num = max(0, l - w + 1) * math.pow((g + 1), k - 1) + d_0(k, g, min(l, w - 1))
    den = combinations(l, k)
    return float(num) / den
def p_d(k, la, lb, nab, na, nb):
    """Return the ratio ``num / den`` defined below, or ``None`` when it cannot be computed.

    The computation is::

        num = C(nab, k) * sum_{i = 0 .. min(la - k, nab - k)}
                  C(nab - k, i) * C(na - nab, la - (k + i)) * C(nb - (k + i), lb - k)
        den = C(na, la) * C(nb, lb)

    where ``C`` is the ``combinations`` helper defined elsewhere in this file.

    NOTE(review): judging by the name and the warning messages, this looks
    like the probability of finding k dispersed paralogies (hps) in a window
    of size la x lb of an MHP holding nab hps, with na and nb the sizes of the
    two dimensions -- confirm against the reference this formula comes from.

    If ``nab > min(na, nb)`` a warning is written to stderr and ``nab`` is
    clamped to ``min(na, nb)`` before anything else is checked.

    Returns:
        * ``None`` (with a warning on stderr) when ``k > nab`` or
          ``k > min(la, lb)``;
        * ``None`` when ``num`` is infinite and ``den`` is infinite or zero;
        * ``0.0`` when only ``den`` is infinite;
        * ``float('inf')`` when ``den`` is zero and ``num`` is not infinite;
        * ``float(num) / den`` otherwise.
    """
    if nab > min(na, nb):
        sys.stderr.write(
            "Warning: during the computation of p_d(k=%s,la=%s,lb=%s,nab=%s,na=%s,nb=%s), there was too many dispersed paralogies in the MHP, thus for the calculation we chose nab=min(na,nb)=%s\n"
            % (k, la, lb, nab, na, nb, min(na, nb))
        )
        nab = min(na, nb)

    # These are independent checks, not ``elif`` branches of the clamp above:
    # they must also be applied to the clamped nab, otherwise the formula
    # would be evaluated with negative arguments.
    if k > nab:
        sys.stderr.write(
            "Warning: not able to compute p_d(k=%s,la=%s,lb=%s,nab=%s,na=%s,nb=%s) because there are not %s hps in the MHP\n"
            % (k, la, lb, nab, na, nb, k)
        )
        return None
    if k > min(la, lb):
        sys.stderr.write(
            "Warning: not able to compute p_d(k=%s,la=%s,lb=%s,nab=%s,na=%s,nb=%s) because there are too many dispersed paralogies in the window\n"
            % (k, la, lb, nab, na, nb)
        )
        return None

    limSum = min(la - k, nab - k)
    sum_ = 0
    for i in range(limSum + 1):
        try:
            term = (
                combinations(nab - k, i)
                * combinations(na - nab, la - (k + i))
                * combinations(nb - (k + i), lb - k)
            )
        except Exception as e:
            # Best effort, as in the original: a term that cannot be evaluated
            # is reported and counted as zero rather than aborting the sum.
            sys.stderr.write(
                "Warning: not able to compute p_d(k=%s,la=%s,lb=%s,nab=%s,na=%s,nb=%s) because %s\n"
                % (k, la, lb, nab, na, nb, e)
            )
            term = 0
        sum_ += term

    num = combinations(nab, k) * sum_
    den = combinations(na, la) * combinations(nb, lb)

    # Degenerate denominators are handled explicitly so that no inf/inf or
    # x/0 division is ever attempted.
    inf = float('inf')
    if den == inf or den == 0.0:
        if num == inf:
            return None
        return 0.0 if den == inf else inf
    return float(num) / den
num = combinations(nab,k)*sum_ den = combinations(na,la)*combinations(nb,lb) if den == float('inf') and num != float('inf'): return 0.0 elif den == float('inf') and num == float('inf'): return None elif den == 0.0 and num != float('inf'): return float('inf') elif den == 0.0 and num == float('inf'): return None else : return float(num)/den d_0 = lambda k,g,l : sum([((-1)**i)*combinations(k-1,i)*combinations(l-i*(g+1),k) for i in range(0,int(math.floor(float(l-k)/float(g+1))+1))]) # probability that k marked tbs form a maxgap(g)-cluster in a sequence of l tbs def p_g_1D(k,g,l): if k > l: print >> sys.stderr, "Warning: there are not %s tbs in the window of size %s" % (k,l) return None w = (k-1)*g+k num = max(0,l-w+1)*math.pow((g+1),k-1)+d_0(k,g,min(l,w-1)) den = combinations(l,k) return float(num)/den # probability that k marked hps form a maxgap(g)-cluster in both sequences of lengths la and lb def p_g_2D(k,g,la,lb): if k > min(la,lb): print >> sys.stderr, "Warning: there are not %s tbs in the window of size %sx%s" % (k,la,lb)