def doTheCons(sqs, trh, lengthQuant=40):
    """Build a consensus sequence for ``sqs``, trying up to three input orders.

    Each attempt multiple-aligns the sequences (``seqMultiAlign``), derives a
    consensus from the alignment profile (``calign.createProfile`` -> ``cons``
    -> ``stripseq``) and scores it with a spot check against a small random
    sample of the input. The lowest-scoring consensus seen is returned.

    Orders tried, in turn:
      1. descending by length quantized to multiples of ``lengthQuant``
         (ties fall back to comparing the sequences themselves),
      2. the caller's original order,
      3. a random shuffle.

    :param sqs: the input sequences (must support ``len`` on each element).
    :param trh: score threshold; an attempt scoring below it ends the search.
    :param lengthQuant: bucket width used when ordering by length.
    :return: the best consensus found, or ``None`` if no attempt scored
             below the initial sentinel of 100000.
    """
    # First ordering: longest (quantized) sequences first.
    keyed = [(lengthQuant * (len(seq) // lengthQuant), seq) for seq in sqs]
    ordering = [seq for _, seq in sorted(keyed, reverse=True)]

    # Small random subset (at most 5) used to spot check each candidate.
    spot_sample = random.sample(sqs, min(5, len(sqs)))

    best_cons, best_score = None, 100000
    for attempt in range(3):
        if attempt == 1:
            # try original order
            ordering = sqs
        elif attempt == 2:
            # try random order
            ordering = list(sqs)
            random.shuffle(ordering)

        alignment = seqMultiAlign(ordering)
        candidate = stripseq(cons(calign.createProfile(alignment)))

        # spot check
        pair_scores = calign.allpairs(candidate, spot_sample,
                                      report=calign.JCcorrection)
        score = mean(pair_scores) / 2

        if score < best_score:
            best_cons, best_score = candidate, score
        if score < trh:
            # Good enough; no need to try another ordering.
            break

    return best_cons
def getDist(i, j):
    """Return the distance estimate between entries ``i`` and ``j``.

    For each side, when its ``mhs`` value is below ``lowDiversity`` a single
    consensus sequence (``getCons``) stands in for it; otherwise its
    representatives (``getReps``) are used. The estimate is the mean of the
    pairwise values reported by ``calign`` over all chosen pairs.

    If a consensus was used and the estimate is small (below the floor, or
    below both ``refineUpperLimit`` and ``floor * refineFactor``), the
    single-sequence side(s) are replaced by their representatives and the
    extra comparisons are folded into the mean.

    The result is never less than ``2 * max(mhs[i], mhs[j])``.

    Side effect: adds the number of pairwise comparisons performed to the
    module-level counter ``acnt``.
    """
    global acnt

    level_i, level_j = mhs[i], mhs[j]

    # Choose the sequences for each side (i first, then j).
    cons_for_i = level_i < lowDiversity
    seqs_i = [getCons(i)] if cons_for_i else getReps(i)
    cons_for_j = level_j < lowDiversity
    seqs_j = [getCons(j)] if cons_for_j else getReps(j)
    used_consensus = cons_for_i or cons_for_j

    pair_count = len(seqs_i) * len(seqs_j)
    if pair_count == 1:
        dist = calign.globalAlign(seqs_i[0], seqs_j[0],
                                  scores=defaultMatchScores,
                                  report=calign.JCcorrection)
    else:
        pair_rows = calign.allpairs(seqs_i, seqs_j, align=True,
                                    scores=defaultMatchScores,
                                    report=calign.JCcorrection)
        dist = sum(sum(row) for row in pair_rows) / pair_count
    acnt += pair_count

    floor = 2 * max(level_i, level_j)

    refine = used_consensus and (
        dist < floor
        or (dist < refineUpperLimit and dist < floor * refineFactor)
    )
    if refine:
        # Swap any single-sequence side for its full set of representatives.
        full_i = getReps(i) if len(seqs_i) == 1 else seqs_i
        full_j = getReps(j) if len(seqs_j) == 1 else seqs_j
        if seqs_i != full_i or seqs_j != full_j:
            extra_rows = calign.allpairs(full_i, full_j, align=True,
                                         scores=defaultMatchScores,
                                         report=calign.JCcorrection)
            extra_total = sum(sum(row) for row in extra_rows)
            extra_count = len(full_i) * len(full_j)
            acnt += extra_count
            # Pooled mean over the first-pass and refinement comparisons.
            dist = (dist * pair_count + extra_total) / (pair_count + extra_count)

    return max(dist, floor)