Example #1
0
def doTheCons(sqs, trh, lengthQuant = 40) :
  """Build a consensus for sqs, trying up to three alignment input orders.

  The orders tried, in turn, are: sequences sorted in descending order of
  length rounded down to a multiple of lengthQuant (ties broken by the
  sequence itself), the original order of sqs, and a random shuffle of sqs.
  Each candidate consensus is scored against a random sample of at most five
  of the input sequences; the score is half the mean of the values returned
  by calign.allpairs with the JCcorrection report.

  The search stops as soon as a score falls below trh, or after the third
  attempt. Returns the candidate with the lowest score seen, or None when no
  score was below the initial bound of 100000.
  """
  def quantizedLength(s) :
    return lengthQuant*(len(s)//lengthQuant)

  # Longest length class first; the sequence itself is the tie-breaker.
  order = sorted(sqs, key = lambda s : (quantizedLength(s), s), reverse = True)
  # Small fixed sample used to spot check every candidate consensus.
  spotCheck = random.sample(sqs, min(5, len(sqs)))
  bestCons, bestScore = None, 100000

  for attempt in range(3) :
    profile = calign.createProfile(seqMultiAlign(order))
    candidate = stripseq(cons(profile))
    dists = calign.allpairs(candidate, spotCheck, report = calign.JCcorrection)
    score = mean(dists)/2
    if score < bestScore :
      bestCons, bestScore = candidate, score

    # Good enough, or nothing left to try.
    if attempt == 2 or score < trh :
      break

    if attempt == 0 :
      # Next: the caller's original order.
      order = sqs
    else :
      # Next: a random order (shuffle a copy, leave sqs untouched).
      order = list(sqs)
      random.shuffle(order)

  return bestCons
Example #2
0
  def getDist(i,j) :
    """Return the distance between entries i and j, never below 2*max(mhs[i], mhs[j]).

    For each side, a single getCons() sequence is used when its mhs value is
    below lowDiversity, otherwise the getReps() list is used (presumably a
    consensus vs. representative sequences -- confirm against those helpers).
    The distance is the mean over all cross pairs of the JCcorrection report.

    When a getCons() sequence was used and the result is close to the lower
    limit, every single-sequence side is replaced by getReps() and, if that
    changed anything, the extra pairs are averaged into the result.

    The module-level counter acnt is incremented by the number of pairs
    compared.
    """
    global acnt

    def pickSeqs(k, diversity) :
      # Returns (sequences, whether the getCons() shortcut was taken).
      if diversity < lowDiversity :
        return [getCons(k)], True
      return getReps(k), False

    mi,mj = mhs[i],mhs[j]
    ri, consForI = pickSeqs(i, mi)
    rj, consForJ = pickSeqs(j, mj)
    anyCons = consForI or consForJ

    nhs = len(ri)*len(rj)
    if nhs != 1 :
      ap = calign.allpairs(ri, rj, align=True, scores = defaultMatchScores,
                           report = calign.JCcorrection)
      h = sum(sum(row) for row in ap)/nhs
    else :
      # Exactly one pair: a direct pairwise alignment is enough.
      h = calign.globalAlign(ri[0], rj[0], scores = defaultMatchScores,
                             report = calign.JCcorrection)
    acnt += nhs

    lowLim = 2*max(mi,mj)

    nearLimit = anyCons and (h < lowLim or
                             (h < refineUpperLimit and h < lowLim*refineFactor))
    if nearLimit :
      # Expand any single-sequence side to its getReps() list.
      xri = ri if len(ri) != 1 else getReps(i)
      xrj = rj if len(rj) != 1 else getReps(j)

      if ri != xri or rj != xrj :
        refined = calign.allpairs(xri, xrj, align=True, scores = defaultMatchScores,
                                  report = calign.JCcorrection)
        refinedTotal = sum(sum(row) for row in refined)

        refinedPairs = len(xri)*len(xrj)
        acnt += refinedPairs

        # Pair-count weighted mean of the first estimate and the refinement.
        h = (h * nhs + refinedTotal)/(nhs + refinedPairs)

    return max(h, lowLim)