def ReadSeqs3(FileName, OnSeq, ShowProgress=True):
    """Stream a FASTA file, calling OnSeq(Label, Seq) once per record.

    Header lines start with ">"; the text after ">" is the label. All
    following non-blank lines, stripped of surrounding whitespace, are
    concatenated to form the sequence. Blank lines are ignored. A record
    with no sequence lines is not reported to OnSeq.

    If the module-level TRUNC_LABELS flag is true, each label is cut at
    its first whitespace before being passed to OnSeq. This now applies to
    the last record in the file as well.

    Args:
        FileName: path of the FASTA file to read.
        OnSeq: callable invoked as OnSeq(Label, Seq) for every record.
        ShowProgress: when true, report progress through the `progress`
            module (FileInit once, FileStep every 100 records, FileDone
            at end of file).

    Returns:
        None.
    """
    def Emit(RawLabel, SeqParts):
        # Apply the optional label truncation in one place so that every
        # record, including the final one, is treated the same way.
        if TRUNC_LABELS:
            Fields = RawLabel.split()
            # A bare ">" header has no fields; keep the empty label
            # instead of failing on Fields[0].
            if Fields:
                RawLabel = Fields[0]
        OnSeq(RawLabel, "".join(SeqParts))

    Label = ""
    Parts = []
    n = 0
    with open(FileName) as File:
        if ShowProgress:
            progress.FileInit(File, FileName)
        while 1:
            # readline() is kept on purpose: iterating the file object
            # disables File.tell() in Python 3, which the progress helper
            # may depend on -- TODO confirm against the progress module.
            Line = File.readline()
            if len(Line) == 0:
                break
            Line = Line.strip()
            if len(Line) == 0:
                continue
            if Line[0] == ">":
                if Parts:
                    # Step the progress display once per 100 records.
                    if ShowProgress and n % 100 == 0:
                        progress.FileStep()
                    n += 1
                    Emit(Label, Parts)
                Label = Line[1:]
                Parts = []
            else:
                Parts.append(Line)
        # Flush the record still pending at end of file.
        if Parts:
            Emit(Label, Parts)
        if ShowProgress:
            progress.FileDone()
def ReadSeqs2(FileName, ShowProgress=True):
    """Read a whole FASTA file into parallel lists of labels and sequences.

    Header lines start with ">"; the text after ">" is the label. All
    following non-blank lines, stripped of surrounding whitespace, are
    concatenated to form that record's sequence. Blank lines are ignored.
    A header with no sequence lines yields an empty-string sequence.

    If the module-level TRUNC_LABELS flag is true, each label is cut at
    its first whitespace.

    Args:
        FileName: path of the FASTA file to read.
        ShowProgress: when true, report progress through the `progress`
            module. When false, the `progress` module is not touched.

    Returns:
        A (Labels, Seqs) tuple of two lists of equal length, where
        Seqs[i] is the sequence belonging to Labels[i].

    Raises:
        IndexError: if sequence data appears before the first header line.
    """
    Labels = []
    # One list of line fragments per record, joined once at the end to
    # avoid rebuilding a growing string for every input line.
    SeqParts = []
    with open(FileName) as File:
        if ShowProgress:
            progress.FileInit(File, FileName)
        while 1:
            # Only step the progress display when it was initialised.
            if ShowProgress and len(Labels) % 100 == 0:
                progress.FileStep()
            # readline() is kept on purpose: iterating the file object
            # disables File.tell() in Python 3, which the progress helper
            # may depend on -- TODO confirm against the progress module.
            Line = File.readline()
            if len(Line) == 0:
                break
            Line = Line.strip()
            if len(Line) == 0:
                continue
            if Line[0] == ">":
                Id = Line[1:]
                if TRUNC_LABELS:
                    Fields = Id.split()
                    # A bare ">" header has no fields; keep the empty
                    # label instead of failing on Fields[0].
                    if Fields:
                        Id = Fields[0]
                Labels.append(Id)
                SeqParts.append([])
            else:
                # Raises IndexError when no header has been seen yet.
                SeqParts[-1].append(Line)
        if ShowProgress:
            progress.FileDone()
    Seqs = ["".join(Parts) for Parts in SeqParts]
    return Labels, Seqs
PctIdToCount = {} PctIdToLCRCount = {} def Add(LCR, PctId): sortdict.IncCount(LCRToCount, LCR) sortdict.IncCount(PctIdToCount, PctId) sortdict.IncCount2(PctIdToLCRCount, PctId, LCR) MinPctId = 100 File = open(FileName) progress.FileInit(File, FileName) while 1: Line = File.readline() progress.FileStep() if len(Line) == 0: progress.FileDone() break Fields = Line.strip().split('\t') assert len(Fields) == 3 Label1 = Fields[0] Label2 = Fields[1] Dist = float(Fields[2]) assert Dist >= 0.0 and Dist <= 1.0 if Label1 == Label2: continue LCR = utax2.GetLCRFromLabels(Label1, Label2) PctId = int(100.0 * (1.0 - Dist)) if PctId < MinPctId: