# NOTE(review): the statements on the following physical line have lost their
# line breaks and indentation. As written, everything after the first '#' on
# that line (the `return` and the whole `__main__` block) is comment text to
# the interpreter. Restore the original layout before relying on this code.
#
# Visible part 1 -- tail of the column-homogeneity computation (the function
# header and the `if` paired with the leading `else:` are not visible here;
# presumably this runs once per column -- TODO confirm against the full file):
#   * a symbol with no entry yet in symbolCount gets the count 1;
#   * if "-" is present and its count equals len(column), 0 is appended to
#     columnHomogeneity;
#   * otherwise, over every symbol other than "-", upperTerm sums the squared
#     counts and lowerTerm sums the counts, and upperTerm / lowerTerm**2 is
#     appended to columnHomogeneity;
#   * symbolCount is appended to allSymbolCounts, and columnHomogeneity is
#     returned at the end.
# NOTE(review): lowerTerm stays 0 when no non-"-" symbol was counted and the
# first branch did not fire (e.g. an empty column), which would make the
# division raise ZeroDivisionError -- confirm whether such columns can occur.
#
# Visible part 2 -- script entry point: reads
# "../testfiles/mini-fasta-test.fst" with cReadFASTA, passes the result through
# columnifyBasal and then purgeColordal, prints it, computes the homogeneity of
# calign.columns, and prints both the list and its arithmetic mean.
# NOTE(review): the mean divides by len(columnHomogeneity); an empty list would
# raise ZeroDivisionError.
else: symbolCount[symbol] = 1 if ("-" in symbolCount) and symbolCount["-"] == len(column): columnHomogeneity.append(0) else: upperTerm = 0 lowerTerm = 0 for symbol in symbolCount: if symbol != "-": upperTerm += pow(symbolCount[symbol], 2) lowerTerm += symbolCount[symbol] columnHomogeneity.append(upperTerm / pow(lowerTerm, 2)) allSymbolCounts.append(symbolCount) # for sc,ch in zip(allSymbolCounts,columnHomogeneity): # print(sc, ch) return columnHomogeneity if __name__ == "__main__": filename = "../testfiles/mini-fasta-test.fst" balign = cReadFASTA(filename) calign = purgeColordal(columnifyBasal(balign)) print(calign) columnHomogeneity = computeColumnHomogeneity(calign.columns) print(columnHomogeneity) print(sum(columnHomogeneity) / len(columnHomogeneity))
# NOTE(review): the statements on the following physical line have lost their
# line breaks and indentation. Because that line begins with '#', the
# interpreter currently treats all of it as a comment. Restore the original
# layout before relying on this code. The fragment starts mid-script and the
# `for` loop continues beyond what is visible here.
#
# What the visible statements do, in order:
#   1. Write basalPreIM to '/tmp/alintmp.fst' via basicalign.printFASTA and
#      close the file.
#   2. Start the FFTNS command with subprocess.Popen (stdin, stdout and stderr
#      all piped) and send the contents of `sio` to its stdin.
#   3. Decode the command's stdout, parse it with basicalign.readFASTA, and
#      convert the result with colordalign.columnifyBasal.
#   4. Split it with partitionalign.partitionAlignment, passing
#      columneval.computeColumnHomogeneity, 0.85 and 10 (the meaning of the
#      two numbers is not visible here).
#   5. Walk the (part, ann) pairs: `offset` accumulates the sizes of the
#      preceding partitions; when `ann` is truthy and the partition has at
#      least `mlp` columns, mutable_counter is incremented, the column range
#      is printed to stderr, and the partition is converted with
#      partitionalign.basalifyPartition.
#
# NOTE(review): `sio` is never written to in the visible code (the printFASTA
# call that targeted it is commented out), so the child receives empty stdin.
# Presumably FFTNS reads '/tmp/alintmp.fst' instead -- confirm against the
# definition of FFTNS.
# NOTE(review): the temp file uses a fixed, shared path and is opened/closed
# manually; consider the tempfile module and a `with` block.
# NOTE(review): `err` and the child's return code are not inspected in the
# visible code, so a failed run would only surface when parsing `out`.
#columnHomogeneity = columneval.computeColumnHomogeneity(colordalign.columnifyBasal(basalPreIM).columns) #print('Initial CH:', sum(columnHomogeneity)/len(columnHomogeneity)) sio = io.StringIO() #basicalign.printFASTA(basalPreIM, sio) tmpFile = open('/tmp/alintmp.fst','w') basicalign.printFASTA(basalPreIM,tmpFile) tmpFile.close() process = subprocess.Popen(FFTNS, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = process.communicate( bytes(sio.getvalue(), 'utf-8') ) basalPostIM = basicalign.readFASTA( io.StringIO(out.decode()) ) colordal = colordalign.columnifyBasal(basalPostIM) partitions = partitionalign.partitionAlignment(colordal, columneval.computeColumnHomogeneity, 0.85, 10) partitionsPost = [] offset = 0 size = 0; mutable_counter = 0; for part,ann in partitions: offset = offset + size size = len(part) #if ann: if ann and size >= mlp: mutable_counter = mutable_counter + 1 print("Mutable", offset, '-', offset+size-1, '('+str(size)+')', file=sys.stderr) #print('Mutable partition:') basalpartPre = partitionalign.basalifyPartition(part, colordal.names)