def _mean_conservation(profile):
    """Return the arithmetic mean of `profile`, or NaN when it is empty."""
    # len() rather than truthiness: the profile type comes from pyalign and
    # may not define a usable boolean value.
    if len(profile) == 0:
        return float("nan")
    return sum(profile) * 1. / len(profile)


def main_consensus(args, stdout, stderr):
    """Write a consensus sequence for each input alignment.

    Each file in `args.input` is read as a FASTA alignment, passed through
    `pyalign.ungapAln`, and its consensus is written to `stdout` as a FASTA
    record named after the file's basename. Progress is reported on
    `stderr`. A `ValueError` raised while handling a file is reported on
    `stderr` and that file is skipped.

    Args:
        args: Parsed options. Reads `input` (sized collection of FASTA
            paths), `profiles` (output path or None) and `conservation`
            (output path or None).
        stdout: Writable stream receiving the consensus records.
        stderr: Writable stream receiving progress and error messages.

    When `args.profiles` is set, one line per alignment is written there:
    the basename followed by the tab-separated conservation profile. When
    `args.conservation` is set, one line per alignment is written there:
    the basename, a tab, and the mean of the profile (`nan` for an empty
    profile).

    Raises:
        AssertionError: If two input files share the same basename.
    """
    stderr.write("Note that positions with only gaps are removed before "
                 "calculations\n")
    consensus = {}
    profiles = {}
    total = str(len(args.input))
    for i, fastaFile in enumerate(args.input, 1):
        stderr.write("Processing file " + str(i) + "/" + total + " ")
        try:
            aln = pyalign.AlignIO.read(fastaFile, "fasta")
            stderr.write("- " + str(len(aln)) + " sequences ")
            stderr.write(".")
            aln = pyalign.ungapAln(aln)["ungappedAln"]
            stderr.write(".")
            k = os.path.basename(fastaFile)
            # Results are keyed by basename, so duplicates would silently
            # overwrite each other.
            assert k not in consensus, "Duplicate input basename: " + k
            consensus[k] = pyalign.makeConsensus(aln)
            stderr.write(".")
            stdout.write(">" + k + "\n")
            stdout.write(consensus[k] + "\n")
            profiles[k] = pyalign.conservationProfile(aln)
            stderr.write(".\n")
        except ValueError:
            stderr.write("Problem with " + fastaFile + "\n")
    # A ValueError raised after the consensus was stored but before the
    # profile was stored leaves a key without a profile; keep only the keys
    # that have both so the writers below cannot hit a KeyError.
    keys = [k for k in consensus if k in profiles]
    if args.profiles is not None:
        stderr.write("Writing profiles\n")
        with open(args.profiles, "w") as fo:
            for k in keys:
                fields = [k] + [str(x) for x in profiles[k]]
                fo.write("\t".join(fields) + "\n")
    if args.conservation is not None:
        stderr.write("Writing conservation values\n")
        with open(args.conservation, "w") as fo:
            for k in keys:
                fo.write(k + "\t" + str(_mean_conservation(profiles[k]))
                         + "\n")
def main_ungap(args, stdout, stderr):
    """Remove gaps from the FASTA alignments listed in `args.input`.

    Two modes are driven by `args`:

    * `args.all`: every input file is handed to `pyalign.ungapFastaFile`
      and written to `os.path.join(args.outDir, inputFile)`. This mode
      cannot be combined with `position`, `seq` or `syncNtDir`.
    * `args.position` and/or `args.seq` set: every input file is read,
      passed to `pyalign.ungapAln(aln, position, seq)` and the resulting
      alignment is written as FASTA. A missing `position` defaults to 1
      and a missing `seq` to 0. Output goes to the input path when
      `args.outDir` is None, otherwise to `args.outDir` under the input's
      basename. When `args.syncNtDir` is set, the companion
      `<basename>.alnNt` file in that directory is rewritten in place by
      `pyalign.ungapNtAlnFile` using the removed sequences and positions.

    A file that raises `ValueError` on reading is reported on `stderr`
    and skipped.

    Args:
        args: Parsed options. Reads `all`, `position`, `seq`, `syncNtDir`,
            `outDir` and `input`; it is not modified.
        stdout: Unused.
        stderr: Writable stream receiving progress and error messages.

    Raises:
        AssertionError: If `args.all` is combined with `position`, `seq`
            or `syncNtDir`.
    """
    if args.all:
        assert (args.position is None and args.seq is None
                and args.syncNtDir is None), \
            "all cannot be combined with position, seq or syncNtDir"
        for inputFile in args.input:
            stderr.write("Processing file " + inputFile + "\n")
            # NOTE(review): unlike the branch below, this joins the full
            # input path (not its basename) and requires outDir to be set.
            # Confirm whether that difference is intended.
            outFile = os.path.join(args.outDir, inputFile)
            pyalign.ungapFastaFile(inputFile, outFile)
    if args.position is None and args.seq is None:
        return
    # Resolve the defaults into locals so the caller's namespace is left
    # untouched.
    position = 1 if args.position is None else args.position
    seq = 0 if args.seq is None else args.seq
    for inputFile in args.input:
        stderr.write("Processing file " + inputFile + "\n")
        if args.outDir is None:
            outFile = inputFile
        else:
            outFile = os.path.join(args.outDir, os.path.basename(inputFile))
        try:
            inputAln = pyalign.AlignIO.read(inputFile, "fasta")
        except ValueError:
            stderr.write("Problem with " + inputFile + "\n")
            # Presumably clears an unreadable alignment when working in the
            # current directory. The existence check keeps a missing file
            # from aborting the remaining inputs.
            if args.outDir == "." and os.path.exists(outFile):
                os.remove(outFile)
            continue
        ungapResult = pyalign.ungapAln(inputAln, position, seq)
        with open(outFile, "w") as fo:
            for record in ungapResult["ungappedAln"]:
                fo.write(">" + record.description + "\n")
                fo.write(str(record.seq) + "\n")
        if args.syncNtDir is not None:
            ntFile = os.path.join(args.syncNtDir,
                                  os.path.basename(inputFile) + ".alnNt")
            # Input and output paths are the same: the file is rewritten
            # in place.
            pyalign.ungapNtAlnFile(ntFile, ungapResult["removedSeq"],
                                   ungapResult["removedPos"], ntFile)