Esempio n. 1
0
def main_consensus(args, stdout, stderr) :
    """Write a consensus sequence for each input alignment.

    Every file in ``args.input`` is read as a fasta alignment, passed through
    ``pyalign.ungapAln`` and summarised with ``pyalign.makeConsensus`` and
    ``pyalign.conservationProfile``. Consensus sequences are written to
    ``stdout`` as fasta records named after the base name of the input file.
    Files raising ``ValueError`` during processing are reported on ``stderr``
    and skipped.

    Args:
        args: Object with the attributes ``input`` (sized iterable of fasta
            file paths), ``profiles`` (path of the per-file profile table, or
            None to skip it) and ``conservation`` (path of the per-file mean
            conservation table, or None to skip it).
        stdout: Writable text stream receiving the consensus records.
        stderr: Writable text stream receiving progress and error messages.

    Raises:
        AssertionError: If two input files share the same base name.
    """
    stderr.write("Note that positions with only gaps are removed before "
                 "calculations\n")
    consensus = dict()
    profiles = dict()
    total = str(len(args.input))
    for i, fastaFile in enumerate(args.input, 1) :
        stderr.write("Processing file " + str(i) + "/" + total + " ")
        try :
            aln = pyalign.AlignIO.read(fastaFile, "fasta")
            stderr.write("- " + str(len(aln)) + " sequences ")
            stderr.write(".")
            aln = pyalign.ungapAln(aln)["ungappedAln"]
            stderr.write(".")
            k = os.path.basename(fastaFile)
            # Explicit raise rather than `assert` so that the check is kept
            # when Python runs with -O; the exception type is unchanged.
            if k in consensus :
                raise AssertionError("Duplicate input file name: " + k)
            consensus[k] = pyalign.makeConsensus(aln)
            stderr.write(".")
            stdout.write(">" + k + "\n")
            stdout.write(consensus[k] + "\n")
            profiles[k] = pyalign.conservationProfile(aln)
            stderr.write(".\n")
        except ValueError :
            msg = "Problem with " + fastaFile + "\n"
            stderr.write(msg)
    # Iterate over the profiles actually stored: a file failing after its
    # consensus was recorded has an entry in `consensus` but none here, and
    # looking it up would raise KeyError.
    keys = list(profiles)
    if args.profiles is not None :
        stderr.write("Writing profiles\n")
        with open(args.profiles, "w") as fo :
            for k in keys :
                fo.write("\t".join([k] + [str(x) for x in profiles[k]]) + "\n")
    if args.conservation is not None :
        stderr.write("Writing conservation values\n")
        def mean(x) :
            # An empty profile has no mean; report nan instead of failing
            # with ZeroDivisionError halfway through the output file.
            if len(x) == 0 :
                return float("nan")
            # `* 1.` forces float division under Python 2 as well
            return sum(x) * 1. / len(x)
        with open(args.conservation, "w") as fo :
            for k in keys :
                fo.write(k + "\t" + str(mean(profiles[k])) + "\n")
Esempio n. 2
0
def main_ungap(args, stdout, stderr) :
    """Remove gaps from fasta alignment files.

    Two modes are available:

    * ``args.all`` is true: each input file is handed to
      ``pyalign.ungapFastaFile`` and the result is written into
      ``args.outDir`` under the base name of the input file.
    * ``args.position`` and/or ``args.seq`` is given: each input file is read
      as a fasta alignment and processed with ``pyalign.ungapAln``. The
      result is written to ``args.outDir`` (or over the input file when
      ``args.outDir`` is None), one ``>description`` line followed by one
      sequence line per record. If ``args.syncNtDir`` is set, the file
      ``<base name>.alnNt`` in that directory is updated in place with
      ``pyalign.ungapNtAlnFile`` using the removed sequences and positions.

    When neither mode is selected nothing is done. ``args`` is not modified.

    Args:
        args: Object with the attributes ``all``, ``position``, ``seq``,
            ``syncNtDir``, ``outDir`` and ``input`` (iterable of file paths).
        stdout: Unused; kept for a uniform entry-point signature.
        stderr: Writable text stream receiving progress and error messages.

    Raises:
        AssertionError: If ``args.all`` is combined with ``args.position``,
            ``args.seq`` or ``args.syncNtDir``.
    """
    if args.all :
        # Explicit raise rather than `assert` so that the check is kept when
        # Python runs with -O; the exception type is unchanged.
        if not (args.position is None and args.seq is None
                and args.syncNtDir is None) :
            raise AssertionError("args.all cannot be combined with "
                                 "position, seq or syncNtDir")
        for inputFile in args.input :
            stderr.write("Processing file " + inputFile + "\n")
            # Join with the base name, as in the other mode: joining with the
            # full input path would discard outDir for an absolute path and
            # make the output overwrite the input.
            outFile = os.path.join(args.outDir, os.path.basename(inputFile))
            pyalign.ungapFastaFile(inputFile, outFile)
        # position and seq are both None here, nothing else to do
        return
    if args.position is None and args.seq is None :
        return
    # Fill in defaults locally instead of altering the caller's object
    position = 1 if args.position is None else args.position
    seq = 0 if args.seq is None else args.seq
    for inputFile in args.input :
        stderr.write("Processing file " + inputFile + "\n")
        if args.outDir is None :
            outFile = inputFile
        else :
            outFile = os.path.join(args.outDir, os.path.basename(inputFile))
        try :
            inputAln = AlignIO.read(inputFile, "fasta")
        except ValueError :
            # Report the skipped file rather than ignoring it silently
            stderr.write("Problem with " + inputFile + "\n")
            # NOTE(review): presumably meant to discard an unreadable file
            # when working in the current directory - confirm the intent.
            if args.outDir == "." :
                os.remove(outFile)
            continue
        ungapResult = pyalign.ungapAln(inputAln, position, seq)
        outputAln = ungapResult["ungappedAln"]
        with open(outFile, "w") as fo :
            for record in outputAln :
                fo.write(">" + record.description + "\n")
                fo.write(str(record.seq) + "\n")
        if args.syncNtDir is not None :
            # Same path for input and output: the file is updated in place
            ntFile = os.path.join(args.syncNtDir,
                                  os.path.basename(inputFile) + ".alnNt")
            pyalign.ungapNtAlnFile(ntFile, ungapResult["removedSeq"],
                                   ungapResult["removedPos"], ntFile)