Example #1
0
def main_validate(args, stdout, stderr) :
    """Filter FASTA alignments on sequence count and conservation.

    Each file in ``args.input`` is read as a FASTA alignment. Sequences whose
    ``pyalign.sequenceConservation`` score is below ``args.seqcons`` are
    dropped; the remaining alignment is accepted when it still holds at least
    ``args.n_seqs`` sequences and the mean of its
    ``pyalign.conservationProfile`` is at least ``args.conservation``.
    Alignments with more than ``args.N_seqs`` sequences are rejected without
    being scored (no upper limit when ``args.N_seqs`` is None).

    Accepted alignments are written in FASTA format to
    ``os.path.join(args.outDir, inputFile)``. Rejected alignments are deleted
    from disk when the output directory is ".", and left alone otherwise.

    Args:
        args: Namespace with attributes ``input`` (iterable of file paths),
            ``outDir`` (output directory, or None for "."), ``N_seqs``,
            ``n_seqs``, ``seqcons`` and ``conservation``. ``args.outDir`` is
            overwritten with "." when it is None.
        stdout: Unused by this function.
        stderr: Writable stream receiving progress and problem messages.

    Any ``Exception`` raised while handling one file is reported on ``stderr``
    and processing continues with the next file.
    """
    def mean(x) :
        return sum(x) * 1. / len(x)

    if args.outDir is None :
        args.outDir = "."
    for inputFile in args.input :
        try :
            stderr.write("Processing file " + inputFile + "\n")
            aln = AlignIO.read(inputFile, "fasta")
            accepted = False
            if args.N_seqs is None or len(aln) <= args.N_seqs :
                seqScores = pyalign.sequenceConservation(aln)
                seqsKept = [seq for (seq, score) in zip(aln, seqScores)
                            if score >= args.seqcons]
                cleanAln = MultipleSeqAlignment(seqsKept)
                alnCons = mean(pyalign.conservationProfile(cleanAln))
                accepted = ((len(cleanAln) >= args.n_seqs)
                            and (alnCons >= args.conservation))
            if accepted :
                # The whole input path (not only its base name) is joined to
                # the output directory.
                outFile = os.path.join(args.outDir, inputFile)
                with open(outFile, "w") as fo :
                    for seq in cleanAln :
                        fo.write(">" + seq.description + "\n")
                        fo.write(str(seq.seq) + "\n")
            elif args.outDir == "." :
                # NOTE(review): presumably "." means the inputs are filtered
                # in place, hence rejected files are removed - confirm.
                os.remove(inputFile)
        except Exception :
            # Deliberately broad so one bad file does not abort the batch, but
            # not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still stop the run.
            stderr.write("Problem with " + inputFile + "\n")
Example #2
0
def main_consensus(args, stdout, stderr) :
    """Write a consensus sequence for each input FASTA alignment.

    Each file in ``args.input`` is read with ``pyalign.AlignIO``, passed
    through ``pyalign.ungapAln`` (per the notice sent to ``stderr``, this
    removes gap-only positions) and summarised with ``pyalign.makeConsensus``.
    The consensus is written to ``stdout`` as a FASTA record named after the
    file's base name. Progress is reported on ``stderr``.

    Args:
        args: Namespace with attributes ``input`` (sequence of file paths),
            ``profiles`` (path of a tab-separated file receiving one
            conservation profile per line, or None) and ``conservation``
            (path of a tab-separated file receiving the mean of each profile,
            or None).
        stdout: Writable stream receiving the consensus FASTA records.
        stderr: Writable stream receiving progress and problem messages.

    Raises:
        AssertionError: If two input files share the same base name.

    A ``ValueError`` raised while handling one file is reported on ``stderr``
    and processing continues with the next file.
    """
    def mean(x) :
        return sum(x) * 1. / len(x)

    stderr.write("Note that positions with only gaps are removed before "
                 "calculations\n")
    consensus = dict()
    profiles = dict()
    total = str(len(args.input))
    for i, fastaFile in enumerate(args.input, 1) :
        stderr.write("Processing file " + str(i) + "/" + total + " ")
        try :
            aln = pyalign.AlignIO.read(fastaFile, "fasta")
            stderr.write("- " + str(len(aln)) + " sequences ")
            stderr.write(".")
            aln = pyalign.ungapAln(aln)["ungappedAln"]
            stderr.write(".")
            k = os.path.basename(fastaFile)
            if k in consensus :
                # Explicit raise rather than ``assert`` so the guard survives
                # ``python -O``; the exception type is unchanged and still
                # bypasses the ValueError handler below.
                raise AssertionError("Duplicate input file name: " + k)
            consensus[k] = pyalign.makeConsensus(aln)
            stderr.write(".")
            stdout.write(">" + k + "\n")
            stdout.write(consensus[k] + "\n")
            profiles[k] = pyalign.conservationProfile(aln)
            stderr.write(".\n")
        except ValueError :
            msg = "Problem with " + fastaFile + "\n"
            stderr.write(msg)
    # Only report files that actually have a profile: a ValueError raised by
    # conservationProfile leaves an entry in ``consensus`` without a matching
    # one in ``profiles``, which previously caused a KeyError here.
    keys = [k for k in consensus if k in profiles]
    if args.profiles is not None :
        stderr.write("Writing profiles\n")
        with open(args.profiles, "w") as fo :
            for k in keys :
                fo.write("\t".join([k] + [str(x) for x in profiles[k]]) + "\n")
    if args.conservation is not None :
        stderr.write("Writing conservation values\n")
        with open(args.conservation, "w") as fo :
            for k in keys :
                # NOTE(review): an empty profile makes mean() raise
                # ZeroDivisionError - confirm whether that can occur.
                fo.write(k + "\t" + str(mean(profiles[k])) + "\n")