Python Genomics.CalculateCodonFrequenciesFromCounts Beispiele

Programmiersprache: Python

Namespace / Paketname: CGAT

Klasse / Typ: Genomics

Methode / Funktion: CalculateCodonFrequenciesFromCounts

Beispiele auf hotexamples.com: 4

Python Genomics.CalculateCodonFrequenciesFromCounts - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode CGAT.Genomics.CalculateCodonFrequenciesFromCounts, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

IsPositiveStrand(13)

IsNegativeStrand(10)

MapCodon2AA(10)

IsStopCodon(7)

CalculatePairIndices(7)

Alignment2PeptideAlignment(6)

GetHID(6)

ReadGenomicSequences(5)

CalculateCodonFrequenciesFromCounts(4)

CountGeneFeatures(4)

GetDegeneracy(4)

Alignment2ExonBoundaries(3)

GetUniformCodonUsage(3)

ReadContigSizes(3)

ParseFasta2Hash(2)

Protein2Wobble(2)

MaskStopCodons(2)

Alignment2CDNA(2)

CountCodons(2)

Alignment2String(2)

GetIntronType(2)

GetMapAA2Codons(1)

GetGenomicSequence(1)

GetDegenerateSites(1)

MapSequences(1)

CalculateCAIWeightsFromCounts(1)

ParseFasta2HashFromIndex(1)

AlignmentProtein2CDNA(1)

ReadClusters(1)

Beispiel #1

Datei anzeigen

 def getKL(self, usage):
     """Return the Kullback-Leibler divergence (relative entropy) of the
     sequence's codon frequencies with respect to a reference codon usage.

     *usage* is indexed by codon. The observed frequencies are obtained
     from ``self.mCodonCounts`` and ``self.mPseudoCounts`` via
     ``Genomics.CalculateCodonFrequenciesFromCounts``. For every codon in
     ``self.mCodonCounts`` the term ``usage * log(usage / observed)`` is
     accumulated using the natural logarithm.
     """
     observed = Genomics.CalculateCodonFrequenciesFromCounts(
         self.mCodonCounts, self.mPseudoCounts)

     divergence = 0
     # Only the codon keys matter here; the raw counts are already folded
     # into ``observed``.
     for codon, _ in list(self.mCodonCounts.items()):
         reference = usage[codon]
         divergence += reference * math.log(reference / observed[codon])
     return divergence

Beispiel #2

Datei anzeigen

    def getEntropy(self, usage=None):
        """Return the entropy of the source measured against a reference usage.

        This quantity is also called conditional entropy or encoding cost.
        The original author notes that the result is a sum over 20
        entropies, one for each amino acid.

        *usage* is indexed by codon. When it is ``None`` the frequencies
        derived from ``self.mCodonCounts`` are used as their own reference,
        i.e. the plain entropy is returned.
        """
        source = Genomics.CalculateCodonFrequenciesFromCounts(
            self.mCodonCounts, self.mPseudoCounts)
        reference = source if usage is None else usage

        total = 0
        # Natural logarithm; one term per codon present in the counts table.
        for codon, _ in list(self.mCodonCounts.items()):
            total -= source[codon] * math.log(reference[codon])
        return total

Beispiel #3

Datei anzeigen

    def updateProperties(self):
        """Run the base-class update, then derive codon frequencies.

        After ``SequencePropertiesCodons.updateProperties`` has run,
        ``self.mCodonFrequencies`` is set from ``self.mCodonCounts`` using
        ``Genomics.CalculateCodonFrequenciesFromCounts`` (called without an
        explicit pseudo-count argument).
        """
        SequencePropertiesCodons.updateProperties(self)

        # Read the counts only after the base class had its turn.
        counts = self.mCodonCounts
        frequencies = Genomics.CalculateCodonFrequenciesFromCounts(counts)
        self.mCodonFrequencies = frequencies

Beispiel #4

Datei anzeigen

Datei: codonbias_acai2tsv.py Projekt: lesheng/cgat

def _write_codon_list(outfile, table, value_title):
    """Write *table* (codon -> number) as a tab-separated list sorted by codon.

    The header line is ``CODON<TAB>value_title``.
    """
    outfile.write("CODON\t%s\n" % value_title)
    for codon in sorted(table):
        outfile.write("%s\t%f\n" % (codon, table[codon]))


def _write_codon_matrix(outfile, weights):
    """Write *weights* as comma-separated rows in OUTPUT_ORDER_CODON_MATRIX order.

    Codons from the matrix layout are upper-cased before the lookup.
    """
    for row in OUTPUT_ORDER_CODON_MATRIX:
        outfile.write(",".join(
            "%5.3f" % weights[codon.upper()] for codon in row))
        outfile.write("\n")


def _empty_codon_counts():
    """Return a dict with a zero count for every codon of the dmelanogaster table."""
    return dict.fromkeys(CODON_PREFERENCES["dmelanogaster"], 0)


def _passes_cai_threshold(data, cai_threshold, reverse_set):
    """Tell whether a gene record is kept for the (possibly reversed) dominant set."""
    if reverse_set:
        return not data["CAICLASS"] >= cai_threshold
    return not data["CAICLASS"] < cai_threshold


def _count_codons_in_genes(gene_info, keep):
    """Sum per-codon counts over all genes for which ``keep(genename, data)`` is true.

    Each gene record stores a per-codon value that is multiplied by
    ``GENELENGTH`` and truncated with ``int`` to give that gene's count.
    """
    counts = _empty_codon_counts()
    for genename, data in gene_info.items():
        if not keep(genename, data):
            continue
        gene_length = data["GENELENGTH"]
        for codon in counts:
            counts[codon] += int(data[codon] * gene_length)
    return counts


def _absolute_frequencies(counts):
    """Return each count divided by the sum of all counts (no per-aa normalisation).

    Raises ZeroDivisionError when all counts are zero.
    """
    total = sum(counts.values())
    return {codon: float(count) / total for codon, count in counts.items()}


def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Depending on the options, the script converts a weights matrix read
    from stdin into a table, dumps CAI reference weights for a species,
    prints selected fields of the genes table, prints a codon weights /
    counts / frequencies table, and/or prints codon usage. All output goes
    to ``options.stdout``.

    Raises ValueError for an unknown reference species, an unknown
    ``--weights`` or ``--codon-usage`` value, a sequence whose length is not
    a multiple of three, or a weights matrix with too few values.
    """

    # NOTE(review): argv is normalised here but not forwarded to E.Start
    # below, exactly as in the original code -- confirm whether E.Start
    # should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: codonbias_acai2tsv.py 865 2007-01-15 13:44:43Z andreas $"
    )

    parser.add_option("-o",
                      "--input-file-trace",
                      dest="input_filename_trace",
                      type="string",
                      help="input filename for cai.",
                      metavar="FILE")

    parser.add_option("-e",
                      "--input-file-genes",
                      dest="input_filename_genes",
                      type="string",
                      help="input filename for genes information from cai.",
                      metavar="FILE")

    parser.add_option("-c",
                      "--input-file-codons",
                      dest="input_filename_codons",
                      type="string",
                      help="input filename for codon usage information.",
                      metavar="FILE")

    parser.add_option("--input-file-sequences",
                      dest="input_filename_sequences",
                      type="string",
                      help="input filename with sequences.",
                      metavar="FILE")

    parser.add_option("-t",
                      "--input-file-subset",
                      dest="input_filename_subset",
                      type="string",
                      help="input filename with subset.",
                      metavar="FILE")

    parser.add_option("--codon-table-format",
                      dest="codon_table_format",
                      type="choice",
                      choices=("list", "matrix"),
                      help="output options for output codon tables.")

    parser.add_option("--codon-table-type",
                      dest="codon_table_type",
                      type="choice",
                      choices=("counts", "frequencies", "weights",
                               "absolute-frequencies"),
                      help="type of codon table.")

    parser.add_option("-r",
                      "--reference",
                      dest="reference",
                      type="string",
                      help="dump CAI reference weights for species.")

    parser.add_option("-s",
                      "--select",
                      dest="select",
                      type="string",
                      help="fields to select from genes table.")

    parser.add_option("-m",
                      "--map",
                      dest="input_filename_map",
                      type="string",
                      help="filename with mapping information for gene names.",
                      metavar="FILE")

    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="invert map.")

    parser.add_option(
        "-d",
        "--dominant-set",
        dest="dominant_set",
        type="float",
        help="only print out dominant set (# fraction of most biased genes).")

    parser.add_option(
        "--reverse-set",
        dest="reverse_set",
        action="store_true",
        help="print the reverse set, i.e., then non-dominant set.")

    parser.add_option(
        "-u",
        "--codon-usage",
        dest="codon_usage",
        type="string",
        help="print codon usage for the full/biased set of genes [full|biased]."
    )

    parser.add_option(
        "-w",
        "--weights",
        dest="weights",
        type="string",
        help=
        "print weights [final-list|final-matrix|random|compute|weights|frequencies|absolute-frequencies]."
    )

    parser.add_option("--weights-matrix2table",
                      dest="weights_matrix2table",
                      action="store_true",
                      help="convert a weights matrix to a weights table.")

    parser.add_option("--get-preferred-codons",
                      dest="get_preferred_codons",
                      type="string",
                      help="compute overview of preferred codons.")

    # The original spelled the key "revserse_set", so the intended default
    # for --reverse-set was never applied.
    parser.set_defaults(input_filename="-",
                        input_filename_trace=None,
                        input_filename_genes=None,
                        input_filename_codons=None,
                        input_filename_map=None,
                        input_filename_subset=None,
                        input_filename_sequences=None,
                        invert_map=False,
                        select=None,
                        codon_usage=None,
                        weights=None,
                        reverse_set=False,
                        pseudocounts=1,
                        codon_table_format="list",
                        codon_table_type="weights",
                        weights_matrix2table=False,
                        random_size=1000,
                        get_preferred_codons=None,
                        dominant_set=0.0)

    (options, args) = E.Start(parser)
    if options.select:
        options.select = options.select.split(",")

    outfile = options.stdout

    ###################################################################
    # convert weights table to a codon table
    if options.weights_matrix2table:
        values = []
        for line in options.stdin:
            # Skip comments and blank lines; strip only the newline so a
            # final line without one does not lose its last character.
            if line.startswith("#") or not line.strip():
                continue
            values.extend(float(v) for v in line.rstrip("\n").split(","))

        matrix_codons = [
            codon for row in OUTPUT_ORDER_CODON_MATRIX for codon in row
        ]
        if len(values) < len(matrix_codons):
            raise ValueError(
                "weights matrix has %i values, expected at least %i" %
                (len(values), len(matrix_codons)))
        # Values are assigned in matrix order; surplus values are ignored.
        weights = dict(zip(matrix_codons, values))

        _write_codon_list(outfile, weights, "WEIGHT")

        E.Stop()
        # Successful conversion: exit with status 0 (the original exited
        # with 1, which signals failure to the calling shell).
        sys.exit(0)

    ###################################################################
    map_genes = {}

    if options.input_filename_map:
        with open(options.input_filename_map, "r") as map_file:
            for line in map_file:
                if line.startswith("#"):
                    continue
                a, b = line.rstrip("\n").split("\t")[:2]
                if options.invert_map:
                    a, b = b, a
                map_genes[a] = b

    result = WrapperAdaptiveCAI.AdaptiveCAIResult()

    gene_file = codon_file = trace_file = None
    try:
        if options.input_filename_genes:
            gene_file = open(options.input_filename_genes, "r")
        if options.input_filename_codons:
            codon_file = open(options.input_filename_codons, "r")
        if options.input_filename_trace:
            trace_file = open(options.input_filename_trace, "r")

        if options.input_filename_subset:
            with open(options.input_filename_subset, "r") as subset_file:
                entries, _errors = IOTools.ReadList(subset_file)
            subset = set(entries)
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# read %i entries into subset from %s.\n" %
                    (len(subset), options.input_filename_subset))
        else:
            subset = None

        result.Read(gene_file=gene_file,
                    codon_file=codon_file,
                    trace_file=trace_file)
    finally:
        # Close whatever was opened, even if reading failed.
        for handle in (gene_file, codon_file, trace_file):
            if handle:
                handle.close()

    if options.reference:
        if options.reference not in CODON_PREFERENCES:
            raise ValueError(
                "unknown species %s: possibles species are: %s" %
                (options.reference, str(list(CODON_PREFERENCES.keys()))))

        weights = Genomics.CalculateCAIWeightsFromCounts(
            CODON_PREFERENCES[options.reference], options.pseudocounts)

        _write_codon_matrix(outfile, weights)

    # gene_file is only used as a "was a genes file given" flag here.
    if options.dominant_set and gene_file:
        cai_threshold = result.GetDominantThreshold(options.dominant_set)
    elif options.reverse_set:
        cai_threshold = 1.0
    else:
        cai_threshold = 0.0

    def in_dominant_set(_genename, data):
        return _passes_cai_threshold(data, cai_threshold,
                                     options.reverse_set)

    if options.select:

        fields = []
        titles = []
        for spec in options.select:
            # "FIELD AS title" renames the output column.
            renamed = re.match(r"(\S+) (AS|as) (\S+)", spec)
            if renamed:
                fields.append(renamed.groups()[0].upper())
                titles.append(renamed.groups()[2])
            else:
                fields.append(spec.upper())
                titles.append(spec)

        outfile.write("GENENAME\t" + "\t".join(titles) + "\n")

        for genename, data in result.mGeneInfo.items():
            if genename in map_genes:
                genename = map_genes[genename]

            if not in_dominant_set(genename, data):
                continue

            outfile.write(genename)
            for field in fields:
                outfile.write("\t%s" % str(data[field]))
            outfile.write("\n")

    if options.weights:

        table_format = options.codon_table_format

        if options.weights in ("compute-counts", "compute-weights",
                               "compute-frequencies"):
            # compute codon usage weights from a set of sequences
            counts = _empty_codon_counts()

            if options.input_filename_sequences:
                with open(options.input_filename_sequences,
                          "r") as sequence_file:
                    sequences = Genomics.ReadPeptideSequences(
                        sequence_file, filter=subset)
                    for key, sequence in sequences.items():
                        sequence = sequence.replace(" ", "")
                        if len(sequence) % 3 != 0:
                            raise ValueError(
                                "warning: sequence %s is not multiple of 3" %
                                key)
                        for start in range(0, len(sequence), 3):
                            counts[sequence[start:start + 3].upper()] += 1

            if options.weights == "compute-frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.weights == "compute-weights":
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    counts, options.pseudocounts)
            else:
                weights = counts

        elif options.weights in ("final-list", "final-matrix"):

            weights = result.mFinalWeights
            if options.weights == "final-list":
                table_format = "list"
            else:
                table_format = "matrix"

        elif options.weights == "random":
            # get random weights
            counts = {
                codon: random.randint(1, options.random_size)
                for codon in CODON_PREFERENCES["dmelanogaster"]
            }

            weights = Genomics.CalculateCAIWeightsFromCounts(
                counts, options.pseudocounts)
            table_format = "matrix"

        elif options.weights == "biased":
            # get biased weights
            # NOTE(review): the original passed an unassigned name `counts`
            # here, which always failed. The usage table fetched on this
            # path is passed instead; it comes from GetUniformCodonUsage,
            # which looks odd for "biased" -- confirm the intended source.
            codons = Genomics.GetUniformCodonUsage()

            weights = Genomics.CalculateCAIWeightsFromCounts(
                codons, options.pseudocounts)
            table_format = "matrix"

        elif options.weights in ("uniform-weights", "uniform-frequencies"):
            # get uniform weights
            codons = Genomics.GetUniformCodonUsage()

            if options.weights == "uniform-weights":
                # The original passed an unassigned name `counts`; the
                # uniform usage table is the only input available here.
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    codons, options.pseudocounts)
                table_format = "matrix"
            else:
                weights = codons
                table_format = "list"

        elif options.weights in ("counts", "frequencies",
                                 "absolute-frequencies"):
            # get weights as frequencies
            # compute from scratch. In the caijava file, the absolute
            # frequency f / gene_length is given. Thus the total number of
            # codons is f * gene_length.
            counts = _count_codons_in_genes(result.mGeneInfo,
                                            in_dominant_set)

            if options.weights == "frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.weights == "counts":
                weights = counts
            else:
                # absolute frequencies: not normalized per aa
                weights = _absolute_frequencies(counts)

            table_format = "list"

        elif options.weights == "subset":

            if subset is None:
                raise ValueError(
                    "weights 'subset' requires --input-file-subset")

            def in_subset(genename, _data):
                # Keep members of the subset, or non-members when the
                # reverse set was requested.
                return (genename in subset) != bool(options.reverse_set)

            counts = _count_codons_in_genes(result.mGeneInfo, in_subset)

            if options.codon_table_type == "frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.codon_table_type == "weights":
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    counts, options.pseudocounts)
            elif options.codon_table_type == "counts":
                weights = counts
            elif options.codon_table_type == "absolute-frequencies":
                # absolute frequencies: not normalized per aa
                weights = _absolute_frequencies(counts)

        else:
            raise ValueError("unknown weights %s" % options.weights)

        if table_format == "list":
            _write_codon_list(outfile, weights, "WEIGHT")
        elif table_format == "matrix":
            _write_codon_matrix(outfile, weights)

    if options.codon_usage:
        if options.codon_usage == "biased":
            usages = result.mCodonUsages[-1]
        elif options.codon_usage == "full":
            usages = result.mCodonUsages[0]
        elif options.codon_usage == "weights":
            usages = WrapperAdaptiveCAI.CalculateWeightsFromUsage(
                result.mCodonUsages[0])
        else:
            raise ValueError("unknown option '%s' for codon-usage." %
                             options.codon_usage)

        _write_codon_list(outfile, usages, "FREQUENCY")

    E.Stop()