Ejemplos de Genomics.CalculateCodonFrequenciesFromCounts en Python

Lenguaje de programación: Python

Namespace/Package Name: CGAT

Clase / Tipo: Genomics

Método / Función: CalculateCodonFrequenciesFromCounts

Ejemplos en hotexamples.com: 4

Python Genomics.CalculateCodonFrequenciesFromCounts - 4 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de CGAT.Genomics.CalculateCodonFrequenciesFromCounts extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

IsPositiveStrand(13)

IsNegativeStrand(10)

MapCodon2AA(10)

IsStopCodon(7)

CalculatePairIndices(7)

Alignment2PeptideAlignment(6)

GetHID(6)

ReadGenomicSequences(5)

CalculateCodonFrequenciesFromCounts(4)

CountGeneFeatures(4)

GetDegeneracy(4)

Alignment2ExonBoundaries(3)

GetUniformCodonUsage(3)

ReadContigSizes(3)

ParseFasta2Hash(2)

Protein2Wobble(2)

MaskStopCodons(2)

Alignment2CDNA(2)

CountCodons(2)

Alignment2String(2)

GetIntronType(2)

GetMapAA2Codons(1)

GetGenomicSequence(1)

GetDegenerateSites(1)

MapSequences(1)

CalculateCAIWeightsFromCounts(1)

ParseFasta2HashFromIndex(1)

AlignmentProtein2CDNA(1)

ReadClusters(1)

Ejemplo n.º 1

Mostrar archivo

 def getKL(self, usage):
     """Return the relative entropy between *usage* and the observed codons.

     Observed frequencies are obtained from ``self.mCodonCounts`` and
     ``self.mPseudoCounts`` through
     ``Genomics.CalculateCodonFrequenciesFromCounts``. The result is the
     sum, over every codon in ``self.mCodonCounts``, of
     ``usage[codon] * log(usage[codon] / observed[codon])``.

     usage -- mapping from codon to reference frequency; it is indexed with
         every codon present in ``self.mCodonCounts``.
     """
     observed = Genomics.CalculateCodonFrequenciesFromCounts(
         self.mCodonCounts, self.mPseudoCounts)
     divergence = 0
     # Only the codon keys matter here; the raw counts are already folded
     # into ``observed``.
     for codon, _count in self.mCodonCounts.items():
         reference = usage[codon]
         divergence += reference * math.log(reference / observed[codon])
     return divergence

Ejemplo n.º 2

Mostrar archivo

    def getEntropy(self, usage=None):
        """Return the encoding cost of the observed codons under *usage*.

        Observed frequencies are obtained from ``self.mCodonCounts`` and
        ``self.mPseudoCounts`` through
        ``Genomics.CalculateCodonFrequenciesFromCounts``. The result is the
        negated sum, over every codon in ``self.mCodonCounts``, of
        ``observed[codon] * log(usage[codon])`` (conditional entropy).

        usage -- mapping from codon to reference frequency. When omitted,
            the observed frequencies themselves are used, which yields the
            plain entropy of the observed distribution.

        NOTE(review): the original author describes this as a sum over 20
        per-amino-acid entropies; that presumes the frequencies are
        normalised per amino acid -- confirm against Genomics.
        """
        observed = Genomics.CalculateCodonFrequenciesFromCounts(
            self.mCodonCounts, self.mPseudoCounts)
        reference = observed if usage is None else usage

        cost = 0
        for codon, _count in self.mCodonCounts.items():
            cost -= observed[codon] * math.log(reference[codon])
        return cost

Ejemplo n.º 3

Mostrar archivo

    def updateProperties(self):
        """Refresh the parent's properties, then derive codon frequencies.

        After ``SequencePropertiesCodons.updateProperties`` has run,
        ``self.mCodonFrequencies`` is set to the result of
        ``Genomics.CalculateCodonFrequenciesFromCounts`` applied to
        ``self.mCodonCounts`` (no pseudo-count argument is passed).
        """
        SequencePropertiesCodons.updateProperties(self)

        codon_counts = self.mCodonCounts
        self.mCodonFrequencies = \
            Genomics.CalculateCodonFrequenciesFromCounts(codon_counts)

Ejemplo n.º 4

Mostrar archivo

Archivo: codonbias_acai2tsv.py Proyecto: lesheng/cgat

def _write_codon_rows(outfile, table):
    """Write one ``codon<TAB>value`` line per entry of *table*, sorted by codon."""
    # sorted() accepts dict views; ``d.keys().sort()`` fails on Python 3.
    for codon in sorted(table):
        outfile.write("%s\t%f\n" % (codon, table[codon]))


def _write_codon_matrix(outfile, weights):
    """Write *weights* as comma-separated rows in OUTPUT_ORDER_CODON_MATRIX order."""
    for row in OUTPUT_ORDER_CODON_MATRIX:
        outfile.write(",".join("%5.3f" % weights[codon.upper()]
                               for codon in row))
        outfile.write("\n")


def _is_outside_gene_set(data, cai_threshold, reverse_set):
    """Return True if the gene record *data* must be skipped.

    Normally genes whose ``CAICLASS`` is below the threshold are skipped;
    with *reverse_set* the genes at or above the threshold are skipped.
    """
    if reverse_set:
        return data["CAICLASS"] >= cai_threshold
    return data["CAICLASS"] < cai_threshold


def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Depending on the options, the script
      * converts a weights matrix read from stdin into a codon table
        (``--weights-matrix2table``) and exits,
      * dumps CAI reference weights for a species (``--reference``),
      * prints selected columns of the genes table (``--select``),
      * prints a codon weight/frequency/count table (``--weights``),
      * prints a codon usage table (``--codon-usage``).

    Raises ValueError for an unknown species, an unknown ``--weights`` or
    ``--codon-usage`` value, or a sequence whose length is not a multiple
    of three.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: codonbias_acai2tsv.py 865 2007-01-15 13:44:43Z andreas $"
    )

    parser.add_option("-o",
                      "--input-file-trace",
                      dest="input_filename_trace",
                      type="string",
                      help="input filename for cai.",
                      metavar="FILE")

    parser.add_option("-e",
                      "--input-file-genes",
                      dest="input_filename_genes",
                      type="string",
                      help="input filename for genes information from cai.",
                      metavar="FILE")

    parser.add_option("-c",
                      "--input-file-codons",
                      dest="input_filename_codons",
                      type="string",
                      help="input filename for codon usage information.",
                      metavar="FILE")

    parser.add_option("--input-file-sequences",
                      dest="input_filename_sequences",
                      type="string",
                      help="input filename with sequences.",
                      metavar="FILE")

    parser.add_option("-t",
                      "--input-file-subset",
                      dest="input_filename_subset",
                      type="string",
                      help="input filename with subset.",
                      metavar="FILE")

    parser.add_option("--codon-table-format",
                      dest="codon_table_format",
                      type="choice",
                      choices=("list", "matrix"),
                      help="output options for output codon tables.")

    parser.add_option("--codon-table-type",
                      dest="codon_table_type",
                      type="choice",
                      choices=("counts", "frequencies", "weights",
                               "absolute-frequencies"),
                      help="type of codon table.")

    parser.add_option("-r",
                      "--reference",
                      dest="reference",
                      type="string",
                      help="dump CAI reference weights for species.")

    parser.add_option("-s",
                      "--select",
                      dest="select",
                      type="string",
                      help="fields to select from genes table.")

    parser.add_option("-m",
                      "--map",
                      dest="input_filename_map",
                      type="string",
                      help="filename with mapping information for gene names.",
                      metavar="FILE")

    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="invert map.")

    parser.add_option(
        "-d",
        "--dominant-set",
        dest="dominant_set",
        type="float",
        help="only print out dominant set (# fraction of most biased genes).")

    parser.add_option(
        "--reverse-set",
        dest="reverse_set",
        action="store_true",
        help="print the reverse set, i.e., then non-dominant set.")

    parser.add_option(
        "-u",
        "--codon-usage",
        dest="codon_usage",
        type="string",
        help="print codon usage for the full/biased set of genes [full|biased]."
    )

    parser.add_option(
        "-w",
        "--weights",
        dest="weights",
        type="string",
        help=
        "print weights [final-list|final-matrix|random|compute|weights|frequencies|absolute-frequencies]."
    )

    parser.add_option("--weights-matrix2table",
                      dest="weights_matrix2table",
                      action="store_true",
                      help="convert a weights matrix to a weights table.")

    parser.add_option("--get-preferred-codons",
                      dest="get_preferred_codons",
                      type="string",
                      help="compute overview of preferred codons.")

    # ``reverse_set`` was previously misspelled here, so the intended
    # default was never attached to the real option.
    parser.set_defaults(input_filename="-",
                        input_filename_trace=None,
                        input_filename_genes=None,
                        input_filename_codons=None,
                        input_filename_map=None,
                        input_filename_subset=None,
                        input_filename_sequences=None,
                        invert_map=False,
                        select=None,
                        codon_usage=None,
                        weights=None,
                        reverse_set=False,
                        pseudocounts=1,
                        codon_table_format="list",
                        codon_table_type="weights",
                        weights_matrix2table=False,
                        random_size=1000,
                        get_preferred_codons=None,
                        dominant_set=0.0)

    (options, args) = E.Start(parser)
    if options.select:
        options.select = options.select.split(",")

    outfile = options.stdout

    ###################################################################
    # convert weights table to a codon table
    if options.weights_matrix2table:
        data = []
        for line in options.stdin.readlines():
            if line[0] == "#":
                continue
            # rstrip("\n") instead of [:-1]: a final line without a newline
            # must not lose its last character.
            data.extend(float(v) for v in line.rstrip("\n").split(","))

        # the values are consumed in the order of the codon matrix layout
        weights = {}
        x = 0
        for cc in OUTPUT_ORDER_CODON_MATRIX:
            for c in cc:
                weights[c] = data[x]
                x += 1

        outfile.write("CODON\tWEIGHT\n")
        _write_codon_rows(outfile, weights)

        E.Stop()
        # the conversion succeeded, so report success to the shell
        sys.exit(0)

    ###################################################################
    map_genes = {}

    if options.input_filename_map:
        with open(options.input_filename_map, "r") as map_file:
            for line in map_file:
                if line[0] == "#":
                    continue
                a, b = line.rstrip("\n").split("\t")[:2]
                if options.invert_map:
                    a, b = b, a
                map_genes[a] = b

    result = WrapperAdaptiveCAI.AdaptiveCAIResult()

    if options.input_filename_subset:
        with open(options.input_filename_subset, "r") as subset_file:
            entries, _errors = IOTools.ReadList(subset_file)
            subset = set(entries)
        if options.loglevel >= 1:
            options.stdlog.write("# read %i entries into subset from %s.\n" %
                                 (len(subset), options.input_filename_subset))
    else:
        subset = None

    # Open the optional input files and make sure they are closed again
    # even if reading fails.
    gene_file = codon_file = trace_file = None
    try:
        if options.input_filename_genes:
            gene_file = open(options.input_filename_genes, "r")
        if options.input_filename_codons:
            codon_file = open(options.input_filename_codons, "r")
        if options.input_filename_trace:
            trace_file = open(options.input_filename_trace, "r")

        result.Read(gene_file=gene_file,
                    codon_file=codon_file,
                    trace_file=trace_file)
    finally:
        for handle in (gene_file, codon_file, trace_file):
            if handle:
                handle.close()

    have_gene_file = gene_file is not None

    if options.reference:
        if options.reference not in CODON_PREFERENCES:
            raise ValueError(
                "unknown species %s: possibles species are: %s" % (
                    options.reference, str(sorted(CODON_PREFERENCES))))

        weights = Genomics.CalculateCAIWeightsFromCounts(
            CODON_PREFERENCES[options.reference], options.pseudocounts)

        _write_codon_matrix(outfile, weights)

    if options.dominant_set and have_gene_file:
        cai_threshold = result.GetDominantThreshold(options.dominant_set)
    elif options.reverse_set:
        cai_threshold = 1.0
    else:
        cai_threshold = 0.0

    if options.select:

        # each selector is either "FIELD" or "FIELD AS title"
        fields = []
        titles = []
        for x in options.select:
            f = re.match(r"(\S+) (AS|as) (\S+)", x)
            if f:
                fields.append(f.groups()[0].upper())
                titles.append(f.groups()[2])
            else:
                fields.append(x.upper())
                titles.append(x)

        outfile.write("GENENAME\t" + "\t".join(titles) + "\n")

        for genename, data in result.mGeneInfo.items():
            if genename in map_genes:
                genename = map_genes[genename]

            if _is_outside_gene_set(data, cai_threshold,
                                    options.reverse_set):
                continue

            outfile.write(genename)
            for c in fields:
                outfile.write("\t%s" % str(data[c]))
            outfile.write("\n")

    if options.weights:

        table_format = options.codon_table_format

        if options.weights in ("compute-counts", "compute-weights",
                               "compute-frequencies"):
            # compute codon usage weights from a set of sequences
            codons = CODON_PREFERENCES["dmelanogaster"].keys()
            counts = dict.fromkeys(codons, 0)

            if options.input_filename_sequences:
                with open(options.input_filename_sequences,
                          "r") as sequence_file:
                    sequences = Genomics.ReadPeptideSequences(sequence_file,
                                                              filter=subset)
                    for key, sequence in sequences.items():
                        sequence = sequence.replace(" ", "")
                        if len(sequence) % 3 != 0:
                            raise ValueError(
                                "warning: sequence %s is not multiple of 3" %
                                key)
                        for x in range(0, len(sequence), 3):
                            counts[sequence[x:x + 3].upper()] += 1

            if options.weights == "compute-frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.weights == "compute-weights":
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    counts, options.pseudocounts)
            else:
                weights = counts

        elif options.weights in ("final-list", "final-matrix"):

            weights = result.mFinalWeights
            if options.weights == "final-list":
                table_format = "list"
            else:
                table_format = "matrix"

        elif options.weights == "random":
            # get random weights
            codons = CODON_PREFERENCES["dmelanogaster"].keys()
            counts = {}
            for x in codons:
                counts[x] = random.randint(1, options.random_size)

            weights = Genomics.CalculateCAIWeightsFromCounts(
                counts, options.pseudocounts)
            table_format = "matrix"

        elif options.weights == "biased":
            # get biased weights
            codons = Genomics.GetUniformCodonUsage()

            # NOTE(review): ``counts`` is never assigned on this path, so
            # this call fails with UnboundLocalError. The intended input is
            # not evident from this file -- confirm before changing.
            weights = Genomics.CalculateCAIWeightsFromCounts(
                counts, options.pseudocounts)
            table_format = "matrix"

        elif options.weights in ("uniform-weights", "uniform-frequencies"):
            # get uniform weights
            codons = Genomics.GetUniformCodonUsage()

            if options.weights == "uniform-weights":
                # NOTE(review): ``counts`` is never assigned on this path
                # either (presumably ``codons`` was meant) -- confirm.
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    counts, options.pseudocounts)
                table_format = "matrix"
            else:
                weights = codons
                table_format = "list"

        elif options.weights in ("counts", "frequencies",
                                 "absolute-frequencies"):
            # get weights as frequencies
            # compute from scratch. In the caijava file, the absolute frequency f / gene_length is
            # given. Thus the total number of codons is f * gene_length.
            codons = CODON_PREFERENCES["dmelanogaster"].keys()
            counts = dict.fromkeys(codons, 0)

            for genename, data in result.mGeneInfo.items():

                if _is_outside_gene_set(data, cai_threshold,
                                        options.reverse_set):
                    continue

                gene_length = data["GENELENGTH"]
                for c in codons:
                    counts[c] += int(data[c] * gene_length)

            if options.weights == "frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.weights == "counts":
                weights = counts
            elif options.weights == "absolute-frequencies":
                # fraction of all counted codons; not normalized per amino
                # acid and no pseudo-counts are added here
                weights = {}
                m = sum(counts.values())
                for k, v in counts.items():
                    weights[k] = float(v) / m

            table_format = "list"

        elif options.weights == "subset":

            codons = CODON_PREFERENCES["dmelanogaster"].keys()
            counts = dict.fromkeys(codons, 0)

            for genename, data in result.mGeneInfo.items():

                # keep genes inside the subset, or outside it when the
                # reverse set is requested
                found = genename in subset
                if (not found and not options.reverse_set) or (
                        found and options.reverse_set):
                    continue

                gene_length = data["GENELENGTH"]
                for c in codons:
                    counts[c] += int(data[c] * gene_length)

            if options.codon_table_type == "frequencies":
                weights = Genomics.CalculateCodonFrequenciesFromCounts(
                    counts, options.pseudocounts)
            elif options.codon_table_type == "weights":
                weights = Genomics.CalculateCAIWeightsFromCounts(
                    counts, options.pseudocounts)
            elif options.codon_table_type == "counts":
                weights = counts
            elif options.codon_table_type == "absolute-frequencies":
                # fraction of all counted codons; not normalized per amino
                # acid and no pseudo-counts are added here
                weights = {}
                m = sum(counts.values())
                for k, v in counts.items():
                    weights[k] = float(v) / m

        else:
            raise ValueError("unknown weights %s" % options.weights)

        if table_format == "list":
            outfile.write("CODON\tWEIGHT\n")
            _write_codon_rows(outfile, weights)

        elif table_format == "matrix":
            _write_codon_matrix(outfile, weights)

    if options.codon_usage:
        outfile.write("CODON\tFREQUENCY\n")

        if options.codon_usage == "biased":
            usages = result.mCodonUsages[-1]
        elif options.codon_usage == "full":
            usages = result.mCodonUsages[0]
        elif options.codon_usage == "weights":
            usages = WrapperAdaptiveCAI.CalculateWeightsFromUsage(
                result.mCodonUsages[0])
        else:
            raise ValueError("unknown option '%s' for codon-usage." %
                             options.codon_usage)

        _write_codon_rows(outfile, usages)

    E.Stop()