예제 #1
0
def GetContig(prediction):
    """Return the contig sequence that a prediction is located on.

    The genome file name is taken from the module-level
    ``param_genome_file``; if it contains a ``%s`` placeholder, the
    placeholder is filled in with ``prediction.mSbjctToken``. The
    sequences of the most recently read genome file are kept in module
    globals, so the file is only read again when the file name changes
    between calls.

    Arguments:
        prediction: object providing the attributes ``mSbjctToken``
            (key into the sequence dictionaries) and ``mSbjctStrand``.

    Returns:
        A tuple ``(token, sequence, is_reverse)``. ``sequence`` is looked
        up in the forward sequences and ``is_reverse`` is False when
        ``prediction.mSbjctStrand`` is ``"+"``; otherwise the sequence is
        looked up in the reverse sequences and ``is_reverse`` is True.

    Raises:
        IOError: if the genome file cannot be opened or read.
        KeyError: if the token is not present in the sequences read.
    """

    global global_last_filename_genome
    global global_forward_sequences
    global global_reverse_sequences

    if "%s" in param_genome_file:
        filename_genome = param_genome_file % prediction.mSbjctToken
    else:
        filename_genome = param_genome_file

    # only (re-)read the genome if a different file is requested than
    # the one whose sequences are currently cached.
    if global_last_filename_genome != filename_genome:
        if param_loglevel >= 2:
            print("# reading genome %s" % filename_genome)

        try:
            # the with-block makes sure the file handle is released
            # even if parsing fails half-way through.
            with open(filename_genome, "r") as infile:
                (global_forward_sequences,
                 global_reverse_sequences) = Genomics.ReadGenomicSequences(
                     infile)
        except IOError:
            # Raise a real exception object: raising a plain string is
            # not a valid exception and hides the message from callers.
            raise IOError("# WARNING: genome %s not found" % filename_genome)

        # remember the file only after it has been read successfully.
        global_last_filename_genome = filename_genome

    token = prediction.mSbjctToken
    if prediction.mSbjctStrand == "+":
        return (token, global_forward_sequences[token], False)
    return (token, global_reverse_sequences[token], True)
예제 #2
0
            param_max_intron = int(a)
        elif o in ("-c", "--min-coverage-query"):
            param_min_coverage_query = float(a)
        elif o in ("-s", "--min-score"):
            param_min_total_score = float(a)

    if len(args) > 0:
        print USAGE, "no arguments required."
        sys.exit(2)

    print E.GetHeader()
    print E.GetParams()

    # read complete genomic sequence
    if param_filename_genome:
        forward_sequences, reverse_sequences = Genomics.ReadGenomicSequences(
            open(param_filename_genome, "r"))
    else:
        forward_sequences = None
        reverse_sequences = None

    # read peptide sequences
    if param_filename_peptides:
        peptide_sequences = Genomics.ReadPeptideSequences(
            open(param_filename_peptides, "r"))
    else:
        peptide_sequences = {}

    print HEADER

    if param_loglevel >= 2:
        print SHORT_HEADER_SUMMARY
예제 #3
0
    if options.range_peptide:
        options.range_peptide = map(int, options.range_peptide.split(","))

    wrapper = Exonerate(options=options.options,
                        output_options=options.output_options)
    wrapper.mLogLevel = options.loglevel

    if options.loglevel >= 2:
        print "# reading peptide sequence."
    peptide_sequences = Genomics.ReadPeptideSequences(
        open(options.input_filename_peptide, "r"))

    if options.loglevel >= 2:
        print "# reading genome sequence."
    genome_sequences = Genomics.ReadGenomicSequences(open(
        options.input_filename_genome, "r"),
                                                     do_reverse=0)

    if not options.id_peptides:
        options.id_peptides = peptide_sequences.keys()
    if not options.id_genomes:
        options.id_genomes = genome_sequences.keys()

    for x in options.id_peptides:
        ps = peptide_sequences[x]
        if options.range_peptide:
            ps = ps[options.range_peptide[0]:options.range_peptide[1]]
        for y in options.id_genomes:
            gs = genome_sequences[y]
            if options.range_genome:
                gs = gs[options.range_genome[0]:options.range_genome[1]]
예제 #4
0
def ReadTranscriptsAndCds(transcript_ids1, transcript_ids2):
    """Read exon boundaries and transcript sequences for two transcript sets.

    File names, genome modes, the mask setting and the log level are
    taken from module-level ``param_*`` variables. Progress messages are
    written to stdout depending on ``param_loglevel``.

    Arguments:
        transcript_ids1: identifiers of the "left" transcripts. Passed as
            ``filter`` to the readers; must support ``len()`` and
            ``.keys()`` (the latter is only used at log level >= 2).
        transcript_ids2: same for the "right" transcripts.

    Returns:
        A tuple ``(transcripts1, transcripts2, cds1, cds2)``.
        ``transcripts1``/``transcripts2`` are empty dictionaries if the
        corresponding ``param_filename_transcripts*`` is not set.
    """

    if param_loglevel >= 1:
        print("# reading %i left and %i right transcripts" % (
            len(transcript_ids1), len(transcript_ids2)))
        sys.stdout.flush()
        print("# reading exon boundaries.")
        sys.stdout.flush()

    # files are opened in with-blocks so that the handles are closed
    # as soon as the contents have been read.
    with open(param_filename_cds1, "r") as infile:
        cds1 = Exons.ReadExonBoundaries(infile,
                                        filter=transcript_ids1,
                                        reset=True)
    with open(param_filename_cds2, "r") as infile:
        cds2 = Exons.ReadExonBoundaries(infile,
                                        filter=transcript_ids2,
                                        reset=True)

    if param_loglevel >= 1:
        print("# read %i left and %i right cds" % (len(cds1), len(cds2)))
        sys.stdout.flush()

    # report requested transcripts for which no exon boundaries were read.
    if param_loglevel >= 2:
        if len(cds1) != len(transcript_ids1):
            print("# missed in left:  %s" % ":".join(
                set(transcript_ids1.keys()).difference(cds1.keys())))
        if len(cds2) != len(transcript_ids2):
            print("# missed in right: %s" % ":".join(
                set(transcript_ids2.keys()).difference(cds2.keys())))

    if param_loglevel >= 1:
        print("# reading genomic sequences.")
        sys.stdout.flush()

    # sequences are either fetched from an indexed fasta file or read
    # from a plain file, depending on the configured genome mode.
    transcripts1 = {}
    if param_filename_transcripts1:
        if param_mode_genome1 == "indexed":
            transcripts1 = Genomics.ParseFasta2HashFromIndex(
                param_filename_transcripts1, filter=transcript_ids1)
        else:
            with open(param_filename_transcripts1, "r") as infile:
                transcripts1 = Genomics.ReadGenomicSequences(
                    infile,
                    do_reverse=0,
                    filter=transcript_ids1,
                    mask=param_mask)

    transcripts2 = {}
    if param_filename_transcripts2:
        if param_mode_genome2 == "indexed":
            transcripts2 = Genomics.ParseFasta2HashFromIndex(
                param_filename_transcripts2, filter=transcript_ids2)
        else:
            with open(param_filename_transcripts2, "r") as infile:
                transcripts2 = Genomics.ReadGenomicSequences(
                    infile,
                    do_reverse=0,
                    filter=transcript_ids2,
                    mask=param_mask)

    if param_loglevel >= 1:
        print("# read %i left and %i right transcript sequences" % (
            len(transcripts1), len(transcripts2)))
        sys.stdout.flush()

    return transcripts1, transcripts2, cds1, cds2
예제 #5
0
파일: get_genes.py 프로젝트: santayana/cgat
def BuildLines(dbhandle,
               statement,
               genome_lengths,
               prefix="",
               default_color=None):

    c = dbhandle.cursor()
    c.execute(statement)

    if param_loglevel >= 2:
        print "# received %i results." % c.rowcount

    sbjct_token = ""
    sbjct_strand = None
    sbjct_from = 10000000000000000
    sbjct_to = 0

    lines = []

    nmatches = 0

    for line in c.fetchall():

        entry = PredictionParser.PredictionParserEntry()

        entry.FillFromTable(line)

        if not genome_lengths.has_key(entry.mSbjctToken):
            filename_genome = param_genome_file % entry.mSbjctToken
            forward_sequences, reverse_sequences = Genomics.ReadGenomicSequences(
                open(filename_genome, "r"))
            genome_lengths[entry.mSbjctToken] = (len(
                forward_sequences[entry.mSbjctToken]), 0)

        lgenome, offset = genome_lengths[entry.mSbjctToken]

        if param_loglevel >= 4:
            print "# lgenome=%i, offset=%i" % (lgenome, offset)

        # get cds information
        exons = []
        if param_tablename_exons:
            cc = dbhandle.cursor()

            statement = """SELECT exon_from, exon_to, exon_frame, genome_exon_from, genome_exon_to
            FROM %s WHERE prediction_id = %i""" % (
                param_tablename_exons,
                entry.mPredictionId,
            )

            if param_restrict_good_exons:
                statement += " AND is_ok = TRUE"

            try:
                cc.execute(statement)
                result = cc.fetchall()
            except pgdb.DatabaseError, msg:
                print "# query failed with message", msg
                result = []

            exons = result
            cc.close()

        if not exons:
            if entry.mMapPeptide2Genome:
                exons = Genomics.Alignment2ExonBoundaries(
                    entry.mMapPeptide2Genome,
                    query_from=entry.mQueryFrom - 1,
                    sbjct_from=entry.mSbjctGenomeFrom,
                    add_stop_codon=1)
            else:
                exons = [("", "", 0, entry.mSbjctGenomeFrom,
                          entry.mSbjctGenomeTo)]

        # select gene id
        if param_tablename_genes:
            cc = dbhandle.cursor()
            statement = """SELECT gene_id
            FROM %s WHERE prediction_id = %i""" % (param_tablename_genes,
                                                   entry.mPredictionId)

            try:
                cc.execute(statement)
                result = cc.fetchone()
            except pgdb.DatabaseError, msg:
                print "# query failed with message", msg
                result = None

            gene_id = result[0]
            dbhandle.commit()
            cc.close()