Python Genomics 예제들, Genomics Python 예제들

예제 #1

0

파일 보기

파일: Motifs.py 프로젝트: BioinformaticsArchive/cgat

def countMotifs( infile, motifs ):
    '''Locate regular expression *motifs* in the sequences of the
    fasta formatted *infile*.

    *motifs* is an iterable of ``(name, pattern)`` pairs in which each
    pattern provides ``finditer()``.  Every pattern is searched on the
    sequence as read (strand "+") and on ``Genomics.complement()`` of
    the sequence (strand "-").  Hits on the latter are reported as
    ``(length - end, length - start)``, which presumes that
    ``Genomics.complement()`` returns the reverse complement
    -- TODO confirm.

    Returns a list with one ``(title, hits)`` tuple per sequence, where
    *hits* is a list of ``(name, strand, start, end)`` tuples.
    '''

    iterator = FastaIterator.FastaIterator( infile )
    results = []
    while True:
        try:
            record = iterator.next()
        except StopIteration:
            break
        # a false record also marks the end of the input
        if not record:
            break

        forward = record.sequence
        reverse = Genomics.complement( forward )
        length = len( forward )

        hits = []
        for name, regex in motifs:
            # forward strand hits first, then the hits on the other strand
            hits.extend( ( name, "+", match.start(), match.end() )
                         for match in regex.finditer( forward ) )
            hits.extend( ( name, "-", length - match.end(), length - match.start() )
                         for match in regex.finditer( reverse ) )

        results.append( ( record.title, hits ) )

    return results

예제 #2

0

파일 보기

def FilterJunk(orthologs):
    """Remove assignments to junk contigs, modifying *orthologs* in place.

    *orthologs* maps an identifier to a sequence of entries that each
    have a ``contig`` attribute.  Entries for which
    ``Genomics.IsJunk(entry.contig)`` is true are dropped; identifiers
    left without any entry are deleted from the dictionary.

    Returns None.
    """

    # Iterate over a snapshot: keys are deleted inside the loop, which is
    # an error while iterating a live dictionary view.
    for key, entries in list(orthologs.items()):
        # A real list (not a lazy filter object) so that emptiness can be
        # tested and the stored value can be iterated more than once.
        kept = [x for x in entries if not Genomics.IsJunk(x.contig)]
        if kept:
            orthologs[key] = kept
        else:
            del orthologs[key]

예제 #3

0

파일 보기

파일: WrapperDialign.py 프로젝트: yangjl/cgat

    def Align(self, s1, s2, result):
        """Align *s1* and *s2* with the wrapped executable and fill *result*.

        Both sequences are written as FASTA records ``s1`` and ``s2`` to a
        temporary file.  The command assembled from ``self.mEnvironment``,
        ``self.mExecutable``, ``self.mOptions`` and that file name is run
        through the shell.  The lines starting two lines below the marker
        ``Alignment (FASTA format):`` on its standard output are parsed
        with ``Genomics.ParseFasta2Hash``.

        Columns in which both aligned rows carry an upper-case letter are
        added to *result* via ``addPairExplicit(x1, x2, 0)``, where *x1*
        and *x2* are residue numbers starting at 1.

        Returns *result*, or None if the output contained no alignment.
        The temporary file is removed in either case.
        """

        result.clear()

        handle_tmpfile, filename_tmpfile = tempfile.mkstemp()
        try:
            # fdopen takes ownership of the descriptor and closes it
            with os.fdopen(handle_tmpfile, "w") as outfile:
                outfile.write(">s1\n%s\n" % (s1))
                outfile.write(">s2\n%s\n" % (s2))

            statement = " ".join(("(", self.mEnvironment, self.mExecutable,
                                  self.mOptions, filename_tmpfile, ")"))

            p = subprocess.Popen(statement,
                                 shell=True,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 close_fds=True,
                                 universal_newlines=True)

            # communicate() closes the child's stdin, drains stdout and
            # stderr together and waits for the process to finish.  The
            # previous code closed p.stdout and tried to read from p.stdin.
            stdout_data, _ = p.communicate()
        finally:
            os.remove(filename_tmpfile)

        # The former exit-status test never fired (close() on a pipe
        # returns None).  A failed run produces no FASTA block and is
        # reported by returning None below, as before.
        lines = stdout_data.splitlines(True)

        r = None
        for x, line in enumerate(lines):
            if re.search(r"Alignment \(FASTA format\):", line):
                r = Genomics.ParseFasta2Hash(lines[x + 2:])
                break

        if not r:
            return None

        a1 = r['s1']
        a2 = r['s2']

        # x1/x2 track the residue number within each ungapped sequence
        x1 = 1
        x2 = 1
        for pos in range(len(a1)):
            if (a1[pos] in string.ascii_uppercase and
                    a2[pos] in string.ascii_uppercase):
                result.addPairExplicit(x1, x2, 0)
                x1 += 1
                x2 += 1
                continue

            # any non-gap character consumes a residue without being paired
            if a1[pos] != "-":
                x1 += 1
            if a2[pos] != "-":
                x2 += 1

        return result

예제 #4

0

파일 보기

    def loadSequence(self, sequence):
        """Load sequence properties and codon counts from *sequence*.

        Calls ``SequenceProperties.loadSequence`` and stores the result of
        ``Genomics.CountCodons`` for the upper-cased sequence in
        ``self.mCodonCounts``.

        Raises ValueError if the length of *sequence* is not a multiple
        of three.
        """

        if len(sequence) % 3:
            # The previous message referred to an undefined record title
            # and was raised as a plain string, which is not a valid
            # exception.
            raise ValueError(
                "sequence is not multiples of 3: length=%i!" % len(sequence))

        SequenceProperties.loadSequence(self, sequence)

        # uppercase all letters and count codons
        self.mCodonCounts = Genomics.CountCodons(sequence.upper())

예제 #5

0

파일 보기

    def Expand(self):
        """Switch the entry to expanded mode.

        Sets ``self.mExpand``.  If the peptide-to-translation alignment
        is not empty, the alignment strings and the row/column ranges are
        refreshed from it.  The peptide-to-genome alignment is always
        rebuilt from ``self.mAlignmentString``.
        """
        self.mExpand = True

        peptide_map = self.mMapPeptide2Translation
        if peptide_map.getLength() > 0:
            emissions = alignlib_lite.py_AlignmentFormatEmissions(peptide_map)
            self.mQueryAli, self.mSbjctAli = (emissions.mRowAlignment,
                                              emissions.mColAlignment)
            # rows belong to the query, columns to the sbjct
            self.mQueryFrom = peptide_map.getRowFrom()
            self.mQueryTo = peptide_map.getRowTo()
            self.mSbjctFrom = peptide_map.getColFrom()
            self.mSbjctTo = peptide_map.getColTo()

        genome_map = Genomics.String2Alignment(self.mAlignmentString)
        self.mMapPeptide2Genome = genome_map

예제 #6

0

파일 보기

    def loadSequence(self, sequence):
        """Load sequence properties and amino acid counts from *sequence*.

        ``self.mCountsAA`` maps every letter of the extended IUPAC protein
        alphabet to the number of codons of *sequence* that
        ``Genomics.MapCodon2AA`` translates into that letter.
        """

        SequenceProperties.loadSequence(self, sequence)

        # every letter of the alphabet starts with a count of zero
        self.mCountsAA = dict.fromkeys(
            Bio.Alphabet.IUPAC.extended_protein.letters, 0)

        # walk the sequence in consecutive windows of three characters
        for start in range(0, len(sequence), 3):
            residue = Genomics.MapCodon2AA(sequence[start:start + 3])
            self.mCountsAA[residue] += 1

예제 #7

0

파일 보기

    def getKL(self, usage):
        """Return the Kullback-Leibler divergence (relative entropy) of the
        sequence with respect to the reference codon *usage*.

        The value is the sum over all codons in ``self.mCodonCounts`` of
        ``usage[codon] * log(usage[codon] / freqs[codon])``, where *freqs*
        comes from ``Genomics.CalculateCodonFrequenciesFromCounts``
        applied to the codon counts and ``self.mPseudoCounts``.
        """

        freqs = Genomics.CalculateCodonFrequenciesFromCounts(
            self.mCodonCounts, self.mPseudoCounts)

        return sum(usage[codon] * math.log(usage[codon] / freqs[codon])
                   for codon in self.mCodonCounts)

예제 #8

0

파일 보기

    def getCopy(self):
        """Return a new ``Prediction`` with the same field values.

        Plain fields are copied by reference.  If ``self.mExpand`` is set,
        the copy receives its own peptide-to-translation alignment (copied
        with ``alignlib_lite.py_copyAlignment``) and a peptide-to-genome
        alignment rebuilt from the alignment string.  Otherwise both
        alignment attributes of the copy are None.

        This entry itself is left untouched.
        """

        new_entry = Prediction()

        # NOTE(review): mNAssembled is not part of this list, exactly as
        # in the previous implementation -- confirm whether it should be.
        for name in ("mExpand",
                     "mPredictionId",
                     "mQueryToken",
                     "mQueryFrom",
                     "mQueryTo",
                     "mSbjctToken",
                     "mSbjctStrand",
                     "mSbjctFrom",
                     "mSbjctTo",
                     "mRank",
                     "score",
                     "mQueryLength",
                     "mQueryCoverage",
                     "mNGaps",
                     "mNFrameShifts",
                     "mNIntrons",
                     "mNSplits",
                     "mNStopCodons",
                     "mPercentIdentity",
                     "mPercentSimilarity",
                     "mTranslation",
                     "mSbjctGenomeFrom",
                     "mSbjctGenomeTo",
                     "mAlignmentString",
                     "mQueryAli",
                     "mSbjctAli"):
            setattr(new_entry, name, getattr(self, name))

        if self.mExpand:
            new_entry.mMapPeptide2Translation = \
                alignlib_lite.py_makeAlignmentVector()
            alignlib_lite.py_copyAlignment(new_entry.mMapPeptide2Translation,
                                           self.mMapPeptide2Translation)
            new_entry.mMapPeptide2Genome = Genomics.String2Alignment(
                new_entry.mAlignmentString)
        else:
            # Only the copy is reset.  The previous chained assignment
            # (new = self.attr = None) also wiped the alignments of the
            # entry being copied.
            new_entry.mMapPeptide2Translation = None
            new_entry.mMapPeptide2Genome = None

        return new_entry

예제 #9

0

파일 보기

    def fillFromTable(self, table_row):
        """Fill the entry from a row of a predictions table.

        *table_row* must hold at least 25 fields, in the order of the
        assignment below.  A 26th field, if present, is stored in
        ``self.mNAssembled``; any further fields are ignored.

        If ``self.mExpand`` is set, the alignment objects are rebuilt from
        the alignment strings read from the row.

        Raises ValueError if the row has fewer than 25 fields.
        """

        nfields = len(table_row)
        if nfields < 25:
            raise ValueError("unknown format: %i fields" % nfields)

        (self.mPredictionId, self.mQueryToken, self.mSbjctToken,
         self.mSbjctStrand, self.mRank, self.score, self.mQueryFrom,
         self.mQueryTo, self.mQueryAli, self.mSbjctFrom, self.mSbjctTo,
         self.mSbjctAli, self.mQueryLength, self.mQueryCoverage,
         self.mNGaps, self.mNFrameShifts, self.mNIntrons, self.mNSplits,
         self.mNStopCodons, self.mPercentIdentity, self.mPercentSimilarity,
         self.mTranslation, self.mSbjctGenomeFrom, self.mSbjctGenomeTo,
         self.mAlignmentString) = table_row[:25]

        if nfields >= 26:
            self.mNAssembled = table_row[25]

        if self.mExpand:
            self.mMapPeptide2Translation = \
                alignlib_lite.py_makeAlignmentVector()

            # the alignment stays empty if either alignment string is empty
            if self.mQueryAli != "" and self.mSbjctAli != "":
                alignlib_lite.py_AlignmentFormatEmissions(
                    self.mQueryFrom, self.mQueryAli, self.mSbjctFrom,
                    self.mSbjctAli).copy(self.mMapPeptide2Translation)

            self.mMapPeptide2Genome = Genomics.String2Alignment(
                self.mAlignmentString)

예제 #10

0

파일 보기

파일: WrapperGblocks.py 프로젝트: yangjl/cgat

    def GetBlocks( self, s1, s2 ):
        """Run the wrapped executable on two sequences and return its blocks.

        The strings have to be already aligned!

        *s1* and *s2* are written as FASTA records ``s1`` and ``s2`` to a
        temporary file.  The command built from ``self.mEnvironment``,
        ``self.mExecutable`` (a format string that receives the file
        name) and ``self.mOptions`` is run through the shell.  The result
        is read from the file ``<tmpfile>-gb`` and parsed with
        ``Genomics.ParseFasta2Hash``.

        Returns the tuple ``(r['s1'], r['s2'])`` of the parsed output, or
        ``("", "")`` if no output file was written or it held no
        sequences.  All temporary files are removed in every case.
        """

        handle_tmpfile, filename_tmpfile = tempfile.mkstemp()
        filename_blocks = filename_tmpfile + "-gb"
        try:
            # fdopen takes ownership of the descriptor and closes it
            with os.fdopen( handle_tmpfile, "w" ) as outfile:
                outfile.write( ">s1\n%s\n" % (s1) )
                outfile.write( ">s2\n%s\n" % (s2) )

            # str.join takes a single iterable; passing the parts as
            # separate arguments raised a TypeError.
            statement = " ".join( ( "(", self.mEnvironment,
                                    self.mExecutable % filename_tmpfile,
                                    self.mOptions, ")" ) )

            p = subprocess.Popen( statement,
                                  shell=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  close_fds=True )

            # communicate() closes the child's stdin, drains both output
            # pipes and waits for the process to finish.  The output
            # itself is not used.
            p.communicate()

            # The exit status is not checked: the former test never fired
            # (close() on a pipe returns None) and success is judged by
            # the presence of the output file.
            # NOTE(review): Gblocks is reported to exit non-zero even on
            # success -- confirm before adding a returncode check.
            if not os.path.exists( filename_blocks ):
                return "", ""

            with open( filename_blocks ) as infile:
                lines = infile.readlines()
        finally:
            for filename in ( filename_tmpfile,
                              filename_blocks,
                              filename_blocks + ".htm" ):
                if os.path.exists( filename ):
                    os.remove( filename )

        r = Genomics.ParseFasta2Hash( lines )

        if not r:
            return "", ""

        return r['s1'], r['s2']

예제 #11

0

파일 보기

    def getEntropy(self, usage=None):
        """Return the entropy of a source in terms of a reference *usage*.

        Also called conditional entropy or encoding cost.

        The value is the sum over all codons in ``self.mCodonCounts`` of
        ``-freqs[codon] * log(usage[codon])``, where *freqs* comes from
        ``Genomics.CalculateCodonFrequenciesFromCounts`` applied to the
        codon counts and ``self.mPseudoCounts``.

        If *usage* is not given, *freqs* itself is used as the reference,
        i.e. the plain entropy is calculated.

        NOTE(review): an earlier description spoke of a sum over 20
        entropies, one for each amino acid; the loop here runs over
        codons -- confirm which is intended.
        """

        e = 0

        freqs = Genomics.CalculateCodonFrequenciesFromCounts(
            self.mCodonCounts, self.mPseudoCounts)

        # identity test: "== None" would invoke a custom __eq__ of usage
        if usage is None:
            usage = freqs

        for codon in self.mCodonCounts:
            e -= freqs[codon] * math.log(usage[codon])

        return e

예제 #12

0

파일 보기

파일: WrapperExonerate.py 프로젝트: yangjl/cgat

        id_peptides = None,
        id_genomes = None,
        output_options = []
        )

    (options, args) = E.Start( parser ) 

    if options.range_genome: options.range_genome = map(int, options.range_genome.split(","))
    if options.range_peptide: options.range_peptide = map(int, options.range_peptide.split(","))    

    wrapper = Exonerate( options=options.options, output_options=options.output_options )
    wrapper.mLogLevel = options.loglevel

    if options.loglevel >= 2:
        print "# reading peptide sequence."
    peptide_sequences = Genomics.ReadPeptideSequences( open(options.input_filename_peptide, "r") )
    
    if options.loglevel >= 2:
        print "# reading genome sequence."
    genome_sequences = Genomics.ReadGenomicSequences( open(options.input_filename_genome, "r"), do_reverse = 0 )    

    if not options.id_peptides:
        options.id_peptides= peptide_sequences.keys()
    if not options.id_genomes:
        options.id_genomes= genome_sequences.keys()
        
    for x in options.id_peptides:
        ps = peptide_sequences[x]
        if options.range_peptide:
            ps = ps[options.range_peptide[0]:options.range_peptide[1]]
        for y in options.id_genomes:

예제 #13

0

파일 보기

    def loadSequence(self, sequence):
        """Load sequence properties and per-position counts from *sequence*.

        After the call

        * ``self.mNStopCodons`` holds the number of codons for which
          ``Genomics.IsStopCodon`` is true,
        * ``self.mCounts[p]`` maps A, C, G, T, X and N to their number of
          occurrences at codon position *p*,
        * ``self.mCountsDegeneracy[p][d]`` maps each letter of the
          extended IUPAC DNA alphabet to its number of occurrences at
          codon position *p* among non-stop codons for which
          ``Genomics.GetDegeneracy`` reports degeneracy *d* at that
          position.
        """

        SequenceProperties.loadSequence(self, sequence)

        # all counting is done on upper-case letters
        sequence = sequence.upper()

        self.mNStopCodons = 0

        # Nucleotide counts for each codon position.  These are not the
        # sum of the counts per degenerate site, because a codon may
        # contain ambiguous bases, e.g. GNN.
        self.mCounts = [dict.fromkeys("ACGTXN", 0) for _ in range(3)]

        # Nucleotide counts for each codon position, split into five
        # degeneracy slots.
        dna_letters = Bio.Alphabet.IUPAC.extended_dna.letters
        self.mCountsDegeneracy = [
            [dict.fromkeys(dna_letters, 0) for _ in range(5)]
            for _ in range(3)
        ]

        for start in range(0, len(sequence), 3):
            codon = sequence[start:start + 3]

            for position in (0, 1, 2):
                self.mCounts[position][codon[position]] += 1

            # stop codons are counted, but have no degeneracy
            if Genomics.IsStopCodon(codon):
                self.mNStopCodons += 1
                continue

            try:
                _, first, second, third = Genomics.GetDegeneracy(codon)
                for position, degree in enumerate((first, second, third)):
                    self.mCountsDegeneracy[position][degree][
                        codon[position]] += 1
            except KeyError:
                # lookups that fail are skipped without counting
                pass

예제 #14

0

파일 보기

def updateVariants(variants, lcontig, strand, phased=True):
    '''update variants such that they use same coordinate
    system (and strand) as the transcript

    *variants* is an iterable of objects with the attributes ``pos``,
    ``genotype`` and ``reference``. *lcontig* is the contig length used
    to mirror coordinates on the negative strand. *strand* is evaluated
    with ``Genomics.IsPositiveStrand``. If *phased* is false, empty
    alleles are dropped from multi-character genotypes.

    Returns a list with one ``ExtendedVariant`` per input variant, built
    from ``(start, end, reference.upper(), action, has_wildtype,
    variantseqs)``. *action* is "=" for single-character genotypes, "-"
    for deletions, "+" for insertions and ">" or "<" for genotypes that
    contain both.

    The summary used to say "fixes 1-ness of variants", but the
    ``pos -= 1`` adjustment below is commented out, so ``pos`` is used
    as given.

    NOTE(review): a multi-character genotype that contains neither "+"
    nor "-" assigns none of action/start/end. On the first variant this
    raises UnboundLocalError; later it silently reuses the values of the
    previous variant -- confirm such genotypes cannot occur.
    '''

    new_variants = []
    is_positive = Genomics.IsPositiveStrand(strand)

    for variant in variants:

        pos = variant.pos
        # NOTE(review): bytes() is an alias of str() under Python 2;
        # presumably this only normalises the type -- confirm
        genotype = bytes(variant.genotype)
        reference = bytes(variant.reference)

        # fix 1-ness of variants (disabled)
        # pos -= 1

        if len(genotype) == 1:
            # single-character genotype: covers exactly the base at pos
            variantseqs = list(Genomics.decodeGenotype(genotype))
            has_wildtype = reference in variantseqs
            action = "="
            start, end = pos, pos + 1
        else:

            # alleles are separated by "/"; the first character of each
            # allele is stripped
            variantseqs = [x[1:] for x in genotype.split("/")]
            # length of the longest allele, taken before empty alleles
            # are dropped
            lvariant = max([len(x) for x in variantseqs])
            if not phased: variantseqs = [x for x in variantseqs if x]
            has_wildtype = "*" in genotype

            if "+" in genotype and "-" in genotype:
                # both insertion and deletion at position
                # the range is given by the deletion
                # see below for explanations
                if genotype.startswith("+"):
                    # second allele is padded with "-" to the longest length
                    action = ">"
                    variantseqs[1] += "-" * (lvariant - len(variantseqs[1]))
                else:
                    # first allele is padded with "-" to the longest length
                    action = "<"
                    variantseqs[0] += "-" * (lvariant - len(variantseqs[0]))

                start, end = pos + 1, pos + lvariant + 1

            elif "-" in genotype:
                action = "-"
                # samtools: deletions are after the base denoted by snp.position
                #   * <- deletion at 1
                # 0 1 2 3 4 5 6
                #     - -
                # 6 5 4 3 2 1 0
                # deletion of 2+3 = (2,4)
                # on reverse: (7-4, 7-2) = (3,5)
                start, end = pos + 1, pos + lvariant + 1

                # deletions of unequal length are filled up with "-"
                # This is necessary to deal with negative strands:
                # -at/-atg on the positive strand deletes a t [g]
                # -at/-atg on the negative strand deletes [g] t a
                variantseqs = [
                    x + "-" * (lvariant - len(x)) for x in variantseqs
                ]

            elif "+" in genotype:
                action = "+"
                # indels are after the base denoted by position
                # as region use both flanking base so that negative strand
                # coordinates work
                # insertion between position 2 and 3
                #     * <- insertion at pos 2
                # 0 1 2i3 4
                # 4 3 2i1 0
                # is insertion between 1 and 2 in reverse
                # including both flanking residues makes it work:
                # (2,3) = (5-3,5-2) = (2,3)
                # but:
                # (2,4) = (5-4,5-2) = (1,3)
                start, end = pos, pos + 2

        # revert strand: sequences go through Genomics.complement and the
        # interval is mirrored on the contig
        if not is_positive:
            reference = Genomics.complement(reference)
            variantseqs = [Genomics.complement(x.upper()) for x in variantseqs]
            start, end = lcontig - end, lcontig - start

        new_variants.append(
            ExtendedVariant._make((start, end, reference.upper(), action,
                                   has_wildtype, variantseqs)))

    return new_variants

예제 #15

0

파일 보기

 def setTranslation(self, genomic_sequence):
     """Derive the peptide-to-translation alignment and the translation
     from *genomic_sequence* and store both on the entry.
     """
     peptide_map, translation = Genomics.Alignment2PeptideAlignment(
         self.mMapPeptide2Genome,
         self.mQueryFrom,
         self.mSbjctGenomeFrom,
         genomic_sequence)
     self.mMapPeptide2Translation = peptide_map
     self.mTranslation = translation

예제 #16

0

파일 보기

파일: AlignedPairs.py 프로젝트: BioinformaticsArchive/cgat

    def Align( self, method, anchor = 0, loglevel = 1 ):
        """Align the pair of sequences and store the result on the instance.

        *method* is one of "dialign", "blastz", "dialignlgs", "dba", "nw"
        or "sw".  Any other value is treated as a callback and called as
        ``method(s1, s2, map_a2b)``.  "clustal" raises
        NotImplementedError.

        If *anchor* is non-zero, both sequences are padded with *anchor*
        copies of "A" on either side before aligning.  The padded regions
        are removed from the alignment afterwards and the alignment is
        shifted back by *anchor*.

        *loglevel* is accepted but not used in this method.

        For "blastz", if the wrapper reports a reverse complement match,
        ``self.strand`` is set to "-" and ``self.mSequence2`` is replaced
        by ``Genomics.complement`` of itself.

        Raises AlignmentError if the resulting alignment is empty.

        get rid of this and use a method class instead in the future
        """

        map_a2b = alignlib_lite.py_makeAlignmentVector()
        s1 = "A" * anchor + self.mSequence1 + "A" * anchor
        s2 = "A" * anchor + self.mSequence2 + "A" * anchor

        self.strand = "+"

        if method == "dialign":
            dialign = WrapperDialign.Dialign( self.mOptionsDialign )
            dialign.Align( s1, s2, map_a2b )
        elif method == "blastz":
            blastz = WrapperBlastZ.BlastZ( self.mOptionsBlastZ )
            blastz.Align( s1, s2, map_a2b )
            if blastz.isReverseComplement():
                self.strand = "-"
                self.mSequence2 = Genomics.complement( self.mSequence2 )

        elif method == "dialignlgs":
            dialignlgs = WrapperDialign.Dialign( self.mOptionsDialignLGS )
            dialignlgs.Align( s1, s2, map_a2b )
        elif method == "dba":
            dba = WrapperDBA.DBA()
            dba.Align( s1, s2, map_a2b )
        elif method == "clustal":
            # the statements that used to follow this raise were unreachable
            raise NotImplementedError( "clustal wrapper needs to be updated")
        elif method == "nw":
            seq1 = alignlib_lite.py_makeSequence( s1 )
            seq2 = alignlib_lite.py_makeSequence( s2 )
            alignator = alignlib_lite.py_makeAlignatorDPFull( alignlib_lite.py_ALIGNMENT_GLOBAL,
                                                      gop=-12.0,
                                                      gep=-2.0 )
            alignator.align( map_a2b, seq1, seq2 )
        elif method == "sw":
            seq1 = alignlib_lite.py_makeSequence( s1 )
            seq2 = alignlib_lite.py_makeSequence( s2 )
            # NOTE(review): alignator_sw and min_score_sw are not defined
            # in this method; presumably module-level names -- confirm
            alignlib_lite.py_performIterativeAlignment( map_a2b, seq1, seq2, alignator_sw, min_score_sw )
        else:
            ## use callback function
            method(s1, s2, map_a2b)

        if map_a2b.getLength() == 0:
            raise AlignmentError("empty alignment")

        if anchor:
            # strip the padding on both sides of rows and columns, then
            # shift the coordinates back to the unpadded sequences
            map_a2b.removeRowRegion( anchor + len(self.mSequence1) + 1, map_a2b.getRowTo() )
            map_a2b.removeRowRegion( 1, anchor)
            map_a2b.removeColRegion( anchor + len(self.mSequence2) + 1, map_a2b.getColTo() )
            map_a2b.removeColRegion( 1, anchor)
            map_a2b.moveAlignment( -anchor, -anchor )

        f = alignlib_lite.py_AlignmentFormatExplicit( map_a2b,
                                              alignlib_lite.py_makeSequence( self.mSequence1),
                                              alignlib_lite.py_makeSequence( self.mSequence2) )

        self.mMethod = method
        self.mAlignment = map_a2b
        self.mAlignedSequence1, self.mAlignedSequence2 = f.mRowAlignment, f.mColAlignment
        f = alignlib_lite.py_AlignmentFormatEmissions( map_a2b )
        self.mAlignment1, self.mAlignment2 = f.mRowAlignment, f.mColAlignment
        self.mAlignmentFrom1 = map_a2b.getRowFrom()
        self.mAlignmentTo1 = map_a2b.getRowTo()
        self.mAlignmentFrom2 = map_a2b.getColFrom()
        self.mAlignmentTo2 = map_a2b.getColTo()
        self.mNumGaps, self.mLength = map_a2b.getNumGaps(), map_a2b.getLength()
        self.mAligned = self.mLength - self.mNumGaps

        self.SetPercentIdentity()
        self.SetBlockSizes()

예제 #17

0

파일 보기

    def updateProperties(self):
        """Update the inherited properties, then recompute
        ``self.mCodonFrequencies`` from the current codon counts.
        """

        SequencePropertiesCodons.updateProperties(self)

        counts = self.mCodonCounts
        frequencies = Genomics.CalculateCodonFrequenciesFromCounts(counts)
        self.mCodonFrequencies = frequencies

예제 #18

0

파일 보기

파일: Variants.py 프로젝트: BioinformaticsArchive/cgat

def updateVariants( variants, lcontig, strand, phased = True ):
    '''update variants such that they use same coordinate
    system (and strand) as the transcript

    *variants* is an iterable of objects with the attributes ``pos``,
    ``genotype`` and ``reference``. *lcontig* is the contig length used
    to mirror coordinates on the negative strand. *strand* is evaluated
    with ``Genomics.IsPositiveStrand``. If *phased* is false, empty
    alleles are dropped from multi-character genotypes.

    Returns a list with one ``ExtendedVariant`` per input variant, built
    from ``(start, end, reference.upper(), action, has_wildtype,
    variantseqs)``. *action* is "=" for single-character genotypes, "-"
    for deletions, "+" for insertions and ">" or "<" for genotypes that
    contain both.

    The summary used to say "fixes 1-ness of variants", but the
    ``pos -= 1`` adjustment below is commented out, so ``pos`` is used
    as given.

    NOTE(review): a multi-character genotype that contains neither "+"
    nor "-" assigns none of action/start/end. On the first variant this
    raises UnboundLocalError; later it silently reuses the values of the
    previous variant -- confirm such genotypes cannot occur.
    '''

    new_variants = []
    is_positive = Genomics.IsPositiveStrand( strand )

    for variant in variants:

        pos = variant.pos
        # NOTE(review): bytes() is an alias of str() under Python 2;
        # presumably this only normalises the type -- confirm
        genotype = bytes(variant.genotype)
        reference = bytes(variant.reference)

        # fix 1-ness of variants (disabled)
        # pos -= 1

        if len(genotype) == 1:
            # single-character genotype: covers exactly the base at pos
            variantseqs = list(Genomics.decodeGenotype( genotype ) )
            has_wildtype = reference in variantseqs
            action = "="
            start, end = pos, pos+1
        else:

            # alleles are separated by "/"; the first character of each
            # allele is stripped
            variantseqs = [ x[1:] for x in genotype.split("/") ]
            # length of the longest allele, taken before empty alleles
            # are dropped
            lvariant = max( [len(x) for x in variantseqs ] )
            if not phased: variantseqs = [ x for x in variantseqs if x ]
            has_wildtype = "*" in genotype

            if "+" in genotype and "-" in genotype:
                # both insertion and deletion at position
                # the range is given by the deletion
                # see below for explanations
                if genotype.startswith("+"):
                    # second allele is padded with "-" to the longest length
                    action = ">"
                    variantseqs[1] += "-" * (lvariant - len(variantseqs[1]))
                else:
                    # first allele is padded with "-" to the longest length
                    action = "<"
                    variantseqs[0] += "-" * (lvariant - len(variantseqs[0]))

                start, end = pos + 1, pos + lvariant + 1

            elif "-" in genotype:
                action = "-"
                # samtools: deletions are after the base denoted by snp.position
                #   * <- deletion at 1
                # 0 1 2 3 4 5 6
                #     - -
                # 6 5 4 3 2 1 0
                # deletion of 2+3 = (2,4)
                # on reverse: (7-4, 7-2) = (3,5)
                start, end = pos + 1, pos + lvariant + 1

                # deletions of unequal length are filled up with "-"
                # This is necessary to deal with negative strands:
                # -at/-atg on the positive strand deletes a t [g]
                # -at/-atg on the negative strand deletes [g] t a
                variantseqs = [ x + "-" * (lvariant - len( x )) for x in variantseqs ]

            elif "+" in genotype:
                action = "+"
                # indels are after the base denoted by position
                # as region use both flanking base so that negative strand
                # coordinates work
                # insertion between position 2 and 3
                #     * <- insertion at pos 2
                # 0 1 2i3 4
                # 4 3 2i1 0
                # is insertion between 1 and 2 in reverse
                # including both flanking residues makes it work:
                # (2,3) = (5-3,5-2) = (2,3)
                # but:
                # (2,4) = (5-4,5-2) = (1,3)
                start, end = pos, pos + 2

        # revert strand: sequences go through Genomics.complement and the
        # interval is mirrored on the contig
        if not is_positive:
            reference = Genomics.complement( reference )
            variantseqs = [ Genomics.complement( x.upper() ) for x in variantseqs ]
            start, end = lcontig - end, lcontig - start

        new_variants.append( ExtendedVariant._make( (
                    start, end, reference.upper(), action, has_wildtype, variantseqs ) ))

    return new_variants

예제 #19

0

파일 보기

파일: WrapperBlastZ.py 프로젝트: yangjl/cgat

                      "--options",
                      dest="options",
                      type="string",
                      help="BlastZ options.")

    parser.set_defaults( \
        input_filename_seq1 = None,
        input_filename_seq2 = None,
        options = "B=0 C=2")

    (options, args) = E.Start(parser)

    wrapper = BlastZ(options.options)

    import alignlib_lite
    seqs1 = Genomics.ReadPeptideSequences(
        open(options.input_filename_seq1, "r"))
    seqs2 = Genomics.ReadPeptideSequences(
        open(options.input_filename_seq2, "r"))
    seq1 = seqs1[seqs1.keys()[0]]
    seq2 = seqs2[seqs2.keys()[0]]
    result = alignlib_lite.py_makeAlignmentVector()
    wrapper.Align(seq1, seq2, result)

    print str(
        alignlib_lite.py_AlignmentFormatExplicit(
            result, alignlib_lite.py_makeSequence(seq1),
            alignlib_lite.py_makeSequence(seq2)))

    E.Stop()