コード例 #1
0
ファイル: motifs.py プロジェクト: asntech/rSeqPipeline
    def _initJASPAR(self,motifData):
        """Sets Motif's attributes using a JASPAR motif record.

        motifData is indexed like a sequence: item 0 is the header line
        ('>ACCESSION NAME', optionally newline-terminated) and item 1 is the
        count-matrix text with one row per base (see _buildPWM).

        Sets self.accession, self.name, self.pwm, self.barcode,
        self.consensus and self.source, then calls self._setID().

        Raises ValueError if the header does not split into exactly two
        whitespace-separated fields, or if a matrix count is not an integer.
        """

        def _buildPWM(dataString):
            """Convert JASPAR's matrix data to my format and
            return the PWM/barcode.

            Example dataString (rows separated by newlines):
                A  [0  3]
                C  [94 75]
                G  [1  0]
                T  [2 19]

            Returns (pwm, bc): pwm maps each row label to a tuple of int
            counts; bc is the md5 hex digest of str(pwm)."""

            # Brackets only decorate the counts; turn them into whitespace so
            # every row reads "<base> <count> <count> ...".
            cleaned = dataString.replace('[',' ').replace(']',' ')

            pwm = {}
            for row in cleaned.split('\n'):
                fields = row.split()
                if not fields:
                    # Blank row (e.g. left behind by a trailing newline):
                    # nothing to record, and fields[0] would raise IndexError.
                    continue
                pwm[fields[0]] = tuple([int(x) for x in fields[1:]])

            # hashlib only accepts bytes. Encode only when str(pwm) is a text
            # string (Python 3) so the digest under Python 2 is unchanged.
            pwmText = str(pwm)
            if not isinstance(pwmText, bytes):
                pwmText = pwmText.encode('utf-8')
            bc = hashlib.md5(pwmText).hexdigest()
            return pwm,bc

        # Tuple unpacking enforces exactly two header fields (accession, name).
        acc,mName = motifData[0].lstrip('>').rstrip('\n').split()
        self.accession = acc
        self.name      = mName
        self.pwm,self.barcode = _buildPWM(motifData[1])
        PWM.__init__(self,self._getMotilityMatrix(self.pwm)) # call motility's PWM's init
        # NOTE(review): the consensus is generated after PWM.__init__, so it
        # presumably relies on state set up there -- confirm before reordering.
        self.consensus = self._generateConsensus()
        self.source = 'jaspar'
        self._setID()
コード例 #2
0
ファイル: motifs.py プロジェクト: asntech/rSeqPipeline
    def _initSCOPE(self,motifData):
        """Sets Motif's attributes using a single SCOPE 'motif' data set.

        motifData is accessed through DOM-style calls (getAttribute,
        getElementsByTagName). self.bases is read here and must already be
        set on the instance.

        Sets self.consensus, self.sigvalue, self.algorithm, self.genes,
        self.instances, self.name, self.accession, self.pwm and
        self.barcode, then calls self._setID() and PWM.__init__.

        Raises ValueError if the 'sigvalue' attribute or a weight value
        cannot be parsed as a float.
        """

        def _scopeBaseWeights(base,motifData):
            """Return tuple of base counts/freqs for all columns
            of motif.

            Collects every 'weight' value found under each 'base' element
            whose upper-cased 'name' attribute starts with base."""
            baseWeights = []
            for b in motifData.getElementsByTagName('base'):
                if b.attributes.getNamedItem('name').value.upper().startswith(base):
                    for w in b.getElementsByTagName('weight'):
                        # The number is the text of the weight element's
                        # first child node.
                        baseWeights.append(float(w.childNodes[0].data))
            return tuple(baseWeights)

        def _scopePWM(motifData):
            """Returns pwm and barcode data from scope motif.

            pwm maps each base in self.bases to its tuple of weights;
            the barcode is the md5 hex digest of str(pwm)."""
            pwm = {}
            for base in self.bases:
                pwm[base] = _scopeBaseWeights(base,motifData)
            # hashlib only accepts bytes. Encode only when str(pwm) is a text
            # string (Python 3) so the digest under Python 2 is unchanged.
            pwmText = str(pwm)
            if not isinstance(pwmText, bytes):
                pwmText = pwmText.encode('utf-8')
            bc = hashlib.md5(pwmText).hexdigest()
            return pwm,bc

        def _scopeInstances(motifData):
            """Returns tuple of namedtuples, one per 'instance' element:
            fields=(sequence, strand, begin, end, gene), each copied from the
            attribute of the same name."""
            # Build instances namedtuples:
            Instance = namedtuple('instance','sequence strand begin end gene')
            instList = [Instance(x.getAttribute('sequence'),
                                 x.getAttribute('strand'),
                                 x.getAttribute('begin'),
                                 x.getAttribute('end'),
                                 x.getAttribute('gene')) for x in motifData.getElementsByTagName('instance')]
            return tuple(instList)

        self.consensus  = motifData.getAttribute('sequence')
        self.sigvalue   = motifData.getAttribute('sigvalue')
        self.algorithm  = motifData.getAttribute('algorithm')
        # Unique gene names across all instances, as a sorted tuple.
        self.genes      = tuple(sorted(set([x.getAttribute('gene') for x in motifData.getElementsByTagName('instance')])))
        self.instances  = _scopeInstances(motifData) # tuple of namedtuples (sequence, strand, begin, end, gene)
        self.name       = '%s_%s' % (self.sigvalue,self.consensus)
        # The accession uses the significance value truncated to an integer.
        self.accession  = '%s_%s' % (self.consensus,int(float(self.sigvalue)))
        self.pwm,self.barcode = _scopePWM(motifData) # key(nucleotide),value(tuple of freqs at each position)
        self._setID()
        PWM.__init__(self,self._getMotilityMatrix(self.pwm))
コード例 #3
0
ファイル: motifs.py プロジェクト: asntech/rSeqPipeline
    def _initXMS(self,motifData):
        """Sets Motif's attributes using a single 'motif' node from a
        minidom object representing the XMS file.

        self.bases is read here and must already be set on the instance.

        Sets self.name, self.accession, self.consensus, self.sigvalue,
        self.rank, self.algorithm (each None when the matching 'prop' is
        absent), plus self.pwm and self.barcode, then calls self._setID()
        and PWM.__init__.

        Raises ValueError if a weight value cannot be parsed as a float.
        """

        def _xmsBaseWeights(base,motifData):
            """Return tuple of base counts/freqs for all columns
            of motif.

            Collects the value of every 'weight' element whose upper-cased
            'symbol' attribute starts with base."""
            baseWeights = []
            for w in motifData.getElementsByTagName('weight'):
                if w.attributes.getNamedItem('symbol').value.upper().startswith(base):
                    # The number is the text of the weight element's first
                    # child node.
                    baseWeights.append(float(w.childNodes[0].data))
            return tuple(baseWeights)

        def _xmsProp(motifData,propKey):
            """Returns the stored value of the first 'prop' whose key matches
            propKey (case-insensitive), or None if no prop matches."""
            wanted = propKey.lower()
            for p in motifData.getElementsByTagName('prop'):
                key = p.getElementsByTagName('key')[0].childNodes[0].data
                if key.lower() == wanted:
                    return p.getElementsByTagName('value')[0].childNodes[0].data
            return None

        def _xmsPWM(motifData):
            """Returns pwm and barcode data from xms motif.

            pwm maps each base in self.bases to its tuple of weights;
            the barcode is the md5 hex digest of str(pwm)."""
            pwm = {}
            for base in self.bases:
                pwm[base] = _xmsBaseWeights(base,motifData)
            # hashlib only accepts bytes. Encode only when str(pwm) is a text
            # string (Python 3) so the digest under Python 2 is unchanged.
            pwmText = str(pwm)
            if not isinstance(pwmText, bytes):
                pwmText = pwmText.encode('utf-8')
            bc = hashlib.md5(pwmText).hexdigest()
            return pwm,bc

        # init instance attributes from xms motifData
        self.name       = _xmsProp(motifData,'name')
        self.accession  = _xmsProp(motifData,'accession')
        self.consensus  = _xmsProp(motifData,'consensus')
        self.sigvalue   = _xmsProp(motifData,'sigvalue')
        self.rank       = _xmsProp(motifData,'rank')
        self.algorithm  = _xmsProp(motifData,'algorithm')

        self.pwm,self.barcode = _xmsPWM(motifData)
        self._setID()
        PWM.__init__(self,self._getMotilityMatrix(self.pwm))