def _initJASPAR(self, motifData):
    """Set this Motif's attributes from a JASPAR motif record.

    motifData is indexed as a two-item record:
      motifData[0] -- header line of the form '>ACCESSION NAME'
                      (leading '>' and trailing newline are stripped).
      motifData[1] -- matrix text, one 'BASE [n1 n2 ...]' row per line.

    Sets accession, name, pwm, barcode, consensus and source, initialises
    the PWM base class and finishes by calling self._setID().

    Raises ValueError if the header does not hold exactly two
    whitespace-separated fields, or if a matrix count is not an integer.
    """

    def _buildPWM(dataString):
        """Convert JASPAR matrix text to my format; return (pwm, barcode).

        Example input (rows separated by newlines):
            A [0 3]
            C [94 75]
            G [1 0]
            T [2 19]

        pwm maps each row label to a tuple of its integer counts; barcode
        is the md5 hex digest of str(pwm).
        """
        # The brackets carry no information; blank them so split() yields
        # the row label followed by the counts.
        cleaned = dataString.replace('[', ' ').replace(']', ' ')
        pwm = {}
        for row in cleaned.split('\n'):
            fields = row.split()
            if not fields:
                # Blank row (e.g. produced by a trailing newline): skip it
                # instead of failing on fields[0].
                continue
            pwm[fields[0]] = tuple([int(x) for x in fields[1:]])
        # md5 requires bytes on Python 3. On Python 2 encoding the ASCII
        # dict repr yields the same bytes, so existing barcodes are kept.
        bc = hashlib.md5(str(pwm).encode('utf-8')).hexdigest()
        return pwm, bc

    header = motifData[0].lstrip('>').rstrip('\n')
    # Exactly two fields are expected; unpacking raises ValueError otherwise.
    acc, mName = header.split()
    self.accession = acc
    self.name = mName
    self.pwm, self.barcode = _buildPWM(motifData[1])
    # call motility's PWM's init
    PWM.__init__(self, self._getMotilityMatrix(self.pwm))
    # The consensus is generated only after the PWM base class is set up.
    self.consensus = self._generateConsensus()
    self.source = 'jaspar'
    self._setID()
def _initSCOPE(self, motifData):
    """Set this Motif's attributes from a single SCOPE 'motif' data set.

    motifData is used as a DOM-style element: attributes are read with
    getAttribute() and children with getElementsByTagName()
    (presumably an xml.dom.minidom node -- confirm against the caller).

    Sets consensus, sigvalue, algorithm, instances, genes, name, accession,
    pwm and barcode, calls self._setID() and then initialises the PWM base
    class.

    Raises ValueError if the 'sigvalue' attribute is not numeric text,
    because the accession is built from int(float(sigvalue)).
    """

    def _scopeBaseWeights(base, motifData):
        """Return tuple of base counts/freqs for all columns of motif.

        Collects the text of every 'weight' element found under each
        'base' element whose 'name' attribute starts with `base`
        (compared after upper-casing the attribute value).
        """
        baseWeights = []
        for b in motifData.getElementsByTagName('base'):
            baseName = b.attributes.getNamedItem('name').value
            if baseName.upper().startswith(base):
                for w in b.getElementsByTagName('weight'):
                    baseWeights.append(float(w.childNodes[0].data))
        return tuple(baseWeights)

    def _scopePWM(motifData):
        """Return (pwm, barcode) for the scope motif.

        pwm has one key per entry of self.bases; barcode is the md5 hex
        digest of str(pwm).
        """
        pwm = {}
        for base in self.bases:
            pwm[base] = _scopeBaseWeights(base, motifData)
        # md5 requires bytes on Python 3. On Python 2 encoding the ASCII
        # dict repr yields the same bytes, so existing barcodes are kept.
        bc = hashlib.md5(str(pwm).encode('utf-8')).hexdigest()
        return pwm, bc

    def _scopeInstances(motifData):
        """Return a tuple of 'instance' namedtuples, one per 'instance' element.

        Fields: sequence, strand (-/+), begin, end, gene -- each stored
        exactly as returned by getAttribute().  The original note marks
        begin/end as the "negVersion" coordinates (meaning not visible
        here -- TODO confirm).
        """
        Instance = namedtuple('instance', 'sequence strand begin end gene')
        return tuple(
            Instance(x.getAttribute('sequence'),
                     x.getAttribute('strand'),
                     x.getAttribute('begin'),
                     x.getAttribute('end'),
                     x.getAttribute('gene'))
            for x in motifData.getElementsByTagName('instance'))

    self.consensus = motifData.getAttribute('sequence')
    self.sigvalue = motifData.getAttribute('sigvalue')
    self.algorithm = motifData.getAttribute('algorithm')
    # tuple of namedtuples, see _scopeInstances
    self.instances = _scopeInstances(motifData)
    # Unique gene names as a sorted tuple, taken from the instances just
    # built rather than from a second pass over the DOM.
    self.genes = tuple(sorted(set(inst.gene for inst in self.instances)))
    self.name = '%s_%s' % (self.sigvalue, self.consensus)
    self.accession = '%s_%s' % (self.consensus, int(float(self.sigvalue)))
    # key(nucleotide), value(tuple of freqs at each position)
    self.pwm, self.barcode = _scopePWM(motifData)
    self._setID()
    PWM.__init__(self, self._getMotilityMatrix(self.pwm))
def _initXMS(self, motifData):
    """Set this Motif's attributes from a single XMS 'motif' node.

    motifData is one 'motif' node from a minidom object representing the
    XMS file.

    Sets name, accession, consensus, sigvalue, rank and algorithm from the
    node's 'prop' entries (None when a property is absent), builds pwm and
    barcode from its 'weight' elements, calls self._setID() and then
    initialises the PWM base class.
    """

    def _xmsBaseWeights(base, motifData):
        """Return tuple of base counts/freqs for all columns of motif.

        Collects the text of every 'weight' element whose 'symbol'
        attribute starts with `base` (compared after upper-casing the
        attribute value).
        """
        baseWeights = []
        for w in motifData.getElementsByTagName('weight'):
            symbol = w.attributes.getNamedItem('symbol').value
            if symbol.upper().startswith(base):
                baseWeights.append(float(w.childNodes[0].data))
        return tuple(baseWeights)

    def _xmsProp(motifData, propKey):
        """Return the stored value for propKey from the XMS motif, or None.

        Keys are matched case-insensitively; the first matching 'prop'
        element wins.
        """
        wanted = propKey.lower()
        for p in motifData.getElementsByTagName('prop'):
            key = p.getElementsByTagName('key')[0].childNodes[0].data
            if key.lower() == wanted:
                return p.getElementsByTagName('value')[0].childNodes[0].data
        return None

    def _xmsPWM(motifData):
        """Return (pwm, barcode) for the xms motif.

        pwm has one key per entry of self.bases; barcode is the md5 hex
        digest of str(pwm).
        """
        pwm = {}
        for base in self.bases:
            pwm[base] = _xmsBaseWeights(base, motifData)
        # md5 requires bytes on Python 3. On Python 2 encoding the ASCII
        # dict repr yields the same bytes, so existing barcodes are kept.
        bc = hashlib.md5(str(pwm).encode('utf-8')).hexdigest()
        return pwm, bc

    # init instance attributes from xms motifData
    self.name = _xmsProp(motifData, 'name')
    self.accession = _xmsProp(motifData, 'accession')
    self.consensus = _xmsProp(motifData, 'consensus')
    self.sigvalue = _xmsProp(motifData, 'sigvalue')
    self.rank = _xmsProp(motifData, 'rank')
    self.algorithm = _xmsProp(motifData, 'algorithm')
    self.pwm, self.barcode = _xmsPWM(motifData)
    self._setID()
    PWM.__init__(self, self._getMotilityMatrix(self.pwm))