def mergeRa(self, other):
    '''
    Merge this RaFile with another one and return the result as a new RaFile.

    Input:
        other: the RaFile to merge with this one
    Output:
        A new RaFile. Neither input is assigned to.

    File-level entries are visited in the order produced by
    ucscUtils.mergeList. Comment and blank entries are appended as-is.
    A stanza present in only one file is taken from that file. A stanza
    present in both is rebuilt key by key: keys found on one side only and
    keys with equal values are kept once; keys whose values differ are
    emitted twice, as "<<<<<key" holding this file's value followed by
    ">>>>>key" holding the other file's value.
    '''
    commentLine = re.compile(r'^\s*#')
    blankLine = re.compile(r'^\s*$')

    ownNames = set(self)
    otherNames = set(other)
    merged = RaFile()

    for name in ucscUtils.mergeList(list(self), list(other)):
        if commentLine.match(name) or blankLine.match(name):
            # Comments and blank separators are carried over verbatim.
            merged.append(name)
        elif name not in ownNames:
            merged[name] = other[name]
        elif name not in otherNames:
            merged[name] = self[name]
        else:
            # Stanza exists on both sides: merge it one key at a time.
            mine = self[name]
            theirs = other[name]
            mineKeys = set(mine.iterkeys())
            theirKeys = set(theirs.iterkeys())

            combined = RaStanza()
            for key in ucscUtils.mergeList(list(mine), list(theirs)):
                if commentLine.match(key):
                    combined.append(key)
                elif key not in mineKeys:
                    combined[key] = theirs[key]
                elif key not in theirKeys:
                    combined[key] = mine[key]
                elif mine[key] == theirs[key]:
                    combined[key] = mine[key]
                else:
                    # Conflict: keep both values, this file's first.
                    combined['<<<<<%s' % key] = mine[key]
                    combined['>>>>>%s' % key] = theirs[key]
            merged[name] = combined

    return merged
def processAntibodyEntry(entry, species, downloadsDirectory, noDownload,
                         username, password, wikiBaseUrl):
    """
    Build an RA file stanza for a single antibody row of the wiki table.

    The row's <td> cells are read positionally: 0 = term, 1 = antibody
    description, 2 = target description, 3 = source (vendor name, vendor ID,
    order URL), 4 = lab, 5 = lots, 6 = factor ID, 7 = validation,
    8 = approval flag. Handling of validation documents is delegated to
    processValidation, which receives the download directory, the noDownload
    flag, the credentials and the wiki base URL.

    Returns:
        (stanza, approved) where stanza is a populated RaStanza and approved
        is True when the approval cell starts with "Y" or "y".
        (None, False) when the row is an example entry.

    Exits the program with an error message on stderr and a non-zero status
    when the final term contains whitespace.
    """
    cells = entry.findAll("td")
    term = getContents(cells[0])

    #
    # Skip over any example entries.
    # NOTE: the parentheses form a regex group, so this matches the word
    # "Example" anywhere in the cell, with or without literal parentheses.
    if re.search("(Example)", term):
        return (None, False)

    stanza = RaStanza()
    (vendorName, vendorId, orderUrl) = processSource(cells[3])

    #
    # The naming standard (as of May 3, 2011) is to name antibodies as
    # <target>_(<vendorId>), such as TAF7_(SC-101167).  In the "term" cell,
    # the antibody might already have that name, or (more likely) it might be
    # named by just the target.  If the vendor ID isn't in the name yet, add
    # it.  If the vendor ID is "missing" (the default value parsed if the
    # field isn't filled in), then don't add it.
    #
    # The vendor ID is compared as literal text: it is data from the wiki,
    # not a pattern, and may contain regex metacharacters.
    if vendorId in term or vendorId == "missing":
        stanza["term"] = term
    else:
        stanza["term"] = term + "_(" + vendorId + ")"

    if re.search(r"\s+", stanza["term"]):
        # sys.exit(message) writes the message to stderr and exits with
        # status 1, so the failure is visible to calling scripts.
        sys.exit("ERROR: term %s has spaces in the name" % stanza["term"])

    # Keys are assigned in the order they should appear in the stanza.
    stanza["tag"] = re.sub(r"[-_\(\)]", "", stanza["term"]).upper()
    stanza["type"] = "Antibody"
    stanza["antibodyDescription"] = getContents(cells[1])
    stanza["target"] = stanza["term"].split("_")[0]
    stanza["targetDescription"] = getContents(cells[2])
    stanza["vendorName"] = vendorName
    stanza["vendorId"] = vendorId
    stanza["orderUrl"] = orderUrl
    stanza["lab"] = getContents(cells[4])
    stanza["lots"] = getContents(cells[5])
    (stanza["targetId"], stanza["targetUrl"]) = processFactorId(cells[6])
    stanza["validation"] = processValidation(cells[7], species,
                                             stanza["term"], stanza["lab"],
                                             downloadsDirectory, noDownload,
                                             username, password, wikiBaseUrl)

    #
    # Indicate whether or not the document (if any) is approved by the NHGRI.
    # Only a leading "Y" or "y" counts as approval.
    approved = bool(re.search(r"^[Yy]", getContents(cells[8])))
    return (stanza, approved)
def updateDiffFilter(self, term, other):
    '''
    Replicates updateMetadata for a single term.

    Input:
        term:  the key whose value should be synchronised from 'other'
        other: the RaFile supplying the new values
    Output:
        This RaFile itself, modified in place.

    Only stanzas whose names occur in both files are touched. For each:
      - if 'other' lacks the term, it is deleted from this file's stanza
        when present, and nothing else changes;
      - otherwise the stanza is rebuilt so that the term takes its value
        from 'other' (inserted if this file did not have it) while every
        other key keeps this file's value.
    '''
    shared = set(self.iterkeys()) & set(other.iterkeys())

    for name in shared:
        mine = self[name]
        theirs = other[name]

        if term not in theirs:
            # Term is gone from 'other': drop it here too, if we have it.
            if term in mine:
                del self[name][term]
            continue

        # Rebuild the stanza so the key order stays sensible.
        ownKeys = list(mine.iterkeys())
        # From 'other' keep only keys we already have, plus the term itself.
        wanted = [k for k in theirs.iterkeys() if k in ownKeys or k == term]

        rebuilt = RaStanza()
        rebuilt._name = name
        for k in ucscUtils.mergeList(wanted, ownKeys):
            rebuilt[k] = theirs[k] if k == term else mine[k]

        self[name] = rebuilt

    return self
def readStanza(self, stanza, key=None, scopes=None):
    '''
    Override this to create custom stanza behavior in derived types.

    IN
    stanza: list of strings with keyval data
    key: optional key for selective key filtering. Don't worry about it
    scopes: passed through unchanged to RaStanza.readStanza

    OUT
    namekey: the key of the stanza's name
    nameval: the value of the stanza's name
    entry: the stanza itself

    All three are None when RaStanza.readStanza returns None.
    '''
    entry = RaStanza()
    # Parse once and reuse the result; parsing a second time into a fresh
    # stanza is redundant and fails if 'stanza' is a one-shot iterator.
    name = entry.readStanza(stanza, key, scopes)
    if name is None:
        return None, None, None

    namekey, nameval = name
    return namekey, nameval, entry
def readStanza(self, stanza, key=None):
    '''
    Override this to create custom stanza behavior in derived types.

    IN
    stanza: list of strings with keyval data
    key: optional key for selective key filtering. Don't worry about it

    OUT
    namekey: the key of the stanza's name
    nameval: the value of the stanza's name
    entry: the stanza itself

    All three are None when RaStanza.readStanza returns None.
    '''
    entry = RaStanza()
    # Parse once and reuse the result; parsing a second time into a fresh
    # stanza is redundant and fails if 'stanza' is a one-shot iterator.
    name = entry.readStanza(stanza, key)
    if name is None:
        return None, None, None

    namekey, nameval = name
    return namekey, nameval, entry
def __init__(self):
    '''
    Initialise the object by delegating to RaStanza.__init__; no further
    state is added here.
    '''
    RaStanza.__init__(self)
def __init__(self, parent):
    '''
    Initialise the RaStanza base state, then remember the owning object.

    parent: stored as-is on self._parent
    '''
    RaStanza.__init__(self)
    self._parent = parent
def createStanza(self, key, value):
    '''
    Create a RaStanza from (key, value) and store it in this container,
    indexed by value.
    '''
    stanza = RaStanza(key, value)
    self[value] = stanza