Exemple #1
0
    def mergeRa(self, other):
        '''
        Merge this RaFile with another one, entry by entry.

        Input:
            other: the RaFile to merge with this one
        Output:
            A newly created RaFile holding the merged entries

        Entries are visited in the order produced by ucscUtils.mergeList.
        Entries that look like comments or blank lines are appended as-is.
        A stanza found in only one file is carried over unchanged. A stanza
        found in both is rebuilt key by key: keys present on one side only
        are copied, identical values are kept once, and differing values
        are both kept, self's under '<<<<<key' and other's under '>>>>>key'.
        '''
        isComment = re.compile(r'^\s*#').match
        isBlank = re.compile(r'^\s*$').match
        ownNames = set(self)
        otherNames = set(other)
        merged = RaFile()

        for name in ucscUtils.mergeList(list(self), list(other)):
            if isComment(name) or isBlank(name):
                merged.append(name)
            elif name not in ownNames:
                merged[name] = other[name]
            elif name not in otherNames:
                merged[name] = self[name]
            else:
                # Stanza exists on both sides: merge its key-val pairs.
                mine = self[name]
                theirs = other[name]
                mineKeys = set(mine.iterkeys())
                theirKeys = set(theirs.iterkeys())
                combined = RaStanza()
                for key in ucscUtils.mergeList(list(mine), list(theirs)):
                    if isComment(key):
                        combined.append(key)
                    elif key not in mineKeys:
                        combined[key] = theirs[key]
                    elif key not in theirKeys:
                        combined[key] = mine[key]
                    elif mine[key] == theirs[key]:
                        combined[key] = mine[key]
                    else:
                        # Conflict: keep both values under marked keys.
                        combined['<<<<<%s' % key] = mine[key]
                        combined['>>>>>%s' % key] = theirs[key]
                merged[name] = combined
        return merged
Exemple #2
0
    def mergeRa(self, other):
        '''
        Merge this RaFile with another one, entry by entry.

        Input:
            other: the RaFile to merge with this one
        Output:
            A newly created RaFile holding the merged entries

        Entries are visited in the order produced by ucscUtils.mergeList.
        Entries that look like comments or blank lines are appended as-is.
        A stanza found in only one file is carried over unchanged. A stanza
        found in both is rebuilt key by key: keys present on one side only
        are copied, identical values are kept once, and differing values
        are both kept, self's under '<<<<<key' and other's under '>>>>>key'.
        '''
        isComment = re.compile(r'^\s*#').match
        isBlank = re.compile(r'^\s*$').match
        ownNames = set(self)
        otherNames = set(other)
        merged = RaFile()

        for name in ucscUtils.mergeList(list(self), list(other)):
            if isComment(name) or isBlank(name):
                merged.append(name)
            elif name not in ownNames:
                merged[name] = other[name]
            elif name not in otherNames:
                merged[name] = self[name]
            else:
                # Stanza exists on both sides: merge its key-val pairs.
                mine = self[name]
                theirs = other[name]
                mineKeys = set(mine.iterkeys())
                theirKeys = set(theirs.iterkeys())
                combined = RaStanza()
                for key in ucscUtils.mergeList(list(mine), list(theirs)):
                    if isComment(key):
                        combined.append(key)
                    elif key not in mineKeys:
                        combined[key] = theirs[key]
                    elif key not in theirKeys:
                        combined[key] = mine[key]
                    elif mine[key] == theirs[key]:
                        combined[key] = mine[key]
                    else:
                        # Conflict: keep both values under marked keys.
                        combined['<<<<<%s' % key] = mine[key]
                        combined['>>>>>%s' % key] = theirs[key]
                merged[name] = combined
        return merged
def processAntibodyEntry(entry, species, downloadsDirectory, noDownload,
                         username, password, wikiBaseUrl):
    """
    Build an RA stanza for a single wiki table entry.

    The entry's <td> cells are read positionally: 0 term, 1 antibody
    description, 2 target description, 3 source (vendor name, vendor ID,
    order URL), 4 lab, 5 lots, 6 factor ID, 7 validation, 8 approval flag.
    Validation handling is delegated to processValidation, which receives
    the download directory, the noDownload flag and the wiki credentials.

    Returns:
        (None, False) when the term cell marks the row as an example,
        otherwise (stanza, approved) where approved is True when the
        approval cell starts with 'Y' or 'y'.

    Raises:
        SystemExit (status 1) when the final term contains whitespace.
    """
    cells = entry.findAll("td")
    term = getContents(cells[0])
    #
    # Skip over any example entries
    # NOTE(review): the parentheses in this pattern form a regex group, so
    # any term containing "Example" is skipped - confirm that is intended.
    if re.search("(Example)", term):
        return (None, False)

    stanza = RaStanza()
    (vendorName, vendorId, orderUrl) = processSource(cells[3])
    #
    # The naming standard (as of May 3, 2011) is to name antibodies as
    # <target>_(<vendorId>), such as TAF7_(SC-101167).  In the "term" cell,
    # the antibody might already have that name, or (more likely) it might be
    # named by just the target.  If the vendor ID isn't in the name yet, add it.
    # If the vendor ID is "missing" (the default value parsed if the field isn't
    # filled in), then don't add it.
    #
    # A plain substring test is used: the vendor ID is literal text, and
    # treating it as a regular expression would misbehave on IDs that
    # contain characters such as '(', '+' or '.'.
    if vendorId in term or vendorId == "missing":
        stanza["term"] = term
    else:
        stanza["term"] = term + "_(" + vendorId + ")"
    if re.search(r"\s+", stanza["term"]):
        print("ERROR: term %s has spaces in the name" % stanza["term"])
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    stanza["tag"] = re.sub(r"[-_\(\)]", "", stanza["term"]).upper()
    stanza["type"] = "Antibody"
    stanza["antibodyDescription"] = getContents(cells[1])
    stanza["target"] = stanza["term"].split("_")[0]
    stanza["targetDescription"] = getContents(cells[2])
    stanza["vendorName"] = vendorName
    stanza["vendorId"] = vendorId
    stanza["orderUrl"] = orderUrl
    stanza["lab"] = getContents(cells[4])
    stanza["lots"] = getContents(cells[5])
    (stanza["targetId"], stanza["targetUrl"]) = processFactorId(cells[6])
    stanza["validation"] = processValidation(cells[7], species,
                                             stanza["term"], stanza["lab"],
                                             downloadsDirectory,
                                             noDownload, username,
                                             password, wikiBaseUrl)
    #
    # Indicate whether or not the document (if any) is approved by the NHGRI
    # ("[Yy]" rather than "[Y|y]": inside a character class '|' is a literal
    # character, so the old pattern also accepted cells starting with '|').
    approved = bool(re.search("^[Yy]", getContents(cells[8])))
    return (stanza, approved)
Exemple #4
0
    def updateDiffFilter(self, term, other):
        '''
        Replicates updateMetadata.

        Input:
            term: the key whose value should be synchronised
            other: the RaFile to take values of 'term' from

        Output:
            This RaFile itself, updated in place.
                Only stanzas present in both files are considered. When
                'other' has 'term' in a stanza, that value is written
                (or inserted) into the matching stanza here. When only
                this file has it, 'term' is deleted from the stanza.
        '''
        ret = self
        shared = set(self.iterkeys()) & set(other.iterkeys())
        for name in shared:
            if term not in other[name]:
                # Nothing to copy over; drop the term here if we have it.
                if term in self[name]:
                    del ret[name][term]
                continue

            # Rebuild the stanza so the order of its terms is preserved.
            rebuilt = RaStanza()
            rebuilt._name = name
            ownKeys = list(self[name].iterkeys())
            # From other, keep only keys we already have, plus the term.
            wanted = [k for k in other[name].iterkeys()
                      if k in ownKeys or k == term]
            for k in ucscUtils.mergeList(wanted, ownKeys):
                source = other if k == term else self
                rebuilt[k] = source[name][k]
            ret[name] = rebuilt
        return ret
Exemple #5
0
    def updateDiffFilter(self, term, other):
        '''
        Replicates updateMetadata.

        Input:
            term: the key whose value should be synchronised
            other: the RaFile to take values of 'term' from

        Output:
            This RaFile itself, updated in place.
                Only stanzas present in both files are considered. When
                'other' has 'term' in a stanza, that value is written
                (or inserted) into the matching stanza here. When only
                this file has it, 'term' is deleted from the stanza.
        '''
        ret = self
        shared = set(self.iterkeys()) & set(other.iterkeys())
        for name in shared:
            if term not in other[name]:
                # Nothing to copy over; drop the term here if we have it.
                if term in self[name]:
                    del ret[name][term]
                continue

            # Rebuild the stanza so the order of its terms is preserved.
            rebuilt = RaStanza()
            rebuilt._name = name
            ownKeys = list(self[name].iterkeys())
            # From other, keep only keys we already have, plus the term.
            wanted = [k for k in other[name].iterkeys()
                      if k in ownKeys or k == term]
            for k in ucscUtils.mergeList(wanted, ownKeys):
                source = other if k == term else self
                rebuilt[k] = source[name][k]
            ret[name] = rebuilt
        return ret
Exemple #6
0
    def readStanza(self, stanza, key=None, scopes=None):
        '''
        Override this to create custom stanza behavior in derived types.

        IN
        stanza: list of strings with keyval data
        key: optional key for selective key filtering. Don't worry about it
        scopes: passed through unchanged to RaStanza.readStanza

        OUT
        namekey: the key of the stanza's name
        nameval: the value of the stanza's name
        entry: the stanza itself

        All three are None when RaStanza.readStanza returns None.
        '''
        entry = RaStanza()
        # Parse once and reuse the result; previously the stanza was parsed
        # into a throwaway RaStanza just to test for None, then parsed again.
        name = entry.readStanza(stanza, key, scopes)
        if name is None:
            return None, None, None
        namekey, nameval = name
        return namekey, nameval, entry
Exemple #7
0
    def readStanza(self, stanza, key=None):
        '''
        Override this to create custom stanza behavior in derived types.

        IN
        stanza: list of strings with keyval data
        key: optional key for selective key filtering. Don't worry about it

        OUT
        namekey: the key of the stanza's name
        nameval: the value of the stanza's name
        entry: the stanza itself

        All three are None when RaStanza.readStanza returns None.
        '''
        entry = RaStanza()
        # Parse once and reuse the result; previously the stanza was parsed
        # into a throwaway RaStanza just to test for None, then parsed again.
        name = entry.readStanza(stanza, key)
        if name is None:
            return None, None, None
        namekey, nameval = name
        return namekey, nameval, entry
Exemple #8
0
 def __init__(self):
     '''Run RaStanza's initializer on this instance; nothing else is set here.'''
     # Explicit base-class call (presumably this class derives from
     # RaStanza - the class header is not visible here).
     RaStanza.__init__(self)
Exemple #9
0
 def __init__(self, parent):
     '''
     Run RaStanza's initializer on this instance and remember 'parent'.

     parent: stored as-is in self._parent
     '''
     RaStanza.__init__(self)
     self._parent = parent
Exemple #10
0
 def createStanza(self, key, value):
     '''
     Create a RaStanza from (key, value) and store it in this container.

     The new stanza is indexed by 'value', not by 'key'.
     '''
     self[value] = RaStanza(key, value)
Exemple #11
0
 def __init__(self):
     '''Run RaStanza's initializer on this instance; nothing else is set here.'''
     # Explicit base-class call (presumably this class derives from
     # RaStanza - the class header is not visible here).
     RaStanza.__init__(self)
Exemple #12
0
 def __init__(self, parent):
     '''
     Run RaStanza's initializer on this instance and remember 'parent'.

     parent: stored as-is in self._parent
     '''
     RaStanza.__init__(self)
     self._parent = parent