def trackDb(self):
    '''The Ra file in the trackDb for this composite.

    The RaFile is built from self._trackDbPath on first access and kept
    on the instance as self._trackDb; later calls return that same object.
    '''
    if not hasattr(self, '_trackDb'):
        # First access: nothing cached yet, so parse the file now.
        self._trackDb = RaFile(self._trackDbPath)
    return self._trackDb
def __init__(self, filePath=None, handler=None, protocolPath=None):
    """Set up the exception handling method and read the file.

    filePath     -- file passed to self.read(); when None,
                    encodeUtils.defaultCvPath() is used instead.
    handler      -- stored as self.handler; when None,
                    self.raiseException is used instead.
    protocolPath -- stored as self.protocolPath; when None, the expanded
                    form of ~/htdocsExtras/ENCODE/ is used instead.
    """
    RaFile.__init__(self)

    self.handler = handler
    if handler is None:
        self.handler = self.raiseException

    if filePath is None:
        filePath = encodeUtils.defaultCvPath()

    self.protocolPath = protocolPath
    if protocolPath is None:
        # This used to be '==', a comparison whose result was discarded,
        # which left self.protocolPath as None instead of the default.
        self.protocolPath = os.path.expanduser("~/htdocsExtras/ENCODE/")

    self.missingTypes = set()

    self.read(filePath)
def __init__(self, filePath=None, handler=None, protocolPath=None):
    '''Set up the exception handling method and read the file.

    filePath     -- file passed to self.read(); when None,
                    encodeUtils.defaultCvPath() is used instead.
    handler      -- stored as self.handler; when None,
                    self.raiseException is used instead.
    protocolPath -- stored as self.protocolPath; when None, the expanded
                    form of ~/htdocsExtras/ENCODE/ is used instead.
    '''
    RaFile.__init__(self)

    self.handler = handler
    if handler is None:
        self.handler = self.raiseException

    if filePath is None:
        filePath = encodeUtils.defaultCvPath()

    self.protocolPath = protocolPath
    if protocolPath is None:
        # This used to be '==', a comparison whose result was discarded,
        # which left self.protocolPath as None instead of the default.
        self.protocolPath = os.path.expanduser('~/htdocsExtras/ENCODE/')

    self.missingTypes = set()

    self.read(filePath)
def checkMetaDbForFiles(self, status, state):
    """Match the files of one release against the stanzas of its metaDb.

    status -- label used in error messages (inserted into
              "metaDb: %s is not mentioned in %s").
    state  -- 'new' to use self.newMdb / self.newReleaseFiles,
              'old' to use self.oldMdb / self.oldReleaseFiles.

    Returns a tuple (filtermdb, revokedset, revokedfiles, atticset,
    supplementalset, errors):
      filtermdb       -- RaFile holding every stanza that matched a file
      revokedset      -- names of matched stanzas whose objStatus contains
                         revoked, replaced or renamed
      revokedfiles    -- the files those stanzas matched
      atticset        -- names of matched stanzas that have an 'attic' key
      supplementalset -- files whose name starts with 'supplemental'
      errors          -- messages for wgEncode files with no stanza

    Raises ValueError when state is neither 'new' nor 'old'.
    """
    if state == 'new':
        (mdb, files, loose) = (self.newMdb, self.newReleaseFiles, self.loose)
    elif state == 'old':
        (mdb, files, loose) = (self.oldMdb, self.oldReleaseFiles, self.loose)
    else:
        # Without this guard an unknown state only surfaced further down
        # as an UnboundLocalError on 'files'.
        raise ValueError("state must be 'new' or 'old', got %r" % (state,))

    errors = []
    revokedset = set()
    revokedfiles = set()
    atticset = set()
    supplementalset = set()
    filtermdb = RaFile()

    for i in files:
        if re.match('supplemental', i):
            supplementalset.add(i)
        # only wgEncode* files are looked up in the metaDb
        if not re.match('wgEncode.*', i):
            continue

        # Escape the file name so characters such as '.' or '+' are matched
        # literally, and compile once per file instead of once per stanza.
        namePattern = re.compile(".*%s.*" % re.escape(i))
        filestanza = mdb.filter(
            lambda s: namePattern.match(s['fileName']), lambda s: s)

        #should only return 1, just in case
        if filestanza:
            for j in filestanza:
                filtermdb[j.name] = j
                if 'objStatus' in j and re.search(
                        'revoked|replaced|renamed', j['objStatus']):
                    revokedfiles.add(i)
                    revokedset.add(j.name)
                if 'attic' in j:
                    atticset.add(j.name)
        else:
            # in loose mode, old-release files matching '.*bai' may be
            # absent from the metaDb without being reported
            if loose and state == 'old' and re.match('.*bai', i):
                pass
            else:
                errors.append("metaDb: %s is not mentioned in %s" % (i, status))

    return (filtermdb, revokedset, revokedfiles, atticset, supplementalset,
            errors)
import sys, string
import re
from ucscGb.gbData.ordereddict import OrderedDict
from ucscGb.gbData.ra.raFile import RaFile
from ucscGb.gbData import ucscUtils
import collections

# Print the "term" of every stanza in ../data/cvJan15.ra whose "type"
# is "treatment", one term per line.
rafile = RaFile("../data/cvJan15.ra")
treatment = 0

for key in rafile.keys():
    thisstanza = rafile[key]
    if thisstanza["type"] == "treatment":
        # Parenthesised single-argument form: prints the same text on
        # Python 2 and is not a SyntaxError on Python 3.
        print(thisstanza["term"])
def __init__(self, filepath):
    """Run the RaFile initialiser, then read filepath via self.read()."""
    # Base-class state must exist before read() is called on this object.
    RaFile.__init__(self)
    self.read(filepath)
import sys, string
import re
from ucscGb.gbData.ordereddict import OrderedDict
from ucscGb.gbData.ra.raFile import RaFile
from ucscGb.gbData import ucscUtils
import collections

# Input files.
rafile = RaFile('../data/cv.ra')
metafile = RaFile('../data/mouse_meta.ra')

# Counters and accumulators; all start empty / at their initial value.
cell_id = 1
cells = {}

# Hard-coded lookup tables. Values look like numeric ids for an external
# system -- NOTE(review): confirm what they refer to. "NHGRI-Elnitski" maps
# to the string "REMOVE" rather than a number.
centers = {
    "CRG-Guigo-m": 7,
    "CSHL-m": 7,
    "Caltech-m": 3,
    "FSU-m": 8,
    "NHGRI-Elnitski": "REMOVE",
    "PSU-m": 4,
    "Stanford-m": 5,
    "UW-m": 1,
    "Yale-m": 5,
    "LICR-m": 2,
}
points = {}
datatype = 0
control = {
    "Control_32bp": 161,
    "Control_36bp": 161,
    "Control_50bp": 161,
    "IgG-Yale": 165,
    "IgG-mus": 162,
    "IgG-rab": 163,
    "IgG-rat": 164,
    "Input": 161,
    "std": 166,
}
antibodies = {}
targets = {
    'H3ac': 105,
    'H3K27ac': 106,
    'H3K27me3': 107,
    'H3K36me3': 108,
    'H3K4me1': 109,
    'H3K4me2': 110,
    'H3K4me3': 111,
    'H3K79me2': 112,
    'H3K79me3': 113,
    'H3K9ac': 114,
    'H3K9me3': 115,
    'BHLHE40': 116,
    'CEBPB': 117,
    'CHD1': 118,
    'CHD2': 119,
    'CTCF': 120,
    'E2F4': 121,
    'EP300': 122,
    'ETS1': 123,
    'FLI1': 124,
    'FOSL1': 125,
    'GABPA': 126,
    'GATA1': 127,
    'GATA2': 128,
    'HCFC1': 129,
    'JUN': 130,
    'JUND': 131,
    'KAT2A': 132,
    'MAFK': 133,
    'MAX': 134,
    'MAZ': 135,
    'MXI1': 136,
    'MYB': 137,
    'MYC': 138,
    'MYOD1': 139,
    'MYOG': 140,
    'PAX5': 141,
    'POLR2A': 142,
    'RAD21': 143,
    'RCOR1': 144,
    'RDBP': 145,
    'REST': 146,
    'SIN3A': 147,
    'SMC3': 148,
    'SRF': 149,
    'TAL1': 150,
    'TBP': 151,
    'TCF12': 152,
    'TCF3': 153,
    'UBTF': 154,
    'USF1': 155,
    'USF2': 156,
    'ZC3H11A': 157,
    'ZKSCAN1': 158,
    'ZMIZ1': 159,
    'ZNF384': 160,
}
cell_item_id = 4001
center_id = 2001
exps = {}
exps2 = {}

# Walk every stanza of cv.ra in key order.
for key in rafile.keys():
    thisstanza = rafile[key]
def __init__(self, args):
    """Collect the files, tables and gbdbs of a release and build the report.

    args is a mapping that must provide 'releaseNew', 'releaseOld',
    'database', 'composite', 'loose', 'ignore', 'summary' and 'specialMdb';
    'verbose' is optional and defaults to 0.  'releaseNew' and 'releaseOld'
    are strings ('releaseNew' is concatenated onto a path and converted
    with int()).

    Two modes are handled:
      * comparison mode -- releaseNew > 1 and releaseOld is a usable
        release number: the new release is compared against the old one.
      * solo mode -- releaseOld is 'solo' (or was sanitised to 'solo'):
        only the new release is examined.

    In either mode the collected messages end up in self.errors and the
    generated text in self.output (a report when there are no errors or
    self.ignore is set, otherwise the error listing).
    """
    self.releaseNew = args['releaseNew']
    self.releaseOld = args['releaseOld']
    self.database = args['database']
    self.composite = args['composite']
    self.loose = args['loose']
    self.ignore = args['ignore']
    self.summary = args['summary']
    self.specialMdb = args['specialMdb']
    self.args = args
    if 'verbose' in args:
        self.verbose = args['verbose']
    else:
        self.verbose = 0

    errors = []
    c = track.CompositeTrack(self.database, self.composite, None,
                             self.specialMdb)

    #sanitize arguments
    if not self.releaseOld.isdigit():
        self.releaseOld = 'solo'
    elif int(self.releaseOld) <= 0:
        # was assigned to the misspelt attribute 'releaseOlf', so a
        # releaseOld of 0 was never actually turned into 'solo'
        self.releaseOld = 'solo'
    elif int(self.releaseOld) > int(self.releaseNew):
        # compare as numbers: as strings '9' > '10' is true
        self.releaseOld = 'solo'

    if self.verbose >= 1:
        sys.stderr.write("Initializing MkChangeNotes\n")

    self.releasePath = c.httpDownloadsPath + 'release' + args['releaseNew']
    self.gbdbPath = "/gbdb/%s/bbi" % args['database']
    self.trackDbFile = c.currentTrackDb
    if not self.trackDbFile:
        self.trackDb = None
        errors.append(
            "track: There is no entry in trackDb.wgEncode.ra for %s with the alpha tag"
            % self.composite)
    else:
        self.trackDb = RaFile(self.trackDbFile, "track")

    if int(self.releaseNew) > 1 and str(self.releaseOld) != 'solo':
        if self.verbose >= 2:
            sys.stderr.write("Comparison mode\n")
        # release N is stored at index N - 1
        self.newReleaseFiles = c.releases[int(self.releaseNew) - 1]
        self.oldReleaseFiles = c.releases[int(self.releaseOld) - 1]
        self.releasePathOld = c.httpDownloadsPath + 'release' + args[
            'releaseOld']

        self.newMdb = c.alphaMetaDb
        self.oldMdb = c.publicMetaDb

        if self.verbose >= 2:
            sys.stderr.write("Checking for missing files\n")
        #make a list of missing files
        self.missingFiles = self.__checkFilesForDropped()
        #filter them out of old release files

        if self.verbose >= 1:
            sys.stderr.write("Scanning and parsing release directories\n")
        #check if all files listed in release directories have associated metaDb entries
        (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet,
         self.newSupplementalSet,
         newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
        (self.oldMdb, self.oldRevokedSet, self.oldRevokedFiles,
         self.oldAtticSet, self.oldSupplementalSet,
         oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old")

        self.expIds = set(
            self.newMdb.filter(lambda s: 'expId' in s, lambda s: s['expId']))

        if self.verbose >= 2:
            sys.stderr.write("Checking for attic files\n")
        #check that attic files aren't in trackDb
        if self.trackDb:
            errors.extend(self.__checkAtticNotInTrackDb())

        #checks to see that nothing has disappeared between public and alpha
        if self.verbose >= 1:
            sys.stderr.write("Checking new metaDb for missing stanzas\n")
        errors.extend(self.__checkAlphaForDropped("alpha metaDb", "stanza"))
        if self.verbose >= 1:
            sys.stderr.write("Checking file md5sums across releases\n")
        errors.extend(self.__checkMd5sums())

        #checks and gets tables that are present, also returns a revoked set of tables for new
        if self.verbose >= 1:
            sys.stderr.write("Checking table status\n")
        (self.newTableSet, self.revokedTableSet, self.newMissingTables,
         newTableError) = self.checkTableStatus("alpha metaDb", "new")
        # 'spam' is a throwaway: the second element is not needed for old
        (self.oldTableSet, spam, self.droppedTables,
         oldTableError) = self.checkTableStatus("public metaDb", "old")

        self.newInAttic = self.atticSet - self.oldAtticSet
        self.stillInAttic = self.oldAtticSet & self.atticSet
        self.oldTableSet = self.oldTableSet - self.atticSet
        self.noMoreAttic = self.oldAtticSet - self.atticSet
        self.changedTables = (self.oldTableSet - self.newTableSet -
                              self.revokedTableSet)

        #same as above except for gbdbs
        if self.verbose >= 1:
            sys.stderr.write("Checking GBDB status\n")
        (self.newGbdbSet, self.revokedGbdbs,
         newGbdbError) = self.getGbdbFiles("new")
        # 'eggs' is a throwaway, like 'spam' above
        (self.oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles("old")
        #remove missing files from gbdbs
        self.oldGbdbSet = self.oldGbdbSet - self.missingFiles
        self.oldGbdbSet = self.oldGbdbSet - self.atticSet
        self.changedGbdbs = (self.oldGbdbSet - self.newGbdbSet -
                             self.revokedGbdbs)
        for i in self.missingFiles:
            if i in self.oldReleaseFiles:
                del self.oldReleaseFiles[i]

        #fill in the errors
        errors.extend(newFileErrors)
        errors.extend(oldFileErrors)
        errors.extend(newTableError)
        errors.extend(oldTableError)
        errors.extend(newGbdbError)
        errors.extend(oldGbdbError)

        if self.changedTables:
            errors.append(
                "These tables were tables in the old release, but are no longer tables in the new release:"
            )
            errors.extend(list(self.changedTables))
        if self.changedGbdbs:
            errors.append(
                "These GBDBs were GBDB tables in the old release, but are no longer GBDB tables in the new release:"
            )
            errors.extend(list(self.changedGbdbs))

        #for ease of typing
        totalFiles = set(self.newReleaseFiles)
        oldTotalFiles = set(self.oldReleaseFiles)

        #these could honestly be moved earlier, get a file list processing section or something
        #they clean special files out and separate the master file list into the 3 required
        #ones: wgEncode, supplemental and additional.
        self.totalFiles = self.__cleanSpecialFiles(totalFiles)
        self.oldTotalFiles = self.__cleanSpecialFiles(oldTotalFiles)
        (self.oldTotalFiles, self.additionalList, self.oldAdditionalList,
         self.totalFiles) = self.__separateOutAdditional()

        #get the stuff you need to push
        self.pushTables = set(sorted(
            (self.newTableSet - self.oldTableSet)))
        self.pushFiles = set(sorted(
            (self.totalFiles - self.oldTotalFiles)))
        self.pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet)))
        self.newSupp = self.newSupplementalSet - self.oldSupplementalSet

        self.newTables = set(self.pushTables)
        self.newFiles = set(
            ucscUtils.printIter(self.pushFiles, self.releasePath))
        self.newGbdbs = set(
            ucscUtils.printIter(self.pushGbdbs, self.gbdbPath))
        self.newSupplemental = set(
            ucscUtils.printIter(self.newSupp, self.releasePath))
        self.newOthers = set(
            ucscUtils.printIter(self.additionalList, self.releasePath))
        self.fullFiles = sorted(self.totalFiles - self.revokedFiles)
        self.fullTables = self.oldTableSet & self.newTableSet

        self.errors = errors
        #don't output the report unless ignore option is on or no errors
        #module mode doesn't generate output by default
        if self.verbose >= 1:
            sys.stderr.write("Creating report\n")
        if (not errors) or self.ignore:
            self.output = self.printReport(args, c)
        else:
            self.output = self.printErrors(errors, self.missingFiles)

    elif self.releaseOld == 'solo':
        self.newReleaseFiles = c.releases[int(self.releaseNew) - 1]
        self.oldReleaseFiles = set()

        self.newMdb = c.alphaMetaDb

        #check that attic files aren't in trackDb
        if self.trackDb:
            errors.extend(self.__checkAtticNotInTrackDb())

        (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet,
         self.newSupplementalSet,
         newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
        self.expIds = set(
            self.newMdb.filter(lambda s: 'expId' in s, lambda s: s['expId']))

        (self.newTableSet, self.revokedTableSet, spam,
         newTableError) = self.checkTableStatus("alpha metaDb", "new")

        self.tableSize = self.__getTableSize()

        (self.newGbdbSet, self.revokedGbdbs,
         newGbdbError) = self.getGbdbFiles("new")

        #collect errors
        errors.extend(newFileErrors)
        errors.extend(newTableError)
        errors.extend(newGbdbError)

        #set for easy operations
        totalFiles = set(self.newReleaseFiles)

        #clean out special files we don't push i.e. md5sum.history
        self.totalFiles = self.__cleanSpecialFiles(totalFiles)

        # with no old release, everything in the new release is pushed
        self.pushTables = self.newTableSet
        self.pushFiles = self.totalFiles
        self.pushGbdbs = self.newGbdbSet
        self.newSupp = self.newSupplementalSet
        self.fullFiles = self.totalFiles
        self.fullTables = self.newTableSet

        #makes list for additional files
        (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set())
        (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList,
         self.totalFiles) = self.__separateOutAdditional()

        self.errors = errors
        if (not errors) or self.ignore:
            self.output = self.printReportOne(args, c)
        else:
            self.droppedTables = set()
            self.output = self.printErrors(errors, set())