def testVisitPath(self):
    """
    Exercise f.visitPath(top, last, visitor).

    Builds TestDir/a/b/c/d/e/f plus a file TestDir/a/b/c/d/D0, then checks:
      - with a directory as the leaf, the visitor sees every directory from
        top down to and including last
      - with a file as the leaf, the visitor sees only the directories above it
      - OSError is raised for a non-existent leaf, and when the root is not
        above the full path
    """
    f.makedirs('TestDir/a/b/c/d/e/f')
    fi = open('TestDir/a/b/c/d/D0', 'w')
    try:
        fi.write("hi\n")
    finally:
        # close() must actually be called; a bare `fi.close` is a no-op
        fi.close()
    seen = set()

    def collector(x):
        # record every path handed to the visitor
        seen.add(x)

    # Case 1: the leaf is a directory
    top = 'TestDir/a'
    last = 'TestDir/a/b/c/d'
    absTop = os.path.normpath(top)
    expected = set([absTop])
    for i in [['b'], ['b', 'c'], ['b', 'c', 'd']]:
        expected.add(os.path.join(absTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)

    # Case 2: the leaf is a file, which is expected NOT to be visited
    seen.clear()
    top = 'TestDir/a/b'
    last = 'TestDir/a/b/c/d/D0'
    normTop = os.path.normpath(top)
    expected = set([normTop])
    for i in [['c'], ['c', 'd']]:
        expected.add(os.path.join(normTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)

    # Test for non-existent leaf
    assert_raises(OSError, f.visitPath, 'TestDir', 'TestDir/A/BB', collector)

    # Test for rootDir not above fullPath
    assert_raises(OSError, f.visitPath, 'TestDir/A/B', 'TestDir/A', collector)
def testNewEntryPermissions(self):
    """
    Check the permission bits on everything JsonDumpStorage.newEntry() creates.

    The json and dump files must carry dumpPermissions; every directory
    visited between each daily directory under the storage root and the
    date / name locations of the new entry must carry dirPermissions.
    """
    # 0o literals are accepted by Python 2.6+ and by Python 3
    dirPermissions = 0o707
    dumpPermissions = 0o500
    sfl = SilentFakeLogger()
    j = JDS.JsonDumpStorage(root=self.testDir, dirPermissions=dirPermissions,
                            dumpPermissions=dumpPermissions, logger=sfl)
    u = str(socorro_uuid.uuid1())
    f1, f2 = j.newEntry(u)
    f1.close()
    f2.close()

    # Stat each file once and assert on the same value the message reports.
    jpath = j.getJson(u)
    gotPermissions = stat.S_IMODE(os.stat(jpath)[0])
    assert gotPermissions == dumpPermissions, "%s: Expected %o, got %o" % (jpath, dumpPermissions, gotPermissions)

    dpath = j.getDump(u)
    gotPermissions = stat.S_IMODE(os.stat(dpath)[0])
    assert gotPermissions == dumpPermissions, "%s: Expected %o, got %o" % (dpath, dumpPermissions, gotPermissions)

    # The dump path minus its suffix is read as a symlink whose target is
    # resolved relative to the directory holding the dump.
    udir = os.path.split(dpath)[0]
    datePath = os.path.abspath(os.path.join(udir, os.readlink(os.path.splitext(dpath)[0])))
    namePath = os.path.abspath(os.path.splitext(dpath)[0])
    topPath = os.path.abspath(self.testDir)
    dailies = os.listdir(topPath)

    def assertPermVisitor(p):
        gotPerm = stat.S_IMODE(os.stat(p)[0])
        assert dirPermissions == gotPerm, "%s: Expected %0o, got %0o" % (p, dirPermissions, gotPerm)

    for d in dailies:
        # visitPath quietly ignores a file as the leaf
        socorro_fs.visitPath(os.path.join(topPath, d), datePath, assertPermVisitor)
        socorro_fs.visitPath(os.path.join(topPath, d), namePath, assertPermVisitor)
def makeDateDir(self, date, webheadName=None):
    """
    Assure existence of the date directory for the given date.

    Returns a 2-tuple: (path of the directory, list of its path components),
    exactly as produced by self.datePath(date, webheadName).
    Raises OSError if creation fails and the path is still not a directory.
    """
    dateDir, dateDirParts = self.datePath(date, webheadName)
    # Clear the process umask while creating, and always restore it afterwards
    # (presumably so self.dirPermissions is applied unmasked).
    savedUmask = self.osModule.umask(0)
    try:
        socorro_fs.makedirs(dateDir, self.dirPermissions, self.osModule)
    except OSError:
        # An OSError is tolerated only when the directory is there anyway
        directoryExists = self.osModule.path.isdir(dateDir)
        if not directoryExists:
            raise
    finally:
        self.osModule.umask(savedUmask)
    if self.dumpGID:
        # Walk from the first two path components down to the new directory,
        # handing each visited path to self.chownGidVisitor
        walkRoot = os.path.join(*dateDirParts[:2])
        socorro_fs.visitPath(walkRoot, dateDir, self.chownGidVisitor)
    return dateDir, dateDirParts
def testCopyFromPermissions(self):
    """
    Check the permission bits on everything JsonDumpStorage.copyFrom() creates.

    Two empty source files are created under self.testMoveFrom and copied into
    storage. The stored json and dump files must carry dumpPermissions; every
    directory visited between each daily directory under the storage root and
    the date / name locations of the entry must carry dirPermissions.
    """
    # 0o literals are accepted by Python 2.6+ and by Python 3
    dirPermissions = 0o777
    dumpPermissions = 0o755
    sfl = SilentFakeLogger()
    j = JDS.JsonDumpStorage(
        root=self.testDir, dirPermissions=dirPermissions, dumpPermissions=dumpPermissions, logger=sfl
    )
    os.makedirs(self.testMoveFrom)
    u = str(socorro_uuid.uuid1())
    jopath = os.path.join(self.testMoveFrom, u + j.jsonSuffix)
    dopath = os.path.join(self.testMoveFrom, u + j.dumpSuffix)
    # Create the (empty) source files
    fj = open(jopath, "w")
    fd = open(dopath, "w")
    fj.close()
    fd.close()
    j.copyFrom(u, jopath, dopath, "w", DT.datetime(2008, 8, 8, 8, 8), createLinks=True)

    # Stat each file once and assert on the same value the message reports.
    jpath = j.getJson(u)
    gotPermissions = stat.S_IMODE(os.stat(jpath)[0])
    assert dumpPermissions == gotPermissions, "%s: Expected %o, got %o" % (
        jpath,
        dumpPermissions,
        gotPermissions,
    )

    dpath = j.getDump(u)
    gotPermissions = stat.S_IMODE(os.stat(dpath)[0])
    assert dumpPermissions == gotPermissions, "%s: Expected %o, got %o" % (
        dpath,
        dumpPermissions,
        gotPermissions,
    )

    # The dump path minus its suffix is read as a symlink whose target is
    # resolved relative to the directory holding the dump.
    udir = os.path.split(dpath)[0]
    datePath = os.path.abspath(os.path.join(udir, os.readlink(os.path.splitext(dpath)[0])))
    namePath = os.path.abspath(os.path.splitext(dpath)[0])
    topPath = os.path.abspath(self.testDir)
    dailies = os.listdir(topPath)

    def assertPermVisitor(p):
        gotPerm = stat.S_IMODE(os.stat(p)[0])
        assert dirPermissions == gotPerm, "%s: Expected %0o, got %0o" % (p, dirPermissions, gotPerm)

    for d in dailies:
        # visitPath quietly ignores a file as the leaf
        socorro_fs.visitPath(os.path.join(topPath, d), datePath, assertPermVisitor)
        socorro_fs.visitPath(os.path.join(topPath, d), namePath, assertPermVisitor)
def makeNameDir(self, ooid, timestamp=None):
    """
    Make sure the name directory for ooid exists.

    Returns a 2-tuple: (path of the directory, list of its path components),
    exactly as produced by self.namePath(ooid, timestamp).
    Raises OSError on failure, i.e. when creation fails and the path is
    still not a directory.
    """
    nameDir, nameDirParts = self.namePath(ooid, timestamp)
    # Clear the process umask while creating, and always restore it afterwards
    # (presumably so self.dirPermissions is applied unmasked).
    savedUmask = self.osModule.umask(0)
    try:
        socorro_fs.makedirs(nameDir, self.dirPermissions, self.osModule)
    except OSError:
        # An OSError is tolerated only when the directory is there anyway
        directoryExists = self.osModule.path.isdir(nameDir)
        if not directoryExists:
            raise
    finally:
        self.osModule.umask(savedUmask)
    if self.dumpGID:
        # Walk from the first two path components down to the new directory,
        # handing each visited path to self.chownGidVisitor
        walkRoot = os.path.join(*nameDirParts[:2])
        socorro_fs.visitPath(walkRoot, nameDir, self.chownGidVisitor)
    return nameDir, nameDirParts
def testVisitPath(self):
    """
    Exercise f.visitPath(top, last, visitor).

    A tree TestDir/a/b/c/d/e/f and a file TestDir/a/b/c/d/D0 are created.
    The visitor must be called for each directory from top through last when
    last is a directory, and only for the directories above last when last is
    a file. OSError is expected for a missing leaf and for a root that does
    not lie above the full path.
    """
    f.makedirs('TestDir/a/b/c/d/e/f')
    fi = open('TestDir/a/b/c/d/D0', 'w')
    try:
        fi.write("hi\n")
    finally:
        # actually call close(); referencing the method alone does nothing
        fi.close()
    seen = set()

    def collector(x):
        # remember each path the visitor is given
        seen.add(x)

    # Directory as the leaf: top and every level down to last are visited
    top = 'TestDir/a'
    last = 'TestDir/a/b/c/d'
    absTop = os.path.normpath(top)
    expected = set([absTop])
    for i in [['b'], ['b', 'c'], ['b', 'c', 'd']]:
        expected.add(os.path.join(absTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)

    # File as the leaf: the file itself is not expected among the visited paths
    seen.clear()
    top = 'TestDir/a/b'
    last = 'TestDir/a/b/c/d/D0'
    normTop = os.path.normpath(top)
    expected = set([normTop])
    for i in [['c'], ['c', 'd']]:
        expected.add(os.path.join(normTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)

    # Test for non-existent leaf
    assert_raises(OSError, f.visitPath, 'TestDir', 'TestDir/A/BB', collector)

    # Test for rootDir not above fullPath
    assert_raises(OSError, f.visitPath, 'TestDir/A/B', 'TestDir/A', collector)
def testCopyFromPermissions(self):
    """
    Check the permission bits on everything JsonDumpStorage.copyFrom() creates.

    Two empty source files are created under self.testMoveFrom and copied into
    storage with a timezone-aware timestamp. The stored json and dump files
    must carry dumpPermissions; every directory visited between each daily
    directory under the storage root and the date / name locations of the
    entry must carry dirPermissions.
    """
    # 0o literals are accepted by Python 2.6+ and by Python 3
    dirPermissions = 0o777
    dumpPermissions = 0o755
    sfl = SilentFakeLogger()
    j = JDS.JsonDumpStorage(root=self.testDir, dirPermissions=dirPermissions,
                            dumpPermissions=dumpPermissions, logger=sfl)
    os.makedirs(self.testMoveFrom)
    u = str(socorro_uuid.uuid1())
    jopath = os.path.join(self.testMoveFrom, u + j.jsonSuffix)
    dopath = os.path.join(self.testMoveFrom, u + j.dumpSuffix)
    # Create the (empty) source files
    fj = open(jopath, 'w')
    fd = open(dopath, 'w')
    fj.close()
    fd.close()
    j.copyFrom(u, jopath, dopath, 'w', DT.datetime(2008, 8, 8, 8, 8, tzinfo=UTC), createLinks=True)

    # Stat each file once and assert on the same value the message reports.
    jpath = j.getJson(u)
    gotPermissions = stat.S_IMODE(os.stat(jpath)[0])
    assert dumpPermissions == gotPermissions, "%s: Expected %o, got %o" % (jpath, dumpPermissions, gotPermissions)

    dpath = j.getDump(u)
    gotPermissions = stat.S_IMODE(os.stat(dpath)[0])
    assert dumpPermissions == gotPermissions, "%s: Expected %o, got %o" % (dpath, dumpPermissions, gotPermissions)

    # The dump path minus its suffix is read as a symlink whose target is
    # resolved relative to the directory holding the dump.
    udir = os.path.split(dpath)[0]
    datePath = os.path.abspath(os.path.join(udir, os.readlink(os.path.splitext(dpath)[0])))
    namePath = os.path.abspath(os.path.splitext(dpath)[0])
    topPath = os.path.abspath(self.testDir)
    dailies = os.listdir(topPath)

    def assertPermVisitor(p):
        gotPerm = stat.S_IMODE(os.stat(p)[0])
        assert dirPermissions == gotPerm, "%s: Expected %0o, got %0o" % (p, dirPermissions, gotPerm)

    for d in dailies:
        # visitPath quietly ignores a file as the leaf
        socorro_fs.visitPath(os.path.join(topPath, d), datePath, assertPermVisitor)
        socorro_fs.visitPath(os.path.join(topPath, d), namePath, assertPermVisitor)
class JsonDumpStorage(socorro_dumpStorage.DumpStorage):
    """
    This class implements a file system storage scheme for the JSON and dump files of the Socorro project.
    It creates a tree with two branches: the name branch and the date branch.
     - The name branch consists of paths based on the first 8 characters of the ooid file name. It holds the two
       data files and a relative symbolic link to the date branch directory associated with the particular ooid.
       see socorro.lib.ooid.py for details of date and depth encoding within the ooid
       For the ooid:  22adfb61-f75b-11dc-b6be-001322081225
        - the json file is stored as %(root)s/%(daypart)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225.json
        - the dump file is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225.dump
        - the symbolic link is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225
          and (see below) references %(toDateFromName)s/date/2008/12/25/12/05/webhead01_0
       NOTE(review): only the json path above shows a %(daypart)s component; confirm which
       layout is current and make the three examples agree.
     - The date branch consists of paths based on the year, month, day, hour, minute-segment, webhead host name and
       a small sequence number.
       For each ooid, it holds a relative symbolic link referring to the actual storage (name) directory holding
       the data for that ooid.
       For the ooid above, submitted at 2008-12-25T12:05 from webhead01
        - the symbolic link is stored as %(root)s/date/2008/12/25/12/05/webhead01_0/22adfb61-f75b-11dc-b6be-001322081225
          and references %(toNameFromDate)s/name/22/ad/

    Note: The symbolic links are relative, so they begin with some rounds of '../'. This is to avoid issues that
    might arise from variously mounted nfs volumes. If the layout changes, self.toNameFromDate and toDateFromName
    must be changed to match, as well as a number of the private methods.

    Note: If so configured, the bottom nodes in the date path will be %(webheadName)s_n for n in range(N) for some
    reasonable (5, perhaps) N. Files are placed into these buckets in rotation.
    """
    #-----------------------------------------------------------------------------------------------------------------
    def __init__(self, root=".", osModule=os, **kwargs):
        """
        Take note of our root directory and other necessities.
        Yes, it is perfectly legal to call super(...).__init__() after doing some other code.
        ... As long as you expect the behavior you get, anyway...

        Keyword arguments read here (everything is also forwarded to the base class):
          minutesPerSlot, subSlotCount: both default to 1 before the base class sees them
          cleanIndexDirectories: enabled only when its text form equals 'true', case-insensitively
          jsonSuffix, dumpSuffix: default '.json' / '.dump'; a leading '.' is added if missing
          logger: defaults to a socorro_util.FakeLogger()
        """
        kwargs.setdefault('minutesPerSlot', 1)
        kwargs.setdefault('subSlotCount', 1)  # that is: use xxx_0 every time by default
        super(JsonDumpStorage, self).__init__(root=root, osModule=osModule, **kwargs)
        tmp = kwargs.get('cleanIndexDirectories', 'false')
        # str() lets a non-string setting (e.g. the bool True) be given without crashing on .lower()
        self.cleanIndexDirectories = 'true' == str(tmp).lower()
        self.jsonSuffix = kwargs.get('jsonSuffix', '.json')
        if not self.jsonSuffix.startswith('.'):
            self.jsonSuffix = ".%s" % (self.jsonSuffix)
        self.dumpSuffix = kwargs.get('dumpSuffix', '.dump')
        if not self.dumpSuffix.startswith('.'):
            self.dumpSuffix = ".%s" % (self.dumpSuffix)
        self.logger = kwargs.get('logger', socorro_util.FakeLogger())

    #-----------------------------------------------------------------------------------------------------------------
    def newEntry(self, ooid, webheadHostName='webhead01', timestamp=None):
        """
        Sets up the name and date storage directory branches for the given ooid.
        Creates any directories that it needs along the path to the appropriate storage location.
        Creates two relative symbolic links: the date branch link pointing to the name directory holding the files;
        the name branch link pointing to the date branch directory holding that link.
        Returns a 2-tuple containing files open for writing: (jsonfile,dumpfile)
        If self.dumpGID, then the file tree from root to and including the data files are chown'd
        If self.dumpPermissions, then chmod is called on the data files

        If anything fails after the json file has been opened, both files are closed, the date
        and name links for this ooid are removed (best effort) and the exception propagates.
        """
        import errno  # function-scope import: the file-level import block is not visible from here

        # note: after this call, dateDir already holds link to nameDir
        nameDir, dateDir = super(JsonDumpStorage, self).newEntry(ooid, timestamp, webheadHostName)
        df, jf = None, None
        jname = os.path.join(nameDir, ooid + self.jsonSuffix)
        try:
            jf = open(jname, 'w')
        except IOError as x:
            if errno.ENOENT != x.errno:
                raise  # bare raise keeps the original traceback
            # makeNameDir returns (path, parts); only the path may be bound to nameDir,
            # otherwise the os.path.join() calls below fail on a tuple.
            # deliberately leave this dir behind if next line throws
            nameDir, _nameParts = self.makeNameDir(ooid, timestamp)
            jf = open(jname, 'w')

        succeeded = False
        try:
            # Do all this in a try/finally block to unroll in case of error
            self.osModule.chmod(jname, self.dumpPermissions)
            dname = os.path.join(nameDir, ooid + self.dumpSuffix)
            df = open(dname, 'w')
            self.osModule.chmod(dname, self.dumpPermissions)

            # Build the relative link target: climb out of the name branch, then descend into the date branch
            nameDepth = socorro_ooid.depthFromOoid(ooid)
            if not nameDepth:
                nameDepth = 4
            rparts = [os.path.pardir, ] * (1 + nameDepth)
            rparts.append(self.dateName)
            dateDepth = 2  # .../hh/mm_slot...
            if webheadHostName and self.subSlotCount:
                dateDepth = 3  # .../webHeadName_slot
            dateParts = dateDir.split(os.path.sep)[-dateDepth:]
            rparts.extend(dateParts)
            self.osModule.symlink(os.path.sep.join(rparts), os.path.join(nameDir, ooid))

            if self.dumpGID:
                def chown1(path):
                    # -1 leaves the owner untouched; only the group is changed
                    self.osModule.chown(path, -1, self.dumpGID)
                # NOTE(review): if visitPath skips a file leaf, the json file itself is not chown'd here - confirm
                socorro_fs.visitPath(self.root, os.path.join(nameDir, ooid + self.jsonSuffix), chown1, self.osModule)
                self.osModule.chown(os.path.join(nameDir, ooid + self.dumpSuffix), -1, self.dumpGID)
                #socorro_fs.visitPath(self.root,os.path.join(dateDir,ooid),chown1)
            succeeded = True
        finally:
            if not succeeded:
                # Roll back on ANY failure, including one after both files were opened,
                # so no handle is leaked and no dangling link is left behind.
                if jf:
                    jf.close()
                if df:
                    df.close()
                try:
                    self.osModule.unlink(os.path.join(dateDir, ooid))
                except Exception:
                    pass  # ok if not there
                try:
                    self.osModule.unlink(os.path.join(nameDir, ooid))
                except Exception:
                    pass  # ok if not there
                df, jf = None, None
        return (jf, df)