def testVisitPath(self):
    """Exercise f.visitPath: visits every directory from top down to last,
    tolerates a file as the leaf, and raises OSError for a missing leaf or
    when the root is not above the full path."""
    f.makedirs('TestDir/a/b/c/d/e/f')
    fi = open('TestDir/a/b/c/d/D0', 'w')
    fi.write("hi\n")
    # BUG FIX: was `fi.close` (a bare attribute reference) so the file was
    # never actually closed; call it.
    fi.close()
    seen = set()
    def collector(x):
        seen.add(x)
    top = 'TestDir/a'
    last = 'TestDir/a/b/c/d'
    absTop = os.path.normpath(top)
    # visitPath should touch top itself plus each intermediate directory.
    expected = set([absTop])
    for i in [['b'], ['b', 'c'], ['b', 'c', 'd']]:
        expected.add(os.path.join(absTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)
    seen.clear()
    # A plain file as the leaf is quietly accepted.
    top = 'TestDir/a/b'
    last = 'TestDir/a/b/c/d/D0'
    normTop = os.path.normpath(top)
    expected = set([normTop])
    for i in [['c'], ['c', 'd']]:
        expected.add(os.path.join(normTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)
    # Test for non-existent leaf
    assert_raises(OSError, f.visitPath, 'TestDir', 'TestDir/A/BB', collector)
    # Test for rootDir not above fullPath
    assert_raises(OSError, f.visitPath, 'TestDir/A/B', 'TestDir/A', collector)
def testNewEntryPermissions(self): dirPermissions=0707 dumpPermissions=0500 sfl = SilentFakeLogger() j = JDS.JsonDumpStorage(root=self.testDir,dirPermissions=dirPermissions,dumpPermissions=dumpPermissions,logger=sfl) u = str(socorro_uuid.uuid1()) f1, f2 = j.newEntry(u) f1.close() f2.close() jpath = j.getJson(u) gotPermissions = stat.S_IMODE(os.stat(jpath)[0]) assert stat.S_IMODE(os.stat(jpath)[0]) == dumpPermissions, "%s: Expected %o, got %o" % (jpath, dumpPermissions, gotPermissions) dpath = j.getDump(u) gotPermissions = stat.S_IMODE(os.stat(dpath)[0]) assert stat.S_IMODE(os.stat(dpath)[0]) == dumpPermissions, "%s: Expected %o, got %o" % (dpath, dumpPermissions, gotPermissions) udir = os.path.split(dpath)[0] datePath = os.path.abspath(os.path.join(udir,os.readlink(os.path.splitext(dpath)[0]))) namePath = os.path.abspath(os.path.splitext(dpath)[0]) topPath = os.path.abspath(self.testDir) dailies = os.listdir(topPath) def assertPermVisitor(p): gotPerm = stat.S_IMODE(os.stat(p)[0]) assert dirPermissions == gotPerm, "%s: Expected %0o, got %0o"%(p,dirPermissions,gotPerm) for d in dailies: # visitPath quietly ignores a file as the leaf socorro_fs.visitPath(os.path.join(topPath,d),datePath,assertPermVisitor) socorro_fs.visitPath(os.path.join(topPath,d),namePath,assertPermVisitor)
def testNewEntryPermissions(self): dirPermissions=0707 dumpPermissions=0500 sfl = SilentFakeLogger() j = JDS.JsonDumpStorage(root=self.testDir,dirPermissions=dirPermissions,dumpPermissions=dumpPermissions,logger=sfl) u = str(socorro_uuid.uuid1()) f1, f2 = j.newEntry(u) f1.close() f2.close() jpath = j.getJson(u) gotPermissions = stat.S_IMODE(os.stat(jpath)[0]) assert stat.S_IMODE(os.stat(jpath)[0]) == dumpPermissions, "%s: Expected %o, got %o" % (jpath, dumpPermissions, gotPermissions) dpath = j.getDump(u) gotPermissions = stat.S_IMODE(os.stat(dpath)[0]) assert stat.S_IMODE(os.stat(dpath)[0]) == dumpPermissions, "%s: Expected %o, got %o" % (dpath, dumpPermissions, gotPermissions) udir = os.path.split(dpath)[0] datePath = os.path.abspath(os.path.join(udir,os.readlink(os.path.splitext(dpath)[0]))) namePath = os.path.abspath(os.path.splitext(dpath)[0]) topPath = os.path.abspath(self.testDir) dailies = os.listdir(topPath) def assertPermVisitor(p): gotPerm = stat.S_IMODE(os.stat(p)[0]) assert dirPermissions == gotPerm, "%s: Expected %0o, got %0o"%(p,dirPermissions,gotPerm) for d in dailies: # visitPath quietly ignores a file as the leaf socorro_fs.visitPath(os.path.join(topPath,d),datePath,assertPermVisitor) socorro_fs.visitPath(os.path.join(topPath,d),namePath,assertPermVisitor)
def testVisitPath(self):
    """Exercise f.visitPath: visits every directory from top down to last,
    tolerates a file as the leaf, and raises OSError for a missing leaf or
    when the root is not above the full path."""
    f.makedirs('TestDir/a/b/c/d/e/f')
    fi = open('TestDir/a/b/c/d/D0', 'w')
    fi.write("hi\n")
    # BUG FIX: was `fi.close` (a bare attribute reference) so the file was
    # never actually closed; call it.
    fi.close()
    seen = set()
    def collector(x):
        seen.add(x)
    top = 'TestDir/a'
    last = 'TestDir/a/b/c/d'
    absTop = os.path.normpath(top)
    # visitPath should touch top itself plus each intermediate directory.
    expected = set([absTop])
    for i in [['b'], ['b', 'c'], ['b', 'c', 'd']]:
        expected.add(os.path.join(absTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)
    seen.clear()
    # A plain file as the leaf is quietly accepted.
    top = 'TestDir/a/b'
    last = 'TestDir/a/b/c/d/D0'
    normTop = os.path.normpath(top)
    expected = set([normTop])
    for i in [['c'], ['c', 'd']]:
        expected.add(os.path.join(normTop, os.sep.join(i)))
    f.visitPath(top, last, collector)
    assert expected == seen, 'but x-s=%s and s-x=%s' % (expected - seen, seen - expected)
    # Test for non-existent leaf
    assert_raises(OSError, f.visitPath, 'TestDir', 'TestDir/A/BB', collector)
    # Test for rootDir not above fullPath
    assert_raises(OSError, f.visitPath, 'TestDir/A/B', 'TestDir/A', collector)
def makeNameDir(self, ooid, timestamp=None):
    """Guarantee the name-branch directory for ooid exists.

    Returns a (path, path components) pair.  OSError propagates when the
    directory is genuinely missing and cannot be created.
    """
    npath, nparts = self.namePath(ooid, timestamp)
    # Open the umask so dirPermissions is applied exactly as configured.
    savedUmask = self.osModule.umask(0)
    try:
        try:
            socorro_fs.makedirs(npath, self.dirPermissions, self.osModule)
        except OSError:
            # A concurrent creator may have beaten us to it; that is fine.
            stillMissing = not self.osModule.path.isdir(npath)
            if stillMissing:
                raise
    finally:
        self.osModule.umask(savedUmask)
    if self.dumpGID:
        # Walk from the storage root down to the leaf fixing group ownership.
        socorro_fs.visitPath(
            os.path.join(*nparts[:2]), npath, self.chownGidVisitor)
    return npath, nparts
def makeDateDir(self, date, webheadName=None):
    """Assure existence of date directory for the given date; return
    (path, path components).

    Raises OSError when the directory is missing and cannot be created.
    """
    dpath, dparts = self.datePath(date, webheadName)
    # Open the umask so dirPermissions is applied exactly as configured.
    um = self.osModule.umask(0)
    try:
        try:
            socorro_fs.makedirs(dpath, self.dirPermissions, self.osModule)
        # FIX: drop the unused `, e` binding (it was only referenced by
        # long-dead commented-out debug code, now removed); this matches
        # the sibling makeNameDir/makeDateDir implementations.
        except OSError:
            # Already existing (e.g. concurrent creation) is fine.
            if not self.osModule.path.isdir(dpath):
                raise
    finally:
        self.osModule.umask(um)
    if self.dumpGID:
        # Walk from the storage root down to dpath fixing group ownership.
        socorro_fs.visitPath(
            os.path.join(*dparts[:2]), dpath, self.chownGidVisitor)
    return dpath, dparts
def makeDateDir(self, date, webheadName=None):
    """Guarantee the date-branch directory for `date` exists.

    Returns a (path, path components) pair.  OSError propagates when the
    directory is genuinely missing and cannot be created.
    """
    dpath, dparts = self.datePath(date, webheadName)
    # Open the umask so dirPermissions is applied exactly as configured.
    priorUmask = self.osModule.umask(0)
    try:
        try:
            socorro_fs.makedirs(dpath, self.dirPermissions, self.osModule)
        except OSError:
            # A concurrent creator may have beaten us to it; that is fine.
            alreadyThere = self.osModule.path.isdir(dpath)
            if not alreadyThere:
                raise
    finally:
        self.osModule.umask(priorUmask)
    if self.dumpGID:
        # Walk from the storage root down to dpath fixing group ownership.
        socorro_fs.visitPath(
            os.path.join(*dparts[:2]), dpath, self.chownGidVisitor)
    return dpath, dparts
def makeDateDir(self, date, webheadName=None):
    """Assure existence of date directory for the given date; return
    (path, path components).

    Raises OSError when the directory is missing and cannot be created.
    """
    dpath, dparts = self.datePath(date, webheadName)
    # Open the umask so dirPermissions is applied exactly as configured.
    um = self.osModule.umask(0)
    try:
        try:
            socorro_fs.makedirs(dpath, self.dirPermissions, self.osModule)
        # FIX: drop the unused `, e` binding (it was only referenced by
        # long-dead commented-out debug code, now removed); this matches
        # the sibling makeNameDir/makeDateDir implementations.
        except OSError:
            # Already existing (e.g. concurrent creation) is fine.
            if not self.osModule.path.isdir(dpath):
                raise
    finally:
        self.osModule.umask(um)
    if self.dumpGID:
        # Walk from the storage root down to dpath fixing group ownership.
        socorro_fs.visitPath(os.path.join(*dparts[:2]), dpath, self.chownGidVisitor)
    return dpath, dparts
class JsonDumpStorage(socorro_dumpStorage.DumpStorage): """ This class implements a file system storage scheme for the JSON and dump files of the Socorro project. It create a tree with two branches: the name branch and the date branch. - The name branch consists of paths based on the first 8 characters of the crash_id file name. It holds the two data files and a relative symbolic link to the date branch directory associated with the particular crash_id. see socorro.lib.ooid.py for details of date and depth encoding within the crash_id For the crash_id: 22adfb61-f75b-11dc-b6be-001322081225 - the json file is stored as %(root)s/%(daypart)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225 .json - the dump file is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225.dump - the symbolic link is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225 and (see below) references %(toDateFromName)s/date/2008/12/25/12/05/webhead01_0 - The date branch consists of paths based on the year, month, day, hour, minute-segment, webhead host name and a small sequence number. For each crash_id, it holds a relative symbolic link referring to the actual storage (name) directory holding the data for that crash_id. For the crash_id above, submitted at 2008-12-25T12:05 from webhead01 - the symbolic link is stored as %(root)s/date/2008/09/30/12/05/webhead01_0/22adfb61-f75b-11dc-b6be- 001322081225 and references %(toNameFromDate)s/name/22/ad/ Note: The symbolic links are relative, so they begin with some rounds of '../'. This is to avoid issues that might arise from variously mounted nfs volumes. If the layout changes, self.toNameFromDate and toDateFromName must be changed to match, as well as a number of the private methods. Note: If so configured, the bottom nodes in the date path will be %(webheadName)s_n for n in range(N) for some reasonable (5, perhaps) N. Files are placed into these buckets in rotation. 
""" #-------------------------------------------------------------------------- def __init__(self, root=".", osModule=os, **kwargs): """ Take note of our root directory and other necessities. Yes, it is perfectly legal to call super(...).__init__() after doing some other code. As long as you expect the behavior you get, anyway... """ kwargs.setdefault('minutesPerSlot', 1) kwargs.setdefault('subSlotCount', 1) # that is: use xxx_0 every time # by default super(JsonDumpStorage, self).__init__(root=root, osModule=osModule, **kwargs) tmp = kwargs.get('cleanIndexDirectories', 'false') self.cleanIndexDirectories = 'true' == tmp.lower() self.jsonSuffix = kwargs.get('jsonSuffix', '.json') if not self.jsonSuffix.startswith('.'): self.jsonSuffix = ".%s" % (self.jsonSuffix) self.dumpSuffix = kwargs.get('dumpSuffix', '.dump') if not self.dumpSuffix.startswith('.'): self.dumpSuffix = ".%s" % (self.dumpSuffix) self.logger = kwargs.get('logger', socorro_util.FakeLogger()) #-------------------------------------------------------------------------- def new_entry(self, crash_id, raw_crash, dumps_dict, webhead_host_name='webhead01', timestamp=None): if not isinstance(dumps_dict, collections.Mapping): dumps_dict = {self.dump_field: dumps_dict} name_dir, date_dir = super(JsonDumpStorage, self).newEntry( crash_id, datetimeFromISOdateString(raw_crash['submitted_timestamp']), webhead_host_name) raw_crash_pathname = os.path.join(name_dir, crash_id + self.jsonSuffix) with open(raw_crash_pathname, "w") as rcf: json.dump(raw_crash, rcf) for dump_name, dump in dumps_dict.iteritems(): full_dump_name = self.dump_file_name(crash_id, dump_name) dump_pathname = os.path.join(name_dir, full_dump_name) with open(dump_pathname, "w") as dp: dp.write(dump) self.osModule.chmod(dump_pathname, self.dumpPermissions) name_depth = socorro_ooid.depthFromOoid(crash_id) if not name_depth: name_depth = 4 rparts = [ os.path.pardir, ] * (1 + name_depth) rparts.append(self.dateName) date_depth = 2 # .../hh/mm_slot... 
if webhead_host_name and self.subSlotCount: date_depth = 3 # .../webHeadName_slot date_parts = date_dir.split(os.path.sep)[-date_depth:] rparts.extend(date_parts) self.osModule.symlink(os.path.sep.join(rparts), os.path.join(name_dir, crash_id)) #-------------------------------------------------------------------------- def newEntry(self, crash_id, webheadHostName='webhead01', timestamp=None): """ Sets up the name and date storage directory branches for the given crash_id. Creates any directories that it needs along the path to the appropriate storage location. Creates two relative symbolic links: the date branch link pointing to the name directory holding the files; the name branch link pointing to the date branch directory holding that link. Returns a 2-tuple containing files open for writing: (jsonfile, dumpfile) If self.dumpGID, then the file tree from root to and including the data files are chown'd If self.dumpPermissions, then chmod is called on the data files """ # note: after this call, dateDir already holds link to nameDir nameDir, dateDir = super(JsonDumpStorage, self).newEntry(crash_id, timestamp, webheadHostName) df, jf = None, None jname = os.path.join(nameDir, crash_id + self.jsonSuffix) try: jf = open(jname, 'w') except IOError, x: if 2 == x.errno: nameDir = self.makeNameDir(crash_id, timestamp) # deliberately # leave this dir behind if next line throws jf = open(jname, 'w') else: raise x try: # Do all this in a try/finally block to unroll in case of error self.osModule.chmod(jname, self.dumpPermissions) dname = os.path.join(nameDir, crash_id + self.dumpSuffix) df = open(dname, 'w') self.osModule.chmod(dname, self.dumpPermissions) nameDepth = socorro_ooid.depthFromOoid(crash_id) if not nameDepth: nameDepth = 4 rparts = [ os.path.pardir, ] * (1 + nameDepth) rparts.append(self.dateName) dateDepth = 2 # .../hh/mm_slot... 
if webheadHostName and self.subSlotCount: dateDepth = 3 # .../webHeadName_slot dateParts = dateDir.split(os.path.sep)[-dateDepth:] rparts.extend(dateParts) self.osModule.symlink(os.path.sep.join(rparts), os.path.join(nameDir, crash_id)) if self.dumpGID: def chown1(path): self.osModule.chown(path, -1, self.dumpGID) socorro_fs.visitPath( self.root, os.path.join(nameDir, crash_id + self.jsonSuffix), chown1, self.osModule) self.osModule.chown( os.path.join(nameDir, crash_id + self.dumpSuffix), -1, self.dumpGID) # socorro_fs.visitPath(self.root, # os.path.join(dateDir,crash_id), # chown1 # ) finally: if not jf or not df: if jf: jf.close() if df: df.close() try: self.osModule.unlink(os.path.join(dateDir, crash_id)) except Exception: pass # ok if not there try: self.osModule.unlink(os.path.join(nameDir, crash_id)) except Exception: pass # ok if not there df, jf = None, None return (jf, df)