def testGetDate(self):
    """dateFromOoid and depthFromOoid must decode yyyy-style ooids correctly,
    and must return None (not raise) for malformed ooids."""
    for ooid in self.yyyyoids:
        assert self.baseDate == oo.dateFromOoid(ooid), \
            'Expected %s got %s' % (self.baseDate, oo.dateFromOoid(ooid))
        assert 4 == oo.depthFromOoid(ooid), \
            'Expected %d, got %d' % (4, oo.depthFromOoid(ooid))
    # PEP 8: compare to the None singleton with 'is', not '=='
    assert oo.dateFromOoid(self.badooid0) is None
    assert oo.dateFromOoid(self.badooid1) is None
def toDateFromName(self, uuid):
    """Given uuid, get the relative path to the top of the date directory
    from the name location"""
    depth = socorro_ooid.depthFromOoid(uuid)
    if not depth:
        # prior, when hardcoded depth=4, uuid[-8:] was yyyymmdd,
        # year was always (20xx)
        depth = 4
    # climb depth+1 levels out of the name branch, then descend into the
    # date branch
    parts = ['..'] * (depth + 1) + [self.dateName]
    return os.sep.join(parts)
def testCreateNewOoid(self):
    """createNewOoid must embed the expected date and depth for the default
    arguments and for every explicit depth/timestamp combination; malformed
    ooids must decode to None."""
    # default arguments: "now" timestamp at the default depth
    ooid = oo.createNewOoid()
    ndate = oo.dateFromOoid(ooid)
    ndepth = oo.depthFromOoid(ooid)
    assert self.nowstamp == ndate, 'Expect date of %s, got %s' % (self.nowstamp, ndate)
    assert oo.defaultDepth == ndepth, 'Expect default depth (%d) got %d' % (oo.defaultDepth, ndepth)
    # explicit timestamp, default depth
    ooid = oo.createNewOoid(timestamp=self.xmas05)
    ndate = oo.dateFromOoid(ooid)
    ndepth = oo.depthFromOoid(ooid)
    assert self.xmas05 == ndate, 'Expect date of %s, got %s' % (self.xmas05, ndate)
    assert oo.defaultDepth == ndepth, 'Expect default depth (%d) got %d' % (oo.defaultDepth, ndepth)
    # every supported depth, with and without an explicit timestamp
    for d in range(1, 5):
        ooid0 = oo.createNewOoid(depth=d)
        ooid1 = oo.createNewOoid(timestamp=self.xmas05, depth=d)
        ndate0 = oo.dateFromOoid(ooid0)
        ndepth0 = oo.depthFromOoid(ooid0)
        ndate1 = oo.dateFromOoid(ooid1)
        ndepth1 = oo.depthFromOoid(ooid1)
        assert self.nowstamp == ndate0, 'Expect date of %s, got %s' % (self.nowstamp, ndate0)
        assert self.xmas05 == ndate1, 'Expect date of %s, got %s' % (self.xmas05, ndate1)
        assert ndepth0 == ndepth1, 'Expect depth0(%d) == depth1(%d)' % (ndepth0, ndepth1)
        assert d == ndepth0, 'Expect depth %d, got %d' % (d, ndepth0)
    # PEP 8: compare to the None singleton with 'is', not '=='
    assert oo.depthFromOoid(self.badooid0) is None
    assert oo.depthFromOoid(self.badooid1) is None
def testCreateNewOoid(self):
    """createNewOoid must embed the expected date and depth for the default
    arguments and for every explicit depth/timestamp combination; malformed
    ooids must decode to None."""
    # default arguments: "now" timestamp at the default depth
    ooid = oo.createNewOoid()
    ndate = oo.dateFromOoid(ooid)
    ndepth = oo.depthFromOoid(ooid)
    assert self.nowstamp == ndate, 'Expect date of %s, got %s' % (self.nowstamp, ndate)
    assert oo.defaultDepth == ndepth, 'Expect default depth (%d) got %d' % (oo.defaultDepth, ndepth)
    # explicit timestamp, default depth
    ooid = oo.createNewOoid(timestamp=self.xmas05)
    ndate = oo.dateFromOoid(ooid)
    ndepth = oo.depthFromOoid(ooid)
    assert self.xmas05 == ndate, 'Expect date of %s, got %s' % (self.xmas05, ndate)
    assert oo.defaultDepth == ndepth, 'Expect default depth (%d) got %d' % (oo.defaultDepth, ndepth)
    # every supported depth, with and without an explicit timestamp
    for d in range(1, 5):
        ooid0 = oo.createNewOoid(depth=d)
        ooid1 = oo.createNewOoid(timestamp=self.xmas05, depth=d)
        ndate0 = oo.dateFromOoid(ooid0)
        ndepth0 = oo.depthFromOoid(ooid0)
        ndate1 = oo.dateFromOoid(ooid1)
        ndepth1 = oo.depthFromOoid(ooid1)
        assert self.nowstamp == ndate0, 'Expect date of %s, got %s' % (self.nowstamp, ndate0)
        assert self.xmas05 == ndate1, 'Expect date of %s, got %s' % (self.xmas05, ndate1)
        assert ndepth0 == ndepth1, 'Expect depth0(%d) == depth1(%d)' % (ndepth0, ndepth1)
        assert d == ndepth0, 'Expect depth %d, got %d' % (d, ndepth0)
    # PEP 8: compare to the None singleton with 'is', not '=='
    assert oo.depthFromOoid(self.badooid0) is None
    assert oo.depthFromOoid(self.badooid1) is None
def testCreateNewOoid(self):
    """create_new_ooid must embed the expected date and depth for default
    and explicit arguments alike; malformed ooids decode to None."""
    # defaults: stamped "now" at the default depth
    generated = ooid.create_new_ooid()
    assert self.nowstamp == ooid.dateFromOoid(generated)
    assert ooid.defaultDepth == ooid.depthFromOoid(generated)
    # explicit timestamp, default depth
    generated = ooid.create_new_ooid(timestamp=self.xmas05)
    assert self.xmas05 == ooid.dateFromOoid(generated)
    assert ooid.defaultDepth == ooid.depthFromOoid(generated)
    # each supported depth, with and without an explicit timestamp
    for depth in range(1, 5):
        plain = ooid.create_new_ooid(depth=depth)
        stamped = ooid.create_new_ooid(timestamp=self.xmas05, depth=depth)
        assert self.nowstamp == ooid.dateFromOoid(plain)
        assert self.xmas05 == ooid.dateFromOoid(stamped)
        assert ooid.depthFromOoid(plain) == ooid.depthFromOoid(stamped)
        assert depth == ooid.depthFromOoid(plain)
    # malformed ooids must decode to None rather than raising
    assert ooid.depthFromOoid(self.badooid0) is None
    assert ooid.depthFromOoid(self.badooid1) is None
def testCreateNewOoid(self):
    """create_new_ooid must embed the expected date and depth for default
    and explicit arguments alike; malformed ooids decode to None."""
    # defaults: stamped "now" at the default depth
    generated = ooid.create_new_ooid()
    assert self.nowstamp == ooid.dateFromOoid(generated)
    assert ooid.defaultDepth == ooid.depthFromOoid(generated)
    # explicit timestamp, default depth
    generated = ooid.create_new_ooid(timestamp=self.xmas05)
    assert self.xmas05 == ooid.dateFromOoid(generated)
    assert ooid.defaultDepth == ooid.depthFromOoid(generated)
    # each supported depth, with and without an explicit timestamp
    for depth in range(1, 5):
        plain = ooid.create_new_ooid(depth=depth)
        stamped = ooid.create_new_ooid(timestamp=self.xmas05, depth=depth)
        assert self.nowstamp == ooid.dateFromOoid(plain)
        assert self.xmas05 == ooid.dateFromOoid(stamped)
        assert ooid.depthFromOoid(plain) == ooid.depthFromOoid(stamped)
        assert depth == ooid.depthFromOoid(plain)
    # malformed ooids must decode to None rather than raising
    assert ooid.depthFromOoid(self.badooid0) is None
    assert ooid.depthFromOoid(self.badooid1) is None
def new_entry(self, crash_id, raw_crash, dumps_dict, webhead_host_name='webhead01', timestamp=None):
    """Store a raw crash and its dumps under the name branch, then symlink
    the name entry to the corresponding date-branch directory.

    crash_id          -- ooid; file basename and source of the storage depth
    raw_crash         -- mapping, serialized as JSON to <crash_id><jsonSuffix>
    dumps_dict        -- mapping of dump_name -> dump data; a non-mapping
                         value is wrapped as {self.dump_field: value}
    webhead_host_name -- when truthy (with self.subSlotCount), the date path
                         ends one level deeper (.../webHeadName_slot)
    timestamp         -- passed straight through to the parent newEntry
    """
    # convenience: allow a single bare dump instead of a mapping
    if not isinstance(dumps_dict, collections.Mapping):
        dumps_dict = {self.dump_field: dumps_dict}
    # parent creates both branch directories and returns their paths
    name_dir, date_dir = super(JsonDumpStorage, self).newEntry(
        crash_id,
        timestamp,
        webhead_host_name
    )
    raw_crash_pathname = os.path.join(
        name_dir,
        crash_id + self.jsonSuffix
    )
    with open(raw_crash_pathname, "w") as rcf:
        json.dump(raw_crash, rcf)
    # write each dump alongside the json, with the configured permissions
    for dump_name, dump in dumps_dict.iteritems():
        full_dump_name = self.dump_file_name(crash_id, dump_name)
        dump_pathname = os.path.join(
            name_dir,
            full_dump_name
        )
        with open(dump_pathname, "w") as dp:
            dp.write(dump)
        self.osModule.chmod(dump_pathname, self.dumpPermissions)
    # depth is encoded in the ooid; 0/None means the legacy fixed depth of 4
    name_depth = socorro_ooid.depthFromOoid(crash_id)
    if not name_depth:
        name_depth = 4
    # build a RELATIVE link: climb out of the name branch (depth + 1 levels),
    # then descend into the date branch
    rparts = [os.path.pardir, ] * (1 + name_depth)
    rparts.append(self.dateName)
    date_depth = 2  # .../hh/mm_slot...
    if webhead_host_name and self.subSlotCount:
        date_depth = 3  # .../webHeadName_slot
    date_parts = date_dir.split(os.path.sep)[-date_depth:]
    rparts.extend(date_parts)
    self.osModule.symlink(
        os.path.sep.join(rparts),
        os.path.join(name_dir, crash_id)
    )
def _get_radix(crash_id):
    """Break the leading characters of crash_id into two-character
    directory-path segments, one per encoded depth level."""
    pair_count = depthFromOoid(crash_id)
    return [crash_id[offset:offset + 2] for offset in range(0, pair_count * 2, 2)]
def testGetDate(self):
    """dateFromOoid and depthFromOoid must decode yyyy-style ooids correctly,
    and must return None (not raise) for malformed ooids."""
    for ooid in self.yyyyoids:
        assert self.baseDate == ooid.dateFromOoid(ooid) if False else self.baseDate == oo.dateFromOoid(ooid), \
            'Expected %s got %s' % (self.baseDate, oo.dateFromOoid(ooid))
        assert 4 == oo.depthFromOoid(ooid), \
            'Expected %d, got %d' % (4, oo.depthFromOoid(ooid))
    # PEP 8: compare to the None singleton with 'is', not '=='
    assert oo.dateFromOoid(self.badooid0) is None
    assert oo.dateFromOoid(self.badooid1) is None
# NOTE(review): this is a fragment of a larger method -- names such as
# dumppath, jsonNewPath, nameDir, nparts, createLinks and removeOld are
# bound earlier, outside this chunk.  Code tokens left untouched.
try:
    self.logger.debug('%s - about to copy dump %s to %s', threading.currentThread().getName(), dumppath, dumpNewPath)
    shutil.copy2(dumppath, dumpNewPath)
    self.osModule.chmod(dumpNewPath, self.dumpPermissions)
    if self.dumpGID:
        self.osModule.chown(dumpNewPath, -1, self.dumpGID)
        self.osModule.chown(jsonNewPath, -1, self.dumpGID)
except OSError, e:
    # copying the dump failed: remove the already-written json so we do not
    # leave a half-stored crash behind, then re-raise the original error
    try:
        self.osModule.unlink(jsonNewPath)
    finally:
        raise e
if createLinks:
    self.logger.debug('%s - building links', threading.currentThread().getName())
    dateDir, dparts = self.makeDateDir(timestamp, webheadHostName)
    # depth is encoded in the ooid; falsy means the legacy fixed depth of 4
    nameDepth = socorro_ooid.depthFromOoid(ooid)
    if not nameDepth:
        nameDepth = 4
    # relative link from the name branch into the date branch
    nameToDateParts = [os.pardir, ] * (1 + nameDepth)
    nameToDateParts.extend(dparts[2:])
    self.osModule.symlink(os.sep.join(nameToDateParts), os.path.join(nameDir, ooid))
    try:
        # and the reverse link, from the date branch back to the name branch
        dateToNameParts = [os.pardir, ] * (len(dparts) - 2)
        dateToNameParts.extend(nparts[2:])
        self.osModule.symlink(os.sep.join(dateToNameParts), os.path.join(dateDir, ooid))
    except OSError, e:
        # undo the forward link so the two branches stay consistent
        self.osModule.unlink(os.path.join(nameDir, ooid))
        raise e
if removeOld:
    self.logger.debug('%s - removing old %s, %s', threading.currentThread().getName(), jsonpath, dumppath)
    # fragment ends mid-try; the matching except/finally follows this chunk
    try:
        self.osModule.unlink(jsonpath)
def __namePath(self, uuid, startswith):
    """Because the name structure is almost simple, so is the method that creates one"""
    depth = socorro_ooid.depthFromOoid(uuid)
    if not depth:
        # prior, when hardcoded depth=4, uuid[-8:] was yyyymmdd,
        # year was always (20xx)
        depth = 4
    # split the first 2*depth characters into duples, join them, and
    # prepend startswith
    duples = [uuid[2 * i:2 * i + 2] for i in range(depth)]
    return os.sep.join([startswith, os.sep.join(duples)])
def relativeNameParts(self, ooid):
    """Return the two-character path segments encoded in the ooid, one per
    depth level (legacy fixed depth of 4 when none is encoded)."""
    depth = socorro_ooid.depthFromOoid(ooid) or 4
    segments = []
    for level in range(depth):
        segments.append(ooid[2 * level:2 * level + 2])
    return segments
def testGetDate(self):
    """Well-formed yyyyoids decode to baseDate at depth 4; malformed ooids
    decode to None."""
    for good_ooid in self.yyyyoids:
        assert self.baseDate == ooid.dateFromOoid(good_ooid)
        assert 4 == ooid.depthFromOoid(good_ooid)
    for bad in (self.badooid0, self.badooid1):
        assert ooid.dateFromOoid(bad) is None
def testGetDate(self):
    """Well-formed yyyyoids decode to baseDate at depth 4; malformed ooids
    decode to None."""
    for good_ooid in self.yyyyoids:
        assert self.baseDate == ooid.dateFromOoid(good_ooid)
        assert 4 == ooid.depthFromOoid(good_ooid)
    for bad in (self.badooid0, self.badooid1):
        assert ooid.dateFromOoid(bad) is None
class JsonDumpStorage(socorro_dumpStorage.DumpStorage): """ This class implements a file system storage scheme for the JSON and dump files of the Socorro project. It create a tree with two branches: the name branch and the date branch. - The name branch consists of paths based on the first 8 characters of the crash_id file name. It holds the two data files and a relative symbolic link to the date branch directory associated with the particular crash_id. see socorro.lib.ooid.py for details of date and depth encoding within the crash_id For the crash_id: 22adfb61-f75b-11dc-b6be-001322081225 - the json file is stored as %(root)s/%(daypart)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225 .json - the dump file is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225.dump - the symbolic link is stored as %(root)s/name/22/ad/22adfb61-f75b-11dc-b6be-001322081225 and (see below) references %(toDateFromName)s/date/2008/12/25/12/05/webhead01_0 - The date branch consists of paths based on the year, month, day, hour, minute-segment, webhead host name and a small sequence number. For each crash_id, it holds a relative symbolic link referring to the actual storage (name) directory holding the data for that crash_id. For the crash_id above, submitted at 2008-12-25T12:05 from webhead01 - the symbolic link is stored as %(root)s/date/2008/09/30/12/05/webhead01_0/22adfb61-f75b-11dc-b6be- 001322081225 and references %(toNameFromDate)s/name/22/ad/ Note: The symbolic links are relative, so they begin with some rounds of '../'. This is to avoid issues that might arise from variously mounted nfs volumes. If the layout changes, self.toNameFromDate and toDateFromName must be changed to match, as well as a number of the private methods. Note: If so configured, the bottom nodes in the date path will be %(webheadName)s_n for n in range(N) for some reasonable (5, perhaps) N. Files are placed into these buckets in rotation. 
""" #-------------------------------------------------------------------------- def __init__(self, root=".", osModule=os, **kwargs): """ Take note of our root directory and other necessities. Yes, it is perfectly legal to call super(...).__init__() after doing some other code. As long as you expect the behavior you get, anyway... """ kwargs.setdefault('minutesPerSlot', 1) kwargs.setdefault('subSlotCount', 1) # that is: use xxx_0 every time # by default super(JsonDumpStorage, self).__init__(root=root, osModule=osModule, **kwargs) tmp = kwargs.get('cleanIndexDirectories', 'false') self.cleanIndexDirectories = 'true' == tmp.lower() self.jsonSuffix = kwargs.get('jsonSuffix', '.json') if not self.jsonSuffix.startswith('.'): self.jsonSuffix = ".%s" % (self.jsonSuffix) self.dumpSuffix = kwargs.get('dumpSuffix', '.dump') if not self.dumpSuffix.startswith('.'): self.dumpSuffix = ".%s" % (self.dumpSuffix) self.logger = kwargs.get('logger', socorro_util.FakeLogger()) #-------------------------------------------------------------------------- def new_entry(self, crash_id, raw_crash, dumps_dict, webhead_host_name='webhead01', timestamp=None): if not isinstance(dumps_dict, collections.Mapping): dumps_dict = {self.dump_field: dumps_dict} name_dir, date_dir = super(JsonDumpStorage, self).newEntry( crash_id, datetimeFromISOdateString(raw_crash['submitted_timestamp']), webhead_host_name) raw_crash_pathname = os.path.join(name_dir, crash_id + self.jsonSuffix) with open(raw_crash_pathname, "w") as rcf: json.dump(raw_crash, rcf) for dump_name, dump in dumps_dict.iteritems(): full_dump_name = self.dump_file_name(crash_id, dump_name) dump_pathname = os.path.join(name_dir, full_dump_name) with open(dump_pathname, "w") as dp: dp.write(dump) self.osModule.chmod(dump_pathname, self.dumpPermissions) name_depth = socorro_ooid.depthFromOoid(crash_id) if not name_depth: name_depth = 4 rparts = [ os.path.pardir, ] * (1 + name_depth) rparts.append(self.dateName) date_depth = 2 # .../hh/mm_slot... 
if webhead_host_name and self.subSlotCount: date_depth = 3 # .../webHeadName_slot date_parts = date_dir.split(os.path.sep)[-date_depth:] rparts.extend(date_parts) self.osModule.symlink(os.path.sep.join(rparts), os.path.join(name_dir, crash_id)) #-------------------------------------------------------------------------- def newEntry(self, crash_id, webheadHostName='webhead01', timestamp=None): """ Sets up the name and date storage directory branches for the given crash_id. Creates any directories that it needs along the path to the appropriate storage location. Creates two relative symbolic links: the date branch link pointing to the name directory holding the files; the name branch link pointing to the date branch directory holding that link. Returns a 2-tuple containing files open for writing: (jsonfile, dumpfile) If self.dumpGID, then the file tree from root to and including the data files are chown'd If self.dumpPermissions, then chmod is called on the data files """ # note: after this call, dateDir already holds link to nameDir nameDir, dateDir = super(JsonDumpStorage, self).newEntry(crash_id, timestamp, webheadHostName) df, jf = None, None jname = os.path.join(nameDir, crash_id + self.jsonSuffix) try: jf = open(jname, 'w') except IOError, x: if 2 == x.errno: nameDir = self.makeNameDir(crash_id, timestamp) # deliberately # leave this dir behind if next line throws jf = open(jname, 'w') else: raise x try: # Do all this in a try/finally block to unroll in case of error self.osModule.chmod(jname, self.dumpPermissions) dname = os.path.join(nameDir, crash_id + self.dumpSuffix) df = open(dname, 'w') self.osModule.chmod(dname, self.dumpPermissions) nameDepth = socorro_ooid.depthFromOoid(crash_id) if not nameDepth: nameDepth = 4 rparts = [ os.path.pardir, ] * (1 + nameDepth) rparts.append(self.dateName) dateDepth = 2 # .../hh/mm_slot... 
if webheadHostName and self.subSlotCount: dateDepth = 3 # .../webHeadName_slot dateParts = dateDir.split(os.path.sep)[-dateDepth:] rparts.extend(dateParts) self.osModule.symlink(os.path.sep.join(rparts), os.path.join(nameDir, crash_id)) if self.dumpGID: def chown1(path): self.osModule.chown(path, -1, self.dumpGID) socorro_fs.visitPath( self.root, os.path.join(nameDir, crash_id + self.jsonSuffix), chown1, self.osModule) self.osModule.chown( os.path.join(nameDir, crash_id + self.dumpSuffix), -1, self.dumpGID) # socorro_fs.visitPath(self.root, # os.path.join(dateDir,crash_id), # chown1 # ) finally: if not jf or not df: if jf: jf.close() if df: df.close() try: self.osModule.unlink(os.path.join(dateDir, crash_id)) except Exception: pass # ok if not there try: self.osModule.unlink(os.path.join(nameDir, crash_id)) except Exception: pass # ok if not there df, jf = None, None return (jf, df)
def _get_radix(crash_id):
    """Return the list of two-character directory segments for crash_id,
    one per encoded depth level."""
    segments = []
    for level in range(depthFromOoid(crash_id)):
        segments.append(crash_id[level * 2:(level + 1) * 2])
    return segments
# NOTE(review): fragment of a larger method -- the matching try: opens
# before this chunk, and dumppath, jsonNewPath, nameDir, nparts,
# createLinks etc. are bound earlier.  Code tokens left untouched.
    self.logger.debug('about to copy dump %s to %s', dumppath, dumpNewPath)
    shutil.copy2(dumppath, dumpNewPath)
    self.osModule.chmod(dumpNewPath, self.dumpPermissions)
    if self.dumpGID:
        self.osModule.chown(dumpNewPath, -1, self.dumpGID)
        self.osModule.chown(jsonNewPath, -1, self.dumpGID)
except OSError, e:
    # copying the dump failed: remove the already-written json so we do not
    # leave a half-stored crash behind, then re-raise the original error
    try:
        self.osModule.unlink(jsonNewPath)
    finally:
        raise e
if createLinks:
    self.logger.debug('building links')
    dateDir, dparts = self.makeDateDir(timestamp, webheadHostName)
    # depth is encoded in the ooid; falsy means the legacy fixed depth of 4
    nameDepth = socorro_ooid.depthFromOoid(ooid)
    if not nameDepth:
        nameDepth = 4
    # relative link from the name branch into the date branch
    nameToDateParts = [os.pardir, ] * (1 + nameDepth)
    nameToDateParts.extend(dparts[2:])
    self.osModule.symlink(os.sep.join(nameToDateParts),
                          os.path.join(nameDir, ooid))
    try:
        # and the reverse link, from the date branch back to the name branch
        dateToNameParts = [os.pardir, ] * (len(dparts) - 2)
        dateToNameParts.extend(nparts[2:])
        self.osModule.symlink(os.sep.join(dateToNameParts),
                              os.path.join(dateDir, ooid))