def _storeFile(self, request, filekey, reqKu, nodeID):
    """Handle a STORE request by persisting the uploaded data in storedir.

    The upload (request.args['filename'][0]) is first written to a temporary
    file inside self.config.storedir.  What happens next depends on filekey:

    * filekey ends in ".tar" or ".tar.gz": the upload is treated as a
      tarball.  It is checked with TarfileUtils.verifyHashes() and then
      either concatenated onto the already existing per-requester tarball
      (named after reqKu.id()) or renamed into place as that tarball.
    * anything else: the upload is a single file whose hash (hashfile()
      passed through fencode()) must equal filekey.  It is then recorded
      against an already stored file of the same name, appended to the
      per-requester tarball when it is smaller than 8192 bytes, or stored
      on its own and converted with BlockFile.convert().

    Args:
        request: web request object.  args['filename'] is read for the
            payload, args['meta'] / args['metakey'] are read for regular
            files, and setResponseCode() is called on a key mismatch.
        filekey: storage key supplied by the client.
        reqKu: only reqKu.id() is used, to name the per-requester tarball
            (presumably the requester's public key -- confirm with caller).
        nodeID: hexadecimal string; parsed with int(nodeID, 16).

    Returns:
        "Successful STORE" on success, otherwise the error message that was
        also placed on the response together with http.CONFLICT.
    """
    # [XXX: memory management is not happy here.  might want to look at
    # request.registerProducer().  Otherwise, might have to scrap
    # using the STORE(ROOT(RESOURCE)) deal in favor of
    # producer/consumer model for STORE ops
    # (http://itamarst.org/writings/OSCON03/twisted_internet-108.html).
    # Another option might include subclassing web.resource.Resource
    # and making this derive from that...  Or might be web.Site that
    # needs to be subclassed... Or maybe web.site.Request -
    # web.site.Request.process()?  Request seems doubly-bad: perhaps a
    # copy is made somewhere, because memory mushrooms to 2x big
    # upload, then goes back down to around 1x.
    # [update: This should be fixable in twisted.web2, but I am informed
    # that in the current version, there is no workaround]

    SMALL_FILE_LIMIT = 8192  # XXX: magic # (blk sz)

    def logFileType(path):
        # Debug aid: log what file(1) reports for path.
        ftype = os.popen('file %s' % path)
        try:
            loggerstor.debug("ftype of %s is %s" % (path, ftype.read()))
        finally:
            ftype.close()

    storedir = self.config.storedir

    # get the data to a tmp file
    loggerstor.debug("writing store data to tmpfile")
    # NOTE(review): tempfile.mktemp() is race-prone; kept because the name
    # gets a suffix appended and is later renamed into place.
    tmpfile = tempfile.mktemp(dir=storedir)

    # Uncompressed per-requester tarball; the gzip variant is plainTar+".gz".
    plainTar = os.path.join(storedir, reqKu.id() + ".tar")

    # rename and/or prepend the data appropriately
    tmpTarMode = None
    targetTar = None
    if filekey[-4:] == ".tar":
        tmpfile = tmpfile + ".tar"
        tmpTarMode = 'r'
        targetTar = plainTar
    elif filekey[-7:] == ".tar.gz":
        tmpfile = tmpfile + ".tar.gz"
        tmpTarMode = 'r:gz'
        targetTar = plainTar + ".gz"
    loggerstor.debug("tmpfile is %s" % tmpfile)

    # XXX: if the server supports both .tar and tar.gz, this is wrong; we'd
    # need to check *both* for already existing dudes instead of just
    # choosing one
    if os.path.exists(plainTar + '.gz'):
        existing = (plainTar + '.gz', 'r:gz')
    elif os.path.exists(plainTar):
        existing = (plainTar, 'r')
    else:
        existing = None
    loggerstor.debug("tarball is %s" % str(existing))

    data = request.args.get('filename')[0]  # XXX: file in mem! need web2.
    # XXX: bad blocking stuff here
    out = open(tmpfile, 'wb')
    try:
        out.write(data)
    finally:
        out.close()
    logFileType(tmpfile)

    if tmpTarMode:
        # client sent a tarball
        loggerstor.debug("about to chksum %s" % tmpfile)
        digests = TarfileUtils.verifyHashes(tmpfile, '.meta')
        loggerstor.debug("chksum returned %s" % digests)
        logFileType(tmpfile)
        if not digests:
            msg = "Attempted to use non-CAS storage key(s) for" \
                  " STORE tarball"
            loggerstor.debug(msg)
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        # XXX: add digests to a db of already stored files (for quick
        # lookup)
        if existing:
            tarname, tarnameMode = existing
            loggerstor.debug("concatenating tarfiles %s and %s"
                             % (tarname, tmpfile))
            stored = tarfile.open(tarname, tarnameMode)
            try:
                storedNames = stored.getnames()
            finally:
                stored.close()
            incoming = tarfile.open(tmpfile, tmpTarMode)
            try:
                incomingNames = set(incoming.getnames())
            finally:
                incoming.close()
            # Drop members the stored tarball already has before merging.
            dupes = [n for n in storedNames if n in incomingNames]
            TarfileUtils.delete(tmpfile, dupes)
            logFileType(tarname)
            TarfileUtils.concatenate(tarname, tmpfile)
            logFileType(tarname)
        else:
            loggerstor.debug("saving %s as tarfile %s" % (tmpfile, targetTar))
            os.rename(tmpfile, targetTar)
    else:
        # client sent regular file
        h = hashfile(tmpfile)
        if 'meta' in request.args and 'metakey' in request.args:
            metakey = request.args.get('metakey')[0]
            meta = request.args.get('meta')[0]  # XXX: file in mem!
        else:
            metakey = None
            meta = None
        encoded = fencode(long(h, 16))
        if encoded != filekey:
            msg = "Attempted to use non-CAS storage key for STORE data "
            msg += "(%s != %s)" % (filekey, encoded)
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        fname = os.path.join(storedir, filekey)
        if os.path.exists(fname):
            loggerstor.debug("adding metadata to %s" % fname)
            blockfile = BlockFile.open(fname, 'rb+')
            try:
                if not blockfile.hasNode(nodeID):
                    blockfile.addNode(int(nodeID, 16), {metakey: meta})
            finally:
                # Close whether or not a node was added.
                blockfile.close()
            os.remove(tmpfile)
        else:
            # Previously 'tarname' was unbound in this branch (NameError for
            # every small upload).  Small files are appended with mode 'a',
            # which needs the uncompressed tarball.
            tarname = plainTar
            # Members are added below under the base name, so look them up
            # the same way.
            memberName = os.path.basename(fname)
            if existing:
                # XXX: need to do something with metadata!
                print("XXX: need to do something with metadata for tar!")
                existingName, existingMode = existing
                stored = tarfile.open(existingName, existingMode)
                try:
                    storedNames = stored.getnames()
                finally:
                    stored.close()
                if memberName in storedNames:
                    loggerstor.debug("%s already stored in tarball" % fname)
                    # if the file is already in the corresponding tarball,
                    # update its timestamp and return success.
                    loggerstor.debug("%s already stored" % filekey)
                    # XXX: update timestamp for filekey in tarball
                    os.remove(tmpfile)  # don't leave the upload behind
                    return "Successful STORE"
                else:
                    loggerstor.debug("tarball for %s, but %s not in tarball"
                                     % (nodeID, fname))
            if len(data) < SMALL_FILE_LIMIT and fname != tarname:
                # If the file is small, move it into the appropriate
                # tarball.  Note that this code is unlikely to ever be
                # executed if the client is an official flud client, as
                # they do the tarball aggregation thing already, and all
                # tarballs will be > 8192.  This is, then, really just
                # defensive coding -- clients aren't required to implement
                # that tarball aggregation strategy.  And it is really only
                # useful for filesystems with inefficient small file
                # storage.
                loggerstor.debug("moving small file '%s' into tarball"
                                 % fname)
                if not os.path.exists(tarname):
                    archive = tarfile.open(tarname, 'w')
                else:
                    archive = tarfile.open(tarname, 'a')
                try:
                    # XXX: more bad blocking stuff
                    archive.add(tmpfile, memberName)
                    if meta:
                        metafilename = "%s.%s.meta" % (memberName, metakey)
                        loggerstor.debug("adding metadata file to tarball %s"
                                         % metafilename)
                        metaio = StringIO(meta)
                        tinfo = tarfile.TarInfo(metafilename)
                        tinfo.size = len(meta)
                        archive.addfile(tinfo, metaio)
                finally:
                    archive.close()
                os.remove(tmpfile)
            else:
                # store the file
                loggerstor.debug("storing %s" % fname)
                os.rename(tmpfile, fname)
                BlockFile.convert(fname, (int(nodeID, 16), {metakey: meta}))

    loggerstor.debug("successful STORE for %s" % filekey)
    return "Successful STORE"
def main():
    """Self-test for TarfileUtils.delete(), concatenate() and verifyHashes().

    Builds throw-away tarballs with maketarball() (optionally compressed
    with gzipTarball()), runs each TarfileUtils operation on them, checks
    the result with assert, and removes the files it created.  Prints
    "all tests passed" when every check succeeds; an AssertionError
    propagates otherwise.
    """

    def namesThenRemove(path):
        # Return the member names of the tarball at path, then always close
        # the handle and delete the file.
        tarball = tarfile.open(path, 'r')
        try:
            return tarball.getnames()
        finally:
            tarball.close()
            os.remove(path)

    def checkThenRemove(path, ignore):
        # Assert verifyHashes() accepts the tarball, removing it either way.
        try:
            assert TarfileUtils.verifyHashes(path, ignore), \
                "verifyHashes rejected %s" % path
        finally:
            os.remove(path)

    # test TarfileUtils.delete(): plain, then gzip
    for compress in (False, True):
        (tballname, contents) = maketarball(5, 4096)
        if compress:
            tballname = gzipTarball(tballname)
        TarfileUtils.delete(tballname, contents[2:4])
        assert namesThenRemove(tballname) == contents[:2] + contents[4:]

    # test TarfileUtils.concatenate(): (plain, plain), (gz, plain),
    # (plain, gz), (gz, gz)
    for gzFirst, gzSecond in ((False, False), (True, False),
                              (False, True), (True, True)):
        (tballname1, contents1) = maketarball(5, 4096)
        (tballname2, contents2) = maketarball(5, 4096)
        if gzFirst:
            tballname1 = gzipTarball(tballname1)
        if gzSecond:
            tballname2 = gzipTarball(tballname2)
        TarfileUtils.concatenate(tballname1, tballname2)
        # concatenate() is expected to consume the second tarball
        assert not os.path.exists(tballname2)
        assert namesThenRemove(tballname1) == contents1 + contents2

    # test TarfileUtils.verifyHashes(): plain, then gzipped; each one
    # without and then with meta members
    for compress in (False, True):
        # no meta
        (tballname, contents) = maketarball(5, 4096, True)
        if compress:
            tballname = gzipTarball(tballname)
        checkThenRemove(tballname, contents[2:4])

        # with meta
        # The original read assert(verifyHashes(name, contents[2:4]), ".meta"),
        # which asserts a non-empty tuple and so can never fail.
        # NOTE(review): ".meta" is now passed as the second argument,
        # mirroring the two-argument verifyHashes(path, '.meta') call used
        # elsewhere in this code base -- confirm that is the intended call.
        (tballname, contents) = maketarball(5, 4096, True, True)
        if compress:
            tballname = gzipTarball(tballname)
        checkThenRemove(tballname, ".meta")

    print("all tests passed")