def _deleteFile(self, request, filekey, metakey, reqKu, reqID):
    """Handle a DELETE request for a stored block.

    filekey names the block file in self.config.storedir; metakey
    identifies the requestor's metadata entry for that block; reqKu is
    the requestor's public-key object (reqKu.id() names its per-node
    aggregation tarball); reqID is the requesting node's ID as a hex
    string.

    Returns "" on success.  On a miss, sets a 404 on the request and
    writes a "Not found" body.
    """
    fname = os.path.join(self.config.storedir, filekey)
    loggerdele.debug("reading file data from %s" % fname)
    if not os.path.exists(fname):
        # check for tarball for originator -- small blocks may have been
        # aggregated into <reqKu.id()>.tar or .tar.gz instead of being
        # stored as standalone files
        tarballs = []
        tarballbase = os.path.join(self.config.storedir, reqKu.id())+".tar"
        if os.path.exists(tarballbase+".gz"):
            tarballs.append((tarballbase+".gz", 'r:gz'))
        if os.path.exists(tarballbase):
            tarballs.append((tarballbase, 'r'))
        for tarball, openmode in tarballs:
            mfilekey = "%s.%s.meta" % (filekey, metakey)
            loggerdele.debug("opening %s, %s for delete..."
                    % (tarball, openmode))
            # debugging aid: log what file(1) thinks the tarball is
            ftype = os.popen('file %s' % tarball)
            loggerdele.debug("ftype of %s is %s" % (tarball, ftype.read()))
            ftype.close()
            tar = tarfile.open(tarball, openmode)
            # members belonging to this filekey: the block itself plus
            # its per-metakey "<filekey>.<metakey>.meta" entries
            mnames = [n for n in tar.getnames() if n[:len(filekey)] == filekey]
            tar.close()
            if len(mnames) > 2:
                # other metakeys still reference this block: remove only
                # this requestor's metadata member, keep the data
                deleted = TarfileUtils.delete(tarball, mfilekey)
            else:
                # last reference: remove both data and metadata members
                deleted = TarfileUtils.delete(tarball, [filekey, mfilekey])
            if deleted:
                loggerdele.info("DELETED %s (from %s)" % (deleted, tarball))
            # NOTE(review): returns after examining only the first
            # candidate tarball (.gz preferred) -- presumably intentional
            # since a key should live in at most one; confirm.
            return ""
        request.setResponseCode(http.NOT_FOUND, "Not found: %s" % filekey)
        request.write("Not found: %s" % filekey)
    else:
        # standalone block file: drop this node/metakey's ownership claim
        f = BlockFile.open(fname,"rb+")
        nID = int(reqID, 16)
        if f.hasNode(nID):
            # remove this node/metakey from owning this file block
            f.delNode(nID, metakey)
            if f.emptyNodes():
                # if this was the only owning node, delete it
                f.close()
                os.remove(fname)
        # NOTE(review): this close is reached even after the
        # close-and-remove path above -- assumes BlockFile tolerates a
        # double close; confirm.
        f.close()
    loggerdele.debug("returning DELETE response")
    return ""
def _storeFile(self, request, filekey, reqKu, nodeID):
    """Handle a STORE request: persist uploaded data under a CAS key.

    filekey is the content-addressed storage key (optionally ending in
    ".tar"/".tar.gz" when the client sends an aggregated tarball); reqKu
    is the requestor's public-key object (reqKu.id() names its per-node
    tarball); nodeID is the requesting node's ID as a hex string.

    Returns "Successful STORE" on success, or an error message after
    setting a CONFLICT response code on a CAS-key mismatch.
    """
    # [XXX: memory management is not happy here. might want to look at
    # request.registerProducer(). Otherwise, might have to scrap
    # using the STORE(ROOT(RESOURCE)) deal in favor of
    # producer/consumer model for STORE ops
    # (http://itamarst.org/writings/OSCON03/twisted_internet-108.html).
    # Another option might include subclassing web.resource.Resource
    # and making this derive from that... Or might be web.Site that
    # needs to be subclassed... Or maybe web.site.Request -
    # web.site.Request.process()? Request seems doubly-bad: perhaps a
    # copy is made somewhere, because memory mushrooms to 2x big
    # upload, then goes back down to around 1x.
    # [update: This should be fixable in twisted.web2, but I am informed
    # that in the current version, there is no workaround]
    # get the data to a tmp file
    loggerstor.debug("writing store data to tmpfile")
    tmpfile = tempfile.mktemp(dir=self.config.storedir)
    tarball = os.path.join(self.config.storedir,reqKu.id()+".tar")
    # rename and/or prepend the data appropriately
    tmpTarMode = None
    if filekey[-4:] == ".tar":
        tmpfile = tmpfile+".tar"
        tmpTarMode = 'r'
        targetTar = tarball
    elif filekey[-7:] == ".tar.gz":
        tmpfile = tmpfile+".tar.gz"
        tmpTarMode = 'r:gz'
        targetTar = tarball+".gz"
    loggerstor.debug("tmpfile is %s" % tmpfile)
    # XXX: if the server supports both .tar and tar.gz, this is wrong; we'd
    # need to check *both* for already existing dudes instead of just
    # choosing one
    if os.path.exists(tarball+'.gz'):
        tarball = (tarball+'.gz', 'r:gz')
    elif os.path.exists(tarball):
        tarball = (tarball, 'r')
    else:
        tarball = None
    loggerstor.debug("tarball is %s" % str(tarball))
    data = request.args.get('filename')[0]  # XXX: file in mem! need web2.
    # XXX: bad blocking stuff here
    f = open(tmpfile, 'wb')
    f.write(data)
    f.close()
    # debugging aid: log what file(1) thinks the upload is
    ftype = os.popen('file %s' % tmpfile)
    loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read()))
    ftype.close()
    if tmpTarMode:
        # client sent a tarball: every member name must be the CAS hash
        # of its contents (members ending in .meta are exempt)
        loggerstor.debug("about to chksum %s" % tmpfile)
        digests = TarfileUtils.verifyHashes(tmpfile, '.meta')
        loggerstor.debug("chksum returned %s" % digests)
        ftype = os.popen('file %s' % tmpfile)
        loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read()))
        ftype.close()
        if not digests:
            msg = "Attempted to use non-CAS storage key(s) for" \
                    " STORE tarball"
            loggerstor.debug(msg)
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        # XXX: add digests to a db of already stored files (for quick
        # lookup)
        if tarball:
            # an aggregation tarball already exists for this node:
            # drop duplicate members from the upload, then append it
            tarname, tarnameMode = tarball
            loggerstor.debug("concatenating tarfiles %s and %s"
                    % (tarname, tmpfile))
            f1 = tarfile.open(tarname, tarnameMode)
            f2 = tarfile.open(tmpfile, tmpTarMode)
            f1names = f1.getnames()
            f2names = f2.getnames()
            f1.close()
            f2.close()
            dupes = [f for f in f1names if f in f2names]
            TarfileUtils.delete(tmpfile, dupes)
            ftype = os.popen('file %s' % tarname)
            loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read()))
            ftype.close()
            TarfileUtils.concatenate(tarname, tmpfile)
            ftype = os.popen('file %s' % tarname)
            loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read()))
            ftype.close()
        else:
            # first tarball for this node: just move it into place
            loggerstor.debug("saving %s as tarfile %s" % (tmpfile, targetTar))
            os.rename(tmpfile, targetTar)
    else:
        # client sent regular file -- filekey must equal the hash of the
        # uploaded bytes (CAS check)
        h = hashfile(tmpfile)
        if request.args.has_key('meta') and request.args.has_key('metakey'):
            metakey = request.args.get('metakey')[0]
            meta = request.args.get('meta')[0]  # XXX: file in mem!
        else:
            metakey = None
            meta = None
        if fencode(long(h, 16)) != filekey:
            msg = "Attempted to use non-CAS storage key for STORE data "
            msg += "(%s != %s)" % (filekey, fencode(long(h, 16)))
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        fname = os.path.join(self.config.storedir, filekey)
        if os.path.exists(fname):
            # block already stored: just record this node's metadata
            loggerstor.debug("adding metadata to %s" % fname)
            f = BlockFile.open(fname,'rb+')
            # NOTE(review): hasNode() is given the hex *string* here,
            # while _deleteFile passes int(reqID, 16) -- one of the two
            # is likely wrong; confirm against BlockFile.hasNode.
            if not f.hasNode(nodeID):
                f.addNode(int(nodeID,16), {metakey: meta})
            f.close()
            os.remove(tmpfile)
        else:
            # NOTE(review): nodeID+".tar" is a bare relative path (no
            # storedir prefix), unlike every other tarball lookup in
            # this class -- looks like a bug; confirm.
            if os.path.exists(nodeID+".tar"):
                # XXX: need to do something with metadata!
                print "XXX: need to do something with metadata for tar!"
                # NOTE(review): 'tarname' is only bound in the tarball
                # branch above, never on this path -- this line would
                # raise NameError if reached; confirm intent.
                tarball = tarfile.open(tarname, 'r')
                if fname in tarball.getnames():
                    loggerstor.debug("%s already stored in tarball" % fname)
                    # if the file is already in the corresponding tarball,
                    # update its timestamp and return success.
                    loggerstor.debug("%s already stored" % filekey)
                    # XXX: update timestamp for filekey in tarball
                    return "Successful STORE"
                else:
                    loggerstor.debug("tarball for %s, but %s not in tarball"
                            % (nodeID,fname))
            if len(data) < 8192 and fname != tarname:  #XXX: magic # (blk sz)
                # If the file is small, move it into the appropriate
                # tarball. Note that this code is unlikely to ever be
                # executed if the client is an official flud client, as
                # they do the tarball aggregation thing already, and all
                # tarballs will be > 8192. This is, then, really just
                # defensive coding -- clients aren't required to implement
                # that tarball aggregation strategy. And it is really only
                # useful for filesystems with inefficient small file
                # storage.
                loggerstor.debug("moving small file '%s' into tarball"
                        % fname)
                if not os.path.exists(tarname):
                    tarball = tarfile.open(tarname, 'w')
                else:
                    tarball = tarfile.open(tarname, 'a')
                # XXX: more bad blocking stuff
                tarball.add(tmpfile, os.path.basename(fname))
                if meta:
                    metafilename = "%s.%s.meta" % (os.path.basename(fname),
                            metakey)
                    loggerstor.debug("adding metadata file to tarball %s"
                            % metafilename)
                    metaio = StringIO(meta)
                    tinfo = tarfile.TarInfo(metafilename)
                    tinfo.size = len(meta)
                    tarball.addfile(tinfo, metaio)
                tarball.close()
                os.remove(tmpfile)
            else:
                # store the file
                loggerstor.debug("storing %s" % fname)
                os.rename(tmpfile, fname)
                # convert to BlockFile format, recording this node as an
                # owner with its metadata entry
                BlockFile.convert(fname, (int(nodeID,16), {metakey: meta}))
    loggerstor.debug("successful STORE for %s" % filekey)
    return "Successful STORE"
def _sendVerify(self, request, filekey, offset, length, reqKu, nodeID, meta):
    """Handle a VERIFY request: hash `length` bytes at `offset` of a block.

    filekey names the stored block; the block may live either as a
    standalone BlockFile in storedir or inside the requestor's
    aggregation tarball (<reqKu.id()>.tar[.gz]).  meta, when given, is a
    (metakey, metadata) pair to be stored/refreshed alongside the block
    as a side effect of the verify.

    Returns the hash of the requested byte range on success; on failure
    sets BAD_REQUEST (bad offset/length) or NOT_FOUND on the request and
    returns the error message.
    """
    fname = os.path.join(self.config.storedir,filekey)
    loggervrfy.debug("request for %s" % fname)
    if os.path.exists(fname):
        loggervrfy.debug("looking in regular blockfile for %s" % fname)
        if meta:
            # opened read-write so metadata can be added below
            f = BlockFile.open(fname, 'rb+')
        else:
            f = BlockFile.open(fname, 'rb')
    else:
        # check for tarball for originator
        loggervrfy.debug("checking tarball for %s" % fname)
        tarballs = []
        tarballbase = os.path.join(self.config.storedir, reqKu.id())+".tar"
        if os.path.exists(tarballbase+".gz"):
            tarballs.append((tarballbase+".gz", 'r:gz'))
        if os.path.exists(tarballbase):
            tarballs.append((tarballbase, 'r'))
        loggervrfy.debug("tarballs is %s" % tarballs)
        for tarball, openmode in tarballs:
            loggervrfy.debug("looking in tarball %s..." % tarball)
            tar = tarfile.open(tarball, openmode)
            try:
                # KeyError from extractfile/getmember (member absent)
                # lands in the except below and moves on to the next
                # candidate tarball
                tarf = tar.extractfile(filekey)
                tari = tar.getmember(filekey)
                # XXX: update timestamp on tarf in tarball
                fsize = tari.size
                if offset > fsize or (offset+length) > fsize:
                    # XXX: should limit length
                    loggervrfy.debug("VERIFY response failed (from %s):"
                            " bad offset/length" % tarball)
                    msg = "Bad request: bad offset/length in VERIFY"
                    request.setResponseCode(http.BAD_REQUEST, msg)
                    return msg
                # XXX: could avoid seek/read if length == 0
                tarf.seek(offset)
                # XXX: bad blocking read
                data = tarf.read(length)
                tarf.close()
                if meta:
                    mfname = "%s.%s.meta" % (filekey, meta[0])
                    loggervrfy.debug("looking for %s" % mfname)
                    if mfname in tar.getnames():
                        # make sure that the data is the same, if not,
                        # remove it and re-add it
                        tarmf = tar.extractfile(mfname)
                        # XXX: bad blocking read
                        stored_meta = tarmf.read()
                        tarmf.close()
                        if meta[1] != stored_meta:
                            # stale metadata: delete the member and
                            # re-append it (gunzip/regzip around the
                            # append since 'a' mode needs a plain tar)
                            loggervrfy.debug("updating tarball"
                                    " metadata for %s.%s"
                                    % (filekey, meta[0]))
                            tar.close()
                            TarfileUtils.delete(tarball, mfname)
                            if openmode == 'r:gz':
                                tarball = TarfileUtils.gunzipTarball(
                                        tarball)
                            tar = tarfile.open(tarball, 'a')
                            metaio = StringIO(meta[1])
                            tinfo = tarfile.TarInfo(mfname)
                            tinfo.size = len(meta[1])
                            tar.addfile(tinfo, metaio)
                            tar.close()
                            if openmode == 'r:gz':
                                tarball = TarfileUtils.gzipTarball(
                                        tarball)
                        else:
                            loggervrfy.debug("no need to update tarball"
                                    " metadata for %s.%s"
                                    % (filekey, meta[0]))
                    else:
                        # add it
                        loggervrfy.debug("adding tarball metadata"
                                " for %s.%s" % (filekey, meta[0]))
                        tar.close()
                        if openmode == 'r:gz':
                            tarball = TarfileUtils.gunzipTarball(tarball)
                        tar = tarfile.open(tarball, 'a')
                        metaio = StringIO(meta[1])
                        tinfo = tarfile.TarInfo(mfname)
                        tinfo.size = len(meta[1])
                        tar.addfile(tinfo, metaio)
                        tar.close()
                        if openmode == 'r:gz':
                            tarball = TarfileUtils.gzipTarball(
                                    tarball)
                # NOTE(review): on the meta-update paths above, tar has
                # already been closed -- this second close assumes
                # tarfile tolerates double close; confirm.
                tar.close()
                hash = hashstring(data)
                loggervrfy.info("successful VERIFY (from %s)" % tarball)
                return hash
            except:
                # NOTE(review): bare except -- intended to catch a
                # missing member, but it also swallows the I/O errors
                # from the metadata rewrite above; worth narrowing.
                tar.close()
        loggervrfy.debug("requested file %s doesn't exist" % fname)
        msg = "Not found: not storing %s" % filekey
        request.setResponseCode(http.NOT_FOUND, msg)
        return msg
    # make sure request is reasonable
    fsize = os.stat(fname)[stat.ST_SIZE]
    if offset > fsize or (offset+length) > fsize:
        # XXX: should limit length
        loggervrfy.debug("VERIFY response failed (bad offset/length)")
        msg = "Bad request: bad offset/length in VERIFY"
        request.setResponseCode(http.BAD_REQUEST, msg)
        return msg
    else:
        # XXX: blocking
        # XXX: could avoid seek/read if length == 0 (noop for meta update)
        f.seek(offset)
        data = f.read(length)
        if meta:
            loggervrfy.debug("adding metadata for %s.%s"
                    % (fname, meta[0]))
            f.addNode(int(nodeID, 16) , {meta[0]: meta[1]})
            # XXX: blocking
        f.close()
        hash = hashstring(data)
        loggervrfy.debug("returning VERIFY")
        return hash
def main(): # test plain TarfileUtils.delete() (tballname, contents) = maketarball(5, 4096) TarfileUtils.delete(tballname, contents[2:4]) tarball = tarfile.open(tballname, 'r') os.remove(tballname) assert(tarball.getnames() == contents[:2]+contents[4:]) tarball.close() # test gzip TarfileUtils.delete() (tballname, contents) = maketarball(5, 4096) tballname = gzipTarball(tballname) TarfileUtils.delete(tballname, contents[2:4]) tarball = tarfile.open(tballname, 'r') os.remove(tballname) assert(tarball.getnames() == contents[:2]+contents[4:]) tarball.close() # test plain TarfileUtils.concatenate() (tballname1, contents1) = maketarball(5, 4096) (tballname2, contents2) = maketarball(5, 4096) TarfileUtils.concatenate(tballname1, tballname2) assert(not os.path.exists(tballname2)) tarball = tarfile.open(tballname1, 'r') os.remove(tballname1) assert(tarball.getnames() == contents1+contents2) # test TarfileUtils.concatenate(gz, plain) (tballname1, contents1) = maketarball(5, 4096) (tballname2, contents2) = maketarball(5, 4096) tballname1 = gzipTarball(tballname1) TarfileUtils.concatenate(tballname1, tballname2) assert(not os.path.exists(tballname2)) tarball = tarfile.open(tballname1, 'r') os.remove(tballname1) assert(tarball.getnames() == contents1+contents2) # test TarfileUtils.concatenate(plain, gz) (tballname1, contents1) = maketarball(5, 4096) (tballname2, contents2) = maketarball(5, 4096) tballname2 = gzipTarball(tballname2) TarfileUtils.concatenate(tballname1, tballname2) assert(not os.path.exists(tballname2)) tarball = tarfile.open(tballname1, 'r') os.remove(tballname1) assert(tarball.getnames() == contents1+contents2) # test TarfileUtils.concatenate(gz, gz) (tballname1, contents1) = maketarball(5, 4096) (tballname2, contents2) = maketarball(5, 4096) tballname1 = gzipTarball(tballname1) tballname2 = gzipTarball(tballname2) TarfileUtils.concatenate(tballname1, tballname2) assert(not os.path.exists(tballname2)) tarball = tarfile.open(tballname1, 'r') 
os.remove(tballname1) assert(tarball.getnames() == contents1+contents2) # test TarfileUtils.verifyHashes(plain no meta) (tballname, contents) = maketarball(5, 4096, True) assert(TarfileUtils.verifyHashes(tballname, contents[2:4])) os.remove(tballname) # test TarfileUtils.verifyHashes(plain with meta) (tballname, contents) = maketarball(5, 4096, True, True) assert(TarfileUtils.verifyHashes(tballname, contents[2:4]), ".meta") os.remove(tballname) # test TarfileUtils.verifyHashes(gzipped no meta) (tballname, contents) = maketarball(5, 4096, True) tballname = gzipTarball(tballname) assert(TarfileUtils.verifyHashes(tballname, contents[2:4])) os.remove(tballname) # test TarfileUtils.verifyHashes(gzipped with meta) (tballname, contents) = maketarball(5, 4096, True, True) tballname = gzipTarball(tballname) assert(TarfileUtils.verifyHashes(tballname, contents[2:4]), ".meta") os.remove(tballname) print "all tests passed"