def _deleteFile(self, request, filekey, metakey, reqKu, reqID):
    """Delete a stored block, or this requestor's claim on it.

    If the block exists as a loose BlockFile, remove this node/metakey
    ownership record and delete the file only when no owning nodes
    remain.  Otherwise look for the block inside the requestor's
    aggregation tarball(s) and remove the block and/or its
    "<filekey>.<metakey>.meta" entry via TarfileUtils.

    Returns "" on success; on failure sets a 404 response code, writes
    the not-found body, and returns "" (previously fell off the end and
    returned None, which twisted.web rejects as a render result).
    """
    fname = os.path.join(self.config.storedir, filekey)
    loggerdele.debug("reading file data from %s" % fname)
    if not os.path.exists(fname):
        # No loose block file -- check the originator's tarball(s),
        # preferring the gzipped one if both exist.
        tarballs = []
        tarballbase = os.path.join(self.config.storedir, reqKu.id())+".tar"
        if os.path.exists(tarballbase+".gz"):
            tarballs.append((tarballbase+".gz", 'r:gz'))
        if os.path.exists(tarballbase):
            tarballs.append((tarballbase, 'r'))
        for tarball, openmode in tarballs:
            mfilekey = "%s.%s.meta" % (filekey, metakey)
            loggerdele.debug("opening %s, %s for delete..."
                    % (tarball, openmode))
            # debug aid: log what file(1) thinks the tarball is
            ftype = os.popen('file %s' % tarball)
            loggerdele.debug("ftype of %s is %s" % (tarball, ftype.read()))
            ftype.close()
            tar = tarfile.open(tarball, openmode)
            try:
                # entries for this key: the block itself plus its
                # per-metakey ".meta" companions
                mnames = [n for n in tar.getnames()
                        if n.startswith(filekey)]
            finally:
                # was leaked if getnames() raised
                tar.close()
            if len(mnames) > 2:
                # other metakeys still reference this block: drop only
                # this requestor's metadata entry
                deleted = TarfileUtils.delete(tarball, mfilekey)
            else:
                # last reference: drop block and metadata together
                deleted = TarfileUtils.delete(tarball, [filekey, mfilekey])
            if deleted:
                loggerdele.info("DELETED %s (from %s)" % (deleted, tarball))
                return ""
            # nothing deleted from this tarball -- try the next one
        # Block wasn't in any tarball (or there were no tarballs).
        request.setResponseCode(http.NOT_FOUND, "Not found: %s" % filekey)
        request.write("Not found: %s" % filekey)
        return ""
    else:
        f = BlockFile.open(fname, "rb+")
        nID = int(reqID, 16)
        if f.hasNode(nID):
            # remove this node/metakey from owning this file block
            f.delNode(nID, metakey)
            if f.emptyNodes():
                # if this was the only owning node, delete the block
                f.close()
                os.remove(fname)
        f.close()
        loggerdele.debug("returning DELETE response")
        return ""
def _sendFile(self, request, filekey, reqKu, returnMeta):
    """Stream a stored block back to the requestor.

    When returnMeta is true the response is a Multipart/Related body:
    one part per stored "<filekey>.<metakey>.meta" entry, then the block
    data part, then the closing boundary.  The block is served from a
    loose BlockFile if present, otherwise from the requestor's
    aggregation tarball(s).

    Returns "" once the body has been written; on failure sets a 404,
    writes the not-found body, and returns "".
    """
    fname = os.path.join(self.config.storedir, filekey)
    loggerretr.debug("reading file data from %s" % fname)
    # XXX: make sure requestor owns the file?
    if returnMeta:
        loggerretr.debug("returnMeta = %s" % returnMeta)
        request.setHeader('Content-type', 'Multipart/Related')
        rand_bound = binascii.hexlify(generateRandom(13))
        request.setHeader('boundary', rand_bound)
    if not os.path.exists(fname):
        # No loose block -- check the originator's tarball(s).
        tarball = os.path.join(self.config.storedir, reqKu.id()+".tar")
        tarballs = []
        if os.path.exists(tarball+'.gz'):
            tarballs.append((tarball+'.gz', 'r:gz'))
        if os.path.exists(tarball):
            tarballs.append((tarball, 'r'))
        loggerretr.debug("tarballs = %s" % tarballs)
        for tarball, openmode in tarballs:
            tar = tarfile.open(tarball, openmode)
            try:
                # KeyError here means the block isn't in this tarball
                tinfo = tar.getmember(filekey)
                returnedMeta = False
                if returnMeta:
                    loggerretr.debug("tar returnMeta %s" % filekey)
                    try:
                        metas = [f for f in tar.getnames()
                                if f.startswith(filekey)
                                and f[-4:] == 'meta']
                        loggerretr.debug("tar returnMetas=%s" % metas)
                        for m in metas:
                            minfo = tar.getmember(m)
                            H = []
                            H.append("--%s" % rand_bound)
                            H.append("Content-Type: "
                                    "Application/octet-stream")
                            H.append("Content-ID: %s" % m)
                            H.append("Content-Length: %d" % minfo.size)
                            H.append("")
                            request.write('\r\n'.join(H))
                            request.write('\r\n')
                            tarm = tar.extractfile(minfo)
                            loggerretr.debug("successful metadata"
                                    " RETRIEVE (from %s)" % tarball)
                            # XXX: bad blocking stuff
                            while 1:
                                buf = tarm.read()
                                if buf == "":
                                    break
                                request.write(buf)
                            request.write('\r\n')
                            tarm.close()
                        # header for the block-data part that follows
                        H = []
                        H.append("--%s" % rand_bound)
                        H.append("Content-Type: Application/octet-stream")
                        H.append("Content-ID: %s" % filekey)
                        H.append("Content-Length: %d" % tinfo.size)
                        H.append("")
                        request.write('\r\n'.join(H))
                        request.write('\r\n')
                        returnedMeta = True
                    except Exception:
                        # was a bare except: -- couldn't find any
                        # metadata, just return the file by itself
                        loggerretr.debug("no metadata found")
                # XXX: bad blocking stuff
                tarf = tar.extractfile(tinfo)
                # XXX: update timestamp on tarf in tarball
                loggerretr.debug("successful RETRIEVE (from %s)" % tarball)
                while 1:
                    buf = tarf.read()
                    if buf == "":
                        break
                    request.write(buf)
                tarf.close()
                tar.close()
                if returnedMeta:
                    # closing multipart boundary
                    request.write(
                            '\r\n'.join(["", "--%s--" % rand_bound, ""]))
                return ""
            except Exception:
                # was a bare except: -- block not in this tarball, try
                # the next one
                tar.close()
        # Not in any tarball (or no tarballs).  The 404 used to be
        # written *inside* the loop, which could prepend "Not found"
        # bytes to a successful retrieve from a later tarball.
        request.setResponseCode(http.NOT_FOUND, "Not found: %s" % filekey)
        request.write("Not found: %s" % filekey)
        return ""
    else:
        f = BlockFile.open(fname, "rb")
        loggerretr.log(logging.INFO, "successful RETRIEVE for %s" % filekey)
        meta = f.meta(int(reqKu.id(), 16))
        if returnMeta and meta:
            loggerretr.debug("returnMeta %s" % filekey)
            loggerretr.debug("returnMetas=%s" % meta)
            for m in meta:
                H = []
                H.append("--%s" % rand_bound)
                H.append("Content-Type: Application/octet-stream")
                H.append("Content-ID: %s.%s.meta" % (filekey, m))
                H.append("Content-Length: %d" % len(meta[m]))
                H.append("")
                H.append(meta[m])
                request.write('\r\n'.join(H))
                request.write('\r\n')
            # header for the block-data part that follows
            H = []
            H.append("--%s" % rand_bound)
            H.append("Content-Type: Application/octet-stream")
            H.append("Content-ID: %s" % filekey)
            H.append("Content-Length: %d" % f.size())
            H.append("")
            request.write('\r\n'.join(H))
            request.write('\r\n')
        # XXX: bad blocking stuff
        while 1:
            buf = f.read()
            if buf == "":
                break
            request.write(buf)
        f.close()
        if returnMeta and meta:
            # closing multipart boundary
            request.write('\r\n'.join(["", "--%s--" % rand_bound, ""]))
        return ""
def _sendVerify(self, request, filekey, offset, length, reqKu, nodeID, meta):
    """Answer a VERIFY challenge: hash `length` bytes at `offset` of the
    stored block.

    The block may live as a loose BlockFile or inside the requestor's
    aggregation tarball.  When `meta` is given as a (metakey, metadata)
    pair, the stored metadata entry is added or refreshed as a side
    effect of the verify.

    Returns the hash string on success; on failure sets a 400/404
    response code and returns the error message.
    """
    fname = os.path.join(self.config.storedir, filekey)
    loggervrfy.debug("request for %s" % fname)
    if os.path.exists(fname):
        loggervrfy.debug("looking in regular blockfile for %s" % fname)
        if meta:
            # 'rb+' because we may need to add metadata below
            f = BlockFile.open(fname, 'rb+')
        else:
            f = BlockFile.open(fname, 'rb')
    else:
        # check for tarball for originator
        loggervrfy.debug("checking tarball for %s" % fname)
        tarballs = []
        tarballbase = os.path.join(self.config.storedir, reqKu.id())+".tar"
        if os.path.exists(tarballbase+".gz"):
            tarballs.append((tarballbase+".gz", 'r:gz'))
        if os.path.exists(tarballbase):
            tarballs.append((tarballbase, 'r'))
        loggervrfy.debug("tarballs is %s" % tarballs)
        for tarball, openmode in tarballs:
            loggervrfy.debug("looking in tarball %s..." % tarball)
            tar = tarfile.open(tarball, openmode)
            try:
                # KeyError here means the block isn't in this tarball
                tarf = tar.extractfile(filekey)
                tari = tar.getmember(filekey)
                # XXX: update timestamp on tarf in tarball
                fsize = tari.size
                if offset > fsize or (offset+length) > fsize:
                    # XXX: should limit length
                    loggervrfy.debug("VERIFY response failed (from %s):"
                            " bad offset/length" % tarball)
                    msg = "Bad request: bad offset/length in VERIFY"
                    request.setResponseCode(http.BAD_REQUEST, msg)
                    tar.close()  # was leaked on this path
                    return msg
                # XXX: could avoid seek/read if length == 0
                tarf.seek(offset)
                # XXX: bad blocking read
                data = tarf.read(length)
                tarf.close()
                if meta:
                    mfname = "%s.%s.meta" % (filekey, meta[0])
                    loggervrfy.debug("looking for %s" % mfname)
                    if mfname in tar.getnames():
                        # make sure that the data is the same; if not,
                        # remove it and re-add it
                        tarmf = tar.extractfile(mfname)
                        # XXX: bad blocking read
                        stored_meta = tarmf.read()
                        tarmf.close()
                        if meta[1] != stored_meta:
                            loggervrfy.debug("updating tarball"
                                    " metadata for %s.%s"
                                    % (filekey, meta[0]))
                            tar.close()
                            TarfileUtils.delete(tarball, mfname)
                            tarball = self._rewriteTarMeta(
                                    tarball, openmode, mfname, meta[1])
                        else:
                            loggervrfy.debug("no need to update tarball"
                                    " metadata for %s.%s"
                                    % (filekey, meta[0]))
                    else:
                        # metadata entry not present yet -- add it
                        loggervrfy.debug("adding tarball metadata"
                                " for %s.%s" % (filekey, meta[0]))
                        tar.close()
                        tarball = self._rewriteTarMeta(
                                tarball, openmode, mfname, meta[1])
                tar.close()
                # 'digest' -- was 'hash', shadowing the builtin
                digest = hashstring(data)
                loggervrfy.info("successful VERIFY (from %s)" % tarball)
                return digest
            except Exception:
                # was a bare except: -- try the next tarball
                tar.close()
        loggervrfy.debug("requested file %s doesn't exist" % fname)
        msg = "Not found: not storing %s" % filekey
        request.setResponseCode(http.NOT_FOUND, msg)
        return msg
    # make sure request is reasonable
    fsize = os.stat(fname)[stat.ST_SIZE]
    if offset > fsize or (offset+length) > fsize:
        # XXX: should limit length
        loggervrfy.debug("VERIFY response failed (bad offset/length)")
        msg = "Bad request: bad offset/length in VERIFY"
        request.setResponseCode(http.BAD_REQUEST, msg)
        f.close()  # was leaked on this path
        return msg
    else:
        # XXX: blocking
        # XXX: could avoid seek/read if length == 0 (noop for meta update)
        f.seek(offset)
        data = f.read(length)
        if meta:
            loggervrfy.debug("adding metadata for %s.%s"
                    % (fname, meta[0]))
            # XXX: blocking
            f.addNode(int(nodeID, 16), {meta[0]: meta[1]})
        f.close()
        digest = hashstring(data)
        loggervrfy.debug("returning VERIFY")
        return digest

def _rewriteTarMeta(self, tarball, openmode, mfname, metadata):
    """Append one metadata entry to `tarball`, gunzipping/regzipping
    around the append when the tarball was opened 'r:gz'.

    Helper for _sendVerify (factors out the formerly duplicated
    add/replace code).  Returns the possibly-renamed tarball path.
    """
    if openmode == 'r:gz':
        tarball = TarfileUtils.gunzipTarball(tarball)
    tar = tarfile.open(tarball, 'a')
    metaio = StringIO(metadata)
    tinfo = tarfile.TarInfo(mfname)
    tinfo.size = len(metadata)
    tar.addfile(tinfo, metaio)
    tar.close()
    if openmode == 'r:gz':
        tarball = TarfileUtils.gzipTarball(tarball)
    return tarball
def _storeFile(self, request, filekey, reqKu, nodeID): # [XXX: memory management is not happy here. might want to look at # request.registerProducer(). Otherwise, might have to scrap # using the STORE(ROOT(RESOURCE)) deal in favor of # producer/consumer model for STORE ops # (http://itamarst.org/writings/OSCON03/twisted_internet-108.html). # Another option might include subclassing web.resource.Resource # and making this derive from that... Or might be web.Site that # needs to be subclassed... Or maybe web.site.Request - # web.site.Request.process()? Request seems doubly-bad: perhaps a # copy is made somewhere, because memory mushrooms to 2x big # upload, then goes back down to around 1x. # [update: This should be fixable in twisted.web2, but I am informed # that in the current version, there is no workaround] # get the data to a tmp file loggerstor.debug("writing store data to tmpfile") tmpfile = tempfile.mktemp(dir=self.config.storedir) tarball = os.path.join(self.config.storedir,reqKu.id()+".tar") # rename and/or prepend the data appropriately tmpTarMode = None if filekey[-4:] == ".tar": tmpfile = tmpfile+".tar" tmpTarMode = 'r' targetTar = tarball elif filekey[-7:] == ".tar.gz": tmpfile = tmpfile+".tar.gz" tmpTarMode = 'r:gz' targetTar = tarball+".gz" loggerstor.debug("tmpfile is %s" % tmpfile) # XXX: if the server supports both .tar and tar.gz, this is wrong; we'd # need to check *both* for already existing dudes instead of just # choosing one if os.path.exists(tarball+'.gz'): tarball = (tarball+'.gz', 'r:gz') elif os.path.exists(tarball): tarball = (tarball, 'r') else: tarball = None loggerstor.debug("tarball is %s" % str(tarball)) data = request.args.get('filename')[0] # XXX: file in mem! need web2. 
# XXX: bad blocking stuff here f = open(tmpfile, 'wb') f.write(data) f.close() ftype = os.popen('file %s' % tmpfile) loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read())) ftype.close() if tmpTarMode: # client sent a tarball loggerstor.debug("about to chksum %s" % tmpfile) digests = TarfileUtils.verifyHashes(tmpfile, '.meta') loggerstor.debug("chksum returned %s" % digests) ftype = os.popen('file %s' % tmpfile) loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read())) ftype.close() if not digests: msg = "Attempted to use non-CAS storage key(s) for" \ " STORE tarball" loggerstor.debug(msg) os.remove(tmpfile) request.setResponseCode(http.CONFLICT, msg) return msg # XXX: add digests to a db of already stored files (for quick # lookup) if tarball: tarname, tarnameMode = tarball loggerstor.debug("concatenating tarfiles %s and %s" % (tarname, tmpfile)) f1 = tarfile.open(tarname, tarnameMode) f2 = tarfile.open(tmpfile, tmpTarMode) f1names = f1.getnames() f2names = f2.getnames() f1.close() f2.close() dupes = [f for f in f1names if f in f2names] TarfileUtils.delete(tmpfile, dupes) ftype = os.popen('file %s' % tarname) loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read())) ftype.close() TarfileUtils.concatenate(tarname, tmpfile) ftype = os.popen('file %s' % tarname) loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read())) ftype.close() else: loggerstor.debug("saving %s as tarfile %s" % (tmpfile, targetTar)) os.rename(tmpfile, targetTar) else: # client sent regular file h = hashfile(tmpfile) if request.args.has_key('meta') and request.args.has_key('metakey'): metakey = request.args.get('metakey')[0] meta = request.args.get('meta')[0] # XXX: file in mem! 
else: metakey = None meta = None if fencode(long(h, 16)) != filekey: msg = "Attempted to use non-CAS storage key for STORE data " msg += "(%s != %s)" % (filekey, fencode(long(h, 16))) os.remove(tmpfile) request.setResponseCode(http.CONFLICT, msg) return msg fname = os.path.join(self.config.storedir, filekey) if os.path.exists(fname): loggerstor.debug("adding metadata to %s" % fname) f = BlockFile.open(fname,'rb+') if not f.hasNode(nodeID): f.addNode(int(nodeID,16), {metakey: meta}) f.close() os.remove(tmpfile) else: if os.path.exists(nodeID+".tar"): # XXX: need to do something with metadata! print "XXX: need to do something with metadata for tar!" tarball = tarfile.open(tarname, 'r') if fname in tarball.getnames(): loggerstor.debug("%s already stored in tarball" % fname) # if the file is already in the corresponding tarball, # update its timestamp and return success. loggerstor.debug("%s already stored" % filekey) # XXX: update timestamp for filekey in tarball return "Successful STORE" else: loggerstor.debug("tarball for %s, but %s not in tarball" % (nodeID,fname)) if len(data) < 8192 and fname != tarname: #XXX: magic # (blk sz) # If the file is small, move it into the appropriate # tarball. Note that this code is unlikely to ever be # executed if the client is an official flud client, as # they do the tarball aggregation thing already, and all # tarballs will be > 8192. This is, then, really just # defensive coding -- clients aren't required to implement # that tarball aggregation strategy. And it is really only # useful for filesystems with inefficient small file # storage. 
loggerstor.debug("moving small file '%s' into tarball" % fname) if not os.path.exists(tarname): tarball = tarfile.open(tarname, 'w') else: tarball = tarfile.open(tarname, 'a') # XXX: more bad blocking stuff tarball.add(tmpfile, os.path.basename(fname)) if meta: metafilename = "%s.%s.meta" % (os.path.basename(fname), metakey) loggerstor.debug("adding metadata file to tarball %s" % metafilename) metaio = StringIO(meta) tinfo = tarfile.TarInfo(metafilename) tinfo.size = len(meta) tarball.addfile(tinfo, metaio) tarball.close() os.remove(tmpfile) else: # store the file loggerstor.debug("storing %s" % fname) os.rename(tmpfile, fname) BlockFile.convert(fname, (int(nodeID,16), {metakey: meta})) loggerstor.debug("successful STORE for %s" % filekey) return "Successful STORE"