def upload(self, backend, direct=False):
    """
    @param backend: backend client. Can be a redis client or an ardb client
        example: j.clients.redis.get(ipaddr=<ipaddr>, port=<port>, ardb_patch=True)
    @param direct: bool, if True, will use the directhub client to upload the data.
        The directhub client allows testing existence and uploading keys and data per batch.
        If False, simply use exists/set from the backend client.
    """
    self._kvs.close()

    for path in self.root._flist._added_files:
        if not os.path.exists(path):
            raise RuntimeError('file not found %s' % path)

        logger.debug("hash %s", path)
        hashs = g8storclient.encrypt(path)
        if hashs is None:
            # nothing to upload for this file, move on to the next one
            continue

        # only push chunks the backend does not already know about
        for hash in hashs:
            if not backend.exists(hash['hash']):
                logger.debug("upload %s", path)
                backend.set(hash['hash'], hash['data'])

    return self.export()
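# Usage sketch (a hedged example, not part of this module): assumes a reachable
# ardb/redis backend and `f` being an flist object exposing upload(); the
# address, port and `f` are placeholders.
#
#   backend = j.clients.redis.get(ipaddr='localhost', port=16379, ardb_patch=True)
#   exported = f.upload(backend)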
def upload_diff(self, backend_instance):
    """
    @param backend_instance: instance name of the hubdirect client to use
    """
    directclient = j.clients.hubdirect.get(backend_instance, create=False, interactive=False)

    hash_data = {}
    to_upload = []

    for path in self.root._flist._added_files:
        data = g8storclient.encrypt(path) or []
        for item in data:
            hash_data[item['hash']] = item['data']

    res = directclient.exists(list(hash_data.keys()))
    missing_keys = res.json()
    missing_keys = list(map(base64.b64decode, missing_keys))
    missing_keys = list(map(bytes.decode, missing_keys))

    # add all missing keys
    to_upload.extend(missing_keys)

    logger.info("[+] %d chunks to upload" % len(to_upload))
    if len(to_upload) == 0:
        return

    # filter the hash_data dict to only keep what needs to be uploaded
    upload = {k: v for k, v in hash_data.items() if k in to_upload}

    logger.info("[+] uploading last data...")
    directclient.insert(upload)

    return self.export()
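# Usage sketch (assumption: 'main' is the instance name of an already
# configured hubdirect client, `f` the same flist object as above):
#
#   exported = f.upload_diff('main')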
# nested directory-walker callback: hashes one file and uploads any chunk the
# backend does not know yet (`self` and `backend` come from the enclosing scope)
def procFile(dirobj, type, name, subobj, args):
    fullpath = "%s/%s/%s" % (self.rootpath, dirobj.dbobj.location, name)
    self.logger.info("[+] uploading: %s" % fullpath)

    hashs = g8storclient.encrypt(fullpath)
    if hashs is None:
        return

    for hash in hashs:
        if not backend.exists(hash['hash']):
            backend.set(hash['hash'], hash['data'])
# nested directory-walker callback: hashes one file and stores only the chunk
# hashes (metadata) on the inode, dropping the chunk payloads
def procFile(dirobj, type, name, subobj, args):
    fullpath = "%s/%s/%s" % (self.rootpath, dirobj.dbobj.location, name)
    self.logger.debug("[+] populating: %s" % fullpath)

    hashs = g8storclient.encrypt(fullpath)
    if hashs is None:
        return

    # keep only the hashes, the data itself is not stored in the flist
    for index, value in enumerate(hashs):
        hashs[index].pop('data', None)

    subobj.attributes.file.blocks = hashs
    dirobj.save()
def populate_missing_chunks(self, hubdirect_instance='main'):
    import g8storclient

    directclient = j.clients.hubdirect.get(hubdirect_instance)

    all_files = {}
    bykeys = {}
    to_upload = []

    for source, _, files in os.walk(self.rootpath):
        for f in files:
            path = os.path.join(source, f)
            data = g8storclient.encrypt(path) or []

            # index by full path to avoid collisions between same-named
            # files in different directories
            all_files[path] = data

            # keep a way to find a chunk back from its hash
            for id, chunk in enumerate(data):
                bykeys[chunk['hash']] = {'file': path, 'index': id}

    # exists_post now wants binary keys; we know we are dealing with string
    # hashes, so simply encode them first
    for file in all_files:
        for id, chunk in enumerate(all_files[file]):
            all_files[file][id]['bhash'] = chunk['hash'].encode('utf-8')

    for path, chunks in all_files.items():
        res = directclient.api.exists.exists_post(
            set([chunk['bhash'] for chunk in chunks]))
        keys = res.json()

        # add all missing keys
        to_upload += keys

    self.logger.info("[+] %d chunks to upload" % len(to_upload))
    if len(to_upload) == 0:
        return

    upload = ()
    currentsize = 0

    for bhash in to_upload:
        # we will upload all these chunks; decode the key because we know
        # these are string hashes
        hash = base64.b64decode(bhash).decode('utf-8')

        if not bykeys.get(hash):
            raise RuntimeError("Key not indexed, this should not happen")

        filename = bykeys[hash]['file']
        chunkindex = bykeys[hash]['index']
        chunk = all_files[filename][chunkindex]

        payload = base64.b64encode(chunk['data'])
        upload += (('files[]', (bhash, payload)), )
        currentsize += len(payload)

        # if this pack is more than 20MB, upload it now
        if currentsize > 20 * 1024 * 1024:
            self.logger.info("[+] uploading part of the data...")

            try:
                directclient.api.insert.insert_put(upload)
                # reset the batch, otherwise already-sent chunks would be re-uploaded
                upload = ()
                currentsize = 0

            except Exception as e:
                # weird error, could be an existing chunk undetected by the previous check
                self.logger.error(e)

    if upload:
        self.logger.info("[+] uploading last data...")
        directclient.api.insert.insert_put(upload)
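# The hub exchanges keys as base64-encoded binary strings; a minimal sketch of
# the encode/decode round-trip used above (standard library only, no hub needed):
#
#   import base64
#   bkey = 'abc123'.encode('utf-8')     # binary key as sent to exists_post
#   wire = base64.b64encode(bkey)       # shape of the keys the hub returns
#   assert base64.b64decode(wire).decode('utf-8') == 'abc123'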
def _add_file(self, src):
    if os.path.isdir(src):
        raise ValueError("src must be a file (%s)" % src)

    _, self_key = self._flist.path2key(self.abspath)
    src_stat = os.stat(src, follow_symlinks=False)

    # add a new inode into the contents of the current directory
    new_inode = self._new_inode()
    new_inode.name = os.path.basename(src)
    new_inode.size = src_stat.st_size
    new_inode.modificationTime = int(src_stat.st_mtime)
    new_inode.creationTime = int(src_stat.st_ctime)

    if S_ISLNK(src_stat.st_mode):
        # checking absolute path, relative may fail
        new_inode.attributes.link = new_inode.attributes.init('link')
        new_inode.attributes.link.target = os.readlink(src)

    elif S_ISREG(src_stat.st_mode):
        new_inode.attributes.file = new_inode.attributes.init('file')
        new_inode.attributes.file.blockSize = 128  # FIXME ?

        fullpath = os.path.abspath(src)
        logger.debug("[+] populating: %s" % fullpath)

        hashs = g8storclient.encrypt(fullpath)
        if hashs is None:
            return

        # store only the chunk hashes, not the data itself
        for index, value in enumerate(hashs):
            hashs[index].pop('data', None)

        new_inode.attributes.file.blocks = hashs

        # keep the path of the added file, so we can upload the content of the
        # file to the backend once we're done editing the flist
        self._flist._added_files.add(src)

    else:
        # special file
        new_inode.attributes.special = new_inode.attributes.init('special')

        if S_ISSOCK(src_stat.st_mode):
            new_inode.attributes.special.type = "socket"
        elif S_ISBLK(src_stat.st_mode):
            new_inode.attributes.special.type = "block"
        elif S_ISCHR(src_stat.st_mode):
            new_inode.attributes.special.type = "chardev"
        elif S_ISFIFO(src_stat.st_mode):
            new_inode.attributes.special.type = "fifopipe"
        else:
            new_inode.attributes.special.type = "unknown"

        if S_ISBLK(src_stat.st_mode) or S_ISCHR(src_stat.st_mode):
            id = '%d,%d' % (os.major(src_stat.st_rdev), os.minor(src_stat.st_rdev))
            new_inode.attributes.special.data = id

    # set ACI on the new inode
    uname = str(src_stat.st_uid)
    gname = str(src_stat.st_gid)

    # fall back to the numeric ids if the username/groupname is not found on the host
    try:
        uname = pwd.getpwuid(src_stat.st_uid).pw_name
        gname = grp.getgrgid(src_stat.st_gid).gr_name
    except Exception:
        pass

    aci = self._flist.aciCollection.new()
    aci.dbobj.uname = uname
    aci.dbobj.gname = gname
    aci.dbobj.mode = src_stat.st_mode

    if not self._flist.aciCollection.exists(aci.key):
        aci.save()

    new_inode.aclkey = aci.key

    self._obj.modificationTime = j.data.time.epoch

    model = self._flist.dirCollection.get(self_key)
    model.dbobj = self._obj
    model.save()

    return Path(new_inode, self, self._flist)
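# The S_IS* predicates used above come from the standard `stat` module; a
# minimal standalone illustration of the same mode dispatch:
#
#   import os
#   from stat import S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
#   st = os.stat('/dev/null', follow_symlinks=False)
#   assert S_ISCHR(st.st_mode)          # /dev/null is a character device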