def put(self, request, sname):
    """
    Update an existing share's size and/or compression algorithm.

    Accepts optional 'size' (KB) and 'compression' keys in request.data.
    A resize below the share's current usage is rejected via
    handle_exception (which raises).

    :param request: DRF request; request.data may contain 'size' and/or
        'compression'.
    :param sname: name of the share to update.
    :return: Response with the serialized, updated share.
    """
    with self._handle_exception(request):
        share = self._validate_share(request, sname)
        if ('size' in request.data):
            new_size = self._validate_share_size(request, share.pool)
            # NOTE(review): 'disk' is fetched but never used below —
            # presumably a leftover; confirm before removing.
            disk = Disk.objects.filter(pool=share.pool)[0]
            qid = qgroup_id(share.pool, share.subvol_name)
            # Current referenced / exclusive usage of the share's qgroup
            # (KB, per the error message below).
            cur_rusage, cur_eusage = share_usage(share.pool, qid)
            if (new_size < cur_rusage):
                e_msg = ('Unable to resize because requested new size(%dKB) '
                         'is less than current usage(%dKB) of the share.' %
                         (new_size, cur_rusage))
                handle_exception(Exception(e_msg), request)
            # new_size is KB; the quota limit is passed as bytes.
            update_quota(share.pool, share.pqgroup, new_size * 1024)
            share.size = new_size
        if ('compression' in request.data):
            new_compression = self._validate_compression(request)
            if (share.compression_algo != new_compression):
                share.compression_algo = new_compression
                mnt_pt = '%s%s' % (settings.MNT_PT, sname)
                # 'no' maps to an empty property value, which clears
                # the btrfs compression property on the mount point.
                if (new_compression == 'no'):
                    new_compression = ''
                set_property(mnt_pt, 'compression', new_compression)
        share.save()
        return Response(ShareSerializer(share).data)
def put(self, request, sname):
    """
    Resize a share and/or switch its compression algorithm.

    Reads optional 'size' (KB) and 'compression' keys from request.data.
    Shrinking below current usage is refused via handle_exception.

    :param request: DRF request carrying the update payload.
    :param sname: name of the target share.
    :return: Response containing the serialized share after the update.
    """
    with self._handle_exception(request):
        share = self._validate_share(request, sname)
        if ('size' in request.data):
            new_size = self._validate_share_size(request, share.pool)
            # NOTE(review): 'disk' is assigned but unused in this body —
            # looks vestigial; verify before deleting.
            disk = Disk.objects.filter(pool=share.pool)[0]
            qid = qgroup_id(share.pool, share.subvol_name)
            # Usage figures for the share's qgroup (KB, per message below).
            cur_rusage, cur_eusage = share_usage(share.pool, qid)
            if (new_size < cur_rusage):
                e_msg = (
                    'Unable to resize because requested new size(%dKB) '
                    'is less than current usage(%dKB) of the share.' %
                    (new_size, cur_rusage))
                handle_exception(Exception(e_msg), request)
            # KB -> bytes for the quota limit.
            update_quota(share.pool, share.pqgroup, new_size * 1024)
            share.size = new_size
        if ('compression' in request.data):
            new_compression = self._validate_compression(request)
            if (share.compression_algo != new_compression):
                share.compression_algo = new_compression
                mnt_pt = '%s%s' % (settings.MNT_PT, sname)
                # An empty property value disables compression; 'no' is
                # the API-level spelling for that.
                if (new_compression == 'no'):
                    new_compression = ''
                set_property(mnt_pt, 'compression', new_compression)
        share.save()
        return Response(ShareSerializer(share).data)
def post(self, request, sname, command):
    """
    Execute a command ('clone', 'rollback' or 'compress') on a share.

    clone: create a clone of the share under the requested name.
    rollback: replace the share's contents with a validated snapshot;
        refused while the share is exported via NFS or Samba.
    compress: set the share's compression algorithm.

    Bug fix: the "invalid algorithm" message formatted two '%s'
    placeholders with a single sequence argument, which raises TypeError
    instead of reporting the error; both compress messages now pass an
    explicit argument tuple.

    :param request: DRF request; request.DATA may carry 'name' (clone)
        or 'compress' (compress).
    :param sname: name of the target share.
    :param command: one of 'clone', 'rollback', 'compress'.
    :return: Response (serialized share, clone result, or empty).
    """
    with self._handle_exception(request):
        share = self._validate_share(request, sname)
        if command == "clone":
            new_name = request.DATA.get("name", "")
            return create_clone(share, new_name, request, logger)
        if command == "rollback":
            snap = self._validate_snapshot(request, share)
            # Rollback swaps the underlying subvol, so exported shares
            # must be un-exported first.
            if NFSExport.objects.filter(share=share).exists():
                e_msg = ("Share(%s) cannot be rolled back as it is "
                         "exported via nfs. Delete nfs exports and "
                         "try again" % sname)
                handle_exception(Exception(e_msg), request)
            if SambaShare.objects.filter(share=share).exists():
                e_msg = ("Share(%s) cannot be rolled back as it is shared"
                         " via Samba. Unshare and try again" % sname)
                handle_exception(Exception(e_msg), request)
            pool_device = Disk.objects.filter(pool=share.pool)[0].name
            rollback_snap(snap.real_name, share.name, share.subvol_name,
                          share.pool, pool_device)
            # Re-apply the share's quota to the snapshot's qgroup, which
            # now backs the share (size KB -> bytes).
            update_quota(share.pool, pool_device, snap.qgroup,
                         share.size * 1024)
            share.qgroup = snap.qgroup
            share.save()
            snap.delete()
            return Response()
        if command == "compress":
            algo = request.DATA.get("compress", None)
            if algo is None:
                # Explicit tuple so a tuple-valued COMPRESSION_TYPES is
                # rendered as one value rather than consumed as args.
                e_msg = ("Compression algorithm must be specified. Valid "
                         "options are: %s" % (settings.COMPRESSION_TYPES,))
                handle_exception(Exception(e_msg), request)
            if algo not in settings.COMPRESSION_TYPES:
                # Fixed: supply both the offending algo and the valid set.
                e_msg = ("Compression algorithm(%s) is invalid. Valid "
                         "options are: %s" %
                         (algo, settings.COMPRESSION_TYPES))
                handle_exception(Exception(e_msg), request)
            mnt_pt = "%s%s" % (settings.MNT_PT, share.name)
            if not is_share_mounted(share.name):
                disk = Disk.objects.filter(pool=share.pool)[0].name
                mount_share(share, disk, mnt_pt)
            share.compression_algo = algo
            share.save()
            # 'no' maps to an empty property value (compression off).
            if algo == "no":
                algo = ""
            set_property(mnt_pt, "compression", algo)
            return Response(ShareSerializer(share).data)
def post(self, request):
    """
    Create a new share on an existing pool.

    Validates pool existence, compression, size (KB) and share name,
    creates the btrfs subvolume, applies a quota, mounts the share and
    returns it serialized.

    :param request: DRF request; request.data carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size'.
    :return: Response with the serialized new Share.
    """
    with self._handle_exception(request):
        pool_name = request.data.get('pool', None)
        try:
            pool = Pool.objects.get(name=pool_name)
        except:
            # NOTE(review): bare except also masks unexpected errors;
            # Pool.DoesNotExist is the targeted exception here.
            e_msg = ('Pool(%s) does not exist.' % pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.data.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Share name must start with a alphanumeric(a-z0-9) '
                     'character and can be followed by any of the '
                     'following characters: letter(a-z), digits(0-9), '
                     'hyphen(-), underscore(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            e_msg = ('Share(%s) already exists. Choose a different name' %
                     sname)
            handle_exception(Exception(e_msg), request)
        # Share and pool names share a mount namespace, so must differ.
        if (Pool.objects.filter(name=sname).exists()):
            e_msg = ('A Pool with this name(%s) exists. Share and Pool names '
                     'must be distinct. Choose a different name' % sname)
            handle_exception(Exception(e_msg), request)
        disk = Disk.objects.filter(pool=pool)[0]
        replica = False
        if ('replica' in request.data):
            replica = request.data['replica']
            if (type(replica) != bool):
                e_msg = ('replica must be a boolean, not %s' %
                         type(replica))
                handle_exception(Exception(e_msg), request)
        add_share(pool, disk.name, sname)
        qid = qgroup_id(pool, disk.name, sname)
        # size is KB; the quota limit is applied in bytes.
        update_quota(pool, disk.name, qid, size * 1024)
        s = Share(pool=pool, qgroup=qid, name=sname, size=size,
                  subvol_name=sname, replica=replica,
                  compression_algo=compression)
        s.save()
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if (not is_share_mounted(sname)):
            disk = Disk.objects.filter(pool=pool)[0].name
            mount_share(s, disk, mnt_pt)
        # 'no' means compression off, so no property to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def post(self, request):
    """
    Create a new share on an existing pool (request.DATA variant).

    Validates pool, compression, size and name; creates the subvolume,
    delegates quota setup to self._update_quota, mounts the share and
    returns it serialized.

    :param request: DRF request; request.DATA carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size'.
    :return: Response with the serialized new Share.
    """
    with self._handle_exception(request):
        pool_name = request.DATA.get('pool', None)
        try:
            pool = Pool.objects.get(name=pool_name)
        except:
            # NOTE(review): bare except — Pool.DoesNotExist would be the
            # targeted exception here.
            e_msg = ('Pool(%s) does not exist.' % pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.DATA.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Share name must start with a letter(a-z) and can '
                     'be followed by any of the following characters: '
                     'letter(a-z), digits(0-9), hyphen(-), underscore'
                     '(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            e_msg = ('Share(%s) already exists.' % sname)
            handle_exception(Exception(e_msg), request)
        try:
            disk = Disk.objects.filter(pool=pool)[0]
        except:
            # IndexError from an empty queryset is the expected failure.
            e_msg = ('Pool(%s) does not have any disks in it.' %
                     pool_name)
            handle_exception(Exception(e_msg), request)
        replica = False
        if ('replica' in request.DATA):
            replica = request.DATA['replica']
            if (type(replica) != bool):
                e_msg = ('replica must be a boolean, not %s' %
                         type(replica))
                handle_exception(Exception(e_msg), request)
        add_share(pool, disk.name, sname)
        # NOTE(review): this local 'qgroup_id' shadows any module-level
        # qgroup_id helper for the rest of this body — confirm intended.
        qgroup_id = self._update_quota(pool, disk.name, sname, size)
        s = Share(pool=pool, qgroup=qgroup_id, name=sname, size=size,
                  subvol_name=sname, replica=replica,
                  compression_algo=compression)
        s.save()
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if (not is_share_mounted(sname)):
            disk = Disk.objects.filter(pool=pool)[0].name
            mount_share(s, disk, mnt_pt)
        # 'no' means compression disabled, so nothing to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def post(self, request):
    """
    Create a new share on an existing pool.

    Validates pool existence, compression, size (KB) and share name,
    creates the btrfs subvolume, applies a quota, mounts the share and
    returns it serialized.

    Bug fix: the Share row previously stored the qgroup_id *function*
    object (qgroup=qgroup_id) instead of the computed id; it now stores
    the 'qid' returned by qgroup_id(), matching the sibling create view.
    The pool lookup also catches Pool.DoesNotExist specifically rather
    than a bare except.

    :param request: DRF request; request.data carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size'.
    :return: Response with the serialized new Share.
    """
    with self._handle_exception(request):
        pool_name = request.data.get('pool', None)
        pool = None
        try:
            pool = Pool.objects.get(name=pool_name)
        except Pool.DoesNotExist:
            e_msg = ('Pool(%s) does not exist.' % pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.data.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Share name must start with a alphanumeric(a-z0-9) '
                     'character and can be followed by any of the '
                     'following characters: letter(a-z), digits(0-9), '
                     'hyphen(-), underscore(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            e_msg = ('Share(%s) already exists. Choose a different name' %
                     sname)
            handle_exception(Exception(e_msg), request)
        # Shares and pools share a mount namespace, so names must differ.
        if (Pool.objects.filter(name=sname).exists()):
            e_msg = ('A Pool with this name(%s) exists. Share and Pool names '
                     'must be distinct. Choose a different name' % sname)
            handle_exception(Exception(e_msg), request)
        disk = Disk.objects.filter(pool=pool)[0]
        replica = False
        if ('replica' in request.data):
            replica = request.data['replica']
            if (type(replica) != bool):
                e_msg = ('replica must be a boolean, not %s' %
                         type(replica))
                handle_exception(Exception(e_msg), request)
        add_share(pool, disk.name, sname)
        qid = qgroup_id(pool, disk.name, sname)
        # size is KB; quota limit is applied in bytes.
        update_quota(pool, disk.name, qid, size * 1024)
        # Fixed: persist the computed qgroup id, not the helper function.
        s = Share(pool=pool, qgroup=qid, name=sname, size=size,
                  subvol_name=sname, replica=replica,
                  compression_algo=compression)
        s.save()
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if (not is_share_mounted(sname)):
            disk = Disk.objects.filter(pool=pool)[0].name
            mount_share(s, disk, mnt_pt)
        # 'no' means compression off, so no property to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def put(self, request, sid):
    """
    Update a share (by id) — resize and/or change compression — with
    pool-quota-state aware pqgroup maintenance.

    If quotas are enabled on the pool, a missing/default pqgroup is
    (re)created and the new size limit plus share assignment applied.
    If quotas are disabled, the share's pqgroup is reset to the default
    sentinel. Shrinking below current usage is refused.

    :param request: DRF request; request.data may contain 'size' (KB)
        and/or 'compression'.
    :param sid: id of the share to update.
    :return: Response with the serialized, updated share.
    """
    with self._handle_exception(request):
        share = self._validate_share(request, sid)
        if ('size' in request.data):
            new_size = self._validate_share_size(request, share.pool)
            qid = qgroup_id(share.pool, share.subvol_name)
            cur_rusage, cur_eusage = volume_usage(share.pool, qid)
            if (new_size < cur_rusage):
                e_msg = ('Unable to resize because requested new '
                         'size {} KB is less than current usage {} KB '
                         'of the share.').format(new_size, cur_rusage)
                handle_exception(Exception(e_msg), request)
            # quota maintenance
            if share.pool.quotas_enabled:
                # Only try create / update quotas if they are enabled,
                # pqgroup of PQGROUP_DEFAULT (-1/-1) indicates no pqgroup,
                # ie quotas were disabled when update was requested.
                if share.pqgroup == PQGROUP_DEFAULT or \
                        not share.pqgroup_exist:
                    # if quotas were disabled or pqgroup non-existent.
                    share.pqgroup = qgroup_create(share.pool)
                    share.save()
                # NOTE(review): 'is not' compares identity, not equality;
                # works only if PQGROUP_DEFAULT is interned/shared —
                # '!=' (as used below) looks like the intended test.
                if share.pqgroup is not PQGROUP_DEFAULT:
                    # Only update quota and assign if now non default as
                    # default can also indicate Read-only fs at this point.
                    update_quota(share.pool, share.pqgroup,
                                 new_size * 1024)
                    share_pqgroup_assign(share.pqgroup, share)
            else:
                # Our pool's quotas are disabled so reset pqgroup to -1/-1.
                if share.pqgroup != PQGROUP_DEFAULT:
                    # Only reset if necessary
                    share.pqgroup = PQGROUP_DEFAULT
                    share.save()
            share.size = new_size
        if ('compression' in request.data):
            new_compression = self._validate_compression(request)
            if (share.compression_algo != new_compression):
                share.compression_algo = new_compression
                mnt_pt = '%s%s' % (settings.MNT_PT, share.name)
                # Empty property value clears (disables) compression.
                if (new_compression == 'no'):
                    new_compression = ''
                set_property(mnt_pt, 'compression', new_compression)
        share.save()
        return Response(ShareSerializer(share).data)
def post(self, request):
    """
    Create a new share on an existing pool, with explicit parent qgroup
    (pqgroup) management.

    :param request: DRF request; request.data carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size' (KB).
    :return: Response with the serialized new Share.
    """
    # qgroup notes for shares. we need to create a qgroup prior to share
    # creation. qgroup ids 0/<subvol_id> automatically get created when a
    # subvolume(i.e., a Share or a Snapshot) is created. So let's create a
    # new qgroup: 2015/<some_number> whenever a Share is
    # created. <some_number> starts from 1 and is incremented as more
    # Shares are created. So, for the very first Share in a pool, it's
    # qgroup will be 1/1. 2015 is arbitrarily chose.
    # Before creating a new Share, we create the qgroup for it. And during
    # it's creation, we assign this qgroup to it. During it's creation a
    # 0/x qgroup will automatically be created, but it will become the
    # child of our explicitly-created qgroup(2015/x).
    # We will set the qgroup limit on our qgroup and it will enforce the
    # quota on every subvolume(i.e., Share and Snapshot) in that qgroup.
    # When a Share is deleted, we need to destroy two qgroups. One is it's
    # auto 0/x qgroup and the other is our explicitly-created 2015/y
    # qgroup.
    with self._handle_exception(request):
        pool_name = request.data.get('pool', None)
        try:
            pool = Pool.objects.get(name=pool_name)
        except:
            # NOTE(review): bare except — Pool.DoesNotExist would be the
            # targeted exception here.
            e_msg = 'Pool ({}) does not exist.'.format(pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.data.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Invalid characters in share name. Following are '
                     'allowed: letter(a-z or A-Z), digit(0-9), '
                     'hyphen(-), underscore(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (len(sname) > 254):
            # btrfs subvolume names cannot exceed 254 characters.
            e_msg = ('Share name length cannot exceed 254 characters.')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            # Note e_msg is consumed by replication/util.py create_share()
            e_msg = ('Share ({}) already exists. Choose a '
                     'different name.').format(sname)
            handle_exception(Exception(e_msg), request)
        # Shares and pools share a mount namespace; names must differ.
        if (Pool.objects.filter(name=sname).exists()):
            e_msg = ('A pool with this name ({}) exists. Share '
                     'and pool names must be distinct. Choose '
                     'a different name.').format(sname)
            handle_exception(Exception(e_msg), request)
        replica = False
        if ('replica' in request.data):
            replica = request.data['replica']
            if (type(replica) != bool):
                # TODO: confirm this 'type' call works as format parameter.
                e_msg = ('Replica must be a boolean, '
                         'not ({}).').format(type(replica))
                handle_exception(Exception(e_msg), request)
        pqid = qgroup_create(pool)
        add_share(pool, sname, pqid)
        qid = qgroup_id(pool, sname)
        s = Share(pool=pool, qgroup=qid, pqgroup=pqid, name=sname,
                  size=size, subvol_name=sname, replica=replica,
                  compression_algo=compression)
        # The following pool.save() was informed by test_share.py
        pool.save()
        s.save()
        # NOTE(review): 'is not' compares identity, not equality — works
        # only if PQGROUP_DEFAULT is a shared singleton; confirm.
        if pqid is not PQGROUP_DEFAULT:
            # size is KB; quota limit is applied in bytes.
            update_quota(pool, pqid, size * 1024)
            share_pqgroup_assign(pqid, s)
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if not s.is_mounted:
            mount_share(s, mnt_pt)
        # 'no' means compression off, so no property to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def post(self, request):
    """
    Create a new share on an existing pool, creating an explicit parent
    qgroup (pqgroup) before the subvolume.

    :param request: DRF request; request.data carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size' (KB).
    :return: Response with the serialized new Share.
    """
    # qgroup notes for shares. we need to create a qgroup prior to share
    # creation. qgroup ids 0/<subvol_id> automatically get created when a
    # subvolume(i.e., a Share or a Snapshot) is created. So let's create a
    # new qgroup: 2015/<some_number> whenever a Share is
    # created. <some_number> starts from 1 and is incremented as more Shares
    # are created. So, for the very first Share in a pool, it's qgroup will
    # be 1/1. 2015 is arbitrarily chose.
    # Before creating a new Share, we create the qgroup for it. And during
    # it's creation, we assign this qgroup to it. During it's creation a 0/x
    # qgroup will automatically be created, but it will become the child of
    # our explicitly-created qgroup(2015/x).
    # We will set the qgroup limit on our qgroup and it will enforce the
    # quota on every subvolume(i.e., Share and Snapshot) in that qgroup.
    # When a Share is deleted, we need to destroy two qgroups. One is it's
    # auto 0/x qgroup and the other is our explicitly-created 2015/y qgroup.
    with self._handle_exception(request):
        pool_name = request.data.get('pool', None)
        try:
            pool = Pool.objects.get(name=pool_name)
        except:
            # NOTE(review): bare except — Pool.DoesNotExist would be the
            # targeted exception here.
            e_msg = ('Pool(%s) does not exist.' % pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.data.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Share name must start with a alphanumeric(a-z0-9) '
                     'character and can be followed by any of the '
                     'following characters: letter(a-z), digits(0-9), '
                     'hyphen(-), underscore(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (len(sname) > 254):
            # btrfs subvolume names cannot exceed 254 characters.
            e_msg = ('Share name length cannot exceed 254 characters')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            e_msg = ('Share(%s) already exists. Choose a different name' %
                     sname)
            handle_exception(Exception(e_msg), request)
        # Shares and pools share a mount namespace; names must differ.
        if (Pool.objects.filter(name=sname).exists()):
            e_msg = ('A Pool with this name(%s) exists. Share and Pool names '
                     'must be distinct. Choose a different name' % sname)
            handle_exception(Exception(e_msg), request)
        # NOTE(review): 'disk' is fetched but never used below (add_share
        # here takes pool/sname/pqid) — presumably vestigial; confirm.
        disk = Disk.objects.filter(pool=pool)[0]
        replica = False
        if ('replica' in request.data):
            replica = request.data['replica']
            if (type(replica) != bool):
                e_msg = ('replica must be a boolean, not %s' %
                         type(replica))
                handle_exception(Exception(e_msg), request)
        pqid = qgroup_create(pool)
        add_share(pool, sname, pqid)
        qid = qgroup_id(pool, sname)
        # size is KB; quota limit is applied in bytes.
        update_quota(pool, pqid, size * 1024)
        s = Share(pool=pool, qgroup=qid, pqgroup=pqid, name=sname,
                  size=size, subvol_name=sname, replica=replica,
                  compression_algo=compression)
        s.save()
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if (not is_share_mounted(sname)):
            mount_share(s, mnt_pt)
        # 'no' means compression off, so no property to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def post(self, request):
    """
    Create a new share on an existing pool; the explicit parent qgroup
    (pqgroup) is created first and the subvolume is made its child.

    :param request: DRF request; request.data carries 'pool', 'sname',
        optionally 'replica', 'compression' and 'size' (KB).
    :return: Response with the serialized new Share.
    """
    # qgroup notes for shares. we need to create a qgroup prior to share
    # creation. qgroup ids 0/<subvol_id> automatically get created when a
    # subvolume(i.e., a Share or a Snapshot) is created. So let's create a
    # new qgroup: 2015/<some_number> whenever a Share is
    # created. <some_number> starts from 1 and is incremented as more Shares
    # are created. So, for the very first Share in a pool, it's qgroup will
    # be 1/1. 2015 is arbitrarily chose.
    # Before creating a new Share, we create the qgroup for it. And during
    # it's creation, we assign this qgroup to it. During it's creation a 0/x
    # qgroup will automatically be created, but it will become the child of
    # our explicitly-created qgroup(2015/x).
    # We will set the qgroup limit on our qgroup and it will enforce the
    # quota on every subvolume(i.e., Share and Snapshot) in that qgroup.
    # When a Share is deleted, we need to destroy two qgroups. One is it's
    # auto 0/x qgroup and the other is our explicitly-created 2015/y qgroup.
    with self._handle_exception(request):
        pool_name = request.data.get('pool', None)
        try:
            pool = Pool.objects.get(name=pool_name)
        except:
            # NOTE(review): bare except — Pool.DoesNotExist would be the
            # targeted exception here.
            e_msg = ('Pool(%s) does not exist.' % pool_name)
            handle_exception(Exception(e_msg), request)
        compression = self._validate_compression(request)
        size = self._validate_share_size(request, pool)
        sname = request.data.get('sname', None)
        if ((sname is None or
             re.match('%s$' % settings.SHARE_REGEX, sname) is None)):
            e_msg = ('Share name must start with a alphanumeric(a-z0-9) '
                     'character and can be followed by any of the '
                     'following characters: letter(a-z), digits(0-9), '
                     'hyphen(-), underscore(_) or a period(.).')
            handle_exception(Exception(e_msg), request)
        if (len(sname) > 254):
            # btrfs subvolume names cannot exceed 254 characters.
            e_msg = ('Share name length cannot exceed 254 characters')
            handle_exception(Exception(e_msg), request)
        if (Share.objects.filter(name=sname).exists()):
            e_msg = ('Share(%s) already exists. Choose a different name' %
                     sname)
            handle_exception(Exception(e_msg), request)
        # Shares and pools share a mount namespace; names must differ.
        if (Pool.objects.filter(name=sname).exists()):
            e_msg = (
                'A Pool with this name(%s) exists. Share and Pool names '
                'must be distinct. Choose a different name' % sname)
            handle_exception(Exception(e_msg), request)
        # NOTE(review): 'disk' is fetched but never used below —
        # presumably vestigial; confirm before removing.
        disk = Disk.objects.filter(pool=pool)[0]
        replica = False
        if ('replica' in request.data):
            replica = request.data['replica']
            if (type(replica) != bool):
                e_msg = ('replica must be a boolean, not %s' %
                         type(replica))
                handle_exception(Exception(e_msg), request)
        pqid = qgroup_create(pool)
        add_share(pool, sname, pqid)
        qid = qgroup_id(pool, sname)
        # size is KB; quota limit is applied in bytes.
        update_quota(pool, pqid, size * 1024)
        s = Share(pool=pool, qgroup=qid, pqgroup=pqid, name=sname,
                  size=size, subvol_name=sname, replica=replica,
                  compression_algo=compression)
        s.save()
        mnt_pt = '%s%s' % (settings.MNT_PT, sname)
        if (not is_share_mounted(sname)):
            mount_share(s, mnt_pt)
        # 'no' means compression off, so no property to set.
        if (compression != 'no'):
            set_property(mnt_pt, 'compression', compression)
        return Response(ShareSerializer(s).data)
def create_repclone(share, request, logger, snapshot):
    """
    Variant of create_clone but where the share already exists and is to be
    supplanted by a snapshot which is effectively moved into the shares prior
    position, both in the db and on the file system. This is achieved thus:
    Unmount target share - (via remove_share()).
    Btrfs subvol delete target share (via remove_share()).
    Remove prior target share mount point (dir).
    Move snap source to target share's former location (becomes share on disk).
    Update existing target share db entry with source snap's qgroup / usage.
    Remove source snap's db entry: updated share db entry makes it redundant.
    Remount share (which now represents the prior snap's subvol relocated).
    :param share: Share object to be supplanted
    :param request:
    :param logger: Logger object to reference
    :param snapshot: Source snapshot/quirk share object to supplant target.
    :return: response of serialized share (in it's updated form)
    """
    try:
        logger.info("Supplanting share ({}) with "
                    "snapshot ({}).".format(share.name, snapshot.name))
        # We first strip our snapshot.name of any path as when we encounter the
        # initially created receive subvol it is identified as a share with a
        # snapshots location as it's subvol name (current quirk of import sys).
        # E.g. first receive subvol/share-in-snapdir name example:
        # ".snapshots/C583C37F-...1712B_sharename/sharename_19_replication_1".
        # Subsequent more regular snapshots (in db as such) are named thus:
        # "sharename_19_replication_2" or "sharename_19_replication_2" and on.
        # The 19 in the above names is the generation of the replication task.
        #
        # Normalise source name across initial quirk share & subsequent snaps.
        source_name = snapshot.name.split("/")[-1]
        # Note in the above we have to use Object.name for polymorphism, but
        # our share is passed by it's subvol (potential fragility point).
        # The "//" -> "/" replace covers the system pool whose mnt_pt may
        # already end in "/".
        snap_path = "{}/.snapshots/{}/{}".format(share.pool.mnt_pt,
                                                 share.name,
                                                 source_name).replace(
            "//", "/")
        # e.g. for above: /mnt2/poolname/.snapshots/sharename/snapname
        # or /.snapshots/sharename/snapname for system pool shares
        share_path = ("{}/{}".format(share.pool.mnt_pt,
                                     share.name)).replace("//", "/")
        # e.g. for above: /mnt2/poolname/sharename or /sharename for system pool shares
        # Passed db snap assured by caller but this does not guarantee on disk.
        if not is_subvol(snap_path):
            raise Exception("Subvol with path ({}) does not exist. Aborting "
                            "replacement of share with path ({}).".format(
                                snap_path, share_path))
        # unmounts and then subvol deletes our on disk share
        remove_share(share.pool, share.name, PQGROUP_DEFAULT)
        # Remove read only flag on our snapshot subvol
        set_property(snap_path, "ro", "false", mount=False)
        # Ensure removed share path is clean, ie remove mount point.
        run_command(["/usr/bin/rm", "-rf", share_path], throw=False)
        # Now move snapshot to prior shares location. Given both a share and
        # a snapshot are subvols, we effectively promote the snap to a share.
        logger.info(
            "Moving snapshot ({}) to prior share's pool location ({})".format(
                snap_path, share_path))
        shutil.move(snap_path, share_path)
        # This should have re-established our just removed subvol.
        # Supplant share db info with snap info to reflect new on disk state.
        share.qgroup = snapshot.qgroup
        share.rusage = snapshot.rusage
        share.eusage = snapshot.eusage
        share.save()
        # delete our now redundant snapshot/quirky share db entry
        snapshot.delete()
        # update our share's quota (size is KB; limit applied in bytes).
        update_quota(share.pool, share.pqgroup, share.size * 1024)
        # mount our newly supplanted share
        # We independently mount all shares, data pool or system pool, in /mnt2/name
        mnt_pt = "{}{}".format(settings.MNT_PT, share.name)
        mount_share(share, mnt_pt)
        return Response(ShareSerializer(share).data)
    except Exception as e:
        handle_exception(e, request)
def run(self):
    """
    Receiver main loop for one replication transfer (Python 2 era code).

    Connects back to the sender over zmq (SUB for data, PUSH for control
    acks), ensures the destination share / replica metadata exist, then
    pipes received chunks into a long-running 'btrfs receive' process.
    A credit scheme throttles the sender; END_SUCCESS / END_FAIL
    sentinels terminate the stream. On success the oldest retained
    snapshot is promoted to the share; failures are recorded on the
    receive trail. All failure paths exit via _clean_exit_handler with
    the message prepared just before the risky step.
    """
    msg = ('Failed to get the sender ip from the uuid(%s) for meta: %s' %
           (self.meta['uuid'], self.meta))
    with self._clean_exit_handler(msg):
        self.sender_ip = get_sender_ip(self.meta['uuid'], logger)

    msg = ('Failed to connect to the sender(%s) on data_port(%s). meta: '
           '%s. Aborting.' % (self.sender_ip, self.data_port, self.meta))
    with self._clean_exit_handler(msg):
        #@todo: add validation
        recv_sub = self.ctx.socket(zmq.SUB)
        recv_sub.connect('tcp://%s:%d' % (self.sender_ip, self.data_port))
        # 100ms receive timeout so the loop can count idle periods.
        recv_sub.RCVTIMEO = 100
        recv_sub.setsockopt(zmq.SUBSCRIBE, str(self.meta['id']))

    msg = ('Failed to connect to the sender(%s) on '
           'meta_port(%d). meta: %s. Aborting.' %
           (self.sender_ip, self.meta_port, self.meta))
    with self._clean_exit_handler(msg):
        self.meta_push = self.ctx.socket(zmq.PUSH)
        self.meta_push.connect('tcp://%s:%d' % (self.sender_ip,
                                                self.meta_port))

    # Destination share name is namespaced by the sender's id.
    sname = ('%s_%s' % (self.sender_id, self.src_share))
    if (not self.incremental):
        # Full send: the destination share and replica metadata must be
        # created first.
        msg = ('Failed to verify/create share: %s. meta: %s. '
               'Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg, ack=True):
            create_share(sname, self.dest_pool, logger)

        msg = ('Failed to create the replica metadata object '
               'for share: %s. meta: %s. Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg, ack=True):
            data = {'share': sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share,
                    'data_port': self.data_port,
                    'meta_port': self.meta_port, }
            self.rid = create_rshare(data, logger)
    else:
        # Incremental send: the replica metadata already exists.
        msg = ('Failed to retreive the replica metadata object for '
               'share: %s. meta: %s. Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg):
            self.rid = rshare_id(sname, logger)

    sub_vol = ('%s%s/%s' % (settings.MNT_PT, self.meta['pool'], sname))
    if (not is_subvol(sub_vol)):
        msg = ('Failed to create parent subvolume %s' % sub_vol)
        with self._clean_exit_handler(msg, ack=True):
            run_command([BTRFS, 'subvolume', 'create', sub_vol])

    snap_fp = ('%s/%s' % (sub_vol, self.snap_name))
    # NOTE(review): msg here is still the previous message — presumably
    # intended to have its own; confirm.
    with self._clean_exit_handler(msg):
        if (is_subvol(snap_fp)):
            # Tell the sender this snapshot already exists (e.g. from a
            # previous partial run).
            ack = {'msg': 'snap_exists',
                   'id': self.meta['id'], }
            self.meta_push.send_json(ack)

    cmd = [BTRFS, 'receive', sub_vol]
    msg = ('Failed to start the low level btrfs receive command(%s)'
           '. Aborting.' % (cmd))
    with self._clean_exit_handler(msg, ack=True):
        rp = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)

    msg = ('Failed to send begin_ok to the sender for meta: %s' %
           self.meta)
    with self._clean_exit_handler(msg):
        ack = {'msg': 'begin_ok',
               'id': self.meta['id'], }
        self.meta_push.send_json(ack)
    recv_timeout_counter = 0
    credit = settings.DEFAULT_SEND_CREDIT
    check_credit = True
    while True:
        # Top up the sender's send credit before it runs dry.
        if (check_credit is True and credit < 5):
            ack = {'msg': 'send_more',
                   'id': self.meta['id'],
                   'credit': settings.DEFAULT_SEND_CREDIT, }
            self.meta_push.send_json(ack)
            credit = credit + settings.DEFAULT_SEND_CREDIT
            logger.debug('%d KB received for %s' %
                         (int(self.kb_received / 1024), sname))
        try:
            recv_data = recv_sub.recv()
            # Strip the subscription topic (meta id) prefix.
            recv_data = recv_data[len(self.meta['id']):]
            credit = credit - 1
            recv_timeout_counter = 0
            self.kb_received = self.kb_received + len(recv_data)
            if (self.rtid is None):
                msg = ('Failed to create snapshot: %s. Aborting.' %
                       self.snap_name)
                # create a snapshot only if it's not already from a previous failed attempt
                with self._clean_exit_handler(msg, ack=True):
                    create_snapshot(sname, self.snap_name, logger,
                                    snap_type='receiver')

                data = {'snap_name': self.snap_name}
                msg = ('Failed to create receive trail for rid: %d'
                       '. meta: %s' % (self.rid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    self.rtid = create_receive_trail(self.rid, data,
                                                     logger)

            if (recv_data == 'END_SUCCESS' or recv_data == 'END_FAIL'):
                check_credit = False
                ts = datetime.utcnow().replace(tzinfo=utc)
                data = {'kb_received': self.kb_received / 1024, }
                if (recv_data == 'END_SUCCESS'):
                    data['receive_succeeded'] = ts.strftime(
                        settings.SNAP_TS_FORMAT)
                    # delete the share, move the oldest snap to share
                    oldest_snap = get_oldest_snap(sub_vol, 3)
                    if (oldest_snap is not None):
                        snap_path = ('%s/%s' % (sub_vol, oldest_snap))
                        share_path = ('%s%s/%s' %
                                      (settings.MNT_PT, self.dest_pool,
                                       sname))
                        msg = ('Failed to promote the oldest Snapshot(%s) '
                               'to Share(%s)' % (snap_path, share_path))
                        try:
                            pool = Pool.objects.get(name=self.dest_pool)
                            remove_share(pool, sname)
                            set_property(snap_path, 'ro', 'false',
                                         mount=False)
                            run_command(['/usr/bin/rm', '-rf', share_path],
                                        throw=False)
                            shutil.move(snap_path, share_path)
                            set_property(share_path, 'ro', 'true',
                                         mount=False)
                            delete_snapshot(sname, oldest_snap, logger)
                        except Exception, e:
                            # Best effort: promotion failure is logged but
                            # does not abort the (already successful) receive.
                            logger.error(msg)
                            logger.exception(msg)
                else:
                    logger.error('END_FAIL received for meta: %s. '
                                 'Terminating.' % self.meta)
                    rp.terminate()
                    # NOTE(review): raw datetime here vs strftime used for
                    # receive_succeeded/receive_failed elsewhere — confirm
                    # the trail layer accepts both.
                    data['receive_failed'] = ts
                    data['status'] = 'failed'

                msg = ('Failed to update receive trail for rtid: %d'
                       '. meta: %s' % (self.rtid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    update_receive_trail(self.rtid, data, logger)
                break
            if (rp.poll() is None):
                # btrfs receive still alive: feed it the chunk.
                rp.stdin.write(recv_data)
                rp.stdin.flush()
            else:
                logger.error('It seems the btrfs receive process died'
                             ' unexpectedly.')
                out, err = rp.communicate()
                msg = ('Low level system error from btrfs receive '
                       'command. out: %s err: %s for rtid: %s meta: %s' %
                       (out, err, self.rtid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    ts = datetime.utcnow().replace(tzinfo=utc)
                    data = {'receive_failed':
                            ts.strftime(settings.SNAP_TS_FORMAT),
                            'status': 'failed',
                            'error': msg, }
                    update_receive_trail(self.rtid, data, logger)
        except zmq.error.Again:
            # RCVTIMEO is 100ms, so 600 consecutive timeouts ~= 60s idle.
            recv_timeout_counter = recv_timeout_counter + 1
            if (recv_timeout_counter > 600):
                logger.error('Nothing received in the last 60 seconds '
                             'from the sender for meta: %s. Aborting.' %
                             self.meta)
                self._sys_exit(3)
def create_repclone(share, request, logger, snapshot):
    """
    Variant of create_clone but where the share already exists and is to be
    supplanted by a snapshot which is effectively moved into the shares prior
    position, both in the db and on the file system. This is achieved thus:
    Unmount target share - (via remove_share()).
    Btrfs subvol delete target share (via remove_share()).
    Remove prior target share mount point (dir).
    Move snap source to target share's former location (becomes share on disk).
    Update existing target share db entry with source snap's qgroup / usage.
    Remove source snap's db entry: updated share db entry makes it redundant.
    Remount share (which now represents the prior snap's subvol relocated).
    :param share: Share object to be supplanted
    :param request:
    :param logger: Logger object to reference
    :param snapshot: Source snapshot/quirk share object to supplant target.
    :return: response of serialized share (in it's updated form)
    """
    try:
        logger.info('Supplanting share ({}) with '
                    'snapshot ({}).'.format(share.name, snapshot.name))
        # We first strip our snapshot.name of any path as when we encounter the
        # initially created receive subvol it is identified as a share with a
        # snapshots location as it's subvol name (current quirk of import sys).
        # E.g. first receive subvol/share-in-snapdir name example:
        # ".snapshots/C583C37F-...1712B_sharename/sharename_19_replication_1".
        # Subsequent more regular snapshots (in db as such) are named thus:
        # "sharename_19_replication_2" or "sharename_19_replication_2" and on.
        # The 19 in the above names is the generation of the replication task.
        #
        # Normalise source name across initial quirk share & subsequent snaps.
        source_name = snapshot.name.split('/')[-1]
        # Note in the above we have to use Object.name for polymorphism, but
        # our share is passed by it's subvol (potential fragility point).
        snap_path = '{}{}/.snapshots/{}/{}'.format(settings.MNT_PT,
                                                   share.pool.name,
                                                   share.name, source_name)
        # eg /mnt2/poolname/.snapshots/sharename/snapname
        share_path = ('{}{}/{}'.format(settings.MNT_PT, share.pool.name,
                                       share.name))
        # eg /mnt2/poolname/sharename
        # Passed db snap assured by caller but this does not guarantee on disk.
        if not is_subvol(snap_path):
            raise Exception('Subvol with path ({}) does not exist. Aborting '
                            'replacement of share ({}).'.format(snap_path,
                                                                share.name))
        # unmounts and then subvol deletes our on disk share
        remove_share(share.pool, share.name, PQGROUP_DEFAULT)
        # Remove read only flag on our snapshot subvol
        set_property(snap_path, 'ro', 'false', mount=False)
        # Ensure removed share path is clean, ie remove mount point.
        run_command(['/usr/bin/rm', '-rf', share_path], throw=False)
        # Now move snapshot to prior shares location. Given both a share and
        # a snapshot are subvols, we effectively promote the snap to a share.
        shutil.move(snap_path, share_path)
        # This should have re-established our just removed subvol.
        # Supplant share db info with snap info to reflect new on disk state.
        share.qgroup = snapshot.qgroup
        share.rusage = snapshot.rusage
        share.eusage = snapshot.eusage
        share.save()
        # delete our now redundant snapshot/quirky share db entry
        snapshot.delete()
        # update our share's quota (size is KB; limit applied in bytes).
        update_quota(share.pool, share.pqgroup, share.size * 1024)
        # mount our newly supplanted share
        mnt_pt = '{}{}'.format(settings.MNT_PT, share.name)
        mount_share(share, mnt_pt)
        return Response(ShareSerializer(share).data)
    except Exception as e:
        handle_exception(e, request)
def run(self):
    """
    Drive one replication receive session over zmq.

    Connects a SUB socket to the sender's data port and a PUSH socket to its
    meta port, ensures the destination share / replica metadata / parent
    subvol exist, then streams received chunks into a `btrfs receive`
    subprocess until an END_SUCCESS / END_FAIL sentinel arrives. On success
    the oldest retained snapshot is promoted to the share; trail records are
    updated throughout. Exits via self._sys_exit() on receive timeouts.
    """
    msg = ('Failed to get the sender ip from the uuid(%s) for meta: %s' %
           (self.meta['uuid'], self.meta))
    with self._clean_exit_handler(msg):
        self.sender_ip = get_sender_ip(self.meta['uuid'], logger)

    msg = ('Failed to connect to the sender(%s) on data_port(%s). meta: '
           '%s. Aborting.' % (self.sender_ip, self.data_port, self.meta))
    with self._clean_exit_handler(msg):
        # @todo: add validation
        recv_sub = self.ctx.socket(zmq.SUB)
        recv_sub.connect('tcp://%s:%d' % (self.sender_ip, self.data_port))
        # 100ms receive timeout: a zmq.error.Again per window drives the
        # 600-strikes (~60s) abort logic at the bottom of the recv loop.
        recv_sub.RCVTIMEO = 100
        # Messages are prefixed with the meta id; subscribe to ours only.
        recv_sub.setsockopt(zmq.SUBSCRIBE, str(self.meta['id']))

    msg = ('Failed to connect to the sender(%s) on '
           'meta_port(%d). meta: %s. Aborting.' %
           (self.sender_ip, self.meta_port, self.meta))
    with self._clean_exit_handler(msg):
        self.meta_push = self.ctx.socket(zmq.PUSH)
        self.meta_push.connect('tcp://%s:%d' % (self.sender_ip,
                                                self.meta_port))

    # Destination share name is namespaced by the sender appliance id.
    sname = ('%s_%s' % (self.sender_id, self.src_share))
    if (not self.incremental):
        # Full send: the share and its replica metadata must be created.
        msg = ('Failed to verify/create share: %s. meta: %s. '
               'Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg, ack=True):
            create_share(sname, self.dest_pool, logger)

        msg = ('Failed to create the replica metadata object '
               'for share: %s. meta: %s. Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg, ack=True):
            data = {'share': sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share,
                    'data_port': self.data_port,
                    'meta_port': self.meta_port, }
            self.rid = create_rshare(data, logger)
    else:
        # Incremental send: metadata must already exist from a prior run.
        msg = ('Failed to retreive the replica metadata object for '
               'share: %s. meta: %s. Aborting.' % (sname, self.meta))
        with self._clean_exit_handler(msg):
            self.rid = rshare_id(sname, logger)

    # Parent subvol that houses all replication snapshots for this share.
    sub_vol = ('%s%s/.snapshots/%s' %
               (settings.MNT_PT, self.meta['pool'], sname))
    if (not is_subvol(sub_vol)):
        msg = ('Failed to create parent subvolume %s' % sub_vol)
        with self._clean_exit_handler(msg, ack=True):
            run_command([BTRFS, 'subvolume', 'create', sub_vol])

    snap_fp = ('%s/%s' % (sub_vol, self.snap_name))
    # NOTE(review): msg here is stale — it still holds the last message
    # assigned above, so a failure in this block logs a misleading error.
    # Left as-is to preserve behavior; consider a dedicated msg.
    with self._clean_exit_handler(msg):
        if (is_subvol(snap_fp)):
            # Snapshot left over from a previous (failed) attempt: tell the
            # sender so it does not re-send the same snapshot.
            ack = {'msg': 'snap_exists',
                   'id': self.meta['id'], }
            self.meta_push.send_json(ack)

    cmd = [BTRFS, 'receive', sub_vol]
    msg = ('Failed to start the low level btrfs receive command(%s)'
           '. Aborting.' % (cmd))
    with self._clean_exit_handler(msg, ack=True):
        rp = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)

    msg = ('Failed to send begin_ok to the sender for meta: %s' % self.meta)
    with self._clean_exit_handler(msg):
        ack = {'msg': 'begin_ok',
               'id': self.meta['id'], }
        self.meta_push.send_json(ack)

    recv_timeout_counter = 0
    # Simple credit-based flow control: top the sender back up whenever our
    # outstanding credit drops below 5.
    credit = settings.DEFAULT_SEND_CREDIT
    check_credit = True
    while True:
        if (check_credit is True and credit < 5):
            ack = {'msg': 'send_more',
                   'id': self.meta['id'],
                   'credit': settings.DEFAULT_SEND_CREDIT, }
            self.meta_push.send_json(ack)
            credit = credit + settings.DEFAULT_SEND_CREDIT
            logger.debug('%d KB received for %s' %
                         (int(self.kb_received / 1024), sname))

        try:
            recv_data = recv_sub.recv()
            # Strip the subscription (meta id) prefix from the payload.
            recv_data = recv_data[len(self.meta['id']):]
            credit = credit - 1
            recv_timeout_counter = 0
            self.kb_received = self.kb_received + len(recv_data)
            if (self.rtid is None):
                msg = ('Failed to create snapshot: %s. Aborting.' %
                       self.snap_name)
                # create a snapshot only if it's not already from a
                # previous failed attempt
                with self._clean_exit_handler(msg, ack=True):
                    create_snapshot(sname, self.snap_name, logger,
                                    snap_type='receiver')

                data = {'snap_name': self.snap_name}
                msg = ('Failed to create receive trail for rid: %d'
                       '. meta: %s' % (self.rid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    self.rtid = create_receive_trail(self.rid, data, logger)

            if (recv_data == 'END_SUCCESS' or recv_data == 'END_FAIL'):
                # Stream sentinel: record outcome and leave the loop.
                check_credit = False
                ts = datetime.utcnow().replace(tzinfo=utc)
                data = {'kb_received': self.kb_received / 1024, }
                if (recv_data == 'END_SUCCESS'):
                    data['receive_succeeded'] = ts
                    # delete the share, move the oldest snap to share
                    oldest_snap = get_oldest_snap(sub_vol, 3)
                    if (oldest_snap is not None):
                        snap_path = ('%s/%s' % (sub_vol, oldest_snap))
                        share_path = ('%s%s/%s' %
                                      (settings.MNT_PT, self.dest_pool,
                                       sname))
                        msg = ('Failed to promote the oldest Snapshot(%s) '
                               'to Share(%s)' % (snap_path, share_path))
                        try:
                            pool = Pool.objects.get(name=self.dest_pool)
                            remove_share(pool, sname)
                            set_property(snap_path, 'ro', 'false',
                                         mount=False)
                            run_command(['/usr/bin/rm', '-rf', share_path],
                                        throw=False)
                            shutil.move(snap_path, share_path)
                            set_property(share_path, 'ro', 'true',
                                         mount=False)
                            delete_snapshot(sname, oldest_snap, logger)
                        except Exception:
                            # Fix: was py2-only `except Exception, e:` with
                            # a redundant logger.error(msg) duplicating
                            # logger.exception (which already logs msg at
                            # ERROR, plus the traceback). Promotion failure
                            # is deliberately non-fatal here.
                            logger.exception(msg)
                else:
                    logger.error('END_FAIL received for meta: %s. '
                                 'Terminating.' % self.meta)
                    rp.terminate()
                    data['receive_failed'] = ts
                    data['status'] = 'failed'

                msg = ('Failed to update receive trail for rtid: %d'
                       '. meta: %s' % (self.rtid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    update_receive_trail(self.rtid, data, logger)
                break

            if (rp.poll() is None):
                # btrfs receive still alive: feed it the chunk.
                rp.stdin.write(recv_data)
                rp.stdin.flush()
            else:
                logger.error('It seems the btrfs receive process died'
                             ' unexpectedly.')
                out, err = rp.communicate()
                msg = ('Low level system error from btrfs receive '
                       'command. out: %s err: %s for rtid: %s meta: %s'
                       % (out, err, self.rtid, self.meta))
                with self._clean_exit_handler(msg, ack=True):
                    ts = datetime.utcnow().replace(tzinfo=utc)
                    data = {'receive_failed': ts,
                            'status': 'failed',
                            'error': msg, }
                    update_receive_trail(self.rtid, data, logger)
        except zmq.error.Again:
            # RCVTIMEO(100ms) expired: after 600 consecutive misses (~60s
            # of silence) give up on the sender.
            recv_timeout_counter = recv_timeout_counter + 1
            if (recv_timeout_counter > 600):
                logger.error('Nothing received in the last 60 seconds '
                             'from the sender for meta: %s. Aborting.' %
                             self.meta)
                self._sys_exit(3)
def run(self):
    """
    Main entry point for one receiver session (DEALER-socket variant).

    Connects a zmq DEALER socket to the local broker's ipc endpoint, ensures
    the destination share / replica metadata / receive trail exist, promotes
    and prunes old replication snapshots, then streams broker messages into
    a `btrfs receive` subprocess until `btrfs-send-stream-finished` (or a
    terminal error command) arrives. Progress/error state is carried in
    self.msg for the surrounding _clean_exit_handler; exits via
    self._sys_exit().
    """
    logger.debug('Id: %s. Starting a new Receiver for meta: %s' %
                 (self.identity, self.meta))
    # self.msg is the message _clean_exit_handler reports if the body below
    # raises; it is re-assigned before each failure-prone step.
    self.msg = ('Top level exception in receiver')
    latest_snap = None
    with self._clean_exit_handler():
        self.law = APIWrapper()
        self.poll = zmq.Poller()
        self.dealer = self.ctx.socket(zmq.DEALER)
        self.dealer.setsockopt_string(zmq.IDENTITY, u'%s' % self.identity)
        # Small high-water mark: bound queued messages per peer.
        self.dealer.set_hwm(10)
        self.dealer.connect('ipc://%s' %
                            settings.REPLICATION.get('ipc_socket'))
        self.poll.register(self.dealer, zmq.POLLIN)

        self.ack = True
        self.msg = ('Failed to get the sender ip for appliance: %s' %
                    self.sender_id)
        self.sender_ip = Appliance.objects.get(uuid=self.sender_id).ip

        if (not self.incremental):
            # Full send: destination share and replica metadata must be
            # created before receiving.
            self.msg = ('Failed to verify/create share: %s.' % self.sname)
            self.create_share(self.sname, self.dest_pool)

            self.msg = ('Failed to create the replica metadata object '
                        'for share: %s.' % self.sname)
            data = {'share': self.sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share, }
            self.rid = self.create_rshare(data)
        else:
            # Incremental send: metadata must already exist.
            self.msg = ('Failed to retreive the replica metadata '
                        'object for share: %s.' % self.sname)
            rso = ReplicaShare.objects.get(share=self.sname)
            self.rid = rso.id
            # Find and send the current snapshot to the sender. This will
            # be used as the start by btrfs-send diff.
            self.msg = ('Failed to verify latest replication snapshot '
                        'on the system.')
            latest_snap = self._latest_snap(rso)

        self.msg = ('Failed to create receive trail for rid: %d' %
                    self.rid)
        data = {'snap_name': self.snap_name, }
        self.rtid = self.create_receive_trail(self.rid, data)

        # delete the share, move the oldest snap to share
        self.msg = ('Failed to promote the oldest Snapshot to Share.')
        oldest_snap = get_oldest_snap(self.snap_dir, self.num_retain_snaps,
                                      regex='_replication_')
        if (oldest_snap is not None):
            snap_path = ('%s/%s' % (self.snap_dir, oldest_snap))
            share_path = ('%s%s/%s' %
                          (settings.MNT_PT, self.dest_pool, self.sname))
            pool = Pool.objects.get(name=self.dest_pool)
            # '-1/-1' — presumably the default/unset pqgroup sentinel
            # (PQGROUP_DEFAULT elsewhere in this file); confirm.
            remove_share(pool, self.sname, '-1/-1')
            set_property(snap_path, 'ro', 'false', mount=False)
            # Best-effort removal of the stale mount point directory.
            run_command(['/usr/bin/rm', '-rf', share_path], throw=False)
            shutil.move(snap_path, share_path)
            self.delete_snapshot(self.sname, oldest_snap)

        self.msg = ('Failed to prune old Snapshots')
        self._delete_old_snaps(self.sname, self.snap_dir,
                               self.num_retain_snaps + 1)

        self.msg = ('Failed to validate the source share(%s) on '
                    'sender(uuid: %s '
                    ') Did the ip of the sender change?' %
                    (self.src_share, self.sender_id))
        self.validate_src_share(self.sender_id, self.src_share)

        # Ensure the destination subvol and snapshot directory exist.
        sub_vol = ('%s%s/%s' %
                   (settings.MNT_PT, self.dest_pool, self.sname))
        if (not is_subvol(sub_vol)):
            self.msg = ('Failed to create parent subvolume %s' % sub_vol)
            run_command([BTRFS, 'subvolume', 'create', sub_vol])

        self.msg = ('Failed to create snapshot directory: %s' %
                    self.snap_dir)
        run_command(['/usr/bin/mkdir', '-p', self.snap_dir])
        snap_fp = ('%s/%s' % (self.snap_dir, self.snap_name))

        # If the snapshot already exists, presumably from the previous
        # attempt and the sender tries to send the same, reply back with
        # snap_exists and do not start the btrfs-receive
        if (is_subvol(snap_fp)):
            logger.debug('Id: %s. Snapshot to be sent(%s) already '
                         'exists. Not starting a new receive process'
                         % (self.identity, snap_fp))
            self._send_recv('snap-exists')
            self._sys_exit(0)

        cmd = [BTRFS, 'receive', self.snap_dir]
        self.msg = ('Failed to start the low level btrfs receive '
                    'command(%s). Aborting.' % cmd)
        self.rp = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)

        # Handshake: announce readiness, passing the latest snapshot name
        # ('' for a full send) as the incremental diff base.
        self.msg = ('Failed to send receiver-ready')
        rcommand, rmsg = self._send_recv('receiver-ready',
                                         latest_snap or '')
        if (rcommand is None):
            logger.error('Id: %s. No response from the broker for '
                         'receiver-ready command. Aborting.' %
                         self.identity)
            self._sys_exit(3)

        # Commands from the sender that abort this session outright.
        term_commands = ('btrfs-send-init-error',
                         'btrfs-send-unexpected-termination-error',
                         'btrfs-send-nonzero-termination-error', )
        num_tries = 10
        poll_interval = 6000  # 6 seconds
        num_msgs = 0
        t0 = time.time()
        while (True):
            socks = dict(self.poll.poll(poll_interval))
            if (socks.get(self.dealer) == zmq.POLLIN):
                # reset to wait upto 60(poll_interval x num_tries
                # milliseconds) for every message
                num_tries = 10
                command, message = self.dealer.recv_multipart()
                if (command == 'btrfs-send-stream-finished'):
                    # this command concludes fsdata transfer. After this,
                    # btrfs-recev process should be
                    # terminated(.communicate).
                    if (self.rp.poll() is None):
                        self.msg = ('Failed to terminate btrfs-recv '
                                    'command')
                        out, err = self.rp.communicate()
                        out = out.split('\n')
                        err = err.split('\n')
                        logger.debug('Id: %s. Terminated btrfs-recv. '
                                     'cmd = %s out = %s err: %s rc: %s' %
                                     (self.identity, cmd, out, err,
                                      self.rp.returncode))
                    if (self.rp.returncode != 0):
                        self.msg = ('btrfs-recv exited with unexpected '
                                    'exitcode(%s). ' % self.rp.returncode)
                        raise Exception(self.msg)
                    # Success path: acknowledge, refresh db state, mark the
                    # trail succeeded, report throughput, and exit cleanly.
                    self._send_recv('btrfs-recv-finished')
                    self.refresh_share_state()
                    self.refresh_snapshot_state()

                    self.msg = ('Failed to update receive trail for '
                                'rtid: %d' % self.rtid)
                    self.update_receive_trail(self.rtid,
                                              {'status': 'succeeded', })

                    dsize, drate = self.size_report(
                        self.total_bytes_received, t0)
                    logger.debug('Id: %s. Receive complete. Total data '
                                 'transferred: %s. Rate: %s/sec.' %
                                 (self.identity, dsize, drate))
                    self._sys_exit(0)

                if (command in term_commands):
                    self.msg = ('Terminal command(%s) received from the '
                                'sender. Aborting.' % command)
                    raise Exception(self.msg)

                if (self.rp.poll() is None):
                    # btrfs-recv still alive: feed it this chunk and ask
                    # the sender for more.
                    self.rp.stdin.write(message)
                    self.rp.stdin.flush()
                    # @todo: implement advanced credit request system.
                    self.dealer.send_multipart([b'send-more', ''])
                    num_msgs += 1
                    self.total_bytes_received += len(message)
                    # Periodic liveness/throughput log every 1000 messages.
                    if (num_msgs == 1000):
                        num_msgs = 0
                        dsize, drate = self.size_report(
                            self.total_bytes_received, t0)
                        logger.debug('Id: %s. Receiver alive. Data '
                                     'transferred: %s. Rate: %s/sec.' %
                                     (self.identity, dsize, drate))
                else:
                    # btrfs-recv died mid-stream: capture its output, mark
                    # the trail failed and abort with the low level error.
                    out, err = self.rp.communicate()
                    out = out.split('\n')
                    err = err.split('\n')
                    logger.error('Id: %s. btrfs-recv died unexpectedly. '
                                 'cmd: %s out: %s. err: %s' %
                                 (self.identity, cmd, out, err))
                    msg = ('Low level system error from btrfs receive '
                           'command. cmd: %s out: %s err: %s for rtid: %s'
                           % (cmd, out, err, self.rtid))
                    data = {'status': 'failed',
                            'error': msg, }
                    self.msg = ('Failed to update receive trail for '
                                'rtid: %d.' % self.rtid)
                    self.update_receive_trail(self.rtid, data)
                    self.msg = msg
                    raise Exception(self.msg)
            else:
                # Poll timed out with no broker traffic: count down and
                # abort after num_tries consecutive silent intervals.
                num_tries -= 1
                msg = ('No response received from the broker. '
                       'remaining tries: %d' % num_tries)
                logger.error('Id: %s. %s' % (self.identity, msg))
                if (num_tries == 0):
                    self.msg = ('%s. Terminating the receiver.' % msg)
                    raise Exception(self.msg)