Example #1
0
 def validate_src_share(self, sender_uuid, sname):
     """Query the sender's API for the share called ``sname``.

     ``self.raw`` is created on the first call, from the Appliance record
     matching ``sender_uuid``, and reused on later calls.

     :param sender_uuid: uuid used to look up the sender's Appliance record.
     :param sname: share name appended to the ``shares/`` endpoint.
     :return: whatever ``self.raw.api_call`` returns for that endpoint.
     """
     if self.raw is None:
         # No wrapper yet: build one aimed at the sender's management address.
         scheme = 'https://'
         appliance = Appliance.objects.get(uuid=sender_uuid)
         base_url = '%s%s:%s' % (scheme, appliance.ip, appliance.mgmt_port)
         self.raw = APIWrapper(
             client_id=appliance.client_id,
             client_secret=appliance.client_secret,
             url=base_url,
         )
     share_endpoint = 'shares/%s' % sname
     return self.raw.api_call(url=share_endpoint)
Example #2
0
 def validate_src_share(self, sender_uuid, sname):
     """Look up share ``sname`` through the sending appliance's API.

     The wrapper stored in ``self.raw`` is only constructed when it is
     still ``None``; otherwise the existing one is used unchanged.

     :param sender_uuid: uuid of the Appliance record to connect to.
     :param sname: share name placed after ``shares/`` in the request url.
     :return: the result of ``self.raw.api_call``.
     """
     if self.raw is None:
         prefix = "https://"
         sender = Appliance.objects.get(uuid=sender_uuid)
         endpoint_root = "%s%s:%s" % (prefix, sender.ip, sender.mgmt_port)
         self.raw = APIWrapper(
             client_id=sender.client_id,
             client_secret=sender.client_secret,
             url=endpoint_root,
         )
     # TODO: update url to include senders shareId as sname is now invalid
     request_path = "shares/%s" % sname
     return self.raw.api_call(url=request_path)
Example #3
0
class ReplicationMixin(object):
    """Mixin bundling a logger accessor and two API helper methods."""

    @classmethod
    def get_logger(cls):
        """Return the logger stored on the class, configuring it on first use.

        The first call fetches the root logger, sets it to DEBUG, attaches a
        DEBUG-level stdout handler with a timestamped format and stores the
        logger as ``cls.logger``. Later calls return that stored logger.

        :return: the ``logging.Logger`` stored on the class.
        """
        if hasattr(cls, 'logger'):
            return cls.logger
        cls.logger = logging.getLogger()
        cls.logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s',
            datefmt='%d/%b/%Y %H:%M:%S')
        ch.setFormatter(formatter)
        cls.logger.addHandler(ch)
        return cls.logger

    def validate_src_share(self, sender_uuid, sname):
        """Query the sender's API for the share called ``sname``.

        ``self.raw`` is created on the first call from the Appliance record
        matching ``sender_uuid`` and reused afterwards.

        :param sender_uuid: uuid used to look up the sender's Appliance.
        :param sname: share name appended to the ``shares/`` endpoint.
        :return: the result of ``self.raw.api_call``.
        """
        url = 'https://'
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = ('%s%s:%s' % (url, a.ip, a.mgmt_port))
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: numeric replica trail id (formatted with ``%d``).
        :param data: payload for the call; ``data['status']`` is used in
            the error message.
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rtid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # Built outside the try so the handler below can always reference it;
        # previously a formatting failure surfaced as an UnboundLocalError.
        url = ('sm/replicas/trail/%d' % rtid)
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e.__str__()))
            raise Exception(msg)
Example #4
0
 def validate_src_share(self, sender_uuid, sname):
     """Fetch share ``sname`` from the sending appliance via its API.

     When ``self.raw`` is ``None`` a new APIWrapper is built from the
     Appliance whose uuid equals ``sender_uuid``; an existing wrapper is
     used as is.

     :param sender_uuid: uuid identifying the sender's Appliance record.
     :param sname: name of the share to request.
     :return: the value returned by ``self.raw.api_call``.
     """
     wrapper_missing = self.raw is None
     if wrapper_missing:
         remote = Appliance.objects.get(uuid=sender_uuid)
         remote_url = '%s%s:%s' % ('https://', remote.ip, remote.mgmt_port)
         self.raw = APIWrapper(client_id=remote.client_id,
                               client_secret=remote.client_secret,
                               url=remote_url)
     target = 'shares/%s' % sname
     return self.raw.api_call(url=target)
Example #5
0
class ReplicationMixin(object):
    """Mixin providing two API helper methods used during replication."""

    def validate_src_share(self, sender_uuid, sname):
        """Query the sender's API for the share called ``sname``.

        ``self.raw`` is created on the first call from the Appliance record
        matching ``sender_uuid`` and reused afterwards.

        :param sender_uuid: uuid used to look up the sender's Appliance.
        :param sname: share name appended to the ``shares/`` endpoint.
        :return: the result of ``self.raw.api_call``.
        """
        url = 'https://'
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = ('%s%s:%s' % (url, a.ip, a.mgmt_port))
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: numeric replica trail id (formatted with ``%d``).
        :param data: payload for the call; ``data['status']`` is used in
            the error message.
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rtid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # Assigned before the try so the except clause never sees an
        # unbound ``url``.
        url = ('sm/replicas/trail/%d' % rtid)
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e.__str__()))
            raise Exception(msg)
Example #6
0
class ReplicationMixin(object):
    """Mixin providing two API helper methods used during replication."""

    def validate_src_share(self, sender_uuid, sname):
        """Look up share ``sname`` through the sending appliance's API.

        The wrapper in ``self.raw`` is built only while it is ``None``,
        using the Appliance record whose uuid is ``sender_uuid``.

        :param sender_uuid: uuid used to look up the sender's Appliance.
        :param sname: share name appended to the ``shares/`` endpoint.
        :return: the result of ``self.raw.api_call``.
        """
        url = 'https://'
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = ('%s%s:%s' % (url, a.ip, a.mgmt_port))
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """Send a PUT with ``data`` for replica trail ``rtid``.

        :param rtid: numeric replica trail id (formatted with ``%d``).
        :param data: payload for the call; ``data['status']`` is used in
            the error message.
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rtid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # ``url`` must exist before entering the try: the handler formats it
        # into the error message.
        url = ('sm/replicas/trail/%d' % rtid)
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e.__str__()))
            raise Exception(msg)
Example #7
0
    def run(self):
        """Run the replication broker loop until the parent process changes.

        Setup: reads the "replication" Service config for the listener port
        and network interface, looks up the current appliance uuid, then
        binds two ZeroMQ ROUTER sockets -- ``frontend`` on TCP and
        ``backend`` on the IPC path from ``settings.REPLICATION``.

        Loop: each poll services at most one socket, frontend first.
        Frontend "sender-ready" messages start a new Receiver; every other
        frontend message is forwarded to the backend. Backend "new-send"
        messages start a sender for the given replica id; other backend
        messages from known remote senders are relayed to the frontend.
        Polls that return no traffic count towards periodic housekeeping.

        :return: None. Returns early, after logging an error, when the
            service configuration or the appliance uuid cannot be read.
        """
        self.law = APIWrapper()

        try:
            so = Service.objects.get(name="replication")
            config_d = json.loads(so.config)
            self.listener_port = int(config_d["listener_port"])
            nco = NetworkConnection.objects.get(name=config_d["network_interface"])
            self.listener_interface = nco.ipaddr
        except NetworkConnection.DoesNotExist:
            # No matching connection record: listen on all interfaces.
            self.listener_interface = "0.0.0.0"
        except Exception as e:
            msg = (
                "Failed to fetch network interface for Listner/Broker. "
                "Exception: %s" % e.__str__()
            )
            return logger.error(msg)

        try:
            self.uuid = Appliance.objects.get(current_appliance=True).uuid
        except Exception as e:
            msg = (
                "Failed to get uuid of current appliance. Aborting. "
                "Exception: %s" % e.__str__()
            )
            return logger.error(msg)

        ctx = zmq.Context()
        frontend = ctx.socket(zmq.ROUTER)
        frontend.set_hwm(10)
        frontend.bind("tcp://%s:%d" % (self.listener_interface, self.listener_port))

        backend = ctx.socket(zmq.ROUTER)
        backend.bind("ipc://%s" % settings.REPLICATION.get("ipc_socket"))

        poller = zmq.Poller()
        poller.register(frontend, zmq.POLLIN)
        poller.register(backend, zmq.POLLIN)
        self.local_receivers = {}

        # Housekeeping runs once every ``iterations`` idle polls.
        iterations = 10
        poll_interval = 6000  # 6 seconds
        msg_count = 0
        while True:
            # This loop may still continue even if replication service
            # is terminated, as long as data is coming in.
            socks = dict(poller.poll(timeout=poll_interval))
            if frontend in socks and socks[frontend] == zmq.POLLIN:
                address, command, msg = frontend.recv_multipart()
                # Per-sender message counters, reported every 1000 messages.
                if address not in self.remote_senders:
                    self.remote_senders[address] = 1
                else:
                    self.remote_senders[address] += 1
                msg_count += 1
                if msg_count == 1000:
                    msg_count = 0
                    for rs, count in self.remote_senders.items():
                        logger.debug(
                            "Active Receiver: %s. Messages processed:"
                            "%d" % (rs, count)
                        )
                if command == "sender-ready":
                    logger.debug("initial greeting from %s" % address)
                    # Start a new receiver and send the appropriate response
                    try:
                        start_nr = True
                        if address in self.local_receivers:
                            start_nr = False
                            ecode = self.local_receivers[address].exitcode
                            if ecode is not None:
                                # The previous receiver has exited; drop it
                                # and allow a replacement to start.
                                del self.local_receivers[address]
                                logger.debug(
                                    "Receiver(%s) exited. exitcode: "
                                    "%s. Forcing removal from broker "
                                    "list." % (address, ecode)
                                )
                                start_nr = True
                            else:
                                msg = (
                                    "Receiver(%s) already exists. "
                                    "Will not start a new one." % address
                                )
                                logger.error(msg)
                                # @todo: There may be a different way to handle
                                # this. For example, we can pass the message to
                                # the active receiver and factor into it's
                                # retry/robust logic. But that is for later.
                                frontend.send_multipart(
                                    [address, "receiver-init-error", msg]
                                )
                        if start_nr:
                            nr = Receiver(address, msg)
                            nr.daemon = True
                            nr.start()
                            logger.debug("New Receiver(%s) started." % address)
                            self.local_receivers[address] = nr
                        continue
                    except Exception as e:
                        msg = (
                            "Exception while starting the "
                            "new receiver for %s: %s" % (address, e.__str__())
                        )
                        logger.error(msg)
                        frontend.send_multipart([address, "receiver-init-error", msg])
                else:
                    # do we hit hwm? is the dealer still connected?
                    backend.send_multipart([address, command, msg])

            elif backend in socks and socks[backend] == zmq.POLLIN:
                address, command, msg = backend.recv_multipart()
                if command == "new-send":
                    rid = int(msg)
                    logger.debug("new-send request received for %d" % rid)
                    # Reply defaults to ERROR; only a started sender flips it.
                    rcommand = "ERROR"
                    try:
                        replica = Replica.objects.get(id=rid)
                        if replica.enabled:
                            self._process_send(replica)
                            msg = (
                                "A new Sender started successfully for "
                                "Replication Task(%d)." % rid
                            )
                            rcommand = "SUCCESS"
                        else:
                            msg = (
                                "Failed to start a new Sender for "
                                "Replication "
                                "Task(%d) because it is disabled." % rid
                            )
                    except Exception as e:
                        msg = (
                            "Failed to start a new Sender for Replication "
                            "Task(%d). Exception: %s" % (rid, e.__str__())
                        )
                        logger.error(msg)
                    finally:
                        # Always answer the requester, whatever the outcome.
                        backend.send_multipart([address, rcommand, str(msg)])
                elif address in self.remote_senders:
                    if command in (
                        "receiver-ready",
                        "receiver-error",
                        "btrfs-recv-finished",
                    ):  # noqa E501
                        logger.debug("Identitiy: %s command: %s" % (address, command))
                        backend.send_multipart([address, b"ACK", ""])
                        # a new receiver has started. reply to the sender that
                        # must be waiting
                    frontend.send_multipart([address, command, msg])

            else:
                # Neither socket was serviced on this poll.
                iterations -= 1
                if iterations == 0:
                    iterations = 10
                    self._prune_senders()
                    self._delete_receivers()
                    cur_time = time.time()
                    if (
                        self.trail_prune_time is None
                        or (cur_time - self.trail_prune_time) > 3600
                    ):
                        # prune send/receive trails every hour or so.
                        self.trail_prune_time = cur_time
                        # Explicit loops rather than map(): map() is lazy on
                        # Python 3, so the prune calls would never execute.
                        for ro in Replica.objects.filter():
                            self.prune_replica_trail(ro)
                        for ro in ReplicaShare.objects.filter():
                            self.prune_receive_trail(ro)
                        logger.debug("Replica trails are truncated successfully.")

                    if os.getppid() != self.ppid:
                        logger.error("Parent exited. Aborting.")
                        ctx.destroy()
                        # do some cleanup of senders before quitting?
                        break
Example #8
0
class ReplicationMixin(object):
    """Mixin of API helper methods for replication send/receive code.

    Methods use ``self.law`` for API calls, except ``validate_src_share``
    which uses ``self.raw`` (built on demand for the sender appliance).
    """

    def validate_src_share(self, sender_uuid, sname):
        """Query the sender's API for the share called ``sname``.

        ``self.raw`` is created on the first call from the Appliance record
        matching ``sender_uuid`` and reused afterwards.

        :param sender_uuid: uuid used to look up the sender's Appliance.
        :param sname: share name appended to the ``shares/`` endpoint.
        :return: the result of ``self.raw.api_call``.
        """
        url = 'https://'
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = ('%s%s:%s' % (url, a.ip, a.mgmt_port))
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: numeric replica trail id (formatted with ``%d``).
        :param data: payload; ``data['status']`` is used in the error text.
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rtid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # Built before the try so the handler can always reference it; a
        # formatting failure used to surface as an UnboundLocalError.
        url = ('sm/replicas/trail/%d' % rtid)
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e.__str__()))
            raise Exception(msg)

    def disable_replica(self, rid):
        """PUT ``enabled: False`` to the replica identified by ``rid``.

        :param rid: numeric replica id (formatted with ``%d``).
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # See update_replica_status: url must be bound before the try.
        url = ('sm/replicas/%d' % rid)
        try:
            headers = {
                'content-type': 'application/json',
            }
            return self.law.api_call(url,
                                     data={
                                         'enabled': False,
                                     },
                                     calltype='put',
                                     save_error=False,
                                     headers=headers)
        except Exception as e:
            msg = ('Exception while disabling replica(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def create_replica_trail(self, rid, snap_name):
        """POST a new trail entry carrying ``snap_name`` for replica ``rid``.

        :return: the result of ``self.law.api_call``.
        """
        url = ('sm/replicas/trail/replica/%d' % rid)
        return self.law.api_call(url,
                                 data={
                                     'snap_name': snap_name,
                                 },
                                 calltype='post',
                                 save_error=False)

    def rshare_id(self, sname):
        """Return the ``id`` field of the replica share fetched for ``sname``."""
        url = ('sm/replicas/rshare/%s' % sname)
        rshare = self.law.api_call(url, save_error=False)
        return rshare['id']

    def create_rshare(self, data):
        """POST a replica share and return its id.

        When the API reports that the replica share for ``data['share']``
        already exists, the id of that existing one is looked up and
        returned instead. Any other API exception is re-raised.

        :param data: payload for the call; must contain ``'share'`` for the
            already-exists check.
        :return: the ``id`` of the new or existing replica share.
        """
        try:
            url = 'sm/replicas/rshare'
            rshare = self.law.api_call(url,
                                       data=data,
                                       calltype='post',
                                       save_error=False)
            return rshare['id']
        except RockStorAPIException as e:
            if (e.detail == 'Replicashare(%s) already exists.' %
                    data['share']):  # noqa E501
                return self.rshare_id(data['share'])
            raise

    def create_receive_trail(self, rid, data):
        """POST a receive trail for replica share ``rid``; return its id."""
        url = ('sm/replicas/rtrail/rshare/%d' % rid)
        rt = self.law.api_call(url,
                               data=data,
                               calltype='post',
                               save_error=False)
        return rt['id']

    def update_receive_trail(self, rtid, data):
        """PUT ``data`` to the receive trail identified by ``rtid``.

        :return: the result of ``self.law.api_call``.
        :raises Exception: wrapping any error raised by the API call.
        """
        url = ('sm/replicas/rtrail/%d' % rtid)
        try:
            return self.law.api_call(url,
                                     data=data,
                                     calltype='put',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while updating receive trail(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def prune_trail(self, url, days=7):
        """Issue a DELETE on ``url`` with a ``days`` payload.

        :param url: trail endpoint to prune.
        :param days: value sent as ``days`` in the request data.
        :return: the result of ``self.law.api_call``.
        :raises Exception: wrapping any error raised by the API call.
        """
        try:
            data = {
                'days': days,
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype='delete',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while pruning trail for url(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def prune_receive_trail(self, ro):
        """Prune the receive trail of the object ``ro`` (uses ``ro.id``)."""
        url = ('sm/replicas/rtrail/rshare/%d' % ro.id)
        return self.prune_trail(url)

    def prune_replica_trail(self, ro):
        """Prune the replica trail of the object ``ro`` (uses ``ro.id``)."""
        url = ('sm/replicas/trail/replica/%d' % ro.id)
        return self.prune_trail(url)

    def create_snapshot(self, sname, snap_name, snap_type='replication'):
        """POST a snapshot ``snap_name`` for share ``sname``.

        An "already exists" API error is logged at debug level and not
        raised; any other API exception is re-raised.

        :return: the API call result, or the result of ``logger.debug``
            when the snapshot already exists.
        """
        try:
            url = ('shares/%s/snapshots/%s' % (sname, snap_name))
            return self.law.api_call(url,
                                     data={
                                         'snap_type': snap_type,
                                     },
                                     calltype='post',
                                     save_error=False)
        except RockStorAPIException as e:
            if (e.detail == ('Snapshot(%s) already exists for the Share(%s).' %
                             (snap_name, sname))):
                return logger.debug(e.detail)
            raise

    def delete_snapshot(self, sname, snap_name):
        """DELETE snapshot ``snap_name`` of share ``sname``.

        :return: True when the call succeeds, False when the API reports
            that the snapshot does not exist.
        """
        try:
            url = ('shares/%s/snapshots/%s' % (sname, snap_name))
            self.law.api_call(url, calltype='delete', save_error=False)
            return True
        except RockStorAPIException as e:
            if (e.detail == 'Snapshot(%s) does not exist.' % snap_name):
                logger.debug(e.detail)
                return False
            raise

    def create_share(self, sname, pool):
        """POST a replica share named ``sname`` on ``pool``.

        An "already exists" API error is logged at debug level and not
        raised; any other API exception is re-raised.

        :return: the API call result, or the result of ``logger.debug``
            when the share already exists.
        """
        try:
            url = 'shares'
            data = {
                'pool': pool,
                'replica': True,
                'sname': sname,
            }
            headers = {
                'content-type': 'application/json',
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype='post',
                                     headers=headers,
                                     save_error=False)
        except RockStorAPIException as e:
            if (e.detail == 'Share(%s) already exists. Choose a different name'
                    % sname):  # noqa E501
                return logger.debug(e.detail)
            raise

    def refresh_snapshot_state(self):
        """POST the refresh-snapshot-state command; log (not raise) errors."""
        try:
            return self.law.api_call('commands/refresh-snapshot-state',
                                     data=None,
                                     calltype='post',
                                     save_error=False)
        except Exception as e:
            logger.error('Exception while refreshing Snapshot state: %s' %
                         e.__str__())

    def refresh_share_state(self):
        """POST the refresh-share-state command; log (not raise) errors."""
        try:
            return self.law.api_call('commands/refresh-share-state',
                                     data=None,
                                     calltype='post',
                                     save_error=False)
        except Exception as e:
            # Message corrected from "refresh Shar state".
            logger.error('Exception while refreshing Share state: %s' %
                         e.__str__())

    def humanize_bytes(self, num, units=(
        'Bytes',
        'KB',
        'MB',
        'GB',
    )):
        """Format ``num`` with the first unit in which it is below 1024.

        Recurses through ``units``, dividing by 1024 at each step, and
        stops at the last unit regardless of magnitude.

        :param num: quantity expressed in ``units[0]``.
        :param units: remaining unit labels, smallest first.
        :return: string such as ``'2.00 KB'``.
        """
        if (num < 1024 or len(units) == 1):
            return '%.2f %s' % (num, units[0])
        return self.humanize_bytes(num / 1024, units[1:])

    def size_report(self, num, t0):
        """Return humanized size and rate for ``num`` bytes since ``t0``.

        :param num: amount transferred.
        :param t0: start time as returned by ``time.time()``.
        :return: tuple ``(size_string, rate_string)``.
        """
        t1 = time.time()
        dsize = self.humanize_bytes(float(num))
        drate = self.humanize_bytes(float(num / (t1 - t0)))
        return dsize, drate
Example #9
0
class ReplicationMixin(object):
    """Mixin of API helper methods for replication send/receive code.

    Methods use ``self.law`` for API calls, except ``validate_src_share``
    which uses ``self.raw`` (built on demand for the sender appliance).
    """

    def validate_src_share(self, sender_uuid, sname):
        """Query the sender's API for the share called ``sname``.

        ``self.raw`` is created on the first call from the Appliance record
        matching ``sender_uuid`` and reused afterwards.

        :param sender_uuid: uuid used to look up the sender's Appliance.
        :param sname: share name appended to the ``shares/`` endpoint.
        :return: the result of ``self.raw.api_call``.
        """
        url = 'https://'
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = ('%s%s:%s' % (url, a.ip, a.mgmt_port))
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        # TODO: update url to include senders shareId as sname is now invalid
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: numeric replica trail id (formatted with ``%d``).
        :param data: payload; ``data['status']`` is used in the error text.
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rtid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # Built before the try so the handler can always reference it; a
        # formatting failure used to surface as an UnboundLocalError.
        url = ('sm/replicas/trail/%d' % rtid)
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e.__str__()))
            raise Exception(msg)

    def disable_replica(self, rid):
        """PUT ``enabled: False`` to the replica identified by ``rid``.

        :param rid: numeric replica id (formatted with ``%d``).
        :return: the result of ``self.law.api_call``.
        :raises TypeError: if ``rid`` cannot be formatted as a number.
        :raises Exception: wrapping any error raised by the API call.
        """
        # See update_replica_status: url must be bound before the try.
        url = ('sm/replicas/%d' % rid)
        try:
            headers = {
                'content-type': 'application/json',
            }
            return self.law.api_call(url,
                                     data={
                                         'enabled': False,
                                     },
                                     calltype='put',
                                     save_error=False,
                                     headers=headers)
        except Exception as e:
            msg = ('Exception while disabling replica(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def create_replica_trail(self, rid, snap_name):
        """POST a new trail entry carrying ``snap_name`` for replica ``rid``.

        :return: the result of ``self.law.api_call``.
        """
        url = ('sm/replicas/trail/replica/%d' % rid)
        return self.law.api_call(url,
                                 data={
                                     'snap_name': snap_name,
                                 },
                                 calltype='post',
                                 save_error=False)

    def rshare_id(self, sname):
        """Return the ``id`` field of the replica share fetched for ``sname``."""
        url = ('sm/replicas/rshare/%s' % sname)
        rshare = self.law.api_call(url, save_error=False)
        return rshare['id']

    def create_rshare(self, data):
        """POST a replica share and return its id.

        When the API reports that the replica share for ``data['share']``
        already exists, the id of that existing one is looked up and
        returned instead. Any other API exception is re-raised.

        :param data: payload for the call; must contain ``'share'`` for the
            already-exists check.
        :return: the ``id`` of the new or existing replica share.
        """
        try:
            url = 'sm/replicas/rshare'
            rshare = self.law.api_call(url,
                                       data=data,
                                       calltype='post',
                                       save_error=False)
            return rshare['id']
        except RockStorAPIException as e:
            # Note replica_share.py post() generates this exception message.
            if (e.detail == 'Replicashare(%s) already exists.' %
                    data['share']):  # noqa E501
                return self.rshare_id(data['share'])
            raise

    def create_receive_trail(self, rid, data):
        """POST a receive trail for replica share ``rid``; return its id."""
        url = ('sm/replicas/rtrail/rshare/%d' % rid)
        rt = self.law.api_call(url,
                               data=data,
                               calltype='post',
                               save_error=False)
        return rt['id']

    def update_receive_trail(self, rtid, data):
        """PUT ``data`` to the receive trail identified by ``rtid``.

        :return: the result of ``self.law.api_call``.
        :raises Exception: wrapping any error raised by the API call.
        """
        url = ('sm/replicas/rtrail/%d' % rtid)
        try:
            return self.law.api_call(url,
                                     data=data,
                                     calltype='put',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while updating receive trail(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def prune_trail(self, url, days=7):
        """Issue a DELETE on ``url`` with a ``days`` payload.

        :param url: trail endpoint to prune.
        :param days: value sent as ``days`` in the request data.
        :return: the result of ``self.law.api_call``.
        :raises Exception: wrapping any error raised by the API call.
        """
        try:
            data = {
                'days': days,
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype='delete',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while pruning trail for url(%s): %s' %
                   (url, e.__str__()))
            raise Exception(msg)

    def prune_receive_trail(self, ro):
        """Prune the receive trail of the object ``ro`` (uses ``ro.id``)."""
        url = ('sm/replicas/rtrail/rshare/%d' % ro.id)
        return self.prune_trail(url)

    def prune_replica_trail(self, ro):
        """Prune the replica trail of the object ``ro`` (uses ``ro.id``)."""
        url = ('sm/replicas/trail/replica/%d' % ro.id)
        return self.prune_trail(url)

    def create_snapshot(self, sname, snap_name, snap_type='replication'):
        """POST a snapshot ``snap_name`` for the share named ``sname``.

        The share is looked up by name and its id is used in the url. An
        "already exists" API error is logged at debug level and not raised;
        any other API exception is re-raised.

        :return: the API call result, or the result of ``logger.debug``
            when the snapshot already exists.
        """
        try:
            share = Share.objects.get(name=sname)
            url = ('shares/%s/snapshots/%s' % (share.id, snap_name))
            return self.law.api_call(url,
                                     data={
                                         'snap_type': snap_type,
                                     },
                                     calltype='post',
                                     save_error=False)
        except RockStorAPIException as e:
            # Note snapshot.py _create() generates this exception message.
            if (e.detail == ('Snapshot ({}) already exists for the share '
                             '({}).').format(snap_name, sname)):
                return logger.debug(e.detail)
            raise

    def update_repclone(self, sname, snap_name):
        """
        Call the dedicated create_repclone via its url to supplant our
        share with the given snapshot. Intended for use in receive.py to turn
        the oldest snapshot into an existing share via unmount, mv, mount
        cycle.
        :param sname: Existing share name
        :param snap_name: Name of snapshot to supplant given share with.
        :return: The API call result, or False when the API reports that the
            snapshot does not exist. Other API exceptions are re-raised.
        """
        try:
            share = Share.objects.get(name=sname)
            url = 'shares/{}/snapshots/{}/repclone'.format(share.id, snap_name)
            return self.law.api_call(url, calltype='post', save_error=False)
        except RockStorAPIException as e:
            # TODO: need to look further at the following as command repclone
            # TODO: (snapshot.py post) catches Snapshot.DoesNotExist.
            # TODO: and doesn't appear to call _delete_snapshot()
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            if (e.detail == 'Snapshot name ({}) does not '
                    'exist.'.format(snap_name)):
                logger.debug(e.detail)
                return False
            raise

    def delete_snapshot(self, sname, snap_name):
        """DELETE snapshot ``snap_name`` of the share named ``sname``.

        :return: True when the call succeeds, False when the API reports
            that the snapshot does not exist.
        """
        try:
            share = Share.objects.get(name=sname)
            url = ('shares/%s/snapshots/%s' % (share.id, snap_name))
            self.law.api_call(url, calltype='delete', save_error=False)
            return True
        except RockStorAPIException as e:
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            if (e.detail == 'Snapshot name ({}) does not '
                    'exist.'.format(snap_name)):
                logger.debug(e.detail)
                return False
            raise

    def create_share(self, sname, pool):
        """POST a replica share named ``sname`` on ``pool``.

        An "already exists" API error is logged at debug level and not
        raised; any other API exception is re-raised.

        :return: the API call result, or the result of ``logger.debug``
            when the share already exists.
        """
        try:
            url = 'shares'
            data = {
                'pool': pool,
                'replica': True,
                'sname': sname,
            }
            headers = {
                'content-type': 'application/json',
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype='post',
                                     headers=headers,
                                     save_error=False)
        except RockStorAPIException as e:
            # Note share.py post() generates this exception message.
            if (e.detail == 'Share ({}) already exists. Choose a different '
                    'name.'.format(sname)):  # noqa E501
                return logger.debug(e.detail)
            raise

    def refresh_snapshot_state(self):
        """POST the refresh-snapshot-state command; log (not raise) errors."""
        try:
            return self.law.api_call('commands/refresh-snapshot-state',
                                     data=None,
                                     calltype='post',
                                     save_error=False)
        except Exception as e:
            logger.error('Exception while refreshing Snapshot state: %s' %
                         e.__str__())

    def refresh_share_state(self):
        """POST the refresh-share-state command; log (not raise) errors."""
        try:
            return self.law.api_call('commands/refresh-share-state',
                                     data=None,
                                     calltype='post',
                                     save_error=False)
        except Exception as e:
            logger.error('Exception while refreshing Share state: %s' %
                         e.__str__())

    def humanize_bytes(self, num, units=(
        'Bytes',
        'KB',
        'MB',
        'GB',
    )):
        """
        Recursive routine to establish and then return the most appropriate
        num expression given the contents of units. Ie 1023 Bytes or 4096 KB
        :param num: Assumed to be in Byte units.
        :param units: list of units to recurse through
        :return: "1023.00 Bytes" or "4.28 KB" etc given num=1023 or num=4384
        """
        if (num < 1024 or len(units) == 1):
            return '%.2f %s' % (num, units[0])
        return self.humanize_bytes(num / 1024, units[1:])

    def size_report(self, num, t0):
        """Return humanized size and rate for ``num`` bytes since ``t0``.

        :param num: amount transferred.
        :param t0: start time as returned by ``time.time()``.
        :return: tuple ``(size_string, rate_string)``.
        """
        t1 = time.time()
        dsize = self.humanize_bytes(float(num))
        drate = self.humanize_bytes(float(num / (t1 - t0)))
        return dsize, drate
Example #10
0
class ReplicationMixin(object):
    """
    REST API helpers shared by the replication workers.

    Every method except validate_src_share() goes through ``self.law``;
    validate_src_share() uses ``self.raw``, which it creates on first use.
    Both attributes are expected to be initialised by the class this mixin is
    combined with.
    """

    def validate_src_share(self, sender_uuid, sname):
        """
        Fetch share ``sname`` through the API of the sending appliance.

        :param sender_uuid: uuid of the Appliance record of the sender.
        :param sname: share name to look up on the sender.
        :return: result of the ``shares/<sname>`` API call.
        """
        if self.raw is None:
            # Build the wrapper for the sender once and cache it on self.
            a = Appliance.objects.get(uuid=sender_uuid)
            url = "https://%s:%s" % (a.ip, a.mgmt_port)
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        # TODO: update url to include senders shareId as sname is now invalid
        return self.raw.api_call(url="shares/%s" % sname)

    def update_replica_status(self, rtid, data):
        """
        PUT ``data`` to the replica trail ``rtid``.

        :param rtid: integer id of the replica trail.
        :param data: payload for the API call.
        :return: result of the API call.
        :raises Exception: wrapping any failure of the API call.
        """
        # Built outside the try so the handler below can always reference it.
        url = "sm/replicas/trail/%d" % rtid
        try:
            return self.law.api_call(url, data=data, calltype="put")
        except Exception as e:
            # Do not let a payload without "status" mask the real failure.
            status = data.get("status") if isinstance(data, dict) else None
            msg = "Exception while updating replica(%s) status to %s: %s" % (
                url,
                status,
                e.__str__(),
            )
            raise Exception(msg)

    def disable_replica(self, rid):
        """
        Set ``enabled`` to False on replica ``rid``.

        :param rid: integer id of the replica.
        :return: result of the API call.
        :raises Exception: wrapping any failure of the API call.
        """
        # Built outside the try so the handler below can always reference it.
        url = "sm/replicas/%d" % rid
        headers = {
            "content-type": "application/json",
        }
        try:
            return self.law.api_call(
                url,
                data={
                    "enabled": False,
                },
                calltype="put",
                save_error=False,
                headers=headers,
            )
        except Exception as e:
            msg = "Exception while disabling replica(%s): %s" % (url,
                                                                 e.__str__())
            raise Exception(msg)

    def create_replica_trail(self, rid, snap_name):
        """
        POST a new trail entry for replica ``rid`` and snapshot ``snap_name``.

        :return: result of the API call.
        """
        url = "sm/replicas/trail/replica/%d" % rid
        return self.law.api_call(url,
                                 data={
                                     "snap_name": snap_name,
                                 },
                                 calltype="post",
                                 save_error=False)

    def rshare_id(self, sname):
        """Return the "id" of the replica share registered for ``sname``."""
        url = "sm/replicas/rshare/%s" % sname
        rshare = self.law.api_call(url, save_error=False)
        return rshare["id"]

    def create_rshare(self, data):
        """
        Create a replica share and return its id.

        :param data: payload for the API call; its "share" entry is used to
            recognise the "already exists" reply.
        :return: "id" of the new replica share, or of the existing one when
            the API reports that it already exists.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = "sm/replicas/rshare"
            rshare = self.law.api_call(url,
                                       data=data,
                                       calltype="post",
                                       save_error=False)
            return rshare["id"]
        except RockStorAPIException as e:
            # Note replica_share.py post() generates this exception message.
            already_exists = ("Replicashare(%s) already exists." %
                              data["share"])
            if e.detail == already_exists:
                return self.rshare_id(data["share"])
            raise

    def create_receive_trail(self, rid, data):
        """
        POST a new receive trail for replica share ``rid``.

        :return: "id" of the created receive trail.
        """
        url = "sm/replicas/rtrail/rshare/%d" % rid
        rt = self.law.api_call(url,
                               data=data,
                               calltype="post",
                               save_error=False)
        return rt["id"]

    def update_receive_trail(self, rtid, data):
        """
        PUT ``data`` to the receive trail ``rtid``.

        :return: result of the API call.
        :raises Exception: wrapping any failure of the API call.
        """
        url = "sm/replicas/rtrail/%d" % rtid
        try:
            return self.law.api_call(url,
                                     data=data,
                                     calltype="put",
                                     save_error=False)
        except Exception as e:
            msg = "Exception while updating receive trail(%s): %s" % (
                url, e.__str__())
            raise Exception(msg)

    def prune_trail(self, url, days=7):
        """
        Issue a DELETE on ``url`` with a ``days`` payload.

        :param url: trail endpoint to prune.
        :param days: value sent as "days" in the payload.
        :return: result of the API call.
        :raises Exception: wrapping any failure of the API call.
        """
        try:
            data = {
                "days": days,
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype="delete",
                                     save_error=False)
        except Exception as e:
            msg = "Exception while pruning trail for url(%s): %s" % (
                url, e.__str__())
            raise Exception(msg)

    def prune_receive_trail(self, ro):
        """Prune the receive trail of the object ``ro`` (uses ``ro.id``)."""
        url = "sm/replicas/rtrail/rshare/%d" % ro.id
        return self.prune_trail(url)

    def prune_replica_trail(self, ro):
        """Prune the replica trail of the object ``ro`` (uses ``ro.id``)."""
        url = "sm/replicas/trail/replica/%d" % ro.id
        return self.prune_trail(url)

    def create_snapshot(self, sname, snap_name, snap_type="replication"):
        """
        Create snapshot ``snap_name`` of share ``sname`` via the API.

        :param sname: name of an existing Share.
        :param snap_name: name for the new snapshot.
        :param snap_type: value sent as "snap_type" in the payload.
        :return: result of the API call, or None when the API reports that
            the snapshot already exists (logged at debug level).
        :raises RockStorAPIException: for any other API error.
        """
        try:
            share = Share.objects.get(name=sname)
            url = "shares/%s/snapshots/%s" % (share.id, snap_name)
            return self.law.api_call(url,
                                     data={
                                         "snap_type": snap_type,
                                     },
                                     calltype="post",
                                     save_error=False)
        except RockStorAPIException as e:
            # Note snapshot.py _create() generates this exception message.
            already_exists = (
                "Snapshot ({}) already exists for the share ({}).").format(
                    snap_name, sname)
            if e.detail == already_exists:
                logger.debug(e.detail)
                return None
            raise

    def update_repclone(self, sname, snap_name):
        """
        Call the dedicated create_repclone via it's url to supplant our
        share with the given snapshot. Intended for use in receive.py to turn
        the oldest snapshot into an existing share via unmount, mv, mount
        cycle.
        :param sname: Existing share name
        :param snap_name: Name of snapshot to supplant given share with.
        :return: result of the API call, or False when the API reports that
            the snapshot does not exist.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            share = Share.objects.get(name=sname)
            url = "shares/{}/snapshots/{}/repclone".format(share.id, snap_name)
            return self.law.api_call(url, calltype="post", save_error=False)
        except RockStorAPIException as e:
            # TODO: need to look further at the following as command repclone
            # TODO: (snapshot.py post) catches Snapshot.DoesNotExist.
            # TODO: and doesn't appear to call _delete_snapshot()
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            if e.detail == "Snapshot name ({}) does not exist.".format(
                    snap_name):
                logger.debug(e.detail)
                return False
            raise

    def delete_snapshot(self, sname, snap_name):
        """
        Delete snapshot ``snap_name`` of share ``sname`` via the API.

        :return: True when the call succeeded, False when the API reports
            that the snapshot does not exist.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            share = Share.objects.get(name=sname)
            url = "shares/%s/snapshots/%s" % (share.id, snap_name)
            self.law.api_call(url, calltype="delete", save_error=False)
            return True
        except RockStorAPIException as e:
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            if e.detail == "Snapshot name ({}) does not exist.".format(
                    snap_name):
                logger.debug(e.detail)
                return False
            raise

    def create_share(self, sname, pool):
        """
        Create the replica share ``sname`` on ``pool`` via the API.

        :return: result of the API call, or None when the API reports that
            the share already exists (logged at debug level).
        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = "shares"
            data = {
                "pool": pool,
                "replica": True,
                "sname": sname,
            }
            headers = {
                "content-type": "application/json",
            }
            return self.law.api_call(url,
                                     data=data,
                                     calltype="post",
                                     headers=headers,
                                     save_error=False)
        except RockStorAPIException as e:
            # Note share.py post() generates this exception message.
            already_exists = ("Share ({}) already exists. Choose a different "
                              "name.").format(sname)
            if e.detail == already_exists:
                logger.debug(e.detail)
                return None
            raise

    def refresh_snapshot_state(self):
        """
        Ask the API to refresh its snapshot state (best effort).

        :return: result of the API call, or None when it raised; failures
            are only logged.
        """
        try:
            return self.law.api_call(
                "commands/refresh-snapshot-state",
                data=None,
                calltype="post",
                save_error=False,
            )
        except Exception as e:
            logger.error("Exception while refreshing Snapshot state: %s" %
                         e.__str__())
            return None

    def refresh_share_state(self):
        """
        Ask the API to refresh its share state (best effort).

        :return: result of the API call, or None when it raised; failures
            are only logged.
        """
        try:
            return self.law.api_call(
                "commands/refresh-share-state",
                data=None,
                calltype="post",
                save_error=False,
            )
        except Exception as e:
            logger.error("Exception while refreshing Share state: %s" %
                         e.__str__())
            return None

    def humanize_bytes(self, num, units=(
        "Bytes",
        "KB",
        "MB",
        "GB",
    )):
        """
        Express num in the most appropriate of the given units.

        The value is divided by 1024 for each unit stepped over, stopping at
        the first unit in which it is below 1024 or at the last unit given.
        :param num: Assumed to be in Byte units.
        :param units: sequence of unit labels, smallest first.
        :return: value with two decimals and its unit, e.g. "1023.00 Bytes"
            for num=1023 or "4.28 KB" for num=4384.
        """
        # Work in floats so integer input is not truncated by floor division
        # on interpreters where int / int yields an int.
        value = float(num)
        for unit in units[:-1]:
            if value < 1024:
                return "%.2f %s" % (value, unit)
            value /= 1024.0
        # Largest unit: report whatever is left, even if it is >= 1024.
        return "%.2f %s" % (value, units[-1])

    def size_report(self, num, t0):
        """
        Summarise a transfer as human readable size and rate strings.

        :param num: number of bytes transferred so far.
        :param t0: time.time() stamp taken when the transfer started.
        :return: (size, rate) tuple of humanize_bytes() strings, the rate
            being per second of elapsed time.
        """
        elapsed = time.time() - t0
        total = float(num)
        # A zero interval (coarse clock) or a negative one (clock stepped
        # back) cannot give a meaningful rate; report 0 rather than raise
        # ZeroDivisionError or return a negative throughput.
        rate = total / elapsed if elapsed > 0 else 0.0
        return self.humanize_bytes(total), self.humanize_bytes(rate)
Example #11
0
    def run(self):
        """
        Receive one replication snapshot and feed it to `btrfs receive`.

        Steps, in order: connect a DEALER socket to the replication ipc
        socket, make sure the share and its replica metadata exist (or look
        them up for an incremental receive), create a receive trail, promote
        the oldest snapshot and prune old ones, start `btrfs receive`, tell
        the sender we are ready, then pump incoming messages into the
        receive process until the sender reports the stream finished.

        self.msg is updated before each step so that it describes the step
        in progress; presumably _clean_exit_handler() reports it when that
        step raises -- confirm against its definition.

        Does not return normally: every path ends in self._sys_exit() or an
        exception.
        """
        logger.debug('Id: %s. Starting a new Receiver for meta: %s' %
                     (self.identity, self.meta))
        self.msg = ('Top level exception in receiver')
        latest_snap = None
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self.dealer = self.ctx.socket(zmq.DEALER)
            self.dealer.setsockopt_string(zmq.IDENTITY, u'%s' % self.identity)
            self.dealer.set_hwm(10)
            self.dealer.connect('ipc://%s' %
                                settings.REPLICATION.get('ipc_socket'))
            self.poll.register(self.dealer, zmq.POLLIN)

            self.ack = True
            self.msg = ('Failed to get the sender ip for appliance: %s' %
                        self.sender_id)
            self.sender_ip = Appliance.objects.get(uuid=self.sender_id).ip

            if (not self.incremental):
                # First (full) receive: create the share and its replica
                # metadata; both helpers tolerate "already exists".
                self.msg = ('Failed to verify/create share: %s.' % self.sname)
                self.create_share(self.sname, self.dest_pool)

                self.msg = ('Failed to create the replica metadata object '
                            'for share: %s.' % self.sname)
                data = {
                    'share': self.sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share,
                }
                self.rid = self.create_rshare(data)
            else:
                self.msg = ('Failed to retreive the replica metadata '
                            'object for share: %s.' % self.sname)
                rso = ReplicaShare.objects.get(share=self.sname)
                self.rid = rso.id
                # Find and send the current snapshot to the sender. This will
                # be used as the start by btrfs-send diff.
                self.msg = ('Failed to verify latest replication snapshot '
                            'on the system.')
                latest_snap = self._latest_snap(rso)

            self.msg = ('Failed to create receive trail for rid: %d' %
                        self.rid)
            data = {
                'snap_name': self.snap_name,
            }
            self.rtid = self.create_receive_trail(self.rid, data)

            # delete the share, move the oldest snap to share
            self.msg = ('Failed to promote the oldest Snapshot to Share.')
            oldest_snap = get_oldest_snap(self.snap_dir,
                                          self.num_retain_snaps,
                                          regex='_replication_')
            if (oldest_snap is not None):
                self.update_repclone(self.sname, oldest_snap)
                self.refresh_share_state()
                self.refresh_snapshot_state()

            self.msg = ('Failed to prune old Snapshots')
            self._delete_old_snaps(self.sname, self.snap_dir,
                                   self.num_retain_snaps + 1)

            # TODO: The following should be re-instantiated once we have a
            # TODO: working method for doing so. see validate_src_share.
            # self.msg = ('Failed to validate the source share(%s) on '
            #             'sender(uuid: %s '
            #             ') Did the ip of the sender change?' %
            #             (self.src_share, self.sender_id))
            # self.validate_src_share(self.sender_id, self.src_share)

            sub_vol = ('%s%s/%s' %
                       (settings.MNT_PT, self.dest_pool, self.sname))
            if (not is_subvol(sub_vol)):
                self.msg = ('Failed to create parent subvolume %s' % sub_vol)
                run_command([BTRFS, 'subvolume', 'create', sub_vol])

            self.msg = ('Failed to create snapshot directory: %s' %
                        self.snap_dir)
            run_command(['/usr/bin/mkdir', '-p', self.snap_dir])
            snap_fp = ('%s/%s' % (self.snap_dir, self.snap_name))

            # If the snapshot already exists, presumably from the previous
            # attempt and the sender tries to send the same, reply back with
            # snap_exists and do not start the btrfs-receive
            if (is_subvol(snap_fp)):
                logger.debug('Id: %s. Snapshot to be sent(%s) already '
                             'exists. Not starting a new receive process' %
                             (self.identity, snap_fp))
                self._send_recv('snap-exists')
                self._sys_exit(0)

            cmd = [BTRFS, 'receive', self.snap_dir]
            self.msg = ('Failed to start the low level btrfs receive '
                        'command(%s). Aborting.' % cmd)
            self.rp = subprocess.Popen(cmd,
                                       shell=False,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            # Announce readiness; for an incremental receive the payload is
            # the latest local snapshot, otherwise an empty string.
            self.msg = ('Failed to send receiver-ready')
            rcommand, rmsg = self._send_recv('receiver-ready', latest_snap
                                             or '')
            if (rcommand is None):
                logger.error('Id: %s. No response from the broker for '
                             'receiver-ready command. Aborting.' %
                             self.identity)
                self._sys_exit(3)

            # Commands from the sender that abort the receive.
            term_commands = (
                'btrfs-send-init-error',
                'btrfs-send-unexpected-termination-error',
                'btrfs-send-nonzero-termination-error',
            )
            num_tries = 10
            poll_interval = 6000  # 6 seconds
            num_msgs = 0
            t0 = time.time()
            while (True):
                socks = dict(self.poll.poll(poll_interval))
                if (socks.get(self.dealer) == zmq.POLLIN):
                    # A message arrived: reset the retry budget so that each
                    # message may take up to num_tries x poll_interval
                    # (10 x 6 seconds) to show up.
                    num_tries = 10
                    command, message = self.dealer.recv_multipart()
                    if (command == 'btrfs-send-stream-finished'):
                        # this command concludes fsdata transfer. After this,
                        # btrfs-recev process should be
                        # terminated(.communicate).
                        if (self.rp.poll() is None):
                            self.msg = ('Failed to terminate btrfs-recv '
                                        'command')
                            out, err = self.rp.communicate()
                            out = out.split('\n')
                            err = err.split('\n')
                            logger.debug('Id: %s. Terminated btrfs-recv. '
                                         'cmd = %s out = %s err: %s rc: %s' %
                                         (self.identity, cmd, out, err,
                                          self.rp.returncode))
                        if (self.rp.returncode != 0):
                            self.msg = ('btrfs-recv exited with unexpected '
                                        'exitcode(%s). ' % self.rp.returncode)
                            raise Exception(self.msg)
                        data = {
                            'status': 'succeeded',
                            'kb_received': self.total_bytes_received / 1024,
                        }
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d' % self.rtid)
                        self.update_receive_trail(self.rtid, data)

                        self._send_recv('btrfs-recv-finished')
                        self.refresh_share_state()
                        self.refresh_snapshot_state()

                        dsize, drate = self.size_report(
                            self.total_bytes_received, t0)
                        logger.debug('Id: %s. Receive complete. Total data '
                                     'transferred: %s. Rate: %s/sec.' %
                                     (self.identity, dsize, drate))
                        self._sys_exit(0)

                    if (command in term_commands):
                        self.msg = ('Terminal command(%s) received from the '
                                    'sender. Aborting.' % command)
                        raise Exception(self.msg)

                    # Any other message is treated as stream data for the
                    # receive process, as long as that process is alive.
                    if (self.rp.poll() is None):
                        self.rp.stdin.write(message)
                        self.rp.stdin.flush()
                        # @todo: implement advanced credit request system.
                        self.dealer.send_multipart([b'send-more', ''])
                        num_msgs += 1
                        self.total_bytes_received += len(message)
                        # Every 1000 messages: record progress on the receive
                        # trail and log a liveness line.
                        if (num_msgs == 1000):
                            num_msgs = 0
                            data = {
                                'status': 'pending',
                                'kb_received':
                                self.total_bytes_received / 1024,
                            }
                            self.update_receive_trail(self.rtid, data)

                            dsize, drate = self.size_report(
                                self.total_bytes_received, t0)
                            logger.debug('Id: %s. Receiver alive. Data '
                                         'transferred: %s. Rate: %s/sec.' %
                                         (self.identity, dsize, drate))
                    else:
                        # btrfs receive exited mid-stream: collect its
                        # output, mark the trail failed and abort.
                        out, err = self.rp.communicate()
                        out = out.split('\n')
                        err = err.split('\n')
                        logger.error('Id: %s. btrfs-recv died unexpectedly. '
                                     'cmd: %s out: %s. err: %s' %
                                     (self.identity, cmd, out, err))
                        msg = (
                            'Low level system error from btrfs receive '
                            'command. cmd: %s out: %s err: %s for rtid: %s' %
                            (cmd, out, err, self.rtid))
                        data = {
                            'status': 'failed',
                            'error': msg,
                        }
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d.' % self.rtid)
                        self.update_receive_trail(self.rtid, data)
                        self.msg = msg
                        raise Exception(self.msg)
                else:
                    # Poll timed out without a message: use up one try and
                    # give up once none are left.
                    num_tries -= 1
                    msg = ('No response received from the broker. '
                           'remaining tries: %d' % num_tries)
                    logger.error('Id: %s. %s' % (self.identity, msg))
                    if (num_tries == 0):
                        self.msg = ('%s. Terminating the receiver.' % msg)
                        raise Exception(self.msg)
Example #12
0
    def run(self):
        """
        Send one replication snapshot to the receiver with `btrfs send`.

        Steps, in order: create a replica trail for self.snap_name, prune
        old snapshots, create the snapshot, wait for the receiver to answer
        'receiver-ready' (re-greeting it after every poll timeout), start
        `btrfs send` (incremental against self.rt.snap_name when self.rt is
        set), forward everything it writes to stdout to the receiver, and
        finally mark the replica trail as succeeded.

        self.msg is updated before each step so that it describes the step
        in progress; presumably _clean_exit_handler() reports it when that
        step raises -- confirm against its definition.

        Does not return normally: every path ends in self._sys_exit() or an
        exception.
        """

        self.msg = ('Top level exception in sender: %s' % self.identity)
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self._init_greeting()

            #  create a new replica trail if it's the very first time
            # or if the last one succeeded
            self.msg = ('Failed to create local replica trail for snap_name:'
                        ' %s. Aborting.' % self.snap_name)
            self.rt2 = self.create_replica_trail(self.replica.id,
                                                 self.snap_name)
            self.rt2_id = self.rt2['id']

            # prune old snapshots.
            self.update_trail = True
            self.msg = ('Failed to prune old snapshots')
            share_path = (
                '%s%s/.snapshots/%s' %
                (settings.MNT_PT, self.replica.pool, self.replica.share))
            self._delete_old_snaps(share_path)

            # Refresh replica trail.
            if (self.rt is not None):
                self.rt = self._refresh_rt()

            #  create a snapshot only if it's not already from a previous
            #  failed attempt.
            self.msg = ('Failed to create snapshot: %s. Aborting.' %
                        self.snap_name)
            self.create_snapshot(self.replica.share, self.snap_name)

            # Handshake: wait for the receiver's reply to the greeting,
            # allowing 10 poll timeouts of 6 seconds each.
            retries_left = 10
            poll_interval = 6000  # 6 seconds
            while (True):
                socks = dict(self.poll.poll(poll_interval))
                if (socks.get(self.send_req) == zmq.POLLIN):
                    # not really necessary because we just want one reply for
                    # now.
                    retries_left = 10
                    command, reply = self.send_req.recv_multipart()
                    if (command == 'receiver-ready'):
                        if (self.rt is not None):
                            self.rlatest_snap = reply
                            self.rt = self._refresh_rt()
                        logger.debug('Id: %s. command(%s) and message(%s) '
                                     'received. Proceeding to send fsdata.' %
                                     (self.identity, command, reply))
                        break
                    else:
                        # Exact match: a substring test would also accept an
                        # empty or partial command as 'receiver-init-error'.
                        if (command == 'receiver-init-error'):
                            self.msg = ('%s received for %s. extended reply: '
                                        '%s. Aborting.' %
                                        (command, self.identity, reply))
                        elif (command == 'snap-exists'):
                            logger.debug('Id: %s. %s received. Not sending '
                                         'fsdata' % (self.identity, command))
                            data = {
                                'status': 'succeeded',
                                'error':
                                'snapshot already exists on the receiver',
                            }  # noqa E501
                            self.msg = ('Failed to  update replica status for '
                                        '%s' % self.snap_id)
                            self.update_replica_status(self.rt2_id, data)
                            self._sys_exit(0)
                        else:
                            self.msg = ('unexpected reply(%s) for %s. '
                                        'extended reply: %s. Aborting' %
                                        (command, self.identity, reply))
                        raise Exception(self.msg)
                else:
                    retries_left -= 1
                    logger.debug('Id: %s. No response from receiver. Number '
                                 'of retry attempts left: %d' %
                                 (self.identity, retries_left))
                    if (retries_left == 0):
                        self.msg = ('Receiver(%s:%d) is unreachable. '
                                    'Aborting.' %
                                    (self.receiver_ip, self.receiver_port))
                        raise Exception(self.msg)
                    # Drop the unanswered socket and greet again on a new one.
                    self.send_req.setsockopt(zmq.LINGER, 0)
                    self.send_req.close()
                    self.poll.unregister(self.send_req)
                    self._init_greeting()

            snap_path = ('%s%s/.snapshots/%s/%s' %
                         (settings.MNT_PT, self.replica.pool,
                          self.replica.share, self.snap_name))
            cmd = [BTRFS, 'send', snap_path]
            if (self.rt is not None):
                prev_snap = ('%s%s/.snapshots/%s/%s' %
                             (settings.MNT_PT, self.replica.pool,
                              self.replica.share, self.rt.snap_name))
                logger.info('Id: %s. Sending incremental replica between '
                            '%s -- %s' % (self.identity, prev_snap, snap_path))
                cmd = [BTRFS, 'send', '-p', prev_snap, snap_path]
            else:
                logger.info('Id: %s. Sending full replica: %s' %
                            (self.identity, snap_path))

            try:
                self.sp = subprocess.Popen(cmd,
                                           shell=False,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
                # Non-blocking stdout so the read loop below never stalls on
                # a quiet pipe.
                fcntl.fcntl(self.sp.stdout.fileno(), fcntl.F_SETFL,
                            os.O_NONBLOCK)
            except Exception as e:
                # Two placeholders for two arguments; with only one the
                # formatting itself raised TypeError inside this handler.
                self.msg = ('Failed to start the low level btrfs send '
                            'command(%s). Aborting. Exception: %s' %
                            (cmd, e.__str__()))
                logger.error('Id: %s. %s' % (self.identity, self.msg))
                self._send_recv('btrfs-send-init-error')
                self._sys_exit(3)

            alive = True
            num_msgs = 0
            t0 = time.time()
            while (alive):
                try:
                    if (self.sp.poll() is not None):
                        logger.debug(
                            'Id: %s. send process finished '
                            'for %s. rc: %d. stderr: %s' %
                            (self.identity, self.snap_id, self.sp.returncode,
                             self.sp.stderr.read()))
                        # Still read and forward once more below so data
                        # left in the pipe is not lost.
                        alive = False
                    fs_data = self.sp.stdout.read()
                except IOError:
                    # Nothing to read yet on the non-blocking pipe.
                    continue
                except Exception:
                    self.msg = ('Exception occured while reading low '
                                'level btrfs '
                                'send data for %s. Aborting.' % self.snap_id)
                    if (alive):
                        self.sp.terminate()
                    self.update_trail = True
                    self._send_recv('btrfs-send-unexpected-termination-error')
                    self._sys_exit(3)

                self.msg = ('Failed to send fsdata to the receiver for %s. '
                            'Aborting.' % (self.snap_id))
                self.update_trail = True
                command, message = self._send_recv('', fs_data)
                self.total_bytes_sent += len(fs_data)
                num_msgs += 1
                # Log a liveness line every 1000 messages.
                if (num_msgs == 1000):
                    num_msgs = 0
                    dsize, drate = self.size_report(self.total_bytes_sent, t0)
                    logger.debug('Id: %s Sender alive. Data transferred: '
                                 '%s. Rate: %s/sec.' %
                                 (self.identity, dsize, drate))
                if (command is None or command == 'receiver-error'):
                    # command is None when the remote side vanishes.
                    self.msg = ('Got null or error command(%s) message(%s) '
                                'from the Receiver while'
                                ' transmitting fsdata. Aborting.' %
                                (command, message))
                    raise Exception(message)

                if (not alive):
                    # Tell the receiver how the send process ended.
                    if (self.sp.returncode != 0):
                        # do we mark failed?
                        command, message = self._send_recv(
                            'btrfs-send-nonzero-termination-error')
                    else:
                        command, message = self._send_recv(
                            'btrfs-send-stream-finished')

                # A changed parent pid means the process that started this
                # sender is gone.
                if (os.getppid() != self.ppid):
                    logger.error('Id: %s. Scheduler exited. Sender for %s '
                                 'cannot go on. '
                                 'Aborting.' % (self.identity, self.snap_id))
                    self._sys_exit(3)

            data = {
                'status': 'succeeded',
                'kb_sent': self.total_bytes_sent / 1024,
            }
            self.msg = ('Failed to update final replica status for %s'
                        '. Aborting.' % self.snap_id)
            self.update_replica_status(self.rt2_id, data)
            dsize, drate = self.size_report(self.total_bytes_sent, t0)
            logger.debug('Id: %s. Send complete. Total data transferred: %s.'
                         ' Rate: %s/sec.' % (self.identity, dsize, drate))
            self._sys_exit(0)
Example #13
0
    def run(self):
        logger.debug("Id: %s. Starting a new Receiver for meta: %s" %
                     (self.identity, self.meta))
        self.msg = "Top level exception in receiver"
        latest_snap = None
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self.dealer = self.ctx.socket(zmq.DEALER)
            self.dealer.setsockopt_string(zmq.IDENTITY, u"%s" % self.identity)
            self.dealer.set_hwm(10)
            self.dealer.connect("ipc://%s" %
                                settings.REPLICATION.get("ipc_socket"))
            self.poll.register(self.dealer, zmq.POLLIN)

            self.ack = True
            self.msg = "Failed to get the sender ip for appliance: %s" % self.sender_id
            self.sender_ip = Appliance.objects.get(uuid=self.sender_id).ip

            if not self.incremental:
                self.msg = "Failed to verify/create share: %s." % self.sname
                self.create_share(self.sname, self.dest_pool)

                self.msg = ("Failed to create the replica metadata object "
                            "for share: %s." % self.sname)
                data = {
                    "share": self.sname,
                    "appliance": self.sender_ip,
                    "src_share": self.src_share,
                }
                self.rid = self.create_rshare(data)
            else:
                self.msg = ("Failed to retreive the replica metadata "
                            "object for share: %s." % self.sname)
                rso = ReplicaShare.objects.get(share=self.sname)
                self.rid = rso.id
                # Find and send the current snapshot to the sender. This will
                # be used as the start by btrfs-send diff.
                self.msg = (
                    "Failed to verify latest replication snapshot on the system."
                )
                latest_snap = self._latest_snap(rso)

            self.msg = "Failed to create receive trail for rid: %d" % self.rid
            data = {
                "snap_name": self.snap_name,
            }
            self.rtid = self.create_receive_trail(self.rid, data)

            # delete the share, move the oldest snap to share
            self.msg = "Failed to promote the oldest Snapshot to Share."
            oldest_snap = get_oldest_snap(self.snap_dir,
                                          self.num_retain_snaps,
                                          regex="_replication_")
            if oldest_snap is not None:
                self.update_repclone(self.sname, oldest_snap)
                self.refresh_share_state()
                self.refresh_snapshot_state()

            self.msg = "Failed to prune old Snapshots"
            self._delete_old_snaps(self.sname, self.snap_dir,
                                   self.num_retain_snaps + 1)

            # TODO: The following should be re-instantiated once we have a
            # TODO: working method for doing so. see validate_src_share.
            # self.msg = ('Failed to validate the source share(%s) on '
            #             'sender(uuid: %s '
            #             ') Did the ip of the sender change?' %
            #             (self.src_share, self.sender_id))
            # self.validate_src_share(self.sender_id, self.src_share)

            sub_vol = "%s%s/%s" % (settings.MNT_PT, self.dest_pool, self.sname)
            if not is_subvol(sub_vol):
                self.msg = "Failed to create parent subvolume %s" % sub_vol
                run_command([BTRFS, "subvolume", "create", sub_vol])

            self.msg = "Failed to create snapshot directory: %s" % self.snap_dir
            run_command(["/usr/bin/mkdir", "-p", self.snap_dir])
            snap_fp = "%s/%s" % (self.snap_dir, self.snap_name)

            # If the snapshot already exists, presumably from the previous
            # attempt and the sender tries to send the same, reply back with
            # snap_exists and do not start the btrfs-receive
            if is_subvol(snap_fp):
                logger.debug("Id: %s. Snapshot to be sent(%s) already "
                             "exists. Not starting a new receive process" %
                             (self.identity, snap_fp))
                self._send_recv("snap-exists")
                self._sys_exit(0)

            cmd = [BTRFS, "receive", self.snap_dir]
            self.msg = ("Failed to start the low level btrfs receive "
                        "command(%s). Aborting." % cmd)
            self.rp = subprocess.Popen(
                cmd,
                shell=False,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            self.msg = "Failed to send receiver-ready"
            rcommand, rmsg = self._send_recv("receiver-ready", latest_snap
                                             or "")
            if rcommand is None:
                logger.error("Id: %s. No response from the broker for "
                             "receiver-ready command. Aborting." %
                             self.identity)
                self._sys_exit(3)

            term_commands = (
                "btrfs-send-init-error",
                "btrfs-send-unexpected-termination-error",
                "btrfs-send-nonzero-termination-error",
            )
            num_tries = 10
            poll_interval = 6000  # 6 seconds
            num_msgs = 0
            t0 = time.time()
            while True:
                socks = dict(self.poll.poll(poll_interval))
                if socks.get(self.dealer) == zmq.POLLIN:
                    # reset to wait upto 60(poll_interval x num_tries
                    # milliseconds) for every message
                    num_tries = 10
                    command, message = self.dealer.recv_multipart()
                    if command == "btrfs-send-stream-finished":
                        # this command concludes fsdata transfer. After this,
                        # btrfs-recev process should be
                        # terminated(.communicate).
                        if self.rp.poll() is None:
                            self.msg = "Failed to terminate btrfs-recv command"
                            out, err = self.rp.communicate()
                            out = out.split("\n")
                            err = err.split("\n")
                            logger.debug("Id: %s. Terminated btrfs-recv. "
                                         "cmd = %s out = %s err: %s rc: %s" %
                                         (self.identity, cmd, out, err,
                                          self.rp.returncode))
                        if self.rp.returncode != 0:
                            self.msg = ("btrfs-recv exited with unexpected "
                                        "exitcode(%s). " % self.rp.returncode)
                            raise Exception(self.msg)
                        data = {
                            "status": "succeeded",
                            "kb_received": self.total_bytes_received / 1024,
                        }
                        self.msg = (
                            "Failed to update receive trail for rtid: %d" %
                            self.rtid)
                        self.update_receive_trail(self.rtid, data)

                        self._send_recv("btrfs-recv-finished")
                        self.refresh_share_state()
                        self.refresh_snapshot_state()

                        dsize, drate = self.size_report(
                            self.total_bytes_received, t0)
                        logger.debug("Id: %s. Receive complete. Total data "
                                     "transferred: %s. Rate: %s/sec." %
                                     (self.identity, dsize, drate))
                        self._sys_exit(0)

                    if command in term_commands:
                        self.msg = ("Terminal command(%s) received from the "
                                    "sender. Aborting." % command)
                        raise Exception(self.msg)

                    if self.rp.poll() is None:
                        self.rp.stdin.write(message)
                        self.rp.stdin.flush()
                        # @todo: implement advanced credit request system.
                        self.dealer.send_multipart([b"send-more", ""])
                        num_msgs += 1
                        self.total_bytes_received += len(message)
                        if num_msgs == 1000:
                            num_msgs = 0
                            data = {
                                "status": "pending",
                                "kb_received":
                                self.total_bytes_received / 1024,
                            }
                            self.update_receive_trail(self.rtid, data)

                            dsize, drate = self.size_report(
                                self.total_bytes_received, t0)
                            logger.debug("Id: %s. Receiver alive. Data "
                                         "transferred: %s. Rate: %s/sec." %
                                         (self.identity, dsize, drate))
                    else:
                        out, err = self.rp.communicate()
                        out = out.split("\n")
                        err = err.split("\n")
                        logger.error("Id: %s. btrfs-recv died unexpectedly. "
                                     "cmd: %s out: %s. err: %s" %
                                     (self.identity, cmd, out, err))
                        msg = (
                            "Low level system error from btrfs receive "
                            "command. cmd: %s out: %s err: %s for rtid: %s" %
                            (cmd, out, err, self.rtid))
                        data = {
                            "status": "failed",
                            "error": msg,
                        }
                        self.msg = ("Failed to update receive trail for "
                                    "rtid: %d." % self.rtid)
                        self.update_receive_trail(self.rtid, data)
                        self.msg = msg
                        raise Exception(self.msg)
                else:
                    num_tries -= 1
                    msg = ("No response received from the broker. "
                           "remaining tries: %d" % num_tries)
                    logger.error("Id: %s. %s" % (self.identity, msg))
                    if num_tries == 0:
                        self.msg = "%s. Terminating the receiver." % msg
                        raise Exception(self.msg)
Example #14
0
class ReplicationMixin(object):
    """REST helper methods shared by the replication sender and receiver.

    Every method that talks to the local API goes through ``self.law``;
    ``validate_src_share`` uses ``self.raw`` to talk to the sending
    appliance. Both attributes must be provided by the class this mixin is
    combined with, as must the module level ``logger``.
    """

    def validate_src_share(self, sender_uuid, sname):
        """Query share ``sname`` through the sender appliance's API.

        ``self.raw`` is built on first use from the Appliance record whose
        uuid is ``sender_uuid`` and is reused on subsequent calls.

        :param sender_uuid: uuid of the sending appliance.
        :param sname: share name appended to the 'shares/' url.
        :return: whatever ``self.raw.api_call`` returns.
        """
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = 'https://%s:%s' % (a.ip, a.mgmt_port)
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: integer replica trail id.
        :param data: dict sent as the request data; must hold a 'status' key
            as it is quoted in the error message.
        :raises Exception: wrapping any failure of the api call.
        """
        # Built outside the try so the handler below can always refer to it.
        url = 'sm/replicas/trail/%d' % rtid
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e))
            raise Exception(msg)

    def disable_replica(self, rid):
        """PUT ``{'enabled': False}`` to the replica identified by ``rid``.

        :param rid: integer replica id.
        :raises Exception: wrapping any failure of the api call.
        """
        # Built outside the try so the handler below can always refer to it.
        url = 'sm/replicas/%d' % rid
        try:
            headers = {'content-type': 'application/json', }
            return self.law.api_call(url, data={'enabled': False, },
                                     calltype='put', save_error=False,
                                     headers=headers)
        except Exception as e:
            msg = ('Exception while disabling replica(%s): %s' % (url, e))
            raise Exception(msg)

    def create_replica_trail(self, rid, snap_name):
        """POST a new replica trail for replica ``rid`` and ``snap_name``."""
        url = 'sm/replicas/trail/replica/%d' % rid
        return self.law.api_call(url, data={'snap_name': snap_name, },
                                 calltype='post', save_error=False)

    def rshare_id(self, sname):
        """Return the 'id' entry of the replica share found for ``sname``."""
        url = 'sm/replicas/rshare/%s' % sname
        rshare = self.law.api_call(url, save_error=False)
        return rshare['id']

    def create_rshare(self, data):
        """POST a replica share and return its id.

        If the API reports that the replica share already exists, the id of
        the existing one is looked up and returned instead.

        :param data: dict sent as the request data; its 'share' key is used
            to recognise the "already exists" error.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'sm/replicas/rshare'
            rshare = self.law.api_call(url, data=data, calltype='post',
                                       save_error=False)
            return rshare['id']
        except RockStorAPIException as e:
            already_exists = ('Replicashare(%s) already exists.' %
                              data['share'])
            if e.detail == already_exists:
                return self.rshare_id(data['share'])
            raise

    def create_receive_trail(self, rid, data):
        """POST a receive trail for replica share ``rid``; return its id."""
        url = 'sm/replicas/rtrail/rshare/%d' % rid
        rt = self.law.api_call(url, data=data, calltype='post',
                               save_error=False)
        return rt['id']

    def update_receive_trail(self, rtid, data):
        """PUT ``data`` to the receive trail identified by ``rtid``.

        :raises Exception: wrapping any failure of the api call.
        """
        url = 'sm/replicas/rtrail/%d' % rtid
        try:
            return self.law.api_call(url, data=data, calltype='put',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while updating receive trail(%s): %s' %
                   (url, e))
            raise Exception(msg)

    def prune_trail(self, url, days=7):
        """Send a DELETE with ``{'days': days}`` to the trail at ``url``.

        :raises Exception: wrapping any failure of the api call.
        """
        try:
            data = {'days': days, }
            return self.law.api_call(url, data=data, calltype='delete',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while pruning trail for url(%s): %s'
                   % (url, e))
            raise Exception(msg)

    def prune_receive_trail(self, ro):
        """Prune the receive trail belonging to the object ``ro`` (by id)."""
        url = 'sm/replicas/rtrail/rshare/%d' % ro.id
        return self.prune_trail(url)

    def prune_replica_trail(self, ro):
        """Prune the replica trail belonging to the object ``ro`` (by id)."""
        url = 'sm/replicas/trail/replica/%d' % ro.id
        return self.prune_trail(url)

    def create_snapshot(self, sname, snap_name, snap_type='replication'):
        """POST a snapshot ``snap_name`` of share ``sname``.

        An "already exists" API error is only logged at debug level, in
        which case None is returned.

        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'shares/%s/snapshots/%s' % (sname, snap_name)
            return self.law.api_call(url, data={'snap_type': snap_type, },
                                     calltype='post', save_error=False)
        except RockStorAPIException as e:
            already_exists = ('Snapshot(%s) already exists for the Share(%s).'
                              % (snap_name, sname))
            if e.detail == already_exists:
                return logger.debug(e.detail)
            raise

    def delete_snapshot(self, sname, snap_name):
        """DELETE snapshot ``snap_name`` of share ``sname``.

        :return: True when the call succeeded, False when the API reports
            that the snapshot does not exist.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'shares/%s/snapshots/%s' % (sname, snap_name)
            self.law.api_call(url, calltype='delete', save_error=False)
            return True
        except RockStorAPIException as e:
            if e.detail == 'Snapshot(%s) does not exist.' % snap_name:
                logger.debug(e.detail)
                return False
            raise

    def create_share(self, sname, pool):
        """POST a replica share ``sname`` on ``pool``.

        An "already exists" API error is only logged at debug level, in
        which case None is returned.

        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'shares'
            data = {'pool': pool,
                    'replica': True,
                    'sname': sname, }
            headers = {'content-type': 'application/json', }
            return self.law.api_call(url, data=data, calltype='post',
                                     headers=headers, save_error=False)
        except RockStorAPIException as e:
            already_exists = ('Share(%s) already exists. Choose a different '
                              'name' % sname)
            if e.detail == already_exists:
                return logger.debug(e.detail)
            raise

    def refresh_snapshot_state(self):
        """POST the refresh-snapshot-state command; failures are logged."""
        try:
            return self.law.api_call('commands/refresh-snapshot-state',
                                     data=None, calltype='post',
                                     save_error=False)
        except Exception as e:
            # Best effort: a failed refresh must not abort the caller.
            logger.error('Exception while refreshing Snapshot state: %s' % e)

    def refresh_share_state(self):
        """POST the refresh-share-state command; failures are logged."""
        try:
            return self.law.api_call('commands/refresh-share-state', data=None,
                                     calltype='post', save_error=False)
        except Exception as e:
            # Best effort: a failed refresh must not abort the caller.
            logger.error('Exception while refreshing Share state: %s' % e)

    def humanize_bytes(self, num, units=('Bytes', 'KB', 'MB', 'GB',)):
        """Format ``num`` using the largest fitting entry of ``units``.

        Recurses, dividing by 1024 per step, until the value is below 1024
        or only one unit is left.

        :param num: quantity expressed in the first unit of ``units``.
        :param units: sequence of unit labels, smallest first.
        :return: string such as '1023.00 Bytes' or '1.50 KB'.
        """
        if num < 1024 or len(units) == 1:
            return '%.2f %s' % (num, units[0])
        # 1024.0 forces true division so integer input is never truncated.
        return self.humanize_bytes(num / 1024.0, units[1:])

    def size_report(self, num, t0):
        """Return human readable (size, rate) strings for a transfer.

        :param num: number of bytes transferred so far.
        :param t0: ``time.time()`` value taken when the transfer started.
        :return: tuple (size, rate per second); the rate is reported as zero
            when no time has elapsed since ``t0``.
        """
        elapsed = time.time() - t0
        dsize = self.humanize_bytes(float(num))
        # Guard against a zero (or negative) interval instead of raising
        # ZeroDivisionError in the middle of a transfer.
        rate = float(num) / elapsed if elapsed > 0 else 0.0
        drate = self.humanize_bytes(rate)
        return dsize, drate
Example #15
0
    def run(self):
        """Send one snapshot of the replica's share to the receiver.

        Steps, in order: create a replica trail via the API, prune old
        snapshots, create the snapshot to send, poll ``self.send_req`` until
        the receiver answers "receiver-ready" (re-greeting on every timeout),
        start ``btrfs send`` (incremental when ``self.rt`` is set) and forward
        its stdout to the receiver until the process ends, then record the
        final status and exit through ``self._sys_exit``.

        ``self.msg`` is updated before each step so that it describes the
        step in progress; presumably ``_clean_exit_handler`` reports it on
        failure - confirm against that helper.
        """

        self.msg = "Top level exception in sender: %s" % self.identity
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self._init_greeting()

            #  create a new replica trail if it's the very first time
            # or if the last one succeeded
            self.msg = ("Failed to create local replica trail for snap_name:"
                        " %s. Aborting." % self.snap_name)
            self.rt2 = self.create_replica_trail(self.replica.id,
                                                 self.snap_name)
            self.rt2_id = self.rt2["id"]

            # prune old snapshots.
            self.update_trail = True
            self.msg = "Failed to prune old snapshots"
            share_path = "%s%s/.snapshots/%s" % (
                settings.MNT_PT,
                self.replica.pool,
                self.replica.share,
            )
            self._delete_old_snaps(share_path)

            # Refresh replica trail.
            if self.rt is not None:
                self.rt = self._refresh_rt()

            #  create a snapshot only if it's not already from a previous
            #  failed attempt.
            # TODO: If one does exist we fail which seems harsh as we may be
            # TODO: able to pickup where we left of depending on the failure.
            self.msg = "Failed to create snapshot: %s. Aborting." % self.snap_name
            self.create_snapshot(self.replica.share, self.snap_name)

            retries_left = settings.REPLICATION.get("max_send_attempts")

            poll_interval = 6000  # 6 seconds
            # Wait for the receiver's reply to the greeting; each timeout
            # costs one retry and triggers a fresh greeting.
            while True:
                socks = dict(self.poll.poll(poll_interval))
                if socks.get(self.send_req) == zmq.POLLIN:
                    # not really necessary because we just want one reply for
                    # now.
                    retries_left = settings.REPLICATION.get(
                        "max_send_attempts")
                    command, reply = self.send_req.recv_multipart()
                    if command == "receiver-ready":
                        if self.rt is not None:
                            self.rlatest_snap = reply
                            self.rt = self._refresh_rt()
                        logger.debug("Id: %s. command(%s) and message(%s) "
                                     "received. Proceeding to send fsdata." %
                                     (self.identity, command, reply))
                        break
                    else:
                        # Equality, not substring membership: only the exact
                        # command name identifies a receiver init error.
                        if command == "receiver-init-error":
                            self.msg = ("%s received for %s. extended reply: "
                                        "%s. Aborting." %
                                        (command, self.identity, reply))
                        elif command == "snap-exists":
                            logger.debug("Id: %s. %s received. Not sending "
                                         "fsdata" % (self.identity, command))
                            data = {
                                "status": "succeeded",
                                "error":
                                "snapshot already exists on the receiver",
                            }  # noqa E501
                            self.msg = ("Failed to  update replica status for "
                                        "%s" % self.snap_id)
                            self.update_replica_status(self.rt2_id, data)
                            self._sys_exit(0)
                        else:
                            self.msg = ("unexpected reply(%s) for %s. "
                                        "extended reply: %s. Aborting" %
                                        (command, self.identity, reply))
                        raise Exception(self.msg)
                else:
                    retries_left -= 1
                    logger.debug("Id: %s. No response from receiver. Number "
                                 "of retry attempts left: %d" %
                                 (self.identity, retries_left))
                    if retries_left == 0:
                        self.msg = "Receiver(%s:%d) is unreachable. Aborting." % (
                            self.receiver_ip,
                            self.receiver_port,
                        )
                        raise Exception(self.msg)
                    # Drop the unanswered socket and greet again.
                    self.send_req.setsockopt(zmq.LINGER, 0)
                    self.send_req.close()
                    self.poll.unregister(self.send_req)
                    self._init_greeting()

            snap_path = "%s%s/.snapshots/%s/%s" % (
                settings.MNT_PT,
                self.replica.pool,
                self.replica.share,
                self.snap_name,
            )
            cmd = [BTRFS, "send", snap_path]
            if self.rt is not None:
                # A previous trail exists: send only the difference between
                # its snapshot and the new one.
                prev_snap = "%s%s/.snapshots/%s/%s" % (
                    settings.MNT_PT,
                    self.replica.pool,
                    self.replica.share,
                    self.rt.snap_name,
                )
                logger.info("Id: %s. Sending incremental replica between "
                            "%s -- %s" % (self.identity, prev_snap, snap_path))
                cmd = [BTRFS, "send", "-p", prev_snap, snap_path]
            else:
                logger.info("Id: %s. Sending full replica: %s" %
                            (self.identity, snap_path))

            try:
                self.sp = subprocess.Popen(cmd,
                                           shell=False,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
                # Non-blocking stdout so the read loop below never stalls.
                fcntl.fcntl(self.sp.stdout.fileno(), fcntl.F_SETFL,
                            os.O_NONBLOCK)
            except Exception as e:
                # Two placeholders for two arguments; with only one the
                # formatting itself raised TypeError and hid the real error.
                self.msg = ("Failed to start the low level btrfs send "
                            "command(%s). Aborting. Exception: %s" %
                            (cmd, e.__str__()))
                logger.error("Id: %s. %s" % (self.identity, self.msg))
                self._send_recv("btrfs-send-init-error")
                self._sys_exit(3)

            alive = True
            num_msgs = 0
            t0 = time.time()
            while alive:
                try:
                    if self.sp.poll() is not None:
                        logger.debug("Id: %s. send process finished "
                                     "for %s. rc: %d. stderr: %s" % (
                                         self.identity,
                                         self.snap_id,
                                         self.sp.returncode,
                                         self.sp.stderr.read(),
                                     ))
                        # One more pass to forward whatever is left in stdout.
                        alive = False
                    fs_data = self.sp.stdout.read()
                except IOError:
                    # Nothing to read yet on the non-blocking pipe; try again.
                    continue
                except Exception:
                    self.msg = ("Exception occurred while reading low "
                                "level btrfs "
                                "send data for %s. Aborting." % self.snap_id)
                    if alive:
                        self.sp.terminate()
                    self.update_trail = True
                    self._send_recv("btrfs-send-unexpected-termination-error")
                    self._sys_exit(3)

                self.msg = ("Failed to send fsdata to the receiver for %s. "
                            "Aborting." % (self.snap_id))
                self.update_trail = True
                command, message = self._send_recv("", fs_data)
                self.total_bytes_sent += len(fs_data)
                num_msgs += 1
                # Log a progress line every 1000 messages.
                if num_msgs == 1000:
                    num_msgs = 0
                    dsize, drate = self.size_report(self.total_bytes_sent, t0)
                    logger.debug("Id: %s Sender alive. Data transferred: "
                                 "%s. Rate: %s/sec." %
                                 (self.identity, dsize, drate))
                if command is None or command == "receiver-error":
                    # command is None when the remote side vanishes.
                    self.msg = ("Got null or error command(%s) message(%s) "
                                "from the Receiver while"
                                " transmitting fsdata. Aborting." %
                                (command, message))
                    raise Exception(message)

                if not alive:
                    if self.sp.returncode != 0:
                        # do we mark failed?
                        command, message = self._send_recv(
                            "btrfs-send-nonzero-termination-error")
                    else:
                        command, message = self._send_recv(
                            "btrfs-send-stream-finished")

                # The parent pid changing means the process that started this
                # sender is gone.
                if os.getppid() != self.ppid:
                    logger.error("Id: %s. Scheduler exited. Sender for %s "
                                 "cannot go on. "
                                 "Aborting." % (self.identity, self.snap_id))
                    self._sys_exit(3)

            data = {
                "status": "succeeded",
                "kb_sent": self.total_bytes_sent / 1024,
            }
            self.msg = ("Failed to update final replica status for %s"
                        ". Aborting." % self.snap_id)
            self.update_replica_status(self.rt2_id, data)
            dsize, drate = self.size_report(self.total_bytes_sent, t0)
            logger.debug("Id: %s. Send complete. Total data transferred: %s."
                         " Rate: %s/sec." % (self.identity, dsize, drate))
            self._sys_exit(0)
Example #16
0
class ReplicationMixin(object):
    """REST helper methods shared by the replication sender and receiver.

    Every method that talks to the local API goes through ``self.law``;
    ``validate_src_share`` uses ``self.raw`` to talk to the sending
    appliance. Both attributes must be provided by the class this mixin is
    combined with, as must the module level ``logger``.
    """

    def validate_src_share(self, sender_uuid, sname):
        """Query share ``sname`` through the sender appliance's API.

        ``self.raw`` is built on first use from the Appliance record whose
        uuid is ``sender_uuid`` and is reused on subsequent calls.

        :param sender_uuid: uuid of the sending appliance.
        :param sname: share name appended to the 'shares/' url.
        :return: whatever ``self.raw.api_call`` returns.
        """
        if self.raw is None:
            a = Appliance.objects.get(uuid=sender_uuid)
            url = 'https://%s:%s' % (a.ip, a.mgmt_port)
            self.raw = APIWrapper(client_id=a.client_id,
                                  client_secret=a.client_secret,
                                  url=url)
        # TODO: update url to include senders shareId as sname is now invalid
        return self.raw.api_call(url='shares/%s' % sname)

    def update_replica_status(self, rtid, data):
        """PUT ``data`` to the replica trail identified by ``rtid``.

        :param rtid: integer replica trail id.
        :param data: dict sent as the request data; must hold a 'status' key
            as it is quoted in the error message.
        :raises Exception: wrapping any failure of the api call.
        """
        # Built outside the try so the handler below can always refer to it.
        url = 'sm/replicas/trail/%d' % rtid
        try:
            return self.law.api_call(url, data=data, calltype='put')
        except Exception as e:
            msg = ('Exception while updating replica(%s) status to %s: %s' %
                   (url, data['status'], e))
            raise Exception(msg)

    def disable_replica(self, rid):
        """PUT ``{'enabled': False}`` to the replica identified by ``rid``.

        :param rid: integer replica id.
        :raises Exception: wrapping any failure of the api call.
        """
        # Built outside the try so the handler below can always refer to it.
        url = 'sm/replicas/%d' % rid
        try:
            headers = {'content-type': 'application/json', }
            return self.law.api_call(url, data={'enabled': False, },
                                     calltype='put', save_error=False,
                                     headers=headers)
        except Exception as e:
            msg = ('Exception while disabling replica(%s): %s' % (url, e))
            raise Exception(msg)

    def create_replica_trail(self, rid, snap_name):
        """POST a new replica trail for replica ``rid`` and ``snap_name``."""
        url = 'sm/replicas/trail/replica/%d' % rid
        return self.law.api_call(url, data={'snap_name': snap_name, },
                                 calltype='post', save_error=False)

    def rshare_id(self, sname):
        """Return the 'id' entry of the replica share found for ``sname``."""
        url = 'sm/replicas/rshare/%s' % sname
        rshare = self.law.api_call(url, save_error=False)
        return rshare['id']

    def create_rshare(self, data):
        """POST a replica share and return its id.

        If the API reports that the replica share already exists, the id of
        the existing one is looked up and returned instead.

        :param data: dict sent as the request data; its 'share' key is used
            to recognise the "already exists" error.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'sm/replicas/rshare'
            rshare = self.law.api_call(url, data=data, calltype='post',
                                       save_error=False)
            return rshare['id']
        except RockStorAPIException as e:
            # Note replica_share.py post() generates this exception message.
            already_exists = ('Replicashare(%s) already exists.' %
                              data['share'])
            if e.detail == already_exists:
                return self.rshare_id(data['share'])
            raise

    def create_receive_trail(self, rid, data):
        """POST a receive trail for replica share ``rid``; return its id."""
        url = 'sm/replicas/rtrail/rshare/%d' % rid
        rt = self.law.api_call(url, data=data, calltype='post',
                               save_error=False)
        return rt['id']

    def update_receive_trail(self, rtid, data):
        """PUT ``data`` to the receive trail identified by ``rtid``.

        :raises Exception: wrapping any failure of the api call.
        """
        url = 'sm/replicas/rtrail/%d' % rtid
        try:
            return self.law.api_call(url, data=data, calltype='put',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while updating receive trail(%s): %s' %
                   (url, e))
            raise Exception(msg)

    def prune_trail(self, url, days=7):
        """Send a DELETE with ``{'days': days}`` to the trail at ``url``.

        :raises Exception: wrapping any failure of the api call.
        """
        try:
            data = {'days': days, }
            return self.law.api_call(url, data=data, calltype='delete',
                                     save_error=False)
        except Exception as e:
            msg = ('Exception while pruning trail for url(%s): %s'
                   % (url, e))
            raise Exception(msg)

    def prune_receive_trail(self, ro):
        """Prune the receive trail belonging to the object ``ro`` (by id)."""
        url = 'sm/replicas/rtrail/rshare/%d' % ro.id
        return self.prune_trail(url)

    def prune_replica_trail(self, ro):
        """Prune the replica trail belonging to the object ``ro`` (by id)."""
        url = 'sm/replicas/trail/replica/%d' % ro.id
        return self.prune_trail(url)

    def create_snapshot(self, sname, snap_name, snap_type='replication'):
        """POST a snapshot ``snap_name`` of the share named ``sname``.

        The share is looked up by name and addressed by its id in the url.
        An "already exists" API error is only logged at debug level, in
        which case None is returned.

        :raises RockStorAPIException: for any other API error.
        """
        try:
            share = Share.objects.get(name=sname)
            url = 'shares/%s/snapshots/%s' % (share.id, snap_name)
            return self.law.api_call(url, data={'snap_type': snap_type, },
                                     calltype='post', save_error=False)
        except RockStorAPIException as e:
            # Note snapshot.py _create() generates this exception message.
            already_exists = ('Snapshot ({}) already exists for the share '
                              '({}).').format(snap_name, sname)
            if e.detail == already_exists:
                return logger.debug(e.detail)
            raise

    def update_repclone(self, sname, snap_name):
        """
        Call the dedicated create_repclone via its url to supplant our
        share with the given snapshot. Intended for use in receive.py to turn
        the oldest snapshot into an existing share via unmount, mv, mount
        cycle.
        :param sname: Existing share name
        :param snap_name: Name of snapshot to supplant given share with.
        :return: False if the API reports that the snapshot does not exist,
            otherwise the result of the api call.
        """
        try:
            share = Share.objects.get(name=sname)
            url = 'shares/{}/snapshots/{}/repclone'.format(share.id, snap_name)
            return self.law.api_call(url, calltype='post', save_error=False)
        except RockStorAPIException as e:
            # TODO: need to look further at the following as command repclone
            # TODO: (snapshot.py post) catches Snapshot.DoesNotExist.
            # TODO: and doesn't appear to call _delete_snapshot()
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            missing = 'Snapshot name ({}) does not exist.'.format(snap_name)
            if e.detail == missing:
                logger.debug(e.detail)
                return False
            raise

    def delete_snapshot(self, sname, snap_name):
        """DELETE snapshot ``snap_name`` of the share named ``sname``.

        :return: True when the call succeeded, False when the API reports
            that the snapshot does not exist.
        :raises RockStorAPIException: for any other API error.
        """
        try:
            share = Share.objects.get(name=sname)
            url = 'shares/%s/snapshots/%s' % (share.id, snap_name)
            self.law.api_call(url, calltype='delete', save_error=False)
            return True
        except RockStorAPIException as e:
            # Note snapshot.py _delete_snapshot() generates this exception msg.
            missing = 'Snapshot name ({}) does not exist.'.format(snap_name)
            if e.detail == missing:
                logger.debug(e.detail)
                return False
            raise

    def create_share(self, sname, pool):
        """POST a replica share ``sname`` on ``pool``.

        An "already exists" API error is only logged at debug level, in
        which case None is returned.

        :raises RockStorAPIException: for any other API error.
        """
        try:
            url = 'shares'
            data = {'pool': pool,
                    'replica': True,
                    'sname': sname, }
            headers = {'content-type': 'application/json', }
            return self.law.api_call(url, data=data, calltype='post',
                                     headers=headers, save_error=False)
        except RockStorAPIException as e:
            # Note share.py post() generates this exception message.
            already_exists = ('Share ({}) already exists. Choose a different '
                              'name.').format(sname)
            if e.detail == already_exists:
                return logger.debug(e.detail)
            raise

    def refresh_snapshot_state(self):
        """POST the refresh-snapshot-state command; failures are logged."""
        try:
            return self.law.api_call('commands/refresh-snapshot-state',
                                     data=None, calltype='post',
                                     save_error=False)
        except Exception as e:
            # Best effort: a failed refresh must not abort the caller.
            logger.error('Exception while refreshing Snapshot state: %s' % e)

    def refresh_share_state(self):
        """POST the refresh-share-state command; failures are logged."""
        try:
            return self.law.api_call('commands/refresh-share-state', data=None,
                                     calltype='post', save_error=False)
        except Exception as e:
            # Best effort: a failed refresh must not abort the caller.
            logger.error('Exception while refreshing Share state: %s' % e)

    def humanize_bytes(self, num, units=('Bytes', 'KB', 'MB', 'GB',)):
        """
        Recursive routine to establish and then return the most appropriate
        num expression given the contents of units. Ie 1023 Bytes or 4096 KB
        :param num: Assumed to be in Byte units.
        :param units: list of units to recurse through
        :return: "1023.00 Bytes" or "4.28 KB" etc given num=1023 or num=4384
        """
        if num < 1024 or len(units) == 1:
            return '%.2f %s' % (num, units[0])
        # 1024.0 forces true division so integer input is never truncated.
        return self.humanize_bytes(num / 1024.0, units[1:])

    def size_report(self, num, t0):
        """Return human readable (size, rate) strings for a transfer.

        :param num: number of bytes transferred so far.
        :param t0: ``time.time()`` value taken when the transfer started.
        :return: tuple (size, rate per second); the rate is reported as zero
            when no time has elapsed since ``t0``.
        """
        elapsed = time.time() - t0
        dsize = self.humanize_bytes(float(num))
        # Guard against a zero (or negative) interval instead of raising
        # ZeroDivisionError in the middle of a transfer.
        rate = float(num) / elapsed if elapsed > 0 else 0.0
        drate = self.humanize_bytes(rate)
        return dsize, drate