Ejemplo n.º 1
0
 def setUp(self):
     """Run the parent fixture setup, then create the lock under test.

     Stores an ``etcd.Lock`` named ``'test_lock'`` bound to ``self.client``
     on ``self.locker``.
     """
     super(TestClientLock, self).setUp()
     lock_name = 'test_lock'
     self.locker = etcd.Lock(self.client, lock_name)
Ejemplo n.º 2
0
 def get_lock(self, *args, **kwargs):
     """Build an ``etcd.Lock`` that uses this object as its client.

     All positional and keyword arguments are forwarded unchanged to the
     ``etcd.Lock`` constructor, after ``self``.
     """
     new_lock = etcd.Lock(self, *args, **kwargs)
     return new_lock
Ejemplo n.º 3
0
    def run(self):
        """Shrink a gluster volume and drop its removed bricks from etcd.

        Builds the keyword arguments for ``NS.gdeploy_plugin.shrink_volume``
        from the flow parameters (falling back to the counts of the loaded
        volume), runs the shrink and, when the action is ``"commit"`` or the
        replica count is being decreased, deletes the brick keys of the
        removed bricks from the central store while holding the ``'volume'``
        etcd lock.

        Returns:
            True when the shrink succeeded, False when
            ``shrink_volume`` reported a failure.

        Raises:
            Any exception raised while taking the lock or deleting the brick
            keys, other than ``etcd.EtcdKeyNotFound`` for an individual
            brick, propagates to the caller after the lock is released.
        """
        params = self.parameters

        def _log(priority, message):
            # Every record of this flow carries the same job/flow/cluster ids.
            logger.log(
                priority,
                NS.publisher_id, {"message": message},
                job_id=params["job_id"],
                flow_id=params["flow_id"],
                integration_id=NS.tendrl_context.integration_id)

        args = {}
        vol = NS.gluster.objects.Volume(
            vol_id=params['Volume.vol_id']).load()
        replica_count = params.get('Volume.replica_count')
        disperse_count = params.get('Volume.disperse_count')
        if replica_count is not None:
            args["replica_count"] = replica_count
            if vol.replica_count != replica_count:
                args["decrease_replica_count"] = True
        elif disperse_count is not None:
            args["disperse_count"] = disperse_count
        elif int(vol.replica_count) > 1:
            # No count requested: reuse the current layout of the volume.
            args["replica_count"] = vol.replica_count
        elif int(vol.disperse_count) > 1:
            args["disperse_count"] = vol.disperse_count

        force = params.get('Volume.force')
        if force is not None:
            args["force"] = force

        action = params.get('Volume.action')

        _log("info", "Shrinking the volume %s" % params['Volume.volname'])
        if not NS.gdeploy_plugin.shrink_volume(
                params.get('Volume.volname'),
                params.get('Volume.bricks'), action, **args):
            _log("error",
                 "Volume shrink failed for volume %s" %
                 params['Volume.volname'])
            return False

        _log("info", "Shrinked the volume %s" % params['Volume.volname'])
        # Bricks only leave the central store on commit or when the replica
        # count is being decreased.
        if action != "commit" and "decrease_replica_count" not in args:
            return True

        # Delete the bricks from central store.
        # The lock object is created outside the try block so that a failure
        # to build it is not masked by an unbound name in the finally clause.
        lock = etcd.Lock(NS._int.wclient, 'volume')
        try:
            # Block until the lock is acquired.
            while not lock.is_acquired:
                try:
                    # with ttl set, lock will be blocked only for 60 sec
                    # after which it will raise lock_expired exception.
                    # if this is raised, we have to retry for lock
                    lock.acquire(blocking=True, lock_ttl=60)
                    if lock.is_acquired:
                        # renewing lock as we are not sure, how long we
                        # were blocked before the lock was given.
                        # NOTE: blocked time also counts as ttl
                        lock.acquire(lock_ttl=60)
                except etcd.EtcdLockExpired:
                    continue
            for sub_vol in params.get('Volume.bricks'):
                for brick in sub_vol:
                    # Each brick is a single-entry mapping; the brick name is
                    # built as "<key>:<value with '/' replaced by '_'>".
                    # items() works on both Python 2 and 3, unlike indexing
                    # the result of keys()/values().
                    host, path = next(iter(brick.items()))
                    brick_name = host + ":" + path.replace("/", "_")
                    try:
                        NS._int.wclient.delete(
                            "clusters/%s/Volumes/%s/Bricks/%s" %
                            (NS.tendrl_context.integration_id,
                             params['Volume.vol_id'], brick_name),
                            recursive=True)
                    except etcd.EtcdKeyNotFound:
                        # Already gone from the central store.
                        continue
        finally:
            lock.release()

        _log("info",
             "Deleted bricks for volume %s"
             " from central store" % params['Volume.volname'])
        return True
Ejemplo n.º 4
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import etcd

# Initialize the lock object:
# NOTE: this does not acquire a lock yet
client = etcd.Client(host='127.0.0.1', port=2379)
# Alternatively, set a custom lock prefix (the default is '/_locks/' if you
# are using HEAD). This assignment replaces the client created above.
client = etcd.Client(lock_prefix='/my_etcd_root/_locks')
lock = etcd.Lock(client, 'my_lock_name')

# Use the lock object:
lock.acquire(
    blocking=True,  # will block until the lock is acquired
    lock_ttl=None)  # lock will live until we release it
lock.is_acquired  # True
lock.acquire(lock_ttl=60)  # renew a lock
lock.release()  # release an existing lock
lock.is_acquired  # False

# The lock object may also be used as a context manager:
client = etcd.Client()
with etcd.Lock(client, 'customer1') as my_lock:
    # do_stuff()  # placeholder: the work protected by the lock goes here
    my_lock.is_acquired  # True
    my_lock.acquire(lock_ttl=60)
my_lock.is_acquired  # False
Ejemplo n.º 5
0
    def __init__(self,
                 cluster_controller_started_event=None,
                 terminate_event=None):
        """
        Initialise the ClusterController.

        Validates the configuration, connects to ETCD, tries to acquire the
        master lock, registers this instance as a member in ETCD and then
        blocks in a keep-alive loop until the terminate event is set.

        The attributes ``etcd_hosts``, ``etcd_port``, ``environment``,
        ``service``, ``ports`` and ``role`` are read here but not assigned
        here; presumably they are provided by the class or a subclass
        (TODO confirm).

        :param cluster_controller_started_event: optional event-like object;
            its ``set()`` is called once ``start()`` has returned.
        :param terminate_event: optional event-like object; the keep-alive
            loop ends when its ``is_set()`` returns true. When omitted, the
            loop keeps running until the process is stopped by other means.
        """
        self.logger = create_logger(
            name=multiprocessing.current_process().name)
        self.logger.info("Starting Cluster Controller")
        self.state = 'started'
        self.container = os.uname()[1]
        self.instance_id = str(uuid.uuid4())
        self.terminate_event = terminate_event
        self.cluster_controller_started_event = cluster_controller_started_event

        # Check required variables
        if self.etcd_port:
            try:
                self.etcd_port = int(self.etcd_port)
            except ValueError:
                self.logger.error(
                    'ETCD Port should be a valid integer value.',
                    extra={
                        'stack': True,
                    })
                self.state = 'stopping'

        if not isinstance(self.etcd_hosts, list) or not isinstance(
                self.etcd_port, int):
            self.logger.error(
                f'No valid ETCD hosts and/or port specified: {self.etcd_hosts}:{self.etcd_port}',
                extra={
                    'stack': True,
                })
            self.state = 'stopping'

        if not self.environment or not self.service:
            self.logger.error('Environment and/or service is not set.',
                              extra={
                                  'stack': True,
                              })
            self.state = 'stopping'

        if self.state == 'stopping':
            # NOTE(review): configuration errors terminate with exit code 0;
            # confirm that this is intended.
            self.terminate_controller(exitcode=0)

        # Connect to ETCD: cycle through the hosts for at most 30 seconds.
        connected = False

        timeout = time() + 30

        while time() < timeout and not connected:
            for host in self.etcd_hosts:
                self.logger.info(f'Trying to connect to ETCD host {host}',
                                 extra={
                                     'stack': True,
                                 })
                try:
                    # Creating the client is capped at 5 seconds per host.
                    self.etcd_client = func_timeout(
                        5,
                        etcd.Client,
                        args=(),
                        kwargs={
                            'host': host,
                            'port': int(self.etcd_port),
                            'allow_reconnect': True,
                        })
                    machines = self.etcd_client.machines
                    if len(machines) >= 1:
                        connected = True
                        self.logger.info(
                            f'Connected to ETCD machines: {machines}',
                            extra={
                                'stack': True,
                            })
                        break
                except FunctionTimedOut:
                    self.logger.info(
                        f'Timeout while connecting to ETCD host {host}',
                        extra={
                            'stack': True,
                        })
                except etcd.EtcdException as error:
                    self.logger.info(f'Unable to connect to ETCD: {error}',
                                     extra={
                                         'stack': True,
                                     })
                sleep(1)

        if not connected:
            self.logger.warning('Unable to connect to ETCD, giving up...',
                                extra={
                                    'stack': True,
                                })
            self.state = 'stopping'
            self.terminate_controller(exitcode=1)
        else:
            # Set ETCD lock name, members dir and master key location
            self.lock_name = self.environment + '_' + self.service

            self.members_dir = f"/{self.environment}/{self.service}/members"
            self.master_key = f"/{self.environment}/{self.service}/master"

            self.member_dir = f"{self.members_dir}/{self.instance_id}"
            self.member_state_key = f"{self.member_dir}/state"
            self.member_container_key = f"{self.member_dir}/container"
            self.member_role_key = f"{self.member_dir}/role"

            self.master_lock = etcd.Lock(self.etcd_client, self.lock_name)

            # Start the schedule thread
            self.terminate_schedule_event = self.run_schedule_continously(
                schedule=schedule, interval=1)

            # Try to acquire the master lock.
            master = self.acquire_master_lock()

            # Start the run loop
            self.terminate_run_event = self.run()

            # Run start to include a child class startup logic and raise the
            # init event when finished. The event is optional (defaults to
            # None), so only signal it when one was supplied.
            self.start()
            if self.cluster_controller_started_event is not None:
                self.cluster_controller_started_event.set()

            if master:
                self.started_as_master()
            else:
                self.started_as_slave()

            # Register in ETCD
            self.etcd_client.write(self.member_state_key, self.state, ttl=60)
            self.etcd_client.write(self.member_role_key, self.role, ttl=60)
            self.etcd_client.write(self.member_container_key,
                                   self.container,
                                   ttl=60)

            # Keep the main process alive while the terminate event is not
            # set. Without a terminate event there is nothing to wait for,
            # so the loop simply keeps running.
            while (self.terminate_event is None
                   or not self.terminate_event.is_set()):
                self.check_active(ports=self.ports)
                sleep(1)
Ejemplo n.º 6
0
def brick_status_alert(hostname):
    """Mark the started bricks of a host as stopped and alert for each.

    Takes the etcd lock ``clusters/<integration_id>/Bricks/all/<hostname>``,
    loads all bricks whose fqdn is ``hostname`` and, for every brick whose
    status is ``BRICK_STARTED``, emits a ``brick_status`` WARNING event and
    saves the brick with status ``BRICK_STOPPED``.

    The lock is held for the whole loop and released exactly once in the
    ``finally`` clause, so every brick is updated under the lock.

    Args:
        hostname: fqdn of the node whose bricks are to be marked stopped.

    Etcd errors and ``KeyError`` / ``ValueError`` / ``AttributeError`` are
    reported through an ``ExceptionMessage`` event instead of being raised.
    """
    lock = None
    try:
        # fetching brick details of disconnected node
        path = "clusters/%s/Bricks/all/%s" % (
            NS.tendrl_context.integration_id,
            hostname
        )
        lock = etcd.Lock(NS._int.client, path)
        lock.acquire(blocking=True, lock_ttl=60)
        if lock.is_acquired:
            bricks = NS.gluster.objects.Brick(fqdn=hostname).load_all()
            for brick in bricks:
                if brick.status.lower() != BRICK_STARTED:
                    continue
                # raise an alert for brick
                msg = (
                    "Status of brick: %s "
                    "under volume %s in cluster %s "
                    "changed from %s to %s") % (
                        brick.brick_path,
                        brick.vol_name,
                        NS.tendrl_context.integration_id,
                        BRICK_STARTED.title(),
                        BRICK_STOPPED.title()
                    )
                instance = "volume_%s|brick_%s" % (
                    brick.vol_name,
                    brick.brick_path,
                )
                event_utils.emit_event(
                    "brick_status",
                    BRICK_STOPPED.title(),
                    msg,
                    instance,
                    'WARNING',
                    tags={"entity_type": RESOURCE_TYPE_BRICK,
                          "volume_name": brick.vol_name,
                          "node_id": brick.node_id,
                          "fqdn": brick.hostname
                          }
                )
                # Update brick status as stopped
                brick.status = BRICK_STOPPED.title()
                brick.save()
    except (
        etcd.EtcdException,
        KeyError,
        ValueError,
        AttributeError
    ) as ex:
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": "Unable to raise an brick status "
                               "alert for host %s" % hostname,
                    "exception": ex
                }
            )
        )
    finally:
        # Single release point; lock stays None if it was never created.
        if isinstance(lock, etcd.lock.Lock) and lock.is_acquired:
            lock.release()
Ejemplo n.º 7
0
    def run(self):
        """Stop and delete a gluster volume, then remove it from etcd.

        Steps, each reported through an ``Event``:
          1. stop the volume via ``NS.gdeploy_plugin.stop_volume``;
          2. delete it via ``NS.gdeploy_plugin.delete_volume``;
          3. delete ``clusters/<integration_id>/Volumes/<vol_id>`` from the
             central store while holding the ``'volume'`` etcd lock.

        Returns:
            True when the volume was stopped and deleted, False when either
            the stop or the delete step reported a failure.

        Raises:
            Any exception from the central-store step other than
            ``etcd.EtcdKeyNotFound`` / ``KeyError`` propagates after the
            lock is released, instead of being discarded by a ``return``
            inside ``finally``.
        """
        params = self.parameters
        vol_id = params['Volume.vol_id']

        def _notify(priority, message):
            # Every event of this flow carries the same job/flow/cluster ids.
            Event(
                Message(
                    priority=priority,
                    publisher=NS.publisher_id,
                    payload={"message": message},
                    job_id=params["job_id"],
                    flow_id=params["flow_id"],
                    cluster_id=NS.tendrl_context.integration_id,
                ))

        if not NS.gdeploy_plugin.stop_volume(params.get('Volume.volname')):
            _notify("error",
                    "Could not stop volume %s before delete" %
                    params['Volume.volname'])
            return False
        _notify("info",
                "Stopped the volume %s before delete" %
                params['Volume.volname'])

        args = {}
        # NOTE(review): the guard tests 'Volume.volname' but forwards
        # 'Volume.format_bricks'; this looks like a copy-paste slip. Kept
        # as-is because callers may rely on format_bricks always being sent.
        if params.get('Volume.volname') is not None:
            args["format_bricks"] = params.get('Volume.format_bricks')

        if not NS.gdeploy_plugin.delete_volume(
                params.get('Volume.volname'), **args):
            _notify("error",
                    "Failed to delete volume %s" % params['Volume.volname'])
            return False
        _notify("info", "Deleted the volume %s" % params['Volume.volname'])

        # Acquire lock before deleting the volume from etcd.
        # The lock object is created outside the try block so that a failure
        # to build it is not masked by an unbound name in the finally clause.
        lock = etcd.Lock(NS._int.wclient, 'volume')
        try:
            # We are blocking till we acquire the lock
            while not lock.is_acquired:
                try:
                    # with ttl set, lock will be blocked only for 60 sec
                    # after which it will raise lock_expired exception.
                    # if this is raised, we have to retry for lock
                    lock.acquire(blocking=True, lock_ttl=60)
                    if lock.is_acquired:
                        # renewing lock as we are not sure, how long we
                        # were blocked before the lock was given.
                        # NOTE: blocked time also counts as ttl
                        lock.acquire(lock_ttl=60)
                except etcd.EtcdLockExpired:
                    continue

            NS._int.wclient.delete("clusters/%s/Volumes/%s" %
                                   (NS.tendrl_context.integration_id,
                                    vol_id),
                                   recursive=True)
        except (etcd.EtcdKeyNotFound, KeyError):
            # The key is already absent, so the volume counts as deleted.
            _notify("info", "Deleted the volume %s" % params['Volume.volname'])
        finally:
            lock.release()

        return True
Ejemplo n.º 8
0
    def s3kv_get(self, bucket_name, obj_key):
        """Fetch an object through the Redis cache, falling back to S3.

        While holding the etcd lock ``<bucket_name>/<obj_key>``:
          * return the value cached in Redis under ``obj_key`` if present;
          * otherwise read the expected hash from the etcd key
            ``/<bucket_name>/<obj_key>`` and download the object from S3,
            retrying until a download succeeds.

        After the lock is released the MD5 hex digest of the downloaded data
        is compared with the hash from etcd; on a match the data is stored
        in Redis and returned.

        The lock is released on every path, and the S3 body is read exactly
        once (a second ``read()`` on the same stream yields no data, which
        would make the hash comparison fail).

        Args:
            bucket_name: S3 bucket name; surrounding slashes are ignored
                when building the lock and etcd names.
            obj_key: object key; leading and trailing slashes are stripped.

        Returns:
            The cached or downloaded object data, or None when the etcd key
            does not exist, the lookup failed, or the hash does not match.
        """
        # Remove leading and trailing slashes from the object key.
        obj_key = re.sub(r'^/*(.+?)/*$', r'\g<1>', obj_key)
        # lock name <bucket_name>/<obj_key>
        lock_name = re.sub(r'^/*(.+?)/*$', r'\g<1>/', bucket_name) + obj_key
        # etcd name /<bucket_name>/<obj_key>
        etcd_name = re.sub(r'^/*(.+?)/*$', r'/\g<1>/', bucket_name) + obj_key

        lock = etcd.Lock(self.etcd_client, lock_name)

        # Acquire the lock over the full path of data obj
        lock.acquire(blocking=True, lock_ttl=None)

        h = None
        obj_data = None
        try:
            # Redis read 1: fetch cached obj from Redis
            val_obj = redis.get(obj_key)
            if val_obj is not None:
                return val_obj

            try:
                # For a Redis cache miss, read the latest hash value of the
                # data obj / h is the hash value
                h = self.etcd_client.read(etcd_name).value
                # NOTE(review): this retries without delay or limit while
                # the lock is held; a persistent S3 failure never returns.
                while True:
                    try:
                        response = self.s3_client.get_object(
                            Bucket=bucket_name,
                            Key=obj_key,
                        )
                        # Read the stream once and keep the bytes.
                        obj_data = response.get('Body').read()
                        break
                    except Exception:
                        continue
            except etcd.EtcdKeyNotFound:
                # No hash recorded for this object: nothing to validate.
                pass
            except Exception:
                logging.error(traceback.format_exc())
        finally:
            lock.release()

        if obj_data is None:
            return None

        # double check the hash value of fetched obj matches h
        if hashlib.md5(obj_data).hexdigest() == h:
            redis.set(obj_key, obj_data)
            return obj_data
        return None
Ejemplo n.º 9
0
    def run(self):
        """Stop and delete a gluster volume, then remove it from etcd.

        Steps, each reported through an ``Event``:
          1. stop the volume via ``NS.gdeploy_plugin.stop_volume``;
          2. delete it via ``NS.gdeploy_plugin.delete_volume``;
          3. delete ``clusters/<integration_id>/Volumes/<vol_id>`` from the
             central store while holding the ``'volume'`` etcd lock.

        Returns:
            True when the volume was stopped and deleted, False when either
            the stop or the delete step reported a failure.

        Raises:
            Any exception from the central-store step other than
            ``etcd.EtcdKeyNotFound`` / ``KeyError`` propagates after the
            lock is released, instead of being discarded by a ``return``
            inside ``finally``.
        """
        params = self.parameters
        vol_id = params['Volume.vol_id']

        def _notify(priority, message):
            # Every event of this flow carries the same job/flow/cluster ids.
            Event(
                Message(
                    priority=priority,
                    publisher=NS.publisher_id,
                    payload={"message": message},
                    job_id=params["job_id"],
                    flow_id=params["flow_id"],
                    cluster_id=NS.tendrl_context.integration_id,
                ))

        if not NS.gdeploy_plugin.stop_volume(params.get('Volume.volname')):
            _notify("error",
                    "Could not stop volume %s before delete" %
                    params['Volume.volname'])
            return False
        _notify("info",
                "Stopped the volume %s before delete" %
                params['Volume.volname'])

        args = {}
        # NOTE(review): the guard tests 'Volume.volname' but forwards
        # 'Volume.format_bricks'; this looks like a copy-paste slip. Kept
        # as-is because callers may rely on format_bricks always being sent.
        if params.get('Volume.volname') is not None:
            args["format_bricks"] = params.get('Volume.format_bricks')

        if not NS.gdeploy_plugin.delete_volume(
                params.get('Volume.volname'), **args):
            _notify("error",
                    "Failed to delete volume %s" % params['Volume.volname'])
            return False
        _notify("info", "Deleted the volume %s" % params['Volume.volname'])

        # Acquire lock before deleting the volume from etcd.
        # The lock object is created outside the try block so that a failure
        # to build it is not masked by an unbound name in the finally clause.
        lock = etcd.Lock(NS.etcd_orm.client, 'volume')
        try:
            # We are blocking till we acquire the lock
            lock.acquire(blocking=True, lock_ttl=None)
            NS.etcd_orm.client.delete("clusters/%s/Volumes/%s" %
                                      (NS.tendrl_context.integration_id,
                                       vol_id),
                                      recursive=True)
        except (etcd.EtcdKeyNotFound, KeyError):
            # The key is already absent, so the volume counts as deleted.
            _notify("info", "Deleted the volume %s" % params['Volume.volname'])
        finally:
            lock.release()

        return True