Example #1
File: reconciler.py Project: sapcc/swift
def direct_delete_container_entry(container_ring,
                                  account_name,
                                  container_name,
                                  object_name,
                                  headers=None):
    """
    Talk directly to the primary container servers to delete a particular
    object listing. Does not talk to object servers; use this only when a
    container entry does not actually have a corresponding object.
    """
    if headers is None:
        headers = {}
    headers[USE_REPLICATION_NETWORK_HEADER] = 'true'

    pool = GreenPool()
    part, nodes = container_ring.get_nodes(account_name, container_name)
    for node in nodes:
        pool.spawn_n(direct_delete_container_object,
                     node,
                     part,
                     account_name,
                     container_name,
                     object_name,
                     headers=headers)

    # This either worked or it didn't; if it didn't, we'll retry on the next
    # reconciler loop when we see the queue entry again.
    pool.waitall()
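The fire-and-forget shape above (spawn_n into a GreenPool, then waitall) recurs throughout these examples. A minimal self-contained sketch of that pattern, assuming only that eventlet is installed; the worker function and node names here are made up for illustration:

from eventlet import GreenPool


def delete_entry(node):
    # stand-in for a per-node request such as direct_delete_container_object
    print('deleting entry on %s' % node)


pool = GreenPool()                    # default pool size is 1000 greenlets
for node in ['node1', 'node2', 'node3']:
    pool.spawn_n(delete_entry, node)  # spawn_n returns nothing; failures are not reported back
pool.waitall()                        # block until every spawned greenlet has finished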
Example #2
File: reconciler.py Project: sapcc/swift
    def reconcile(self):
        """
        Main entry point for concurrent processing of misplaced objects.

        Iterate over all queue entries and delegate processing to spawned
        workers in the pool.
        """
        self.logger.debug('pulling items from the queue')
        pool = GreenPool(self.concurrency)
        for container in self._iter_containers():
            self.logger.debug('checking container %s', container)
            for raw_obj in self._iter_objects(container):
                try:
                    queue_item = parse_raw_obj(raw_obj)
                except Exception:
                    self.stats_log('invalid_record',
                                   'invalid queue record: %r',
                                   raw_obj,
                                   level=logging.ERROR,
                                   exc_info=True)
                    continue
                if self.should_process(queue_item):
                    pool.spawn_n(self.process_queue_item, container,
                                 raw_obj['name'], queue_item)
            self.log_stats()
        pool.waitall()
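Because the pool in reconcile() is sized with self.concurrency, spawn_n blocks whenever that many items are already being processed, so the loop over the queue is throttled automatically. A rough sketch of that back-pressure behaviour, with a made-up work function:

from eventlet import GreenPool, sleep


def process_item(item):
    sleep(0.1)              # stand-in for real work (network I/O, parsing, ...)
    print('done with', item)


pool = GreenPool(4)         # at most 4 items in flight at any moment
for item in range(20):
    pool.spawn_n(process_item, item)   # blocks here while the pool is full
pool.waitall()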
Example #3
    def run(self):
        signal.signal(signal.SIGINT, self.signal_handler)
        pool = GreenPool()

        with open("beka.yaml") as file:
            config = yaml.load(file.read())
        for router in config["routers"]:
            printmsg("Starting Beka on %s" % router["local_address"])
            beka = Beka(
                router["local_address"],
                router["bgp_port"],
                router["local_as"],
                router["router_id"],
                self.peer_up_handler,
                self.peer_down_handler,
                self.route_handler,
                self.error_handler
            )
            for peer in router["peers"]:
                beka.add_neighbor(
                    "passive",
                    peer["peer_ip"],
                    peer["peer_as"],
                )
            if "routes" in router:
                for route in router["routes"]:
                    beka.add_route(
                        route["prefix"],
                        route["next_hop"]
                    )
            self.bekas.append(beka)
            pool.spawn_n(beka.run)
        pool.waitall()
        printmsg("All greenlets gone, exiting")
Example #4
    def run_fd_server(self):
        server = eventlet.listen(('127.0.0.1', consts.GLOBAL_FB_PORT))
        pool = GreenPool(10000)
        while True:
            fd, addr = server.accept()  # accept returns (conn, address), so fd is a connection
            self.logger.info("global receives a connection")
            # self.global_conn(fd)
            pool.spawn_n(self.global_conn, fd)
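The accept loop above never reaches waitall() because it serves forever; each accepted connection is simply handed to the pool. A self-contained echo-server sketch with the same structure (the port and handler are invented):

import eventlet
from eventlet import GreenPool


def handle(conn):
    # echo bytes back until the client disconnects
    while True:
        data = conn.recv(4096)
        if not data:
            break
        conn.sendall(data)
    conn.close()


server = eventlet.listen(('127.0.0.1', 6000))
pool = GreenPool(1000)
while True:
    conn, addr = server.accept()
    pool.spawn_n(handle, conn)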
Example #5
    def runtestsmulti(self, envlist):
        pool = GreenPool(size=self._toxconfig.option.numproc)
        for env in envlist:
            pool.spawn_n(self.runtests, env)
        pool.waitall()
        if not self.toxsession.config.option.sdistonly:
            retcode = self._toxsession._summary()
            return retcode
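spawn_n discards each worker's return value, which is presumably why this runner asks _toxsession._summary() for the aggregate result afterwards. When per-task return values are wanted, GreenPool also provides spawn() (which returns a waitable GreenThread) and imap(); a small sketch with an invented task function:

from eventlet import GreenPool


def run_env(env):
    # stand-in for a per-environment test run; returns an exit code
    return 1 if env == 'broken' else 0


pool = GreenPool(4)

# keep the GreenThread handles and wait on each one for its return value
threads = [pool.spawn(run_env, env) for env in ['py37', 'py38', 'broken']]
print([t.wait() for t in threads])          # [0, 0, 1]

# or let imap run the function over an iterable, yielding results in input order
print(list(pool.imap(run_env, ['py37', 'py38', 'broken'])))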
Example #6
class Concurrency(object):
    """
    Convenience class to support concurrency, if Eventlet is available;
    otherwise it just performs at single concurrency.

    :param concurrency: The level of concurrency desired. Default: 10
    """

    def __init__(self, concurrency=10):
        if concurrency and GreenPool:
            self._pool = GreenPool(concurrency)
        else:
            self._pool = None
        self._queue = Queue()
        self._results = {}

    def _spawner(self, ident, func, *args, **kwargs):
        self._queue.put((ident, func(*args, **kwargs)))

    def spawn(self, ident, func, *args, **kwargs):
        """
        Returns immediately to the caller and begins executing the func in
        the background. Use get_results and the ident given to retrieve the
        results of the func.

        :param ident: An identifier to find the results of the func from
            get_results. This identifier can be anything unique to
            the Concurrency instance.
        :param func: The function to execute concurrently.
        :param args: The args to give the func.
        :param kwargs: The keyword args to give the func.
        :returns: None
        """
        if self._pool:
            self._pool.spawn_n(self._spawner, ident, func, *args, **kwargs)
        else:
            self._spawner(ident, func, *args, **kwargs)

    def get_results(self):
        """
        Returns a dict of the results currently available. The keys are the
        ident values given with the calls to spawn. The values are the return
        values of the funcs.
        """
        try:
            while True:
                ident, value = self._queue.get(block=False)
                self._results[ident] = value
        except Empty:
            pass
        return self._results

    def join(self):
        """
        Blocks until all currently pending functions have finished.
        """
        if self._pool:
            self._pool.waitall()
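A short usage sketch for the Concurrency class above, assuming the imports the class itself relies on (GreenPool, Queue, Empty) are present; the fetch function is invented. Note that get_results() only returns whatever has finished so far, while join() blocks for everything pending:

def fetch(url):
    return 'payload for %s' % url      # stand-in for real I/O


conc = Concurrency(concurrency=10)
for ident, url in enumerate(['http://a', 'http://b', 'http://c']):
    conc.spawn(ident, fetch, url)      # ident is the key used to find the result later

conc.join()                            # wait for all pending funcs to finish
for ident, value in conc.get_results().items():
    print(ident, value)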
Example #8
File: agent.py Project: korween/oio-sds
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
                while True:
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = sqlite3.Binary(msg[2])
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    event_id = sqlite3.Binary(event_id)
                    self.queue.delete(event_id)

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:
                sleep(1)

                now = time.time()
                if now - self.last_retry > self.retry_interval:
                    self.retry()
                    self.last_retry = now

                for w in self.workers:
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise e
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()
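This agent mixes pool.spawn() for the workers with boss_pool.spawn_n() for the front/back loops. The practical difference: spawn() returns a GreenThread whose result or exception can be retrieved with wait(), while spawn_n() returns nothing; the agent discards both, but the distinction matters when outcomes must be observed. A small standalone sketch, unrelated to the agent itself:

from eventlet import GreenPool


def may_fail(n):
    if n % 2:
        raise ValueError('odd input %d' % n)
    return n * 10


pool = GreenPool()

gt = pool.spawn(may_fail, 2)
print(gt.wait())                 # 20

gt = pool.spawn(may_fail, 3)
try:
    gt.wait()                    # the worker's exception is re-raised here
except ValueError as exc:
    print('worker failed:', exc)

pool.spawn_n(may_fail, 3)        # no handle: the caller cannot retrieve the result or the error
pool.waitall()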
Example #9
def calculate_similar_movies(n=10, similarity=sim_pearson):
    """
    Calculate and save similarity scores for all movies in the database. This
    will take a long time. The algorithm is parallelized using a greenlet pool.
    """
    pool = GreenPool(size=30)
    movies = db.get_movies()
    movie_count = len(movies)
    sys.stdout.write("Processing {0} movies\n".format(movie_count))
    for movie in movies:
        pool.spawn_n(do_movie_similarity_calculation, movie, n=n, similarity=sim_distance)
    pool.waitall()
Example #10
def calculate_similar_movies(n=10, similarity=sim_pearson):
    """
    Calculate and save similarity scores for all movies in the database. This
    will take a long time. The algorithm is parallelized using a greenlet pool.
    """
    pool = GreenPool(size=30)
    movies = db.get_movies()
    movie_count = len(movies)
    sys.stdout.write("Processing {0} movies\n".format(movie_count))
    for movie in movies:
        pool.spawn_n(do_movie_similarity_calculation,
                     movie,
                     n=n,
                     similarity=sim_distance)
    pool.waitall()
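Both variants above only print the movie count up front. GreenPool also exposes running(), so a long pass like this could report progress while the last batch drains; a hedged sketch with a dummy workload standing in for the similarity calculation:

from eventlet import GreenPool, sleep


def crunch(i):
    sleep(0.05)                 # stand-in for one similarity calculation


pool = GreenPool(size=30)
for i in range(200):
    pool.spawn_n(crunch, i)     # blocks whenever all 30 slots are busy

# report while the remaining greenlets drain, then wait for the stragglers
while pool.running():
    print('%d calculations still in flight' % pool.running())
    sleep(0.5)
pool.waitall()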
Example #11
    def test_connection(self):
        """
        conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                          client_id=str(uuid.uuid4()),
                          endpoint="http://localhost:8888/v1/12345",
                          user="", key="")

        """

        conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                          client_id=str(uuid.uuid4()),
                          endpoint="http://166.78.143.130/v1/12345",
                          user="", key="")


        conn.connect(token='blah')

        def create_worker(queue_name):
            return conn.create_queue(queue_name, 100)

        def post_worker(queue):
            return queue.post_message('test_message', 10)

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            return queue_name

        pool = GreenPool(1000)

        def on_message_posted(greenthread):
            msg = greenthread.wait()
            print msg._href

        def on_queue_created(greenthread):
            queue = greenthread.wait()
            print queue.name

            for x in range(0, 10):
                gt = pool.spawn(post_worker, queue)
                gt.link(on_message_posted)

        queue_names = ["queue-"+str(x) for x in xrange(0,5)]

        for queue_name in queue_names:
            gt = pool.spawn(create_worker, queue_name)
            gt.link(on_queue_created)

        pool.waitall()

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            print "Queue:", queue_name, " deleted"

        for queue in conn.get_queues():
            gt = pool.spawn_n(delete_worker, queue.name)

        print "Waiting for everything to finish"
        pool.waitall()
        print "Done"
Example #12
File: reconciler.py Project: 701/swift
def direct_delete_container_entry(container_ring, account_name, container_name,
                                  object_name, headers=None):
    """
    Talk directly to the primary container servers to delete a particular
    object listing. Does not talk to object servers; use this only when a
    container entry does not actually have a corresponding object.
    """
    pool = GreenPool()
    part, nodes = container_ring.get_nodes(account_name, container_name)
    for node in nodes:
        pool.spawn_n(direct_delete_container_object, node, part, account_name,
                     container_name, object_name, headers=headers)

    # This either worked or it didn't; if it didn't, we'll retry on the next
    # reconciler loop when we see the queue entry again.
    pool.waitall()
Example #13
class ZerovmDaemon:

    def __init__(self, socket_name):
        self.server_address = socket_name
        self.zerovm_exename = ['zerovm']
        self.pool = GreenPool()
        self.jobs = set()
        self.stats_dir = '/tmp'

    def parse_command(self, fd):
        try:
            size = int(fd.read(8), 0)
            data = fd.read(size)
            return data
        except IOError:
            return None

    def handle(self, fd):
        data = self.parse_command(fd)
        manifest = data
        report = self.execute(manifest)
        self.send_response(fd, report)

    def serve(self):
        try:
            os.remove(self.server_address)
        except OSError:
            pass
        server = listen(self.server_address, family=socket.AF_UNIX)
        while True:
            try:
                new_sock, address = server.accept()
                self.pool.spawn_n(self.handle, new_sock.makefile('rw'))
            except (SystemExit, KeyboardInterrupt):
                break

    def send_response(self, fd, report):
        data = '0x%06x%s' % (len(report), report)
        try:
            fd.write(data)
        except IOError:
            pass

    def execute(self, manifest):
        pass
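parse_command and send_response imply a simple framing protocol: an 8-character hex length prefix ('0x%06x') followed by the payload. A hedged client-side sketch of that framing over the daemon's Unix socket; the socket path and manifest text are made up:

import socket


def send_command(path, manifest):
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(path)
    fd = sock.makefile('rw')
    fd.write('0x%06x%s' % (len(manifest), manifest))   # length-prefixed request
    fd.flush()
    size = int(fd.read(8), 0)                          # the reply uses the same framing
    report = fd.read(size)
    sock.close()
    return report


# report = send_command('/tmp/zerovm.sock', 'manifest text goes here')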
Example #14
File: code.py Project: hammadk373/crawley
    def run(self, run_command):
        """
            Run the crawler of a code project
        """

        crawler = import_user_module("crawlers")
        models = import_user_module("models")

        pool = GreenPool()

        for crawler_class in user_crawlers:

            spider = crawler_class(sessions=run_command.syncdb.sessions, debug=run_command.settings.SHOW_DEBUG_INFO)
            pool.spawn_n(spider.start)

        pool.waitall()

        for session in run_command.syncdb.sessions:
            session.close()
Example #15
class Replicator(Daemon):
    """
    Implements the logic for directing db replication.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='replicator')
        self.root = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.port = int(conf.get('bind_port', self.default_port))
        concurrency = int(conf.get('concurrency', 8))
        self.cpool = GreenPool(size=concurrency)
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.ring = ring.Ring(swift_dir, ring_name=self.server_type)
        self._local_device_ids = set()
        self.per_diff = int(conf.get('per_diff', 1000))
        self.max_diffs = int(conf.get('max_diffs') or 100)
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self._zero_stats()
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.recon_replicator = '%s.recon' % self.server_type
        self.rcache = os.path.join(self.recon_cache_path,
                                   self.recon_replicator)
        self.extract_device_re = re.compile('%s%s([^%s]+)' % (
            self.root, os.path.sep, os.path.sep))

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0,
                      'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0,
                      'remove': 0, 'empty': 0, 'remote_merge': 0,
                      'start': time.time(), 'diff_capped': 0}

    def _report_stats(self):
        """Report the current stats to the logs."""
        now = time.time()
        self.logger.info(
            _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
              '(%(rate).5f/s)'),
            {'count': self.stats['attempted'],
             'time': now - self.stats['start'],
             'rate': self.stats['attempted'] /
                (now - self.stats['start'] + 0.0000001)})
        self.logger.info(_('Removed %(remove)d dbs') % self.stats)
        self.logger.info(_('%(success)s successes, %(failure)s failures')
                         % self.stats)
        dump_recon_cache(
            {'replication_stats': self.stats,
             'replication_time': now - self.stats['start'],
             'replication_last': now},
            self.rcache, self.logger)
        self.logger.info(' '.join(['%s:%s' % item for item in
                         self.stats.items() if item[0] in
                         ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl',
                          'empty', 'diff_capped')]))

    def _rsync_file(self, db_file, remote_file, whole_file=True):
        """
        Sync a single file using rsync. Used by _rsync_db to handle syncing.

        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag

        :returns: True if the sync was successful, False otherwise
        """
        popen_args = ['rsync', '--quiet', '--no-motd',
                      '--timeout=%s' % int(math.ceil(self.node_timeout)),
                      '--contimeout=%s' % int(math.ceil(self.conn_timeout))]
        if whole_file:
            popen_args.append('--whole-file')
        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {'code': proc.returncode, 'args': popen_args})
        return proc.returncode == 0

    def _rsync_db(self, broker, device, http, local_id,
                  replicate_method='complete_rsync', replicate_timeout=None):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        """
        device_ip = rsync_ip(device['replication_ip'])
        if self.vm_test_mode:
            remote_file = '%s::%s%s/%s/tmp/%s' % (
                device_ip, self.server_type, device['replication_port'],
                device['device'], local_id)
        else:
            remote_file = '%s::%s/%s/tmp/%s' % (
                device_ip, self.server_type, device['device'], local_id)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file, remote_file):
            return False
        # perform block-level sync if the db was modified during the first sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file, remote_file, False):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id)
        return response and response.status >= 200 and response.status < 300

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.

        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica

        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('Syncing chunks with %s, starting at %s',
                          http.host, point)
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            with Timeout(self.node_timeout):
                response = http.replicate('merge_items', objects, local_id)
            if not response or response.status >= 300 or response.status < 200:
                if response:
                    self.logger.error(_('ERROR Bad response %(status)s from '
                                        '%(host)s'),
                                      {'status': response.status,
                                       'host': http.host})
                return False
            # replication relies on db order to send the next merge batch in
            # order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)
        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and response.status >= 200 and response.status < 300:
                broker.merge_syncs([{'remote_id': remote_id,
                                     'sync_point': point}],
                                   incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas

        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{'remote_id': rinfo['id'],
                                 'sync_point': rinfo['point']}],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the url
        :param db_file: DB file

        :returns: ReplConnection object
        """
        return ReplConnection(node, partition,
                              os.path.basename(db_file).split('.', 1)[0],
                              self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self, node, broker, partition, info):
        """
        Replicate a database to a node.

        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
                     'created_at', 'put_timestamp', 'delete_timestamp',
                     'metadata'}

        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node, response, info, broker, http)

    def _handle_sync_response(self, node, response, info, broker, http):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker, node, http, info['id'])
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif response.status >= 200 and response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if self._in_sync(rinfo, info, broker, local_sync):
                return True
            # if the difference in rowids between the two differs by
            # more than 50%, rsync then do a remote merge.
            if rinfo['max_row'] / float(info['max_row']) < 0.5:
                self.stats['remote_merge'] += 1
                self.logger.increment('remote_merges')
                return self._rsync_db(broker, node, http, info['id'],
                                      replicate_method='rsync_then_merge',
                                      replicate_timeout=(info['count'] / 2000))
            # else send diffs over to the remote server
            return self._usync_db(max(rinfo['point'], local_sync),
                                  broker, http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: the container that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(
                info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            self.stats['failure'] += 1
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is greater
        # than the put_timestamp, and there are no objects.
        delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
        put_timestamp = Timestamp(info.get('put_timestamp') or 0)
        if delete_timestamp < (now - self.reclaim_age) and \
                delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        nodes = self.ring.get_part_nodes(int(partition))
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        i = 0
        while i < len(nodes) and nodes[i]['id'] != node_id:
            i += 1
        repl_nodes = nodes[i + 1:] + nodes[:i]
        more_nodes = self.ring.get_more_nodes(int(partition))
        for node in repl_nodes:
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info)
            except DriveNotMounted:
                repl_nodes.append(more_nodes.next())
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            self.stats['success' if success else 'failure'] += 1
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
                                  'hook for %s', broker.db_file)
        if not shouldbehere and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
            self.delete_db(broker)
        self.logger.timing_since('timing', start_time)

    def delete_db(self, broker):
        object_file = broker.db_file
        hash_dir = os.path.dirname(object_file)
        suf_dir = os.path.dirname(hash_dir)
        with lock_parent_directory(object_file):
            shutil.rmtree(hash_dir, True)
        try:
            os.rmdir(suf_dir)
        except OSError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                self.logger.exception(
                    _('ERROR while trying to clean up %s') % suf_dir)
        self.stats['remove'] += 1
        device_name = self.extract_device(object_file)
        self.logger.increment('removes.' + device_name)

    def extract_device(self, object_file):
        """
        Extract the device name from an object path.  Returns "UNKNOWN" if the
        path could not be extracted successfully for some reason.

        :param object_file: the path to a database file.
        """
        match = self.extract_device_re.match(object_file)
        if match:
            return match.groups()[0]
        return "UNKNOWN"

    def report_up_to_date(self, full_info):
        return True

    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        self._zero_stats()
        dirs = []
        ips = whataremyips()
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return
        self._local_device_ids = set()
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                if self.mount_check and not ismount(
                        os.path.join(self.root, node['device'])):
                    self.logger.warn(
                        _('Skipping %(device)s as it is not mounted') % node)
                    continue
                unlink_older_than(
                    os.path.join(self.root, node['device'], 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    dirs.append((datadir, node['id']))
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        self._report_stats()

    def run_forever(self, *args, **kwargs):
        """
        Replicate dbs under the given root in an infinite loop.
        """
        sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            try:
                self.run_once()
            except (Exception, Timeout):
                self.logger.exception(_('ERROR trying to replicate'))
            elapsed = time.time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)
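run_forever() follows a common daemon shape: a random initial jitter so replicators do not start in lockstep, then run_once() and a sleep for whatever remains of the interval. A stripped-down sketch of that loop with a made-up pass:

import random
import time

from eventlet import sleep


def run_once():
    sleep(0.2)                          # stand-in for one replication pass


interval = 30
sleep(random.random() * interval)       # initial jitter
while True:
    begin = time.time()
    try:
        run_once()
    except Exception:
        pass                            # a real daemon would log the failure here
    elapsed = time.time() - begin
    if elapsed < interval:
        sleep(interval - elapsed)       # sleep only the remainder of the interval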
Example #16
class btclient(Thread):
    def __init__(self, infohash_queue):
        Thread.__init__(self)
        self.setDaemon(True)
        self.infohash_queue = infohash_queue
        self.metadata_queue = Queue()
        self.dowloaded = set()
        self.pool = GreenPool()
        self.running = False

    def run(self):
        self.running = True
        while self.running:
            if self.infohash_queue.empty():
                sleep(3)
            else:
                infohash, address = self.infohash_queue.get()
                self.pool.spawn_n(self.download_metadata, address, infohash,
                                  self.metadata_queue)

    def stop(self):
        self.running = False

    def metadata_queue(self):
        return self.metadata_queue

    def download_metadata(self, address, infohash, metadata_queue, timeout=5):
        metadata = []
        start_time = time()
        if infohash in self.dowloaded:
            return
        try:
            the_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # the_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # the_socket.bind(('0.0.0.0', 9000))
            the_socket.settimeout(timeout)
            the_socket.connect(address)

            # handshake
            send_handshake(the_socket, infohash)
            packet = the_socket.recv(4096)

            # handshake error
            if not check_handshake(packet, infohash):
                return

            # ext handshake
            send_ext_handshake(the_socket)
            packet = the_socket.recv(4096)

            # get ut_metadata and metadata_size
            ut_metadata, metadata_size = get_ut_metadata(
                packet), get_metadata_size(packet)

            # request each piece of metadata
            for piece in range(int(math.ceil(metadata_size / (16.0 * 1024)))):
                if infohash in self.dowloaded:
                    break
                request_metadata(the_socket, ut_metadata, piece)
                packet = recvall(the_socket,
                                 timeout)  # the_socket.recv(1024*17)
                metadata.append(packet[packet.index("ee") + 2:])
                if '6:pieces' in packet:
                    break

        except socket.timeout:
            logger.debug('Connect timeout to %s:%d' % address)
            # TODO: Maybe need NAT Traversa
        except socket.error as error:
            errno, err_msg = error
            if errno == 10052:
                logger.debug(
                    'Network dropped connection on reset(10052) %s:%d' %
                    address)
            elif errno == 10061:
                logger.debug('Connection refused(10061) %s:%d' % address)
            else:
                logger.error(err_msg)
        except Exception:
            pass
        finally:
            the_socket.close()
            metadata = "".join(metadata)
            if metadata.startswith('d') and '6:pieces' in metadata:
                metadata = metadata[:metadata.index('6:pieces')] + 'e'
                try:
                    d_metadata = bdecode(metadata)
                except Exception as e:
                    logger.error(str(e) + 'metadata: ' + metadata)
                else:
                    self.dowloaded.add(infohash)
                    metadata_queue.put(
                        (infohash, address, d_metadata, time() - start_time))
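The thread above only fills metadata_queue; consuming it is left to the caller. A hedged sketch of a non-blocking drain, assuming the queue is the standard library Queue the class appears to use (each item is the (infohash, address, metadata, elapsed) tuple put above):

try:
    from Queue import Empty   # Python 2, matching the style of this example
except ImportError:
    from queue import Empty   # Python 3


def drain(metadata_queue):
    """Pull everything currently available off the queue without blocking."""
    results = []
    try:
        while True:
            results.append(metadata_queue.get(block=False))
    except Empty:
        pass
    return results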
Example #17
    def test_connection(self):
        """
        conn = Connection(
            auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
            client_id=str(uuid.uuid4()),
            endpoint="http://localhost:8888/v1/12345",
            user="", key="")

        """

        conn = Connection(
            auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
            client_id=str(uuid.uuid4()),
            endpoint="http://166.78.143.130/v1/12345",
            user="",
            key="")

        conn.connect(token='blah')

        def create_worker(queue_name):
            return conn.create_queue(queue_name)

        def post_worker(queue):
            return queue.post_message('test_message', 10)

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            return queue_name

        pool = GreenPool(100)

        def on_message_posted(greenthread):
            msg = greenthread.wait()
            print msg._href

        def on_queue_created(greenthread):
            queue = greenthread.wait()
            print queue.name

            for x in range(0, 10):
                gt = pool.spawn(post_worker, queue)
                gt.link(on_message_posted)

        queue_names = ["queue-" + str(x) for x in xrange(0, 5)]

        for queue_name in queue_names:
            gt = pool.spawn(create_worker, queue_name)
            gt.link(on_queue_created)

        pool.waitall()

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            print "Queue:", queue_name, " deleted"

        for queue in conn.get_queues():
            gt = pool.spawn_n(delete_worker, queue.name)

        print "Waiting for everything to finish"
        pool.waitall()
        print "Done"
Example #18
File: agent.py Project: ldenel/oio-sds
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
                while True:
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = sqlite3.Binary(msg[2])
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    success = msg[2]
                    event_id = sqlite3.Binary(event_id)
                    if not success:
                        self.queue.failed(event_id)
                    else:
                        self.queue.delete(event_id)

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:
                sleep(1)

                now = time.time()
                if now - self.last_retry > self.retry_interval:
                    self.retry()
                    self.last_retry = now

                for w in self.workers:
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise e
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()
Example #19
class Replicator(Daemon):
    """
    Implements the logic for directing db replication.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='replicator')
        self.root = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.port = int(conf.get('bind_port', self.default_port))
        concurrency = int(conf.get('concurrency', 8))
        self.cpool = GreenPool(size=concurrency)
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.ring = ring.Ring(swift_dir, ring_name=self.server_type)
        self._local_device_ids = set()
        self.per_diff = int(conf.get('per_diff', 1000))
        self.max_diffs = int(conf.get('max_diffs') or 100)
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        if 'run_pause' in conf and 'interval' not in conf:
            self.logger.warning('Option %(type)s-replicator/run_pause '
                                'is deprecated and will be removed in a '
                                'future version. Update your configuration'
                                ' to use option %(type)s-replicator/'
                                'interval.'
                                % {'type': self.server_type})
        self.databases_per_second = int(
            conf.get('databases_per_second', 50))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::%s' % self.server_type
        self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self._zero_stats()
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.recon_replicator = '%s.recon' % self.server_type
        self.rcache = os.path.join(self.recon_cache_path,
                                   self.recon_replicator)
        self.extract_device_re = re.compile('%s%s([^%s]+)' % (
            self.root, os.path.sep, os.path.sep))
        self.handoffs_only = config_true_value(conf.get('handoffs_only', 'no'))

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0,
                      'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0,
                      'remove': 0, 'empty': 0, 'remote_merge': 0,
                      'start': time.time(), 'diff_capped': 0, 'deferred': 0,
                      'failure_nodes': {}}

    def _report_stats(self):
        """Report the current stats to the logs."""
        now = time.time()
        self.logger.info(
            _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
              '(%(rate).5f/s)'),
            {'count': self.stats['attempted'],
             'time': now - self.stats['start'],
             'rate': self.stats['attempted'] /
                (now - self.stats['start'] + 0.0000001)})
        self.logger.info(_('Removed %(remove)d dbs') % self.stats)
        self.logger.info(_('%(success)s successes, %(failure)s failures')
                         % self.stats)
        dump_recon_cache(
            {'replication_stats': self.stats,
             'replication_time': now - self.stats['start'],
             'replication_last': now},
            self.rcache, self.logger)
        self.logger.info(' '.join(['%s:%s' % item for item in
                         sorted(self.stats.items()) if item[0] in
                         ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl',
                          'empty', 'diff_capped', 'remote_merge')]))

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats['failure'] += 1
            failure_devs = self.stats['failure_nodes'].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1

    def _rsync_file(self, db_file, remote_file, whole_file=True,
                    different_region=False):
        """
        Sync a single file using rsync. Used by _rsync_db to handle syncing.

        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag
        :param different_region: if True, the destination node is in a
                                 different region

        :returns: True if the sync was successful, False otherwise
        """
        popen_args = ['rsync', '--quiet', '--no-motd',
                      '--timeout=%s' % int(math.ceil(self.node_timeout)),
                      '--contimeout=%s' % int(math.ceil(self.conn_timeout))]
        if whole_file:
            popen_args.append('--whole-file')

        if self.rsync_compress and different_region:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            popen_args.append('--compress')

        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {'code': proc.returncode, 'args': popen_args})
        return proc.returncode == 0

    def _rsync_db(self, broker, device, http, local_id,
                  replicate_method='complete_rsync', replicate_timeout=None,
                  different_region=False):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        :param different_region: if True, the destination node is in a
                                 different region
        """
        rsync_module = rsync_module_interpolation(self.rsync_module, device)
        rsync_path = '%s/tmp/%s' % (device['device'], local_id)
        remote_file = '%s/%s' % (rsync_module, rsync_path)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file, remote_file,
                                different_region=different_region):
            return False
        # perform block-level sync if the db was modified during the first sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file, remote_file,
                                        whole_file=False,
                                        different_region=different_region):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id,
                                      os.path.basename(broker.db_file))
        return response and 200 <= response.status < 300

    def _send_replicate_request(self, http, *repl_args):
        with Timeout(self.node_timeout):
            response = http.replicate(*repl_args)
        if not response or not is_success(response.status):
            if response:
                self.logger.error('ERROR Bad response %s from %s',
                                  response.status, http.host)
            return False
        return True

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.

        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica

        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('%s usyncing chunks to %s, starting at row %s',
                          broker.db_file,
                          '%(ip)s:%(port)s/%(device)s' % http.node,
                          point)
        start = time.time()
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            if not self._send_replicate_request(
                    http, 'merge_items', objects, local_id):
                return False
            # replication relies on db order to send the next merge batch in
            # order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)

        self.logger.debug('%s usyncing chunks to %s, finished at row %s (%gs)',
                          broker.db_file,
                          '%(ip)s:%(port)s/%(device)s' % http.node,
                          point, time.time() - start)

        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and 200 <= response.status < 300:
                broker.merge_syncs([{'remote_id': remote_id,
                                     'sync_point': point}],
                                   incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas

        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{'remote_id': rinfo['id'],
                                 'sync_point': rinfo['point']}],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the url
        :param db_file: DB file

        :returns: ReplConnection object
        """
        hsh, other, ext = parse_db_filename(db_file)
        return ReplConnection(node, partition, hsh, self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self, node, broker, partition, info,
                      different_region=False):
        """
        Replicate a database to a node.

        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
                     'created_at', 'put_timestamp', 'delete_timestamp',
                     'metadata'}
        :param different_region: if True, the destination node is in a
                                 different region

        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node, response, info, broker, http,
                                          different_region=different_region)

    def _handle_sync_response(self, node, response, info, broker, http,
                              different_region=False):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker, node, http, info['id'],
                                  different_region=different_region)
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif 200 <= response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if rinfo.get('metadata', ''):
                broker.update_metadata(json.loads(rinfo['metadata']))
            return self._choose_replication_mode(
                node, rinfo, info, local_sync, broker, http,
                different_region)
        return False

    def _choose_replication_mode(self, node, rinfo, info, local_sync, broker,
                                 http, different_region):
        if self._in_sync(rinfo, info, broker, local_sync):
            self.logger.debug('%s in sync with %s, nothing to do',
                              broker.db_file,
                              '%(ip)s:%(port)s/%(device)s' % node)
            return True

        # If the remote's max_row is less than half the local one and the
        # difference is greater than per_diff, rsync the whole db and then
        # do a remote merge.
        # NOTE: difference > per_diff stops us from dropping to rsync
        # on smaller containers, which have only a few rows to sync.
        if (rinfo['max_row'] / float(info['max_row']) < 0.5 and
                info['max_row'] - rinfo['max_row'] > self.per_diff):
            self.stats['remote_merge'] += 1
            self.logger.increment('remote_merges')
            return self._rsync_db(broker, node, http, info['id'],
                                  replicate_method='rsync_then_merge',
                                  replicate_timeout=(info['count'] / 2000),
                                  different_region=different_region)
        # else send diffs over to the remote server
        return self._usync_db(max(rinfo['point'], local_sync),
                              broker, http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: broker instance for the database that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def cleanup_post_replicate(self, broker, orig_info, responses):
        """
        Clean up a non-primary database from disk if needed.

        :param broker: the broker for the database we're replicating
        :param orig_info: snapshot of the broker replication info dict taken
            before replication
        :param responses: a list of boolean success values for each replication
                          request to other nodes

        :return success: returns False if deletion of the database was
            attempted but unsuccessful, otherwise returns True.
        """
        log_template = 'Not deleting db %s (%%s)' % broker.db_file
        max_row_delta = broker.get_max_row() - orig_info['max_row']
        if max_row_delta < 0:
            reason = 'negative max_row_delta: %s' % max_row_delta
            self.logger.error(log_template, reason)
            return True
        if max_row_delta:
            reason = '%s new rows' % max_row_delta
            self.logger.debug(log_template, reason)
            return True
        if not (responses and all(responses)):
            reason = '%s/%s success' % (responses.count(True), len(responses))
            self.logger.debug(log_template, reason)
            return True
        # If the db has been successfully synced to all of its peers, it can be
        # removed. Callers should have already checked that the db is not on a
        # primary node.
        if not self.delete_db(broker):
            self.logger.debug(
                'Failed to delete db %s', broker.db_file)
            return False
        self.logger.debug('Successfully deleted db %s', broker.db_file)
        return True

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        :returns: a tuple (success, responses). ``success`` is a boolean that
            is True if the method completed successfully, False otherwise.
            ``responses`` is a list of booleans each of which indicates the
            success or not of replicating to a peer node if replication has
            been attempted. ``success`` is False if any of ``responses`` is
            False; when ``responses`` is empty, ``success`` may be either True
            or False.
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        responses = []
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(
                info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return False, responses
        if broker.is_reclaimable(now, self.reclaim_age):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return True, responses
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:  # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to reach '
                          'replica count for partition %s'),
                        partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            if not success:
                failure_devs_info.add((node['replication_ip'], node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
                                  'hook for %s', broker.db_file)
        if not shouldbehere:
            if not self.cleanup_post_replicate(broker, info, responses):
                failure_devs_info.update(
                    [(failure_dev['replication_ip'], failure_dev['device'])
                     for failure_dev in repl_nodes])
        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)

        self.logger.timing_since('timing', start_time)
        if shouldbehere:
            responses.append(True)
        return all(responses), responses

    def delete_db(self, broker):
        object_file = broker.db_file
        hash_dir = os.path.dirname(object_file)
        suf_dir = os.path.dirname(hash_dir)
        with lock_parent_directory(object_file):
            shutil.rmtree(hash_dir, True)
        try:
            os.rmdir(suf_dir)
        except OSError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                self.logger.exception(
                    _('ERROR while trying to clean up %s') % suf_dir)
                return False
        self.stats['remove'] += 1
        device_name = self.extract_device(object_file)
        self.logger.increment('removes.' + device_name)
        return True

    def extract_device(self, object_file):
        """
        Extract the device name from an object path.  Returns "UNKNOWN" if the
        device name could not be extracted for some reason.

        :param object_file: the path to a database file.
        """
        match = self.extract_device_re.match(object_file)
        if match:
            return match.groups()[0]
        return "UNKNOWN"

    def _partition_dir_filter(self, device_id, partitions_to_replicate):

        def filt(partition_dir):
            partition = int(partition_dir)
            if self.handoffs_only:
                primary_node_ids = [
                    d['id'] for d in self.ring.get_part_nodes(partition)]
                if device_id in primary_node_ids:
                    return False

            if partition not in partitions_to_replicate:
                return False

            return True

        return filt

    def report_up_to_date(self, full_info):
        return True

    def roundrobin_datadirs(self, dirs):
        return RateLimitedIterator(
            roundrobin_datadirs(dirs),
            elements_per_second=self.databases_per_second)

    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        override_options = parse_override_options(once=True, **kwargs)

        devices_to_replicate = override_options.devices or Everything()
        partitions_to_replicate = override_options.partitions or Everything()

        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                try:
                    dev_path = check_drive(self.root, node['device'],
                                           self.mount_check)
                except ValueError as err:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in self.ring.devs if failure_dev])
                    self.logger.warning('Skipping: %s', err)
                    continue
                if node['device'] not in devices_to_replicate:
                    self.logger.debug(
                        'Skipping device %s due to given arguments',
                        node['device'])
                    continue
                unlink_older_than(
                    os.path.join(dev_path, 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    part_filt = self._partition_dir_filter(
                        node['id'], partitions_to_replicate)
                    dirs.append((datadir, node['id'], part_filt))
        if not found_local:
            self.logger.error("Can't find itself %s with port %s in ring "
                              "file, not replicating",
                              ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in self.roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()

    def run_forever(self, *args, **kwargs):
        """
        Replicate dbs under the given root in an infinite loop.
        """
        sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            try:
                self.run_once()
            except (Exception, Timeout):
                self.logger.exception(_('ERROR trying to replicate'))
            elapsed = time.time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)
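The run_forever() loop above is the usual eventlet daemon pacing pattern. A minimal standalone sketch of the same idea (run_pass_loop, do_pass and the bare exception handling are illustrative placeholders, not Swift code):

import random
import time

from eventlet import sleep


def run_pass_loop(do_pass, interval=30):
    sleep(random.random() * interval)   # spread daemons out at start-up
    while True:
        begin = time.time()
        try:
            do_pass()
        except Exception:
            pass  # a real daemon would log the failure and keep looping
        elapsed = time.time() - begin
        if elapsed < interval:
            sleep(interval - elapsed)   # only sleep the remainder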
Example #20
0
            order = Order(id=nextid,
                          exchange=d['exchange'],
                          ticker=d['ticker'],
                          price=oprice,
                          volume=d['volume'],
                          type=d['action'])
            ORDERS[nextid] = order
            logger.info("ORDER: %s", order)
            resp = {
                'order_id': nextid,
                'price': oprice,
                'profit': profit,
                'retcode': 0
            }
            nextid += 1
        # Unknown action otherwise
        socket.send(bytes(json.dumps(resp), 'utf-8'))


# Spawn green threads
logging.basicConfig(level=logging.INFO)
pool = GreenPool()
try:
    pool.spawn_n(handle_tick)
    pool.spawn_n(handle_broker)
    pool.waitall()  # Loops forever
finally:
    # There might be some orphan orders left over
    print("ORPHANS:", ORDERS)
Example #21
0
File: xprofile.py Project: Prosunjit/Swift
class ProfileMiddleware(object):
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='profile')
        self.log_filename_prefix = conf.get('log_filename_prefix',
                                            DEFAULT_PROFILE_PREFIX)
        dirname = os.path.dirname(self.log_filename_prefix)
        # Note: creating this directory may fail with permission denied;
        # it is better to create it and grant the current user access
        # in advance.
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        self.dump_interval = float(conf.get('dump_interval', 5.0))
        self.dump_timestamp = config_true_value(
            conf.get('dump_timestamp', 'no'))
        self.flush_at_shutdown = config_true_value(
            conf.get('flush_at_shutdown', 'no'))
        self.path = conf.get('path', '__profile__').replace('/', '')
        self.unwind = config_true_value(conf.get('unwind', 'no'))
        self.profile_module = conf.get('profile_module',
                                       'eventlet.green.profile')
        self.profiler = get_profiler(self.profile_module)
        self.profile_log = ProfileLog(self.log_filename_prefix,
                                      self.dump_timestamp)
        self.viewer = HTMLViewer(self.path, self.profile_module,
                                 self.profile_log)
        self.dump_pool = GreenPool(1000)
        self.last_dump_at = None

    def __del__(self):
        if self.flush_at_shutdown:
            self.profile_log.clear(str(os.getpid()))

    def _combine_body_qs(self, request):
        wsgi_input = request.environ['wsgi.input']
        query_dict = request.params
        qs_in_body = wsgi_input.read()
        query_dict.update(
            parse_qs(qs_in_body, keep_blank_values=True, strict_parsing=False))
        return query_dict

    def dump_checkpoint(self):
        current_time = time.time()
        if self.last_dump_at is None or self.last_dump_at +\
                self.dump_interval < current_time:
            self.dump_pool.spawn_n(self.profile_log.dump_profile,
                                   self.profiler, os.getpid())
            self.last_dump_at = current_time

    def __call__(self, environ, start_response):
        request = Request(environ)
        path_entry = request.path_info.split('/')
        # Hijack the favicon request sent by the browser so that it doesn't
        # invoke the profiling hook and contaminate the data.
        if path_entry[1] == 'favicon.ico':
            start_response('200 OK', [])
            return ''
        elif path_entry[1] == self.path:
            try:
                self.dump_checkpoint()
                query_dict = self._combine_body_qs(request)
                content, headers = self.viewer.render(request.url,
                                                      request.method,
                                                      path_entry, query_dict,
                                                      self.renew_profile)
                start_response('200 OK', headers)
                return [bytes_(content)]
            except MethodNotAllowed as mx:
                start_response('405 Method Not Allowed', [])
                return '%s' % mx
            except NotFoundException as nx:
                start_response('404 Not Found', [])
                return '%s' % nx
            except ProfileException as pf:
                start_response('500 Internal Server Error', [])
                return '%s' % pf
            except Exception as ex:
                start_response('500 Internal Server Error', [])
                return _('Error on render profiling results: %s') % ex
        else:
            try:
                _locals = locals()
                code = self.unwind and PROFILE_EXEC_EAGER or\
                    PROFILE_EXEC_LAZY
                self.profiler.runctx(code, globals(), _locals)
                app_iter = _locals['app_iter_']
                self.dump_checkpoint()
                return app_iter
            except:
                self.logger.exception(_('Error profiling code'))
            finally:
                pass

    def renew_profile(self):
        self.profiler = get_profiler(self.profile_module)
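dump_checkpoint() above throttles profile dumps to at most one per dump_interval and pushes the actual dump into a GreenPool so requests are not blocked. A stripped-down sketch of that pattern (ThrottledDumper and dump_func are hypothetical names):

import os
import time

from eventlet import GreenPool


class ThrottledDumper(object):
    # Hypothetical stand-in for the dump_checkpoint() throttling above: at
    # most one background dump per `interval` seconds, off the request path.
    def __init__(self, dump_func, interval=5.0):
        self.dump_func = dump_func
        self.interval = interval
        self.dump_pool = GreenPool(1000)
        self.last_dump_at = None

    def checkpoint(self):
        now = time.time()
        if self.last_dump_at is None or self.last_dump_at + self.interval < now:
            self.dump_pool.spawn_n(self.dump_func, os.getpid())
            self.last_dump_at = now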
Example #22
0
File: worker.py Project: BryanSD/letter-s
                if len(messages) > 0:
                    last_message = messages[-1]
                    ttl = int(last_message['body']['ttl'])
                    reset_count = int(last_message['body']['work_item_count'])
                    if reset_count >= 0:
                        work_item_count = reset_count

                    print('New control message: ttl=%d, work_item_count=%d' %
                          (ttl, reset_count))
            except Exception as ex:
                print(ex)

            eventlet.sleep(1)

    pool.spawn_n(get_worker_information)

    def post_stats():
        global work_item_count

        s = socket.socket()
        s.connect((sys.argv[2], int(sys.argv[3])))

        while True:
            start_time = time.time()

            graphite_message = 'openstack.worker.result.sum %d %d\n' % (
                work_item_count, int(time.time()))
            s.sendall(graphite_message)

            elapsed_time = time.time() - start_time
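The example is cut off above. A common way to finish such a stats loop, offered here only as an assumption rather than what worker.py actually does, is to sleep whatever remains of a fixed reporting period:

import time

import eventlet


def paced_loop(post_stats_once, period=1.0):
    # Assumed continuation pattern: run the body, then sleep only whatever
    # is left of the reporting period so stats go out roughly once a second.
    while True:
        start_time = time.time()
        post_stats_once()
        elapsed_time = time.time() - start_time
        if elapsed_time < period:
            eventlet.sleep(period - elapsed_time)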
Example #23
0
        global rate, ttl
        queue = conn.create_queue('openstack-producer-controller')

        while True:
            messages = list(queue.get_messages(restart=True))

            if len(messages) > 0:
                last_message = messages[-1]
                rate = last_message['body']['rate']
                ttl = last_message['body']['ttl']
            else:
                rate, ttl = 10, 60

            eventlet.sleep(1)

    pool.spawn_n(get_production_information)

    def post_work():
        global rate, ttl, messages_created

        queue = conn.create_queue('openstack-tasks')
        job_types = {0: 'prime', 1: 'fibonacci'}

        while True:
            if rate == 0:
                eventlet.sleep(1)
                continue
            else:
                eventlet.sleep(1.0 / rate)

            job_type = random.randint(0, 1)
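This producer reads rate and ttl from a control queue in one green thread and paces job creation in another. A reduced sketch of the pacing part (produce, get_rate and post_job are placeholders):

import random

import eventlet


def produce(post_job, get_rate):
    # Simplified sketch of the post_work() pacing above: re-read the current
    # rate each iteration and space jobs out at roughly `rate` per second.
    while True:
        rate = get_rate()
        if rate == 0:
            eventlet.sleep(1)
            continue
        eventlet.sleep(1.0 / rate)
        post_job(random.choice(['prime', 'fibonacci']))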
Example #24
0
class Concurrency(object):
    """
    Convenience class to support concurrency, if Eventlet is
    available; otherwise it just performs at single concurrency.

    :param concurrency: The level of concurrency desired. Default: 10
    """

    def __init__(self, concurrency=10):
        self.concurrency = concurrency
        if self.concurrency and GreenPool:
            self._pool = GreenPool(self.concurrency)
        else:
            self._pool = None
        self._queue = Queue.Queue()
        self._results = {}

    def _spawner(self, ident, func, *args, **kwargs):
        exc_type = exc_value = exc_tb = result = None
        try:
            result = func(*args, **kwargs)
        except (Exception, Timeout):
            exc_type, exc_value, exc_tb = sys.exc_info()
        self._queue.put((ident, (exc_type, exc_value, exc_tb, result)))

    def spawn(self, ident, func, *args, **kwargs):
        """
        Returns immediately to the caller and begins executing the
        func in the background. Use get_results and the ident given
        to retrieve the results of the func. If the func causes an
        exception, this exception will be caught and the
        sys.exc_info() will be returned via get_results.

        :param ident: An identifier to find the results of the func
            from get_results. This identifier can be anything unique
            to the Concurrency instance.
        :param func: The function to execute concurrently.
        :param args: The args to give the func.
        :param kwargs: The keyword args to give the func.
        :returns: None
        """
        if self._pool:
            self._pool.spawn_n(self._spawner, ident, func, *args, **kwargs)
            sleep()
        else:
            self._spawner(ident, func, *args, **kwargs)

    def get_results(self):
        """
        Returns a dict of the results currently available. The keys
        are the ident values given with the calls to spawn. The
        values are tuples of (exc_type, exc_value, exc_tb, result)
        where:

        =========  ============================================
        exc_type   The type of any exception raised.
        exc_value  The actual exception if any was raised.
        exc_tb     The traceback if any exception was raised.
        result     If no exception was raised, this will be the
                   return value of the called function.
        =========  ============================================
        """
        try:
            while True:
                ident, value = self._queue.get(block=False)
                self._results[ident] = value
        except Queue.Empty:
            pass
        return self._results

    def join(self):
        """
        Blocks until all currently pending functions have finished.
        """
        if self._pool:
            self._pool.waitall()
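A hypothetical usage of the Concurrency helper above: spawn work under distinct idents, join, then read back per-ident results or exception info.

def work(url):
    return len(url)             # placeholder for real work


conc = Concurrency(concurrency=4)
for ident, url in enumerate(['http://a/', 'http://b/', 'http://c/']):
    conc.spawn(ident, work, url)
conc.join()
for ident, (exc_type, exc_value, exc_tb, result) in conc.get_results().items():
    if exc_type is None:
        print(ident, result)
    else:
        print(ident, 'raised', exc_value)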
Example #25
0
File: agent.py Project: huayuxian/oio-sds
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
                while True:
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = msg[2]
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    success = msg[2]
                    if not success:
                        self.queue.failed(event_id)
                        self.logger.warn('event %s moved to failed',
                                         binascii.hexlify(event_id))
                    else:
                        self.queue.delete(event_id)
                        self.logger.debug('event %s removed from queue',
                                          binascii.hexlify(event_id))

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:

                results = self.queue.load(self.batch_size)

                for event in results:
                    event_id, data = event
                    msg = ['', event_id, str(data)]
                    self.backend.send_multipart(msg)
                    self.retries_run_time = ratelimit(
                        self.retries_run_time, self.max_retries_per_second)

                for w in self.workers:
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

                sleep(SLEEP_TIME)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()

            self.context.destroy(linger=True)
            self.context = None
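The agent above drives a front/back pair with a dedicated GreenPool(2). A toy version of that hand-off, with a green Queue standing in for the ZeroMQ sockets:

from eventlet import GreenPool, sleep
from eventlet.queue import Queue

events = Queue()


def front():
    # Producer side: push a few event ids, yielding between each.
    for event_id in range(5):
        events.put(event_id)
        sleep(0.1)


def back():
    # Consumer side: drain the same number of events.
    for _ in range(5):
        print('handled event', events.get())


boss_pool = GreenPool(2)
boss_pool.spawn_n(front)
boss_pool.spawn_n(back)
boss_pool.waitall()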
Example #26
0
class Replicator(Daemon):
    """
    Implements the logic for directing db replication.
    """
    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='replicator')
        self.root = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.port = int(conf.get('bind_port', self.default_port))
        concurrency = int(conf.get('concurrency', 8))
        self.cpool = GreenPool(size=concurrency)
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.ring = ring.Ring(swift_dir, ring_name=self.server_type)
        self._local_device_ids = set()
        self.per_diff = int(conf.get('per_diff', 1000))
        self.max_diffs = int(conf.get('max_diffs') or 100)
        self.interval = int(
            conf.get('interval') or conf.get('run_pause') or 30)
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::%s' % self.server_type
        self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self._zero_stats()
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.recon_replicator = '%s.recon' % self.server_type
        self.rcache = os.path.join(self.recon_cache_path,
                                   self.recon_replicator)
        self.extract_device_re = re.compile(
            '%s%s([^%s]+)' % (self.root, os.path.sep, os.path.sep))

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {
            'attempted': 0,
            'success': 0,
            'failure': 0,
            'ts_repl': 0,
            'no_change': 0,
            'hashmatch': 0,
            'rsync': 0,
            'diff': 0,
            'remove': 0,
            'empty': 0,
            'remote_merge': 0,
            'start': time.time(),
            'diff_capped': 0,
            'failure_nodes': {}
        }

    def _report_stats(self):
        """Report the current stats to the logs."""
        now = time.time()
        self.logger.info(
            _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
              '(%(rate).5f/s)'), {
                  'count':
                  self.stats['attempted'],
                  'time':
                  now - self.stats['start'],
                  'rate':
                  self.stats['attempted'] /
                  (now - self.stats['start'] + 0.0000001)
              })
        self.logger.info(_('Removed %(remove)d dbs') % self.stats)
        self.logger.info(
            _('%(success)s successes, %(failure)s failures') % self.stats)
        dump_recon_cache(
            {
                'replication_stats': self.stats,
                'replication_time': now - self.stats['start'],
                'replication_last': now
            }, self.rcache, self.logger)
        self.logger.info(' '.join([
            '%s:%s' % item for item in sorted(self.stats.items())
            if item[0] in ('no_change', 'hashmatch', 'rsync', 'diff',
                           'ts_repl', 'empty', 'diff_capped', 'remote_merge')
        ]))

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats['failure'] += 1
            failure_devs = self.stats['failure_nodes'].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1

    def _rsync_file(self,
                    db_file,
                    remote_file,
                    whole_file=True,
                    different_region=False):
        """
        Sync a single file using rsync. Used by _rsync_db to handle syncing.

        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag
        :param different_region: if True, the destination node is in a
                                 different region

        :returns: True if the sync was successful, False otherwise
        """
        popen_args = [
            'rsync', '--quiet', '--no-motd',
            '--timeout=%s' % int(math.ceil(self.node_timeout)),
            '--contimeout=%s' % int(math.ceil(self.conn_timeout))
        ]
        if whole_file:
            popen_args.append('--whole-file')

        if self.rsync_compress and different_region:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            popen_args.append('--compress')

        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {
                                  'code': proc.returncode,
                                  'args': popen_args
                              })
        return proc.returncode == 0

    def _rsync_db(self,
                  broker,
                  device,
                  http,
                  local_id,
                  replicate_method='complete_rsync',
                  replicate_timeout=None,
                  different_region=False):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        :param different_region: if True, the destination node is in a
                                 different region
        """
        rsync_module = rsync_module_interpolation(self.rsync_module, device)
        rsync_path = '%s/tmp/%s' % (device['device'], local_id)
        remote_file = '%s/%s' % (rsync_module, rsync_path)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file,
                                remote_file,
                                different_region=different_region):
            return False
        # perform block-level sync if the db was modified during the first sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file,
                                        remote_file,
                                        whole_file=False,
                                        different_region=different_region):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id)
        return response and 200 <= response.status < 300

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.

        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica

        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('Syncing chunks with %s, starting at %s', http.host,
                          point)
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            with Timeout(self.node_timeout):
                response = http.replicate('merge_items', objects, local_id)
            if not response or response.status >= 300 or response.status < 200:
                if response:
                    self.logger.error(
                        _('ERROR Bad response %(status)s from '
                          '%(host)s'), {
                              'status': response.status,
                              'host': http.host
                          })
                return False
            # replication relies on db order to send the next merge batch in
            # order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)
        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and 200 <= response.status < 300:
                broker.merge_syncs([{
                    'remote_id': remote_id,
                    'sync_point': point
                }],
                                   incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas

        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{
                'remote_id': rinfo['id'],
                'sync_point': rinfo['point']
            }],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the URL
        :param db_file: DB file

        :returns: ReplConnection object
        """
        return ReplConnection(node, partition,
                              os.path.basename(db_file).split('.', 1)[0],
                              self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self,
                      node,
                      broker,
                      partition,
                      info,
                      different_region=False):
        """
        Replicate a database to a node.

        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
                     'created_at', 'put_timestamp', 'delete_timestamp',
                     'metadata'}
        :param different_region: if True, the destination node is in a
                                 different region

        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node,
                                          response,
                                          info,
                                          broker,
                                          http,
                                          different_region=different_region)

    def _handle_sync_response(self,
                              node,
                              response,
                              info,
                              broker,
                              http,
                              different_region=False):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker,
                                  node,
                                  http,
                                  info['id'],
                                  different_region=different_region)
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif 200 <= response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if self._in_sync(rinfo, info, broker, local_sync):
                return True
            # If the remote's max_row is less than half the local one and the
            # difference is greater than per_diff, rsync the whole db and
            # then do a remote merge.
            # NOTE: difference > per_diff stops us from dropping to rsync
            # on smaller containers, which have only a few rows to sync.
            if rinfo['max_row'] / float(info['max_row']) < 0.5 and \
                    info['max_row'] - rinfo['max_row'] > self.per_diff:
                self.stats['remote_merge'] += 1
                self.logger.increment('remote_merges')
                return self._rsync_db(broker,
                                      node,
                                      http,
                                      info['id'],
                                      replicate_method='rsync_then_merge',
                                      replicate_timeout=(info['count'] / 2000),
                                      different_region=different_region)
            # else send diffs over to the remote server
            return self._usync_db(max(rinfo['point'], local_sync), broker,
                                  http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: broker instance for the database that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is greater
        # than the put_timestamp, and there are no objects.
        delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
        put_timestamp = Timestamp(info.get('put_timestamp') or 0)
        if (now - self.reclaim_age) > delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:  # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to reach '
                          'replica count for partition %s'), partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR syncing %(file)s with node'
                      ' %(node)s'), {
                          'file': object_file,
                          'node': node
                      })
            if not success:
                failure_devs_info.add((node['replication_ip'], node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception(
                'UNHANDLED EXCEPTION: in post replicate '
                'hook for %s', broker.db_file)
        if not shouldbehere and responses and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
            if not self.delete_db(broker):
                failure_devs_info.update([(failure_dev['replication_ip'],
                                           failure_dev['device'])
                                          for failure_dev in repl_nodes])

        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)

        self.logger.timing_since('timing', start_time)

    def delete_db(self, broker):
        object_file = broker.db_file
        hash_dir = os.path.dirname(object_file)
        suf_dir = os.path.dirname(hash_dir)
        with lock_parent_directory(object_file):
            shutil.rmtree(hash_dir, True)
        try:
            os.rmdir(suf_dir)
        except OSError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                self.logger.exception(
                    _('ERROR while trying to clean up %s') % suf_dir)
                return False
        self.stats['remove'] += 1
        device_name = self.extract_device(object_file)
        self.logger.increment('removes.' + device_name)
        return True

    def extract_device(self, object_file):
        """
        Extract the device name from an object path.  Returns "UNKNOWN" if the
        device name could not be extracted for some reason.

        :param object_file: the path to a database file.
        """
        match = self.extract_device_re.match(object_file)
        if match:
            return match.groups()[0]
        return "UNKNOWN"

    def report_up_to_date(self, full_info):
        return True

    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return
        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port, node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                if not check_drive(self.root, node['device'],
                                   self.mount_check):
                    self._add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in self.ring.devs if failure_dev
                    ])
                    self.logger.warning(
                        _('Skipping %(device)s as it is not mounted') % node)
                    continue
                unlink_older_than(
                    os.path.join(self.root, node['device'], 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    dirs.append((datadir, node['id']))
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring "
                "file, not replicating", ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in roundrobin_datadirs(dirs):
            self.cpool.spawn_n(self._replicate_object, part, object_file,
                               node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        self._report_stats()

    def run_forever(self, *args, **kwargs):
        """
        Replicate dbs under the given root in an infinite loop.
        """
        sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            try:
                self.run_once()
            except (Exception, Timeout):
                self.logger.exception(_('ERROR trying to replicate'))
            elapsed = time.time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)
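run_once() above fans replication work out across a bounded GreenPool and then waits for the whole pass. The same shape in isolation (replicate_all, replicate_one and work_items are placeholders):

from eventlet import GreenPool


def replicate_all(replicate_one, work_items, concurrency=8):
    # Sketch of the fan-out in run_once() above: one green thread per
    # database found under the datadirs, bounded by the pool size.
    cpool = GreenPool(size=concurrency)
    for part, object_file, node_id in work_items:
        cpool.spawn_n(replicate_one, part, object_file, node_id)
    cpool.waitall()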
Example #27
0
class Concurrency(object):
    """
    Convenience class to support concurrency, if Eventlet is
    available; otherwise it just performs at single concurrency.

    :param concurrency: The level of concurrency desired. Default: 10
    """

    def __init__(self, concurrency=10):
        self.concurrency = concurrency
        if self.concurrency and GreenPool:
            self._pool = GreenPool(self.concurrency)
        else:
            self._pool = None
        self._queue = Queue.Queue()
        self._results = {}

    def _spawner(self, ident, func, *args, **kwargs):
        exc_type = exc_value = exc_tb = result = None
        try:
            result = func(*args, **kwargs)
        except (Exception, Timeout):
            exc_type, exc_value, exc_tb = sys.exc_info()
        self._queue.put((ident, (exc_type, exc_value, exc_tb, result)))

    def spawn(self, ident, func, *args, **kwargs):
        """
        Returns immediately to the caller and begins executing the
        func in the background. Use get_results and the ident given
        to retrieve the results of the func. If the func causes an
        exception, this exception will be caught and the
        sys.exc_info() will be returned via get_results.

        :param ident: An identifier to find the results of the func
            from get_results. This identifier can be anything unique
            to the Concurrency instance.
        :param func: The function to execute concurrently.
        :param args: The args to give the func.
        :param kwargs: The keyword args to give the func.
        :returns: None
        """
        if self._pool:
            self._pool.spawn_n(self._spawner, ident, func, *args, **kwargs)
        else:
            self._spawner(ident, func, *args, **kwargs)

    def get_results(self):
        """
        Returns a dict of the results currently available. The keys
        are the ident values given with the calls to spawn. The
        values are tuples of (exc_type, exc_value, exc_tb, result)
        where:

        =========  ============================================
        exc_type   The type of any exception raised.
        exc_value  The actual exception if any was raised.
        exc_tb     The traceback if any exception was raised.
        result     If no exception was raised, this will be the
                   return value of the called function.
        =========  ============================================
        """
        try:
            while True:
                ident, value = self._queue.get(block=False)
                self._results[ident] = value
        except Queue.Empty:
            pass
        return self._results

    def join(self):
        """
        Blocks until all currently pending functions have finished.
        """
        if self._pool:
            self._pool.waitall()
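Read together, spawn, join and get_results give the class above a small
map-style API: fire off calls under distinct idents, wait, then pick results
or captured exceptions out of a dict. A hypothetical usage sketch, assuming
eventlet is installed so the GreenPool branch is taken; fetch_status and the
URLs are illustrative only:

def fetch_status(url):
    # Stand-in for real work, e.g. an HTTP request.
    return 'ok: %s' % url

conc = Concurrency(concurrency=4)
for ident, url in enumerate(['http://a.example', 'http://b.example']):
    conc.spawn(ident, fetch_status, url)
conc.join()
for ident, value in conc.get_results().items():
    exc_type, exc_value, exc_tb, result = value
    if exc_type is not None:
        print('job %s failed: %s' % (ident, exc_value))
    else:
        print('job %s -> %s' % (ident, result))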
Example #28
0
    def get_worker_information():
        # Background greenthread: poll the controller queue and keep the
        # shared ttl value fresh for the main task loop below.
        global ttl

        queue = conn.get_queue('openstack-worker-controller')

        while True:
            messages = list(queue.get_messages(restart=True))

            if len(messages) > 0:
                last_message = messages[-1]
                ttl = last_message['body']['ttl']
            else:
                ttl = 60

            eventlet.sleep(1)

    pool.spawn_n(get_worker_information)

    queue_tasks = conn.get_queue('openstack-tasks')
    queue_results = conn.get_queue('openstack-responses')

    s = socket.socket()
    s.connect((sys.argv[2], int(sys.argv[3])))

    work_item_count = 0
    while True:
        claim = queue_tasks.claim(ttl=60, grace=60)

        for msg in claim.messages:
            #print claim['ttl']
            msg_body = msg['body']
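The fragment above spawns get_worker_information into the pool so that a
background greenthread keeps the shared ttl current while the main loop claims
and processes tasks. A minimal sketch of that shape using plain eventlet;
refresh_ttl and the list standing in for the control queue are illustrative
only, not the queue API used above:

import eventlet
from eventlet import GreenPool

ttl = 60  # shared value kept fresh by the background greenthread


def refresh_ttl(control_messages):
    # Stand-in for get_worker_information: apply any pending control
    # message to the shared ttl, then yield to other greenthreads.
    global ttl
    while True:
        if control_messages:
            ttl = control_messages.pop()
        eventlet.sleep(1)


pool = GreenPool()
pool.spawn_n(refresh_ttl, [30])
eventlet.sleep(2)  # let the background greenthread run at least once
print(ttl)         # -> 30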
Example #29
0
class ProfileMiddleware(object):

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='profile')
        self.log_filename_prefix = conf.get('log_filename_prefix',
                                            DEFAULT_PROFILE_PREFIX)
        dirname = os.path.dirname(self.log_filename_prefix)
        # Note: creating this directory may fail with permission denied;
        # it is better to create it and grant the service user access to
        # it in advance.
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        self.dump_interval = float(conf.get('dump_interval', 5.0))
        self.dump_timestamp = config_true_value(conf.get(
            'dump_timestamp', 'no'))
        self.flush_at_shutdown = config_true_value(conf.get(
            'flush_at_shutdown', 'no'))
        self.path = conf.get('path', '__profile__').replace('/', '')
        self.unwind = config_true_value(conf.get('unwind', 'no'))
        self.profile_module = conf.get('profile_module',
                                       'eventlet.green.profile')
        self.profiler = get_profiler(self.profile_module)
        self.profile_log = ProfileLog(self.log_filename_prefix,
                                      self.dump_timestamp)
        self.viewer = HTMLViewer(self.path, self.profile_module,
                                 self.profile_log)
        self.dump_pool = GreenPool(1000)
        self.last_dump_at = None

    def __del__(self):
        if self.flush_at_shutdown:
            self.profile_log.clear(str(os.getpid()))

    def _combine_body_qs(self, request):
        wsgi_input = request.environ['wsgi.input']
        query_dict = request.params
        qs_in_body = wsgi_input.read()
        query_dict.update(parse_qs(qs_in_body, keep_blank_values=True,
                                   strict_parsing=False))
        return query_dict

    def dump_checkpoint(self):
        current_time = time.time()
        if self.last_dump_at is None or self.last_dump_at +\
                self.dump_interval < current_time:
            self.dump_pool.spawn_n(self.profile_log.dump_profile,
                                   self.profiler, os.getpid())
            self.last_dump_at = current_time

    def __call__(self, environ, start_response):
        request = Request(environ)
        path_entry = request.path_info.split('/')
        # Intercept the favicon request sent by the browser so that it
        # doesn't invoke the profiling hook and contaminate the data.
        if path_entry[1] == 'favicon.ico':
            start_response('200 OK', [])
            return ''
        elif path_entry[1] == self.path:
            try:
                self.dump_checkpoint()
                query_dict = self._combine_body_qs(request)
                content, headers = self.viewer.render(request.url,
                                                      request.method,
                                                      path_entry,
                                                      query_dict,
                                                      self.renew_profile)
                start_response('200 OK', headers)
                return [bytes_(content)]
            except MethodNotAllowed as mx:
                start_response('405 Method Not Allowed', [])
                return '%s' % mx
            except NotFoundException as nx:
                start_response('404 Not Found', [])
                return '%s' % nx
            except ProfileException as pf:
                start_response('500 Internal Server Error', [])
                return '%s' % pf
            except Exception as ex:
                start_response('500 Internal Server Error', [])
                return _('Error rendering profiling results: %s') % ex
        else:
            _locals = locals()
            code = PROFILE_EXEC_EAGER if self.unwind else PROFILE_EXEC_LAZY
            self.profiler.runctx(code, globals(), _locals)
            app_iter = _locals['app_iter_']
            self.dump_checkpoint()
            return app_iter

    def renew_profile(self):
        self.profiler = get_profiler(self.profile_module)
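dump_checkpoint above acts as a simple rate limiter: a dump is handed to
dump_pool at most once per dump_interval, so request handling never waits on
writing profile data to disk. A standalone sketch of that throttling shape;
ThrottledDumper and do_dump are names chosen here for illustration, not part
of the middleware:

import time

from eventlet import GreenPool


class ThrottledDumper(object):
    """Spawn do_dump at most once per interval, without blocking callers."""

    def __init__(self, do_dump, interval=5.0):
        self.do_dump = do_dump
        self.interval = interval
        self.last_dump_at = None
        self.pool = GreenPool(1)

    def checkpoint(self):
        # Mirrors dump_checkpoint: only spawn a dump if the previous one
        # is at least `interval` seconds old.
        now = time.time()
        if (self.last_dump_at is None or
                self.last_dump_at + self.interval < now):
            self.pool.spawn_n(self.do_dump)
            self.last_dump_at = now

A caller on the hot path only invokes checkpoint(); the actual dump work runs
in a separate greenthread, which is the same design choice the middleware
makes for its profile log writes.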