Example #1
0
    def _shot(self):
        """Run one polling round over the app's current polling targets.

        Loads the status records for the targeted nodes and proxies, starts
        one ``Poller`` per batch of ``NODES_EACH_THREAD`` records, sleeps for
        ``self.interval``, joins the pollers, stores every polled record and
        finally hands the per-address details back to the app.
        """
        self.app.on_loop_begin()
        targets = self.app.polling_targets()
        nodes = _load_from(RedisNodeStatus, self.app, targets['nodes'])
        proxies = _load_from(ProxyStatus, self.app, targets['proxies'])
        # `get_by` may have created new records; they must be persisted
        # before they can be reattached to a session
        commit_session()

        shuffled = nodes + proxies
        random.shuffle(shuffled)
        pollers = []
        for offset in xrange(0, len(shuffled), NODES_EACH_THREAD):
            pollers.append(Poller(shuffled[offset:offset + NODES_EACH_THREAD]))

        for poller in pollers:
            poller.start()
        time.sleep(self.interval)
        for poller in pollers:
            poller.join()

        for poller in pollers:
            for status in poller.nodes:
                status.add_to_db()

        save_polling_stat(nodes, proxies)
        commit_session()
        logging.debug('Total %d nodes, %d proxies', len(nodes), len(proxies))

        node_details = {n.addr: n.details for n in nodes}
        proxy_details = {p.addr: p.details for p in proxies}
        self.app.write_polling_details(node_details, proxy_details)
Example #2
0
def try_create_exec_thread_by_task(t, app):
    """Build a ``TaskRunner`` for the next step of task *t*, if it may run.

    Returns ``None`` when the task is already completed, is not runnable,
    cannot acquire its lock, still has the step bound to the lock in
    progress, or when committing the lock raises ``IntegrityError``.
    Otherwise the next step is bound to the lock, the session is committed
    and ``TaskRunner(app, t.id, step.id)`` is returned.
    """
    t.check_completed()
    if t.completion is not None or not t.runnable():
        return None

    lock = t.acquire_lock()
    if lock is None:
        return None

    next_step = t.next_step()

    # The step picked above may already have been started by the previous
    # poll. A step that is bound to the lock and still running means this
    # task is skipped; if nothing is bound yet, or the bound step has
    # completed, the lock is (re)bound to the next step.
    bound_step_running = (lock.step_id is not None
                          and lock.step.completion is None)
    if bound_step_running:
        return None
    lock.step_id = next_step.id
    db.session.add(lock)

    try:
        commit_session()
    except IntegrityError:
        return None

    logging.debug('Run task %d', t.id)
    return TaskRunner(app, t.id, next_step.id)
Example #3
0
    def _shot(self):
        """Run one polling round over the targets read from ``file_ipc``.

        Loads the status records for the targeted nodes and proxies, starts
        one ``Poller`` per batch of ``NODES_EACH_THREAD`` records, sleeps for
        ``self.interval``, joins the pollers, stores every polled record and
        writes the per-address details out through ``file_ipc``. A
        ``StandardError`` raised while writing the details is logged and
        swallowed.
        """
        targets = file_ipc.read_poll()
        nodes = _load_from(RedisNodeStatus, targets['nodes'])
        proxies = _load_from(ProxyStatus, targets['proxies'])
        # `get_by` may have created new records; they must be persisted
        # before they can be reattached to a session
        commit_session()

        shuffled = nodes + proxies
        random.shuffle(shuffled)
        pollers = []
        for offset in xrange(0, len(shuffled), NODES_EACH_THREAD):
            batch = shuffled[offset:offset + NODES_EACH_THREAD]
            pollers.append(Poller(batch, self.algalon_client))

        for poller in pollers:
            poller.start()
        time.sleep(self.interval)
        for poller in pollers:
            poller.join()

        for poller in pollers:
            for status in poller.nodes:
                status.add_to_db()

        save_polling_stat(nodes, proxies)
        commit_session()
        logging.debug('Total %d nodes, %d proxies', len(nodes), len(proxies))

        try:
            node_details = {n.addr: n.details for n in nodes}
            proxy_details = {p.addr: p.details for p in proxies}
            file_ipc.write_details(node_details, proxy_details)
        except StandardError as e:
            logging.exception(e)
Example #4
0
    def _shot(self):
        """Poll every target listed by ``file_ipc.read_poll`` once.

        The node and proxy status records are loaded, shuffled together and
        split into batches of ``NODES_EACH_THREAD``; each batch gets its own
        ``Poller``. After sleeping ``self.interval`` the pollers are joined,
        their records are stored, and the per-address details are written
        through ``file_ipc``. A ``StandardError`` raised while writing the
        details is logged and swallowed.
        """
        targets = file_ipc.read_poll()
        nodes = _load_from(RedisNodeStatus, targets['nodes'])
        proxies = _load_from(ProxyStatus, targets['proxies'])
        # records possibly created by `get_by` must be persisted before
        # they can be reattached to a session, hence this commit
        commit_session()

        shuffled = nodes + proxies
        random.shuffle(shuffled)
        pollers = []
        for first in xrange(0, len(shuffled), NODES_EACH_THREAD):
            last = first + NODES_EACH_THREAD
            pollers.append(Poller(shuffled[first:last], self.algalon_client))

        for worker in pollers:
            worker.start()
        time.sleep(self.interval)
        for worker in pollers:
            worker.join()

        for worker in pollers:
            for polled in worker.nodes:
                polled.add_to_db()

        save_polling_stat(nodes, proxies)
        commit_session()
        logging.debug('Total %d nodes, %d proxies', len(nodes), len(proxies))

        try:
            details_by_node = {n.addr: n.details for n in nodes}
            details_by_proxy = {p.addr: p.details for p in proxies}
            file_ipc.write_details(details_by_node, details_by_proxy)
        except StandardError as e:
            logging.exception(e)
Example #5
0
def _join(_, cluster_id, cluster_host, cluster_port, newin_host, newin_port):
    """Join a new instance to a cluster and record the assignment.

    Calls ``redistrib.command.join_no_load`` for the new instance. If
    ``get_node_by_host_port`` then finds a record for it, that record's
    ``assignee_id`` is set to *cluster_id* and the session is committed.
    The first positional argument is ignored. Always returns ``True``.
    """
    redistrib.command.join_no_load(
        cluster_host, cluster_port, newin_host, newin_port)
    node = get_node_by_host_port(newin_host, newin_port)
    if node is not None:
        node.assignee_id = cluster_id
        db.session.add(node)
        commit_session()
    return True
Example #6
0
def _join(_, cluster_id, cluster_host, cluster_port, newin_host, newin_port):
    """Add the instance at *newin_host*:*newin_port* to the given cluster.

    The join itself is done by ``redistrib.command.join_no_load``. When a
    record for the new instance is found via ``get_node_by_host_port``, its
    ``assignee_id`` is pointed at *cluster_id* and committed; when none is
    found nothing is stored. The first positional argument is unused.
    Returns ``True`` in both cases.
    """
    redistrib.command.join_no_load(
        cluster_host, cluster_port, newin_host, newin_port)
    joined = get_node_by_host_port(newin_host, newin_port)
    if joined is not None:
        joined.assignee_id = cluster_id
        db.session.add(joined)
        commit_session()
    return True
Example #7
0
def _replicate(_, cluster_id, master_host, master_port, slave_host,
               slave_port):
    """Make one instance a replica of another and record the assignment.

    Calls ``redistrib.command.replicate`` with the master and slave
    addresses. If ``get_node_by_host_port`` then finds a record for the
    slave, that record's ``assignee_id`` is set to *cluster_id* and the
    session is committed. The first positional argument is ignored.
    Always returns ``True``.
    """
    redistrib.command.replicate(
        master_host, master_port, slave_host, slave_port)
    node = get_node_by_host_port(slave_host, slave_port)
    if node is not None:
        node.assignee_id = cluster_id
        db.session.add(node)
        commit_session()
    return True
Example #8
0
def _replicate(_, cluster_id, master_host, master_port, slave_host,
               slave_port):
    """Attach the slave instance to the master and assign it to the cluster.

    The replication itself is done by ``redistrib.command.replicate``. When
    a record for the slave is found via ``get_node_by_host_port``, its
    ``assignee_id`` is pointed at *cluster_id* and committed; when none is
    found nothing is stored. The first positional argument is unused.
    Returns ``True`` in both cases.
    """
    redistrib.command.replicate(
        master_host, master_port, slave_host, slave_port)
    replica = get_node_by_host_port(slave_host, slave_port)
    if replica is not None:
        replica.assignee_id = cluster_id
        db.session.add(replica)
        commit_session()
    return True
Example #9
0
def _migrate_slots(command, src_host, src_port, dst_host, dst_port, slots,
                   start=0):
    """Migrate ``slots[start:]`` one slot at a time, checkpointing progress.

    After each slot, if that single migration took at least
    ``config.POLL_INTERVAL`` seconds, the index of the next slot is stored
    in ``command.args['start']``, the command is saved, the session is
    committed and the function returns early.

    Returns ``True`` when every slot has been migrated, ``False`` when it
    stopped early with slots still remaining.
    """
    while start < len(slots):
        # NOTE(review): the timer restarts for every slot, so only a single
        # migration slower than POLL_INTERVAL triggers the checkpoint --
        # confirm that a cumulative budget was not intended.
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host, dst_port,
                                        [slots[start]])
        start += 1
        # Use total_seconds(), not .seconds: the latter is only the seconds
        # component of the delta (it ignores days and becomes a large
        # positive number when the wall clock steps backwards).
        elapsed = (datetime.now() - begin).total_seconds()
        if elapsed >= config.POLL_INTERVAL:
            command.args['start'] = start
            command.save()
            commit_session()
            return start == len(slots)
    return True
Example #10
0
def _migrate_slots(command,
                   src_host,
                   src_port,
                   dst_host,
                   dst_port,
                   slots,
                   start=0):
    """Migrate ``slots[start:]`` one slot at a time, checkpointing progress.

    After each slot, if that single migration took at least
    ``config.POLL_INTERVAL`` seconds, the index of the next slot is stored
    in ``command.args['start']``, the command is saved, the session is
    committed and the function returns early.

    Returns ``True`` when every slot has been migrated, ``False`` when it
    stopped early with slots still remaining.
    """
    while start < len(slots):
        # NOTE(review): the timer restarts for every slot, so only a single
        # migration slower than POLL_INTERVAL triggers the checkpoint --
        # confirm that a cumulative budget was not intended.
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host, dst_port,
                                        [slots[start]])
        start += 1
        # Use total_seconds(), not .seconds: the latter is only the seconds
        # component of the delta (it ignores days and becomes a large
        # positive number when the wall clock steps backwards).
        elapsed = (datetime.now() - begin).total_seconds()
        if elapsed >= config.POLL_INTERVAL:
            command.args['start'] = start
            command.save()
            commit_session()
            return start == len(slots)
    return True
Example #11
0
    def run(self):
        """Execute the step ``self.step_id`` of task ``self.task_id``.

        Runs inside the application context. Does nothing if the task
        cannot be loaded. If the step already has a completion, only the
        task's completion is re-checked. Otherwise the step is executed: on
        failure the task is marked failed; on success the step is unbound
        from the task's lock and the task's completion is re-checked. A
        ``StandardError`` or ``SQLAlchemyError`` raised along the way is
        logged, the session is rolled back, and the traceback plus a
        completion time are stored on the task.
        """
        with self.app.app_context():
            task = ClusterTask.query.get(self.task_id)
            if task is None:
                # should never happen
                return
            try:
                step = TaskStep.query.get(self.step_id)

                # re-check that the step has not been run yet
                if step.completion is not None:
                    return task.check_completed()

                logging.info('Execute step %d', step.id)
                if step.execute():
                    held_lock = task.acquired_lock()
                    held_lock.step = None
                    db.session.add(held_lock)
                    commit_session()
                    task.check_completed()
                else:
                    task.fail('Step fails')
                    commit_session()
            except (StandardError, SQLAlchemyError) as e:
                logging.exception(e)
                db.session.rollback()
                task.exec_error = traceback.format_exc()
                task.completion = datetime.now()
                db.session.add(task)
                commit_session()
Example #12
0
    def test_timed(self):
        """A ``Timed`` alarm client repeats an alarm only after its cool-down."""
        CD = 5

        class TestTimedClient(alarm.Timed):
            """Timed client that records alarms instead of sending them."""

            def __init__(self):
                alarm.Timed.__init__(self, CD)
                self.alarms = []

            def do_send_alarm(self, endpoint, message, exception, **kwargs):
                record = {'endpoint': endpoint, 'message': message}
                self.alarms.append(record)

        def alarm_count():
            return len(self.app.alarm_client.alarms)

        self.app.replace_alarm_client(TestTimedClient())
        poller = FakePoller(self.app)

        # freshly created instance: polling it must not raise an alarm
        nm.create_instance('127.0.0.1', 29000)
        commit_session()
        self.app.write_polling_targets()
        poller.poll_once()
        self.assertEqual(0, alarm_count())

        # once alerts are no longer suppressed the first poll raises one
        node = nm.get_by_host_port('127.0.0.1', 29000)
        node.suppress_alert = False
        commit_session()
        self.app.write_polling_targets()
        poller.poll_once()
        self.assertEqual(1, alarm_count())

        # polling again right away adds nothing
        poller.poll_once()
        self.assertEqual(1, alarm_count())

        # after waiting longer than CD the alarm is sent again
        time.sleep(CD + 1)
        poller.poll_once()
        self.assertEqual(2, alarm_count())
Example #13
0
    def test_alarm(self):
        """An alarm is raised for a node only once its alert is unsuppressed."""

        class TestAlarmClient(alarm.Base):
            """Alarm client that keeps the last alarm per (host, port)."""

            def __init__(self):
                self.alarms = {}

            def send_alarm(self, endpoint, message, exception, **kwargs):
                key = (endpoint.host, endpoint.port)
                self.alarms[key] = (message, exception)

        def alarm_count():
            return len(self.app.alarm_client.alarms)

        self.app.replace_alarm_client(TestAlarmClient())
        poller = FakePoller(self.app)

        # freshly created instance: polling it must not raise an alarm
        nm.create_instance('127.0.0.1', 29000)
        commit_session()
        self.app.write_polling_targets()
        poller.poll_once()
        self.assertEqual(0, alarm_count())

        # with suppression switched off the next poll raises one alarm
        node = nm.get_by_host_port('127.0.0.1', 29000)
        node.suppress_alert = False
        commit_session()
        self.app.write_polling_targets()
        poller.poll_once()
        self.assertEqual(1, alarm_count())
Example #14
0
        if len(me.assigned_slots) != 0:
            raise ValueError('node still holding slots')
        redistrib.command.quit_cluster(host, port)
    except SocketError, e:
        logging.exception(e)
        logging.info('Remove instance from cluster on exception')
    except ProtocolError, e:
        if NOT_IN_CLUSTER_MESSAGE not in e.message:
            raise

    remove_empty_cluster(cluster_id)
    n = get_node_by_host_port(host, port)
    if n is not None:
        n.assignee_id = None
        db.session.add(n)
    commit_session()
    return True


def _migrate_slots(command,
                   src_host,
                   src_port,
                   dst_host,
                   dst_port,
                   slots,
                   start=0):
    while start < len(slots):
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host, dst_port,
                                        [slots[start]])
        start += 1
Example #15
0
    def test_create_delete_cluster(self):
        """Walk a cluster through add, launch, shutdown and delete.

        Deleting the cluster is expected to be rejected with 400 while its
        task records exist and to succeed with 200 once they are removed.
        """
        with self.app.test_client() as client:
            new_node = {'host': '127.0.0.1', 'port': '7100'}
            r = client.post('/redis/add', data=new_node)
            self.assertReqStatus(200, r)
            expected_targets = {
                'nodes': [{
                    'host': '127.0.0.1',
                    'port': 7100,
                    'suppress_alert': 1,
                }],
                'proxies': [],
            }
            self.assertEqual(expected_targets, self.app.polling_targets())

        with self.app.test_client() as client:
            r = client.post('/cluster/add',
                            data={'descr': 'the-quick-brown-fox'})
            self.assertReqStatus(200, r)
            cluster_id = int(r.data)

            launch_payload = json.dumps({
                'cluster': cluster_id,
                'nodes': [{'host': '127.0.0.1', 'port': 7100}],
            })
            r = client.post('/task/launch', data=launch_payload)
            self.assertReqStatus(200, r)
            self.exec_all_tasks()

        with self.app.test_client() as client:
            r = client.post('/cluster/shutdown',
                            data={'cluster_id': cluster_id})
            self.assertReqStatus(200, r)
            self.exec_all_tasks()

            tasks = models.task.ClusterTask.query.all()
            self.assertEqual(1, len(tasks))
            self.assertEqual(cluster_id, tasks[0].cluster_id)

        with self.app.test_client() as client:
            # rejected while the task records above still exist
            r = client.post('/cluster/delete', data={'id': cluster_id})
            self.assertReqStatus(400, r)

            step_query = models.task.TaskStep.query
            step_query.filter_by(task_id=tasks[0].id).delete()
            models.task.ClusterTask.query.delete()
            commit_session()

        with self.app.test_client() as client:
            r = client.post('/cluster/delete', data={'id': cluster_id})
            self.assertReqStatus(200, r)
Example #16
0
        if len(me.assigned_slots) != 0:
            raise ValueError('node still holding slots')
        redistrib.command.quit_cluster(host, port)
    except SocketError, e:
        logging.exception(e)
        logging.info('Remove instance from cluster on exception')
    except ProtocolError, e:
        if NOT_IN_CLUSTER_MESSAGE not in e.message:
            raise

    remove_empty_cluster(cluster_id)
    n = get_node_by_host_port(host, port)
    if n is not None:
        n.assignee_id = None
        db.session.add(n)
    commit_session()
    return True


def _migrate_slots(command, src_host, src_port, dst_host, dst_port, slots,
                   start=0):
    while start < len(slots):
        begin = datetime.now()
        redistrib.command.migrate_slots(src_host, src_port, dst_host, dst_port,
                                        [slots[start]])
        start += 1
        if (datetime.now() - begin).seconds >= config.POLL_INTERVAL:
            command.args['start'] = start
            command.save()
            commit_session()
            return start == len(slots)