def _prepare_master_node(node, pod, aof, host, app):
    """Deploy a fresh node and open an auto-balance task that joins it.

    A container is deployed through ``_deploy_node``. A ``ClusterTask`` of
    type ``TASK_TYPE_AUTO_BALANCE`` is then created for the cluster that
    ``node`` is assigned to, flushed so that it has an id, and given a
    single ``join`` step pointing the new node (port 6379) at ``node``.

    Returns:
        A ``(task, container_id, new_node_host)`` tuple.

    Raises:
        Whatever the task creation raises; in that case the deployed
        container is removed and the session rolled back before re-raising.
    """
    container_id, deployed_host = _deploy_node(pod, aof, host, app)
    try:
        balance_task = models.task.ClusterTask(
            cluster_id=node.assignee_id,
            task_type=models.task.TASK_TYPE_AUTO_BALANCE,
            user_id=app.default_user_id())
        db.session.add(balance_task)
        # Flush so the task id is available for the log line below.
        db.session.flush()

        logging.info(
            'Node deployed: container id=%s host=%s; joining cluster %d'
            ' [create task %d] use host %s',
            container_id, deployed_host, node.assignee_id,
            balance_task.id, host)

        join_args = dict(
            cluster_id=node.assignee_id,
            cluster_host=node.host,
            cluster_port=node.port,
            newin_host=deployed_host,
            newin_port=6379)
        balance_task.add_step('join', **join_args)
    except BaseException as exc:
        # Undo the deployment: the container is useless without its task.
        logging.exception(exc)
        logging.info('Remove container %s and rollback', container_id)
        _rm_containers([container_id], app)
        db.session.rollback()
        raise
    return balance_task, container_id, deployed_host
def launch_cluster():
    """Assign the requested nodes to an empty cluster and queue a launch.

    Reads ``cluster`` and ``nodes`` (a list of ``{'host', 'port'}`` items)
    from the JSON request body. Each node is marked as assigned to the
    cluster, then a ``TASK_TYPE_LAUNCH`` task with a single ``launch`` step
    is added to the session.

    Raises:
        ValueError: the cluster does not exist or already has nodes, or a
            listed node does not exist or is already assigned.
    """
    payload = request.get_json(force=True)

    cluster = models.cluster.get_by_id(payload['cluster'])
    if cluster is None:
        raise ValueError('no such cluster')
    if len(cluster.nodes) != 0:
        raise ValueError('cluster serving')

    members = []
    for addr in payload['nodes']:
        member = models.node.get_by_host_port(addr['host'], addr['port'])
        if member is None:
            raise ValueError('no such node')
        if member.assignee_id is not None:
            raise ValueError('node already serving')
        member.assignee_id = cluster.id
        db.session.add(member)
        members.append(member)

    launch_task = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_LAUNCH,
        user_id=bp.app.get_user_id())
    endpoints = []
    for member in members:
        endpoints.append({'host': member.host, 'port': member.port})
    launch_task.add_step('launch', host_port_list=endpoints)
    db.session.add(launch_task)
def fix_node_migrating(request):
    """Queue a ``fix_migrate`` task for the single node named in the form.

    Reads ``host`` and ``port`` from ``request.form``.

    Raises:
        ValueError: the node is unknown or has no assignee.
    """
    form = request.form
    target = models.node.get_by_host_port(form['host'], int(form['port']))
    if target is None:
        raise ValueError('no such node in cluster')
    if target.assignee is None:
        raise ValueError('no such node in cluster')

    fix_task = models.task.ClusterTask(
        cluster_id=target.assignee.id,
        task_type=models.task.TASK_TYPE_FIX_MIGRATE)
    fix_task.add_step('fix_migrate', host=target.host, port=target.port)
    db.session.add(fix_task)
def recover_migrate_status(request):
    """Queue a ``fix_migrate`` step for every node of a cluster.

    Reads ``cluster_id`` from ``request.form``.

    Raises:
        ValueError: no cluster with that id exists.
    """
    cluster = models.cluster.get_by_id(int(request.form['cluster_id']))
    if cluster is None:
        raise ValueError('no such cluster')

    fix_task = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_FIX_MIGRATE)
    for member in cluster.nodes:
        endpoint = dict(host=member.host, port=member.port)
        fix_task.add_step('fix_migrate', **endpoint)
    db.session.add(fix_task)
def join_cluster(request):
    """Queue a task that joins the node given in the form to a cluster.

    Reads ``cluster_id``, ``host`` and ``port`` from ``request.form``. The
    first node of the cluster is used as the cluster-side address of the
    ``join`` step.

    Raises:
        ValueError: the cluster does not exist or has no nodes.
    """
    form = request.form
    cluster = models.cluster.get_by_id(int(form['cluster_id']))
    if cluster is None:
        raise ValueError('no such cluster')
    if len(cluster.nodes) == 0:
        raise ValueError('no such cluster')

    join_task = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_JOIN)
    join_args = dict(
        cluster_id=cluster.id,
        cluster_host=cluster.nodes[0].host,
        cluster_port=cluster.nodes[0].port,
        newin_host=form['host'],
        newin_port=int(form['port']))
    join_task.add_step('join', **join_args)
    db.session.add(join_task)
def recover_migrate_status(request):
    """Queue a ``fix_migrate`` step for every master of a cluster.

    Reads ``cluster_id`` from ``request.form``, asks the cluster's first
    node for the master list via ``redistrib.command.list_masters`` and
    adds one ``fix_migrate`` step per returned master.

    Raises:
        ValueError: the cluster does not exist or has no nodes to query.
    """
    c = models.cluster.get_by_id(int(request.form['cluster_id']))
    # An empty cluster has no node to ask for the master list; reject it
    # the same way the other handlers do instead of failing with an
    # IndexError on ``c.nodes[0]``.
    if c is None or len(c.nodes) == 0:
        raise ValueError('no such cluster')

    entry = c.nodes[0]
    # Only the first element of the returned value is used.
    # NOTE(review): presumably that element is the list of master nodes —
    # confirm against the redistrib version in use.
    masters = redistrib.command.list_masters(entry.host, entry.port)[0]

    task = models.task.ClusterTask(
        cluster_id=c.id,
        task_type=models.task.TASK_TYPE_FIX_MIGRATE)
    for node in masters:
        task.add_step('fix_migrate', host=node.host, port=node.port)
    db.session.add(task)
def replicate(request):
    """Queue a task that attaches a slave to the master named in the form.

    Reads ``master_host``, ``master_port``, ``slave_host`` and
    ``slave_port`` from ``request.form``.

    Raises:
        ValueError: the master node is unknown or not assigned to a cluster.
    """
    form = request.form
    master = nm.get_by_host_port(
        form['master_host'], int(form['master_port']))
    if master is None:
        raise ValueError('unable to replicate')
    if master.assignee_id is None:
        raise ValueError('unable to replicate')

    repl_task = models.task.ClusterTask(
        cluster_id=master.assignee_id,
        task_type=models.task.TASK_TYPE_REPLICATE)
    step_args = dict(
        cluster_id=master.assignee_id,
        master_host=master.host,
        master_port=master.port,
        slave_host=form['slave_host'],
        slave_port=int(form['slave_port']))
    repl_task.add_step('replicate', **step_args)
    db.session.add(repl_task)
def add_node_to_balance_for(host, port, plan, slots, app):
    """Deploy a new master (and its slaves) and move half of ``slots`` to it.

    The node at ``host:port`` must exist, belong to a cluster, and that
    cluster must have no current task; otherwise the call logs and returns
    ``None`` without doing anything.

    On the normal path a master is deployed via ``_prepare_master_node``,
    slaves via ``_add_slaves``, and a ``migrate`` step for the first half of
    ``slots`` is appended to the task. If the task lock is acquired, an
    ``eru_event`` audit record is written for every deployed host and the
    result of ``app.write_polling_targets()`` is returned. If the lock is
    not acquired, the deployed containers are removed.

    Raises:
        Any error raised while building the task; the deployed containers
        are removed and the session rolled back before re-raising.
    """
    # Normalise once so the ``%d`` log placeholders work when a string port
    # is passed in (the lookup already required ``int(port)``).
    port = int(port)
    node = models.node.get_by_host_port(host, port)
    if node is None or node.assignee_id is None:
        logging.info(
            'No node or cluster found for %s:%d (This should be a corrupt)',
            host, port)
        return
    if node.assignee.current_task is not None:
        logging.info('Fail to auto balance cluster %d for node %s:%d : busy',
                     node.assignee_id, host, port)
        return

    task, cid, new_host = _prepare_master_node(
        node, plan.pod, plan.aof, plan.host, app)
    cids = [cid]
    hosts = [new_host]
    try:
        cs, hs = _add_slaves(plan.slaves, task, node.assignee_id, new_host,
                             plan.pod, plan.aof, app)
        cids.extend(cs)
        hosts.extend(hs)

        # Floor division keeps the slice bound an int; ``/`` yields a float
        # on Python 3 and slicing would raise TypeError.
        migrating_slots = slots[:len(slots) // 2]
        task.add_step('migrate', src_host=node.host, src_port=node.port,
                      dst_host=new_host, dst_port=6379,
                      slots=migrating_slots)
        logging.info('Migrating %d slots from %s to %s',
                     len(migrating_slots), host, new_host)
        db.session.add(task)
        db.session.flush()

        lock = task.acquire_lock()
        if lock is not None:
            logging.info('Auto balance task %d has been emit; lock id=%d',
                         task.id, lock.id)
            for h in hosts:
                models.audit.eru_event(
                    h, 6379, models.audit.EVENT_TYPE_CREATE,
                    app.default_user_id(), plan.balance_plan_json)
            return app.write_polling_targets()

        # NOTE(review): the flushed task is not rolled back on this path —
        # confirm the caller discards it.
        logging.info(
            'Auto balance task fail to lock,'
            ' discard auto balance this time.'
            ' Delete container id=%s', cids)
        _rm_containers(cids, app)
    except BaseException:
        logging.info('Remove container %s and rollback', cids)
        _rm_containers(cids, app)
        db.session.rollback()
        raise
def batch_tasks(request):
    """Queue one batch task holding the requested migrate and quit steps.

    Reads ``cluster_id`` and the optional lists ``migrs`` and ``quits``
    from ``request.post_json``. Migrate steps are appended first, then quit
    steps.

    Raises:
        ValueError: the cluster does not exist or has no nodes.
    """
    payload = request.post_json
    cluster = models.cluster.get_by_id(payload['cluster_id'])
    if cluster is None:
        raise ValueError('no such cluster')
    if len(cluster.nodes) == 0:
        raise ValueError('no such cluster')

    batch = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_BATCH)

    for migr in payload.get('migrs', []):
        migrate_args = dict(
            src_host=migr['src_host'],
            src_port=migr['src_port'],
            dst_host=migr['dst_host'],
            dst_port=migr['dst_port'],
            slots=migr['slots'])
        batch.add_step('migrate', **migrate_args)

    for leaving in payload.get('quits', []):
        batch.add_step('quit', cluster_id=cluster.id,
                       host=leaving['host'], port=leaving['port'])

    db.session.add(batch)
def quit_cluster(request):
    """Queue a task that drains a node and removes it from its cluster.

    Reads ``host``, ``port`` and the optional ``migratings`` list from
    ``request.post_json``. One ``migrate`` step is added per ``migratings``
    entry (moving the listed slots off the node), followed by a final
    ``quit`` step.

    Raises:
        ValueError: the node is unknown.
    """
    payload = request.post_json
    leaving = nm.get_by_host_port(payload['host'], int(payload['port']))
    if leaving is None:
        raise ValueError('no such node')

    quit_task = models.task.ClusterTask(
        cluster_id=leaving.assignee_id,
        task_type=models.task.TASK_TYPE_QUIT)

    for move in payload.get('migratings', []):
        migrate_args = dict(
            src_host=leaving.host,
            src_port=leaving.port,
            dst_host=move['host'],
            dst_port=move['port'],
            slots=move['slots'])
        quit_task.add_step('migrate', **migrate_args)

    quit_task.add_step('quit', cluster_id=leaving.assignee_id,
                       host=leaving.host, port=leaving.port)
    db.session.add(quit_task)
def migrate_slots(request):
    """Queue a task that migrates slots from one node to another.

    Reads ``src_host``, ``src_port``, ``dst_host``, ``dst_port`` and
    ``slots`` (comma-separated integers) from ``request.form``.

    Raises:
        ValueError: the source node is unknown, or a port/slot value is not
            an integer.
    """
    src_host = request.form['src_host']
    src_port = int(request.form['src_port'])
    dst_host = request.form['dst_host']
    dst_port = int(request.form['dst_port'])
    slots = [int(s) for s in request.form['slots'].split(',')]

    src = nm.get_by_host_port(src_host, src_port)
    # Without this guard an unknown source fails with AttributeError on
    # ``src.assignee_id``; report it like the other handlers do.
    if src is None:
        raise ValueError('no such node')

    task = models.task.ClusterTask(
        cluster_id=src.assignee_id,
        task_type=models.task.TASK_TYPE_MIGRATE)
    task.add_step('migrate', src_host=src.host, src_port=src.port,
                  dst_host=dst_host, dst_port=dst_port, slots=slots)
    db.session.add(task)
def _add_slaves(slaves, task, cluster_id, master_host, pod, aof):
    """Deploy one container per slave spec and add ``replicate`` steps.

    For every entry of ``slaves`` a node is deployed through
    ``_deploy_node`` (using the entry's ``host`` value, if any) and a
    ``replicate`` step targeting ``master_host:6379`` is appended to
    ``task``.

    Returns:
        The list of deployed container ids.

    Raises:
        Any error raised along the way; the containers deployed so far are
        removed and the session rolled back before re-raising.
    """
    deployed = []
    try:
        for spec in slaves:
            wanted_host = spec.get('host')
            logging.info('Auto deploy slave for master %s [task %d],'
                         ' use host %s', master_host, task.id, wanted_host)
            container_id, slave_host = _deploy_node(
                pod, aof, spec.get('host'))
            deployed.append(container_id)
            step_args = dict(
                cluster_id=cluster_id,
                master_host=master_host,
                master_port=6379,
                slave_host=slave_host,
                slave_port=6379)
            task.add_step('replicate', **step_args)
    except BaseException as exc:
        logging.info('Remove container %s and rollback', deployed)
        _rm_containers(deployed)
        db.session.rollback()
        raise
    return deployed
def quit_cluster():
    """Queue a task that drains a node and removes it from its cluster.

    Reads ``host``, ``port`` and the optional ``migratings`` list from the
    JSON request body. One ``migrate`` step is added per ``migratings``
    entry, followed by a final ``quit`` step.

    Raises:
        ValueError: the node is unknown.
    """
    payload = request.get_json(force=True)
    leaving = models.node.get_by_host_port(payload['host'], payload['port'])
    if leaving is None:
        raise ValueError('no such node')

    quit_task = models.task.ClusterTask(
        cluster_id=leaving.assignee_id,
        task_type=models.task.TASK_TYPE_QUIT,
        user_id=bp.app.get_user_id())

    for move in payload.get('migratings', []):
        migrate_args = dict(
            src_host=leaving.host,
            src_port=leaving.port,
            dst_host=move['host'],
            dst_port=move['port'],
            slots=move['slots'])
        quit_task.add_step('migrate', **migrate_args)

    quit_task.add_step('quit', cluster_id=leaving.assignee_id,
                       host=leaving.host, port=leaving.port)
    db.session.add(quit_task)
def add_node_to_balance_for(host, port, plan, slots):
    """Deploy a new master (and its slaves) and move half of ``slots`` to it.

    The node at ``host:port`` must exist, belong to a cluster, and that
    cluster must have no current task; otherwise the call logs and returns
    without doing anything.

    On the normal path a master is deployed via ``_prepare_master_node``,
    slaves via ``_add_slaves``, and a ``migrate`` step for the first half of
    ``slots`` is appended to the task. If the task lock is acquired,
    ``file_ipc.write_nodes_proxies_from_db()`` is called. If the lock is not
    acquired, the deployed containers are removed.

    Raises:
        Any error raised while building the task; the deployed containers
        are removed and the session rolled back before re-raising.
    """
    # Normalise once so the ``%d`` log placeholders work when a string port
    # is passed in (the lookup already required ``int(port)``).
    port = int(port)
    node = models.node.get_by_host_port(host, port)
    if node is None or node.assignee_id is None:
        logging.info(
            'No node or cluster found for %s:%d (This should be a corrupt)',
            host, port)
        return
    if node.assignee.current_task is not None:
        logging.info(
            'Fail to auto balance cluster %d for node %s:%d : busy',
            node.assignee_id, host, port)
        return

    task, cid, new_host = _prepare_master_node(
        node, plan.pod, plan.aof, plan.host)
    cids = [cid]
    try:
        cids.extend(_add_slaves(
            plan.slaves, task, node.assignee_id, new_host, plan.pod,
            plan.aof))

        # Floor division keeps the slice bound an int; ``/`` yields a float
        # on Python 3 and slicing would raise TypeError.
        migrating_slots = slots[:len(slots) // 2]
        task.add_step(
            'migrate', src_host=node.host, src_port=node.port,
            dst_host=new_host, dst_port=6379, slots=migrating_slots)
        logging.info('Migrating %d slots from %s to %s',
                     len(migrating_slots), host, new_host)
        db.session.add(task)
        db.session.flush()

        lock = task.acquire_lock()
        if lock is not None:
            logging.info('Auto balance task %d has been emit; lock id=%d',
                         task.id, lock.id)
            file_ipc.write_nodes_proxies_from_db()
            return

        # NOTE(review): the flushed task is not rolled back on this path —
        # confirm the caller discards it.
        logging.info('Auto balance task fail to lock,'
                     ' discard auto balance this time.'
                     ' Delete container id=%s', cids)
        _rm_containers(cids)
    except BaseException:
        logging.info('Remove container %s and rollback', cids)
        _rm_containers(cids)
        db.session.rollback()
        raise
def join_cluster():
    """Queue one task that joins every listed node to a cluster.

    Reads ``cluster_id`` and ``nodes`` (a list of ``{'host', 'port'}``
    items) from the JSON request body. One ``join`` step is added per node,
    using the cluster's first node as the cluster-side address.

    Raises:
        ValueError: the cluster does not exist or has no nodes, or a listed
            node does not exist or is already assigned.
    """
    payload = request.get_json(force=True)
    cluster = models.cluster.get_by_id(int(payload['cluster_id']))
    if cluster is None:
        raise ValueError('no such cluster')
    if len(cluster.nodes) == 0:
        raise ValueError('no such cluster')

    join_task = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_JOIN,
        user_id=bp.app.get_user_id())

    for addr in payload['nodes']:
        newcomer = models.node.get_by_host_port(
            addr['host'], int(addr['port']))
        if newcomer is None:
            raise ValueError('no such node')
        if newcomer.assignee_id is not None:
            raise ValueError('node already serving')
        join_args = dict(
            cluster_id=cluster.id,
            cluster_host=cluster.nodes[0].host,
            cluster_port=cluster.nodes[0].port,
            newin_host=newcomer.host,
            newin_port=newcomer.port)
        join_task.add_step('join', **join_args)

    db.session.add(join_task)
def _prepare_master_node(node, pod, aof, host):
    """Deploy a fresh node and open an auto-balance task that joins it.

    A container is deployed through ``_deploy_node``. A ``ClusterTask`` of
    type ``TASK_TYPE_AUTO_BALANCE`` is then created for the cluster that
    ``node`` is assigned to, flushed so that it has an id, and given a
    single ``join`` step pointing the new node (port 6379) at ``node``.

    Returns:
        A ``(task, container_id, new_node_host)`` tuple.

    Raises:
        Whatever the task creation raises; in that case the deployed
        container is removed and the session rolled back before re-raising.
    """
    container_id, deployed_host = _deploy_node(pod, aof, host)
    try:
        balance_task = models.task.ClusterTask(
            cluster_id=node.assignee_id,
            task_type=models.task.TASK_TYPE_AUTO_BALANCE)
        db.session.add(balance_task)
        # Flush so the task id is available for the log line below.
        db.session.flush()

        logging.info(
            'Node deployed: container id=%s host=%s; joining cluster %d'
            ' [create task %d] use host %s',
            container_id, deployed_host, node.assignee_id,
            balance_task.id, host)

        join_args = dict(
            cluster_id=node.assignee_id,
            cluster_host=node.host,
            cluster_port=node.port,
            newin_host=deployed_host,
            newin_port=6379)
        balance_task.add_step('join', **join_args)
    except BaseException as exc:
        # Undo the deployment: the container is useless without its task.
        logging.exception(exc)
        logging.info('Remove container %s and rollback', container_id)
        _rm_containers([container_id])
        db.session.rollback()
        raise
    return balance_task, container_id, deployed_host
def batch_tasks():
    """Queue one batch task built from the JSON request body.

    Reads ``cluster_id`` plus the optional lists ``joins``, ``replicas``,
    ``migrs`` and ``quits``; steps are appended in that order. Nodes that
    join or become replicas are marked as assigned to the cluster. The task
    is added to the session only if at least one step was requested.

    Raises:
        ValueError: the cluster does not exist or has no nodes; a joining
            or replica node is unknown or already assigned; a replica's
            master is unknown or not part of this cluster.
    """
    payload = request.get_json(force=True)
    cluster = models.cluster.get_by_id(payload['cluster_id'])
    if cluster is None:
        raise ValueError('no such cluster')
    if len(cluster.nodes) == 0:
        raise ValueError('no such cluster')

    batch = models.task.ClusterTask(
        cluster_id=cluster.id,
        task_type=models.task.TASK_TYPE_BATCH,
        user_id=bp.app.get_user_id())
    queued_any = False
    seed = cluster.nodes[0]

    def _unassigned_node(host, port):
        # Look a node up and insist that it exists and is not in use yet.
        found = models.node.get_by_host_port(host, int(port))
        if found is None:
            raise ValueError('no such node')
        if found.assignee_id is not None:
            raise ValueError('node already serving')
        return found

    def _claim(member):
        # Mark the node as belonging to this cluster.
        member.assignee_id = cluster.id
        db.session.add(member)

    for item in payload.get('joins', []):
        queued_any = True
        newcomer = _unassigned_node(item['host'], item['port'])
        batch.add_step('join', cluster_host=seed.host,
                       cluster_port=seed.port, newin_host=newcomer.host,
                       newin_port=newcomer.port, cluster_id=cluster.id)
        _claim(newcomer)

    for item in payload.get('replicas', []):
        queued_any = True
        slave = _unassigned_node(item['slhost'], item['slport'])
        master = models.node.get_by_host_port(
            item['mhost'], int(item['mport']))
        if master is None:
            raise ValueError('no such node')
        if master.assignee_id != cluster.id:
            raise ValueError('master not in the cluster')
        batch.add_step('replicate', master_host=master.host,
                       master_port=master.port, slave_host=slave.host,
                       slave_port=slave.port, cluster_id=cluster.id)
        _claim(slave)

    for item in payload.get('migrs', []):
        queued_any = True
        batch.add_step('migrate', src_host=item['src_host'],
                       src_port=item['src_port'],
                       dst_host=item['dst_host'],
                       dst_port=item['dst_port'], slots=item['slots'])

    for item in payload.get('quits', []):
        queued_any = True
        batch.add_step('quit', cluster_id=cluster.id, host=item['host'],
                       port=item['port'])

    if not queued_any:
        return
    db.session.add(batch)