Example #1
0
async def update_client_status():
    """Background loop that disconnects stale clients and pings the rest.

    Every 5 seconds, each client whose status is 'connected' is inspected:

    * If it has been inactive for more than 200 seconds it is marked
      'disconnected' (its ``reporting`` is reset to "idle" unless it is
      currently "ready"), and the subgraph it was working on, if any, is put
      back to "ready".
    * A "check_status" event is then emitted to the client's own room on the
      namespace named after its type.

    The loop never returns.
    """
    while True:
        clients = ravdb.get_clients(status='connected')
        for client in clients:
            now = datetime.datetime.utcnow()
            # total_seconds() is required here: timedelta.seconds only holds
            # the sub-day remainder, so a client idle for 1 day + 10 s would
            # otherwise look like it was active 10 seconds ago.
            idle_seconds = (now - client.last_active_time).total_seconds()
            if idle_seconds > 200:  # To be reduced.
                changes = {"status": "disconnected", "disconnected_at": now}
                if client.reporting != "ready":
                    changes["reporting"] = "idle"
                ravdb.update_client(client, **changes)

                # Release the subgraph the client was holding.
                assigned_subgraph = ravdb.get_subgraph(
                    client.current_subgraph_id, client.current_graph_id)
                if assigned_subgraph is not None:
                    ravdb.update_subgraph(assigned_subgraph, status="ready")

            await sio.emit(
                "check_status",
                {"sid": client.sid},
                namespace="/{}".format(client.type),
                room=client.sid,
            )

        await sio.sleep(5)
Example #2
0
async def retry_failed_subgraphs(graph_id):
    """Queue the failed subgraphs of a graph for retry, or give up on them.

    For every failed subgraph id reported for the graph, the client assigned
    to it (if any) is released. A subgraph with at most 5 retry attempts is
    added to the global ``Queue`` as a ``(subgraph_id, graph_id)`` pair unless
    it is already there. Beyond that, the subgraph, the graph and every op of
    the subgraph are marked "failed".

    :param graph_id: id of the graph whose failed subgraphs are processed
    """
    global Queue
    graph = ravdb.get_graph(graph_id)
    failed_subgraph_ids = ravdb.get_failed_subgraphs_from_graph(graph)
    if len(failed_subgraph_ids) == 0:
        return

    print("\nFailed subgraph ids Retry: ", failed_subgraph_ids)
    for current_id in failed_subgraph_ids:
        failed_subgraph = ravdb.get_subgraph(subgraph_id=current_id,
                                             graph_id=graph_id)

        # Free whichever client was holding this subgraph.
        holder = ravdb.get_assigned_client(failed_subgraph.subgraph_id,
                                           failed_subgraph.graph_id)
        if holder is not None:
            ravdb.update_client(holder,
                                reporting="idle",
                                current_subgraph_id=None,
                                current_graph_id=None)

        attempts = failed_subgraph.retry_attempts
        op_ids = ast.literal_eval(failed_subgraph.op_ids)

        if int(attempts) > 5:
            # Retry budget exhausted: fail the subgraph, its graph and ops.
            ravdb.update_subgraph(failed_subgraph, status='failed')
            ravdb.update_graph(graph, status="failed")
            for op_id in op_ids:
                ravdb.update_op(ravdb.get_op(op_id), status="failed")
            continue

        queue_entry = (failed_subgraph.subgraph_id, failed_subgraph.graph_id)
        if queue_entry not in Queue:
            Queue.append(queue_entry)
Example #3
0
async def horizontal_split(graph_id, minimum_split_size=20):
    """Cut oversized subgraphs of a graph into a head and a standby tail.

    A subgraph is considered only when it has not failed, has at most one
    retry attempt, and is not computed, standby, failed or computing. If it
    holds more than ``minimum_split_size`` ops, it keeps the first
    ``minimum_split_size`` of them and the remainder moves to a newly created
    "standby" subgraph whose parent is the original one.

    When the head would equal the parent's op list, the split size is
    enlarged by a random amount. The enlarged size stays in effect for the
    subgraphs processed afterwards in the same call.

    :param graph_id: id of the graph whose subgraphs are split
    :param minimum_split_size: number of ops the original subgraph keeps
    """
    for subgraph in ravdb.get_all_subgraphs(graph_id=graph_id):
        eligible = (subgraph.has_failed == "False"
                    and int(subgraph.retry_attempts) <= 1
                    and subgraph.status != SubgraphStatus.COMPUTED
                    and subgraph.status != 'standby'
                    and subgraph.status != 'failed'
                    and subgraph.status != 'computing')
        if not eligible:
            continue

        op_ids = ast.literal_eval(subgraph.op_ids)
        if len(op_ids) <= minimum_split_size:
            continue

        head = op_ids[:minimum_split_size]

        parent = ravdb.get_subgraph(subgraph_id=subgraph.parent_subgraph_id,
                                    graph_id=graph_id)
        if parent is not None and str(head) == str(parent.op_ids):
            # Same cut as the parent: move the cut point forward at random.
            minimum_split_size += random.randint(
                1, len(op_ids) - minimum_split_size)
            head = op_ids[:minimum_split_size]

        tail = op_ids[minimum_split_size:]
        ravdb.update_subgraph(subgraph, op_ids=str(head))

        # The new subgraph id is derived from the current subgraph count.
        existing_count = len(ravdb.get_all_subgraphs(graph_id=graph_id))
        if len(tail) == 0:
            continue

        child = ravdb.create_subgraph(subgraph_id=existing_count + 1,
                                      graph_id=graph_id,
                                      optimized="False",
                                      op_ids=str(tail),
                                      status="standby",
                                      parent_subgraph_id=subgraph.subgraph_id)
        for moved_op_id in tail:
            ravdb.update_op(ravdb.get_op(moved_op_id),
                            subgraph_id=child.subgraph_id)
Example #4
0
def create_sub_graphs(graph_id):
    """Create or refresh one subgraph per entry of the graph's op dependency.

    An existing subgraph gets its ids, op list and complexity rewritten. A
    missing one is created in the READY state and its complexity is stored
    right after creation.

    :param graph_id: id of the graph whose subgraphs are synchronised
    """
    op_dependency = ravdb.get_graph_op_dependency(graph_id)
    for subgraph_id in op_dependency:
        existing = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)

        if existing is None:
            created = ravdb.create_subgraph(
                subgraph_id=subgraph_id,
                graph_id=graph_id,
                op_ids=str(op_dependency[subgraph_id]),
                status=SubgraphStatus.READY)
            ravdb.update_subgraph(
                created,
                complexity=calculate_subgraph_complexity(subgraph=created))
            continue

        # Complexity is computed before the op list is overwritten.
        complexity = calculate_subgraph_complexity(subgraph=existing)
        ravdb.update_subgraph(existing,
                              subgraph_id=subgraph_id,
                              graph_id=graph_id,
                              op_ids=str(op_dependency[subgraph_id]),
                              complexity=complexity)
Example #5
0
async def run_scheduler():
    """Main scheduling loop; never returns.

    Sets the global ``SCHEDULER_RUNNING`` flag, then repeats every 2 seconds:

    1. Fetch the pending "distributed" and "federated" graphs.
    2. Federated graphs only get their subgraphs created/refreshed.
    3. For each distributed graph: split it vertically and horizontally,
       queue its failed subgraphs for retry, rebuild the queued subgraphs
       from their not-yet-computed ops, assign 'ready' subgraphs to idle
       clients, and finally reconcile every subgraph's status with the
       statuses of the ops it owns.
    """
    global SCHEDULER_RUNNING, Queue
    SCHEDULER_RUNNING = True
    while True:
        print("Scheduler Running...")
        distributed_graphs = ravdb.get_graphs(status=GraphStatus.PENDING,
                                              approach="distributed")
        federated_graphs = ravdb.get_graphs(status=GraphStatus.PENDING,
                                            approach="federated")

        if len(distributed_graphs) == 0 and len(federated_graphs) == 0:
            print("No graphs found")

        else:
            for federated_graph in federated_graphs:
                create_sub_graphs(federated_graph.id)

            for distributed_graph in distributed_graphs:
                current_graph_id = distributed_graph.id

                # --- Phase 1: (re)partition the graph ----------------------
                # The short sleeps hand control back between the steps.
                await vertical_split(distributed_graph.id)
                await sio.sleep(0.1)
                await horizontal_split(distributed_graph.id,
                                       minimum_split_size=20)
                await sio.sleep(0.1)
                await retry_failed_subgraphs(distributed_graph.id)
                await sio.sleep(0.1)

                # --- Phase 2: rebuild subgraphs queued for retry -----------
                failed_subgraph_ids = get_failed_subgraphs_from_queue(
                    current_graph_id)

                for subgraph_id in failed_subgraph_ids:
                    subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                                  graph_id=current_graph_id)

                    # Skip subgraphs that are already assigned, running or
                    # done, and those already rebuilt once (has_failed is
                    # "True") unless they are in the 'failed' state again.
                    if subgraph.status != "assigned" and subgraph.status != "computing" and subgraph.status != "computed":
                        if subgraph.has_failed != "True" or subgraph.status == "failed":
                            op_ids = ast.literal_eval(subgraph.op_ids)
                            # Collect every op that is not computed, plus any
                            # of its inputs that is not computed either.
                            final_subsubgraph_list = []
                            for op_id in op_ids:
                                op = ravdb.get_op(op_id)
                                if op.status != "computed":
                                    if op.inputs != "null":
                                        for input_op_id in ast.literal_eval(
                                                op.inputs):
                                            input_op = ravdb.get_op(
                                                input_op_id)
                                            if input_op.status != "computed":
                                                final_subsubgraph_list.append(
                                                    input_op_id)
                                    final_subsubgraph_list.append(op_id)

                            # De-duplicate, then sort the collected ids.
                            final_subsubgraph_list = list(
                                set(final_subsubgraph_list))

                            failed_ops = final_subsubgraph_list
                            failed_ops.sort()

                            # Move the collected ops into this subgraph and
                            # clear their message. Ops whose operator is not
                            # "lin" are also reset to "pending"; "lin" ops
                            # keep their status.
                            for failed_op_id in failed_ops:
                                failed_op = ravdb.get_op(failed_op_id)
                                if failed_op.operator != "lin" and failed_op.status != "computed":
                                    ravdb.update_op(failed_op,
                                                    subgraph_id=subgraph_id,
                                                    message=None,
                                                    status="pending")
                                elif failed_op.operator == "lin":
                                    ravdb.update_op(failed_op,
                                                    subgraph_id=subgraph_id,
                                                    message=None)

                            # The return value is not used afterwards.
                            updated_subgraph = ravdb.update_subgraph(
                                subgraph,
                                op_ids=str(failed_ops),
                                status='ready',
                                optimized='True',
                                has_failed="True")
                if len(Queue) > 0:
                    print('\nQUEUE', Queue)

                # --- Phase 3: assign ready subgraphs to idle clients -------
                subgraphs = ravdb.get_subgraphs_from_graph(
                    graph_id=current_graph_id)

                for subgraph in subgraphs:
                    if subgraph.status == 'ready':

                        # A subgraph is dispatchable only when every input
                        # that belongs to a different subgraph is computed.
                        ready_flag = True
                        op_ids = ast.literal_eval(subgraph.op_ids)
                        for op_id in op_ids:
                            op = ravdb.get_op(op_id)
                            if op.inputs != 'null':
                                for input_op_id in ast.literal_eval(op.inputs):
                                    input_op = ravdb.get_op(input_op_id)
                                    if input_op.subgraph_id != subgraph.subgraph_id:
                                        if input_op.status != "computed":
                                            ready_flag = False
                                            break
                                if not ready_flag:
                                    break
                        if not ready_flag:
                            continue

                        idle_clients = ravdb.get_idle_clients(
                            reporting=ClientStatus.IDLE)
                        if idle_clients is not None:
                            op_ids = ast.literal_eval(subgraph.op_ids)
                            # Estimated total time per idle client: the sum
                            # of the client's capability value for each op's
                            # operator, or a random value in [0, 10) when
                            # the operator is absent from its capabilities.
                            prelim_times = {}
                            for idle_client in idle_clients:
                                idle_client_time = 0
                                for op_id in op_ids:
                                    op = ravdb.get_op(op_id=op_id)
                                    if op is not None:
                                        operator = op.operator
                                        capabilities_dict = ast.literal_eval(
                                            idle_client.capabilities)
                                        if operator not in capabilities_dict.keys(
                                        ):
                                            client_time = random.random() * 10
                                        else:
                                            client_time = capabilities_dict[
                                                operator]
                                        idle_client_time += client_time
                                prelim_times[idle_client.id] = idle_client_time
                            if bool(prelim_times):
                                # Pick the client with the lowest estimate.
                                fastest_client_id = min(prelim_times,
                                                        key=prelim_times.get)
                                client = ravdb.get_client(id=fastest_client_id)
                                ravdb.update_subgraph(subgraph,
                                                      status='assigned')
                                ravdb.update_client(
                                    client,
                                    reporting='busy',
                                    current_subgraph_id=subgraph.subgraph_id,
                                    current_graph_id=subgraph.graph_id)

                            else:
                                print('\n\nNo idle clients')
                        else:
                            print('\nNo idle clients')

                # --- Phase 4: reconcile subgraph status with its ops -------
                subgraphs = ravdb.get_all_subgraphs(graph_id=current_graph_id)
                for subgraph in subgraphs:
                    subgraph_op_ids = ast.literal_eval(subgraph.op_ids)
                    # Despite the name, this list holds op objects: the ops
                    # listed by the subgraph that still point back at it.
                    actual_op_ids = []
                    for op_id in subgraph_op_ids:
                        op = ravdb.get_op(op_id)
                        if op.subgraph_id == subgraph.subgraph_id:
                            actual_op_ids.append(op)

                    num_ops = len(actual_op_ids)
                    counter = {
                        'pending': 0,
                        'computed': 0,
                        'failed': 0,
                        'computing': 0
                    }
                    for subgraph_op in actual_op_ids:
                        if subgraph_op.status == "pending":
                            counter['pending'] += 1
                        elif subgraph_op.status == "computed":
                            counter['computed'] += 1
                        elif subgraph_op.status == "failed":
                            counter['failed'] += 1
                        elif subgraph_op.status == "computing":
                            counter['computing'] += 1

                    if subgraph.status != 'computed':
                        # Every owned op is computed (also true when the
                        # subgraph owns no ops): finish it and free its client.
                        if counter['computed'] == num_ops:
                            ravdb.update_subgraph(subgraph, status="computed")
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)

                        # A computing subgraph that still has pending ops is
                        # sent back to "not_ready" and its client is freed.
                        # NOTE(review): the `== 'computed'` alternative looks
                        # unreachable, since this chain is nested under
                        # `status != 'computed'` -- confirm.
                        elif counter['pending'] > 0 and (
                                subgraph.status == 'computing'
                                or subgraph.status == 'computed'):
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)
                            ravdb.update_subgraph(subgraph,
                                                  status="not_ready",
                                                  optimized="False")

                        # Owned ops exist but none is pending, computing,
                        # failed or computed: also treated as computed.
                        elif counter['pending'] == 0 and counter[
                                'computing'] == 0 and counter[
                                    'failed'] == 0 and counter['computed'] == 0:
                            ravdb.update_subgraph(subgraph, status="computed")
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)

                    # A previously failed subgraph that is now computed is
                    # dropped from the retry queue.
                    # NOTE(review): temp_Queue is the same list object as
                    # Queue (no copy is made), so entries are removed from
                    # the list while it is being iterated.
                    elif subgraph.status == "computed" and subgraph.has_failed == "True":  #and int(subgraph.retry_attempts) >= 2:
                        temp_Queue = Queue
                        for queue_subgraph_id, queue_graph_id in Queue:
                            if queue_subgraph_id == subgraph.subgraph_id and queue_graph_id == current_graph_id:
                                temp_Queue.remove(
                                    (queue_subgraph_id, queue_graph_id))
                        Queue = temp_Queue

                    # A computed subgraph that still owns pending ops is sent
                    # back to "not_ready" and its client is freed.
                    if subgraph.status == "computed":
                        if counter['pending'] > 0:
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)
                            ravdb.update_subgraph(subgraph,
                                                  status="not_ready",
                                                  optimized="False")

        # Pause before the next scheduling pass.
        await sio.sleep(2)
Example #6
0
async def vertical_split(graph_id):
    """Synchronise a graph's subgraphs with its op dependency and split the
    unoptimized ones into their disconnected components.

    Pass 1 stamps every op with its subgraph id and creates or refreshes the
    matching subgraph rows. Pass 2 builds, for each eligible unoptimized
    subgraph, a directed graph of input -> op edges and takes its weakly
    connected components: the first component keeps the subgraph id and each
    further one gets a new id. Passes 3 and 4 write the new partition back to
    the ops and to the subgraph rows.

    :param graph_id: id of the graph to split
    """
    op_dependency = ravdb.get_graph_op_dependency(graph_id)

    # print('OP DEPENDENCY: ',op_dependency)

    # --- Pass 1: sync ops and subgraph rows with the dependency map --------
    for subgraph_id in op_dependency:
        op_ids = op_dependency[subgraph_id]
        for op_id in op_ids:
            op = ravdb.get_op(op_id)
            ravdb.update_op(op, subgraph_id=subgraph_id)

        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)

        # Sorted ids of the ops stored for this subgraph.
        subgraph_ops = ravdb.get_subgraph_ops(graph_id=graph_id,
                                              subgraph_id=subgraph_id)
        subgraph_op_ids = []
        for subgraph_op in subgraph_ops:
            subgraph_op_ids.append(subgraph_op.id)
        subgraph_op_ids.sort()

        if subgraph is None:
            subgraph = ravdb.create_subgraph(subgraph_id=subgraph_id,
                                             graph_id=graph_id,
                                             op_ids=str(subgraph_op_ids),
                                             status=SubgraphStatus.READY)
        else:
            # Failed subgraphs are left untouched here.
            if subgraph.status != 'failed':
                if subgraph.status == 'standby':
                    # A standby subgraph moves to 'not_ready' once its parent
                    # is computing or computed.
                    # NOTE(review): parent_subgraph is dereferenced without a
                    # None check -- confirm a standby subgraph always has a
                    # parent.
                    parent_subgraph = ravdb.get_subgraph(
                        subgraph_id=subgraph.parent_subgraph_id,
                        graph_id=graph_id)
                    if parent_subgraph.status == SubgraphStatus.COMPUTED or parent_subgraph.status == SubgraphStatus.COMPUTING:
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids),
                                              status='not_ready')
                else:
                    # NOTE(review): this assignment is never read afterwards.
                    op_ids = subgraph.op_ids
                    if len(subgraph_op_ids) == 0:
                        # No ops left in the subgraph: mark it computed and
                        # free its client.
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids),
                                              status='computed',
                                              optimized='True')
                        assigned_client = ravdb.get_assigned_client(
                            subgraph.subgraph_id, subgraph.graph_id)
                        if assigned_client is not None:
                            ravdb.update_client(assigned_client,
                                                reporting="idle",
                                                current_subgraph_id=None,
                                                current_graph_id=None)
                    else:
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids))

    # --- Pass 2: split unoptimized subgraphs into connected components -----
    # New subgraph ids are numbered upwards from the current subgraph count.
    last_id = len(ravdb.get_all_subgraphs(graph_id=graph_id))
    if last_id == 0:
        last_id = 1
    new_op_dependency = {}
    for subgraph_id in op_dependency:
        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)
        if subgraph is not None and subgraph.status != 'standby' and subgraph.status != 'computed' and subgraph.status != 'computing':
            if subgraph.optimized == "False":
                computed_ops = []
                G = nx.DiGraph()

                op_ids = ast.literal_eval(subgraph.op_ids)

                # The first time a computed input is seen it is only
                # recorded. Each later use of the same input is rewired to a
                # fresh "ghost" op: an already-computed "lin" op with no
                # inputs that copies the input's metadata.
                for op_id in op_ids:
                    op = ravdb.get_op(op_id)
                    if op.inputs != "null":
                        for input_id in ast.literal_eval(op.inputs):
                            input_op = ravdb.get_op(input_id)
                            if input_id in computed_ops:
                                name = "ghost_op_" + str(input_id)
                                ghost_op = ravdb.create_op(
                                    name=name,
                                    graph_id=input_op.graph_id,
                                    subgraph_id=subgraph_id,
                                    complexity=input_op.complexity,
                                    output_dims=input_op.output_dims,
                                    inputs="null",
                                    outputs=input_op.outputs,
                                    node_type="input",
                                    op_type="other",
                                    operator="lin",
                                    status="computed",
                                    params=input_op.params)
                                # Replace the shared input with the ghost op
                                # in this op's input list.
                                op_inputs = ast.literal_eval(op.inputs)
                                for j in range(len(op_inputs)):
                                    if op_inputs[j] == input_id:
                                        op_inputs[j] = ghost_op.id
                                ravdb.update_op(op, inputs=str(op_inputs))
                            else:
                                if input_op.status == "computed":
                                    computed_ops.append(input_id)

                # Ops are re-read here and one input -> op edge is added per
                # listed input.
                for op_id in op_ids:
                    op = ravdb.get_op(op_id)
                    if op.inputs != "null":
                        for input_id in ast.literal_eval(op.inputs):
                            G.add_edge(input_id, op_id)

                subsubgraphs = list(nx.weakly_connected_components(G))
                subsubgraphs = [list(x) for x in subsubgraphs]

                # The first component keeps the subgraph id; the others are
                # numbered after last_id.
                if len(subsubgraphs) > 1:
                    new_op_dependency[subgraph_id] = subsubgraphs[0]
                    for i in range(1, len(subsubgraphs)):
                        new_op_dependency[last_id + i] = subsubgraphs[i]
                elif len(subsubgraphs) == 1:
                    new_op_dependency[subgraph_id] = subsubgraphs[0]

                # Continue numbering from the most recently inserted key.
                if len(new_op_dependency) != 0:
                    last_id = list(new_op_dependency.keys())[-1]

    # print('\nNEW OP DEPENDENCY: ',new_op_dependency)
    # --- Pass 3: point every op at its new subgraph -------------------------
    for subgraph_id in new_op_dependency:
        op_ids = new_op_dependency[subgraph_id]
        for k in range(len(op_ids)):
            op = ravdb.get_op(op_ids[k])
            ravdb.update_op(op, subgraph_id=subgraph_id)

    # --- Pass 4: create or refresh the subgraph rows ------------------------
    for subgraph_id in new_op_dependency:
        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)
        # Sorts the list stored in new_op_dependency in place.
        sorted_new_op_deps = new_op_dependency[subgraph_id]
        sorted_new_op_deps.sort()
        if subgraph is not None:
            # Complexity is computed before the row's op_ids are updated.
            complexity = calculate_subgraph_complexity(subgraph=subgraph)
            ravdb.update_subgraph(subgraph,
                                  subgraph_id=subgraph_id,
                                  graph_id=graph_id,
                                  op_ids=str(sorted_new_op_deps),
                                  complexity=complexity,
                                  optimized="True",
                                  status=SubgraphStatus.READY)
        else:
            subgraph = ravdb.create_subgraph(subgraph_id=subgraph_id,
                                             graph_id=graph_id,
                                             op_ids=str(sorted_new_op_deps),
                                             status=SubgraphStatus.READY,
                                             optimized="True")
            complexity = calculate_subgraph_complexity(subgraph=subgraph)
            ravdb.update_subgraph(subgraph, complexity=complexity)
Example #7
0
async def final_scheduler_call(graph_id):
    """Run one last assignment and status pass over a graph's subgraphs.

    After a 5 second pause, every 'ready' subgraph is handed to the idle
    client with the lowest estimated compute time. Every subgraph that is not
    'computed' is then marked "computed" when all the ops it owns are
    computed, or "failed" when at least one of them failed; in both cases its
    client is released. The graph is marked 'computed' when every subgraph
    reads as "computed" afterwards.

    :param graph_id: id of the graph to finalise
    """
    await sio.sleep(5)
    distributed_graph = ravdb.get_graph(graph_id=graph_id)
    subgraphs = ravdb.get_all_subgraphs(graph_id=graph_id)

    for subgraph in subgraphs:
        if subgraph.status == 'ready':
            idle_clients = ravdb.get_idle_clients(reporting=ClientStatus.IDLE)
            if idle_clients is None:
                print('\nNo idle clients')
            else:
                op_ids = ast.literal_eval(subgraph.op_ids)
                # Estimated total time per idle client, keyed by client id.
                prelim_times = {}
                for idle_client in idle_clients:
                    estimate = 0
                    for op_id in op_ids:
                        op = ravdb.get_op(op_id=op_id)
                        if op is None:
                            continue
                        capabilities_dict = ast.literal_eval(
                            idle_client.capabilities)
                        if op.operator in capabilities_dict.keys():
                            estimate += capabilities_dict[op.operator]
                        else:
                            # Unknown operator: use a random cost in [0, 10).
                            estimate += random.random() * 10
                    prelim_times[idle_client.id] = estimate

                if prelim_times:
                    fastest_client_id = min(prelim_times, key=prelim_times.get)
                    client = ravdb.get_client(id=fastest_client_id)
                    ravdb.update_subgraph(subgraph, status='assigned')
                    ravdb.update_client(
                        client,
                        reporting='busy',
                        current_subgraph_id=subgraph.subgraph_id,
                        current_graph_id=subgraph.graph_id)
                else:
                    print('\n\nNo idle clients')

        if subgraph.status != 'computed':
            # Only ops that still point back at this subgraph are counted.
            owned_ops = []
            for op_id in ast.literal_eval(subgraph.op_ids):
                op = ravdb.get_op(op_id)
                if op.subgraph_id == subgraph.subgraph_id:
                    owned_ops.append(op)

            counter = {'pending': 0, 'computed': 0, 'failed': 0,
                       'computing': 0}
            for owned_op in owned_ops:
                if owned_op.status in counter:
                    counter[owned_op.status] += 1

            if counter['computed'] == len(owned_ops):
                final_status = "computed"
            elif counter['failed'] > 0:
                final_status = "failed"
            else:
                final_status = None

            if final_status is not None:
                ravdb.update_subgraph(subgraph, status=final_status)
                assigned_client = ravdb.get_assigned_client(
                    subgraph.subgraph_id, subgraph.graph_id)
                if assigned_client is not None:
                    ravdb.update_client(assigned_client,
                                        reporting="idle",
                                        current_subgraph_id=None,
                                        current_graph_id=None)

    graph_completed = all(
        check_subgraph.status == "computed" for check_subgraph in subgraphs)

    if graph_completed:
        ravdb.update_graph(distributed_graph, status='computed')
Example #8
0
async def emit_op(sid, op=None):
    """
    Emit the pending ops of the subgraph assigned to a client.

    1. Find the subgraph assigned to the client
    2. Wait until the external inputs of its pending ops are ready
    3. Create the payloads
    4. Emit them as one "subgraph" event and mark subgraph/ops as computing

    Nothing is emitted when the client is unknown, has no subgraph/graph
    assigned, or its subgraph is missing or not in the 'assigned' state.

    :param sid: socket session id of the client; also used as the target room
    :param op: unused; kept so existing callers keep working
    """
    # Poll in a loop instead of recursing: the previous self-call every 0.1 s
    # deepened the coroutine chain for as long as the subgraph stayed not
    # ready and could end in a RecursionError.
    while True:
        client = ravdb.get_client_by_sid(sid)
        if client is None:
            return

        subgraph_id = client.current_subgraph_id
        graph_id = client.current_graph_id
        if subgraph_id is None or graph_id is None:
            return

        subgraph = ravdb.get_subgraph(subgraph_id, graph_id)
        if subgraph is None or subgraph.status != 'assigned':
            return

        # Parse the op list once per pass rather than once per input.
        subgraph_op_ids = ast.literal_eval(subgraph.op_ids)
        member_ids = set(subgraph_op_ids)

        ready_flag = True
        for op_id in subgraph_op_ids:
            pending_op = ravdb.get_op(op_id)
            if pending_op is None or pending_op.status != OpStatus.PENDING:
                continue
            # "null" is the stored marker for "no inputs"; literal_eval
            # cannot parse it, so skip it as the rest of the scheduler does.
            if pending_op.inputs == "null":
                continue
            inputs = ast.literal_eval(pending_op.inputs)
            if inputs is None:
                continue
            for input_op_id in inputs:
                # Inputs produced inside this subgraph travel with it.
                if input_op_id in member_ids:
                    continue
                input_op = ravdb.get_op(input_op_id)
                if ravdb.get_op_readiness(input_op) == "not_ready":
                    ready_flag = False
                    break
            if not ready_flag:
                break

        if ready_flag:
            break

        print("\n\nSubgraph not ready")
        await sio.sleep(0.1)

    # Only pending ops without a name are sent to the client.
    payloads = []
    appended_ops = []
    for op_id in subgraph_op_ids:
        candidate = ravdb.get_op(op_id)
        if candidate is None:
            continue
        if candidate.status == "pending" and candidate.name is None:
            payloads.append(create_payload(candidate))
            appended_ops.append(op_id)

    logger.debug("Emitting Subgraph:{}, {}".format(sid, payloads))
    await sio.emit("subgraph",
                   payloads,
                   namespace="/client",
                   room=sid)
    print("\n Emitted subgraph: ", subgraph_id)
    ravdb.update_subgraph(
        subgraph,
        status=SubgraphStatus.COMPUTING,
        retry_attempts=subgraph.retry_attempts + 1)

    # Re-read each emitted op and flip it to computing if still pending.
    for op_id in appended_ops:
        ravop = ravdb.get_op(op_id)
        if ravop is not None and ravop.status == "pending":
            ravdb.update_op(ravop, status=OpStatus.COMPUTING)