Beispiel #1
0
async def update_client_status():
    """Periodically disconnect silent clients and ping every connected one.

    Runs forever. On each pass it loads the clients whose status is
    ``connected`` and, for each of them:

    * if more than 200 seconds have elapsed since ``last_active_time``, the
      client is marked ``disconnected`` (``reporting`` is additionally reset
      to ``"idle"`` unless it is ``"ready"``), and the subgraph referenced by
      its ``current_subgraph_id`` / ``current_graph_id`` (if one is found) is
      put back to ``"ready"``;
    * a ``check_status`` event carrying the client's sid is emitted to the
      client's own room in the ``/<client.type>`` namespace.

    The loop then sleeps for 5 seconds before the next pass.
    """
    while True:
        for client in ravdb.get_clients(status='connected'):
            now = datetime.datetime.utcnow()
            # total_seconds() measures the whole interval. ``.seconds`` is only
            # the 0..86399 remainder of the timedelta, so it wraps around for
            # gaps of a day or more and is huge for slightly negative gaps.
            idle_seconds = (now - client.last_active_time).total_seconds()
            if idle_seconds > 200:  # To be reduced.
                if client.reporting == "ready":
                    ravdb.update_client(client,
                                        status="disconnected",
                                        disconnected_at=now)
                else:
                    ravdb.update_client(client,
                                        status="disconnected",
                                        reporting="idle",
                                        disconnected_at=now)

                # Hand the client's subgraph back so it can be rescheduled.
                assigned_subgraph = ravdb.get_subgraph(
                    client.current_subgraph_id, client.current_graph_id)
                if assigned_subgraph is not None:
                    ravdb.update_subgraph(assigned_subgraph, status="ready")

            # The ping is sent to every client from the query, including one
            # that was just marked disconnected above.
            await sio.emit(
                "check_status",
                {"sid": client.sid},
                namespace="/{}".format(client.type),
                room=client.sid,
            )

        await sio.sleep(5)
Beispiel #2
0
async def retry_failed_subgraphs(graph_id):
    """Queue a graph's failed subgraphs for retry, or give up on them.

    For every id returned by ``ravdb.get_failed_subgraphs_from_graph``:

    * the client currently assigned to the subgraph (if any) is released
      (``reporting="idle"``, no current subgraph/graph);
    * if ``retry_attempts`` is at most 5, the ``(subgraph_id, graph_id)`` pair
      is appended to the global ``Queue`` unless it is already there;
    * otherwise the subgraph, the graph and every op listed in the subgraph's
      ``op_ids`` are marked ``failed``.

    :param graph_id: id of the graph whose failed subgraphs are inspected.
    """
    global Queue
    graph = ravdb.get_graph(graph_id)
    failed_ids = ravdb.get_failed_subgraphs_from_graph(graph)
    if len(failed_ids) == 0:
        return

    print("\nFailed subgraph ids Retry: ", failed_ids)
    for failed_id in failed_ids:
        broken = ravdb.get_subgraph(subgraph_id=failed_id, graph_id=graph_id)

        # Free whichever client was working on this subgraph.
        holder = ravdb.get_assigned_client(broken.subgraph_id,
                                           broken.graph_id)
        if holder is not None:
            ravdb.update_client(holder,
                                reporting="idle",
                                current_subgraph_id=None,
                                current_graph_id=None)

        attempts = broken.retry_attempts
        member_op_ids = ast.literal_eval(broken.op_ids)

        if int(attempts) > 5:
            # Retry budget exhausted: fail the subgraph, its graph and its ops.
            ravdb.update_subgraph(broken, status='failed')
            ravdb.update_graph(graph, status="failed")
            for member_op_id in member_op_ids:
                ravdb.update_op(ravdb.get_op(member_op_id), status="failed")
            continue

        queue_key = (broken.subgraph_id, broken.graph_id)
        if queue_key not in Queue:
            Queue.append(queue_key)
Beispiel #3
0
def create_context_file(client):
    """
    Build a context, write it to disk and record its filename on the client.

    The context comes from ``get_context()`` and is dumped, secret key
    included, to ``CONTEXT_FOLDER`` under a name derived from ``client.cid``.
    The client's ``context`` field is then set to a JSON object holding that
    filename under the ``context_filename`` key.
    """
    print("Creating context...")
    context = get_context()

    filename = "context_with_private_key_{}.txt".format(client.cid)
    destination = os.path.join(CONTEXT_FOLDER, filename)
    dump_context(context, destination, save_secret_key=True)

    context_record = json.dumps({"context_filename": filename})
    ravdb.update_client(client, context=context_record)
    print("Context created and dumped")
Beispiel #4
0
async def create_credentials(cid, ftp_credentials, client):
    """Make sure the client has usable FTP credentials stored.

    New credentials are created with ``add_user(cid)`` and saved on the client
    (as JSON) when ``ftp_credentials`` is ``None`` / ``"None"`` / ``"null"``,
    or when the stored credentials are rejected by ``check_credentials``.
    Afterwards, if ``ENCRYPTION`` is truthy, the client's context file is
    uploaded.

    :param cid: client id passed to ``add_user``.
    :param ftp_credentials: JSON string of stored credentials, or one of the
        "missing" markers listed above.
    :param client: client record to update.
    """
    needs_new_user = True
    if ftp_credentials not in (None, "None", "null"):
        stored = json.loads(ftp_credentials)
        print("FTP credentials:", stored)
        needs_new_user = not check_credentials(**stored)

    if needs_new_user:
        fresh_credentials = add_user(cid)
        ravdb.update_client(client,
                            ftp_credentials=json.dumps(fresh_credentials))

    # Upload context file
    if ENCRYPTION:
        upload_context_file(client)
Beispiel #5
0
async def create_client(cid, sid, client_type):
    """Register a (re)connecting client and kick off credential creation.

    ``cid`` is indexed with ``[0]`` before use. If no client with that cid
    exists, a new ``connected`` client and its sid mapping are created.
    Otherwise the existing client is updated with the new sid and timestamps,
    and its sid mapping is updated or created. When ``ENCRYPTION`` is truthy a
    context file is created. Finally a thread running ``between_callback`` is
    started with ``(cid, ftp_credentials, client)``.

    :param cid: indexable value whose first element is the client id.
    :param sid: socket session id of the connection.
    :param client_type: client type; also used to build the namespace
        ``"/<client_type>"``.
    """
    print("Connected:{} {} {}".format(cid, sid, client_type))

    namespace = "/{}".format(client_type)
    cid = cid[0]

    client = ravdb.get_client_by_cid(cid)

    if client is not None:
        print("existing client connected ")
        # NOTE(review): connected_at uses local time while last_active_time
        # uses UTC -- confirm this mix is intended.
        client = ravdb.update_client(
            client,
            sid=sid,
            connected_at=datetime.datetime.now(),
            status="connected",
            last_active_time=datetime.datetime.utcnow())

        # Refresh the sid mapping, creating it when none matches.
        known_mapping = ravdb.find_client_sid_mapping(cid=cid, sid=sid)
        if known_mapping is not None:
            ravdb.update_client_sid_mapping(known_mapping.id,
                                            cid=cid,
                                            sid=sid,
                                            namespace=namespace)
        else:
            ravdb.create_client_sid_mapping(cid=cid,
                                            sid=sid,
                                            client_id=client.id,
                                            namespace=namespace)
    else:
        print("new client connected")
        client = ravdb.create_client(cid=cid,
                                     sid=sid,
                                     type=client_type,
                                     status="connected")
        ravdb.create_client_sid_mapping(cid=cid,
                                        sid=sid,
                                        client_id=client.id,
                                        namespace=namespace)
    print("client connected")

    # create context
    if ENCRYPTION:
        create_context_file(client)

    # Create FTP credentials on a separate thread.
    print("Creating ftp credentials...")
    worker_args = (cid, client.ftp_credentials, client)
    credentials_worker = threading.Thread(target=between_callback,
                                          name="create_credentials",
                                          args=worker_args)
    credentials_worker.start()
Beispiel #6
0
async def disconnect(sid):
    """Handle a socket disconnect for the client owning ``sid``.

    If a client is found for the sid it is marked ``disconnected`` and its sid
    mapping is deleted. For ``ravjs`` and ``analytics`` clients, the mappings
    of that client whose status is SENT, ACKNOWLEDGED or COMPUTING are then
    queried, printed, and passed to ``ravdb.update_op`` with status
    ``NOT_COMPUTED``. Other client types need no further work.

    :param sid: socket session id that disconnected.
    """
    print("Disconnected:{}".format(sid))

    client = ravdb.get_client_by_sid(sid=sid)
    if client is None:
        return

    ravdb.update_client(client,
                        status="disconnected",
                        disconnected_at=datetime.datetime.now())

    # Update client sid mapping
    ravdb.delete_client_sid_mapping(sid=sid)

    # Choose which mapping table holds this client's in-flight work.
    if client.type == "ravjs":
        mapping_model = ClientOpMapping
    elif client.type == "analytics":
        mapping_model = ObjectiveClientMapping
    else:
        return

    in_flight = or_(
        mapping_model.status == MappingStatus.SENT,
        mapping_model.status == MappingStatus.ACKNOWLEDGED,
        mapping_model.status == MappingStatus.COMPUTING,
    )
    ops = (ravdb.session.query(mapping_model).filter(
        mapping_model.client_id == client.id).filter(in_flight).all())

    print(ops)
    # Set those ops to pending
    # NOTE(review): the rows passed to update_op are mapping rows, not ops --
    # confirm update_op is the intended call.
    for op in ops:
        ravdb.update_op(op, status=MappingStatus.NOT_COMPUTED)
Beispiel #7
0
async def run_scheduler():
    """Run the scheduling loop forever.

    Sets the global ``SCHEDULER_RUNNING`` flag and then, every 2 seconds:

    1. Loads the pending graphs of the ``distributed`` and ``federated``
       approaches. Federated graphs are handed to ``create_sub_graphs``.
    2. For each distributed graph, runs ``vertical_split``,
       ``horizontal_split`` (``minimum_split_size=20``) and
       ``retry_failed_subgraphs``, yielding to the event loop in between.
    3. Rebuilds every queued failed subgraph that is not assigned, computing
       or computed: its op list becomes the not-yet-computed ops plus their
       not-yet-computed inputs, those ops are reset, and the subgraph is set
       back to ``ready`` with ``has_failed="True"``.
    4. Assigns each ``ready`` subgraph whose external inputs are all computed
       to the idle client with the lowest estimated time. The estimate is the
       sum over the subgraph's ops of the client's capability value for the
       operator, or ``random.random() * 10`` when the operator is unknown.
    5. Reconciles each subgraph's status with the statuses of its own ops,
       releasing clients and pruning the retry ``Queue`` where appropriate.
    """
    global SCHEDULER_RUNNING, Queue
    SCHEDULER_RUNNING = True
    while True:
        print("Scheduler Running...")
        distributed_graphs = ravdb.get_graphs(status=GraphStatus.PENDING,
                                              approach="distributed")
        federated_graphs = ravdb.get_graphs(status=GraphStatus.PENDING,
                                            approach="federated")

        if len(distributed_graphs) == 0 and len(federated_graphs) == 0:
            print("No graphs found")

        else:
            for federated_graph in federated_graphs:
                create_sub_graphs(federated_graph.id)

            for distributed_graph in distributed_graphs:
                current_graph_id = distributed_graph.id

                # --- Phase 1: (re)split the graph and collect failures -----
                await vertical_split(distributed_graph.id)
                await sio.sleep(0.1)
                await horizontal_split(distributed_graph.id,
                                       minimum_split_size=20)
                await sio.sleep(0.1)
                await retry_failed_subgraphs(distributed_graph.id)
                await sio.sleep(0.1)

                # --- Phase 2: rebuild queued failed subgraphs --------------
                failed_subgraph_ids = get_failed_subgraphs_from_queue(
                    current_graph_id)

                for subgraph_id in failed_subgraph_ids:
                    subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                                  graph_id=current_graph_id)

                    if subgraph.status != "assigned" and subgraph.status != "computing" and subgraph.status != "computed":
                        if subgraph.has_failed != "True" or subgraph.status == "failed":
                            op_ids = ast.literal_eval(subgraph.op_ids)
                            final_subsubgraph_list = []
                            # Keep every op that still has to run, together
                            # with any of its inputs that is not computed yet.
                            for op_id in op_ids:
                                op = ravdb.get_op(op_id)
                                if op.status != "computed":
                                    if op.inputs != "null":
                                        for input_op_id in ast.literal_eval(
                                                op.inputs):
                                            input_op = ravdb.get_op(
                                                input_op_id)
                                            if input_op.status != "computed":
                                                final_subsubgraph_list.append(
                                                    input_op_id)
                                    final_subsubgraph_list.append(op_id)

                            # De-duplicate and order the op ids.
                            failed_ops = sorted(set(final_subsubgraph_list))

                            for failed_op_id in failed_ops:
                                failed_op = ravdb.get_op(failed_op_id)
                                if failed_op.operator != "lin" and failed_op.status != "computed":
                                    ravdb.update_op(failed_op,
                                                    subgraph_id=subgraph_id,
                                                    message=None,
                                                    status="pending")
                                elif failed_op.operator == "lin":
                                    # "lin" ops keep their status; only the
                                    # subgraph link and message are reset.
                                    ravdb.update_op(failed_op,
                                                    subgraph_id=subgraph_id,
                                                    message=None)

                            ravdb.update_subgraph(subgraph,
                                                  op_ids=str(failed_ops),
                                                  status='ready',
                                                  optimized='True',
                                                  has_failed="True")
                if len(Queue) > 0:
                    print('\nQUEUE', Queue)

                # --- Phase 3: assign ready subgraphs to idle clients -------
                subgraphs = ravdb.get_subgraphs_from_graph(
                    graph_id=current_graph_id)

                for subgraph in subgraphs:
                    if subgraph.status == 'ready':

                        # A subgraph may only start once every input coming
                        # from a different subgraph has been computed.
                        ready_flag = True
                        op_ids = ast.literal_eval(subgraph.op_ids)
                        for op_id in op_ids:
                            op = ravdb.get_op(op_id)
                            if op.inputs != 'null':
                                for input_op_id in ast.literal_eval(op.inputs):
                                    input_op = ravdb.get_op(input_op_id)
                                    if input_op.subgraph_id != subgraph.subgraph_id:
                                        if input_op.status != "computed":
                                            ready_flag = False
                                            break
                                if not ready_flag:
                                    break
                        if not ready_flag:
                            continue

                        idle_clients = ravdb.get_idle_clients(
                            reporting=ClientStatus.IDLE)
                        if idle_clients is not None:
                            op_ids = ast.literal_eval(subgraph.op_ids)
                            prelim_times = {}
                            # Estimate, per idle client, how long the whole
                            # subgraph would take.
                            for idle_client in idle_clients:
                                idle_client_time = 0
                                for op_id in op_ids:
                                    op = ravdb.get_op(op_id=op_id)
                                    if op is not None:
                                        operator = op.operator
                                        capabilities_dict = ast.literal_eval(
                                            idle_client.capabilities)
                                        if operator not in capabilities_dict:
                                            # Unknown operator: fall back to a
                                            # random estimate.
                                            client_time = random.random() * 10
                                        else:
                                            client_time = capabilities_dict[
                                                operator]
                                        idle_client_time += client_time
                                prelim_times[idle_client.id] = idle_client_time
                            if bool(prelim_times):
                                fastest_client_id = min(prelim_times,
                                                        key=prelim_times.get)
                                client = ravdb.get_client(id=fastest_client_id)
                                ravdb.update_subgraph(subgraph,
                                                      status='assigned')
                                ravdb.update_client(
                                    client,
                                    reporting='busy',
                                    current_subgraph_id=subgraph.subgraph_id,
                                    current_graph_id=subgraph.graph_id)

                            else:
                                print('\n\nNo idle clients')
                        else:
                            print('\nNo idle clients')

                # --- Phase 4: reconcile subgraph status with op status -----
                subgraphs = ravdb.get_all_subgraphs(graph_id=current_graph_id)
                for subgraph in subgraphs:
                    subgraph_op_ids = ast.literal_eval(subgraph.op_ids)
                    # Only ops still linked to this subgraph are counted.
                    actual_op_ids = []
                    for op_id in subgraph_op_ids:
                        op = ravdb.get_op(op_id)
                        if op.subgraph_id == subgraph.subgraph_id:
                            actual_op_ids.append(op)

                    num_ops = len(actual_op_ids)
                    counter = {
                        'pending': 0,
                        'computed': 0,
                        'failed': 0,
                        'computing': 0
                    }
                    for subgraph_op in actual_op_ids:
                        if subgraph_op.status == "pending":
                            counter['pending'] += 1
                        elif subgraph_op.status == "computed":
                            counter['computed'] += 1
                        elif subgraph_op.status == "failed":
                            counter['failed'] += 1
                        elif subgraph_op.status == "computing":
                            counter['computing'] += 1

                    if subgraph.status != 'computed':
                        if counter['computed'] == num_ops:
                            # Every own op is done: close the subgraph and
                            # release its client.
                            ravdb.update_subgraph(subgraph, status="computed")
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)

                        # The 'computed' alternative below cannot hold inside
                        # this branch (status != 'computed'); kept as written.
                        elif counter['pending'] > 0 and (
                                subgraph.status == 'computing'
                                or subgraph.status == 'computed'):
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)
                            ravdb.update_subgraph(subgraph,
                                                  status="not_ready",
                                                  optimized="False")

                        elif counter['pending'] == 0 and counter[
                                'computing'] == 0 and counter[
                                    'failed'] == 0 and counter['computed'] == 0:
                            # No own op in any counted state: treat the
                            # subgraph as computed.
                            ravdb.update_subgraph(subgraph, status="computed")
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)

                    elif subgraph.status == "computed" and subgraph.has_failed == "True":  #and int(subgraph.retry_attempts) >= 2:
                        # A previously failed subgraph has finished: drop it
                        # from the retry queue. The list is rebuilt in place
                        # rather than removing entries while iterating over
                        # the same list, which would skip elements.
                        Queue[:] = [
                            entry for entry in Queue
                            if not (entry[0] == subgraph.subgraph_id
                                    and entry[1] == current_graph_id)
                        ]

                    if subgraph.status == "computed":
                        if counter['pending'] > 0:
                            # Marked computed but some op is pending again:
                            # release the client and reopen the subgraph.
                            assigned_client = ravdb.get_assigned_client(
                                subgraph.subgraph_id, subgraph.graph_id)
                            if assigned_client is not None:
                                ravdb.update_client(assigned_client,
                                                    reporting="idle",
                                                    current_subgraph_id=None,
                                                    current_graph_id=None)
                            ravdb.update_subgraph(subgraph,
                                                  status="not_ready",
                                                  optimized="False")

        await sio.sleep(2)
Beispiel #8
0
async def vertical_split(graph_id):
    """Sync a graph's subgraph records and split disconnected subgraphs.

    Works in three stages:

    1. For every ``subgraph_id -> op ids`` entry returned by
       ``ravdb.get_graph_op_dependency``, tag the ops with the subgraph id and
       create the subgraph record (status READY) or refresh its ``op_ids``.
       Existing ``failed`` subgraphs are left alone. A ``standby`` subgraph
       moves to ``not_ready`` once its parent is COMPUTED or COMPUTING. A
       subgraph with no ops is marked ``computed`` and its client released.
    2. For every subgraph that is not standby/computed/computing and has
       ``optimized == "False"``: a computed input that is seen again after its
       first occurrence is replaced by a new computed ``lin`` "ghost" op, an
       input -> op directed graph is built, and it is split into weakly
       connected components. The first component keeps the subgraph id; each
       further component gets a fresh id.
    3. Ops are re-tagged with their new subgraph ids and the subgraph records
       are created/updated (READY, ``optimized="True"``, with complexity from
       ``calculate_subgraph_complexity``).

    :param graph_id: id of the graph to process.
    """
    op_dependency = ravdb.get_graph_op_dependency(graph_id)

    # print('OP DEPENDENCY: ',op_dependency)

    # --- Stage 1: sync subgraph records with the dependency map ------------
    for subgraph_id in op_dependency:
        op_ids = op_dependency[subgraph_id]
        for op_id in op_ids:
            op = ravdb.get_op(op_id)
            ravdb.update_op(op, subgraph_id=subgraph_id)

        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)

        subgraph_ops = ravdb.get_subgraph_ops(graph_id=graph_id,
                                              subgraph_id=subgraph_id)
        subgraph_op_ids = sorted(subgraph_op.id
                                 for subgraph_op in subgraph_ops)

        if subgraph is None:
            subgraph = ravdb.create_subgraph(subgraph_id=subgraph_id,
                                             graph_id=graph_id,
                                             op_ids=str(subgraph_op_ids),
                                             status=SubgraphStatus.READY)
        else:
            if subgraph.status != 'failed':
                if subgraph.status == 'standby':
                    parent_subgraph = ravdb.get_subgraph(
                        subgraph_id=subgraph.parent_subgraph_id,
                        graph_id=graph_id)
                    if parent_subgraph.status == SubgraphStatus.COMPUTED or parent_subgraph.status == SubgraphStatus.COMPUTING:
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids),
                                              status='not_ready')
                else:
                    if len(subgraph_op_ids) == 0:
                        # Nothing left to run: close the subgraph and free
                        # the client that held it.
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids),
                                              status='computed',
                                              optimized='True')
                        assigned_client = ravdb.get_assigned_client(
                            subgraph.subgraph_id, subgraph.graph_id)
                        if assigned_client is not None:
                            ravdb.update_client(assigned_client,
                                                reporting="idle",
                                                current_subgraph_id=None,
                                                current_graph_id=None)
                    else:
                        ravdb.update_subgraph(subgraph,
                                              op_ids=str(subgraph_op_ids))

    # --- Stage 2: split non-optimized subgraphs into components ------------
    # ``last_id`` is the highest subgraph id handed out so far; new ids are
    # allocated above it.
    last_id = len(ravdb.get_all_subgraphs(graph_id=graph_id))
    if last_id == 0:
        last_id = 1
    new_op_dependency = {}
    for subgraph_id in op_dependency:
        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)
        if subgraph is not None and subgraph.status != 'standby' and subgraph.status != 'computed' and subgraph.status != 'computing':
            if subgraph.optimized == "False":
                computed_ops = []
                G = nx.DiGraph()

                op_ids = ast.literal_eval(subgraph.op_ids)

                for op_id in op_ids:
                    op = ravdb.get_op(op_id)
                    if op.inputs != "null":
                        for input_id in ast.literal_eval(op.inputs):
                            input_op = ravdb.get_op(input_id)
                            if input_id in computed_ops:
                                # This computed input was already seen:
                                # give the consumer its own computed "lin"
                                # copy and point the consumer at it.
                                name = "ghost_op_" + str(input_id)
                                ghost_op = ravdb.create_op(
                                    name=name,
                                    graph_id=input_op.graph_id,
                                    subgraph_id=subgraph_id,
                                    complexity=input_op.complexity,
                                    output_dims=input_op.output_dims,
                                    inputs="null",
                                    outputs=input_op.outputs,
                                    node_type="input",
                                    op_type="other",
                                    operator="lin",
                                    status="computed",
                                    params=input_op.params)
                                op_inputs = [
                                    ghost_op.id
                                    if existing_id == input_id else existing_id
                                    for existing_id in ast.literal_eval(
                                        op.inputs)
                                ]
                                ravdb.update_op(op, inputs=str(op_inputs))
                            else:
                                if input_op.status == "computed":
                                    computed_ops.append(input_id)

                # Build the input -> op graph from the (possibly rewritten)
                # inputs and find its connected pieces.
                for op_id in op_ids:
                    op = ravdb.get_op(op_id)
                    if op.inputs != "null":
                        for input_id in ast.literal_eval(op.inputs):
                            G.add_edge(input_id, op_id)

                subsubgraphs = list(nx.weakly_connected_components(G))
                subsubgraphs = [list(x) for x in subsubgraphs]

                if len(subsubgraphs) > 1:
                    new_op_dependency[subgraph_id] = subsubgraphs[0]
                    for i in range(1, len(subsubgraphs)):
                        new_op_dependency[last_id + i] = subsubgraphs[i]
                elif len(subsubgraphs) == 1:
                    new_op_dependency[subgraph_id] = subsubgraphs[0]

                if len(new_op_dependency) != 0:
                    # Never let last_id move backwards. Taking the most
                    # recently inserted key would reset it to an existing
                    # (smaller) subgraph id whenever a subgraph was not
                    # split, so a later split could reuse an id and
                    # overwrite an entry. Assumes subgraph ids are
                    # integers, as the ``last_id + i`` arithmetic implies.
                    last_id = max(last_id, max(new_op_dependency))

    # print('\nNEW OP DEPENDENCY: ',new_op_dependency)
    # --- Stage 3: persist the new partition --------------------------------
    for subgraph_id in new_op_dependency:
        for member_op_id in new_op_dependency[subgraph_id]:
            op = ravdb.get_op(member_op_id)
            ravdb.update_op(op, subgraph_id=subgraph_id)

    for subgraph_id in new_op_dependency:
        subgraph = ravdb.get_subgraph(subgraph_id=subgraph_id,
                                      graph_id=graph_id)
        sorted_new_op_deps = new_op_dependency[subgraph_id]
        sorted_new_op_deps.sort()
        if subgraph is not None:
            complexity = calculate_subgraph_complexity(subgraph=subgraph)
            ravdb.update_subgraph(subgraph,
                                  subgraph_id=subgraph_id,
                                  graph_id=graph_id,
                                  op_ids=str(sorted_new_op_deps),
                                  complexity=complexity,
                                  optimized="True",
                                  status=SubgraphStatus.READY)
        else:
            subgraph = ravdb.create_subgraph(subgraph_id=subgraph_id,
                                             graph_id=graph_id,
                                             op_ids=str(sorted_new_op_deps),
                                             status=SubgraphStatus.READY,
                                             optimized="True")
            complexity = calculate_subgraph_complexity(subgraph=subgraph)
            ravdb.update_subgraph(subgraph, complexity=complexity)
Beispiel #9
0
async def final_scheduler_call(graph_id):
    """Run one last assignment/reconciliation pass over a graph.

    After a 5 second pause, every subgraph of the graph is visited:

    * a ``ready`` subgraph is assigned to the idle client with the lowest
      estimated time (capability value per operator, or
      ``random.random() * 10`` for operators the client does not list);
    * a subgraph that is not ``computed`` is marked ``computed`` when all of
      its own ops are computed, or ``failed`` when any of them failed; in
      both cases its assigned client is released.

    If every subgraph ends up ``computed`` the graph itself is marked
    ``computed``.

    :param graph_id: id of the graph to finalize.
    """
    await sio.sleep(5)
    distributed_graph = ravdb.get_graph(graph_id=graph_id)
    subgraphs = ravdb.get_all_subgraphs(graph_id=graph_id)

    for subgraph in subgraphs:
        if subgraph.status == 'ready':
            candidates = ravdb.get_idle_clients(reporting=ClientStatus.IDLE)
            if candidates is None:
                print('\nNo idle clients')
            else:
                op_ids = ast.literal_eval(subgraph.op_ids)
                estimates = {}
                for candidate in candidates:
                    estimate = 0
                    for op_id in op_ids:
                        op = ravdb.get_op(op_id=op_id)
                        if op is None:
                            continue
                        operator = op.operator
                        known_costs = ast.literal_eval(candidate.capabilities)
                        if operator in known_costs:
                            estimate += known_costs[operator]
                        else:
                            # Operator not listed: use a random estimate.
                            estimate += random.random() * 10
                    estimates[candidate.id] = estimate

                if estimates:
                    winner_id = min(estimates, key=estimates.get)
                    client = ravdb.get_client(id=winner_id)
                    ravdb.update_subgraph(subgraph, status='assigned')
                    ravdb.update_client(
                        client,
                        reporting='busy',
                        current_subgraph_id=subgraph.subgraph_id,
                        current_graph_id=subgraph.graph_id)
                else:
                    print('\n\nNo idle clients')

        if subgraph.status != 'computed':
            # Only ops still linked to this subgraph are considered.
            own_ops = []
            for op_id in ast.literal_eval(subgraph.op_ids):
                op = ravdb.get_op(op_id)
                if op.subgraph_id == subgraph.subgraph_id:
                    own_ops.append(op)

            tally = {'pending': 0, 'computed': 0, 'failed': 0, 'computing': 0}
            for own_op in own_ops:
                for label in tally:
                    if own_op.status == label:
                        tally[label] += 1
                        break

            if tally['computed'] == len(own_ops):
                outcome = "computed"
            elif tally['failed'] > 0:
                outcome = "failed"
            else:
                outcome = None

            if outcome is not None:
                ravdb.update_subgraph(subgraph, status=outcome)
                holder = ravdb.get_assigned_client(subgraph.subgraph_id,
                                                   subgraph.graph_id)
                if holder is not None:
                    ravdb.update_client(holder,
                                        reporting="idle",
                                        current_subgraph_id=None,
                                        current_graph_id=None)

    # The graph is done only when every subgraph is computed.
    if all(each.status == "computed" for each in subgraphs):
        ravdb.update_graph(distributed_graph, status='computed')
Beispiel #10
0
async def check_callback(sid, data):
    """Record a client's reply to a status check.

    Looks up the client by ``data["sid"]`` and, when one is found, marks it
    ``connected`` and refreshes ``last_active_time`` to the current UTC time.

    :param sid: session id supplied by the event framework (not used here;
        the lookup uses the sid carried in ``data``).
    :param data: event payload; must contain the key ``"sid"``.
    """
    client = ravdb.get_client_by_sid(sid=data["sid"])
    if client is None:
        # No client is registered for this sid, so there is nothing to
        # refresh. The same lookup is guarded this way in disconnect().
        return
    ravdb.update_client(client,
                        status="connected",
                        last_active_time=datetime.datetime.utcnow())