Example #1
def main():
    killer = GracefulKiller()
    evalai = EvalAI_Interface(
        AUTH_TOKEN=AUTH_TOKEN,
        EVALAI_API_SERVER=EVALAI_API_SERVER,
        QUEUE_NAME=QUEUE_NAME,
    )
    logger.info("Deploying Worker for {}".format(
        evalai.get_challenge_by_queue_name()["title"]))
    while True:
        logger.info(
            "Fetching new messages from the queue {}".format(QUEUE_NAME))
        message = evalai.get_message_from_sqs_queue()
        message_body = message.get("body")
        if message_body:
            submission_pk = message_body.get("submission_pk")
            submission = evalai.get_submission_by_pk(submission_pk)
            if submission:
                if (submission.get("status") == "finished"
                        or submission.get("status") == "failed"):
                    # Fetch the last job name from the list as it is the latest running job
                    job_name = submission.get("job_name")[-1]
                    delete_job(batch_v1, job_name)
                    message_receipt_handle = message.get("receipt_handle")
                    evalai.delete_message_from_sqs_queue(
                        message_receipt_handle)
                elif submission.get("status") == "running":
                    continue
                else:
                    message_receipt_handle = message.get("receipt_handle")
                    logger.info(
                        "Processing message body: {0}".format(message_body))
                    process_submission_callback(message_body, evalai)
        if killer.kill_now:
            break
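
Each of these workers loops until GracefulKiller reports that a termination signal arrived. The class itself is not part of the example; a minimal sketch of the usual SIGINT/SIGTERM handler pattern it presumably implements:

import signal


class GracefulKiller:
    kill_now = False

    def __init__(self):
        # Record SIGINT/SIGTERM so the worker loop can finish the current
        # message and then exit via the `if killer.kill_now: break` check.
        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        self.kill_now = True
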
Example #2
def main():
    killer = GracefulKiller()
    evalai = EvalAI_Interface(
        AUTH_TOKEN=AUTH_TOKEN,
        EVALAI_API_SERVER=EVALAI_API_SERVER,
        QUEUE_NAME=QUEUE_NAME,
    )
    logger.info("Deploying Worker for {}".format(
        evalai.get_challenge_by_queue_name()["title"]))
    challenge = evalai.get_challenge_by_queue_name()
    cluster_details = evalai.get_aws_eks_cluster_details(challenge.get("id"))
    cluster_name = cluster_details.get("name")
    cluster_endpoint = cluster_details.get("cluster_endpoint")
    api_instance = get_api_client(cluster_name, cluster_endpoint, challenge,
                                  evalai)
    install_gpu_drivers(api_instance)
    while True:
        message = evalai.get_message_from_sqs_queue()
        message_body = message.get("body")
        if message_body:
            submission_pk = message_body.get("submission_pk")
            challenge_pk = message_body.get("challenge_pk")
            phase_pk = message_body.get("phase_pk")
            submission = evalai.get_submission_by_pk(submission_pk)
            if submission:
                api_instance = get_api_object(cluster_name, cluster_endpoint,
                                              challenge, evalai)
                core_v1_api_instance = get_core_v1_api_object(
                    cluster_name, cluster_endpoint, challenge, evalai)
                if (submission.get("status") == "finished"
                        or submission.get("status") == "failed"
                        or submission.get("status") == "cancelled"):
                    # Fetch the last job name from the list as it is the latest running job
                    job_name = submission.get("job_name")[-1]
                    delete_job(api_instance, job_name)
                    message_receipt_handle = message.get("receipt_handle")
                    evalai.delete_message_from_sqs_queue(
                        message_receipt_handle)
                elif submission.get("status") == "running":
                    job_name = submission.get("job_name")[-1]
                    update_failed_jobs_and_send_logs(
                        api_instance,
                        core_v1_api_instance,
                        evalai,
                        job_name,
                        submission_pk,
                        challenge_pk,
                        phase_pk,
                    )
                else:
                    logger.info(
                        "Processing message body: {0}".format(message_body))
                    challenge_phase = evalai.get_challenge_phase_by_pk(
                        challenge_pk, phase_pk)
                    process_submission_callback(api_instance, message_body,
                                                challenge_phase, evalai)

        if killer.kill_now:
            break
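
When a submission is already finished, failed, or cancelled, the worker deletes the Kubernetes Job that was created for it. delete_job is not shown on this page; a rough sketch of such a helper using the official kubernetes Python client, assuming jobs run in the default namespace (the namespace and delete options below are illustrative assumptions, not EvalAI's actual implementation):

import logging

from kubernetes import client
from kubernetes.client.rest import ApiException

logger = logging.getLogger(__name__)


def delete_job(api_instance, job_name, namespace="default"):
    # api_instance is expected to be a BatchV1Api object, as returned by
    # get_api_object() in the example above.
    try:
        api_instance.delete_namespaced_job(
            name=job_name,
            namespace=namespace,
            body=client.V1DeleteOptions(
                propagation_policy="Foreground",  # remove the Job's pods too
                grace_period_seconds=0,
            ),
        )
    except ApiException as e:
        logger.exception("Failed to delete job {}: {}".format(job_name, e))
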
Example #3
def main():
    killer = GracefulKiller()
    evalai = EvalAI_Interface(
        AUTH_TOKEN=AUTH_TOKEN,
        EVALAI_API_SERVER=EVALAI_API_SERVER,
        QUEUE_NAME=QUEUE_NAME,
    )
    logger.info(
        "Deploying Worker for {}".format(
            evalai.get_challenge_by_queue_name()["title"]
        )
    )
    challenge = evalai.get_challenge_by_queue_name()
    cluster_details = evalai.get_aws_eks_cluster_details(challenge.get("id"))
    cluster_name = cluster_details.get("name")
    cluster_endpoint = cluster_details.get("cluster_endpoint")
    api_instance_client = get_api_client(
        cluster_name, cluster_endpoint, challenge, evalai
    )
    install_gpu_drivers(api_instance_client)
    api_instance = get_api_object(
        cluster_name, cluster_endpoint, challenge, evalai
    )
    core_v1_api_instance = get_core_v1_api_object(
        cluster_name, cluster_endpoint, challenge, evalai
    )
    if challenge.get("is_static_dataset_code_upload"):
        # Create and Mount Script Volume
        script_config_map = create_script_config_map(script_config_map_name)
        create_configmap(core_v1_api_instance, script_config_map)
    submission_meta = {}
    submission_meta["submission_time_limit"] = challenge.get(
        "submission_time_limit"
    )
    while True:
        time.sleep(2)
        message = evalai.get_message_from_sqs_queue()
        message_body = message.get("body")
        if message_body:
            if challenge.get(
                "is_static_dataset_code_upload"
            ) and not message_body.get(
                "is_static_dataset_code_upload_submission"
            ):
                time.sleep(35)
                continue
            api_instance = get_api_object(
                cluster_name, cluster_endpoint, challenge, evalai
            )
            core_v1_api_instance = get_core_v1_api_object(
                cluster_name, cluster_endpoint, challenge, evalai
            )
            message_body["submission_meta"] = submission_meta
            submission_pk = message_body.get("submission_pk")
            challenge_pk = message_body.get("challenge_pk")
            phase_pk = message_body.get("phase_pk")
            submission = evalai.get_submission_by_pk(submission_pk)
            if submission:
                if (
                    submission.get("status") == "finished"
                    or submission.get("status") == "failed"
                    or submission.get("status") == "cancelled"
                ):
                    # Fetch the receipt handle before the try block so the
                    # message can still be deleted if delete_job raises.
                    message_receipt_handle = message.get("receipt_handle")
                    try:
                        # Fetch the last job name from the list as it is the latest running job
                        job_name = submission.get("job_name")[-1]
                        delete_job(api_instance, job_name)
                        evalai.delete_message_from_sqs_queue(
                            message_receipt_handle
                        )
                    except Exception as e:
                        logger.exception(
                            "Failed to delete submission job: {}".format(e)
                        )
                        # Delete message from sqs queue to avoid re-triggering job delete
                        evalai.delete_message_from_sqs_queue(
                            message_receipt_handle
                        )
                elif submission.get("status") == "running":
                    job_name = submission.get("job_name")[-1]
                    update_failed_jobs_and_send_logs(
                        api_instance,
                        core_v1_api_instance,
                        evalai,
                        job_name,
                        submission_pk,
                        challenge_pk,
                        phase_pk,
                        message,
                    )
                else:
                    logger.info(
                        "Processing message body: {0}".format(message_body)
                    )
                    challenge_phase = evalai.get_challenge_phase_by_pk(
                        challenge_pk, phase_pk
                    )
                    process_submission_callback(
                        api_instance, message_body, challenge_phase, evalai
                    )

        if killer.kill_now:
            break
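
For static dataset code-upload challenges, Example #3 mounts an evaluation script through a ConfigMap before entering the polling loop. create_script_config_map and create_configmap are not included on this page; a hedged sketch of how they could look with the kubernetes client (the script payload, key name, and namespace are placeholders, not the actual EvalAI helpers):

from kubernetes import client
from kubernetes.client.rest import ApiException


def create_script_config_map(config_map_name, namespace="default"):
    # Build a ConfigMap object holding the evaluation script; the data
    # payload below is a placeholder.
    return client.V1ConfigMap(
        api_version="v1",
        kind="ConfigMap",
        metadata=client.V1ObjectMeta(name=config_map_name, namespace=namespace),
        data={"script.sh": "#!/bin/sh\necho 'run evaluation'\n"},
    )


def create_configmap(core_v1_api_instance, config_map, namespace="default"):
    # core_v1_api_instance is a CoreV1Api object; ignore 409 (already exists)
    # so a restarted worker does not fail on the pre-created ConfigMap.
    try:
        core_v1_api_instance.create_namespaced_config_map(
            namespace=namespace, body=config_map
        )
    except ApiException as e:
        if e.status != 409:
            raise
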