Example 1
 def task_pre_step(self, step_name, ds, meta, run_id, task_id, flow, graph,
                   retry_count, max_retries):
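     # Record the conda environment id as task metadata so the environment
     # used by this task can be identified later.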
     meta.register_metadata(run_id, step_name, task_id, [
         MetaDatum(field='conda_env_id',
                   value=self._env_id(),
                   type='conda_env_id')
     ])
Example 2
 def task_pre_step(
     self,
     step_name,
     task_datastore,
     metadata,
     run_id,
     task_id,
     flow,
     graph,
     retry_count,
     max_user_code_retries,
     ubf_context,
     inputs,
 ):
     self.task_id = task_id
     meta = {}
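     # These ARGO_WORKFLOW_* variables are expected to be present in the
     # container environment when the flow runs on Argo Workflows.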
     meta["argo-workflow-template"] = os.environ["ARGO_WORKFLOW_TEMPLATE"]
     meta["argo-workflow-name"] = os.environ["ARGO_WORKFLOW_NAME"]
     meta["argo-workflow-namespace"] = os.environ["ARGO_WORKFLOW_NAMESPACE"]
     entries = [
         MetaDatum(field=k,
                   value=v,
                   type=k,
                   tags=["attempt_id:{0}".format(retry_count)])
         for k, v in meta.items()
     ]
     # Register book-keeping metadata for debugging.
     metadata.register_metadata(run_id, step_name, task_id, entries)
Example 3
 def task_pre_step(
     self,
     step_name,
     task_datastore,
     metadata,
     run_id,
     task_id,
     flow,
     graph,
     retry_count,
     max_user_code_retries,
     ubf_context,
     inputs,
 ):
     meta = {}
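     # METAFLOW_RUN_ID and SFN_STATE_MACHINE identify the AWS Step Functions
     # execution and state machine backing this run.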
     meta["aws-step-functions-execution"] = os.environ["METAFLOW_RUN_ID"]
     meta["aws-step-functions-state-machine"] = os.environ[
         "SFN_STATE_MACHINE"]
     entries = [
         MetaDatum(field=k,
                   value=v,
                   type=k,
                   tags=["attempt_id:{0}".format(retry_count)])
         for k, v in meta.items()
     ]
     # Register book-keeping metadata for debugging.
     metadata.register_metadata(run_id, step_name, task_id, entries)
Example 4
 def task_pre_step(self, step_name, ds, meta, run_id, task_id, flow, graph,
                   retry_count, max_retries, ubf_context, inputs):
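     # Register the conda environment id only when conda is enabled for
     # this task's context.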
     if self.is_enabled(ubf_context):
         meta.register_metadata(run_id, step_name, task_id, [
             MetaDatum(field='conda_env_id',
                       value=self._env_id(),
                       type='conda_env_id',
                       tags=[])
         ])
Example 5
    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        self.metadata = metadata
        self.task_datastore = task_datastore

        # task_pre_step may run locally if fallback is activated for the
        # @catch decorator. In that scenario, we skip collecting Kubernetes
        # execution metadata. A rudimentary way to detect non-local execution
        # is to check for the existence of the METAFLOW_KUBERNETES_WORKLOAD
        # environment variable.

        if "METAFLOW_KUBERNETES_WORKLOAD" in os.environ:
            meta = {}
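            # These METAFLOW_KUBERNETES_* variables are expected to be set
            # on the pod by Metaflow's Kubernetes integration.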
            meta["kubernetes-pod-name"] = os.environ[
                "METAFLOW_KUBERNETES_POD_NAME"]
            meta["kubernetes-pod-namespace"] = os.environ[
                "METAFLOW_KUBERNETES_POD_NAMESPACE"]
            meta["kubernetes-pod-id"] = os.environ[
                "METAFLOW_KUBERNETES_POD_ID"]
            meta["kubernetes-pod-service-account-name"] = os.environ[
                "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME"]
            # Unfortunately, there doesn't seem to be any straightforward way
            # right now to attach the Batch/v1 name. We could rely on a hacky
            # approach - the pod name is simply the Batch/v1 name plus a
            # hyphen-delimited unique suffix - but that fails once the
            # Batch/v1 name approaches 63 chars and the pod name truncates it.
            # if "ARGO_WORKFLOW_NAME" not in os.environ:
            #     meta["kubernetes-job-name"] = os.environ[
            #         "METAFLOW_KUBERNETES_POD_NAME"
            #     ].rpartition("-")[0]

            entries = [
                MetaDatum(field=k, value=v, type=k, tags=[])
                for k, v in meta.items()
            ]
            # Register book-keeping metadata for debugging.
            metadata.register_metadata(run_id, step_name, task_id, entries)

            # Start MFLog sidecar to collect task logs.
            self._save_logs_sidecar = Sidecar("save_logs_periodically")
            self._save_logs_sidecar.start()
Example 6
 def task_pre_step(self, step_name, datastore, metadata, run_id, task_id,
                   flow, graph, retry_count, max_user_code_retries):
     meta = {}
     meta['aws-step-functions-execution'] = os.environ['METAFLOW_RUN_ID']
     meta['aws-step-functions-state-machine'] = os.environ['SFN_STATE_MACHINE']
     entries = [
         MetaDatum(field=k, value=v, type=k) for k, v in meta.items()
     ]
     # Register book-keeping metadata for debugging.
     metadata.register_metadata(run_id, step_name, task_id, entries)
Example 7
 def task_pre_step(self, step_name, ds, metadata, run_id, task_id, flow,
                   graph, retry_count, max_retries):
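     # When the metadata provider is local, remember the datastore root so
     # metadata written inside the container can be synced back later.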
     if metadata.TYPE == 'local':
         self.ds_root = ds.root
     else:
         self.ds_root = None
     meta = {}
     meta['aws-batch-job-id'] = os.environ['AWS_BATCH_JOB_ID']
     meta['aws-batch-job-attempt'] = os.environ['AWS_BATCH_JOB_ATTEMPT']
     meta['aws-batch-ce-name'] = os.environ['AWS_BATCH_CE_NAME']
     meta['aws-batch-jq-name'] = os.environ['AWS_BATCH_JQ_NAME']
     entries = [
         MetaDatum(field=k, value=v, type=k) for k, v in meta.items()
     ]
     # Register book-keeping metadata for debugging.
     metadata.register_metadata(run_id, step_name, task_id, entries)
Example 8
    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        self.metadata = metadata
        self.task_datastore = task_datastore

        # task_pre_step may run locally if fallback is activated for the
        # @catch decorator. In that scenario, we skip collecting Kubernetes
        # execution metadata. A rudimentary way to detect non-local execution
        # is to check for the existence of the METAFLOW_KUBERNETES_WORKLOAD
        # environment variable.

        if "METAFLOW_KUBERNETES_WORKLOAD" in os.environ:
            meta = {}
            # TODO: Get kubernetes job id and job name
            meta["kubernetes-pod-id"] = os.environ[
                "METAFLOW_KUBERNETES_POD_ID"]
            meta["kubernetes-pod-name"] = os.environ[
                "METAFLOW_KUBERNETES_POD_NAME"]
            meta["kubernetes-pod-namespace"] = os.environ[
                "METAFLOW_KUBERNETES_POD_NAMESPACE"]
            # meta['kubernetes-job-attempt'] = ?

            entries = [
                MetaDatum(field=k, value=v, type=k, tags=[])
                for k, v in meta.items()
            ]
            # Register book-keeping metadata for debugging.
            metadata.register_metadata(run_id, step_name, task_id, entries)

            # Start MFLog sidecar to collect task logs.
            self._save_logs_sidecar = SidecarSubProcess(
                "save_logs_periodically")
Example 9
    def task_pre_step(self,
                      step_name,
                      ds,
                      metadata,
                      run_id,
                      task_id,
                      flow,
                      graph,
                      retry_count,
                      max_retries,
                      ubf_context,
                      inputs):
        if metadata.TYPE == 'local':
            self.ds_root = ds.root
        else:
            self.ds_root = None
        meta = {}
        meta['aws-batch-job-id'] = os.environ['AWS_BATCH_JOB_ID']
        meta['aws-batch-job-attempt'] = os.environ['AWS_BATCH_JOB_ATTEMPT']
        meta['aws-batch-ce-name'] = os.environ['AWS_BATCH_CE_NAME']
        meta['aws-batch-jq-name'] = os.environ['AWS_BATCH_JQ_NAME']
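        # AWS_EXECUTION_ENV distinguishes EC2- from Fargate-backed compute
        # environments (e.g. AWS_ECS_EC2 vs. AWS_ECS_FARGATE).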
        meta['aws-batch-execution-env'] = os.environ['AWS_EXECUTION_ENV']

        # Capture AWS Logs metadata. This is best effort only, since only V4
        # of the ECS container metadata URI exposes this information, and it
        # is quite likely that not all consumers of Metaflow run a container
        # agent compatible with V4.
        # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
        try:
            logs_meta = (
                requests.get(url=os.environ['ECS_CONTAINER_METADATA_URI_V4'])
                .json()
                .get('LogOptions', {})
            )
            meta['aws-batch-awslogs-group'] = logs_meta.get('awslogs-group')
            meta['aws-batch-awslogs-region'] = logs_meta.get('awslogs-region')
            meta['aws-batch-awslogs-stream'] = logs_meta.get('awslogs-stream')
        except Exception:
            pass

        entries = [
            MetaDatum(field=k, value=v, type=k, tags=[])
            for k, v in meta.items()
        ]
        # Register book-keeping metadata for debugging.
        metadata.register_metadata(run_id, step_name, task_id, entries)
        # Start MFLog sidecar to collect task logs.
        self._save_logs_sidecar = SidecarSubProcess('save_logs_periodically')
Example 10
    def task_pre_step(
        self,
        step_name,
        task_datastore,
        meta,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        if self.is_enabled(ubf_context):
            # Add the Python interpreter's parent to the path. This is to
            # ensure that any non-pythonic dependencies introduced by the conda
            # environment are visible to the user code.
            env_path = os.path.dirname(sys.executable)
            if os.environ.get("PATH") is not None:
                env_path = os.pathsep.join([env_path, os.environ["PATH"]])
            os.environ["PATH"] = env_path

            meta.register_metadata(
                run_id,
                step_name,
                task_id,
                [
                    MetaDatum(
                        field="conda_env_id",
                        value=self._env_id(),
                        type="conda_env_id",
                        tags=["attempt_id:{0}".format(retry_count)],
                    )
                ],
            )
Example 11
    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        self.metadata = metadata
        self.task_datastore = task_datastore

        # task_pre_step may run locally if fallback is activated for the
        # @catch decorator. In that scenario, we skip collecting AWS Batch
        # execution metadata. A rudimentary way to detect non-local execution
        # is to check for the existence of the AWS_BATCH_JOB_ID environment
        # variable.

        if "AWS_BATCH_JOB_ID" in os.environ:
            meta = {}
            meta["aws-batch-job-id"] = os.environ["AWS_BATCH_JOB_ID"]
            meta["aws-batch-job-attempt"] = os.environ["AWS_BATCH_JOB_ATTEMPT"]
            meta["aws-batch-ce-name"] = os.environ["AWS_BATCH_CE_NAME"]
            meta["aws-batch-jq-name"] = os.environ["AWS_BATCH_JQ_NAME"]
            meta["aws-batch-execution-env"] = os.environ["AWS_EXECUTION_ENV"]

            # Capture AWS Logs metadata. This is best effort only, since only
            # V4 of the ECS container metadata URI exposes this information,
            # and it is quite likely that not all consumers of Metaflow run a
            # container agent compatible with V4.
            # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
            try:
                logs_meta = (
                    requests.get(
                        url=os.environ["ECS_CONTAINER_METADATA_URI_V4"]
                    )
                    .json()
                    .get("LogOptions", {})
                )
                meta["aws-batch-awslogs-group"] = logs_meta.get("awslogs-group")
                meta["aws-batch-awslogs-region"] = logs_meta.get("awslogs-region")
                meta["aws-batch-awslogs-stream"] = logs_meta.get("awslogs-stream")
            except Exception:
                pass

            entries = [
                MetaDatum(
                    field=k,
                    value=v,
                    type=k,
                    tags=["attempt_id:{0}".format(retry_count)],
                ) for k, v in meta.items()
            ]
            # Register book-keeping metadata for debugging.
            metadata.register_metadata(run_id, step_name, task_id, entries)

            # Start MFLog sidecar to collect task logs.
            self._save_logs_sidecar = SidecarSubProcess("save_logs_periodically")

        num_parallel = int(os.environ.get("AWS_BATCH_JOB_NUM_NODES", 0))
        if num_parallel >= 1 and ubf_context == UBF_CONTROL:
            # UBF handling for multinode case
            control_task_id = current.task_id
            top_task_id = control_task_id.replace("control-", "")  # strip the "control-" prefix
            mapper_task_ids = [control_task_id] + [
                "%s-node-%d" % (top_task_id, node_idx)
                for node_idx in range(1, num_parallel)
            ]
            flow._control_mapper_tasks = [
                "%s/%s/%s" % (run_id, step_name, mapper_task_id)
                for mapper_task_id in mapper_task_ids
            ]
            flow._control_task_is_mapper_zero = True

        if num_parallel >= 1:
            _setup_multinode_environment()
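
Taken together, these examples share one pattern: read platform identifiers from environment variables, wrap each one in a MetaDatum, and hand the batch to the metadata provider via register_metadata. The sketch below distills that pattern into a minimal custom step decorator. It is illustrative only: the decorator name "my_runtime", the MY_RUNTIME_JOB_ID variable, and the import paths (which assume the older Metaflow layout these snippets use) are assumptions, not part of any shipped integration.

    import os

    from metaflow.decorators import StepDecorator
    from metaflow.metadata import MetaDatum


    class MyRuntimeDecorator(StepDecorator):
        # Hypothetical decorator; stands in for a real compute integration.
        name = "my_runtime"

        def task_pre_step(
            self,
            step_name,
            task_datastore,
            metadata,
            run_id,
            task_id,
            flow,
            graph,
            retry_count,
            max_user_code_retries,
            ubf_context,
            inputs,
        ):
            # MY_RUNTIME_JOB_ID is an assumed variable standing in for
            # whatever identifier the execution platform exposes.
            job_id = os.environ.get("MY_RUNTIME_JOB_ID")
            if job_id is None:
                # Not on the platform (e.g. local fallback via @catch);
                # record nothing.
                return
            entries = [
                MetaDatum(
                    field="my-runtime-job-id",
                    value=job_id,
                    type="my-runtime-job-id",
                    tags=["attempt_id:{0}".format(retry_count)],
                )
            ]
            # Register book-keeping metadata for debugging, mirroring the
            # built-in decorators above.
            metadata.register_metadata(run_id, step_name, task_id, entries)

As in Examples 2, 3, 10, and 11, the attempt_id tag ties each entry to a specific retry, so repeated attempts of the same task remain distinguishable.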