Esempio n. 1
0
    def handle(self, *args, **options):
        """Start the logs / sidecar monitor for one experiment job pod.

        Announces the start on ``self.stdout``, asks ``monitor.can_log``
        whether the pod can be logged and, if so, hands over to
        ``monitor.run_for_experiment_job`` using the identifiers carried by
        the labels that ``monitor.can_log`` returned.

        Args:
            *args: Unused positional arguments.
            **options: Parsed command options; ``pod_id`` and
                ``log_sleep_interval`` are read from it.

        Raises:
            KeyError: If ``pod_id`` or ``log_sleep_interval`` is missing from
                ``options``.
        """
        pod_id = options['pod_id']
        log_sleep_interval = options['log_sleep_interval']
        # Read once: used both in the startup message and for the monitor call.
        container_job_name = settings.CONTAINER_NAME_EXPERIMENT_JOB

        # The two literals are concatenated by the parser, so the first one
        # must end with a space; otherwise the output reads
        # "container_job_name: `x`log sleep interval: `2`".
        self.stdout.write(
            "Started a new jobs logs / sidecar monitor with, pod_id: `{}` container_job_name: `{}` "
            "log sleep interval: `{}`".format(
                pod_id, container_job_name, log_sleep_interval),
            ending='\n')

        k8s_manager = K8SManager(namespace=settings.K8S_NAMESPACE,
                                 in_cluster=True)
        # `log_sleep_interval` is forwarded to can_log; presumably it is the
        # polling delay while waiting for the pod -- confirm in `monitor`.
        is_running, labels = monitor.can_log(k8s_manager, pod_id,
                                             log_sleep_interval)
        if not is_running:
            monitor.logger.info('Job is not running anymore.')
            return

        monitor.run_for_experiment_job(
            k8s_manager=k8s_manager,
            pod_id=pod_id,
            # The uuid labels are passed on in their `.hex` form.
            experiment_uuid=labels.experiment_uuid.hex,
            experiment_name=labels.experiment_name,
            job_uuid=labels.job_uuid.hex,
            task_type=labels.task_type,
            task_idx=labels.task_idx,
            container_job_name=container_job_name)
        monitor.logger.info('Finished logging')
    def handle(self, *args, **options):
        """Run the sidecar log monitor for a single experiment job pod.

        Writes a startup line to ``self.stdout``, checks through
        ``monitor.can_log`` whether the pod can be logged, and when it can,
        delegates to ``monitor.run_for_experiment_job`` with the experiment,
        job and task identifiers read from the returned labels.

        Args:
            *args: Unused positional arguments.
            **options: Parsed command options; must contain ``pod_id`` and
                ``log_sleep_interval``.

        Raises:
            KeyError: If either required option is absent.
        """
        pod_id = options['pod_id']
        log_sleep_interval = options['log_sleep_interval']
        # Looked up once and reused for the message and the monitor call.
        container_job_name = settings.CONTAINER_NAME_EXPERIMENT_JOB

        # Adjacent string literals are joined without any separator, so the
        # trailing space on the first literal keeps the last two fields apart.
        startup_message = (
            "Started a new jobs logs / sidecar monitor with, pod_id: `{}` container_job_name: `{}` "
            "log sleep interval: `{}`"
        ).format(pod_id, container_job_name, log_sleep_interval)
        self.stdout.write(startup_message, ending='\n')

        k8s_manager = K8SManager(namespace=settings.K8S_NAMESPACE, in_cluster=True)
        is_running, labels = monitor.can_log(k8s_manager, pod_id, log_sleep_interval)
        if not is_running:
            monitor.logger.info('Job is not running anymore.')
            return

        monitor.run_for_experiment_job(
            k8s_manager=k8s_manager,
            pod_id=pod_id,
            # The uuid labels are passed on in their `.hex` form.
            experiment_uuid=labels.experiment_uuid.hex,
            experiment_name=labels.experiment_name,
            job_uuid=labels.job_uuid.hex,
            task_type=labels.task_type,
            task_idx=labels.task_idx,
            container_job_name=container_job_name)
        monitor.logger.info('Finished logging')
Esempio n. 3
0
    parser = argparse.ArgumentParser()

    parser.add_argument('--pod_id', type=str)
    parser.add_argument('--app_label', type=str)
    parser.add_argument('--log_sleep_interval', default=2, type=int)
    args = parser.parse_args()
    arguments = args.__dict__

    pod_id = arguments.pop('pod_id')
    app_label = arguments.pop('app_label')
    log_sleep_interval = arguments.pop('log_sleep_interval')

    k8s_manager = K8SManager(namespace=settings.K8S_NAMESPACE, in_cluster=True)
    client = PolyaxonClient()
    client.set_internal_health_check()
    is_running, labels = monitor.can_log(k8s_manager, pod_id,
                                         log_sleep_interval)

    if not is_running:
        monitor.logger.info('Pod is not running anymore.')
    else:
        if app_label == settings.APP_LABELS_EXPERIMENT:
            start_experiment_sidecar(monitor=monitor,
                                     k8s_manager=k8s_manager,
                                     pod_id=pod_id,
                                     labels=labels)
        elif app_label == settings.APP_LABELS_JOB:
            start_job_side_car(monitor=monitor,
                               k8s_manager=k8s_manager,
                               pod_id=pod_id,
                               labels=labels)
        else: