Beispiel #1
0
    def start(self, dbi, ActionClass=None, action_args=()):
        """Run the scheduler main loop until ``self.keep_running`` is cleared.

        Each pass halts the stills when no observations remain open and AWS
        mode is enabled, periodically rescans for task managers, refreshes
        the action queue, launches queued actions on every task manager that
        still has capacity, and cleans up completed actions.  Keyboard input
        collected by the ``InputThread`` is forwarded to
        ``handle_keyboard_input`` (per the original note: q + Enter
        terminates the loop, p + Enter pauses it).

        Args:
            dbi: Database interface; stored on ``self.dbi`` and queried for
                open observations and still info.
            ActionClass: Passed through to ``update_action_queue``.
            action_args: Passed through to ``update_action_queue``.
        """
        self.user_input = InputThread()
        self.user_input.start()
        # The scheduler is just starting: on the first run new obs need this
        # flag so they get assigned to the proper taskmanagers.
        self.initial_startup = True
        self.tm_cycle = cycle(self.stills)
        self.keep_running = True
        logger.info('Starting Scheduler')
        self.dbi = dbi
        last_tm_scan = time.time()
        while self.keep_running:

            open_obs_count = len(self.dbi.list_open_observations())
            if open_obs_count == 0:
                if self.sg.aws_enabled == "1":
                    # Out of data: tell every still to halt, then shut down
                    # the AWS nodes.
                    for still in self.launched_actions:
                        self.post_to_server(still, "HALT_NOW")
                    self.shutdown()
                    logger.debug("Shutting down AWS nodes as we are out of data to process...")

            since_tm_scan = time.time() - last_tm_scan
            if since_tm_scan > TIME_INT_FOR_NEW_TM_CHECK:
                self.find_all_taskmanagers()
                last_tm_scan = time.time()
                tm_count = len(self.task_clients)
                logger.debug("Number of TaskManagers : %s" % tm_count)

            self.ext_command_hook()
            self.get_new_active_obs()
            stills_full = self.check_if_stills_reporting_full()
            if stills_full is not True:
                self.update_action_queue(ActionClass, action_args)

            # Iterate over a shallow copy of the launched-actions mapping
            # while launching whatever can be scheduled.
            for tm in copy.copy(self.launched_actions):
                tm_info = self.dbi.get_still_info(tm)
                # check_taskmanager pops the TaskManager out if it is no
                # longer available; skip it in that case.
                if self.check_taskmanager(tm) is False:
                    continue

                while len(self.get_launched_actions(tm, tx=False)) < tm_info.max_num_of_tasks:
                    # FIXME: might still be having a small issue when a TM
                    # goes offline and back on
                    next_action = self.pop_action_queue(tm, tx=False)
                    if next_action is False:
                        # Nothing queued for this task manager.
                        break
                    if self.launch_action(next_action) != "OK":
                        # Connection error: stop trying until the TM checks
                        # back in.
                        break

            self.clean_completed_actions(self.dbi)

            keyboard_input = self.user_input.get_user_input()
            if keyboard_input is None or keyboard_input == '':
                time.sleep(self.sleep_time)
            else:
                handle_keyboard_input(self, keyboard_input)
            # We've run once now; all obs were assigned via round-robin if
            # they were not previously.
            self.initial_startup = False
        self.shutdown()
Beispiel #2
0
    def finalize_tasks(self, poll_interval=5.):
        """Poll active tasks, finalize finished ones and record process stats.

        Loops while ``self.keep_running`` is true.  On each pass, tasks for
        which ``poll_task_status`` returns ``None`` are kept in
        ``self.active_tasks``; every other task is finalized and dropped.
        Once ``self.watchdog_count`` reaches 30 the loop samples CPU and
        memory usage of the first child process of each active task, stores
        the figures on the task object and logs them, then resets the
        counter.  Keyboard input read from the ``InputThread`` is forwarded
        to ``handle_keyboard_input``.

        Args:
            poll_interval: Seconds to sleep between polling passes.
        """
        self.user_input = InputThread()
        self.user_input.start()

        while self.keep_running:
            self.active_tasks_semaphore.acquire()
            try:
                new_active_tasks = []
                for mytask in self.active_tasks:
                    if self.poll_task_status(mytask) is None:
                        # This should probably be handled in a better way
                        new_active_tasks.append(mytask)
                    else:
                        mytask.finalize()
                self.active_tasks = new_active_tasks
            finally:
                # Always release, otherwise an exception while polling or
                # finalizing would leave the semaphore held forever.
                self.active_tasks_semaphore.release()

            # Jon: I think we can get rid of the watchdog as I'm already
            # throwing this at the db
            time.sleep(poll_interval)
            if self.watchdog_count == 30:
                logger.debug('TaskServer is alive')
                for mytask in self.active_tasks:
                    try:
                        child_proc = mytask.process.children()[0]
                        if psutil.pid_exists(child_proc.pid):
                            cpu = child_proc.cpu_percent(interval=1.0)
                            mem_pct = child_proc.memory_percent()
                            # memory_percent() is a percentage (0-100), so
                            # scale by 1/100 before converting the byte
                            # count to MB (1048576 == 2**20).
                            mem_mb = (mem_pct / 100.
                                      * psutil.virtual_memory().total
                                      / 1048576)
                            # save in task object
                            mytask.max_mem = max(mytask.max_mem, mem_mb)
                            mytask.cpu_load_avg += cpu / 100.
                            mytask.n_cpu_load_polls += 1
                            # echo out to screen
                            logger.debug(
                                'Proc info on {obsnum}:{task}:{pid} - cpu={cpu:.1f}%,'
                                ' mem={mem:.1f}%, Naffinity={aff}'.format(
                                    obsnum=mytask.obs,
                                    task=mytask.task,
                                    pid=child_proc.pid,
                                    cpu=cpu,
                                    mem=mem_pct,
                                    aff=len(child_proc.cpu_affinity())))
                    except Exception as err:
                        # Stats collection is best effort (the child may have
                        # exited or not been spawned yet); note the failure
                        # but let KeyboardInterrupt/SystemExit propagate.
                        logger.debug(
                            'Unable to collect process info: %s' % err)
                self.watchdog_count = 0
            else:
                self.watchdog_count += 1

            self.keyboard_input = self.user_input.get_user_input()
            if self.keyboard_input is not None:
                handle_keyboard_input(self, self.keyboard_input)
        return
Beispiel #3
0
    def start(self, dbi, ActionClass=None, action_args=()):
        """Run the scheduler main loop until ``self.keep_running`` is cleared.

        Every pass periodically rescans for task managers, picks up new
        active observations, refreshes the action queue, launches queued
        actions on each task manager that is below its task limit, and
        cleans up completed actions.  Keyboard input gathered by the
        ``InputThread`` is handed to ``handle_keyboard_input`` (per the
        original note: q + Enter terminates the loop, p + Enter pauses it);
        when there is no input the loop sleeps for ``self.sleep_time``.

        Args:
            dbi: Database interface; stored on ``self.dbi``.
            ActionClass: Forwarded to ``update_action_queue``.
            action_args: Forwarded to ``update_action_queue``.
        """
        self.user_input = InputThread()
        self.user_input.start()
        # First-run marker: new obs found on the initial pass need it to be
        # assigned to the proper taskmanagers.
        self.initial_startup = True
        self.tm_cycle = cycle(self.stills)
        self.keep_running = True
        logger.info('Starting Scheduler')
        self.dbi = dbi
        previous_tm_check = time.time()

        while self.keep_running:

            seconds_since_check = time.time() - previous_tm_check
            if seconds_since_check > TIME_INT_FOR_NEW_TM_CHECK:
                self.find_all_taskmanagers()
                previous_tm_check = time.time()
                logger.debug(
                    "Number of TaskManagers : %s" % len(self.task_clients))

            self.ext_command_hook()
            self.get_new_active_obs()

            self.update_action_queue(ActionClass, action_args)
            # Work on a shallow copy of the launched-actions mapping while
            # launching actions that can be scheduled.
            tm_snapshot = copy.copy(self.launched_actions)
            for tm in tm_snapshot:
                tm_info = self.dbi.get_still_info(tm)
                # An unavailable TaskManager gets popped out by
                # check_taskmanager; nothing to launch on it.
                if self.check_taskmanager(tm) is False:
                    continue

                while len(self.get_launched_actions(tm, tx=False)) < tm_info.max_num_of_tasks:
                    # FIXME: might still be having a small issue when a TM
                    # goes offline and back on
                    queued_action = self.pop_action_queue(tm, tx=False)
                    if queued_action is False:
                        break
                    # On a connection error stop trying until the TM checks
                    # back in.
                    if self.launch_action(queued_action) != "OK":
                        break

            self.clean_completed_actions(self.dbi)

            keyboard_input = self.user_input.get_user_input()
            if keyboard_input is None or keyboard_input == '':
                time.sleep(self.sleep_time)
            else:
                handle_keyboard_input(self, keyboard_input)
            # One pass is done: all obs were assigned via round-robin if they
            # were not previously.
            self.initial_startup = False
        self.shutdown()
Beispiel #4
0
    def finalize_tasks(self, poll_interval=5.):
        """Poll active tasks, finalize finished ones and log process stats.

        Loops while ``self.keep_running`` is true.  On each pass, tasks for
        which ``poll_task_status`` returns ``None`` stay in
        ``self.active_tasks``; every other task is finalized and dropped.
        Once ``self.watchdog_count`` reaches 30, CPU, memory and affinity
        information for the first child process of each active task is
        logged and the counter is reset.  Keyboard input read from the
        ``InputThread`` is forwarded to ``handle_keyboard_input``.

        Args:
            poll_interval: Seconds to sleep between polling passes.
        """
        self.user_input = InputThread()
        self.user_input.start()

        while self.keep_running:
            self.active_tasks_semaphore.acquire()
            try:
                new_active_tasks = []
                for mytask in self.active_tasks:
                    if self.poll_task_status(mytask) is None:
                        # This should probably be handled in a better way
                        new_active_tasks.append(mytask)
                    else:
                        mytask.finalize()
                self.active_tasks = new_active_tasks
            finally:
                # Release even if polling/finalizing raised, so other users
                # of the semaphore are not blocked forever.
                self.active_tasks_semaphore.release()

            #  Jon: I think we can get rid of the watchdog as I'm already throwing this at the db
            time.sleep(poll_interval)
            if self.watchdog_count == 30:
                logger.debug('TaskServer is alive')
                for mytask in self.active_tasks:
                    try:
                        child_proc = mytask.process.children()[0]
                        if psutil.pid_exists(child_proc.pid):
                            logger.debug(
                                'Proc info on {obsnum}:{task}:{pid} - cpu={cpu:.1f}%, mem={mem:.1f}%, Naffinity={aff}'
                                .format(
                                    obsnum=mytask.obs,
                                    task=mytask.task,
                                    pid=child_proc.pid,
                                    cpu=child_proc.cpu_percent(interval=1.0),
                                    mem=child_proc.memory_percent(),
                                    aff=len(child_proc.cpu_affinity())))
                    except Exception as err:
                        # Best-effort reporting (the child may be gone or not
                        # yet started); record why it failed without hiding
                        # KeyboardInterrupt/SystemExit.
                        logger.debug(
                            'Unable to collect process info: %s' % err)
                self.watchdog_count = 0
            else:
                self.watchdog_count += 1

            self.keyboard_input = self.user_input.get_user_input()
            if self.keyboard_input is not None:
                handle_keyboard_input(self, self.keyboard_input)
        return
    def finalize_tasks(self, poll_interval=5.):
        """Poll active tasks, finalize finished ones and log process stats.

        Runs while ``self.keep_running`` is true.  Tasks whose
        ``poll_task_status`` result is ``None`` are kept in
        ``self.active_tasks``; all others are finalized and removed.  When
        ``self.watchdog_count`` reaches 30 the loop logs CPU, memory and
        affinity details of the first child process of every active task and
        resets the counter.  Any keyboard input returned by the
        ``InputThread`` is passed to ``handle_keyboard_input``.

        Args:
            poll_interval: Seconds to sleep between polling passes.
        """
        self.user_input = InputThread()
        self.user_input.start()

        while self.keep_running:
            self.active_tasks_semaphore.acquire()
            try:
                new_active_tasks = []
                for mytask in self.active_tasks:
                    if self.poll_task_status(mytask) is None:
                        new_active_tasks.append(mytask)   # This should probably be handled in a better way
                    else:
                        mytask.finalize()
                self.active_tasks = new_active_tasks
            finally:
                # Guarantee the release so a failing poll/finalize cannot
                # leave the semaphore permanently held.
                self.active_tasks_semaphore.release()

            #  Jon: I think we can get rid of the watchdog as I'm already throwing this at the db
            time.sleep(poll_interval)
            if self.watchdog_count == 30:
                logger.debug('TaskServer is alive')
                for mytask in self.active_tasks:
                    try:
                        child_proc = mytask.process.children()[0]
                        if psutil.pid_exists(child_proc.pid):
                            logger.debug('Proc info on {obsnum}:{task}:{pid} - cpu={cpu:.1f}%, mem={mem:.1f}%, Naffinity={aff}'.format(
                                obsnum=mytask.obs, task=mytask.task, pid=child_proc.pid, cpu=child_proc.cpu_percent(interval=1.0),
                                mem=child_proc.memory_percent(), aff=len(child_proc.cpu_affinity())))
                    except Exception as err:
                        # Reporting is best effort; log the reason instead of
                        # silently swallowing it, and do not trap
                        # KeyboardInterrupt/SystemExit.
                        logger.debug('Unable to collect process info: %s' % err)
                self.watchdog_count = 0
            else:
                self.watchdog_count += 1

            self.keyboard_input = self.user_input.get_user_input()
            if self.keyboard_input is not None:
                handle_keyboard_input(self, self.keyboard_input)
        return
Beispiel #6
0
    def start(self, dbi, ActionClass=None, action_args=()):
        """Run the scheduler main loop until ``self.keep_running`` is cleared.

        Each pass periodically rescans for task managers, optionally reports
        a status record to M&C (when ``self.wf.log_to_mc`` is set), refreshes
        the action queue, launches queued actions on every task manager that
        is below its task limit, and cleans up completed actions.  Keyboard
        input collected by the ``InputThread`` is forwarded to
        ``handle_keyboard_input`` (per the original note: q + Enter
        terminates the loop, p + Enter pauses it).

        Args:
            dbi: Database interface; stored on ``self.dbi``.
            ActionClass: Forwarded to ``update_action_queue``.
            action_args: Forwarded to ``update_action_queue``.
        """
        self.user_input = InputThread()
        self.user_input.start()
        # The scheduler is just starting, for the first run if we have new obs
        # we need this to assign to proper taskmanagers
        self.initial_startup = True
        self.tm_cycle = cycle(self.stills)
        self.keep_running = True
        logger.info('Starting Scheduler')
        self.dbi = dbi
        last_checked_for_stills = time.time()
        last_checked_for_mc = time.time()

        while self.keep_running:
            if (time.time() -
                    last_checked_for_stills) > TIME_INT_FOR_NEW_TM_CHECK:
                self.find_all_taskmanagers()
                last_checked_for_stills = time.time()
                logger.debug("Number of TaskManagers : %s" %
                             len(self.task_clients))

            if self.wf.log_to_mc:
                # Imported lazily so the module is only needed when M&C
                # logging is switched on.
                import mc_utils
                now = time.time()
                # Both timestamps come from time.time(), so this is a plain
                # float number of seconds (not a timedelta).
                dt_check = now - last_checked_for_mc
                if dt_check > TIME_INT_FOR_NEW_MC_CHECK:
                    # get total number of running tasks
                    ntasks = 0
                    launched_actions_copy = copy.deepcopy(
                        self.launched_actions)
                    for tm in launched_actions_copy:
                        ntasks += len(self.get_launched_actions(tm, tx=False))

                    # get time since check-in in minutes
                    dt_check_min = dt_check / 60

                    # get time since boot in hours
                    boot_time = psutil.boot_time()
                    dt_boot = now - boot_time
                    dt_boot_hr = dt_boot / 60 / 60

                    # log to M&C
                    status = "OK"
                    logger.debug(
                        "Logging to M&C : {0} status, {1:5.2f} min since last check; {2}"
                        " tasks running; {3:10.3} hr since boot".format(
                            status, dt_check_min, str(ntasks), dt_boot_hr))
                    mc_utils.add_mc_rtp_status(status, dt_check_min, ntasks,
                                               dt_boot_hr)
                    # Restart the interval; otherwise every later pass would
                    # report again once the first interval had elapsed.
                    last_checked_for_mc = now

            self.ext_command_hook()
            self.get_new_active_obs()

            self.update_action_queue(ActionClass, action_args)
            launched_actions_copy = copy.copy(self.launched_actions)
            # Launch actions that can be scheduled
            for tm in launched_actions_copy:
                tm_info = self.dbi.get_still_info(tm)
                # Check if the TaskManager is still available, if not it will
                # pop it out
                if self.check_taskmanager(tm) is False:
                    continue

                # Keep launching until this task manager reaches its limit.
                while len(self.get_launched_actions(
                        tm, tx=False)) < tm_info.max_num_of_tasks:
                    # FIXME: Might still be having a small issue when a TM goes
                    # offline and back on
                    action_from_queue = self.pop_action_queue(tm, tx=False)

                    if action_from_queue is not False:
                        # If we had a connection error stop trying until TM
                        # checks back in
                        if self.launch_action(action_from_queue) != "OK":
                            break
                    else:
                        break

            self.clean_completed_actions(self.dbi)

            keyboard_input = self.user_input.get_user_input()
            if keyboard_input is not None and keyboard_input != '':
                handle_keyboard_input(self, keyboard_input)
            else:
                time.sleep(self.sleep_time)
            # We've run once now, all obs were assigned via roundrobin if they
            # were not previously
            self.initial_startup = False
        self.shutdown()