async def client_sess_heartbeat_loop(self, loop_info):
    """send ping/pong heartbeat message to client to verify the connection

    Repeatedly emits a 'heartbeat_ping' event to the client, sleeping
    `send_interval_msec` between sends, and measures the interval actually
    achieved between consecutive pings. An interval much larger than the
    requested one indicates a overloaded server / slow event loop, which
    for now only produces a log warning.

    Parameters
    ----------
    loop_info : dict
        metadata needed to determine when is the loop should continue
        (consumed by self.get_loop_state)
    """
    self.log.info([
        ['y', ' - starting '],
        ['b', 'client_sess_heartbeat_loop'],
        ['y', ' for session: '],
        ['c', self.sess_id],
    ])

    # the interval to sleep between heartbeats
    send_interval_msec = self.sess_ping['send_interval_msec']

    # the maximal interval achieved in practice between heartbeats,
    # to test for server overloads
    max_ping_interval_msec = (
        send_interval_msec + self.sess_ping['max_interval_good_msec']
    )

    interval_now_msec = 0
    sleep_sec = send_interval_msec * 1e-3

    while self.get_loop_state(loop_info):
        # report the previously measured interval to the client
        data = {
            'ping_interval_msec': interval_now_msec,
        }
        # timestamp read back by heartbeat_pong() to compute round-trip delay
        self.sess_ping_time = get_time('msec')
        await self.emit(event_name='heartbeat_ping', data=data)

        # update the interval between pings: measure the wall-clock time
        # actually spent around the sleep (sleep + event-loop scheduling lag)
        interval_now_msec = get_time('msec')
        await asyncio.sleep(sleep_sec)
        interval_now_msec = get_time('msec') - interval_now_msec

        # for now we only produce a log warning in case of these kinds of problems
        if interval_now_msec > max_ping_interval_msec:
            self.log.warn([
                ['y', ' - high server load / slow connection for '],
                ['r', self.sess_id],
                ['y', ' ? --> '],
                [
                    'o', interval_now_msec, 'msec delay for',
                    send_interval_msec, 'sleep'
                ],
            ])

    self.log.info([
        ['r', ' - ending '],
        ['b', 'client_sess_heartbeat_loop'],
        ['r', ' for session: '],
        ['c', self.sess_id],
    ])

    return
async def heartbeat_pong(self, data): """server check for bad connections (maybe not needed...?!?) Parameters ---------- data : dict client data and metadata related to the event """ # if eg we are offline if self.is_sess_offline: return # go over the resources use by the session, and make sure that everything # is registered as expected (not not be registered e.g., in case of session # restoration, when previous cleanup removed registries from redis) # this is done after coming back from offline, or if min_validate_widget_time_sec # time has passed since the previous validation validate_widgets = ((self.sess_ping_time is None) or ((get_time('sec') - self.validate_widget_time_sec) > self.min_validate_widget_time_sec)) if validate_widgets: sess_resources = data['data']['sess_resources'] sess_widgets = sess_resources['sess_widgets'] await self.validate_sess_widgets( sess_id=data['metadata']['sess_id'], sess_widgets=sess_widgets, ) self.validate_widget_time_sec = get_time('sec') # if eg we are just back from offline if self.sess_ping_time is None: return ping_delay = data['metadata']['send_time_msec'] - self.sess_ping_time if ping_delay > self.sess_ping['max_interval_good_msec']: is_slow = (ping_delay < self.sess_ping['max_interval_slow_msec']) log_func = self.log.warn if is_slow else self.log.error if is_slow: log_txt = 'unstable connection (' + str( ping_delay) + ' msec delay): ' else: log_txt = 'not connected to ' log_func([ ['y', ' - ', log_txt], ['r', self.sess_id], ['g', '\n', ' ' * 22, ' --> '], ['o', data['metadata']], ]) return
def cleanup(self):
    """Remove registered resource locks whose expiry time has passed.

    Walks the redis registry of (name -> list of keys), checks each
    entry's age against its 'expire_msec' (0 or negative means the lock
    never expires), and removes any entry that has outlived its budget —
    logging a warning, since locks are expected to be cleaned up by
    their owners.
    """
    registry = self.redis.h_get_all(
        self.resource_lockers,
        default_val={},
    )

    # flatten the registry into (hash name, key) pairs
    pairs = ((name, key) for name, keys in registry.items() for key in keys)

    for name, key in pairs:
        entry = self.redis.h_get(name=name, key=key, default_val=None)

        # skip missing entries and entries that never expire
        if entry is None or entry['expire_msec'] <= 0:
            continue

        age_msec = get_time('msec') - entry['time']
        if age_msec > entry['expire_msec']:
            self.log.warn([
                ['r', ' - removing expired lock '],
                ['y', name],
                ['r', ' / '],
                ['o', key],
                ['r', ' ... should have been cleaned up manually ?!?'],
            ])
            self.remove(name, key)

    return
def _lock_exit(self, exc_type, exc, tb):
    """Release every lock this context holds in redis.

    For each recorded lock, the redis value is deleted only when it
    still carries our own lock id (otherwise the lock expired or was
    taken over, and is left alone). A warning is logged when a lock
    was held longer than the configured thresholds.
    """
    for held in self.locks:
        owner_id = self.redis.get(name=held['name'], default_val=None)

        # not ours any more (expired / re-acquired elsewhere) — skip
        if owner_id != held['lock_id']:
            continue

        self.redis.delete(name=held['name'])
        held_msec = get_time('msec') - held['lock_time']

        if self.debug_lock:
            self.log.info([
                ['c', ' -- del ', owner_id, ' ', held['name']],
                ['o', ' (was locked for: '],
                ['y', held_msec],
                ['o', ' msec)'],
            ])

        # slow-release threshold: the tighter of the two configured limits
        slow_after_msec = min(
            self.slow_lock_msec,
            self.lock_timeout_sec * 1e3,
        )
        if held_msec >= slow_after_msec:
            self.log.warn([
                ['r', ' - slow lock release for '],
                ['b', held['name']],
                ['r', ' took '],
                ['b', held_msec],
                ['r', ' msec, given thresholds: '],
                ['b', (self.slow_lock_msec, self.lock_timeout_sec)],
            ])

    return
def __init__(self, parent, lock_names, can_exist=False, debug=None):
    """Set up a lock context bound to its parent locker.

    Parameters
    ----------
    parent : object
        owner exposing log / redis / lock configuration
    lock_names : str or list of str
        one or more lock names (a single string is wrapped in a list)
    can_exist : bool, optional
        whether pre-existing locks are acceptable
    debug : bool or None, optional
        overrides the default (off) debug logging when not None
    """
    self.parent = parent
    self.log = parent.log
    self.redis = parent.redis
    self.get_lock_id = parent.get_lock_id
    self.lock_timeout_sec = parent.lock_timeout_sec
    self.slow_lock_msec = parent.slow_lock_msec

    # normalize to a list of names
    self.lock_names = [lock_names] if isinstance(lock_names, str) else lock_names
    self.can_exist = can_exist

    # debug logging is off unless explicitly requested
    self.debug_lock = False if debug is None else debug

    self.locks = []

    # timing bookkeeping for slow-lock / timeout detection
    now_msec = get_time('msec')
    self.time_0_msec = now_msec
    self.time_1_msec = now_msec
    self.timeout_0_msec = 0
    self.timeout_1_msec = 0

    return
async def get_data_widget_type(self):
    """Return the payload describing this widget type.

    The dict carries a fresh random token, the current time in msec,
    and fixed animation parameters.
    """
    return {
        'rnd': get_rnd(),
        'time': get_time('msec'),
        'n_circ': 1,
        'anim_speed': 500,
    }
def sync_block(self, is_locked, max_lock_sec=None):
    """Synchronously block until is_locked() returns falsy.

    Polls every 10 msec; gives up (with a warning that includes the
    source of the predicate) after max_lock_sec.

    Parameters
    ----------
    is_locked : callable
        zero-argument predicate; blocking continues while it is truthy
    max_lock_sec : float, optional
        maximal time to block; defaults to self.max_lock_sec
    """
    if max_lock_sec is None:
        max_lock_sec = self.max_lock_sec
    lock_time = get_time('sec')
    while is_locked():
        if (get_time('sec') - lock_time) > max_lock_sec:
            # bug fix: the message previously said 'async_block'
            # (copy-pasted from the async variant) — this is sync_block
            self.log.warn([
                ['r', ' - sync_block cant release ?!? is_locked():'],
                ['y', '\n', inspect.getsource(is_locked)],
            ])
            break
        time.sleep(0.01)
    return
def _lock_enter(self, name, lock_id, in_loop):
    """Single lock-acquisition step for `name`, recording it locally.

    When in_loop is True, attempts an atomic redis SET-if-not-exists with
    our lock_id (with expiry), and updates the slow-lock timers; when
    False, only sanity-checks that the accumulated wait has not exceeded
    the lock timeout (raising if it has).

    Parameters
    ----------
    name : str
        redis key of the lock
    lock_id : object
        unique id identifying this owner (compared on release)
    in_loop : bool
        whether this call is part of the acquisition retry loop

    Returns
    -------
    bool
        True if the redis SET-NX succeeded (only possible when in_loop)
    """
    is_set = False
    if in_loop:
        # try to set the name (works only if it does not already exist);
        # expire_sec guards against locks never being released
        set_try = self.redis.set_nx(
            name=name,
            data=lock_id,
            expire_sec=self.lock_timeout_sec,
        )

        # time elapsed since the two reference timestamps set in __init__
        # (time_0 is reset below to throttle repeated warnings)
        self.timeout_0_msec = get_time('msec') - self.time_0_msec
        self.timeout_1_msec = get_time('msec') - self.time_1_msec
        if self.timeout_0_msec >= self.slow_lock_msec:
            self.time_0_msec = get_time('msec')
            self.log.warn([
                ['r', ' - slow lock for '],
                ['b', name],
                ['r', ' waiting for '],
                ['c', self.timeout_1_msec],
                ['r', ' msec ...'],
            ])

        is_set = set_try['set_nx']
    else:
        # outside the retry loop: if we have already waited longer than
        # the redis expiry, the lock record is no longer trustworthy
        if self.timeout_0_msec >= self.lock_timeout_sec * 1e3:
            raise Exception(' - lock for ' + str(name) +
                            ' seems to have expired' +
                            ' (not released) after ' +
                            str(self.timeout_1_msec) + ' msec')

    # NOTE(review): the lock is recorded on every call, even when the
    # SET-NX attempt failed; _lock_exit tolerates this by only deleting
    # entries whose redis value still matches our lock_id — confirm this
    # placement against the caller's retry loop
    self.locks += [{
        'name': name,
        'lock_time': get_time('msec'),
        'lock_id': lock_id,
    }]
    if self.debug_lock:
        self.log.info([['b', ' ++ add ', lock_id, ' ', name]])

    return is_set
async def async_block(self, is_locked, max_lock_sec=None):
    """Asynchronously wait until is_locked() returns falsy.

    The predicate may be a plain callable or a coroutine function;
    polling happens every 10 msec, and the wait is abandoned (with a
    warning showing the predicate's source) after max_lock_sec.

    Parameters
    ----------
    is_locked : callable or coroutine function
        zero-argument predicate; waiting continues while it is truthy
    max_lock_sec : float, optional
        maximal time to wait; defaults to self.max_lock_sec
    """
    timeout_sec = self.max_lock_sec if max_lock_sec is None else max_lock_sec
    start_sec = get_time('sec')

    # whether the predicate needs to be awaited (invariant per call)
    needs_await = is_coroutine(is_locked)

    while True:
        still_locked = (await is_locked()) if needs_await else is_locked()
        if not still_locked:
            break

        if (get_time('sec') - start_sec) > timeout_sec:
            self.log.warn([
                ['r', ' - async_block cant release ?!? is_locked():'],
                ['y', '\n', inspect.getsource(is_locked)],
            ])
            break

        await asyncio.sleep(0.01)

    return
def get_pinned_eles(self):
    """Populate self.pinned_eles with randomly generated pinned elements.

    Builds between 20 and 40 pinned entries; each holds 1-12 random
    (telescope key, telescope id) data items plus a timestamp and an
    empty context.
    """
    n_pinned = random.randint(20, 40)
    self.pinned_eles = []

    for pin_n in range(n_pinned):
        # draw a random batch of (key, id) data entries for this pin
        entries = []
        for _ in range(random.randint(1, 12)):
            key_now = self.tel_key[random.randint(0, len(self.tel_key) - 1)]
            id_now = self.tel_ids[random.randint(0, len(self.tel_ids) - 1)]
            entries.append({
                'data': self.get_tel_data(id_now, [key_now]),
                'keys': [self.tel_category, key_now, id_now],
            })

        self.pinned_eles.append({
            'id': 'pinned' + str(pin_n),
            'timestamp': get_time('msec'),
            'data': entries,
            'context': {},
        })
async def emit(self, event_name, data=None, metadata=None):
    """construct the data/metadata dics to be sent to the session

    Wraps the payload with session metadata (event name, session id,
    widget ids, a per-session message counter and a send timestamp),
    then pushes it over the websocket if the session is open. Known
    connection-closed errors are logged as warnings; anything else
    propagates.

    Parameters
    ----------
    event_name : str
        name of the event, stored in the outgoing metadata
    data : dict, optional
        payload to send (an empty dict when None)
    metadata : dict, optional
        extra metadata entries merged over the defaults
    """
    data_send = data if data is not None else dict()

    sess_widget_ids = self.redis.l_get('ws;sess_widget_ids;' + self.sess_id)

    # lock in order to prevent racing conditions on self.n_serv_msg
    async with self.locker.locks.acquire('serv'):
        metadata_send = {
            'event_name': event_name,
            'sess_id': self.sess_id,
            'sess_widget_ids': sess_widget_ids,
            'n_serv_msg': self.n_serv_msg,
            'send_time_msec': get_time('msec'),
        }
        self.n_serv_msg += 1

    if metadata is not None:
        metadata_send.update(metadata)

    data_send = {
        'data': data_send,
        'metadata': metadata_send,
    }

    # bug fix: dropped the dead `except Exception as e: raise e` clause —
    # it only re-raised the exception it caught, so unexpected errors now
    # simply propagate unchanged
    try:
        if self.is_sess_open:
            await self.ws_send(self.websocket_data_dump(data_send))
    except (ConnectionClosed, ConnectionClosedError) as e:
        # expected when the client drops the connection mid-send
        self.log.warn([
            ['r', ' - connection problems ? '],
            ['c', self.serv_id, self.sess_id, ': '],
            ['r', e],
        ])

    return
def add(self, name, key, expire_sec=None):
    """Register a resource lock entry and record it in the registry.

    Writes a (time, expire_msec) record under name/key, and ensures the
    key is listed under `name` in the resource_lockers registry so that
    cleanup() can find it later.

    Parameters
    ----------
    name : str
        redis hash name of the resource group
    key : str
        key of the resource within the group
    expire_sec : float, optional
        lifetime of the lock; None means it never expires
    """
    # expire_msec == 0 encodes "never expires" (see cleanup())
    if expire_sec is None:
        expire_msec = 0
    else:
        expire_msec = max(1, int(expire_sec * 1e3))

    self.redis.h_set(
        name=name,
        key=key,
        data={
            'time': get_time('msec'),
            'expire_msec': expire_msec,
        },
    )

    keys = self.redis.h_get(
        name=self.resource_lockers, key=name, default_val=[]
    )
    # bug fix: previously the key was appended unconditionally, so
    # re-registering the same resource grew the registry list without
    # bound (and cleanup() iterated the duplicates)
    if key not in keys:
        keys += [key]
        self.redis.h_set(name=self.resource_lockers, key=name, data=keys)

    return
def update_urgent_current(self):
    """Rebuild self.urgent_current from telescopes in ERROR state.

    Scans the health report of every telescope; any non-'status' entry
    whose value maps to the 'ERROR' state is added as an urgent item
    with a single timestamped measure.
    """
    self.urgent_current = []
    inst_health = self.get_tel_health()

    for tel_id, vect in inst_health.items():
        for key, value in vect.items():
            # bug fix: `value != None` -> `value is not None`
            # (identity comparison is the correct idiom for None)
            if (key != 'status' and value is not None
                    and self.get_tel_state(int(value)) == 'ERROR'):
                self.urgent_current.append({
                    'id': tel_id + key,
                    'keys': [
                        tel_id,
                        key,
                        self.sm.inst_data.get_tel_type(tel_id),
                        self.tel_category,
                    ],
                    'name': tel_id,
                    'data': {
                        'measures': [{
                            'value': value,
                            'timestamp': get_time('msec'),
                        }],
                        'type': self.get_tel_measure_types(key),
                    },
                })
def _need_reload(self):
    """Return True when the reload wait period has elapsed."""
    elapsed_sec = get_time('sec') - self._reload_time_sec
    return elapsed_sec > self._reload_time_wait_sec
def set_reload_time_sec(self, reload_time_sec=None):
    """Set the reload reference time (defaults to the current time).

    Parameters
    ----------
    reload_time_sec : float, optional
        explicit reference time in seconds; when None, "now" is used
    """
    if reload_time_sec is None:
        reload_time_sec = get_time('sec')
    self._reload_time_sec = reload_time_sec
    return
def init(self):
    """Generate a randomized observation schedule and store it in redis.

    Builds cycles of scheduling blocks until the night is filled: each
    cycle picks a random, mutually-compatible set of sub-arrays, gives
    each a random number of observation blocks with randomly shortened
    durations, and writes every obs block to redis through a pipeline.
    Finally refreshes the execution statuses.
    """
    debug_tmp = False
    # debug_tmp = True

    self.exe_phase = dict()
    self.all_obs_blocks = []
    self.external_events = []

    # night boundaries from the simulation clock (in seconds)
    self.n_nights = self.clock_sim.get_n_nights()
    night_start_sec = self.clock_sim.get_night_start_sec()
    night_end_sec = self.clock_sim.get_night_end_sec()
    night_duration_sec = self.clock_sim.get_night_duration_sec()

    self.n_init_cycle += 1
    is_cycle_done = False
    n_cycle_now = 0
    n_sched_block = -1
    # fixed 5% overhead subtracted from each obs block's useful duration
    overhead_sec = self.obs_block_sec * 0.05
    tot_sched_duration_sec = night_start_sec
    max_block_duration_sec = night_end_sec - self.obs_block_sec

    pipe = self.redis.get_pipe()
    while True:
        # stop once the night is filled, the cycle budget is exhausted,
        # or an obs block has run past the end of the night
        can_break = not ((tot_sched_duration_sec < max_block_duration_sec)
                         and (n_cycle_now < self.max_n_cycles)
                         and (not is_cycle_done))
        if can_break:
            break

        base_cycle_name = (
            self.name_prefix + '_' + str(self.n_init_cycle) + '_' +
            str(n_cycle_now) + '_'
        )
        n_cycle_now += 1

        # derive a random combination of sub-arrays which do not
        # conflict with each other
        sub_array_ids = list(self.sub_array_insts.keys())
        n_sa_0 = self.rnd_gen.randint(0, len(sub_array_ids) - 1)
        sa_id_0 = sub_array_ids[n_sa_0]
        allowed_sa_ids = self.inst_data.get_allowed_sub_arrays(sa_id_0)
        sa_ids = [sa_id_0]
        while len(allowed_sa_ids) > 0:
            # select a random id from the allowed list of the initial sa
            check_n_sa = self.rnd_gen.randint(0, len(allowed_sa_ids) - 1)
            sa_id_add = allowed_sa_ids[check_n_sa]
            allowed_sa_ids.remove(sa_id_add)

            # check if this id is allowed by all included sas
            check_sa_ids = []
            for sa_id in sa_ids:
                check_sa_ids_now = self.inst_data.get_allowed_sub_arrays(sa_id)
                check_sa_ids += [int(sa_id_add in check_sa_ids_now)]

            # add the new sa if it is allowed by all
            if sum(check_sa_ids) == len(check_sa_ids):
                sa_ids += [sa_id_add]

        if debug_tmp:
            precent = (tot_sched_duration_sec -
                       night_start_sec) / night_duration_sec
            print()
            print('-' * 100)
            print(
                ' - n_nights/n_cycle_now',
                [self.n_nights, n_cycle_now],
                'tot_sched_duration_sec / percentage:',
                [tot_sched_duration_sec, int(100 * precent)],
            )

        sched_block_duration_sec = []
        # for n_sched_block_now in range(n_cycle_sched_blocks):
        for n_sched_block_now in range(len(sa_ids)):
            sched_block_id = (
                self.sched_block_prefix + base_cycle_name +
                str(n_sched_block_now)
            )
            n_sched_block += 1
            sa_id = sa_ids[n_sched_block_now]
            tel_ids = self.sub_array_insts[sa_id]
            n_tel_now = len(tel_ids)
            if debug_tmp:
                print(' -- sub-array:', sa_id, '\n', ' ' * 15, tel_ids)

            # choose the number of obs blocks inside these blocks
            n_obs_blocks = self.rnd_gen.randint(
                self.min_n_obs_block, self.max_n_obs_block
            )
            if debug_tmp:
                print(
                    ' --- n_sched_block:', n_sched_block,
                    ' --- n_sched_block_now / n_tel_now:', n_sched_block_now,
                    n_tel_now, '-------', sched_block_id
                )

            tot_obs_block_duration_sec = 0
            # obs blocks within this sched block start where the
            # schedule currently ends
            block_duration_sec = tot_sched_duration_sec

            targets = get_rnd_targets(
                self=self,
                night_duration_sec=night_duration_sec,
                block_duration_sec=block_duration_sec,
            )

            for n_obs_now in range(n_obs_blocks):
                obs_block_id = (
                    self.obs_block_prefix + base_cycle_name +
                    str(n_sched_block_now) + '_' + str(n_obs_now)
                )
                obs_block_name = (str(n_sched_block) + ' (' +
                                  str(n_obs_now) + ')')
                self.exe_phase[obs_block_id] = ''

                # randomly shorten some obs blocks (5% / 25% / 20% of
                # blocks get the respective reduction factors)
                rnd = self.rnd_gen.random()
                obs_block_sec = self.obs_block_sec
                if rnd < 0.05:
                    obs_block_sec /= 1.8
                elif rnd < 0.3:
                    obs_block_sec /= 1.5
                elif rnd < 0.5:
                    obs_block_sec /= 1.1
                obs_block_sec = int(floor(obs_block_sec))

                # end the cycle if this block would overrun the night
                planed_block_end_sec = block_duration_sec + obs_block_sec
                is_cycle_done = (planed_block_end_sec > night_end_sec)
                if is_cycle_done:
                    if debug_tmp:
                        print(
                            ' - is_cycle_done - ',
                            'n_obs_now / start_time_sec / duration:',
                            n_obs_now, block_duration_sec, obs_block_sec
                        )
                    break

                # integrated time for all obs blocks within this sched block
                tot_obs_block_duration_sec += obs_block_sec

                pointings = get_rnd_pointings(
                    self=self,
                    tel_ids=tel_ids,
                    targets=targets,
                    sched_block_id=sched_block_id,
                    obs_block_id=obs_block_id,
                    n_obs_now=n_obs_now,
                )
                if debug_tmp:
                    print(
                        ' ---- n_obs_now / start_time_sec / duration:',
                        n_obs_now, block_duration_sec, obs_block_sec,
                        '-------', obs_block_id,
                    )

                time = {
                    'start': block_duration_sec,
                    'duration': obs_block_sec - overhead_sec,
                }
                time['end'] = time['start'] + time['duration']

                exe_state = {'state': 'wait', 'can_run': True}
                metadata = {
                    'n_sched': n_sched_block,
                    'n_obs': n_obs_now,
                    'block_name': obs_block_name
                }
                # split tel ids by size class inferred from the id
                # ('L' / 'M' / 'S' substring); min is half the available
                # count, max is a fixed per-class cap
                telescopes = {
                    'large': {
                        'min': int(len(list(filter(lambda x: 'L' in x, tel_ids))) / 2),
                        'max': 4,
                        'ids': list(filter(lambda x: 'L' in x, tel_ids))
                    },
                    'medium': {
                        'min': int(len(list(filter(lambda x: 'M' in x, tel_ids))) / 2),
                        'max': 25,
                        'ids': list(filter(lambda x: 'M' in x, tel_ids))
                    },
                    'small': {
                        'min': int(len(list(filter(lambda x: 'S' in x, tel_ids))) / 2),
                        'max': 70,
                        'ids': list(filter(lambda x: 'S' in x, tel_ids))
                    }
                }

                block = {
                    'sched_block_id': sched_block_id,
                    'obs_block_id': obs_block_id,
                    'time': time,
                    'metadata': metadata,
                    'timestamp': get_time('msec'),
                    'telescopes': telescopes,
                    'exe_state': exe_state,
                    'run_phase': [],
                    'targets': targets,
                    'pointings': pointings,
                    'tel_ids': tel_ids,
                }

                pipe.set(
                    name=block['obs_block_id'],
                    data=block,
                    expire_sec=self.expire_sec
                )
                self.all_obs_blocks.append(block)
                block_duration_sec += obs_block_sec

            # list of duration of all sched blocks within this cycle
            if tot_obs_block_duration_sec > 0:  # timedelta(seconds = 0):
                sched_block_duration_sec += [tot_obs_block_duration_sec]

        # the maximal duration of all blocks within this cycle
        # NOTE(review): max() raises ValueError if every sched block in
        # the cycle ended immediately (empty list) — confirm that case
        # cannot occur
        tot_sched_duration_sec += max(sched_block_duration_sec)

    pipe.set(name='external_events', data=self.external_events)
    # NOTE(review): self.external_clock_events is not (re)initialized in
    # this method — presumably set elsewhere on the instance; verify
    pipe.set(name='external_clock_events', data=self.external_clock_events)
    pipe.execute()

    self.update_exe_statuses()

    return