class BaselineOp(object):
    def __init__(self, log, id, station, iring, ntime_gulp=1, guarantee=True, core=-1):
        self.log = log
        self.station = station
        self.iring = iring
        self.ntime_gulp = ntime_gulp
        self.guarantee = guarantee
        self.core = core
        
        self.client = Client(id)
        
        self.bind_proclog = ProcLog(type(self).__name__+"/bind")
        self.in_proclog = ProcLog(type(self).__name__+"/in")
        self.size_proclog = ProcLog(type(self).__name__+"/size")
        self.sequence_proclog = ProcLog(type(self).__name__+"/sequence0")
        self.perf_proclog = ProcLog(type(self).__name__+"/perf")
        
        self.in_proclog.update({'nring':1, 'ring0':self.iring.name})
        
    def _plot_baselines(self, time_tag, freq, dist, baselines, valid):
        # Plotting setup
        nchan = freq.size
        nbl = baselines.shape[0]
        freq = freq[nchan//2]
        baselines = baselines[valid,nchan//2,:]
        baselines = numpy.abs(baselines[:,[0,1,3]])
        minval = numpy.min(baselines)
        maxval = numpy.max(baselines)
        if minval == maxval:
            maxval = minval + 1.0
        mindst = 0.0
        maxdst = numpy.max(dist)
        
        # Image setup
        im = PIL.Image.new('RGB', (601, 421), '#FFFFFF')
        draw = PIL.ImageDraw.Draw(im)
        font = PIL.ImageFont.load(os.path.join(BASE_PATH, 'fonts', 'helvB10.pil'))
        
        # Axes boxes
        for i in range(2):
            draw.line([i * 600, 0, i * 600, 400], fill='#000000')
        for i in range(2):
            draw.line([(0, i * 400), (im.size[0], i * 400)], fill='#000000')
            
        # Visibility amplitudes as a function of (u,v) distance
        x0, y0 = 1, 400
        draw.text((x0 + 500, y0 - 395), '%.3f MHz' % (freq/1e6,), font=font, fill='#000000')
        ## (u,v) distance
        x = ((599.0 / (maxdst - mindst)) * (dist - mindst)).clip(0, 599)
        ## XX
        y = ((399.0 / (maxval - minval)) * (baselines[:,0] - minval)).clip(0, 399)
        draw.point(list(zip(x0 + x, y0 - y)), fill='#1F77B4')
        ## YY
        y = ((399.0 / (maxval - minval)) * (baselines[:,2] - minval)).clip(0, 399)
        draw.point(list(zip(x0 + x, y0 - y)), fill='#FF7F0E')
        ### XY
        #y = ((399.0 / (maxval - minval)) * (baselines[:,1] - minval)).clip(0, 399)
        #draw.point(list(zip(x0 + x, y0 - y)), fill='#A00000')
        
        # Details and labels
        ySummary = 402
        timeStr = datetime.utcfromtimestamp(time_tag / FS)
        timeStr = timeStr.strftime("%Y/%m/%d %H:%M:%S UTC")
        draw.text((5, ySummary), timeStr, font=font, fill='#000000')
        rangeStr = 'range shown: %.6f - %.6f' % (minval, maxval)
        draw.text((210, ySummary), rangeStr, font=font, fill='#000000')
        x = im.size[0] + 15
        #for label, c in reversed(list(zip(('XX','XY','YY'), ('#1F77B4','#A00000','#FF7F0E')))):
        for label, c in reversed(list(zip(('XX','YY'), ('#1F77B4','#FF7F0E')))):
            x -= draw.textsize(label, font=font)[0] + 20
            draw.text((x, ySummary), label, font=font, fill=c)
            
        return im
        
    def main(self):
        cpu_affinity.set_core(self.core)
        self.bind_proclog.update({'ncore': 1,
                                  'core0': cpu_affinity.get_core(),})
        
        for iseq in self.iring.read(guarantee=self.guarantee):
            ihdr = json.loads(iseq.header.tostring())
            self.sequence_proclog.update(ihdr)
            self.log.info("Baseline: Start of new sequence: %s", str(ihdr))
            
            # Setup the ring metadata and gulp sizes
            time_tag = ihdr['time_tag']
            navg     = ihdr['navg']
            nbl      = ihdr['nbl']
            nstand   = ihdr['nstand']
            chan0    = ihdr['chan0']
            nchan    = ihdr['nchan']
            chan_bw  = ihdr['bw'] / nchan
            npol     = ihdr['npol']
            
            igulp_size = self.ntime_gulp*nbl*nchan*npol*8   # ci32
            ishape = (self.ntime_gulp,nbl,nchan,npol)
            self.iring.resize(igulp_size)
            
            # Setup the arrays for the frequencies and baseline lengths
            freq = chan0*chan_bw + numpy.arange(nchan)*chan_bw
            uvw = get_zenith_uvw(self.station, LWATime(time_tag, format='timetag'))
            uvw[:,2] = 0
            dist = numpy.sqrt((uvw**2).sum(axis=1))
            valid = numpy.where(dist > 0.1)[0]
            
            last_save = 0.0
            prev_time = time.time()
            for ispan in iseq.read(igulp_size):
                if ispan.size < igulp_size:
                    continue # Ignore final gulp
                curr_time = time.time()
                acquire_time = curr_time - prev_time
                prev_time = curr_time
                
                ## Setup and load
                idata = ispan.data_view('ci32').reshape(ishape)
                
                if time.time() - last_save > 60:
                    ## Timestamp
                    tt = LWATime(time_tag, format='timetag')
                    ts = tt.unix
                    
                    ## Plot
                    ## * unpack the packed ci32 samples into complex64
                    ## * only really works for ntime_gulp=1
                    bdata = idata[0,...]
                    bdata = bdata.view(numpy.int32)
                    bdata = bdata.reshape(ishape+(2,))
                    bdata = bdata[0,:,:,:,0] + 1j*bdata[0,:,:,:,1]
                    bdata = bdata.astype(numpy.complex64)
                    im = self._plot_baselines(time_tag, freq, dist, bdata, valid)
                    
                    ## Save
                    mp = ImageMonitorPoint.from_image(im)
                    self.client.write_monitor_point('diagnostics/baselines',
                                                    mp, timestamp=ts)
                    
                    last_save = time.time()
                    
                time_tag += navg * self.ntime_gulp
                
                curr_time = time.time()
                process_time = curr_time - prev_time
                prev_time = curr_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': 0.0,
                                          'process_time': process_time,})
                
        self.log.info("BaselineOp - Done")
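
# The ci32 -> complex64 unpacking in BaselineOp.main() is easy to get wrong,
# so here is a minimal, self-contained sketch of the same reinterpretation on
# synthetic data.  The assumed layout (each ci32 sample is an interleaved
# int32 real/imag pair) matches what the op relies on; everything else here
# is illustrative only and is not called by the pipeline.
def _example_unpack_ci32():
    import numpy
    ntime, nbl, nchan, npol = 1, 3, 4, 4
    # Synthetic packed buffer: two int32s (real, imag) per complex sample
    raw = numpy.arange(ntime*nbl*nchan*npol*2, dtype=numpy.int32)
    data = raw.reshape(ntime, nbl, nchan, npol, 2)
    cdata = (data[...,0] + 1j*data[...,1]).astype(numpy.complex64)
    assert cdata.shape == (ntime, nbl, nchan, npol)
    return cdata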
class StatisticsOp(object):
    def __init__(self, log, id, iring, ntime_gulp=1, guarantee=True, core=None):
        self.log = log
        self.iring = iring
        self.ntime_gulp = ntime_gulp
        self.guarantee = guarantee
        self.core = core
        
        self.client = Client(id)
        
        self.bind_proclog = ProcLog(type(self).__name__+"/bind")
        self.in_proclog = ProcLog(type(self).__name__+"/in")
        self.size_proclog = ProcLog(type(self).__name__+"/size")
        self.sequence_proclog = ProcLog(type(self).__name__+"/sequence0")
        self.perf_proclog = ProcLog(type(self).__name__+"/perf")
        
        self.in_proclog.update({'nring':1, 'ring0':self.iring.name})
        self.size_proclog.update({'nseq_per_gulp': self.ntime_gulp})
        
    def main(self):
        if self.core is not None:
            cpu_affinity.set_core(self.core)
        self.bind_proclog.update({'ncore': 1,
                                  'core0': cpu_affinity.get_core(),})
        
        for iseq in self.iring.read(guarantee=self.guarantee):
            ihdr = json.loads(iseq.header.tostring())
            self.sequence_proclog.update(ihdr)
            self.log.info("Statistics: Start of new sequence: %s", str(ihdr))
            
            # Setup the ring metadata and gulp sizes
            time_tag = ihdr['time_tag']
            navg     = ihdr['navg']
            nbl      = ihdr['nbl']
            nstand   = ihdr['nstand']
            chan0    = ihdr['chan0']
            nchan    = ihdr['nchan']
            chan_bw  = ihdr['bw'] / nchan
            npol     = ihdr['npol']
            
            igulp_size = self.ntime_gulp*nbl*nchan*npol*8   # ci32
            ishape = (self.ntime_gulp,nbl,nchan,npol)
            
            # Indices of the auto-correlations in the packed baseline list
            autos = [i*(2*(nstand-1)+1-i)//2 + i for i in range(nstand)]
            data_pols = ['XX', 'YY']
            last_save = 0.0
            
            prev_time = time.time()
            iseq_spans = iseq.read(igulp_size)
            for ispan in iseq_spans:
                if ispan.size < igulp_size:
                    continue # Ignore final gulp
                curr_time = time.time()
                acquire_time = curr_time - prev_time
                prev_time = curr_time
                
                ## Setup and load
                idata = ispan.data_view('ci32').reshape(ishape)
                
                if time.time() - last_save > 60:
                    ## Timestamp
                    tt = LWATime(time_tag, format='timetag')
                    ts = tt.unix
                    
                    ## Pull out the auto-correlations
                    adata = idata.view(numpy.int32)
                    adata = adata.reshape(ishape+(2,))
                    adata = adata[0,autos,:,:,0]
                    adata = adata[:,:,[0,3]]
                    
                    ## Run the statistics over all times/channels
                    ## * only really works for ntime_gulp=1
                    data_min = numpy.min(adata, axis=1)
                    data_max = numpy.max(adata, axis=1)
                    data_avg = numpy.mean(adata, axis=1)
                    
                    ## Save
                    for data,name in zip((data_min,data_avg,data_max), ('min','avg','max')):
                        value = MultiMonitorPoint([data[:,i].tolist() for i in range(data.shape[1])],
                                                  timestamp=ts, field=data_pols)
                        self.client.write_monitor_point('statistics/%s' % name, value)
                        
                    last_save = time.time()
                    
                time_tag += navg * self.ntime_gulp
                
                curr_time = time.time()
                process_time = curr_time - prev_time
                prev_time = curr_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': -1,
                                          'process_time': process_time,})
                
        self.log.info("StatisticsOp - Done")
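
# The 'autos' list above picks the auto-correlations out of a packed
# upper-triangular baseline ordering ((0,0), (0,1), ..., (0,N-1), (1,1), ...).
# A small sketch verifying the closed-form index against an explicit
# enumeration of baseline pairs; purely illustrative and not called by the
# pipeline.
def _example_auto_indices(nstand=5):
    pairs = [(i, j) for i in range(nstand) for j in range(i, nstand)]
    autos = [i*(2*(nstand-1)+1-i)//2 + i for i in range(nstand)]
    # The k-th entry of autos must point at the (i,i) pair
    assert all(pairs[k] == (i, i) for i, k in enumerate(autos))
    return autos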
def main(argv):
    parser = argparse.ArgumentParser(
                 description="Data recorder manager for slow/fast visibility data"
                 )
    parser.add_argument('-b', '--begin-band', type=int, default=1,
                        help='beginning dr_visibility.py band number to manage')
    parser.add_argument('-e', '--end-band', type=int, default=16,
                        help='ending dr_visibility.py band number to manage')
    parser.add_argument('-l', '--logfile', type=str,
                        help='file to write logging to')
    parser.add_argument('-q', '--quick', action='store_true',
                        help='run in fast visibility mode')
    parser.add_argument('-f', '--fork', action='store_true',
                        help='fork and run in the background')
    args = parser.parse_args()
    
    # Fork, if requested
    if args.fork:
        # There is no --port option; use the first managed band number to
        # keep the stderr filename unique
        stderr = '/tmp/%s_%i.stderr' % (os.path.splitext(os.path.basename(__file__))[0],
                                        args.begin_band)
        daemonize(stdin='/dev/null', stdout='/dev/null', stderr=stderr)
        
    # Setup logging
    log = logging.getLogger(__name__)
    logFormat = logging.Formatter('%(asctime)s [%(levelname)-8s] %(message)s',
                                  datefmt='%Y-%m-%d %H:%M:%S')
    logFormat.converter = time.gmtime
    if args.logfile is None:
        logHandler = logging.StreamHandler(sys.stdout)
    else:
        logHandler = LogFileHandler(args.logfile)
    logHandler.setFormatter(logFormat)
    log.addHandler(logHandler)
    log.setLevel(logging.DEBUG)
    
    log.info("Starting %s with PID %i", os.path.basename(__file__), os.getpid())
    log.info("Cmdline args:")
    for arg in vars(args):
        log.info("  %s: %s", arg, getattr(args, arg))
        
    # Setup the IDs to use/manage
    mcs_id = 'drv'
    if args.quick:
        mcs_id += 'f'
    else:
        mcs_id += 's'
    MANAGE_ID = []
    for band in range(args.begin_band, args.end_band+1):
        sub_id = mcs_id + str(band)
        MANAGE_ID.append(sub_id)
        
    # Setup signal handling
    shutdown_event = setup_signal_handling([])
    
    c = Client(mcs_id)
    for cmd in ('ping', 'sync', 'start', 'stop'):
        cb = CommandCallbackBase(c.client)
        def wrapper(value, cmd=cmd):
            # Bind 'cmd' at definition time; otherwise every callback would
            # forward the last command in the loop
            status = True
            response = {}
            for id in MANAGE_ID:
                s, r = c.send_command(id, cmd, **value)
                status &= s
                response[id] = r
            return status, response
        cb.action = wrapper
        c.set_command_callback(cmd, cb)
        
    tlast = 0.0
    while not shutdown_event.is_set():
        if time.time() - tlast > 15:
            status = "normal"
            info = ""
            first = True
            for id in MANAGE_ID:
                t0 = time.time()
                svalue = c.read_monitor_point('summary', id=id)
                ivalue = c.read_monitor_point('info', id=id)
                if svalue is None:
                    svalue = MonitorPoint("timeout", timestamp=0)
                if ivalue is None:
                    ivalue = MonitorPoint("timeout", timestamp=0)
                log.info("%s -> %s (%s) at %.0f", id, svalue.value, ivalue.value, svalue.timestamp)
                
                # How stale is this subsystem's summary?
                age = t0 - svalue.timestamp
                if age > 120:
                    status = "timeout"
                if svalue.value == "error":
                    status = svalue.value
                elif svalue.value == "warning":
                    if status == "normal":
                        status = svalue.value
                        
                if not first:
                    info += "; "
                info += "%s: %s (%s)" % (id, svalue.value, ivalue.value)
                first = False
                
            ts = time.time()
            c.write_monitor_point('summary', status, timestamp=ts)
            c.write_monitor_point('info', info, timestamp=ts)
            tlast = ts
            
        time.sleep(2)
        
    return 0
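
# The 'cmd=cmd' default argument in the command callbacks above matters:
# Python closures capture variables by reference, so without it every
# callback registered in the loop would forward the *last* command
# ('stop').  A toy demonstration of the pitfall and the fix; illustrative
# only.
def _example_late_binding():
    bad = [lambda: name for name in ('ping', 'stop')]
    good = [lambda name=name: name for name in ('ping', 'stop')]
    assert [f() for f in bad] == ['stop', 'stop']    # all see the final value
    assert [f() for f in good] == ['ping', 'stop']   # each bound at definition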
class SpectraOp(object):
    def __init__(self, log, id, iring, ntime_gulp=1, guarantee=True, core=-1):
        self.log = log
        self.iring = iring
        self.ntime_gulp = ntime_gulp
        self.guarantee = guarantee
        self.core = core
        
        self.client = Client(id)
        
        self.bind_proclog = ProcLog(type(self).__name__+"/bind")
        self.in_proclog = ProcLog(type(self).__name__+"/in")
        self.size_proclog = ProcLog(type(self).__name__+"/size")
        self.sequence_proclog = ProcLog(type(self).__name__+"/sequence0")
        self.perf_proclog = ProcLog(type(self).__name__+"/perf")
        
        self.in_proclog.update({'nring':1, 'ring0':self.iring.name})
        
    def _plot_spectra(self, time_tag, freq, specs):
        # Plotting setup
        nchan = freq.size
        nstand = specs.shape[0]
        try:
            minval = numpy.min(specs[numpy.where(numpy.isfinite(specs))])
            maxval = numpy.max(specs[numpy.where(numpy.isfinite(specs))])
        except ValueError:
            minval = 0.0
            maxval = 1.0
            
        # Image setup
        width = 20
        height = 18
        im = PIL.Image.new('RGB', (width * 65 + 1, height * 65 + 21), '#FFFFFF')
        draw = PIL.ImageDraw.Draw(im)
        font = PIL.ImageFont.load(os.path.join(BASE_PATH, 'fonts', 'helvB10.pil'))
        
        # Axes boxes
        for i in range(width + 1):
            draw.line([i * 65, 0, i * 65, height * 65], fill='#000000')
        for i in range(height + 1):
            draw.line([(0, i * 65), (im.size[0], i * 65)], fill='#000000')
            
        # Power as a function of frequency for all antennas
        x = numpy.arange(nchan) * 64 // nchan
        for s in range(nstand):
            if s >= height * width:
                break
            x0, y0 = (s % width) * 65 + 1, (s // width + 1) * 65
            draw.text((x0 + 5, y0 - 60), str(s+1), font=font, fill='#000000')
            ## XX
            c = '#1F77B4'
            y = ((54.0 / (maxval - minval)) * (specs[s,:,0] - minval)).clip(0, 54)
            draw.point(list(zip(x0 + x, y0 - y)), fill=c)
            ## YY
            c = '#FF7F0E'
            y = ((54.0 / (maxval - minval)) * (specs[s,:,1] - minval)).clip(0, 54)
            draw.point(list(zip(x0 + x, y0 - y)), fill=c)
            
        # Summary
        ySummary = height * 65 + 2
        timeStr = datetime.utcfromtimestamp(time_tag / FS)
        timeStr = timeStr.strftime("%Y/%m/%d %H:%M:%S UTC")
        draw.text((5, ySummary), timeStr, font=font, fill='#000000')
        rangeStr = 'range shown: %.3f to %.3f dB' % (minval, maxval)
        draw.text((210, ySummary), rangeStr, font=font, fill='#000000')
        x = im.size[0] + 15
        for label, c in reversed(list(zip(('XX', 'YY'), ('#1F77B4','#FF7F0E')))):
            x -= draw.textsize(label, font=font)[0] + 20
            draw.text((x, ySummary), label, font=font, fill=c)
            
        return im
        
    def main(self):
        cpu_affinity.set_core(self.core)
        self.bind_proclog.update({'ncore': 1,
                                  'core0': cpu_affinity.get_core(),})
        
        for iseq in self.iring.read(guarantee=self.guarantee):
            ihdr = json.loads(iseq.header.tostring())
            self.sequence_proclog.update(ihdr)
            self.log.info("Spectra: Start of new sequence: %s", str(ihdr))
            
            # Setup the ring metadata and gulp sizes
            time_tag = ihdr['time_tag']
            navg     = ihdr['navg']
            nbl      = ihdr['nbl']
            nstand   = ihdr['nstand']
            chan0    = ihdr['chan0']
            nchan    = ihdr['nchan']
            chan_bw  = ihdr['bw'] / nchan
            npol     = ihdr['npol']
            
            igulp_size = self.ntime_gulp*nbl*nchan*npol*8   # ci32
            ishape = (self.ntime_gulp,nbl,nchan,npol)
            
            # Setup the arrays for the frequencies and auto-correlations
            freq = chan0*chan_bw + numpy.arange(nchan)*chan_bw
            autos = [i*(2*(nstand-1)+1-i)//2 + i for i in range(nstand)]
            
            last_save = 0.0
            prev_time = time.time()
            for ispan in iseq.read(igulp_size):
                if ispan.size < igulp_size:
                    continue # Ignore final gulp
                curr_time = time.time()
                acquire_time = curr_time - prev_time
                prev_time = curr_time
                
                ## Setup and load
                idata = ispan.data_view('ci32').reshape(ishape)
                
                if time.time() - last_save > 60:
                    ## Timestamp
                    tt = LWATime(time_tag, format='timetag')
                    ts = tt.unix
                    
                    ## Pull out the auto-correlations
                    adata = idata.view(numpy.int32)
                    adata = adata.reshape(ishape+(2,))
                    adata = adata[0,autos,:,:,0]
                    adata = adata[:,:,[0,3]]
                    
                    ## Plot
                    im = self._plot_spectra(time_tag, freq, 10*numpy.log10(adata))
                    
                    ## Save
                    mp = ImageMonitorPoint.from_image(im)
                    self.client.write_monitor_point('diagnostics/spectra',
                                                    mp, timestamp=ts)
                    
                    last_save = time.time()
                    
                time_tag += navg * self.ntime_gulp
                
                curr_time = time.time()
                process_time = curr_time - prev_time
                prev_time = curr_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': 0.0,
                                          'process_time': process_time,})
                
        self.log.info("SpectraOp - Done")
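
# Both SpectraOp and BaselineOp publish their diagnostics the same way:
# render to a PIL image, wrap it in an ImageMonitorPoint, and write it under
# a 'diagnostics/...' key.  A condensed sketch of that pattern using only
# names defined in this module; the client and monitor-point key here stand
# in for whichever op is publishing.
def _example_publish_image(client, im, time_tag):
    ts = LWATime(time_tag, format='timetag').unix
    mp = ImageMonitorPoint.from_image(im)
    client.write_monitor_point('diagnostics/spectra', mp, timestamp=ts)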
class StatisticsOp(object):
    def __init__(self, log, id, iring, ntime_gulp=250, guarantee=True, core=None):
        self.log = log
        self.iring = iring
        self.ntime_gulp = ntime_gulp
        self.guarantee = guarantee
        self.core = core
        
        self.client = Client(id)
        
        self.bind_proclog = ProcLog(type(self).__name__+"/bind")
        self.in_proclog = ProcLog(type(self).__name__+"/in")
        self.size_proclog = ProcLog(type(self).__name__+"/size")
        self.sequence_proclog = ProcLog(type(self).__name__+"/sequence0")
        self.perf_proclog = ProcLog(type(self).__name__+"/perf")
        
        self.in_proclog.update({'nring': 1, 'ring0': self.iring.name})
        self.size_proclog.update({'nseq_per_gulp': self.ntime_gulp})
        
    def main(self):
        if self.core is not None:
            cpu_affinity.set_core(self.core)
        self.bind_proclog.update({'ncore': 1,
                                  'core0': cpu_affinity.get_core(),})
        
        for iseq in self.iring.read(guarantee=self.guarantee):
            ihdr = json.loads(iseq.header.tostring())
            self.sequence_proclog.update(ihdr)
            self.log.info("Statistics: Start of new sequence: %s", str(ihdr))
            
            # Setup the ring metadata and gulp sizes
            time_tag = ihdr['time_tag']
            navg     = ihdr['navg']
            nbeam    = ihdr['nbeam']
            chan0    = ihdr['chan0']
            nchan    = ihdr['nchan']
            chan_bw  = ihdr['bw'] / nchan
            npol     = ihdr['npol']
            pols     = ihdr['pols']
            
            igulp_size = self.ntime_gulp*nbeam*nchan*npol*4   # float32
            ishape = (self.ntime_gulp,nbeam,nchan,npol)
            
            data_pols = pols.split(',')
            last_save = 0.0
            
            prev_time = time.time()
            iseq_spans = iseq.read(igulp_size)
            for ispan in iseq_spans:
                if ispan.size < igulp_size:
                    continue # Ignore final gulp
                curr_time = time.time()
                acquire_time = curr_time - prev_time
                prev_time = curr_time
                
                ## Setup and load
                idata = ispan.data_view(numpy.float32).reshape(ishape)
                idata = idata.reshape(-1, npol)
                
                if time.time() - last_save > 60:
                    ## Timestamp
                    tt = LWATime(time_tag, format='timetag')
                    ts = tt.unix
                    
                    ## Run the statistics over all times/channels
                    ## * only really works for nbeam=1
                    data_min = numpy.min(idata, axis=0)
                    data_max = numpy.max(idata, axis=0)
                    data_avg = numpy.mean(idata, axis=0)
                    
                    ## Save
                    for data, name in zip((data_min, data_avg, data_max), ('min', 'avg', 'max')):
                        value = MultiMonitorPoint(data.tolist(), timestamp=ts, field=data_pols)
                        self.client.write_monitor_point('statistics/%s' % name, value)
                        
                    last_save = time.time()
                    
                time_tag += navg * self.ntime_gulp * (int(FS) // int(CHAN_BW))
                
                curr_time = time.time()
                process_time = curr_time - prev_time
                prev_time = curr_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': -1,
                                          'process_time': process_time,})
                
        self.log.info("StatisticsOp - Done")
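
# In the beam ops, time_tag advances in units of FS clock ticks: navg raw
# spectra per integration, ntime_gulp integrations per gulp, and FS//CHAN_BW
# ticks per channelized spectrum.  A sketch of the equivalent wall-clock
# step; the default navg/fs/chan_bw values here are placeholders, while the
# pipeline itself uses the FS and CHAN_BW constants imported by this module.
def _example_gulp_seconds(navg=24, ntime_gulp=250, fs=196.0e6, chan_bw=25.0e3):
    ticks = navg * ntime_gulp * (int(fs) // int(chan_bw))
    return ticks / fs   # seconds of data spanned by one gulp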
class SpectraOp(object):
    def __init__(self, log, id, iring, ntime_gulp=250, guarantee=True, core=None):
        self.log = log
        self.iring = iring
        self.ntime_gulp = ntime_gulp
        self.guarantee = guarantee
        self.core = core
        
        self.client = Client(id)
        
        self.bind_proclog = ProcLog(type(self).__name__+"/bind")
        self.in_proclog = ProcLog(type(self).__name__+"/in")
        self.size_proclog = ProcLog(type(self).__name__+"/size")
        self.sequence_proclog = ProcLog(type(self).__name__+"/sequence0")
        self.perf_proclog = ProcLog(type(self).__name__+"/perf")
        
        self.in_proclog.update({'nring': 1, 'ring0': self.iring.name})
        self.size_proclog.update({'nseq_per_gulp': self.ntime_gulp})
        
    def main(self):
        if self.core is not None:
            cpu_affinity.set_core(self.core)
        self.bind_proclog.update({'ncore': 1,
                                  'core0': cpu_affinity.get_core(),})
        
        # Setup the figure
        ## Import
        import matplotlib
        matplotlib.use('Agg')
        from matplotlib import pyplot as plt
        from matplotlib.ticker import MultipleLocator
        
        ## Create
        fig = plt.Figure(figsize=(6, 6))
        ax = fig.gca()
        
        for iseq in self.iring.read(guarantee=self.guarantee):
            ihdr = json.loads(iseq.header.tostring())
            self.sequence_proclog.update(ihdr)
            self.log.info("Spectra: Start of new sequence: %s", str(ihdr))
            
            # Setup the ring metadata and gulp sizes
            time_tag = ihdr['time_tag']
            navg     = ihdr['navg']
            nbeam    = ihdr['nbeam']
            chan0    = ihdr['chan0']
            nchan    = ihdr['nchan']
            chan_bw  = ihdr['bw'] / nchan
            npol     = ihdr['npol']
            pols     = ihdr['pols']
            
            igulp_size = self.ntime_gulp*nbeam*nchan*npol*4   # float32
            ishape = (self.ntime_gulp,nbeam,nchan,npol)
            
            nchan_pipeline = nchan // NPIPELINE
            frange = (numpy.arange(nchan) + chan0) * CHAN_BW
            
            last_save = 0.0
            
            prev_time = time.time()
            iseq_spans = iseq.read(igulp_size)
            for ispan in iseq_spans:
                if ispan.size < igulp_size:
                    continue # Ignore final gulp
                curr_time = time.time()
                acquire_time = curr_time - prev_time
                prev_time = curr_time
                
                ## Setup and load
                idata = ispan.data_view(numpy.float32).reshape(ishape)
                
                if time.time() - last_save > 60:
                    ## Timestamp
                    tt = LWATime(time_tag, format='timetag')
                    
                    ## Average over time
                    sdata = idata.mean(axis=0)
                    
                    ## Create the diagnostic plot, marking the sub-band
                    ## boundaries between pipelines
                    ax.cla()
                    ax.plot(frange / 1e6, numpy.log10(sdata[0, :, 0]) * 10, color='#1F77B4')
                    ax.plot(frange / 1e6, numpy.log10(sdata[0, :, 1]) * 10, color='#FF7F0E')
                    ylim = ax.get_ylim()
                    for b in range(1, NPIPELINE):
                        linestyle = ':'
                        if b % 4 == 0:
                            linestyle = '--'
                        ax.vlines(frange[b * nchan_pipeline] / 1e6, *ylim,
                                  linestyle=linestyle, color='black', alpha=0.2)
                    ax.set_ylim(ylim)
                    ax.set_xlim((frange[0] / 1e6, frange[-1] / 1e6))
                    ax.set_xlabel('Frequency [MHz]')
                    ax.set_ylabel('Power [arb. dB]')
                    ax.xaxis.set_major_locator(MultipleLocator(base=10.0))
                    fig.tight_layout()
                    
                    ## Save
                    mp = ImageMonitorPoint.from_figure(fig)
                    self.client.write_monitor_point('diagnostics/spectra',
                                                    mp, timestamp=tt.unix)
                    
                    last_save = time.time()
                    
                time_tag += navg * self.ntime_gulp * (int(FS) // int(CHAN_BW))
                
                curr_time = time.time()
                process_time = curr_time - prev_time
                prev_time = curr_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': -1,
                                          'process_time': process_time,})
                
        self.log.info("SpectraOp - Done")
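
# SpectraOp reuses a single Agg figure across the sequence loop (ax.cla()
# per update) instead of creating a new figure per plot, which avoids
# accumulating matplotlib state in a long-running pipeline.  A minimal,
# self-contained sketch of that pattern with an explicit Agg canvas;
# illustrative only.
def _example_figure_reuse(niter=3):
    import numpy
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg
    fig = Figure(figsize=(6, 6))
    canvas = FigureCanvasAgg(fig)   # Agg canvas; no GUI backend required
    ax = fig.gca()
    for i in range(niter):
        ax.cla()                    # clear the axes, keep the objects
        ax.plot(numpy.random.randn(128))
        canvas.draw()
    return fig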
class StatusLogger(object):
    """
    Monitoring class for logging the overall status of a pipeline.  This
    aggregates other monitoring points of the pipeline and uses that
    information to compute an overall state of the pipeline.
    """
    
    def __init__(self, log, id, queue, shutdown_event=None, update_interval=10):
        self.log = log
        self.id = id
        self.queue = queue
        if shutdown_event is None:
            shutdown_event = threading.Event()
        self.shutdown_event = shutdown_event
        self.update_interval = update_interval
        
        self.client = Client(id)
        
    def _update(self):
        pass
        
    def main(self, once=False):
        """
        Main logging loop.  May be run only once with the "once" keyword set
        to True.
        """
        
        while not self.shutdown_event.is_set():
            # Active operation
            ts = time.time()
            is_active = self.queue.active is not None
            active_filename = None
            time_left = None
            if is_active:
                active_filename = self.queue.active.filename
                time_left = self.queue.active.stop_time - self.queue.active.utcnow()
            self.client.write_monitor_point('op-type', active_filename, timestamp=ts)
            self.client.write_monitor_point('op-tag', active_filename, timestamp=ts)
            
            # TODO: Overall system status/health
            #  What goes into this?
            #   * RX rate/missing packets?
            #   * Block processing time?
            #   * Free disk space?
            #   * Thread check?
            #   * ???
            missing = self.client.read_monitor_point('bifrost/rx_missing')
            if missing is None:
                missing = MonitorPoint(0.0)
            processing = self.client.read_monitor_point('bifrost/max_process')
            if processing is None:
                processing = MonitorPoint(0.0)
            total = self.client.read_monitor_point('storage/active_disk_size')
            free = self.client.read_monitor_point('storage/active_disk_free')
            dused = 1.0 - 1.0*free.value / total.value   # fraction of the disk used
            ts = min([v.timestamp for v in (missing, processing, total, free)])
            if dused < 0.99 and missing.value < 0.01:
                self.client.write_monitor_point('summary', 'normal', timestamp=ts)
                self.client.write_monitor_point('info', 'A-OK', timestamp=ts)
            elif dused >= 0.99 and missing.value < 0.01:
                self.client.write_monitor_point('summary', 'warning', timestamp=ts)
                self.client.write_monitor_point('info', "no space (%.1f%% used)" % (dused*100.0,),
                                                timestamp=ts)
            elif dused < 0.99 and missing.value >= 0.01:
                self.client.write_monitor_point('summary', 'warning', timestamp=ts)
                self.client.write_monitor_point('info', "missing packets (%.1f%% missing)" % (missing.value*100.0,),
                                                timestamp=ts)
            else:
                self.client.write_monitor_point('summary', 'error', timestamp=ts)
                self.client.write_monitor_point('info', "it's bad", timestamp=ts)
                
            # Report
            self.log.debug("=== Status Report ===")
            self.log.debug(" queue size: %i", len(self.queue))
            self.log.debug(" active operation: %s", is_active)
            if is_active:
                self.log.debug(" active filename: %s", os.path.basename(active_filename))
                self.log.debug(" active time remaining: %s", time_left)
            self.log.debug("=== ===")
            
            # Sleep
            if once:
                break
            time.sleep(self.update_interval)
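
# The summary logic above folds disk usage and packet loss into one of
# 'normal'/'warning'/'error'.  Restated as a pure function (thresholds as in
# the loop above) so the four branches are easy to see and test; this helper
# is illustrative and not used by StatusLogger itself.
def _example_summary(dused, missing, full_thresh=0.99, miss_thresh=0.01):
    if dused < full_thresh and missing < miss_thresh:
        return 'normal', 'A-OK'
    elif dused >= full_thresh and missing < miss_thresh:
        return 'warning', 'no space (%.1f%% used)' % (dused*100.0,)
    elif dused < full_thresh and missing >= miss_thresh:
        return 'warning', 'missing packets (%.1f%% missing)' % (missing*100.0,)
    else:
        return 'error', "it's bad"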
class PerformanceLogger(object):
    """
    Monitoring class for logging how a Bifrost pipeline is performing.  This
    captures the maximum acquire/process/reserve times for the pipeline as
    well as the RX rate and missing packet fraction.
    """
    
    def __init__(self, log, id, queue=None, shutdown_event=None, update_interval=10):
        self.log = log
        self.id = id
        self.queue = queue
        if shutdown_event is None:
            shutdown_event = threading.Event()
        self.shutdown_event = shutdown_event
        self.update_interval = update_interval
        
        self.client = Client(id)
        
        self._pid = os.getpid()
        self._state = deque([], 2)
        self._update()
        
    def _update(self):
        new_state = load_by_pid(self._pid)
        new_state_time = time.time()
        self._state.append((new_state_time,new_state))
        
    def main(self, once=False):
        """
        Main logging loop.  May be run only once with the "once" keyword set
        to True.
        """
        
        while not self.shutdown_event.is_set():
            # Update the state
            self._update()
            
            # Get the pipeline lag, if possible
            ts = time.time()
            lag = None
            if self.queue is not None:
                lag = self.queue.lag.total_seconds()
                self.client.write_monitor_point('bifrost/pipeline_lag', lag,
                                                timestamp=ts, unit='s')
                
            # Find the maximum acquire/process/reserve times
            ts = time.time()
            acquire, process, reserve = 0.0, 0.0, 0.0
            for block,contents in self._state[1][1].items():
                try:
                    perf = contents['perf']
                except KeyError:
                    continue
                acquire = max([acquire, perf['acquire_time']])
                process = max([process, perf['process_time']])
                reserve = max([reserve, perf['reserve_time']])
            self.client.write_monitor_point('bifrost/max_acquire', acquire,
                                            timestamp=ts, unit='s')
            self.client.write_monitor_point('bifrost/max_process', process,
                                            timestamp=ts, unit='s')
            self.client.write_monitor_point('bifrost/max_reserve', reserve,
                                            timestamp=ts, unit='s')
            
            # Estimate the data rate and current missing data fraction
            rx_valid, rx_rate, missing_fraction = False, 0.0, 0.0
            good0, late0, missing0 = 0, 0, 0
            good1, late1, missing1 = 0, 0, 0
            try:
                ts = time.time()
                for block,contents in self._state[0][1].items():
                    if block[-8:] == '_capture':
                        rx_valid = True
                        good0 = contents['stats']['ngood_bytes']
                        late0 = contents['stats']['nlate_bytes']
                        missing0 = contents['stats']['nmissing_bytes']
                for block,contents in self._state[1][1].items():
                    if block[-8:] == '_capture':
                        good1 = contents['stats']['ngood_bytes']
                        late1 = contents['stats']['nlate_bytes']
                        missing1 = contents['stats']['nmissing_bytes']
                rx_rate = (good1 - good0) / (self._state[1][0] - self._state[0][0])
                missing_fraction = (missing1 - missing0) / (good1 - good0 + missing1 - missing0)
                
                self.client.write_monitor_point('bifrost/rx_rate', rx_rate,
                                                timestamp=ts, unit='B/s')
                self.client.write_monitor_point('bifrost/rx_missing', missing_fraction,
                                                timestamp=ts)
            except (KeyError, IndexError, ZeroDivisionError):
                rx_valid = False
                
            # Load average
            ts = time.time()
            try:
                one, five, fifteen = os.getloadavg()
                self.client.write_monitor_point('system/load_average/one_minute', one, timestamp=ts)
                self.client.write_monitor_point('system/load_average/five_minute', five, timestamp=ts)
                self.client.write_monitor_point('system/load_average/fifteen_minute', fifteen, timestamp=ts)
            except OSError:
                one, five, fifteen = None, None, None
                
            # Report
            self.log.debug("=== Performance Report ===")
            self.log.debug(" max acquire/process/reserve times: %.3f/%.3f/%.3f", acquire, process, reserve)
            if rx_valid:
                self.log.debug(" receive data rate: %.3f B/s", rx_rate)
                self.log.debug(" missing data fraction: %.3f%%", missing_fraction*100.0)
            if lag is not None:
                self.log.debug(" pipeline lag: %s", lag)
            if one is not None:
                self.log.debug(" load average: %.2f, %.2f, %.2f", one, five, fifteen)
            self.log.debug("=== ===")
            
            # Sleep
            if once:
                break
            time.sleep(self.update_interval)
class StorageLogger(object):
    """
    Monitoring class for logging how storage is used by a pipeline and for
    enforcing a directory quota, if needed.
    """
    
    def __init__(self, log, id, directory, quota=None, shutdown_event=None, update_interval=10):
        self.log = log
        self.id = id
        self.directory = directory
        if quota == 0:
            quota = None
        self.quota = quota
        if shutdown_event is None:
            shutdown_event = threading.Event()
        self.shutdown_event = shutdown_event
        self.update_interval = update_interval
        
        self.client = Client(id)
        
        self._files = []
        self._file_sizes = []
        
    def _update(self):
        self._files = glob.glob(os.path.join(self.directory, '*'))
        self._files.sort(key=lambda x: os.path.getmtime(x))
        self._file_sizes = [os.path.getsize(filename) for filename in self._files]
        
    def _manage_quota(self):
        total_size = sum(self._file_sizes)
        
        removed = []
        i = 0
        # Delete oldest entries first; stop if we run out of removable
        # entries so that repeated failures cannot index past the list
        while total_size > self.quota and len(self._files) > 1 and i < len(self._files):
            to_remove = self._files[i]
            try:
                if os.path.isdir(to_remove):
                    shutil.rmtree(to_remove)
                else:
                    os.unlink(to_remove)
                removed.append(to_remove)
                del self._files[i]
                del self._file_sizes[i]
                i = 0
            except Exception as e:
                self.log.warning("Quota manager could not remove '%s': %s", to_remove, str(e))
                i += 1
            total_size = sum(self._file_sizes)
            
        if removed:
            self.log.debug("=== Quota Report ===")
            self.log.debug("Removed %i files", len(removed))
            self.log.debug("=== ===")
            
    def main(self, once=False):
        """
        Main logging loop.  May be run only once with the "once" keyword set
        to True.
        """
        
        while not self.shutdown_event.is_set():
            # Update the state
            self._update()
            
            # Quota management, if needed
            if self.quota is not None:
                self._manage_quota()
                
            # Find the disk size and free space for the disk hosting the
            # directory - this should be quota-aware
            ts = time.time()
            st = os.statvfs(self.directory)
            disk_free = st.f_bavail * st.f_frsize
            disk_total = st.f_blocks * st.f_frsize
            self.client.write_monitor_point('storage/active_disk_size', disk_total,
                                            timestamp=ts, unit='B')
            self.client.write_monitor_point('storage/active_disk_free', disk_free,
                                            timestamp=ts, unit='B')
            
            # Find the total size of all files
            ts = time.time()
            total_size = sum(self._file_sizes)
            self.client.write_monitor_point('storage/active_directory', self.directory,
                                            timestamp=ts)
            self.client.write_monitor_point('storage/active_directory_size', total_size,
                                            timestamp=ts, unit='B')
            self.client.write_monitor_point('storage/active_directory_count', len(self._files),
                                            timestamp=ts)
            
            # Report
            self.log.debug("=== Storage Report ===")
            self.log.debug(" directory: %s", self.directory)
            self.log.debug(" disk size: %i B", disk_total)
            self.log.debug(" disk free: %i B", disk_free)
            self.log.debug(" file count: %i", len(self._files))
            self.log.debug(" total size: %i B", total_size)
            self.log.debug("=== ===")
            
            # Sleep
            if once:
                break
            time.sleep(self.update_interval)
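
# A one-shot usage sketch for StorageLogger: run a single update/report
# cycle against a directory with a 1 TB quota.  The subsystem id and
# directory here are hypothetical, and the monitor-point writes assume a
# reachable Client backend; illustrative only.
def _example_storage_once(log):
    sl = StorageLogger(log, 'drvs1', '/data/slow', quota=1024**4)
    sl.main(once=True)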