def __init__(self, view, run):
    """Prepare an event builder for the packets held in ``view``.

    view -- buffer passed to ``PacketFooter``; it is split with
            ``split_packets()`` and the result is handed to ``EventBuilder``.
    run  -- object providing ``configs``, ``batch_size``,
            ``filter_callback`` and ``destination``.
    """
    # Settings taken over from the run object.
    self.configs = run.configs
    self.batch_size = run.batch_size
    self.filter_fn = run.filter_callback
    self.destination = run.destination
    self.n_files = len(self.configs)

    # Break the incoming buffer into its packets and build events from
    # them together with the configs.
    packet_views = PacketFooter(view=view).split_packets()
    self.eb = EventBuilder(packet_views, self.configs)

    # Metric handle looked up by name; presumably used to count filtered
    # events -- confirm against the code that updates it.
    self.c_filter = PrometheusManager.get_metric('psana_eb_filter')
def _start_prometheus_client(self, mpi_rank=0):
    """Start the background thread that pushes Prometheus metrics.

    When ``self.monitor`` is falsy nothing is started and ``self.prom_man``,
    ``self.e`` and ``self.t`` are all set to ``None``.  Otherwise a
    ``PrometheusManager`` is created and its ``push_metrics`` method is run
    in a daemon thread, receiving a ``threading.Event`` (kept as ``self.e``)
    and the current process id.

    mpi_rank -- rank number used in the debug message and the thread name.
    """
    if not self.monitor:
        logging.debug('not monitoring performance with prometheus')
        self.prom_man = None
        # Define the event/thread handles on this path as well, so the
        # instance has the same attributes whether or not monitoring is on.
        self.e = None
        self.t = None
        return

    # Pass the argument to logging instead of pre-formatting the message;
    # the string is only built when DEBUG records are actually emitted.
    logging.debug('starting prometheus client on rank %d', mpi_rank)
    self.prom_man = PrometheusManager()
    # Event handed to push_metrics; presumably setting it tells the pusher
    # to stop -- confirm against PrometheusManager.push_metrics.
    self.e = threading.Event()
    self.t = threading.Thread(
        name='PrometheusThread%s' % (mpi_rank,),
        target=self.prom_man.push_metrics,
        args=(self.e, os.getpid()),
        daemon=True,
    )
    self.t.start()
def test_tester(jobid):
    """Run a ``Tester`` while a background thread pushes its metrics.

    A ``PrometheusManager`` is created for ``jobid`` and its
    ``push_metrics`` method is started in a daemon thread with a
    ``threading.Event`` and ``rank`` (read from module scope) as arguments.
    After the tester finishes -- or fails -- the event is set and the
    thread is joined.

    jobid -- value forwarded to ``PrometheusManager``.
    """
    prom_man = PrometheusManager(jobid)
    e = threading.Event()
    t = threading.Thread(
        name='PrometheusThread%s' % (rank,),
        target=prom_man.push_metrics,
        args=(e, rank),
        daemon=True,
    )
    t.start()
    try:
        Tester(prom_man).run()
    finally:
        # Always signal the pusher and wait for it, even when the tester
        # raises; otherwise the thread would be left running.
        e.set()
        t.join()
from psana.event import Event from psana import dgram from psana.psexp.packet_footer import PacketFooter import numpy as np import os from psana.psexp.TransitionId import TransitionId import logging from psana.psexp.prometheus_manager import PrometheusManager s_bd_disk = PrometheusManager.get_metric('psana_bd_wait_disk') class EventManager(object): """ Return an event from the received smalldata memoryview (view) 1) If dm is empty (no bigdata), yield this smd event 2) If dm is not empty, - with filter fn, fetch one bigdata and yield it. - w/o filter fn, fetch one big chunk of bigdata and replace smalldata view with the read out bigdata. Yield one bigdata event. """ def __init__(self, view, smd_configs, dm, filter_fn=0, prometheus_counter=None): if view: pf = PacketFooter(view=view) self.smd_events = pf.split_packets()
from sys import byteorder import numpy as np from psana.psexp.smdreader_manager import SmdReaderManager from psana.psexp.eventbuilder_manager import EventBuilderManager from psana.psexp.packet_footer import PacketFooter from psana.psexp.step import Step from psana.psexp.events import Events from psana.psexp.event_manager import TransitionId from psana.dgram import Dgram import os from mpi4py import MPI import logging import time from psana.psexp.prometheus_manager import PrometheusManager s_eb_wait_smd0 = PrometheusManager.get_metric('psana_eb_wait_smd0') s_bd_wait_eb = PrometheusManager.get_metric('psana_bd_wait_eb') # Setting up group communications # Ex. PS_SMD_NODES=3 mpirun -n 13 # 1 4 7 10 # 0 2 5 8 11 # 3 6 9 12 #-smd_group- # -bd_main_group- # color # 0 0 0 0 # 1 1 1 1 # 2 2 2 2 # bd_main_rank bd_rank # 0 3 6 9 0 1 2 3
from .datasource import DataSource #from .smalldata import SmallData # Collect start-up time (determined as when this file is loaded). from psana.psexp.prometheus_manager import PrometheusManager import time g_ts = PrometheusManager.get_metric('psana_timestamp') g_ts.labels('psana_init').set(time.time()) # Calls MPI_Abort when one or more (but not all) cores fail. from psana.psexp.tools import mode # Checks that we are in MPI and not Legion mode if mode == 'mpi': # We only need the MPI_Abort when working with > 1 core. from mpi4py import MPI if MPI.COMM_WORLD.Get_size() > 1: import sys import logging logger = logging.getLogger(__name__) handler = logging.StreamHandler(stream=sys.stderr) logger.addHandler(handler) # Global error handler def global_except_hook(exc_type, exc_value, exc_traceback): # Needs to write out to logger before calling MPI_Abort logger.error("except_hook. Calling MPI_Abort()", exc_info=(exc_type, exc_value, exc_traceback)) # NOTE: mpi4py must be imported inside exception handler, not globally. # In chainermn, mpi4py import is carefully delayed, because # mpi4py automatically call MPI_Init() and cause a crash on Infiniband environment.
from psana.psexp.prometheus_manager import PrometheusManager from prometheus_client import Summary import threading import logging, os, time logging.basicConfig(level=logging.DEBUG, format='(%(threadName)-10s) %(message)s', ) from mpi4py import MPI comm = MPI.COMM_WORLD size = comm.Get_size() rank = comm.Get_rank() import random s = PrometheusManager.get_metric('psana_smd0_wait_disk') class Tester(object): def __init__(self, prom_man): self.prom_man = prom_man logging.debug('starting prometheus client on rank %d'%rank) self.c = self.prom_man.get_metric('psana_smd0_read') @s.time() def receive(self): t = random.randrange(0,10) logging.debug('receive() sleep %d s'%(t)) time.sleep(t) def run(self): cn = 0