def __init__(self, file_path, schema):
    self.Path = file_path
    self.Tree = uproot.open(file_path)["Events"]
    self.BranchSizeArrays = {}
    self.Schema = schema
    self.T = Tracer()
    self.Converted = {}
def __init__(self, backend, dataset_name):
    PyThread.__init__(self)
    self.Queue = Queue(self.QUEUESIZE)
    self.DatasetName = dataset_name
    self.Backend = backend
    self.Buf = {}
    self.TotalBytes = 0
    self.Shutdown = False
    self.T = Tracer()
def runWorker(self, params, dxsock, frames, wid):
    t0 = time.time()
    self.log("------ runWorker entry for job %s worker %s" % (params.JID, self.ID))
    buffer_id = "%s_%s" % (params.JID, self.ID)
    buffer = SocketWorkerBuffer(buffer_id, dxsock, params.HDescriptors, log=self.log)
    #worker_module = sandbox_import_module(module_name, ["Worker"])
    worker_module = __import__(params.WorkerModuleName, {}, {}, ["Worker"])
    T = Tracer()
    bulk_storage = None
    if params.BulkDataName:
        with T["open_bulk_storage"]:
            bulk_storage = BulkStorage.open(params.BulkDataName)
        #print "Worker: len(bulk_storage)=%d" % (len(bulk_storage),)
        self.log("t=%.3f: bulk data received %d bytes, %d keys" % (
            time.time() - t0, len(bulk_storage), len(bulk_storage.keys())))
    worker_class = worker_module.Worker
    dataset_name = params.DatasetName
    user_params = params.UserParams
    use_data_cache = params.UseDataCache
    jid = params.JID
    data_mod_client = None
    if params.DataModURL is not None and params.DataModToken is not None:
        data_mod_client = StripedClient(params.DataModURL, data_modification_token=params.DataModToken)
        self.log("t=%.3f: StripedClient initialized" % (time.time() - t0,))
    worker = WorkerDriver(jid, wid, self.Client, worker_class, dataset_name, frames, self.NWorkers,
                          buffer, user_params, bulk_storage, use_data_cache, data_mod_client,
                          tracer=T, log=self.log)
    self.log("t=%.3f: Worker driver created for frames: %s" % (time.time() - t0, frames))
    with T["worker.run"]:
        nevents = worker.run()
    self.log("t=%.3f: worker.run() ended with nevents=%s" % (time.time() - t0, nevents))
    buffer.close(nevents)
    del sys.modules[params.WorkerModuleName]
    self.log("------ Worker %s stats: -----\n%s" % (self.ID, T.formatStats()))
    self.log("t=%.3f: ------ exit from runWorker" % (time.time() - t0,))
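# A minimal sketch of the shared-memory handoff seen in runWorker and in the
# AccumulatorDriver below, using only the BulkStorage calls that appear in this
# code (create/save on the sending side, open/asDict/unlink on the reading side).
# The import path, the segment name and the payload shape are assumptions made
# for illustration, not taken from the original module.
from striped.common import BulkStorage                 # assumed import location

payload = {"calibration": b"\x00" * 1024}              # hypothetical bulk data
storage = BulkStorage.create("job123_bulk", payload)   # allocate the shared segment
storage.save()                                         # publish it under its name

reader = BulkStorage.open("job123_bulk")               # e.g. inside a worker process
local_copy = reader.asDict()                           # materialize as a plain dict
reader.unlink()                                        # release the shared segment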
def __init__(self, jid, wid, client, worker_class, dataset_name, rgids, nworkers, buffer,
             user_params, bulk_data, use_data_cache, data_mod_client, tracer=None, log=None):
    self.JID = jid
    self.Client = client
    self.DataModClient = data_mod_client
    self.WorkerClass = worker_class
    self.DatasetName = dataset_name
    self.MyID = wid
    self.NWorkers = nworkers
    self.Buffer = buffer
    self.RGIDs = rgids
    self.UserParams = user_params
    self.BulkData = bulk_data
    self.T = tracer or Tracer()
    self.Buffer.set_trace(self.T)
    self.UseDataCache = use_data_cache
    self.Log = log
    self.SeenEvents = 0
class AccumulatorDriver(Task):

    class JobInterface(object):

        def __init__(self, driver):
            self.Driver = driver

        @property
        def job_id(self):
            return self.Driver.JID

        def message(self, text):
            self.Driver.message(text)

    class DBInterface(object):
        #
        # dummy for now
        #
        def __init__(self, driver):
            self.Driver = driver

    def __init__(self, dxsock, request, workers, storage, bulk_data_transport, log_file):
        Task.__init__(self)
        self.DXSock = dxsock
        self.Request = request
        self.JID = request.JID
        self.Workers = workers
        self.ModuleStorage = storage
        self.Accumulator = None
        self.EventsSeen = 0
        self.EventsReported = 0
        self.T = Tracer()
        self.BulkDataTransport = bulk_data_transport
        self.LogFile = log_file
        self.HAccumulators = {hid: HAccumulator(desc) for hid, desc in request.HDescriptors.items()}
        self.HistSentTime = 0.0
        self.HistSendInterval = 20.0

    def eventsDelta(self, n=0):
        self.EventsSeen += n
        delta = self.EventsSeen - self.EventsReported
        self.EventsReported = self.EventsSeen
        return delta

    def log(self, msg):
        msg = "AccumulatorDriver(%s): %s" % (self.JID, msg)
        print(msg)
        if self.LogFile is not None:
            self.LogFile.log(msg)

    def run(self):
        try:
            storage = None
            bulk_data = None
            worker_module_name = "m_%s_%s" % (os.getpid(), self.Request.JID)
            module_file = "%s/%s.py" % (self.ModuleStorage, worker_module_name)
            with open(module_file, "w") as f:
                f.write(self.Request.WorkerText)
            frames = self.Request.RGIDs
            frames_by_worker = distribute_items(frames, len(self.Workers))
            params = WorkerParams.fromRequest(self.Request, worker_module_name)
            #
            # Store bulk data in shared memory
            #
            if self.Request.BulkDataName:
                with self.T["wait_for_bulk_data"]:
                    t0 = time.time()
                    bulk_data = self.BulkDataTransport.pop(self.Request.BulkDataName, timeout=30)
                    t1 = time.time()
                    self.log("bulk data %s received, %d bytes encoded, %.2f wait time" % (
                        self.Request.BulkDataName, len(bulk_data), t1 - t0))
                    bulk_data = decodeData(bulk_data)
                with self.T["store_bulk_data"]:
                    assert isinstance(bulk_data, dict)
                    n = sum([len(v.data) + 1000 for v in bulk_data.values()])
                    n = int(n * 1.1) + 1000000          # for safety
                    storage = BulkStorage.create(params.BulkDataName, bulk_data)
                    storage.save()
                self.log("bulk data stored. %f MB allocated" % (float(n) / 1024 / 1024,))
            #
            # Create Accumulator if specified
            #
            worker_module = __import__(worker_module_name, {}, {}, ["Accumulator"])
            if hasattr(worker_module, "Accumulator"):
                job_interface = self.JobInterface(self)
                db_interface = self.DBInterface(self)
                self.Accumulator = worker_module.Accumulator(params.UserParams, bulk_data,
                                                             job_interface, db_interface)
            worker_interfaces = []
            for iw, (w, frames) in enumerate(zip(self.Workers, frames_by_worker)):
                if frames:
                    wid = "%s/%d" % (self.Request.WID, iw)
                    wi = WorkerInterface(self, w.Address, params, wid, frames)
                    wi.start()
                    worker_interfaces.append(wi)
            for wi in worker_interfaces:
                wi.join()
            self.log("all worker interfaces closed")

            if self.Accumulator is not None:
                data = self.Accumulator.values()
                if data is not None:
                    with self.T["send accumulated data"]:
                        events_delta = self.eventsDelta()
                        self.log("sending accumulated data with events_delta=%d" % (events_delta,))
                        self.DXSock.send(DXMessage("data", events_delta=events_delta,
                                                   format="encode")(data=encodeData(data)))
            self.sendHistograms()
            #self.DXSock.send(DXMessage("flush", nevents=self.EventsAccumulated))
        except:
            self.DXSock.send(DXMessage("exception").append(info=traceback.format_exc()))
        finally:
            self.DXSock.close()
            self.log("socket closed")
            if storage:
                storage.unlink()
                self.log("bulk storage unlinked")
            os.unlink(module_file)
            if module_file.endswith(".py"):
                try:
                    os.unlink(module_file + "c")
                except OSError:
                    pass
            self.log("---- Accumulator stats ----\n" + self.T.formatStats())

    @synchronized
    def message(self, message):
        self.DXSock.send(DXMessage("message", nevents=0).append(message=message))

    @synchronized
    def messageFromWorker(self, worker_interface, msg):
        # Can be message, hist, stream, flush, exception
        if msg.Type == "data":
            storage = BulkStorage.open(msg["storage"])
            #print "Accumulator.messageFromWorker(data): keys:", storage.keys()
            events_delta = msg["events_delta"]
            #self.log("data message: events_delta=%s" % (events_delta,))
            data = storage.asDict()
            if self.Accumulator is None:
                msg = DXMessage("data", events_delta=self.eventsDelta(events_delta),
                                format="encode")(data=encodeData(data))
                self.DXSock.send(msg)
            else:
                through = None
                try:
                    with self.T["accumulate"]:
                        through = self.Accumulator.add(data)
                except:
                    self.DXSock.send(DXMessage("exception").append(info=traceback.format_exc()))
                if through is not None:
                    with self.T["send through data"]:
                        msg = DXMessage("data", events_delta=self.eventsDelta(events_delta),
                                        format="encode")(data=encodeData(through))
                        self.DXSock.send(msg)
                else:
                    self.EventsSeen += events_delta
            storage.unlink()
        elif msg.Type == "hist":
            for k, v in msg.items():
                if k.startswith("h:"):
                    hid = k[2:]
                    self.HAccumulators[hid].add(v)
                    #print("AccumulatorDriver: h(%s).Counts->%s" % (hid, self.HAccumulators[hid].H.Counts))
            now = time.time()
            if now > self.HistSentTime + self.HistSendInterval:
                self.sendHistograms()
                self.HistSentTime = now
        else:
            self.DXSock.send(msg)

    def sendHistograms(self):
        msg = DXMessage("hist")
        nhist = 0
        for hid, hacc in self.HAccumulators.items():
            if hacc.NFills:
                #print ("sendHistograms: counts=", hacc.H.Counts)
                msg.append("h:" + hid, hacc.dump())
                nhist += 1
        if nhist:
            self.DXSock.send(msg)
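# AccumulatorDriver.run() splits the request's frames across workers with
# distribute_items(frames, n) from the striped package; its exact policy is not
# shown here. The stand-in below is an illustrative equivalent that cuts a list
# into n near-equal contiguous chunks (a chunk can be empty when there are fewer
# frames than workers, which is why the driver checks "if frames:" before
# starting a WorkerInterface). It is an assumption, not the library code.
def distribute_items_sketch(items, n):
    base, extra = divmod(len(items), n)
    chunks, start = [], 0
    for i in range(n):
        size = base + (1 if i < extra else 0)
        chunks.append(items[start:start + size])
        start += size
    return chunks

print(distribute_items_sketch(list(range(7)), 3))      # [[0, 1, 2], [3, 4], [5, 6]]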
from striped.common import Tracer

T = Tracer()

with T["run"]:
    with T["imports"]:
        from striped.job import SinglePointStripedSession as Session
        import numpy as np
        from numpy.lib.recfunctions import append_fields
        import fitsio, healpy as hp
        import sys, time

    #job_server_address = ("dbwebdev.fnal.gov", 8765)   # development
    job_server_address = ("ifdb01.fnal.gov", 8765)      # production

    session = Session(job_server_address)

    input_file = sys.argv[1]
    input_filename = input_file.rsplit("/", 1)[-1].rsplit(".", 1)[0]    # base name without the extension

    with T["fits/read"]:
        input_data = fitsio.read(input_file, ext=2, columns=["ALPHAWIN_J2000", "DELTAWIN_J2000"])

    with T["hpix"]:
        hpix = hp.ang2pix(nside=16384, theta=input_data['ALPHAWIN_J2000'],
                          phi=input_data['DELTAWIN_J2000'], lonlat=True, nest=True)
        hpix = np.asarray(hpix, np.float64)
        input_data = append_fields(input_data, "HPIX", hpix)
        input_data = np.sort(input_data, order="HPIX")      # np.sort returns a sorted copy
        input_data = np.array(list(zip(input_data['ALPHAWIN_J2000'],
                                       input_data['DELTAWIN_J2000'],
                                       input_data['HPIX'])))
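# The "hpix" step above tags every object with a nested HEALPix pixel index at
# nside=16384 computed from its RA/Dec (lonlat=True makes healpy interpret
# theta/phi as longitude/latitude in degrees). A standalone illustration with
# made-up coordinates:
import numpy as np
import healpy as hp

ra = np.array([10.0, 55.2])       # degrees, hypothetical values
dec = np.array([-30.0, 12.7])     # degrees, hypothetical values
pix = hp.ang2pix(16384, ra, dec, nest=True, lonlat=True)
print(pix)                        # nested pixel indices for the two objects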
class JobProcess(multiprocessing.Process):

    def __init__(self, jid, data_server_url, bulk_transport_port, dx, data_client, workers,
                 job_description, log_file_path):
        multiprocessing.Process.__init__(self)
        self.daemon = True
        self.JID = jid
        self.DataExchange = dx
        self.Workers = workers          # list of WorkerInfo objects
        self.JobDesc = job_description
        self.DataClient = data_client
        self.TotalEvents = 0
        self.T = Tracer()
        self.ContractStartedT = None
        self.FirstWorkerExitT = self.LastWorkerExitT = None
        self.DataServerURL = data_server_url
        self.Contract = None
        self.LogFile = None
        self.LogFilePath = log_file_path
        self.BulkTransportPort = bulk_transport_port

    def log(self, msg):
        print("%s: %s" % (time.ctime(time.time()), msg))
        if self.LogFile is not None:
            self.LogFile.write("%s: %s\n" % (time.ctime(time.time()), msg))
            self.LogFile.flush()

    def run(self):
        self.log("job process %s started" % (self.JID,))
        if self.LogFilePath is not None:
            self.LogFile = open(self.LogFilePath, "w")
        try:
            with self.T["JobProcess/run"]:
                setproctitle("striped_job %s" % (self.JID,))
                self.log("started: dataset: %s, fraction: %s, %d workers" % (
                    self.JobDesc.DatasetName, self.JobDesc.Fraction, len(self.Workers)))
                callback_delegate = self
                with self.T["JobProcess/run/create_contract"]:
                    self.Contract = Contract(self.JID, self.DataServerURL, self.BulkTransportPort,
                                             self.DataClient.dataset(self.JobDesc.DatasetName),
                                             self.JobDesc, self.Workers, callback_delegate,
                                             self.log, self.T)
                self.DataExchange.send(DXMessage("job_started",
                                                 nworkers=len(self.Workers),
                                                 jid=self.JID,
                                                 total_events=self.Contract.TotalEvents,
                                                 selected_events=self.Contract.SelectedEvents,
                                                 selected_frames=json.dumps(self.Contract.SelectedFrames)))
                self.log("job_started sent")
                with self.T["JobProcess/run/start_contract"]:
                    self.Contract.start()
                self.ContractStartedT = self.FirstWorkerExitT = self.LastWorkerExitT = time.time()
                self.log("contract started. waiting...")
                with self.T["JobProcess/run/wait_contract"]:
                    self.Contract.wait()
                self.DataExchange.send(DXMessage("job_done", total_events=self.TotalEvents))
                self.log("Job finished. Worker exit timestamps: first: %.5f, last: %.5f" % (
                    self.FirstWorkerExitT - self.ContractStartedT,
                    self.LastWorkerExitT - self.ContractStartedT))
                self.DataExchange.close()
                self.log("---- exit ----")
        except:
            tb = traceback.format_exc()
            self.DataExchange.send(DXMessage("job_failed").append(reason=tb))
            self.log("Exception: ------------\n%s" % (tb,))
        finally:
            self.log("----- job stats: -----\n" + self.T.formatStats())
            if self.LogFile is not None:
                self.LogFile.close()

    def updateReceived(self, wid, hists, streams, nevents_delta):
        self.TotalEvents += nevents_delta
        client_disconnected = False
        if hists:
            msg = DXMessage("histograms", total_events=self.TotalEvents, wid=wid)
            for k, v in hists.items():
                msg[k] = v
            try:
                self.DataExchange.send(msg)
            except:
                self.log("Error sending message to the client:\n%s" % (traceback.format_exc(),))
                client_disconnected = True
        if streams:
            for k, data in streams.items():
                msg = DXMessage("stream", name=k, format="pickle",
                                total_events=self.TotalEvents, wid=wid)
                msg.append(data=data)   # this is still pickled data because the WorkerInterface does not unpickle
                try:
                    self.DataExchange.send(msg)
                except:
                    self.log("Error sending message to the client:\n%s" % (traceback.format_exc(),))
                    client_disconnected = True
        if not streams and not hists:
            #print "sending empty(%d)" % (self.TotalEvents,)
            msg = DXMessage("empty", total_events=self.TotalEvents, wid=wid)
            try:
                self.DataExchange.send(msg)
            except:
                self.log("Error sending message to the client:\n%s" % (traceback.format_exc(),))
                client_disconnected = True
        if client_disconnected:
            self.log("Client disconnected (because of the communication error). Aborting")
            self.Contract.abort()

    def forward(self, msg):
        with self.T["callback/forward/%s" % (msg.Type,)]:
            self.DataExchange.send(msg)

    def eventsDelta(self, wid, events_delta):
        with self.T["callback/eventsDelta"]:
            self.DataExchange.send(DXMessage("events", wid=wid, events_delta=events_delta))

    def dataReceived(self, wid, events_delta, data):
        with self.T["callback/data"]:
            self.DataExchange.send(DXMessage("data", wid=wid, events_delta=events_delta).append(data=data))

    def exceptionReceived(self, wid, info):
        with self.T["callback/exception"]:
            self.DataExchange.send(DXMessage("exception", wid=wid).append(info=info))

    def messageReceived(self, wid, nevents, message):
        with self.T["callback/message"]:
            self.DataExchange.send(DXMessage("message", wid=wid, nevents=nevents).append(message=message))

    def dataLoadFailureReceived(self, wid, rgid):
        with self.T["callback/data_load_failure"]:
            self.DataExchange.send(DXMessage("data_load_failure", wid=wid, rgid=rgid))

    def workerExited(self, wid, status, t, nevents, nrunning):
        if self.FirstWorkerExitT is None:
            self.FirstWorkerExitT = time.time()
        self.LastWorkerExitT = time.time()
        with self.T["callback/worker_exit"]:
            self.DataExchange.send(DXMessage("worker_exit", nrunning=nrunning, wid=wid,
                                             status=status, t=t, nevents=nevents))
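# JobProcess passes itself to Contract as callback_delegate, so the callback
# surface is just the set of methods defined above. A minimal stand-in with the
# same method names and arguments can be handy when exercising a Contract
# without a DataExchange; this is a sketch under that assumption, not part of
# the original module.
class LoggingDelegate(object):

    def updateReceived(self, wid, hists, streams, nevents_delta):
        print("update from %s: +%d events" % (wid, nevents_delta))

    def eventsDelta(self, wid, events_delta):
        print("events from %s: +%d" % (wid, events_delta))

    def dataReceived(self, wid, events_delta, data):
        print("data from %s: +%d events" % (wid, events_delta))

    def exceptionReceived(self, wid, info):
        print("exception from %s:\n%s" % (wid, info))

    def messageReceived(self, wid, nevents, message):
        print("message from %s (%d events): %s" % (wid, nevents, message))

    def dataLoadFailureReceived(self, wid, rgid):
        print("worker %s failed to load frame %s" % (wid, rgid))

    def workerExited(self, wid, status, t, nevents, nrunning):
        print("worker %s exited: status=%s, events=%d, still running=%d" % (wid, status, nevents, nrunning))

    def forward(self, msg):
        print("forwarded message of type %s" % (msg.Type,))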
def __init__(self, striped_client, data_buffer, dataset_name, columns, schema=None, trace=None):
    self.T = trace or Tracer()
    global T
    T = self.T
    self.Name = dataset_name
    self.BranchNames = set()
    self.AttrNames = set()
    self.Columns = set(columns)
    data_columns = set(columns)
    if not schema:
        self.ClientDataset = striped_client.dataset(dataset_name, columns)
        columns_dict = self.ClientDataset.columns(columns, include_size_columns=True)
        # check if any columns are missing in the dataset
        missing = [cn for cn in columns if not cn in columns_dict]
        if len(missing):
            raise KeyError("The following columns are not found in the dataset: %s" % (",".join(missing),))
        self.ColumnToBranch = {
            cn: (cc.descriptor.ParentArray, cc.descriptor.SizeColumn)
            for cn, cc in columns_dict.items()
        }
        for cn in columns:
            bn, sn = self.ColumnToBranch.get(cn)
            if bn:
                self.BranchNames.add(bn)
                data_columns.add(bn + ".@size")
            else:
                self.AttrNames.add(cn)
            if sn:
                data_columns.add(sn)
        self.FetchColumns = self.ClientDataset.columnsAndSizes(columns)
    else:
        self.ClientDataset = None
        columns_to_branch = {}
        fetch_columns = set()
        missing = []
        for cn in columns:
            if '.' in cn:
                bn, an = cn.split('.', 1)
                sn = bn + ".@size"
                columns_to_branch[cn] = (bn, sn)
                fetch_columns.add(sn)
                self.BranchNames.add(bn)
            else:
                columns_to_branch[cn] = (None, None)
                self.AttrNames.add(cn)
            fetch_columns.add(cn)
        self.ColumnToBranch = columns_to_branch
        self.FetchColumns = list(fetch_columns)
    self.TagConditions = []
    self.ProcessedEvents = 0
    #print self.EventTemplate.branchTemplate
    #print "Q Dataset: fetch columns:", self.FetchColumns
    self.Filter = None
    self.DataBuffer = data_buffer
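# When a schema is supplied (the else branch above), the column-to-branch
# mapping is purely syntactic: a dotted name "Branch.attr" belongs to branch
# "Branch" with size column "Branch.@size", while an undotted name is treated
# as a plain per-event attribute. A small illustration of that rule (the
# column names are hypothetical):
def map_column_sketch(cn):
    if '.' in cn:
        bn, an = cn.split('.', 1)
        return bn, bn + ".@size"
    return None, None

print(map_column_sketch("Muon.pt"))     # ('Muon', 'Muon.@size')
print(map_column_sketch("MET"))         # (None, None)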
from striped.common import Tracer
from QArrays2 import QAEventGroup
from Vault import Vault
import numpy as np, sys, traceback

T = Tracer()

class Frame:

    def __init__(self, rginfo, column_to_branch, raw_stroll, tagged_event_ids):
        self.RGInfo = rginfo
        #self.RGID = rginfo.RGID
        self.NEvents = rginfo.NEvents
        #print "Frame: created for rgid=%d, nevents=%d" % (rginfo.RGID, rginfo.NEvents)
        self.AttrVault = Vault()
        self.VarAttrVaults = {}
        self.BranchVaults = {}      # name -> branch vault
        for cn, (bn, sc) in column_to_branch.items():
            if bn:
                prefix = bn + '.'
                assert cn.startswith(prefix)
                aname = cn[len(prefix):]
                if sc != bn + ".@size":
                    raise NotImplementedError(
def __init__(self, file_path, schema):
    self.Schema = schema
    self.T = Tracer()
    with open(file_path, "r") as f:
        self.Config = yaml.load(f, Loader=yaml.SafeLoader)
    self.NEvents = self.Config["NEvents"]
    self.NBPerEvent = self.Config["NBPerEvent"]
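# The reader above only uses the NEvents and NBPerEvent keys of the YAML config;
# a minimal document it would accept is sketched below (the values are made up
# for illustration).
import io, yaml

example_config = io.StringIO("NEvents: 100000\nNBPerEvent: 250\n")
config = yaml.safe_load(example_config)
print(config["NEvents"], config["NBPerEvent"])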