def request_eventranges(job_def):
    """Write (or update) the Harvester event-range request file.

    job_def: dict with keys 'pandaID', 'taskID', 'jobsetID' and 'nRanges'
    describing how many event ranges to request for which job.

    If no request file exists one is created; otherwise the pending request
    is updated (nRanges incremented for a matching job, or the job added).
    The file is always written under a '.tmp' name and then renamed into
    place so Harvester never reads a partially written file.
    """
    global sfm_har_config, sfm_har_config_done
    # block until the Harvester config has been loaded (set elsewhere)
    sfm_har_config_done.wait()

    # retrieve event request file name
    eventRequestFile = sfm_har_config['eventRequestFile']
    eventRequestFile_tmp = eventRequestFile + '.tmp'

    # create event request file
    if not os.path.exists(eventRequestFile):
        # need to output a file containing:
        # {'nRanges': ???, 'pandaID':???, 'taskID':???, 'jobsetID':???}
        logger.debug('requesting new event ranges by writing to file "%s" with this content: %s',
                     eventRequestFile, job_def)

        # get new job definition
        new_job_def = {job_def['pandaID']: job_def}

        # context manager closes (and flushes) the handle before the rename;
        # the original leaked the handle
        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(new_job_def))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)
    else:
        logger.debug('request file already exists. Adding requests')
        # move current file to temp so Harvester does not read it mid-update
        os.rename(eventRequestFile, eventRequestFile_tmp)

        with open(eventRequestFile_tmp) as f:
            requests = serializer.deserialize(f.read())

        pandaID = job_def['pandaID']
        if pandaID in requests:
            logger.debug('adding event range count to existing request')
            thisjob = requests[pandaID]
            if thisjob['jobsetID'] == job_def['jobsetID'] and thisjob['taskID'] == job_def['taskID']:
                thisjob['nRanges'] += job_def['nRanges']
            else:
                logger.warning('existing request for PandaID %s does not match new request details %s',
                               thisjob, job_def)
        else:
            logger.debug('adding new job definition to existing request')
            requests[pandaID] = job_def

        # output updated requests to file
        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(requests))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)
def request_eventranges(job_def):
    """Write (or update) the Harvester event-range request file.

    job_def: dict with keys 'pandaID', 'taskID', 'jobsetID' and 'nRanges'.

    Creates the request file if absent, otherwise merges this request into
    the pending one.  All writes go to a '.tmp' file first and are renamed
    into place so Harvester never sees a partial file.
    """
    global sfm_har_config, sfm_har_config_done
    # block until the Harvester config has been loaded (set elsewhere)
    sfm_har_config_done.wait()

    # retrieve event request file name
    eventRequestFile = sfm_har_config['eventRequestFile']
    eventRequestFile_tmp = eventRequestFile + '.tmp'

    if not os.path.exists(eventRequestFile):
        # need to output a file containing:
        # {'nRanges': ???, 'pandaID':???, 'taskID':???, 'jobsetID':???}
        logger.debug('requesting new event ranges by writing to file "%s" with this content: %s',
                     eventRequestFile, job_def)

        new_job_def = {job_def['pandaID']: job_def}

        # context manager closes (and flushes) the handle before the rename;
        # the original opened the file without ever closing it
        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(new_job_def))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)
    else:
        logger.debug('request file already exists. Adding requests')
        # move current file to temp so Harvester does not read it mid-update
        os.rename(eventRequestFile, eventRequestFile_tmp)

        with open(eventRequestFile_tmp) as f:
            requests = serializer.deserialize(f.read())

        pandaID = job_def['pandaID']
        if pandaID in requests:
            logger.debug('adding event range count to existing request')
            thisjob = requests[pandaID]
            if thisjob['jobsetID'] == job_def['jobsetID'] and thisjob['taskID'] == job_def['taskID']:
                thisjob['nRanges'] += job_def['nRanges']
            else:
                logger.warning('existing request for PandaID %s does not match new request details %s',
                               thisjob, job_def)
        else:
            logger.debug('adding new job definition to existing request')
            requests[pandaID] = job_def

        # output updated requests to file
        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(requests))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)
def athenamp_worker():
    """Mock AthenaMP worker loop.

    Repeatedly tells the communicator it is ready for events, waits for a
    reply, and for each received event range sleeps briefly and reports a
    fake output file.  Exits when a NO_MORE_EVENTS message arrives.
    """
    logger.info('start athenamp_worker')
    communicator = AthenaCommunicator()

    while True:
        logger.info('start loop, athenamp worker')
        logger.info('sending ready for events')
        communicator.send(ATHENA_READY_FOR_EVENTS)

        logger.info('waiting for response')
        reply = communicator.recv_block()
        logger.info('received: %s', reply)

        if reply.startswith('['):
            # looks like a serialized event-range list
            try:
                event_range = serializer.deserialize(reply)
            except Exception:
                logger.error('failed to deserialize msg')
                continue

            # received event ranges, sleep for a bit and return the file
            time.sleep(5)

            # report a fake output file for this range, e.g.
            # "/build1/tsulaia/20.3.7.5/run-es/athenaMP-workers-AtlasG4Tf-sim/worker_1/myHITS.pool.root_000.Range-6,ID:Range-6,CPU:1,WALL:1"
            range_id = event_range['eventRangeID']
            output_path = os.path.join(os.getcwd(), 'TEST' + range_id + '.ROOT')
            reply = output_path + ',ID:' + range_id + ',CPU:1,WALL:1'
            logger.info('sending output file: %s', reply)
            communicator.send(reply)
        elif NO_MORE_EVENTS in reply:
            break

    logger.info('worker exiting')
def athenamp_worker():
    """Mock AthenaMP worker loop.

    Repeatedly signals readiness for events, waits for a reply, and for each
    received event range sleeps briefly and reports a fake output file.
    Exits when a NO_MORE_EVENTS message arrives.
    """
    logger.info('start athenamp_worker')
    # NOTE(review): the sibling copy of this function uses AthenaCommunicator();
    # confirm which class name actually exists before unifying.
    comm = athena_communicator()
    while True:
        logger.info('start loop, athenamp worker')
        logger.info('sending ready for events')
        comm.send(ATHENA_READY_FOR_EVENTS)
        logger.info('waiting for response')
        msg = comm.recv_block()
        logger.info('received: %s', msg)
        if msg.startswith('['):
            try:
                event_range = serializer.deserialize(msg)
            except Exception:
                # narrowed from a bare except, which also swallowed
                # KeyboardInterrupt/SystemExit
                logger.error('failed to deserialize msg')
                continue

            # received event ranges, sleep for a bit and return the file
            time.sleep(5)

            # return file info, e.g.
            # "/build1/tsulaia/20.3.7.5/run-es/athenaMP-workers-AtlasG4Tf-sim/worker_1/myHITS.pool.root_000.Range-6,ID:Range-6,CPU:1,WALL:1"
            outputfilename = os.path.join(os.getcwd(), 'TEST' + event_range['eventRangeID'] + '.ROOT')
            msg = outputfilename + ',ID:' + event_range['eventRangeID'] + ',CPU:1,WALL:1'
            logger.info('sending output file: %s', msg)
            # bug fix: original called undefined name 'com' here (NameError)
            comm.send(msg)
        elif NO_MORE_EVENTS in msg:
            break
    logger.info('worker exiting')
def stage_out_files(file_list, output_type):
    """Report a batch of output files to Harvester via eventStatusDumpJsonFile.

    file_list: list of dicts with keys 'pandaid', 'eventrangeid',
               'eventstatus', 'filename' and optionally 'chksum'.
    output_type: one of 'output', 'es_output', 'log'.

    Raises Exception if output_type is invalid.  Merges with any existing
    dump file; writes via a '.tmp' file renamed into place so Harvester
    never reads a partial file.
    """
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']

    # build {pandaID: [file_descriptor, ...]} from the input list
    eventStatusDumpData = {}
    for filedata in file_list:
        # make sure pandaID is a string
        pandaID = str(filedata['pandaid'])
        chksum = filedata.get('chksum')
        # format data for file:
        file_descriptor = {'eventRangeID': filedata['eventrangeid'],
                           'eventStatus': filedata['eventstatus'],
                           'path': filedata['filename'],
                           'type': output_type,
                           'chksum': chksum,
                           'guid': None,
                           }
        try:
            eventStatusDumpData[pandaID].append(file_descriptor)
        except KeyError:
            eventStatusDumpData[pandaID] = [file_descriptor]

    # create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # if file does not already exist, new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = eventStatusDumpData
    else:
        # default so 'data' is always defined even if the rename fails while
        # the file still exists (original could hit a NameError below)
        data = eventStatusDumpData
        # first move existing file to tmp so Harvester does not read it while we edit
        # NOTE(review): structure reconstructed as try/except/else from a
        # whitespace-mangled source — confirm against upstream.
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning('tried moving %s to a tmp filename to add more output files for Harvester.',
                           eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                # bug fix: this message was a string literal broken across a
                # physical line break (SyntaxError as found)
                logger.warning('%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                               eventStatusDumpJsonFile)
        else:
            # rename succeeded: read the existing data and merge in the new entries
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('found existing data for pandaIDs: %s', data.keys())
            for pandaID in eventStatusDumpData:
                # if the pandaID already exists, just append the new files to that list
                try:
                    logger.debug('addding data to existing panda list')
                    data[pandaID] += eventStatusDumpData[pandaID]
                # if the pandaID does not exist, add a new list
                except KeyError:
                    logger.debug('addding new panda id list')
                    data[pandaID] = eventStatusDumpData[pandaID]

    if logger.getEffectiveLevel() == logging.DEBUG:
        tmpstr = ' '.join('%s:%s' % (x, len(data[x])) for x in data)
        logger.debug('writing output to file %s with keys: %s',
                     eventStatusDumpJsonFile, tmpstr)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data, pretty_print=True))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)
    logger.debug('done')
def stage_out_file(output_type, output_path, eventRangeID, eventStatus, pandaID, chksum=None):
    """Report a single output file to Harvester via eventStatusDumpJsonFile.

    output_type: one of 'output', 'es_output', 'log'.
    output_path: path to the produced file; must exist.
    eventRangeID/eventStatus/pandaID: identifiers recorded in the dump file.
    chksum: optional checksum string recorded alongside the file.

    Raises Exception for an invalid output_type or a missing output_path.
    Merges with any existing dump file; writes via a '.tmp' file renamed
    into place so Harvester never reads a partial file.
    """
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))
    if not os.path.exists(output_path):
        raise Exception('output file not found: %s' % (output_path))

    # make sure pandaID is a string
    pandaID = str(pandaID)

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']
    # first create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # format data for file:
    file_descriptor = {'eventRangeID': eventRangeID,
                       'eventStatus': eventStatus,
                       'path': output_path,
                       'type': output_type,
                       'chksum': chksum,
                       'guid': None,
                       }

    # if file does not already exist, new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = {pandaID: [file_descriptor]}
    else:
        # default so 'data' is always defined even if the rename fails while
        # the file still exists (original could hit a NameError below)
        data = {pandaID: [file_descriptor]}
        # first move existing file to tmp so Harvester does not read it while we edit
        # NOTE(review): structure reconstructed as try/except/else from a
        # whitespace-mangled source — confirm against upstream.
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning('tried moving %s to a tmp filename to add more output files for Harvester.',
                           eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                # bug fix: this message was a string literal broken across a
                # physical line break (SyntaxError as found)
                logger.warning('%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                               eventStatusDumpJsonFile)
        else:
            # rename succeeded: read the existing data and merge in the new entry
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('existing data contains %s', data)
            # if the pandaID already exists, just append the new file to that list
            if pandaID in data:
                logger.debug('addding data to existing panda list')
                data[pandaID].append(file_descriptor)
            # if the pandaID does not exist, add a new list
            else:
                logger.debug('addding new panda id list')
                data[pandaID] = [file_descriptor]

    logger.debug('output to file %s: %s', eventStatusDumpJsonFile, data)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)
def stage_out_files(file_list, output_type):
    """Report a batch of output files to Harvester via eventStatusDumpJsonFile.

    file_list: list of dicts with keys 'pandaid', 'eventrangeid',
               'eventstatus', 'filename' and optionally 'chksum'.
    output_type: one of 'output', 'es_output', 'log'.

    Raises Exception if output_type is invalid.  Merges with any existing
    dump file; writes via a '.tmp' file renamed into place so Harvester
    never reads a partial file.
    """
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']

    # build {pandaID: [file_descriptor, ...]} from the input list
    eventStatusDumpData = {}
    for filedata in file_list:
        # make sure pandaID is a string
        pandaID = str(filedata['pandaid'])
        chksum = filedata.get('chksum')
        # format data for file:
        file_descriptor = {
            'eventRangeID': filedata['eventrangeid'],
            'eventStatus': filedata['eventstatus'],
            'path': filedata['filename'],
            'type': output_type,
            'chksum': chksum,
            'guid': None,
        }
        try:
            eventStatusDumpData[pandaID].append(file_descriptor)
        except KeyError:
            eventStatusDumpData[pandaID] = [file_descriptor]

    # create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # if file does not already exist, new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = eventStatusDumpData
    else:
        # default so 'data' is always defined even if the rename fails while
        # the file still exists (original could hit a NameError below)
        data = eventStatusDumpData
        # first move existing file to tmp so Harvester does not read it while we edit
        # NOTE(review): structure reconstructed as try/except/else from a
        # whitespace-mangled source — confirm against upstream.
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning(
                'tried moving %s to a tmp filename to add more output files for Harvester.',
                eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                # bug fix: this message was a string literal broken across a
                # physical line break (SyntaxError as found)
                logger.warning(
                    '%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                    eventStatusDumpJsonFile)
        else:
            # rename succeeded: read the existing data and merge in the new entries
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('found existing data for pandaIDs: %s', data.keys())
            for pandaID in eventStatusDumpData:
                # if the pandaID already exists, just append the new files to that list
                try:
                    logger.debug('addding data to existing panda list')
                    data[pandaID] += eventStatusDumpData[pandaID]
                # if the pandaID does not exist, add a new list
                except KeyError:
                    logger.debug('addding new panda id list')
                    data[pandaID] = eventStatusDumpData[pandaID]

    if logger.getEffectiveLevel() == logging.DEBUG:
        tmpstr = ' '.join('%s:%s' % (x, len(data[x])) for x in data)
        logger.debug('writing output to file %s with keys: %s',
                     eventStatusDumpJsonFile, tmpstr)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data, pretty_print=True))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)
    logger.debug('done')
def stage_out_file(
        output_type,
        output_path,
        eventRangeID,
        eventStatus,
        pandaID,
        chksum=None,
):
    """Report a single output file to Harvester via eventStatusDumpJsonFile.

    output_type: one of 'output', 'es_output', 'log'.
    output_path: path to the produced file; must exist.
    eventRangeID/eventStatus/pandaID: identifiers recorded in the dump file.
    chksum: optional checksum string recorded alongside the file.

    Raises Exception for an invalid output_type or a missing output_path.
    Merges with any existing dump file; writes via a '.tmp' file renamed
    into place so Harvester never reads a partial file.
    """
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))
    if not os.path.exists(output_path):
        raise Exception('output file not found: %s' % (output_path))

    # make sure pandaID is a string
    pandaID = str(pandaID)

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']
    # first create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # format data for file:
    file_descriptor = {
        'eventRangeID': eventRangeID,
        'eventStatus': eventStatus,
        'path': output_path,
        'type': output_type,
        'chksum': chksum,
        'guid': None,
    }

    # if file does not already exist, new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = {pandaID: [file_descriptor]}
    else:
        # default so 'data' is always defined even if the rename fails while
        # the file still exists (original could hit a NameError below)
        data = {pandaID: [file_descriptor]}
        # first move existing file to tmp so Harvester does not read it while we edit
        # NOTE(review): structure reconstructed as try/except/else from a
        # whitespace-mangled source — confirm against upstream.
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning(
                'tried moving %s to a tmp filename to add more output files for Harvester.',
                eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                # bug fix: this message was a string literal broken across a
                # physical line break (SyntaxError as found)
                logger.warning(
                    '%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                    eventStatusDumpJsonFile)
        else:
            # rename succeeded: read the existing data and merge in the new entry
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('existing data contains %s', data)
            # if the pandaID already exists, just append the new file to that list
            if pandaID in data:
                logger.debug('addding data to existing panda list')
                data[pandaID].append(file_descriptor)
            # if the pandaID does not exist, add a new list
            else:
                logger.debug('addding new panda id list')
                data[pandaID] = [file_descriptor]

    logger.debug('output to file %s: %s', eventStatusDumpJsonFile, data)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)