def request_eventranges(job_def):
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    # retrieve event request file
    eventRequestFile = sfm_har_config['eventRequestFile']
    eventRequestFile_tmp = eventRequestFile + '.tmp'

    # create the event request file if it does not exist
    if not os.path.exists(eventRequestFile):
        # need to output a file containing:
        #   {'nRanges': ???, 'pandaID':???, 'taskID':???, 'jobsetID':???}
        logger.debug(
            'requesting new event ranges by writing to file "%s" with this content: %s',
            eventRequestFile, job_def)

        # get new job definition
        new_job_def = {job_def['pandaID']: job_def}

        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(new_job_def))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)

    else:
        logger.debug('request file already exists. Adding requests')

        # move current file to temp
        os.rename(eventRequestFile, eventRequestFile_tmp)

        with open(eventRequestFile_tmp) as f:
            requests = serializer.deserialize(f.read())

        pandaID = job_def['pandaID']
        if pandaID in requests:
            logger.debug('adding event range count to existing request')
            thisjob = requests[pandaID]
            if thisjob['jobsetID'] == job_def['jobsetID'] and thisjob[
                    'taskID'] == job_def['taskID']:
                thisjob['nRanges'] += job_def['nRanges']
            else:
                logger.warning(
                    'existing request for PandaID %s does not match new request details %s',
                    thisjob, job_def)
        else:
            logger.debug('adding new job definition to existing request')
            requests[pandaID] = job_def

        # output updated requests to file
        with open(eventRequestFile_tmp, 'w') as f:
            f.write(serializer.serialize(requests))

        # now move tmp filename to real filename
        os.rename(eventRequestFile_tmp, eventRequestFile)
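
# Illustrative usage sketch (not part of the module, values hypothetical):
# assuming sfm_har_config has been populated by the Harvester config reader
# and sfm_har_config_done has been set, a request for ten more event ranges
# could look like this, with field names following the request-file format
# noted in the comments above:
#
#   request_eventranges({'pandaID': 1234, 'taskID': 567,
#                        'jobsetID': 89, 'nRanges': 10})
#
# which leaves sfm_har_config['eventRequestFile'] containing:
#
#   {"1234": {"pandaID": 1234, "taskID": 567, "jobsetID": 89, "nRanges": 10}}
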
def athenamp_worker():

    logger.info('start athenamp_worker')

    comm = AthenaCommunicator()

    while True:
        logger.info('start loop, athenamp worker')

        logger.info('sending ready for events')
        comm.send(ATHENA_READY_FOR_EVENTS)

        logger.info('waiting for response')
        msg = comm.recv_block()

        logger.info('received: %s', msg)

        if msg.startswith('['):
            try:
                _l = serializer.deserialize(msg)
            except Exception:
                logger.error('failed to deserialize msg')
                continue

            # received event ranges, sleep for a bit and return the file
            time.sleep(5)

            # return file info
            # "/build1/tsulaia/20.3.7.5/run-es/athenaMP-workers-AtlasG4Tf-sim/worker_1/myHITS.pool.root_000.Range-6,ID:Range-6,CPU:1,WALL:1"

            # a message starting with '[' deserializes to a list of event
            # ranges; build the reply from the first range
            eventrange = _l[0]
            outputfilename = os.path.join(
                os.getcwd(), 'TEST' + eventrange['eventRangeID'] + '.ROOT')
            msg = (outputfilename + ',ID:' + eventrange['eventRangeID'] +
                   ',CPU:1,WALL:1')
            logger.info('sending output file: %s', msg)
            comm.send(msg)

        elif NO_MORE_EVENTS in msg:
            break

    logger.info('worker exiting')
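
# Message-flow sketch for the worker loop above (values are illustrative,
# following the example path in the comments): the worker announces
# readiness, then either receives a serialized list of event ranges or a
# NO_MORE_EVENTS message.
#
#   worker -> ATHENA_READY_FOR_EVENTS
#   server -> '[{"eventRangeID": "Range-6", ...}]'
#   worker -> '<cwd>/TESTRange-6.ROOT,ID:Range-6,CPU:1,WALL:1'
#   ...
#   server -> NO_MORE_EVENTS   (worker exits the loop)
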
def stage_out_files(file_list, output_type):
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']

    eventStatusDumpData = {}
    # loop over filelist
    for filedata in file_list:

        # make sure pandaID is a string
        pandaID = str(filedata['pandaid'])

        chksum = None
        if 'chksum' in filedata:
            chksum = filedata['chksum']

        # filename = os.path.join(output_path,os.path.basename(filedata['filename']))

        # format data for file:
        file_descriptor = {
            'eventRangeID': filedata['eventrangeid'],
            'eventStatus': filedata['eventstatus'],
            'path': filedata['filename'],
            'type': output_type,
            'chksum': chksum,
            'guid': None,
        }
        try:
            eventStatusDumpData[pandaID].append(file_descriptor)
        except KeyError:
            eventStatusDumpData[pandaID] = [file_descriptor]

    # create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # if the file does not already exist, the new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = eventStatusDumpData

    # if the file exists, move it to a tmp filename, update its contents and then recreate it.
    else:

        # first move existing file to tmp so Harvester does not read it while we edit
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning(
                'failed to move %s to a tmp filename to add more output files for Harvester.',
                eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                logger.warning(
                    '%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                    eventStatusDumpJsonFile)
                data = eventStatusDumpData
            else:
                # the file is still present but could not be moved;
                # re-raise rather than continue with undefined data
                raise
        else:

            # now open and read in the data
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('found existing data for pandaIDs: %s', data.keys())

            for pandaID in eventStatusDumpData:

                # if the pandaID already exists, just append the new files to that list
                if pandaID in data:
                    logger.debug('adding data to existing panda list')
                    data[pandaID] += eventStatusDumpData[pandaID]
                # if the pandaID does not exist, add a new list
                else:
                    logger.debug('adding new panda id list')
                    data[pandaID] = eventStatusDumpData[pandaID]

    if logger.getEffectiveLevel() == logging.DEBUG:
        tmpstr = ' '.join('%s:%s' % (x, len(data[x])) for x in data)
        logger.debug('writing output to file %s with keys: %s',
                     eventStatusDumpJsonFile, tmpstr)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data, pretty_print=True))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)

    logger.debug('done')
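
# Illustrative usage sketch (hypothetical values): each entry in file_list
# is a dict using the lowercase keys read above; 'chksum' is optional.
#
#   stage_out_files(
#       [{'pandaid': 1234,
#         'eventrangeid': 'Range-6',
#         'eventstatus': 'finished',
#         'filename': '/path/to/myHITS.pool.root_000.Range-6',
#         'chksum': '1234abcd'}],
#       output_type='es_output')
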
def stage_out_file(
    output_type,
    output_path,
    eventRangeID,
    eventStatus,
    pandaID,
    chksum=None,
):
    global sfm_har_config, sfm_har_config_done
    sfm_har_config_done.wait()

    if output_type not in ['output', 'es_output', 'log']:
        raise Exception('incorrect type provided: %s' % (output_type))

    if not os.path.exists(output_path):
        raise Exception('output file not found: %s' % (output_path))

    # make sure pandaID is a string
    pandaID = str(pandaID)

    # load name of eventStatusDumpJsonFile file
    eventStatusDumpJsonFile = sfm_har_config['eventStatusDumpJsonFile']

    # first create a temp file to place contents
    # this avoids Harvester trying to read the file while it is being written
    eventStatusDumpJsonFile_tmp = eventStatusDumpJsonFile + '.tmp'

    # format data for file:
    file_descriptor = {
        'eventRangeID': eventRangeID,
        'eventStatus': eventStatus,
        'path': output_path,
        'type': output_type,
        'chksum': chksum,
        'guid': None,
    }

    # if the file does not already exist, the new data is just what we have
    if not os.path.exists(eventStatusDumpJsonFile):
        data = {pandaID: [file_descriptor]}

    # if the file exists, move it to a tmp filename, update its contents and then recreate it.
    else:

        # first move existing file to tmp so Harvester does not read it while we edit
        try:
            os.rename(eventStatusDumpJsonFile, eventStatusDumpJsonFile_tmp)
        except Exception:
            logger.warning(
                'failed to move %s to a tmp filename to add more output files for Harvester.',
                eventStatusDumpJsonFile)
            if not os.path.exists(eventStatusDumpJsonFile):
                logger.warning(
                    '%s file no longer exists so Harvester must have grabbed it. Need to create a new file',
                    eventStatusDumpJsonFile)
                data = {pandaID: [file_descriptor]}
            else:
                # the file is still present but could not be moved;
                # re-raise rather than continue with undefined data
                raise
        else:

            # now open and read in the data
            with open(eventStatusDumpJsonFile_tmp, 'r') as f:
                data = serializer.deserialize(f.read())
            logger.debug('existing data contains %s', data)
            # if the pandaID already exists, just append the new file to that list
            if pandaID in data:
                logger.debug('adding data to existing panda list')
                data[pandaID].append(file_descriptor)
            # if the pandaID does not exist, add a new list
            else:
                logger.debug('adding new panda id list')
                data[pandaID] = [file_descriptor]

    logger.debug('output to file %s: %s', eventStatusDumpJsonFile, data)

    # overwrite the temp file with the updated data
    with open(eventStatusDumpJsonFile_tmp, 'w') as f:
        f.write(serializer.serialize(data))

    # move tmp file into place
    os.rename(eventStatusDumpJsonFile_tmp, eventStatusDumpJsonFile)
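
# Minimal JSON-based stand-in for the serializer module used throughout
# this file, for local testing only (an assumption; the real serializer
# may differ). It mirrors the only two calls made above:
# serializer.serialize(data, pretty_print=False) and
# serializer.deserialize(text).
#
#   import json
#
#   class serializer:
#       @staticmethod
#       def serialize(data, pretty_print=False):
#           return json.dumps(data, indent=2 if pretty_print else None)
#
#       @staticmethod
#       def deserialize(text):
#           return json.loads(text)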