Esempio n. 1
0
    def __update_statistics_from_job(
        self,
        job_id,
        job_vgrid_name,
        buildcache_dict,
        job_dict,
        ):
        """Book a single job on the vgrid/resource statistics and refresh
        the build cache entry for it.

        Arguments:
        job_id -- key identifying the job in buildcache_dict.
        job_vgrid_name -- name of the vgrid whose counters are updated. It
            is compared with the upper-cased matched job vgrid, so it is
            presumably upper case already (confirm against the caller).
        buildcache_dict -- maps job ids to the status they had when last
            seen. Modified in place.
        job_dict -- the job description. Its 'VGRID' entry is replaced with
            the result of validated_vgrid_list(); for a FINISHED job without
            a RESOURCE_VGRID value the 'VGRID' entry of its RESOURCE_CONFIG
            is replaced the same way.

        Nothing is returned: the results are the counter updates made
        through the private add helpers and the changes to buildcache_dict.
        A KeyError propagates if job_dict lacks a field that is read.

        Only constructs that parse on both Python 2 and Python 3 are used
        ('in' instead of dict.has_key, print with one parenthesised value).
        """

        def add_vgrid_stat(stat_name, amount):
            """Add amount to counter stat_name of job_vgrid_name"""

            self.__add(self.VGRID, job_vgrid_name, stat_name, amount)

        def to_datetime(stamp):
            """Build a datetime from the tm_* fields of stamp"""

            return datetime.datetime(
                stamp.tm_year,
                stamp.tm_mon,
                stamp.tm_mday,
                stamp.tm_hour,
                stamp.tm_min,
                stamp.tm_sec,
                )

        # Fix legacy VGRIDs

        job_dict['VGRID'] = validated_vgrid_list(self.__configuration,
                                                 job_dict)

        # A job without a cache entry has not been seen before, so this
        # is the one time its requested amounts are added.

        if job_id not in buildcache_dict:
            for field in ('NODECOUNT', 'CPUTIME', 'CPUCOUNT', 'DISK',
                          'MEMORY'):
                add_vgrid_stat(field + '_REQ', int(job_dict[field]))
            add_vgrid_stat('RUNTIMEENVIRONMENT_REQ',
                           len(job_dict['RUNTIMEENVIRONMENT']))

        # Both names stay None unless the job carries a RESOURCE_CONFIG

        unique_resource_name = None
        resource_id = None
        if 'RESOURCE_CONFIG' in job_dict:
            if 'UNIQUE_RESOURCE_NAME' in job_dict:
                unique_resource_name = \
                    job_dict['UNIQUE_RESOURCE_NAME'].upper()

            saved_config = job_dict['RESOURCE_CONFIG']
            if 'RESOURCE_ID' in saved_config:
                resource_id = saved_config['RESOURCE_ID'].upper()

        status = job_dict['STATUS']
        if status in ('PARSE', 'QUEUED', 'EXECUTING', 'EXPIRED', 'FROZEN',
                      'CANCELED'):

            # These states only bump the vgrid counter of the same name

            add_vgrid_stat(status, 1)
        elif status in ('FAILED', 'RETRY'):

            # These states are counted on the resource as well

            add_vgrid_stat(status, 1)
            self.__add_resource(unique_resource_name, resource_id,
                                status, 1)
        elif status == 'FINISHED':

            # Recent jobs have the scheduled resource vgrid available in
            # the RESOURCE_VGRID field. However, that vgrid may be a parent
            # of the requested job vgrid due to inheritance.
            # We repeat the vgrid match up to find the actual job vgrid
            # here. Fall back to saved resource prioritized vgrid list for
            # old jobs.

            active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
            if active_res_vgrid:
                search_vgrids = [active_res_vgrid]
            else:

                # A single parenthesised value prints identically whether
                # print is a statement or a function.

                print("WARNING: no RESOURCE_VGRID for job %(JOB_ID)s"
                      % job_dict)
                resource_config = job_dict['RESOURCE_CONFIG']

                # Fix legacy VGRIDs

                resource_config['VGRID'] = validated_vgrid_list(
                    self.__configuration, resource_config)
                search_vgrids = resource_config['VGRID']
            (match, active_job_vgrid, _) = job_fits_res_vgrid(
                job_dict['VGRID'], search_vgrids)

            if not match:

                # This should not happen - scheduled job to wrong vgrid!
                # The placeholder is not expected to equal any
                # job_vgrid_name, so such a job gets no FINISHED stats.

                print("ERROR: %s no match for vgrids: %s vs %s"
                      % (job_dict['JOB_ID'], job_dict['VGRID'],
                         search_vgrids))
                active_job_vgrid = '__NO_SUCH_JOB_VGRID__'

            # The finished job is only counted when the vgrid it matched
            # is the one this call updates.

            if active_job_vgrid.upper() == job_vgrid_name:

                # Compute used wall time

                finished_datetime = \
                    to_datetime(job_dict['FINISHED_TIMESTAMP'])
                starting_datetime = \
                    to_datetime(job_dict['EXECUTING_TIMESTAMP'])
                used_walltime = finished_datetime - starting_datetime

                # VGrid stats

                add_vgrid_stat('FINISHED', 1)
                for field in ('NODECOUNT', 'CPUTIME'):
                    add_vgrid_stat(field + '_DONE', int(job_dict[field]))
                add_vgrid_stat('USED_WALLTIME', used_walltime)
                for field in ('CPUCOUNT', 'DISK', 'MEMORY'):
                    add_vgrid_stat(field + '_DONE', int(job_dict[field]))
                add_vgrid_stat('RUNTIMEENVIRONMENT_DONE',
                               len(job_dict['RUNTIMEENVIRONMENT']))

                # Resource stats

                self.__add_resource(unique_resource_name, resource_id,
                                    'FINISHED', 1)
                self.__add_resource(unique_resource_name, resource_id,
                                    'USED_WALLTIME', used_walltime)

                # RE stats

                for runtime_env in job_dict['RUNTIMEENVIRONMENT']:
                    self.__addre(self.VGRID, job_vgrid_name, runtime_env,
                                 1)

                    # Old mRSL files lack the UNIQUE_RESOURCE_NAME field

                    if unique_resource_name:
                        self.__addre(self.RESOURCE_TOTAL,
                                     unique_resource_name, runtime_env, 1)

                    # Old mRSL files lack the RESOURCE_ID field
                    # Old mRSL files have
                    # resource_id == unique_resource_name

                    if resource_id and resource_id != unique_resource_name:
                        self.__addre(self.RESOURCE_NODE, resource_id,
                                     runtime_env, 1)
        else:
            print('Unknown status: ' + status)

        # The job was counted under its cached status last time, so take
        # it out of that counter again.

        if job_id in buildcache_dict and \
               buildcache_dict[job_id] in pending_states:
            add_vgrid_stat(buildcache_dict[job_id], -1)

        # Cache current status for use in next iteration.
        # Note that status: CANCELED, FAILED, EXPIRED or FINISHED are
        # final stages and therefore none of those should occur in
        # the cache, as the mRSL file should not be modified once it
        # reaches one of those stages.

        if status in pending_states:
            buildcache_dict[job_id] = status
        elif job_id in buildcache_dict:
            del buildcache_dict[job_id]
Esempio n. 2
0
    def __update_statistics_from_job(
        self,
        job_id,
        job_vgrid_name,
        buildcache_dict,
        job_dict,
    ):
        """Book a single job on the vgrid/resource statistics and refresh
        the build cache entry for it.

        Arguments:
        job_id -- key identifying the job in buildcache_dict.
        job_vgrid_name -- name of the vgrid whose counters are updated. It
            is compared with the upper-cased matched job vgrid, so it is
            presumably upper case already (confirm against the caller).
        buildcache_dict -- maps job ids to the status they had when last
            seen. Modified in place.
        job_dict -- the job description. Its 'VGRID' entry is replaced with
            the result of validated_vgrid_list(); for a FINISHED job without
            a RESOURCE_VGRID value the 'VGRID' entry of its RESOURCE_CONFIG
            is replaced the same way.

        Nothing is returned: the results are the counter updates made
        through the private add helpers and the changes to buildcache_dict.
        A KeyError propagates if job_dict lacks a field that is read.

        Only constructs that parse on both Python 2 and Python 3 are used
        ('in' instead of dict.has_key, print with one parenthesised value).
        """

        def add_vgrid_stat(stat_name, amount):
            """Add amount to counter stat_name of job_vgrid_name"""

            self.__add(self.VGRID, job_vgrid_name, stat_name, amount)

        def to_datetime(stamp):
            """Build a datetime from the tm_* fields of stamp"""

            return datetime.datetime(
                stamp.tm_year,
                stamp.tm_mon,
                stamp.tm_mday,
                stamp.tm_hour,
                stamp.tm_min,
                stamp.tm_sec,
            )

        # Fix legacy VGRIDs

        job_dict['VGRID'] = validated_vgrid_list(self.__configuration,
                                                 job_dict)

        # A job without a cache entry has not been seen before, so this
        # is the one time its requested amounts are added.

        if job_id not in buildcache_dict:
            for field in ('NODECOUNT', 'CPUTIME', 'CPUCOUNT', 'DISK',
                          'MEMORY'):
                add_vgrid_stat(field + '_REQ', int(job_dict[field]))
            add_vgrid_stat('RUNTIMEENVIRONMENT_REQ',
                           len(job_dict['RUNTIMEENVIRONMENT']))

        # Both names stay None unless the job carries a RESOURCE_CONFIG

        unique_resource_name = None
        resource_id = None
        if 'RESOURCE_CONFIG' in job_dict:
            if 'UNIQUE_RESOURCE_NAME' in job_dict:
                unique_resource_name = \
                    job_dict['UNIQUE_RESOURCE_NAME'].upper()

            saved_config = job_dict['RESOURCE_CONFIG']
            if 'RESOURCE_ID' in saved_config:
                resource_id = saved_config['RESOURCE_ID'].upper()

        status = job_dict['STATUS']
        if status in ('PARSE', 'QUEUED', 'EXECUTING', 'EXPIRED', 'FROZEN',
                      'CANCELED'):

            # These states only bump the vgrid counter of the same name

            add_vgrid_stat(status, 1)
        elif status in ('FAILED', 'RETRY'):

            # These states are counted on the resource as well

            add_vgrid_stat(status, 1)
            self.__add_resource(unique_resource_name, resource_id,
                                status, 1)
        elif status == 'FINISHED':

            # Recent jobs have the scheduled resource vgrid available in
            # the RESOURCE_VGRID field. However, that vgrid may be a parent
            # of the requested job vgrid due to inheritance.
            # We repeat the vgrid match up to find the actual job vgrid
            # here. Fall back to saved resource prioritized vgrid list for
            # old jobs.

            active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
            if active_res_vgrid:
                search_vgrids = [active_res_vgrid]
            else:

                # A single parenthesised value prints identically whether
                # print is a statement or a function.

                print("WARNING: no RESOURCE_VGRID for job %(JOB_ID)s"
                      % job_dict)
                resource_config = job_dict['RESOURCE_CONFIG']

                # Fix legacy VGRIDs

                resource_config['VGRID'] = validated_vgrid_list(
                    self.__configuration, resource_config)
                search_vgrids = resource_config['VGRID']
            (match, active_job_vgrid,
             _) = job_fits_res_vgrid(job_dict['VGRID'], search_vgrids)

            if not match:

                # This should not happen - scheduled job to wrong vgrid!
                # The placeholder is not expected to equal any
                # job_vgrid_name, so such a job gets no FINISHED stats.

                print("ERROR: %s no match for vgrids: %s vs %s"
                      % (job_dict['JOB_ID'], job_dict['VGRID'],
                         search_vgrids))
                active_job_vgrid = '__NO_SUCH_JOB_VGRID__'

            # The finished job is only counted when the vgrid it matched
            # is the one this call updates.

            if active_job_vgrid.upper() == job_vgrid_name:

                # Compute used wall time

                finished_datetime = \
                    to_datetime(job_dict['FINISHED_TIMESTAMP'])
                starting_datetime = \
                    to_datetime(job_dict['EXECUTING_TIMESTAMP'])
                used_walltime = finished_datetime - starting_datetime

                # VGrid stats

                add_vgrid_stat('FINISHED', 1)
                for field in ('NODECOUNT', 'CPUTIME'):
                    add_vgrid_stat(field + '_DONE', int(job_dict[field]))
                add_vgrid_stat('USED_WALLTIME', used_walltime)
                for field in ('CPUCOUNT', 'DISK', 'MEMORY'):
                    add_vgrid_stat(field + '_DONE', int(job_dict[field]))
                add_vgrid_stat('RUNTIMEENVIRONMENT_DONE',
                               len(job_dict['RUNTIMEENVIRONMENT']))

                # Resource stats

                self.__add_resource(unique_resource_name, resource_id,
                                    'FINISHED', 1)
                self.__add_resource(unique_resource_name, resource_id,
                                    'USED_WALLTIME', used_walltime)

                # RE stats

                for runtime_env in job_dict['RUNTIMEENVIRONMENT']:
                    self.__addre(self.VGRID, job_vgrid_name, runtime_env,
                                 1)

                    # Old mRSL files lack the UNIQUE_RESOURCE_NAME field

                    if unique_resource_name:
                        self.__addre(self.RESOURCE_TOTAL,
                                     unique_resource_name, runtime_env, 1)

                    # Old mRSL files lack the RESOURCE_ID field
                    # Old mRSL files have
                    # resource_id == unique_resource_name

                    if resource_id and resource_id != unique_resource_name:
                        self.__addre(self.RESOURCE_NODE, resource_id,
                                     runtime_env, 1)
        else:
            print('Unknown status: ' + status)

        # The job was counted under its cached status last time, so take
        # it out of that counter again.

        if job_id in buildcache_dict and \
               buildcache_dict[job_id] in pending_states:
            add_vgrid_stat(buildcache_dict[job_id], -1)

        # Cache current status for use in next iteration.
        # Note that status: CANCELED, FAILED, EXPIRED or FINISHED are
        # final stages and therefore none of those should occur in
        # the cache, as the mRSL file should not be modified once it
        # reaches one of those stages.

        if status in pending_states:
            buildcache_dict[job_id] = status
        elif job_id in buildcache_dict:
            del buildcache_dict[job_id]
Esempio n. 3
0
    # Tallies filled in by the loop below, one count per key:
    #   explicit_vgrids - resource vgrid found in the job's own VGRID value
    #   implicit_vgrids - resource vgrid not found in the job's VGRID value
    #   parent_vgrids   - string form of the job VGRID value for the
    #                     implicit cases
    # NOTE(review): matches, finished_count, resource_vgrid_map and
    # job_vgrid_map are set up outside this excerpt.
    explicit_vgrids = {}
    implicit_vgrids = {}
    parent_vgrids = {}
    for job_dict in matches:
        # NOTE(review): job_id is not used in the visible part of the loop
        job_id = job_dict['JOB_ID']
        job_vgrid = job_dict['VGRID']
        job_vgrid_string = str(job_vgrid)
        # Only jobs with status FINISHED are counted
        if job_dict['STATUS'] == 'FINISHED':
            finished_count += 1

            # Match against the recorded RESOURCE_VGRID when it is set,
            # otherwise against the VGRID entry saved in RESOURCE_CONFIG.
            active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
            if active_res_vgrid:
                search_vgrids = [active_res_vgrid]
            else:
                search_vgrids = job_dict['RESOURCE_CONFIG']['VGRID']
            # Only the second element of the result is used below
            (match, active_job_vgrid, _) = job_fits_res_vgrid(
                job_dict['VGRID'], search_vgrids)

            # Jobs without RESOURCE_VGRID are tallied under the key None
            resource_vgrid_map[active_res_vgrid] = resource_vgrid_map.get(
                active_res_vgrid, 0) + 1
            job_vgrid_map[active_job_vgrid] = job_vgrid_map.get(
                active_job_vgrid, 0) + 1
            # NOTE(review): assumes VGRID is a list of names; with a plain
            # string value this would be a substring test - confirm.
            if active_res_vgrid not in job_vgrid:
                implicit_vgrids[active_res_vgrid] = implicit_vgrids.get(
                    active_res_vgrid, 0) + 1
                parent_vgrids[job_vgrid_string] = parent_vgrids.get(
                    job_vgrid_string, 0) + 1
            else:
                explicit_vgrids[active_res_vgrid] = explicit_vgrids.get(
                    active_res_vgrid, 0) + 1

    
Esempio n. 4
0
    # Tallies filled in by the loop below, one count per key:
    #   implicit_vgrids - resource vgrid not found in the job's VGRID value
    #   parent_vgrids   - string form of the job VGRID value for those cases
    # NOTE(review): explicit_vgrids, matches, finished_count,
    # resource_vgrid_map and job_vgrid_map are set up outside this excerpt.
    implicit_vgrids = {}
    parent_vgrids = {}
    for job_dict in matches:
        # NOTE(review): job_id is not used in the visible part of the loop
        job_id = job_dict['JOB_ID']
        job_vgrid = job_dict['VGRID']
        job_vgrid_string = str(job_vgrid)
        # Only jobs with status FINISHED are counted
        if job_dict['STATUS'] == 'FINISHED':
            finished_count += 1

            # Match against the recorded RESOURCE_VGRID when it is set,
            # otherwise against the VGRID entry saved in RESOURCE_CONFIG.
            active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
            if active_res_vgrid:
                search_vgrids = [active_res_vgrid]
            else:
                search_vgrids = job_dict['RESOURCE_CONFIG']['VGRID']
            # Only the second element of the result is used below
            (match, active_job_vgrid,
             _) = job_fits_res_vgrid(job_dict['VGRID'], search_vgrids)

            # Jobs without RESOURCE_VGRID are tallied under the key None
            resource_vgrid_map[active_res_vgrid] = resource_vgrid_map.get(
                active_res_vgrid, 0) + 1
            job_vgrid_map[active_job_vgrid] = job_vgrid_map.get(
                active_job_vgrid, 0) + 1
            # NOTE(review): assumes VGRID is a list of names; with a plain
            # string value this would be a substring test - confirm.
            if active_res_vgrid not in job_vgrid:
                implicit_vgrids[active_res_vgrid] = implicit_vgrids.get(
                    active_res_vgrid, 0) + 1
                parent_vgrids[job_vgrid_string] = parent_vgrids.get(
                    job_vgrid_string, 0) + 1
            else:
                explicit_vgrids[active_res_vgrid] = explicit_vgrids.get(
                    active_res_vgrid, 0) + 1