def get_session_profile(sid, src=None):
    """
    Load, combine and clean the profiles of session `sid`.

    `src` is the directory holding the session's `*.prof` files (searched at
    the top level and one level below); it defaults to `<cwd>/<sid>`.  If
    that path does not exist, the profiles are obtained via
    `fetch_profiles()` instead.

    Returns the tuple `(profile, accuracy, hostmap)`.
    """

    src = src or "%s/%s" % (os.getcwd(), sid)

    if not os.path.exists(src):
        # nothing available locally: need to fetch profiles
        from .session import fetch_profiles
        prof_files = fetch_profiles(sid=sid, skip_existing=True)
    else:
        # we have profiles locally
        prof_files = [path
                      for pattern in ("%s/*.prof", "%s/*/*.prof")
                      for path in glob.glob(pattern % src)]

    # frequent but uninteresting events are dropped while reading
    event_filter = dict()
    event_filter[ru.EVENT] = ['publish', 'work start', 'work done']
    event_filter[ru.MSG] = ['update unit state', 'unit update pushed',
                            'bulked', 'bulk size']

    parsed = ru.read_profiles(prof_files, sid, efilter=event_filter)
    combined, accuracy = ru.combine_profiles(parsed)
    cleaned = ru.clean_profile(combined, sid, rps.FINAL, rps.CANCELED)

    # FIXME: legacy host notation - deprecated (used only as fallback)
    hostmap = get_hostmap(cleaned) or get_hostmap_deprecated(parsed)

    return cleaned, accuracy, hostmap
def get_session_profile(sid, src=None):
    """
    Read the profiles of session `sid` and return them in combined form.

    Profiles are globbed from `src` (default: `<cwd>/<sid>`) and from its
    direct subdirectories.  When `src` does not exist, `fetch_profiles()` is
    asked for the profile list instead.

    Returns `(profile, accuracy, hostmap)`.
    """

    if not src:
        src = "%s/%s" % (os.getcwd(), sid)

    if os.path.exists(src):
        # we have profiles locally
        found = list()
        found.extend(glob.glob("%s/*.prof" % src))
        found.extend(glob.glob("%s/*/*.prof" % src))
    else:
        # need to fetch profiles
        from .session import fetch_profiles
        found = fetch_profiles(sid=sid, skip_existing=True)

    # filter out some frequent, but uninteresting events
    skip_events = ['publish', 'work start', 'work done']
    skip_msgs = ['update unit state', 'unit update pushed',
                 'bulked', 'bulk size']
    efilter = {ru.EVENT: skip_events, ru.MSG: skip_msgs}

    loaded = ru.read_profiles(found, sid, efilter=efilter)
    merged, accuracy = ru.combine_profiles(loaded)
    merged = ru.clean_profile(merged, sid, rps.FINAL, rps.CANCELED)

    hostmap = get_hostmap(merged)
    if hostmap:
        return merged, accuracy, hostmap

    # FIXME: legacy host notation - deprecated
    return merged, accuracy, get_hostmap_deprecated(loaded)
def get_session_profile(sid, src=None):
    """
    Load, combine and clean the profiles of session `sid`.

    `src` names the directory with the session's `*.prof` files (top level
    plus one directory level below) and defaults to `<cwd>/<sid>`.  If `src`
    does not exist, the profile list comes from `fetch_profiles()`.

    Returns the tuple `(profile, accuracy, hostmap)`.
    """

    src = src or "%s/%s" % (os.getcwd(), sid)

    if not os.path.exists(src):
        # need to fetch profiles
        from .session import fetch_profiles
        prof_files = fetch_profiles(sid=sid, skip_existing=True)
    else:
        # we have profiles locally
        prof_files = glob.glob("%s/*.prof" % src) \
                   + glob.glob("%s/*/*.prof" % src)

    # filter out some frequent, but uninteresting events
    # ('get' is deliberately not part of the filter)
    ignored_events = [
        'publish',
        'schedule_skip',
        'schedule_fail',
        'staging_stderr_start',
        'staging_stderr_stop',
        'staging_stdout_start',
        'staging_stdout_stop',
        'staging_uprof_start',
        'staging_uprof_stop',
        'update_pushed',
    ]
    event_filter = {ru.EVENT: ignored_events}

    parsed = ru.read_profiles(prof_files, sid, efilter=event_filter)
    combined, accuracy = ru.combine_profiles(parsed)
    cleaned = ru.clean_profile(combined, sid, s.FINAL, s.CANCELED)

    # FIXME: legacy host notation - deprecated (used only as fallback)
    hostmap = get_hostmap(cleaned) or get_hostmap_deprecated(parsed)

    return cleaned, accuracy, hostmap
def get_session_profile(sid, src=None):
    """
    Read and combine the EnTK profiles of session `sid`.

    Profiles are globbed from `src` (default: the current working directory)
    and its direct subdirectories.  If `src` does not exist, `./<sid>` is
    searched the same way instead.

    Returns the tuple `(profile, accuracy, hostmap)`.

    Raises `EnTKError` if no profile is found, or if reading, combining or
    cleaning the profiles fails.
    """

    src = src or os.getcwd()

    # EnTK profiles are always on localhost
    if os.path.exists(src):
        patterns = ("%s/*.prof" % (src), "%s/*/*.prof" % (src))
    else:
        patterns = ("./%s/*.prof" % (sid), "./%s/*/*.prof" % (sid))

    prof_files = list()
    for pattern in patterns:
        prof_files.extend(glob.glob(pattern))

    if not prof_files:
        raise EnTKError('No profiles found at %s' % src)

    try:
        parsed = ru.read_profiles(profiles=prof_files, sid=sid)
        combined, accuracy = ru.combine_profiles(parsed)
        cleaned = ru.clean_profile(combined, sid=sid,
                                   state_final=res.FINAL,
                                   state_canceled=res.CANCELED)

        # FIXME: legacy host notation - deprecated (used only as fallback)
        hostmap = get_hostmap(cleaned) or get_hostmap_deprecated(parsed)

        return cleaned, accuracy, hostmap

    except Exception as ex:
        # Push the exception raised by child functions
        print(traceback.format_exc())
        raise EnTKError('Error: %s' % ex)
def get_session_profile(sid, src=None):
    """
    Read and combine the EnTK profiles of session `sid`.

    Profiles are expected as `<src>/<sid>/*.prof`, where `src` defaults to
    the current working directory.

    Returns the tuple `(profile, accuracy, hostmap)`.

    Raises `EnTKError` if the session directory does not exist, if it holds
    no profiles, or if reading, combining or cleaning the profiles fails.
    """

    if not src:
        src = os.getcwd()

    # EnTK profiles are always on localhost.  Check the session directory
    # itself (not just `src`), so that the error raised matches the path
    # which is actually missing.
    session_dir = '%s/%s' % (src, sid)
    if not os.path.exists(session_dir):
        raise EnTKError('%s/%s does not exist' % (src, sid))

    profiles = glob.glob("%s/*.prof" % session_dir)
    if not profiles:
        raise EnTKError('No profiles found at %s' % session_dir)

    try:
        profiles = ru.read_profiles(profiles=profiles, sid=sid)
        prof, acc = ru.combine_profiles(profiles)
        prof = ru.clean_profile(prof, sid=sid,
                                state_final=res.FINAL,
                                state_canceled=res.CANCELED)

        hostmap = get_hostmap(prof)
        if not hostmap:
            # FIXME: legacy host notation - deprecated
            hostmap = get_hostmap_deprecated(profiles)

        return prof, acc, hostmap

    except Exception as ex:
        # Push the exception raised by child functions.  The single-argument
        # call form of print behaves identically on Python 2 and Python 3.
        print(traceback.format_exc())
        raise EnTKError('Error: %s' % ex)
def __init__(self, src, stype, sid=None, _entities=None, _init=True):
    '''
    Create a radical.analytics session for analysis.

    The session is created from a set of profiles, which usually have been
    produced from some other session object in the RCT stack, such as
    radical.pilot.  Profiles are accepted as a directory, as a single
    `.prof` file, or as a tarball of such a directory.  A tarball is
    extracted next to itself (into the directory the tarball lives in),
    unless the extraction target already exists, and is then handled just
    like the directory case.

    Arguments:
        src       : path to a profile directory, a `.prof` file or a tarball
                    (`.tgz`, `.tbz`, `.tbz2`, `.tar.gz`, `.tar.bz`,
                    `.tar.bz2`)
        stype     : session type, one of `radical`, `radical.pilot` or
                    `radical.entk`
        sid       : session ID; if not given, it is derived from the base
                    name of `src` (after tarball extraction)
        _entities : accepted for interface compatibility; not used here
        _init     : if set, entities and properties are initialized from
                    the loaded profile

    Raises:
        ValueError   : `src` does not exist, is a file which is neither a
                       profile nor a known tarball type, or `stype` is not
                       supported
        RuntimeError : the tarball could not be extracted
    '''

    if not os.path.exists(src):
        raise ValueError('src [%s] does not exist' % src)

    if os.path.isfile(src):

        if src.endswith('.prof'):
            # a single profile is used as is
            tgt = src

        else:
            # src is a file - we assume it is a tarball; the extraction
            # target is the tarball path without its suffix
            tarball_suffixes = ('.tar.gz', '.tar.bz2', '.tar.bz',
                                '.tgz', '.tbz2', '.tbz')
            for suffix in tarball_suffixes:
                if src.endswith(suffix):
                    tgt = src[:-len(suffix)]
                    break
            else:
                raise ValueError('src does not look like a tarball or profile')

        if tgt and not os.path.exists(tgt):

            # need to extract
            print('extract tarball to %s' % tgt)
            try:
                # 'r:*' lets tarfile detect the compression, so gzip
                # tarballs work as well as bzip2 ones; the context manager
                # closes the archive again.
                # NOTE: extractall() trusts the member paths stored in the
                #       archive - only use on tarballs from trusted sources.
                with tarfile.open(name=src, mode='r:*') as tf:
                    tf.extractall(path=os.path.dirname(tgt))

            except Exception as e:
                raise RuntimeError('Cannot extract tarball: %s' % repr(e))

        # switch to the extracted data dir
        if tgt:
            src = tgt

    # if no sid is given, we assume it is the directory name
    if not sid:
        if src.endswith('/'):
            src = src[:-1]
        sid = os.path.basename(src)

    self._sid   = sid
    self._src   = src
    self._stype = stype

    if stype == 'radical':

        # src is expected to point either to a single profile, or to
        # a directory tree containing profiles
        if not src:
            raise ValueError('RA session types need `src` specified')

        profiles = list()
        if os.path.isfile(src):
            profiles.append(src)
        else:
            for root, _dirs, files in os.walk(src):
                for f in files:
                    if f.endswith('.prof'):
                        profiles.append('%s/%s' % (root, f))

        profiles = ru.read_profiles(profiles, sid=sid)
        self._profile, accuracy = ru.combine_profiles(profiles)

        # NOTE(review): the accuracy returned by combine_profiles() is not
        #               stored here, the description reports 0.0 - confirm
        #               that this is intended.
        self._description = {'tree'     : dict(),
                             'entities' : list(),
                             'hostmap'  : dict(),
                             'accuracy' : 0.0}

    elif stype == 'radical.pilot':

        import radical.pilot.utils as rpu

        self._profile, accuracy, hostmap \
            = rpu.get_session_profile(sid=sid, src=self._src)
        self._description \
            = rpu.get_session_description(sid=sid, src=self._src)

        self._description['accuracy'] = accuracy
        self._description['hostmap']  = hostmap

    elif stype == 'radical.entk':

        import radical.entk.utils as reu

        self._profile, accuracy, hostmap \
            = reu.get_session_profile(sid=sid, src=self._src)
        self._description \
            = reu.get_session_description(sid=sid, src=self._src)

        self._description['accuracy'] = accuracy
        self._description['hostmap']  = hostmap

    else:
        raise ValueError('unsupported session type [%s]' % stype)

    self._t_start = None
    self._t_stop  = None
    self._ttc     = None

    self._log = ru.Logger('radical.analytics')
    self._rep = ru.Reporter('radical.analytics')

    # internal state is represented by a dict of entities:
    # dict keys are entity uids (which are assumed to be unique per
    # session), dict values are ra.Entity instances.
    self._entities = dict()
    if _init:
        self._initialize_entities(self._profile)

    # we do some bookkeeping in self._properties where we keep a list of
    # property values around which we encountered in self._entities.
    self._properties = dict()
    if _init:
        self._initialize_properties()