def job_properties( self, job_id, job_directory ):
    pid = open( self.__pid_file( job_directory ), "r" ).read().strip()
    contents = os.listdir( job_directory )
    try:
        rel_path = filter( self._is_instrumented_collectl_log, contents )[ 0 ]
        path = os.path.join( job_directory, rel_path )
    except IndexError:
        message = "Failed to find collectl log in directory %s, files were %s" % ( job_directory, contents )
        raise Exception( message )

    properties = dict(
        pid=int( pid ),
    )

    if self.saved_logs_path:
        destination_rel_dir = os.path.join( *directory_hash.directory_hash_id( job_id ) )
        destination_rel_path = os.path.join( destination_rel_dir, rel_path )
        destination_path = os.path.join( self.saved_logs_path, destination_rel_path )
        destination_dir = os.path.dirname( destination_path )
        if not os.path.isdir( destination_dir ):
            os.makedirs( destination_dir )
        shutil.copyfile( path, destination_path )
        properties[ "raw_log_path" ] = destination_rel_path

    if self.summarize_process_data:
        # Run collectl in playback and generate statistics of interest
        summary_statistics = self.__summarize_process_data( pid, path )
        for statistic, value in summary_statistics:
            properties[ "process_%s" % "_".join( statistic ) ] = value

    return properties

def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        # Pull out locally used fields
        extra_dir = kwargs.get('extra_dir', None)
        extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
        dir_only = kwargs.get('dir_only', False)
        alt_name = kwargs.get('alt_name', None)

        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))

        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # Create given directory in cache
        cache_dir = os.path.join(self.staging_path, rel_path)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        # Although not really necessary to create S3 folders (because S3 has
        # flat namespace), do so for consistency with the regular file system
        # S3 folders are marked by having trailing '/' so add it now
        # s3_dir = '%s/' % rel_path
        # self._push_to_os(s3_dir, from_string='')

        # If instructed, create the dataset in cache & in S3
        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
            open(os.path.join(self.staging_path, rel_path), 'w').close()
            self._push_to_os(rel_path, from_string='')

def job_properties(self, job_id, job_directory):
    pid = open(self.__pid_file(job_directory), "r").read().strip()
    contents = os.listdir(job_directory)
    try:
        rel_path = filter(self._is_instrumented_collectl_log, contents)[0]
        path = os.path.join(job_directory, rel_path)
    except IndexError:
        message = "Failed to find collectl log in directory %s, files were %s" % (
            job_directory, contents)
        raise Exception(message)

    properties = dict(pid=int(pid), )

    if self.saved_logs_path:
        destination_rel_dir = os.path.join(
            *directory_hash.directory_hash_id(job_id))
        destination_rel_path = os.path.join(destination_rel_dir, rel_path)
        destination_path = os.path.join(self.saved_logs_path,
                                        destination_rel_path)
        destination_dir = os.path.dirname(destination_path)
        if not os.path.isdir(destination_dir):
            os.makedirs(destination_dir)
        shutil.copyfile(path, destination_path)
        properties["raw_log_path"] = destination_rel_path

    if self.summarize_process_data:
        # Run collectl in playback and generate statistics of interest
        summary_statistics = self.__summarize_process_data(pid, path)
        for statistic, value in summary_statistics:
            properties["process_%s" % "_".join(statistic)] = value

    return properties

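# For reference, both job_properties() variants above return a flat metrics
# dict. The "pid" and "raw_log_path" keys come straight from the code; the
# statistic key below is illustrative only, since the actual names depend on
# whatever __summarize_process_data yields:
#
#   {
#       "pid": 1234,
#       "raw_log_path": "000/<job collectl log>",  # only when saved_logs_path is set
#       "process_max_rss": 456789,                 # one "process_<stat>" key per statistic
#   }
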
def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        # Pull out locally used fields
        extra_dir = kwargs.get('extra_dir', None)
        extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
        dir_only = kwargs.get('dir_only', False)
        alt_name = kwargs.get('alt_name', None)

        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))

        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # Create given directory in cache
        cache_dir = os.path.join(self.staging_path, rel_path)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        # Although not really necessary to create S3 folders (because S3 has
        # flat namespace), do so for consistency with the regular file system
        # S3 folders are marked by having trailing '/' so add it now
        # s3_dir = '%s/' % rel_path
        # self._push_to_os(s3_dir, from_string='')

        # If instructed, create the dataset in cache & in S3
        if not dir_only:
            rel_path = os.path.join(
                rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
            open(os.path.join(self.staging_path, rel_path), 'w').close()
            self._push_to_os(rel_path, from_string='')

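# Every variant in this listing builds its hashed layout with
# directory_hash_id(). As a minimal sketch -- assuming the
# 1000-entries-per-directory convention implied by paths like
# /files/000/dataset_10.dat, not necessarily the exact library
# implementation -- such a helper could look like this:
def directory_hash_id(id):
    s = str(id)
    # IDs 0-999 all land in the top-level "000" bucket
    if len(s) < 4:
        return ["000"]
    # Zero-pad to a multiple of three, then drop the last three digits,
    # which select the file within its final directory
    padded = ((3 - len(s) % 3) % 3) * "0" + s
    padded = padded[:-3]
    # Split what remains into three-digit path segments
    return [padded[i:i + 3] for i in range(0, len(padded), 3)]

# e.g. directory_hash_id(10) -> ["000"]; directory_hash_id(1234567) -> ["001", "234"]
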
def _construct_path( self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs ):
    rel_path = os.path.join(*directory_hash_id(obj.id))

    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)

    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)

    # S3 folders are marked by having trailing '/' so add it now
    rel_path = "%s/" % rel_path

    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

    return rel_path

def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None,
                    extra_dir_at_root=False, alt_name=None, obj_dir=False,
                    **kwargs):
    rel_path = os.path.join(*directory_hash_id(obj.id))

    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)

    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)

    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path

    if not dir_only:
        rel_path = os.path.join(
            rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

    return rel_path

def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None,
                    extra_dir_at_root=False, alt_name=None, obj_dir=False,
                    **kwargs):
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")

    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but S3 will not
        # follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)

    rel_path = os.path.join(*directory_hash_id(obj.id))

    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)

    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)

    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path

    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

    return rel_path

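# The hardened variant above leans on safe_relpath() to reject alt_name
# values that would escape the constructed directory. A minimal sketch of
# such a check, assuming it only needs to catch absolute paths and paths
# that normalize to outside their root (hypothetical, not necessarily the
# library's own implementation):
import os

def safe_relpath(path):
    # Reject absolute paths, and reject any path whose normalized form
    # still begins by climbing out of its root with ".."
    return not (path.startswith(os.sep) or os.path.normpath(path).startswith(os.pardir))

# e.g. safe_relpath('subdir/../name.dat') -> True   (stays inside the root)
#      safe_relpath('../name.dat')        -> False
#      safe_relpath('/etc/passwd')        -> False  (on POSIX)
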
def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False,
                    extra_dir=None, extra_dir_at_root=False, alt_name=None,
                    **kwargs):
    """
    Construct the expected absolute path for accessing the object
    identified by `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: bool
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself.
                     This option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected
                      path used to access the object identified by `obj`
                      (e.g., /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: bool
    :param old_style: This option is used for backward compatibility. If
                      True the composed directory structure does not include
                      a hash id (e.g., /files/dataset_10.dat (old) vs.
                      /files/000/dataset_10.dat (new))
    """
    base = self.extra_dirs.get(base_dir, self.file_path)
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        path = os.path.join(
            path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return os.path.abspath(path)

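# A worked example of the old-style vs. hashed layouts that the docstring
# above describes, assuming file_path is "/galaxy/files" and an object with
# id 10 (illustrative values only):
#
#   _construct_path(obj, old_style=True)    # -> /galaxy/files/dataset_10.dat
#   _construct_path(obj)                    # -> /galaxy/files/000/dataset_10.dat
#   _construct_path(obj, extra_dir="task")  # -> /galaxy/files/000/task/dataset_10.dat
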
def _construct_path(self, obj, dir_only=None, extra_dir=None,
                    extra_dir_at_root=False, alt_name=None, **kwargs):
    rel_path = os.path.join(*directory_hash_id(obj.id))

    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)

    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path

    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)

    return rel_path

def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False,
                    extra_dir=None, extra_dir_at_root=False, alt_name=None,
                    obj_dir=False, **kwargs):
    """
    Construct the absolute path for accessing the object identified by
    `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: boolean
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself.
                     This option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected
                      path used to access the object identified by `obj`
                      (e.g., /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: boolean
    :param old_style: This option is used for backward compatibility. If
                      `True` then the composed directory structure does not
                      include a hash id (e.g., /files/dataset_10.dat (old)
                      vs. /files/000/dataset_10.dat (new))
    """
    base = self.extra_dirs.get(base_dir, self.file_path)
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Create a subdirectory for the object ID
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return os.path.abspath(path)

def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False,
                    extra_dir=None, extra_dir_at_root=False, alt_name=None,
                    obj_dir=False, **kwargs):
    """
    Construct the absolute path for accessing the object identified by
    `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: boolean
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself.
                     This option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected
                      path used to access the object identified by `obj`
                      (e.g., /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: boolean
    :param old_style: This option is used for backward compatibility. If
                      `True` then the composed directory structure does not
                      include a hash id (e.g., /files/dataset_10.dat (old)
                      vs. /files/000/dataset_10.dat (new))
    """
    base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name and not safe_relpath(alt_name):
        log.warning('alt_name would locate path outside dir: %s', alt_name)
        raise ObjectInvalid("The requested object is invalid")
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Create a subdirectory for the object ID
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return os.path.abspath(path)
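
# A short usage sketch for this final, hardened variant, with a hypothetical
# object and store (names and paths here are illustrative assumptions):
#
#   class FakeDataset:
#       id = 10
#
#   store._construct_path(FakeDataset())                       # -> /galaxy/files/000/dataset_10.dat
#   store._construct_path(FakeDataset(), dir_only=True)        # -> /galaxy/files/000
#   store._construct_path(FakeDataset(), alt_name="../x.dat")  # raises ObjectInvalid,
#                                                              # since safe_relpath("../x.dat") is False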