class ContainerMixin(object):
    """Container mixin to give class basic container methods."""
    def __init__(self, *args, **kwargs):
        if not hasattr(self, 'ui'):
            self.ui = FancyLogger()

        # Setup progress bar
        if not hasattr(self, 'log_progress'):
            self.log_progress = kwargs.get('log_progress', True)

        if not hasattr(self, 'disable_progress_bar'):
            self.disable_progress_bar = kwargs.get('disable_progress_bar', False)

        if not hasattr(self, 'use_ascii_progress_bar'):
            self.use_ascii_progress_bar = kwargs.get('use_ascii_progress_bar', True)

    def __getstate__(self):
        return {}

    def __setstate__(self, d):
        self.ui = FancyLogger()

    @property
    def logger(self):
        """Logger instance"""
        logger = logging.getLogger(__name__)
        if not logger.handlers:
            setup_logging()

        return logger

    def show(self):
        """Print container content

        Returns
        -------
        Nothing

        """
        print(self)

    def log(self, level='info'):
        """Log container content

        Parameters
        ----------
        level : str
            Logging level, possible values [info, debug, warn, warning, error, critical]

        Returns
        -------
        Nothing

        """
        self.ui.line(self.__str__(), level=level)
def __init__(self, *args, **kwargs):
    if not hasattr(self, 'ui'):
        self.ui = FancyLogger()

    # Setup progress bar
    if not hasattr(self, 'log_progress'):
        self.log_progress = kwargs.get('log_progress', True)

    if not hasattr(self, 'disable_progress_bar'):
        self.disable_progress_bar = kwargs.get('disable_progress_bar', False)

    if not hasattr(self, 'use_ascii_progress_bar'):
        self.use_ascii_progress_bar = kwargs.get('use_ascii_progress_bar', True)
def __init__(self, *args, **kwargs):
    if not hasattr(self, 'ui'):
        self.ui = FancyLogger()

    if not hasattr(self, 'valid_formats'):
        self.valid_formats = []

    if not hasattr(self, 'filename'):
        if kwargs.get('filename', None):
            # Set variables
            self.filename = kwargs.get('filename', None)
        else:
            self.filename = None

    if not hasattr(self, 'format'):
        self.format = None

    if hasattr(self, 'filename') and self.filename is not None:
        self.detect_file_format()
        self.validate_format()

    # Setup progress bar
    if not hasattr(self, 'log_progress'):
        self.log_progress = kwargs.get('log_progress', True)

    if not hasattr(self, 'disable_progress_bar'):
        self.disable_progress_bar = kwargs.get('disable_progress_bar', False)

    if not hasattr(self, 'use_ascii_progress_bar'):
        self.use_ascii_progress_bar = kwargs.get('use_ascii_progress_bar', True)
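# Usage sketch of the file-aware constructor above (hypothetical standalone
# DemoFile class; detect_file_format/validate_format are stubbed here only to
# show the flow, the real mixin provides its own implementations):
class DemoFile(object):
    valid_formats = ['txt', 'csv']

    def __init__(self, **kwargs):
        self.filename = kwargs.get('filename', None)
        self.format = None
        if self.filename is not None:
            self.detect_file_format()
            self.validate_format()

    def detect_file_format(self):
        # Infer format from the file extension
        self.format = self.filename.rsplit('.', 1)[-1].lower()

    def validate_format(self):
        if self.format not in self.valid_formats:
            raise IOError('Unknown format [{format}]'.format(format=self.format))

demo = DemoFile(filename='meta.csv')
assert demo.format == 'csv'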
def log_chain(self, level='info'):
    """Log chain information

    Parameters
    ----------
    level : str
        Logging level, possible values [info, debug, warn, warning, error, critical]

    Returns
    -------
    Nothing

    """
    ui = FancyLogger()
    ui.line(data=self.chain_string(), level=level)
def log(self, level='info'):
    """Log container content

    Parameters
    ----------
    level : str
        Logging level, possible values [info, debug, warn, warning, error, critical]

    Returns
    -------
    self

    """
    from dcase_util.ui import FancyLogger
    FancyLogger().line(str(self), level=level)

    return self
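# Note: unlike the earlier log() variant, this one returns self, so calls can
# be chained fluently (hypothetical container instance assumed):
#
#     container.log(level='debug').show()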
def __init__(self, entry_label, mode='submission', class_labels=None,
             file_count=None, task='ASC',
             allow_placeholder_lines_in_output=False):
    """Constructor

    Parameters
    ----------
    entry_label : str
        Entry label

    mode : str
        Checker mode [submission, processed]

    class_labels : list of str
        Class labels

    file_count : int
        Correct amount of unique files in the system output

    task : str
        Task label [ASC, SED, TAG]

    allow_placeholder_lines_in_output : bool
        Allow placeholder lines in the output; in the case of sound event system
        output, only the audio file is outputted if no sound events were detected
        for the file.

    """
    # Run super init
    super(SubmissionChecker, self).__init__()

    self.entry = entry_label
    self.mode = mode
    self.class_labels = class_labels
    self.file_count = file_count
    self.task = task
    self.allow_placeholder_lines_in_output = allow_placeholder_lines_in_output

    self.error_log = []
    self.ui = FancyLogger()
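# Usage sketch for the constructor above (hypothetical values; only the
# constructor shown here is certain, the checking methods live elsewhere in
# the class):
checker = SubmissionChecker(
    entry_label='Smith_ABC_task1_1',    # hypothetical entry label
    mode='submission',
    class_labels=['indoor', 'outdoor', 'transportation'],
    file_count=1200,                    # expected number of unique files
    task='ASC'
)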
def data_collector(item_list=None,
                   data_processing_chain=None,
                   meta_processing_chain=None,
                   target_format='single_target_per_sequence',
                   channel_dimension='channels_last',
                   verbose=True,
                   print_indent=2):
    """Data collector

    Collects data and meta into matrices while processing them through processing chains.

    Parameters
    ----------
    item_list : list or dict
        Items in the data sequence. List of multi-level dictionaries with first
        level keys 'data' and 'meta'. Second level should contain parameters for
        the process method in the processing chain.
        Default value None

    data_processing_chain : ProcessingChain
        Data processing chain.
        Default value None

    meta_processing_chain : ProcessingChain
        Meta processing chain.
        Default value None

    channel_dimension : str
        Controls where the channel dimension is added. Similar to the Keras data
        format parameter. If None is given, no channel dimension is added.
        Possible values [None, 'channels_first', 'channels_last']
        Default value 'channels_last'

    target_format : str
        Meta data interpretation in relation to the data items.
        Possible values ['single_target_per_sequence', 'same']
        Default value 'single_target_per_sequence'

    verbose : bool
        Print information about the data
        Default value True

    print_indent : int
        Default value 2

    Returns
    -------
    numpy.ndarray
        data

    numpy.ndarray
        meta

    dict
        data size information

    """
    if item_list:
        # Collect all data and meta
        X = []
        Y = []

        for item in item_list:
            data = data_processing_chain.process(**item['data'])
            meta = meta_processing_chain.process(**item['meta'])

            X.append(data.data)

            # Collect meta
            if target_format == 'single_target_per_sequence':
                # Collect single target per sequence
                for i in range(0, data.shape[data.sequence_axis]):
                    Y.append(meta.data[:, 0])

            elif target_format == 'same':
                # Repeat the target for each frame in the item
                Y.append(
                    numpy.repeat(a=meta.data, repeats=data.length, axis=1).T
                )

        data_size = {}

        if len(data.shape) == 2:
            # Stack collected data and meta the correct way
            if data.time_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)
            else:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
            }

        elif len(data.shape) == 3:
            # Stack collected data and meta the correct way
            if data.sequence_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)
            elif data.sequence_axis == 1:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)
            elif data.sequence_axis == 2:
                X = numpy.dstack(X)
                Y = numpy.dstack(Y)

            if channel_dimension:
                # Add channel dimension to the data
                if channel_dimension == 'channels_first':
                    X = numpy.expand_dims(X, axis=1)
                elif channel_dimension == 'channels_last':
                    X = numpy.expand_dims(X, axis=3)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
                'sequence': X.shape[data.sequence_axis],
            }

        if verbose:
            data_shape = data.shape
            data_axis = {
                'time_axis': data.time_axis,
                'data_axis': data.data_axis
            }
            if hasattr(data, 'sequence_axis'):
                data_axis['sequence_axis'] = data.sequence_axis

            meta_shape = meta.shape
            meta_axis = {
                'time_axis': meta.time_axis,
                'data_axis': meta.data_axis
            }
            if hasattr(meta, 'sequence_axis'):
                meta_axis['sequence_axis'] = meta.sequence_axis

            logger = FancyLogger()

            # Data information
            logger.line('Data', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape', value=X.shape, indent=print_indent + 2)

            # Item
            logger.data(field='Item shape', value=data_shape, indent=print_indent + 2)
            logger.data(field='Time', value=data_shape[data_axis['time_axis']], indent=print_indent + 4)
            logger.data(field='Data', value=data_shape[data_axis['data_axis']], indent=print_indent + 4)
            if 'sequence_axis' in data_axis:
                logger.data(field='Sequence', value=data_shape[data_axis['sequence_axis']], indent=print_indent + 4)

            # Meta information
            logger.line('Meta', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape', value=Y.shape, indent=print_indent + 2)

            # Item
            logger.data(field='Item shape', value=meta_shape, indent=print_indent + 2)
            logger.data(field='Time', value=meta_shape[meta_axis['time_axis']], indent=print_indent + 4)
            logger.data(field='Data', value=meta_shape[meta_axis['data_axis']], indent=print_indent + 4)
            if 'sequence_axis' in meta_axis:
                logger.data(field='Sequence', value=meta_shape[meta_axis['sequence_axis']], indent=print_indent + 4)

        return X, Y, data_size
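# Usage sketch for data_collector (hypothetical filenames and chains; assumes
# dcase_util-style ProcessingChain objects, built earlier, whose process()
# returns containers with the .data/.shape/axis attributes used above):
item_list = [
    {
        'data': {'filename': 'features_1.cpickle'},  # hypothetical feature file
        'meta': {'filename': 'meta_1.cpickle'}       # hypothetical meta file
    },
    {
        'data': {'filename': 'features_2.cpickle'},
        'meta': {'filename': 'meta_2.cpickle'}
    },
]
X, Y, data_size = data_collector(
    item_list=item_list,
    data_processing_chain=data_processing_chain,  # assumed to exist
    meta_processing_chain=meta_processing_chain,  # assumed to exist
    target_format='single_target_per_sequence',
    channel_dimension='channels_last'
)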
def __setstate__(self, d):
    self.ui = FancyLogger()
class ContainerMixin(object):
    """Container mixin to give class basic container methods."""
    def __init__(self, *args, **kwargs):
        if not hasattr(self, 'ui'):
            self.ui = FancyLogger()

        # Setup progress bar
        if not hasattr(self, 'log_progress'):
            self.log_progress = kwargs.get('log_progress', True)

        if not hasattr(self, 'disable_progress_bar'):
            self.disable_progress_bar = kwargs.get('disable_progress_bar', False)

        if not hasattr(self, 'use_ascii_progress_bar'):
            self.use_ascii_progress_bar = kwargs.get('use_ascii_progress_bar', True)

    def __getstate__(self):
        return {}

    def __setstate__(self, d):
        self.ui = FancyLogger()

    def __str__(self):
        return self.to_string()

    @property
    def logger(self):
        """Logger instance"""
        logger = logging.getLogger(__name__)
        if not logger.handlers:
            setup_logging()

        return logger

    def show(self, mode='auto', indent=0, visualize=False):
        """Print container content

        If called inside a Jupyter notebook, an HTML formatted version is shown.

        Parameters
        ----------
        mode : str
            Output type, possible values ['auto', 'print', 'html']. 'html' will work only in Jupyter notebook
            Default value 'auto'

        indent : int
            Amount of indent
            Default value 0

        visualize : bool
            Visualize container data if the class has a plot method
            Default value False

        Returns
        -------
        Nothing

        """
        if mode == 'auto':
            if is_jupyter():
                mode = 'html'
            else:
                mode = 'print'

        if mode not in ['html', 'print']:
            # Unknown mode given
            message = '{name}: Unknown mode [{mode}]'.format(
                name=self.__class__.__name__, mode=mode)
            self.logger.exception(message)
            raise ValueError(message)

        if mode == 'html':
            from IPython.core.display import display, HTML
            display(HTML(self.to_html(indent=indent)))

            if visualize and hasattr(self, 'plot'):
                # If the class has a plot method, use it to visualize the content
                self.plot()

        elif mode == 'print':
            print(self.to_string(indent=indent))

    def to_string(self, ui=None, indent=0):
        """Get container information in a string

        Parameters
        ----------
        ui : FancyStringifier or FancyHTMLStringifier
            Stringifier class
            Default value FancyStringifier

        indent : int
            Amount of indent
            Default value 0

        Returns
        -------
        str

        """
        if ui is None:
            ui = FancyStringifier()

        output = ''
        output += ui.class_name(self.__class__.__name__, indent=indent) + '\n'
        if hasattr(self, 'filename') and self.filename:
            output += ui.data(field='filename', value=self.filename, indent=indent) + '\n'

        return output

    def to_html(self, indent=0):
        """Get container information in an HTML formatted string

        Parameters
        ----------
        indent : int
            Amount of indent
            Default value 0

        Returns
        -------
        str

        """
        return self.to_string(ui=FancyHTMLStringifier(), indent=indent)

    def log(self, level='info'):
        """Log container content

        Parameters
        ----------
        level : str
            Logging level, possible values [info, debug, warn, warning, error, critical]

        Returns
        -------
        Nothing

        """
        self.ui.line(self.__str__(), level=level)
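# Minimal usage sketch for ContainerMixin (hypothetical DictDemo subclass;
# assumes FancyLogger/FancyStringifier are importable as in the mixin itself):
class DictDemo(ContainerMixin):
    def __init__(self, data=None, **kwargs):
        self.data = data if data is not None else {}
        super(DictDemo, self).__init__(**kwargs)

demo = DictDemo(data={'fs': 44100})
demo.show(mode='print')          # plain-text output
print(demo.to_string(indent=2))  # same content as a string
demo.log(level='info')           # same content through FancyLogger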
def setup_keras(seed=None, profile=None, backend='theano', device=None,
                BLAS_thread_count=None, BLAS_MKL_CNR=True, nvcc_fastmath=None,
                theano_floatX=None, theano_optimizer=None, theano_OpenMP=None,
                theano_deterministic=None, verbose=True):
    """Setup Keras and environment variables affecting it. Given parameters are used to
    override ones specified in the keras.json file.

    Parameters
    ----------
    seed : int, optional
        Randomization seed. If none given, no seed is set.

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast'], will override other parameters with profile parameters.

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']

    BLAS_thread_count : int
        Number of threads used for BLAS libraries

    BLAS_MKL_CNR : bool
        Conditional numerical reproducibility for the MKL BLAS library. Use this to reproduce results with MKL.
        Default value True

    nvcc_fastmath : str, optional
        Control the usage of the fast math library in NVCC

    theano_floatX : str, optional
        Default dtype for Theano matrices and tensors ['float64', 'float32', 'float16']

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.

    theano_deterministic : bool, optional
        Enable or disable deterministic Theano operations.

    verbose : bool
        Print information
        Default value True

    """
    def logger():
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()
        return logger_instance

    if profile:
        if profile == 'deterministic':
            if seed is None:
                message = 'You should set randomization seed to get deterministic behaviour.'
                logger().exception(message)
                raise AttributeError(message)

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            message = 'Invalid Keras setup profile [{profile}].'.format(profile=profile)
            logger().exception(message)
            raise AttributeError(message)

    # Set seed first
    if seed:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        message = 'Invalid Keras backend type [{backend}].'.format(backend=backend)
        logger().exception(message)
        raise AttributeError(message)

    if device and device not in ['cpu', 'cuda', 'cuda0', 'opencl0:0']:
        message = 'Invalid Keras device type [{device}].'.format(device=device)
        logger().exception(message)
        raise AttributeError(message)

    if theano_floatX and theano_floatX not in ['float64', 'float32', 'float16']:
        message = 'Invalid Keras floatX type [{floatX}].'.format(floatX=theano_floatX)
        logger().exception(message)
        raise AttributeError(message)

    if theano_optimizer and theano_optimizer not in ['fast_run', 'merge', 'fast_compile', 'None']:
        message = 'Invalid Keras optimizer type [{optimizer}].'.format(optimizer=theano_optimizer)
        logger().exception(message)
        raise AttributeError(message)

    ui = FancyLogger()
    if verbose:
        ui.sub_header('Keras setup')

    # Get BLAS library associated to numpy
    if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
        blas_libraries = numpy.__config__.blas_opt_info['libraries']
    else:
        blas_libraries = ['']

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count)
        blas_extra_info.append('Threads[{threads}]'.format(threads=BLAS_thread_count))

        if BLAS_thread_count > 1:
            os.environ['OMP_DYNAMIC'] = 'False'
            os.environ['MKL_DYNAMIC'] = 'False'
        else:
            os.environ['OMP_DYNAMIC'] = 'True'
            os.environ['MKL_DYNAMIC'] = 'True'

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose:
        if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
            blas_libraries = numpy.__config__.blas_opt_info['libraries']
            if blas_libraries[0].startswith('openblas'):
                ui.data(field='BLAS library', value='OpenBLAS ({info})'.format(info=', '.join(blas_extra_info)))
            elif blas_libraries[0].startswith('blas'):
                ui.data(field='BLAS library', value='BLAS/Atlas ({info})'.format(info=', '.join(blas_extra_info)))
            elif blas_libraries[0].startswith('mkl'):
                ui.data(field='BLAS library', value='MKL ({info})'.format(info=', '.join(blas_extra_info)))

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            # 'ldflags=',
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)

            if verbose:
                ui.data(field='floatX', value=theano_floatX)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')
            else:
                flags.append('openmp=False')

        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')
            else:
                flags.append('deterministic=default')

        if verbose:
            ui.line('Theano', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0], value=item.split('=')[1], indent=4)

        # Set environment variable for Theano
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        flags = []

        # Set device
        if device:
            flags.append('device=' + device)

            # In case of CPU, disable visible GPUs.
            if device == 'cpu':
                os.environ["CUDA_VISIBLE_DEVICES"] = ''

        if verbose:
            ui.line('Tensorflow', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0], value=item.split('=')[1], indent=4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr
        import keras

    if verbose:
        ui.foot()
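# Usage sketch for setup_keras (call it before any `import keras` in user code;
# the profile names are the two defined above, the seed value is hypothetical):
setup_keras(
    seed=2018,               # hypothetical seed
    profile='deterministic'  # forces CPU + single-threaded BLAS for reproducibility
)

# or with hand-picked parameters instead of a profile:
setup_keras(
    backend='theano',
    device='cpu',
    BLAS_thread_count=4,
    theano_floatX='float32',
    verbose=True
)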
def __init__(self, manual_update=False, epochs=None, external_metric_labels=None,
             metric=None, loss=None, manual_update_interval=1,
             output_type='logging', show_timing=True, **kwargs):
    """Constructor

    Parameters
    ----------
    epochs : int
        Total amount of epochs
        Default value None

    metric : str
        Metric name
        Default value None

    loss : str
        Loss name
        Default value None

    manual_update : bool
        Manually update callback; use this when injecting external metrics
        Default value False

    manual_update_interval : int
        Epoch interval for manual update, used to anticipate updates
        Default value 1

    output_type : str
        Output type, either 'logging', 'console', or 'notebook'
        Default value 'logging'

    show_timing : bool
        Show per-epoch time and estimated time remaining
        Default value True

    external_metric_labels : dict or OrderedDict
        Dictionary with {'metric_label': 'metric_name'}
        Default value None

    """
    kwargs.update({
        'manual_update': manual_update,
        'epochs': epochs,
        'external_metric_labels': external_metric_labels,
    })

    super(ProgressLoggerCallback, self).__init__(**kwargs)

    if isinstance(metric, str):
        self.metric = metric
    elif callable(metric):
        self.metric = metric.__name__

    self.loss = loss
    self.manual_update_interval = manual_update_interval
    self.output_type = output_type
    self.show_timing = show_timing

    self.timer = Timer()
    self.ui = FancyStringifier()

    if self.output_type == 'logging':
        self.output_target = FancyLogger()
    elif self.output_type == 'console':
        self.output_target = FancyPrinter()
    elif self.output_type == 'notebook':
        self.output_target = FancyHTMLPrinter()
        self.ui = FancyHTMLStringifier()

    self.seen = 0
    self.log_values = []

    self.most_recent_values = collections.OrderedDict()
    self.most_recent_values['l_tra'] = None
    self.most_recent_values['l_val'] = None
    self.most_recent_values['m_tra'] = None
    self.most_recent_values['m_val'] = None

    self.data = {
        'l_tra': numpy.empty((self.epochs,)),
        'l_val': numpy.empty((self.epochs,)),
        'm_tra': numpy.empty((self.epochs,)),
        'm_val': numpy.empty((self.epochs,)),
    }
    self.data['l_tra'][:] = numpy.nan
    self.data['l_val'][:] = numpy.nan
    self.data['m_tra'][:] = numpy.nan
    self.data['m_val'][:] = numpy.nan

    for metric_label in self.external_metric_labels:
        self.data[metric_label] = numpy.empty((self.epochs,))
        self.data[metric_label][:] = numpy.nan

    self.header_shown = False
    self.last_update_epoch = 0
    self.target = None
    self.first_epoch = None
    self.total_time = 0
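# Usage sketch (hypothetical Keras model and training data; the callback is
# passed to fit() like any other Keras callback):
callback = ProgressLoggerCallback(
    epochs=50,
    metric='categorical_accuracy',
    loss='categorical_crossentropy',
    output_type='console'
)
# model.fit(X_train, Y_train, epochs=50, callbacks=[callback])  # model/data assumed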
def pack(self, dataset_name='dcase-dataset', content=None, output_path=None,
         base_path=None, overwrite=False, verbose=True):
    """Pack dataset.

    Parameters
    ----------
    dataset_name : str
        Dataset name
        Default value 'dcase-dataset'

    content : list of dict
        List of packages to be packed. Package item dict should have format
        {'data_name': 'doc', 'file_list': [{'source': 'file1.txt'}]}.
        Default value None

    output_path : str
        Path to which packages are saved.
        Default value None

    base_path : str
        Base path of the data. If per item package paths are not given ('target' field),
        this parameter is used to create one from the source path.
        Default value None

    overwrite : bool
        Overwrite existing packages.
        Default value False

    verbose : bool
        Show information during the packing.
        Default value True

    Returns
    -------
    Nothing

    """
    if verbose:
        log = FancyLogger()
        log.section_header('Packing dataset [{dataset_name}]'.format(dataset_name=dataset_name))

    if base_path is not None and not base_path.endswith(os.path.sep):
        base_path += os.path.sep

    for group in content:
        if verbose:
            log.line('[{data_name}]'.format(data_name=group['data_name']))

        package_filename = os.path.join(
            output_path,
            self.filename_template.format(
                dataset_name=dataset_name,
                data_name=group['data_name'],
                extension=self.package_extension
            )
        )

        newest_source = 0
        for item in group['file_list']:
            if not os.path.exists(item['source']):
                message = '{name}: File not found [{source_file}].'.format(
                    name=self.__class__.__name__,
                    source_file=item['source'])
                self.logger.exception(message)
                raise IOError(message)

            if 'target' not in item:
                if item['source'].startswith(base_path):
                    item['target'] = item['source'][len(base_path):]
                else:
                    item['target'] = item['source']

            timestamp = os.path.getmtime(item['source'])
            if newest_source < timestamp:
                newest_source = timestamp

        # Get newest package, take care of split packages
        all_packages = Path().file_list(
            path=os.path.split(os.path.abspath(package_filename))[0],
            extensions=os.path.splitext(package_filename)[1][1:]
        )

        newest_package = 0
        for package in all_packages:
            base_name = os.path.splitext(os.path.split(package)[-1])[0]
            if base_name[-1].isdigit():
                base_name = os.path.splitext(base_name)[0]

            if base_name == os.path.splitext(os.path.split(package_filename)[-1])[0]:
                timestamp = os.path.getmtime(package)
                if newest_package < timestamp:
                    newest_package = timestamp

        if newest_package < newest_source or overwrite:
            if self.convert_md_to_html:
                # Check for markdown content
                new_files = []
                for item in group['file_list']:
                    if os.path.splitext(item['source'])[-1] == '.md':
                        html_filename = os.path.splitext(item['source'])[0] + '.html'
                        if (not os.path.exists(html_filename)
                                or os.path.getmtime(item['source']) > os.path.getmtime(html_filename)
                                or overwrite):
                            # Convert markdown to html
                            self.convert_markdown(
                                source_filename=item['source'],
                                target_filename=html_filename
                            )

                        new_files.append({
                            'source': html_filename,
                            'target': os.path.splitext(item['target'])[0] + '.html'
                        })

                # Add new html files to the file_list
                group['file_list'] += new_files

            # Create packages
            package = Package(filename=package_filename)
            package_filenames = package.compress(
                file_list=group['file_list'],
                size_limit=self.package_size_limit
            )

            if verbose:
                log.line('Saved', indent=2)
                for i in package_filenames:
                    log.line(
                        '[{file}] [{size}]'.format(
                            file=i.replace(base_path, ''),
                            size=get_byte_string(os.path.getsize(i), show_bytes=False)
                        ),
                        indent=4
                    )

    if verbose:
        log.foot()
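# Usage sketch for pack() (hypothetical packer class name and file layout; the
# content structure follows the docstring above):
packer = DatasetPacker()  # hypothetical class owning this pack() method
packer.pack(
    dataset_name='dcase-demo-dataset',
    content=[
        {
            'data_name': 'doc',
            'file_list': [
                {'source': 'dataset/README.md'},
                {'source': 'dataset/meta.csv'}
            ]
        }
    ],
    output_path='packages',
    base_path='dataset',
    overwrite=False
)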