def show(self, mode='auto', indent=0, visualize=False):
    """Print container content

    If called inside a Jupyter notebook, an HTML formatted version is shown.

    Parameters
    ----------
    mode : str
        Output type, possible values ['auto', 'print', 'html']. 'html' will work only in Jupyter notebook.
        Default value 'auto'

    indent : int
        Amount of indent
        Default value 0

    visualize : bool
        Visualize container data if class has plot method
        Default value False

    Returns
    -------
    Nothing

    """

    if mode == 'auto':
        if is_jupyter():
            mode = 'html'
        else:
            mode = 'print'

    if mode not in ['html', 'print']:
        # Unknown mode given
        message = '{name}: Unknown mode [{mode}]'.format(
            name=self.__class__.__name__,
            mode=mode
        )
        self.logger.exception(message)
        raise ValueError(message)

    if mode == 'html':
        from IPython.core.display import display, HTML
        display(HTML(self.to_html(indent=indent)))

        if visualize and hasattr(self, 'plot'):
            # If class has a plot method, use it to visualize the content
            self.plot()

    elif mode == 'print':
        print(self.to_string(indent=indent))
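# Usage sketch (assumption: `container` is any object exposing this show() method
# together with to_html()/to_string(), e.g. a container-style class instance):
#
#     container.show()                               # auto-detect Jupyter vs. console
#     container.show(mode='print', indent=2)         # force plain-text output
#     container.show(mode='html', visualize=True)    # HTML output, plus plot() if available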
def show(self, mode='auto', indent=0):
    """Print information about the best model

    If called inside a Jupyter notebook, an HTML formatted version is shown.

    Parameters
    ----------
    mode : str
        Output type, possible values ['auto', 'print', 'html']. 'html' will work only in Jupyter notebook.
        Default value 'auto'

    indent : int
        Amount of indent
        Default value 0

    Returns
    -------
    Nothing

    """

    if mode == 'auto':
        if is_jupyter():
            mode = 'html'
        else:
            mode = 'print'

    if mode not in ['html', 'print']:
        # Unknown mode given
        message = '{name}: Unknown mode [{mode}]'.format(
            name=self.__class__.__name__,
            mode=mode
        )
        self.logger.exception(message)
        raise ValueError(message)

    if mode == 'html':
        from IPython.core.display import display, HTML
        display(HTML(self.to_html(indent=indent)))

    elif mode == 'print':
        print(self.to_string(indent=indent))
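# Usage sketch (assumption: `learner` is an object holding the best model and
# exposing this show() along with to_html()/to_string()):
#
#     learner.show()               # HTML table in Jupyter, plain text otherwise
#     learner.show(mode='print')   # force plain-text output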
def compress(self, filename=None, path=None, file_list=None, size_limit=None): """Compress the package. Supports Zip and Tar packages. Parameters ---------- filename : str Filename for the package. If None given, one given to class initializer is used. Default value None path : str Path get files if file_list is not set. Files are collected recursively. Default value None file_list : list of dict List of files to be included to the package. Item format {'source': 'file1.txt', 'target': 'folder1/file1.txt'}. Default value None size_limit : int Size limit in bytes. Default value None Returns ------- list of str Filenames of created packages """ if is_jupyter(): from tqdm import tqdm_notebook as tqdm else: from tqdm import tqdm if filename is not None: self.filename = filename self.detect_file_format() self.validate_format() if path is not None and file_list is None: files = Path(path=path).file_list(recursive=True) file_list = [] for filename in files: file_list.append({ 'source': filename, 'target': os.path.relpath(filename) }) package_filenames = [] total_uncompressed_size = 0 for item in file_list: total_uncompressed_size += os.path.getsize(item['source']) if size_limit is None or total_uncompressed_size < size_limit: package = None if self.format == FileFormat.ZIP: package = zipfile.ZipFile(file=self.filename, mode='w') elif self.format == FileFormat.TAR: package = tarfile.open(name=self.filename, mode='w:gz') package_filenames.append(self.filename) size_uncompressed = 0 for item in file_list: if os.path.exists(item['source']): if self.format == FileFormat.ZIP: package.write(filename=item['source'], arcname=os.path.relpath(item['target']), compress_type=zipfile.ZIP_DEFLATED) file_info = package.getinfo( os.path.relpath(item['target'])) size_uncompressed += file_info.file_size elif self.format == FileFormat.TAR: package.add(name=item['source'], arcname=os.path.relpath(item['target'])) file_info = package.gettarinfo(name=item['source'], arcname=os.path.relpath( item['target'])) size_uncompressed += file_info.size else: package.close() message = '{name}: Non-existing file [{filename}] detected while compressing a package [{package}]'.format( name=self.__class__.__name__, filename=item['source'], package=self.filename) if self.logger: self.logger.exception(message) raise IOError(message) package.close() else: base, extension = os.path.splitext(self.filename) filename_template = base + '.{package_id}' + extension package = None # Initialize package package_id = 1 size_uncompressed = 0 if self.format == FileFormat.ZIP: package = zipfile.ZipFile( file=filename_template.format(package_id=package_id), mode='w') elif self.format == FileFormat.TAR: package = tarfile.open( name=filename_template.format(package_id=package_id), mode='w:gz') package_filenames.append( filename_template.format(package_id=package_id)) progress = tqdm(file_list, desc="{0: <25s}".format('Compress'), file=sys.stdout, leave=False, disable=self.disable_progress_bar, ascii=self.use_ascii_progress_bar) for item_id, item in enumerate(progress): if self.disable_progress_bar: self.logger.info( ' {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'. format(title='Compress ', item_id=item_id, total=len(progress), file=item['source'])) if os.path.exists(item['source']): current_size_uncompressed = os.path.getsize(item['source']) if size_uncompressed + current_size_uncompressed > size_limit: # Size limit met, close current package and open a new one. 
package.close() package_id += 1 if self.format == FileFormat.ZIP: package = zipfile.ZipFile( file=filename_template.format( package_id=package_id), mode='w') elif self.format == FileFormat.TAR: package = tarfile.open( name=filename_template.format( package_id=package_id), mode='w:gz') package_filenames.append( filename_template.format(package_id=package_id)) size_uncompressed = 0 if self.format == FileFormat.ZIP: package.write(filename=item['source'], arcname=os.path.relpath(item['target']), compress_type=zipfile.ZIP_DEFLATED) file_info = package.getinfo( os.path.relpath(item['target'])) size_uncompressed += file_info.file_size elif self.format == FileFormat.TAR: package.add(name=item['source'], arcname=os.path.relpath(item['target'])) file_info = package.gettarinfo(name=item['source'], arcname=os.path.relpath( item['target'])) size_uncompressed += file_info.size else: package.close() message = '{name}: Non-existing file [{filename}] detected while compressing a package [{package}]'.format( name=self.__class__.__name__, filename=item['source'], package=filename_template.format( package_id=package_id)) if self.logger: self.logger.exception(message) raise IOError(message) package.close() return package_filenames
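# Usage sketch (assumption: `Package` is the class owning compress(), initialized
# with the target package filename):
#
#     Package(filename='backup.zip').compress(path='data/')
#     Package(filename='backup.zip').compress(
#         file_list=[{'source': 'a.txt', 'target': 'docs/a.txt'}],
#         size_limit=100 * 1024 * 1024
#     )
#
# Minimal standalone sketch of the size-limit splitting logic used above, written
# with the standard library only. All names below are illustrative and not part of
# this module.
def _example_split_zip(file_list, filename_template, size_limit):
    """Write (source, target) file pairs into numbered zip packages, starting a new
    package when the accumulated uncompressed size would exceed size_limit."""
    import os
    import zipfile

    package_id = 1
    size_uncompressed = 0
    package = zipfile.ZipFile(filename_template.format(package_id=package_id), mode='w')
    created = [filename_template.format(package_id=package_id)]

    for source, target in file_list:
        current_size = os.path.getsize(source)
        if size_uncompressed + current_size > size_limit:
            # Size limit reached, close the current package and open a new one.
            package.close()
            package_id += 1
            package = zipfile.ZipFile(filename_template.format(package_id=package_id), mode='w')
            created.append(filename_template.format(package_id=package_id))
            size_uncompressed = 0

        package.write(source, arcname=target, compress_type=zipfile.ZIP_DEFLATED)
        size_uncompressed += current_size

    package.close()
    return created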
def extract(self, target_path=None, overwrite=False, omit_first_level=False):
    """Extract the package. Supports Zip and Tar packages.

    Parameters
    ----------
    target_path : str
        Path to extract the package content. If none given, package is extracted in the same path as the package.
        Default value None

    overwrite : bool
        Overwrite existing files.
        Default value False

    omit_first_level : bool
        Omit first directory level.
        Default value False

    Returns
    -------
    self

    """

    if is_jupyter():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    if target_path is None:
        target_path = os.path.split(self.filename)[0]

    Path(target_path).create()

    offset = 0

    if self.format == FileFormat.ZIP:
        with zipfile.ZipFile(self.filename, "r") as z:
            if omit_first_level:
                parts = []
                for name in z.namelist():
                    if not name.endswith('/'):
                        parts.append(name.split('/')[:-1])

                prefix = os.path.commonprefix(parts) or ''

                if prefix:
                    if len(prefix) > 1:
                        prefix_ = list()
                        prefix_.append(prefix[0])
                        prefix = prefix_

                    prefix = '/'.join(prefix) + '/'

                offset = len(prefix)

            # Start extraction
            members = z.infolist()
            file_count = 1
            progress = tqdm(
                members,
                desc="{0: <25s}".format('Extract'),
                file=sys.stdout,
                leave=False,
                disable=self.disable_progress_bar,
                ascii=self.use_ascii_progress_bar
            )

            for i, member in enumerate(progress):
                if self.disable_progress_bar:
                    self.logger.info(
                        '  {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'.format(
                            title='Extract ',
                            item_id=i,
                            total=len(progress),
                            file=member.filename
                        )
                    )

                if not omit_first_level or len(member.filename) > offset:
                    if omit_first_level:
                        member.filename = member.filename[offset:]

                    progress.set_description("{0: >35s}".format(member.filename.split('/')[-1]))
                    progress.update()

                    if not os.path.isfile(os.path.join(target_path, member.filename)) or overwrite:
                        try:
                            if hasattr(self, 'package_password') and self.package_password:
                                z.extract(
                                    member=member,
                                    path=target_path,
                                    pwd=self.package_password
                                )
                            else:
                                z.extract(
                                    member=member,
                                    path=target_path
                                )

                        except KeyboardInterrupt:
                            # Delete latest file, since most likely it was not extracted fully
                            os.remove(os.path.join(target_path, member.filename))

                            # Quit
                            sys.exit()

                    file_count += 1

    elif self.format == FileFormat.TAR:
        tar = tarfile.open(self.filename, "r:gz")
        progress = tqdm(
            tar,
            desc="{0: <25s}".format('Extract'),
            file=sys.stdout,
            leave=False,
            disable=self.disable_progress_bar,
            ascii=self.use_ascii_progress_bar
        )

        for i, tar_info in enumerate(progress):
            if self.disable_progress_bar:
                self.logger.info(
                    '  {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'.format(
                        title='Extract ',
                        item_id=i,
                        total=len(progress),
                        file=tar_info.name
                    )
                )

            if not os.path.isfile(os.path.join(target_path, tar_info.name)) or overwrite:
                tar.extract(tar_info, target_path)

            tar.members = []

        tar.close()

    return self
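# Usage sketch (assumption: `Package` is the class owning extract(), initialized
# with the package filename):
#
#     Package(filename='dataset.zip').extract(target_path='data/', overwrite=False)
#     Package(filename='dataset.tar.gz').extract(omit_first_level=True)  # strip shared top-level folder
#
# Standalone sketch of the "omit first level" offset computation for a zip archive:
# find the shared top directory of all file members and return the character offset
# to slice it off member names. Names are illustrative, not part of this module.
def _example_first_level_offset(zip_path):
    import os
    import zipfile

    with zipfile.ZipFile(zip_path, 'r') as z:
        # Collect directory parts of file entries (skip pure directory entries).
        parts = [name.split('/')[:-1] for name in z.namelist() if not name.endswith('/')]
        prefix = os.path.commonprefix(parts) or []
        prefix = prefix[:1]  # only the first directory level is stripped
        return len('/'.join(prefix) + '/') if prefix else 0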
def setup_keras(seed=None, profile=None, backend='theano', device=None, BLAS_thread_count=None, BLAS_MKL_CNR=True, nvcc_fastmath=None, theano_floatX=None, theano_optimizer=None, theano_OpenMP=None, theano_deterministic=None, verbose=True, print_indent=0): """Setup Keras and environmental variables effecting on it. Given parameters are used to override ones specified in keras.json file. Parameters ---------- seed : int, optional Randomization seed. If none given, no seed is set. Default value None profile : str, optional Profile name ['deterministic', 'cuda0_fast'], will override other parameters with profile parameters. Default value None backend : str Keras backend ['theano', 'tensorflow'] Default value 'theano' device : str, optional Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1'] Default value None BLAS_thread_count : int Number of thread used for BLAS libraries Default value None BLAS_MKL_CNR : bool Conditional numerical reproducibility for MKL BLAS library. Use this to reproduce results with MKL. Default value True nvcc_fastmath : str, optional Control the usage of fast math library in NVCC Default value None theano_floatX : str, optional Default dtype for Theano matrix and tensor ['float64', 'float32', 'float16'] Default value None theano_optimizer : str, optional Optimizer ['fast_run', 'merge', 'fast_compile', 'None'] Default value None theano_OpenMP : bool, optional Enable or disable parallel computation on the CPU with OpenMP. Default value None theano_deterministic : bool, optional Default value None verbose : bool Print information Default value True print_indent : int Print indent Default value 0 """ def logger(): logger_instance = logging.getLogger(__name__) if not logger_instance.handlers: setup_logging() return logger_instance if profile: if profile == 'deterministic': if seed is None: message = 'You should set randomization seed to get deterministic behaviour.' 
logger().exception(message) raise AttributeError(message) # Parameters to help to get deterministic results device = 'cpu' BLAS_thread_count = 1 BLAS_MKL_CNR = True nvcc_fastmath = False theano_optimizer = 'None' theano_OpenMP = False theano_deterministic = True elif profile == 'cuda0_fast': device = 'cuda0' BLAS_thread_count = 8 BLAS_MKL_CNR = True nvcc_fastmath = True theano_optimizer = 'fast_run' theano_OpenMP = True theano_deterministic = True else: message = 'Invalid Keras setup profile [{profile}].'.format( profile=profile) logger().exception(message) raise AttributeError(message) # Set seed first if seed: numpy.random.seed(seed) random.seed(seed) # Check parameter validity if backend and backend not in ['theano', 'tensorflow']: message = 'Invalid Keras backend type [{backend}].'.format( backend=backend) logger().exception(message) raise AttributeError(message) if device and device not in ['cpu', 'cuda', 'cuda0', 'opencl0:0']: message = 'Invalid Keras device type [{device}].'.format(device=device) logger().exception(message) raise AttributeError(message) if theano_floatX and theano_floatX not in [ 'float64', 'float32', 'float16' ]: message = 'Invalid Keras floatX type [{floatX}].'.format( floatX=theano_floatX) logger().exception(message) raise AttributeError(message) if theano_optimizer and theano_optimizer not in [ 'fast_run', 'merge', 'fast_compile', 'None' ]: message = 'Invalid Keras optimizer type [{optimizer}].'.format( optimizer=theano_optimizer) logger().exception(message) raise AttributeError(message) if is_jupyter(): ui = FancyHTMLPrinter() else: ui = FancyLogger() if verbose: ui.sub_header('Keras setup', indent=print_indent) # Get BLAS library associated to numpy if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info: blas_libraries = numpy.__config__.blas_opt_info['libraries'] else: blas_libraries = [''] blas_extra_info = [] # Select Keras backend os.environ["KERAS_BACKEND"] = backend # Threading if BLAS_thread_count: os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count) os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count) os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count) blas_extra_info.append( 'Threads[{threads}]'.format(threads=BLAS_thread_count)) if BLAS_thread_count > 1: os.environ['OMP_DYNAMIC'] = 'False' os.environ['MKL_DYNAMIC'] = 'False' else: os.environ['OMP_DYNAMIC'] = 'True' os.environ['MKL_DYNAMIC'] = 'True' # Conditional Numerical Reproducibility (CNR) for MKL BLAS library if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'): os.environ['MKL_CBWR'] = 'COMPATIBLE' blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE')) # Show BLAS info if verbose: if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info: blas_libraries = numpy.__config__.blas_opt_info['libraries'] if blas_libraries[0].startswith('openblas'): ui.data(field='BLAS library', value='OpenBLAS ({info})'.format( info=', '.join(blas_extra_info)), indent=print_indent + 2) elif blas_libraries[0].startswith('blas'): ui.data(field='BLAS library', value='BLAS/Atlas ({info})'.format( info=', '.join(blas_extra_info)), indent=print_indent + 2) elif blas_libraries[0].startswith('mkl'): ui.data(field='BLAS library', value='MKL ({info})'.format( info=', '.join(blas_extra_info)), indent=print_indent + 2) # Set backend and parameters before importing keras if verbose: ui.data(field='Backend', value=backend, indent=print_indent + 2) if backend == 'theano': # Theano setup # Default flags flags = [ # 'ldflags=', 'warn.round=False', ] # 
Set device if device: flags.append('device=' + device) # Set floatX if theano_floatX: flags.append('floatX=' + theano_floatX) if verbose: ui.data(field='floatX', value=theano_floatX, indent=print_indent + 2) # Set optimizer if theano_optimizer is not None: flags.append('optimizer=' + theano_optimizer) # Set fastmath for GPU mode only if nvcc_fastmath and device != 'cpu': if nvcc_fastmath: flags.append('nvcc.fastmath=True') else: flags.append('nvcc.fastmath=False') # Set OpenMP if theano_OpenMP is not None: if theano_OpenMP: flags.append('openmp=True') else: flags.append('openmp=False') if theano_deterministic is not None: if theano_deterministic: flags.append('deterministic=more') else: flags.append('deterministic=default') if verbose: ui.data('Theano', '', indent=print_indent + 2) for item in flags: ui.data(field=item.split('=')[0], value=item.split('=')[1], indent=print_indent + 4) # Set environmental variable for Theano os.environ["THEANO_FLAGS"] = ','.join(flags) elif backend == 'tensorflow': # Tensorflow setup if verbose: ui.data('Tensorflow', '', indent=print_indent + 2) # In case of CPU, disable visible GPUs. if device == 'cpu': os.environ["CUDA_VISIBLE_DEVICES"] = '' import tensorflow as tf if seed: # Set random seed tf.set_random_seed(seed) config = tf.ConfigProto(inter_op_parallelism_threads=BLAS_thread_count) os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import logging logging.getLogger('tensorflow').setLevel(logging.FATAL) with SuppressStdoutAndStderr(): from keras import backend as k session = tf.Session(config=config) k.set_session(session) if verbose: gpu_device_found = False if device != 'cpu': from tensorflow.python.client import device_lib for device_candidate in device_lib.list_local_devices(): if device_candidate.device_type == 'GPU': gpu_device_found = True if not gpu_device_found: device = 'cpu ({original_device} was set but not found)'.format( original_device=device) ui.data(field='Device', value=device, indent=print_indent + 4) with SuppressStdoutAndStderr(): # Import keras and suppress backend announcement printed to stderr import keras if verbose: ui.foot(indent=print_indent)
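# Usage sketch (assumptions: called once before any Keras model code is imported;
# the parameter values below are only examples):
#
#     setup_keras(seed=42, profile='deterministic', verbose=True)            # reproducible CPU run
#     setup_keras(backend='tensorflow', device='cpu', BLAS_thread_count=4)   # CPU TensorFlow
#     setup_keras(backend='theano', device='cuda0', nvcc_fastmath=True)      # GPU Theano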
def download(self):
    """Download remote file and save it as local file.

    Returns
    -------
    self

    """

    if is_jupyter():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    try:
        if self.local_changed():
            try:
                from urllib.request import urlretrieve
            except ImportError:
                from urllib import urlretrieve

            # Set socket timeout
            socket.setdefaulttimeout(self.socket_timeout)

            def progress_hook(t):
                """Wraps tqdm instance. Don't forget to close() or __exit__() the tqdm instance
                once you're done with it (easiest using `with` syntax).
                """

                last_b = [0]

                def inner(b=1, bsize=1, tsize=None):
                    """
                    b : int, optional
                        Number of blocks just transferred [default: 1].

                    bsize : int, optional
                        Size of each block (in tqdm units) [default: 1].

                    tsize : int, optional
                        Total size (in tqdm units). If [default: None] remains unchanged.

                    """
                    if tsize is not None:
                        t.total = tsize

                    t.update((b - last_b[0]) * bsize)
                    last_b[0] = b

                return inner

            tmp_file = self.filename + '.partial_download'

            with tqdm(
                desc="{0: >25s}".format(os.path.splitext(self.remote_file.split('/')[-1])[0]),
                file=sys.stdout,
                unit='B',
                unit_scale=True,
                miniters=1,
                leave=False,
                disable=self.disable_progress_bar,
                ascii=self.use_ascii_progress_bar
            ) as t:
                try:
                    local_filename, headers = urlretrieve(
                        url=self.remote_file,
                        filename=tmp_file,
                        reporthook=progress_hook(t),
                        data=None
                    )

                except IOError:
                    # Second attempt by ignoring SSL context.
                    import ssl
                    ssl._create_default_https_context = ssl._create_unverified_context
                    local_filename, headers = urlretrieve(
                        url=self.remote_file,
                        filename=tmp_file,
                        reporthook=progress_hook(t),
                        data=None
                    )

            tmp_md5 = get_file_hash(filename=tmp_file)

            file_valid = True
            if self.remote_md5 is not None:
                if tmp_md5 == self.remote_md5:
                    file_valid = True
                else:
                    message = '{name}: Download failed [{filename}] [md5 mismatch]'.format(
                        name=self.__class__.__name__,
                        filename=self.remote_file,
                    )
                    self.logger.exception(message)
                    raise IOError(message)

            if file_valid:
                self._local_md5 = tmp_md5
                os.rename(tmp_file, self.filename)

    except Exception as e:
        message = '{name}: Download failed [{filename}] [{error_number}: {strerror}]'.format(
            name=self.__class__.__name__,
            filename=self.remote_file,
            error_number=e.errno if hasattr(e, 'errno') else '',
            strerror=e.strerror if hasattr(e, 'strerror') else '',
        )
        self.logger.exception(message)
        raise

    return self
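# Usage sketch (assumption: `remote` is an instance of the class owning download(),
# configured with a remote URL, a local filename, and an optional MD5 checksum; the
# constructor arguments shown are hypothetical):
#
#     remote = RemoteFile(filename='data/archive.zip',
#                         remote_file='https://example.org/archive.zip',
#                         remote_md5=None)
#     remote.download()   # fetches to '<filename>.partial_download', verifies, then renames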
def model_summary_string(keras_model, mode='keras', show_parameters=True, display=False): """Model summary in a formatted string, similar to Keras model summary function. Parameters ---------- keras_model : keras model Keras model mode : str Summary mode ['extended', 'keras']. In case 'keras', standard Keras summary is returned. Default value keras show_parameters : bool Show model parameter count and input / output shapes Default value True display : bool Display summary immediately, otherwise return string Default value False Returns ------- str Model summary """ if is_jupyter(): ui = FancyHTMLStringifier() html_mode = True else: ui = FancyStringifier() html_mode = False output = '' output += ui.line('Model summary') + '\n' if mode == 'extended' or mode == 'extended_wide': layer_name_map = { 'BatchNormalization': 'BatchNorm', } layer_type_html_tags = { 'InputLayer': '<span class="label label-default">{0:s}</span>', 'Dense': '<span class="label label-primary">{0:s}</span>', 'TimeDistributed': '<span class="label label-primary">{0:s}</span>', 'BatchNorm': '<span class="label label-default">{0:s}</span>', 'Activation': '<span class="label label-default">{0:s}</span>', 'Dropout': '<span class="label label-default">{0:s}</span>', 'Flatten': '<span class="label label-success">{0:s}</span>', 'Reshape': '<span class="label label-success">{0:s}</span>', 'Permute': '<span class="label label-success">{0:s}</span>', 'Conv1D': '<span class="label label-warning">{0:s}</span>', 'Conv2D': '<span class="label label-warning">{0:s}</span>', 'MaxPooling1D': '<span class="label label-success">{0:s}</span>', 'MaxPooling2D': '<span class="label label-success">{0:s}</span>', 'MaxPooling3D': '<span class="label label-success">{0:s}</span>', 'AveragePooling1D': '<span class="label label-success">{0:s}</span>', 'AveragePooling2D': '<span class="label label-success">{0:s}</span>', 'AveragePooling3D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling1D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling2D': '<span class="label label-success">{0:s}</span>', 'GlobalMaxPooling3D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling1D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling2D': '<span class="label label-success">{0:s}</span>', 'GlobalAveragePooling3D': '<span class="label label-success">{0:s}</span>', 'RNN': '<span class="label label-danger">{0:s}</span>', 'SimpleRNN': '<span class="label label-danger">{0:s}</span>', 'GRU': '<span class="label label-danger">{0:s}</span>', 'CuDNNGRU': '<span class="label label-danger">{0:s}</span>', 'LSTM': '<span class="label label-danger">{0:s}</span>', 'CuDNNLSTM': '<span class="label label-danger">{0:s}</span>', 'Bidirectional': '<span class="label label-danger">{0:s}</span>' } from tensorflow import keras from distutils.version import LooseVersion import tensorflow.keras.backend as keras_backend table_data = { 'layer_type': [], 'output': [], 'parameter_count': [], 'name': [], 'connected_to': [], 'activation': [], 'initialization': [] } row_separators = [] prev_name = None for layer_id, layer in enumerate(keras_model.layers): connections = [] if LooseVersion(keras.__version__) >= LooseVersion('2.1.3'): for node_index, node in enumerate(layer._inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append( inbound_layer + '[' + str(inbound_node_index) + 
'][' + str(inbound_tensor_index) + ']' ) else: for node_index, node in enumerate(layer.inbound_nodes): for i in range(len(node.inbound_layers)): inbound_layer = node.inbound_layers[i].name inbound_node_index = node.node_indices[i] inbound_tensor_index = node.tensor_indices[i] connections.append( inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']' ) config = DictContainer(layer.get_config()) layer_name = layer.__class__.__name__ if layer_name in layer_name_map: layer_name = layer_name_map[layer_name] if html_mode and layer_name in layer_type_html_tags: layer_name = layer_type_html_tags[layer_name].format(layer_name) if config.get_path('kernel_initializer.class_name') == 'VarianceScaling': init = str(config.get_path('kernel_initializer.config.distribution', '---')) elif config.get_path('kernel_initializer.class_name') == 'RandomUniform': init = 'uniform' else: init = '-' name_parts = layer.name.split('_') if prev_name != name_parts[0]: row_separators.append(layer_id) prev_name = name_parts[0] table_data['layer_type'].append(layer_name) table_data['output'].append(str(layer.output_shape)) table_data['parameter_count'].append(str(layer.count_params())) table_data['name'].append(layer.name) table_data['connected_to'].append(str(connections[0]) if len(connections) > 0 else '-') table_data['activation'].append(str(config.get('activation', '-'))) table_data['initialization'].append(init) trainable_count = int( numpy.sum([keras_backend.count_params(p) for p in set(keras_model.trainable_weights)]) ) non_trainable_count = int( numpy.sum([keras_backend.count_params(p) for p in set(keras_model.non_trainable_weights)]) ) # Show row separators only if they are useful if len(row_separators) == len(keras_model.layers): row_separators = None if mode == 'extended': output += ui.table( cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count']], column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters'], column_types=['str30', 'str20', 'str25', 'str20'], column_separators=[1, 2], row_separators=row_separators, indent=4 ) elif mode == 'extended_wide': output += ui.table( cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count'], table_data['activation'], table_data['initialization']], column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters', 'Act.', 'Init.'], column_types=['str30', 'str20', 'str25', 'str20', 'str15', 'str15'], column_separators=[1, 2, 3], row_separators=row_separators, indent=4 ) if show_parameters: output += ui.line('') + '\n' output += ui.line('Parameters', indent=4) + '\n' output += ui.data(indent=6, field='Total', value=trainable_count + non_trainable_count) + '\n' output += ui.data(indent=6, field='Trainable', value=trainable_count) + '\n' output += ui.data(indent=6, field='Non-Trainable', value=non_trainable_count) + '\n' else: output_buffer = [] keras_model.summary(print_fn=output_buffer.append) for line in output_buffer: if is_jupyter(): output += ui.line('<code>'+line+'</code>', indent=4) + '\n' else: output += ui.line(line, indent=4) + '\n' model_config = keras_model.get_config() if show_parameters: output += ui.line('') + '\n' output += ui.line('Input', indent=4) + '\n' output += ui.data(indent=6, field='Shape', value=keras_model.input_shape) + '\n' output += ui.line('Output', indent=4) + '\n' output += ui.data(indent=6, field='Shape', value=keras_model.output_shape) + '\n' if isinstance(model_config, dict) and 
'layers' in model_config: output += ui.data( indent=6, field='Activation', value=model_config['layers'][-1]['config'].get('activation') ) + '\n' elif isinstance(model_config, list): output += ui.data( indent=6, field='Activation', value=model_config[-1].get('config', {}).get('activation') ) + '\n' if display: if is_jupyter(): from IPython.core.display import display, HTML display(HTML(output)) else: print(output) else: return output
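# Usage sketch (assumption: `model` is a compiled Keras model):
#
#     print(model_summary_string(model))                                # standard Keras summary as a string
#     print(model_summary_string(model, mode='extended'))               # layer table with parameter counts
#     model_summary_string(model, mode='extended_wide', display=True)   # print (or HTML in Jupyter) directly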
def prepare(self): """Prepare dataset for the usage. Returns ------- self """ if is_jupyter(): from tqdm import tqdm_notebook as tqdm else: from tqdm import tqdm # Make sure audio directory exists Path().makedirs(path=os.path.join(self.local_path, 'audio')) # Make sure evaluation_setup directory exists Path().makedirs( path=os.path.join(self.local_path, self.evaluation_setup_folder)) if 'audio' in self.included_content_types: # Collect file ids files = [] files += ListDictContainer(filename=os.path.join( self.local_path, 'testing_set.csv')).load( fields=['query_id', 'segment_start', 'segment_end']) files += ListDictContainer(filename=os.path.join( self.local_path, 'training_set.csv')).load( fields=['query_id', 'segment_start', 'segment_end']) file_progress = tqdm(files, desc="{0: <25s}".format('Files'), file=sys.stdout, leave=False, disable=self.disable_progress_bar, ascii=self.use_ascii_progress_bar) non_existing_videos = {} # Load list of already identified non-accessible videos item_access_log_filename = os.path.join( self.local_path, 'item_access_error.log.csv') if os.path.isfile(item_access_log_filename): for item in ListDictContainer( filename=item_access_log_filename).load( fields=['query_id', 'error']): non_existing_videos[item['query_id']] = item # Check that audio files exists for file_data in file_progress: audio_filename = os.path.join( self.local_path, 'audio', 'Y{query_id}_{segment_start}_{segment_end}.{extension}'. format(query_id=file_data['query_id'], segment_start=file_data['segment_start'], segment_end=file_data['segment_end'], extension=self.default_audio_extension)) # Download segment if it does not exists if not os.path.isfile(audio_filename) and file_data[ 'query_id'] not in non_existing_videos: try: AudioContainer().load_from_youtube( query_id=file_data['query_id'], start=file_data['segment_start'], stop=file_data['segment_end']).save( filename=audio_filename) except IOError as e: non_existing_videos[file_data['query_id']] = { 'error': str(e.message).replace('\n', ' '), 'query_id': file_data['query_id'] } # Save list of non-accessible videos ListDictContainer(list(non_existing_videos.values()), filename=item_access_log_filename).save( fields=['query_id', 'error']) # Evaluation setup filenames train_filename = self.evaluation_setup_filename(setup_part='train', fold=1, scene_label='youtube', file_extension='txt') test_filename = self.evaluation_setup_filename(setup_part='test', fold=1, scene_label='youtube', file_extension='txt') evaluate_filename = self.evaluation_setup_filename( setup_part='evaluate', fold=1, scene_label='youtube', file_extension='txt') # Check that evaluation setup exists evaluation_setup_exists = True if not os.path.isfile(train_filename) or not os.path.isfile( test_filename) or not os.path.isfile(evaluate_filename): evaluation_setup_exists = False if not evaluation_setup_exists: # Evaluation setup was not found, generate one fold = 1 train_meta = MetaDataContainer() for item in MetaDataContainer().load( os.path.join(self.local_path, 'groundtruth_weak_label_training_set.csv')): if not item.filename.endswith(self.default_audio_extension): item.filename = os.path.join( 'audio', 'Y' + os.path.splitext(item.filename)[0] + '.' 
+ self.default_audio_extension) # Set scene label item.scene_label = 'youtube' # Translate event onset and offset, weak labels item.offset -= item.onset item.onset -= item.onset # Only collect items which exists if audio present if 'audio' in self.included_content_types: if os.path.isfile( os.path.join(self.local_path, item.filename)): train_meta.append(item) else: train_meta.append(item) train_meta.save( filename=self.evaluation_setup_filename(setup_part='train', fold=fold, scene_label='youtube', file_extension='txt')) evaluate_meta = MetaDataContainer() for item in MetaDataContainer().load( os.path.join(self.local_path, 'groundtruth_strong_label_testing_set.csv')): if not item.filename.endswith(self.default_audio_extension): item.filename = os.path.join( 'audio', 'Y' + os.path.splitext(item.filename)[0] + '.' + self.default_audio_extension) # Set scene label item.scene_label = 'youtube' # Only collect items which exists if 'audio' in self.included_content_types: if os.path.isfile( os.path.join(self.local_path, item.filename)): evaluate_meta.append(item) else: evaluate_meta.append(item) evaluate_meta.save( filename=self.evaluation_setup_filename(setup_part='evaluate', fold=fold, scene_label='youtube', file_extension='txt')) test_meta = MetaDataContainer() for item in evaluate_meta: test_meta.append(MetaDataItem({'filename': item.filename})) test_meta.save( filename=self.evaluation_setup_filename(setup_part='test', fold=fold, scene_label='youtube', file_extension='txt')) # Load meta and cross validation self.load() if not self.meta_container.exists(): fold = 1 meta_data = MetaDataContainer() meta_data += MetaDataContainer().load( self.evaluation_setup_filename(setup_part='train', fold=fold, scene_label='youtube', file_extension='txt')) meta_data += MetaDataContainer().load( self.evaluation_setup_filename(setup_part='evaluate', fold=fold, scene_label='youtube', file_extension='txt')) # Save meta meta_data.save(filename=self.meta_file) # Load meta and cross validation self.load() return self
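# Usage sketch (assumption: `dataset` is a dataset class instance owning prepare(),
# with local_path pointing at the already downloaded dataset; the accessor on the
# second line is hypothetical and shown only for context):
#
#     dataset.prepare()                     # fetch missing audio, generate evaluation setup, load meta
#     train_items = dataset.train(fold=1)   # hypothetical accessor for the generated fold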