def data_collector(item_list=None,
                   data_processing_chain=None,
                   meta_processing_chain=None,
                   target_format='single_target_per_sequence',
                   channel_dimension='channels_last',
                   verbose=True,
                   print_indent=2):
    """Data collector

    Collects data and meta into matrices while processing them through the processing chains.

    Parameters
    ----------
    item_list : list or dict
        Items in the data sequence. List of dictionaries with first level keys 'data' and 'meta'.
        The second level should contain parameters for the process method of the corresponding
        processing chain.
        Default value None

    data_processing_chain : ProcessingChain
        Data processing chain.
        Default value None

    meta_processing_chain : ProcessingChain
        Meta processing chain.
        Default value None

    target_format : str
        Meta data interpretation in relation to the data items.
        Possible values ['single_target_per_sequence', 'same']
        Default value 'single_target_per_sequence'

    channel_dimension : str
        Controls where the channel dimension is added. Similar to the Keras data format parameter.
        If None is given, no channel dimension is added.
        Possible values [None, 'channels_first', 'channels_last']
        Default value 'channels_last'

    verbose : bool
        Print information about the data.
        Default value True

    print_indent : int
        Indentation level used when printing information.
        Default value 2

    Returns
    -------
    numpy.ndarray
        data

    numpy.ndarray
        meta

    dict
        data size information

    """

    if item_list:
        # Collect all data and meta
        X = []
        Y = []

        for item in item_list:
            data = data_processing_chain.process(**item['data'])
            meta = meta_processing_chain.process(**item['meta'])

            X.append(data.data)

            # Collect meta
            if target_format == 'single_target_per_sequence':
                # Collect a single target per sequence
                for i in range(0, data.shape[data.sequence_axis]):
                    Y.append(meta.data[:, 0])

            elif target_format == 'same':
                # Repeat the target for each frame so that meta matches the data length
                Y.append(
                    numpy.repeat(
                        a=meta.data,
                        repeats=data.length,
                        axis=1
                    ).T
                )

        data_size = {}

        if len(data.shape) == 2:
            # Stack collected data and meta the correct way
            if data.time_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)

            else:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
            }

        elif len(data.shape) == 3:
            # Stack collected data and meta the correct way
            if data.sequence_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)

            elif data.sequence_axis == 1:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)

            elif data.sequence_axis == 2:
                X = numpy.dstack(X)
                Y = numpy.dstack(Y)

            if channel_dimension:
                # Add channel dimension to the data
                if channel_dimension == 'channels_first':
                    X = numpy.expand_dims(X, axis=1)

                elif channel_dimension == 'channels_last':
                    X = numpy.expand_dims(X, axis=3)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
                'sequence': X.shape[data.sequence_axis],
            }

        if verbose:
            data_shape = data.shape
            data_axis = {
                'time_axis': data.time_axis,
                'data_axis': data.data_axis
            }

            if hasattr(data, 'sequence_axis'):
                data_axis['sequence_axis'] = data.sequence_axis

            meta_shape = meta.shape
            meta_axis = {
                'time_axis': meta.time_axis,
                'data_axis': meta.data_axis
            }

            if hasattr(meta, 'sequence_axis'):
                meta_axis['sequence_axis'] = meta.sequence_axis

            logger = FancyLogger()

            # Data information
            logger.line('Data', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape', value=X.shape, indent=print_indent + 2)

            # Item
            logger.data(field='Item shape', value=data_shape, indent=print_indent + 2)
            logger.data(field='Time', value=data_shape[data_axis['time_axis']], indent=print_indent + 4)
            logger.data(field='Data', value=data_shape[data_axis['data_axis']], indent=print_indent + 4)

            if 'sequence_axis' in data_axis:
                logger.data(field='Sequence', value=data_shape[data_axis['sequence_axis']], indent=print_indent + 4)

            # Meta information
            logger.line('Meta', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape', value=Y.shape, indent=print_indent + 2)

            # Item
            logger.data(field='Item shape', value=meta_shape, indent=print_indent + 2)
            logger.data(field='Time', value=meta_shape[meta_axis['time_axis']], indent=print_indent + 4)
            logger.data(field='Data', value=meta_shape[meta_axis['data_axis']], indent=print_indent + 4)

            if 'sequence_axis' in meta_axis:
                logger.data(field='Sequence', value=meta_shape[meta_axis['sequence_axis']], indent=print_indent + 4)

        return X, Y, data_size
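
# Usage sketch (illustrative only, kept as a comment so it is not executed on import):
# assumes data_processing_chain and meta_processing_chain are already constructed
# ProcessingChain instances, and item_list follows the structure described in the
# docstring. The file path and label below are hypothetical placeholders.
#
#   item_list = [
#       {
#           'data': {'filename': 'features/file_1.cpickle'},  # kwargs for data chain .process()
#           'meta': {'label': 'cat'}                          # kwargs for meta chain .process()
#       },
#   ]
#
#   X, Y, data_size = data_collector(
#       item_list=item_list,
#       data_processing_chain=data_processing_chain,
#       meta_processing_chain=meta_processing_chain,
#       target_format='single_target_per_sequence',
#       channel_dimension='channels_first'
#   )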
def setup_keras(seed=None,
                profile=None,
                backend='theano',
                device=None,
                BLAS_thread_count=None,
                BLAS_MKL_CNR=True,
                nvcc_fastmath=None,
                theano_floatX=None,
                theano_optimizer=None,
                theano_OpenMP=None,
                theano_deterministic=None,
                verbose=True):
    """Setup Keras and the environment variables affecting it.

    The given parameters are used to override the ones specified in the keras.json file.

    Parameters
    ----------
    seed : int, optional
        Randomization seed. If none given, no seed is set.

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast'], will override other parameters with profile parameters.

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']

    BLAS_thread_count : int
        Number of threads used by the BLAS libraries.

    BLAS_MKL_CNR : bool
        Conditional Numerical Reproducibility for the MKL BLAS library. Use this to reproduce results with MKL.
        Default value True

    nvcc_fastmath : str, optional
        Control the usage of the fast math library in NVCC.

    theano_floatX : str, optional
        Default dtype for Theano matrices and tensors ['float64', 'float32', 'float16']

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.

    theano_deterministic : bool, optional
        Enable or disable Theano deterministic mode (sets the 'deterministic' flag).

    verbose : bool
        Print information
        Default value True

    """

    def logger():
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()

        return logger_instance

    if profile:
        if profile == 'deterministic':
            if seed is None:
                message = 'You should set randomization seed to get deterministic behaviour.'
                logger().exception(message)
                raise AttributeError(message)

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            message = 'Invalid Keras setup profile [{profile}].'.format(profile=profile)
            logger().exception(message)
            raise AttributeError(message)

    # Set seed first
    if seed:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        message = 'Invalid Keras backend type [{backend}].'.format(backend=backend)
        logger().exception(message)
        raise AttributeError(message)

    if device and device not in ['cpu', 'cuda', 'cuda0', 'opencl0:0']:
        message = 'Invalid Keras device type [{device}].'.format(device=device)
        logger().exception(message)
        raise AttributeError(message)

    if theano_floatX and theano_floatX not in ['float64', 'float32', 'float16']:
        message = 'Invalid Keras floatX type [{floatX}].'.format(floatX=theano_floatX)
        logger().exception(message)
        raise AttributeError(message)

    if theano_optimizer and theano_optimizer not in ['fast_run', 'merge', 'fast_compile', 'None']:
        message = 'Invalid Keras optimizer type [{optimizer}].'.format(optimizer=theano_optimizer)
        logger().exception(message)
        raise AttributeError(message)

    ui = FancyLogger()
    if verbose:
        ui.sub_header('Keras setup')

    # Get BLAS library associated to numpy
    if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
        blas_libraries = numpy.__config__.blas_opt_info['libraries']
    else:
        blas_libraries = ['']

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count)
        blas_extra_info.append('Threads[{threads}]'.format(threads=BLAS_thread_count))

        if BLAS_thread_count > 1:
            os.environ['OMP_DYNAMIC'] = 'False'
            os.environ['MKL_DYNAMIC'] = 'False'
        else:
            os.environ['OMP_DYNAMIC'] = 'True'
            os.environ['MKL_DYNAMIC'] = 'True'

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose:
        if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
            blas_libraries = numpy.__config__.blas_opt_info['libraries']

            if blas_libraries[0].startswith('openblas'):
                ui.data(field='BLAS library', value='OpenBLAS ({info})'.format(info=', '.join(blas_extra_info)))

            elif blas_libraries[0].startswith('blas'):
                ui.data(field='BLAS library', value='BLAS/Atlas ({info})'.format(info=', '.join(blas_extra_info)))

            elif blas_libraries[0].startswith('mkl'):
                ui.data(field='BLAS library', value='MKL ({info})'.format(info=', '.join(blas_extra_info)))

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            # 'ldflags=',
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)

            if verbose:
                ui.data(field='floatX', value=theano_floatX)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')
            else:
                flags.append('openmp=False')

        # Set deterministic mode
        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')
            else:
                flags.append('deterministic=default')

        if verbose:
            ui.line('Theano', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0], value=item.split('=')[1], indent=4)

        # Set environment variable for Theano
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        flags = []

        # Set device
        if device:
            flags.append('device=' + device)

        # In case of CPU, disable visible GPUs.
        if device == 'cpu':
            os.environ["CUDA_VISIBLE_DEVICES"] = ''

        if verbose:
            ui.line('Tensorflow', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0], value=item.split('=')[1], indent=4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr
        import keras

    if verbose:
        ui.foot()
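
# Usage sketch (illustrative only, kept as a comment so it is not executed on import):
# setup_keras should be called before Keras is imported anywhere else, since backend
# selection and Theano/TensorFlow flags are passed through environment variables.
# The seed value below is an arbitrary example.
#
#   setup_keras(
#       seed=2018,
#       profile='deterministic',
#       backend='theano'
#   )
#   import keras  # backend and flags are already in place at this point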