Example #1

import numpy
from dcase_util.ui import FancyLogger  # assumed import source for the logger used below


def data_collector(item_list=None,
                   data_processing_chain=None,
                   meta_processing_chain=None,
                   target_format='single_target_per_sequence',
                   channel_dimension='channels_last',
                   verbose=True,
                   print_indent=2):
    """Data collector

    Collects data and meta into matrices while processing them through processing chains.

    Parameters
    ----------
    item_list : list of dict
        Items in the data sequence. Each item is a two-level dictionary with first-level keys
        'data' and 'meta'; the second level contains the parameters for the process method of the
        corresponding processing chain.
        Default value None

    data_processing_chain : ProcessingChain
        Data processing chain.
        Default value None

    meta_processing_chain : ProcessingChain
        Meta processing chain.
        Default value None

    target_format : str
        Meta data interpretation in relation to the data items.
        Possible values ['single_target_per_sequence', 'same']
        Default value 'single_target_per_sequence'

    channel_dimension : str
        Controls where the channel dimension is added. Similar to the Keras data format parameter.
        If None is given, no channel dimension is added.
        Possible values [None, 'channels_first', 'channels_last']
        Default value 'channels_last'

    verbose : bool
        Print information about the data
        Default value True

    print_indent : int
        Default value 2

    Returns
    -------
    numpy.ndarray
        data

    numpy.ndarray
        meta

    dict
        data size information

    """

    if item_list:
        # Collect all data and meta
        X = []
        Y = []

        for item in item_list:
            data = data_processing_chain.process(**item['data'])
            meta = meta_processing_chain.process(**item['meta'])

            X.append(data.data)

            # Collect meta
            if target_format == 'single_target_per_sequence':
                # Collect single target per sequence
                for i in range(0, data.shape[data.sequence_axis]):
                    Y.append(meta.data[:, 0])

            elif target_format == 'same':
                # Repeat the target for each frame in the sequence
                Y.append(
                    numpy.repeat(a=meta.data, repeats=data.length, axis=1).T)

        data_size = {}

        if len(data.shape) == 2:
            # Stack collected data and meta correct way
            if data.time_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)

            else:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
            }

        elif len(data.shape) == 3:
            # Stack collected data and meta correct way
            if data.sequence_axis == 0:
                X = numpy.vstack(X)
                Y = numpy.vstack(Y)

            elif data.sequence_axis == 1:
                X = numpy.hstack(X)
                Y = numpy.hstack(Y)

            elif data.sequence_axis == 2:
                X = numpy.dstack(X)
                Y = numpy.dstack(Y)

            if channel_dimension:
                # Add channel dimension to the data
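                # With items stacked as (sequence, time, data), the result is
                # (sequence, 1, time, data) for 'channels_first' and
                # (sequence, time, data, 1) for 'channels_last' (assumed layout).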
                if channel_dimension == 'channels_first':
                    X = numpy.expand_dims(X, axis=1)

                elif channel_dimension == 'channels_last':
                    X = numpy.expand_dims(X, axis=3)

            # Get data item size
            data_size = {
                'data': X.shape[data.data_axis],
                'time': X.shape[data.time_axis],
                'sequence': X.shape[data.sequence_axis],
            }

        if verbose:
            data_shape = data.shape
            data_axis = {
                'time_axis': data.time_axis,
                'data_axis': data.data_axis
            }

            if hasattr(data, 'sequence_axis'):
                data_axis['sequence_axis'] = data.sequence_axis

            meta_shape = meta.shape
            meta_axis = {
                'time_axis': meta.time_axis,
                'data_axis': meta.data_axis
            }

            if hasattr(meta, 'sequence_axis'):
                meta_axis['sequence_axis'] = meta.sequence_axis

            logger = FancyLogger()

            # Data information
            logger.line('Data', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape',
                        value=X.shape,
                        indent=print_indent + 2)

            # Item
            logger.data(field='Item shape',
                        value=data_shape,
                        indent=print_indent + 2)

            logger.data(field='Time',
                        value=data_shape[data_axis['time_axis']],
                        indent=print_indent + 4)

            logger.data(field='Data',
                        value=data_shape[data_axis['data_axis']],
                        indent=print_indent + 4)

            if 'sequence_axis' in data_axis:
                logger.data(field='Sequence',
                            value=data_shape[data_axis['sequence_axis']],
                            indent=print_indent + 4)

            # Meta information
            logger.line('Meta', indent=print_indent)

            # Matrix
            logger.data(field='Matrix shape',
                        value=Y.shape,
                        indent=print_indent + 2)

            # Item
            logger.data(field='Item shape',
                        value=meta_shape,
                        indent=print_indent + 2)
            logger.data(field='Time',
                        value=meta_shape[meta_axis['time_axis']],
                        indent=print_indent + 4)

            logger.data(field='Data',
                        value=meta_shape[meta_axis['data_axis']],
                        indent=print_indent + 4)

            if 'sequence_axis' in meta_axis:
                logger.data(field='Sequence',
                            value=meta_shape[meta_axis['sequence_axis']],
                            indent=print_indent + 4)

        return X, Y, data_size
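
A minimal usage sketch; the processing chain objects and the item_list contents are stand-ins for whatever the surrounding code base provides and are assumptions here:

X, Y, data_size = data_collector(
    item_list=item_list,                           # list of {'data': {...}, 'meta': {...}} items
    data_processing_chain=data_processing_chain,   # assumed ProcessingChain-like instance for features
    meta_processing_chain=meta_processing_chain,   # assumed ProcessingChain-like instance for targets
    target_format='single_target_per_sequence',
    channel_dimension='channels_last'
)
print(X.shape, Y.shape, data_size)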
Example #2

import os
import random
import logging

import numpy

from dcase_util.ui import FancyLogger                                  # assumed import source
from dcase_util.utils import setup_logging, SuppressStdoutAndStderr    # assumed import source


def setup_keras(seed=None,
                profile=None,
                backend='theano',
                device=None,
                BLAS_thread_count=None,
                BLAS_MKL_CNR=True,
                nvcc_fastmath=None,
                theano_floatX=None,
                theano_optimizer=None,
                theano_OpenMP=None,
                theano_deterministic=None,
                verbose=True):
    """Setup Keras and environmental variables effecting on it.
    Given parameters are used to override ones specified in keras.json file.

    Parameters
    ----------
    seed : int, optional
        Randomization seed. If none given, no seed is set.

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast']; overrides the other parameters with profile-specific values.

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']

    BLAS_thread_count : int
        Number of threads used by the BLAS libraries

    BLAS_MKL_CNR : bool
        Conditional numerical reproducibility for MKL BLAS library. Use this to reproduce results with MKL.
        Default value True

    nvcc_fastmath : bool, optional
        Controls the use of the fast math library in NVCC

    theano_floatX : str, optional
        Default dtype for Theano matrix and tensor ['float64', 'float32', 'float16']

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.

    theano_deterministic : bool, optional
        Use more deterministic computation in Theano (sets the Theano 'deterministic=more' flag).

    verbose : bool
        Print information
        Default value True
    """
    def logger():
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()
        return logger_instance

    if profile:
        if profile == 'deterministic':
            if seed is None:
                message = 'You should set randomization seed to get deterministic behaviour.'
                logger().exception(message)
                raise AttributeError(message)

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            message = 'Invalid Keras setup profile [{profile}].'.format(
                profile=profile)
            logger().exception(message)
            raise AttributeError(message)

    # Set seed first
    if seed is not None:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        message = 'Invalid Keras backend type [{backend}].'.format(
            backend=backend)
        logger().exception(message)
        raise AttributeError(message)

    if device and device not in [
            'cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1'
    ]:
        message = 'Invalid Keras device type [{device}].'.format(device=device)
        logger().exception(message)
        raise AttributeError(message)

    if theano_floatX and theano_floatX not in [
            'float64', 'float32', 'float16'
    ]:
        message = 'Invalid Keras floatX type [{floatX}].'.format(
            floatX=theano_floatX)
        logger().exception(message)
        raise AttributeError(message)

    if theano_optimizer and theano_optimizer not in [
            'fast_run', 'merge', 'fast_compile', 'None'
    ]:
        message = 'Invalid Keras optimizer type [{optimizer}].'.format(
            optimizer=theano_optimizer)
        logger().exception(message)
        raise AttributeError(message)

    ui = FancyLogger()
    if verbose:
        ui.sub_header('Keras setup')

    # Get BLAS library associated with numpy (blas_opt_info is not available in all numpy versions)
    blas_opt_info = getattr(numpy.__config__, 'blas_opt_info', {})
    if blas_opt_info and 'libraries' in blas_opt_info:
        blas_libraries = blas_opt_info['libraries']
    else:
        blas_libraries = ['']

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count)
        blas_extra_info.append(
            'Threads[{threads}]'.format(threads=BLAS_thread_count))

        if BLAS_thread_count > 1:
            os.environ['OMP_DYNAMIC'] = 'False'
            os.environ['MKL_DYNAMIC'] = 'False'
        else:
            os.environ['OMP_DYNAMIC'] = 'True'
            os.environ['MKL_DYNAMIC'] = 'True'

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose:
        if blas_libraries[0].startswith('openblas'):
            ui.data(field='BLAS library',
                    value='OpenBLAS ({info})'.format(
                        info=', '.join(blas_extra_info)))

        elif blas_libraries[0].startswith('blas'):
            ui.data(field='BLAS library',
                    value='BLAS/Atlas ({info})'.format(
                        info=', '.join(blas_extra_info)))

        elif blas_libraries[0].startswith('mkl'):
            ui.data(field='BLAS library',
                    value='MKL ({info})'.format(
                        info=', '.join(blas_extra_info)))

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            # 'ldflags=',
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)

            if verbose:
                ui.data(field='floatX', value=theano_floatX)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')
            else:
                flags.append('openmp=False')

        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')
            else:
                flags.append('deterministic=default')

        if verbose:
            ui.line('Theano', indent=2)

            for item in flags:
                ui.data(field=item.split('=')[0],
                        value=item.split('=')[1],
                        indent=4)

        # Set the THEANO_FLAGS environment variable for Theano
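        # e.g. THEANO_FLAGS='warn.round=False,device=cuda0,floatX=float32,optimizer=fast_run,
        #                    nvcc.fastmath=True,openmp=True,deterministic=more'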
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        flags = []

        # Set device
        if device:
            flags.append('device=' + device)

            # In case of CPU disable visible GPU.
            if device == 'cpu':
                os.environ["CUDA_VISIBLE_DEVICES"] = ''

        if verbose:
            ui.line('Tensorflow', indent=2)

            for item in flags:
                ui.data(field=item.split('=')[0],
                        value=item.split('=')[1],
                        indent=4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr
        import keras

    if verbose:
        ui.foot()
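
A minimal usage sketch; the parameter values are illustrative only, and keras is imported inside setup_keras once the environment variables are in place:

setup_keras(
    seed=2018,                 # required by the 'deterministic' profile
    profile='deterministic',   # forces CPU, single BLAS thread, no fastmath, no OpenMP
    backend='theano',
    theano_floatX='float32',   # not part of the profile, so applied as given
    verbose=True
)
import keras  # already imported by setup_keras; repeated here only to bind the name locally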