Example 1
    def show(self, mode='auto', indent=0, visualize=False):
        """Print container content

        If called inside Jupyter notebook HTML formatted version is shown.

        Parameters
        ----------
        mode : str
            Output type, possible values ['auto', 'print', 'html']. 'html' will work only in Jupyter notebook
            Default value 'auto'

        indent : int
            Amount of indent
            Default value 0

        visualize : bool
            Visualize container data if class has plot method
            Default value False

        Returns
        -------
        Nothing

        """

        if mode == 'auto':
            if is_jupyter():
                mode = 'html'
            else:
                mode = 'print'

        if mode not in ['html', 'print']:
            # Unknown mode given
            message = '{name}: Unknown mode [{mode}]'.format(
                name=self.__class__.__name__, mode=mode)
            self.logger.exception(message)
            raise ValueError(message)

        if mode == 'html':
            from IPython.core.display import display, HTML
            display(HTML(self.to_html(indent=indent)))

            if visualize and hasattr(self, 'plot'):
                # If class has plot method use it to visualize the content
                self.plot()

        elif mode == 'print':
            print(self.to_string(indent=indent))
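
The mode-dispatch pattern above can be illustrated outside the class. A minimal sketch, assuming IPython is installed; the is_jupyter() heuristic and the show_text name are ours, not the library's:

def is_jupyter():
    # Heuristic: a ZMQ-based interactive shell implies a notebook front-end
    try:
        from IPython import get_ipython
        shell = get_ipython()
        return shell is not None and shell.__class__.__name__ == 'ZMQInteractiveShell'
    except ImportError:
        return False

def show_text(text, mode='auto'):
    # Resolve 'auto' into a concrete output mode, as in show() above
    if mode == 'auto':
        mode = 'html' if is_jupyter() else 'print'

    if mode == 'html':
        from IPython.core.display import display, HTML
        display(HTML('<pre>{text}</pre>'.format(text=text)))

    elif mode == 'print':
        print(text)

    else:
        raise ValueError('Unknown mode [{mode}]'.format(mode=mode))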
Example 2
    def show(self, mode='auto', indent=0):
        """Print information about the best model

        If called inside Jupyter notebook HTML formatted version is shown.

        Parameters
        ----------
        mode : str
            Output type, possible values ['auto', 'print', 'html']. 'html' will work only in Jupyter notebook
            Default value 'auto'

        indent : int
            Amount of indent
            Default value 0

        Returns
        -------
        Nothing

        """

        if mode == 'auto':
            if is_jupyter():
                mode = 'html'
            else:
                mode = 'print'

        if mode not in ['html', 'print']:
            # Unknown mode given
            message = '{name}: Unknown mode [{mode}]'.format(name=self.__class__.__name__, mode=mode)
            self.logger.exception(message)
            raise ValueError(message)

        if mode == 'html':
            from IPython.core.display import display, HTML
            display(HTML(self.to_html(indent=indent)))

        elif mode == 'print':
            print(self.to_string(indent=indent))
Example 3
    def compress(self,
                 filename=None,
                 path=None,
                 file_list=None,
                 size_limit=None):
        """Compress the package. Supports Zip and Tar packages.

        Parameters
        ----------
        filename : str
            Filename for the package. If None is given, the one given to the class initializer is used.
            Default value None

        path : str
            Path from which to collect files if file_list is not set. Files are collected recursively.
            Default value None

        file_list : list of dict
            List of files to be included in the package.
            Item format {'source': 'file1.txt', 'target': 'folder1/file1.txt'}.
            Default value None

        size_limit : int
            Size limit in bytes.
            Default value None

        Returns
        -------
        list of str
            Filenames of created packages

        """

        if is_jupyter():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        if filename is not None:
            self.filename = filename
            self.detect_file_format()
            self.validate_format()

        if path is not None and file_list is None:
            files = Path(path=path).file_list(recursive=True)
            file_list = []

            for filename in files:
                file_list.append({
                    'source': filename,
                    'target': os.path.relpath(filename)
                })

        package_filenames = []

        total_uncompressed_size = 0
        for item in file_list:
            total_uncompressed_size += os.path.getsize(item['source'])

        if size_limit is None or total_uncompressed_size < size_limit:
            package = None

            if self.format == FileFormat.ZIP:
                package = zipfile.ZipFile(file=self.filename, mode='w')

            elif self.format == FileFormat.TAR:
                package = tarfile.open(name=self.filename, mode='w:gz')

            package_filenames.append(self.filename)

            size_uncompressed = 0
            for item in file_list:
                if os.path.exists(item['source']):
                    if self.format == FileFormat.ZIP:
                        package.write(filename=item['source'],
                                      arcname=os.path.relpath(item['target']),
                                      compress_type=zipfile.ZIP_DEFLATED)
                        file_info = package.getinfo(
                            os.path.relpath(item['target']))
                        size_uncompressed += file_info.file_size

                    elif self.format == FileFormat.TAR:
                        package.add(name=item['source'],
                                    arcname=os.path.relpath(item['target']))
                        file_info = package.gettarinfo(name=item['source'],
                                                       arcname=os.path.relpath(
                                                           item['target']))
                        size_uncompressed += file_info.size

                else:
                    package.close()
                    message = '{name}: Non-existing file [{filename}] detected while compressing a package [{package}]'.format(
                        name=self.__class__.__name__,
                        filename=item['source'],
                        package=self.filename)
                    if self.logger:
                        self.logger.exception(message)

                    raise IOError(message)

            package.close()

        else:
            base, extension = os.path.splitext(self.filename)
            filename_template = base + '.{package_id}' + extension
            package = None

            # Initialize package
            package_id = 1

            size_uncompressed = 0
            if self.format == FileFormat.ZIP:
                package = zipfile.ZipFile(
                    file=filename_template.format(package_id=package_id),
                    mode='w')

            elif self.format == FileFormat.TAR:
                package = tarfile.open(
                    name=filename_template.format(package_id=package_id),
                    mode='w:gz')

            package_filenames.append(
                filename_template.format(package_id=package_id))

            progress = tqdm(file_list,
                            desc="{0: <25s}".format('Compress'),
                            file=sys.stdout,
                            leave=False,
                            disable=self.disable_progress_bar,
                            ascii=self.use_ascii_progress_bar)

            for item_id, item in enumerate(progress):
                if self.disable_progress_bar:
                    self.logger.info(
                        '  {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'.
                        format(title='Compress ',
                               item_id=item_id,
                               total=len(progress),
                               file=item['source']))

                if os.path.exists(item['source']):
                    current_size_uncompressed = os.path.getsize(item['source'])
                    if size_uncompressed + current_size_uncompressed > size_limit:
                        # Size limit met, close current package and open a new one.
                        package.close()

                        package_id += 1
                        if self.format == FileFormat.ZIP:
                            package = zipfile.ZipFile(
                                file=filename_template.format(
                                    package_id=package_id),
                                mode='w')

                        elif self.format == FileFormat.TAR:
                            package = tarfile.open(
                                name=filename_template.format(
                                    package_id=package_id),
                                mode='w:gz')

                        package_filenames.append(
                            filename_template.format(package_id=package_id))

                        size_uncompressed = 0

                    if self.format == FileFormat.ZIP:
                        package.write(filename=item['source'],
                                      arcname=os.path.relpath(item['target']),
                                      compress_type=zipfile.ZIP_DEFLATED)

                        file_info = package.getinfo(
                            os.path.relpath(item['target']))
                        size_uncompressed += file_info.file_size

                    elif self.format == FileFormat.TAR:
                        package.add(name=item['source'],
                                    arcname=os.path.relpath(item['target']))
                        file_info = package.gettarinfo(name=item['source'],
                                                       arcname=os.path.relpath(
                                                           item['target']))
                        size_uncompressed += file_info.size

                else:
                    package.close()
                    message = '{name}: Non-existing file [{filename}] detected while compressing a package [{package}]'.format(
                        name=self.__class__.__name__,
                        filename=item['source'],
                        package=filename_template.format(
                            package_id=package_id))

                    if self.logger:
                        self.logger.exception(message)

                    raise IOError(message)

            package.close()

        return package_filenames
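
The size-limited splitting in the else branch reduces to a small standard-library pattern. A standalone sketch; compress_split, the filename template, and the 1 MB default are illustrative assumptions, not the library's API:

import os
import zipfile

def compress_split(sources, template='package.{id}.zip', size_limit=1024 * 1024):
    """Write files into numbered zip packages, opening a new package
    whenever the accumulated uncompressed size would exceed size_limit."""
    package_id = 1
    size_uncompressed = 0
    package_filenames = [template.format(id=package_id)]
    package = zipfile.ZipFile(package_filenames[-1], mode='w')

    for source in sources:
        current_size = os.path.getsize(source)
        if size_uncompressed + current_size > size_limit:
            # Size limit met, close current package and open a new one
            package.close()
            package_id += 1
            package_filenames.append(template.format(id=package_id))
            package = zipfile.ZipFile(package_filenames[-1], mode='w')
            size_uncompressed = 0

        package.write(source,
                      arcname=os.path.relpath(source),
                      compress_type=zipfile.ZIP_DEFLATED)
        size_uncompressed += current_size

    package.close()
    return package_filenames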
Example 4
    def extract(self,
                target_path=None,
                overwrite=False,
                omit_first_level=False):
        """Extract the package. Supports Zip and Tar packages.

        Parameters
        ----------
        target_path : str
            Path to extract the package content to. If none is given, the package is extracted into the same path as the package itself.
            Default value None

        overwrite : bool
            Overwrite existing files.
            Default value False

        omit_first_level : bool
            Omit the first directory level.
            Default value False

        Returns
        -------
        self

        """

        if is_jupyter():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        if target_path is None:
            target_path = os.path.split(self.filename)[0]

        Path(target_path).create()

        offset = 0

        if self.format == FileFormat.ZIP:
            with zipfile.ZipFile(self.filename, "r") as z:
                if omit_first_level:
                    parts = []
                    for name in z.namelist():
                        if not name.endswith('/'):
                            parts.append(name.split('/')[:-1])

                    prefix = os.path.commonprefix(parts) or ''

                    if prefix:
                        # Keep only the first directory level
                        prefix = '/'.join(prefix[:1]) + '/'

                    offset = len(prefix)

                # Start extraction
                members = z.infolist()
                file_count = 1
                progress = tqdm(members,
                                desc="{0: <25s}".format('Extract'),
                                file=sys.stdout,
                                leave=False,
                                disable=self.disable_progress_bar,
                                ascii=self.use_ascii_progress_bar)

                for i, member in enumerate(progress):
                    if self.disable_progress_bar:
                        self.logger.info(
                            '  {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'
                            .format(title='Extract ',
                                    item_id=i,
                                    total=len(progress),
                                    file=member.filename))

                    if not omit_first_level or len(member.filename) > offset:
                        if omit_first_level:
                            member.filename = member.filename[offset:]

                        progress.set_description("{0: >35s}".format(
                            member.filename.split('/')[-1]))
                        progress.update()

                        if not os.path.isfile(
                                os.path.join(target_path,
                                             member.filename)) or overwrite:
                            try:
                                if hasattr(self, 'package_password') and self.package_password:
                                    z.extract(member=member,
                                              path=target_path,
                                              pwd=self.package_password)

                                else:
                                    z.extract(member=member, path=target_path)

                            except KeyboardInterrupt:
                                # Delete latest file, since most likely it was not extracted fully
                                os.remove(
                                    os.path.join(target_path, member.filename))

                                # Quit
                                sys.exit()

                        file_count += 1

        elif self.format == FileFormat.TAR:
            tar = tarfile.open(self.filename, "r:gz")
            progress = tqdm(tar,
                            desc="{0: <25s}".format('Extract'),
                            file=sys.stdout,
                            leave=False,
                            disable=self.disable_progress_bar,
                            ascii=self.use_ascii_progress_bar)

            for i, tar_info in enumerate(progress):
                if self.disable_progress_bar:
                    self.logger.info(
                        '  {title:<15s} [{item_id:d}/{total:d}] {file:<30s}'.
                        format(title='Extract ',
                               item_id=i,
                               total=len(progress),
                               file=tar_info.name))

                if not os.path.isfile(os.path.join(
                        target_path, tar_info.name)) or overwrite:
                    tar.extract(tar_info, target_path)

                tar.members = []
            tar.close()

        return self
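
The omit_first_level handling computes the directory prefix shared by all archive members before stripping it. A condensed, self-contained sketch of that computation; first_level_prefix is a hypothetical helper name:

import os
import zipfile

def first_level_prefix(archive_path):
    # Collect the directory components of every file entry
    with zipfile.ZipFile(archive_path, 'r') as z:
        parts = [name.split('/')[:-1]
                 for name in z.namelist()
                 if not name.endswith('/')]

    # commonprefix also works element-wise on lists of path components
    prefix = os.path.commonprefix(parts)
    return '/'.join(prefix[:1]) + '/' if prefix else ''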
Example 5
def setup_keras(seed=None,
                profile=None,
                backend='theano',
                device=None,
                BLAS_thread_count=None,
                BLAS_MKL_CNR=True,
                nvcc_fastmath=None,
                theano_floatX=None,
                theano_optimizer=None,
                theano_OpenMP=None,
                theano_deterministic=None,
                verbose=True,
                print_indent=0):
    """Setup Keras and environmental variables effecting on it.
    Given parameters are used to override ones specified in keras.json file.

    Parameters
    ----------
    seed : int, optional
        Randomization seed. If none given, no seed is set.
        Default value None

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast']; overrides the other parameters with profile-specific values.
        Default value None

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']
        Default value None

    BLAS_thread_count : int
        Number of threads used by BLAS libraries
        Default value None

    BLAS_MKL_CNR : bool
        Conditional numerical reproducibility for MKL BLAS library. Use this to reproduce results with MKL.
        Default value True

    nvcc_fastmath : str, optional
        Control the usage of fast math library in NVCC
        Default value None

    theano_floatX : str, optional
        Default dtype for Theano matrix and tensor ['float64', 'float32', 'float16']
        Default value None

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']
        Default value None

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.
        Default value None

    theano_deterministic : bool, optional
        Make Theano computations more deterministic (maps to the Theano 'deterministic' flag).
        Default value None

    verbose : bool
        Print information
        Default value True

    print_indent : int
        Print indent
        Default value 0

    """
    def logger():
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()

        return logger_instance

    if profile:
        if profile == 'deterministic':
            if seed is None:
                message = 'You should set randomization seed to get deterministic behaviour.'
                logger().exception(message)
                raise AttributeError(message)

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            message = 'Invalid Keras setup profile [{profile}].'.format(
                profile=profile)
            logger().exception(message)
            raise AttributeError(message)

    # Set seed first
    if seed:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        message = 'Invalid Keras backend type [{backend}].'.format(
            backend=backend)
        logger().exception(message)
        raise AttributeError(message)

    if device and device not in ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']:
        message = 'Invalid Keras device type [{device}].'.format(device=device)
        logger().exception(message)
        raise AttributeError(message)

    if theano_floatX and theano_floatX not in [
            'float64', 'float32', 'float16'
    ]:
        message = 'Invalid Keras floatX type [{floatX}].'.format(
            floatX=theano_floatX)
        logger().exception(message)
        raise AttributeError(message)

    if theano_optimizer and theano_optimizer not in [
            'fast_run', 'merge', 'fast_compile', 'None'
    ]:
        message = 'Invalid Keras optimizer type [{optimizer}].'.format(
            optimizer=theano_optimizer)
        logger().exception(message)
        raise AttributeError(message)

    if is_jupyter():
        ui = FancyHTMLPrinter()

    else:
        ui = FancyLogger()

    if verbose:
        ui.sub_header('Keras setup', indent=print_indent)

    # Get BLAS library associated to numpy
    if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
        blas_libraries = numpy.__config__.blas_opt_info['libraries']

    else:
        blas_libraries = ['']

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count)
        blas_extra_info.append(
            'Threads[{threads}]'.format(threads=BLAS_thread_count))

        if BLAS_thread_count > 1:
            os.environ['OMP_DYNAMIC'] = 'False'
            os.environ['MKL_DYNAMIC'] = 'False'

        else:
            os.environ['OMP_DYNAMIC'] = 'True'
            os.environ['MKL_DYNAMIC'] = 'True'

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose:
        if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
            blas_libraries = numpy.__config__.blas_opt_info['libraries']

            if blas_libraries[0].startswith('openblas'):
                ui.data(field='BLAS library',
                        value='OpenBLAS ({info})'.format(
                            info=', '.join(blas_extra_info)),
                        indent=print_indent + 2)

            elif blas_libraries[0].startswith('blas'):
                ui.data(field='BLAS library',
                        value='BLAS/Atlas ({info})'.format(
                            info=', '.join(blas_extra_info)),
                        indent=print_indent + 2)

            elif blas_libraries[0].startswith('mkl'):
                ui.data(field='BLAS library',
                        value='MKL ({info})'.format(
                            info=', '.join(blas_extra_info)),
                        indent=print_indent + 2)

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend, indent=print_indent + 2)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            # 'ldflags=',
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)

            if verbose:
                ui.data(field='floatX',
                        value=theano_floatX,
                        indent=print_indent + 2)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')

            else:
                flags.append('openmp=False')

        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')

            else:
                flags.append('deterministic=default')

        if verbose:
            ui.data('Theano', '', indent=print_indent + 2)

            for item in flags:
                ui.data(field=item.split('=')[0],
                        value=item.split('=')[1],
                        indent=print_indent + 4)

        # Set environmental variable for Theano
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        if verbose:
            ui.data('Tensorflow', '', indent=print_indent + 2)

        # In case of CPU, disable visible GPUs.
        if device == 'cpu':
            os.environ["CUDA_VISIBLE_DEVICES"] = ''

        import tensorflow as tf
        if seed:
            # Set random seed
            tf.set_random_seed(seed)

        config = tf.ConfigProto(inter_op_parallelism_threads=BLAS_thread_count)

        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        import logging
        logging.getLogger('tensorflow').setLevel(logging.FATAL)

        with SuppressStdoutAndStderr():
            from keras import backend as k
            session = tf.Session(config=config)
            k.set_session(session)

        if verbose:
            gpu_device_found = False
            if device != 'cpu':
                from tensorflow.python.client import device_lib
                for device_candidate in device_lib.list_local_devices():
                    if device_candidate.device_type == 'GPU':
                        gpu_device_found = True

                if not gpu_device_found:
                    device = 'cpu ({original_device} was set but not found)'.format(
                        original_device=device)

            ui.data(field='Device', value=device, indent=print_indent + 4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr
        import keras

    if verbose:
        ui.foot(indent=print_indent)
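
A condensed sketch of what the 'deterministic' profile effectively does before Keras is imported; the values mirror the profile branch above, and make_deterministic is our name, not the library's:

import os
import random
import numpy

def make_deterministic(seed):
    # Seed both random number generators used above
    numpy.random.seed(seed)
    random.seed(seed)

    # Single-threaded BLAS plus MKL conditional numerical reproducibility
    os.environ['GOTO_NUM_THREADS'] = '1'
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'
    os.environ['MKL_CBWR'] = 'COMPATIBLE'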
Example 6
    def download(self):
        """Download remote file and save it as local file.

        Returns
        -------
        self

        """

        if is_jupyter():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        try:
            if self.local_changed():
                try:
                    from urllib.request import urlretrieve

                except ImportError:
                    from urllib import urlretrieve

                # Set socket timeout
                socket.setdefaulttimeout(self.socket_timeout)

                def progress_hook(t):
                    """
                    Wraps tqdm instance. Don't forget to close() or __exit__()
                    the tqdm instance once you're done with it (easiest using `with` syntax).
                    """

                    last_b = [0]

                    def inner(b=1, bsize=1, tsize=None):
                        """
                        b  : int, optional
                            Number of blocks just transferred [default: 1].
                        bsize  : int, optional
                            Size of each block (in tqdm units) [default: 1].
                        tsize  : int, optional
                            Total size (in tqdm units). If None [default], t.total remains unchanged.
                        """
                        if tsize is not None:
                            t.total = tsize

                        t.update((b - last_b[0]) * bsize)
                        last_b[0] = b

                    return inner

                tmp_file = self.filename + '.partial_download'

                with tqdm(desc="{0: >25s}".format(os.path.splitext(self.remote_file.split('/')[-1])[0]),
                          file=sys.stdout,
                          unit='B',
                          unit_scale=True,
                          miniters=1,
                          leave=False,
                          disable=self.disable_progress_bar,
                          ascii=self.use_ascii_progress_bar) as t:

                    try:
                        local_filename, headers = urlretrieve(
                            url=self.remote_file,
                            filename=tmp_file,
                            reporthook=progress_hook(t),
                            data=None
                        )
                    except IOError:
                        # Second attempt by ignoring SSL context.
                        import ssl
                        ssl._create_default_https_context = ssl._create_unverified_context

                        local_filename, headers = urlretrieve(
                            url=self.remote_file,
                            filename=tmp_file,
                            reporthook=progress_hook(t),
                            data=None
                        )

                tmp_md5 = get_file_hash(filename=tmp_file)
                file_valid = True
                if self.remote_md5 is not None:
                    if tmp_md5 == self.remote_md5:
                        file_valid = True

                    else:
                        message = '{name}: Download failed [{filename}] [md5 mismatch]'.format(
                            name=self.__class__.__name__,
                            filename=self.remote_file,
                        )
                        self.logger.exception(message)
                        raise IOError(message)

                if file_valid:
                    self._local_md5 = tmp_md5
                    os.rename(tmp_file, self.filename)

        except Exception as e:
            message = '{name}: Download failed [{filename}] [{error_number}: {strerror}]'.format(
                name=self.__class__.__name__,
                filename=self.remote_file,
                error_number=e.errno if hasattr(e, 'errno') else '',
                strerror=e.strerror if hasattr(e, 'strerror') else '',
            )
            self.logger.exception(message)
            raise

        return self
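
The reporthook wiring above can be reused outside the class. A minimal self-contained sketch, assuming tqdm is installed; download_with_progress is a hypothetical name:

import sys
from urllib.request import urlretrieve
from tqdm import tqdm

def download_with_progress(url, filename):
    with tqdm(unit='B', unit_scale=True, miniters=1,
              file=sys.stdout, leave=False) as t:
        last_b = [0]

        def hook(b=1, bsize=1, tsize=None):
            # Same arithmetic as inner() above: convert block counts to bytes
            if tsize is not None:
                t.total = tsize
            t.update((b - last_b[0]) * bsize)
            last_b[0] = b

        urlretrieve(url=url, filename=filename, reporthook=hook)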
Example 7
def model_summary_string(keras_model, mode='keras', show_parameters=True, display=False):
    """Model summary in a formatted string, similar to Keras model summary function.

    Parameters
    ----------
    keras_model : keras model
        Keras model

    mode : str
        Summary mode ['extended', 'extended_wide', 'keras']. With 'keras', the standard Keras summary is returned.
        Default value 'keras'

    show_parameters : bool
        Show model parameter count and input / output shapes
        Default value True

    display : bool
        Display summary immediately, otherwise return string
        Default value False

    Returns
    -------
    str
        Model summary

    """

    if is_jupyter():
        ui = FancyHTMLStringifier()
        html_mode = True
    else:
        ui = FancyStringifier()
        html_mode = False

    output = ''
    output += ui.line('Model summary') + '\n'

    if mode == 'extended' or mode == 'extended_wide':
        layer_name_map = {
            'BatchNormalization': 'BatchNorm',
        }

        layer_type_html_tags = {
            'InputLayer': '<span class="label label-default">{0:s}</span>',
            'Dense': '<span class="label label-primary">{0:s}</span>',
            'TimeDistributed': '<span class="label label-primary">{0:s}</span>',

            'BatchNorm': '<span class="label label-default">{0:s}</span>',
            'Activation': '<span class="label label-default">{0:s}</span>',
            'Dropout': '<span class="label label-default">{0:s}</span>',

            'Flatten': '<span class="label label-success">{0:s}</span>',
            'Reshape': '<span class="label label-success">{0:s}</span>',
            'Permute': '<span class="label label-success">{0:s}</span>',

            'Conv1D': '<span class="label label-warning">{0:s}</span>',
            'Conv2D': '<span class="label label-warning">{0:s}</span>',

            'MaxPooling1D': '<span class="label label-success">{0:s}</span>',
            'MaxPooling2D': '<span class="label label-success">{0:s}</span>',
            'MaxPooling3D': '<span class="label label-success">{0:s}</span>',
            'AveragePooling1D': '<span class="label label-success">{0:s}</span>',
            'AveragePooling2D': '<span class="label label-success">{0:s}</span>',
            'AveragePooling3D': '<span class="label label-success">{0:s}</span>',
            'GlobalMaxPooling1D': '<span class="label label-success">{0:s}</span>',
            'GlobalMaxPooling2D': '<span class="label label-success">{0:s}</span>',
            'GlobalMaxPooling3D': '<span class="label label-success">{0:s}</span>',
            'GlobalAveragePooling1D': '<span class="label label-success">{0:s}</span>',
            'GlobalAveragePooling2D': '<span class="label label-success">{0:s}</span>',
            'GlobalAveragePooling3D': '<span class="label label-success">{0:s}</span>',

            'RNN': '<span class="label label-danger">{0:s}</span>',
            'SimpleRNN': '<span class="label label-danger">{0:s}</span>',
            'GRU': '<span class="label label-danger">{0:s}</span>',
            'CuDNNGRU': '<span class="label label-danger">{0:s}</span>',
            'LSTM': '<span class="label label-danger">{0:s}</span>',
            'CuDNNLSTM': '<span class="label label-danger">{0:s}</span>',
            'Bidirectional': '<span class="label label-danger">{0:s}</span>'
        }

        from tensorflow import keras
        from distutils.version import LooseVersion
        import tensorflow.keras.backend as keras_backend

        table_data = {
            'layer_type': [],
            'output': [],
            'parameter_count': [],
            'name': [],
            'connected_to': [],
            'activation': [],
            'initialization': []
        }

        row_separators = []
        prev_name = None
        for layer_id, layer in enumerate(keras_model.layers):
            connections = []
            if LooseVersion(keras.__version__) >= LooseVersion('2.1.3'):
                for node_index, node in enumerate(layer._inbound_nodes):
                    for i in range(len(node.inbound_layers)):
                        inbound_layer = node.inbound_layers[i].name
                        inbound_node_index = node.node_indices[i]
                        inbound_tensor_index = node.tensor_indices[i]
                        connections.append(
                            inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']'
                        )

            else:
                for node_index, node in enumerate(layer.inbound_nodes):
                    for i in range(len(node.inbound_layers)):
                        inbound_layer = node.inbound_layers[i].name
                        inbound_node_index = node.node_indices[i]
                        inbound_tensor_index = node.tensor_indices[i]
                        connections.append(
                            inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']'
                        )

            config = DictContainer(layer.get_config())
            layer_name = layer.__class__.__name__
            if layer_name in layer_name_map:
                layer_name = layer_name_map[layer_name]

            if html_mode and layer_name in layer_type_html_tags:
                layer_name = layer_type_html_tags[layer_name].format(layer_name)

            if config.get_path('kernel_initializer.class_name') == 'VarianceScaling':
                init = str(config.get_path('kernel_initializer.config.distribution', '---'))

            elif config.get_path('kernel_initializer.class_name') == 'RandomUniform':
                init = 'uniform'

            else:
                init = '-'

            name_parts = layer.name.split('_')
            if prev_name != name_parts[0]:
                row_separators.append(layer_id)
                prev_name = name_parts[0]

            table_data['layer_type'].append(layer_name)
            table_data['output'].append(str(layer.output_shape))
            table_data['parameter_count'].append(str(layer.count_params()))
            table_data['name'].append(layer.name)
            table_data['connected_to'].append(str(connections[0]) if len(connections) > 0 else '-')
            table_data['activation'].append(str(config.get('activation', '-')))
            table_data['initialization'].append(init)

        trainable_count = int(
            numpy.sum([keras_backend.count_params(p) for p in set(keras_model.trainable_weights)])
        )

        non_trainable_count = int(
            numpy.sum([keras_backend.count_params(p) for p in set(keras_model.non_trainable_weights)])
        )

        # Show row separators only if they are useful
        if len(row_separators) == len(keras_model.layers):
            row_separators = None
        if mode == 'extended':
            output += ui.table(
                cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count']],
                column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters'],
                column_types=['str30', 'str20', 'str25', 'str20'],
                column_separators=[1, 2],
                row_separators=row_separators,
                indent=4
            )

        elif mode == 'extended_wide':
            output += ui.table(
                cell_data=[table_data['name'], table_data['layer_type'], table_data['output'], table_data['parameter_count'],
                           table_data['activation'], table_data['initialization']],
                column_headers=['Layer name', 'Layer type', 'Output shape', 'Parameters', 'Act.', 'Init.'],
                column_types=['str30', 'str20', 'str25', 'str20', 'str15', 'str15'],
                column_separators=[1, 2, 3],
                row_separators=row_separators,
                indent=4
            )

        if show_parameters:
            output += ui.line('') + '\n'
            output += ui.line('Parameters', indent=4) + '\n'
            output += ui.data(indent=6, field='Total', value=trainable_count + non_trainable_count) + '\n'
            output += ui.data(indent=6, field='Trainable', value=trainable_count) + '\n'
            output += ui.data(indent=6, field='Non-Trainable', value=non_trainable_count) + '\n'

    else:
        output_buffer = []
        keras_model.summary(print_fn=output_buffer.append)
        for line in output_buffer:
            if is_jupyter():
                output += ui.line('<code>'+line+'</code>', indent=4) + '\n'
            else:
                output += ui.line(line, indent=4) + '\n'

    model_config = keras_model.get_config()

    if show_parameters:
        output += ui.line('') + '\n'
        output += ui.line('Input', indent=4) + '\n'
        output += ui.data(indent=6, field='Shape', value=keras_model.input_shape) + '\n'

        output += ui.line('Output', indent=4) + '\n'
        output += ui.data(indent=6, field='Shape', value=keras_model.output_shape) + '\n'

        if isinstance(model_config, dict) and 'layers' in model_config:
            output += ui.data(
                indent=6,
                field='Activation',
                value=model_config['layers'][-1]['config'].get('activation')
            ) + '\n'

        elif isinstance(model_config, list):
            output += ui.data(
                indent=6,
                field='Activation',
                value=model_config[-1].get('config', {}).get('activation')
            ) + '\n'

    if display:
        if is_jupyter():
            from IPython.core.display import display, HTML
            display(HTML(output))

        else:
            print(output)

    else:
        return output
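
A hypothetical usage example, assuming the surrounding library (FancyStringifier and friends) is importable; it uses tensorflow.keras as the function itself does, and the model layout is arbitrary:

from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(32, activation='relu', input_shape=(16,)),
    keras.layers.Dense(10, activation='softmax'),
])

# 'keras' mode falls through to the standard summary captured via print_fn
print(model_summary_string(model, mode='keras'))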
Example 8
    def prepare(self):
        """Prepare dataset for the usage.

        Returns
        -------
        self

        """

        if is_jupyter():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        # Make sure audio directory exists
        Path().makedirs(path=os.path.join(self.local_path, 'audio'))

        # Make sure evaluation_setup directory exists
        Path().makedirs(
            path=os.path.join(self.local_path, self.evaluation_setup_folder))

        if 'audio' in self.included_content_types:
            # Collect file ids
            files = []
            files += ListDictContainer(filename=os.path.join(
                self.local_path, 'testing_set.csv')).load(
                    fields=['query_id', 'segment_start', 'segment_end'])

            files += ListDictContainer(filename=os.path.join(
                self.local_path, 'training_set.csv')).load(
                    fields=['query_id', 'segment_start', 'segment_end'])

            file_progress = tqdm(files,
                                 desc="{0: <25s}".format('Files'),
                                 file=sys.stdout,
                                 leave=False,
                                 disable=self.disable_progress_bar,
                                 ascii=self.use_ascii_progress_bar)

            non_existing_videos = {}

            # Load list of already identified non-accessible videos
            item_access_log_filename = os.path.join(
                self.local_path, 'item_access_error.log.csv')
            if os.path.isfile(item_access_log_filename):
                for item in ListDictContainer(
                        filename=item_access_log_filename).load(
                            fields=['query_id', 'error']):
                    non_existing_videos[item['query_id']] = item

            # Check that audio files exist
            for file_data in file_progress:
                audio_filename = os.path.join(
                    self.local_path, 'audio',
                    'Y{query_id}_{segment_start}_{segment_end}.{extension}'.
                    format(query_id=file_data['query_id'],
                           segment_start=file_data['segment_start'],
                           segment_end=file_data['segment_end'],
                           extension=self.default_audio_extension))

                # Download the segment if it does not exist
                if not os.path.isfile(audio_filename) and file_data[
                        'query_id'] not in non_existing_videos:
                    try:
                        AudioContainer().load_from_youtube(
                            query_id=file_data['query_id'],
                            start=file_data['segment_start'],
                            stop=file_data['segment_end']).save(
                                filename=audio_filename)

                    except IOError as e:
                        non_existing_videos[file_data['query_id']] = {
                            'error': str(e).replace('\n', ' '),
                            'query_id': file_data['query_id']
                        }

            # Save list of non-accessible videos
            ListDictContainer(list(non_existing_videos.values()),
                              filename=item_access_log_filename).save(
                                  fields=['query_id', 'error'])

        # Evaluation setup filenames
        train_filename = self.evaluation_setup_filename(setup_part='train',
                                                        fold=1,
                                                        scene_label='youtube',
                                                        file_extension='txt')

        test_filename = self.evaluation_setup_filename(setup_part='test',
                                                       fold=1,
                                                       scene_label='youtube',
                                                       file_extension='txt')

        evaluate_filename = self.evaluation_setup_filename(
            setup_part='evaluate',
            fold=1,
            scene_label='youtube',
            file_extension='txt')

        # Check that evaluation setup exists
        evaluation_setup_exists = True
        if not os.path.isfile(train_filename) or not os.path.isfile(
                test_filename) or not os.path.isfile(evaluate_filename):
            evaluation_setup_exists = False

        if not evaluation_setup_exists:
            # Evaluation setup was not found, generate one
            fold = 1

            train_meta = MetaDataContainer()
            for item in MetaDataContainer().load(
                    os.path.join(self.local_path,
                                 'groundtruth_weak_label_training_set.csv')):
                if not item.filename.endswith(self.default_audio_extension):
                    item.filename = os.path.join(
                        'audio', 'Y' + os.path.splitext(item.filename)[0] +
                        '.' + self.default_audio_extension)

                # Set scene label
                item.scene_label = 'youtube'

                # Translate event onset and offset, weak labels
                item.offset -= item.onset
                item.onset -= item.onset

                # Only collect items which exist, when audio content is included
                if 'audio' in self.included_content_types:
                    if os.path.isfile(
                            os.path.join(self.local_path, item.filename)):
                        train_meta.append(item)
                else:
                    train_meta.append(item)

            train_meta.save(
                filename=self.evaluation_setup_filename(setup_part='train',
                                                        fold=fold,
                                                        scene_label='youtube',
                                                        file_extension='txt'))

            evaluate_meta = MetaDataContainer()
            for item in MetaDataContainer().load(
                    os.path.join(self.local_path,
                                 'groundtruth_strong_label_testing_set.csv')):
                if not item.filename.endswith(self.default_audio_extension):
                    item.filename = os.path.join(
                        'audio', 'Y' + os.path.splitext(item.filename)[0] +
                        '.' + self.default_audio_extension)
                # Set scene label
                item.scene_label = 'youtube'

                # Only collect items which exist
                if 'audio' in self.included_content_types:
                    if os.path.isfile(
                            os.path.join(self.local_path, item.filename)):
                        evaluate_meta.append(item)
                else:
                    evaluate_meta.append(item)

            evaluate_meta.save(
                filename=self.evaluation_setup_filename(setup_part='evaluate',
                                                        fold=fold,
                                                        scene_label='youtube',
                                                        file_extension='txt'))

            test_meta = MetaDataContainer()
            for item in evaluate_meta:
                test_meta.append(MetaDataItem({'filename': item.filename}))

            test_meta.save(
                filename=self.evaluation_setup_filename(setup_part='test',
                                                        fold=fold,
                                                        scene_label='youtube',
                                                        file_extension='txt'))

            # Load meta and cross validation
            self.load()

        if not self.meta_container.exists():
            fold = 1
            meta_data = MetaDataContainer()
            meta_data += MetaDataContainer().load(
                self.evaluation_setup_filename(setup_part='train',
                                               fold=fold,
                                               scene_label='youtube',
                                               file_extension='txt'))

            meta_data += MetaDataContainer().load(
                self.evaluation_setup_filename(setup_part='evaluate',
                                               fold=fold,
                                               scene_label='youtube',
                                               file_extension='txt'))
            # Save meta
            meta_data.save(filename=self.meta_file)

            # Load meta and cross validation
            self.load()

        return self
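
A side note on the notebook-aware tqdm selection repeated across these examples: recent tqdm versions ship an automatic dispatcher that performs the same check, so the conditional import can be reduced to a single line (a suggestion, not what the code above uses):

from tqdm.auto import tqdm  # picks the notebook widget when one is available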