예제 #1
0
    def to_string(self, ui=None, indent=0):
        """Get container information in a string

        The output contains the class name, the filename (when the object
        has a non-empty ``filename`` attribute), the uncompressed size and
        the number of files. For ZIP format the compressed size and the
        compression ratio are included as well.

        If ``self._file_info`` is None, ``self.get_info()`` is called before
        the size information is read.

        Parameters
        ----------
        ui : FancyStringifier or FancyHTMLStringifier
            Stringifier class
            Default value FancyStringifier

        indent : int
            Amount of indention used
            Default value 0

        Returns
        -------
        str

        Notes
        -----
        The ratio line is omitted when the uncompressed size is zero, as the
        ratio is undefined in that case.

        """

        if ui is None:
            ui = FancyStringifier()

        field_indent = indent + 2

        lines = [ui.class_name(self.__class__.__name__, indent=indent)]

        if hasattr(self, 'filename') and self.filename:
            lines.append(
                ui.data(field='filename', value=self.filename, indent=indent))

        if self._file_info is None:
            self.get_info()

        lines.append(ui.line('Size', indent=indent))
        lines.append(ui.data(field='Uncompressed',
                             value=get_byte_string(self._size_uncompressed),
                             indent=field_indent))

        if self.format == FileFormat.ZIP:
            lines.append(ui.data(field='Compressed',
                                 value=get_byte_string(self._size_compressed),
                                 indent=field_indent))

            # Guard against ZeroDivisionError for packages whose total
            # uncompressed size is zero.
            if self._size_uncompressed != 0:
                ratio = (self._size_compressed /
                         float(self._size_uncompressed) * 100)
                lines.append(ui.data(field='Ratio',
                                     value=ratio,
                                     unit='%',
                                     indent=field_indent))

        lines.append(ui.line('Files', indent=indent))
        lines.append(ui.data(field='Count',
                             value=len(self._file_info),
                             indent=field_indent))

        # Every entry is terminated with a newline, including the last one.
        return ''.join(line + '\n' for line in lines)
예제 #2
0
    def remote_size_string(self):
        """Size of the remote file formatted as a human readable string.

        The value of ``self.remote_bytes`` is passed to ``get_byte_string``.

        Returns
        -------
        str

        """

        size_in_bytes = self.remote_bytes
        return get_byte_string(size_in_bytes)
예제 #3
0
    def local_size_string(self):
        """Size of the local file formatted as a human readable string.

        Returns
        -------
        str or None
            None when ``self.local_bytes`` is falsy (for example None or 0),
            otherwise the result of ``get_byte_string``.

        """

        # Guard clause: no size information available.
        if not self.local_bytes:
            return None

        return get_byte_string(self.local_bytes)
예제 #4
0
    def __str__(self):
        """Get container information in a string

        The output contains the class name, the filename (when the object
        has a non-empty ``filename`` attribute), the uncompressed size and
        the number of files. For ZIP format the compressed size and the
        compression ratio are included as well.

        If ``self._file_info`` is None, ``self.get_info()`` is called before
        the size information is read.

        Returns
        -------
        str

        Notes
        -----
        The ratio line is omitted when the uncompressed size is zero, as the
        ratio is undefined in that case.

        """

        # A single stringifier instance is used for all lines.
        ui = FancyStringifier()

        lines = [ui.class_name(self.__class__.__name__)]

        if hasattr(self, 'filename') and self.filename:
            lines.append(ui.data(field='filename', value=self.filename))

        if self._file_info is None:
            self.get_info()

        lines.append(ui.line('Size', indent=2))
        lines.append(ui.data(field='Uncompressed',
                             value=get_byte_string(self._size_uncompressed),
                             indent=4))

        if self.format == FileFormat.ZIP:
            lines.append(ui.data(field='Compressed',
                                 value=get_byte_string(self._size_compressed),
                                 indent=4))

            # Guard against ZeroDivisionError for packages whose total
            # uncompressed size is zero.
            if self._size_uncompressed != 0:
                ratio = (self._size_compressed /
                         float(self._size_uncompressed) * 100)
                lines.append(ui.data(field='Ratio',
                                     value=ratio,
                                     unit='%',
                                     indent=4))

        lines.append(ui.line('Files', indent=2))
        lines.append(ui.data(field='Count',
                             value=len(self._file_info),
                             indent=4))

        # Every entry is terminated with a newline, including the last one.
        return ''.join(line + '\n' for line in lines)
예제 #5
0
    def pack(self,
             dataset_name='dcase-dataset',
             content=None,
             output_path=None,
             base_path=None,
             overwrite=False,
             verbose=True):
        """Pack dataset.

        For each package group the newest modification time of the source
        files is compared against the newest existing package file with the
        same base name (split packages included). The package is (re)created
        when the sources are newer or when ``overwrite`` is set.

        When ``self.convert_md_to_html`` is set, markdown sources ('.md') are
        converted with ``self.convert_markdown`` if the matching '.html' file
        is missing, older than the markdown file, or ``overwrite`` is set; the
        converted files are appended to the group's 'file_list'.

        Parameters
        ----------
        dataset_name : str
            Dataset name
            Default value 'dcase-dataset'

        content : list of dict
            List of packages to be packed. Package item dict should have format {'data_name': 'doc', 'file_list': [{'source': 'file1.txt'}]}.
            Items lacking a 'target' field get one assigned in place.
            If None, nothing is packed.
            Default value None

        output_path : str
            Path to which packages are saved. The value is passed to
            os.path.join, so a path has to be given whenever ``content``
            is not empty.
            Default value None

        base_path : str
            Base path of the data. If per item package paths are not given ('target' field), this parameter is used
            to create one from source path. If None, the source path is used as target path as it is.
            Default value None

        overwrite : bool
            Overwrite existing packages.
            Default value False

        verbose : bool
            Show information during the packing.
            Default value True

        Raises
        ------
        IOError
            A source file listed in the content does not exist.

        Returns
        -------
        nothing

        """

        if content is None:
            # Documented default; treat it as "no packages" instead of
            # failing when iterating over None.
            content = []

        log = None
        if verbose:
            log = FancyLogger()
            log.section_header('Packing dataset [{dataset_name}]'.format(
                dataset_name=dataset_name))

        if base_path is not None and not base_path.endswith(os.path.sep):
            base_path += os.path.sep

        for group in content:
            if verbose:
                log.line('[{data_name}]'.format(data_name=group['data_name']))

            package_filename = os.path.join(
                output_path,
                self.filename_template.format(
                    dataset_name=dataset_name,
                    data_name=group['data_name'],
                    extension=self.package_extension))

            newest_source = 0
            for item in group['file_list']:
                if not os.path.exists(item['source']):
                    message = '{name}: File not found [{source_file}].'.format(
                        name=self.__class__.__name__,
                        source_file=item['source'])

                    self.logger.exception(message)
                    raise IOError(message)

                if 'target' not in item:
                    # Without a base path there is no prefix to strip.
                    if (base_path is not None
                            and item['source'].startswith(base_path)):
                        item['target'] = item['source'][len(base_path):]
                    else:
                        item['target'] = item['source']

                timestamp = os.path.getmtime(item['source'])
                if newest_source < timestamp:
                    newest_source = timestamp

            # Get newest package, take care of split packages
            all_packages = Path().file_list(
                path=os.path.split(os.path.abspath(package_filename))[0],
                extensions=os.path.splitext(package_filename)[1][1:])

            package_base_name = os.path.splitext(
                os.path.split(package_filename)[-1])[0]

            newest_package = 0
            for existing_package in all_packages:
                base_name = os.path.splitext(
                    os.path.split(existing_package)[-1])[0]

                if base_name[-1].isdigit():
                    # Name ends with a digit: strip one more extension-like
                    # suffix to get the base name of a split package.
                    base_name = os.path.splitext(base_name)[0]

                if base_name == package_base_name:
                    timestamp = os.path.getmtime(existing_package)
                    if newest_package < timestamp:
                        newest_package = timestamp

            if newest_package < newest_source or overwrite:
                if self.convert_md_to_html:
                    # Check for markdown content
                    new_files = []
                    for item in group['file_list']:
                        source_root, source_ext = os.path.splitext(
                            item['source'])

                        if source_ext != '.md':
                            continue

                        html_source = source_root + '.html'

                        # Convert when forced, when the html file is missing,
                        # or when the markdown file is newer than the html.
                        if (overwrite
                                or not os.path.exists(html_source)
                                or os.path.getmtime(item['source']) >
                                os.path.getmtime(html_source)):
                            self.convert_markdown(
                                source_filename=item['source'],
                                target_filename=html_source)

                            new_files.append({
                                'source': html_source,
                                'target':
                                os.path.splitext(item['target'])[0] + '.html'
                            })

                    # Add new html files to the file_list
                    group['file_list'] += new_files

                # Create packages
                package = Package(filename=package_filename)
                package_filenames = package.compress(
                    file_list=group['file_list'],
                    size_limit=self.package_size_limit)

                if verbose:
                    log.line('Saved', indent=2)

                    for saved_filename in package_filenames:
                        if base_path is None:
                            shown_filename = saved_filename
                        else:
                            shown_filename = saved_filename.replace(
                                base_path, '')

                        log.line('[{file}] [{size}]'.format(
                            file=shown_filename,
                            size=get_byte_string(
                                os.path.getsize(saved_filename),
                                show_bytes=False)),
                                 indent=4)

        if verbose:
            log.foot()