def to_string(self, ui=None, indent=0):
    """Get container information in a string

    Parameters
    ----------
    ui : FancyStringifier or FancyHTMLStringifier
        Stringifier class
        Default value FancyStringifier

    indent : int
        Amount of indention used
        Default value 0

    Returns
    -------
    str

    """
    if ui is None:
        ui = FancyStringifier()

    output = ''
    output += ui.class_name(self.__class__.__name__, indent=indent) + '\n'

    if hasattr(self, 'filename') and self.filename:
        output += ui.data(field='filename', value=self.filename, indent=indent) + '\n'

    # Fetch package info lazily the first time it is needed.
    if self._file_info is None:
        self.get_info()

    output += ui.line('Size', indent=indent) + '\n'
    output += ui.data(
        field='Uncompressed',
        value=get_byte_string(self._size_uncompressed),
        indent=indent + 2
    ) + '\n'

    if self.format == FileFormat.ZIP:
        output += ui.data(
            field='Compressed',
            value=get_byte_string(self._size_compressed),
            indent=indent + 2
        ) + '\n'

        # Guard against ZeroDivisionError for an empty archive
        # (previously crashed when uncompressed size was 0).
        if self._size_uncompressed:
            output += ui.data(
                field='Ratio',
                value=self._size_compressed / float(self._size_uncompressed) * 100,
                unit='%',
                indent=indent + 2
            ) + '\n'

    output += ui.line('Files', indent=indent) + '\n'
    output += ui.data(field='Count', value=len(self._file_info), indent=indent + 2) + '\n'

    return output
def remote_size_string(self):
    """File size of remote file in human readable form.

    Returns
    -------
    str

    """
    remote_byte_count = self.remote_bytes
    return get_byte_string(remote_byte_count)
def local_size_string(self):
    """File size of local file in human readable form.

    Returns
    -------
    str

    """
    # Guard clause: no local byte count available means no size string.
    if not self.local_bytes:
        return None

    return get_byte_string(self.local_bytes)
def __str__(self):
    """Return package information as a formatted multi-line string."""
    # Reuse a single stringifier instance instead of constructing a
    # fresh FancyStringifier for every field (the original built five).
    ui = FancyStringifier()

    output = ''
    output += ui.class_name(self.__class__.__name__) + '\n'

    if hasattr(self, 'filename') and self.filename:
        output += ui.data(field='filename', value=self.filename) + '\n'

    # Fetch package info lazily the first time it is needed.
    if self._file_info is None:
        self.get_info()

    output += ui.line('Size', indent=2) + '\n'
    output += ui.data(
        field='Uncompressed',
        value=get_byte_string(self._size_uncompressed),
        indent=4
    ) + '\n'

    if self.format == FileFormat.ZIP:
        output += ui.data(
            field='Compressed',
            value=get_byte_string(self._size_compressed),
            indent=4
        ) + '\n'

        # Guard against ZeroDivisionError for an empty archive
        # (previously crashed when uncompressed size was 0).
        if self._size_uncompressed:
            output += ui.data(
                field='Ratio',
                value=self._size_compressed / float(self._size_uncompressed) * 100,
                unit='%',
                indent=4
            ) + '\n'

    output += ui.line('Files', indent=2) + '\n'
    output += ui.data(field='Count', value=len(self._file_info), indent=4) + '\n'

    return output
def pack(self, dataset_name='dcase-dataset', content=None, output_path=None, base_path=None, overwrite=False, verbose=True):
    """Pack dataset.

    Parameters
    ----------
    dataset_name : str
        Dataset name
        Default value 'dcase-dataset'

    content : list of dict
        List of packages to be packed. Package item dict should have format
        {'data_name': 'doc', 'file_list': [{'source': 'file1.txt'}]}.
        Default value None

    output_path : str
        Path to which packages are saved.
        Default value None

    base_path : str
        Base path of the data. If per item package paths are not given
        ('target' field), this parameter is used to create one from
        source path.
        Default value None

    overwrite : bool
        Overwrite existing packages.
        Default value False

    verbose : bool
        Show information during the packing.
        Default value True

    Returns
    -------
    nothing

    """
    if verbose:
        log = FancyLogger()
        log.section_header('Packing dataset [{dataset_name}]'.format(dataset_name=dataset_name))

    # Normalize base_path to end with a path separator so that prefix
    # stripping below removes exactly the base directory.
    if base_path is not None and not base_path.endswith(os.path.sep):
        base_path += os.path.sep

    for group in content:
        if verbose:
            log.line('[{data_name}]'.format(data_name=group['data_name']))

        package_filename = os.path.join(
            output_path,
            self.filename_template.format(
                dataset_name=dataset_name,
                data_name=group['data_name'],
                extension=self.package_extension
            )
        )

        # Find the newest source timestamp and fill in missing 'target' fields.
        newest_source = 0
        for item in group['file_list']:
            if not os.path.exists(item['source']):
                message = '{name}: File not found [{source_file}].'.format(
                    name=self.__class__.__name__,
                    source_file=item['source']
                )
                self.logger.exception(message)
                raise IOError(message)

            if 'target' not in item:
                # Derive the in-package target path from the source path.
                # Guard against base_path=None (the default), which
                # previously raised TypeError from str.startswith(None).
                if base_path is not None and item['source'].startswith(base_path):
                    item['target'] = item['source'][len(base_path):]
                else:
                    item['target'] = item['source']

            timestamp = os.path.getmtime(item['source'])
            if newest_source < timestamp:
                newest_source = timestamp

        # Get newest package, take care of split packages
        all_packages = Path().file_list(
            path=os.path.split(os.path.abspath(package_filename))[0],
            extensions=os.path.splitext(package_filename)[1][1:]
        )

        newest_package = 0
        for package in all_packages:
            base_name = os.path.splitext(os.path.split(package)[-1])[0]
            if base_name[-1].isdigit():
                # Split packages carry a numeric suffix ('name.1.zip');
                # strip it before comparing against the target name.
                base_name = os.path.splitext(base_name)[0]

            if base_name == os.path.splitext(os.path.split(package_filename)[-1])[0]:
                timestamp = os.path.getmtime(package)
                if newest_package < timestamp:
                    newest_package = timestamp

        # Repack only when sources are newer than the existing package,
        # or when explicitly forced.
        if newest_package < newest_source or overwrite:
            if self.convert_md_to_html:
                # Check for markdown content
                new_files = []
                for item in group['file_list']:
                    if os.path.splitext(item['source'])[-1] == '.md':
                        # Hoist the html counterpart filename; it was
                        # recomputed five times in the original.
                        html_filename = os.path.splitext(item['source'])[0] + '.html'

                        # Convert when the html is missing, older than the
                        # markdown, or when forced. (Equivalent by
                        # short-circuit to the original
                        # `not exists or (exists and newer) or overwrite`.)
                        if (not os.path.exists(html_filename)
                                or os.path.getmtime(item['source']) > os.path.getmtime(html_filename)
                                or overwrite):
                            # Convert
                            self.convert_markdown(
                                source_filename=item['source'],
                                target_filename=html_filename
                            )

                        new_files.append({
                            'source': html_filename,
                            'target': os.path.splitext(item['target'])[0] + '.html'
                        })

                # Add new html files to the file_list
                group['file_list'] += new_files

            # Create packages
            package = Package(filename=package_filename)
            package_filenames = package.compress(
                file_list=group['file_list'],
                size_limit=self.package_size_limit
            )

            if verbose:
                log.line('Saved', indent=2)
                for i in package_filenames:
                    log.line(
                        '[{file}] [{size}]'.format(
                            # Guard against base_path=None (previously a
                            # TypeError from str.replace(None, '')).
                            file=i.replace(base_path, '') if base_path is not None else i,
                            size=get_byte_string(os.path.getsize(i), show_bytes=False)
                        ),
                        indent=4
                    )

    if verbose:
        log.foot()