def scan_dir_multiprocessing(
        dir_in,
        dir_out,
        jobs,
        frame_slice: typing.Union[Slice.Slice, Slice.Sample]
) -> typing.Dict[str, HTMLResult]:
    """Multiprocessing code to plot log passes.
    Returns a dict of {path_in : HTMLResult, ...}"""
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('scan_dir_multiprocessing(): Setting multi-processing jobs to %d' % jobs)
    pool = multiprocessing.Pool(processes=jobs)
    tasks = [
        (t.filePathIn, t.filePathOut, False, frame_slice)
        for t in DirWalk.dirWalk(dir_in, dir_out, theFnMatch='', recursive=True, bigFirst=True)
    ]
    # print('tasks:')
    # pprint.pprint(tasks, width=200)
    # return {}
    results = [
        r.get() for r in [pool.apply_async(scan_a_single_file, t) for t in tasks]
    ]
    _write_indexes(dir_out, {r.path_output: r for r in results})
    return {r.path_input: r for r in results}
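# The fan-out shape shared by the multiprocessing functions in this section,
# as a minimal stdlib-only sketch: submit every task with Pool.apply_async(),
# then collect with .get(), which re-raises any exception from the child
# process. measure_file() and scan_files() are hypothetical stand-ins for
# scan_a_single_file() and the DirWalk-built task list above.
import multiprocessing
import os
import typing


def measure_file(path: str) -> typing.Tuple[str, int]:
    """Worker: must be a module-level function so it can be pickled."""
    return path, os.path.getsize(path)


def scan_files(paths: typing.List[str], jobs: int) -> typing.Dict[str, int]:
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=jobs) as pool:
        async_results = [pool.apply_async(measure_file, (path,)) for path in paths]
        return dict(r.get() for r in async_results)


if __name__ == '__main__':
    print(scan_files([__file__], jobs=0))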
def procLISPathMP(dIn, dOut, fnMatch, recursive, keepGoing, jobs, fileFn, resultObj=None):
    """Multiprocessing code to process LIS files. dIn, dOut are directories.
    fnMatch is a glob string.
    recursive is a boolean to control recursion.
    keepGoing is passed to fileFn.
    fileFn is the operational function that will take a tuple of:
    (fIn, fOut, keepGoing) and return a result that can be added to the
    resultObj or None. This should not raise.
    resultObj is the accumulation of the results of fileFn or None, this is returned."""
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('procLISPathMP(): Setting multi-processing jobs to %d' % jobs)
    myPool = multiprocessing.Pool(processes=jobs)
    myTaskS = [
        (t.filePathIn, t.filePathOut, keepGoing)
        for t in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive)
    ]
    # print('myTaskS', myTaskS)
    myResults = [
        r.get() for r in [myPool.apply_async(fileFn, t) for t in myTaskS]
    ]
    for r in myResults:
        if r is not None and resultObj is not None:
            resultObj += r
    return resultObj
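# A hypothetical fileFn satisfying the contract documented above: it takes
# (fIn, fOut, keepGoing), never raises, and returns a value supporting
# `resultObj += result` (here an int byte count) or None on failure.
# count_bytes() is illustrative only, not part of the library; note it is a
# module-level function so the multiprocessing pool can pickle it.
import logging
import os


def count_bytes(fIn, fOut, keepGoing):
    try:
        return os.path.getsize(fIn)
    except OSError:
        # "This should not raise": log and report nothing instead.
        logging.exception('count_bytes(): failed on %s', fIn)
        return None

# Usage sketch:
# total = procLISPathMP('in/', 'out/', '*.lis', True, True, 0, count_bytes, resultObj=0)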
def index_dir_or_file(path_in: str, path_out: str, recurse: bool, read_back: bool,
                      validate: bool) -> typing.Dict[str, IndexResult]:
    """Index a directory or file and return the results."""
    logging.info(f'index_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recurse}')
    ret = {}
    if os.path.isdir(path_in):
        for file_in_out in DirWalk.dirWalk(path_in, path_out, theFnMatch='',
                                           recursive=recurse, bigFirst=False):
            file_type = bin_file_type.binary_file_type_from_path(file_in_out.filePathIn)
            if file_type == 'RP66V1':
                ret[file_in_out.filePathIn] = index_a_single_file(
                    file_in_out.filePathIn, file_in_out.filePathOut, read_back, validate
                )
    else:
        file_type = bin_file_type.binary_file_type_from_path(path_in)
        if file_type == 'RP66V1':
            ret[path_in] = index_a_single_file(path_in, path_out, read_back, validate)
    return ret
def convert_rp66v1_dir_or_file_to_las_multiprocessing(
        dir_in: str,
        dir_out: str,
        recurse: bool,
        array_reduction: str,
        frame_slice: Slice.Slice,
        channels: typing.Set[str],
        field_width: int,
        float_format: str,
        jobs: int
) -> typing.Dict[str, LASWriteResult]:
    """Multiprocessing code to convert RP66V1 files to LAS.
    Returns a dict of {path_in : LASWriteResult, ...}"""
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info(
        'convert_rp66v1_dir_or_file_to_las_multiprocessing(): Setting multi-processing jobs to %d' % jobs
    )
    pool = multiprocessing.Pool(processes=jobs)
    tasks = [
        (t.filePathIn, array_reduction, t.filePathOut, frame_slice, channels, field_width, float_format)
        for t in DirWalk.dirWalk(dir_in, dir_out, theFnMatch='', recursive=recurse, bigFirst=True)
    ]
    # print('tasks:')
    # pprint.pprint(tasks, width=200)
    # return {}
    results = [
        r.get() for r in [pool.apply_async(single_rp66v1_file_to_las, t) for t in tasks]
    ]
    return {r.path_input: r for r in results}
def test_04(self):
    """TestDirWalk.test_04(): Input and output, *.py and recursion."""
    # print()
    for v in DirWalk.dirWalk('.', theOut='spam', theFnMatch='*.py', recursive=True):
        pass
def test_10(self):
    """TestDirWalk.test_10(): Fails if input does not exist."""
    try:
        for v in DirWalk.dirWalk('no_existent'):
            pass
        self.fail('DirWalk.ExceptionDirWalk not raised.')
    except DirWalk.ExceptionDirWalk:
        pass
def test_02(self):
    """TestDirWalk.test_02(): Input and output, no globbing or recursion."""
    # print()
    for v in DirWalk.dirWalk('.', theOut='spam', theFnMatch=None, recursive=False):
        pass
def test_06(self):
    """TestDirWalk.test_06(): Input only, *.py, recursion and biggest first."""
    # print()
    for v in DirWalk.dirWalk('.', theOut='', theFnMatch='*.py', recursive=True, bigFirst=True):
        pass
def test_05(self):
    """TestDirWalk.test_05(): Input and output, *.py, recursion and biggest first."""
    print()
    for v in DirWalk.dirWalk('.', theOut='spam', theFnMatch='*.py', recursive=True, bigFirst=True):
        # v is a named tuple, not a string, so use '{}' rather than '{:s}'.
        print('{:8d}: {}'.format(os.path.getsize(v.filePathIn), v))
def test_06(self):
    """TestDirWalk.test_06(): Input only, *.py, recursion and biggest first."""
    print()
    for v in DirWalk.dirWalk('.', theOut=None, theFnMatch='*.py', recursive=True, bigFirst=True):
        print('{:8d}: {:s}'.format(os.path.getsize(v), v))
def scan_dir_or_file(
        path_in: str,
        path_out: str,
        recursive: bool,
        label_process: bool,
        frame_slice: typing.Union[Slice.Slice, Slice.Sample]
) -> typing.Dict[str, HTMLResult]:
    """Scans a directory or file putting the results in path_out.
    Returns a dict of {path_in : HTMLResult, ...}
    """
    # Required as we are going to split them by os.sep
    # NOTE: normpath removes trailing os.sep which is what we want.
    path_in = os.path.normpath(path_in)
    path_out = os.path.normpath(path_out)
    logging.info(f'scan_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recursive}')
    ret: typing.Dict[str, HTMLResult] = {}
    # Output file path to FileResult
    if os.path.isdir(path_in):
        index_map_global: typing.Dict[str, HTMLResult] = {}
        if not recursive:
            for file_in_out in DirWalk.dirWalk(path_in, path_out, theFnMatch='',
                                               recursive=recursive, bigFirst=False):
                result = scan_a_single_file(file_in_out.filePathIn, file_in_out.filePathOut,
                                            label_process, frame_slice)
                ret[file_in_out.filePathIn] = result
                if not result.exception and not result.ignored:
                    index_map_global[result.path_output] = result
            if label_process:
                process.add_message_to_queue('Writing Indexes.')
            _write_indexes(path_out, index_map_global)
        else:
            len_path_in = len(path_in.split(os.sep))
            for root, dirs, files in os.walk(path_in, topdown=False):
                root_rel_to_path_in = root.split(os.sep)[len_path_in:]
                dir_out = os.path.join(path_out, *root_rel_to_path_in)
                for file in files:
                    file_path_in = os.path.join(root, file)
                    # Respect sub-directories in root
                    # root_rel_to_path_in.append(file)
                    file_path_out = os.path.join(dir_out, file)
                    result = scan_a_single_file(file_path_in, file_path_out,
                                                label_process, frame_slice)
                    ret[file_path_in] = result
                    if not result.exception and not result.ignored:
                        index_map_global[result.path_output] = result
            if label_process:
                process.add_message_to_queue('Writing Indexes.')
            _write_indexes(path_out, index_map_global)
    else:
        ret[path_in] = scan_a_single_file(path_in, path_out, label_process, frame_slice)
    return ret
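# A stdlib-only sketch of just the path arithmetic in the recursive branch
# above: normpath both roots, count the path_in components, then slice them
# off each os.walk() root to rebuild the same sub-directory layout under
# path_out. mirrored_paths() is a hypothetical name for illustration.
import os
import typing


def mirrored_paths(path_in: str, path_out: str) -> typing.Iterator[typing.Tuple[str, str]]:
    # normpath removes any trailing os.sep, so splitting on os.sep is reliable.
    path_in = os.path.normpath(path_in)
    path_out = os.path.normpath(path_out)
    len_path_in = len(path_in.split(os.sep))
    for root, dirs, files in os.walk(path_in):
        root_rel_to_path_in = root.split(os.sep)[len_path_in:]
        dir_out = os.path.join(path_out, *root_rel_to_path_in)
        for name in files:
            yield os.path.join(root, name), os.path.join(dir_out, name)

# Usage sketch:
# for src, dst in mirrored_paths('data/in', 'data/out'):
#     print(src, '->', dst)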
def procLISPathSP(dIn, dOut, fnMatch, recursive, keepGoing, fileFn, resultObj=None):
    """Single process version of procLISPathMP(): applies fileFn to each
    (fIn, fOut) pair and accumulates non-None results into resultObj."""
    for fpIn, fpOut in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive):
        result = fileFn(fpIn, fpOut, keepGoing)
        if result is not None and resultObj is not None:
            resultObj += result
    return resultObj
def index_dir_or_file(path_in: str, path_out: str, recurse: bool,
                      private: bool) -> typing.Dict[str, IndexResult]:
    """Index a directory or file and return a dict of {path_in : IndexResult, ...}."""
    logging.info(f'index_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recurse}')
    ret = {}
    if os.path.isdir(path_in):
        for file_in_out in DirWalk.dirWalk(path_in, path_out, theFnMatch='',
                                           recursive=recurse, bigFirst=False):
            # print(file_in_out)
            ret[file_in_out.filePathIn] = index_a_single_file(
                file_in_out.filePathIn, file_in_out.filePathOut, private
            )
    else:
        ret[path_in] = index_a_single_file(path_in, path_out, private)
    return ret
def index_dir_multiprocessing(dir_in: str, dir_out: str, private: bool,
                              jobs: int) -> typing.Dict[str, IndexResult]:
    """Multiprocessing code to index in XML. Returns a dict of {path_in : IndexResult, ...}"""
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('index_dir_multiprocessing(): Setting multi-processing jobs to %d' % jobs)
    pool = multiprocessing.Pool(processes=jobs)
    tasks = [
        (t.filePathIn, t.filePathOut, private)
        for t in DirWalk.dirWalk(dir_in, dir_out, theFnMatch='', recursive=True, bigFirst=True)
    ]
    # print('tasks:')
    # pprint.pprint(tasks, width=200)
    # return {}
    results = [
        r.get() for r in [pool.apply_async(index_a_single_file, t) for t in tasks]
    ]
    return {r.path_input: r for r in results}
def plotLogPassesMP(dIn, dOut, opts):
    """Multiprocessing code to plot log passes. Returns a PlotLogInfo object."""
    if opts.jobs < 1:
        jobs = multiprocessing.cpu_count()
    else:
        jobs = opts.jobs
    logging.info('plotLogPassesMP(): Setting multi-processing jobs to %d' % jobs)
    myPool = multiprocessing.Pool(processes=jobs)
    myTaskS = [
        (t.filePathIn, t.filePathOut, opts)
        for t in DirWalk.dirWalk(dIn, dOut, opts.glob, opts.recursive, bigFirst=True)
    ]
    retResult = PlotLogInfo()
    myResults = [
        r.get() for r in [myPool.apply_async(processFile, t) for t in myTaskS]
    ]
    for r in myResults:
        # r is a PlotLogInfo object
        retResult += r
    return retResult
def plotLogPassesMP(dIn, dOut, fnMatch, recursive, keepGoing, lgFormatS, apiHeader, jobs):
    """Multiprocessing code to plot log passes. Returns a PlotLogInfo object."""
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('plotLogPassesMP(): Setting multi-processing jobs to %d' % jobs)
    myPool = multiprocessing.Pool(processes=jobs)
    myTaskS = [
        (t.filePathIn, t.filePathOut, keepGoing, lgFormatS, apiHeader)
        for t in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive, bigFirst=True)
    ]
    retResult = PlotLogInfo()
    # print('myTaskS', myTaskS)
    myResults = [
        r.get() for r in [myPool.apply_async(processFile, t) for t in myTaskS]
    ]
    for r in myResults:
        # r is a PlotLogInfo object
        retResult += r
    return retResult
def index_dir_multiprocessing(dir_in: str, dir_out: str, jobs: int, recurse: bool,
                              read_back: bool, validate: bool) -> typing.Dict[str, IndexResult]:
    """Multiprocessing code to index files. Returns a dict of {path_in : IndexResult, ...}"""
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('index_dir_multiprocessing(): Setting multi-processing jobs to %d' % jobs)
    pool = multiprocessing.Pool(processes=jobs)
    tasks = [
        (t.filePathIn, t.filePathOut, read_back, validate)
        for t in DirWalk.dirWalk(dir_in, dir_out, theFnMatch='', recursive=recurse, bigFirst=True)
    ]
    results = [
        r.get() for r in [pool.apply_async(index_a_single_file, t) for t in tasks]
    ]
    return {r.path_in: r for r in results}
def copy_tree(path_from: str, path_to: str, recurse: bool, file_types: typing.List[str],
              nervous: bool, over_write: bool) -> typing.Tuple[typing.Dict[str, int], int]:
    """Copies particular binary file types from one directory structure to another."""
    def _log_message(msg: str) -> None:
        if nervous:
            logger.info(f'Would {msg}')
        else:
            logger.info(f'{msg}')

    if not os.path.isdir(path_from):
        raise ValueError(f'Path {path_from} is not a directory.')
    if not os.path.isdir(path_to):
        _log_message(f'Create directory {path_to}')
        if not nervous:
            os.makedirs(path_to)
    common_prefix = os.path.commonpath([path_from, path_to])
    common_prefix = common_prefix[:1 + common_prefix.rfind(os.sep)]
    logger.info(f'copy_tree(): common prefix: {common_prefix}')
    file_type_count: typing.Dict[str, int] = {}
    byte_count = 0
    for file_in_out in DirWalk.dirWalk(path_from, path_to, '', recurse):
        fod = FileOnDisc(file_in_out.filePathIn)
        if len(file_types) == 0 or fod.bin_type in file_types:
            _log_message(
                f'Copy {file_in_out.filePathIn[len(common_prefix):]} to {file_in_out.filePathOut[len(common_prefix):]}'
            )
            if not nervous:
                os.makedirs(os.path.dirname(file_in_out.filePathOut), exist_ok=True)
                if os.path.isfile(file_in_out.filePathOut) and not over_write:
                    logger.warning(f'Not over writing file at {file_in_out.filePathOut}')
                else:
                    shutil.copyfile(file_in_out.filePathIn, file_in_out.filePathOut)
                    byte_count += os.path.getsize(file_in_out.filePathOut)
            try:
                file_type_count[fod.bin_type] += 1
            except KeyError:
                file_type_count[fod.bin_type] = 1
    return file_type_count, byte_count
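# How the common-prefix trimming above shortens the log messages, assuming
# POSIX paths: take the common path of source and destination, cut it back
# to just past the last os.sep, and strip that many characters from each
# path before logging. The try/except KeyError counting above could equally
# use collections.Counter; the explicit form just avoids the extra import.
import os

path_from = '/data/archive/wells/in'
path_to = '/data/archive/wells/out'
common_prefix = os.path.commonpath([path_from, path_to])         # '/data/archive/wells'
common_prefix = common_prefix[:1 + common_prefix.rfind(os.sep)]  # '/data/archive/'
print(path_from[len(common_prefix):])                            # 'wells/in'
print(path_to[len(common_prefix):])                              # 'wells/out'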
def test_01(self):
    """TestDirWalk.test_01(): Input only, defaults."""
    print()
    for v in DirWalk.dirWalk('.'):
        print(v)
def test_01(self):
    """TestGenBigFirst.test_01(): Input only, defaults."""
    print()
    for v in DirWalk.genBigFirst('.'):
        print('{:8d}: {:s}'.format(os.path.getsize(v), v))
def copy_files(path_in: str, path_out: str, binary_file_types: typing.Set[str],
               move: bool, nervous: bool) -> typing.List[str]:
    """
    Copies binary files from path_in to path_out.
    If move is True the file is moved, if False the file is copied.
    Returns a list of destination paths.
    """
    logger.debug(f'copy_files(): "{path_in}" to "{path_out}"')
    ret = []
    for file_in_out in DirWalk.dirWalk(path_in, path_out, theFnMatch='',
                                       recursive=True, bigFirst=False):
        bin_type = bin_file_type.binary_file_type_from_path(file_in_out.filePathIn)
        if len(binary_file_types) == 0 or bin_type in binary_file_types:
            if nervous:
                print(f'copy_files(): Would create destination directory at {file_in_out.filePathOut}')
                if move:
                    print(f'copy_files(): Would move "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}"')
                else:
                    print(f'copy_files(): Would copy "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}"')
            else:
                # For real.
                logger.debug(f'copy_files(): Creating destination directory at {file_in_out.filePathOut}')
                os.makedirs(os.path.dirname(file_in_out.filePathOut), exist_ok=True)
                if move:
                    logger.info(f'copy_files(): Moving "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}"')
                    # move() uses the copy2() function by default.
                    ret.append(shutil.move(file_in_out.filePathIn, file_in_out.filePathOut))
                else:
                    logger.info(f'copy_files(): Copying "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}"')
                    # copy2() attempts to preserve metadata.
                    ret.append(shutil.copy2(file_in_out.filePathIn, file_in_out.filePathOut))
        elif zipfile.is_zipfile(file_in_out.filePathIn):
            zip_out_path = os.path.splitext(file_in_out.filePathOut)[0]
            # ret.extend(_analyse_zip_archive(file_in_out.filePathIn, zip_out_path, binary_file_types, nervous))
            logger.debug(f'_analyse_zip_archive(): At "{file_in_out.filePathIn}" path_out: "{zip_out_path}"')
            with open(file_in_out.filePathIn, 'rb') as zip_instream:
                try:
                    # Accumulate rather than returning early so the walk
                    # continues with the remaining files.
                    ret.extend(_recurse_copy_zip_archive(zip_instream, zip_out_path,
                                                         binary_file_types, nervous))
                except Exception:
                    logger.exception('_recurse_copy_zip_archive() FAILED')
        else:
            logger.debug(f'copy_files(): Ignoring type "{bin_type}" at "{file_in_out.filePathOut}"')
    return ret
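# A hypothetical distillation of the nervous/move logic above: describe the
# action when nervous is True, perform it otherwise. transfer() is
# illustrative only, not part of the library.
import shutil


def transfer(src: str, dst: str, move: bool, nervous: bool) -> str:
    if nervous:
        print('Would {} "{}" to "{}"'.format('move' if move else 'copy', src, dst))
        return dst
    if move:
        # shutil.move() copies (via copy2()) and unlinks when crossing filesystems.
        return shutil.move(src, dst)
    # shutil.copy2() preserves timestamps and permission bits where it can.
    return shutil.copy2(src, dst)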
def test_01(self):
    """TestGenBigFirst.test_01(): Input only, defaults."""
    # print()
    for v in DirWalk.gen_big_first('.'):
        pass
def test_03(self):
    """TestDirWalk.test_03(): Input only, *.py and recursion."""
    # print()
    for v in DirWalk.dirWalk('.', theFnMatch='*.py', recursive=True):
        pass