Example #1
0
def scan_dir_multiprocessing(
    dir_in, dir_out, jobs, frame_slice: typing.Union[Slice.Slice, Slice.Sample]
) -> typing.Dict[str, HTMLResult]:
    """Scan every file found under ``dir_in`` using a pool of worker processes.

    The directory is walked recursively, biggest files first, and each
    (input, output) pair is handed to ``scan_a_single_file`` in a worker.
    Once every worker has finished, the indexes are written into ``dir_out``.

    :param dir_in: Existing directory to walk.
    :param dir_out: Directory that the output paths are generated under.
    :param jobs: Number of worker processes; a value below 1 means use
        ``multiprocessing.cpu_count()``.
    :param frame_slice: Passed unchanged to ``scan_a_single_file``.
    :return: A dict of ``{path_in : HTMLResult, ...}``.
    """
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info(
        'scan_dir_multiprocessing(): Setting multi-processing jobs to %d',
        jobs)
    # The third element is always False here.
    # NOTE(review): presumably the label_process flag of scan_a_single_file - confirm.
    tasks = [(t.filePathIn, t.filePathOut, False, frame_slice)
             for t in DirWalk.dirWalk(
                 dir_in, dir_out, theFnMatch='', recursive=True, bigFirst=True)
             ]
    # The context manager guarantees the worker processes are torn down, even
    # if a task raises. Every result is collected before the block exits, so
    # no work is lost when the pool is terminated.
    with multiprocessing.Pool(processes=jobs) as pool:
        pending = [pool.apply_async(scan_a_single_file, t) for t in tasks]
        results = [p.get() for p in pending]
    _write_indexes(dir_out, {r.path_output: r for r in results})
    return {r.path_input: r for r in results}
Example #2
0
def procLISPathMP(dIn,
                  dOut,
                  fnMatch,
                  recursive,
                  keepGoing,
                  jobs,
                  fileFn,
                  resultObj=None):
    """Multiprocessing code to process LIS files.

    dIn, dOut are directories.
    fnMatch is a glob string.
    recursive is a boolean to control recursion.
    keepGoing is passed to fileFn.
    jobs is the number of worker processes, if less than 1 then
        multiprocessing.cpu_count() is used.
    fileFn is the operational function that will be called in a worker as
        fileFn(fIn, fOut, keepGoing) and return a result that can be added to
        the resultObj or None.
        This should not raise.
    resultObj is the accumulation of the results of fileFn or None, this is returned."""
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('procLISPathMP(): Setting multi-processing jobs to %d', jobs)
    myTaskS = [(t.filePathIn, t.filePathOut, keepGoing)
               for t in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive)]
    myPool = multiprocessing.Pool(processes=jobs)
    try:
        myPending = [myPool.apply_async(fileFn, t) for t in myTaskS]
        myResults = [p.get() for p in myPending]
    finally:
        # Always release the worker processes. By this point every result has
        # been collected (or a task has failed) so terminating loses nothing.
        myPool.terminate()
        myPool.join()
    for r in myResults:
        if r is not None and resultObj is not None:
            resultObj += r
    return resultObj
Example #3
0
def index_dir_or_file(path_in: str, path_out: str, recurse: bool,
                      read_back: bool,
                      validate: bool) -> typing.Dict[str, IndexResult]:
    """Index a single file, or every file found by walking a directory.

    Only inputs whose binary file type is reported as 'RP66V1' are indexed;
    anything else is silently skipped.

    Returns a dict of {path_in : IndexResult, ...} for the indexed files.
    """
    logging.info(
        f'index_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recurse}')
    if os.path.isdir(path_in):
        # Lazily pair up the input and output paths as the walk yields them.
        in_out_pairs = (
            (entry.filePathIn, entry.filePathOut)
            for entry in DirWalk.dirWalk(
                path_in, path_out, theFnMatch='', recursive=recurse,
                bigFirst=False)
        )
    else:
        in_out_pairs = [(path_in, path_out)]
    indexed = {}
    for source, destination in in_out_pairs:
        if bin_file_type.binary_file_type_from_path(source) == 'RP66V1':
            indexed[source] = index_a_single_file(source, destination,
                                                  read_back, validate)
    return indexed
Example #4
0
def convert_rp66v1_dir_or_file_to_las_multiprocessing(
        dir_in: str,
        dir_out: str,
        recurse: bool,
        array_reduction: str,
        frame_slice: Slice.Slice,
        channels: typing.Set[str],
        field_width: int,
        float_format: str,
        jobs: int
) -> typing.Dict[str, LASWriteResult]:
    """Convert the files found under ``dir_in`` to LAS using worker processes.

    The directory is walked biggest file first and each file is handed to
    ``single_rp66v1_file_to_las`` in a worker process.

    :param dir_in: Existing directory to walk.
    :param dir_out: Directory that the output paths are generated under.
    :param recurse: If True the walk descends into sub-directories.
    :param array_reduction: Passed unchanged to ``single_rp66v1_file_to_las``.
    :param frame_slice: Passed unchanged to ``single_rp66v1_file_to_las``.
    :param channels: Passed unchanged to ``single_rp66v1_file_to_las``.
    :param field_width: Passed unchanged to ``single_rp66v1_file_to_las``.
    :param float_format: Passed unchanged to ``single_rp66v1_file_to_las``.
    :param jobs: Number of worker processes; a value below 1 means use
        ``multiprocessing.cpu_count()``.
    :return: A dict of ``{path_in : LASWriteResult, ...}``.
    """
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info(
        'convert_rp66v1_dir_or_file_to_las_multiprocessing():'
        ' Setting multi-processing jobs to %d', jobs)
    tasks = [
        (t.filePathIn, array_reduction, t.filePathOut, frame_slice, channels, field_width, float_format)
        for t in DirWalk.dirWalk(
            dir_in, dir_out, theFnMatch='', recursive=recurse, bigFirst=True
        )
    ]
    # The context manager guarantees the worker processes are torn down, even
    # if a task raises. Every result is collected before the block exits.
    with multiprocessing.Pool(processes=jobs) as pool:
        pending = [
            pool.apply_async(single_rp66v1_file_to_las, t) for t in tasks
        ]
        results = [p.get() for p in pending]
    return {r.path_input: r for r in results}
Example #5
0
 def test_04(self):
     """TestDirWalk.test_04(): Input and output, *.py and recursion.

     Exhausts the walk; the test passes if iteration completes without raising.
     """
     walk = DirWalk.dirWalk(
         '.', theOut='spam', theFnMatch='*.py', recursive=True)
     for _ in walk:
         pass
Example #6
0
 def test_10(self):
     """TestDirWalk.test_10(): Fails if input does not exist.

     Walking a missing input path must raise DirWalk.ExceptionDirWalk.
     """
     try:
         for _ in DirWalk.dirWalk('no_existent'):
             pass
     except DirWalk.ExceptionDirWalk:
         # The expected outcome, nothing more to check.
         return
     self.fail('DirWalk.ExceptionDirWalk not raised.')
Example #7
0
 def test_02(self):
     """TestDirWalk.test_02(): Input and output, no globbing or recursion.

     Exhausts the walk; the test passes if iteration completes without raising.
     """
     walk = DirWalk.dirWalk(
         '.', theOut='spam', theFnMatch=None, recursive=False)
     for _ in walk:
         pass
Example #8
0
 def test_06(self):
     """TestDirWalk.test_06(): Input only, *.py, recursion and biggest first.

     Exhausts the walk; the test passes if iteration completes without raising.
     """
     walk = DirWalk.dirWalk(
         '.', theOut='', theFnMatch='*.py', recursive=True, bigFirst=True)
     for _ in walk:
         pass
Example #9
0
 def test_05(self):
     """TestDirWalk.test_05(): Input and output, *.py, recursion and biggest first.

     Prints the size of each input file followed by the value yielded by the walk.
     """
     print()
     for v in DirWalk.dirWalk('.',
                              theOut='spam',
                              theFnMatch='*.py',
                              recursive=True,
                              bigFirst=True):
         # v is not a plain string here (it has a filePathIn attribute), so
         # convert it with !s. A ':s' format spec on an object that relies on
         # the default object.__format__ raises TypeError.
         print('{:8d}: {!s}'.format(os.path.getsize(v.filePathIn), v))
Example #10
0
 def test_06(self):
     """TestDirWalk.test_06(): Input only, *.py, recursion and biggest first.

     Prints the size and the path of every file that the walk yields.
     """
     print()
     walk = DirWalk.dirWalk(
         '.', theOut=None, theFnMatch='*.py', recursive=True, bigFirst=True)
     for path in walk:
         size = os.path.getsize(path)
         print('{:8d}: {:s}'.format(size, path))
Example #11
0
def scan_dir_or_file(
    path_in: str, path_out: str, recursive: bool, label_process: bool,
    frame_slice: typing.Union[Slice.Slice, Slice.Sample]
) -> typing.Dict[str, HTMLResult]:
    """Scan one file, or all the files in a directory, writing to path_out.

    When path_in is a directory the indexes are written to path_out once all
    the files have been scanned. Results that carry an exception or that are
    marked as ignored are left out of those indexes.

    Returns a dict of {path_in : HTMLResult, ...}
    """
    # Both paths get split on os.sep below so normalise them first; normpath
    # also drops any trailing os.sep which is what we want.
    path_in = os.path.normpath(path_in)
    path_out = os.path.normpath(path_out)
    logging.info(
        f'scan_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recursive}'
    )
    if not os.path.isdir(path_in):
        # Single file: no indexes to write.
        return {
            path_in: scan_a_single_file(path_in, path_out, label_process,
                                        frame_slice)
        }

    def _flat_pairs():
        """Yield (input, output) paths from a non-recursive DirWalk."""
        for in_out in DirWalk.dirWalk(path_in,
                                      path_out,
                                      theFnMatch='',
                                      recursive=recursive,
                                      bigFirst=False):
            yield in_out.filePathIn, in_out.filePathOut

    def _tree_pairs():
        """Yield (input, output) paths bottom-up, mirroring sub-directories."""
        depth = len(path_in.split(os.sep))
        for root, _dirs, names in os.walk(path_in, topdown=False):
            # The part of root below path_in is re-created under path_out.
            mirrored_dir = os.path.join(path_out, *root.split(os.sep)[depth:])
            for name in names:
                yield os.path.join(root, name), os.path.join(mirrored_dir, name)

    pairs = _tree_pairs() if recursive else _flat_pairs()
    scanned: typing.Dict[str, HTMLResult] = {}
    # Maps output file path to its result, only for results worth indexing.
    indexable: typing.Dict[str, HTMLResult] = {}
    for source, destination in pairs:
        outcome = scan_a_single_file(source, destination, label_process,
                                     frame_slice)
        scanned[source] = outcome
        if not (outcome.exception or outcome.ignored):
            indexable[outcome.path_output] = outcome
    if label_process:
        process.add_message_to_queue('Writing Indexes.')
    _write_indexes(path_out, indexable)
    return scanned
Example #12
0
def procLISPathSP(dIn,
                  dOut,
                  fnMatch,
                  recursive,
                  keepGoing,
                  fileFn,
                  resultObj=None):
    """Single process code to process files found by walking dIn.

    Each (input, output) pair yielded by DirWalk.dirWalk() is passed to
    fileFn(fIn, fOut, keepGoing). Any result that is not None is added to
    resultObj with +=, provided that resultObj is not None.
    The (possibly updated) resultObj is returned."""
    for in_out in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive):
        path_in, path_out = in_out
        outcome = fileFn(path_in, path_out, keepGoing)
        if outcome is None or resultObj is None:
            continue
        resultObj += outcome
    return resultObj
Example #13
0
def index_dir_or_file(path_in: str, path_out: str, recurse: bool,
                      private: bool) -> typing.Dict[str, IndexResult]:
    """Index a single file, or every file found by walking a directory.

    Returns a dict of {path_in : IndexResult, ...}, one entry per file.
    """
    logging.info(
        f'index_dir_or_file(): "{path_in}" to "{path_out}" recurse: {recurse}')
    if not os.path.isdir(path_in):
        return {path_in: index_a_single_file(path_in, path_out, private)}
    indexed = {}
    walk = DirWalk.dirWalk(
        path_in, path_out, theFnMatch='', recursive=recurse, bigFirst=False)
    for entry in walk:
        indexed[entry.filePathIn] = index_a_single_file(
            entry.filePathIn, entry.filePathOut, private)
    return indexed
Example #14
0
def index_dir_multiprocessing(dir_in: str, dir_out: str, private: bool,
                              jobs: int) -> typing.Dict[str, IndexResult]:
    """Multiprocessing code to index in XML.

    ``dir_in`` is walked recursively, biggest file first, and each file is
    handed to ``index_a_single_file`` in a worker process.

    :param dir_in: Directory to walk.
    :param dir_out: Directory that the output paths are generated under.
    :param private: Passed unchanged to ``index_a_single_file``.
    :param jobs: Number of worker processes; a value below 1 means use
        ``multiprocessing.cpu_count()``.
    :return: A dict of ``{path_in : IndexResult, ...}``.
    """
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info(
        'index_dir_multiprocessing(): Setting multi-processing jobs to %d',
        jobs)
    tasks = [(t.filePathIn, t.filePathOut, private) for t in DirWalk.dirWalk(
        dir_in, dir_out, theFnMatch='', recursive=True, bigFirst=True)]
    # The context manager guarantees the worker processes are torn down, even
    # if a task raises. Every result is collected before the block exits.
    with multiprocessing.Pool(processes=jobs) as pool:
        pending = [pool.apply_async(index_a_single_file, t) for t in tasks]
        results = [p.get() for p in pending]
    return {r.path_input: r for r in results}
Example #15
0
def plotLogPassesMP(dIn, dOut, opts):
    """Multiprocessing code to plot log passes. Returns a PlotLogInfo object.

    dIn, dOut are the input and output directories for the walk.
    opts supplies:
        opts.jobs - the number of worker processes, if less than 1 then
            multiprocessing.cpu_count() is used.
        opts.glob, opts.recursive - the file match pattern and recursion flag
            passed to DirWalk.dirWalk().
    opts is also passed on, as a whole, to processFile() in each worker.
    The PlotLogInfo objects returned by the workers are summed with +=."""
    if opts.jobs < 1:
        jobs = multiprocessing.cpu_count()
    else:
        jobs = opts.jobs
    logging.info('plotLogPassesMP(): Setting multi-processing jobs to %d',
                 jobs)
    myTaskS = [
        (t.filePathIn, t.filePathOut, opts)
        for t in DirWalk.dirWalk(dIn, dOut, opts.glob, opts.recursive, bigFirst=True)
    ]
    retResult = PlotLogInfo()
    myPool = multiprocessing.Pool(processes=jobs)
    try:
        myPending = [myPool.apply_async(processFile, t) for t in myTaskS]
        myResults = [p.get() for p in myPending]
    finally:
        # Always release the worker processes. By this point every result has
        # been collected (or a task has failed) so terminating loses nothing.
        myPool.terminate()
        myPool.join()
    for r in myResults:
        # r is a PlotLogInfo object
        retResult += r
    return retResult
Example #16
0
def plotLogPassesMP(dIn, dOut, fnMatch, recursive, keepGoing, lgFormatS,
                    apiHeader, jobs):
    """Multiprocessing code to plot log passes. Returns a PlotLogInfo object.

    dIn, dOut are the input and output directories for the walk.
    fnMatch, recursive are the file match pattern and recursion flag passed
        to DirWalk.dirWalk().
    keepGoing, lgFormatS, apiHeader are passed unchanged to processFile().
    jobs is the number of worker processes, if less than 1 then
        multiprocessing.cpu_count() is used.
    The PlotLogInfo objects returned by the workers are summed with +=."""
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info('plotLogPassesMP(): Setting multi-processing jobs to %d',
                 jobs)
    myTaskS = [
        (t.filePathIn, t.filePathOut, keepGoing, lgFormatS, apiHeader)
        for t in DirWalk.dirWalk(dIn, dOut, fnMatch, recursive, bigFirst=True)
    ]
    retResult = PlotLogInfo()
    myPool = multiprocessing.Pool(processes=jobs)
    try:
        myPending = [myPool.apply_async(processFile, t) for t in myTaskS]
        myResults = [p.get() for p in myPending]
    finally:
        # Always release the worker processes. By this point every result has
        # been collected (or a task has failed) so terminating loses nothing.
        myPool.terminate()
        myPool.join()
    for r in myResults:
        # r is a PlotLogInfo object
        retResult += r
    return retResult
Example #17
0
def index_dir_multiprocessing(dir_in: str, dir_out: str, jobs: int,
                              recurse: bool, read_back: bool,
                              validate: bool) -> typing.Dict[str, IndexResult]:
    """Multiprocessing code to index the files found under a directory.

    ``dir_in`` is walked biggest file first and each file is handed to
    ``index_a_single_file`` in a worker process.

    :param dir_in: Existing directory to walk.
    :param dir_out: Directory that the output paths are generated under.
    :param jobs: Number of worker processes; a value below 1 means use
        ``multiprocessing.cpu_count()``.
    :param recurse: If True the walk descends into sub-directories.
    :param read_back: Passed unchanged to ``index_a_single_file``.
    :param validate: Passed unchanged to ``index_a_single_file``.
    :return: A dict of ``{path_in : IndexResult, ...}``.
    """
    assert os.path.isdir(dir_in)
    if jobs < 1:
        jobs = multiprocessing.cpu_count()
    logging.info(
        'index_dir_multiprocessing(): Setting multi-processing jobs to %d',
        jobs)
    tasks = [
        (t.filePathIn, t.filePathOut, read_back, validate)
        for t in DirWalk.dirWalk(
            dir_in, dir_out, theFnMatch='', recursive=recurse, bigFirst=True)
    ]
    # The context manager guarantees the worker processes are torn down, even
    # if a task raises. Every result is collected before the block exits.
    with multiprocessing.Pool(processes=jobs) as pool:
        pending = [pool.apply_async(index_a_single_file, t) for t in tasks]
        results = [p.get() for p in pending]
    return {r.path_in: r for r in results}
Example #18
0
def copy_tree(path_from: str, path_to: str, recurse: bool,
              file_types: typing.List[str], nervous: bool,
              over_write: bool) -> typing.Tuple[typing.Dict[str, int], int]:
    """Copy files of the given binary types from one directory tree to another.

    An empty file_types selects every file. With nervous set nothing is
    created or copied, the actions are only logged with a 'Would ' prefix.
    An existing destination file is only replaced when over_write is set.

    Returns a tuple of ({binary_type : number_of_files_copied, ...},
    total_bytes_copied).

    Raises ValueError if path_from is not a directory.
    """
    action_prefix = 'Would ' if nervous else ''

    def _log_message(msg: str) -> None:
        logger.info(f'{action_prefix}{msg}')

    if not os.path.isdir(path_from):
        raise ValueError(f'Path {path_from} is not a directory.')
    if not os.path.isdir(path_to):
        _log_message(f'Create directory {path_to}')
        if not nervous:
            os.makedirs(path_to)
    # Trim the common path back to (and including) its last separator so that
    # the logged paths are shown relative to the shared parent directory.
    shared_path = os.path.commonpath([path_from, path_to])
    last_sep = shared_path.rfind(os.sep)
    common_prefix = shared_path[:1 + last_sep]
    logger.info(f'copy_tree(): common prefix: {common_prefix}')
    file_type_count: typing.Dict[str, int] = {}
    byte_count = 0
    for file_in_out in DirWalk.dirWalk(path_from, path_to, '', recurse):
        fod = FileOnDisc(file_in_out.filePathIn)
        if len(file_types) != 0 and fod.bin_type not in file_types:
            continue
        _log_message(
            f'Copy {file_in_out.filePathIn[len(common_prefix):]} to {file_in_out.filePathOut[len(common_prefix):]}'
        )
        if nervous:
            continue
        os.makedirs(os.path.dirname(file_in_out.filePathOut), exist_ok=True)
        if os.path.isfile(file_in_out.filePathOut) and not over_write:
            logger.warning(f'Not over writing file at {file_in_out.filePathOut}')
            continue
        shutil.copyfile(file_in_out.filePathIn, file_in_out.filePathOut)
        byte_count += os.path.getsize(file_in_out.filePathOut)
        file_type_count[fod.bin_type] = file_type_count.get(fod.bin_type, 0) + 1
    return file_type_count, byte_count
Example #19
0
 def test_01(self):
     """TestDirWalk.test_01(): Input only, defaults.

     Prints every value that the walk of the current directory yields.
     """
     print()
     walk = DirWalk.dirWalk('.')
     for entry in walk:
         print(entry)
Example #20
0
 def test_01(self):
     """TestGenBigFirst.test_01(): Input only, defaults.

     Prints the size and the path of every file that genBigFirst() yields.
     """
     print()
     for path in DirWalk.genBigFirst('.'):
         size = os.path.getsize(path)
         print('{:8d}: {:s}'.format(size, path))
Example #21
0
def copy_files(path_in: str, path_out: str, binary_file_types: typing.Set[str],
               move: bool, nervous: bool) -> typing.List[str]:
    """
    Copies binary files from path_in to path_out.

    path_in is walked recursively. A file is copied (or moved) if
    binary_file_types is empty or the file's binary type is in that set.
    Any other file that is a ZIP archive is handed to
    _recurse_copy_zip_archive() and the paths that returns are added to the
    result. A failure in one archive is logged and the walk continues.

    If move is True the file is moved, if False the file is copied.
    If nervous is True the walked files are not copied or moved, the actions
    are just printed. nervous is also passed on to _recurse_copy_zip_archive().
    Returns a list of destination paths.
    """
    logger.debug(f'copy_files(): "{path_in}" to "{path_out}" ')
    ret = []
    for file_in_out in DirWalk.dirWalk(path_in,
                                       path_out,
                                       theFnMatch='',
                                       recursive=True,
                                       bigFirst=False):
        bin_type = bin_file_type.binary_file_type_from_path(
            file_in_out.filePathIn)
        if len(binary_file_types) == 0 or bin_type in binary_file_types:
            if nervous:
                print(
                    f'copy_files(): Would create destination directory at {file_in_out.filePathOut}'
                )
                if move:
                    print(
                        f'copy_files(): Would move "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}" '
                    )
                else:
                    print(
                        f'copy_files(): Would copy "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}" '
                    )
            else:
                # For real.
                logger.debug(
                    f'copy_files(): Creating destination directory at {file_in_out.filePathOut}'
                )
                os.makedirs(os.path.dirname(file_in_out.filePathOut),
                            exist_ok=True)
                if move:
                    logger.info(
                        f'copy_files(): Moving "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}" '
                    )
                    # move() uses copy2() function by default
                    ret.append(
                        shutil.move(file_in_out.filePathIn,
                                    file_in_out.filePathOut))
                else:
                    logger.info(
                        f'copy_files(): Copying "{file_in_out.filePathIn}" to "{file_in_out.filePathOut}" '
                    )
                    # copy2 attempts to preserve metadata
                    ret.append(
                        shutil.copy2(file_in_out.filePathIn,
                                     file_in_out.filePathOut))
        elif zipfile.is_zipfile(file_in_out.filePathIn):
            # The archive is unpacked to the output path less its extension.
            zip_out_path = os.path.splitext(file_in_out.filePathOut)[0]
            logger.debug(
                f'_analyse_zip_archive(): At "{file_in_out.filePathIn}" path_out: "{zip_out_path}"'
            )
            with open(file_in_out.filePathIn, 'rb') as zip_instream:
                try:
                    # Accumulate rather than return: returning here would end
                    # the walk at the first archive and drop the paths that
                    # are already in ret.
                    # NOTE(review): assumes _recurse_copy_zip_archive() returns
                    # an iterable of destination paths - confirm.
                    ret.extend(
                        _recurse_copy_zip_archive(zip_instream,
                                                  zip_out_path,
                                                  binary_file_types,
                                                  nervous))
                except Exception:
                    # Best effort: one bad archive must not stop the walk.
                    logger.exception('_recurse_copy_zip_archive() FAILED')

        else:
            logger.debug(
                f'copy_files(): Ignoring type "{bin_type}" at "{file_in_out.filePathOut}"'
            )
    return ret
Example #22
0
 def test_01(self):
     """TestGenBigFirst.test_01(): Input only, defaults.

     Exhausts the generator; the test passes if iteration completes without raising.
     """
     big_first = DirWalk.gen_big_first('.')
     for _ in big_first:
         pass
Example #23
0
 def test_03(self):
     """TestDirWalk.test_03(): Input only, *.py and recursion.

     Exhausts the walk; the test passes if iteration completes without raising.
     """
     walk = DirWalk.dirWalk('.', theFnMatch='*.py', recursive=True)
     for _ in walk:
         pass