Code example #1
0
def bench_main_blocksize_filesize(tmpdir, maxsize):
    """Build benchmark params for main.main() over file sizes and block sizes.

    Two single-file studies (vary blocksize at fixed filesize, vary filesize
    at fixed blocksize) plus one collection study varying blocksize only.

    Parameters
    ----------
    tmpdir : str
        Base directory in which test files/dirs are created.
    maxsize : int
        Max size in bytes of the generated test data.

    Returns
    -------
    tuple
        ``(stmt, params, extra)``: statement template string, list of psweep
        param dicts, and extra kwargs for the benchmark driver (empty here).
    """
    stmt = textwrap.dedent("""
        cfg.update(blocksize={blocksize})
        main.main({files_dirs})
        """)
    params = []

    # single files, test filesize and blocksize
    max_filesize = maxsize
    max_blocksize = min(200*MiB, max_filesize)
    cases = [(np.array([max_filesize]),
              bytes_logspace(10*KiB, max_blocksize, 20),
              'main_blocksize_single'),
             (bytes_linspace(min(1*MiB, max_filesize//2), max_filesize, 5),
              np.array([256*KiB]),
              'main_filesize_single'),
             ]

    for filesize, blocksize, study in cases:
        testdir = mkdtemp(dir=tmpdir, prefix=study + '_')
        files = write_single_files(testdir, filesize)
        # Materialize map() results with list() so ps.plist gets a reusable
        # sequence instead of a one-shot iterator; this also matches the
        # sibling bench_* functions in this file.
        this = ps.pgrid(zip(ps.plist('filesize', filesize),
                            ps.plist('filesize_str', list(map(size2str,
                                                              filesize))),
                            ps.plist('files_dirs', [[x] for x in files])),
                        ps.plist('study', [study]),
                        ps.plist('maxsize_str', [size2str(maxsize)]),
                        zip(ps.plist('blocksize', blocksize),
                            ps.plist('blocksize_str', list(map(size2str,
                                                               blocksize)))))
        params += this

    # collection of many files/dirs, test blocksize only
    study = 'main_blocksize'
    testdir, group_dirs, files = write_collection(maxsize, tmpdir=tmpdir,
                                                  study=study)
    blocksize = bytes_logspace(10*KiB, min(200*MiB, maxsize), 20)
    this = ps.pgrid(ps.plist('files_dirs', [[testdir]]),
                    ps.plist('study', [study]),
                    ps.plist('maxsize_str', [size2str(maxsize)]),
                    zip(ps.plist('blocksize', blocksize),
                        ps.plist('blocksize_str', list(map(size2str,
                                                           blocksize)))))
    params += this
    return stmt, params, {}
Code example #2
0
def bench_main_parallel(tmpdir, maxsize):
    """Build benchmark params for main.main() scaling thread or process pools.

    For each value of ``share_leafs``, scans nworkers for a thread pool
    (nprocs pinned to 1) and a process pool (nthreads pinned to 1).

    Parameters
    ----------
    tmpdir : str
        Base directory in which test files/dirs are created.
    maxsize : int
        Max size in bytes of the generated test data.

    Returns
    -------
    tuple
        ``(stmt, params, extra)`` for the benchmark driver.
    """
    stmt = textwrap.dedent("""
        cfg.update(blocksize={blocksize},
                   nthreads={nthreads},
                   nprocs={nprocs},
                   share_leafs={share_leafs})
        main.main({files_dirs})
        """)
    params = []

    study = 'main_parallel'
    testdir, group_dirs, files = write_collection(maxsize, tmpdir=tmpdir,
                                                  study=study)
    blocksize = np.array([256*KiB])
    worker_range = range(1, MAXWORKERS+1)

    def _blocksize_cols():
        # zip objects are single-use iterators -> build a fresh one per pgrid
        return zip(ps.plist('blocksize', blocksize),
                   ps.plist('blocksize_str', list(map(size2str, blocksize))))

    # (pool_type, knob that scales with nworkers, knob pinned to 1)
    pool_configs = [('thread', 'nthreads', 'nprocs'),
                    ('proc', 'nprocs', 'nthreads')]

    for share_leafs in [True, False]:
        for pool_type, scaled_knob, fixed_knob in pool_configs:
            params += ps.pgrid(ps.plist('files_dirs', [[testdir]]),
                               ps.plist('study', [study]),
                               zip(ps.plist(scaled_knob, worker_range),
                                   ps.plist('nworkers', worker_range)),
                               ps.plist(fixed_knob, [1]),
                               ps.plist('pool_type', [pool_type]),
                               ps.plist('maxsize_str', [size2str(maxsize)]),
                               ps.plist('share_leafs', [share_leafs]),
                               _blocksize_cols())
    return stmt, params, {}
Code example #3
0
def bench_hash_file_parallel(tmpdir, maxsize):
    """Build benchmark params for hashing files with each pool executor type.

    Scans nworkers for every parallel executor in ``pool_map`` and appends
    one sequential (non-pool) reference case.

    Parameters
    ----------
    tmpdir : str
        Base directory in which test files/dirs are created.
    maxsize : int
        Max size in bytes of the generated test data.

    Returns
    -------
    tuple
        ``(stmt, params, extra)`` where ``extra`` carries the cache-flush
        setup and the globals needed by ``stmt``.
    """
    params = []

    study = 'hash_file_parallel'
    testdir, group_dirs, files = write_collection(maxsize, tmpdir=tmpdir,
                                                  study=study)

    # name -> executor factory taking the number of workers
    pool_map = {'seq': pl.SequentialPoolExecutor,
                'thread': pl.ThreadPoolExecutor,
                'proc': pl.ProcessPoolExecutor,
                'proc,thread=1': lambda nw: pl.ProcessAndThreadPoolExecutor(nw, 1),
                'thread,proc=1': lambda nw: pl.ProcessAndThreadPoolExecutor(1, nw),
                }

    # globals made visible to the timed statement
    ctx = {'pool_map': pool_map,
           'pl': pl,
           'files': files,
           'worker': _worker_bench_hash_file_parallel,
           }

    setup = cache_flush_setup

    stmt = """
with pool_map['{pool_type}']({nworkers}) as pool:
    x=list(pool.map(worker, files))
    """

    parallel_types = [k for k in pool_map.keys() if k != 'seq']
    params += ps.pgrid(ps.plist('pool_type', parallel_types),
                       ps.plist('nworkers', range(1, MAXWORKERS+1)),
                       ps.plist('study', [study]),
                       ps.plist('maxsize_str', [size2str(maxsize)]),
                       )
    # non-pool reference
    params.append({'study': study, 'pool_type': 'seq', 'nworkers': 1,
                   'maxsize_str': size2str(maxsize)})

    return stmt, params, dict(setup=setup, globals=ctx)
Code example #4
0
def bench_main_parallel_2d(tmpdir, maxsize):
    """Build benchmark params for main.main() on a 2D nthreads x nprocs grid.

    Parameters
    ----------
    tmpdir : str
        Base directory in which test files/dirs are created.
    maxsize : int
        Max size in bytes of the generated test data.

    Returns
    -------
    tuple
        ``(stmt, params, extra)`` for the benchmark driver.
    """
    stmt = textwrap.dedent("""
        cfg.update(blocksize={blocksize},
                   nthreads={nthreads},
                   nprocs={nprocs})
        main.main({files_dirs})
        """)
    params = []

    study = 'main_parallel_2d'
    testdir, group_dirs, files = write_collection(maxsize, tmpdir=tmpdir,
                                                  study=study)
    blocksize = np.array([256*KiB])
    blocksize_str = [size2str(bs) for bs in blocksize]
    worker_counts = range(1, MAXWORKERS+1)
    params += ps.pgrid(ps.plist('files_dirs', [[testdir]]),
                       ps.plist('study', [study]),
                       ps.plist('nthreads', worker_counts),
                       ps.plist('nprocs', worker_counts),
                       ps.plist('maxsize_str', [size2str(maxsize)]),
                       zip(ps.plist('blocksize', blocksize),
                           ps.plist('blocksize_str', blocksize_str)))
    return stmt, params, {}
Code example #5
0
        'findsame',
        'findsame -t1',
        'findsame -l 512K',
        'findsame -l 4K',
        'findsame -t1 -l 4K',
        'jdupes -q -r',
        'jdupes -q -rQ',
        'jdupes -q -rTT',
        'duff -ra',
        'duff -rat',
        'rdfind -outputname /dev/null',
    ])

    # cache warming only once per datadir to save time, that's why we need to
    # loop over ps.run() calls instead of going w/ the usual psweep workflow
    # (assemble all params, then call ps.run() once), instead we use one of
    # ps.run()'s features: append to existing db on disk
    for _datadir in datadirs:
        print(_datadir)
        for _cache in ['cold', 'warm']:
            print(f"  {_cache}")
            if _cache == 'warm':
                subprocess.run(f"findsame {_datadir} > /dev/null", shell=True)

            datadir = ps.plist('datadir', [_datadir])
            cache = ps.plist('cache', [_cache])
            params = ps.pgrid(tool_cmd, datadir, cache)

            # results in calc/results.pk
            ps.run(func, params)