def test_error_capture():
    """Errors raised in workers must surface as the right exception types."""
    if mp is not None:
        # With real multiprocessing available, worker-side errors get
        # wrapped into a JoblibException.
        division_tasks = [delayed(division)(x, y)
                          for x, y in zip((0, 1), (1, 0))]
        assert_raises(JoblibException, Parallel(n_jobs=2), division_tasks)
        assert_raises(WorkerInterrupt, Parallel(n_jobs=2),
                      [delayed(interrupt_raiser)(x) for x in (1, 0)])

        # Exercise the same scenarios through the context manager API.
        with Parallel(n_jobs=2) as managed:
            assert_true(managed._backend is not None)
            assert_raises(JoblibException, managed,
                          [delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0))])
            # The managed backend should survive the caught exception and
            # still produce correct results afterwards.
            assert_true(managed._backend is not None)
            assert_equal([f(x, y=1) for x in range(10)],
                         managed(delayed(f)(x, y=1) for x in range(10)))
            assert_raises(WorkerInterrupt, managed,
                          [delayed(interrupt_raiser)(x) for x in (1, 0)])
            # Still alive after a worker interruption as well.
            assert_true(managed._backend is not None)
            assert_equal([f(x, y=1) for x in range(10)],
                         managed(delayed(f)(x, y=1) for x in range(10)))
        # Leaving the context manager must tear the inner backend down.
        assert_true(managed._backend is None)
    else:
        # Without multiprocessing the interruption reaches us directly.
        assert_raises(KeyboardInterrupt, Parallel(n_jobs=2),
                      [delayed(interrupt_raiser)(x) for x in (1, 0)])

    # Wrapped exceptions inherit from the original exception's class so
    # callers can catch them with the natural except clause.
    assert_raises(ZeroDivisionError, Parallel(n_jobs=2),
                  [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
    assert_raises(MyExceptionWithFinickyInit,
                  Parallel(n_jobs=2, verbose=0),
                  (delayed(exception_raiser)(i, custom_exception=True)
                   for i in range(30)))

    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        Parallel(n_jobs=1)(delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert_false(isinstance(ex, JoblibException))
def test_error_capture():
    """Errors raised in workers must surface as the right exception types."""
    if mp is not None:
        # A JoblibException is only raised when multiprocessing is
        # actually in play.
        assert_raises(JoblibException, Parallel(n_jobs=2),
                      [delayed(division)(x, y)
                       for x, y in zip((0, 1), (1, 0))])
        assert_raises(WorkerInterrupt, Parallel(n_jobs=2),
                      [delayed(interrupt_raiser)(x) for x in (1, 0)])

        # Same checks via the context manager API, probing the inner pool.
        with Parallel(n_jobs=2) as ctx:
            assert_true(ctx._backend._pool is not None)
            assert_raises(JoblibException, ctx,
                          [delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0))])
            # The managed pool remains available and functional despite
            # the previously raised (and caught) exception.
            assert_true(ctx._backend._pool is not None)
            expected = [f(x, y=1) for x in range(10)]
            assert_equal(expected,
                         ctx(delayed(f)(x, y=1) for x in range(10)))
            assert_raises(WorkerInterrupt, ctx,
                          [delayed(interrupt_raiser)(x) for x in (1, 0)])
            # Still usable after a worker interruption too.
            assert_true(ctx._backend._pool is not None)
            assert_equal(expected,
                         ctx(delayed(f)(x, y=1) for x in range(10)))
        # Exiting the context manager terminates the inner pool.
        assert_true(ctx._backend._pool is None)
    else:
        assert_raises(KeyboardInterrupt, Parallel(n_jobs=2),
                      [delayed(interrupt_raiser)(x) for x in (1, 0)])

    # Wrapped exceptions inherit from the class of the original exception
    # to make them easy to catch.
    assert_raises(ZeroDivisionError, Parallel(n_jobs=2),
                  [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
    assert_raises(
        MyExceptionWithFinickyInit,
        Parallel(n_jobs=2, verbose=0),
        (delayed(exception_raiser)(i, custom_exception=True)
         for i in range(30)),
    )

    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        Parallel(n_jobs=1)(delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert_false(isinstance(ex, JoblibException))
def cleanTranscripts():
    """Filter the transcripts by removing stopwords and stemming.

    Reads 'data/captions-clean.csv' in chunks, processes each chunk in a
    multiprocessing pool, concatenates the results, and writes them to
    'data/captions-filtered.csv'. No value is returned; the output file
    is the result.
    """
    # NOTE(review): nrows=1 combined with chunksize=500 and a 40k skiprows
    # window looks like a debugging restriction left in — confirm whether
    # the full file should be processed.
    dfs = pd.read_csv('data/captions-clean.csv', encoding='utf-8',
                      chunksize=500, nrows=1, skiprows=range(1, 40000))
    # One worker per CPU core; each chunk is handed to the mapped function.
    parallel = Parallel(n_jobs=multiprocessing.cpu_count())
    # NOTE(review): `callable` resolves to the *builtin* unless a function
    # of that name is defined elsewhere in this module — verify. The
    # builtin would return booleans and make pd.concat below fail.
    retlist = parallel(delayed(callable)(i) for i in dfs)
    df = pd.concat(retlist)
    df.to_csv('data/captions-filtered.csv', encoding='utf-8')
def test_error_capture(backend):
    """Errors raised in workers must surface as the right exception types."""
    if mp is not None:
        with raises(ZeroDivisionError):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
        with raises(WorkerInterrupt):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)])

        # Exercise the same scenarios through the context manager API.
        with Parallel(n_jobs=2, backend=backend) as parallel:
            assert get_workers(parallel._backend) is not None
            workers_before = get_workers(parallel._backend)
            with raises(ZeroDivisionError):
                parallel([delayed(division)(x, y)
                          for x, y in zip((0, 1), (1, 0))])

            # The managed pool should still be available and working
            # despite the previously raised (and caught) exception ...
            assert get_workers(parallel._backend) is not None
            # ... but it should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not workers_before
            expected = [f(x, y=1) for x in range(10)]
            assert expected == parallel(
                delayed(f)(x, y=1) for x in range(10))

            workers_before = get_workers(parallel._backend)
            with raises(WorkerInterrupt):
                parallel([delayed(interrupt_raiser)(x) for x in (1, 0)])

            # Same invariants after a worker interruption.
            assert get_workers(parallel._backend) is not None
            assert get_workers(parallel._backend) is not workers_before
            assert expected == parallel(
                delayed(f)(x, y=1) for x in range(10))

        # Exiting the context manager must terminate the inner pool.
        assert get_workers(parallel._backend) is None
    else:
        # Without multiprocessing the interruption reaches us directly.
        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)])

    # Wrapped exceptions inherit from the class of the original exception
    # so callers can catch them with the natural except clause.
    with raises(ZeroDivisionError):
        Parallel(n_jobs=2)(
            [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
    with raises(MyExceptionWithFinickyInit):
        Parallel(n_jobs=2, verbose=0)(
            (delayed(exception_raiser)(i, custom_exception=True)
             for i in range(30)))

    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        Parallel(n_jobs=1)(delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert not isinstance(ex, JoblibException)
def test_nested_loop_with_exception_with_loky():
    """A ValueError from a nested task must escape the managed context."""
    with raises(ValueError):
        with Parallel(n_jobs=2, backend="loky") as parallel:
            tasks = [delayed(nested_loop)("loky"),
                     delayed(raise_exception)("loky")]
            parallel(tasks)
print(f'idx={idx}', np.mean(cprofile_process_times), np.std(cprofile_process_times))#, cprofile_process_times) return cprofile_process_times combs=[0, 1, 2, 3,4, 5, 6,7] print('*****serial') for i in combs: foo(i) print('*****parallel') # pre_dispatch='2 * n_jobs', batch_size='auto', temp_folder=None with Parallel(n_jobs=2, verbose=0, backend='loky') as parallel: res = parallel(delayed(foo)(i) for i in combs) # # # # print('scipy') # # import scipy # # pprint(threadpool_info()) # # flg = 0 # if flg: # import os # # # must set these before loading numpy: # os.environ["OMP_NUM_THREADS"] = '8' # export OMP_NUM_THREADS=4 # os.environ["OPENBLAS_NUM_THREADS"] = '8' # export OPENBLAS_NUM_THREADS=4
print(np.mean(cprofile_process_times), np.std(cprofile_process_times)) #, cprofile_process_times) return cprofile_process_times combs = [0, 1, 2, 3, 4, 5, 6, 7] print('*****serial') for i in combs: foo() print('*****parallel') # pre_dispatch='2 * n_jobs', batch_size='auto', temp_folder=None with Parallel(n_jobs=5, verbose=0, backend='loky') as parallel: res = parallel(delayed(foo)() for _ in combs) # # # # print('scipy') # # import scipy # # pprint(threadpool_info()) # # flg = 0 # if flg: # import os # # # must set these before loading numpy: # os.environ["OMP_NUM_THREADS"] = '8' # export OMP_NUM_THREADS=4 # os.environ["OPENBLAS_NUM_THREADS"] = '8' # export OPENBLAS_NUM_THREADS=4 # os.environ["MKL_NUM_THREADS"] = '8' # export MKL_NUM_THREADS=6 # # os.environ["VECLIB_MAXIMUM_THREADS"] = '4' # export VECLIB_MAXIMUM_THREADS=4