Python ProcessPoolExecutor.mapの例、concurrent.futures.ProcessPoolExecutor.map Pythonの例

コード例 #1

0

ファイルを表示

def main():
    """Trim every video described in the config file using a process pool.

    Reads the config path, input/output folders and worker count from
    ``parseArgs()``, then dispatches ``trimmer.trim`` over each video's trim
    configurations.
    """
    configFile, infolder, outfolder, max_workers = parseArgs()
    trimmer = Trimmer(infolder, outfolder)
    parser = Parser(configFile)
    # The context manager joins the workers and releases them on exit.
    with ProcessPoolExecutor(max_workers) as pool:
        # Submit every video first so work on different videos can overlap.
        pending = [pool.map(trimmer.trim, trimConfigs)
                   for _videoName, trimConfigs in parser]
        # map() only re-raises a worker's exception when its result is
        # fetched, so drain the iterators instead of discarding them.
        for results in pending:
            for _ in results:
                pass

コード例 #2

0

ファイルを表示

def objective(x):
    """Run every test case with parameter ``x`` and return the negated mean regret.

    Each file in ``./testcase/`` whose name contains ``.in`` is passed to
    ``run_experiment`` and then to ``parse_log``, both in a process pool.
    Logs go to a timestamped directory under ``./log``.

    Returns:
        ``-mean`` of the values produced by ``parse_log``.
    """
    logger.info('x: {}'.format(x))
    logdir = os.path.join('./log', datetime.now().strftime('%Y%m%d-%H%M%S'))
    os.makedirs(logdir, exist_ok=True)
    inpdir = './testcase/'
    args_list = [[os.path.join(inpdir, fname), logdir, x]
                 for fname in sorted(os.listdir(inpdir))
                 if '.in' in fname]
    total = len(args_list)

    # One pool serves both phases and is shut down when the block exits.
    with ProcessPoolExecutor() as executor:
        # Phase 1: run the experiments; only completion matters here.
        for _ in tqdm(executor.map(run_experiment, args_list),
                      total=total,
                      ncols=0):
            pass

        # Phase 2: collect one regret value per test case.
        regrets = list(
            tqdm(executor.map(parse_log, args_list), total=total, ncols=0))

    mean = np.mean(regrets)
    std = np.std(regrets)
    logger.info('mean: {}, std: {}'.format(mean, std))
    return -mean

コード例 #3

0

ファイルを表示

ファイル: homework1.py プロジェクト: qianlizhixing12/Python001-class01

def execCmd(args):
  """Run a ping or TCP scan over the resolved IPs and save the results.

  ``args.fun`` selects the scan ('ping' or 'tcp'), ``args.num`` the number of
  worker processes and ``args.save`` is forwarded to ``saveExec``. Returns
  early (None) when no IPs resolve or the scan type is unknown.
  """
  ips = getIp(args.ip)
  if not ips:
    return

  # NOTE: an earlier attempt to hand work to the pool through a Queue raised
  # an error, so tasks are passed directly to map() instead.
  if args.fun == 'ping':
    worker = pingExec
    tasks = ips
  elif args.fun == 'tcp':
    worker = tcpExec
    tasks = ((ip, port) for ip in ips for port in range(22, 33))
  else:
    print('-f opt must is ping or tcp!')
    return

  # The context manager shuts the pool down even if a worker raises.
  with ProcessPoolExecutor(max_workers=args.num) as p:
    result = list(p.map(worker, tasks))

  saveExec(args.save, result)

コード例 #4

0

ファイルを表示

ファイル: core.py プロジェクト: pandagod/vx_77

def crawl_pic():
    """Scroll the image search page to the bottom and download every image.

    Loads ``PIC_SEARCH_BASE_URL`` in headless Chrome, scrolls until the page
    height stops growing, collects the ``src``/``data-src`` of each ``<img>``
    under ``#mmComponent_images_1`` and downloads them in a process pool.
    """
    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(chrome_options=chrome_options)
    try:
        driver.get(PIC_SEARCH_BASE_URL)
        # Keep scrolling until no further content is lazily loaded.
        while True:
            bef_height = driver.execute_script(
                'return document.body.scrollHeight')
            driver.execute_script(
                "window.scrollTo(0,document.body.scrollHeight)")
            time.sleep(1)
            aft_height = driver.execute_script(
                'return document.body.scrollHeight')
            if aft_height == bef_height:
                break
        page_source = driver.page_source
    finally:
        # Always terminate the browser process, even when scraping fails.
        driver.quit()

    soup = BeautifulSoup(page_source, 'lxml')
    div = soup.find(id='mmComponent_images_1')
    # The container may be missing (layout change, blocked page).
    imgs = div.find_all('img') if div is not None else []
    img_urls = []
    for img in imgs:
        img_url = img.attrs.get('src') or img.attrs.get('data-src')
        if img_url:
            img_urls.append(img_url)

    print('image urls crawl finished,begin to download ...')
    # Leaving the with-block waits for all downloads, so the message below is
    # only printed once they have actually finished.
    with ProcessPoolExecutor(max_workers=cpu_count()) as pool:
        pool.map(save_pic, img_urls)
    print('download pictures finished ...')

コード例 #5

0

ファイルを表示

ファイル: pmap.py プロジェクト: xuruidong/Python-002

def pmap_main(argv):
    """Scan an integer range with a process or thread pool and report results.

    The parsed arguments select the worker count, the handler ("tcp" or
    ping), an optional JSON output path, the pool kind ("proc" or "thread"),
    the half-open range to scan and a verbose flag.

    Raises:
        ValueError: if the pool kind is neither "proc" nor "thread".
    """
    t_start = time.time()
    arg_n, arg_f, arg_w, arg_m, arg_ip, arg_v = arg_parse(argv)

    if arg_m == "proc":
        executor_cls = ProcessPoolExecutor
    elif arg_m == "thread":
        executor_cls = ThreadPoolExecutor
    else:
        # Previously this fell through and failed later with an unbound
        # `pool`; fail early with an actionable message instead.
        raise ValueError(
            "unsupported mode %r: expected 'proc' or 'thread'" % (arg_m,))

    func_arg = list(range(arg_ip[0], arg_ip[1]))
    handler = handler_tcp if arg_f == "tcp" else handler_ping

    # All results are collected before anything is printed, as before.
    with executor_cls(max_workers=arg_n) as pool:
        results = list(pool.map(handler, func_arg))

    json_result = {}
    for i in results:
        json_result[i[0]] = i[1]
        print("%s : %s" % (i[0], i[1]))

    if arg_w:
        try:
            with open(arg_w, 'w') as save:
                json.dump(json_result, save)
        except Exception as e:
            print("save result to %s error: %s" % (arg_w, e))
    else:
        # NOTE(review): prints the empty/None output path; kept as-is.
        print(arg_w)

    if arg_v:
        print("time use: %f" % (time.time() - t_start))

コード例 #6

0

ファイルを表示

ファイル: run_experiment.py プロジェクト: nel215/roads-and-junctions

def run_experiments():
    """Run experiments for seeds 1..100 in parallel and log score statistics.

    Each task is ``[seed, logdir]``; ``run_experiment`` runs first for every
    seed, then ``parse_log`` extracts one score per seed. Mean, standard
    deviation, maximum and the seed achieving the maximum are logged.
    """
    logdir = os.path.join('./log', datetime.now().strftime('%Y%m%d-%H%M%S'))
    os.makedirs(logdir, exist_ok=True)
    args_list = [[seed, logdir] for seed in range(1, 101)]
    total = len(args_list)

    # One pool serves both phases and is shut down when the block exits.
    with ProcessPoolExecutor() as executor:
        for _ in tqdm(executor.map(run_experiment, args_list),
                      total=total,
                      ncols=0):
            pass

        scores = list(
            tqdm(executor.map(parse_log, args_list), total=total, ncols=0))

    scores = np.array(scores)
    mean = np.mean(scores)
    std = np.std(scores)
    max_score = np.max(scores)
    # Seeds are 1-based while argmax is 0-based.
    max_score_idx = np.argmax(scores) + 1
    logger.info('mean: {}, std: {}, max: {}(seed:{})'.format(
        mean, std, max_score, max_score_idx))

コード例 #7

0

ファイルを表示

def tcp_scan(host):
    """Scan a fixed list of well-known TCP ports on ``host`` in parallel.

    ``scan`` is called once per ``(host, port)`` tuple across 8 worker
    processes; the function returns after every scan has completed.
    """
    ports = [
        53, 80, 8080, 3128, 8081, 9080, 1080, 21, 23, 443, 69, 22, 25, 110,
        7001, 9090, 3389, 1521, 1158, 2100, 1433
    ]
    # The previous chunksize=100 exceeded the 21 tasks, so everything was
    # sent to one worker as a single chunk. The default chunksize of 1
    # spreads the ports over all workers.
    with ProcessPoolExecutor(max_workers=8) as p:
        p.map(scan, [(host, port) for port in ports])

コード例 #8

0

ファイルを表示

def get_wuliaotu(url):
    """Download all images from one listing page and advance ``start_url``.

    Fetches ``url``, decodes each base64 ``img-hash`` span into an image URL,
    downloads them in a process pool, then points the global ``start_url`` at
    the previous-page link and sleeps 3-6 seconds.

    Returns:
        False (also stored in the global ``Flag``) if the page cannot be
        fetched or has no previous-page link; otherwise None.
    """
    global start_url, Flag
    try:
        resp = requests.get(url, headers=header)
        bs = BeautifulSoup(resp.text, 'html.parser')
        next_url = 'http:' + bs.find(
            'a', class_='previous-comment-page').get('href')
    except Exception as err:
        print(f'Error:{err}')
        Flag = False
        return Flag

    url_ls = set()
    for item in bs.find_all('span', class_='img-hash'):
        # str(bytes) renders as b'...'; dropping the first two characters and
        # every quote leaves the decoded text. Kept as-is to preserve the
        # exact URLs produced.
        img_url = ('http:' +
                   str(base64.b64decode(item.string.encode('utf-8')))[2:]
                   ).replace('\'', '')
        url_ls.add(img_url)

    # A pool is created per page; the context manager makes sure it is torn
    # down before moving on instead of accumulating worker processes.
    with ProcessPoolExecutor(max_workers=8) as pool:
        pool.map(download_data, url_ls)
    start_url = next_url
    # Random pause between pages.
    time.sleep(random.randint(3, 6))

コード例 #9

0

ファイルを表示

    def inference_all_questions(self, question_folder, cpu_num=1):
        """Run inference on every ``.wav`` file in ``question_folder``.

        With ``cpu_num > 1`` the files are split into consecutive chunks of
        ``len(audios) // cpu_num`` (at least 1) and each chunk is sent to
        ``self.inference_splitted_audios``. Otherwise each file is sent to
        ``self.inference_audio``. The working directory is switched to
        ``question_folder`` for the duration and always restored.

        Returns:
            ``(question_folder, results)``, where ``results`` has one entry
            per chunk (parallel mode) or per file (single-worker mode).
        """
        base_dir = os.getcwd()
        print('base_dir: ', base_dir)

        os.chdir(question_folder)
        try:
            print('numcores: ', cpu_num)
            audios = [audio for audio in os.listdir()
                      if audio.endswith('.wav')]

            if cpu_num > 1:
                audios_for_one_process = max(1, len(audios) // cpu_num)
                print('audios count: ', len(audios))
                print('audio_for_one_process: ', audios_for_one_process)
                # Slice-based chunking keeps every file, including the final
                # partial chunk that the old hand-rolled loop dropped.
                splitted_audio = [
                    audios[start:start + audios_for_one_process]
                    for start in range(0, len(audios),
                                       audios_for_one_process)
                ]
            else:
                splitted_audio = audios

            print('splitted audios: {}'.format(len(splitted_audio)))

            # max(1, ...) keeps the executor constructible for cpu_num < 1.
            with Executor(max_workers=max(1, cpu_num)) as exe:
                if cpu_num > 1:
                    futures = [
                        exe.submit(self.inference_splitted_audios, chunk)
                        for chunk in splitted_audio
                    ]
                    results = [future.result() for future in tqdm(futures)]
                    print(results)
                else:
                    # Materialise the results so `results` is always bound
                    # and worker exceptions surface here.
                    results = list(
                        exe.map(self.inference_audio, splitted_audio))
        finally:
            # Restore the caller's working directory even on failure.
            os.chdir(base_dir)

        return question_folder, results

コード例 #10

0

ファイルを表示

ファイル: quantiance.py プロジェクト: longfeiw07/quantiance

def main():
    """Fetch daily prices for every stock in ``stocksPool`` and print the elapsed time."""
    executor = ProcessPoolExecutor(max_workers=3)
    t1 = time.time()
    with executor:
        try:
            # map() is lazy: worker exceptions are only raised when results
            # are fetched, so drain the iterator inside the try block.
            for _ in executor.map(get_StocksDailyPrice, stocksPool):
                pass
        except Exception as err:
            print('cannot', err)

    # Measured after the pool has shut down, so it covers the actual work
    # rather than just task submission.
    t2 = time.time()
    duration = t2 - t1
    print("aaa", duration)

コード例 #11

0

ファイルを表示

ファイル: scraper.py プロジェクト: wdang/wdang.github.io

def multiproc():
    """Look up every title from the sales CSV in parallel.

    Reads the first column of ``PSN SUMMER SALES.csv`` (relative to
    ``D:\\OneDrive``) and maps ``metacriticgames`` over the titles using four
    worker processes.

    Returns:
        The list of ``metacriticgames`` results, in CSV order.
    """
    os.chdir("D:\\OneDrive")
    # Close the CSV deterministically; blank rows have no first column.
    with open('PSN SUMMER SALES.csv', newline='') as csvfile:
        titles = [row[0] for row in csv.reader(csvfile) if row]
    with ProcessPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(metacriticgames, titles))
    return results

コード例 #12

0

ファイルを表示

ファイル: con_future_logging.py プロジェクト: rwhitt2049/poc

def main():
    """Map ``double`` over 0..9 in two processes, routing worker logs through a queue.

    A manager queue is shared with the workers; a ``QueueListener`` in this
    process forwards queued records to ``GeneralHandler``.
    """
    q = Manager().Queue()
    wrk = logging.getLogger('worker')
    wrk.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    wrk.addHandler(sh)

    lp = logging.handlers.QueueListener(q, GeneralHandler(), respect_handler_level=False)
    lp.start()
    try:
        # Each task carries the shared queue alongside its input value.
        numbs = ((x, q) for x in range(10))
        with ProcessPoolExecutor(max_workers=2) as executor:
            vals = list(executor.map(double, numbs))
        print(vals)
    finally:
        # Stop the listener thread even if a worker fails.
        lp.stop()

コード例 #13

0

ファイルを表示

def my_test_1():
    """Run ``run_many_times`` for 30 step sizes in parallel and dump the stats to CSV.

    Results are sorted, printed, and written to ``mydata2.csv`` with one
    header line followed by one line per step size.
    """
    step_list = np.pi * np.linspace(0.001, 0.05, 30)

    # `Pool` is constructed with max_workers, so it is presumably an alias of
    # a concurrent.futures executor — TODO confirm against the file imports.
    with Pool(max_workers=3) as pool:
        infor_list = sorted(pool.map(run_many_times, step_list))
    print(infor_list)

    row_format = '{},{},{},{},{},{},{},{},{},{},{}\n'
    # Open the output once instead of re-opening it in append mode per row.
    with open('mydata2.csv', 'w') as f:
        f.write(row_format.format(
            'theta', 'avg', 'median', 'best', 'global_best', 'better_count',
            'first_better_generation', 'best_in_many_times',
            'best_x_in_many_times', 'best_y_in_many_times',
            'best_chrom_in_many_times'))

        for s, d in infor_list:
            for k, v in d.items():
                print(k, v)
            f.write(row_format.format(
                s.replace('pi', ''), d['avg_fitness'], d['median_fitness'],
                d['best_fitness'], d['global_best_fitness'],
                d['better_fitness_count'],
                d['first_better_fitness_generation'],
                d['best_fitness_in_many_times'],
                d['best_fitness_solution_in_many_times'][0],
                d['best_fitness_solution_in_many_times'][1],
                d['best_fitness_chrom_in_many_times']))

コード例 #14

0

ファイルを表示

ファイル: run_parallel.py プロジェクト: aambrioso1/Effective_Python

def main():
    """Time ``my_module.gcd`` mapped over ``NUMBERS`` with two worker processes."""
    start = time.time()
    # The context manager shuts the pool down; results are fully collected
    # inside it, so the timing still covers the whole computation.
    with ProcessPoolExecutor(max_workers=2) as pool:  # The one change
        results = list(pool.map(my_module.gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'Took {delta:.3f} seconds')

コード例 #15

0

ファイルを表示

ファイル: threaded_check.py プロジェクト: Miilty/CertifStates

def process_data():
    """Find rows sharing a ``publicKeyRaw`` value and write them to ``collisions.json``.

    The JSON in ``DATA_FILE`` is loaded into a DataFrame and transposed. For
    each distinct ``publicKeyRaw`` value, ``find_duplicates`` is called with
    ``[value, frame]``; results with more than one entry are treated as
    collisions and serialised.
    """
    with open(DATA_FILE) as f:
        data = json.load(f)
    aux = pandas.DataFrame(data).transpose()

    # Distinct keys in first-seen order (dict preserves insertion order).
    unique_keys = list(
        dict.fromkeys(r['publicKeyRaw'] for _, r in aux.iterrows()))

    collisions = []
    # Submit all keys in one map() call so the workers run concurrently. The
    # old per-row map() with a single item waited for each task in turn.
    # The frame is pickled for the worker anyway, so an explicit copy per
    # task is unnecessary.
    with ProcessPoolExecutor(max_workers=40) as pool:
        tasks = ([key, aux] for key in unique_keys)
        for matches in pool.map(find_duplicates, tasks):
            if len(matches) > 1:
                collisions.append(matches)

    with open("collisions.json", 'w') as outfile:
        outfile.write(
            json.dumps([df.transpose().to_dict() for df in collisions]))

コード例 #16

0

ファイルを表示

ファイル: utils.py プロジェクト: mk788/ReferentialGym

def compute_topographic_similarity_parallel(sentences,features,comprange=100, max_workers=32):
    """Compute topographic similarity between sentences and features in parallel.

    For every index, a worker returns the Levenshtein distances (sentences)
    and cosine similarities (features) over ``comprange``; the flattened
    lists are then compared with Spearman's rank correlation.

    Returns:
        ``(-rho, p, levs, cossims)`` where ``rho, p`` come from ``spearmanr``.
    """
    levs = []
    cossims = []
    # One pool serves both passes and is shut down when the block exits.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        lev_results = executor.map(
            compute_levenshtein_distance_for_idx_over_comprange,
            itertools.repeat(sentences), range(len(sentences)),
            itertools.repeat(comprange))
        for idx1_levs in tqdm(lev_results, total=len(sentences)):
            levs.extend(idx1_levs)

        cossim_results = executor.map(
            compute_cosine_sim_for_idx_over_comprange,
            itertools.repeat(features), range(len(features)),
            itertools.repeat(comprange))
        for idx1_cossims in tqdm(cossim_results, total=len(features)):
            cossims.extend(idx1_cossims)

    rho, p = spearmanr(levs, cossims)
    return -rho, p, levs, cossims

コード例 #17

0

ファイルを表示

ファイル: 2.1.运行速度比较.py プロジェクト: GMwang550146647/network

 def multi_processing_map(self, n_task=2000, depth=35):
     """Compute ``self.fib(depth)`` ``n_task`` times in a process pool.

     Returns:
         A list of ``n_task`` results, which is also printed.
     """
     # The context manager releases the worker processes once all results
     # have been collected.
     with ProcessPoolExecutor() as pp:
         result = list(pp.map(self.fib, [depth] * n_task))
     print("Multi Process Result:{}".format(result))
     return result

コード例 #18

0

ファイルを表示

def parallel_tasks_run(task_function, task_data: list,
                       num_prints: O[int] = None,
                       print_function=default_print_task_function,
                       level=1,
                       task_name: str = 'unnamed-task',
                       num_threads: int = 1,
                       use_process: bool = True) -> list:
    """Apply ``task_function`` to every item of ``task_data``, optionally in parallel.

    Each item is wrapped as ``[index, total, print_function, args_list]``
    before being passed to ``task_function``. Non-list items are wrapped in a
    one-element list.

    Args:
        task_function: callable taking one wrapped task.
        task_data: the raw task inputs.
        num_prints: desired number of progress prints. None or 0 leaves the
            print step unset.
        print_function: progress callback, partially applied with ``step_``,
            ``task_name`` and ``level``.
        level: forwarded to ``print_function``.
        task_name: label used in logs and progress output.
        num_threads: worker count. Values below 2 run sequentially.
        use_process: use processes if True, threads otherwise.

    Returns:
        Results in the same order as ``task_data``.
    """
    num_task = len(task_data)
    logging.info(':: start ({}) #data/#threads = {}/{}, use-process({})'
                 .format(task_name, num_task, num_threads, use_process))
    # Truthiness check also guards num_prints == 0, which used to divide by
    # zero.
    if num_prints:
        step_print = int(np.ceil(num_task / num_prints))
    else:
        step_print = None
    print_function = partial(print_function, step_=step_print, task_name=task_name, level=level)
    task_data = [[xi, num_task, print_function, x if isinstance(x, list) else [x]]
                 for xi, x in enumerate(task_data)]
    if (len(task_data) < 2) or (num_threads < 2):
        return [task_function(x) for x in task_data]

    executor_cls = ProcessPoolExecutor if use_process else ThreadPoolExecutor
    # The context manager shuts the pool down even if a task raises.
    with executor_cls(max_workers=num_threads) as pool_:
        return list(pool_.map(task_function, task_data))

コード例 #19

0

ファイルを表示

ファイル: main_basinhopping.py プロジェクト: xiaofeima1990/project

def para_data_allo_1(Theta, cpu_num, rng, d_struct, xi_n, Data_struct):
    """Evaluate the moment function over all auctions in parallel.

    ``para_fun_est`` (partially applied with ``Theta``, ``rng``, ``xi_n`` and
    ``d_struct['h']``) is mapped over one tuple per row of ``Data_struct``;
    the NaN-ignoring sum of the results is divided by the number of rows.

    Returns:
        The averaged moment, or ``10**5`` when a singular matrix is hit.

    Raises:
        SystemExit: on any other ``numpy.linalg.LinAlgError``.
    """
    time.sleep(0.5)

    TT, _ = Data_struct.shape

    print("the length of the auction is {}".format(TT))

    try:
        func = partial(para_fun_est, Theta, rng, xi_n, d_struct['h'])

        tasks = zip(range(0, TT), Data_struct['bidder_state'],
                    Data_struct['bidder_pos'], Data_struct['price_norm'],
                    Data_struct[Pub_col].values.tolist())

        # The context manager shuts the pool down on success and on error.
        with ProcessPoolExecutor(max_workers=cpu_num) as pool:
            MoM = np.nansum(list(pool.map(func, tasks)))

    except np.linalg.LinAlgError as err:
        if 'Singular matrix' in str(err):
            # Penalty value returned to the caller for a singular matrix.
            return 10**5
        print(err)
        # Same effect as exit(1) without relying on the site-injected
        # builtin.
        raise SystemExit(1)

    return MoM / TT

コード例 #20

0

ファイルを表示

ファイル: functions.py プロジェクト: onerva-korhonen/consistency-as-onion

def calculateCorrelationsToCentroidInParallel(voxelIndices,
                                              allVoxelTs,
                                              centroidIndices,
                                              nCPUs=5):
    """
    A wrapper function for calculating the correlation to ROI centroid in parallel across
    ROIs.

    Parameters:
    -----------
    voxelIndices: list of np.arrays, each array containing indices of voxels of one ROI;
               these indices should refer to voxels'
               locations in the file containing voxel time series; note that the chunk
               must contain at least one voxel
    allVoxelTs: structured np.array with a field name 'roi_voxel_ts' (and possible additional
                fields), this field contains voxel time series
    centroidIndices: np.array of indices of ROI centroids in allVoxelTs
    nCPUs: int, number of CPUs to be used for the parallel computing (default = 5)


    Returns:
    --------
    correlationsToCentroid: list of doubles, correlations of the voxel ts defined
                          by voxelIndices to the ROICentroids
    """
    cfg = {'allVoxelTs': allVoxelTs}
    # One (cfg, params) task per ROI, pairing its voxels with its centroid.
    paramSpace = [(cfg, {
        'voxelIndices': voxelInd,
        'centroidIndex': centroidIndex
    }) for voxelInd, centroidIndex in zip(voxelIndices, centroidIndices)]
    # `Pool` is constructed with max_workers, so it is presumably an alias of
    # a concurrent.futures executor — TODO confirm. The context manager
    # releases the workers once all results are collected.
    with Pool(max_workers=nCPUs) as pool:
        correlationsToCentroid = list(
            pool.map(calculateCorrelationToCentroid, paramSpace, chunksize=1))
    return correlationsToCentroid

コード例 #21

0

ファイルを表示

def main_concurrent_from_future():
    """Time ``gcd`` mapped over ``NUMBERS`` with two worker processes."""
    start = time.time()
    # The context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'Operation took {delta:.3f} seconds\n')

コード例 #22

0

ファイルを表示

ファイル: python_5_41_code.py プロジェクト: Hao973/python_learn

def test_3():
    """Map ``gcd`` over ``numbers`` with two worker processes, printing results and timing."""
    start = time()
    # The context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(gcd, numbers))
    print(results)
    end = time()
    print('Took %.3f seconds' % (end - start))

コード例 #23

0

ファイルを表示

ファイル: bmp.py プロジェクト: swarmer/mlstarterpack

def _convert(args):
    """Convert every image under ``args.sourcedir`` to BMP under ``args.targetdir``.

    The relative path of each source image is preserved, with its extension
    replaced by ``.bmp``. Targets that already exist and are non-empty are
    skipped. Conversion runs in a process pool via ``_process_image``.
    """
    sourcedir = Path(args.sourcedir)
    targetdir = Path(args.targetdir)

    os.makedirs(targetdir, exist_ok=True)

    jobs = []
    for source_path in find_images(args.sourcedir):
        source_name = source_path.relative_to(sourcedir)

        # with_suffix() swaps only the final extension. The previous
        # str.replace() rewrote every occurrence of the suffix in the path
        # and, for files without a suffix, inserted '.bmp' between every
        # character.
        target_path = targetdir / source_name.with_suffix('.bmp')

        if target_path.exists() and target_path.stat().st_size > 0:
            continue

        jobs.append(ConvertJob(
            source_path, target_path,
            args.target_smaller_size,
        ))

    print(f'{len(jobs)} files to process')

    with ProcessPoolExecutor() as executor:
        # Drain the iterator so worker exceptions are raised here.
        for _ in executor.map(_process_image, jobs):
            pass
    print('Done')

コード例 #24

0

ファイルを表示

def main():
    """Time ``my_module.gcd`` mapped over ``NUMBERS`` with two worker processes."""
    start = time.time()
    # Only the executor type differs from the thread-based variant. The
    # context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(my_module.gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'총 {delta:.3f} 초 걸림')

コード例 #25

0

ファイルを表示

def maintain(infile, func):
    """Rewrite a ``|``-delimited file by applying ``func`` to every row in parallel.

    Rows are read from ``infile``, transformed by ``func`` in a process pool,
    appended to ``infile + ".temp"`` via ``append_to_file``, and the temp
    file then replaces ``infile``.
    """
    outfile = infile + ".temp"
    # Always start from an empty temp file: a leftover from an interrupted
    # run would otherwise be appended to and duplicate rows in the result.
    with open(outfile, "w", encoding="utf8"):
        pass

    with open(infile, encoding="utf8") as csvfile:
        rows = list(csv.reader(csvfile, delimiter="|"))

    # The context manager shuts the pool down even if func raises.
    with ProcessPoolExecutor(max_workers=60) as pool:
        processed = list(pool.map(func, rows))

    print(processed)
    for row in processed:
        append_to_file(outfile, row)

    os.replace(outfile, infile)

コード例 #26

0

ファイルを表示

ファイル: demo.py プロジェクト: DL2021Spring/CourseProject

def threading():
    """Return ``gcd`` of four fixed number pairs, computed in two worker processes.

    NOTE(review): the name shadows the standard-library ``threading`` module;
    it is kept so existing callers keep working.
    """
    numbers = [(1963309, 2265973), (2030677, 3814172), (1551645, 2229620),
               (2039045, 2020802)]
    # The context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor(max_workers=2) as pool:
        return list(pool.map(gcd, numbers))

コード例 #27

0

ファイルを表示

ファイル: pool.py プロジェクト: honmaple/maple-spider

class ThreadPool(object):
    '''Thread pool implementation that also wraps a process pool.'''

    def __init__(self, thread_num=1, process_num=1, q_size=2000, daemon=True):
        self.thread_pool = _ThreadPoolExecutor(thread_num, daemon)
        self.process_pool = ProcessPoolExecutor(process_num)
        self.result_queue = Queue(q_size)

    def wait(self, threads=None):
        '''Wait on the given futures via ``thread_wait``.'''
        # None replaces the former mutable default list.
        thread_wait([] if threads is None else threads)

    def add_thread(self, target, args=()):
        '''Submit ``target(*args)`` to the thread pool and return its future.'''
        return self.thread_pool.submit(target, *args)

    def add_process(self, target, args=()):
        '''Submit ``target(*args)`` to the process pool and return its future.'''
        return self.process_pool.submit(target, *args)

    def thread_map(self, target, args=None):
        '''Submit ``target(arg)`` per item to the thread pool; return the futures.'''
        if args is None:
            args = []
        return [self.thread_pool.submit(target, arg) for arg in args]

    def process_map(self, target, args=None):
        '''Map ``target`` over ``args`` in the process pool; return the result iterator.'''
        if args is None:
            args = []
        return self.process_pool.map(target, args)

    def map(self, target, args=None):
        '''Alias of :meth:`process_map`.'''
        return self.process_map(target, args)

コード例 #28

0

ファイルを表示

class ProcessPoolOpInvoker(ModelOpInvoker):
    """Invoke a pickled function on a persisted model in a process pool."""

    # Key to close on shutdown; set only when this invoker persisted the
    # model itself.
    _close_key = None

    def __init__(self, model, func, n_jobs, persist_method):
        """Persist the model if needed and start the worker pool.

        The model key, the pickled ``func``, the child thread count, the log
        queue and the root seed are passed to ``_initialize_mp_worker`` as
        the worker initializer arguments.
        """
        already_persisted = isinstance(model, PersistedModel)
        if not already_persisted:
            _log.debug('persisting model with method %s', persist_method)
            key = persist(model, method=persist_method)
            # We created the persisted copy, so we must close it later.
            self._close_key = key
        else:
            _log.debug('model already persisted')
            key = model

        _log.debug('persisting function')
        pickled_func = pickle.dumps(func)
        ctx = LKContext.INSTANCE
        _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs)
        # Set before the pool is created; removed again in shutdown().
        os.environ['_LK_IN_MP'] = 'yes'
        kid_tc = proc_count(level=1)
        worker_args = (key, pickled_func, kid_tc, log_queue(),
                       get_root_seed())
        self.executor = ProcessPoolExecutor(n_jobs, ctx,
                                            _initialize_mp_worker,
                                            worker_args)

    def map(self, *iterables):
        """Map ``_mp_invoke_worker`` over ``iterables`` in the pool."""
        return self.executor.map(_mp_invoke_worker, *iterables)

    def shutdown(self):
        """Shut the pool down, clear the env marker and close our model key."""
        self.executor.shutdown()
        os.environ.pop('_LK_IN_MP', 'yes')
        owned_key = self._close_key
        if owned_key is not None:
            owned_key.close()
            # Dropping the instance attribute re-exposes the class-level
            # None.
            del self._close_key

コード例 #29

0

ファイルを表示

def find_matching_pair(fastq_r1s: Iterable,
                       fastq_r2s: Iterable,
                       primer_seq_fwd: str,
                       primer_seq_rev: str,
                       pool: ProcessPoolExecutor = None) -> Tuple[str, str]:
    """Return the first (R1, R2) FastQ pair that matches the given primers.

    Args:
        fastq_r1s: candidate R1 files, paired positionally with ``fastq_r2s``.
        fastq_r2s: candidate R2 files.
        primer_seq_fwd: forward primer sequence.
        primer_seq_rev: reverse primer sequence.
        pool: optional executor used to test the pairs in parallel.

    Returns:
        ``(str(r1), str(r2))`` of the first matching pair.

    Raises:
        ValueError: if no pair matches.
    """
    # Materialise the inputs: they are iterated more than once (by map, by
    # zip and in the error message), which would exhaust plain iterators.
    fastq_r1s = list(fastq_r1s)
    fastq_r2s = list(fastq_r2s)

    if pool:
        bools = pool.map(
            partial(matches_fastq_pair, primer_seq_fwd, primer_seq_rev),
            fastq_r1s, fastq_r2s)
        matches = [(str(r1), str(r2))
                   for r1, r2, is_match in zip(fastq_r1s, fastq_r2s, bools)
                   if is_match]
    else:
        matches = [
            (str(r1), str(r2)) for r1, r2 in zip(fastq_r1s, fastq_r2s)
            if matches_fastq_pair(primer_seq_fwd, primer_seq_rev, r1, r2)
        ]

    if not matches:
        raise ValueError(
            'Cannot find match for primers {} in {} candidate FastQ file pairs'
            .format(
                (primer_seq_fwd, primer_seq_rev),
                len(fastq_r1s),
            ))

    if len(matches) > 1:
        logger.warning('More than one match: {}'.format(matches))
        # Return the first match on the assumption that inputs rows and
        # files are ordered similarly.
        # TODO (gdingle): deal with multi matches better
    return matches[0]

コード例 #30

0

ファイルを表示

ファイル: convert2jack.py プロジェクト: 5l1v3r1/jack-1

def convert_dataset(path,
                    filemap,
                    name,
                    num_processes,
                    max_num_support,
                    max_tokens,
                    is_web=True):
    """Load a pickled dataset and convert it into instances.

    With ``num_processes == 1`` the whole dataset is handed to ``process`` in
    this process. Otherwise it is split into chunks of 1000 items that are
    processed in a pool, with progress printed per chunk.

    Returns:
        ``{"meta": {"source": name}, "instances": [...]}``.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on trusted dataset files.
    with open(path, 'rb') as f:
        dataset = pickle.load(f)

    if num_processes == 1:
        instances = process(
            (dataset, filemap, max_num_support, max_tokens, is_web), True)
    else:
        chunk_size = 1000
        total = len(dataset)
        # Stepping by chunk_size avoids the empty trailing chunk produced
        # when the dataset length is an exact multiple of chunk_size.
        starts = range(0, total, chunk_size)
        tasks = [(dataset[start:start + chunk_size], filemap,
                  max_num_support, max_tokens, is_web) for start in starts]
        instances = []
        with ProcessPoolExecutor(num_processes) as executor:
            for start, processed in zip(starts,
                                        executor.map(process, tasks)):
                instances.extend(processed)
                print("%d/%d done" % (min(total, start + chunk_size), total))

    return {"meta": {"source": name}, 'instances': instances}

コード例 #31

0

ファイルを表示

def main():
    """Plot every result row of each group (skipping the first five groups) in parallel."""
    results = load_model_results('../results.txt')

    # Iterate over all groups, save one PDF for group, sorted by confidence.
    for index, key in enumerate(results):
        # The first five groups are skipped.
        if index < 5:
            continue

        print(' ****  Group: ', key, ' **** ')

        # Extract results for a single group.
        group_results = results[key]
        group_len = len(group_results)

        # Where to save the PDFs.
        output_directory = 'plots_' + str(key) + '/'
        in_tuple = zip(group_results, [output_directory] * group_len)

        # Multiprocessing!
        max_workers = 12
        print('Max workers: ', max_workers)
        start = time()
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            # Use a separate name: assigning to `results` here replaced the
            # dict being iterated, breaking `results[key]` on the next group.
            plot_results = list(pool.map(plot_row, in_tuple))
        end = time()
        print('Took %.3f seconds' % (end - start))

コード例 #32

0

ファイルを表示

ファイル: website_usecases.py プロジェクト: kkonrad/Troia-System-Tests

def run_simulation(datasets, workers_num):
    """Run ``exec_fun`` for ``workers_num`` simulated users in parallel.

    Each user is a ``TroiaWebDemoUser`` with the given datasets, and each is
    executed with ``ITERATIONS`` as its second argument.
    """
    workers = [TroiaWebDemoUser(get_troia_client(),
        "TES_TROJ_JID_" + str(i)) for i in xrange(workers_num)]
    for worker in workers:
        worker.set_datasets(datasets)
    # The context manager shuts the pool down; list() drains the lazy map so
    # worker exceptions are raised here.
    with ProcessPoolExecutor(workers_num) as executor:
        list(executor.map(exec_fun, workers, repeat(ITERATIONS, workers_num)))

コード例 #33

0

ファイルを表示

ファイル: august_23.py プロジェクト: dputtick/rc_code_dojo

def main(chunk):
    """Count the values in 1..999 for which ``is_prime`` is truthy.

    Args:
        chunk: ``chunksize`` passed to ``Executor.map``.

    Returns:
        The number of truthy ``is_prime`` results.
    """
    nums = range(1, 1000)
    # The context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor() as pool:
        flags = pool.map(is_prime, nums, timeout=None, chunksize=chunk)
        return sum(1 for flag in flags if flag)

コード例 #34

0

ファイルを表示

ファイル: genetic_neural_network.py プロジェクト: jpodeszwik/openai

class GeneticSearcher:
    """Genetic search over networks: selection, crossover and mutation per iteration."""

    def __init__(self, pop_size, problem):
        self.problem = problem
        self.pop = [Network.random_network() for i in range(pop_size)]
        # Maps network -> fitness, filled in by recalculate_fitness().
        self.fitness_cache = {}
        # (network, fitness) of the best individual from the last selection.
        self.best = None
        self.nt = NetTester(problem)
        self.pp = ProcessPoolExecutor(max_workers=4)
        self.ntf = NetworkTesterFactory(problem)
        self.pop_size = pop_size

    def recalculate_fitness(self):
        """Rate, in the process pool, every network that has no cached fitness."""
        nets_to_rate = [net for net in self.pop if net not in self.fitness_cache]
        for net, res in self.pp.map(self.ntf.rate_network, nets_to_rate):
            self.fitness_cache[net] = res

    def selection(self):
        """Record the fittest individual and return the top third of the population."""
        ranked = sorted(((net, self.fitness_cache[net]) for net in self.pop),
                        reverse=True, key=lambda pair: pair[1])
        self.best = ranked[0]
        return [net for net, _ in ranked[:int(self.pop_size / 3)]]

    def crossing(self, parents):
        """Create ``pop_size / 3`` children, each from two distinct random parents."""
        # Sample from a de-duplicated sequence: random.sample() no longer
        # accepts sets (Python 3.11+), and dict.fromkeys keeps a
        # deterministic order.
        candidates = list(dict.fromkeys(parents))
        children = []
        while len(children) < self.pop_size / 3:
            # Do not rebind `parents`: doing so used to shrink the pool to
            # the first sampled pair, so every child had the same parents.
            first, second = random.sample(candidates, 2)
            children.append(self.problem.crossing(first, second))

        return children

    def mutation(self, population):
        """Create ``0.3 * pop_size`` mutants of randomly chosen individuals."""
        mutants = []
        while len(mutants) < 0.3 * self.pop_size:
            mutants.append(self.problem.mutate(random.choice(population)))

        return mutants

    def iteration(self):
        """Advance one generation and return the best fitness seen in selection."""
        self.recalculate_fitness()
        old_survivors = self.selection()
        children = self.crossing(old_survivors)
        mutants = self.mutation(old_survivors)

        new_generation = old_survivors + children + mutants

        # Top up with fresh random networks to keep the population size.
        while len(new_generation) < self.pop_size:
            new_generation.append(Network.random_network())

        self.pop = new_generation

        return self.best[1]

    def show_best(self):
        """Run the best network through the tester with rendering enabled."""
        self.nt.test(self.best[0], render=True)

コード例 #35

0

ファイルを表示

ファイル: pi_futures.py プロジェクト: gjbex/training-material

def compute_pi(nr_tries=10000, pool_size=None, constructor=None):
    """Estimate pi by averaging ``partial_pi`` over ``pool_size`` workers.

    Args:
        nr_tries: total number of tries, split evenly across workers.
        pool_size: number of workers; defaults to the CPU count.
        constructor: executor class to use; defaults to
            ``ProcessPoolExecutor``.

    Returns:
        The mean of the per-worker estimates.
    """
    # Resolve the default first: pool_size is needed to split the work, and
    # dividing by None used to raise before the fallback was reached.
    if not pool_size:
        pool_size = multiprocessing.cpu_count()
    if not constructor:
        constructor = ProcessPoolExecutor
    args = [(nr_tries//pool_size, )
            for _ in range(pool_size)]
    # The context manager shuts the executor down after the results are in.
    with constructor(max_workers=pool_size) as executor:
        results = list(executor.map(partial_pi, args))
    return sum(results)/pool_size

コード例 #36

0

ファイルを表示

ファイル: item_41.py プロジェクト: lancelote/effective_python

def main():
    """Time ``gcd`` over four fixed number pairs using two worker processes."""
    numbers = [
        (1963309, 2265973),
        (2030677, 3814172),
        (1551645, 2229620),
        (2039045, 2020802),
    ]
    start = time()
    # The context manager shuts the pool down after the results are in.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(gcd, numbers))
    end = time()
    print('Took %.3f seconds' % (end - start))

コード例 #37

0

ファイルを表示

ファイル: pfam_sets.py プロジェクト: mmayers12/n15_mice

def make_arch_db():
    """Compute Pfam architecture sets in parallel, save them, and load them into MongoDB.

    ``process_range`` is mapped over consecutive ``(start, stop)`` windows of
    width 10000 covering 0..60000000; the merged result is saved to
    ``pfam_sets.pkl.gz`` and inserted into the ``ArchDB_Pfam_071414``
    collection with indexes on ``pID`` and ``Pfam``.
    """
    executor = ProcessPoolExecutor(max_workers=8)
    step = 10000
    upper = 60000000
    window_starts = range(0, upper, step)
    window_stops = range(step, upper + step, step)
    range_results = executor.map(process_range,
                                 zip(window_starts, window_stops))
    # Waits for every window to finish before merging.
    executor.shutdown()
    print('done calculating architectures')
    pfam_sets = merge(range_results)
    print(len(pfam_sets))
    gsave(pfam_sets,'pfam_sets.pkl.gz')

    def to_document(item):
        # item is a (Pfam key, collection of ids) pair; the smallest id
        # becomes the document _id.
        return {'_id': min(item[1]), 'pID': list(item[1]), 'Pfam': item[0]}

    # mongodb
    db = MongoClient('wl-cmadmin', 27017).ArchDB_Pfam_071414.ArchDB_Pfam_071414
    db.insert(map(to_document, pfam_sets.items()))
    db.ensure_index('pID')
    db.ensure_index('Pfam')

コード例 #38

0

ファイルを表示

ファイル: fuzz_rasm2.py プロジェクト: P4N74/radare2-regressions

def main():
    """Fuzz the assembler with byte windows taken from a binary file.

    Every offset of the file given as ``sys.argv[1]`` yields a window of up
    to ``MAX_OPLEN`` bytes, checked by ``check_hexpairs`` in a process pool.
    A result is printed the first time its case is seen, unless its metacase
    or meta-metacase has already exceeded its example limit.

    Returns:
        0 on success or after printing usage; 1 if the file is missing or
        smaller than ``MAX_OPLEN``.
    """
    cases = {}
    meta_cases = {}
    meta_meta_cases = {}

    if len(sys.argv) != 2:
        print("Usage: %s <path to binary file>" % sys.argv[0])
        return 0

    if not os.path.exists(sys.argv[1]):
        print("No such file %s" % sys.argv[1])
        return 1

    fsize = os.stat(sys.argv[1]).st_size

    if fsize < MAX_OPLEN:
        print("muy pequeño:  %s" % sys.argv[1])
        return 1

    with open(sys.argv[1], "rb") as f:
        input_data = f.read()

    # The context manager shuts the pool down even if a worker raises.
    with ProcessPoolExecutor(CONCURRENCY) as pool:
        for offset in range(0, fsize-20, CONCURRENCY):
            # One batch of CONCURRENCY consecutive windows per map() call.
            inputs = [hexlify(input_data[o:o+MAX_OPLEN])
                      for o in range(offset, offset+CONCURRENCY)]
            for res in pool.map(check_hexpairs, inputs):
                if not res:
                    continue
                inskey = res['case']
                insmkey = res['metacase']
                insmmkey = res['metametacase']
                meta_meta_cases[insmmkey] = meta_meta_cases.get(insmmkey, 0) + 1
                meta_cases[insmkey] = meta_cases.get(insmkey, 0) + 1
                if (meta_cases[insmkey] > MAX_METACASE_EXAMPLES or
                        meta_meta_cases[insmmkey] > MAX_META_META_CASE_EXAMPLES):
                    # Enough examples of this family already reported.
                    continue
                if inskey not in cases:
                    # Only reached for unseen cases, so the count is always 1.
                    cases[inskey] = 1
                    print("%s\n" % json.dumps(res, indent=4))

    # Explicit success code, consistent with the early returns above.
    return 0

コード例 #39

0

ファイルを表示

ファイル: susy-fit-inputs.py プロジェクト: dguest/susy-analysis

def run():
    """Compute yields for one systematic, or for all of them, and dump YAML.

    Options come from ``get_config()``. When ``systematic`` is 'all', every
    systematic reported by ``get_all_systematics`` gets its own copy of the
    options and is dispatched through an ``Executor``; the per-systematic
    dictionaries are merged into one. Otherwise ``run_systematic`` is called
    once with the options as given. The merged counts are translated with
    ``fitinputs.translate_to_fit_inputs`` and written to the 'output' path.
    """
    args = get_config()
    dargs = vars(args)

    if args.systematic != 'all':
        # just do the one that was asked for
        counts_dict = run_systematic(dargs)
    else:
        # launch a bunch of processes to look at all systematics;
        # multiprocessing makes a mess of the outputs, so silence them
        dargs['quiet'] = True
        systs = get_all_systematics(args.files)
        # every systematic works on its own private copy of the options
        syst_args = dict((name, dargs.copy()) for name in systs)
        for name in systs:
            syst_args[name]['systematic'] = name
        executor = Executor()
        partial_counts = executor.map(run_systematic, syst_args.values())
        counts_dict = {}
        for counts in partial_counts:
            counts_dict.update(counts)

    with open(dargs['output'], 'w') as out_yml:
        fit_inputs = fitinputs.translate_to_fit_inputs(counts_dict)
        out_yml.write(yaml.dump(fit_inputs))

コード例 #40

0

ファイルを表示

ファイル: main.py プロジェクト: AtomLinter/linter-pylama

def main():
    """Command-line entry point: sort the imports of the requested files.

    Arguments come from ``create_parser()``. ``settings_path`` and
    ``virtual_env`` are normalised to absolute paths (with a warning when
    the directory is missing). A single ``-`` file name means "read the
    source from stdin and write the result to stdout". Otherwise each file
    is processed either in a process pool (when ``jobs`` is set) or
    serially, and the process exits with status 1 when ``check`` is set and
    at least one file was incorrectly sorted.

    Raises:
        SystemExit: with code 1 when a check run found unsorted files.
    """
    arguments = create_parser()
    if arguments.get('show_version'):
        print(INTRO)
        return

    if 'settings_path' in arguments:
        sp = arguments['settings_path']
        # A path to a settings file is reduced to its containing directory.
        arguments['settings_path'] = os.path.abspath(sp) if os.path.isdir(sp) else os.path.dirname(os.path.abspath(sp))
        if not os.path.isdir(arguments['settings_path']):
            print("WARNING: settings_path dir does not exist: {0}".format(arguments['settings_path']))

    if 'virtual_env' in arguments:
        venv = arguments['virtual_env']
        arguments['virtual_env'] = os.path.abspath(venv)
        if not os.path.isdir(arguments['virtual_env']):
            print("WARNING: virtual_env dir does not exist: {0}".format(arguments['virtual_env']))

    file_names = arguments.pop('files', [])
    if file_names == ['-']:
        SortImports(file_contents=sys.stdin.read(), write_to_stdout=True, **arguments)
    else:
        if not file_names:
            # No files given: walk the current directory and, unless
            # 'apply' was requested, ask before applying changes.
            file_names = ['.']
            arguments['recursive'] = True
            if not arguments.get('apply', False):
                arguments['ask_to_apply'] = True
        config = from_path(os.path.abspath(file_names[0]) or os.getcwd()).copy()
        config.update(arguments)
        wrong_sorted_files = False
        skipped = []
        if arguments.get('recursive', False):
            # iter_source_code receives `skipped`; its length is added to the
            # skip count after processing.
            file_names = iter_source_code(file_names, config, skipped)
        num_skipped = 0
        if config['verbose'] or config.get('show_logo', False):
            print(INTRO)
        jobs = arguments.get('jobs')
        if jobs:
            # The with-block shuts the worker pool down once every file has
            # been processed (or an error escapes the loop).
            with ProcessPoolExecutor(max_workers=jobs) as executor:
                sorter = functools.partial(sort_imports, **arguments)
                for sort_attempt in executor.map(sorter, file_names):
                    if not sort_attempt:
                        continue
                    incorrectly_sorted = sort_attempt.incorrectly_sorted
                    if arguments.get('check', False) and incorrectly_sorted:
                        wrong_sorted_files = True
                    if sort_attempt.skipped:
                        num_skipped += 1
        else:
            for file_name in file_names:
                try:
                    sort_attempt = SortImports(file_name, **arguments)
                    incorrectly_sorted = sort_attempt.incorrectly_sorted
                    if arguments.get('check', False) and incorrectly_sorted:
                        wrong_sorted_files = True
                    if sort_attempt.skipped:
                        num_skipped += 1
                except IOError as e:
                    print("WARNING: Unable to parse file {0} due to {1}".format(file_name, e))
        if wrong_sorted_files:
            # sys.exit is always available; the bare exit() helper is only
            # injected by the site module and is meant for interactive use.
            sys.exit(1)

        num_skipped += len(skipped)
        if num_skipped and not arguments.get('quiet', False):
            if config['verbose']:
                for was_skipped in skipped:
                    print("WARNING: {0} was skipped as it's listed in 'skip' setting"
                        " or matches a glob in 'skip_glob' setting".format(was_skipped))
            print("Skipped {0} files".format(num_skipped))

コード例 #41

0

ファイルを表示

ファイル: prep.py プロジェクト: JeanBilheux/OverviewSciPy2016

# Create the top-level data directory the first time the script runs.
if not os.path.exists(data):
    os.mkdir(data)


# The stock data is generated only when the 'minute' directory is missing;
# an existing directory is taken to mean the data is already there.
minute = os.path.join(data, 'minute')
if not os.path.exists(minute):
    os.mkdir(minute)
    # NOTE(review): presumably writes the generated series below `minute`,
    # starting 2010-01-01 at a 120-second frequency -- confirm against
    # generate_stocks.
    generate_stocks(freq=pd.Timedelta(seconds=120),
                    start=pd.Timestamp('2010-01-01'),
                    directory=minute)


def convert_to_json(d):
    """Convert the latest CSV files of directory *d* into one JSON-lines file.

    The last 365 entries of ``d`` (in sorted-name order, fewer if the
    directory holds fewer) are read with pandas, and every row is written as
    one JSON object per line. The output path is ``d`` with each occurrence
    of 'minute' replaced by 'json', plus a '.json' suffix; its parent
    directory must already exist.

    Args:
        d: path of a directory containing CSV files.
    """
    filenames = sorted(glob(os.path.join(d, '*')))[-365:]
    with open(d.replace('minute', 'json') + '.json', 'w') as f:
        for fn in filenames:
            df = pd.read_csv(fn)
            for rec in df.to_dict(orient='records'):
                json.dump(rec, f)
                # The file is in text mode, which already translates '\n' to
                # the platform line ending; writing os.linesep here would
                # produce '\r\r\n' on Windows.
                f.write('\n')
    print("Finished JSON: %s" % d)


# Convert the minute data to JSON only when the 'json' directory is missing.
js = os.path.join(data, 'json')
if not os.path.exists(js):
    os.mkdir(js)
    directories = sorted(glob(os.path.join(minute, '*')))

    # The with-block shuts the pool down once all conversions are finished,
    # instead of leaving the executor (and its workers) dangling.
    with ProcessPoolExecutor() as e:
        # list() drains the lazy map iterator so that every conversion is
        # awaited and any worker exception is raised here.
        list(e.map(convert_to_json, directories))

コード例 #42

0

ファイルを表示

ファイル: effective_python_worksheet.py プロジェクト: jerry-dumblauskas/PythonBuiltins


# Each tuple is handed to gcd as a single argument.
numbers = [(1963309, 2265973), (2030677, 3814172),
           (1551645, 2229620), (2039045, 2020802)]

# Baseline: run gcd serially with the builtin map.
start = time()
results = list(map(gcd, numbers))
end = time()
print('Took %.3f seconds' % (end - start))

print("using multiprocessing...")

try:
    start = time()
    # The with-block shuts the pool down afterwards; the end time is taken
    # inside it so the measured interval still covers only the map itself.
    with ProcessPoolExecutor(max_workers=2) as pool:  # The one change
        results = list(pool.map(gcd, numbers))
        end = time()
    print('Took %.3f seconds' % (end - start))
except RuntimeError:
    # Best-effort demo: a RuntimeError from the pool is reported, not fatal.
    print("another windows fail....")

# Item 42 ...
print("====ITEM 42: Define Function Decorators with functools.wraps ====")
print("func tools!!")


def my_dec(func):
    """Decorator example: define a wrapper that prints before calling *func*."""
    # NOTE(review): as shown, `wrapper` is defined but never returned, so
    # my_dec(...) evaluates to None. The snippet looks truncated -- confirm
    # against the full file before relying on it.
    def wrapper():
        print("in wrapper")
        return func()

コード例 #43

0

ファイルを表示

ファイル: item-41.py プロジェクト: paulhendricks/effective-python

def main_3():
    """Time ``gcd`` mapped over ``numbers`` in an 8-worker process pool.

    Returns:
        A 'Took %.3f seconds' string with the elapsed wall-clock time.
    """
    start = time.time()
    # The with-block releases the worker processes when the call is done;
    # without it every call leaves a pool behind.
    with ProcessPoolExecutor(max_workers=8) as pool:
        # list() drains the lazy iterator so all results are computed.
        list(pool.map(gcd, numbers))
        # Taken before shutdown so the interval matches the map itself.
        end = time.time()
    return 'Took %.3f seconds' % (end - start)

コード例 #44

0

ファイルを表示

ファイル: map.py プロジェクト: laike9m/my-slides-samples

from concurrent.futures import ProcessPoolExecutor

def is_odd_number(number):
    """Return ``number % 2``: 1 (truthy) for odd integers, 0 (falsy) for even."""
    remainder = number % 2
    return remainder

# The with-block shuts the pool down after both results have been printed.
with ProcessPoolExecutor() as executor:
    # map() returns a lazy iterator; timeout=1 bounds how long each next()
    # may wait, measured from the map() call itself.
    it = executor.map(is_odd_number, [1, 2], timeout=1)
    print(next(it))
    print(next(it))

コード例 #45

0

ファイルを表示

ファイル: ReTreeingFuncs.py プロジェクト: JudoWill/ResearchNotebooks

                'AdjPval',
                'Group1Name',
                'AI',
                'AI-pval',
                'AI-null']
fname = 'more_phylip_BenjRes.tsv'
benj_writer = csv.DictWriter(open(fname, 'w'), benj_fields, delimiter = '\t')
   

benj_writer.writeheader()

multi = True
print 'Starting multiprocessing!'
if multi:
    pool = ProcessPoolExecutor(max_workers = 30)
    results = pool.map(calculate_region, yield_regions(trop_dict))
else:
    results = imap(calculate_region, islice(yield_regions(trop_dict), 0,35))

for gname, sub, prot, win, start, benj_res in results:
    
    #print prot, start, win
    tdict = {
             'Prot':prot,
             'Start':start,
             'WinSize':win,
             'GroupName':gname,
             'Subtype':sub,
             }
    if type(benj_res) is StringType:
        if (benj_res == 'Already Processed') or benj_res.startswith('Too few unique sequences'):

コード例 #46

0

ファイルを表示

ファイル: a.py プロジェクト: hashiwa000/EffectivePythonWork

def do_multi_process():
    """Map ``gcd`` over ``numbers`` in a 2-worker process pool and print the time."""
    start = time()
    # The with-block releases the worker processes when the call is done;
    # without it every call leaves a pool behind.
    with ProcessPoolExecutor(max_workers=2) as pool:
        # list() drains the lazy iterator; the values themselves are unused.
        list(pool.map(gcd, numbers))
        # Taken before shutdown so the interval matches the map itself.
        end = time()
    print('Took %.3f seconds' % (end - start))

コード例 #47

0

ファイルを表示

ファイル: score_processor.py プロジェクト: wiki-ai/revscoring

class ScoreProcessor:
    """Score revisions with a thread pool for batches and a process pool for scoring.

    Batches of revision ids are dispatched on ``scores_ex`` (a
    ``ThreadPoolExecutor``). Each batch is run through the extractor and its
    per-revision work is fanned out on ``process_ex`` (a
    ``ProcessPoolExecutor``). The instance is a context manager; leaving the
    ``with`` block shuts both pools down.
    """

    # Default IO worker count is cpu_workers * IO_WORKER_MULTIPLIER, clamped
    # to the [MIN_IO_WORKERS, MAX_IO_WORKERS] range.
    IO_WORKER_MULTIPLIER = 0.25
    MIN_IO_WORKERS = 2
    MAX_IO_WORKERS = 10

    def __init__(self, scoring_model, extractor, cpu_workers=None,
                 io_workers=None, batch_size=50):
        """Start both worker pools and resolve the root datasources.

        Args:
            scoring_model: model whose ``features`` are solved and scored.
            extractor: object providing ``extract`` and ``solve``.
            cpu_workers: size of the process pool; ``cpu_count()`` when None.
            io_workers: size of the thread pool; derived from ``cpu_workers``
                when None.
            batch_size: number of revision ids per extraction batch.
        """
        self.scoring_model = scoring_model
        self.extractor = extractor
        self.cpu_workers = \
            int(cpu_workers) if cpu_workers is not None else cpu_count()
        self.batch_size = int(batch_size)

        if io_workers is not None:
            self.io_workers = int(io_workers)
        else:
            self.io_workers = max(self.MIN_IO_WORKERS,
                                  min(self.MAX_IO_WORKERS,
                                      int(self.cpu_workers *
                                          self.IO_WORKER_MULTIPLIER)))

        logger.info("Starting up IO thread pool with {0} workers"
                    .format(self.io_workers))
        self.scores_ex = ThreadPoolExecutor(max_workers=self.io_workers)
        logger.info("Starting up CPU thread pool with {0} workers"
                    .format(self.cpu_workers))
        self.process_ex = ProcessPoolExecutor(max_workers=self.cpu_workers)

        # Only the Datasource roots of the model's features are extracted.
        roots = dependencies.dig(self.scoring_model.features)
        self.root_datasources = [d for d in roots if isinstance(d, Datasource)]

    def __enter__(self):
        """Return the processor itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Shut down both pools; exceptions from the block are not suppressed.

        The context-manager protocol passes three exception arguments, and
        the pools live in ``scores_ex`` / ``process_ex``. The arguments
        default to None so a direct ``proc.__exit__()`` call keeps working.
        """
        self.scores_ex.shutdown()
        self.process_ex.shutdown()

    def score(self, rev_ids, caches=None, cache=None):
        """Yield the score results for *rev_ids*, batch by batch.

        Args:
            rev_ids: an iterable of revision ids, or a single int.
            caches: optional mapping of rev_id to a per-revision cache.
            cache: optional cache shared by all revisions.

        Yields:
            Whatever ``_score_batch`` returns for each revision, i.e.
            ``(rev_id, score)`` pairs.
        """
        if isinstance(rev_ids, int):
            rev_ids = [rev_ids]

        batches = batch_rev_caches(chunked(rev_ids, self.batch_size), caches,
                                   cache)

        for batch_scores in self.scores_ex.map(self._score_batch, batches):
            for score in batch_scores:
                yield score

    def _score_batch(self, batch_rev_cache):
        """Extract root values for one batch and score it on the process pool.

        Args:
            batch_rev_cache: an ``(id_batch, caches, cache)`` triple.

        Returns:
            A list with one ``_process_score`` result per revision id.
        """
        id_batch, caches, cache = batch_rev_cache
        logger.debug("running _score_batch() on {0} rev_ids"
                     .format(len(id_batch)))
        error_values = self.extractor.extract(
            id_batch, self.root_datasources, caches=caches, cache=cache)
        e_r_caches = self._group_error_root_caches(
            id_batch, error_values, caches, cache)

        rev_scores = self.process_ex.map(self._process_score, e_r_caches)
        return list(rev_scores)

    def _group_error_root_caches(self, id_batch, error_values, caches, cache):
        """Yield one work item per revision for ``_process_score``.

        Each item is ``(rev_id, scoring_model, extractor, score_cache,
        error)``. For a revision whose extraction failed, the model and
        extractor are None and the cache is empty. Otherwise the cache is
        layered from the shared *cache*, the revision's entry in *caches*,
        and the extracted root datasource values (later layers win).
        """
        for rev_id, (error, vals) in zip(id_batch, error_values):
            if error:
                score_cache = {}
                scoring_model = None
                extractor = None
            else:
                score_cache = {}
                score_cache.update(cache or {})
                score_cache.update((caches or {}).get(rev_id, {}))
                score_cache.update({rd: rv for rd, rv in
                                    zip(self.root_datasources, vals)})
                scoring_model = self.scoring_model
                extractor = self.extractor

            yield (rev_id, scoring_model, extractor, score_cache, error)

    @classmethod
    def _process_score(cls, e_r_caches):
        """Solve the features and score one revision.

        Args:
            e_r_caches: a ``(rev_id, scoring_model, extractor, cache,
                error)`` work item.

        Returns:
            ``(rev_id, score)``, or ``(rev_id, error_score(error))`` when
            extraction had already failed or scoring raised.

        Raises:
            Exception: whatever ``extractor.solve`` raises is re-raised.
        """
        rev_id, scoring_model, extractor, cache, error = e_r_caches
        logger.debug("running _process_score() on {0}".format(rev_id))

        if error is None:

            try:
                feature_values = list(extractor.solve(
                    scoring_model.features, cache=cache))
            except Exception:
                logger.debug("An error occured during feature extraction")
                # NOTE(review): the original had an unreachable
                # `return rev_id, error_score(error)` after this raise, which
                # suggests the raise may be a debugging leftover. The raising
                # behaviour is kept as-is -- confirm which one is intended.
                raise

            try:
                score = scoring_model.score(feature_values)
                return rev_id, score
            except Exception as error:
                logger.debug("An error occured during scoring")
                return rev_id, error_score(error)
        else:
            return rev_id, error_score(error)