def process_folder(self):
     if self.in_folder.endswith(".txt"):
         paths_list = read_file(self.in_folder)
     else:
         paths_list, _ = traverse_dir_files(self.in_folder)
     print('[Info] 样本数: {}'.format(len(paths_list)))
     random.seed(47)
     random.shuffle(paths_list)
     if len(paths_list) > self.max_num:
         paths_list = paths_list[:self.max_num]
     print('[Info] 样本数: {}'.format(len(paths_list)))
     time_str = get_current_time_str()
     out_file = os.path.join(self.out_folder, "val_{}.txt".format(time_str))
     out_html = os.path.join(self.out_folder,
                             "val_{}.html".format(time_str))
     pool = Pool(processes=100)
     for img_idx, img_path in enumerate(paths_list):
         if "rotation_datasets_hardcase" in img_path:  # 过滤hardcase
             continue
         # ServiceTester.process_img_path(img_idx, img_path, self.service, out_file)
         pool.apply_async(ServiceTester.process_img_path,
                          (img_idx, img_path, self.service, out_file))
     pool.close()
     pool.join()
     print('[Info] 处理完成: {}'.format(out_file))
     data_lines = read_file(out_file)
     print('[Info] 正确率: {}'.format(
         safe_div(len(paths_list) - len(data_lines), len(paths_list))))
     out_list = []
     for data_line in data_lines:
         items = data_line.split("\t")
         out_list.append(items)
     make_html_page(out_html, out_list)
     print('[Info] 处理完成: {}'.format(out_html))
    def filter_checked_urls(self):
        in_dir = os.path.join(DATA_DIR, 'datasets_v4_checked_r_txt')
        out_dir = os.path.join(
            DATA_DIR,
            'datasets_v4_checked_r_urls_out_{}'.format(get_current_time_str()))
        mkdir_if_not_exist(out_dir)

        paths_list, names_list = traverse_dir_files(in_dir)
        pool = Pool(processes=80)

        idx = 0
        for in_path, in_name in zip(paths_list, names_list):
            out_error_path = os.path.join(out_dir,
                                          '{}.error.txt'.format(in_name))
            out_right_path = os.path.join(out_dir,
                                          '{}.right.txt'.format(in_name))
            print('[Info] out_file: {} - {}'.format(out_error_path,
                                                    out_right_path))
            data_lines = read_file(in_path)
            print('[Info] 文本数量: {}'.format(len(data_lines)))
            for data_line in data_lines:
                url = data_line
                pool.apply_async(DatasetFilter.check_url,
                                 (idx, url, out_error_path, out_right_path))
                idx += 1
                if idx % 1000 == 0:
                    print('[Info] idx: {}'.format(idx))

        pool.close()
        pool.join()
        print('[Info] 处理完成: {}'.format(out_dir))
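
Several of these snippets rely on small I/O helpers (read_file, traverse_dir_files, mkdir_if_not_exist) whose definitions are not shown. A minimal sketch of what they might look like, purely as an assumption about their behavior:

import os


def read_file(path):
    # Read a text file and return its lines without trailing newlines.
    with open(path, "r", encoding="utf-8") as f:
        return [line.rstrip("\n") for line in f]


def traverse_dir_files(root_dir):
    # Walk a directory tree and return parallel lists of file paths and file names.
    paths_list, names_list = [], []
    for dir_path, _, file_names in os.walk(root_dir):
        for name in sorted(file_names):
            paths_list.append(os.path.join(dir_path, name))
            names_list.append(name)
    return paths_list, names_list


def mkdir_if_not_exist(dir_path):
    # Create the directory (including parents) if it does not already exist.
    os.makedirs(dir_path, exist_ok=True)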
Example #3
def run(num, method, args):
    p = Pool(num)
    for arg in args:
        p.apply_async(method, args=(arg, ))
    p.close()
    p.join()
    p.terminate()
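
apply_async sends the callable to the workers through a pickled task queue, so `method` must be a picklable, module-level function (lambdas and nested functions will fail). A hypothetical usage sketch:

def square(x):
    # Hypothetical worker; any picklable module-level function will do.
    return x * x


if __name__ == "__main__":
    run(4, square, range(10))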
Example #4
class Mobile(Device):
    def __init__(self, device_id, address):
        super().__init__(device_id)
        self.address = address
        self.p = Pool(6)
        self.capture = None
        self.pre_time = time.time() * 1000

    def tap_button(self, button):
        cmd = "adb  -s {:s} shell input tap {:d} {:d}".format(
            self.device_id, button[0], button[1])
        self.p.apply_async(execute_cmd, args={cmd})

    def swipe(self, action):
        cmd = "adb -s {:s} shell input swipe {:d} {:d} {:d} {:d} 300".format(
            self.device_id, action[0], action[1], action[2], action[3])
        self.p.apply_async(execute_cmd, args=(cmd,))

    def get_frame(self):
        if self.capture is None:
            self.capture = cv2.VideoCapture(self.address)

        state, img = self.capture.read()
        if state:
            time_mill = time.time() * 1000
            if time_mill - self.pre_time >= 500:
                self.pre_time = time_mill
                return [cv2.resize(img, (540, 960)), 0]
            else:
                return [None, 0]
        else:
            self.capture.release()
            self.capture = None
            time.sleep(10)
            return [None, -1]
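
execute_cmd is not shown in this example. A plausible sketch, assuming it just runs the adb command in a shell and discards the result:

import subprocess


def execute_cmd(cmd):
    # Run a shell command (e.g. an adb tap or swipe) and wait for it to finish.
    subprocess.run(cmd, shell=True, check=False)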
Example #5
    def process(self):
        data_dir = os.path.join(DATA_DIR, 'labeled_data')
        print('[Info] 数据文件夹: {}'.format(data_dir))
        out_dir = os.path.join(
            DATA_DIR, 'labeled_data_out_{}'.format(get_current_time_str()))
        print('[Info] 输出文件夹: {}'.format(out_dir))
        mkdir_if_not_exist(out_dir)
        paths_list, names_list = traverse_dir_files(data_dir)
        print('[Info] 文件数: {}'.format(len(paths_list)))
        out_file_format = os.path.join(out_dir, 'labeled_data_imgs_{}.txt')
        pool = Pool(processes=80)
        for path, name in zip(paths_list, names_list):
            name = name.split(".")[0]
            out_file = out_file_format.format(name)
            print('[Info] 输出文件: {}'.format(out_file))
            data_lines = read_file(path)
            for idx, data_line in enumerate(data_lines):
                if idx == 0:
                    continue
                # DataPrelabeled.process_line(out_file, idx, data_line)
                pool.apply_async(DataPrelabeled.process_line,
                                 (out_file, idx, data_line))
        pool.close()
        pool.join()

        print('[Info] 处理完成: {}'.format(out_dir))
Example #6
def get_img_href():
    """
    Ask the user for a save path, then download the images with a multi-process pool.
    :return: status of the saved images
    """
    url = "http://www.cct58.com/mneinv/1.html"
    # head = main_head()

    save_path = input("文件保存在: ")

    # Create worker processes based on the number of CPU cores
    os.cpu_count()
    # pool = Pool(os.cpu_count())
    pool = Pool(10)

    # Iterate over the URL of every listing page
    for href in get_every_page(url, main_head(), proxy):
        try:
            for person_mx27, title in get_every_person(href, main_head(),
                                                       proxy):
                # print(person_mx27, title)     http://www.cct58.com/mneinv/19497/mx27/  夏夏

                # downloads(person_mx27, head, proxy,save_path)
                pool.apply_async(downloads,
                                 args=(person_mx27, main_head(), proxy,
                                       save_path))
        except:
            continue
    pool.close()
    pool.join()
Example #7
def main(original_dir, output_dir, crf, thread_count=None, overwrite=False):
    """
    Compresses every MP4 video in a directory using the H.264 codec.

    Args:
        original_dir: Directory containing original videos.
        output_dir: Directory to output new videos to.  Will be created if
            it does not exist.
        crf: Constant rate factor, i.e. the amount of compressing to perform.
            A higher number means higher compression.
        thread_count: Number of threads to use.  Set to None to automatically
            choose a number of threads.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    videos = get_videos(original_dir, output_dir, overwrite)

    print('Compressing {} videos...'.format(len(videos)))
    pool = Pool(thread_count)
    for video, output in videos:
        pool.apply_async(compress_worker, (video, output, crf))

    pool.close()
    pool.join()

    print('Done')
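
compress_worker is not shown above. A plausible sketch, assuming ffmpeg is on the PATH and the worker re-encodes a single file with libx264 at the requested CRF:

import subprocess


def compress_worker(video, output, crf):
    # Re-encode one MP4 with H.264; a higher CRF means stronger compression.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video, "-c:v", "libx264", "-crf", str(crf), output],
        check=True,
    )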
Example #8
 def check_all(self):
     func = self.check
     pool = Pool()
     for token in self.all():
         pool.apply_async(func, args=(token, ))
     pool.close()
     pool.join()
Example #9
    def process_folder(self, in_dir, out_dir):
        """
        Process the folder.
        """
        print('[Info] in_dir: {}'.format(in_dir))
        print('[Info] out_dir: {}'.format(out_dir))
        mkdir_if_not_exist(out_dir)

        paths_list, names_list = traverse_dir_files(in_dir)
        print('[Info] 待处理文件数量: {}'.format(len(paths_list)))

        random.seed(47)
        paths_list, names_list = shuffle_two_list(paths_list, names_list)

        n_prc = 40
        pool = Pool(processes=n_prc)  # download in parallel

        for idx, (path, name) in enumerate(zip(paths_list, names_list)):
            pool.apply_async(DataProcessor.process_img,
                             args=(path, name, out_dir))
            # DataProcessor.process_img(path, name, out_dir)
            if (idx + 1) % 1000 == 0:
                print('[Info] num: {}'.format(idx + 1))

        # multiprocessing logic
        pool.close()
        pool.join()

        print('[Info] 处理完成! {}'.format(out_dir))
        return
class QuantDslApplicationWithMultiprocessing(QuantDslApplication):

    def __init__(self, num_workers=None, call_evaluation_queue=None, **kwargs):
        if num_workers is not None:
            assert call_evaluation_queue is None
            # Parent.
            self.pool = Pool(processes=num_workers)
            self.manager = Manager()
            self.call_evaluation_queue = self.manager.Queue()

        else:
            # Child.
            self.pool = None
        super(QuantDslApplicationWithMultiprocessing, self).__init__(call_evaluation_queue=call_evaluation_queue, **kwargs)

        if self.pool:
            # Start worker pool.
            app_kwargs = self.get_subprocess_application_args()
            args = (self.manager.Lock(), self.__class__, app_kwargs)
            for i in range(num_workers):
                self.pool.apply_async(loop_on_evaluation_queue, args)

    def get_subprocess_application_args(self):
        app_kwargs = dict(
            call_evaluation_queue=self.call_evaluation_queue,
        )
        return app_kwargs

    def close(self):
        super(QuantDslApplicationWithMultiprocessing, self).close()
        if self.pool:
            self.pool.terminate()
    def download_right_angle(self):
        files_dir = os.path.join(ROOT_DIR, '..', 'datasets',
                                 '2020_11_26_vpf_right')
        paths_list, names_list = traverse_dir_files(files_dir)

        pool = Pool(processes=80)
        for path, name in zip(paths_list, names_list):
            name_x = name.split('.')[0]
            urls_file = os.path.join(ROOT_DIR, '..', 'datasets',
                                     '2020_11_26_vpf_right',
                                     '{}.txt'.format(name_x))  # input
            out_dir = os.path.join(ROOT_DIR, '..', 'datasets',
                                   'datasets_v4_checked', 'vpf_right',
                                   name_x)  # output
            mkdir_if_not_exist(out_dir)

            data_lines = read_file(urls_file)

            for idx, data_line in enumerate(data_lines):
                url, angle = data_line.split(',')
                pool.apply_async(DatasetFilter.process_img_angle,
                                 (idx, url, angle, out_dir))

        pool.close()
        pool.join()
        print('[Info] 处理完成: {}'.format(files_dir))
Example #12
def work(host, port, processes, threads, times):
    pool = Pool(processes,
                lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    p = Process(target=progress)
    p.daemon = True

    start = time.time()

    try:
        for chunk in divide(times, processes):
            pool.apply_async(thread, (host, port, threads, chunk))

        p.start()

        pool.close()
        pool.join()
        p.terminate()
        p.join()

    except KeyboardInterrupt:
        pool.terminate()
        p.terminate()
        p.join()
        pool.join()

    return time.time() - start
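
The lambda passed as the Pool initializer makes every worker ignore SIGINT, so Ctrl-C is seen only by the parent, which then terminates the pool. Note that spawn-based pools have to pickle the initializer, so a lambda only works under the fork start method; a portable sketch of the same idea:

import signal
from multiprocessing.pool import Pool


def ignore_sigint():
    # Run once in each worker: leave KeyboardInterrupt handling to the parent.
    signal.signal(signal.SIGINT, signal.SIG_IGN)


if __name__ == "__main__":
    pool = Pool(processes=4, initializer=ignore_sigint)
    pool.close()
    pool.join()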
    def process(self):
        name = "nat_main_all_20211020"
        file_path = os.path.join(DATA_DIR, "{}.txt".format(name))
        print("[Info] 输入文件: {}".format(file_path))
        out_file_path = os.path.join(
            DATA_DIR, "{}.out.{}.txt".format(name, get_current_time_str()))
        out_html_path = os.path.join(DATA_DIR, "{}.out.html".format(name))
        data_lines = read_file(file_path)
        random.seed(47)
        random.shuffle(data_lines)
        # data_lines = data_lines[:1000]
        print('[Info] 样本数: {}'.format(len(data_lines)))
        pool = Pool(processes=20)
        for data_idx, data_line in enumerate(data_lines):
            # RoiChecker.process_line_roi(data_idx, data_line, out_file_path)
            pool.apply_async(RoiChecker.process_line_roi,
                             (data_idx, data_line, out_file_path))
        pool.close()
        pool.join()
        print('[Info] 写入完成: {}'.format(out_file_path))

        data_lines = read_file(out_file_path)
        items_list = []
        for data_line in data_lines:
            items_list.append(data_line.split("\t"))
        make_html_page(out_html_path, items_list)
        print('[Info] 写入完成: {}'.format(out_html_path))
Example #14
class _MultiExecutor(_Executor):
    """Execute functions async in a process pool"""

    def __init__(self):
        super(_MultiExecutor, self).__init__()
        self._children = 0
        self.pool = Pool()

    def _collector(self, result):
        super(_MultiExecutor, self)._collector(result)
        self._children -= 1

    def execute(self, func, args):
        self._children += 1
        self.pool.apply_async(func, args, callback=self._collector)

    def wait_for_results(self):
        self.pool.close()
        # One would have hoped joining the pool would take care of this, but
        # apparently you need to first make sure that all your launched tasks
        # have returned their results properly, before calling join, or you
        # risk a deadlock.
        while self._children > 0:
            time.sleep(0.001)
        self.pool.join()
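
The _Executor base class is not shown. A hypothetical sketch of what it might provide, just so the subclass above reads on its own (an assumption, not the original code):

class _Executor(object):
    """Hypothetical base: run functions and collect their results."""

    def __init__(self):
        self.results = []

    def _collector(self, result):
        # Invoked with each finished result (used as the apply_async callback above).
        self.results.append(result)

    def execute(self, func, args):
        # Synchronous fallback: run immediately and collect the result.
        self._collector(func(*args))

    def wait_for_results(self):
        # Nothing outstanding in the synchronous base class.
        pass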
Example #15
def run():
    p = Pool(5)  # at most 5 processes run at a time
    for i in range(10000):
        p.apply_async(fun, args=(i,))
    p.close()
    p.join()  # wait for all child processes to finish before moving on
    print("end")
    def process_v2(self):
        val_folder = os.path.join(ROOT_DIR, '..', 'datasets', 'datasets_val')
        data_lines, _ = traverse_dir_files(val_folder)
        print('[Info] 样本数: {}'.format(len(data_lines)))
        type_name = "val"

        folder_name = "dataset_{}_{}".format(type_name, len(data_lines))
        dataset_folder = os.path.join(self.out_ds_folder, folder_name)
        mkdir_if_not_exist(dataset_folder)
        print('[Info] 输出文件夹路径: {}'.format(dataset_folder))

        mkdir_if_not_exist(self.out_files_folder)
        out_path_file = os.path.join(self.out_files_folder, "{}.txt".format(folder_name))
        print('[Info] 输出文件路径: {}'.format(out_path_file))

        pool = Pool(processes=100)
        for data_idx, data_line in enumerate(data_lines):
            pool.apply_async(
                DatasetReorder.copy_line_mul, (data_idx, data_line, type_name, dataset_folder, out_path_file))

        pool.close()
        pool.join()
        path_list = read_file(out_path_file)
        print('[Info] 输出路径: {}, 样本数: {}'.format(len(path_list), len(data_lines)))
        print('[Info] 处理完成: {}'.format(out_path_file))
    def process(self):
        data_dir = os.path.join(DATA_DIR, '2020_12_23')
        print('[Info] 数据文件: {}'.format(data_dir))

        out_file = os.path.join(DATA_DIR, '2020_12_23.txt')
        print('[Info] 写出文件: {}'.format(out_file))

        paths_list, names_list = traverse_dir_files(data_dir)
        print('[Info] 文件数: {}'.format(len(paths_list)))

        data_lines = []
        for path, name in zip(paths_list, names_list):
            sub_lines = read_file(path)
            data_lines += sub_lines
        print('[Info] 文本行数: {}'.format(len(data_lines)))

        random.seed(47)
        random.shuffle(data_lines)

        pool = Pool(processes=80)
        for idx, data_line in enumerate(data_lines):
            # DataPreprocess.process_line(idx, data_line, out_file)
            pool.apply_async(DataPreprocess.process_line, (idx, data_line, out_file))

        pool.close()
        pool.join()
        print('[Info] 处理完成: {}'.format(out_file))
Example #18
def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file, process):
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    for chrom in chrom_list:
        excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(excludeBam_chr)
        usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        usedBamList.append(usedBam_chr)

        write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file))
    write_bam_pool.close()
    write_bam_pool.join()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)
    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        used_bam_file = usedBamList[0]

    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)
    return used_sort_bam_file, exclude_bam_file
Example #19
def insert_from_log(retry=3, docs=None):
    """
    retry: number of passes. If any task fails in a pass, another pass is started and retry is decremented, until it reaches 0. A negative value keeps looping until every task completes.
    """
    from multiprocessing.pool import Pool
    import multiprocessing
    retry -= 1
    if docs is None:
        docs = find()

    pool = Pool(None, initializer, ({name: globals()[name] for name in VARIABLES},))

    for doc in docs:
        pool.apply_async(handle_doc, kwds=doc, error_callback=on_error)
    
    try:
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()
        return 
    

    docs = list(find())
    logging.warning("Unfilled: %s", len(docs))

    if retry:
        logging.warning("retry | %s", retry)
        if docs:
            insert_from_log(retry, docs)
Example #20
def read_group_posts(history_files, group_dict):
    """
    Each user is represented by a list of strings which are their posts.
    The posts go through preprocessing.
    :param history_files: a list of paths to group Twitter history file
    :param group_dict: group dictionary to insert the preprocessed posts to
    :return: List of lists representing a list of user posts
    """
    posts = manager.list()

    def update(*args):
        pbar.update()

    last_index = 0
    for hist_file in history_files:
        with open(hist_file) as f:
            hist = json.load(f)
        pool = Pool(processes=6,
                    initializer=init,
                    initargs=(posts, group_dict))
        indices = range(last_index, last_index + len(hist))
        pbar = tqdm(zip(indices, hist.items()),
                    total=len(hist.items()),
                    desc='Reading {}'.format(hist_file))
        for item in zip(indices, hist.items()):
            pool.apply_async(get_posts, args=(item, ), callback=update)
        last_index = item[0] + 1
        pool.close()
        pool.join()
        pbar.close()
    return posts
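
The module-level manager, init and get_posts are not shown. The initializer/initargs pattern used here typically publishes the shared, Manager-backed objects as globals inside each worker; a sketch under that assumption:

from multiprocessing import Manager

manager = Manager()   # created in the parent; proxies are shared with workers
_posts = None
_group_dict = None


def init(posts, group_dict):
    # Runs once per worker: stash the shared proxies for get_posts to use.
    global _posts, _group_dict
    _posts = posts
    _group_dict = group_dict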
Example #21
def main():
    args = dict(enumerate(sys.argv))
    day = args.get(1) or datetime.now().strftime("%Y%m%d")
    if len(day) != 8:
        raise Exception
    print('date={0}'.format(day))
    print('Start dumper...')

    goods = 'goods_{0}'.format(day)
    new = 'new_goods_{0}'.format(day)
    hot = 'hot_goods_{0}'.format(day)
    shop = 'shop_statistics_{0}'.format(day)
    detail = 'd_{0}'.format(day)
    monitor = 'm_{0}'.format(day)

    # todo: make the dump directory configurable
    # todo: make the dump directory compatible with both linux and windows

    pool = Pool()
    pool.apply_async(func=dump_mongodb, args=(detail, ))
    pool.apply_async(func=dump_mongodb, args=(monitor, ))
    pool.apply_async(func=dump_mysql, args=(goods, ))
    pool.apply_async(func=dump_mysql, args=(new, ))
    pool.apply_async(func=dump_mysql, args=(hot, ))
    pool.apply_async(func=dump_mysql, args=(shop, ))
    pool.close()
    pool.join()
Example #22
class RVWorkerPool(object):
    def __init__(self, processes=None):
        self.pool = Pool(processes)
        self.pipe_target = os.cpu_count()
        self.in_pipe = 0
        self.jobs = []

    def registerJob(self, func, args, ref=None, priority=1):
        job = JobRef(func, args, ref, priority)
        # Replace duplicate-source unstarted jobs
        if ref is not None:
            self.jobs = [j for j in self.jobs if j.ref != ref or j.started()]
        self.jobs.append(job)
        if self.in_pipe < self.pipe_target:
            job.jobStarted(self.pool.apply_async(func, args))
            self.in_pipe += 1
        return job

    def poll(self):
        done = []
        sorted_jobs = sorted(self.jobs, key=lambda j: j.priority)
        for job in sorted_jobs:
            if job.started():
                # Check if it's finished
                if job.ready():
                    self.jobs.remove(job)
                    self.in_pipe -= 1
            else:
                if self.in_pipe < self.pipe_target:
                    job.jobStarted(self.pool.apply_async(job.func, job.args))
                    self.in_pipe += 1

    def terminate(self):
        self.pool.terminate()
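
JobRef is not shown. A minimal sketch consistent with how it is used above, wrapping the AsyncResult handed back by apply_async (an assumption, not the original class):

class JobRef(object):
    """Tracks one queued job and, once started, its AsyncResult."""

    def __init__(self, func, args, ref=None, priority=1):
        self.func = func
        self.args = args
        self.ref = ref
        self.priority = priority
        self._async_result = None

    def jobStarted(self, async_result):
        # Remember the AsyncResult returned by Pool.apply_async.
        self._async_result = async_result

    def started(self):
        return self._async_result is not None

    def ready(self):
        # True once the worker has finished this job.
        return self.started() and self._async_result.ready()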
Example #23
def diff_pro(dbname, tablename):
    "difference between slave and master "
    if is_sigint_up:
        os._exit(0)
    pool = Pool(processes=2)
    result = {}
    result['slave'] = pool.apply_async(table_count,
                                       args=(myhost, myport, myuser,
                                             mypassword, dbname, tablename))
    result['master'] = pool.apply_async(table_count,
                                        args=(mahost, maport, mauser,
                                              mapassword, dbname, tablename))
    pool.close()
    pool.join()
    slcount = result['slave'].get()['count']
    slstart = result['slave'].get()['start']
    slend = result['slave'].get()['end']
    slengine = result['slave'].get()['engine']
    sldelay = result['slave'].get()['delay']
    macount = result['master'].get()['count']
    mastart = result['master'].get()['start']
    maend = result['master'].get()['end']
    maengine = result['master'].get()['engine']
    dbtable = '%s.%s' % (dbname, tablename)
    if slcount == -2 or macount == -2:
        mark = 'error'
    elif slcount == -1 or macount == -1:
        mark = '*'
    else:
        mark = macount - slcount
    msg = "%-40s |%-9s %-9s|%-8s %-8s |%-8s %-8s |%-7s|%-15s %-15s |%-10s" % (
        dbtable, maengine, slengine, mastart, slstart, maend, slend, sldelay,
        macount, slcount, mark)
    print msg
    result.clear()
Example #24
def compress_batch(input_folder, output_folder, target_compression, recursive=True, threads=1, follow_symlinks=True):
    # We require an absolute input path so we can replicate the data structure relative to it later on
    input_folder = os.path.abspath(input_folder)

    file_list = get_fast5_file_list(input_folder, recursive, follow_symlinks=follow_symlinks)
    if len(file_list) == 0:
        raise ValueError("No input fast5 files found in '{}'. Recursive={}".format(input_folder, recursive))

    # Set up the process pool and the progressbar
    pool = Pool(min(threads, len(file_list)))
    pbar = get_progress_bar(len(file_list))

    def update(result):
        pbar.update(pbar.currval + 1)

    for input_file in file_list:
        input_path = os.path.join(input_folder, input_file)
        output_path = os.path.join(output_folder, os.path.relpath(input_path, input_folder))

        pool.apply_async(func=compress_file,
                         args=(input_path, output_path, target_compression),
                         callback=update)

    # Tear down the process pool and pbar. We can't use contextmanagers since we need to close() then join()
    pool.close()
    pool.join()
    pbar.finish()
Example #25
def test_queue_and_thread():
    strategy_name = parse_args()
    manager = Manager()

    in_queue = manager.Queue()
    out_queue = manager.Queue()

    # monitor([in_queue, out_queue])

    pool = Pool(processes=3)

    results = [
        pool.apply_async(start_engine, (in_queue, out_queue, strategy_name)),
        pool.apply_async(start_feed, (in_queue, )),
        pool.apply_async(start_executor, (out_queue, ))
    ]

    try:
        for r in results:
            r.get()

    except KeyboardInterrupt:
        print("Keybord interruption in main thread")

    finally:
        print("Cleaning main thread")
Example #26
    def fill_views(self):
        client = Client('hscic')

        pool = Pool(processes=len(self.view_paths))
        tables = []

        prescribing_date = ImportLog.objects.latest_in_category(
            'prescribing').current_at.strftime('%Y-%m-%d')

        for path in self.view_paths:
            table_name = "vw__%s" % os.path.basename(path).replace('.sql', '')
            table = client.get_table(table_name)
            tables.append(table)

            with open(path) as f:
                sql = f.read()

            substitutions = {'this_month': prescribing_date}
            args = [table.name, sql, substitutions]
            pool.apply_async(query_and_export, args)

        pool.close()
        pool.join()  # wait for all worker processes to exit

        for table in tables:
            self.download_and_import(table)
            self.log("-------------")
Example #27
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--boosted', action='store_true')
    args = parser.parse_args()

    boosted = args.boosted

    if boosted:
        pool = Pool(16)
        session, old_ip = renew_connection('')
        old_ip = ''
    else:
        session = new_session()

    req = Request('GET', CURLIE)
    prepared_req = session.prepare_request(req)

    resp = session.send(prepared_req, timeout=12)

    soup = BeautifulSoup(resp.text, 'html.parser')

    print('id,parent_id,name')

    cats = soup.select('.top-cat > a')
    for cat in cats:
        print('"' + cat['href'] + '","","' + cat.text + '"')
        if boosted:
            session, old_ip = renew_connection(old_ip)
            pool.apply_async(get_nodes, args=[cat['href'], '- ', session])
        else:
            get_nodes(cat['href'], session)
            time.sleep(1)
    if boosted:
        pool.close()
        pool.join()
def download_imgs_for_mp(img_file, out_folder, prefix=None, n_prc=10):
    """
    Download images in parallel with a process pool.
    :param img_file: text file listing the images to download
    :param out_folder: output folder
    :param prefix: filename prefix for saved images
    :param n_prc: number of processes (default 10)
    :return: None
    """
    print_info('进程总数: %s' % n_prc)
    pool = Pool(processes=n_prc)  # parallel download
    paths_list = read_file(img_file)
    print_info('文件数: %s' % len(paths_list))

    _, imgs_names = traverse_dir_files(out_folder)

    for (index, path) in enumerate(paths_list):
        if prefix:
            pool.apply_async(download_img, (path, out_folder, imgs_names, prefix + '_' + str(index) + '.jpg'))
        else:
            pool.apply_async(download_img, (path, out_folder, imgs_names))

    pool.close()
    pool.join()

    # _, imgs_names = traverse_dir_files(out_folder)
    # print_info('图片总数: %s' % len(imgs_names))
    print_info('全部下载完成')
    def merge_hardcase(self, file_list, type_name):
        data_lines = []
        for file_name in file_list:
            file_path = os.path.join(self.folder, file_name)
            sub_lines = read_file(file_path)
            data_lines += sub_lines
        print('[Info] 样本行数: {}'.format(len(data_lines)))

        folder_name = "dataset_{}_{}".format(type_name, len(data_lines))
        dataset_folder = os.path.join(self.out_ds_folder, folder_name)
        mkdir_if_not_exist(dataset_folder)
        print('[Info] 输出文件夹路径: {}'.format(dataset_folder))

        mkdir_if_not_exist(self.out_files_folder)
        out_path_file = os.path.join(self.out_files_folder, "{}.txt".format(folder_name))
        print('[Info] 输出文件路径: {}'.format(out_path_file))

        pool = Pool(processes=100)
        for data_idx, data_line in enumerate(data_lines):
            pool.apply_async(
                DatasetReorder.copy_line_mul, (data_idx, data_line, type_name, dataset_folder, out_path_file))
        pool.close()
        pool.join()
        path_list = read_file(out_path_file)
        print('[Info] 输出路径: {}, 样本数: {}'.format(len(path_list), len(data_lines)))
        print('[Info] 处理完成: {}'.format(out_path_file))
Example #30
def process_pool():
    pool = Pool(processes=3)
    for i in range(6):
        pool.apply_async(func)

    pool.close()
    pool.join()
    def fill_views(self):
        client = Client('hscic')

        pool = Pool(processes=len(self.view_paths))
        tables = []

        prescribing_date = ImportLog.objects.latest_in_category(
            'prescribing').current_at.strftime('%Y-%m-%d')

        for path in self.view_paths:
            table_name = "vw__%s" % os.path.basename(path).replace('.sql', '')
            table = client.get_table(table_name)
            tables.append(table)

            with open(path) as f:
                sql = f.read()

            substitutions = {'this_month': prescribing_date}
            args = [table.table_id, sql, substitutions]
            pool.apply_async(query_and_export, args)

        pool.close()
        pool.join()  # wait for all worker processes to exit

        for table in tables:
            self.download_and_import(table)
            self.log("-------------")

        with connection.cursor() as cursor:
            for view_id in self.materialized_views:
                # This is quite slow! up to 10 mins.
                cursor.execute("REFRESH MATERIALIZED VIEW %s" % view_id)
Example #33
def main():
    print('Process (%s) start...' % os.getpid())
    p = Pool()
    for i in range(4):
        p.apply_async(long_time_task, args=(i,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
Example #34
class TcpController(object):
    def __init__(self,handlers):
        self.handlers=handlers
        self.workers=Pool(MAX_PROCESS_POOL_SIZE)

    def process(self,header,body):
        self.workers.apply_async(wrap,(self.handlers[header](),body,))

    def destroy(self):
        self.handlers=None
        self.workers.close()
Example #35
    def _get(self, args):
        draft_id = args[0]
        id = args[1] if len(args) > 1 else None

        q = self.db.query(Player)
        if id is not None:
            player = q.filter(Player.id == int(id)).first()
            team = self.db.query(Team).filter(and_(Team.is_owner == True,
                                                   Team.draft_id == draft_id)).first()

            available_players = self.db.query(Player).join(Player.core).filter(and_(PlayerCore.rank != None,
                                                                                    PlayerCore.target_price != None,
                                                                                    PlayerCore.points > 0,
                                                                                    Player.draft_id == draft_id,
                                                                                    Player.team_id == None,
                                                                                    Player.id != player.id)).order_by(PlayerCore.rank).all()

            min_price = 1
            max_price = min(player.core.target_price + 21, team.money)
            manager = Manager()
            max_starters_points = manager.dict()
            max_bench_points = manager.dict()
            pool = Pool(processes=8)
            starters, bench = get_starters_and_bench(self.db, team.id)
            max_starters_points[0] = optimizer.optimize_roster(starters, available_players, team.money - (constants.BENCH_SIZE - len(bench)))[1]
            for m in range(min_price, 10):
                pool.apply_async(wrap_optimizer, args=(starters, available_players, team.money - m - (constants.BENCH_SIZE - len(bench)) + 1, max_bench_points, m))

            full_starters = True
            for s in starters:
                if s is None:
                    full_starters = False
            if not full_starters:
                starters_clone = list(starters)
                bench_clone = list(bench)
                place_player(player, starters_clone, bench_clone)
                for m in range(min_price, max_price):
                    pool.apply_async(wrap_optimizer, args=(starters_clone, available_players, team.money - m - (constants.BENCH_SIZE - len(bench_clone)), max_starters_points, m))

            pool.close()
            pool.join()

            ret = player.to_dict(['core'])
            ret['max_starters_points'] = dict(max_starters_points)
            ret['max_bench_points'] = dict(max_bench_points)

            return ret
        else:
            players = q.join(PlayerCore).filter(and_(Player.draft_id == int(draft_id),
                                                     PlayerCore.rank != None,
                                                     PlayerCore.target_price != None)).all()
            return {'players': [p.to_dict(['core']) for p in players]}
Example #36
def main():
	"""
		Build all the models. Spin off a new process for each participant
		because the ANN library is not multithreaded. Process is used instead
		of thread to leverage multiple cores.
	"""
	parser = ArgumentParser()
	parser.add_argument("inputFilename")
	parser.add_argument("outputDirectory")
	
	args = parser.parse_args()
	inputFilename = args.inputFilename
	outputDirectory = args.outputDirectory
	
	data = pickle.load( open(inputFilename, 'rb') )
	
	tasks = [ 'matb', 'rantask' ]
	participantIds = [ '001', '002', '003', '004', '005', '006', '007' ]
	
	# Cut off first row header for each data set
	for task in tasks:
		for participantId in participantIds:
			data[participantId][task] = data[participantId][task][1:] 
			
	splits = performSplit( data )
	
	# Record start time so that the elapsed time can be determined
	start_time = time.time()
	
	# Create a multicore processing pool with 7 processes ( 7 so that one core stays free
	# for system processes )
	pool = Pool( processes = 7 )
	
	# Build models for participants in a task
	for task in tasks:
		for participantId in participantIds:
			outputFilename = path.join( outputDirectory, 'testingOn-' + participantId + '-' + task + '.txt' )
			
			# Spin off a process for the building
			pool.apply_async( tuneANN, ( splits[participantId][task], outputFilename ) )
			
	# Close down the pool so that we can wait on all the processes
	pool.close()
	pool.join()
	
	# Calculate and print the elapsed time
	elapsed_time = time.time() - start_time
	print( "Elapsed time: " + str(elapsed_time) )
Example #37
def stat_volume(stime,etime):
    tgsinfo = read_tgs_info()

    # from multiprocessing.dummy import Pool as ThreadPool
    from multiprocessing.pool import Pool

    pool = Pool()
    volume = [pool.apply_async(stat_tgs_volume,args=(stime,etime,int(cid))) for cid in tgsinfo.keys()]
    pool.close()

    print 'waiting to join....'
    pool.join()

    print 'start to writing to file...'

    volume0 = []
    for i,elem in enumerate(volume):
        volume0.append((tgsinfo.keys()[i], elem.get()))
    volume0.sort(key=lambda x:x[1], reverse=True)

    total = 0
    with open(os.path.join(root_dir, "result", "volume.txt"),"w") as f:
        for i,elem in enumerate(volume0):
            # cid = tgsinfo.keys()[i]
            # vol = elem.get()
            total += elem[1]

            line = "%5s,%s: %d\n" % (elem[0], tgsinfo[elem[0]]['kkmc'], elem[1])
            f.write(line)

    print 'totally %d records.' % (total)
Example #38
def manager_process(dir_queue, file_queue, out_queue):
    """Dispatches and manages path and scanning workers.

    """
    pool = Pool(options.num_threads)
    atexit.register(at_exit_manager, pool)
    logging.info('Gathering Files...')
    pool.apply(explore_path, (dir_queue, file_queue))
    logging.info('Files gathered. Scanning %s files...', file_queue.qsize())
    logging.info('Starting %s scan processes...', options.num_threads)
    print '~' * 80
    thread.start_new_thread(print_status, (file_queue,))
    for _ in range(options.num_threads):
        pool.apply_async(parallel_scan, (file_queue, out_queue))
    pool.close()
    pool.join()
    out_queue.put(StopIteration)
def run_jar_new_thread(config_path, **kwargs):
    """
    kwargs - arguments dictionary of run_jar()
    """
    pool = Pool(processes=1)
    __import__("ipdb").set_trace()
    result = pool.apply_async(_run_jar_with_config, [config_path], kwargs['kwargs'])
    job_id = result.get()
    return job_id
Example #40
 def run(self):
     if(self.fileName[0] != None and self.fileName[0] !=""):
         # p = Process(target=self.creationPDF, args=(self.fileName[0],))
         # self.creationPDF(self.fileName[0])
         # p.start()
         pool = Pool(processes=4)  # start 4 worker processes
         result = pool.apply_async(self.creationPDF, [self.fileName[0]])
     else:
         print("Sauvegarde annule")
Example #41
def multiprocess_all_chromosomes(func, cls, *args, **kwargs):
    '''
    Convenience method for splitting up queries based on tag id.
    '''
    processes = current_settings.ALLOWED_PROCESSES

    set_chromosome_lists(cls, use_table=kwargs.get('use_table', None))
    p = Pool(processes)

    try:
        for chr_list in current_settings.CHR_LISTS:
            p.apply_async(func, args=[cls, chr_list, ] + list(args))
        p.close()
        p.join()
    except Exception as e:
        print('Terminating pool.')
        p.terminate()
        raise e
Example #42
def create_execution_pool():
    global execution_pool
    pool_size = engine.app.config['POOL_SIZE']
    execution_pool = Pool(pool_size, initializer=initialize_worker)

    futures = []
    for i in xrange(pool_size * 2):
        futures.append(execution_pool.apply_async(connect_worker))

    [f.get() for f in futures]
Example #43
def main():
    queue_logger = setup_redirection()
    queue_logger.write("ABCDEF\n")
    try:
        p = Pool(10)
        results = [p.apply_async(some_process_body) for i in xrange(20)]
        [result.get() for result in results]
        p.close()
    finally:
        queue_logger.stop()
Example #44
    def _transport_backup_parallel(data, filename, aws_key, aws_secret, bucketname):
        """
        Parallel multipart upload.
        """
        headers = {}
        _logger.info('Backing up via S3 parallel multipart upload agent')
        keyname = filename
        tempInFile = NamedTemporaryFile(suffix='.zip', prefix='db-backup-', delete=False)
        tempInFile.write(data)
        tempInFile.close()
        source_path = tempInFile.name
        source_size = os.stat(source_path).st_size
        parallel_processes = (multiprocessing.cpu_count() * 2) + 1

        conn = boto.connect_s3(aws_key, aws_secret)
        bucket = conn.get_bucket(bucketname)

        mtype = 'application/zip, application/octet-stream'
        headers.update({'Content-Type': mtype})

        mp = bucket.initiate_multipart_upload(keyname, headers=headers)

        bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                              5242880)
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

        pool = Pool(processes=parallel_processes)
        for i in range(chunk_amount):
            offset = i * bytes_per_chunk
            remaining_bytes = source_size - offset
            bytes = min([bytes_per_chunk, remaining_bytes])
            part_num = i + 1
            pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
                                            part_num, source_path, offset, bytes])
        pool.close()
        pool.join()

        if len(mp.get_all_parts()) == chunk_amount:
            mp.complete_upload()
        else:
            mp.cancel_upload()
        os.unlink(tempInFile.name)
        _logger.info('Data successfully backed up to s3')
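
_upload_part is not shown. The usual boto 2 recipe has each worker open its own S3 connection (connections cannot be shared across processes), locate the multipart upload by id, and stream one chunk with FileChunkIO; a sketch under those assumptions:

import boto
from filechunkio import FileChunkIO


def _upload_part(bucketname, aws_key, aws_secret, mp_id, part_num,
                 source_path, offset, num_bytes):
    # Upload one chunk of the temp file as part `part_num` of the multipart upload.
    conn = boto.connect_s3(aws_key, aws_secret)
    bucket = conn.get_bucket(bucketname)
    for mp in bucket.get_all_multipart_uploads():
        if mp.id == mp_id:
            with FileChunkIO(source_path, 'r', offset=offset, bytes=num_bytes) as fp:
                mp.upload_part_from_file(fp, part_num=part_num)
            break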
Example #45
def process(configuration: LogParserConfig, db: DatabaseConnection, process_count: int):
    influxdb_client = get_client(configuration)
    pool = Pool(process_count)
    currently_processing = defaultdict(list)
    dl_dir = os.path.join(db.root_dir, 'data')

    while True:
        logging.info('Checking for new files to process')
        settings = db.get_settings()
        new_files = get_new_files_to_process(configuration.buckets, settings)
        settings.files.extend(new_files)
        timestamp = int(time.time())
        completed_files = defaultdict(list)
        # Empty queue of finished work and create a list of all completed files per bucket
        while True:
            if finished_queue.empty():
                break
            success, bucket, filename = finished_queue.get()
            currently_processing[bucket].remove(filename)
            if success:
                completed_files[bucket].append(filename)
        # Set processed timestamp on processed files
        for file in settings.files:
            if file.name in completed_files[file.bucket]:
                file.processed_timestamp = timestamp
        logging.info('%d files completed processing since last loop', sum(len(l) for l in completed_files.values()))
        completed_files.clear()
        db.save_settings(settings)
        added = 0
        for file in settings.files:
            if file.processed_timestamp is None and file.name not in currently_processing[file.bucket]:
                currently_processing[file.bucket].append(file.name)
                pool.apply_async(process_file, (file.bucket, file.name, dl_dir, influxdb_client), {}, after_processed,
                                 after_error)
                added += 1
        processing_count = sum(len(l) for l in currently_processing.values())
        if added:
            logging.info('Added %s files to pool, %s files currently processing.', added, processing_count)
        else:
            logging.info('Nothing new to process, sleeping %s seconds. %s files currently in queue to be processed.',
                         configuration.interval, processing_count)
            time.sleep(configuration.interval)
            continue
    def start(self):

        print 'Start Speech Download Service...'

        language = self._language
        download_dir = self.setup_download_dir()

        with open(self._text_file_path, 'r') as f:

            # Python 2.7 does not support using Pool() as a context manager
            pool = Pool(processes=8)

            progress = Progress(len(f.readlines()))
            f.seek(0)
            results = []

            def update_progress(result):
                results.append(result)
                progress.update(len(results))

            for text in f:
                text = text.rstrip('\n')
                encoded_args = urlencode({
                    'hl': language,
                    'src': text,
                    'key': config.API_KEY
                })

                url = config.SPEECH_URL + encoded_args
                download_path = os.path.join(download_dir, text + config.DOWNLOAD_FILE_TYPE)

                downloader = SpeechDownloader(url, download_path)
                # Pool().apply() blocks until the process is finished
                # pool.apply(downloader.download)
                pool.apply_async(downloader.download, callback=update_progress)

            # Prevents any more tasks from being submitted to the pool
            pool.close()
            # Wait for the worker process to exit
            pool.join()

            print 'Done, Downloaded %d Speeches' % len(results)
Example #47
def create_dictionary(filelist, output, tagmap, delimeter = '/'):
    """
    Create a dictionary out of slashtag-based files.

    :param filelist: List of file paths
    :type filelist: list[str]
    :param output: output file path
    :type output: str
    :param tagmap: Optional
    :type tagmap: TagMap
    """
    c = POSEvalDict()

    counts = {'tokens':0, 'lines':0}

    def merge_counts(result):
        d, cur_tokencount, cur_linecount = result
        c.combine(d)
        counts['tokens'] += cur_tokencount
        counts['lines'] += cur_linecount

    tm = None
    if tagmap:
        tm = TagMap(tagmap)

    # Initialize multithreading...
    p = Pool(cpu_count())
    for path in filelist:
        p.apply_async(process_file, args=[path, tm, delimeter], callback=merge_counts)
        # result = p.apply(process_file, args=[path, tm, delimeter])
        # merge_counts(result)

    p.close()
    p.join()


    # Now, dump the pickled POSEvalDict.
    print("Writing out dictionary...", end=' ')
    pickle.dump(c, open(output, 'wb'))
    print("Done.")
    print("{} tokens processed, {} sentences.".format(counts['tokens'], counts['lines']))
Example #48
class MultiProcPlugin(DistributedPluginBase):
    """Execute workflow with multiprocessing

    The plugin_args input to run can be used to control the multiprocessing
    execution. Currently supported options are:

    - n_procs : number of processes to use
    - non_daemon : boolean flag to execute as non-daemon processes

    """

    def __init__(self, plugin_args=None):
        super(MultiProcPlugin, self).__init__(plugin_args=plugin_args)
        self._taskresult = {}
        self._taskid = 0
        n_procs = 1
        non_daemon = False
        if plugin_args:
            if 'n_procs' in plugin_args:
                n_procs = plugin_args['n_procs']
            if 'non_daemon' in plugin_args:
                non_daemon = plugin_args['non_daemon']
        if non_daemon:
            # run the execution using the non-daemon pool subclass
            self.pool = NonDaemonPool(processes=n_procs)
        else:
            self.pool = Pool(processes=n_procs)

    def _get_result(self, taskid):
        if taskid not in self._taskresult:
            raise RuntimeError('Multiproc task %d not found'%taskid)
        if not self._taskresult[taskid].ready():
            return None
        return self._taskresult[taskid].get()

    def _submit_job(self, node, updatehash=False):
        self._taskid += 1
        self._taskresult[self._taskid] = self.pool.apply_async(run_node,
                                                               (node, updatehash,))
        return self._taskid

    def _report_crash(self, node, result=None):
        if result and result['traceback']:
            node._result = result['result']
            node._traceback = result['traceback']
            return report_crash(node,
                                traceback=result['traceback'])
        else:
            return report_crash(node)

    def _clear_task(self, taskid):
        del self._taskresult[taskid]
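
NonDaemonPool is not shown. The usual recipe from this era (sketched here as an assumption) subclasses Process so workers are never daemonic and may therefore spawn child processes of their own:

import multiprocessing
import multiprocessing.pool


class NoDaemonProcess(multiprocessing.Process):
    # Always report daemon=False and silently ignore attempts to change it.
    def _get_daemon(self):
        return False

    def _set_daemon(self, value):
        pass

    daemon = property(_get_daemon, _set_daemon)


class NonDaemonPool(multiprocessing.pool.Pool):
    # A Pool whose workers are allowed to start their own processes.
    Process = NoDaemonProcess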
Example #49
def fetch_all(page_type, ids, output):
	#Python multithreading mess incoming
	start = time()
	pages = []
	links = build(page_type, ids)
	#Number of worker processes to start
	num_of_proc = 8
	pool = Pool(processes=num_of_proc)
	#Builds the process pool
	results = [pool.apply_async(fetch, (url, page_type[1], output,)) for url in links]
	#Proceeds to run the processes
	for result in results:
		result.get()
	print("Fetched all in %s" %(time() - start))
Example #50
def fetch_all(page_type, ids, output):
	#Python multithreading mess incoming
	start = time()
	pages = []
	links = build(page_type, ids)
	#Number of worker processes to start
	num_of_proc = 8
	pool = Pool(processes=num_of_proc)
	#Fetches pages asynchronously
	results = [pool.apply_async(fetch, (url, page_type[1], output,)) for url in links]
	#Appends the fetched pages into the pages list
	for result in results:
		result.get()
	print("Fetched all in %s" %(time() - start))
Example #51
    def _save_content(self, key, content, headers):
        print "key.name", key.name
        source_size = os.stat(content.file.name).st_size
        bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)), 5242880)
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
        mp = self.bucket.initiate_multipart_upload(key.name, headers=headers)

        pool = Pool(processes=1)
        for i in range(chunk_amount):
            offset = i * bytes_per_chunk
            remaining_bytes = source_size - offset
            bytes = min([bytes_per_chunk, remaining_bytes])
            part_num = i + 1
            pool.apply_async(_upload_part, [self.bucket_name, self.access_key,
                             self.secret_key, mp.id, part_num,
                             content.file.name, offset, bytes])
        pool.close()
        pool.join()

        if len(mp.get_all_parts()) == chunk_amount:
            mp.complete_upload()
            key = self.bucket.get_key(key.name)
        else:
            mp.cancel_upload()
Example #52
def load_from_web():
    print "Loading from Web"

    movies = common.read_json(JSON_IN_FILE)

    pool = Pool(5)
    worker = [pool.apply_async(process_movie, [m]) for m in movies]

    imdb_movies = []
    for w in worker:
        w.wait()
        result = w.get()
        if result is not None:
            imdb_movies.append(w.get())

    common.write_json(JSON_OUT_FILE, imdb_movies)
    def multi_proc5(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(count/4) == 0) or (idx == count - 1): #4 because that is how many workers we have
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])

        input_pool = Pool(4)
        #Add id messages to input queue
        input_pool.map(partial(add_batch_ids_to_queue, batch_size=int(batch)), record_ids)
        input_pool.close()
        input_pool.join()


        output_pool = Pool(4)
        #Read ids from input_queue, read message from DB and write it to output_queue
        worker_results = []
        for i in range(4):
            worker_results.append(output_pool.apply_async(read_id_from_queue, ()))

        output_pool.close()

        for r in worker_results:
            r.get() # This reports results, including errors, of workers

        output_pool.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
Example #54
def create_training_parallel(count):
    pool_size = 8
    batch_count = pool_size * 5
    pool = Pool(pool_size)
    print("generating")
    results = []
    for i in range(batch_count):
        results.append(pool.apply_async(create_training_data, (count/batch_count,)))

    pool.close()
    pool.join()
    print("concatenating")

    output = []
    for r in results:
        output.extend(r.get(1000))
    return output
    def multi_proc3(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(batch) == 0) or (idx == count - 1):
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])


        #Add id messages to input queue
        msg_handler = MessageHandler()
        for records in record_ids:
            msg_handler.add_message(json.dumps({"ids":records}), "input_queue")

        worker_results = []
        p = Pool(4)
        for i in range(4):
            worker_results.append(p.apply_async(read_id_from_queue, ()))

        p.close()

        for r in worker_results:
            r.get()

        p.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
    def multi_proc4(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(batch) == 0) or (idx == count - 1):
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])

        p = Pool(4)
        #Add id messages to input queue
        p.map(add_ids_to_queue, record_ids)

        #Read ids from input_queue, read message from DB and write it to output_queue
        worker_results = []
        p = Pool(4)
        for i in range(4):
            worker_results.append(p.apply_async(read_id_from_queue, ()))

        p.close()

        for r in worker_results:
            r.get()

        p.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
Example #57
    def render(self, scene, bucket_order_info: BucketOrderInfo, multiThread: bool=True):

        self.scene = scene

        if self.surface_integrator is not None:
            self.surface_integrator.Preprocess(scene, self.camera, self)

        if self.volume_integrator is not None:
            self.volume_integrator.Preprocess(scene, self.camera, self)

        sample = Sample(self.main_sampler, self.surface_integrator, self.volume_integrator, scene)

        if multiThread:
            my_bucket_orders = BucketOrder.create(bucket_order_info.width, bucket_order_info.height,
                                                  bucket_order_info.bucket_order_type)

            pool = Pool(processes=multiprocessing.cpu_count())
            # pool = Pool(processes=1)
            pool._wrap_exception = False

            results = []

            for i in range(bucket_order_info.width * bucket_order_info.height):
                # for i in range(55, 56):
                a = pool.apply_async(self.render_task, args=(
                    i, my_bucket_orders.buckets_orders[i], bucket_order_info, sample, self),
                                     callback=self.draw)

                results.append(a)

            for r in results:
                r.wait()

        else:
            bucketOrderInfo = BucketOrderInfo(BucketOrderSortType.Random, 1, 1)
            self.render_task(0, 0, bucketOrderInfo, sample, self)

        print("Render end")

        data = write_png(self.camera.film.data, self.camera.film.width, self.camera.film.height)
        with open("my_image.png", 'wb') as fd:
            fd.write(data)
 def fill_views(self):
     paths = []
     if self.view:
         for path in self.view_paths:
             if self.view in path:
                 paths.append(path)
                 break
     else:
         paths = self.view_paths
     pool = Pool(processes=len(paths))
     pool_results = []
     prescribing_date = ImportLog.objects.latest_in_category(
         'prescribing').current_at.strftime('%Y-%m-%d')
     for view in paths:
         if self.view and self.view not in view:
             continue
         # Perform bigquery parts of operation in parallel
         result = pool.apply_async(
             query_and_export, [self.dataset, view, prescribing_date])
         pool_results.append(result)
     pool.close()
     pool.join()  # wait for all worker processes to exit
     for result in pool_results:
         tablename, gcs_uri = result.get()
         f = download_and_unzip(gcs_uri)
         copy_str = "COPY %s(%s) FROM STDIN "
         copy_str += "WITH (FORMAT CSV)"
         fieldnames = f.readline().split(',')
         with connection.cursor() as cursor:
             with utils.constraint_and_index_reconstructor(tablename):
                 self.log("Deleting from table...")
                 cursor.execute("DELETE FROM %s" % tablename)
                 self.log("Copying CSV to postgres...")
                 try:
                     cursor.copy_expert(copy_str % (
                         tablename, ','.join(fieldnames)), f)
                 except Exception:
                     import shutil
                     shutil.copyfile(f.name, "/tmp/error")
                     raise
         f.close()
         self.log("-------------")
Example #59
def stat_first_tgs(stime,etime):

    begtime = long(time.mktime(stime.timetuple())*1000)
    endtime = long(time.mktime(etime.timetuple())*1000)

    tgsinfo = read_tgs_info()
    vehicles = {}

    test = random.sample(tgsinfo.keys(), 100)

    from multiprocessing.pool import Pool

    pool = Pool()
    result = [pool.apply_async(_stat_first_tgs_single, args=(int(cid),begtime,endtime)) for cid in tgsinfo.keys()]
    pool.close()
    pool.join()

    result1 = [elem.get() for elem in result]

    print 'joining....'
    result2 = reduce(_combine, result1)

    print 'totally %d vehicles. ' % (len(result2))
    # print type(result2)

    c = Counter()
    for veh, info in result2.iteritems():
        c[info[1]] += 1

    print 'writing result into file...'
    with open(os.path.join(root_dir,"result","first_tgs.txt"),"w") as f:
        c1 = c.most_common()
        # print c1[0]
        # for cid,count in c1.iteritems():
        for elem in c1:
            line = "%5d,%6d\n" % (elem[0],elem[1])
            f.write(line)
    print 'finished.'
Example #60
    def __test_multi_processed(self, test_set, method, model, threshold, trees, all_node_ids):
        """
        Create a process pool to distribute the prediction.
        """
        #process_count = multiprocessing.cpu_count()
        process_count = 4
        pool = Pool(processes=process_count)
        step = int(math.ceil(float(len(test_set)) / process_count))
        results = []
        for j in range(0, len(test_set), step):
            meme_ids = test_set[j: j + step]
            res = pool.apply_async(test_meme,
                                   (meme_ids, method, model, threshold, trees, all_node_ids, self.user_ids,
                                    self.users_map, self.verbosity))
            results.append(res)

        pool.close()
        pool.join()

        prp1_list = []
        prp2_list = []
        precisions = []
        recalls = []
        fprs = []
        f1s = []

        # Collect results of the processes.
        for res in results:
            r1, r2, r3, r4, r5, r6 = res.get()
            precisions.extend(r1)
            recalls.extend(r2)
            fprs.extend(r3)
            f1s.extend(r4)
            prp1_list.extend(r5)
            prp2_list.extend(r6)

        return precisions, recalls, f1s, fprs, prp1_list, prp2_list