import multiprocessing
import time

from atpbar import atpbar, find_reporter, register_reporter, flush


def run_with_multiprocessing(nprocesses, ntasks, niterations):

    def task(n, name):
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    def worker(reporter, task, queue):
        # attach this process to the progress reporter of the main process
        register_reporter(reporter)
        while True:
            args = queue.get()
            if args is None:  # sentinel: no more tasks
                queue.task_done()
                break
            task(*args)
            queue.task_done()

    reporter = find_reporter()
    queue = multiprocessing.JoinableQueue()
    for i in range(nprocesses):
        p = multiprocessing.Process(target=worker, args=(reporter, task, queue))
        p.start()
    for i in range(ntasks):
        name = 'task {}'.format(i)
        n = niterations[i]
        queue.put((n, name))
    for i in range(nprocesses):
        queue.put(None)  # one sentinel per worker
    queue.join()
    flush()
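# A minimal invocation sketch (the numbers here are hypothetical): four
# worker processes draining ten tasks of random length.
import random

if __name__ == '__main__':
    run_with_multiprocessing(
        nprocesses=4,
        ntasks=10,
        niterations=[random.randint(1000, 10000) for _ in range(10)],
    )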
import os
from multiprocessing import Pool

import atpbar

# ARGS (parsed command-line options) and download_img are defined elsewhere
# in the source module.


def download_images(sources):
    if ARGS.out_folder:
        assert os.path.exists(ARGS.out_folder) and not os.path.isfile(
            ARGS.out_folder), "Need to specify an existing folder"
    else:
        ARGS.out_folder = "."

    def init_progressbar(reporter):
        # runs once in each pool worker before it accepts any work
        atpbar.register_reporter(reporter)

    reporter = atpbar.find_reporter()
    pool = Pool(ARGS.workers, init_progressbar, initargs=(reporter,))
    pool.map(download_img, sources)
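# For context, a hypothetical download_img (not part of the snippet above):
# each pool worker reports its own per-file progress through the shared
# reporter registered by the initializer.
import time

from atpbar import atpbar


def download_img(source):
    # placeholder loop standing in for a chunked download
    for _ in atpbar(range(100), name=str(source)):
        time.sleep(0.01)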
def open(self):
    """open the drop box

    This method needs to be called before a task is put.

    Returns
    -------
    None

    """
    if len(self.workers) >= self.n_max_workers:
        # workers already created
        return

    self.task_queue = self.ctx.JoinableQueue()
    self.result_queue = self.ctx.Queue()
    self.logging_queue = self.ctx.Queue()

    # start logging listener
    self.loggingListener = threading.Thread(
        target=logger_thread, args=(self.logging_queue,))
    self.loggingListener.start()

    # start progress monitor
    if self.progressbar:
        reporter = atpbar.find_reporter()
    else:
        reporter = None

    # start workers
    for i in range(self.n_max_workers):
        worker = self.Worker(task_queue=self.task_queue,
                             result_queue=self.result_queue,
                             logging_queue=self.logging_queue,
                             progress_reporter=reporter)
        worker.start()
        self.workers.append(worker)
        # inside the per-id fetch loop of job(): throttle each request
        start_time = time.time()
        url = f'https://hacker-news.firebaseio.com/v0/item/{id_}.json'
        item = session.get(url).text
        f.write(item + '\n')
        end_time = time.time()
        elapsed_time = end_time - start_time
        if elapsed_time < seconds_to_wait_between_requests:
            time.sleep(seconds_to_wait_between_requests - elapsed_time)
    return filename


reporter = find_reporter()
with Pool(processes=num_processes,
          initializer=register_reporter,
          initargs=[reporter]) as pool:
    item_part_files = pool.map(job, split_ids_to_fetch)
flush()

formatted_date = datetime.utcnow().date().strftime("%Y_%m_%d")
subprocess.run(f'cat data/* > data/all_items_{formatted_date}.json',
               shell=True, check=True)
all_items_full_path = f'{os.getcwd()}/data/all_items_{formatted_date}.json'

try:
    # cleanup any previously staged files
    cur.execute('remove @load_db.hackernews.%items;')
    # the table stage is an implicit stage created for every table,
    # so there is no need to create it
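# A hypothetical sketch of the enclosing job() (its opening lines are not in
# the fragment above): each worker presumably opens its own part file and
# walks its chunk of ids with atpbar, doing the throttled fetch per id_.
import requests

from atpbar import atpbar


def job(ids_to_fetch):
    filename = f'data/items_{ids_to_fetch[0]}_{ids_to_fetch[-1]}.json'  # assumed naming
    session = requests.Session()
    with open(filename, 'w') as f:
        for id_ in atpbar(ids_to_fetch, name=filename):
            ...  # throttled fetch as in the fragment above
    return filename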
def __init__(self, max_lines=10, label=None):
    RemoteProgress.__init__(self)
    self.taskid = uuid.uuid4()
    self.reporter = find_reporter()
    self.pid = os.getpid()
    self.label = label
def __init__(self, label=None):
    self.taskid = uuid.uuid4()
    self.reporter = find_reporter()
    self.pid = os.getpid()
    self.label = label
    self.total = 0
def process(self, pressures, flows, p_0=ps.peep, f_0=fs.no_flow):
    """Map data points from pressure and flow to enumerated states.

    Parameters
    ----------
    pressures : array-like of real
        Pressure data points.
    flows : array-like of real
        Flow data points.
    p_0 : PressureStates enum, optional
        The initial pressure state the program assumes it is in.
        Defaults to peep.
    f_0 : FlowStates enum, optional
        The initial flow state the program assumes it is in.
        Defaults to no_flow.

    Returns
    -------
    None
        The resulting labels (array-like of PressureStates enum and
        array-like of FlowStates enum) are appended to ``self.p_labels``
        and ``self.f_labels``.
    """
    # data points beyond a multiple of the window length are not processed;
    # they are padded with the last label afterwards
    buffer = len(pressures) % self.w_len
    if cpu_count() > 2:
        # enough cores: label pressures and flows in parallel, with
        # progress reported through a shared atpbar reporter
        reporter = find_reporter()
        p_queue = Queue()
        f_queue = Queue()
        if buffer != 0:
            p_process = Process(target=self.process_pressures,
                                args=(pressures[:-buffer], p_0, p_queue, reporter))
            f_process = Process(target=self.process_flows,
                                args=(flows[:-buffer], f_0, f_queue, reporter))
        else:
            p_process = Process(target=self.process_pressures,
                                args=(pressures, p_0, p_queue, reporter))
            f_process = Process(target=self.process_flows,
                                args=(flows, f_0, f_queue, reporter))
        p_process.start()
        f_process.start()
        # drain the result queues before joining so the children can exit
        self.p_labels = np.concatenate((self.p_labels, p_queue.get()))
        self.f_labels = np.concatenate((self.f_labels, f_queue.get()))
        p_process.join()
        f_process.join()
        # pad the trailing, unprocessed points with the last label
        self.p_labels = np.concatenate(
            [self.p_labels, np.array([self.p_labels[-1]] * buffer)])
        self.f_labels = np.concatenate(
            [self.f_labels, np.array([self.f_labels[-1]] * buffer)])
    else:
        # too few cores: process sequentially in this process
        if buffer != 0:
            self.process_pressures(pressures[:-buffer], p_0)
            self.process_flows(flows[:-buffer], f_0)
        else:
            self.process_pressures(pressures, p_0)
            self.process_flows(flows, f_0)
        self.p_labels = np.concatenate(
            [self.p_labels, np.array([self.p_labels[-1]] * buffer)])
        self.f_labels = np.concatenate(
            [self.f_labels, np.array([self.f_labels[-1]] * buffer)])
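# A hypothetical usage sketch (the class and variable names are assumptions;
# the snippet above does not show the enclosing class):
#
#     labeller = StateMapper()
#     labeller.process(pressures, flows)
#     labeller.p_labels, labeller.f_labels  # one state label per data point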