def get_keys_multiproc(
    cls, lookup, loc_df, success_only=False, num_cores=-1, num_partitions=-1
):
    """
    Used for CPU-bound lookup operations.

    Depends on a method ``process_locations_multiproc(dataframe)`` that takes a
    chunk of the location Pandas DataFrame and returns a list of dicts holding
    the lookup results for the rows in that chunk.
    """
    pool_count = num_cores if num_cores > 0 else cpu_count()
    part_count = num_partitions if num_partitions > 0 else min(pool_count * 2, len(loc_df))
    locations = np.array_split(loc_df, part_count)
    pool = Pool(pool_count)
    results = pool.map(lookup.process_locations_multiproc, locations)
    lookup_results = sum([r for r in results if r], [])
    pool.terminate()
    return lookup_results
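# A minimal sketch of the `process_locations_multiproc` method that
# `get_keys_multiproc` expects to find on `lookup`. The class name and result
# fields below are assumptions for illustration only: the method receives one
# DataFrame chunk produced by np.array_split and returns a list of result
# dicts for the rows in that chunk.
class ExampleLookup:  # hypothetical lookup implementation
    def process_locations_multiproc(self, loc_chunk):
        results = []
        for idx, row in loc_chunk.iterrows():
            results.append({"row_index": idx, "match": None})  # hypothetical fields
        return results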
def source_item_from_list_in_event(
    stream_name, list_name, field, value, batch_size=1000,
):
    broker = RedisStream.get_broker()
    next_id = "+"
    i = 0
    max_iter = int(1000 / batch_size) + 1
    while i < max_iter:
        i += 1
        # Walk the stream newest-first, one batch per iteration.
        event_tuples = broker.xrevrange(stream_name, max=next_id, count=batch_size)
        n = len(event_tuples)
        if not n:
            return None, None, None
        event_ids, event_dicts = zip(*event_tuples)
        with Pool(parallel_jobs(n)) as pool:
            event_bytes = pool.map(event_from_dict, event_dicts)
            events = pool.map(bytes_to_event, event_bytes)
            args = zip(*(events, [list_name] * len(events)))
            dicts = pool.starmap(extract_attr, args)
            args = zip(*(dicts, [field] * len(dicts), [value] * len(dicts)))
            matches = pool.starmap(find_first_by, args)
        # Use a separate loop variable so the batch counter `i` is not clobbered.
        for j, detection in enumerate(matches):
            if detection:
                return detection, event_ids[j], events[j].correlations
        next_id = decrement_id(event_ids[-1])
    return None, None, None
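# A possible implementation of the `decrement_id` helper used above, assuming
# standard Redis stream entry IDs of the form "<milliseconds>-<sequence>".
# xrevrange treats `max` as inclusive, so paging backwards requires passing an
# ID strictly smaller than the last entry already seen.
def decrement_id(entry_id):
    if isinstance(entry_id, bytes):
        entry_id = entry_id.decode()
    ms, _, seq = entry_id.partition("-")
    if int(seq) > 0:
        return "%s-%d" % (ms, int(seq) - 1)
    # Sequence already 0: step back to the previous millisecond with the
    # largest possible sequence number.
    return "%d-%d" % (int(ms) - 1, 2**64 - 1)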
def progress_map(func, *iterables, jobs=1, **kwargs):
    r"""Map a function across iterables of arguments.

    This is comparable to :meth:`astropy.utils.console.ProgressBar.map`,
    except that it is implemented using :mod:`tqdm` and so provides more
    detailed and accurate progress information.
    """
    total = min(len(iterable) for iterable in iterables)
    if jobs == 1:
        return list(tqdm(map(func, *iterables), total=total, **kwargs))
    else:
        with Pool(jobs) as pool:
            return [
                item[1] for item in sorted(
                    tqdm(
                        pool.imap_unordered(
                            WrappedFunc(func), enumerate(zip(*iterables))
                        ),
                        total=total, **kwargs
                    ),
                    key=itemgetter(0)
                )
            ]
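# A sketch of the `WrappedFunc` helper assumed by progress_map. imap_unordered
# yields results in completion order, so the wrapper carries each item's
# original index along with the result; the caller then sorts on that index
# (the itemgetter(0) key above) to restore input order. A class is used rather
# than a closure so the callable stays picklable for the worker processes.
class WrappedFunc:
    def __init__(self, func):
        self.func = func

    def __call__(self, indexed_args):
        index, args = indexed_args
        return index, self.func(*args)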
def source_event(stream_name, filters_dict={}, batch_size=128, latest_first=True):
    broker = RedisStream.get_broker()
    next_id = "+" if latest_first else "-"
    i = 0
    max_iter = int(1000 / batch_size) + 1
    while i < max_iter:
        i += 1
        # Page through the stream in the requested direction, one batch at a time.
        if latest_first:
            event_tuples = broker.xrevrange(stream_name, max=next_id, count=batch_size)
        else:
            event_tuples = broker.xrange(stream_name, min=next_id, count=batch_size)
        n = len(event_tuples)
        if not n:
            return None
        event_ids, event_dicts = zip(*event_tuples)
        with Pool(parallel_jobs(n)) as pool:
            event_bytes = pool.map(event_from_dict, event_dicts)
            events = pool.map(bytes_to_event, event_bytes)
            args = zip(*(events, [filters_dict] * len(events)))
            matches = pool.starmap(match_event, args)
        if any(matches):
            index = matches.index(True)
            return events[index]
        next_id = decrement_id(event_ids[-1]) if latest_first else increment_id(event_ids[-1])
    return None
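# Assumed shapes of two helpers shared by the stream readers above. Neither
# implementation is from the original source; they only illustrate the
# contract the callers rely on.
def parallel_jobs(n_items):
    # Never start more workers than there are items or CPU cores.
    return max(1, min(n_items, cpu_count()))


def increment_id(entry_id):
    # Counterpart of decrement_id for forward paging with xrange: bump the
    # sequence part of a "<milliseconds>-<sequence>" stream ID.
    if isinstance(entry_id, bytes):
        entry_id = entry_id.decode()
    ms, _, seq = entry_id.partition("-")
    return "%s-%d" % (ms, int(seq) + 1)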
def stream(
    self, subreddit: str, start_time: int, end_time: int
) -> Iterator[List[Dict[str, Any]]]:
    for id_iter in query_pushshift(subreddit, start_time, end_time):
        with Pool(cpu_count(), initializer) as workers:
            yield list(workers.imap_unordered(praw_by_id, id_iter))
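# A sketch of the per-worker `initializer` passed to Pool above. The Pool
# initializer runs once inside every worker process, which makes it the usual
# place to build non-picklable state such as an API client; `praw_by_id` is
# assumed to read the module-level `reddit` handle. The credential values are
# placeholders, not values from the original project.
import praw

reddit = None


def initializer() -> None:
    global reddit
    reddit = praw.Reddit(
        client_id="...", client_secret="...", user_agent="meme-scraper"
    )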
def searchDarkWeb(self, query, include=None, exclude=None):
    '''Gets data from the specified search engines'''
    if include:
        final = [a for a in include if a in self.sites]
    elif exclude:
        final = [a for a in self.sites if a not in exclude]
    else:
        final = list(self.sites.keys())
    self.query = query
    # One worker per selected engine.
    pool = Pool(processes=len(final))
    data = pool.map(self.search, final)
    resultList = [d for dat in data for d in dat]
    pool.close()
    ind = Indexer()
    for i in resultList:
        ind.join(i)
    return ind.results()
def praw_memes(self, verbose: bool) -> Iterator[List[redditData]]:
    for ids in self.query_pushshift():
        with cast(mpPool, Pool(cpu_count(), initializer)) as workers:
            if verbose:
                memes: list[Union[redditData, None]] = list(
                    tqdm(workers.imap_unordered(praw_by_id, ids)))
            else:
                memes = list(workers.imap_unordered(praw_by_id, ids))
            yield [
                meme for meme in memes
                if meme and meme["username"] != "None"
            ]
def cm(m=None, p=5, u="", **kw):
    ct = fetch(m)
    t = etree.HTML(ct)
    title = ''.join(t.xpath('//*[@id="workinfo"]/h1/text()')).strip()
    els = [(i.get("title")[:-11], i.get("href"))
           for i in t.xpath('//*[@id="chapterlist"]/ul/li/a')]
    for n, (e, _) in enumerate(els):
        print("%s." % n, e)
    ci_pure = input('Enter the chapter numbers: separate multiple chapters with commas, '
                    'ranges with "-", e.g. 1 or 4,6,7 or 1-10,14-75\n'
                    'Select volumes: ')
    ci = formatipt(ci_pure)
    #mg = Manager()
    tasks = []
    for i in ci:
        ci_title, ci_href = els[i]
        tasks.append({"type": "sets", "title": title,
                      "set": "%s.%s" % (i + 1, ci_title), "url": ci_href})
    file_dir = os.path.join(".", "download", title)
    try:
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)
    except OSError:
        pass
    # alternative: manage the processes manually
    #ps = [Process(target=save_pic, args=(sets, tpls, pics)) for i in range(min(p, len(ci)))]
    #for i in ps:
    #    i.start()
    pool = Pool(processes=p)
    # map_async returns immediately; poll the AsyncResult and resubmit any
    # follow-up tasks it produces until nothing is left.
    result = pool.map_async(save_pic, tasks)
    out = sys.stdout
    try:
        while True:
            if result.ready():
                tasks = []
                for ret in result.get():
                    tasks.extend(ret)
                if not tasks:
                    break
                result = pool.map_async(save_pic, tasks)
            out.write("\r%d" % len(tasks))
            out.flush()
            time.sleep(.5)
    except KeyboardInterrupt:
        print("Download stopped")
        #return
    finally:
        pool.close()
    print("Download finished")
    if not u:
        return
    # compress and upload
    zip_name = file_dir[2:].replace(os.path.sep, "_") + "_" + ci_pure
    username, password = u.split(":")
    ci_str = [str(i) for i in ci]
    tfiles = ["%s/%s" % (dirpath, filename)
              for dirpath, dirs, files in os.walk(file_dir)
              for filename in files
              if filename[:filename.find(".")] in ci_str]
    zip_files(tfiles, zip_name)
    upfile(username, password, zip_name)
def progress_map(func, *iterables, jobs=1, **kwargs):
    """Map a function across iterables of arguments.

    This is comparable to :meth:`astropy.utils.console.ProgressBar.map`,
    except that it is implemented using :mod:`tqdm` and so provides more
    detailed and accurate progress information.
    """
    total = _get_total_estimate(*iterables)
    if jobs == 1:
        yield from tqdm(map(func, *iterables), total=total, **kwargs)
    else:
        with Pool(jobs) as pool:
            yield from _results_in_order(
                tqdm(pool.imap_unordered(WrappedFunc(func),
                                         enumerate(zip(*iterables))),
                     total=total, **kwargs))
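# Possible implementations of the two helpers assumed by this generator
# variant of progress_map; both are illustrative sketches, not the original
# code. _get_total_estimate tolerates iterables without len() (tqdm accepts
# total=None), and _results_in_order re-orders the (index, result) pairs that
# WrappedFunc emits, buffering anything that arrives ahead of its turn.
def _get_total_estimate(*iterables):
    try:
        return min(len(iterable) for iterable in iterables)
    except TypeError:
        return None


def _results_in_order(indexed_results):
    buffered = {}
    next_index = 0
    for index, result in indexed_results:
        buffered[index] = result
        while next_index in buffered:
            yield buffered.pop(next_index)
            next_index += 1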
def _scan_match(sample_ui_list, path_list, comp_func, weight_list=None,
                threshold=0.6, pool_size=12):
    """
    :param sample_ui_list: output after process_csv()
    :param path_list: relative or absolute path list of csv files
    :param comp_func: compare function
    :param weight_list: ui weight mask
    :param threshold: components scoring above this threshold are counted as the same component
    :param pool_size: size of the parallel worker pool
    :return: list of (path, score, score_distribution) tuples sorted by score, best match first
    """
    pool = Pool(processes=pool_size)
    arg_list = []
    for j in range(len(path_list)):
        arg_list.append((j + 1, path_list[j], sample_ui_list, comp_func,
                         weight_list, threshold))
    score_list = pool.map(_single_scan_helper, arg_list)
    pool.close()
    pool.join()
    # Sorted by score (element 1 of each result tuple), best match first.
    return sorted(score_list, key=lambda k: k[1], reverse=True)
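# An assumed outline of `_single_scan_helper`, inferred from how _scan_match
# builds arg_list and sorts on element [1] of each result: it unpacks one
# argument tuple, scores a single csv file against the sample UI list and
# returns (path, score, score_distribution). `process_csv` and the pairwise
# scoring below are placeholders, not the original logic.
def _single_scan_helper(args):
    index, path, sample_ui_list, comp_func, weight_list, threshold = args
    candidate_ui_list = process_csv(path)  # hypothetical parser
    distribution = [
        comp_func(sample, candidate, weight_list)
        for sample, candidate in zip(sample_ui_list, candidate_ui_list)
    ]
    score = sum(1 for s in distribution if s >= threshold)
    return path, score, distribution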
def process(self):
    """
    Process all files contained in the extracted .mans folder.
    """
    files_list = []
    for filetype in self.filelist.keys():
        # Unknown filetypes are skipped for now
        if filetype not in MANS_FIELDS.keys():
            logging.warning(
                f"[MAIN] {filetype} filetype not recognized. Send us a note! - SKIP"
            )
            continue
        # Ignore items not related to the timeline
        # TODO: will use them in neo4j for relationships
        if MANS_FIELDS[filetype].get("skip", False):
            logging.debug(f"[MAIN] SKIP {filetype}")
            continue
        # Read all files related to this type
        for (file, offset) in self.filelist[filetype]:
            files_list.append((filetype, file, offset))
    with Pool(processes=self.cpu_count) as pool:
        res = pool.starmap_async(self.process_file, files_list).get()
    logging.debug("[MAIN] Pre-Processing [✔]")
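# starmap_async unpacks each (filetype, file, offset) tuple from files_list,
# so the bound method it dispatches to needs a matching signature. The stub
# below only illustrates that contract; the real parser lives elsewhere in
# the project:
#
#     def process_file(self, filetype, file, offset):
#         ...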
def mix_ciphers(ciphers_for_mixing, nr_rounds=MIN_MIX_ROUNDS,
                teller=_teller, nr_parallel=0):
    p = ciphers_for_mixing['modulus']
    g = ciphers_for_mixing['generator']
    q = ciphers_for_mixing['order']
    y = ciphers_for_mixing['public']

    original_ciphers = ciphers_for_mixing['mixed_ciphers']
    nr_ciphers = len(original_ciphers)

    teller.task('Mixing %d ciphers for %d rounds' % (nr_ciphers, nr_rounds))

    cipher_mix = {'modulus': p, 'generator': g, 'order': q, 'public': y}
    cipher_mix['original_ciphers'] = original_ciphers

    with teller.task('Producing final mixed ciphers', total=nr_ciphers):
        shuffled = shuffle_ciphers(p, g, q, y, original_ciphers, teller=teller)
        mixed_ciphers, mixed_offsets, mixed_randoms = shuffled
        cipher_mix['mixed_ciphers'] = mixed_ciphers

    total = nr_ciphers * nr_rounds
    with teller.task('Producing ciphers for proof', total=total):
        if nr_parallel > 0:
            pool = Pool(nr_parallel, Random.atfork)
            data = [(p, g, q, y, original_ciphers) for _ in range(nr_rounds)]
            collections = []
            for r in pool.imap(_shuffle_ciphers, data):
                teller.advance()
                collections.append(r)
            pool.close()
            pool.join()
        else:
            collections = [shuffle_ciphers(p, g, q, y, original_ciphers, teller=teller)
                           for _ in range(nr_rounds)]

        unzipped = [list(x) for x in zip(*collections)]
        cipher_collections, offset_collections, random_collections = unzipped
        cipher_mix['cipher_collections'] = cipher_collections
        cipher_mix['random_collections'] = random_collections
        cipher_mix['offset_collections'] = offset_collections

    with teller.task('Producing cryptographic hash challenge'):
        challenge = compute_mix_challenge(cipher_mix)
        cipher_mix['challenge'] = challenge

    bits = bit_iterator(int(challenge, 16))

    with teller.task('Answering according to challenge', total=nr_rounds):
        for i, bit in zip(range(nr_rounds), bits):
            offsets = offset_collections[i]
            randoms = random_collections[i]

            if bit == 0:
                # Nothing to do, we just publish our offsets and randoms
                pass
            elif bit == 1:
                # The image is given. We now have to prove we know
                # both this image's and mixed_ciphers' offsets/randoms
                # by providing new offsets/randoms so one can reencode
                # this image to end up with mixed_ciphers.
                # original_ciphers -> image
                # original_ciphers -> mixed_ciphers
                # Provide image -> mixed_ciphers
                new_offsets = [None] * nr_ciphers
                new_randoms = [None] * nr_ciphers

                for j in range(nr_ciphers):
                    cipher_random = randoms[j]
                    cipher_offset = offsets[j]
                    mixed_random = mixed_randoms[j]
                    mixed_offset = mixed_offsets[j]

                    new_offsets[cipher_offset] = mixed_offset
                    new_random = (mixed_random - cipher_random) % q
                    new_randoms[cipher_offset] = new_random

                offset_collections[i] = new_offsets
                random_collections[i] = new_randoms
                del offsets, randoms
            else:
                m = "This should be impossible. Something is broken."
                raise AssertionError(m)

            teller.advance()

    teller.finish('Mixing')
    return cipher_mix
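# A sketch of the `_shuffle_ciphers` wrapper used with pool.imap above.
# Pool.imap passes exactly one argument per item, so a module-level wrapper
# unpacks the tuple and delegates to shuffle_ciphers (without a teller, since
# progress is reported from the parent via teller.advance()). The companion
# `_verify_mix_round` used in the next snippet presumably follows the same
# tuple-unpacking pattern. Illustrative only; not the original code.
def _shuffle_ciphers(data):
    p, g, q, y, original_ciphers = data
    return shuffle_ciphers(p, g, q, y, original_ciphers)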
def verify_cipher_mix(cipher_mix, teller=_teller, nr_parallel=0):
    try:
        p = cipher_mix['modulus']
        g = cipher_mix['generator']
        q = cipher_mix['order']
        y = cipher_mix['public']
        original_ciphers = cipher_mix['original_ciphers']
        mixed_ciphers = cipher_mix['mixed_ciphers']
        challenge = cipher_mix['challenge']
        cipher_collections = cipher_mix['cipher_collections']
        offset_collections = cipher_mix['offset_collections']
        random_collections = cipher_mix['random_collections']
    except KeyError as e:
        m = "Invalid cipher mix format"
        raise ZeusError(m, e)

    if compute_mix_challenge(cipher_mix) != challenge:
        m = "Invalid challenge"
        raise ZeusError(m)

    nr_ciphers = len(original_ciphers)
    nr_rounds = len(cipher_collections)
    teller.task('Verifying mixing of %d ciphers for %d rounds'
                % (nr_ciphers, nr_rounds))

    if (len(offset_collections) != nr_rounds or
            len(random_collections) != nr_rounds):
        m = "Invalid cipher mix format: collections not of the same size!"
        raise ZeusError(m)

    #if not validate_cryptosystem(p, g, q, teller):
    #    m = "Invalid cryptosystem"
    #    raise AssertionError(m)

    total = nr_rounds * nr_ciphers
    with teller.task('Verifying ciphers', total=total):
        data = []
        for i, bit in zip(range(nr_rounds), bit_iterator(int(challenge, 16))):
            ciphers = cipher_collections[i]
            randoms = random_collections[i]
            offsets = offset_collections[i]
            data.append((p, g, q, y, i, bit, original_ciphers,
                         mixed_ciphers, ciphers, randoms, offsets))

        if nr_parallel <= 0:
            for args in data:
                verify_mix_round(*args, teller=teller)
        else:
            pool = Pool(nr_parallel, Random.atfork)
            try:
                for count in pool.imap(_verify_mix_round, data):
                    teller.advance(count)
            finally:
                pool.terminate()
                pool.join()

    teller.finish('Verifying mixing')
    return 1
def main():
    p = Pool(5)
    A = []
    for i in range(0, 100):
        A.append(i)
    print(p.map(f, A))
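# A self-contained, runnable variant of the snippet above (the squaring `f` is
# a stand-in, since the original function is not shown). The context manager
# closes the pool, and the __main__ guard keeps worker processes from
# re-executing the mapping when the module is re-imported under the spawn
# start method.
from multiprocessing import Pool


def f(x):
    return x * x


def main():
    A = list(range(100))
    with Pool(5) as pool:
        print(pool.map(f, A))


if __name__ == "__main__":
    main()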