def check_open_ports(url: str, ip: str, path: Optional[str] = None) -> List[Result]:
    results = []

    # create processing pool
    pool = Pool(os.cpu_count() * 2)
    mgr = Manager()
    queue = mgr.Queue()

    # read the data in from the data directory
    if path is None:
        file_path = pkg_resources.resource_filename(
            "yawast", "resources/common_ports.json")
    else:
        file_path = path

    with open(file_path) as json_file:
        data = json.load(json_file)

    for rec in data:
        pool.apply_async(_is_port_open, (url, ip, rec, queue))

    pool.close()
    pool.join()

    while not queue.empty():
        val = queue.get()
        if val is not None:
            results.append(val)

    return results
def main():
    manager = Manager()
    lock = manager.Lock()
    partial_get_index = functools.partial(get_index, lock)
    pool = Pool()
    pool.map(partial_get_index, [i for i in range(START_PAGE, END_PAGE + 1)])
    pool.close()
    pool.join()
    print('over')
def calc(self, computeOptions, **args):
    tiles = self._get_tile_service().get_tiles_bounded_by_box(
        computeOptions.get_min_lat(), computeOptions.get_max_lat(),
        computeOptions.get_min_lon(), computeOptions.get_max_lon(),
        computeOptions.get_dataset()[0],
        computeOptions.get_start_time(), computeOptions.get_end_time())

    if len(tiles) == 0:
        raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

    maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

    results = []
    if maxprocesses == 1:
        calculator = LongitudeHofMoellerCalculator()
        for x, tile in enumerate(tiles):
            result = calculator.longitude_time_hofmoeller_stats(tile, x)
            results.append(result)
    else:
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for x, tile in enumerate(tiles):
            work_queue.put(
                ('longitude_time_hofmoeller_stats', tile, x))
        [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [pool.apply_async(pool_worker, (LONGITUDE, work_queue, done_queue)) for _ in range(0, maxprocesses)]
        pool.close()

        # Collect the results
        for x, tile in enumerate(tiles):
            result = done_queue.get()
            try:
                error_str = result['error']
                logger.error(error_str)
                raise NexusProcessingException(reason="Error calculating longitude_time_hofmoeller_stats.")
            except KeyError:
                pass

            results.append(result)

        pool.terminate()
        manager.shutdown()

    results = sorted(results, key=lambda entry: entry["time"])

    results = self.applyDeseasonToHofMoeller(results, pivot="lons")

    result = HoffMoellerResults(results=results, computeOptions=computeOptions,
                                type=HoffMoellerResults.LONGITUDE)
    return result
class ThreadingManager(object):
    """
    Usage:
    tm = ThreadingManager(2, get_detections=cerberus_api.get_detection)
    u = tm.queued_executor('get_detections', [777, 125])
    """

    def __init__(self, cpu_count=cpu_count(), **kwargs):
        self.m = Manager()
        self.pool = self.m.Pool(cpu_count)
        self.kwargs = kwargs
        self.queue = self.m.Queue()
        self.log_queue = self.m.Queue()

    def executor(self, function, *args, **kwargs):
        result = self.pool.map(self.kwargs[function], *args, **kwargs)
        return result

    def queued_executor(self, function, *args, **kwargs):
        result = self.pool.map(self.kwargs[function], *args, **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def async_executor(self, function, *args, **kwargs):
        result = self.pool.map_async(self.kwargs[function], *args, **kwargs)
        sys.stdout.flush()
        return result

    def star_executor(self, function, *args, **kwargs):
        result = self.pool.starmap(self.kwargs[function], *args, **kwargs)
        return result

    def star_queued_executor(self, function, *args, **kwargs):
        result = self.pool.starmap(self.kwargs[function], *args, **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def async_star_executor(self, function, *args, **kwargs):
        result = self.pool.starmap_async(self.kwargs[function], *args, **kwargs)
        return result

    def async_star_queued_executor(self, function, *args, **kwargs):
        result = self.pool.starmap_async(self.kwargs[function], *args, **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def close(self, exception_type, exception_value, traceback):
        self.pool._maintain_pool()
        self.pool.close()
        self.pool.join()
def get_daily_difference_average_for_box(self, min_lat, max_lat, min_lon, max_lon,
                                         dataset1, dataset2, start_time, end_time):
    daysinrange = self._tile_service.find_days_in_range_asc(
        min_lat, max_lat, min_lon, max_lon, dataset1, start_time, end_time)

    maxprocesses = int(
        self.algorithm_config.get("multiprocessing", "maxprocesses"))

    if maxprocesses == 1:
        calculator = DailyDifferenceAverageCalculator()
        averagebyday = []
        for dayinseconds in daysinrange:
            result = calculator.calc_average_diff_on_day(
                min_lat, max_lat, min_lon, max_lon,
                dataset1, dataset2, dayinseconds)
            averagebyday.append((result[0], result[1]))
    else:
        # Create a task to calc average difference for each day
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for dayinseconds in daysinrange:
            work_queue.put(
                ('calc_average_diff_on_day', min_lat, max_lat, min_lon,
                 max_lon, dataset1, dataset2, dayinseconds))
        [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [
            pool.apply_async(pool_worker, (work_queue, done_queue))
            for _ in xrange(0, maxprocesses)
        ]
        pool.close()

        # Collect the results as [(day (in ms), average difference for that day)]
        averagebyday = []
        for i in xrange(0, len(daysinrange)):
            result = done_queue.get()
            if result[0] == 'error':
                print >> sys.stderr, result[1]
                raise NexusProcessingException(
                    reason="Error calculating average by day.")
            rdata = result
            averagebyday.append((rdata[0], rdata[1]))

        pool.terminate()
        manager.shutdown()

    return averagebyday
def main():
    old_folder_name = "Test"
    new_folder_name = old_folder_name + "_copy"
    os.mkdir(new_folder_name)

    pool = Pool(5)
    queue = Manager().Queue()

    file_list = os.listdir(old_folder_name)
    file_size = len(file_list)
    for file in file_list:
        pool.apply_async(copy_file_worker,
                         (file, old_folder_name, new_folder_name, queue))

    num = 0
    while num < file_size:
        queue.get()
        num += 1
        print("Copy Rate: %.2f%%" % (num / file_size * 100))

    print("Done!")
def process_venue_list(venue_id_list):
    pool = ThreadPool(5)
    m = ThreadManager()
    d = m.dict()
    for venue_id in venue_id_list:
        d[venue_id] = 'none'

    print("[.] Processing %s venues" % len(venue_id_list))
    result = pool.map_async(process_venue,
                            zip(venue_id_list, itertools.repeat(d)))
    monitor_map_progress(result, d, len(venue_id_list))

    result.wait()
    _ = result.get()

    print("[x] Done with %s venues" % len(venue_id_list))
def new_send_email(email):
    if not IS_ACTIVE:
        new_send_email.queue = Manager().Queue()
        process = Process(target=process_sent_queue,
                          args=(new_send_email.queue,))
        process.daemon = True
        process.start()
        models.IS_ACTIVE = True

    new_send_email.queue.put(email)
def __init__(self, file_list, n_treads, q):
    assert isinstance(file_list, (list, tuple))
    assert isinstance(n_treads, int)
    assert n_treads >= 0
    assert type(q) is Manager().Queue

    self.is_active = True
    self.file_list = file_list
    self.n_treads = n_treads
    self.q = q
def parallel_parsing(directory):
    trees = list()
    input_files = [java_file for java_file in get_java_files(directory)]

    # pool = Pool(processes=8)
    # x = pool.apply_async(create_project_parse_tree, args=(input_files[0], trees))
    # for java_file in get_java_files(directory):
    #     res = pool.apply_async(create_project_parse_tree, (java_file,))  # runs in *only* one process
    #     print(type(res.get(timeout=1)))
    #
    # num = Value('d', 0.0)
    # arr = Array('i', range(500))

    with Manager() as manager:
        d = manager.dict()
        q = manager.Queue(10)
        p = Process(target=create_project_parse_tree, args=(input_files[0], q))
        p.start()
        p.join()
        print(q)
        return True
        # return "done"

    def _upload_pool(self):
        pool = ThreadPool(processes=self.n_treads)
        res = pool.map(self._upload_file, self.file_list)
        n_res = len(res)
        res_dict = Counter(res)
        succes = res_dict[True]
        self.q.put("Uploaded {}/{}".format(succes, n_res))
        self.is_active = False

    def start(self):
        proc = Process(target=self._upload_pool)
        proc.start()


if __name__ == '__main__':
    q = Manager().Queue()
    files_list = [
        os.path.join(os.getcwd(), "test_data", i)
        for i in os.listdir("./test_data")
    ]
    uploader = Uploader(files_list, 2, q)
    uploader.start()
    while uploader.is_active:
        progress = q.get()
        print(progress)
    dealdata = tuple(tuple([y for y in x]) for x in data)
    try:
        # execute the SQL statement
        cursor.executemany(sql, dealdata)
        # commit the transaction to the database
        db.commit()
    except pm.Warning as w:
        print(repr(w))
        # roll back if an error occurs
        db.rollback()


if __name__ == '__main__':
    # pool = ThreadPool(4)
    # page = [x + 1 for x in range(18)]
    manager = Manager()
    lock = manager.Lock()
    # pool = ThreadPool(processes=8)
    for i in range(1, 20):
        # pool.apply_async(spider, (i, lock))
        spider(i)
    print(list)
    for i in list:
        if i == 21:
            continue
        else:
            dealdata(i)
    # results = pool.map(spider, page)
    # pool.close()
    # pool.join()
class BookSorter:
    def __init__(self, file_list, mode, database_path, settings, temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = settings['auto_tags']
        self.auto_cover = settings['auto_cover']
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.errors = Manager().list()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Hash', 'Path'), 'books', {'Hash': ''}, 'LIKE')

        if all_hashes_and_paths:
            self.hashes_and_paths = {i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Title', 'Author', 'Year', 'ISBN', 'Tags',
                 'Position', 'Bookmarks', 'CoverImage', 'Annotations'),
                'books', {'Hash': file_hash}, 'EQUALS')[0]

        book_data = []
        for count, i in enumerate(database_return):
            if count in (5, 6, 8):  # Position, Bookmarks, and Annotations are pickled
                if i:
                    book_data.append(pickle.loads(i))
                else:
                    book_data.append(None)
            else:
                book_data.append(i)

        return book_data

    def read_book(self, filename):
        # filename is expected as a string containing the
        # full path of the ebook file

        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 * 32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader
        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
            if (self.hashes_and_paths[file_md5] == filename
                    or os.path.exists(self.hashes_and_paths[file_md5])):
                if not self.hashes_and_paths[file_md5] == filename:
                    warning_string = (
                        f'{os.path.basename(filename)} is already in database')
                    logger.warning(warning_string)
                return

        # This allows for eliminating issues with filenames that have
        # a dot in them. All hail the roundabout fix.
        valid_extension = False
        for i in sorter:
            if os.path.basename(filename).endswith(i):
                file_extension = i
                valid_extension = True
                break

        if not valid_extension:
            this_error = 'Unsupported extension: ' + filename
            self.errors.append(this_error)
            logger.error(this_error)
            return

        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)

        # None of the following have an exception type specified
        # This will keep everything from crashing, but will make
        # troubleshooting difficult
        # TODO
        # In application notifications

        try:
            book_ref.read_book()
        except Exception as e:
            this_error = f'Error initializing: {filename}'
            self.errors.append(this_error)
            logger.exception(
                this_error + f' {type(e).__name__} Arguments: {e.args}')
            return

        this_book = {}
        this_book[file_md5] = {'hash': file_md5, 'path': filename}

        # Different modes require different values
        if self.work_mode == 'addition':
            try:
                metadata = book_ref.generate_metadata()
            except Exception as e:
                this_error = f'Metadata generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(
                    this_error + f' {type(e).__name__} Arguments: {e.args}')
                return

            title = metadata.title
            author = metadata.author
            year = metadata.year
            isbn = metadata.isbn

            tags = None
            if self.auto_tags:
                tags = metadata.tags

            cover_image_raw = metadata.cover
            if cover_image_raw:
                cover_image = resize_image(cover_image_raw)
            else:
                cover_image = None
                if self.auto_cover:
                    cover_image = fetch_cover(title, author)

            this_book[file_md5]['cover_image'] = cover_image
            this_book[file_md5]['addition_mode'] = self.addition_mode

        if self.work_mode == 'reading':
            try:
                book_breakdown = book_ref.generate_content()
            except Exception as e:
                this_error = f'Content generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(
                    this_error + f' {type(e).__name__} Arguments: {e.args}')
                return

            toc = book_breakdown[0]
            content = book_breakdown[1]
            images_only = book_breakdown[2]

            try:
                book_data = self.database_entry_for_book(file_md5)
            except TypeError:
                logger.error(
                    f'Database error: {filename}. Re-add book to program')
                return

            title = book_data[0].replace('&', '&&')
            author = book_data[1]
            year = book_data[2]
            isbn = book_data[3]
            tags = book_data[4]
            position = book_data[5]
            bookmarks = book_data[6]
            cover = book_data[7]
            annotations = book_data[8]

            this_book[file_md5]['position'] = position
            this_book[file_md5]['bookmarks'] = bookmarks
            this_book[file_md5]['toc'] = toc
            this_book[file_md5]['content'] = content
            this_book[file_md5]['images_only'] = images_only
            this_book[file_md5]['cover'] = cover
            this_book[file_md5]['annotations'] = annotations

        this_book[file_md5]['title'] = title
        this_book[file_md5]['author'] = author
        this_book[file_md5]['year'] = year
        this_book[file_md5]['isbn'] = isbn
        this_book[file_md5]['tags'] = tags

        return this_book

    def read_progress(self):
        while True:
            processed_file = self.queue.get()
            self.threading_completed.append(processed_file)
            total_number = len(self.file_list)
            completed_number = len(self.threading_completed)

            # Just for the record, this slows down book searching by about 20%
            if _progress_emitter:  # Skip update in reading mode
                _progress_emitter.update_progress(
                    completed_number * 100 // total_number)

            if total_number == completed_number:
                break

    def initiate_threads(self):
        if not self.file_list:
            return None

        def pool_creator():
            _pool = Pool(thread_count)
            self.processed_books = _pool.map(self.read_book, self.file_list)
            _pool.close()
            _pool.join()

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            for j in i:
                return_books[j] = i[j]

        del self.processed_books
        processing_time = str(time.time() - start_time)
        logger.info('Finished processing in ' + processing_time)

        return return_books, self.errors
def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds,
                                          start_seconds_from_epoch, end_seconds_from_epoch,
                                          apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

    the_time = datetime.now()
    daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                  bounding_polygon.bounds[3],
                                                                  bounding_polygon.bounds[0],
                                                                  bounding_polygon.bounds[2],
                                                                  ds,
                                                                  start_seconds_from_epoch,
                                                                  end_seconds_from_epoch)
    logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    if len(daysinrange) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    the_time = datetime.now()
    maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

    results = []
    if maxprocesses == 1:
        calculator = TimeSeriesCalculator()
        for dayinseconds in daysinrange:
            result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
            results += [result] if result else []
    else:
        # Create a task to calc average difference for each day
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for dayinseconds in daysinrange:
            work_queue.put(
                ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
        [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in range(0, maxprocesses)]
        pool.close()

        # Collect the results as [(day (in ms), average difference for that day)]
        for i in range(0, len(daysinrange)):
            result = done_queue.get()
            try:
                error_str = result['error']
                logger.error(error_str)
                raise NexusProcessingException(reason="Error calculating average by day.")
            except KeyError:
                pass

            results += [result] if result else []

        pool.terminate()
        manager.shutdown()

    results = sorted(results, key=lambda entry: entry["time"])
    logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    if apply_seasonal_cycle_filter:
        the_time = datetime.now()
        for result in results:
            month = datetime.utcfromtimestamp(result['time']).month
            month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
            seasonal_mean = result['mean'] - month_mean
            seasonal_min = result['min'] - month_min
            seasonal_max = result['max'] - month_max
            result['meanSeasonal'] = seasonal_mean
            result['minSeasonal'] = seasonal_min
            result['maxSeasonal'] = seasonal_max
        logger.info(
            "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    the_time = datetime.now()
    filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
    filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
    filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

    if apply_seasonal_cycle_filter and apply_low_pass_filter:
        try:
            filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
            filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
            filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
        except Exception as e:
            # If it doesn't work log the error but ignore it
            tb = traceback.format_exc()
            logger.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

    logger.info(
        "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    return results, {}
def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds,
                                          start_time=0, end_time=-1,
                                          applySeasonalFilter=True, applyLowPass=True):

    daysinrange = self._get_tile_service().find_days_in_range_asc(
        min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time)

    if len(daysinrange) == 0:
        raise NoDataException(
            reason="No data found for selected timeframe")

    maxprocesses = int(
        self.algorithm_config.get("multiprocessing", "maxprocesses"))

    results = []
    if maxprocesses == 1:
        calculator = TimeSeriesCalculator()
        for dayinseconds in daysinrange:
            result = calculator.calc_average_on_day(
                min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
            results.append(result)
    else:
        # Create a task to calc average difference for each day
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for dayinseconds in daysinrange:
            work_queue.put(('calc_average_on_day', min_lat, max_lat,
                            min_lon, max_lon, ds, dayinseconds))
        [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [
            pool.apply_async(pool_worker, (work_queue, done_queue))
            for _ in range(0, maxprocesses)
        ]
        pool.close()

        # Collect the results as [(day (in ms), average difference for that day)]
        for i in range(0, len(daysinrange)):
            result = done_queue.get()
            try:
                error_str = result['error']
                logger.error(error_str)
                raise NexusProcessingException(
                    reason="Error calculating average by day.")
            except KeyError:
                pass

            results.append(result)

        pool.terminate()
        manager.shutdown()

    results = sorted(results, key=lambda entry: entry["time"])

    filt.applyAllFiltersOnField(results, 'mean',
                                applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'max',
                                applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'min',
                                applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

    return results, {}
def get_init_params(self, manager: Manager):
    d = {}
    if manager is not None:
        d = manager.dict()
    return None, self.zoom_offset, self.max_zoom_level, d
    'phone_num': 'M6Y3WpfSNET9W4ZwcML1tUx+jvOWtaDKwoUM3ABM+o7AXi8yZKplkUSM3u3R9cN+x4CNZ2Mo/SHFqB8nQWNt9WHEKc3iC0nSfTfbhlLJECCLpB60Cpbo7HKjE9dlY8s7kJY8bCn+xHAXEGg/2avB2SRPFLPo+Nm0JO6R07Sof4U=',
    'pwd': 'OKNkTFqOU26Adb/9IAvze4K+u6aBHpd9cvBuyRWWAifDyb48wAvLbGUHfj0ZtTvGdg3Y2k8x9EyzcvW/G36R9ukCVpa+xJFztKM8GIl1q71OPNSTx0u1+EM6JiZnGxvPWApt0coRLm64BkRBcbhgliSauUlheBBfoAIADSNlXpw='
}

headers['Cookie'] = get_Cookie(form_data, headers)
if headers['Cookie'] is None:
    print('Cookie is None')
    sys.exit()

# url = 'https://api.xiaoheihe.cn/game/pubg/get_stats_detail/?lang=zh-cn&os_type=iOS&os_version=10.3.3&_time=1548776142&version=1.1.52&device_id=D2AA4D4F-AC80-476C-BFE1-CBD83AB74133&heybox_id=5141514&hkey=06a344301cb7c6cdc1136a62c061c978&fpp=0&mode=solo&nickname=HuYaTV_15310849&region=steam&season=pc-2018-02'
# The URL above was captured from the app; testing shows the other parameters
# can be omitted, so the URL below is sufficient
url1 = 'https://api.xiaoheihe.cn/game/pubg/get_stats_detail/?heybox_id=14909789&region=steam'

put_thread_pool = ThreadPool(5)
get_thread_pool = ThreadPool()
# Communication between thread pools requires Manager().Queue();
# plain Queue() is enough for communication between threads
queue = ThreadManager().Queue()

for i in range(len(par)):
    # A thread pool makes it easy to cap the number of threads: the producer pool
    # runs at most five producer threads, which improves throughput without
    # spending too much time switching between threads
    put_thread_pool.apply_async(put_into_pool, (queue, url1, par[i], headers))

time.sleep(5)  # Let the producers run for 5 seconds so the queue has some initial data
get_thread_pool.apply_async(get_from_pool, (queue, db))
queue.join()  # Block the main thread until the queue has been fully processed
db.close()
import logging
import random
import re

import boto3
import botocore

from botocore.client import Config
from botocore.endpoint import MAX_POOL_CONNECTIONS
from multiprocessing import TimeoutError
from multiprocessing.dummy import Pool as ThreadPool, Manager, Value

from enumerate_iam.utils.remove_metadata import remove_metadata
from enumerate_iam.utils.json_utils import json_encoder
from enumerate_iam.bruteforce_tests import BRUTEFORCE_TESTS

MAX_THREADS = 12
CLIENT_POOL = {}

MANAGER = Manager()
STOP_SIGNAL = MANAGER.Value('i', 0)


def report_arn(candidate):
    """
    Attempt to extract and slice up an ARN from the input string
    """
    logger = logging.getLogger()

    arn_search = re.search(r'.*(arn:aws:.*?) .*', candidate)

    if arn_search:
        arn = arn_search.group(1)

        arn_id = arn.split(':')[4]
import json
import logging
from multiprocessing.dummy import Manager
import random
from urllib.parse import urlparse, parse_qs

from config.config import BASEPYCURLCONFIG
from core import MySpider
from core.Spider import CrawlJob

__author__ = 'Florian'

m = Manager()
crawled = m.dict()

logger = logging.getLogger("logger")
fh = logging.FileHandler("clubs.jsonl", 'a+')
simpleFormat = logging.Formatter("%(message)s")
fh.setFormatter(simpleFormat)
fh.setLevel(logging.WARNING)
ch = logging.StreamHandler()
ch.setFormatter(simpleFormat)
ch.setLevel(logging.DEBUG)
logger.addHandler(fh)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)


@MySpider.QueueInitializer.register()
def seeds():
    jobs = []
    logger = logging.getLogger("logger")
    for federation in [
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing.dummy import Manager

import openalpr_api
from openalpr_api.rest import ApiException
from sinesp_client import SinespClient

from requests.packages.urllib3.exceptions import InsecureRequestWarning
import requests
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# initiating globals
now = datetime.datetime.now()
sc = SinespClient()
pool = ThreadPool(4)  # number of threads for parallelism
lock = Manager().Lock()


def process_cars(car_images):
    return pool.map(process_car_image, car_images)


def process_car_image(image_path):
    # create an instance of the API class
    api_instance = openalpr_api.DefaultApi()
    secret_key = 'sk_4fb6feea66fb6b5524b2d984'  # str | The secret key used to authenticate your account. You can view your secret key by visiting https://cloud.openalpr.com/
    country = 'br'  # str | Defines the training data used by OpenALPR. "us" analyzes North-American style plates. "eu" analyzes European-style plates. This field is required if using the "plate" task. You may use multiple datasets by using commas between the country codes. For example, 'au,auwide' would analyze using both the Australian plate styles. A full list of supported country codes can be found here https://github.com/openalpr/openalpr/tree/master/runtime_data/config
    recognize_vehicle = 0  # int | If set to 1, the vehicle will also be recognized in the image. This requires an additional credit per request (optional) (default to 0)
    state = ''  # str | Corresponds to a US state or EU country code used by OpenALPR pattern recognition. For example, using "md" matches US plates against the Maryland plate patterns. Using "fr" matches European plates against the French plate patterns. (optional) (default to )
    return_image = 0  # int | If set to 1, the image you uploaded will be encoded in base64 and sent back along with the response (optional) (default to 0)
    topn = 10  # int | The number of results you would like to be returned for plate candidates and vehicle classifications (optional) (default to 10)
    prewarp = ''  # str | Prewarp configuration is used to calibrate the analyses for the angle of a particular camera. More information is available here http://doc.openalpr.com/accuracy_improvements.html#calibration (optional) (default to )
class BookSorter:
    def __init__(self, file_list, mode, database_path, auto_tags=True, temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = auto_tags
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Hash', 'Path'), 'books', {'Hash': ''}, 'LIKE')

        if all_hashes_and_paths:
            # self.hashes = [i[0] for i in all_hashes]
            self.hashes_and_paths = {i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Title', 'Author', 'Year', 'ISBN', 'Tags',
                 'Position', 'Bookmarks', 'CoverImage', 'Annotations'),
                'books', {'Hash': file_hash}, 'EQUALS')[0]

        book_data = []
        for count, i in enumerate(database_return):
            if count in (5, 6, 8):  # Position, Bookmarks, and Annotations are pickled
                if i:
                    book_data.append(pickle.loads(i))
                else:
                    book_data.append(None)
            else:
                book_data.append(i)

        return book_data

    def read_book(self, filename):
        # filename is expected as a string containing the
        # full path of the ebook file

        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 * 32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader
        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
            if (self.hashes_and_paths[file_md5] == filename
                    or os.path.exists(self.hashes_and_paths[file_md5])):
                if not self.hashes_and_paths[file_md5] == filename:
                    print(
                        f'{os.path.basename(filename)} is already in database')
                return

        # This allows for eliminating issues with filenames that have
        # a dot in them. All hail the roundabout fix.
        valid_extension = False
        for i in sorter:
            if os.path.basename(filename).endswith(i):
                file_extension = i
                valid_extension = True
                break

        if not valid_extension:
            print(filename + ' has an unsupported extension')
            return

        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)

        # Everything following this is standard
        # None values are accounted for here
        is_valid = book_ref.read_book()
        if not is_valid:
            print('Cannot parse: ' + filename)
            return

        if book_ref.book:
            this_book = {}
            this_book[file_md5] = {'hash': file_md5, 'path': filename}

            # Different modes require different values
            if self.work_mode == 'addition':
                # Reduce the size of the incoming image
                # if one is found
                title = book_ref.get_title()
                author = book_ref.get_author()
                year = book_ref.get_year()
                isbn = book_ref.get_isbn()

                tags = None
                if self.auto_tags:
                    tags = book_ref.get_tags()

                cover_image_raw = book_ref.get_cover_image()
                if cover_image_raw:
                    cover_image = resize_image(cover_image_raw)
                else:
                    cover_image = None

                this_book[file_md5]['cover_image'] = cover_image
                this_book[file_md5]['addition_mode'] = self.addition_mode

            if self.work_mode == 'reading':
                all_content = book_ref.get_contents()

                # get_contents() returns a tuple. Index 1 is a collection of
                # special settings that depend on the kind of data being parsed.
                # Currently, this includes:
                # Only images included    images_only    BOOL    Book contains only images

                content = all_content[0]
                images_only = all_content[1]['images_only']

                if not content:
                    content = [('Invalid', 'Something went horribly wrong')]

                book_data = self.database_entry_for_book(file_md5)
                title = book_data[0]
                author = book_data[1]
                year = book_data[2]
                isbn = book_data[3]
                tags = book_data[4]
                position = book_data[5]
                bookmarks = book_data[6]
                cover = book_data[7]
                annotations = book_data[8]

                this_book[file_md5]['position'] = position
                this_book[file_md5]['bookmarks'] = bookmarks
                this_book[file_md5]['content'] = content
                this_book[file_md5]['images_only'] = images_only
                this_book[file_md5]['cover'] = cover
                this_book[file_md5]['annotations'] = annotations

            this_book[file_md5]['title'] = title
            this_book[file_md5]['author'] = author
            this_book[file_md5]['year'] = year
            this_book[file_md5]['isbn'] = isbn
            this_book[file_md5]['tags'] = tags

            return this_book

    def read_progress(self):
        while True:
            processed_file = self.queue.get()
            self.threading_completed.append(processed_file)
            total_number = len(self.file_list)
            completed_number = len(self.threading_completed)

            if progress_emitter:  # Skip update in reading mode
                progress_emitter.update_progress(
                    completed_number * 100 // total_number)

            if total_number == completed_number:
                break

    def initiate_threads(self):
        if not self.file_list:
            return None

        def pool_creator():
            _pool = Pool(5)
            self.processed_books = _pool.map(self.read_book, self.file_list)
            _pool.close()
            _pool.join()

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            for j in i:
                return_books[j] = i[j]

        del self.processed_books
        print('Finished processing in', time.time() - start_time)

        return return_books
            d['song_id'] = dict['song_id']
            db.save_one_data_to_comment(d)  # store the latest comments
            queue.task_done()  # mark this item as taken from the queue
        except:
            print("queue is empty wait for a while")
            time.sleep(1)


if __name__ == '__main__':
    # start_time = time.time()
    db = DbHelper()
    db.connenct(const.DB_CONFIGS)
    put_thread_pool = ThreadPool(3)
    get_thread_pool = ThreadPool(3)
    # Communication between thread pools requires Manager().Queue();
    # plain Queue() is enough for communication between threads
    queue = ThreadManager().Queue()
    comment_url = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token='  # comment API endpoint
    random_str = create_random_str(16)
    params = get_params(const.FIRST_PARAM, const.FORTH_PARAM, random_str)
    encSecKey = get_encSecKey(random_str, const.SECOND_PARAM, const.THIRD_PARAM)
    form_data = {'params': params, 'encSecKey': encSecKey}
    response = get_html(const.DAY_LIST_URL)
    links = []
    song_ids = []
    # First store the info for the hundred songs on the Cloud Music surging chart in the database
    for data in get_day_hot_song(response):
        db.save_one_data_to_day_hot_song(data)
        song_id = data['song_id']
                  format(process_number, repr(e)))
            sleep(3)

    return my_func


@error_catching
def f(process_number):
    print("starting worker:", process_number)
    while True:
        sleep(2)
        print("Worker {} checks in.".format(process_number))


if __name__ == '__main__':
    processes = []
    manager = Manager()

    for i in range(3):
        # pass the worker index expected by f
        p = Process(target=f, args=(i,))
        p.daemon = True
        p.start()
        processes.append(p)

    try:
        while True:
            sleep(1)
    except KeyboardInterrupt:
        print("Keyboard interrupt in main")
        sys.exit()
import os
import unittest
from multiprocessing.dummy import Manager

from main import Uploader

q = Manager().Queue()
files_list = [
    os.path.join(os.getcwd(), "test_data", i)
    for i in os.listdir("./test_data")
]


class Test(unittest.TestCase):
    def test_1(self):
        self.assertIsInstance(Uploader(files_list, 2, q), Uploader)

    def test_2(self):
        self.assertIsInstance(Uploader("", 2, q), Uploader)

    def test_3(self):
        self.assertIsInstance(Uploader(files_list, "2", q), Uploader)

    def test_4(self):
        self.assertIsInstance(Uploader(files_list, 2, "q"), Uploader)


if __name__ == '__main__':
    unittest.main()