def run_proxy_scan(self, safe_flag):
    self.set_total_runtime(time.time())
    self.set_proxy_log(self._manager_.dict())
    i = 0
    while i < self.get_job_len():
        # Launch one batch of workers, then wait for the whole batch.
        self._worker_pool_ = []
        for _ in range(self._worker_count_):
            if i >= self.get_job_len():
                break
            if safe_flag:
                p = Process(target=self._scan_secure_[i].run_proxy,
                            args=(self.get_proxy_log(), True))
            else:
                p = Process(target=self._scan_unsecure_[i].run_proxy,
                            args=(self.get_proxy_log(), True))
            p.start()
            self._worker_pool_.append(p)
            i += 1
        for p in self._worker_pool_:
            p.join()
    self.set_total_runtime(time.time() - self.get_total_runtime())
    return self.get_proxy_log()

def single_crawling_task(page_num, spider_idx):
    spider = spiders[spider_idx]
    crawl_spider.page_num = page_num
    manager = Manager()
    return_dic = manager.dict()
    cc = CustomCrawler()
    try_time = 0
    is_success = False
    while try_time < 2 and not is_success:
        # A Process object can only be started once, so build a fresh one
        # for every retry attempt.
        proc = Process(
            target=cc.crawling_start,
            args=(
                get_scrapy_settings(),
                spider,
                spider.__name__[:spider.__name__.find('Spider')].lower(),
                return_dic,
            ),
        )
        try:
            proc.start()
            proc.join()
            is_success = True
        except Exception as e:
            print(e)
            try_time += 1
            if try_time == 2:
                raise Exception(
                    f"Failed to connect to the site: {spider.__name__}")
    return dict(return_dic)

def _get_24h_price_ticker_data(jobs,
                               logger,
                               exchange_class,
                               schema_class,
                               symbol=None,
                               pairs=None,
                               *_,
                               **kwargs):
    socketio = None
    # Use .get() so a missing "enable_messaging" flag does not raise KeyError.
    if kwargs.get("enable_messaging"):
        socketio = SocketIO(message_queue=BROKER_URL)
    symbol_or_pairs = "-".join(symbol) if symbol else "PAIRS"
    p = Process(
        name="{} {}".format(exchange_class.__name__, symbol_or_pairs),
        target=_process,
        args=(
            logger,
            socketio,
            exchange_class,
            schema_class,
            symbol,
            pairs,
        ),
    )
    jobs.append(
        dict(
            job=p,
            timeout=s.TIMEOUT_PER_SYMBOL_REQUEST
            if symbol else s.TIMEOUT_PER_SYMBOLS_REQUEST,
        ))
    p.start()

def run_full_scan(self):
    """
    Run a complete scan of a list of IPs for the entire port range.
    """
    self.set_total_runtime(time.time())
    i = 0
    while i < self.get_job_len():
        self._worker_pool_ = []
        for _ in range(self.get_worker_count()):
            if i >= self.get_job_len():
                break
            p = Process(target=self._scanners_[i].run, args=(self.get_log(), ))
            p.start()
            self._worker_pool_.append(p)
            i += 1
        for p in self._worker_pool_:
            p.join()
    self.set_total_runtime(time.time() - self.get_total_runtime())
    return self.get_log()

def process(text: str) -> list:
    # duplex=False gives a one-way pipe: the parent reads from pipe_return,
    # the workers write to pipe_receive.
    pipe_return, pipe_receive = multiprocessing.Pipe(False)
    cpu_count = multiprocessing.cpu_count()
    process_list = []
    str_parts = text.split('_')
    for i in range(cpu_count):
        try:
            executor = Process(target=str_target_executor,
                               args=(i, str_parts[i], pipe_receive))
            process_list.append(executor)
            executor.start()
        except IndexError:
            # Fewer parts than CPUs: skip the surplus indices.
            continue
    last_list = []
    for _ in process_list:
        index, str_part = pipe_return.recv()
        last_list.append((index, str_part))
    for executor in process_list:
        executor.join()
    print(last_list)
    last_list.sort(key=lambda a: a[0])
    print(' '.join(map(lambda a: a[1], last_list)))
    # Return the ordered parts to match the annotated return type.
    return last_list

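# A minimal sketch of a worker target and entry point for process() above.
# The real str_target_executor is not shown in this collection, so the body
# below (upper-casing the fragment) is only an assumed example; each worker
# sends its index back with the result so the parent can restore order.
import multiprocessing
from multiprocessing import Process
from multiprocessing.connection import Connection


def str_target_executor(index: int, str_part: str, conn: Connection) -> None:
    # Hypothetical transformation; replace with the real per-part work.
    conn.send((index, str_part.upper()))


if __name__ == '__main__':
    # Guarding the entry point matters on spawn-based platforms
    # (Windows/macOS), otherwise each child re-imports and re-runs the module.
    process('hello_world_from_workers')
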
def _get_24h_price_ticker_data(jobs,
                               logger,
                               exchange_class,
                               schema_class,
                               symbol=None,
                               pairs=None):
    socketio = None
    if TickerSettings.enable_socket_io:
        socketio = SocketIO(message_queue=BROKER_URL)
    symbol_or_pairs = '-'.join(symbol) if symbol else 'PAIRS'
    p = Process(name='{} {}'.format(exchange_class.__name__, symbol_or_pairs),
                target=_process,
                args=(
                    logger,
                    socketio,
                    exchange_class,
                    schema_class,
                    symbol,
                    pairs,
                ))
    jobs.append(
        dict(job=p,
             timeout=s.TIMEOUT_PER_SYMBOL_REQUEST
             if symbol else s.TIMEOUT_PER_SYMBOLS_REQUEST))
    p.start()

def update(self):
    logger = self.get_logger()
    jobs = []
    try:
        for coin_or_token, struct in s.EXCHANGES_AND_PAIRS_OF_REFERENCE.items():
            if "market_depth" not in struct:
                continue
            quote = struct["pair"]
            exchange = struct["market_depth"]
            _method = globals()[f"_process_{exchange}"]
            p = Process(
                name=f"{coin_or_token}-{quote}",
                target=_method,
                args=(
                    logger,
                    (
                        coin_or_token,
                        quote,
                    ),
                ),
            )
            jobs.append(dict(job=p, timeout=s.ORDER_BOOK))
            p.start()
        for j in jobs:
            j["job"].join(timeout=j["timeout"])
    except Exception as error:
        _terminate_running_jobs(logger, jobs)
        logger.error("order_book error: {}".format(str(error)))
        self.update_state(state=states.FAILURE, meta=str(error))
        raise Ignore()
    finally:
        _terminate_running_jobs(logger, jobs)

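# _terminate_running_jobs is referenced above but not shown in this
# collection. A plausible sketch, assuming each entry is the
# dict(job=..., timeout=...) built in update(): terminate whatever is still
# alive after the join timeouts. This is an illustration, not the project's
# actual helper.
def _terminate_running_jobs(logger, jobs):
    for j in jobs:
        p = j["job"]
        if p.is_alive():
            logger.warning("terminating job {}".format(p.name))
            p.terminate()
            p.join()
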
def test_dummy_in_process(uuid, box, process_wait_time):
    ret = False
    with kill_vm_and_keys_on_enter_and_exit(uuid, box):
        p = Process(target=test_dummy, args=(uuid, box))
        p.start()
        p.join(process_wait_time)
        if p.is_alive():
            log_string(uuid, "Process time out, PID: {}".format(p.pid), "Red")
            pProcess(p.pid).kill()
        else:
            if p.exitcode == True:
                log_string(uuid, "test_dummy_in_process finished successfully",
                           "Green")
                ret = True
            else:
                log_string(
                    uuid,
                    "test_dummy_in_process finished with errors, exit code False",
                    "Red")
    return ret

def analyze_input_wrapper_in_process(uuid, box, task, actions_list,
                                     process_wait_time):
    ret = False
    chrome_output, screen_recorder = None, None
    with kill_vm_on_enter_and_exit(uuid, box):
        p = Process(target=analyze_input_in_vm, args=(uuid, box, actions_list))
        p.start()
        p.join(process_wait_time)
        if p.is_alive():
            log_string(uuid, "Process time out, PID: {}".format(p.pid), "Red")
            pProcess(p.pid).kill()
        else:
            if p.exitcode == True:
                log_string(uuid, "custom_task finished successfully", "Green")
                ret = True
            else:
                log_string(
                    uuid,
                    "custom_task finished with errors, exit code False",
                    "Red")
    return ret

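# pProcess in the two helpers above is presumably psutil.Process imported
# under another name. A sketch of how a timed-out worker, together with any
# children it spawned, could be killed with psutil; kill_worker_tree is a
# hypothetical helper, not part of the original code.
import psutil


def kill_worker_tree(pid: int) -> None:
    parent = psutil.Process(pid)
    for child in parent.children(recursive=True):
        child.kill()
    parent.kill()
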
def run_spider(spider, settings, kwargs=None):
    def f(q):
        try:
            # configure_logging(settings)
            runner = CrawlerRunner()
            if kwargs is not None:
                deferred = runner.crawl(spider, **kwargs)
            else:
                deferred = runner.crawl(spider)
            deferred.addBoth(lambda _: reactor.stop())
            reactor.run()
            q.put(None)
        except Exception as e:
            q.put(e)

    q = Queue()
    p = Process(target=f, args=(q, ))
    p.start()
    result = q.get()
    p.join()
    if result is not None:
        raise result

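# Usage sketch for run_spider: Twisted's reactor cannot be restarted within
# one interpreter, so each crawl runs in its own child process and any
# exception is shipped back through the Queue and re-raised in the parent.
# QuotesSpider and its start URL are hypothetical stand-ins, not part of the
# code above.
import scrapy
from scrapy.utils.project import get_project_settings


class QuotesSpider(scrapy.Spider):
    name = 'quotes'
    start_urls = ['https://quotes.toscrape.com']

    def parse(self, response):
        for quote in response.css('span.text::text').getall():
            yield {'quote': quote}


if __name__ == '__main__':
    run_spider(QuotesSpider, get_project_settings(), kwargs=None)
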
def create_static_mix(static_mix_id):
    """
    Task to create a static mix and write it to the appropriate storage backend.

    :param static_mix_id: The id of the StaticMix to be processed
    """
    # Mark as in progress
    try:
        static_mix = StaticMix.objects.get(id=static_mix_id)
    except StaticMix.DoesNotExist:
        # Does not exist, perhaps due to stale task
        print('StaticMix does not exist')
        return
    static_mix.status = TaskStatus.IN_PROGRESS
    static_mix.save()
    try:
        # Get paths
        directory = os.path.join(settings.MEDIA_ROOT, settings.SEPARATE_DIR,
                                 static_mix_id)
        filename = get_valid_filename(static_mix.formatted_name()) + '.mp3'
        rel_media_path = os.path.join(settings.SEPARATE_DIR, static_mix_id,
                                      filename)
        rel_path = os.path.join(settings.MEDIA_ROOT, rel_media_path)
        rel_path_dir = os.path.join(settings.MEDIA_ROOT, settings.SEPARATE_DIR,
                                    static_mix_id)
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

        separator = get_separator(static_mix.separator,
                                  static_mix.separator_args,
                                  static_mix.bitrate,
                                  settings.CPU_SEPARATION)

        parts = {
            'vocals': static_mix.vocals,
            'drums': static_mix.drums,
            'bass': static_mix.bass,
            'other': static_mix.other
        }

        # Non-local filesystems like S3/Azure Blob do not support source_path()
        is_local = settings.DEFAULT_FILE_STORAGE == 'api.storage.FileSystemStorage'
        path = static_mix.source_path() if is_local else static_mix.source_url()

        if not settings.CPU_SEPARATION:
            # For GPU separation, do separation in a separate process.
            # Otherwise, GPU memory is not automatically freed afterwards.
            process_eval = Process(target=separator.create_static_mix,
                                   args=(parts, path, rel_path))
            process_eval.start()
            try:
                process_eval.join()
            except SoftTimeLimitExceeded as e:
                # Kill process if user aborts task
                process_eval.terminate()
                raise e
        else:
            separator.create_static_mix(parts, path, rel_path)

        # Check file exists
        if os.path.exists(rel_path):
            static_mix.status = TaskStatus.DONE
            if is_local:
                # File is already on local filesystem
                static_mix.file.name = rel_media_path
            else:
                # Need to copy local file to S3/Azure Blob/etc.
                with open(rel_path, 'rb') as raw_file:
                    content_file = ContentFile(raw_file.read())
                content_file.name = filename
                static_mix.file = content_file
                # Remove local file
                os.remove(rel_path)
                # Remove empty directory
                os.rmdir(rel_path_dir)
            static_mix.save()
        else:
            raise Exception('Error writing to file')
    except FileNotFoundError as error:
        print(error)
        print('Please make sure you have FFmpeg and FFprobe installed.')
        static_mix.status = TaskStatus.ERROR
        static_mix.error = str(error)
        static_mix.save()
    except SoftTimeLimitExceeded:
        print('Aborted!')
    except Exception as error:
        print(error)
        static_mix.status = TaskStatus.ERROR
        static_mix.error = str(error)
        static_mix.save()

def create_dynamic_mix(dynamic_mix_id):
    """
    Task to create a dynamic mix and write it to the appropriate storage backend.

    :param dynamic_mix_id: The id of the DynamicMix to be processed
    """
    # Mark as in progress
    try:
        dynamic_mix = DynamicMix.objects.get(id=dynamic_mix_id)
    except DynamicMix.DoesNotExist:
        # Does not exist, perhaps due to stale task
        print('DynamicMix does not exist')
        return
    dynamic_mix.status = TaskStatus.IN_PROGRESS
    dynamic_mix.save()
    try:
        # Get paths
        directory = os.path.join(settings.MEDIA_ROOT, settings.SEPARATE_DIR,
                                 dynamic_mix_id)
        rel_media_path = os.path.join(settings.SEPARATE_DIR, dynamic_mix_id)
        file_prefix = get_valid_filename(dynamic_mix.formatted_prefix())
        file_suffix = dynamic_mix.formatted_suffix()
        rel_path = os.path.join(settings.MEDIA_ROOT, rel_media_path)
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

        separator = get_separator(dynamic_mix.separator,
                                  dynamic_mix.separator_args,
                                  dynamic_mix.bitrate,
                                  settings.CPU_SEPARATION)

        # Non-local filesystems like S3/Azure Blob do not support source_path()
        is_local = settings.DEFAULT_FILE_STORAGE == 'api.storage.FileSystemStorage'
        path = dynamic_mix.source_path() if is_local else dynamic_mix.source_url()

        # Do separation
        if not settings.CPU_SEPARATION:
            # For GPU separation, do separation in a separate process.
            # Otherwise, GPU memory is not automatically freed afterwards.
            process_eval = Process(target=separator.separate_into_parts,
                                   args=(path, rel_path))
            process_eval.start()
            try:
                process_eval.join()
            except SoftTimeLimitExceeded as e:
                # Kill process if user aborts task
                process_eval.terminate()
                raise e
        else:
            separator.separate_into_parts(path, rel_path)

        # Check all parts exist
        if exists_all_parts(rel_path):
            rename_all_parts(rel_path, file_prefix, file_suffix)
            dynamic_mix.status = TaskStatus.DONE
            if is_local:
                save_to_local_storage(dynamic_mix, rel_media_path, file_prefix,
                                      file_suffix)
            else:
                save_to_ext_storage(dynamic_mix, rel_path, file_prefix,
                                    file_suffix)
        else:
            raise Exception('Error writing to file')
    except FileNotFoundError as error:
        print(error)
        print('Please make sure you have FFmpeg and FFprobe installed.')
        dynamic_mix.status = TaskStatus.ERROR
        dynamic_mix.error = str(error)
        dynamic_mix.save()
    except SoftTimeLimitExceeded:
        print('Aborted!')
    except Exception as error:
        print(error)
        dynamic_mix.status = TaskStatus.ERROR
        dynamic_mix.error = str(error)
        dynamic_mix.save()

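# The two tasks above share one pattern: when CPU_SEPARATION is off, the GPU
# work runs in a child process so that all GPU memory is released when that
# process exits, rather than lingering in the long-lived worker. A
# stripped-down sketch of the pattern under that assumption; run_gpu_job and
# gpu_heavy_work are hypothetical names, not part of the code above.
from multiprocessing import Process


def gpu_heavy_work(input_path, output_path):
    # Placeholder for model inference that allocates GPU memory.
    pass


def run_gpu_job(input_path, output_path, timeout=None):
    worker = Process(target=gpu_heavy_work, args=(input_path, output_path))
    worker.start()
    try:
        worker.join(timeout)
    finally:
        if worker.is_alive():
            # Mirror the SoftTimeLimitExceeded handling above: kill the child
            # so the GPU is not left occupied by an aborted task.
            worker.terminate()
            worker.join()
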
def _process_harvest_response(self, next_response: bytes) -> int:
    """ Processes the harvest response content

    While the last response is being processed, the next one is already
    loaded to decrease run time.

    Args:
        next_response (bytes): The response as bytes
    Returns:
        number_found_entries (int): The amount of found metadata records in this response
    """
    xml_response = xml_helper.parse_xml(next_response)
    if xml_response is None:
        csw_logger.error(
            "Response is no valid xml. catalogue: {}, startPosition: {}, maxRecords: {}"
            .format(self.metadata.title, self.start_position,
                    self.max_records_per_request))
        # Abort!
        self.start_position = 0
        return 0

    md_metadata_entries = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Metadata"),
        xml_response) or []
    next_record_position = int(
        xml_helper.try_get_attribute_from_xml_element(
            xml_response,
            "nextRecord",
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
        ))
    self.start_position = next_record_position

    # Fetch found identifiers in parent process, so self.deleted_metadata can be edited easily
    for md_identifier in md_metadata_entries:
        id = xml_helper.try_get_text_from_xml_element(
            md_identifier,
            ".//" + GENERIC_NAMESPACE_TEMPLATE.format("fileIdentifier") + "/" +
            GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
        try:
            self.deleted_metadata.remove(id)
        except KeyError:
            pass

    # Delete response to free memory
    del xml_response

    # Process response via multiple processes
    t_start = time()
    num_processes = int(cpu_count() / 2)
    num_processes = num_processes if num_processes >= 1 else 1
    index_step = int(len(md_metadata_entries) / num_processes)
    start_index = 0
    end_index = 0
    self.resource_list = md_metadata_entries
    process_list = []
    for i in range(0, num_processes):
        if index_step < 1:
            # Fewer records than processes: let a single slice take everything.
            end_index = -1
        else:
            end_index += index_step
        p = Process(target=self._create_metadata_from_md_metadata,
                    args=(start_index, end_index))
        start_index += index_step
        process_list.append(p)
    # Close all connections to force each process to create a new one for itself
    connections.close_all()
    execute_threads(process_list)

    csw_logger.debug(
        "Harvesting '{}': runtime for {} metadata parsing: {}s ####".format(
            self.metadata.title, self.max_records_per_request,
            time() - t_start))
    return len(md_metadata_entries)

def run(self):
    p = Process(target=self._crawl)
    p.start()
    p.join()