def separate_into_parts(self, input_path: str, output_path: str):
    """Creates a dynamic mix by separating the track into stems and exporting each as an MP3.

    :param input_path: Input path
    :param output_path: Output path
    """
    input_path = Path(input_path)
    output_path = Path(output_path)
    model = self.get_model()
    raw_sources = self.apply_model(model, input_path)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []

    for source, name in zip(raw_sources, ['drums', 'bass', 'other', 'vocals']):
        source = source.cpu().transpose(0, 1).numpy()
        filename = f'{name}.mp3'

        print(f'Exporting {name} MP3...')
        task = pool.apply_async(self.audio_adapter.save,
                                (output_path / filename, source,
                                 self.sample_rate, 'mp3', self.bitrate))
        tasks.append(task)

    try:
        pool.close()
        pool.join()
    except SoftTimeLimitExceeded as e:
        pool.terminate()
        raise e
def separate_into_parts(self, input_path: str, output_path: Path):
    # Check if we downloaded a webpage instead of the actual model file
    file_exists = self.model_file_path.is_file()
    mime = None
    if file_exists:
        mime = magic.from_file(str(self.model_file_path), mime=True)

    download_and_verify(MODEL_URL,
                        self.model_dir,
                        self.model_file_path,
                        force=(file_exists and mime == 'text/html'))

    parts = {
        'vocals': True,
        'drums': True,
        'bass': True,
        'other': True
    }
    estimates = self.get_estimates(input_path, parts)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []
    output_path = Path(output_path)

    for name, estimate in estimates.items():
        filename = f'{name}.mp3'
        print(f'Exporting {name} MP3...')
        task = pool.apply_async(self.audio_adapter.save,
                                (output_path / filename, estimate,
                                 self.sample_rate, 'mp3', self.bitrate))
        tasks.append(task)

    pool.close()
    pool.join()
def separate_into_parts(self, input_path: str, output_path: str):
    """Creates a dynamic mix by separating the track into stems and exporting each as an MP3.

    :param input_path: Input path
    :param output_path: Output path
    """
    input_path = Path(input_path)
    output_path = Path(output_path)
    self.download_and_verify()
    raw_sources = self.apply_model(input_path)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []

    for source, name in zip(raw_sources, ['drums', 'bass', 'other', 'vocals']):
        # Convert float audio in [-1, 1] to int16 PCM before handing it to the encoder
        source = (source * 2**15).clamp_(-2**15, 2**15 - 1).short()
        source = source.cpu().transpose(0, 1).numpy()
        filename = f'{name}.mp3'

        print(f'Exporting {name} MP3...')
        task = pool.apply_async(encode_mp3,
                                (source, str(output_path / filename),
                                 self.bitrate, self.verbose))
        tasks.append(task)

    try:
        pool.close()
        pool.join()
    except SoftTimeLimitExceeded as e:
        pool.terminate()
        raise e
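A minimal sketch, using PyTorch only, of the float-to-int16 conversion applied to each source above before MP3 encoding; the input shape (channels, samples) and the random stand-in audio are assumptions, not taken from the separator.

# Standalone sketch of the float->int16 conversion above (assumed input: a float
# tensor of shape (channels, samples) with values roughly in [-1, 1]).
import torch

source = torch.rand(2, 44100) * 2 - 1                      # fake stereo audio in [-1, 1)
pcm = (source * 2**15).clamp_(-2**15, 2**15 - 1).short()   # scale and clamp to the int16 range
pcm = pcm.cpu().transpose(0, 1).numpy()                    # -> (samples, channels) int16 array
print(pcm.shape, pcm.dtype)                                # (44100, 2) int16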
async def waiter():
    def _call(child):
        return child()

    pool = Pool(forking_factor)
    results = pool.map(_call, children)
    pool.close()
    pool.join()
    return results
def query_tweets(query, limit=None, begindate=dt.date(2006, 3, 21),
                 enddate=dt.date.today(), poolsize=20, lang='',
                 use_proxies=False):
    no_days = (enddate - begindate).days

    if no_days < 0:
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_days
    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, no_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    # If we are setting pool size to 1, add a pause between requests to avoid IP ban by Twitter.
    throttled = poolsize == 1 and not use_proxies

    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once, throttled=throttled,
                            limit=limit_per_pool, lang=lang,
                            use_proxies=use_proxies), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
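A self-contained sketch of the date-splitting logic used above, assuming only the standard library and numpy; the concrete query string, dates, and pool size are illustrative values, not the scraper's defaults.

# Standalone sketch of the date-range splitting above: divide [begindate, enddate)
# into `poolsize` sub-ranges and build one "since:... until:..." query per sub-range.
# All concrete values below are assumptions for illustration.
import datetime as dt
from numpy import linspace

query = 'python'                      # illustrative search term
begindate = dt.date(2021, 1, 1)
enddate = dt.date(2021, 1, 11)
poolsize = 5

no_days = (enddate - begindate).days  # 10 days -> 5 ranges of 2 days each
dateranges = [begindate + dt.timedelta(days=elem)
              for elem in linspace(0, no_days, poolsize + 1)]
queries = ['{} since:{} until:{}'.format(query, since, until)
           for since, until in zip(dateranges[:-1], dateranges[1:])]
print(queries)
# ['python since:2021-01-01 until:2021-01-03', ..., 'python since:2021-01-09 until:2021-01-11']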
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False,
                 progressbar_title="Feature Extraction", show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(processes=n_workers,
                         initializer=initialize_warnings_in_workers,
                         initargs=(show_warnings, ))
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the process pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again a list of chunks -
            and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        return self.pool.imap_unordered(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Collects the result from the workers and closes the process pool.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
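A self-contained sketch of the same pattern the distributor relies on: a process pool with a per-worker initializer, consumed lazily through imap_unordered, then shut down with close()/join(). The worker and initializer names below are made up for illustration and are not part of the distributor's API.

# Standalone sketch (illustrative names, not the distributor's real API):
# a multiprocessing Pool with a per-worker initializer, consumed via imap_unordered.
from functools import partial
from multiprocessing import Pool
import warnings


def init_worker(show_warnings):
    # Runs once in every worker process before it picks up jobs.
    if not show_warnings:
        warnings.simplefilter('ignore')


def extract(chunk, multiplier):
    # Stand-in for the per-chunk work function.
    return [value * multiplier for value in chunk]


if __name__ == '__main__':
    pool = Pool(processes=2, initializer=init_worker, initargs=(True,))
    chunks = [[1, 2], [3, 4], [5, 6]]
    try:
        for result in pool.imap_unordered(partial(extract, multiplier=10), chunks):
            print(result)            # results arrive in completion order, not input order
    finally:
        pool.close()
        pool.join()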
def query_tweets(query, limit=None, begindate=dt.datetime(2006, 3, 21),
                 enddate=dt.datetime.now(), poolsize=20, lang=''):
    # Use a datetime default for begindate (so it can be subtracted from a datetime)
    # and total_seconds() so the whole span is counted, not just the seconds
    # component of the timedelta.
    no_secs = int((enddate - begindate).total_seconds())

    if no_secs < 0:
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_secs:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_secs
    dateranges = [
        begindate + dt.timedelta(seconds=elem)
        for elem in linspace(0, no_secs, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since_time:{} until_time:{}'.format(
            query,
            int(time.mktime(since.timetuple())),
            int(time.mktime(until.timetuple())))
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once, limit=limit_per_pool, lang=lang),
                    queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
def query_tweets_parallel(query, limit=None, begindate=dt.date(2006, 3, 21),
                          enddate=dt.date.today(), poolsize=20, lang='',
                          use_proxy=True):
    # use_proxy was referenced below but never defined; exposed here as a
    # keyword argument (default assumed).
    number_days = (enddate - begindate).days

    if poolsize > number_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = number_days
    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, number_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once, limit=limit_per_pool,
                            lang=lang, use_proxy=use_proxy), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
def extractor_pool(self, func, iterable):
    '''
    Extract items (billiard multiprocessing use)

    :param func: function
    :param iterable: list
    '''
    _finalizers = list()
    p = Pool(processes=cpu_count())
    _finalizers.append(Finalize(p, p.terminate))
    try:
        p.map_async(func, iterable)
        p.close()
        p.join()
    finally:
        p.terminate()
def separate_into_parts(self, input_path: str, output_path: Path):
    self.download_and_verify()
    ctx = get_extension_context(self.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    audio, _ = self.audio_adapter.load(input_path, sample_rate=self.sample_rate)

    if audio.shape[1] > 2:
        warnings.warn('Channel count > 2! '
                      'Only the first two channels will be processed!')
        audio = audio[:, :2]

    if audio.shape[1] == 1:
        print('Received a mono file, so duplicating channels')
        audio = np.repeat(audio, 2, axis=1)

    print('Separating...')
    estimates = separate(audio,
                         model_path=str(self.model_file_path),
                         niter=self.iterations,
                         alpha=self.alpha,
                         softmask=self.softmask,
                         residual_model=self.residual_model)

    output_path = Path(output_path)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []

    for name, estimate in estimates.items():
        filename = f'{name}.mp3'
        print(f'Exporting {name} MP3...')
        task = pool.apply_async(self.audio_adapter.save,
                                (os.path.join(output_path, filename), estimate,
                                 self.sample_rate, 'mp3', self.bitrate))
        tasks.append(task)

    pool.close()
    pool.join()
def separate_into_parts(self, input_path: str, output_path: Path):
    self.download_and_verify()
    estimates = self.get_estimates(input_path)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []
    output_path = Path(output_path)

    for name, estimate in estimates.items():
        filename = f'{name}.mp3'
        print(f'Exporting {name} MP3...')
        task = pool.apply_async(self.audio_adapter.save,
                                (os.path.join(output_path, filename), estimate,
                                 self.sample_rate, 'mp3', self.bitrate))
        tasks.append(task)

    pool.close()
    pool.join()
def query_tweets(query, limit=None, begindate=dt.date(2006, 3, 21),
                 enddate=dt.date.today(), poolsize=20, lang=''):
    no_days = (enddate - begindate).days

    if no_days < 0:
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_days:
        poolsize = no_days
    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, no_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        for new_tweets in pool.imap_unordered(
                partial(query_tweets_once, limit=limit_per_pool, lang=lang),
                queries):
            all_tweets.extend(new_tweets)
    finally:
        pool.close()
        pool.join()

    return all_tweets
def scrape(self, keywords):
    all_tweets = []
    pool_size = 20
    start_date = dt.date.today() - dt.timedelta(14)
    query = " ".join(keywords)

    no_of_days = (dt.date.today() - start_date).days
    if no_of_days < pool_size:
        pool_size = no_of_days
    date_ranges = [
        start_date + dt.timedelta(days=elem)
        for elem in np.linspace(0, no_of_days, pool_size + 1)
    ]

    if self.limit and pool_size:
        self.limit = (self.limit // pool_size) + 1

    queries = [
        "{} since:{} until:{}".format(query, since, until)
        for since, until in zip(date_ranges[:-1], date_ranges[1:])
    ]

    pool = Pool(pool_size)
    logging.info("queries: {}".format(queries))
    try:
        for new_tweets in pool.imap_unordered(self.get_tweets, queries):
            all_tweets.extend(new_tweets)
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Returning all tweets "
                     "gathered so far.")
    finally:
        pool.close()
        pool.join()

    return all_tweets
class RenderAndSave():
    def __init__(self, async_mode=True):
        """
        :param async_mode: False is used for testing
        """
        self.async_mode = async_mode

    @staticmethod
    def get_available_sites(news_portal=None, news_dep=None) -> list:
        """
        Get the list of available sites.

        :param news_portal: news portal whose active topic sections will be
            searched; if not given, all active sites are searched
        :param news_dep: news section of the portal
        :return: query
        """
        if news_portal:
            return [
                Site.objects.filter(is_active=True,
                                    news_portal=news_portal,
                                    news_department=news_dep)
            ]
        return [site for site in Site.objects.filter(is_active=True)]

    @staticmethod
    def _save_postgr(results):
        """
        Save parsing results to PostgreSQL.

        :param results: list of tuples, each containing a dict that describes
            the parsing result for a single article
        :return:
        """
        for part in results:
            for result in part:
                Article.objects.get_or_create(
                    link=result['news_link'],
                    has_prices=get_has_prices(result['main_text']),
                    has_percents=get_has_percents(result['main_text']),
                    frequent_words=get_frequent_words(result['main_text']),
                    content=result)

    def _async_worker(self, sites_list) -> tuple:
        """
        Run the parser in asynchronous mode.

        :param sites_list: list of sites to analyse
        :return: list of tuples with the parsing results
        """
        self.process_pool = Pool(processes=settings.PROCESS_AMOUNT)
        results = [
            self.process_pool.apply_async(
                settings.AVAILABLE_RENDERS[site.news_portal],
                args=(site.target_url, )) for site in sites_list
        ]
        clean_data = [i.get() for i in results]
        self.process_pool.close()
        self.process_pool.join()
        return clean_data

    def _sync_worker(self, site) -> list:
        """
        Run the parser in synchronous mode.

        :param site: site to analyse
        :return: list of tuples with the parsing results
        """
        try:
            site = site[0]
            return [
                settings.AVAILABLE_RENDERS[site.news_portal].__call__(
                    site.target_url)
            ]
        except Exception as err:
            logger.error(err)

    def run_parser(self) -> list:
        """
        Run the parser.

        :return:
        """
        sites_list = self.get_available_sites()
        if self.async_mode:
            data_from_site = self._async_worker(sites_list)
        else:
            data_from_site = self._sync_worker(sites_list)
        if not data_from_site:
            logger.info("Site parsing error")
            raise Exception("Site parsing error")
        self._save_postgr(data_from_site)
        return [i.target_url for i in sites_list]
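A minimal standalone sketch of the fan-out used in _async_worker: submit one apply_async job per item, then collect each result with .get() before close()/join(). The render function and URLs below are placeholders, not entries from settings.AVAILABLE_RENDERS.

# Standalone sketch of the apply_async fan-out in _async_worker
# (render_site is a placeholder, not a real AVAILABLE_RENDERS entry).
from multiprocessing import Pool


def render_site(url):
    # Placeholder for a per-site parser; returns a fake parse result.
    return {'news_link': url, 'main_text': 'parsed text for ' + url}


if __name__ == '__main__':
    urls = ['https://example.com/a', 'https://example.com/b']
    pool = Pool(processes=2)
    # One asynchronous job per site; .get() blocks until that job finishes.
    async_results = [pool.apply_async(render_site, args=(url,)) for url in urls]
    clean_data = [res.get() for res in async_results]
    pool.close()
    pool.join()
    print(clean_data)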
def crawl_keywords(what):
    print(what)
    p = Pool(3)
    p.apply_async(crawl)
    p.close()
    p.join()