def inference_from_dicts(self, dicts, rest_api_schema=False, use_multiprocessing=True):
    """
    Runs down-stream inference using the prediction head.

    :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample.
    :type dicts: [dict]
    :param rest_api_schema: Whether the dicts conform to the schema used in the HTTP API for inference.
    :type rest_api_schema: bool
    :param use_multiprocessing: For a very small number of dicts (e.g. in an HTTP API for inference), the time
                                spent spawning processes can outweigh the performance gain. This flag allows
                                disabling multiprocessing for such cases.
    :type use_multiprocessing: bool
    :return: dict of predictions
    """
    if self.prediction_type == "embedder":
        raise TypeError(
            "You have called inference_from_dicts for a model without any prediction head! "
            "If you want to: "
            "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)` "
            f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head"
        )

    num_cpus = mp.cpu_count() or 1
    dicts_per_cpu = np.ceil(len(dicts) / num_cpus)
    # Automatic adjustment of the multiprocessing chunksize:
    # for small files (containing few dicts) we want a small chunksize to utilize all available cores, but never
    # less than 2, because we need it to sample another random sentence in LM finetuning;
    # for large files we want to minimize process spawning without giving too much data to one process, so we
    # clip it at 5k.
    multiprocessing_chunk_size = int(np.clip((np.ceil(dicts_per_cpu / 5)), a_min=2, a_max=5000))
    dict_batches_to_process = int(len(dicts) / multiprocessing_chunk_size)
    num_cpus_used = min(mp.cpu_count(), dict_batches_to_process) or 1

    if use_multiprocessing:
        with ExitStack() as stack:
            p = stack.enter_context(mp.Pool(processes=num_cpus_used))
            logger.info(
                f"Got ya {num_cpus_used} parallel workers to do inference on {len(dicts)} dicts "
                f"(chunksize = {multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus_used, logger)

            results = p.imap(
                partial(self._multiproc, processor=self.processor, rest_api_schema=rest_api_schema),
                grouper(dicts, multiprocessing_chunk_size),
                1,
            )

            preds_all = []
            with tqdm(total=len(dicts), unit=" Dicts") as pbar:
                for dataset, tensor_names, sample in results:
                    preds_all.extend(self._run_inference(dataset, tensor_names, sample))
                    pbar.update(multiprocessing_chunk_size)
    else:
        chunk = next(grouper(dicts, len(dicts)))
        dataset, tensor_names, sample = self._multiproc(chunk, processor=self.processor, rest_api_schema=rest_api_schema)
        preds_all = self._run_inference(dataset, tensor_names, sample)

    return preds_all
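# NOTE: The function above feeds `grouper(dicts, chunksize)` into `mp.Pool.imap`. The helper below is only a
# minimal sketch of such a chunking utility (in FARM it lives in farm.utils); it is an assumption, not the
# library's exact implementation, and merely illustrates splitting an iterable of dicts into fixed-size chunks
# that the pool can consume lazily.
from itertools import islice


def grouper_sketch(iterable, n):
    """Yield successive lists of up to `n` items from `iterable` (the last chunk may be smaller)."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk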
def _get_dataset(self, filename, dicts=None):
    if not filename and not dicts:
        raise ValueError("You must either supply `filename` or `dicts`")

    # loading dicts from file (default)
    if dicts is None:
        dicts = list(self.processor.file_to_dicts(filename))
        # shuffle list of dicts here if we later want to have a random dev set split from the train set
        if str(self.processor.train_filename) in str(filename):
            if not self.processor.dev_filename:
                if self.processor.dev_split > 0.0:
                    random.shuffle(dicts)

    num_dicts = len(dicts)
    multiprocessing_chunk_size, num_cpus_used = calc_chunksize(
        num_dicts=num_dicts,
        max_processes=self.max_processes,
        max_chunksize=self.max_multiprocessing_chunksize,
    )

    with ExitStack() as stack:
        if self.max_processes > 1:  # use multiprocessing only when max_processes > 1
            p = stack.enter_context(mp.Pool(processes=num_cpus_used))

            logger.info(
                f"Got ya {num_cpus_used} parallel workers to convert {num_dicts} dictionaries "
                f"to pytorch datasets (chunksize = {multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus_used, logger)

            results = p.imap(
                partial(self._dataset_from_chunk, processor=self.processor),
                grouper(dicts, multiprocessing_chunk_size),
                chunksize=1,
            )
        else:
            logger.info(
                f"Multiprocessing disabled, using a single worker to convert {num_dicts} "
                f"dictionaries to pytorch datasets."
            )

            results = map(
                partial(self._dataset_from_chunk, processor=self.processor),
                grouper(dicts, num_dicts),
            )

        datasets = []

        desc = "Preprocessing Dataset"
        if filename:
            desc += f" {filename}"
        with tqdm(total=len(dicts), unit=' Dicts', desc=desc) as pbar:
            for dataset, tensor_names in results:
                datasets.append(dataset)
                # update progress bar (the last step can have fewer dicts than the actual chunk_size)
                pbar.update(min(multiprocessing_chunk_size, pbar.total - pbar.n))

    concat_datasets = ConcatDataset(datasets)
    return concat_datasets, tensor_names
def inference_from_dicts(self, dicts, rest_api_schema=False, use_multiprocessing=True):
    """
    Runs down-stream inference using the prediction head.

    :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample.
    :type dicts: [dict]
    :param rest_api_schema: Whether the dicts conform to the schema used in the HTTP API for inference.
    :type rest_api_schema: bool
    :param use_multiprocessing: For a very small number of dicts (e.g. in an HTTP API for inference), the time
                                spent spawning processes can outweigh the performance gain. This flag allows
                                disabling multiprocessing for such cases.
    :type use_multiprocessing: bool
    :return: dict of predictions
    """
    if self.prediction_type == "embedder":
        raise TypeError(
            "You have called inference_from_dicts for a model without any prediction head! "
            "If you want to: "
            "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)` "
            f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head"
        )

    multiprocessing_chunk_size, num_cpus_used = calc_chunksize(len(dicts))
    if num_cpus_used == mp.cpu_count():
        # We reserve one core for model inference on CPU (besides the GPU computations)
        num_cpus_used -= 1

    if use_multiprocessing:
        with ExitStack() as stack:
            p = stack.enter_context(mp.Pool(processes=num_cpus_used))
            logger.info(
                f"Got ya {num_cpus_used} parallel workers to do inference on {len(dicts)} dicts "
                f"(chunksize = {multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus_used, logger)

            results = p.imap(
                partial(self._create_datasets_chunkwise, processor=self.processor, rest_api_schema=rest_api_schema),
                grouper(dicts, multiprocessing_chunk_size),
                1,
            )

            preds_all = []
            with tqdm(total=len(dicts), unit=" Dicts") as pbar:
                for dataset, tensor_names, baskets in results:
                    # TODO change format of formatted_preds in QA (list of dicts)
                    preds_all.extend(self._run_inference(dataset, tensor_names, baskets, rest_api_schema))
                    pbar.update(multiprocessing_chunk_size)
    else:
        chunk = next(grouper(dicts, len(dicts)))
        dataset, tensor_names, baskets = self._create_datasets_chunkwise(chunk, processor=self.processor, rest_api_schema=rest_api_schema)
        # TODO change format of formatted_preds in QA (list of dicts)
        preds_all = self._run_inference(dataset, tensor_names, baskets, rest_api_schema)

    return preds_all
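# NOTE: `calc_chunksize` is assumed here. The sketch below only mirrors the heuristic spelled out inline in the
# older variants (aim for roughly 5 chunks per core, never fewer than 2 dicts per chunk, clip at 5000); it is
# not necessarily identical to FARM's actual implementation, and the parameter names are illustrative.
import multiprocessing as mp

import numpy as np


def calc_chunksize_sketch(num_dicts, max_processes=128, min_chunksize=2, max_chunksize=5000):
    """Return (chunksize, num_processes_used) for splitting `num_dicts` dicts across a process pool."""
    num_cpus = min(mp.cpu_count(), max_processes) or 1
    dicts_per_cpu = np.ceil(num_dicts / num_cpus)
    # Small inputs -> small chunks so all cores get work; large inputs -> capped chunks so no single
    # process is handed too much data at once.
    chunksize = int(np.clip(np.ceil(dicts_per_cpu / 5), a_min=min_chunksize, a_max=max_chunksize))
    num_processes_used = min(num_cpus, int(num_dicts / chunksize)) or 1
    return chunksize, num_processes_used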
def _get_dataset(self, filename):
    dicts = self.processor._file_to_dicts(filename)
    # shuffle list of dicts here if we later want to have a random dev set split from the train set
    if filename == self.processor.train_filename:
        if not self.processor.dev_filename:
            if self.processor.dev_split > 0.0:
                # random.shuffle() shuffles in place and returns None, so don't reassign its result
                random.shuffle(dicts)

    dict_batches_to_process = int(len(dicts) / self.multiprocessing_chunk_size)
    num_cpus = min(mp.cpu_count(), self.max_processes, dict_batches_to_process) or 1

    with ExitStack() as stack:
        p = stack.enter_context(mp.Pool(processes=num_cpus))
        logger.info(
            f"Got ya {num_cpus} parallel workers to convert dict chunks to datasets "
            f"(chunksize = {self.multiprocessing_chunk_size})..."
        )
        log_ascii_workers(num_cpus, logger)

        results = p.imap(
            partial(self._multiproc, processor=self.processor),
            grouper(dicts, self.multiprocessing_chunk_size),
            chunksize=1,
        )

        datasets = []
        for dataset, tensor_names in tqdm(results, total=len(dicts) / self.multiprocessing_chunk_size):
            datasets.append(dataset)

    concat_datasets = ConcatDataset(datasets)
    return concat_datasets, tensor_names
def _get_dataset(self, filename):
    dicts = self.processor.file_to_dicts(filename)
    # shuffle list of dicts here if we later want to have a random dev set split from the train set
    if self.processor.train_filename in filename:
        if not self.processor.dev_filename:
            if self.processor.dev_split > 0.0:
                random.shuffle(dicts)

    num_dicts = len(dicts)
    multiprocessing_chunk_size, num_cpus_used = calc_chunksize(num_dicts)

    with ExitStack() as stack:
        p = stack.enter_context(mp.Pool(processes=num_cpus_used))
        logger.info(
            f"Got ya {num_cpus_used} parallel workers to convert {num_dicts} dictionaries "
            f"to pytorch datasets (chunksize = {multiprocessing_chunk_size})..."
        )
        log_ascii_workers(num_cpus_used, logger)

        results = p.imap(
            partial(self._multiproc, processor=self.processor),
            grouper(dicts, multiprocessing_chunk_size),
            chunksize=1,
        )

        datasets = []
        with tqdm(total=len(dicts), unit=' Dicts') as pbar:
            for dataset, tensor_names in results:
                datasets.append(dataset)
                pbar.update(multiprocessing_chunk_size)

    concat_datasets = ConcatDataset(datasets)
    return concat_datasets, tensor_names
def _inference_with_multiprocessing(self, dicts, rest_api_schema, aggregate_preds, multiprocessing_chunksize, num_processes):
    """
    Implementation of inference. This method is a generator that yields the results.

    :param dicts: Samples to run inference on provided as a list of dicts or a generator object that yields dicts.
    :type dicts: iter(dict)
    :param rest_api_schema: Whether input dicts use the format that complies with the FARM REST API.
                            Currently only used for QA to switch from squad to a more useful format in production.
                            While the input is almost the same, the output contains additional meta data (offset, context, ...).
    :type rest_api_schema: bool
    :param aggregate_preds: whether to aggregate predictions across different samples (e.g. for QA on long texts)
    :type aggregate_preds: bool
    :param multiprocessing_chunksize: number of dicts to put together in one chunk and feed to one process
    :type multiprocessing_chunksize: int
    :param num_processes: size of multiprocessing.Pool
    :type num_processes: int
    :return: generator object that yields predictions
    :rtype: iter
    """
    # Get us some workers (i.e. processes)
    p = mp.Pool(processes=num_processes)
    logger.info(
        f"Got ya {num_processes} parallel workers to do inference on dicts (chunksize = {multiprocessing_chunksize})..."
    )
    log_ascii_workers(num_processes, logger)

    # We group the input dicts into chunks and feed each chunk to a different process,
    # where it gets converted to a pytorch dataset
    results = p.imap(
        partial(self._create_datasets_chunkwise, processor=self.processor, rest_api_schema=rest_api_schema),
        grouper(iterable=dicts, n=multiprocessing_chunksize),
        1,
    )

    # Once a process spits out a preprocessed chunk, we feed this dataset directly to the model.
    # So we don't need to wait until all preprocessing has finished before getting the first predictions.
    for dataset, tensor_names, baskets in results:
        # TODO change format of formatted_preds in QA (list of dicts)
        if aggregate_preds:
            predictions = self._get_predictions_and_aggregate(dataset, tensor_names, baskets, rest_api_schema, disable_tqdm=True)
        else:
            predictions = self._get_predictions(dataset, tensor_names, baskets, rest_api_schema, disable_tqdm=True)
        yield from predictions

    p.close()
    p.join()
def _inference_with_multiprocessing(self, dicts, return_json, aggregate_preds, multiprocessing_chunksize):
    """
    Implementation of inference. This method is a generator that yields the results.

    :param dicts: Samples to run inference on provided as a list of dicts or a generator object that yields dicts.
    :type dicts: iter(dict)
    :param return_json: Whether the output should be in a json-appropriate format. If False, it returns the
                        prediction object where applicable, else it returns PredObj.to_json()
    :type return_json: bool
    :param aggregate_preds: whether to aggregate predictions across different samples (e.g. for QA on long texts)
    :type aggregate_preds: bool
    :param multiprocessing_chunksize: number of dicts to put together in one chunk and feed to one process
    :type multiprocessing_chunksize: int
    :return: generator object that yields predictions
    :rtype: iter
    """
    # We group the input dicts into chunks and feed each chunk to a different process
    # in the pool, where it gets converted to a pytorch dataset
    results = self.process_pool.imap(
        partial(self._create_datasets_chunkwise, processor=self.processor),
        grouper(iterable=dicts, n=multiprocessing_chunksize),
        1,
    )

    # Once a process spits out a preprocessed chunk, we feed this dataset directly to the model.
    # So we don't need to wait until all preprocessing has finished before getting the first predictions.
    for dataset, tensor_names, problematic_sample_ids, baskets in results:
        self.problematic_sample_ids.update(problematic_sample_ids)
        if dataset is None:
            logger.error(
                f"Part of the dataset could not be converted! \n"
                f"BE AWARE: The order of predictions will not conform with the input order!"
            )
        else:
            # TODO change format of formatted_preds in QA (list of dicts)
            if aggregate_preds:
                predictions = self._get_predictions_and_aggregate(dataset, tensor_names, baskets)
            else:
                predictions = self._get_predictions(dataset, tensor_names, baskets)

            if return_json:
                # TODO this try/except should be removed when all tasks return prediction objects
                try:
                    predictions = [x.to_json() for x in predictions]
                except AttributeError:
                    pass
            yield from predictions
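# NOTE: In this variant the pool is no longer created per call but reused via `self.process_pool`. The class
# below is only a hedged sketch of how such a persistent pool might be set up and torn down; the names are
# illustrative and do not mirror FARM's actual Inferencer internals.
import multiprocessing as mp


class PooledInferencerSketch:
    def __init__(self, num_processes=4):
        # Created once and reused across inference calls to avoid repeated process-spawning overhead.
        self.process_pool = mp.Pool(processes=num_processes)
        self.problematic_sample_ids = set()

    def close(self):
        # Call this when the inferencer is no longer needed, otherwise the worker processes stay alive.
        self.process_pool.close()
        self.process_pool.join()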
def __iter__(self):
    # With IterableDataset, the same __iter__ is copied over to the multiple workers of
    # a Dataloader. Hence, we need to configure the __iter__ to not yield duplicated data
    # when more than 1 worker is used.
    #
    # To avoid duplicates, we need to split the input dicts between the workers.
    # The grouper() converts a dict generator given as input and yields only the
    # dicts that are to be processed by the given worker_id.
    #
    # For instance, consider the input [dictA, dictB, dictC, ...]; then the grouper
    # (with n=2) will return [[dictA, dictB], [dictE, dictF], ...] for worker 1 and
    # [[dictC, dictD], [dictG, dictH], ...] for worker 2.

    if self.dataloader_workers > 1:
        worker_info = torch.utils.data.get_worker_info()
        worker_id = worker_info.id
        dicts = grouper(self.file_to_dicts_generator, n=10, worker_id=worker_id, total_workers=self.dataloader_workers)
    else:
        dicts = grouper(self.file_to_dicts_generator, n=10)

    results = map(self._dataset_from_chunk, dicts)

    batch = []
    for datasets, tensor_names in results:
        if not datasets:
            continue
        self.tensor_names = tensor_names
        for ds in datasets:
            batch.append(ds)
            if len(batch) == self.batch_size:
                yield batch
                batch = []

    if batch:
        yield batch
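# NOTE: The __iter__ above relies on a `grouper` that can shard chunks across DataLoader workers via
# `worker_id` / `total_workers`. The sketch below is an assumption about that behavior (round-robin assignment
# of chunks to workers), matching the dictA/dictB example in the comments; it is not FARM's exact implementation.
from itertools import islice


def sharded_grouper_sketch(iterable, n, worker_id=0, total_workers=1):
    """Yield every `total_workers`-th chunk of size `n`, starting at chunk index `worker_id`."""
    it = iter(iterable)
    chunk_idx = 0
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        if chunk_idx % total_workers == worker_id:
            yield chunk
        chunk_idx += 1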
def _get_dataset(self, filename):
    dicts = self.processor.file_to_dicts(filename)
    # shuffle list of dicts here if we later want to have a random dev set split from the train set
    if self.processor.train_filename in filename:
        if not self.processor.dev_filename:
            if self.processor.dev_split > 0.0:
                random.shuffle(dicts)

    num_cpus = min(mp.cpu_count(), self.max_processes) or 1
    dicts_per_cpu = np.ceil(len(dicts) / num_cpus)
    # Automatic adjustment of the multiprocessing chunksize:
    # for small files (containing few dicts) we want a small chunksize to utilize all available cores, but never
    # less than 2, because we need it to sample another random sentence in LM finetuning;
    # for large files we want to minimize process spawning without giving too much data to one process, so we
    # clip it at 5k.
    multiprocessing_chunk_size = int(np.clip((np.ceil(dicts_per_cpu / 5)), a_min=2, a_max=5000))
    dict_batches_to_process = int(len(dicts) / multiprocessing_chunk_size)
    num_cpus_used = min(mp.cpu_count(), self.max_processes, dict_batches_to_process) or 1

    with ExitStack() as stack:
        p = stack.enter_context(mp.Pool(processes=num_cpus_used))
        logger.info(
            f"Got ya {num_cpus_used} parallel workers to convert dict chunks to datasets "
            f"(chunksize = {multiprocessing_chunk_size})..."
        )
        log_ascii_workers(num_cpus_used, logger)

        results = p.imap(
            partial(self._multiproc, processor=self.processor),
            grouper(dicts, multiprocessing_chunk_size),
            chunksize=1,
        )

        datasets = []
        with tqdm(total=len(dicts), unit=' Dicts') as pbar:
            for dataset, tensor_names in results:
                datasets.append(dataset)
                pbar.update(multiprocessing_chunk_size)

    concat_datasets = ConcatDataset(datasets)
    return concat_datasets, tensor_names
def inference_from_dicts(self, dicts, rest_api_schema=False, max_processes=128):
    """
    Runs down-stream inference on samples created from input dictionaries.
    The format of the input `dicts` depends on the task:

    QA:                    [{"qas": ["What is X?"], "context": "Some context containing the answer"}]
    Classification / NER:  [{"text": "Some input text"}]

    :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample.
    :type dicts: [dict]
    :param rest_api_schema: Whether input dicts use the format that complies with the FARM REST API.
                            Currently only used for QA to switch from squad to a more useful format in production.
                            While the input is almost the same, the output contains additional meta data (offset, context, ...).
    :type rest_api_schema: bool
    :param max_processes: The maximum size of `multiprocessing.Pool`. Set to a value of 1 to disable multiprocessing.
                          If you want to debug the Language Model, you might need to disable multiprocessing!
                          For a very small number of dicts (e.g. in an HTTP API for inference), the time spent
                          spawning processes can outweigh the performance gain, so multiprocessing should be
                          disabled in such cases.
    :type max_processes: int
    :return: dict of predictions
    """
    if self.prediction_type == "embedder":
        raise TypeError(
            "You have called inference_from_dicts for a model without any prediction head! "
            "If you want to: "
            "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)` "
            f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head"
        )

    # Using multiprocessing
    if max_processes > 1:  # use multiprocessing if max_processes > 1
        multiprocessing_chunk_size, num_cpus_used = calc_chunksize(len(dicts), max_processes=max_processes)

        with ExitStack() as stack:
            # Get us some workers (i.e. processes)
            p = stack.enter_context(mp.Pool(processes=num_cpus_used))
            logger.info(
                f"Got ya {num_cpus_used} parallel workers to do inference on {len(dicts)} dicts "
                f"(chunksize = {multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus_used, logger)

            # We group the input dicts into chunks and feed each chunk to a different process,
            # where it gets converted to a pytorch dataset
            results = p.imap(
                partial(self._create_datasets_chunkwise, processor=self.processor, rest_api_schema=rest_api_schema),
                grouper(dicts, multiprocessing_chunk_size),
                1,
            )

            # Once a process spits out a preprocessed chunk, we feed this dataset directly to the model.
            # So we don't need to wait until all preprocessing has finished before getting the first predictions.
            preds_all = []
            with tqdm(total=len(dicts), unit=" Dicts") as pbar:
                for dataset, tensor_names, baskets in results:
                    # TODO change format of formatted_preds in QA (list of dicts)
                    preds_all.extend(self._get_predictions(dataset, tensor_names, baskets, rest_api_schema))
                    pbar.update(multiprocessing_chunk_size)

    # Using a single process (helpful for debugging!)
    else:
        chunk = next(grouper(dicts, len(dicts)))
        dataset, tensor_names, baskets = self._create_datasets_chunkwise(chunk, processor=self.processor, rest_api_schema=rest_api_schema)
        # TODO change format of formatted_preds in QA (list of dicts)
        preds_all = self._get_predictions(dataset, tensor_names, baskets, rest_api_schema)

    return preds_all
def inference_from_dicts(self, dicts, rest_api_schema=False, max_processes=128, min_chunksize=4):
    """
    Runs down-stream inference on samples created from input dictionaries.
    The format of the input `dicts` depends on the task:

    QA:                                [{"qas": ["What is X?"], "context": "Some context containing the answer"}]
    Classification / NER / embeddings: [{"text": "Some input text"}]

    :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample.
    :type dicts: [dict]
    :param rest_api_schema: Whether input dicts use the format that complies with the FARM REST API.
                            Currently only used for QA to switch from squad to a more useful format in production.
                            While the input is almost the same, the output contains additional meta data (offset, context, ...).
    :type rest_api_schema: bool
    :param max_processes: The maximum size of `multiprocessing.Pool`. Set to a value of 1 to disable multiprocessing.
                          If you want to debug the Language Model, you might need to disable multiprocessing!
                          For a very small number of dicts (e.g. in an HTTP API for inference), the time spent
                          spawning processes can outweigh the performance gain, so multiprocessing should be
                          disabled in such cases.
    :type max_processes: int
    :param min_chunksize: minimum number of dicts to put together in one chunk and feed to one process
                          (only relevant if you use multiprocessing)
    :type min_chunksize: int
    :return: dict of predictions
    """
    # whether to aggregate predictions across different samples (e.g. for QA on long texts)
    aggregate_preds = False
    if len(self.model.prediction_heads) > 0:
        aggregate_preds = hasattr(self.model.prediction_heads[0], "aggregate_preds")

    # Using multiprocessing
    if max_processes > 1:  # use multiprocessing if max_processes > 1
        multiprocessing_chunk_size, num_cpus_used = calc_chunksize(len(dicts),
                                                                   max_processes=max_processes,
                                                                   min_chunksize=min_chunksize)

        # Get us some workers (i.e. processes)
        p = mp.Pool(processes=num_cpus_used)
        logger.info(
            f"Got ya {num_cpus_used} parallel workers to do inference on {len(dicts)} dicts "
            f"(chunksize = {multiprocessing_chunk_size})..."
        )
        log_ascii_workers(num_cpus_used, logger)

        # We group the input dicts into chunks and feed each chunk to a different process,
        # where it gets converted to a pytorch dataset
        results = p.imap(
            partial(self._create_datasets_chunkwise, processor=self.processor, rest_api_schema=rest_api_schema),
            grouper(dicts, multiprocessing_chunk_size),
            1,
        )

        # Once a process spits out a preprocessed chunk, we feed this dataset directly to the model.
        # So we don't need to wait until all preprocessing has finished before getting the first predictions.
        preds_all = []
        with tqdm(total=len(dicts), desc="Inferencing Dicts", unit=" Dicts") as pbar:
            for dataset, tensor_names, baskets in results:
                # TODO change format of formatted_preds in QA (list of dicts)
                if aggregate_preds:
                    preds_all.extend(self._get_predictions_and_aggregate(dataset, tensor_names, baskets,
                                                                         rest_api_schema, disable_tqdm=True))
                else:
                    preds_all.extend(self._get_predictions(dataset, tensor_names, baskets,
                                                           rest_api_schema, disable_tqdm=True))
                pbar.update(multiprocessing_chunk_size)
        p.close()
        p.join()

    # Using a single process (helpful for debugging!)
    else:
        chunk = next(grouper(dicts, len(dicts)))
        dataset, tensor_names, baskets = self._create_datasets_chunkwise(chunk,
                                                                         processor=self.processor,
                                                                         rest_api_schema=rest_api_schema)
        # TODO change format of formatted_preds in QA (list of dicts)
        if aggregate_preds:
            preds_all = self._get_predictions_and_aggregate(dataset, tensor_names, baskets, rest_api_schema)
        else:
            preds_all = self._get_predictions(dataset, tensor_names, baskets, rest_api_schema)

    return preds_all
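# NOTE: Hedged usage sketch for inference_from_dicts, not taken from the source. It assumes FARM's public
# `Inferencer.load(...)` entry point; the model name is a placeholder and the exact load signature may differ
# between FARM versions. The dict format follows the docstring above (QA vs. Classification / NER / embeddings).
from farm.infer import Inferencer

qa_dicts = [{"qas": ["What is FARM?"],
             "context": "FARM is an open-source transfer learning framework developed by deepset."}]

inferencer = Inferencer.load("deepset/bert-base-cased-squad2")
# max_processes=1 disables multiprocessing, which is handy for debugging or small HTTP-API style requests
predictions = inferencer.inference_from_dicts(dicts=qa_dicts, max_processes=1)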