def __init__(
    self,
    serialization_dir: str,
    cuda_device: Union[int, torch.device] = -1,
    distributed: bool = False,
    local_rank: int = 0,
    world_size: int = 1,
) -> None:
    """
    Validate the device / distributed settings and record them on the instance.

    # Parameters

    serialization_dir : `str`
        Stored unchanged as `self._serialization_dir`.
    cuda_device : `Union[int, torch.device]`, optional (default=`-1`)
        Passed to `check_for_gpu`, then stored as `self.cuda_device` after
        conversion with `int_to_device`. A `list` is rejected.
    distributed : `bool`, optional (default=`False`)
        When set, `world_size` must be greater than 1.
    local_rank : `int`, optional (default=`0`)
        Stored as `self._rank`; `self._master` is `True` only for rank 0.
    world_size : `int`, optional (default=`1`)
        Stored as `self._world_size`.

    # Raises

    ConfigurationError
        If `cuda_device` is a list, or if `distributed` is set while
        `world_size <= 1`.
    """
    check_for_gpu(cuda_device)
    self._serialization_dir = serialization_dir

    # A list of devices is a configuration mistake here: one process, one device.
    got_device_list = isinstance(cuda_device, list)
    if got_device_list:
        raise ConfigurationError(
            "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
            "Instead, we use torch's DistributedDataParallel at the command level, meaning "
            "our Trainer always uses a single GPU per process."
        )

    if distributed:
        if world_size <= 1:
            raise ConfigurationError(
                "Distributed training can be performed only with more than 1 device. Check "
                "`cuda_device` key in the experiment configuration."
            )

    self.cuda_device = int_to_device(cuda_device)

    # Bookkeeping for distributed runs.
    self._distributed = distributed
    self._world_size = world_size
    self._rank = local_rank
    self._master = local_rank == 0
def __init__(
    self,
    name: str,
    model: Model,
    optimizer: Optimizer,
    cuda_device: Optional[int],
    grad_norm: Optional[float] = None,
    scaler: Optional[amp.GradScaler] = None,
    grad_clipping: Optional[float] = None,
    learning_rate_scheduler: Optional[LearningRateScheduler] = None,
    momentum_scheduler: Optional[MomentumScheduler] = None,
) -> None:
    """
    Bundle a model with its optimizer and the optional training helpers.

    # Parameters

    name : `str`
        Stored as `self.name`.
    model : `Model`
        Stored as `self.model`.
    optimizer : `Optimizer`
        Stored as `self._optimizer`.
    cuda_device : `Optional[int]`
        Device to use. `None` selects device `0` when `torch.cuda` reports at
        least one device and `-1` otherwise. The value is passed to
        `check_for_gpu` and stored, after `int_to_device`, as `self._cuda_device`.
    grad_norm : `Optional[float]`, optional (default=`None`)
        Stored as `self._grad_norm`.
    scaler : `Optional[amp.GradScaler]`, optional (default=`None`)
        Stored as `self._scaler`.
    grad_clipping : `Optional[float]`, optional (default=`None`)
        Stored as `self._grad_clipping`.
    learning_rate_scheduler : `Optional[LearningRateScheduler]`, optional (default=`None`)
        Stored as `self._learning_rate_scheduler`.
    momentum_scheduler : `Optional[MomentumScheduler]`, optional (default=`None`)
        Stored as `self._momentum_scheduler`.
    """
    self.name = name
    self.model = model
    self._optimizer = optimizer

    if cuda_device is None:
        # Imported lazily, as in the rest of this initializer's device handling.
        from torch import cuda

        cuda_device = 0 if cuda.device_count() > 0 else -1

    check_for_gpu(cuda_device)
    self._cuda_device = int_to_device(cuda_device)

    self._grad_norm = grad_norm
    self._scaler = scaler
    self._grad_clipping = grad_clipping
    self._learning_rate_scheduler = learning_rate_scheduler
    self._momentum_scheduler = momentum_scheduler

    # One loss tracker per phase.
    self._loss = {'train': ComponentLoss(), 'validation': ComponentLoss()}
def __init__(
    self,
    local_rank: Optional[int] = None,
    world_size: Optional[int] = None,
    cuda_device: Union[torch.device, int] = -1,
) -> None:
    """
    Record the distributed rank, world size and device for this process.

    # Parameters

    local_rank : `Optional[int]`, optional (default=`None`)
        Rank of this process. When `None`, `dist.get_rank()` is used.
    world_size : `Optional[int]`, optional (default=`None`)
        Number of processes. When `None`, `dist.get_world_size()` is used.
    cuda_device : `Union[torch.device, int]`, optional (default=`-1`)
        Converted with `int_to_device` and stored as `self.cuda_device`.
    """
    self.local_rank: int = local_rank if local_rank is not None else dist.get_rank()
    self.world_size: int = world_size if world_size is not None else dist.get_world_size()
    # Compare the *resolved* rank: the raw argument may be `None`, in which case
    # `local_rank == 0` would be `False` even on the rank-0 process.
    self.is_primary: bool = self.local_rank == 0
    self.cuda_device = int_to_device(cuda_device)
def __init__(
    self,
    serialization_dir: str = None,
    cuda_device: Optional[Union[int, torch.device]] = None,
    distributed: bool = False,
    local_rank: int = 0,
    world_size: int = 1,
) -> None:
    """
    Resolve the device and serialization directory, then record the
    distributed settings on the instance.

    # Parameters

    serialization_dir : `str`, optional (default=`None`)
        Directory stored as `self._serialization_dir` and created if missing.
        When `None`, a fresh directory from `tempfile.mkdtemp()` is used.
    cuda_device : `Optional[Union[int, torch.device]]`, optional (default=`None`)
        When `None`, device `0` is chosen if `torch.cuda` reports any device,
        otherwise `-1`. A `list` is rejected.
    distributed : `bool`, optional (default=`False`)
        When set, `world_size` must be greater than 1.
    local_rank : `int`, optional (default=`0`)
        Stored as `self._rank`; `self._primary` is `True` only for rank 0.
    world_size : `int`, optional (default=`1`)
        Stored as `self._world_size`.

    # Raises

    ConfigurationError
        If `cuda_device` is a list, or if `distributed` is set while
        `world_size <= 1`.
    """
    if cuda_device is None:
        from torch import cuda

        cuda_device = 0 if cuda.device_count() > 0 else -1
    check_for_gpu(cuda_device)

    if serialization_dir is not None:
        self._serialization_dir = serialization_dir
    else:
        import tempfile

        self._serialization_dir = tempfile.mkdtemp()
    # Ensure serialization directory exists.
    os.makedirs(self._serialization_dir, exist_ok=True)

    got_device_list = isinstance(cuda_device, list)
    if got_device_list:
        raise ConfigurationError(
            "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
            "Instead, we use torch's DistributedDataParallel at the command level, meaning "
            "our Trainer always uses a single GPU per process."
        )

    if distributed:
        if world_size <= 1:
            raise ConfigurationError(
                "Distributed training can be performed only with more than 1 device. Check "
                "`cuda_device` key in the experiment configuration."
            )

    self.cuda_device = int_to_device(cuda_device)

    # Bookkeeping for distributed runs.
    self._distributed = distributed
    self._world_size = world_size
    self._rank = local_rank
    self._primary = local_rank == 0
def move_to_device(obj, cuda_device: Union[torch.device, int]):
    """
    Recursively copy the tensors inside `obj` onto `cuda_device`.

    Dicts, lists, tuples and named tuples (tuples exposing `_fields`) are
    rebuilt with their members moved; tensors are moved with `.cuda(...)`.
    `obj` itself is returned untouched when the target is the CPU device, when
    `has_tensor(obj)` is false, or when `obj` is none of the handled types.
    """
    from allennlp.common.util import int_to_device

    target = int_to_device(cuda_device)

    # Nothing to do for CPU targets or tensor-free structures.
    if target == torch.device("cpu") or not has_tensor(obj):
        return obj

    if isinstance(obj, torch.Tensor):
        return obj.cuda(target)

    if isinstance(obj, dict):
        return {name: move_to_device(member, target) for name, member in obj.items()}

    if isinstance(obj, list):
        return [move_to_device(member, target) for member in obj]

    if isinstance(obj, tuple):
        moved = (move_to_device(member, target) for member in obj)
        if hasattr(obj, "_fields"):
            # Named tuples are rebuilt through their own constructor.
            return obj.__class__(*moved)
        return tuple(moved)

    return obj
def __init__(
    self,
    model: Model,
    train_data_path: DatasetReaderInput,
    train_dataset_reader: DatasetReader,
    *,
    test_dataset_reader: Optional[DatasetReader] = None,
    train_data_loader: Lazy[DataLoader] = Lazy(SimpleDataLoader.from_dataset_reader),
    test_data_loader: Lazy[DataLoader] = Lazy(SimpleDataLoader.from_dataset_reader),
    params_to_freeze: Optional[List[str]] = None,
    cuda_device: int = -1,
) -> None:
    """
    Set up the model, the training data loader and the (lazy) test data loader.

    # Parameters

    model : `Model`
        Stored as `self.model` and moved to the chosen device; its `vocab`
        is stored as `self.vocab`.
    train_data_path : `DatasetReaderInput`
        Passed as `data_path` when constructing the training data loader.
    train_dataset_reader : `DatasetReader`
        Reader used to construct the training data loader.
    test_dataset_reader : `Optional[DatasetReader]`, optional (default=`None`)
        Falls back to `train_dataset_reader` when not given.
    train_data_loader : `Lazy[DataLoader]`
        Constructed immediately with `batch_size=1`.
    test_data_loader : `Lazy[DataLoader]`
        Kept lazy in `self._lazy_test_data_loader`.
    params_to_freeze : `Optional[List[str]]`, optional (default=`None`)
        Regex patterns; a parameter whose name `re.match`es any of them gets
        `requires_grad = False`.
    cuda_device : `int`, optional (default=`-1`)
        Converted with `int_to_device` and stored as `self.device`.
    """
    self.model = model
    self.vocab = model.vocab
    self.device = int_to_device(cuda_device)

    # Training data: built eagerly, one instance per batch.
    self._train_data_path = train_data_path
    self._train_loader = train_data_loader.construct(
        reader=train_dataset_reader,
        data_path=train_data_path,
        batch_size=1,
    )
    loader = self._train_loader
    loader.set_target_device(self.device)
    loader.index_with(self.vocab)

    # Test data: only the reader and the lazy loader are kept for now.
    self._test_dataset_reader = (
        test_dataset_reader if test_dataset_reader else train_dataset_reader
    )
    self._lazy_test_data_loader = test_data_loader

    self.model.to(self.device)

    if params_to_freeze is not None:
        for param_name, parameter in self.model.named_parameters():
            hits = [re.match(pattern, param_name) for pattern in params_to_freeze]
            if any(hits):
                parameter.requires_grad = False

    # These variables are set when the corresponding public properties are accessed.
    # This is not set until we actually run the calculation since some parameters might not be used.
    self._used_params: Optional[List[torch.nn.Parameter]] = None
    self._used_param_names: Optional[List[str]] = None
    self._train_instances: Optional[List[InstanceWithGrads]] = None
def check_for_gpu(device: Union[int, torch.device, List[Union[int, torch.device]]]):
    """
    Raise `ConfigurationError` if a requested GPU device is not available.

    `None` is accepted and ignored. A list is checked element by element.
    Any other value is converted with `int_to_device`; the CPU device always
    passes, while a GPU device must have an index below `cuda.device_count()`.
    """
    if device is None:
        return

    if isinstance(device, list):
        for entry in device:
            check_for_gpu(entry)
        return

    from allennlp.common.util import int_to_device

    resolved = int_to_device(device)
    if resolved == torch.device("cpu"):
        return

    num_devices_available = cuda.device_count()

    if num_devices_available == 0:
        # Torch will give a more informative exception than ours, so we want to include
        # that context as well if it's available. For example, if you try to run torch 1.5
        # on a machine with CUDA10.1 you'll get the following:
        #
        #     The NVIDIA driver on your system is too old (found version 10010).
        #
        driver_details = ""
        try:
            cuda._check_driver()
        except Exception as err:
            driver_details = f"\n{err}"
        raise ConfigurationError(
            "Experiment specified a GPU but none is available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file." + driver_details
        )

    if resolved.index >= num_devices_available:
        raise ConfigurationError(
            f"Experiment specified GPU device {resolved.index}"
            f" but there are only {num_devices_available} devices "
            f" available."
        )
def evaluate(
    model: Model,
    data_loader: DataLoader,
    cuda_device: int = -1,
    batch_weight_key: str = None,
    output_file: str = None,
    predictions_output_file: str = None,
) -> Dict[str, Any]:
    """
    Run `model` over every batch of `data_loader` and return its metrics.

    # Parameters

    model : `Model`
        The model to evaluate
    data_loader : `DataLoader`
        The `DataLoader` that will iterate over the evaluation data (data loaders already contain
        their data).
    cuda_device : `int`, optional (default=`-1`)
        The cuda device to use for this evaluation.  The model is assumed to already be using this
        device; this parameter is only used for moving the input data to the correct device.
    batch_weight_key : `str`, optional (default=`None`)
        If given, this is a key in the output dictionary for each batch that specifies how to weight
        the loss for that batch.  If this is not given, we use a weight of 1 for every batch.
    output_file : `str`, optional (default=`None`)
        Optional path to write the final metrics to.
    predictions_output_file : `str`, optional (default=`None`)
        Optional path to write the predictions to. One JSON document is written per batch,
        one per line.

    # Returns

    `Dict[str, Any]`
        The final metrics. A `"loss"` entry (the weighted average loss) is included when the
        model produced a loss.

    # Raises

    RuntimeError
        If the model produced a loss for some batches but not for all of them.
    """
    check_for_gpu(cuda_device)
    data_loader.set_target_device(int_to_device(cuda_device))
    predictions_file = (
        None if predictions_output_file is None else open(predictions_output_file, "w")
    )

    with torch.no_grad():
        # Everything that can raise while the predictions file is open lives inside
        # this `try`, so the handle is released even when evaluation fails midway.
        try:
            model.eval()

            iterator = iter(data_loader)
            logger.info("Iterating over dataset")
            generator_tqdm = Tqdm.tqdm(iterator)

            # Number of batches in instances.
            batch_count = 0
            # Number of batches where the model produces a loss.
            loss_count = 0
            # Cumulative weighted loss
            total_loss = 0.0
            # Cumulative weight across all batches.
            total_weight = 0.0

            for batch in generator_tqdm:
                batch_count += 1
                batch = nn_util.move_to_device(batch, cuda_device)
                output_dict = model(**batch)
                loss = output_dict.get("loss")

                metrics = model.get_metrics()

                if loss is not None:
                    loss_count += 1
                    if batch_weight_key:
                        weight = output_dict[batch_weight_key].item()
                    else:
                        weight = 1.0

                    total_weight += weight
                    total_loss += loss.item() * weight
                    # Report the average loss so far.
                    metrics["loss"] = total_loss / total_weight

                if not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics
                ):
                    logger.warning(
                        'Metrics with names beginning with "_" will '
                        "not be logged to the tqdm progress bar."
                    )
                    HasBeenWarned.tqdm_ignores_underscores = True

                description = (
                    ", ".join(
                        [
                            "%s: %.2f" % (name, value)
                            for name, value in metrics.items()
                            if not name.startswith("_")
                        ]
                    )
                    + " ||"
                )
                generator_tqdm.set_description(description, refresh=False)

                if predictions_file is not None:
                    predictions = json.dumps(
                        sanitize(model.make_output_human_readable(output_dict))
                    )
                    predictions_file.write(predictions + "\n")
        finally:
            if predictions_file is not None:
                predictions_file.close()

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes produced a loss!"
                )
            final_metrics["loss"] = total_loss / total_weight

        if output_file is not None:
            dump_metrics(output_file, final_metrics, log=True)

        return final_metrics
def __init__(
    self,
    image_dir: Optional[Union[str, PathLike]],
    *,
    image_loader: Optional[ImageLoader] = None,
    image_featurizer: Optional[Lazy[GridEmbedder]] = None,
    region_detector: Optional[Lazy[RegionDetector]] = None,
    feature_cache_dir: Optional[Union[str, PathLike]] = None,
    tokenizer: Optional[Tokenizer] = None,
    token_indexers: Optional[Dict[str, TokenIndexer]] = None,
    cuda_device: Optional[Union[int, torch.device]] = None,
    max_instances: Optional[int] = None,
    image_processing_batch_size: int = 8,
    write_to_cache: bool = True,
    manual_distributed_sharding: bool = True,
    manual_multiprocess_sharding: bool = True,
) -> None:
    """
    Configure tokenization, the feature cache, the optional image featurization
    pipeline, and discover the image files under `image_dir`.

    # Parameters

    image_dir : `Optional[Union[str, PathLike]]`
        Directory searched recursively for `png` and `jpg` files. Required whenever
        featurized images are produced (from a cache or from the full pipeline).
    image_loader : `Optional[ImageLoader]`, optional (default=`None`)
    image_featurizer : `Optional[Lazy[GridEmbedder]]`, optional (default=`None`)
    region_detector : `Optional[Lazy[RegionDetector]]`, optional (default=`None`)
        These three must be given together or not at all. The featurizer and the
        detector are kept in their lazy form; they are not constructed here.
    feature_cache_dir : `Optional[Union[str, PathLike]]`, optional (default=`None`)
        Used for all four cache directory attributes set here.
    tokenizer : `Optional[Tokenizer]`, optional (default=`None`)
        Defaults to `PretrainedTransformerTokenizer("bert-base-uncased")`.
    token_indexers : `Optional[Dict[str, TokenIndexer]]`, optional (default=`None`)
        Defaults to `{"tokens": PretrainedTransformerIndexer("bert-base-uncased")}`.
    cuda_device : `Optional[Union[int, torch.device]]`, optional (default=`None`)
        Only consulted when the image pipeline is configured. When `None`, it is
        picked from the available CUDA devices, or `-1` if there are none.
    max_instances : `Optional[int]`, optional (default=`None`)
        Forwarded to the parent initializer.
    image_processing_batch_size : `int`, optional (default=`8`)
        Stored when the image pipeline is configured.
    write_to_cache : `bool`, optional (default=`True`)
        Honored only when `feature_cache_dir` is set; otherwise forced to `True`.
    manual_distributed_sharding : `bool`, optional (default=`True`)
    manual_multiprocess_sharding : `bool`, optional (default=`True`)
        Both forwarded to the parent initializer.

    # Raises

    ConfigurationError
        If only some of `image_loader`, `image_featurizer` and `region_detector`
        are given, or if featurized images are to be produced but `image_dir`
        is `None`.
    """
    super().__init__(
        max_instances=max_instances,
        manual_distributed_sharding=manual_distributed_sharding,
        manual_multiprocess_sharding=manual_multiprocess_sharding,
    )

    # tokenizers and indexers
    if tokenizer is None:
        tokenizer = PretrainedTransformerTokenizer("bert-base-uncased")
    self._tokenizer = tokenizer
    if token_indexers is None:
        token_indexers = {"tokens": PretrainedTransformerIndexer("bert-base-uncased")}
    self._token_indexers = token_indexers

    # The chained `==` holds only when all three are `None` or none of them is.
    if not ((image_loader is None) == (image_featurizer is None) == (region_detector is None)):
        raise ConfigurationError(
            "Please either specify all of image_loader, image_featurizer, and region_detector, "
            "or specify none of them if you don't want to featurize images."
        )

    # feature cache
    # All four cache locations point at the same directory.
    self.feature_cache_dir = feature_cache_dir
    self.coordinates_cache_dir = feature_cache_dir
    self.class_probs_cache_dir = feature_cache_dir
    self.class_labels_cache_dir = feature_cache_dir

    if feature_cache_dir:
        self.write_to_cache = write_to_cache
    else:
        # If we don't have a cache dir, we use a dict in memory as a cache, so we
        # always write.
        self.write_to_cache = True
    # The cache objects themselves are not created here; they start out as `None`.
    self._feature_cache_instance: Optional[MutableMapping[str, Tensor]] = None
    self._coordinates_cache_instance: Optional[MutableMapping[str, Tensor]] = None
    self._class_probs_cache_instance: Optional[MutableMapping[str, Tensor]] = None
    self._class_labels_cache_instance: Optional[MutableMapping[str, Tensor]] = None

    # image processors
    self.image_loader = None
    if image_loader and image_featurizer and region_detector:
        if cuda_device is None:
            if torch.cuda.device_count() > 0:
                if util.is_distributed():
                    # Spread distributed workers across the visible devices by rank.
                    cuda_device = dist.get_rank() % torch.cuda.device_count()
                else:
                    cuda_device = 0
            else:
                cuda_device = -1
        check_for_gpu(cuda_device)
        self.cuda_device = int_to_device(cuda_device)
        logger.info(f"Processing images on device {cuda_device}")

        # image loading and featurizing
        self.image_loader = image_loader
        self.image_loader.device = self.cuda_device
        # Keep the lazy wrappers; the concrete objects start out as `None`.
        self._lazy_image_featurizer = image_featurizer
        self._image_featurizer = None
        self._lazy_region_detector = region_detector
        self._region_detector = None
        self.image_processing_batch_size = image_processing_batch_size

    # Featurized images are produced if there is a cache directory, a full
    # pipeline, or both.
    self.produce_featurized_images = False
    if self.feature_cache_dir and self.coordinates_cache_dir:
        logger.info(f"Featurizing images with a cache at {self.feature_cache_dir}")
        self.produce_featurized_images = True
    if image_loader and image_featurizer and region_detector:
        if self.produce_featurized_images:
            logger.info("Falling back to a full image featurization pipeline")
        else:
            logger.info("Featurizing images with a full image featurization pipeline")
            self.produce_featurized_images = True

    if self.produce_featurized_images:
        if image_dir is None:
            if image_loader and image_featurizer and region_detector:
                raise ConfigurationError("We need an image_dir to featurize images.")
            else:
                raise ConfigurationError(
                    "We need an image_dir to use a cache of featurized images. Images won't be "
                    "read if they are cached, but we need the image_dir to determine the right "
                    "cache keys from the file names."
                )

        logger.info("Discovering images ...")
        # Map each file's base name to its full path, searching recursively.
        self.images = {
            os.path.basename(filename): filename
            for extension in {"png", "jpg"}
            for filename in tqdm(
                glob.iglob(os.path.join(image_dir, "**", f"*.{extension}"), recursive=True),
                desc=f"Discovering {extension} images",
            )
        }
        logger.info("Done discovering images")
def __init__(
    self,
    image_dir: Union[str, PathLike],
    image_loader: ImageLoader,
    image_featurizer: GridEmbedder,
    region_detector: RegionDetector,
    *,
    feature_cache_dir: Optional[Union[str, PathLike]] = None,
    data_dir: Optional[Union[str, PathLike]] = None,
    tokenizer: Optional[Tokenizer] = None,
    token_indexers: Optional[Dict[str, TokenIndexer]] = None,
    cuda_device: Optional[Union[int, torch.device]] = None,
    max_instances: Optional[int] = None,
) -> None:
    """
    Resolve the device, the data split locations, the image files, the
    tokenization components, the image models and the feature caches.

    # Parameters

    image_dir : `Union[str, PathLike]`
        Directory searched recursively for `*.png` files.
    image_loader : `ImageLoader`
        Stored as `self.image_loader`.
    image_featurizer : `GridEmbedder`
        Moved to the chosen device and stored as `self.image_featurizer`.
    region_detector : `RegionDetector`
        Moved to the chosen device and stored as `self.region_detector`.
    feature_cache_dir : `Optional[Union[str, PathLike]]`, optional (default=`None`)
        When given, the directory is created and `TensorCache`s are opened at
        its `features` and `coordinates` sub-paths. Otherwise plain dicts are used.
    data_dir : `Optional[Union[str, PathLike]]`, optional (default=`None`)
        Base location of the split files. When falsy, a pinned commit of the
        `lil-lab/nlvr` GitHub repository is used.
    tokenizer : `Optional[Tokenizer]`, optional (default=`None`)
        Defaults to `PretrainedTransformerTokenizer("bert-base-uncased")`.
    token_indexers : `Optional[Dict[str, TokenIndexer]]`, optional (default=`None`)
        Defaults to `{"tokens": PretrainedTransformerIndexer("bert-base-uncased")}`.
    cuda_device : `Optional[Union[int, torch.device]]`, optional (default=`None`)
        When `None`, device `0` is chosen if `torch.cuda` reports any device,
        otherwise `-1`.
    max_instances : `Optional[int]`, optional (default=`None`)
        Forwarded to the parent initializer.
    """
    super().__init__(
        max_instances=max_instances,
        manual_distributed_sharding=True,
        manual_multi_process_sharding=True,
    )

    # Device selection
    if cuda_device is None:
        from torch import cuda

        cuda_device = 0 if cuda.device_count() > 0 else -1

    from allennlp.common.checks import check_for_gpu

    check_for_gpu(cuda_device)
    from allennlp.common.util import int_to_device

    self.cuda_device = int_to_device(cuda_device)

    # Paths to data
    if not data_dir:
        repo_root = "https://raw.githubusercontent.com/lil-lab/nlvr/"
        pinned_commit = "68a11a766624a5b665ec7594982b8ecbedc728c7"
        data_dir = f"{repo_root}{pinned_commit}/nlvr2/data"

    split_files = (
        ("dev", "dev.json"),
        ("test", "test1.json"),
        ("train", "train.json"),
        ("balanced_dev", "balanced/balanced_dev.json"),
        ("balanced_test", "balanced/balanced_test1.json"),
        ("unbalanced_dev", "balanced/unbalanced_dev.json"),
        ("unbalanced_test", "balanced/unbalanced_test1.json"),
    )
    self.splits = {
        split_name: f"{data_dir}/{relative_path}"
        for split_name, relative_path in split_files
    }

    # Image discovery: base name -> full path
    from tqdm import tqdm

    png_pattern = os.path.join(image_dir, "**", "*.png")
    discovered = {}
    for image_path in tqdm(
        glob.iglob(png_pattern, recursive=True),
        desc="Discovering images",
    ):
        discovered[os.path.basename(image_path)] = image_path
    self.images = discovered

    # tokenizers and indexers
    self._tokenizer = (
        tokenizer if tokenizer else PretrainedTransformerTokenizer("bert-base-uncased")
    )
    if token_indexers is None:
        token_indexers = {"tokens": PretrainedTransformerIndexer("bert-base-uncased")}
    self._token_indexers = token_indexers

    # image loading
    self.image_loader = image_loader
    self.image_featurizer = image_featurizer.to(self.cuda_device)
    self.region_detector = region_detector.to(self.cuda_device)

    # feature cache
    if feature_cache_dir is not None:
        os.makedirs(feature_cache_dir, exist_ok=True)
        self._features_cache = TensorCache(os.path.join(feature_cache_dir, "features"))
        self._coordinates_cache = TensorCache(
            os.path.join(feature_cache_dir, "coordinates")
        )
    else:
        self._features_cache: MutableMapping[str, Tensor] = {}
        self._coordinates_cache: MutableMapping[str, Tensor] = {}