def test_rmm_getinfo_uninitialized():
    """get_info() must raise RMMError once RMM has been finalized.

    BUG FIX: reinitialize() is now in a ``finally`` block so that RMM is
    restored even when the assertion fails — previously a failure here left
    RMM finalized and broke every subsequent test in the session.
    """
    rmm._finalize()
    try:
        with pytest.raises(rmm.RMMError):
            rmm.get_info()
    finally:
        rmm.reinitialize()
def transform(self, y: cudf.Series, unk_idx=0) -> cudf.Series:
    """
    Maps y to unique ids.

    Parameters
    -----------
    y : cudf Series
        Values to encode against the fitted categories.
    unk_idx : int, default 0
        Id assigned to values not present in the fitted categories.

    Returns
    -----------
    encoded: cudf Series

    Raises
    -----------
    Exception
        If the encoder has not been fit (no categories available).
    """
    # Fit must have produced a non-empty host-side category array.
    if self._cats_host is None or len(self._cats_host) == 0:
        raise Exception("Encoder was not fit!")
    avail_gpu_mem = rmm.get_info().free
    # Number of category entries that fit in the allowed fraction of free GPU memory.
    sub_cats_size = int(avail_gpu_mem * self.gpu_mem_trans_use / self._cats_host.dtype.itemsize)
    # BUG FIX: guard against a zero chunk size (nearly exhausted GPU memory),
    # which previously made the while-loop below spin forever because `i`
    # never advanced. Mirrors the identical guard in merge().
    if sub_cats_size == 0:
        sub_cats_size = 1
    i = 0
    encoded = None
    while i < len(self._cats_host):
        # Move one chunk of categories to the GPU and encode against it.
        sub_cats = cudf.Series(self._cats_host[i : i + sub_cats_size])
        if encoded is None:
            encoded = self._label_encoding(y, sub_cats, na_sentinel=0)
        else:
            # Per-chunk encodings are disjoint, so summing combines them.
            encoded = encoded.add(
                self._label_encoding(y, sub_cats, na_sentinel=0),
                fill_value=0,
            )
        i = i + sub_cats_size
        # Rebind to an empty series so the chunk's GPU memory is released
        # before the next chunk is allocated.
        sub_cats = cudf.Series([])
    # BUG FIX: the -1 "not found" sentinel was previously hard-coded to 0,
    # silently ignoring the unk_idx parameter. Backward compatible: the
    # default unk_idx is 0.
    return encoded[:].replace(-1, unk_idx)
def merge(self, gdf):
    """
    Merges gdf with the calculated group stats.

    Joins the host-side stats table onto ``gdf`` in GPU-memory-sized chunks,
    accumulating the stat columns across chunks, and restores the caller's
    original row order before returning.

    Parameters
    -----------
    gdf : cudf DataFrame
        Temporarily gains (and then loses) a column named
        ``self.order_column_name`` used to preserve row order.

    Returns
    -----------
    stats_joined: cudf DataFrame
        One row per row of ``gdf``, containing only the stat columns.
    """
    # Remember the original row order so it can be restored after the merge
    # (merge does not preserve row order).
    order = cudf.Series(cp.arange(gdf.shape[0]))
    gdf[self.order_column_name] = order
    # Build the list of output stat column names from the configured stats.
    col_names = []
    if self.cont_col is not None:
        for i in range(len(self.cont_col)):
            if "sum" in self.stats_names:
                col_names.append(self.col + "_" + self.cont_col[i] + "_sum")
    if "count" in self.stats_names:
        col_names.append(self.col + "_count")
    # Size each stats chunk from the allowed fraction of free GPU memory,
    # assuming ~8 bytes per stats cell.
    avail_gpu_mem = rmm.get_info().free
    sub_stats_size = int(avail_gpu_mem * self.gpu_mem_trans_use / (self.stats.shape[1] * 8))
    # Never allow a zero chunk size — the loop below would not advance.
    if sub_stats_size == 0:
        sub_stats_size = 1
    stats_joined = None
    i = 0
    while i < self.stats.shape[0]:
        # Move one chunk of the host-side (pandas) stats to the GPU.
        sub_stats = cudf.from_pandas(self.stats.iloc[i:i + sub_stats_size])
        joined = gdf[[self.col, self.order_column_name]].merge(sub_stats, on=[self.col], how="left")
        # Restore the caller's row order so chunks align row-for-row.
        joined = joined.sort_values(self.order_column_name)
        joined.reset_index(drop=True, inplace=True)
        if stats_joined is None:
            stats_joined = joined[col_names].copy()
        else:
            # Accumulate across chunks; rows missing from a chunk count as 0.
            stats_joined = stats_joined.add(joined[col_names], fill_value=0)
        i = i + sub_stats_size
        # Rebind to release the chunk's GPU memory before the next iteration.
        joined = cudf.Series([])
    # Remove the temporary ordering column added above.
    gdf.drop(columns=[self.order_column_name], inplace=True)
    # print(col_names)
    return stats_joined[col_names]
def intialize_reader(self, gpu_memory_frac, batch_size, **kwargs):
    """Prepare the CSV reader: infer names/dtypes from a small sample and
    size the read batches.

    Parameters
    -----------
    gpu_memory_frac : float
        Fraction of free GPU memory a batch may occupy (used when
        ``batch_size`` is falsy).
    batch_size : int or None
        Rows per batch; converted to bytes via the estimated row size.
    **kwargs
        ``names``, ``dtype``, ``sep`` are honored; others ignored here.
    """
    self.reader = cudf.read_csv
    # Count rows and determine column names
    estimate_row_size = False
    if self.row_size is None:
        self.row_size = 0
        estimate_row_size = True
    self.offset = 0
    self.file_bytes = os.stat(str(self.file_path)).st_size
    # Use first rows to estimate memory-reqs
    names = kwargs.get("names", None)
    dtype = kwargs.get("dtype", None)
    # default csv delim is ","
    sep = kwargs.get("sep", ",")
    self.sep = sep
    self.names = []
    dtype_inf = {}
    nrows = 10
    # BUG FIX: the file handle was previously opened inline and never
    # closed (resource leak); a context manager closes it deterministically.
    with open(self.file_path) as f:
        head = "".join(islice(f, nrows))
    snippet = self.reader(io.StringIO(head), nrows=nrows, names=names, dtype=dtype, sep=sep, header=0)
    self.inferred_names = not names
    if self.file_bytes > 0:
        # Record column names, preferring caller-supplied names.
        for i, col in enumerate(snippet.columns):
            if names:
                name = names[i]
            else:
                name = col
            self.names.append(name)
        # Infer dtypes (and per-row byte size, if not supplied) from the sample.
        for i, col in enumerate(snippet._columns):
            if estimate_row_size:
                self.row_size += col.dtype.itemsize
            dtype_inf[self.names[i]] = col.dtype
    self.dtype = dtype or dtype_inf
    # Determine batch size (in bytes) if needed
    if batch_size:
        self.batch_size = batch_size * self.row_size
    else:
        free_mem = rmm.get_info().free
        self.batch_size = free_mem * gpu_memory_frac
    # Ceiling division: number of batches needed to cover the whole file.
    self.num_chunks = int(
        (self.file_bytes + self.batch_size - 1) // self.batch_size)
import os
import warnings

# need to configure tensorflow to not use all of memory
# TF_MEMORY_ALLOCATION is fraction of GPU memory if < 1, and size
# in MB if > 1
import rmm
import tensorflow as tf
from packaging import version
from tensorflow.python.feature_column import feature_column_v2 as fc

from .io import GPUDatasetIterator, _shuffle_gdf
from .workflow import BaseWorkflow

free_gpu_mem_mb = rmm.get_info().free / (1024 ** 2)
tf_mem_size = os.environ.get("TF_MEMORY_ALLOCATION", 0.5)
if float(tf_mem_size) < 1:
    # Interpret the value as a fraction of currently free GPU memory.
    tf_mem_size = free_gpu_mem_mb * float(tf_mem_size)
tf_mem_size = int(tf_mem_size)
assert tf_mem_size < free_gpu_mem_mb

# BUG FIX: environment variables are always strings, and the previous code
# used the raw value directly as a list index — a TypeError whenever
# TF_VISIBLE_DEVICE was actually set. Coerce to int (the default 0 also
# passes through unchanged).
tf_device = int(os.environ.get("TF_VISIBLE_DEVICE", 0))
try:
    # Cap TensorFlow's GPU allocation so cudf retains the remaining memory.
    tf.config.set_logical_device_configuration(
        tf.config.list_physical_devices("GPU")[tf_device],
        [tf.config.LogicalDeviceConfiguration(memory_limit=tf_mem_size)],
    )
except RuntimeError:
    warnings.warn(
        "TensorFlow runtime already initialized, may not be enough memory for cudf"
    )
def test_rmm_getinfo():
    """get_info() reports sane memory numbers: 0 <= free <= total."""
    info = rmm.get_info()
    # Chained comparison covers all three invariants at once:
    # free >= 0, total >= 0 (implied), and free <= total.
    assert 0 <= info.free <= info.total
def _allowable_batch_size(gpu_memory_frac, row_size):
    """Return how many rows of `row_size` bytes fit in the allowed fraction
    of currently free GPU memory — never less than 1 row."""
    budget = rmm.get_info().free * gpu_memory_frac
    rows = int(budget / row_size)
    return max(rows, 1)
import cudf
import rmm
import torch
from fastai.basic_train import Learner, to_cpu
from fastai.metrics import accuracy
from fastai.basic_data import DataBunch
from fastai.tabular import TabularModel

from nvtabular.torch_dataloader import DLCollator, DLDataLoader, FileItrDataset
from nvtabular.io import GPUDatasetIterator
from nvtabular.ops import Categorify, LogOp, Normalize, ZeroFill
from nvtabular import Workflow

# NOTE(review): `args` is not defined in this chunk — presumably parsed via
# argparse earlier in the file; confirm before relying on these fields.
if args.pool:
    # Pre-allocate an RMM memory pool sized to 80% of currently free GPU memory.
    rmm.reinitialize(pool_allocator=True, initial_pool_size=0.8 * rmm.get_info().free)

# Args needed GPU_id, in_dir, out_dir, in_file_type, freq_threshold, batch_size, gpu_mem_frac
# day_split
print(args)
# Normalize the shuffle flag to a plain bool.
shuffle_arg = True if args.shuffle else False
print(torch.__version__, cudf.__version__)
data_path = args.in_dir
df_valid = ""
df_train = ""
# split = 270
# fin = 332
# print('Gathering input dataset files')
# train_days = [x for x in range(split)]
# valid_days = [x for x in range(split, fin)]
# print(train_days, valid_days)
def _get_gpu_mem_info(self):
    """Return (free bytes, utilization fraction in [0, 1]) for the GPU."""
    free_bytes, total_bytes = rmm.get_info()
    used_fraction = (total_bytes - free_bytes) / total_bytes
    return free_bytes, used_fraction