def parse_keyval(list_keyval, defaults={}):
  """ Parse a list of "<key>:<value>" into a dictionary.
  Args:
    list_keyval List of "<key>:<value>"
    defaults    Default key -> value to use (also ensures the type; 'str' is assumed for other keys)
  Returns:
    Associated dictionary
  """
  parsed = {}
  # Parsing
  sep = ":"
  for entry in list_keyval:
    pos = entry.find(sep)
    if pos < 0:
      raise tools.UserException("Expected list of " + repr("<key>:<value>") + ", got " + repr(entry) + " as one entry")
    key = entry[:pos]
    if key in parsed:
      raise tools.UserException("Key " + repr(key) + " has already been specified with value " + repr(parsed[key]))
    val = entry[pos + len(sep):]
    if key in defaults:
      # Assert type constructibility
      try:
        val = type(defaults[key])(val)
      except Exception:
        raise tools.UserException("Required key " + repr(key) + " expected a value of type " + repr(getattr(type(defaults[key]), "__name__", "<unknown>")))
    parsed[key] = val
  # Add the missing default values
  for key in defaults:
    if key not in parsed:
      parsed[key] = defaults[key]
  # Return final dictionary
  return parsed
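# Illustration (not part of the original code): a minimal sketch of how 'parse_keyval' is used.
# The literal keys and values below are made up. Defaults both fill in missing keys and force
# the type of the keys they cover; values for other keys are kept as strings.
if __name__ == "__main__":
  example = parse_keyval(["batch-size:64", "name:test"], defaults={"batch-size": 32, "lr": 0.1})
  assert example == {"batch-size": 64, "name": "test", "lr": 0.1}  # 'batch-size' coerced to int, 'lr' defaulted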
def build(struct, name, select, args, **kwargs):
  """ Call the constructor associated with the given selection, with the given keyword and parsed arguments.
  Args:
    struct Structure defining the constructors and their respective arguments
    name   Name of what is built by the constructor
    select Constructor to select
    args   List of "<key>:<value>" command line arguments
    ...    Key-value arguments forwarded to the constructor
  Returns:
    Constructed instance
  """
  # Recover constructor and argument structure
  if select not in struct:
    raise tools.UserException("Unknown " + name + " " + repr(select) + ", " + ("no " + name + " available" if len(struct) == 0 else "expected one of: '" + ("', '").join(struct.keys()) + "'"))
  construct, args_struct = struct[select]
  # Translate parameters
  defaults = {}
  for key, val in args_struct.items():
    defaults[key] = val[0]
  args_parsed = tools.parse_keyval(args, defaults=defaults)
  # Instantiate and return (ignore supplementary parameters by iterating over 'args_struct' instead of 'args_parsed')
  args_kw = {}
  for key, val in args_struct.items():
    args_kw[val[1]] = args_parsed[key]
  return construct(**args_kw, **kwargs)
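# Illustration (hypothetical, not part of the original code): a sketch of the 'struct' layout
# expected by 'build', i.e. selection name -> (constructor, {cli-key: (default value, constructor kwarg)}).
# '_FakeSgd' and every key below are made-up placeholders.
if __name__ == "__main__":
  class _FakeSgd:
    def __init__(self, learning_rate, momentum, params=None):
      self.learning_rate, self.momentum, self.params = learning_rate, momentum, params
  struct = {"sgd": (_FakeSgd, {"lr": (0.01, "learning_rate"), "momentum": (0.9, "momentum")})}
  opt = build(struct, "optimizer", "sgd", ["lr:0.1"], params=None)
  assert opt.learning_rate == 0.1 and opt.momentum == 0.9  # 'momentum' falls back to its default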
def __init__(self, dataset, model, args):
  # Parse key:val arguments
  nbcores = len(os.sched_getaffinity(0))
  if nbcores == 0:
    nbcores = 4  # Arbitrary fallback
  args = tools.parse_keyval(args, defaults={
    "batch-size": 32,
    "eval-batch-size": 1024,
    "weight-decay": 0.00004,
    "label-smoothing": 0.,
    "labels-offset": 0,
    "nb-fetcher-threads": nbcores,
    "nb-batcher-threads": nbcores})
  if args["batch-size"] <= 0:
    raise tools.UserException("Cannot make batches of non-positive size")
  # Report experiment
  with tools.Context("slim", None):
    print("Dataset name in use: " + repr(dataset[0]) + " (in " + repr(dataset[1]) + ")")
    print("Dataset preprocessing: " + (repr(args["preprocessing"]) if "preprocessing" in args else "<model default>"))
    print("Model name in use: " + repr(model))
  # Finalization
  self.__args = args
  self.__dataset = dataset
  self.__preproc = args["preprocessing"] if "preprocessing" in args else model
  self.__model = model
  self.__cntr_wk = 0  # Worker instantiation counter
  self.__cntr_ev = 0  # Evaluator instantiation counter
def checked(**kwargs):
  # Check parameter validity
  message = check(**kwargs)
  if message is not None:
    raise tools.UserException("Aggregation rule %r cannot be used with the given parameters: %s" % (name, message))
  # Aggregation (hard to assert the return value, duck-typing is allowed...)
  return unchecked(**kwargs)
def snapshot(self, instance, overwrite=False, deepcopy=False, nowarnref=False):
  """ Take/overwrite the snapshot of a given instance.
  Args:
    instance  Instance to snapshot
    overwrite Overwrite any existing snapshot for the same class
    deepcopy  Deep copy the instance's state dictionary instead of referencing it
    nowarnref Always avoid the debug-mode warning if restoring a state dictionary reference is the wanted behavior
  Returns:
    self
  """
  instance, key = type(self)._prepare(instance)
  # Snapshot the state dictionary
  if not overwrite and key in self._store:
    raise tools.UserException("A snapshot for %r is already stored in the checkpoint" % key)
  if deepcopy:
    self._store[key] = copy.deepcopy(instance.state_dict())
  else:
    self._store[key] = instance.state_dict().copy()
  # Track whether a deep copy was made (or whether restoring a reference is the expected behavior)
  if __debug__:
    self._copied[key] = deepcopy or nowarnref
  # Enable chaining
  return self
def __getattr__(self, name):
  """ Get a bound method from its name.
  Args:
    name Method name
  Returns:
    Bound method
  """
  # Assertions ('nname' and 'methods' are captured from the enclosing scope)
  if not hasattr(self, nname):
    raise tools.UserException("Unable to access instance as its creation failed")
  # Get method from name
  method = methods[name]
  native = getattr(self, nname)
  # Bind wrapping
  def call(*args):
    """ Call the method with the native instance as its first parameter.
    Args:
      ... Forwarded parameters
    Returns:
      Forwarded return value
    """
    return method(native, *args)
  return call
def get_module(lib, src):
  """ (Build then) load the native shared object.
  Args:
    lib Path to the shared object
    src Path to the unique source file
  Returns:
    Module instance
  """
  # Conversion if necessary
  if not isinstance(lib, pathlib.Path):
    lib = pathlib.Path(lib)
  if not isinstance(src, pathlib.Path):
    src = pathlib.Path(src)
  assert src.exists(), "Source file '" + str(src) + "' does not exist"
  # Build if necessary
  if not lib.exists() or src.stat().st_mtime > lib.stat().st_mtime:
    command = shlex.split("c++ -Wall -Wextra -Wfatal-errors -O2 -std=c++14 -fPIC -shared -o " + shlex.quote(str(lib)) + " " + shlex.quote(str(src)))
    command = subprocess.run(command)
    if command.returncode != 0:
      raise tools.UserException("Compilation of '" + str(src.resolve()) + "' failed with error code " + str(command.returncode))
  # Load module
  return ctypes.CDLL(str(lib.resolve()))
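# Illustration (hypothetical paths, not part of the original code): 'get_module' recompiles the
# shared object only when the source file is newer, then loads it with ctypes; the exported C
# symbols become attributes of the returned handle. Requires a C++14 compiler invokable as 'c++'.
#
#   module = get_module("native/ops.so", "native/ops.cpp")   # made-up paths
#   module.some_exported_function                            # assumed symbol name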
def parse_keyval(list_keyval, defaults={}):
  """ Parse a list of "<key>:<value>" into a dictionary.
  Args:
    list_keyval List of "<key>:<value>"
    defaults    Default key -> value to use (also ensures the type; the type is guessed for other keys)
  Returns:
    Associated dictionary
  """
  parsed = {}
  # Parsing
  sep = ":"
  for entry in list_keyval:
    pos = entry.find(sep)
    if pos < 0:
      raise tools.UserException("Expected list of " + repr("<key>:<value>") + ", got " + repr(entry) + " as one entry")
    key = entry[:pos]
    if key in parsed:
      raise tools.UserException("Key " + repr(key) + " has already been specified with value " + repr(parsed[key]))
    val = entry[pos + len(sep):]
    # Guess/assert type constructibility
    if key in defaults:
      try:
        cls = type(defaults[key])
        if cls is bool:  # Special case
          val = val.lower() not in ("", "0", "n", "false")
        else:
          val = cls(val)
      except Exception:
        raise tools.UserException("Required key " + repr(key) + " expected a value of type " + repr(getattr(type(defaults[key]), "__name__", "<unknown>")))
    else:
      val = parse_keyval_auto_convert(val)
    # Bind (converted) value to associated key
    parsed[key] = val
  # Add the missing default values
  for key in defaults:
    if key not in parsed:
      parsed[key] = defaults[key]
  # Return final dictionary
  return parsed
def __init__(self, nbworkers, nbbyzwrks, args):
  # Parse key:val arguments
  ps = tools.parse_keyval([] if args is None else args, defaults={"ps": 0.9})["ps"]
  if ps <= 0 or ps > 1:
    raise tools.UserException("Invalid selection probability, got %s" % (ps,))
  # Finalization
  self._p = ps
  self._f = nbbyzwrks
def __call__(self):
  """ Get a pointer to the native instance.
  Returns:
    Pointer to the native instance
  """
  # Assertions
  if not hasattr(self, nname):
    raise tools.UserException("Unable to access instance as its creation failed")
  # Return pointer
  return getattr(self, nname)
def __init__(self, path_results):
  """ Load the data from a training/evaluation result directory.
  Args:
    path_results Path-like to the result directory to load
  """
  # Conversion to path
  if not isinstance(path_results, pathlib.Path):
    path_results = pathlib.Path(path_results)
  # Ensure the directory exists
  if not path_results.exists():
    raise tools.UserException(f"Result directory {str(path_results)} cannot be accessed or does not exist")
  # Load configuration string
  path_config = path_results / "config"
  try:
    data_config = path_config.read_text().strip()
  except Exception as err:
    tools.warning(f"Result directory {str(path_results)}: unable to read configuration ({err})")
    data_config = None
  # Load configuration JSON
  path_json = path_results / "config.json"
  try:
    with path_json.open("r") as fd:
      data_json = json.load(fd)
  except Exception as err:
    tools.warning(f"Result directory {str(path_results)}: unable to read JSON configuration ({err})")
    data_json = None
  # Load training data
  path_study = path_results / "study"
  try:
    data_study = pandas.read_csv(path_study, sep="\t", index_col=0, na_values=" nan")
    data_study.index.name = "Step number"
  except Exception as err:
    tools.warning(f"Result directory {str(path_results)}: unable to read training data ({err})")
    data_study = None
  # Load evaluation data
  path_eval = path_results / "eval"
  try:
    data_eval = pandas.read_csv(path_eval, sep="\t", index_col=0)
    data_eval.index.name = "Step number"
  except Exception as err:
    tools.warning(f"Result directory {str(path_results)}: unable to read evaluation data ({err})")
    data_eval = None
  # Merge the data frames (or keep whichever one was loaded; an explicit None-check avoids the
  # ambiguous truth value of a DataFrame)
  if data_study is not None and data_eval is not None:
    data = data_study.join(data_eval, how="outer")
  else:
    data = data_study if data_study is not None else data_eval
  # Finalization
  self.name = path_results.name
  self.path = path_results
  self.config = data_config
  self.json = data_json
  self.data = data
  self.thresh = None
def extract_field(text, name, conv):
  """ Extract a "/<name>:<value>" field from the remaining device name.
  Args:
    text Remaining device name to parse
    name Field name
    conv Converter to apply on the field value
  Returns:
    Converted field value, remaining text
  """
  lim = "/" + name + ":"
  pos = text.find(lim)
  if pos < 0:
    raise tools.UserException("Missing field " + repr(name) + " in device name " + repr(device.name))
  text = text[pos + len(lim):]
  pos = text.find("/")
  if pos < 0:
    return conv(text), ""
  else:
    return conv(text[:pos]), text[pos:]
def pnm(fd, tn):
  """ Save a 2D/3D tensor as a PGM/PPM stream.
  Args:
    fd File descriptor opened for writing binary streams
    tn A 2D/3D tensor to convert and save
  Notes:
    The input tensor is "intelligently" squeezed before processing
    For a 2D tensor, black is 1. and white is 0. (values clamped to [0, 1])
    For a 3D tensor, the first dimension must hold the 3 RGB color channels (all values in [0, 1])
  """
  shape = tuple(tn.shape)
  # Intelligent squeezing
  while len(tn.shape) > 3 and tn.shape[0] == 1:
    tn = tn[0]
  # Colored image generation
  if len(tn.shape) == 3:
    if tn.shape[0] == 1:
      tn = tn[0]  # And continue as gray-scale
    elif tn.shape[0] != 3:
      raise tools.UserException("Expected 3 color channels for the first dimension of a 3D tensor, got %d channels" % tn.shape[0])
    else:
      fd.write(("P6\n%d %d 255\n" % tn.shape[1:]).encode())
      fd.write(bytes(tn.transpose(0, 2).transpose(0, 1).mul(256).clamp_(0., 255.).byte().storage()))
      return
  # Gray-scale image generation
  if len(tn.shape) == 2:
    fd.write(("P5\n%d %d 255\n" % tn.shape).encode())
    fd.write(bytes((1.0 - tn).mul_(256).clamp_(0., 255.).byte().storage()))
    return
  # Invalid tensor shape
  raise tools.UserException("Expected a 2D or 3D tensor, got %d dimensions %r" % (len(shape), shape))
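# Illustration (not part of the original code): writing a random RGB image and a random gray-scale
# image with 'pnm'. Assumes PyTorch is installed; the output paths are made up.
if __name__ == "__main__":
  import torch
  with open("/tmp/example_rgb.ppm", "wb") as fd:
    pnm(fd, torch.rand(3, 32, 32))  # 3 RGB channels -> P6 (PPM)
  with open("/tmp/example_gray.pgm", "wb") as fd:
    pnm(fd, torch.rand(32, 32))     # 2D tensor -> P5 (PGM), 1. rendered as black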
def checked(f_real, **kwargs):
  # Check parameter validity
  message = check(f_real=f_real, **kwargs)
  if message is not None:
    raise tools.UserException(f"Attack {name!r} cannot be used with the given parameters: {message}")
  # Attack
  res = unchecked(f_real=f_real, **kwargs)
  # Forward asserted return value
  assert isinstance(res, list) and len(res) == f_real, f"Expected attack {name!r} to return a list of {f_real} Byzantine gradients, got {res!r}"
  return res
def save(self, filepath, overwrite=False):
  """ Save the current checkpoint in the given file.
  Args:
    filepath  Given file path
    overwrite Allow overwriting if the file already exists
  Returns:
    self
  """
  # Check whether the file already exists
  if pathlib.Path(filepath).exists() and not overwrite:
    raise tools.UserException(f"Unable to save checkpoint in existing file {str(filepath)!r} (overwriting has not been allowed by the caller)")
  # (Over)write the file
  torch.save(self._store, filepath)
  # Enable chaining
  return self
def restore(self, sess, path=None):
  """ Restore a saved session state.
  Args:
    sess Session to restore upon
    path Path to the storage file to restore (optional, use the latest one if None)
  """
  # Update view
  self._update()
  # Default parameter
  if path is None:
    if not self.can_restore():
      raise tools.UserException("No storage file to restore")
    path = self.__available[-1]
  # Session restore
  self._saver().restore(sess, path)
def checked(f_real, **kwargs):
  # Check parameter validity
  message = check(f_real=f_real, **kwargs)
  if message is not None:
    raise tools.UserException("Attack %r cannot be used with the given parameters: %s" % (name, message))
  # Attack
  res = unchecked(f_real=f_real, **kwargs)
  # Forward asserted return value
  assert isinstance(res, list) and len(res) == f_real, "Expected attack %r to return a list of %d Byzantine gradients, got %r" % (name, f_real, res)
  return res
def __init__(self, args):
  # Parse key:val arguments
  args = tools.parse_keyval(args, defaults={"batch-size": 32})
  if args["batch-size"] <= 0:
    raise tools.UserException("Cannot make batches of non-positive size")
  # Report loading
  with tools.Context("mnist", None):
    print("Loading MNIST dataset...")
    raw_data = tf.keras.datasets.mnist.load_data()
  # Finalization
  self.__args = args
  self.__raw_data = raw_data
  self.__datasets = None
  self.__cntr_wk = 0  # Worker instantiation counter
  self.__cntr_ev = 0  # Evaluator instantiation counter
def instantiate(self, name, *args, **kwargs):
  """ Instantiate a registered class.
  Args:
    name Class name
    ...  Forwarded parameters
  Returns:
    Registered class instance
  """
  # Assertions
  if name not in self.__register:
    cause = "Unknown name " + repr(name) + ", "
    if len(self.__register) == 0:
      cause += "no registered " + self.__denoms[0]
    else:
      cause += "available " + self.__denoms[1] + ": '" + ("', '").join(self.__register.keys()) + "'"
    raise tools.UserException(cause)
  # Instantiation
  return self.__register[name](*args, **kwargs)
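# Illustration (hypothetical, not part of the original code): typical flow around 'instantiate',
# assuming the enclosing register class also exposes a 'register' counterpart; names are made up.
#
#   experiments.register("mnist", MNISTExperiment)
#   instance = experiments.instantiate("mnist", args=["batch-size:64"])
#   experiments.instantiate("unknown")   # raises tools.UserException listing the available names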
def restore(self, instance, nothrow=False):
  """ Restore the snapshot for a given instance, warn if restoring a reference.
  Args:
    instance Instance to restore
    nothrow  Do not raise an exception if no snapshot is available for the instance
  Returns:
    self
  """
  instance, key = type(self)._prepare(instance)
  # Restore the state dictionary
  if key in self._store:
    instance.load_state_dict(self._store[key])
    # Check if restoring a reference
    if __debug__ and not self._copied[key]:
      tools.warning(f"Restoring a state dictionary reference in an instance of {tools.fullqual(type(instance))}; the resulting behavior may not be the one expected")
  elif not nothrow:
    raise tools.UserException(f"No snapshot for {key!r} is available in the checkpoint")
  # Enable chaining
  return self
def load(self, filepath, overwrite=False):
  """ Load/overwrite the storage from the given file.
  Args:
    filepath  Given file path
    overwrite Allow overwriting any stored snapshot
  Returns:
    self
  """
  # Check if empty
  if not overwrite and len(self._store) > 0:
    raise tools.UserException("Unable to load into a non-empty checkpoint")
  # Load the file
  self._store = torch.load(filepath)
  # Reset the 'copied' flags accordingly
  if __debug__:
    self._copied.clear()
    for key in self._store.keys():
      self._copied[key] = True
  # Enable chaining
  return self
def _prepare(self, instance):
  """ Prepare the given instance for checkpointing.
  Args:
    instance Instance to snapshot/restore
  Returns:
    Checkpoint-able instance, key for the associated storage
  """
  # Recover the instance's class
  cls = type(instance)
  # Transfer if available
  if cls in self._transfers:
    res = self._transfers[cls](instance)
  else:
    res = instance
  # Assert the instance is checkpoint-able
  for prop in ("state_dict", "load_state_dict"):
    if not callable(getattr(res, prop, None)):
      raise tools.UserException(f"Given instance {instance!r} is not checkpoint-able (missing callable member {prop!r})")
  # Return the instance and the associated storage key
  return res, tools.fullqual(cls)
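# Illustration (not part of the original code): the usual snapshot/restore round-trip built from
# the methods above. The enclosing class name 'Checkpoint' and the file path are assumptions.
#
#   model = torch.nn.Linear(10, 2)
#   ckpt = Checkpoint()                                   # assumed class name
#   ckpt.snapshot(model, deepcopy=True)                   # deep copy avoids the reference warning
#   ckpt.save("/tmp/model.ckpt", overwrite=True)
#   ...
#   Checkpoint().load("/tmp/model.ckpt").restore(model)   # chaining: each method returns 'self'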
def average_nan(inputs):
  """ Compute the average coordinate by coordinate, ignoring NaN coordinates.
  Args:
    inputs Input gradients
  Returns:
    Average coordinate by coordinate, ignoring NaN
  """
  # Function selection
  funcs = {4: module.average_nan_float, 8: module.average_nan_double}
  fsize = inputs.dtype.itemsize
  if fsize not in funcs:
    raise tools.UserException("Unsupported floating point type")
  # Actual call
  dim = ctypes.c_size_t(inputs.shape[1])
  n = ctypes.c_size_t(inputs.shape[0])
  ins = ctypes.c_void_p(inputs.ctypes.data)
  out = np.empty_like(inputs[0])
  funcs[fsize](dim, n, ins, ctypes.c_void_p(out.ctypes.data))
  # Return computed gradient
  return out
def __init__(self, name_build, config=Configuration(), *args, **kwargs):
  """ Model builder constructor.
  Args:
    name_build Model name or constructor function
    config     Configuration to use for the parameter tensors
    ...        Additional (keyword-)arguments forwarded to the constructor
  Notes:
    If possible, data parallelism is enabled automatically
  """
  # Recover name/constructor
  if callable(name_build):
    name = tools.fullqual(name_build)
    build = name_build
  else:
    models = type(self)._get_models()
    name = str(name_build)
    build = models.get(name, None)
    if build is None:
      raise tools.UnavailableException(models, name, what="model name")
  # Build model
  with torch.no_grad():
    model = build(*args, **kwargs)
    if not isinstance(model, torch.nn.Module):
      raise tools.UserException("Expected built model %r to be an instance of 'torch.nn.Module', found %r instead" % (name, getattr(type(model), "__name__", "<unknown>")))
    model = model.to(**config)
    device = config["device"]
    if device.type == "cuda" and device.index is None:
      # Model is on GPU and not explicitly restricted to one particular card => enable data parallelism
      model = torch.nn.DataParallel(model)
    params = tools.flatten(model.parameters())  # NOTE: Ordering across runs/nodes seems to be ensured (i.e. only dependent on the model constructor)
  # Finalization
  self._model = model
  self._name = name
  self._config = config
  self._params = params
  self._gradient = None
  self._defaults = {
    "trainset": None,
    "testset": None,
    "loss": None,
    "criterion": None,
    "optimizer": None}
def __init__(self, args):
  # Parse key:val arguments
  nbcores = len(os.sched_getaffinity(0))
  if nbcores == 0:
    nbcores = 4  # Arbitrary fallback
  args = tools.parse_keyval(args, defaults={
    "batch-size": 32,
    "eval-batch-size": 1024,
    "nb-fetcher-threads": nbcores,
    "nb-batcher-threads": nbcores})
  if args["batch-size"] <= 0:
    raise tools.UserException("Cannot make batches of non-positive size")
  # Finalization
  self.__args = args
  self.__preproc = args["preprocessing"] if "preprocessing" in args else "cifarnet"
  self.__cntr_wk = 0  # Worker instantiation counter
  self.__cntr_ev = 0  # Evaluator instantiation counter
def squared_distance(a, b):
  """ Compute the squared l2 distance between two gradients.
  Args:
    a First gradient
    b Second gradient
  Returns:
    ||a - b||²
  """
  # Function selection
  funcs = {4: module.squared_distance_float, 8: module.squared_distance_double}
  fsize = a.dtype.itemsize
  if fsize not in funcs:
    raise tools.UserException("Unsupported floating point type")
  # Actual call
  dim = ctypes.c_size_t(a.shape[0])
  a = ctypes.c_void_p(a.ctypes.data)
  b = ctypes.c_void_p(b.ctypes.data)
  res = funcs[fsize](dim, a, b)
  # Return computed scalar
  return res
def _g5k_parser():
  """ Generate the cluster specification from the G5k-specific cluster specification file.
  Returns:
    Cluster dictionary, with only 1 ps and n-1 worker(s), all using port 7000
  """
  global _g5k_env_key
  global _g5k_cluster
  if _g5k_cluster is not None:
    return _g5k_cluster
  if _g5k_env_key not in os.environ:
    raise tools.UserException("Key " + repr(_g5k_env_key) + " not found in environment; are you running on Grid5000?")
  multi = pathlib.Path(os.environ[_g5k_env_key]).read_text().strip().split(os.linesep)
  seens = set()
  nodes = []
  for node in multi:
    if node in seens:
      continue
    nodes.append(node + ":7000")
    seens.add(node)
  _g5k_cluster = {"ps": nodes[0:1], "workers": nodes[1:]}
  return _g5k_cluster
def bulyan(inputs, f, s):
  """ Compute Bulyan of Multi-Krum.
  Args:
    inputs Input gradients
    f      Number of Byzantine gradients
    s      Number of selected gradients
  Returns:
    Bulyan's output gradient
  """
  # Function selection
  funcs = {4: module.bulyan_float, 8: module.bulyan_double}
  fsize = inputs.dtype.itemsize
  if fsize not in funcs:
    raise tools.UserException("Unsupported floating point type")
  # Actual call
  d = ctypes.c_size_t(inputs.shape[1])
  n = ctypes.c_size_t(inputs.shape[0])
  ins = ctypes.c_void_p(inputs.ctypes.data)
  sel = np.empty((s, inputs.shape[1]), dtype=inputs.dtype)
  out = np.empty(inputs.shape[1], dtype=inputs.dtype)
  funcs[fsize](d, n, f, s, ins, ctypes.c_void_p(sel.ctypes.data), ctypes.c_void_p(out.ctypes.data))
  # Return computed gradient
  return out
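# Illustration (not part of the original code): the native wrappers above expect a 2D, C-contiguous
# NumPy array of float32/float64 gradients, one gradient per row. The sizes below are made up;
# Bulyan classically requires n >= 4f + 3, and s is taken here as n - 2f.
if __name__ == "__main__":
  import numpy as np  # assumed to be the same 'np' alias used above
  grads = np.ascontiguousarray(np.random.randn(15, 1000).astype(np.float32))  # n=15 gradients of dimension 1000
  out = bulyan(grads, 3, 15 - 2 * 3)  # f=3, s=9
  assert out.shape == (1000,) and out.dtype == np.float32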
  accuracies.append(tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float32)))
  return {"top1-X-acc": tf.add_n(accuracies, name="sum_top1Xacc") / float(len(accuracies))}

# ---------------------------------------------------------------------------- #
# Experiment registering

# (Try to) import slim package
with tools.ExpandPath(pathlib.Path(__file__).parent / "slim"):
  from . import slim
  from .slim.datasets import dataset_factory
  from .slim.preprocessing import preprocessing_factory

# Check whether the CIFAR-10 dataset is available
dspath = pathlib.Path(__file__).parent / "datasets" / dataset_name
if not dspath.is_dir():
  raise tools.UserException("slim dataset " + repr(dataset_name) + " in 'datasets' must be a directory")
if not tools.can_access(dspath, read=True):
  raise tools.UserException("slim dataset " + repr(dataset_name + "/*") + " in 'datasets' must be read-able")

# Register dataset directory and experiment
dataset_directory = str(dspath)
register("cnnet", CNNetExperiment)
# ---------------------------------------------------------------------------- #
# Experiment registering

# (Try to) import slim package
with tools.ExpandPath(pathlib.Path(__file__).parent / "slim"):
  from . import slim
  from .slim.datasets import dataset_factory
  from .slim.preprocessing import preprocessing_factory
  from .slim.nets.nets_factory import networks_map

# List available models
models = list(networks_map.keys())
if len(models) == 0:
  raise tools.UserException("no model available in slim package")

# List available datasets
datasets = dict()
dspath = pathlib.Path(__file__).parent / "datasets"
if not dspath.is_dir():
  raise tools.UserException("slim dataset at 'datasets' must be a directory")
for path in dspath.iterdir():
  if not tools.can_access(path, read=True):
    with tools.Context(None, "warning"):
      print("slim dataset " + repr(path.name + "/*") + " in 'datasets' is not read-able and has been ignored")
    continue
  if not path.is_dir():  # Must come after the access-rights check...
    continue
  datasets[path.name] = str(path)