def __anonymize_attr_key(self, d, attr, ar, name, res):
    """
    Obfuscate an attribute/resource key

    :param d: entry whose key is renamed in place; when None only the
              mappings are updated
    :type d: dict or None
    :param attr: the key exactly as it appears in ``d``
    :type attr: str
    :param ar: map of key name to obfuscated name; a false entry for
               ``name`` is filled in by this call
    :type ar: dict
    :param name: name used to look up the mapping (the attribute name, or
                 the resource name without its prefix)
    :type name: str
    :param res: prefix placed before the obfuscated resource name, or
                None when ``attr`` is a plain attribute
    :type res: str or None
    """
    is_resource = res is not None
    global_map = self.gmap_resc_key if is_resource else self.gmap_attr_key

    if not ar[name]:
        # No obfuscated name chosen yet: reuse the global one when it
        # exists, otherwise create a random one of the same length.
        if name not in global_map:
            global_map[name] = PbsAttribute.random_str(len(name))
        ar[name] = global_map[name]

    anon_name = ar[name]

    if d is not None:
        # Move the value over to the obfuscated key.
        value = d.pop(attr)
        new_key = res + "." + anon_name if is_resource else anon_name
        d[new_key] = value

    # Record the mapping locally and globally, never overriding an
    # entry that is already there.
    self.lmap.setdefault(name, anon_name)
    global_map.setdefault(name, anon_name)
def __anonymize_attr_val(self, d, attr, ar, name, val):
    """
    Obfuscate an attribute/resource values

    The value is looked up as a whole first; when that fails it is split
    on "," and every element is obfuscated on its own. Each original to
    obfuscated pair is recorded in ``self.lmap`` and in the global value
    map (``gmap_resc_val`` when ``attr`` contains a ".", otherwise
    ``gmap_attr_val``).

    :param d: entry updated in place with the obfuscated value; when None
              only the mappings are updated
    :type d: dict or None
    :param attr: the key exactly as it appears in ``d``
    :type attr: str
    :param ar: map of name to a dict of original value to obfuscated
               value; an entry that is not a dict (e.g. None) is treated
               as empty and replaced by a dict once a value is generated
    :type ar: dict
    :param name: name used to look up the mappings
    :type name: str
    :param val: value to obfuscate, possibly comma separated
    :type val: str
    """
    # don't obfuscate default project
    if attr == "project" and val == "_pbs_project_default":
        return

    m = self.gmap_resc_val if "." in attr else self.gmap_attr_val

    def record(orig_val, anon_val):
        # Accumulate in the local map instead of replacing it, so every
        # element of a comma separated value stays recorded. A non-dict
        # entry left under the same name is replaced.
        local = self.lmap.get(name)
        if isinstance(local, dict):
            local[orig_val] = anon_val
        else:
            self.lmap[name] = {orig_val: anon_val}
        # The global map keeps the first obfuscated value it was given.
        if name not in m:
            m[name] = {orig_val: anon_val}
        elif orig_val not in m[name]:
            m[name][orig_val] = anon_val

    # Caller supplied values; anything that is not a dict holds none.
    known = ar[name] if isinstance(ar[name], dict) else {}

    nstr = []
    if val in known:
        nstr.append(known[val])
        record(val, known[val])
    else:
        # Obfuscate by randomizing with a value of the same length
        for v in val.split(","):
            if v in known:
                r = known[v]
            elif name in m and v in m[name]:
                r = m[name][v]
            else:
                r = PbsAttribute.random_str(len(v))
                if not isinstance(ar[name], dict):
                    ar[name] = {}
                    known = ar[name]
                ar[name][v] = r
            record(v, r)
            nstr.append(r)

    if d is not None:
        d[attr] = ",".join(nstr)
def __get_anon_key(self, key, attr_map):
    """
    Get an anonymized string for the 'key' belonging to attr_map

    The key is first passed through ``__refactor_key``. A key that is not
    in the map yet gets a random string of the same length, which is
    stored in the map before being returned.

    :param key: the key to anonymize
    :type key: String
    :param attr_map: the attr_map to which the key belongs
    :type attr_map: dict
    :returns: an anonymized string for the key
    """
    key = self.__refactor_key(key)
    if key not in attr_map:
        attr_map[key] = PbsAttribute.random_str(len(key))
    return attr_map[key]
def anonymize_resource_def(self, resources):
    """
    Anonymize the resource definition

    Every resource named in ``self.resc_key`` that is present in
    ``resources`` is replaced by a shallow copy stored under, and renamed
    to, its anonymized name. The name is the value given in
    ``self.resc_key``; when that value is None the name already recorded
    in ``self.gmap_resc_key`` is used, or else a random string of the same
    length is generated. Whichever name is used is recorded in
    ``self.gmap_resc_key`` (unless one is recorded already), so that a
    generated name is reused the next time the resource is looked up.

    :param resources: resource definitions keyed by resource name; the
                      values must provide ``set_name``
    :type resources: dict
    :returns: the same ``resources`` object, modified in place
    """
    if not self.resc_key:
        return resources

    for curr_anon_resc, val in self.resc_key.items():
        if curr_anon_resc not in resources:
            continue

        tmp_resc = copy.copy(resources[curr_anon_resc])
        del resources[curr_anon_resc]

        if val is None:
            if curr_anon_resc not in self.gmap_resc_key:
                # Remember the generated name; without this a later
                # lookup of the same resource would get a different one.
                self.gmap_resc_key[curr_anon_resc] = (
                    PbsAttribute.random_str(len(curr_anon_resc)))
            val = self.gmap_resc_key[curr_anon_resc]
        elif curr_anon_resc not in self.gmap_resc_key:
            self.gmap_resc_key[curr_anon_resc] = val

        tmp_resc.set_name(val)
        resources[val] = tmp_resc

    return resources
def anonymize_sched_config(self, scheduler):
    """
    Anonymize the scheduler config

    Clears the stored config comments and replaces every resource named
    in ``self.resc_key`` on the "resources" line and inside the
    "job_sort_key" / "node_sort_key" entries. Nothing is done when
    ``self.resc_key`` is empty.

    One anonymized name is used per resource everywhere: the name already
    in ``self.gmap_resc_key`` if there is one, otherwise the name given
    in ``self.resc_key``, otherwise a random string of the same length.
    The chosen name is stored in ``self.gmap_resc_key``.

    A sort key stored as a single string is treated as one entry and
    written back as a string; a list is updated in place.

    :param scheduler: PBS scheduler object
    """
    if not self.resc_key:
        return

    def anon_name(resc):
        # Resolve, and remember, the anonymized name of a resource.
        if resc in self.gmap_resc_key:
            return self.gmap_resc_key[resc]
        name = self.resc_key[resc]
        if name is None:
            name = PbsAttribute.random_str(len(resc))
        self.gmap_resc_key[resc] = name
        return name

    # when anonymizing we get rid of the comments as they may contain
    # sensitive information
    scheduler._sched_config_comments = {}

    # If resources need to be anonymized then update the resources line
    # job_sort_key and node_sort_key
    sr = scheduler.get_resources()
    if sr:
        for i, sres in enumerate(sr):
            if sres in self.resc_key:
                sr[i] = anon_name(sres)
        scheduler.sched_config["resources"] = ",".join(sr)

    for k in ["job_sort_key", "node_sort_key"]:
        if k not in scheduler.sched_config:
            continue

        sc_jsk = scheduler.sched_config[k]
        single = isinstance(sc_jsk, str)
        if single:
            # list(a_string) would split the entry into characters
            entries = [sc_jsk]
        elif isinstance(sc_jsk, list):
            entries = sc_jsk
        else:
            entries = list(sc_jsk)

        # NOTE(review): this is a plain substring replacement, so a
        # resource name contained in another word is replaced as well.
        for r in self.resc_key:
            for i, key in enumerate(entries):
                if r in key:
                    entries[i] = key.replace(r, anon_name(r))

        # Store the result when it is not the object already in place
        if single:
            scheduler.sched_config[k] = entries[0]
        elif entries is not sc_jsk:
            scheduler.sched_config[k] = entries
def __anonymize_fgc(self, d, attr, ar, val):
    """
    Anonymize an FGC limit value

    ``val`` is split on "," and every limit is parsed with
    ``PbsTypeFGCLimit``. Limits whose entity name is "PBS_GENERIC" or
    "PBS_ALL" are kept as they are. For entity types "u", "g" and "p" the
    entity name is replaced by its obfuscated value from
    ``self.gmap_attr_val`` under "euser", "egroup" or "project"; a name
    not recorded there yet takes the value supplied in ``ar`` if there is
    one, else a random string of the same length, and is then recorded.
    Limits of any other entity type are re-emitted with their name
    unchanged.

    :param d: entry updated in place with the anonymized limits
    :type d: dict
    :param attr: the key of the limit attribute in ``d``
    :type attr: str
    :param ar: map of "euser"/"egroup"/"project" to a dict of original
               name to obfuscated name; missing or empty entries are
               ignored
    :type ar: dict
    :param val: one or more comma separated limits
    :type val: str
    """
    entity_attr = {"u": "euser", "g": "egroup", "p": "project"}

    nfgc = []
    for lim in val.split(","):
        _fgc = PbsTypeFGCLimit(attr, lim)
        ename = _fgc.entity_name
        if ename in ("PBS_GENERIC", "PBS_ALL"):
            nfgc.append(lim)
            continue

        obf_ename = ename
        nm = entity_attr.get(_fgc.entity_type)
        if nm is not None:
            known = self.gmap_attr_val.setdefault(nm, {})
            if ename not in known:
                # Every unseen name gets a mapping, including when other
                # names of this entity type were recorded before;
                # otherwise it would be emitted in clear text.
                preset = ar[nm] if nm in ar else None
                if preset and ename in preset:
                    known[ename] = preset[ename]
                else:
                    known[ename] = PbsAttribute.random_str(len(ename))
            obf_ename = known[ename]

        _fgc.entity_name = obf_ename
        nfgc.append(_fgc.__val__())

    d[attr] = ",".join(nfgc)
def anonymize_batch_status(self, data=None):
    """
    Anonymize arbitrary batch_status data

    Each entry is modified in place: attributes listed in
    ``self.attr_delete`` and resources listed in ``self.resc_delete`` are
    removed, then keys and values are obfuscated according to
    ``self.attr_key``, ``self.attr_val``, ``self.resc_key`` and
    ``self.resc_val``. ``self.lmap`` is reset at the start of the call.

    An error is logged and nothing is done when ``data`` is neither a
    list nor a dict.

    :param data: Batch status data
    :type data: List or dictionary
    :returns: None
    """
    if not isinstance(data, (list, dict)):
        self.logger.error("data expected to be dict or list")
        return None

    dat = [data] if isinstance(data, dict) else data

    # Local mapping data used to store obfuscation mapping data for this
    # specific item, d
    self.lmap = {}

    # loop over each "batch_status" entry to obfuscate
    for d in dat:

        if self.attr_delete is not None:
            for todel in self.attr_delete:
                if todel in d:
                    del d[todel]

        if self.resc_delete is not None:
            for todel in self.resc_delete:
                # Iterate over a snapshot: deleting from a dict while
                # iterating over it raises RuntimeError.
                for tmpk in list(d):
                    if "." in tmpk and todel == tmpk.split(".")[1]:
                        del d[tmpk]

        # Loop over each object's attributes, this is where the special
        # cases are handled (e.g., FGC limits, formula, select spec...)
        # A snapshot is used because obfuscating a key removes it from d
        # and adds the new key; the new keys must not be visited again.
        for attr in list(d):
            if attr not in d:
                continue
            val = d[attr]

            if "." in attr:
                (res_type, res_name) = attr.split(".")
            else:
                res_type = None
                res_name = attr

            if res_type is not None:
                is_fgc = (attr.startswith("max_run") or
                          attr.startswith("max_queued"))

                if self._entity and is_fgc:
                    self.__anonymize_fgc(d, attr, self.attr_val, val)

                if res_name in self.resc_val:
                    if is_fgc:
                        self.__anonymize_fgc(d, attr, self.attr_val, val)
                    self.__anonymize_attr_val(d, attr, self.resc_val,
                                              res_name, val)

                if res_name in self.resc_key:
                    self.__anonymize_attr_key(d, attr, self.resc_key,
                                              res_name, res_type)
            else:
                if attr in self.attr_val:
                    self.__anonymize_attr_val(d, attr, self.attr_val,
                                              attr, val)

                if attr in self.attr_key:
                    self.__anonymize_attr_key(d, attr, self.attr_key,
                                              attr, None)

                if ((attr in ("job_sort_formula", "schedselect",
                              "select")) and self.resc_key):
                    # Replace resource names embedded in the expression
                    for r in self.resc_key:
                        if r in val:
                            if r not in self.gmap_resc_key:
                                self.gmap_resc_key[r] = (
                                    PbsAttribute.random_str(len(r)))
                            val = val.replace(r, self.gmap_resc_key[r])
                            setattr(self, attr, val)

                    d[attr] = val
def anonymize_resource_group(self, filename):
    """
    Anonymize the user and group fields of a resource group filename

    Blank lines and lines starting with "#" are dropped. On every other
    line the first field ("user" or "user:group") is obfuscated: the user
    part when "euser" is in ``self.attr_val`` and the group part when
    "egroup" is, using and extending ``self.gmap_attr_val``. The third
    field is replaced when it is a user already present in that map; all
    other fields are copied as they are. Lines with fewer than three
    fields are kept with the fields they have.

    :param filename: Resource group filename
    :type filename: str
    :returns: the anonymized lines, or None (after logging an error) when
              the file can not be read
    """
    anon_rg = []

    try:
        # the context manager closes the file even if reading fails
        with open(filename) as f:
            lines = f.readlines()
    except IOError:
        self.logger.error("Error processing " + filename)
        return None

    for data in lines:
        data = data.strip()
        if not data or data[0] == "#":
            continue

        _d = data.split()
        ug = _d[0]
        if ":" in ug:
            (euser, egroup) = ug.split(":")
        else:
            euser = ug
            egroup = None

        if "euser" not in self.attr_val:
            anon_euser = euser
        else:
            anon_euser = None
            if ANON_USER_K in self.gmap_attr_val:
                if euser in self.gmap_attr_val[ANON_USER_K]:
                    anon_euser = self.gmap_attr_val[ANON_USER_K][euser]
            else:
                self.gmap_attr_val[ANON_USER_K] = {}

        if euser is not None and anon_euser is None:
            anon_euser = PbsAttribute.random_str(len(euser))
            self.gmap_attr_val[ANON_USER_K][euser] = anon_euser

        if "egroup" not in self.attr_val:
            anon_egroup = egroup
        else:
            anon_egroup = None

            if egroup is not None:
                if ANON_GROUP_K in self.gmap_attr_val:
                    if egroup in self.gmap_attr_val[ANON_GROUP_K]:
                        anon_egroup = (
                            self.gmap_attr_val[ANON_GROUP_K][egroup])
                else:
                    self.gmap_attr_val[ANON_GROUP_K] = {}

        if egroup is not None and anon_egroup is None:
            anon_egroup = PbsAttribute.random_str(len(egroup))
            self.gmap_attr_val[ANON_GROUP_K][egroup] = anon_egroup

        # reconstruct the fairshare info by combining euser and egroup
        out = [anon_euser]
        if anon_egroup is not None:
            out[0] += ":" + anon_egroup

        # and appending the rest of the original line; each field is
        # only read when the line really has it
        if len(_d) > 1:
            out.append(_d[1])
        if len(_d) > 2:
            p = _d[2].strip()
            if (ANON_USER_K in self.gmap_attr_val and
                    p in self.gmap_attr_val[ANON_USER_K]):
                out.append(self.gmap_attr_val[ANON_USER_K][p])
            else:
                out.append(_d[2])
            out += _d[3:]
        anon_rg.append(" ".join(out))

    return anon_rg
def __get_anon_value(self, key, value, kv_map):
    """
    Get an anonymized string for the 'value' belonging to the kv_map
    provided.
    The kv_map will be in the following format:
        key:{val1:anon_val1, val2:anon_val2, ...}

    The value is broken into the pieces to anonymize (comma separated
    items for the list attributes, the part before "/" of every "+"
    separated chunk for the exec host attribute, the whole value
    otherwise) and each piece is replaced inside the value string.

    :param key: the key for this value
    :type key: String
    :param value: the value to anonymize
    :type value: String
    :param kv_map: the kv_map to which the key belongs
    :type kv_map: dict
    :returns: an anonymized string for the value
    """
    if key == "project" and value == "_pbs_project_default":
        return "_pbs_project_default"

    # Deal with attributes which have a list of values
    list_attrs = (ATTR_u, ATTR_managers, ATTR_M, ATTR_g, ATTR_aclResvhost,
                  ATTR_aclhost, ATTR_auth_g, ATTR_auth_u)
    if key in list_attrs:
        # whitespace is dropped before splitting on ","
        pieces = "".join(value.split()).split(",")
    elif key == ATTR_exechost:
        pieces = [chunk.split("/")[0] for chunk in value.split("+")]
    else:
        pieces = [value]

    key = self.__refactor_key(key)

    # Go through the list of values and anonymize each in the value string
    for piece in pieces:
        if "@" in piece and ANON_HOST_K in self.attr_val:
            # value is of type "user@host"
            # anonymize the user and host parts separately
            try:
                user, host = piece.split("@")
                host = self.__get_anon_value(ANON_HOST_K, host,
                                             self.gmap_attr_val)
                user = self.__get_anon_value(ANON_USER_K, user,
                                             self.gmap_attr_val)
                anon_val = user + "@" + host
                value = value.replace(piece, anon_val)
                continue
            except Exception:
                # best effort: fall back to anonymizing the piece as one
                # opaque string below
                pass

        if key in kv_map:
            anon_val = self.__get_anon_key(piece, kv_map[key])
        else:
            anon_val = PbsAttribute.random_str(len(piece))
            kv_map[key] = {piece: anon_val}
        value = value.replace(piece, anon_val)

    return value