def anonymize(self, log, distributionOfAttributes, epsilon, allTimestampDifferences, allTimestamps):
    """Anonymize every trace attribute, event attribute and timestamp of ``log`` in place.

    The global timestamp extrema are stored on the instance, the per-attribute
    mechanisms are built through ``__setupMechanisms``, and then each trace is
    walked once:

    * every trace-level attribute goes through ``__anonymizeAttribute``;
    * every non-timestamp event attribute goes through ``__anonymizeAttribute``;
    * the timestamp of the first event triggers ``__performTimestampShift`` on
      the whole trace;
    * the timestamp of each later event is replaced by the result of
      ``__anonymizeTimeStamps``.

    Parameters:
        log: iterable of traces; each trace exposes ``attributes``, ``len()``
            and integer indexing that yields mapping-like events.
        distributionOfAttributes: forwarded to ``__setupMechanisms``; its entry
            for the timestamp key is forwarded to ``__getTimestampDomain``.
        epsilon: privacy budget handed to the mechanisms.
        allTimestampDifferences: non-empty collection; its min/max are stored
            on the instance and it is forwarded to ``__getTimestampDomain``.
        allTimestamps: non-empty collection; ``max - min`` must provide
            ``total_seconds()``.

    Returns:
        A tuple ``(log, self.__infectionSuspected)``; ``log`` is the same
        object that was passed in, modified in place.
    """
    print("Setting up the mechanisms")
    setupBegin = datetime.datetime.now()

    self.__maxAllTimestampDifferences = max(allTimestampDifferences)
    self.__minAllTimestampDifferences = min(allTimestampDifferences)
    self.__maxAllTimestamp = max(allTimestamps)
    self.__minAllTimestamp = min(allTimestamps)
    overallSpan = self.__maxAllTimestamp - self.__minAllTimestamp
    shiftSensitivity = overallSpan.total_seconds()

    # lower and upper values are just for initialisation, they get later
    # overwritten in __anonymizeTimeStamps and __performTimestampShift
    lower, upper = 0, 1
    timeShiftMechanism = privacyMechanisms.LaplaceBoundedDomain(
        epsilon=epsilon,
        sensitivity=shiftSensitivity,
        lower=lower,
        upper=upper,
    )
    mechanisms = self.__setupMechanisms(
        epsilon, distributionOfAttributes, lower, upper, shiftSensitivity)
    self.__domainTimestampData = dict()

    setupDuration = datetime.datetime.now() - setupBegin
    print("Done with setting up mechanisms after " + str(setupDuration))

    for tracesDone, trace in enumerate(log, start=1):
        # trace attribute anonymization
        for attribute in trace.attributes.keys():
            trace.attributes[attribute] = self.__anonymizeAttribute(
                trace.attributes[attribute], mechanisms.get(attribute))

        # event attribute anonymization
        for eventNr in range(len(trace)):
            event = trace[eventNr]
            for attribute in event.keys():
                if attribute == self.__timestamp:
                    if eventNr > 0:
                        # Later events: noise is constrained by the neighbouring timestamps.
                        previousTimestamp = self.__getTimestamp(
                            trace, eventNr - 1, allTimestamps)
                        nextTimestamp = self.__getTimestamp(
                            trace, eventNr + 1, allTimestamps)
                        stepSensitivity, minTimestampDifference = self.__getTimestampDomain(
                            trace,
                            eventNr,
                            distributionOfAttributes[self.__timestamp],
                            allTimestampDifferences,
                        )
                        event[attribute] = self.__anonymizeTimeStamps(
                            event[attribute],
                            previousTimestamp,
                            nextTimestamp,
                            stepSensitivity,
                            minTimestampDifference,
                            mechanisms[self.__timestamp],
                        )
                    elif eventNr == 0:
                        # First event: the shift is applied to the trace as a whole.
                        self.__performTimestampShift(trace, timeShiftMechanism)
                    continue

                event[attribute] = self.__anonymizeAttribute(
                    event[attribute], mechanisms.get(attribute))
                # Collect the anonymized value of the first event only.
                if eventNr == 0 and attribute == "InfectionSuspected":
                    self.__infectionSuspected.append(event[attribute])

        if tracesDone % 100 == 0:
            print("Iteration " + str(tracesDone))

    return log, self.__infectionSuspected
def __addNumericMechanisms(self, epsilon, mechanisms, domains):
    """Add one ``LaplaceBoundedDomain`` mechanism per attribute found in ``domains``.

    Parameters:
        epsilon: privacy budget set on every created mechanism.
        mechanisms: mapping that receives the new mechanisms, keyed by
            attribute; it is updated in place.
        domains: mapping ``attribute -> domain`` where each domain is indexed
            by ``self.__sensitivity``, ``self.__min`` and ``self.__max``.

    Returns:
        The same ``mechanisms`` object that was passed in.
    """
    for attributeName, domain in domains.items():
        # Read the whole domain description before building the mechanism.
        domainSensitivity = domain[self.__sensitivity]
        domainLow = domain[self.__min]
        domainHigh = domain[self.__max]

        boundedLaplace = privacyMechanisms.LaplaceBoundedDomain()
        boundedLaplace.set_epsilon(epsilon)
        boundedLaplace.set_sensitivity(domainSensitivity)
        boundedLaplace.set_bounds(domainLow, domainHigh)

        mechanisms[attributeName] = boundedLaplace
    return mechanisms
def add_laplace_noise_time(aggregate_type, dfg_time, epsilon_time):
    """Aggregate the time DFG and add non-negative Laplace noise to each entry.

    Args:
        aggregate_type: ``AggregateType`` member. ``AVG`` uses a sensitivity of
            ``1 / len(dfg_time)``; ``MAX``, ``MIN`` and ``SUM`` use ``1``.
        dfg_time: raw time DFG; it is handed to ``calculate_time_dfg`` together
            with ``aggregate_type``.
        epsilon_time: either a single real number used for every key, or a
            container indexed by the keys of the aggregated DFG.

    Returns:
        ``(dfg_time, dfg_time_new)``: the aggregated DFG returned by
        ``calculate_time_dfg`` and a ``Counter`` holding the noisy values.

    Raises:
        ValueError: if ``aggregate_type`` is none of AVG, MAX, MIN or SUM.
    """
    import numbers

    # Sensitivity depends on the kind of aggregate.
    if aggregate_type == AggregateType.AVG:
        sens_time = 1.0 / len(dfg_time.keys())
    elif aggregate_type in (AggregateType.MAX, AggregateType.MIN, AggregateType.SUM):
        sens_time = 1
    else:
        # The previous `assert "<message>"` asserted a non-empty string and
        # therefore could never fail.
        raise ValueError("Wrong aggregate type")

    # calculate the DFG for the time
    dfg_time = calculate_time_dfg(dfg_time, aggregate_type)
    dfg_time_new = Counter()

    # Any real scalar (float, int, NumPy scalar) is one epsilon for the whole
    # DFG; everything else is looked up per key.
    single_epsilon = isinstance(epsilon_time, numbers.Real)

    for key in dfg_time.keys():
        if single_epsilon:
            epsilon = epsilon_time
            # in case epsilon is inf, we don't need to add noise
            skip_noise = epsilon == inf
        else:
            epsilon = epsilon_time[key]
            # `-inf` is already covered by the `< 1e-11` comparison.
            skip_noise = epsilon == inf or epsilon < 1e-11

        if skip_noise:
            dfg_time_new[key] = dfg_time[key]
            continue

        noise = laplace.rvs(loc=0, scale=sens_time / epsilon, size=1)[0]
        # Only the magnitude of the noise is added, so values never decrease.
        dfg_time_new[key] = dfg_time[key] + abs(noise)

    return dfg_time, dfg_time_new
def __setupMechanisms(self, epsilon, distributionOfAttributes):
    """Build the mapping from attribute to privacy mechanism.

    The mapping is filled step by step by ``__addBooleanMechansisms``,
    ``__addNumericMechanisms`` and ``__addCatergoricalMechanisms``; finally an
    entry for the timestamp key (``self.__timestamp``) is added.

    Parameters:
        epsilon: privacy budget passed to every helper and to the timestamp
            mechanism.
        distributionOfAttributes: input from which data types, attribute
            domains and potential values are derived by the helper methods.

    Returns:
        The mechanism mapping returned by the last helper, extended with the
        timestamp entry.
    """
    attributeTypes = self.__determineDataType(distributionOfAttributes)

    registry = self.__addBooleanMechansisms(epsilon, dict(), attributeTypes)

    numericDomains = self.__retrieveAttributeDomains(
        distributionOfAttributes, attributeTypes)
    registry = self.__addNumericMechanisms(epsilon, registry, numericDomains)

    categoricalValues = self.__getPotentialValues(
        distributionOfAttributes, attributeTypes)
    registry = self.__addCatergoricalMechanisms(
        epsilon, registry, attributeTypes, categoricalValues)

    # NOTE(review): the stored value is whatever set_epsilon returns, so this
    # relies on set_epsilon returning the mechanism itself - confirm for the
    # privacyMechanisms version in use.
    timestampMechanism = privacyMechanisms.LaplaceBoundedDomain()
    registry[self.__timestamp] = timestampMechanism.set_epsilon(epsilon)
    return registry