def anonymize(self, log, distributionOfAttributes, epsilon,
              allTimestampDifferences, allTimestamps):
     """Anonymize all trace attributes, event attributes and timestamps of ``log``.

     The log is modified in place: every trace attribute and every event
     attribute is replaced by the value returned from the matching helper.

     Args:
         log: Iterable of traces. Each trace exposes ``attributes`` (a
             mapping), supports ``len()`` and integer indexing, and yields
             events that behave like mappings.
         distributionOfAttributes: Passed to ``__setupMechanisms``; must be
             indexable by the timestamp attribute name.
         epsilon: Privacy parameter handed to every mechanism.
         allTimestampDifferences: Collection of timestamp differences; its
             minimum and maximum are stored on the instance.
         allTimestamps: Collection of timestamps; the span between its
             minimum and maximum (in seconds) is the time-shift sensitivity.

     Returns:
         Tuple ``(log, infectionSuspected)`` where the second element is the
         instance list collecting the anonymized ``"InfectionSuspected"``
         value of the first event of each trace.
     """
     print("Setting up the mechanisms")
     setupStarted = datetime.datetime.now()

     # Global extremes are kept on the instance for use by the helpers.
     self.__maxAllTimestampDifferences = max(allTimestampDifferences)
     self.__minAllTimestampDifferences = min(allTimestampDifferences)
     self.__maxAllTimestamp = max(allTimestamps)
     self.__minAllTimestamp = min(allTimestamps)
     totalSpan = self.__maxAllTimestamp - self.__minAllTimestamp
     shiftSensitivity = totalSpan.total_seconds()

     # Placeholder bounds used only for initialisation; they are overwritten
     # later in __anonymizeTimeStamps and __performTimestampShift.
     placeholderLower = 0
     placeholderUpper = 1
     timeShiftMechanism = privacyMechanisms.LaplaceBoundedDomain(
         epsilon=epsilon,
         sensitivity=shiftSensitivity,
         lower=placeholderLower,
         upper=placeholderUpper)
     # NOTE(review): five positional arguments are passed here; confirm that
     # the signature of __setupMechanisms in this class accepts them.
     mechanisms = self.__setupMechanisms(
         epsilon, distributionOfAttributes, placeholderLower,
         placeholderUpper, shiftSensitivity)
     self.__domainTimestampData = {}

     setupDuration = datetime.datetime.now() - setupStarted
     print("Done with setting up mechanisms after " + str(setupDuration))

     for traceCount, trace in enumerate(log, start=1):
         # Trace-level attributes.
         for attribute in trace.attributes.keys():
             trace.attributes[attribute] = self.__anonymizeAttribute(
                 trace.attributes[attribute], mechanisms.get(attribute))

         # Event-level attributes; the index is needed to reach neighbours.
         for eventNr in range(len(trace)):
             event = trace[eventNr]
             for attribute in event.keys():
                 if attribute == self.__timestamp:
                     if eventNr == 0:
                         # The first timestamp triggers the shift of the
                         # whole trace instead of a per-event perturbation.
                         self.__performTimestampShift(trace,
                                                      timeShiftMechanism)
                         continue
                     # Later timestamps are perturbed relative to their
                     # neighbouring events.
                     before = self.__getTimestamp(trace, eventNr - 1,
                                                  allTimestamps)
                     after = self.__getTimestamp(trace, eventNr + 1,
                                                 allTimestamps)
                     eventSensitivity, minDifference = self.__getTimestampDomain(
                         trace, eventNr,
                         distributionOfAttributes[self.__timestamp],
                         allTimestampDifferences)
                     event[attribute] = self.__anonymizeTimeStamps(
                         event[attribute], before, after, eventSensitivity,
                         minDifference, mechanisms[self.__timestamp])
                     continue

                 event[attribute] = self.__anonymizeAttribute(
                     event[attribute], mechanisms.get(attribute))
                 if eventNr == 0 and attribute == "InfectionSuspected":
                     self.__infectionSuspected.append(event[attribute])

         # Progress output every 100 traces.
         if traceCount % 100 == 0:
             print("Iteration " + str(traceCount))
     return log, self.__infectionSuspected
 def __addNumericMechanisms(self, epsilon, mechanisms, domains):
     """Add one bounded Laplace mechanism per attribute listed in ``domains``.

     Args:
         epsilon: Privacy parameter set on every created mechanism.
         mechanisms: Mapping from attribute name to mechanism; it is updated
             in place and also returned.
         domains: Mapping from attribute name to a mapping that holds the
             attribute's sensitivity, minimum and maximum under the keys
             stored in the instance's sensitivity/min/max name attributes.

     Returns:
         The same ``mechanisms`` mapping, extended by the new entries.
     """
     for attribute, domain in domains.items():
         # Read all three values first so a missing key fails before any
         # mechanism is constructed.
         attributeSensitivity = domain[self.__sensitivity]
         bounds = (domain[self.__min], domain[self.__max])

         boundedLaplace = privacyMechanisms.LaplaceBoundedDomain()
         boundedLaplace.set_epsilon(epsilon)
         boundedLaplace.set_sensitivity(attributeSensitivity)
         boundedLaplace.set_bounds(*bounds)
         mechanisms[attribute] = boundedLaplace
     return mechanisms
Exemple #3
0
def add_laplace_noise_time(aggregate_type, dfg_time, epsilon_time):
    """Add non-negative Laplace noise to an aggregated time DFG.

    The sensitivity depends on ``aggregate_type``: ``1 / number of keys`` of
    ``dfg_time`` for ``AVG`` and ``1`` for ``MAX``, ``MIN`` and ``SUM``. The
    time DFG is then aggregated with ``calculate_time_dfg`` and, for every
    key, the absolute value of a Laplace sample with scale
    ``sensitivity / epsilon`` is added to the aggregated value.

    Args:
        aggregate_type: ``AggregateType`` member; one of AVG, MAX, MIN, SUM.
        dfg_time: Mapping handed to ``calculate_time_dfg``.
        epsilon_time: Either a single number (``int`` or ``float``) applied
            to every key, or a container indexed by the keys of the
            aggregated DFG with one epsilon per key.

    Returns:
        Tuple ``(dfg_time, dfg_time_new)``: the aggregated DFG returned by
        ``calculate_time_dfg`` and a ``Counter`` with the noisy values.

    Raises:
        ValueError: If ``aggregate_type`` is not AVG, MAX, MIN or SUM.
    """
    if aggregate_type == AggregateType.AVG:
        sens_time = 1.0 / len(dfg_time.keys())
    elif aggregate_type in (AggregateType.MAX, AggregateType.MIN,
                            AggregateType.SUM):
        sens_time = 1
    else:
        # The former `assert "Wrong aggregate type"` asserted a non-empty
        # string and therefore could never fail; fail explicitly instead.
        raise ValueError(f"Wrong aggregate type: {aggregate_type!r}")

    # calculate the DFG for the time
    dfg_time = calculate_time_dfg(dfg_time, aggregate_type)
    dfg_time_new = Counter()

    # isinstance (rather than an exact type comparison) also accepts ints and
    # float subclasses as a single epsilon instead of trying to index them.
    single_epsilon = isinstance(epsilon_time, (int, float))

    for key in dfg_time.keys():
        if single_epsilon:
            epsilon = epsilon_time
            # An infinite epsilon means no noise is needed.
            skip_noise = epsilon == inf
        else:
            epsilon = epsilon_time[key]
            # Per-key epsilons that are infinite or below 1e-11 (which
            # includes -inf) leave the value untouched.
            skip_noise = epsilon == inf or epsilon < 1e-11

        if skip_noise:
            dfg_time_new[key] = dfg_time[key]
            continue

        noise = laplace.rvs(loc=0, scale=sens_time / epsilon, size=1)[0]
        # abs() keeps the noise non-negative, so values are only increased.
        dfg_time_new[key] = dfg_time[key] + abs(noise)

    return dfg_time, dfg_time_new
 def __setupMechanisms(self, epsilon, distributionOfAttributes):
     """Create the mapping from attribute name to privacy mechanism.

     Boolean, numeric and categorical mechanisms are added in that order by
     the corresponding helper methods; finally a bounded Laplace mechanism
     is registered for the timestamp attribute.

     Args:
         epsilon: Privacy parameter passed to every helper and mechanism.
         distributionOfAttributes: Attribute distribution used to derive the
             data types, numeric domains and potential categorical values.

     Returns:
         The mapping from attribute name to mechanism.
     """
     attributeTypes = self.__determineDataType(distributionOfAttributes)

     mechanisms = self.__addBooleanMechansisms(epsilon, {}, attributeTypes)

     numericDomains = self.__retrieveAttributeDomains(
         distributionOfAttributes, attributeTypes)
     mechanisms = self.__addNumericMechanisms(
         epsilon, mechanisms, numericDomains)

     categoryValues = self.__getPotentialValues(
         distributionOfAttributes, attributeTypes)
     mechanisms = self.__addCatergoricalMechanisms(
         epsilon, mechanisms, attributeTypes, categoryValues)

     # NOTE(review): the stored value is whatever set_epsilon returns; this
     # presumably is the mechanism itself - confirm for the library version
     # in use.
     timestampMechanism = privacyMechanisms.LaplaceBoundedDomain()
     mechanisms[self.__timestamp] = timestampMechanism.set_epsilon(epsilon)
     return mechanisms