def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument('--s3name', '-S3', type=str,
                            choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
                            help='Name of the dataset to run on')
    datasource.add_argument('--datapath', '-D', type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig', '-RC', default='runconfig_mia.json', type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument('--outdir', '-O', default='outputs/test', type=str,
                           help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    seed(SEED)

    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    print(rawPop.info())

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    ########################
    #### GAME INPUTS #######
    ########################
    # Train test split
    rawTrain = rawPop.query(runconfig['dataFilter']['train'])
    rawTest = rawPop.query(runconfig['dataFilter']['test'])

    # Pick targets
    targetIDs = choice(list(rawTrain.index), size=runconfig['nTargets'], replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawTrain.loc[targetIDs, :]

    # Drop targets from population
    rawTrainWoTargets = rawTrain.drop(targetIDs)

    # Get test target records
    testRecordIDs = choice(list(rawTest.index), size=runconfig['nTargets'], replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['TestRecords'] is not None:
        testRecordIDs.extend(runconfig['TestRecords'])

    testRecords = rawTest.loc[testRecordIDs, :]

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    utilityTasks = []
    for taskName, paramsList in runconfig['utilityTasks'].items():
        if taskName == 'RandForestClass':
            for params in paramsList:
                utilityTasks.append(RandForestClassTask(metadata, *params))
        elif taskName == 'LogRegClass':
            for params in paramsList:
                utilityTasks.append(LogRegClassTask(metadata, *params))
        elif taskName == 'LinReg':
            for params in paramsList:
                utilityTasks.append(LinRegTask(metadata, *params))

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetUtility = {
        ut.__name__: {gm.__name__: {} for gm in gmList + sanList}
        for ut in utilityTasks
    }
    resultsAggUtility = {
        ut.__name__: {
            gm.__name__: {'TargetID': [], 'Accuracy': []}
            for gm in gmList + sanList
        }
        for ut in utilityTasks
    }

    # Add entry for raw
    for ut in utilityTasks:
        resultsTargetUtility[ut.__name__]['Raw'] = {}
        resultsAggUtility[ut.__name__]['Raw'] = {'TargetID': [], 'Accuracy': []}

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')

        # Draw a raw dataset
        rIdx = choice(list(rawTrainWoTargets.index), size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawTrain.loc[rIdx]

        LOGGER.info('Start: Utility evaluation on Raw...')

        # Get utility from raw without targets
        for ut in utilityTasks:
            resultsTargetUtility[ut.__name__]['Raw'][nr] = {}

            predErrorTargets = []
            predErrorAggr = []
            for _ in range(runconfig['nSynT']):
                ut.train(rawTout)
                predErrorTargets.append(ut.evaluate(testRecords))
                predErrorAggr.append(ut.evaluate(rawTest))

            resultsTargetUtility[ut.__name__]['Raw'][nr]['OUT'] = {
                'TestRecordID': testRecordIDs,
                'Accuracy': list(mean(predErrorTargets, axis=0))
            }

            resultsAggUtility[ut.__name__]['Raw']['TargetID'].append('OUT')
            resultsAggUtility[ut.__name__]['Raw']['Accuracy'].append(mean(predErrorAggr))

        # Get utility from raw with each target
        for tid in targetIDs:
            target = targets.loc[[tid]]
            rawIn = rawTout.append(target)

            for ut in utilityTasks:
                predErrorTargets = []
                predErrorAggr = []
                for _ in range(runconfig['nSynT']):
                    ut.train(rawIn)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__]['Raw'][nr][tid] = {
                    'TestRecordID': testRecordIDs,
                    'Accuracy': list(mean(predErrorTargets, axis=0))
                }

                resultsAggUtility[ut.__name__]['Raw']['TargetID'].append(tid)
                resultsAggUtility[ut.__name__]['Raw']['Accuracy'].append(mean(predErrorAggr))

        LOGGER.info('Finished: Utility evaluation on Raw.')

        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')

            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]

            # Util evaluation for synthetic without all targets
            for ut in utilityTasks:
                resultsTargetUtility[ut.__name__][GenModel.__name__][nr] = {}

                predErrorTargets = []
                predErrorAggr = []
                for syn in synTwithoutTarget:
                    ut.train(syn)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__][GenModel.__name__][nr]['OUT'] = {
                    'TestRecordID': testRecordIDs,
                    'Accuracy': list(mean(predErrorTargets, axis=0))
                }

                resultsAggUtility[ut.__name__][GenModel.__name__]['TargetID'].append('OUT')
                resultsAggUtility[ut.__name__][GenModel.__name__]['Accuracy'].append(
                    mean(predErrorAggr))

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                rawTin = rawTout.append(target)

                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]

                # Util evaluation for synthetic with this target
                for ut in utilityTasks:
                    predErrorTargets = []
                    predErrorAggr = []
                    for syn in synTwithTarget:
                        ut.train(syn)
                        predErrorTargets.append(ut.evaluate(testRecords))
                        predErrorAggr.append(ut.evaluate(rawTest))

                    resultsTargetUtility[ut.__name__][GenModel.__name__][nr][tid] = {
                        'TestRecordID': testRecordIDs,
                        'Accuracy': list(mean(predErrorTargets, axis=0))
                    }

                    resultsAggUtility[ut.__name__][GenModel.__name__]['TargetID'].append(tid)
                    resultsAggUtility[ut.__name__][GenModel.__name__]['Accuracy'].append(
                        mean(predErrorAggr))

            del synTwithoutTarget, synTwithTarget

            LOGGER.info(f'Finished: Evaluation for model {GenModel.__name__}.')

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')

            sanOut = San.sanitise(rawTout)

            for ut in utilityTasks:
                resultsTargetUtility[ut.__name__][San.__name__][nr] = {}

                predErrorTargets = []
                predErrorAggr = []
                for _ in range(runconfig['nSynT']):
                    ut.train(sanOut)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__][San.__name__][nr]['OUT'] = {
                    'TestRecordID': testRecordIDs,
                    'Accuracy': list(mean(predErrorTargets, axis=0))
                }

                resultsAggUtility[ut.__name__][San.__name__]['TargetID'].append('OUT')
                resultsAggUtility[ut.__name__][San.__name__]['Accuracy'].append(
                    mean(predErrorAggr))

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                for ut in utilityTasks:
                    predErrorTargets = []
                    predErrorAggr = []
                    for _ in range(runconfig['nSynT']):
                        ut.train(sanIn)
                        predErrorTargets.append(ut.evaluate(testRecords))
                        predErrorAggr.append(ut.evaluate(rawTest))

                    resultsTargetUtility[ut.__name__][San.__name__][nr][tid] = {
                        'TestRecordID': testRecordIDs,
                        'Accuracy': list(mean(predErrorTargets, axis=0))
                    }

                    resultsAggUtility[ut.__name__][San.__name__]['TargetID'].append(tid)
                    resultsAggUtility[ut.__name__][San.__name__]['Accuracy'].append(
                        mean(predErrorAggr))

            del sanOut, sanIn

            LOGGER.info(f'Finished: Evaluation for sanitiser {San.__name__}.')

    outfile = f"ResultsUtilTargets_{dname}"
    LOGGER.info(f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetUtility, f, indent=2, default=json_numpy_serialzer)

    outfile = f"ResultsUtilAgg_{dname}"
    LOGGER.info(f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsAggUtility, f, indent=2, default=json_numpy_serialzer)
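# Illustrative sketch only: a minimal runconfig for the utility evaluation in the
# preceding main(). The key names mirror the runconfig lookups in the code; every
# value below is a placeholder, and the parameter lists assume constructors/tasks
# accept these positional arguments, which may not hold for the actual classes.
EXAMPLE_UTILITY_RUNCONFIG = {
    "nIter": 5,                    # number of game iterations
    "nTargets": 5,                 # randomly drawn target records
    "Targets": None,               # optional fixed target record IDs
    "TestRecords": None,           # optional fixed test record IDs
    "sizeRawT": 1000,              # records per raw training sample
    "sizeSynT": 1000,              # records per synthetic dataset
    "nSynT": 10,                   # synthetic datasets / repetitions per model
    "dataFilter": {                # pandas .query() strings for the train/test split
        "train": "split == 'train'",
        "test": "split == 'test'"
    },
    "generativeModels": {          # model name -> list of positional-arg lists
        "IndependentHistogram": [[]]
    },
    "sanitisationTechniques": {    # sanitiser name -> list of positional-arg lists
        "SanitiserNHS": [[]]
    },
    "utilityTasks": {              # task name -> list of positional-arg lists
        "RandForestClass": [["label_column"]]
    }
}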
def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument('--s3name', '-S3', type=str,
                            choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
                            help='Name of the dataset to run on')
    datasource.add_argument('--datapath', '-D', type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig', '-RC', default='runconfig_mia.json', type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument('--outdir', '-O', default='tests', type=str,
                           help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    print(rawPop.info())

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    seed(SEED)

    ########################
    #### GAME INPUTS #######
    ########################
    # Pick targets
    targetIDs = choice(list(rawPop.index), size=runconfig['nTargets'], replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawPop.loc[targetIDs, :]

    # Drop targets from population
    rawPopDropTargets = rawPop.drop(targetIDs)

    # Init adversary's prior knowledge
    rawAidx = choice(list(rawPopDropTargets.index), size=runconfig['sizeRawA'],
                     replace=False).tolist()
    rawA = rawPop.loc[rawAidx, :]

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    ##################################
    ####### ATTACK TRAINING ##########
    ##################################
    print('\n---- Attack training ----')
    attacks = {}

    for tid in targetIDs:
        print(f'\n--- Adversary picks target {tid} ---')
        target = targets.loc[[tid]]
        attacks[tid] = {}

        for San in sanList:
            LOGGER.info(f'Start: Attack training for {San.__name__}...')

            attacks[tid][San.__name__] = {}

            # Generate example datasets for training attack classifier
            sanA, labelsA = generate_mia_anon_data(San, target, rawA, runconfig['sizeRawT'],
                                                   runconfig['nShadows'] * runconfig['nSynA'])

            # Train attack on shadow data
            for Feature in [
                    NaiveFeatureSet(DataFrame),
                    HistogramFeatureSet(DataFrame, metadata,
                                        nbins=San.histogram_size, quids=San.quids),
                    CorrelationsFeatureSet(DataFrame, metadata, quids=San.quids),
                    EnsembleFeatureSet(DataFrame, metadata,
                                       nbins=San.histogram_size, quasi_id_cols=San.quids)
            ]:
                Attack = MIAttackClassifierRandomForest(metadata=metadata, FeatureSet=Feature,
                                                        quids=San.quids)
                Attack.train(sanA, labelsA)
                attacks[tid][San.__name__][f'{Feature.__name__}'] = Attack

            # Clean up
            del sanA, labelsA

            LOGGER.info('Finished: Attack training.')

        for GenModel in gmList:
            LOGGER.info(f'Start: Attack training for {GenModel.__name__}...')

            attacks[tid][GenModel.__name__] = {}

            # Generate shadow model data for training attacks on this target
            synA, labelsSA = generate_mia_shadow_data(GenModel, target, rawA,
                                                      runconfig['sizeRawT'], runconfig['sizeSynT'],
                                                      runconfig['nShadows'], runconfig['nSynA'])

            # Train attack on shadow data
            for Feature in [
                    NaiveFeatureSet(GenModel.datatype),
                    HistogramFeatureSet(GenModel.datatype, metadata),
                    CorrelationsFeatureSet(GenModel.datatype, metadata)
            ]:
                Attack = MIAttackClassifierRandomForest(metadata, Feature)
                Attack.train(synA, labelsSA)
                attacks[tid][GenModel.__name__][f'{Feature.__name__}'] = Attack

            # Clean up
            del synA, labelsSA

            LOGGER.info('Finished: Attack training.')

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetPrivacy = {
        tid: {gm.__name__: {} for gm in gmList + sanList}
        for tid in targetIDs
    }

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')

        # Draw a raw dataset
        rIdx = choice(list(rawPopDropTargets.index), size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawPopDropTargets.loc[rIdx]

        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')

            # Train a generative model
            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]
            synLabelsOut = [LABEL_OUT for _ in range(runconfig['nSynT'])]

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                resultsTargetPrivacy[tid][f'{GenModel.__name__}'][nr] = {}

                rawTin = rawTout.append(target)
                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]
                synLabelsIn = [LABEL_IN for _ in range(runconfig['nSynT'])]

                synT = synTwithoutTarget + synTwithTarget
                synTlabels = synLabelsOut + synLabelsIn

                # Run attacks
                for feature, Attack in attacks[tid][f'{GenModel.__name__}'].items():
                    # Produce a guess for each synthetic dataset
                    attackerGuesses = Attack.attack(synT)

                    resDict = {
                        'Secret': synTlabels,
                        'AttackerGuess': attackerGuesses
                    }
                    resultsTargetPrivacy[tid][f'{GenModel.__name__}'][nr][feature] = resDict

            del synT, synTwithoutTarget, synTwithTarget

            LOGGER.info(f'Finished: Evaluation for model {GenModel.__name__}.')

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')

            sanOut = San.sanitise(rawTout)

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                resultsTargetPrivacy[tid][San.__name__][nr] = {}

                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                sanT = [sanOut, sanIn]
                sanTLabels = [LABEL_OUT, LABEL_IN]

                # Run attacks
                for feature, Attack in attacks[tid][San.__name__].items():
                    # Produce a guess for each sanitised dataset
                    attackerGuesses = Attack.attack(sanT, attemptLinkage=True, target=target)

                    resDict = {
                        'Secret': sanTLabels,
                        'AttackerGuess': attackerGuesses
                    }
                    resultsTargetPrivacy[tid][San.__name__][nr][feature] = resDict

            del sanT, sanOut, sanIn

            LOGGER.info(f'Finished: Evaluation for sanitiser {San.__name__}.')

    outfile = f"ResultsMIA_{dname}"
    LOGGER.info(f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetPrivacy, f, indent=2, default=json_numpy_serialzer)
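# Illustrative sketch only: the membership-inference game in the preceding main()
# reads the keys below from its runconfig. Key names mirror the lookups in the code;
# every value is a placeholder, and 'generativeModels' / 'sanitisationTechniques'
# follow the same name -> list-of-positional-arg-lists format as the utility sketch above.
EXAMPLE_MIA_RUNCONFIG = {
    "nIter": 5,         # game iterations
    "nTargets": 5,      # randomly drawn target records
    "Targets": None,    # optional fixed target record IDs
    "sizeRawT": 1000,   # records per raw training sample
    "sizeSynT": 1000,   # records per synthetic dataset
    "nSynT": 10,        # synthetic datasets released per trained model
    "sizeRawA": 500,    # size of the adversary's prior-knowledge sample rawA
    "nShadows": 10,     # shadow-model copies (see generate_mia_shadow_data call)
    "nSynA": 10         # shadow datasets per shadow model (see same call)
}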
def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument('--s3name', '-S3', type=str,
                            choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
                            help='Name of the dataset to run on')
    datasource.add_argument('--datapath', '-D', type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig', '-RC', default='runconfig_mia.json', type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument('--outdir', '-O', default='tests', type=str,
                           help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    print(rawPop.info())

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    seed(SEED)

    ########################
    #### GAME INPUTS #######
    ########################
    # Pick targets
    targetIDs = choice(list(rawPop.index), size=runconfig['nTargets'], replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawPop.loc[targetIDs, :]

    # Drop targets from population
    rawPopDropTargets = rawPop.drop(targetIDs)

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetPrivacy = {
        tid: {
            sa: {gm.__name__: {} for gm in gmList + sanList}
            for sa in runconfig['sensitiveAttributes']
        }
        for tid in targetIDs
    }

    # Add entry for raw
    for tid in targetIDs:
        for sa in runconfig['sensitiveAttributes']:
            resultsTargetPrivacy[tid][sa]['Raw'] = {}

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')

        # Draw a raw dataset
        rIdx = choice(list(rawPopDropTargets.index), size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawPopDropTargets.loc[rIdx]

        ###############
        ## ATTACKS ####
        ###############
        attacks = {}
        for sa, atype in runconfig['sensitiveAttributes'].items():
            if atype == 'LinReg':
                attacks[sa] = LinRegAttack(sensitiveAttribute=sa, metadata=metadata)
            elif atype == 'Classification':
                attacks[sa] = RandForestAttack(sensitiveAttribute=sa, metadata=metadata)

        #### Assess advantage raw
        for sa, Attack in attacks.items():
            Attack.train(rawTout)

            for tid in targetIDs:
                target = targets.loc[[tid]]
                targetAux = target.loc[[tid], Attack.knownAttributes]
                targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                guess = Attack.attack(targetAux, attemptLinkage=True, data=rawTout)
                pCorrect = Attack.get_likelihood(targetAux, targetSecret,
                                                 attemptLinkage=True, data=rawTout)

                resultsTargetPrivacy[tid][sa]['Raw'][nr] = {
                    'AttackerGuess': [guess],
                    'ProbCorrect': [pCorrect],
                    'TargetPresence': [LABEL_OUT]
                }

        for tid in targetIDs:
            target = targets.loc[[tid]]
            rawTin = rawTout.append(target)

            for sa, Attack in attacks.items():
                targetAux = target.loc[[tid], Attack.knownAttributes]
                targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                guess = Attack.attack(targetAux, attemptLinkage=True, data=rawTin)
                pCorrect = Attack.get_likelihood(targetAux, targetSecret,
                                                 attemptLinkage=True, data=rawTin)

                resultsTargetPrivacy[tid][sa]['Raw'][nr]['AttackerGuess'].append(guess)
                resultsTargetPrivacy[tid][sa]['Raw'][nr]['ProbCorrect'].append(pCorrect)
                resultsTargetPrivacy[tid][sa]['Raw'][nr]['TargetPresence'].append(LABEL_IN)

        ##### Assess advantage Syn
        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')

            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]

            for sa, Attack in attacks.items():
                for tid in targetIDs:
                    resultsTargetPrivacy[tid][sa][GenModel.__name__][nr] = {
                        'AttackerGuess': [],
                        'ProbCorrect': [],
                        'TargetPresence': [LABEL_OUT for _ in range(runconfig['nSynT'])]
                    }

                for syn in synTwithoutTarget:
                    Attack.train(syn)

                    for tid in targetIDs:
                        target = targets.loc[[tid]]
                        targetAux = target.loc[[tid], Attack.knownAttributes]
                        targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                        guess = Attack.attack(targetAux)
                        pCorrect = Attack.get_likelihood(targetAux, targetSecret)

                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr]['AttackerGuess'].append(guess)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr]['ProbCorrect'].append(pCorrect)

            del synTwithoutTarget

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                rawTin = rawTout.append(target)
                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]

                for sa, Attack in attacks.items():
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    for syn in synTwithTarget:
                        Attack.train(syn)

                        guess = Attack.attack(targetAux)
                        pCorrect = Attack.get_likelihood(targetAux, targetSecret)

                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr]['AttackerGuess'].append(guess)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr]['ProbCorrect'].append(pCorrect)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr]['TargetPresence'].append(LABEL_IN)

                del synTwithTarget

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')

            attacks = {}
            for sa, atype in runconfig['sensitiveAttributes'].items():
                if atype == 'LinReg':
                    attacks[sa] = LinRegAttack(sensitiveAttribute=sa, metadata=metadata,
                                               quids=San.quids)
                elif atype == 'Classification':
                    attacks[sa] = RandForestAttack(sensitiveAttribute=sa, metadata=metadata,
                                                   quids=San.quids)

            sanOut = San.sanitise(rawTout)

            for sa, Attack in attacks.items():
                Attack.train(sanOut)

                for tid in targetIDs:
                    target = targets.loc[[tid]]
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    guess = Attack.attack(targetAux, attemptLinkage=True, data=sanOut)
                    pCorrect = Attack.get_likelihood(targetAux, targetSecret,
                                                     attemptLinkage=True, data=sanOut)

                    resultsTargetPrivacy[tid][sa][San.__name__][nr] = {
                        'AttackerGuess': [guess],
                        'ProbCorrect': [pCorrect],
                        'TargetPresence': [LABEL_OUT]
                    }

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                for sa, Attack in attacks.items():
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    Attack.train(sanIn)

                    guess = Attack.attack(targetAux, attemptLinkage=True, data=sanIn)
                    pCorrect = Attack.get_likelihood(targetAux, targetSecret,
                                                     attemptLinkage=True, data=sanIn)

                    resultsTargetPrivacy[tid][sa][San.__name__][nr]['AttackerGuess'].append(guess)
                    resultsTargetPrivacy[tid][sa][San.__name__][nr]['ProbCorrect'].append(pCorrect)
                    resultsTargetPrivacy[tid][sa][San.__name__][nr]['TargetPresence'].append(LABEL_IN)

    outfile = f"ResultsMLEAI_{dname}"
    LOGGER.info(f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetPrivacy, f, indent=2, default=json_numpy_serialzer)
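# Illustrative sketch only: the attribute-inference game in the preceding main()
# additionally expects a 'sensitiveAttributes' mapping from column name to attack
# type ('LinReg' -> LinRegAttack, 'Classification' -> RandForestAttack), alongside
# the shared keys used above (nIter, nTargets, Targets, sizeRawT, sizeSynT, nSynT,
# generativeModels, sanitisationTechniques). The column names below are placeholders.
EXAMPLE_SENSITIVE_ATTRIBUTES = {
    "income": "LinReg",            # continuous secret attribute
    "education": "Classification"  # categorical secret attribute
}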
async def messages(req) -> Response:
    # Handler for POST /api/messages (registered on APP below). Signature and the
    # JSON content-type check assume the standard aiohttp Bot Framework handler.
    if "application/json" in req.headers["Content-Type"]:
        body = await req.json()
    else:
        return Response(status=415)

    activity = Activity().deserialize(body)
    auth_header = req.headers[AUTHORIZATION_HEADER] if AUTHORIZATION_HEADER in req.headers else ""

    try:
        await ERROR_ADAPTER.process_activity(activity, auth_header, BOT.on_turn)
        return Response(status=201)
    except Exception as exception:
        LOGGER.error(msg=f"An unexpected exception={exception} has occurred")
        raise exception


APP = web.Application()
APP.router.add_post("/api/messages", messages)

if __name__ == "__main__":
    try:
        LOGGER.info(msg=f"Starting application at host='{CONFIG.HOST}' on port={CONFIG.PORT}")
        web.run_app(APP, host=CONFIG.HOST, port=CONFIG.PORT)
    except Exception as error:
        LOGGER.error(msg=f"Application initialization failed with an error={error}")
        raise error