def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument(
        '--s3name',
        '-S3',
        type=str,
        choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
        help='Name of the dataset to run on')
    datasource.add_argument('--datapath',
                            '-D',
                            type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig',
                           '-RC',
                           default='runconfig_mia.json',
                           type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument(
        '--outdir',
        '-O',
        default='outputs/test',
        type=str,
        help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    seed(SEED)
    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)
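    # The runconfig JSON drives the whole experiment. The keys below are the
    # ones this script actually reads; the values are illustrative
    # placeholders, not the repository's shipped settings:
    #   {
    #     "dataFilter": {"train": "<pandas query>", "test": "<pandas query>"},
    #     "nTargets": 5, "Targets": null, "TestRecords": null,
    #     "nIter": 10, "sizeRawT": 1000, "nSynT": 10, "sizeSynT": 1000,
    #     "generativeModels": {"IndependentHistogram": [[...]], ...},
    #     "sanitisationTechniques": {"SanitiserNHS": [[...]], ...},
    #     "utilityTasks": {"RandForestClass": [[...]], ...}
    #   }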

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    rawPop.info()  # .info() prints its summary directly and returns None

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    ########################
    #### GAME INPUTS #######
    ########################
    # Train test split
    rawTrain = rawPop.query(runconfig['dataFilter']['train'])
    rawTest = rawPop.query(runconfig['dataFilter']['test'])

    # Pick targets
    targetIDs = choice(list(rawTrain.index),
                       size=runconfig['nTargets'],
                       replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawTrain.loc[targetIDs, :]

    # Drop targets from population
    rawTrainWoTargets = rawTrain.drop(targetIDs)

    # Get test target records
    testRecordIDs = choice(list(rawTest.index),
                           size=runconfig['nTargets'],
                           replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['TestRecords'] is not None:
        testRecordIDs.extend(runconfig['TestRecords'])

    testRecords = rawTest.loc[testRecordIDs, :]

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    utilityTasks = []
    for taskName, paramsList in runconfig['utilityTasks'].items():
        if taskName == 'RandForestClass':
            for params in paramsList:
                utilityTasks.append(RandForestClassTask(metadata, *params))
        elif taskName == 'LogRegClass':
            for params in paramsList:
                utilityTasks.append(LogRegClassTask(metadata, *params))
        elif taskName == 'LinReg':
            for params in paramsList:
                utilityTasks.append(LinRegTask(metadata, *params))

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetUtility = {
        ut.__name__: {gm.__name__: {}
                      for gm in gmList + sanList}
        for ut in utilityTasks
    }
    resultsAggUtility = {
        ut.__name__: {
            gm.__name__: {
                'TargetID': [],
                'Accuracy': []
            }
            for gm in gmList + sanList
        }
        for ut in utilityTasks
    }
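    # Shapes filled in during the game loop below:
    #   resultsTargetUtility[task][model][iteration][targetID or 'OUT']
    #       = {'TestRecordID': [...], 'Accuracy': [...]}
    #   resultsAggUtility[task][model]
    #       = {'TargetID': [...], 'Accuracy': [...]}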

    # Add entry for raw
    for ut in utilityTasks:
        resultsTargetUtility[ut.__name__]['Raw'] = {}
        resultsAggUtility[ut.__name__]['Raw'] = {
            'TargetID': [],
            'Accuracy': []
        }

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')
        # Draw a raw dataset
        rIdx = choice(list(rawTrainWoTargets.index),
                      size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawTrain.loc[rIdx]

        LOGGER.info('Start: Utility evaluation on Raw...')
        # Get utility from raw without targets
        for ut in utilityTasks:
            resultsTargetUtility[ut.__name__]['Raw'][nr] = {}

            predErrorTargets = []
            predErrorAggr = []
            for _ in range(runconfig['nSynT']):
                ut.train(rawTout)
                predErrorTargets.append(ut.evaluate(testRecords))
                predErrorAggr.append(ut.evaluate(rawTest))

            resultsTargetUtility[ut.__name__]['Raw'][nr]['OUT'] = {
                'TestRecordID': testRecordIDs,
                'Accuracy': list(mean(predErrorTargets, axis=0))
            }

            resultsAggUtility[ut.__name__]['Raw']['TargetID'].append('OUT')
            resultsAggUtility[ut.__name__]['Raw']['Accuracy'].append(
                mean(predErrorAggr))

        # Get utility from raw with each target
        for tid in targetIDs:
            target = targets.loc[[tid]]
            rawIn = rawTout.append(target)

            for ut in utilityTasks:
                predErrorTargets = []
                predErrorAggr = []
                for _ in range(runconfig['nSynT']):
                    ut.train(rawIn)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__]['Raw'][nr][tid] = {
                    'TestRecordID': testRecordIDs,
                    'Accuracy': list(mean(predErrorTargets, axis=0))
                }

                resultsAggUtility[ut.__name__]['Raw']['TargetID'].append(tid)
                resultsAggUtility[ut.__name__]['Raw']['Accuracy'].append(
                    mean(predErrorAggr))

        LOGGER.info('Finished: Utility evaluation on Raw.')

        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')
            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]

            # Util evaluation for synthetic without all targets
            for ut in utilityTasks:
                resultsTargetUtility[ut.__name__][GenModel.__name__][nr] = {}

                predErrorTargets = []
                predErrorAggr = []
                for syn in synTwithoutTarget:
                    ut.train(syn)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__][
                    GenModel.__name__][nr]['OUT'] = {
                        'TestRecordID': testRecordIDs,
                        'Accuracy': list(mean(predErrorTargets, axis=0))
                    }

                resultsAggUtility[ut.__name__][
                    GenModel.__name__]['TargetID'].append('OUT')
                resultsAggUtility[ut.__name__][
                    GenModel.__name__]['Accuracy'].append(mean(predErrorAggr))

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                rawTin = rawTout.append(target)
                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]

                # Util evaluation for synthetic with this target
                for ut in utilityTasks:
                    predErrorTargets = []
                    predErrorAggr = []
                    for syn in synTwithTarget:
                        ut.train(syn)
                        predErrorTargets.append(ut.evaluate(testRecords))
                        predErrorAggr.append(ut.evaluate(rawTest))

                    resultsTargetUtility[ut.__name__][
                        GenModel.__name__][nr][tid] = {
                            'TestRecordID': testRecordIDs,
                            'Accuracy': list(mean(predErrorTargets, axis=0))
                        }

                    resultsAggUtility[ut.__name__][
                        GenModel.__name__]['TargetID'].append(tid)
                    resultsAggUtility[ut.__name__][
                        GenModel.__name__]['Accuracy'].append(
                            mean(predErrorAggr))

            del synTwithoutTarget, synTwithTarget

            LOGGER.info(f'Finished: Evaluation for model {GenModel.__name__}.')

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')
            sanOut = San.sanitise(rawTout)

            for ut in utilityTasks:
                resultsTargetUtility[ut.__name__][San.__name__][nr] = {}

                predErrorTargets = []
                predErrorAggr = []
                for _ in range(runconfig['nSynT']):
                    ut.train(sanOut)
                    predErrorTargets.append(ut.evaluate(testRecords))
                    predErrorAggr.append(ut.evaluate(rawTest))

                resultsTargetUtility[ut.__name__][San.__name__][nr]['OUT'] = {
                    'TestRecordID': testRecordIDs,
                    'Accuracy': list(mean(predErrorTargets, axis=0))
                }

                resultsAggUtility[ut.__name__][
                    San.__name__]['TargetID'].append('OUT')
                resultsAggUtility[ut.__name__][
                    San.__name__]['Accuracy'].append(mean(predErrorAggr))

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]

                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                for ut in utilityTasks:
                    predErrorTargets = []
                    predErrorAggr = []
                    for _ in range(runconfig['nSynT']):
                        ut.train(sanIn)
                        predErrorTargets.append(ut.evaluate(testRecords))
                        predErrorAggr.append(ut.evaluate(rawTest))

                    resultsTargetUtility[ut.__name__][
                        San.__name__][nr][tid] = {
                            'TestRecordID': testRecordIDs,
                            'Accuracy': list(mean(predErrorTargets, axis=0))
                        }

                    resultsAggUtility[ut.__name__][
                        San.__name__]['TargetID'].append(tid)
                    resultsAggUtility[ut.__name__][
                        San.__name__]['Accuracy'].append(mean(predErrorAggr))

            del sanOut, sanIn

            LOGGER.info(f'Finished: Evaluation for sanitiser {San.__name__}.')

    outfile = f"ResultsUtilTargets_{dname}"
    LOGGER.info(
        f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetUtility,
                  f,
                  indent=2,
                  default=json_numpy_serialzer)

    outfile = f"ResultsUtilAgg_{dname}"
    LOGGER.info(
        f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsAggUtility, f, indent=2, default=json_numpy_serialzer)
def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument(
        '--s3name',
        '-S3',
        type=str,
        choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
        help='Name of the dataset to run on')
    datasource.add_argument('--datapath',
                            '-D',
                            type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig',
                           '-RC',
                           default='runconfig_mia.json',
                           type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument(
        '--outdir',
        '-O',
        default='tests',
        type=str,
        help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)
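    # Besides the keys shared with the utility script, this MIA script reads
    # 'sizeRawA' (size of the adversary's prior-knowledge sample drawn below),
    # plus 'nShadows' and 'nSynA' (how much shadow data to generate for
    # training the attack classifiers).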

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    rawPop.info()

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    seed(SEED)

    ########################
    #### GAME INPUTS #######
    ########################
    # Pick targets
    targetIDs = choice(list(rawPop.index),
                       size=runconfig['nTargets'],
                       replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawPop.loc[targetIDs, :]

    # Drop targets from population
    rawPopDropTargets = rawPop.drop(targetIDs)

    # Init adversary's prior knowledge
    rawAidx = choice(list(rawPopDropTargets.index),
                     size=runconfig['sizeRawA'],
                     replace=False).tolist()
    rawA = rawPop.loc[rawAidx, :]

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    ##################################
    #### ATTACK TRAINING #############
    ##################################
    print('\n---- Attack training ----')
    attacks = {}

    for tid in targetIDs:
        print(f'\n--- Adversary picks target {tid} ---')
        target = targets.loc[[tid]]
        attacks[tid] = {}
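        # attacks[tid][model_or_sanitiser][featureSet] will hold a trained
        # MIAttackClassifierRandomForest for this target.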

        for San in sanList:
            LOGGER.info(f'Start: Attack training for {San.__name__}...')

            attacks[tid][San.__name__] = {}

            # Generate example datasets for training attack classifier
            sanA, labelsA = generate_mia_anon_data(
                San, target, rawA, runconfig['sizeRawT'],
                runconfig['nShadows'] * runconfig['nSynA'])

            # Train attack on shadow data
            for Feature in [
                    NaiveFeatureSet(DataFrame),
                    HistogramFeatureSet(DataFrame,
                                        metadata,
                                        nbins=San.histogram_size,
                                        quids=San.quids),
                    CorrelationsFeatureSet(DataFrame,
                                           metadata,
                                           quids=San.quids),
                    EnsembleFeatureSet(DataFrame,
                                       metadata,
                                       nbins=San.histogram_size,
                                       quasi_id_cols=San.quids)
            ]:

                Attack = MIAttackClassifierRandomForest(metadata=metadata,
                                                        FeatureSet=Feature,
                                                        quids=San.quids)
                Attack.train(sanA, labelsA)
                attacks[tid][San.__name__][f'{Feature.__name__}'] = Attack

            # Clean up
            del sanA, labelsA

            LOGGER.info('Finished: Attack training.')

        for GenModel in gmList:
            LOGGER.info(f'Start: Attack training for {GenModel.__name__}...')

            attacks[tid][GenModel.__name__] = {}

            # Generate shadow model data for training attacks on this target
            synA, labelsSA = generate_mia_shadow_data(GenModel, target, rawA,
                                                      runconfig['sizeRawT'],
                                                      runconfig['sizeSynT'],
                                                      runconfig['nShadows'],
                                                      runconfig['nSynA'])

            # Train attack on shadow data
            for Feature in [
                    NaiveFeatureSet(GenModel.datatype),
                    HistogramFeatureSet(GenModel.datatype, metadata),
                    CorrelationsFeatureSet(GenModel.datatype, metadata)
            ]:
                Attack = MIAttackClassifierRandomForest(metadata, Feature)
                Attack.train(synA, labelsSA)
                attacks[tid][GenModel.__name__][f'{Feature.__name__}'] = Attack

            # Clean up
            del synA, labelsSA

            LOGGER.info('Finished: Attack training.')

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetPrivacy = {
        tid: {gm.__name__: {}
              for gm in gmList + sanList}
        for tid in targetIDs
    }
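    # Filled in below as:
    #   resultsTargetPrivacy[tid][model][iteration][featureSet]
    #       = {'Secret': [...], 'AttackerGuess': [...]}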

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')
        # Draw a raw dataset
        rIdx = choice(list(rawPopDropTargets.index),
                      size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawPopDropTargets.loc[rIdx]

        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')
            # Train a generative model
            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]
            synLabelsOut = [LABEL_OUT for _ in range(runconfig['nSynT'])]
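            # LABEL_OUT marks synthetic data generated without the target
            # record; LABEL_IN (below) marks data generated after adding it.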

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                resultsTargetPrivacy[tid][f'{GenModel.__name__}'][nr] = {}

                rawTin = rawTout.append(target)
                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]
                synLabelsIn = [LABEL_IN for _ in range(runconfig['nSynT'])]

                synT = synTwithoutTarget + synTwithTarget
                synTlabels = synLabelsOut + synLabelsIn

                # Run attacks
                for feature, Attack in attacks[tid][
                        f'{GenModel.__name__}'].items():
                    # Produce a guess for each synthetic dataset
                    attackerGuesses = Attack.attack(synT)

                    resDict = {
                        'Secret': synTlabels,
                        'AttackerGuess': attackerGuesses
                    }
                    resultsTargetPrivacy[tid][f'{GenModel.__name__}'][nr][
                        feature] = resDict

            del synT, synTwithoutTarget, synTwithTarget

            LOGGER.info(f'Finished: Evaluation for model {GenModel.__name__}.')

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')
            sanOut = San.sanitise(rawTout)

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                resultsTargetPrivacy[tid][San.__name__][nr] = {}

                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                sanT = [sanOut, sanIn]
                sanTLabels = [LABEL_OUT, LABEL_IN]

                # Run attacks
                for feature, Attack in attacks[tid][San.__name__].items():
                    # Produce a guess for each synthetic dataset
                    attackerGuesses = Attack.attack(sanT,
                                                    attemptLinkage=True,
                                                    target=target)

                    resDict = {
                        'Secret': sanTLabels,
                        'AttackerGuess': attackerGuesses
                    }
                    resultsTargetPrivacy[tid][
                        San.__name__][nr][feature] = resDict

            del sanT, sanOut, sanIn

            LOGGER.info(f'Finished: Evaluation for sanitiser {San.__name__}.')

    outfile = f"ResultsMIA_{dname}"
    LOGGER.info(
        f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetPrivacy,
                  f,
                  indent=2,
                  default=json_numpy_serialzer)
def main():
    argparser = ArgumentParser()
    datasource = argparser.add_mutually_exclusive_group()
    datasource.add_argument(
        '--s3name',
        '-S3',
        type=str,
        choices=['adult', 'census', 'credit', 'alarm', 'insurance'],
        help='Name of the dataset to run on')
    datasource.add_argument('--datapath',
                            '-D',
                            type=str,
                            help='Relative path to cwd of a local data file')
    argparser.add_argument('--runconfig',
                           '-RC',
                           default='runconfig_mia.json',
                           type=str,
                           help='Path relative to cwd of runconfig file')
    argparser.add_argument(
        '--outdir',
        '-O',
        default='tests',
        type=str,
        help='Path relative to cwd for storing output files')
    args = argparser.parse_args()

    # Load runconfig
    with open(path.join(cwd, args.runconfig)) as f:
        runconfig = json.load(f)
    print('Runconfig:')
    print(runconfig)
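    # This attribute-inference script additionally reads 'sensitiveAttributes':
    # a mapping from column name to attack type, e.g.
    # {"<numeric column>": "LinReg", "<categorical column>": "Classification"}
    # (column names here are placeholders).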

    # Load data
    if args.s3name is not None:
        rawPop, metadata = load_s3_data_as_df(args.s3name)
        dname = args.s3name
    else:
        rawPop, metadata = load_local_data_as_df(path.join(cwd, args.datapath))
        dname = args.datapath.split('/')[-1]

    print(f'Loaded data {dname}:')
    rawPop.info()

    # Make sure outdir exists
    if not path.isdir(args.outdir):
        mkdir(args.outdir)

    seed(SEED)

    ########################
    #### GAME INPUTS #######
    ########################
    # Pick targets
    targetIDs = choice(list(rawPop.index),
                       size=runconfig['nTargets'],
                       replace=False).tolist()

    # If specified: Add specific target records
    if runconfig['Targets'] is not None:
        targetIDs.extend(runconfig['Targets'])

    targets = rawPop.loc[targetIDs, :]

    # Drop targets from population
    rawPopDropTargets = rawPop.drop(targetIDs)

    # List of candidate generative models to evaluate
    gmList = []
    if 'generativeModels' in runconfig.keys():
        for gm, paramsList in runconfig['generativeModels'].items():
            if gm == 'IndependentHistogram':
                for params in paramsList:
                    gmList.append(IndependentHistogram(metadata, *params))
            elif gm == 'BayesianNet':
                for params in paramsList:
                    gmList.append(BayesianNet(metadata, *params))
            elif gm == 'PrivBayes':
                for params in paramsList:
                    gmList.append(PrivBayes(metadata, *params))
            elif gm == 'CTGAN':
                for params in paramsList:
                    gmList.append(CTGAN(metadata, *params))
            elif gm == 'PATEGAN':
                for params in paramsList:
                    gmList.append(PATEGAN(metadata, *params))
            else:
                raise ValueError(f'Unknown GM {gm}')

    # List of candidate sanitisation techniques to evaluate
    sanList = []
    if 'sanitisationTechniques' in runconfig.keys():
        for name, paramsList in runconfig['sanitisationTechniques'].items():
            if name == 'SanitiserNHS':
                for params in paramsList:
                    sanList.append(SanitiserNHS(metadata, *params))
            else:
                raise ValueError(f'Unknown sanitisation technique {name}')

    ##################################
    ######### EVALUATION #############
    ##################################
    resultsTargetPrivacy = {
        tid: {
            sa: {gm.__name__: {}
                 for gm in gmList + sanList}
            for sa in runconfig['sensitiveAttributes']
        }
        for tid in targetIDs
    }
    # Add entry for raw
    for tid in targetIDs:
        for sa in runconfig['sensitiveAttributes']:
            resultsTargetPrivacy[tid][sa]['Raw'] = {}
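    # Filled in below as:
    #   resultsTargetPrivacy[tid][sensitiveAttribute][model][iteration]
    #       = {'AttackerGuess': [...], 'ProbCorrect': [...], 'TargetPresence': [...]}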

    print('\n---- Start the game ----')
    for nr in range(runconfig['nIter']):
        print(f'\n--- Game iteration {nr + 1} ---')
        # Draw a raw dataset
        rIdx = choice(list(rawPopDropTargets.index),
                      size=runconfig['sizeRawT'],
                      replace=False).tolist()
        rawTout = rawPopDropTargets.loc[rIdx]

        ###############
        ## ATTACKS ####
        ###############
        attacks = {}
        for sa, atype in runconfig['sensitiveAttributes'].items():
            if atype == 'LinReg':
                attacks[sa] = LinRegAttack(sensitiveAttribute=sa,
                                           metadata=metadata)
            elif atype == 'Classification':
                attacks[sa] = RandForestAttack(sensitiveAttribute=sa,
                                               metadata=metadata)

        #### Assess advantage raw
        for sa, Attack in attacks.items():
            Attack.train(rawTout)

            for tid in targetIDs:
                target = targets.loc[[tid]]
                targetAux = target.loc[[tid], Attack.knownAttributes]
                targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                guess = Attack.attack(targetAux,
                                      attemptLinkage=True,
                                      data=rawTout)
                pCorrect = Attack.get_likelihood(targetAux,
                                                 targetSecret,
                                                 attemptLinkage=True,
                                                 data=rawTout)

                resultsTargetPrivacy[tid][sa]['Raw'][nr] = {
                    'AttackerGuess': [guess],
                    'ProbCorrect': [pCorrect],
                    'TargetPresence': [LABEL_OUT]
                }

        for tid in targetIDs:
            target = targets.loc[[tid]]
            rawTin = rawTout.append(target)

            for sa, Attack in attacks.items():
                targetAux = target.loc[[tid], Attack.knownAttributes]
                targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                guess = Attack.attack(targetAux,
                                      attemptLinkage=True,
                                      data=rawTin)
                pCorrect = Attack.get_likelihood(targetAux,
                                                 targetSecret,
                                                 attemptLinkage=True,
                                                 data=rawTin)

                resultsTargetPrivacy[tid][sa]['Raw'][nr][
                    'AttackerGuess'].append(guess)
                resultsTargetPrivacy[tid][sa]['Raw'][nr]['ProbCorrect'].append(
                    pCorrect)
                resultsTargetPrivacy[tid][sa]['Raw'][nr][
                    'TargetPresence'].append(LABEL_IN)

        ##### Assess advantage Syn
        for GenModel in gmList:
            LOGGER.info(f'Start: Evaluation for model {GenModel.__name__}...')
            GenModel.fit(rawTout)
            synTwithoutTarget = [
                GenModel.generate_samples(runconfig['sizeSynT'])
                for _ in range(runconfig['nSynT'])
            ]

            for sa, Attack in attacks.items():
                for tid in targetIDs:
                    resultsTargetPrivacy[tid][sa][GenModel.__name__][nr] = {
                        'AttackerGuess': [],
                        'ProbCorrect': [],
                        'TargetPresence':
                        [LABEL_OUT for _ in range(runconfig['nSynT'])]
                    }

                for syn in synTwithoutTarget:
                    Attack.train(syn)

                    for tid in targetIDs:
                        target = targets.loc[[tid]]
                        targetAux = target.loc[[tid], Attack.knownAttributes]
                        targetSecret = target.loc[tid,
                                                  Attack.sensitiveAttribute]

                        guess = Attack.attack(targetAux)
                        pCorrect = Attack.get_likelihood(
                            targetAux, targetSecret)

                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr][
                            'AttackerGuess'].append(guess)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr][
                            'ProbCorrect'].append(pCorrect)

            del synTwithoutTarget

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                rawTin = rawTout.append(target)

                GenModel.fit(rawTin)
                synTwithTarget = [
                    GenModel.generate_samples(runconfig['sizeSynT'])
                    for _ in range(runconfig['nSynT'])
                ]

                for sa, Attack in attacks.items():
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    for syn in synTwithTarget:
                        Attack.train(syn)

                        guess = Attack.attack(targetAux)
                        pCorrect = Attack.get_likelihood(
                            targetAux, targetSecret)

                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr][
                            'AttackerGuess'].append(guess)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr][
                            'ProbCorrect'].append(pCorrect)
                        resultsTargetPrivacy[tid][sa][GenModel.__name__][nr][
                            'TargetPresence'].append(LABEL_IN)
            del synTwithTarget

        for San in sanList:
            LOGGER.info(f'Start: Evaluation for sanitiser {San.__name__}...')
            attacks = {}
            for sa, atype in runconfig['sensitiveAttributes'].items():
                if atype == 'LinReg':
                    attacks[sa] = LinRegAttack(sensitiveAttribute=sa,
                                               metadata=metadata,
                                               quids=San.quids)
                elif atype == 'Classification':
                    attacks[sa] = RandForestAttack(sensitiveAttribute=sa,
                                                   metadata=metadata,
                                                   quids=San.quids)

            sanOut = San.sanitise(rawTout)

            for sa, Attack in attacks.items():
                Attack.train(sanOut)

                for tid in targetIDs:
                    target = targets.loc[[tid]]
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    guess = Attack.attack(targetAux,
                                          attemptLinkage=True,
                                          data=sanOut)
                    pCorrect = Attack.get_likelihood(targetAux,
                                                     targetSecret,
                                                     attemptLinkage=True,
                                                     data=sanOut)

                    resultsTargetPrivacy[tid][sa][San.__name__][nr] = {
                        'AttackerGuess': [guess],
                        'ProbCorrect': [pCorrect],
                        'TargetPresence': [LABEL_OUT]
                    }

            for tid in targetIDs:
                LOGGER.info(f'Target: {tid}')
                target = targets.loc[[tid]]
                rawTin = rawTout.append(target)
                sanIn = San.sanitise(rawTin)

                for sa, Attack in attacks.items():
                    targetAux = target.loc[[tid], Attack.knownAttributes]
                    targetSecret = target.loc[tid, Attack.sensitiveAttribute]

                    Attack.train(sanIn)

                    guess = Attack.attack(targetAux,
                                          attemptLinkage=True,
                                          data=sanIn)
                    pCorrect = Attack.get_likelihood(targetAux,
                                                     targetSecret,
                                                     attemptLinkage=True,
                                                     data=sanIn)

                    resultsTargetPrivacy[tid][sa][
                        San.__name__][nr]['AttackerGuess'].append(guess)
                    resultsTargetPrivacy[tid][sa][
                        San.__name__][nr]['ProbCorrect'].append(pCorrect)
                    resultsTargetPrivacy[tid][sa][
                        San.__name__][nr]['TargetPresence'].append(LABEL_IN)

    outfile = f"ResultsMLEAI_{dname}"
    LOGGER.info(
        f"Write results to {path.join(f'{args.outdir}', f'{outfile}')}")

    with open(path.join(f'{args.outdir}', f'{outfile}.json'), 'w') as f:
        json.dump(resultsTargetPrivacy,
                  f,
                  indent=2,
                  default=json_numpy_serialzer)


async def messages(req) -> Response:
    # Entry point for POST /api/messages (registered on APP below). The
    # opening of this handler is a reconstruction: it assumes the usual
    # aiohttp pattern of accepting only JSON payloads before deserialising.
    if "application/json" in req.headers["Content-Type"]:
        body = await req.json()
    else:
        return Response(status=415)

    activity = Activity().deserialize(body)
    auth_header = req.headers.get(AUTHORIZATION_HEADER, "")

    try:
        await ERROR_ADAPTER.process_activity(activity, auth_header,
                                             BOT.on_turn)
        return Response(status=201)
    except Exception as exception:
        LOGGER.error(msg=f"An unexpected exception={exception} has occurred")
        raise exception


APP = web.Application()
APP.router.add_post("/api/messages", messages)

if __name__ == "__main__":
    try:
        LOGGER.info(
            msg=
            f"Starting application at host='{CONFIG.HOST}' on port={CONFIG.PORT}"
        )

        web.run_app(APP, host=CONFIG.HOST, port=CONFIG.PORT)
    except Exception as error:
        LOGGER.error(
            msg=f"Application initialization failed with an error={error}")
        raise error