def get_database_scheme(params, verbose):
    """
    Description:
    Used for differential attacks. Collects the columns that expose public
    values, keeps the ones with exactly two public values as differential
    attack candidates, and draws one random record from the anonymized
    database restricted to the columns that have public values.
    Parameters:
    * params: attack parameters, handed unchanged to gdaAttack
    * verbose: not read by this function; kept for interface compatibility
    Output:
    * dict 'attack_cols' looks like {'gender': ['Female', 'Male']}
    All values length is 2 (differential attackable columns)
    * dict 'record' looks like {'duration':12, 'gender':'Male'}
    'record' is empty when the query returns no rows.
    """
    # Check on parameters
    print("########## Parameters passed (Scheme) ##########")
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Information gathering
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType="rawDb")
    # Not read below: the column loop is meant to switch to the anonymized
    # column names eventually, so the lookup is retained as-is.
    anonColNames = attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")
    cols = []  # columns with public values, used for the SQL query
    attack_cols = {}  # theoretically attackable cols (differential) to test
    for col in rawColNames:  # to be replaced by anonColNames ...
        publicValues = attack.getPublicColValues(col, table)
        if not publicValues:
            # Covers both "no public values" and a None result, which must
            # not be iterated.
            continue
        cols.append(col)
        values = [t[0] for t in publicValues]
        if len(values) == 2:
            print(f"## {col} ({len(values)}) >> {values}")
            attack_cols[col] = values
    print("\nInfo >>> Selecting a random record ...")
    sql = sqlQueryGen(table, cols, None, None, False)
    query = {'db': "anonDb", 'sql': sql}
    print(f"\nInfo >>> Asking SQL query : '{sql}' ...")
    attack.askAttack(query)
    reply = attack.getAttack()
    rows = reply['answer'] if 'answer' in reply else None
    if not rows:
        # Nothing to draw a record from; still report the attackable columns.
        print("\nError >>> No record returned by the SQL query")
        return (attack_cols, {})
    answer = random.choice(rows)
    if len(cols) != len(answer):
        print(
            f"\nError >>> Lengths of columns ({len(cols)}) and record ({len(answer)}) not matching"
        )
    print("\nInfo >>> Chosen record ...")
    record = {}
    # zip() pairs only the positions present in both sequences, so a length
    # mismatch reported above cannot turn into an IndexError here.
    for col, val in zip(cols, answer):
        record[col] = val
        print(f"{col} = {val}")
    return (attack_cols, record)
Ejemplo n.º 2
0
def ramyKnowledge01(params):
    """Request prior knowledge about the rows holding one random public value.

    A column other than the uid column with at least two public values is
    drawn at random, one of its public values is drawn at random, and every
    raw column of the rows holding that value is requested from the raw
    database as knowledge. The resulting scores are printed and handed to
    finishGdaAttack.

    params: attack parameters, handed unchanged to gdaAttack.
    Returns None. When no column qualifies, an error is printed and the
    attack is cleaned up without issuing any query.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    print("\nInfo >>> Attack Class created\n")
    table = attack.getAttackTableName()
    print("### Table ###")
    pp.pprint(table)
    rawColNames = attack.getColNames(dbType="rawDb")
    print("### Raw Cols ###")
    pp.pprint(rawColNames)
    anonColNames = attack.getColNames(dbType="anonDb")
    print("### Anon Cols ###")
    pp.pprint(anonColNames)
    # set attack specific parameters
    uidCol = attack.getUidColName()
    # Visit the non-uid columns in random order and keep the first one with
    # at least two public values. Unlike re-drawing in a loop, this always
    # terminates, and it never accepts the uid column or an empty value list.
    candidates = [col for col in rawColNames if col != uidCol]
    random.shuffle(candidates)
    searchedCol = None
    publicValues = None
    for col in candidates:
        colValues = attack.getPublicColValues(col, table)
        if colValues and len(colValues) >= 2:
            searchedCol = col
            publicValues = colValues
            break
    if searchedCol is None:
        print("\nError >>> No column with at least two public values\n")
        attack.cleanUp()
        return None
    print(f"\nInfo >>> Randomly selected column : {searchedCol}\n")
    print("### Column Values ###")
    pp.pprint(publicValues)
    searchedItem = random.choice(publicValues)
    searchedValue = searchedItem[0]
    # Double embedded single quotes so a value such as O'Brien cannot break
    # out of the quoted literal.
    escapedValue = str(searchedValue).replace("'", "''")
    condition = f"{searchedCol} = '{escapedValue}'"
    # Define SQL queries
    sql = "SELECT "
    sql += comma_ize(rawColNames, lastComma=False)
    sql += " FROM " + table
    sql += " WHERE " + condition
    query = {'sql': sql, 'db': "rawDb"}
    # ask for knowledge
    print("\nInfo >>> Asking for knowledge ...\n")
    attack.askKnowledge(query)
    knowledge = attack.getKnowledge()
    pp.pprint(knowledge)
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Knowledge learned ##########")
    score = gda_score.getScores()
    pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def launch_differential_attack(params, verbose, tab1, tab2, s, bins):
    """Run a differential attack and claim the value it finds for column `s`.

    params: attack parameters, handed unchanged to gdaAttack.
    verbose: when true, the computed score is pretty-printed.
    tab1: column names; tab1[i] is paired with tab2[i] in the claim.
    tab2: values matching tab1 position by position.
    s: column whose value is claimed with the attack result.
    bins: forwarded unchanged to full_differential_attack.

    Returns None in every case. When full_differential_attack returns None
    the attribute is reported as not attackable and no claim is made.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Information gathering
    table = attack.getAttackTableName()
    # The column names are not read below; the lookups are retained as-is.
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")
    # Preparing attack parameters
    a_cols_star = tab1
    x_a_star = tab2
    # Executing attack
    attack_result = full_differential_attack(attack, a_cols_star, x_a_star, s,
                                             bins)
    if attack_result is None:
        print("Attribute Non Attackable (launch level)")
        # Release the attack object on this path as well, like the normal
        # path at the end of the function does.
        attack.cleanUp()
        return None
    # Make the guess according to attack result
    print("\nInfo >>> Making claims ...\n")
    # Indexing x_a_star keeps a length mismatch between the two lists loud
    # (IndexError) instead of silently dropping columns from the claim.
    guess = [{'col': col, 'val': x_a_star[i]}
             for i, col in enumerate(a_cols_star)]
    guess.append({'col': s, 'val': attack_result})
    attack.askClaim({'guess': guess}, claim=True)
    # Collect claim replies until none is outstanding
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
Ejemplo n.º 4
0
    def distinctUidUtilityMeasureSingleAndDoubleColumn(self, param):
        ''' Measures coverage and accuracy.

            `param` is a single data structure from the list of structures
            returned by setupGdaAttackParameters().  The elements
            of param as follows: <br/>
            `name`: The basis for the name of the output json file. Should
            be unique among all measures. <br/>
            `rawDb`: The raw (non-anonymized) database info. <br/>
            `anonDb`: The anonymized database info. <br/>
            `table`: The name of the table in the database. <br/>
            `uid`: The name of the uid column. <br/>
            `measureParam`: The thing that gets measured. Only current value
            is "uid", which indicates that counts of distinct uids should
            be measured. <br/>
            `samples`: States the number of samples over which each utility
            group should be measured. <br/>
            `ranges`: A list of ranges. Each range specifies the lower and
            upper bound on the number of "things" that an answer should
            contain as specified by `measureParam`. <br/>

            The coverage scores, accuracy scores, table characteristics and
            the 'operational' part of the attack results are stored in
            `self._ar`. <br/>
            Returns "Done", or None when the column names of the raw or the
            anonymized table cannot be obtained. <br/>
        '''
        print("Enter distinctUidUtilityMeasureSingleAndDoubleColumn")
        attack = gdaAttack(param)
        table = attack.getAttackTableName()
        uid = attack.getUidColName()
        rawColNames = attack.getColNames(dbType='rawDb')
        anonColNames = attack.getColNames(dbType='anonDb')
        if rawColNames is None or anonColNames is None:
            # This can happen if the anon table doesn't exist.
            # Release the attack object on this path too, as the normal
            # path below does before returning.
            attack.cleanUp()
            return None
        # Get table characteristics. This tells us if a given column is
        # enumerative or continuous.
        tabChar = attack.getTableCharacteristics()
        if self._p:
            pp.pprint(tabChar)
        coverageScores = self._measureCoverage(param, attack, tabChar, table,
                                               rawColNames, anonColNames)
        # Accuracy is measured only on the columns selected from the
        # coverage scores.
        allowedColumns = self._getAllowedColumns(coverageScores)
        pp.pprint(coverageScores)
        print("Allowed Columns:")
        pp.pprint(allowedColumns)

        accuracyScores = self._measureAccuracy(param, attack, tabChar, table,
                                               uid, allowedColumns)
        self._ar['coverage'] = coverageScores
        self._ar['accuracy'] = accuracyScores
        self._ar['tableStats'] = tabChar
        attackResult = attack.getResults()
        self._ar['operational'] = attackResult['operational']
        attack.cleanUp()
        return "Done"
def launch_greedy_cloning_attack(params, verbose):
    """Run the cloning attack and submit one claim built from its outcome.

    params: attack parameters, handed unchanged to gdaAttack.
    verbose: when true, the computed score is pretty-printed.

    The column list, value list and delta list start empty and are passed to
    full_cloning_attack together with the placeholder target column
    "attribute" and value "value"; whatever the lists contain afterwards is
    used to build the claim.
    NOTE(review): presumably full_cloning_attack fills the lists in place —
    confirm against its implementation.

    Returns None in every case. When full_cloning_attack returns None the
    attribute is reported as not attackable and no claim is made.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Information gathering
    table = attack.getAttackTableName()
    # The column names are not read below; the lookups are retained as-is.
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")
    # Preparing attack parameters
    a_cols_star = []
    x_a_star = []
    delta = []
    s = "attribute"
    v = "value"
    # Executing attack
    attack_result = full_cloning_attack(attack, a_cols_star, x_a_star, delta,
                                        s, v)
    if attack_result is None:
        print("Attribute Non Attackable")
        # Release the attack object on this path as well, like the normal
        # path at the end of the function does.
        attack.cleanUp()
        return None
    # Make the guess according to attack result
    print("\nInfo >>> Making claims ...\n")
    # Indexing x_a_star keeps a length mismatch between the two lists loud
    # (IndexError) instead of silently dropping columns from the claim.
    guess = [{'col': col, 'val': x_a_star[i]}
             for i, col in enumerate(a_cols_star)]
    guess.append({'col': s, 'val': v})
    # The attack outcome decides whether the guess is claimed or not.
    attack.askClaim({'guess': guess}, claim=attack_result)
    # Collect claim replies until none is outstanding
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
Ejemplo n.º 6
0
def ramyAttack01(params):
    """Demo attack: request every raw column of the rows where gender is 'Male'.

    A random raw column is drawn and printed, but the public values that are
    displayed and the query condition are both fixed to the "gender" column.
    The reply and the resulting scores are printed, then the scores are
    handed to finishGdaAttack.

    params: attack parameters, handed unchanged to gdaAttack.
    Returns None.
    """
    # Echo the parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Build the attack object
    attack = gdaAttack(params)
    print("\nInfo >>> Attack Class created\n")
    # Fetch and display the table name and both column lists, in that order:
    # each value is fetched first, then its banner and the value are printed.
    lookups = (
        ("### Table ###", attack.getAttackTableName),
        ("### Raw Cols ###", lambda: attack.getColNames(dbType="rawDb")),
        ("### Anon Cols ###", lambda: attack.getColNames(dbType="anonDb")),
    )
    fetched = []
    for banner, fetch in lookups:
        value = fetch()
        print(banner)
        pp.pprint(value)
        fetched.append(value)
    table, rawColNames, _ = fetched
    # Attack specific parameters
    drawnCol = random.choice(rawColNames)
    genderValues = attack.getPublicColValues("gender", table)
    print(f"\nInfo >>> Randomly selected column : {drawnCol}\n")
    print("### Column Values ###")
    pp.pprint(genderValues)
    whereClause = "gender = 'Male'"
    # Assemble the SQL statement from its pieces
    sql = "".join((
        "SELECT ",
        comma_ize(rawColNames, lastComma=False),
        " FROM ",
        table,
        " WHERE ",
        whereClause,
    ))
    request = {'sql': sql, 'db': "rawDb"}
    # Run the attack query and show the reply
    print("\nInfo >>> Launching attack ...\n")
    attack.askAttack(request)
    pp.pprint(attack.getAttack())
    # Compute and display the score
    scorer = gdaScores(attack.getResults())
    print("\nInfo >>> Score Class created\n")
    print("########## Attack reply ##########")
    score = scorer.getScores()
    pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
Ejemplo n.º 7
0
def diffix_noise(params, verbose):
    """Run one fixed aggregate query through the explore channel and print it.

    The query asks for avg(duration) and count(*) over the rows of the
    attack table where gender='Male', addressed to "rawDb". Scores are
    computed afterwards and handed to finishGdaAttack.

    params: attack parameters, handed unchanged to gdaAttack.
    verbose: accepted but not read.
    Returns None.
    """
    # Echo the parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Build the attack object
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Target table; the column names are fetched but not read afterwards
    table = attack.getAttackTableName()
    attack.getColNames(dbType="rawDb")
    attack.getColNames(dbType="anonDb")
    # The fixed query
    sql = f"SELECT avg(duration), count(*) FROM {table} WHERE gender='Male'"
    request = dict(db="rawDb", sql=sql)
    print("########## Query Info ##########")
    print(f"Table : {table} > {request['db']}")
    print(f"Query : {sql}")
    # Submit the query, then read replies until none is outstanding
    print("\nInfo >>> Launching attack ...")
    attack.askExplore(request)
    reply = attack.getExplore()
    print(f"... acquiring knowledge > {reply['stillToCome']} yet to come")
    while reply["stillToCome"] != 0:
        reply = attack.getExplore()
        print(f"... acquiring knowledge > {reply['stillToCome']} yet to come")
    print("\n########## Query Result ##########")
    if "answer" not in reply:
        print("Error >>> Reply ...")
        pp.pprint(reply)
    else:
        print(f"Result : {reply['answer']}")
    # Compute the score (it is not displayed here)
    scorer = gdaScores(attack.getResults())
    print("\nInfo >>> Score Class created\n")
    score = scorer.getScores()
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
Ejemplo n.º 8
0
def _distance_attack_rows_by_uid(attack, sql, db_type):
    """Run `sql` on `db_type` via the explore channel; map each row's first column to the rest of the row."""
    # A fresh query dict per call, so nothing is shared between the two runs.
    attack.askExplore({'sql': sql, 'db': db_type})
    while True:
        reply = attack.getExplore()
        if reply["stillToCome"] == 0:
            break
    if "answer" not in reply:
        return {}
    return {row[0]: row[1:] for row in reply['answer']}
def distance_attack(params, verbose):
    """Link anonymized users to raw users by distance and claim their values.

    Steps:
    1. Select the anonymized columns (uid excluded) that have at least two
       public values.
    2. Fetch uid plus those columns from both "rawDb" and "anonDb".
    3. For every anonymized user, keep the raw user with the smallest
       distance as computed by compute_global_distance.
    4. For every match, claim the raw user's non-None values of the
       selected columns.
    5. Compute the scores and hand them to finishGdaAttack.

    params: attack parameters, handed unchanged to gdaAttack.
    verbose: when true, the computed score is pretty-printed.
    Returns None in every case (including the early exits taken when no
    anonymized data is available or no user could be matched).
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Exploring table schema
    table = attack.getAttackTableName()
    print(f"\nInfo >>> Working on table : {table} ...")
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    uidCol = attack.getUidColName()
    # NOTE(review): raw_schema is never read below; confirm the lookups are
    # not needed for any side effect and remove them.
    raw_schema = {}
    for col in rawColNames:
        publicValues = attack.getPublicColValues(col, table)
        if publicValues:
            raw_schema[col] = [t[0] for t in publicValues]
    # Preparing the attack
    print("\nInfo >>> Preparing the attack columns ...\n")
    cols = []  # columns to be attacked
    for col in anonColNames:
        if col == uidCol:
            continue
        publicValues = attack.getPublicColValues(col, table)
        # Decide on this column's own public values only: a column without
        # any must be skipped rather than judged on a previous column's list.
        if not publicValues or len(publicValues) < 2:
            continue
        cols.append(col)
    print("\nInfo >>> Attack columns selected ...")
    print(f"... {cols}\n")
    # Get a list all anonymized users' data
    print("\nInfo >>> Retrieving users' useful data ...")
    sql = sqlQueryGen(table, [uidCol] + cols, None, None)
    print(f"... SQL query : {sql}")
    # Both dicts look like {uid: [val1, val2, ..., valn]}, values in the
    # order of `cols`.
    print("... users' raw data")
    raw_data = _distance_attack_rows_by_uid(attack, sql, "rawDb")
    print("... users' anonymized data\n")
    anon_data = _distance_attack_rows_by_uid(attack, sql, "anonDb")
    print("Info >>> Users data retrieved ...")
    print(
        f"... {len(raw_data.keys())} plain users and {len(anon_data.keys())} anonymized users\n"
    )
    if not anon_data:
        print("\nError >>> No anonymized data to work on :(")
        attack.cleanUp()
        return None
    # Determine practically attackable columns from the first anonymized
    # row: a None uid or a None value marks that field as not usable.
    sample_uid, sample_row = next(iter(anon_data.items()))
    uid_att = sample_uid is not None
    col_att = [True] * len(cols)
    for i, val in enumerate(sample_row):
        if val is None:
            col_att[i] = False
    att = (uid_att, col_att)
    # De-identification phase
    print("\nInfo >>> De-identifying users ...\n")
    matches = {}  # record of identified users ({"fake_id": "real_id"})
    for anon_id, anon_row in anon_data.items():
        best = None  # smallest distance seen so far for this anonymized user
        for raw_id, raw_row in raw_data.items():
            d = compute_global_distance(raw_row, anon_row, att)
            if d is None:
                # No distance available for this pair; it cannot be a match.
                continue
            if best is None or d < best:
                matches[anon_id] = raw_id
                best = d
    print("... de-identification finished")
    if not matches:
        print("\nError >>> No user was de-identified\n")
        attack.cleanUp()
        return None
    print(f"... {len(matches.keys())} matches found :)")
    # Launching the attack
    print("\nInfo >>> Launching the attack ...\n")
    guess_all = []  # list of all guesses to make as claim=True
    for raw_id in matches.values():
        raw_row = raw_data[raw_id]
        guess_all.append([{'col': col, 'val': raw_row[i]}
                          for i, col in enumerate(cols)
                          if raw_row[i] is not None])
    # Make some guesses
    print("\nInfo >>> Making claims ...\n")
    for guess in guess_all:
        try:
            # One spec dict per claim: a shared dict would be overwritten by
            # the next guess while earlier claims may still refer to it.
            attack.askClaim({'guess': guess}, claim=True)
        except Exception as err:
            # Best effort: report the rejected claim and carry on.
            print(f"\nError >>> Claim rejected: {err}")
            continue
    # Collect claim replies until none is outstanding
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def _ramy_sql_equals(column, value):
    """Build the SQL condition `column=value`, quoting every non-numeric value."""
    if value is None:
        return f"{column} IS NULL"
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f"{column}={value}"
    escaped = str(value).replace("'", "''")
    return f"{column}='{escaped}'"
def ramyAttack03(params, verbose):
    """Attack rare public values and claim every row they select.

    Steps:
    1. Record the anonymized column names of every table (the database
       model) and print it.
    2. In the attack table, collect the (column, value) pairs whose public
       value has a count of at most 50, skipping the uid column and every
       column whose name contains "id".
    3. Build one query per pair on the attack table, plus one query per
       pair on every other table that has a column of the same name.
    4. Run the queries one at a time, so each reply is known to belong to
       the query just asked.
    5. Claim every row returned by the attack-table queries; replies from
       other tables are only collected (and printed when `verbose`).
    6. Compute and print the scores and hand them to finishGdaAttack.

    params: attack parameters, handed unchanged to gdaAttack.
    verbose: when true, every attack reply is pretty-printed.
    Returns None in every case.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")
    # Exploring DB schema
    db_model = {}  # dict of columns for each table
    for table in attack.getTableNames():
        print(f"\nInfo >>> Working on table : {table} ...")
        try:
            db_model[table] = attack.getColNames(dbType="anonDb",
                                                 tableName=table)
        except Exception:
            print(f"{table} doesn't exist .. Skip to next !")
    # Looking for similar inter-table columns (in progress ...)
    print("\n########## Database model ##########\n")
    pp.pprint(db_model)
    # Back to target table ...
    table = attack.getAttackTableName()
    # The raw column names are not read below; the lookup is retained as-is.
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    # Looking for interesting columns and values to fetch
    interestValues = []  # list of (column, value) tuples to investigate deeper
    uidCol = attack.getUidColName()
    for col in anonColNames:
        if col == uidCol or "id" in col:
            continue
        for val in attack.getPublicColValues(col, table) or []:
            if val[1] <= 50:
                interestValues.append((col, val[0]))
    if not interestValues:
        print("\nInfo >>> No interesting values found")
        attack.cleanUp()
        return None
    print("\nInfo >>> Interesting values found\n")
    print("########## Interesting values ##########")
    pp.pprint(interestValues)
    # Numbering conditions
    print("\nInfo >>> Generating conditions ...")
    conditions = [_ramy_sql_equals(col, value)
                  for col, value in interestValues]
    # Generate SQL queries regarding target table.
    # Each entry is (sql, columns to claim); the columns are None for
    # queries whose rows must not be turned into claims.
    print("\nInfo >>> Generating SQL queries ...")
    queries = [(sqlQueryGen(table, anonColNames, cond), anonColNames)
               for cond in conditions]
    # Looking at possible interesting inter-cross columns
    lucky_cols = {col for col, _ in interestValues}
    interestCols = []  # (table, col) tuples for interesting inter-cross columns
    for other_table, other_cols in db_model.items():
        if other_table == table or not other_cols:
            continue
        for col in other_cols:
            if col in lucky_cols:
                interestCols.append((other_table, col))
    # Generate SQL queries regarding other tables
    for other_table, col in interestCols:
        for value_col, value in interestValues:
            if value_col == col:
                cond = _ramy_sql_equals(col, value)
                # A one-element list keeps the column argument a list, as
                # in the attack-table queries above.
                queries.append((sqlQueryGen(other_table, [col], cond), None))
    # executing attack
    print("\nInfo >>> Launching attack ...\n")
    print(f"... {len(queries)} SQL queries to execute")
    print("... getting replies")
    replies = []  # (columns to claim, reply) per query, in query order
    for sql, claim_cols in queries:
        attack.askAttack({'db': "anonDb", 'sql': sql})
        while True:
            reply = attack.getAttack()
            print(
                f"... acquiring knowledge > {reply['stillToCome']} yet to come"
            )
            if reply["stillToCome"] == 0:
                break
        replies.append((claim_cols, reply))
    if verbose:
        print("########## Attack reply ##########")
        for _, reply in replies:
            pp.pprint(reply)
    # Make some guesses :3
    print("\nInfo >>> Making claims ...\n")
    answered = False  # whether any attack-table reply carried an answer
    for claim_cols, reply in replies:
        if claim_cols is None or "answer" not in reply:
            continue
        answered = True
        for row in reply['answer']:
            guess = [{'col': col, 'val': val}
                     for col, val in zip(claim_cols, row)]
            try:
                # One spec dict per claim, never shared between claims.
                attack.askClaim({'guess': guess}, claim=True)
            except Exception as err:
                # Best effort: report the rejected claim and carry on.
                print(f"\nError >>> Claim rejected: {err}")
                continue
    if answered:
        # Collect claim replies until none is outstanding
        while True:
            claim = attack.getClaim()
            if claim["stillToCome"] == 0:
                break
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    pp.pprint(score)
    # Wrap-up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
Ejemplo n.º 10
0
# Script setup: build the attack parameters for the un-anonymized
# banking/transactions table and display the first parameter set.
doCache = True

config = dict(
    anonTypes=[['no_anon']],
    tables=[['banking', 'transactions']],
)

paramsList = setupGdaAttackParameters(config)
params = paramsList[0]
pp.pprint(params)


# Test bad inputs
# Disabled: each entry below is one deliberately malformed
# getPriorKnowledge() call, given as (positional args, keyword args).
if False:
    x = gdaAttack(params)
    badCalls = [
        ((['frequency'], 'users'), dict(selectColumn='bad', values=[1])),
        ((['bad'], 'rows'), dict(count=20)),
        ((['frequency'], 'users'), dict(selectColumn='uid', values='bad')),
        ((['frequency'], 'users'), dict(selectColumn='uid', colRange='bad')),
        ((['frequency'], 'users'), dict(selectColumn='uid')),
        ((['frequency'], 'rows'), dict(count=20, selectColumn='uid')),
        ((['frequency'], 'rows'), dict(fraction=0.55, count=20)),
        ((['frequency'], 'rows'), dict(count=3.55)),
        ((['frequency'], 'rows'), dict(fraction=20)),
        ((['frequency'], 'rows'), dict(count='bad')),
        ((['frequency'], 'rows'), dict(fraction='bad')),
        ((['frequency'], 'rows'), {}),
        ((['frequency'], 'boo'), {}),
        (('uid', 'rows'), {}),
    ]
    for args, kwargs in badCalls:
        result = x.getPriorKnowledge(*args, **kwargs)
    x.cleanUp(doExit=False)
Ejemplo n.º 11
0
def dumb_list_linkability_attack(params):
    """ Dumb List attack for the Linkability criteria.

        Requests, from the columns common to the raw and the anonymized
        table, up to 100 column-value combinations that occur exactly once,
        and submits each returned row as a claim. Intended for raw and
        pseudonymized data.

        NOTE: This is effectively the same attack as with singling out
        dumb list.

        `params`: attack parameters, handed unchanged to gdaAttack.
        Returns None. Exits the process when the attack reply holds no
        answer."""
    attack = gdaAttack(params)

    # -------------------  Exploration Phase  ------------------------
    # Only columns present in both the raw and the anonymized table can be
    # attacked (pseudonymization schemes typically delete some columns).

    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if any(names is None for names in (rawColNames, anonColNames)):
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    colNames = list(set(rawColNames) & set(anonColNames))

    # -------------------  Prior Knowledge Phase  --------------------
    # None needed for this attack.

    # -------------------  Attack Phase  -----------------------------

    # Group by every common column and keep the groups of size one.
    sql = "".join((
        "SELECT ",
        comma_ize(colNames),
        f"count(*) FROM {table} ",
        makeGroupBy(colNames),
        " HAVING count(*) = 1 ORDER BY count(*) LIMIT 100",
    ))
    print("-------------------- Attack query:")
    print(sql)
    attack.askAttack({'sql': sql})
    attackReply = attack.getAttack()
    if v:
        print("-------------------- Attack reply:")
    if v:
        pp.pprint(attackReply)

    # -------------------  Claims Phase  ----------------------------

    if 'answer' not in attackReply:
        print("ERROR: reply to claim query contains no answer")
        pp.pprint(attackReply)
        attack.cleanUp()
        sys.exit()
    for row in attackReply['answer']:
        # The trailing count(*) column of each row is not part of the guess.
        guess = [{'col': name, 'val': row[pos]}
                 for pos, name in enumerate(colNames)]
        attack.askClaim({'guess': guess})

    if v:
        print("------------------- Attack claims:")
    # Read claim replies until none is outstanding
    pending = True
    while pending:
        claimReply = attack.getClaim()
        if v:
            pp.pprint(claimReply)
        pending = claimReply['stillToCome'] != 0

    # -------------------  Scores Phase  ----------------------------

    score = gdaScores(attack.getResults()).getScores()
    if v:
        pp.pprint(score)
    attack.cleanUp()
    final = finishGdaAttack(params, score)
    pp.pprint(final)
Ejemplo n.º 12
0
def diffix_infer_1_attack(params):
    ''' This is an inference attack against Diffix

        In this attack, we find attribute groups where the inference
        conditions exist (only one guessed column value exists for some
        set of one or more known column values). This is designed to work
        against Diffix and Full K-anonymity at least.

        `params` is handed unchanged to gdaAttack() and finishGdaAttack().
        Nothing is returned: the final result is pretty-printed. Extra
        output is controlled by the module-level verbosity flag `v`.
        The function returns early (after cleaning up the attack object)
        when there is no table or the table has no rows.
    '''
    # Thresholds used below (values unchanged from the original code).
    maxGuessedDistinct = 10     # guessed column may have at most this many values
    maxUnusedCombinations = 1000    # give up after this many consecutive misses
    maxClaims = 25              # stop attacking a column once exceeded
    maxDistinctRatio = 0.8      # skip known-column sets that are nearly unique

    attack = gdaAttack(params)

    # -------------------  Exploration Phase  ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.

    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        # Same guard the other attacks in this file use; without it
        # set(None) below raises a TypeError.
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames})")
        attack.cleanUp(cleanUpCache=False)
        return
    colNames = list(set(rawColNames) & set(anonColNames))
    if v: print(f"Common columns are: {colNames}")

    # Get the total number of rows so that we can later determine fraction
    # of cells per column that are susceptible
    sql = str(f"SELECT count(*) FROM {table}")
    query = dict(db="rawDb", sql=sql)
    attack.askExplore(query)
    reply = attack.getExplore()
    if 'error' in reply:
        doQueryErrorAndExit(reply, attack)
    totalRows = reply['answer'][0][0]
    if v: print(f"Total Rows: {totalRows}")
    if not totalRows:
        # Every ratio below divides by totalRows; an empty table has
        # nothing to attack anyway.
        print(f"No rows to attack in table {table}")
        attack.cleanUp(cleanUpCache=False)
        return

    # There is really no point in trying to find instances of
    # inference where the guessed column has a large number of values.
    # In these cases, the chances of finding an inference instance is
    # very low. We (arbitrarily for now) set the threshold for this at 10

    # By the same token, an attack where the known column has a majority
    # values that are distinct to a single user won't work for an attack,
    # because in the case of Diffix, they will be low-count filtered, and
    # in the case of Full K-anonymity, they may be aggregated

    # So we record the number of distinct values per column. (In practice,
    # this would not be known exactly, but the attacker can be assumed to
    # have a reasonable guess just based on knowledge of the column.)
    distincts = {}
    guessableCols = []
    for col in colNames:
        sql = str(f"SELECT count(DISTINCT {col}) FROM {table}")
        query = dict(db="rawDb", sql=sql)
        # NOTE(review): this raw-database count goes through the attack
        # queue rather than askExplore()/getExplore() -- confirm that this
        # is intended before changing it.
        attack.askAttack(query)
        reply = attack.getAttack()
        if 'error' in reply:
            doQueryErrorAndExit(reply, attack)
        totalDistinct = reply['answer'][0][0]
        distincts[col] = totalDistinct
        if totalDistinct <= maxGuessedDistinct:
            guessableCols.append(col)
    if v: print(f"Distincts: {distincts}")
    if v: print(f"guessableCols: {guessableCols}")

    # -------------------  Prior Knowledge Phase  --------------------
    # This attack doesn't require any prior knowledge

    for guessedCol in guessableCols:
        numClaims = 0
        remainingCols = [x for x in colNames if x != guessedCol]
        # We want to try various combinations of the remaining columns,
        # and try the attack if the ratio of distinct values (or expected
        # distinct value combinations) is not too high
        unusedCombinations = 0
        for size in range(1, len(remainingCols) + 1):
            if unusedCombinations > maxUnusedCombinations:
                # If we don't find a useable combination 1000
                # consecutive times, then give up
                break
            if numClaims > maxClaims:
                break
            for knownCols in itertools.combinations(remainingCols, size):
                if unusedCombinations > maxUnusedCombinations:
                    break
                if numClaims > maxClaims:
                    break
                # Upper bound on the number of distinct known-value
                # combinations: product of the per-column distinct counts.
                totalDistinct = 1
                for c in knownCols:
                    totalDistinct *= distincts[c]
                if v:
                    print(f"totalDistinct: {totalDistinct} "
                          f"from known columns {knownCols}")
                if (totalDistinct / totalRows) > maxDistinctRatio:
                    unusedCombinations += 1
                    continue
                unusedCombinations = 0
                numClaims = runOneAttack(guessedCol, knownCols, attack, table,
                                         numClaims)

    # -------------------  Scores Phase  ----------------------------

    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    # Now we need to assign susceptibility scores, which means making
    # some explore queries
    for guessedCol in colNames:
        remainingCols = [x for x in colNames if x != guessedCol]
        # -------------- More exploration phase ------------------
        # First find out how many of the cells are attackable
        sql = "SELECT sum(rows) FROM (SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"count(*) AS rows FROM {table} ")
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1) t")
        if v: print("-------------------- Explore query:")
        if v: print(sql)
        query = dict(db="raw", sql=sql)
        attack.askExplore(query)
        reply = attack.getExplore()
        if 'error' in reply:
            doQueryErrorAndExit(reply, attack)
        # sum() over zero qualifying groups is NULL (None in Python);
        # treat that as "no attackable rows" instead of crashing on None / int.
        numRows = reply['answer'][0][0] or 0
        if v: print("-------------------- Explore reply:")
        if v: pp.pprint(reply)
        susValue = numRows / totalRows
        sc.assignColumnSusceptibility(guessedCol, susValue)
    # Get average score (default behavior)
    score = sc.getScores()
    if v: pp.pprint(score)
    # The score that is actually reported is the one for numColumns=1
    score = sc.getScores(numColumns=1)
    if v: pp.pprint(score)
    attack.cleanUp(cleanUpCache=False)
    final = finishGdaAttack(params, score)
    pp.pprint(final)
Ejemplo n.º 13
0
def _sqlCondition(col, val):
    """Return the SQL predicate selecting rows where `col` equals `val`.

    Numbers (including booleans) are emitted as-is, None becomes an
    IS NULL test, and every other value is rendered as a single-quoted
    SQL string literal with embedded quotes doubled.
    """
    import numbers  # local import so the block needs no file-level change
    if val is None:
        return f"{col} IS NULL"
    if isinstance(val, numbers.Number):
        return f"{col}={val}"
    escaped = str(val).replace("'", "''")
    return f"{col}='{escaped}'"


def ramyAttack02(params, verbose):
    """Singling-out attack driven by rare public column values.

    For every column of the anonymized table (except the uid column and
    any column whose name contains "id"), the public values reported with
    a count of at most 50 are collected. One query per such (column, value)
    pair is sent to the anonymized database, and every returned row is
    submitted as a claim. The score is then computed, printed and handed
    to finishGdaAttack().

    params  -- passed unchanged to gdaAttack() and finishGdaAttack()
    verbose -- when true, the attack replies are pretty-printed

    Always returns None.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)
    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created")
    # Information gathering
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    if rawColNames is None or anonColNames is None:
        # Same guard the other attacks in this file use.
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames})")
        attack.cleanUp()
        return None
    print(f"\nInfo >>> Working on table : {table} ...")
    # Looking for interesting columns and values to fetch
    uidCol = attack.getUidColName()
    interestValues = []  # list of (column, value) pairs to investigate deeper
    for col in anonColNames:
        if col == uidCol or "id" in col:
            continue
        for val in attack.getPublicColValues(col, table):
            # val is indexed as (value, count); keep the rare values only
            if val[1] <= 50:
                interestValues.append((col, val[0]))
    if not interestValues:
        print("\nInfo >>> No interesting values found")
        # Release the attack object on this path too (it was leaked before).
        attack.cleanUp()
        return None
    print("\nInfo >>> Interesting values found\n")
    print("########## Interesting values ##########")
    pp.pprint(interestValues)
    # Numbering conditions
    print("\nInfo >>> Generating conditions ...")
    conditions = [_sqlCondition(col, val) for col, val in interestValues]
    # Define SQL queries
    print("\nInfo >>> Generating SQL queries ...")
    queries = [sqlQueryGen(table, anonColNames, cond) for cond in conditions]
    # executing attack
    print("\nInfo >>> Launching attack ...\n")
    for sql in queries:
        # A fresh dict per query: the same dict must not be mutated while
        # earlier queries may still be queued.
        attack.askAttack({'db': "anonDb", 'sql': sql})
    # Keep every reply: each one answers a different condition.
    replies = []
    while True:
        reply = attack.getAttack()
        replies.append(reply)
        if reply["stillToCome"] == 0:
            break
    if verbose:
        print("########## Attack reply ##########")
        for reply in replies:
            pp.pprint(reply)
    # Make some guesses
    print("\nInfo >>> Making claims ...\n")
    claimedRows = set()  # rows already claimed, to avoid duplicate claims
    for reply in replies:
        if "answer" not in reply:
            continue
        for row in reply['answer']:
            rowKey = tuple(row)
            if rowKey in claimedRows:
                continue
            claimedRows.add(rowKey)
            guess = [{'col': col, 'val': row[i]}
                     for i, col in enumerate(anonColNames)]
            attack.askClaim({'guess': guess}, claim=True)
    if claimedRows:
        # Wait until every submitted claim has been processed.
        while True:
            claim = attack.getClaim()
            if claim["stillToCome"] == 0:
                break
    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    pp.pprint(score)
    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
Ejemplo n.º 14
0
def dumb_list_singling_out_attack(params):
    """ Dumb List attack for the Singling Out criteria.

        All it does is request rows with all columns from the anonymized
        database. The attack succeeds if the anonymized database returns
        rows that single out users, and fails otherwise. It is designed to
        work against raw and pseudonymized data.

        `params` is handed unchanged to gdaAttack() and finishGdaAttack().
        Nothing is returned: the final result is pretty-printed. Extra
        output is controlled by the module-level verbosity flag `v`.
        The function returns early (after cleanUp) when there is no table,
        when the uid column is not common to both databases, or when the
        uid is the only common column. It calls sys.exit() when an attack
        reply carries no answer."""
    attack = gdaAttack(params)

    # -------------------  Exploration Phase  ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.
    # (Note that pseudonymization schemes can delete some columns.)

    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    uid = attack.getUidColName()
    colNamesAll = list(set(rawColNames) & set(anonColNames))
    if v: print(f"Use columns: {colNamesAll}")
    if uid not in colNamesAll:
        # Every group below needs the uid column; without it the attack
        # cannot be built (list.remove() used to raise ValueError here).
        print(f"No uid column ({uid}) common to raw and anon tables")
        attack.cleanUp()
        return

    # The cloak can't handle queries with a large number of columns,
    # so we split up the attack into groups of 5 columns each. Each group
    # contains the uid column, so that we are sure that the resulting
    # answer pertains to a single user.
    groupSize = 5
    minAttacksPerGroup = 5
    colsWithoutUid = [c for c in colNamesAll if c != uid]
    if v: print(colNamesAll)
    if v: print(colsWithoutUid)
    # groupSize - 1 ordinary columns per group, plus the uid column
    colsPerGroup = groupSize - 1
    groups = [colsWithoutUid[start:start + colsPerGroup] + [uid]
              for start in range(0, len(colsWithoutUid), colsPerGroup)]
    if not groups:
        # Only the uid column is shared: nothing to single out, and the
        # division below would fail on zero groups.
        print("No columns to attack besides the uid column")
        attack.cleanUp()
        return

    # This will give us around 100 attack queries total:
    # NOTE(review): despite its name, minAttacksPerGroup acts as an upper
    # bound here because of min(); confirm whether max() was intended.
    numAttacksPerGroup = min(int(100 / len(groups)) + 1, minAttacksPerGroup)
    if v: pp.pprint(groups)

    # -------------------  Prior Knowledge Phase  --------------------
    # This attack doesn't require any prior knowledge

    # -------------------  Attack Phase  -----------------------------

    for colNames in groups:
        sql = "SELECT "
        sql += comma_ize(colNames)
        sql += str(f"count(*) FROM {table} WHERE ")
        sql += makeInNotNullConditions(colNames)
        sql += makeGroupBy(colNames)
        # Order by the actual uid column; its name is not always "uid".
        sql += str(f" HAVING count(*) = 1 ORDER BY {uid} ")
        sql += str(f" LIMIT {numAttacksPerGroup} ")
        query = {'sql': sql}
        print("-------------------- Attack query:")
        print(sql)
        attack.askAttack(query)
        reply = attack.getAttack()
        if v: print("-------------------- Attack reply:")
        if v: pp.pprint(reply)

        # -------------------  Claims Phase  ----------------------------

        if 'answer' not in reply:
            print("ERROR: reply to claim query contains no answer")
            pp.pprint(reply)
            attack.cleanUp()
            sys.exit()
        for row in reply['answer']:
            # The selected columns come back in colNames order (the trailing
            # count(*) column is not part of the claim).
            guess = [{'col': col, 'val': row[i]}
                     for i, col in enumerate(colNames)]
            attack.askClaim({'guess': guess})

        if v: print("------------------- Attack claims:")
        while True:
            reply = attack.getClaim()
            if v: pp.pprint(reply)
            if reply['stillToCome'] == 0:
                break

    # -------------------  Scores Phase  ----------------------------

    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    score = sc.getScores()
    if v: pp.pprint(score)
    attack.cleanUp()
    final = finishGdaAttack(params, score)
    pp.pprint(final)
Ejemplo n.º 15
0
def dumb_list_inference_attack(params):
    """ Dumb List attack for the Inference criteria.

        In an inference attack, there are 'known' column values, and
        'guessed' column values. An inference claim succeeds when all
        users with the known column values have the same guessed column
        values. There only needs to be one such user, so we can try
        making inferences on all columns by using all the other columns
        as known values.

        `params` is handed unchanged to gdaAttack() and finishGdaAttack().
        Nothing is returned: the final result is pretty-printed. Extra
        output is controlled by the module-level verbosity flag `v`.
        """
    attack = gdaAttack(params)

    # -------------------  Exploration Phase  ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.
    # (Note that pseudonymization schemes typically delete some columns.)

    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    colNames = list(set(rawColNames) & set(anonColNames))

    # Get the total number of rows so that we can later determine fraction
    # of cells per column that are susceptible
    sql = str(f"SELECT count(*) FROM {table}")
    if v: print(sql)
    query = dict(db="raw",sql=sql)
    attack.askExplore(query)
    reply = attack.getExplore()
    if 'error' in reply:
        doQueryErrorAndExit(reply,attack)
    totalRows = reply['answer'][0][0]

    # -------------------  Prior Knowledge Phase  --------------------
    # This attack doesn't require any prior knowledge

    # -------------------  Attack Phase  -----------------------------
    # I'm going to attack each (guessed) column by using the remaining
    # columns as the known columns. In the following, I loop through
    # attack and claims for each guessed column.

    for guessedCol in colNames:
        remainingCols = [x for x in colNames if x != guessedCol]
        # -------------- Attack phase ------------------
        # And now run the attack for some fraction of the attackable cells
        sql = "SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"max({guessedCol}) FROM {table} WHERE ")
        sql += makeInNotNullConditions(remainingCols)
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1 ")
        sql += "ORDER BY 1 LIMIT 20"
        if v: print(sql)
        query = dict(sql=sql)
        attack.askAttack(query)
        reply = attack.getAttack()
        if 'error' in reply:
            # For this attack, cloak can't deal with max(text_col),
            # so just continue without claims
            continue
        # -------------- Claims phase ------------------
        for row in reply['answer']:
            # Known values come first in the row (remainingCols order);
            # the guessed value, max(guessedCol), is the column after them.
            known = [{'col': col, 'val': row[i]}
                     for i, col in enumerate(remainingCols)]
            spec = {
                'known': known,
                'guess': [{'col': guessedCol,
                           'val': row[len(remainingCols)]}],
            }
            if attack.isClaimed(spec):
                # Nothing was submitted, so there is no claim result to
                # wait for.
                continue
            attack.askClaim(spec)
            while True:
                claimReply = attack.getClaim()
                if v: pp.pprint(claimReply)
                if claimReply['stillToCome'] == 0:
                    break

    # -------------------  Scores Phase  ----------------------------

    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    # Now we need to assign susceptibility scores, which means making
    # some explore queries
    for guessedCol in colNames:
        # At most 20 known columns are used for the susceptibility query
        remainingCols = [x for x in colNames if x != guessedCol][:20]
        # -------------- More exploration phase ------------------
        # First find out how many of the cells are attackable
        sql = "SELECT sum(rows) FROM (SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"count(*) AS rows FROM {table} ")
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1) t")
        if v: print("-------------------- Explore query:")
        if v: print(sql)
        query = dict(db="raw",sql=sql)
        attack.askExplore(query)
        reply = attack.getExplore()
        if 'error' in reply:
            doQueryErrorAndExit(reply,attack)
        # sum() over zero qualifying groups is NULL (None in Python);
        # treat that as "no attackable rows" instead of crashing on None / int.
        numRows = reply['answer'][0][0] or 0
        if v: print("-------------------- Explore reply:")
        if v: pp.pprint(reply)
        # An empty table has no susceptible cells (and must not divide by 0)
        susValue = numRows / totalRows if totalRows else 0
        sc.assignColumnSusceptibility(guessedCol,susValue)
    score = sc.getScores()
    if v: pp.pprint(score)
    final = finishGdaAttack(params,score)
    attack.cleanUp()
    pp.pprint(final)
Ejemplo n.º 16
0
#
# -------------------  Prior Knowledge Phase  --------------------
# We use prior knowledge of some values in the database.
# For the query below, the real value is known to be 181962:
#     Select count(*)
#     from transactions
#     where operation = 'VKLAD'
true_value = 181962
# -------------------  Attack Phase  -----------------------------
#

print("--------------------")
print("Running First Attack")
print("--------------------")
# NOTE(review): `params` is not defined in the visible part of this script;
# it is presumably set up earlier -- confirm.
x = gdaAttack(params)

# Same count query as described in the prior-knowledge comment above.
query = {}
sql = """Select count(*)
         from transactions
         where operation = 'VKLAD'
         """
query['sql'] = sql
# Presumably the differential-privacy budget (epsilon) spent on this
# query -- confirm against the gdaAttack API.
query['epsilon'] = 1

x.askAttack(query)
replyCorrect = x.getAttack()

# The attack reply reports how much of the budget is still available
remaining_eps = replyCorrect['remaining_dp_budget']