Example 1
def non_iid(max_similarity, Xtest, ytest, from_class, to_class, model_names,
            numClasses, numParams, softmax_test, topk_prop, iterations=3000,
            numSybils=2, ideal_attack=False, poisoner_indices=[]):
    numFeatures = int(numParams/numClasses)
    batch_size = 10
    topk = int(numParams / 2)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(softmax_model_obj.SoftMaxModelEvil(dataPath +
           "_bad_ideal_4_9", numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    #### Cosine similarity for adversaries ####
    sybil_noise = getOrthogonalNoise2(numSybils, numParams)
    Attack_rates = []
    Attack_rates_iter = []
    numPoisonContribution = 0.0
    for i in xrange(iterations):
        # if i % 100 == 0 and max_similarity < 0.2:
        #     score = poisoning_compare.eval(Xtest, ytest, weights, int(from_class), int(to_class), numClasses, numFeatures, False)
        #     Attack_rates.append(score[4])
        #     Attack_rates_iter.append(i)
        #     print("Attack rate: " + str(score[4]))
        #     # attack success
        #     if score[4] > 0.8:
        #         pdb.set_trace()
        #         return weights, numPoisonContribution / i

        delta = np.zeros((numClients, numParams))
        
        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        # noisevec = rescale(np.random.rand(numParams), np.min(delta), np.max(delta))
        # delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + 10000*noisevec
        # delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - 10000*noisevec

        ### Adaptive poisoning (requires an even number of sybils) ###
        adaptive = True
        if adaptive:
            # sybil_deltas = summed_deltas[10:10+numSybils].copy()
            # sybil_deltas = sybil_deltas + delta[10:10+numSybils]
            sybil_cs = smp.cosine_similarity(
                summed_deltas[numClasses:numClasses + numSybils] +
                delta[numClasses:numClasses + numSybils]) - np.eye(numSybils)
            sybil_cs = np.max(sybil_cs, axis=0)


            if np.any(sybil_cs > max_similarity):
                delta[numClasses:numClasses+numSybils] = rescaleOrthogonalNoise(sybil_noise, delta)
            else:
                numPoisonContribution += 1.0
        # delta[10:10+numSybils] = getOrthogonalNoise(numSybils, numParams) 
        # pdb.set_trace()
        # pdb:: np.max(smp.cosine_similarity(delta[10:10+numSybils]) - np.eye(numSybils), axis=1)
        ##########################
        

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta
        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(delta, summed_deltas, sig_features_idx, i, weights, 0.05, importance=True, importanceHard=False)
        # this_delta = model_aggregator.average(delta)
        
        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)


    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights, numPoisonContribution / iterations
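
Example 1 adapts the sybils' updates to the defense: whenever the pairwise cosine similarity of the sybils' accumulated updates would exceed max_similarity, their deltas are replaced with orthogonal noise. The helpers getOrthogonalNoise2 and rescaleOrthogonalNoise are repo-specific and not shown here; the standalone sketch below (NumPy and scikit-learn only, with toy shapes) illustrates just the detection condition.

# Minimal sketch of the adaptive-similarity check above; the shapes and the
# 0.2 threshold are illustrative, not taken from the repo.
import numpy as np
import sklearn.metrics.pairwise as smp

numSybils, numParams = 2, 20
max_similarity = 0.2

summed = np.random.rand(numSybils, numParams)    # accumulated sybil updates
current = np.random.rand(numSybils, numParams)   # this round's sybil updates

# Pairwise cosine similarity of the would-be accumulated updates; subtracting
# the identity removes each sybil's perfect similarity with itself.
sybil_cs = smp.cosine_similarity(summed + current) - np.eye(numSybils)
sybil_cs = np.max(sybil_cs, axis=0)

# If any sybil pair looks too aligned, Example 1 swaps in orthogonal noise
# for this round instead of contributing the poisoned update.
print("replace with orthogonal noise:", np.any(sybil_cs > max_similarity))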
Example 2
    if (dataset == "mnist"):
        numClasses = 10
        numFeatures = 784
    elif (dataset == "kddcup"):
        numClasses = 23
        numFeatures = 41
    elif (dataset == "amazon"):
        numClasses = 50
        numFeatures = 10000
    else:
        print("Dataset " + dataset + " not found. Available datasets: mnist kddcup amazon")

    numParams = numClasses * numFeatures
    dataPath = dataset + "/" + dataset

    full_model = softmax_model_obj.SoftMaxModel(dataPath + "_train", numClasses)
    Xtest, ytest = full_model.get_data()

    models = []

    for i in range(numClasses):
        # Try a little more IID
        models.append(dataPath + str(i))
        # models.append(dataPath + str(i) + str((i + 1) % 10) + str((i + 2) % 10))

    for attack in argv[2:]:
        attack_delim = attack.split("_")
        sybil_set_size = attack_delim[0]
        from_class = attack_delim[1]
        to_class = attack_delim[2]
        for i in range(int(sybil_set_size)):
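
Example 2 (truncated mid-loop above) builds one client shard per class and parses each attack argument as "<sybil_set_size>_<from_class>_<to_class>". A hedged illustration of that parsing, using the made-up attack string "5_1_7":

# Illustration only: "5_1_7" is a hypothetical attack argument meaning
# 5 sybils poisoning class 1 toward class 7.
attack = "5_1_7"
attack_delim = attack.split("_")
sybil_set_size = attack_delim[0]
from_class = attack_delim[1]
to_class = attack_delim[2]
print(int(sybil_set_size), from_class, to_class)   # 5 1 7
# The truncated loop presumably appends one poisoned shard per sybil, e.g. a
# path like dataPath + "_bad_" + from_class + "_" + to_class (an assumption
# based on the "_bad_ideal_4_9" path used in the other examples).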
Example 3
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            iterations=3000,
            ideal_attack=False):

    # SGD batch size
    batch_size = 50

    # The number of local steps each client takes
    fed_avg_size = 1

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    # The number of previous iterations to use FoolsGold on
    memory_size = 0
    delta_memory = np.zeros((numClients, numParams, memory_size))

    summed_deltas = np.zeros((numClients, numParams))

    for i in xrange(iterations):

        delta = np.zeros((numClients, numParams))

        ##################################
        # Use significant features filter or not
        ##################################

        # Significant features filter, the top k biggest weights
        # topk = int(numParams / 2)
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        ##################################
        # Use history or not
        ##################################

        if memory_size > 0:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].privateFun(
                    weights,
                    batch_size=batch_size,
                    num_iterations=fed_avg_size)

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

                delta_memory[k, :, i % memory_size] = delta[k, :]

            # Track the total vector from each individual client
            summed_deltas = np.sum(delta_memory, axis=2)

        else:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].privateFun(
                    weights,
                    batch_size=batch_size,
                    num_iterations=fed_avg_size)

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

            # Track the total vector from each individual client
            summed_deltas = summed_deltas + delta

        ##################################
        # Use FoolsGold or something else
        ##################################

        # Use Foolsgold (can optionally clip gradients via Krum)
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                clip=0)

        # Krum
        # this_delta = model_aggregator.krum(delta, clip=1)

        # Simple Average
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 200 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))
    return weights
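
Example 3 adds an optional history window: with memory_size > 0, each client's per-round delta is written into circular-buffer slot i % memory_size and summed_deltas becomes the sum over that buffer, so FoolsGold only sees the most recent memory_size rounds (with memory_size = 0, as configured above, the full running sum is used instead). A toy sketch of that bookkeeping:

# Toy sketch of the delta_memory circular buffer in Example 3; the sizes
# below are illustrative.
import numpy as np

numClients, numParams, memory_size = 3, 4, 5
delta_memory = np.zeros((numClients, numParams, memory_size))

for i in range(12):
    delta = np.random.rand(numClients, numParams)   # this round's updates
    delta_memory[:, :, i % memory_size] = delta     # overwrite the oldest slot
    summed_deltas = np.sum(delta_memory, axis=2)    # rolling sum over the window

print(summed_deltas.shape)   # (3, 4): sum of at most the last 5 deltas per client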
Example 4
    if (dataset == "mnist"):
        numClasses = 10
        numFeatures = 784
    elif (dataset == "kddcup"):
        numClasses = 23
        numFeatures = 41
    elif (dataset == "amazon"):
        numClasses = 50
        numFeatures = 10000
    else:
        print("Dataset " + dataset + " not found. Available datasets: mnist kddcup amazon")

    numParams = numClasses * numFeatures
    dataPath = dataset + "/" + dataset

    full_model = softmax_model_obj.SoftMaxModel(dataPath + "_train", numClasses)
    Xtest, ytest = full_model.get_data()

    backdoor_model = softmax_model_obj.SoftMaxModel(dataPath + "_backdoor_test", numClasses)
    Xback, yback = backdoor_model.get_data()

    to_class = '7'

    for run in range(5):

        backdoor_eval_data = np.zeros((10, 3))
        for sybil_count in range(10):

            models = []

            for i in range(numClasses):
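
Example 4 (also truncated) evaluates a backdoor attack: it loads a "_backdoor_test" split, fixes the target class to '7', and sweeps sybil_count from 0 to 9 across 5 independent runs, filling a 10x3 results array per run. The snippet does not show what the three recorded columns are; the skeleton below uses hypothetical placeholders purely to show the shape of the sweep.

# Hedged skeleton of the sweep in Example 4; train_err / test_err /
# backdoor_rate are assumed column meanings, not taken from the source.
import numpy as np

for run in range(5):
    backdoor_eval_data = np.zeros((10, 3))
    for sybil_count in range(10):
        # ... train with `sybil_count` backdoor sybils, then evaluate ...
        train_err, test_err, backdoor_rate = 0.0, 0.0, 0.0   # placeholders
        backdoor_eval_data[sybil_count] = [train_err, test_err, backdoor_rate]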
Example 5
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            topk_prop,
            iterations=3000,
            numSybils=2,
            ideal_attack=False,
            poisoner_indices=[],
            solution=None):
    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("\nStart training across " + str(numClients) +
          " clients with solution " + str(solution) + '.')

    weights = np.random.rand(numParams) / 100.0
    lr = np.ones(numClients, )
    acc_in_iterations = []
    delta_all = []
    train_progress = []
    norm_progress = []
    loss_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):

        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :], _ = list_of_models[k].privateFun(weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        noisevec = rescale(np.random.rand(numParams), np.min(delta),
                           np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta
        if solution:
            if solution == 'fg':
                # Use Foolsgold
                this_delta = model_aggregator.foolsgold(delta,
                                                        summed_deltas,
                                                        sig_features_idx,
                                                        i,
                                                        weights,
                                                        lr,
                                                        topk_prop,
                                                        importance=False,
                                                        importanceHard=True)
            if solution == 'ours':
                this_delta, lr = model_aggregator.foolsgold2(
                    delta,
                    summed_deltas,
                    sig_features_idx,
                    i,
                    weights,
                    lr,
                    topk_prop,
                    importance=False,
                    importanceHard=True)
            if solution == 'krum':
                # Krum
                this_delta = model_aggregator.krum(delta, clip=1)
            if solution == 'average':
                this_delta = model_aggregator.average(delta)
            if solution == 'median':
                this_delta = model_aggregator.median(delta)
            if solution == 'trimmed_mean':
                this_delta = model_aggregator.trimmed_mean(delta, 0.2)
        else:
            this_delta = np.dot(delta.T, lr)

        weights = weights + this_delta

        if i % 10 == 0:
            delta_index = heapq.nlargest(20, range(len(this_delta)),
                                         this_delta.take)
            delta_each_client = []
            for idx in delta_index:
                delta_each_client.append(
                    np.hstack(([i, idx], delta[:, idx], this_delta[idx])))
            delta_all.append(delta_each_client)
            norm_progress.append(np.mean(np.linalg.norm(delta, axis=1)))
            test_error = softmax_test.test_error(weights)
            train_progress.append(test_error)
            acc_in_iterations.append([test_error] + list(
                poisoning_compare.eval(Xtest,
                                       ytest,
                                       weights,
                                       int(from_class),
                                       int(to_class),
                                       numClasses,
                                       numFeatures,
                                       verbose=False)))

            # if i % 100 == 0:
            #     print("Validation error: %.5f" % test_error)
    column = (['iteration', 'deltaIndex'] +
              ['client{}'.format(i) for i in range(numClients)] + ['combined'])
    pd.DataFrame(columns=column, data=np.reshape(
        delta_all, (-1, len(column)))).to_csv('_'.join(argv) + '_' +
                                              str(solution) + '_delta.csv')
    test_error = softmax_test.test_error(weights)
    acc_in_iterations.append([test_error] + list(
        poisoning_compare.eval(Xtest,
                               ytest,
                               weights,
                               int(from_class),
                               int(to_class),
                               numClasses,
                               numFeatures,
                               verbose=True)))
    # column = ['iteration', 'Test error', 'Accuracy overall', 'Accuracy on other digits',
    #           'Target Accuracy on source label',
    #           'Target Accuracy on target label', 'Target Attack Rate']
    # acc_in_iterations = np.insert(acc_in_iterations, 0, values=np.arange(0, iterations + 1, 10), axis=1)
    # res = pd.DataFrame(columns=column, data=acc_in_iterations)
    # res.to_csv('_'.join(argv) + '_' + str(solution) + '.csv')
    print("Done iterations!")
    print("Train error: {}".format(softmax_test.train_error(weights)))
    print("Test error: {}".format(softmax_test.test_error(weights)))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights
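
Every 10 iterations, Example 5 logs the 20 coordinates of the aggregated update with the largest values (heapq.nlargest with this_delta.take as the key) together with each client's contribution at those coordinates, and later dumps everything to CSV. A minimal sketch of that top-k index extraction with toy sizes:

# Minimal sketch of the top-k coordinate logging in Example 5; sizes are toy.
import heapq
import numpy as np

this_delta = np.random.randn(100)   # stand-in for the aggregated update
# ndarray.take(idx) returns this_delta[idx], so the bound method works as the
# key function: nlargest returns the indices of the 5 largest entries.
delta_index = heapq.nlargest(5, range(len(this_delta)), this_delta.take)
print(delta_index, this_delta[delta_index])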
Example 6
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            topk_prop,
            iterations=3000,
            numSybils=2,
            ideal_attack=False,
            poisoner_indices=[]):

    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    for i in xrange(iterations):

        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        noisevec = rescale(np.random.rand(numParams), np.min(delta),
                           np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                topk_prop,
                                                importance=False,
                                                importanceHard=True)
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights
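
Examples 5 and 6 perturb two colluding clients with a shared noise vector: rescale maps a uniform random vector into [min(delta), max(delta)], and the same vector is added to one poisoner's update and subtracted from the other's, so their combined contribution is unchanged under simple averaging while the two updates individually look less alike to a similarity-based defense. rescale itself is not defined in these snippets; the following is a guessed min-max implementation consistent with how it is called.

# Assumption: `rescale` is not shown in these snippets. This is a plausible
# min-max implementation matching its call sites: map values linearly onto
# the range [lo, hi].
import numpy as np

def rescale(vec, lo, hi):
    vec = (vec - vec.min()) / (vec.max() - vec.min())   # normalize to [0, 1]
    return lo + vec * (hi - lo)                         # stretch to [lo, hi]

delta = np.random.randn(4, 10)   # toy client updates (4 clients, 10 params)
noisevec = rescale(np.random.rand(10), np.min(delta), np.max(delta))
delta[0, :] = delta[0, :] + noisevec   # first poisoner
delta[1, :] = delta[1, :] - noisevec   # second poisoner; the pair's noise sums to zero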