Example #1
def non_iid(model_names,
            numClasses,
            numParams,
            iterations=3000,
            ideal_attack=False):

    batch_size = 50
    memory_size = 0

    list_of_models = []

    transform = transforms.Compose([transforms.ToTensor()])
    model = MNISTCNNModel
    dataset = MNISTDataset
    numParams = 41386  # parameter count of the MNIST CNN (overrides the numParams argument)
    train_client = Client("mnist", "mnist_train", batch_size, model(), dataset,
                          transform)
    test_client = Client("mnist", "mnist_test", batch_size, model(), dataset,
                         transform)
    init_weights = train_client.getModelWeights()

    for dataset_name in model_names:
        list_of_models.append(
            Client("mnist", dataset_name, batch_size, model(), dataset,
                   transform))
        list_of_models[-1].updateModel(init_weights)

    # # Include the model that sends the ideal vector on each iteration
    # if ideal_attack:
    #     list_of_models.append(softmax_model_obj.SoftMaxModelEvil(dataPath +
    #        "_bad_ideal_4_9", numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    # weights = np.random.rand(numParams) / 100.0
    train_progress = []

    delta_memory = np.zeros((numClients, numParams, memory_size))
    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):

        delta = np.zeros((numClients, numParams))

        ##################################
        # Use significant features filter or not
        ##################################
        topk = int(numParams / 2)

        # Significant features filter, the top k biggest weights
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        ##################################
        # Use annealing strategy or not
        ##################################
        if memory_size > 0:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].getGrad()

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

                delta_memory[k, :, i % memory_size] = delta[k, :]

            # Track the total vector from each individual client
            summed_deltas = np.sum(delta_memory, axis=2)

        else:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].getGrad()

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

            # Track the total vector from each individual client
            summed_deltas = summed_deltas + delta

        ##################################
        # Use FoolsGold or something else
        ##################################
        # Use Foolsgold (can optionally clip gradients via Krum)
        weights = list_of_models[0].getModelWeights()
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                clip=0)

        # Mean
        # this_delta = model_aggregator.average(delta)

        # Krum
        # this_delta = model_aggregator.krum(delta, clip=1)

        # Step in new gradient direction
        for k in range(len(list_of_models)):
            list_of_models[k].simpleStep(this_delta)

        if i % 20 == 0:
            # Average loss over the first 10 clients (use a separate loop
            # variable so the outer iteration counter i is not reused).
            num_sampled = min(10, numClients)
            loss = 0.0
            for k in range(num_sampled):
                loss += list_of_models[k].getLoss()
            print("Average loss is " + str(loss / num_sampled))

    print("Done iterations!")
    train_client.updateModel(weights)
    test_client.updateModel(weights)
    print("Train error: %d", train_client.getTrainErr())
    print("Test error: %d", test_client.getTrainErr())

    return weights
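
The examples in this listing all rely on module-level imports and repository helpers (the Client wrapper, softmax_model_obj, model_aggregator, and so on) that are defined elsewhere in the codebase. A minimal, assumption-laden sketch of the setup Example #1 expects is shown below; the commented imports are placeholders for repository-specific modules whose real paths are not given here.

# Setup sketch for Example #1 (hypothetical module paths; only numpy and
# torchvision are standard third-party packages).
import numpy as np
from torchvision import transforms

# Repository-specific components; the module names below are assumptions.
# from client import Client
# from mnist_cnn_model import MNISTCNNModel
# from mnist_dataset import MNISTDataset
# import model_aggregator
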
Example #2
def non_iid(max_similarity,
            Xtest,
            ytest,
            from_class,
            to_class,
            model_names,
            numClasses,
            numParams,
            softmax_test,
            topk_prop,
            iterations=3000,
            numSybils=2,
            ideal_attack=False,
            poisoner_indices=[]):
    numFeatures = int(numParams / numClasses)
    batch_size = 50
    topk = int(numParams / 2)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    #### Cosine similarity for adversaries ####
    sybil_noise = getOrthogonalNoise(numSybils, numParams)
    Attack_rates = []
    Attack_rates_iter = []
    numPoisonContribution = 0.0
    for i in range(iterations):
        # if i % 10 == 0 and max_similarity < 0.2:
        #     score = poisoning_compare.eval(Xtest, ytest, weights, int(from_class), int(to_class), numClasses, numFeatures, False)
        #     Attack_rates.append(score[4])
        #     Attack_rates_iter.append(i)
        #     print("Attack rate: " + str(score[4]))
        #     # attack success
        #     if score[4] > 0.8:
        #         pdb.set_trace()
        #         return weights, numPoisonContribution / i

        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        # noisevec = rescale(np.random.rand(numParams), np.min(delta), np.max(delta))
        # delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + 10000*noisevec
        # delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - 10000*noisevec

        ### Adaptive poisoning (use an even number of sybils) ###
        adaptive = True
        if adaptive:
            # sybil_deltas = summed_deltas[10:10+numSybils].copy()
            # sybil_deltas = sybil_deltas + delta[10:10+numSybils]
            sybil_cs = smp.cosine_similarity(
                summed_deltas[numClasses:numClasses + numSybils] +
                delta[numClasses:numClasses + numSybils]) - np.eye(numSybils)
            sybil_cs = np.max(sybil_cs, axis=0)
            # max_similarity = 1.0

            if np.any(sybil_cs > max_similarity):
                delta[numClasses:numClasses +
                      numSybils] = rescaleOrthogonalNoise(sybil_noise, delta)
            else:
                numPoisonContribution += 1.0
        # delta[10:10+numSybils] = getOrthogonalNoise(numSybils, numParams)
        # pdb.set_trace()
        # pdb:: np.max(smp.cosine_similarity(delta[10:10+numSybils]) - np.eye(numSybils), axis=1)
        ##########################

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                1.0,
                                                importance=True,
                                                importanceHard=False)
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))

    return weights, numPoisonContribution / iterations
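
The adaptive-poisoning branch above measures how similar the sybil updates look to each other and falls back to orthogonal noise whenever FoolsGold would likely flag them. A standalone sketch of that check follows; getOrthogonalNoise and rescaleOrthogonalNoise are repository helpers, so a QR-based stand-in is used here purely for illustration.

# Sketch of the adaptive sybil-similarity check from Example #2
# (stand-in helpers, not the repository implementation).
import numpy as np
import sklearn.metrics.pairwise as smp

def orthogonal_noise(num_sybils, num_params):
    # Rows are mutually orthogonal unit vectors (simplified stand-in for
    # the repo's getOrthogonalNoise helper).
    q, _ = np.linalg.qr(np.random.rand(num_params, num_sybils))
    return q.T

num_sybils, num_params, max_similarity = 2, 100, 0.2
sybil_updates = np.random.rand(num_sybils, num_params)

# Pairwise cosine similarity between sybil updates, ignoring self-similarity.
cs = smp.cosine_similarity(sybil_updates) - np.eye(num_sybils)
if np.max(cs) > max_similarity:
    # Too similar: FoolsGold would down-weight them, so send orthogonal
    # noise instead of the real poisoned update.
    sybil_updates = orthogonal_noise(num_sybils, num_params)
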
Example #3
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            topk_prop,
            iterations=3000,
            numSybils=2,
            ideal_attack=False,
            poisoner_indices=[],
            solution=None):
    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("\nStart training across " + str(numClients) +
          " clients with solution " + str(solution) + '.')

    weights = np.random.rand(numParams) / 100.0
    lr = np.ones(numClients)
    acc_in_iterations = []
    delta_all = []
    train_progress = []
    norm_progress = []
    loss_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):

        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :], _ = list_of_models[k].privateFun(weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        noisevec = rescale(np.random.rand(numParams), np.min(delta),
                           np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta
        if solution:
            if solution == 'fg':
                # Use Foolsgold
                this_delta = model_aggregator.foolsgold(delta,
                                                        summed_deltas,
                                                        sig_features_idx,
                                                        i,
                                                        weights,
                                                        lr,
                                                        topk_prop,
                                                        importance=False,
                                                        importanceHard=True)
            if solution == 'ours':
                this_delta, lr = model_aggregator.foolsgold2(
                    delta,
                    summed_deltas,
                    sig_features_idx,
                    i,
                    weights,
                    lr,
                    topk_prop,
                    importance=False,
                    importanceHard=True)
            if solution == 'krum':
                # Krum
                this_delta = model_aggregator.krum(delta, clip=1)
            if solution == 'average':
                this_delta = model_aggregator.average(delta)
            if solution == 'median':
                this_delta = model_aggregator.median(delta)
            if solution == 'trimmed_mean':
                this_delta = model_aggregator.trimmed_mean(delta, 0.2)
        else:
            this_delta = np.dot(delta.T, lr)

        weights = weights + this_delta

        if i % 10 == 0:
            delta_index = heapq.nlargest(20, range(len(this_delta)),
                                         this_delta.take)
            delta_each_client = []
            for idx in delta_index:
                delta_each_client.append(
                    np.hstack(([i, idx], delta[:, idx], this_delta[idx])))
            delta_all.append(delta_each_client)
            norm_progress.append(np.mean(np.linalg.norm(delta, axis=1)))
            test_error = softmax_test.test_error(weights)
            train_progress.append(test_error)
            # Xtest, ytest, from_class, to_class and numFeatures are assumed to
            # be defined at module level (they are not parameters of this function).
            acc_in_iterations.append([test_error] + list(
                poisoning_compare.eval(Xtest,
                                       ytest,
                                       weights,
                                       int(from_class),
                                       int(to_class),
                                       numClasses,
                                       numFeatures,
                                       verbose=False)))

            # if i % 100 == 0:
            #     print("Validation error: %.5f" % test_error)
    column = (['iteration', 'deltaIndex'] +
              ['client{}'.format(i) for i in range(numClients)] + ['combined'])
    # argv is assumed to be defined at module level (e.g. from sys.argv).
    pd.DataFrame(columns=column, data=np.reshape(
        delta_all, (-1, len(column)))).to_csv('_'.join(argv) + '_' +
                                              str(solution) + '_delta.csv')
    test_error = softmax_test.test_error(weights)
    acc_in_iterations.append([test_error] + list(
        poisoning_compare.eval(Xtest,
                               ytest,
                               weights,
                               int(from_class),
                               int(to_class),
                               numClasses,
                               numFeatures,
                               verbose=True)))
    # column = ['iteration', 'Test error', 'Accuracy overall', 'Accuracy on other digits',
    #           'Target Accuracy on source label',
    #           'Target Accuracy on target label', 'Target Attack Rate']
    # acc_in_iterations = np.insert(acc_in_iterations, 0, values=np.arange(0, iterations + 1, 10), axis=1)
    # res = pd.DataFrame(columns=column, data=acc_in_iterations)
    # res.to_csv('_'.join(argv) + '_' + str(solution) + '.csv')
    print("Done iterations!")
    print("Train error: {}".format(softmax_test.train_error(weights)))
    print("Test error: {}".format(softmax_test.test_error(weights)))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights
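
Examples #3 and #5 both inject a shared noise vector into two poisoners with opposite signs, so the pair cancels under plain averaging while still changing the pairwise-similarity picture that FoolsGold relies on. A self-contained sketch of that step is shown below; rescale() here is an assumed min-max helper, not the repository function.

# Sketch of the paired-poisoner noise injection (illustrative dimensions).
import numpy as np

def rescale(vec, lo, hi):
    # Map vec linearly into the interval [lo, hi].
    v = (vec - vec.min()) / (vec.max() - vec.min() + 1e-12)
    return lo + v * (hi - lo)

num_clients, num_params = 12, 50
poisoner_indices = [10, 11]
delta = np.random.randn(num_clients, num_params)

# One poisoner adds the noise vector, the other subtracts it: the sum of the
# pair is unchanged, but their individual updates point in different directions.
noisevec = rescale(np.random.rand(num_params), delta.min(), delta.max())
delta[poisoner_indices[0], :] += noisevec
delta[poisoner_indices[1], :] -= noisevec
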
Example #4
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            iterations=3000,
            ideal_attack=False):

    # SGD batch size
    batch_size = 50

    # The number of local steps each client takes
    fed_avg_size = 1

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    # The number of previous iterations to use FoolsGold on
    memory_size = 0
    delta_memory = np.zeros((numClients, numParams, memory_size))

    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):

        delta = np.zeros((numClients, numParams))

        ##################################
        # Use significant features filter or not
        ##################################

        # Significant features filter, the top k biggest weights
        # topk = int(numParams / 2)
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        ##################################
        # Use history or not
        ##################################

        if memory_size > 0:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].privateFun(
                    weights,
                    batch_size=batch_size,
                    num_iterations=fed_avg_size)

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

                delta_memory[k, :, i % memory_size] = delta[k, :]

            # Track the total vector from each individual client
            summed_deltas = np.sum(delta_memory, axis=2)

        else:

            for k in range(len(list_of_models)):

                delta[k, :] = list_of_models[k].privateFun(
                    weights,
                    batch_size=batch_size,
                    num_iterations=fed_avg_size)

                # normalize delta
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

            # Track the total vector from each individual client
            summed_deltas = summed_deltas + delta

        ##################################
        # Use FoolsGold or something else
        ##################################

        # Use Foolsgold (can optionally clip gradients via Krum)
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                clip=0)

        # Krum
        # this_delta = model_aggregator.krum(delta, clip=1)

        # Simple Average
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 200 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))
    return weights
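
Examples #1 and #4 gate an optional gradient history on memory_size: when it is positive, each client's last memory_size updates are kept in a ring buffer (indexed by i % memory_size) and summed before being handed to FoolsGold. A tiny sketch of that mechanism, with illustrative dimensions:

# Sketch of the fixed-size gradient history (ring buffer) used when memory_size > 0.
import numpy as np

num_clients, num_params, memory_size = 3, 5, 4
delta_memory = np.zeros((num_clients, num_params, memory_size))

for i in range(10):
    delta = np.random.randn(num_clients, num_params)
    delta_memory[:, :, i % memory_size] = delta   # overwrite the oldest slot
    summed_deltas = delta_memory.sum(axis=2)      # history seen by FoolsGold
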
Example #5
def non_iid(model_names,
            numClasses,
            numParams,
            softmax_test,
            topk_prop,
            iterations=3000,
            numSybils=2,
            ideal_attack=False,
            poisoner_indices=[]):

    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []

    for dataset in model_names:
        list_of_models.append(
            softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):

        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)

            # normalize delta
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise
        noisevec = rescale(np.random.rand(numParams), np.min(delta),
                           np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(delta,
                                                summed_deltas,
                                                sig_features_idx,
                                                i,
                                                weights,
                                                topk_prop,
                                                importance=False,
                                                importanceHard=True)
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    print("Train error: %d", softmax_test.train_error(weights))
    print("Test error: %d", softmax_test.test_error(weights))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights
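
For completeness, a hypothetical invocation of Example #5's non_iid might look like the following. Every name and value here is illustrative; the real dataset names, softmax_test harness, and parameter count come from the repository's driver script.

# Hypothetical call (illustrative values only; softmax_test is the
# repo-provided test harness and is not defined in this sketch).
model_names = ["mnist_train_%d" % c for c in range(10)] + ["mnist_bad_4_9",
                                                           "mnist_bad_4_9"]
weights = non_iid(model_names,
                  numClasses=10,
                  numParams=7850,              # e.g. 785 features x 10 classes
                  softmax_test=softmax_test,
                  topk_prop=0.5,
                  iterations=3000,
                  numSybils=2,
                  poisoner_indices=[10, 11])
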