Code Example #1

import os
import pickle
from collections import defaultdict

import numpy as np
import scipy.spatial.distance
import keras
from keras.callbacks import LearningRateScheduler
from keras.models import load_model

# Assumed project-local helpers (not shown here): DataGenerator, TrainHistory,
# create_model, create_gradients_fetcher, get_curriculum_schedule,
# get_lr_scheduler, plus the module-level globals `seed` and
# `subset_model_path` referenced below.

def run_experiments(dataset,
                    net_type="large",
                    optimizer="sgd",
                    initial_lr=2e-3,
                    batch_size=100,
                    num_epochs=100,
                    num_exps=20,
                    num_repts=5,
                    l2_reg=200e-4,
                    bias_l2_reg=None,
                    curriculum="None",
                    sorted_indices=None,
                    data_augmentation=False,
                    comp_grads=False):
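    """Run `num_exps` experiments of `num_repts` repetitions each on `dataset`.

    Each repetition restores a shared saved initialization, trains for
    `num_epochs` epochs (optionally with data augmentation and a curriculum
    ordering given by `sorted_indices`), and pickles the training history.
    With `comp_grads`, per-sample and per-mini-batch gradients are
    additionally compared against the full-batch gradient at every epoch.
    """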
    cache_file = os.path.join(dataset.data_path, 'data.pkl')
    (x_train, cls_train,
     y_train), (x_test, cls_test, y_test) = dataset.load_data_cache(cache_file)
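    # `cls_*` (integer labels) are loaded but unused here; training and
    # evaluation below use the categorical `y_*` targets.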

    # Scale pixel values from [0, 255] to roughly [-1, 1].
    x_train = (x_train - 128.) / 128.
    x_test = (x_test - 128.) / 128.

    if data_augmentation:
        datagen = DataGenerator(
            featurewise_center=True,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=True,  # divide inputs by dataset std
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=20,  # randomly rotate images by up to 20 degrees
            width_shift_range=0.1,  # random horizontal shift (fraction of width)
            height_shift_range=0.1,  # random vertical shift (fraction of height)
            horizontal_flip=True)  # randomly flip images horizontally
    else:
        print("No augmentation")
        datagen = DataGenerator()

    # Bookkeeping consumed by the custom generator and the callbacks below.
    index_array = np.arange(len(x_train))
    datagen.subset_index_array = index_array
    datagen.sorted_indices = sorted_indices
    datagen.steps_per_epoch = len(x_train) // batch_size
    datagen.num_classes = dataset.num_classes
    datagen.curriculum = False
    datagen.x_test = x_test
    datagen.y_test = y_test

    # Reorder the training set by the supplied ranking (e.g. easy-to-hard).
    if sorted_indices is not None:
        x_train = x_train[sorted_indices]
        y_train = y_train[sorted_indices]
    datagen.x_train = x_train
    datagen.y_train = y_train
    if curriculum != "None":
        datagen.curriculum = True
        datagen.curriculum_schedule = get_curriculum_schedule(len(x_train))

    # Extra path component when gradient comparison is enabled.
    comp_grads_dir = 'comp_grads/' if comp_grads else ''

    results_path = os.path.join(
        dataset.results_path, net_type + "/" + comp_grads_dir + optimizer +
        "2/" + str(initial_lr) + "/" + str(l2_reg) + "/" + curriculum + "/")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    print(results_path)

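    # Callbacks: a learning-rate schedule plus a custom history recorder
    # that shares state with the generator.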
    reduce_lr = LearningRateScheduler(get_lr_scheduler(initial_lr))
    train_acc = TrainHistory(datagen)

    if "subset" in dataset.data_path:
        dataset.data_path = subset_model_path
    ######################################################################  testing
    with open(os.path.join(dataset.data_path, 'svm_results.pkl'),
              mode='rb') as file:
        prob_estimates, preds_svm, _, _, _, _ = pickle.load(file)
    ######################################################################  testing end

    for exp in range(num_exps):
        grads_history1 = defaultdict(list)
        grads_history2 = defaultdict(list)
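        # grads_history1 holds per-sample vs. full-batch gradient comparisons,
        # keyed by (repeat, epoch, sample); grads_history2 holds per-mini-batch
        # vs. full-batch comparisons, keyed by (repeat, epoch, batch).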
        print("Experiment  ", exp)
        results_path_ = os.path.join(results_path, "exp{0}/".format(exp))
        if not os.path.exists(results_path_):
            os.makedirs(results_path_)
        for rpt in range(num_repts):
            print("Rept.  ", rpt)
            old_model = load_model(
                os.path.join(
                    dataset.data_path,
                    'model/' + net_type + '/model_init_{0}.h5'.format(exp)))
            model = create_model(net_type=net_type,
                                 n_classes=dataset.num_classes,
                                 reg_factor=l2_reg,
                                 bias_reg_factor=bias_l2_reg)
            model.set_weights(old_model.get_weights())
            if optimizer == "adam":
                opt = keras.optimizers.adam(lr=initial_lr,
                                            beta_1=0.9,
                                            beta_2=0.999,
                                            epsilon=None,
                                            decay=0.0,
                                            amsgrad=False)
            elif optimizer == "sgd":
                opt = keras.optimizers.sgd(lr=initial_lr)

            model.compile(loss='categorical_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])
            model.summary()
            trainable_tensors = model.trainable_weights
            gradients_fetcher = create_gradients_fetcher(model,
                                                         trainable_tensors)
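            # `gradients_fetcher(x, y, batch_size=k)` is a project helper (not
            # a Keras API), assumed to return flattened loss gradients w.r.t.
            # the trainable weights: one vector per size-k batch, so k == 1
            # yields per-sample gradients and k == len(x) a single full-batch
            # gradient.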

            # Pre-training metrics, recorded as epoch 0 of the history.
            loss, acc = model.evaluate(x_train, y_train, batch_size=100,
                                       verbose=1)
            val_loss, val_acc = model.evaluate(x_test, y_test, batch_size=100,
                                               verbose=1)
            train_acc.datagen.history = defaultdict(list)
            metrics = {'acc': acc, 'val_acc': val_acc,
                       'val_loss': val_loss, 'loss': loss}
            train_acc.datagen.history[0] = metrics
            print(metrics)

            if comp_grads:
                num_batches = len(x_train) // batch_size
                for e in range(num_epochs + 1):
                    print("Epoch #", e)

                    # Per-sample gradients (one vector per example) and the
                    # full-batch gradient over the whole training set.
                    grads_full = gradients_fetcher(x_train, y_train,
                                                   batch_size=1)
                    grads1 = gradients_fetcher(x_train, y_train,
                                               batch_size=len(x_train))

                    # Compare every per-sample gradient with the full-batch one.
                    for i in range(len(x_train)):
                        grads2 = grads_full[i]
                        # Squared Euclidean distance and cosine similarity.
                        euc_dist = np.sum(np.subtract(grads1, grads2)**2)
                        cos_dist = scipy.spatial.distance.cosine(grads1, grads2)
                        sim = 1 - cos_dist  # cosine similarity
                        rad = sim  # duplicate kept for the tuple layout below
                        # Clip before arccos to avoid NaN from rounding error.
                        angle = np.degrees(np.arccos(np.clip(sim, -1., 1.)))
                        grads_history1[(rpt, e, i)].append(
                            (euc_dist, cos_dist, sim, rad, angle))

                    # Compare every mini-batch gradient with the full-batch one.
                    for b in range(num_batches):
                        grads2 = gradients_fetcher(
                            x_train[b * batch_size:(b + 1) * batch_size],
                            y_train[b * batch_size:(b + 1) * batch_size],
                            batch_size=batch_size)
                        euc_dist = np.sum(np.subtract(grads1, grads2)**2)
                        cos_dist = scipy.spatial.distance.cosine(grads1, grads2)
                        sim = 1 - cos_dist
                        rad = sim
                        angle = np.degrees(np.arccos(np.clip(sim, -1., 1.)))
                        grads_history2[(rpt, e, b)].append(
                            (euc_dist, cos_dist, sim, rad, angle))

                    # Train for one epoch between gradient snapshots.
                    history = model.fit(x=x_train,
                                        y=y_train,
                                        batch_size=batch_size,
                                        epochs=1,
                                        verbose=2,
                                        validation_data=(x_test, y_test),
                                        callbacks=[train_acc])

            else:
                history = model.fit_generator(
                    generator=datagen.flow(x=x_train,
                                           y=y_train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           seed=seed),
                    steps_per_epoch=len(x_train) // batch_size,
                    epochs=num_epochs,
                    verbose=2,
                    validation_data=(x_test, y_test),
                    callbacks=[train_acc, reduce_lr],
                    workers=4)

            model.save(
                os.path.join(results_path_, "model_trained{0}.h5".format(rpt)))
            with open(
                    os.path.join(results_path_,
                                 "trainHistoryDict{0}".format(rpt)),
                    'wb') as file_pi:
                pickle.dump(train_acc.datagen.history, file_pi)
        if comp_grads:
            with open(os.path.join(results_path_, "gradsHistoryDict1"),
                      'wb') as file_pi:
                pickle.dump(grads_history1, file_pi, protocol=4)
            with open(os.path.join(results_path_, "gradsHistoryDict2"),
                      'wb') as file_pi:
                pickle.dump(grads_history2, file_pi, protocol=4)

    return None
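
# A minimal invocation sketch. `MyDataset` and `difficulty_scores` are
# hypothetical stand-ins: the dataset wrapper must expose `data_path`,
# `results_path`, `num_classes`, and `load_data_cache`, and the ranking is
# whatever easy-to-hard ordering the curriculum is built from:
#
#     dataset = MyDataset()
#     ranking = np.argsort(difficulty_scores)
#     run_experiments(dataset,
#                     curriculum="curriculum",
#                     sorted_indices=ranking)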