Example #1
def main():
    # open session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        model = DANN(sess)
        model.build_model()
        # you can choose 'source', 'target', or 'dann'.
        print('\nDomain adaptation training')
        source_acc, target_acc, d_acc, dann_emb, _ = model.train_and_evaluate(
            'dann')
        print('Source (MNIST) accuracy:', source_acc)
        print('Target (MNIST-M) accuracy:', target_acc)
        print('Domain accuracy:', d_acc)
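The `model.build_model()` call above hides the defining piece of a DANN: the gradient reversal layer between the feature extractor and the domain classifier. Below is a minimal sketch of one common way to express gradient reversal in TensorFlow 1.x; the name `flip_gradient` and the weight `l` are illustrative assumptions, not part of the class above.

import tensorflow as tf

def flip_gradient(x, l=1.0):
    # Identity in the forward pass: x*(1+l) - x*l == x.
    # The stop_gradient term contributes nothing in the backward pass,
    # so the gradient flowing through this layer is scaled by -l.
    positive_path = tf.stop_gradient(x * (1.0 + l))
    negative_path = -x * l
    return positive_path + negative_path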
Example #2
def main():
    data_folder = './data/'  # where the datasets are
    source_name = 'dvd'  # source domain: books, dvd, kitchen, or electronics
    target_name = 'electronics'  # target domain: books, dvd, kitchen, or electronics
    adversarial = False  # set to False to learn a standard NN

    hidden_layer_size = 50
    lambda_adapt = 0.1 if adversarial else 0.
    learning_rate = 0.001
    maxiter = 200

    print("Loading data...")
    xs, ys, xt, _, xtest, ytest = load_amazon(source_name,
                                              target_name,
                                              data_folder,
                                              verbose=True)

    nb_valid = int(0.1 * len(ys))
    xv, yv = xs[-nb_valid:, :], ys[-nb_valid:]
    xs, ys = xs[0:-nb_valid, :], ys[0:-nb_valid]

    print("Fit...")
    algo = DANN(lambda_adapt=lambda_adapt,
                hidden_layer_size=hidden_layer_size,
                learning_rate=learning_rate,
                maxiter=maxiter,
                epsilon_init=None,
                seed=12342,
                adversarial_representation=adversarial,
                verbose=True)

    algo.fit(xs, ys, xt, xv, yv)

    print("Predict...")
    prediction_train = algo.predict(xs)
    prediction_valid = algo.predict(xv)
    prediction_test = algo.predict(xtest)

    print('Training Risk   = %f' % np.mean(prediction_train != ys))
    print('Validation Risk = %f' % np.mean(prediction_valid != yv))
    print('Test Risk       = %f' % np.mean(prediction_test != ytest))

    print('==================================================================')

    print('Computing PAD on DANN representation...')
    pad_dann = compute_proxy_distance(algo.hidden_representation(xs),
                                      algo.hidden_representation(xt),
                                      verbose=True)
    print('PAD on DANN representation = %f' % pad_dann)

    print('==================================================================')

    print('Computing PAD on original data...')
    pad_original = compute_proxy_distance(xs, xt, verbose=True)
    print('PAD on original data = %f' % pad_original)
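The `compute_proxy_distance` helper is not shown in these examples. As a rough guide, the proxy A-distance (PAD) comes from training a classifier to separate source from target and mapping its test error e to 2*(1 - 2e). A minimal sketch, assuming a linear SVM and a single 50/50 split (the real helper may additionally tune the regularization constant):

import numpy as np
from sklearn.svm import LinearSVC

def compute_proxy_distance_sketch(source_X, target_X):
    # Label source samples 0 and target samples 1, shuffle,
    # train on one half, and measure error on the other half.
    X = np.vstack([source_X, target_X])
    y = np.hstack([np.zeros(len(source_X)), np.ones(len(target_X))])
    idx = np.random.permutation(len(X))
    X, y = X[idx], y[idx]
    half = len(X) // 2
    clf = LinearSVC(C=1.0).fit(X[:half], y[:half])
    error = np.mean(clf.predict(X[half:]) != y[half:])
    # PAD = 2 * (1 - 2 * error); 0 means the domains are indistinguishable.
    return 2.0 * (1.0 - 2.0 * error)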
Example #3
def main():
    X, y, Xt, yt = make_trans_moons(35, nb=150)
    Xall = np.vstack((X, Xt))

    special = np.array([[-2.4, -1.6], [-1.2, 0.4], [.8, -.5], [2.5, 1.5]])
    special_points = Xall[np.argmin(cdist(special, Xall), axis=1), :]

    # Standard NN
    algo = DANN(hidden_layer_size=15, maxiter=500, lambda_adapt=6., seed=42, adversarial_representation=False)
    algo.fit(X, y, Xt)

    pyplot.subplot(2, 4, 1)
    pyplot.title("NN: Label classification")
    draw_trans_data(X, y, Xt, algo.predict, special_points=special_points,
                    special_xytext=[(40, -15), (-30, -80), (-50, 40), (-70, 0)])

    pyplot.subplot(2, 4, 2)
    pyplot.title("NN: Representation PCA")
    run_pca(X, y, Xt, algo, special_points=special_points, mult=[-1, -1])

    pyplot.subplot(2, 4, 3)
    pyplot.title("NN: Domain classification")
    draw_trans_data(X, y, Xt, algo.predict_domain, colormap_index=1)

    pyplot.subplot(2, 4, 4)
    pyplot.title("NN: Hidden neurons")
    draw_trans_data(X, y, Xt, neurons_to_draw=(algo.W, algo.b))

    # DANN
    algo = DANN(hidden_layer_size=15, maxiter=500, lambda_adapt=6., seed=42)
    algo.fit(X, y, Xt)

    pyplot.subplot(2, 4, 5)
    pyplot.title("DANN: Label classification")
    draw_trans_data(X, y, Xt, algo.predict, special_points=special_points,
                    special_xytext=[(50, -15), (-20, -90), (-50, 40), (-80, 0)])

    pyplot.subplot(2, 4, 6)
    pyplot.title("DANN: Representation PCA")
    run_pca(X, y, Xt, algo, special_points=special_points, mult=[-1, 1],
            special_xytext=[(-10, -80), (50, -60), (-40, 50), (-20, 70)])

    pyplot.subplot(2, 4, 7)
    pyplot.title("DANN: Domain classification")
    draw_trans_data(X, y, Xt, algo.predict_domain, colormap_index=1)

    pyplot.subplot(2, 4, 8)
    pyplot.title("DANN: Hidden neurons")
    draw_trans_data(X, y, Xt, neurons_to_draw=(algo.W, algo.b))

    pyplot.show()
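The `make_trans_moons(35, nb=150)` call above builds the classic rotated-moons adaptation task: the target domain is a two-moons sample rotated by 35 degrees. A minimal sketch of such a generator, assuming scikit-learn's make_moons; the name and defaults here are guesses, not the helper's actual code.

import numpy as np
from sklearn.datasets import make_moons

def make_trans_moons_sketch(theta_degrees=35, nb=150, noise=0.05):
    # Source moons, then an independently sampled copy rotated
    # about the origin by theta_degrees to form the target domain.
    X, y = make_moons(n_samples=nb, noise=noise, random_state=1)
    X = X - np.mean(X, axis=0)  # center before rotating
    theta = np.radians(theta_degrees)
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta),  np.cos(theta)]])
    Xt, yt = make_moons(n_samples=nb, noise=noise, random_state=2)
    Xt = (Xt - np.mean(Xt, axis=0)) @ rot.T
    return X, y, Xt, yt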
Example #4
def use_DANN(key):
    # X_train, Y_train
    X_train, Y_train = [], []
    # get training dataset
    for i in [0, 1]:
        f = open(DS_PATH.format(key, i) + '.txt', 'r')
        sents = [x[:-1] for x in f if x[:-1] != '']
        embs = embedding(sents, DS_EMB_PATH.format(key, i), ARCH, key)
        embs = embs[np.random.choice(len(embs), 110, replace=False), :]
        X_train.append(embs)
        Y_train.extend([i] * embs.shape[0])
        f.close()
    X_train = np.concatenate(X_train, axis=0)
    Y_train = np.array(Y_train)

    # X_valid, Y_valid
    raw_valid, X_valid = list(open(
        TARGET_PATH, 'r')), np.load(TARGET_EMB_PATH + '.' + ARCH + '.npy')
    X_valid_b = X_valid
    if (IS_BALANCED):
        raw_valid, X_valid = balance(key, raw_valid, X_valid)
    Y_valid = np.array([(key in x) for x in raw_valid])

    clf = DANN(input_size=EMB_DIM_TABLE[ARCH],
               maxiter=DANN_MAXITER,
               verbose=False,
               name=key,
               batch_size=DANN_BATCH_SIZE,
               lambda_adapt=DANN_LAMBDA,
               hidden_layer_size=DANN_HIDDEN)

    # How to choose X_adapt? Here X_valid is reused (balanced first if IS_BALANCED).
    acc = clf.fit(X_train,
                  Y_train,
                  X_adapt=X_valid,
                  X_valid=X_valid,
                  Y_valid=Y_valid)
    return acc
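Several examples call a `balance(key, raw, X)` helper that is not shown. From how its output is used (labels are derived by checking whether `key` occurs in each sentence), it plausibly downsamples so that positive and negative sentences are equally frequent. A sketch under that assumption:

import numpy as np

def balance_sketch(key, raw_sents, X):
    # Keep equally many sentences that contain `key` (positives)
    # and that do not (negatives), preserving the embedding rows.
    labels = np.array([key in s for s in raw_sents])
    pos, neg = np.where(labels)[0], np.where(~labels)[0]
    n = min(len(pos), len(neg))
    keep = np.concatenate([np.random.choice(pos, n, replace=False),
                           np.random.choice(neg, n, replace=False)])
    np.random.shuffle(keep)
    return [raw_sents[i] for i in keep], X[keep, :]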
Example #5
def main():

    # training data
    x_train = [[1, 2], [3, 4], [-1, -2], [-3, -4]]
    y_train = [[0, 1], [0, 1], [1, 0], [1, 0]]

    dann = DANN()
    dann.fit(x_train, y_train)
    dann.evaluate(x_train, y_train)
Example #6
def DANNA(key, size=2000):
    X, Y = [], []
    for i in [0, 1]:  # the training data here is GPT-generated
        f = open(DS_PATH.format(key, i) + '.txt', 'r')
        sents = [x[:-1] for x in f if x[:-1] != '']
        embs = embedding(sents, DS_EMB_PATH.format(key, i), ARCH)
        embs = embs[np.random.choice(len(embs), min(size, len(embs)),
                                     replace=False), :]
        X.append(embs)
        Y.extend([i] * embs.shape[0])
    X = np.concatenate(X, axis=0)
    Y = np.array(Y)

    train_embs = np.load(TRAIN_EMB_PATH + '.' + ARCH + '.npy')

    # load validation set (let us load gpt2)
    raw_valid, X_valid = list(open(
        TARGET_PATH, 'r')), np.load(TARGET_EMB_PATH + '.' + ARCH + '.npy')

    # the 'potato' case is kept unbalanced: it is the control case where every answer should be negative
    if (key != 'potato'):
        raw_valid, X_valid = balance(key, raw_valid, X_valid)
    print(len(raw_valid))
    Y_valid = np.array([(key in x) for x in raw_valid])

    # learn a transfer
    clf = DANN(input_size=EMB_DIM_TABLE[ARCH],
               maxiter=4000,
               verbose=VERBOSE,
               name=key,
               batch_size=BATCH_SIZE,
               lambda_adapt=LAMDA,
               hidden_layer_size=HIDDEN)
    acc = clf.fit(X, Y, X_adapt=train_embs, X_valid=X_valid, Y_valid=Y_valid)

    return acc
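The `embedding(sents, path, ARCH)` helper used throughout Examples #4, #6, #9, and #10 is also external. Its call sites (a path prefix later combined with `'.' + ARCH + '.npy'`) suggest a disk-cached sentence encoder. A sketch under that assumption; `encode_fn` is a hypothetical stand-in for whatever encoder `arch` selects:

import os
import numpy as np

def embedding_sketch(sents, cache_path, arch, encode_fn):
    # Return cached sentence embeddings if present, otherwise
    # encode the sentences and save them for next time.
    npy_path = cache_path + '.' + arch + '.npy'
    if os.path.exists(npy_path):
        return np.load(npy_path)
    embs = np.vstack([encode_fn(s) for s in sents])
    np.save(npy_path, embs)
    return embs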
Example #7
def main():
    data_folder = './data/'     # where the datasets are
    source_name = 'dvd'         # source domain: books, dvd, kitchen, or electronics
    target_name = 'electronics' # target domain: books, dvd, kitchen, or electronics
    adversarial = False          # set to False to learn a standard NN

    hidden_layer_size = 50
    lambda_adapt = 0.1 if adversarial else 0.
    learning_rate = 0.001
    maxiter = 200

    print("Loading data...")
    xs, ys, xt, _, xtest, ytest = load_amazon(source_name, target_name, data_folder, verbose=True)

    nb_valid = int(0.1 * len(ys))
    xv, yv = xs[-nb_valid:, :], ys[-nb_valid:]
    xs, ys = xs[0:-nb_valid, :], ys[0:-nb_valid]

    print("Fit...")
    algo = DANN(lambda_adapt=lambda_adapt, hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=adversarial, verbose=True)
    algo.fit(xs, ys, xt, xv, yv)

    print("Predict...")
    prediction_train = algo.predict(xs)
    prediction_valid = algo.predict(xv)
    prediction_test = algo.predict(xtest)

    print('Training Risk   = %f' % np.mean(prediction_train != ys))
    print('Validation Risk = %f' % np.mean(prediction_valid != yv))
    print('Test Risk       = %f' % np.mean(prediction_test != ytest))

    print('==================================================================')

    print('Computing PAD on DANN representation...')
    pad_dann = compute_proxy_distance(algo.hidden_representation(xs), algo.hidden_representation(xt), verbose=True)
    print('PAD on DANN representation = %f' % pad_dann)

    print('==================================================================')

    print('Computing PAD on original data...')
    pad_original = compute_proxy_distance(xs, xt, verbose=True)
    print('PAD on original data = %f' % pad_original)
Example #8
                             dataset_name,
                             noise=0.5,
                             suffix='t')
xtest, ytest = load_representations(context_folder,
                                    dataset_name,
                                    noise=0.5,
                                    suffix='test')
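# remap labels from {-1, +1} to {0, 1}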
ys = (ys + 1) / 2
ytest = (ytest + 1) / 2
nb_valid = int(0.1 * len(ys))
xv, yv = xs[-nb_valid:, :], ys[-nb_valid:]
xs, ys = xs[0:-nb_valid, :], ys[0:-nb_valid]

print("Fit...")
algo = DANN(lambda_adapt=lambda_adapt,
            hidden_layer_size=hidden_layer_size,
            learning_rate=learning_rate,
            maxiter=maxiter,
            epsilon_init=None,
            seed=12342)
algo.fit(xs, ys, xt, xv, yv)

print("Predict...")
prediction_train = algo.predict(xs)
prediction_valid = algo.predict(xv)
prediction_test = algo.predict(xtest)

print('Training Risk = %f' % np.mean(prediction_train != ys))
print('Validation Risk = %f' % np.mean(prediction_valid != yv))
print('Test Risk = %f' % np.mean(prediction_test != ytest))
Example #9
def train_atk_classifier(key, size=1900):
    pca = None
    X_train, Y_train = [], []

    for i in [0, 1]:
        f = open(PATH.format(key, i), 'r')
        sents = [x[:-1] for x in f if x[:-1] != '']
        embs = embedding(sents, EMB_PATH.format(key, i), ARCH)
        if args.prefix != 'part':
            embs = embs[np.random.choice(len(embs), size, replace=False), :]
        X_train.append(embs)
        Y_train.extend([i] * embs.shape[0])
    X_train = np.concatenate(X_train, axis=0)
    Y_train = np.array(Y_train)
    train_embs = np.load(TRAIN_EMB_PATH)

    # BottleNeck
    # X_train = np.load(TRAIN_EMB_PATH)
    # raw_train = list(open(TRAIN_PATH, 'r'))
    # if IS_BALANCED:
    #     raw_train, X_train = balance(key, raw_train, X_train)
    # Y_train = np.array([(key in x) for x in raw_train])

    # load validation set

    raw_valid, X_valid = list(open(TARGET_PATH, 'r')), np.load(TARGET_EMB_PATH)
    if (key != 'potato' and IS_BALANCED):
        raw_valid, X_valid = balance(key, raw_valid, X_valid)
    print(len(raw_valid))
    Y_valid = np.array([(key in x) for x in raw_valid])
    acc = -1
    # learn a transfer

    # clf = linear_model.SGDClassifier(max_iter = 1000,  verbose = 0)
    # clf = SVC(kernel = 'rbf', gamma = 'scale', verbose = False)
    # clf = KNeighborsClassifier(n_neighbors=1, p = 1)
    if (NONLINEAR):
        # clf = DANN(input_size = EMB_DIM, maxiter = 2000, verbose = False, name = key, batch_size = 128)
        clf = DANN(input_size=EMB_DIM,
                   maxiter=4000,
                   verbose=True,
                   name=key,
                   batch_size=64,
                   lambda_adapt=1.0,
                   hidden_layer_size=25)
        acc = clf.fit(X_train,
                      Y_train,
                      X_adapt=train_embs,
                      X_valid=X_valid,
                      Y_valid=Y_valid)
        print("DANN Acc.: {:.4f}".format(acc))
    # train_embs = train_embs[np.random.choice(len(train_embs), 2000), :]

    # apply PCA first
    # if DO_PCA:
    #     train_embs = train_embs[np.random.choice(len(train_embs), size=6 * int(len(X_train)), replace=False)]
    #     package = np.concatenate([X_train, train_embs], axis=0)
    #     pca = PCA(n_components=INPUT_DIM)
    #     pca.fit(package)
    #     X_train, train_embs = pca.transform(X_train), pca.transform(train_embs)

    # if NONLINEAR:
    #     clf = NonLinearClassifier(key, ARCH, cls_num=2, pca=pca, use_pca=DO_PCA)

    # clf.fit(X_train, Y_train)

    if NONLINEAR:
        clf.to(torch.device('cpu'))
    # on current set
    # correct = 0
    if (VERBOSE):
        print("TRAIN INFERENCE MODEL FROM EXTERNAL SOURCES (# = {})".format(
            len(X_train)))
        correct = np.sum((clf.predict(X_train) == Y_train))
        print("Source Domain Acc.: {:.4f}".format(correct / len(Y_train)))
    return clf, pca, acc
Example #10
def ATTACK(key, use_dp=False, defense=None, verbose=VERBOSE, size=2000):
    # (X, Y) is from external corpus.
    # X are sentence embeddings. Y are labels.
    # To prepare an external corpus, we substitute the food keywords in Yelp dataset to body keywords.

    ## GET THE TRAINING DATA, NO NEED TO DEFEND
    X, Y = [], []
    mean_direction = []
    for i in [0, 1]:  # the training data here is GPT-generated
        f = open(DS_PATH.format(key, i) + '.txt', 'r')
        sents = [x[:-1] for x in f if x[:-1] != '']
        print(DS_EMB_PATH.format(key, i))
        embs = embedding(sents, DS_EMB_PATH.format(key, i), ARCH)
        embs = embs[np.random.choice(len(embs), min(size, len(embs)),
                                     replace=False), :]
        mean_direction.append(np.mean(embs, axis=0))
        X.append(embs)
        Y.extend([i] * embs.shape[0])
    X = np.concatenate(X, axis=0)
    Y = np.array(Y)
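    # direction from the class-0 mean to the class-1 mean in embedding space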
    trans_D = mean_direction[1] - mean_direction[0]

    # print(trans_D)
    # (Target_sents, Target_X) is from target domain.
    # Target_X are sentence embeddings. Target_sents are original sentences.
    f = open(TRAIN_PATH, 'r')
    Target_sents = [x[:-1] for x in f if x[:-1] != '']
    f.close()

    # truncate (DEFEND): shuffle the target set, then either balance it or keep the first 1000
    Target_X = embedding(Target_sents, TRAIN_EMB_PATH, ARCH)
    rand_idx = np.random.permutation(Target_X.shape[0])
    shuffled_target_X = Target_X[rand_idx, :]
    if (not NO_BALANCE):
        Target_sents, Target_X = balance(key, Target_sents, Target_X)
    else:
        Target_X = Target_X[rand_idx, :]
        Target_sents = [Target_sents[i] for i in rand_idx]
        Target_X = Target_X[:1000, :]
        Target_sents = Target_sents[:1000]
    # print(Target_sents[0])
    Target_Y = np.array([int(key in x) for x in Target_sents])
    sents = [x.split('\t') for x in Target_sents if x[:-1] != '']
    # print(sents)
    # print(sents[0])
    if (DATASET == 'medical'):
        target_util_y = np.array([int(s[1]) for s in sents])
    else:
        target_util_y = np.array([0 for s in sents])

    # print("Balanced: {}".format(np.mean(Target_Y)))

    # now the target Y here is the sensitive label

    if (use_dp):
        protected_target_X = defense(Target_X, Target_Y)
        # print(Target_X[0, :])
        # print(torch.sum(protected_target_X[0, :]))

    # (X_valid, Y_valid) is from valid set.
    # SVM: This is regarded as shadow corpus of Target domain.
    # DANN or MLP: This is used to early stop.
    # X_valid are sentence embeddings. Y_valid are labels.
    raw_valid, X_valid = list(open(
        TARGET_PATH, 'r')), np.load(TARGET_EMB_PATH + '.' + ARCH + '.npy')
    if (not NO_BALANCE):
        raw_valid, X_valid = balance(key, raw_valid, X_valid)

    Y_valid = np.array([int(key in x) for x in raw_valid])

    # load the utility model
    util_acc, protected_util_acc = 0.0, 0.0

    if (DATASET == 'medical'):
        util_clf = NonLinearClassifier(key,
                                       EMB_DIM_TABLE[ARCH],
                                       HIDDEN_DIM,
                                       cls_num=10)

        # util_clf.load_state_dict(torch.load(UTIL_MODEL_PATH + "medical_functional_{}.cpt".format(ARCH)))
        util_clf.cuda()
        preds = util_clf.predict(Target_X)
        util_acc = np.mean(preds == target_util_y)
    # print("Util Acc. {:.4f}".format(acc))
    if (use_dp and DATASET == 'medical'):
        protected_target_X = torch.FloatTensor(protected_target_X)
        preds = util_clf.predict(protected_target_X)
        protected_util_acc = np.mean(preds == target_util_y)

    if (VERBOSE):
        print("TRAINING SET SIZE: {}".format(len(Y)))
        print("EMBEDDINGS FROM TARGET DOMAIN: {}".format(len(Target_Y)))
        print("TEST SET SIZE: {}".format(len(Y_valid)))
        # learn a transfer
        print("TESTING MODEL: {}".format(CLS))

    acc, protected_acc = 0.0, 0.0
    if CLS == 'MLP':
        print("Histogram of the Target Y: {}".format(np.histogram(Target_Y)))
        clf = NonLinearClassifier(key, EMB_DIM_TABLE[ARCH], HIDDEN_DIM)
        clf.cuda()
        clf.fit(X, Y)
        # assume the existence of the model

        acc = clf._evaluate(Target_X, Target_Y)

        if (use_dp):
            protected_target_X = torch.FloatTensor(protected_target_X)
            protected_acc = clf._evaluate(protected_target_X, Target_Y)

    elif CLS == 'SVM':
        # for discussion
        REVERSE = False
        # shadow
        clf = SVC(kernel='{}'.format(SVM_KERNEL),
                  gamma='scale',
                  verbose=VERBOSE,
                  max_iter=5000)
        # print(X_valid)
        # print(Y_valid)

        if (REVERSE):
            clf.fit(Target_X, Target_Y)
        else:
            start = time.time()
            clf.fit(X_valid, Y_valid)
            end = time.time()
            print("Time: {}".format(end - start))
        # if(defense):
        # the common approach
        if (REVERSE):
            preds = clf.predict(X_valid)
            acc = np.mean(preds == Y_valid)
        else:
            preds = clf.predict(Target_X)
            acc = np.mean(preds == Target_Y)
        # print(acc)
        if (use_dp):
            preds = clf.predict(protected_target_X)
            protected_acc = np.mean(preds == Target_Y)
    elif CLS == 'DANN':
        # I have no idea what the 1000 is.
        DANN_CPT_PATHs = DANN_CPT_PATH + "{}_cracker_{}.cpt".format(key, ARCH)
        clf = DANN(input_size=EMB_DIM_TABLE[ARCH],
                   maxiter=MAXITER,
                   verbose=VERBOSE,
                   name=key,
                   batch_size=BATCH_SIZE,
                   lambda_adapt=LAMDA,
                   hidden_layer_size=HIDDEN,
                   cached=DANN_CACHED,
                   cpt_path=DANN_CPT_PATHs)
        # clf.cuda()

        # set the size of the Target_X
        trans_D = 0.5 * trans_D
        concated_Target_X = np.concatenate(
            [Target_X - trans_D, Target_X + trans_D], axis=0)

        clf.fit(X,
                Y,
                X_adapt=concated_Target_X,
                X_valid=Target_X - trans_D,
                Y_valid=Target_Y)

        Target_X = torch.FloatTensor(Target_X - trans_D)
        acc = clf.validate(Target_X, Target_Y)
        # print(acc)
        if (use_dp):
            protected_target_X = torch.FloatTensor(protected_target_X).cuda()
            protected_acc = clf.validate(protected_target_X, Target_Y)
        # print("Target Domain Inference {} Acc: {:.3f}".format(key, acc))
        # return acc
    elif CLS == 'MLP_SHADOW':
        clf = NonLinearClassifier(key, EMB_DIM_TABLE[ARCH], HIDDEN_DIM)
        clf.cuda()
        clf.fit(X_valid, Y_valid)
        acc = clf._evaluate(Target_X, Target_Y)

    else:
        clf = None
        print("wrong classifier name")
    return acc, protected_acc, util_acc, protected_util_acc
Example #11
def main():
    data_folder = './data/'  # where the datasets are
    source_name = 'books'  # source domain: books, dvd, kitchen, or electronics
    target_name = 'data'  # target domain: books, dvd, kitchen, or electronics
    adversarial = True  # set to False to learn a standard NN
    msda = True

    hidden_layer_size = 50
    lambda_adapt = 0.1 if adversarial else 0.
    learning_rate = 0.001 if not msda else 0.0001
    maxiter = 100
    print("Loading data...")
    xs, ys, xt, _, xtest, ytest = load_amazon(source_name,
                                              target_name,
                                              data_folder,
                                              verbose=True)
    if msda:
        xs_path, xt_path, xtest_path = [
            '%s/%s.%s_%s_msda.npy' % (data_folder, source_name, target_name, E)
            for E in ('source', 'target', 'test')
        ]
        # try:
        #     xs_msda = np.load(xs_path)
        #     xt_msda = np.load(xt_path)
        #     xtest_msda = np.load(xtest_path)
        #     print('mSDA representations loaded from disk')
        # except:
        print('Computing mSDA representations...')
        xs_msda, xt_msda, xtest_msda = compute_msda_representation(
            xs, xt, xtest)
        ds, ns = np.shape(xs_msda)
        print('shape(xs_msda):', ds, ns)
        # print(xs_msda)
        dt, nt = np.shape(xt_msda)
        print('shape(xt_msda):', dt, nt)
        # print(xt_msda)
        dxtest, nxtest = np.shape(xtest_msda)
        print('shape(xtest_msda):', dxtest, nxtest)
        # np.save(xs_path, xs_msda)
        # np.save(xt_path, xt_msda)
        # np.save(xtest_path, xtest_msda)

        xs, xt, xtest = xs_msda, xt_msda, xtest_msda

    nb_valid = int(0.1 * len(ys))

    xv, yv = xs[-nb_valid:, :], ys[-nb_valid:]
    xs, ys = xs[0:-nb_valid, :], ys[0:-nb_valid]

    print("Fit...")
    algo = DANN(lambda_adapt=lambda_adapt,
                hidden_layer_size=hidden_layer_size,
                learning_rate=learning_rate,
                maxiter=maxiter,
                epsilon_init=None,
                seed=12342,
                adversarial_representation=adversarial,
                verbose=True)
    algo.fit(xs, ys, xt, xv, yv)

    print("Predict...")
    prediction_train = algo.predict(xs)
    prediction_valid = algo.predict(xv)
    prediction_test = algo.predict(xtest)

    va = count(prediction_valid)
    print('Share of positive predictions on the validation set:')
    print(va[0], "%.2f%%" % ((float(va[0]) / len(prediction_valid)) * 100))
    print('Share of negative predictions on the validation set:')
    print(va[1], "%.2f%%" % ((float(va[1]) / len(prediction_valid)) * 100))
    te = count(prediction_test)
    print('Share of positive predictions on the test set:')
    print(te[0], "%.2f%%" % ((float(te[0]) / len(prediction_test)) * 100))
    print('Share of negative predictions on the test set:')
    print(te[1], "%.2f%%" % ((float(te[1]) / len(prediction_test)) * 100))

    print('Training Risk   = %f' % np.mean(prediction_train != ys))
    print('Validation Risk = %f' % np.mean(prediction_valid != yv))
    print('Test Risk       = %f' % np.mean(prediction_test != ytest))

    print('Per-class (positive/negative) accuracy on the validation set:')
    print(compare(yv, prediction_valid))
    print('Per-class (positive/negative) accuracy on the test set:')
    print(compare(ytest, prediction_test))

    print('==================================================================')
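The `count` and `compare` helpers printed above are not defined in this example. Judging from the surrounding prints (a per-class tally, then per-class accuracy), plausible sketches look like the following; the 0/1 label encoding is an assumption:

import numpy as np

def count_sketch(predictions):
    # Tally of positive (index 0) and negative (index 1) predictions.
    preds = np.asarray(predictions)
    return [int(np.sum(preds == 1)), int(np.sum(preds == 0))]

def compare_sketch(y_true, y_pred):
    # Accuracy computed separately on the positive and negative class.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    pos = y_true == 1
    return np.mean(y_pred[pos] == 1), np.mean(y_pred[~pos] == 0)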
Example #12
def main():
    data_folder = './data/'     # where the datasets are
    source_name = 'dvd'         # source domain: books, dvd, kitchen, or electronics
    target_name = 'electronics' # target domain: books, dvd, kitchen, or electronics
    adversarial = False          # set to False to learn a standard NN
    msda = True

    hidden_layer_size = 50
    lambda_adapt = 0.1 if adversarial else 0.
    learning_rate = 0.001 if not msda else 0.0001
    maxiter = 200

    print("Loading data...")
    xs, ys, xt, _, xtest, ytest = load_amazon(source_name, target_name, data_folder, verbose=True)

    if msda:
        xs_path, xt_path, xtest_path = ['%s/%s.%s_%s_msda.npy' % (data_folder, source_name, target_name, E)
                                        for E in ('source', 'target', 'test')]
        try:
            xs_msda = np.load(xs_path)
            xt_msda = np.load(xt_path)
            xtest_msda = np.load(xtest_path)
            print('mSDA representations loaded from disk')
        except IOError:
            print('Computing mSDA representations...')
            xs_msda, xt_msda, xtest_msda = compute_msda_representation(xs, xt, xtest)
            np.save(xs_path, xs_msda)
            np.save(xt_path, xt_msda)
            np.save(xtest_path, xtest_msda)

        xs, xt, xtest = xs_msda, xt_msda, xtest_msda

    nb_valid = int(0.1 * len(ys))
    xv, yv = xs[-nb_valid:, :], ys[-nb_valid:]
    xs, ys = xs[0:-nb_valid, :], ys[0:-nb_valid]

    print("Fit...")
    algo = DANN(lambda_adapt=lambda_adapt, hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=adversarial, verbose=True)
    algo.fit(xs, ys, xt, xv, yv)

    print("Predict...")
    prediction_train = algo.predict(xs)
    prediction_valid = algo.predict(xv)
    prediction_test = algo.predict(xtest)

    print('Training Risk   = %f' % np.mean(prediction_train != ys))
    print('Validation Risk = %f' % np.mean(prediction_valid != yv))
    print('Test Risk       = %f' % np.mean(prediction_test != ytest))

    print('==================================================================')

    print('Computing PAD on DANN representation...')
    pad_dann = compute_proxy_distance(algo.hidden_representation(xs), algo.hidden_representation(xt), verbose=True)
    print('PAD on DANN representation = %f' % pad_dann)

    print('==================================================================')

    print('Computing PAD on original data...')
    pad_original = compute_proxy_distance(xs, xt, verbose=True)
    print('PAD on original data = %f' % pad_original)