def svm(data):
    """Train one SVM per source domain and cache its decision values.

    For each source domain, stacks the labelled target data on top of that
    source's data, trains an SVM per round on source + target-train rows,
    and saves the decision values over all target rows to
    results/<currentY>/svm_fr/decision_values/<domain>/dv_round=<r>.mat
    (reused on later runs if the file already exists).

    Side effects: creates result directories, writes a log line via
    ut.log_print, writes .mat files, and calls ut.save_mmd_fr.
    """
    results_directory_name = "results"
    result_file = os.path.join(results_directory_name, str(data.currentY), "svm_fr", "result_main_svm_fr.txt")
    # Create results/<currentY>/svm_fr in one call; makedirs builds all
    # intermediate directories and exist_ok avoids exists/mkdir races.
    os.makedirs(os.path.join(results_directory_name, str(data.currentY), 'svm_fr'), exist_ok=True)
    ut.log_print(result_file,
                 '<==========  BEGIN @ ' + datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y") +  ' ============>\n')
    best_params = []
    for s in range(len(data.Xsource)):
        print("\n--- Domain:", s, "---\n")
        # Per-domain cache directory: results/<currentY>/svm_fr/decision_values/<domain>
        dv_dir = os.path.join(results_directory_name, str(data.currentY), 'svm_fr', 'decision_values', data.domain_names[s])
        os.makedirs(dv_dir, exist_ok=True)
        # For the s-th source domain...
        Xsource = data.Xsource[s]
        ysource = data.ysource[s]
        # Glue together the labelled data from the source and target domains;
        # target rows come first, source rows follow.
        Xsparse = sp.sparse.vstack([data.Xtarget, csc_matrix(Xsource)])
        Xdata = Xsparse.todense()
        # Associated labels for all training points, in the same row order.
        y = np.concatenate((data.ytarget, ysource))
        src_index = [i + data.Xtarget.shape[0] for i in range(Xsource.shape[0])]
        tar_index = list(range(data.Xtarget.shape[0]))

        for r in range(data.nRound):
            tar_train_index = data.tar_train_index[r]
            tar_test_index = data.tar_test_index[r]
            # Train on every source row plus this round's target-train rows.
            train_index = np.concatenate((src_index, tar_train_index))
            dv_file = os.path.join(dv_dir, "dv_round=" + str(r) + ".mat")
            if os.path.exists(dv_file):
                # Reuse cached decision values from a previous run.
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                Ymatrix = np.asarray(np.squeeze(y[train_index]), dtype=float)
                Xmatrix = np.asarray(Xdata[train_index])

                classifier = prepare_svm(Xmatrix, Ymatrix, False)
                best_params.append(classifier.get_params())

                # Decision values over ALL target rows (train + test).
                decision_values = np.array(classifier.decision_function(Xdata[tar_index]))

                training_predictions = classifier.predict(Xmatrix)
                print("Training Accuracy", ut.final_accuracy(training_predictions, y[train_index]))

                predictions = classifier.predict(Xdata[tar_index])
                print("Test Accuracy", ut.final_accuracy(predictions, y[tar_index]))
                io.savemat(dv_file, {'decision_values': decision_values})

                # NOTE(review): runs once per cache-miss round with the
                # cumulative best_params list; if one save per domain was
                # intended, this belongs after the round loop -- confirm.
                ut.save_mmd_fr(data, best_params, s)
def second_phase(data, results):
    """Fuse cached per-source SVM decision values into DAM predictions.

    Loads the decision values and MMD values written by the first phase,
    converts the MMDs into per-source weights (gamma), runs train_fast_dam
    on the target-domain kernel, and appends the test-index decision values
    as a new column of ``results``. Returns the augmented ``results``, or
    -1 if a required cache file is missing.

    NOTE(review): this definition is shadowed by an identical
    ``second_phase`` defined immediately below in this file; only the later
    definition is live. Consider deleting one of the two.
    """
    result_directory_name = 'results'
    # NOTE(review): built but never used in this function.
    result_filename = os.path.join(result_directory_name, 'secondPhase',
                                   'results_main_secondPhase.txt')
    if not (os.path.exists(result_directory_name)):
        os.mkdir(result_directory_name)
    if not (os.path.exists(os.path.join(result_directory_name,
                                        'secondPhase'))):
        os.mkdir(os.path.join(result_directory_name, 'secondPhase'))
    # Linear kernel over the (copied) target data.
    Xdata = copy.deepcopy(data.Xtarget)
    K = Xdata.dot(Xdata.T)
    # NOTE(review): tar_train_index is never used below.
    tar_train_index = data.tar_train_index
    tar_test_index = data.tar_test_index
    all_test_dv = []
    mmd_values = []

    for r in range(data.nRound):
        #loading DVs and MMD values
        for s in range(len(data.Xsource)):
            dv_dir = os.path.join(result_directory_name, str(data.currentY),
                                  'svm_fr', 'decision_values',
                                  data.domain_names[s])
            mmd_dir = os.path.join(result_directory_name, str(data.currentY),
                                   'mmd_values_fr', data.domain_names[s])
            dv_file = os.path.join(dv_dir, "dv_round=" + str(r) + ".mat")
            if os.path.exists(dv_file):
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                print(
                    'You need to run the required baseline algorithms to obtain the decision values required by algorithm'
                )
                return -1
            mmd_file = os.path.join(mmd_dir, 'mmd.mat')
            if os.path.exists(mmd_file):
                mmd_value = (io.loadmat(mmd_file))['mmd_value']
            else:
                print(
                    'please run the proper save_mmd first to prepare the mmd values required by this algorithm'
                )
                return -1
            mmd_values.extend(mmd_value)
            all_test_dv.extend([decision_values])

        # Hide the test labels from the solver.
        y = copy.deepcopy(data.ytarget)
        y[tar_test_index] = 0

        f_s = np.squeeze(np.array(all_test_dv))
        print("mmds", mmd_values)
        # Per-source weights: gamma_k proportional to exp(-BETA * mmd_k^2),
        # normalized to sum to 1.
        gamma_s = np.zeros((len(mmd_values)), dtype=float)
        mmd_aux = np.reshape(
            np.asarray(mmd_values).flatten(), (len(mmd_values), 1))
        mmd_aux = np.power(mmd_aux, 2).flatten()
        gamma_s = gamma_s + np.exp((-1 * BETA) * mmd_aux)
        gamma_s = gamma_s / np.sum(gamma_s)
        print("gamma", gamma_s)
        theta1 = LAMBDA_L
        theta2 = LAMBDA_D

        dv = train_fast_dam(K, y, f_s, gamma_s, theta1, theta2, np.array([]),
                            np.array([]))

        # Flatten the per-test-sample decision values into one column and
        # append it to the running results array.
        formatted_results = []
        for item in dv[tar_test_index]:
            formatted_results.extend(item)
        formatted_results = np.asarray(formatted_results).reshape(
            (len(formatted_results), 1))
        results = np.hstack((results, formatted_results))

        print("DV", dv[tar_test_index])
        accuracy = ut.final_accuracy(np.squeeze(dv[tar_test_index]),
                                     np.squeeze(data.ytarget[tar_test_index]))
        print("Accuracy?!", accuracy, "\n")

        # NOTE(review): this return sits INSIDE the round loop, so only
        # round 0 ever executes despite data.nRound -- likely a bug; confirm.
        return results
def second_phase(data, results):
    """Fuse cached per-source SVM decision values into DAM predictions.

    For every round, loads the decision values and MMD values written by the
    first phase, converts the MMDs into per-source weights (gamma), runs
    train_fast_dam on the target-domain kernel, and appends the test-index
    decision values as a new column of ``results``.

    Returns the augmented ``results`` array, or -1 when a required cache
    file (decision values or MMD values) is missing.
    """
    result_directory_name = 'results'
    # NOTE(review): built but never used; kept as the intended log location.
    result_filename = os.path.join(result_directory_name, 'secondPhase', 'results_main_secondPhase.txt')
    # makedirs creates both directory levels and tolerates pre-existing ones.
    os.makedirs(os.path.join(result_directory_name, 'secondPhase'), exist_ok=True)
    Xdata = copy.deepcopy(data.Xtarget)
    # Linear kernel over the target data.
    K = Xdata.dot(Xdata.T)
    tar_test_index = data.tar_test_index

    for r in range(data.nRound):
        # BUG FIX: these accumulators are now reset each round and the final
        # return was moved after the loop. Previously `return results` sat
        # inside the loop body, so only round 0 ever executed, and the lists
        # grew across rounds, which would have corrupted f_s/gamma_s.
        all_test_dv = []
        mmd_values = []
        # Load cached decision values and MMD value for every source domain.
        for s in range(len(data.Xsource)):
            dv_dir = os.path.join(result_directory_name, str(data.currentY), 'svm_fr', 'decision_values', data.domain_names[s])
            mmd_dir = os.path.join(result_directory_name, str(data.currentY), 'mmd_values_fr', data.domain_names[s])
            dv_file = os.path.join(dv_dir, "dv_round="+str(r)+".mat")
            if os.path.exists(dv_file):
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                print('You need to run the required baseline algorithms to obtain the decision values required by algorithm')
                return -1
            mmd_file = os.path.join(mmd_dir, 'mmd.mat')
            if os.path.exists(mmd_file):
                mmd_value = (io.loadmat(mmd_file))['mmd_value']
            else:
                print('please run the proper save_mmd first to prepare the mmd values required by this algorithm')
                return -1
            mmd_values.extend(mmd_value)
            all_test_dv.extend([decision_values])

        # Hide the test labels from the solver.
        y = copy.deepcopy(data.ytarget)
        y[tar_test_index] = 0

        f_s = np.squeeze(np.array(all_test_dv))
        print("mmds", mmd_values)
        # Per-source weights: gamma_k proportional to exp(-BETA * mmd_k^2),
        # normalized to sum to 1.
        mmd_aux = np.power(np.asarray(mmd_values).flatten(), 2)
        gamma_s = np.exp((-1*BETA)*mmd_aux)
        gamma_s = gamma_s/np.sum(gamma_s)
        print("gamma", gamma_s)
        theta1 = LAMBDA_L
        theta2 = LAMBDA_D

        dv = train_fast_dam(K, y, f_s, gamma_s, theta1, theta2, np.array([]), np.array([]))

        # Flatten the per-test-sample decision values into one column and
        # append it to the running results array.
        formatted_results = []
        for item in dv[tar_test_index]:
            formatted_results.extend(item)
        formatted_results = np.asarray(formatted_results).reshape((len(formatted_results), 1))
        results = np.hstack((results, formatted_results))

        print("DV", dv[tar_test_index])
        accuracy = ut.final_accuracy(np.squeeze(dv[tar_test_index]), np.squeeze(data.ytarget[tar_test_index]))
        print("Accuracy?!", accuracy, "\n")

    return results