def svm(data):
    """Train one SVM per source domain and cache its decision values.

    For each source domain, the labelled source data is stacked with the
    target data, an SVM is trained for every round, and the decision
    values on the target points are written to
    results/<currentY>/svm_fr/decision_values/<domain>/dv_round=<r>.mat
    (training is skipped when that cache file already exists).  After all
    rounds of a domain, the MMD values are saved via ut.save_mmd_fr.
    """
    results_directory_name = "results"
    result_file = os.path.join(results_directory_name, str(data.currentY),
                               "svm_fr", "result_main_svm_fr.txt")
    # Create "results/<currentY>/svm_fr" and all parents in one call;
    # exist_ok avoids the check-then-mkdir race of the original chain.
    os.makedirs(os.path.join(results_directory_name, str(data.currentY),
                             "svm_fr"), exist_ok=True)
    ut.log_print(result_file,
                 '<========== BEGIN @ '
                 + datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
                 + ' ============>\n')
    best_params = []
    for s in range(len(data.Xsource)):
        print("\n--- Domain:", s, "---\n")
        # Per-domain cache directory for the decision values; makedirs
        # replaces the five duplicated exists/mkdir checks.
        dv_dir = os.path.join(results_directory_name, str(data.currentY),
                              'svm_fr', 'decision_values',
                              data.domain_names[s])
        os.makedirs(dv_dir, exist_ok=True)
        # Glue together the labelled data from the source and target domains
        # (target rows first, source rows appended below).
        Xsource = data.Xsource[s]
        ysource = data.ysource[s]
        Xsparse = sp.sparse.vstack([data.Xtarget, csc_matrix(Xsource)])
        Xdata = Xsparse.todense()
        # Labels for all training points, in the same target-then-source order.
        y = np.concatenate((data.ytarget, ysource))
        src_index = [i + data.Xtarget.shape[0] for i in range(Xsource.shape[0])]
        tar_index = list(range(data.Xtarget.shape[0]))
        for r in range(data.nRound):
            tar_train_index = data.tar_train_index[r]
            train_index = np.concatenate((src_index, tar_train_index))
            dv_file = os.path.join(dv_dir, "dv_round=" + str(r) + ".mat")
            if os.path.exists(dv_file):
                # Cached from a previous run; skip retraining.
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                Ymatrix = np.asarray(np.squeeze(y[train_index]), dtype=float)
                Xmatrix = np.asarray(Xdata[train_index])
                classifier = prepare_svm(Xmatrix, Ymatrix, False)
                best_params.append(classifier.get_params())
                decision_values = np.array(
                    classifier.decision_function(Xdata[tar_index]))
                training_predictions = classifier.predict(Xmatrix)
                print("Training Accuracy",
                      ut.final_accuracy(training_predictions, y[train_index]))
                predictions = classifier.predict(Xdata[tar_index])
                print("Test Accuracy",
                      ut.final_accuracy(predictions, y[tar_index]))
                io.savemat(dv_file, {'decision_values': decision_values})
        # NOTE(review): placed once per source domain, after the round loop —
        # confirm against the original (collapsed) indentation.
        ut.save_mmd_fr(data, best_params, s)
def second_phase(data, results):
    """Fuse the cached per-domain SVM decision values with fast-DAM.

    Loads the decision values and MMD value cached for every
    (round, source-domain) pair, converts the MMDs into normalized
    exponential weights gamma_s, runs train_fast_dam on the target
    kernel, and appends the resulting test decision values as one new
    column of `results`.

    Returns the augmented `results` array, or -1 when a required cache
    file is missing (the caller must run the baseline/save_mmd steps
    first).
    """
    result_directory_name = 'results'
    # Ensure results/secondPhase exists; exist_ok avoids the
    # check-then-mkdir race of the original chain.
    os.makedirs(os.path.join(result_directory_name, 'secondPhase'),
                exist_ok=True)
    Xdata = copy.deepcopy(data.Xtarget)
    K = Xdata.dot(Xdata.T)  # linear kernel on the target domain
    tar_test_index = data.tar_test_index
    all_test_dv = []
    mmd_values = []
    # Load decision values and MMD value for every (round, source) pair;
    # the accumulators are consumed once after both loops finish.
    for r in range(data.nRound):
        for s in range(len(data.Xsource)):
            dv_dir = os.path.join(result_directory_name, str(data.currentY),
                                  'svm_fr', 'decision_values',
                                  data.domain_names[s])
            mmd_dir = os.path.join(result_directory_name, str(data.currentY),
                                   'mmd_values_fr', data.domain_names[s])
            dv_file = os.path.join(dv_dir, "dv_round=" + str(r) + ".mat")
            if os.path.exists(dv_file):
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                print(
                    'You need to run the required baseline algorithms to obtain the decision values required by algorithm'
                )
                return -1
            mmd_file = os.path.join(mmd_dir, 'mmd.mat')
            if os.path.exists(mmd_file):
                mmd_value = (io.loadmat(mmd_file))['mmd_value']
            else:
                print(
                    'please run the proper save_mmd first to prepare the mmd values required by this algorithm'
                )
                return -1
            mmd_values.extend(mmd_value)
            all_test_dv.append(decision_values)
    # Hide the test labels from the solver (semi-supervised setting).
    y = copy.deepcopy(data.ytarget)
    y[tar_test_index] = 0
    f_s = np.squeeze(np.array(all_test_dv))
    print("mmds", mmd_values)
    # gamma_s = exp(-BETA * mmd^2), normalized to sum to 1.  The original
    # zeros-init + reshape/flatten round-trip computed the same thing.
    mmd_sq = np.power(np.asarray(mmd_values).flatten(), 2)
    gamma_s = np.exp((-1 * BETA) * mmd_sq)
    gamma_s = gamma_s / np.sum(gamma_s)
    print("gamma", gamma_s)
    theta1 = LAMBDA_L
    theta2 = LAMBDA_D
    dv = train_fast_dam(K, y, f_s, gamma_s, theta1, theta2,
                        np.array([]), np.array([]))
    # Flatten the test decision values into a single column and append
    # it to the running results matrix.
    formatted_results = []
    for item in dv[tar_test_index]:
        formatted_results.extend(item)
    formatted_results = np.asarray(formatted_results).reshape(
        (len(formatted_results), 1))
    results = np.hstack((results, formatted_results))
    print("DV", dv[tar_test_index])
    accuracy = ut.final_accuracy(np.squeeze(dv[tar_test_index]),
                                 np.squeeze(data.ytarget[tar_test_index]))
    print("Accuracy?!", accuracy, "\n")
    return results
def second_phase(data, results):
    """Fuse the cached per-domain SVM decision values with fast-DAM.

    NOTE(review): this is a byte-for-byte duplicate of an earlier
    second_phase definition in this file; Python binds the later one,
    silently shadowing the first.  Deduplicate — keep a single
    definition.

    Loads the decision values and MMD value cached for every
    (round, source-domain) pair, converts the MMDs into normalized
    exponential weights gamma_s, runs train_fast_dam on the target
    kernel, and appends the resulting test decision values as one new
    column of `results`.  Returns the augmented `results`, or -1 when a
    required cache file is missing.
    """
    result_directory_name = 'results'
    # Ensure results/secondPhase exists; exist_ok avoids the
    # check-then-mkdir race of the original chain.
    os.makedirs(os.path.join(result_directory_name, 'secondPhase'),
                exist_ok=True)
    Xdata = copy.deepcopy(data.Xtarget)
    K = Xdata.dot(Xdata.T)  # linear kernel on the target domain
    tar_test_index = data.tar_test_index
    all_test_dv = []
    mmd_values = []
    # Load decision values and MMD value for every (round, source) pair;
    # the accumulators are consumed once after both loops finish.
    for r in range(data.nRound):
        for s in range(len(data.Xsource)):
            dv_dir = os.path.join(result_directory_name, str(data.currentY),
                                  'svm_fr', 'decision_values',
                                  data.domain_names[s])
            mmd_dir = os.path.join(result_directory_name, str(data.currentY),
                                   'mmd_values_fr', data.domain_names[s])
            dv_file = os.path.join(dv_dir, "dv_round=" + str(r) + ".mat")
            if os.path.exists(dv_file):
                decision_values = io.loadmat(dv_file)['decision_values']
            else:
                print('You need to run the required baseline algorithms to obtain the decision values required by algorithm')
                return -1
            mmd_file = os.path.join(mmd_dir, 'mmd.mat')
            if os.path.exists(mmd_file):
                mmd_value = (io.loadmat(mmd_file))['mmd_value']
            else:
                print('please run the proper save_mmd first to prepare the mmd values required by this algorithm')
                return -1
            mmd_values.extend(mmd_value)
            all_test_dv.append(decision_values)
    # Hide the test labels from the solver (semi-supervised setting).
    y = copy.deepcopy(data.ytarget)
    y[tar_test_index] = 0
    f_s = np.squeeze(np.array(all_test_dv))
    print("mmds", mmd_values)
    # gamma_s = exp(-BETA * mmd^2), normalized to sum to 1.  The original
    # zeros-init + reshape/flatten round-trip computed the same thing.
    mmd_sq = np.power(np.asarray(mmd_values).flatten(), 2)
    gamma_s = np.exp((-1 * BETA) * mmd_sq)
    gamma_s = gamma_s / np.sum(gamma_s)
    print("gamma", gamma_s)
    theta1 = LAMBDA_L
    theta2 = LAMBDA_D
    dv = train_fast_dam(K, y, f_s, gamma_s, theta1, theta2,
                        np.array([]), np.array([]))
    # Flatten the test decision values into a single column and append
    # it to the running results matrix.
    formatted_results = []
    for item in dv[tar_test_index]:
        formatted_results.extend(item)
    formatted_results = np.asarray(formatted_results).reshape(
        (len(formatted_results), 1))
    results = np.hstack((results, formatted_results))
    print("DV", dv[tar_test_index])
    accuracy = ut.final_accuracy(np.squeeze(dv[tar_test_index]),
                                 np.squeeze(data.ytarget[tar_test_index]))
    print("Accuracy?!", accuracy, "\n")
    return results