import numpy as np
from sklearn.metrics import normalized_mutual_info_score


def val_computation(alpha, beta, kappa):
    # Run the clustering n_tests times and average the validation scores
    # (spatial_clustering, seg_eval and the data matrices are defined elsewhere in the script)
    nmi_list, pk_list, pk_rdm_list, wd_list, wd_rdm_list = [], [], [], [], []
    for _ in range(n_tests):
        # Compute the membership matrix
        res_matrix = spatial_clustering(d_ext_mat=d_ext_mat, exch_mat=exch_mat, w_mat=w_mat,
                                        n_groups=n_groups, alpha=alpha, beta=beta, kappa=kappa,
                                        known_labels=known_labels)
        # Compute the groups, dropping the positions whose labels were given to the algorithm
        alg_group_vec = np.argmax(res_matrix, 1) + 1
        rstr_alg_group_vec = np.delete(alg_group_vec, indices_for_known_label)
        # Compute the NMI score
        nmi = normalized_mutual_info_score(rstr_real_group_vec, rstr_alg_group_vec)
        nmi_list.append(nmi)
        # Segmentation evaluation
        pk, wd, pk_rdm, wd_rdm = seg_eval(alg_group_vec, real_group_vec)
        pk_list.append(pk)
        pk_rdm_list.append(pk_rdm)
        wd_list.append(wd)
        wd_rdm_list.append(wd_rdm)
    return np.mean(nmi_list), np.mean(pk_list), np.mean(pk_rdm_list), np.mean(wd_list), \
        np.mean(wd_rdm_list)
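# Hedged usage sketch (not part of the original script): grid-search the three hyper-parameters
# by calling val_computation and logging the averaged scores. The grid values and the output
# file name "val_results.csv" are hypothetical placeholders.
import itertools

alpha_grid, beta_grid, kappa_grid = [1, 5, 10], [5, 10, 50], [0.25, 0.5, 1.0]
with open("val_results.csv", "w") as val_file:
    val_file.write("alpha,beta,kappa,nmi,pk,pk_rdm,wd,wd_rdm\n")
    for alpha, beta, kappa in itertools.product(alpha_grid, beta_grid, kappa_grid):
        nmi, pk, pk_rdm, wd, wd_rdm = val_computation(alpha, beta, kappa)
        val_file.write(f"{alpha},{beta},{kappa},{nmi},{pk},{pk_rdm},{wd},{wd_rdm}\n")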
# Loop on chunks
algo_group_vec = []
model_voc_array = np.array(model_voc)
for chunk_id, token_chunk_list in enumerate(token_list_list):
    # Words x documents probabilities for this chunk
    word_likelihood = (norm_word_array
                       * np.outer(norm_document_array[chunk_id, :],
                                  np.ones(norm_word_array.shape[1]))).T
    word_groups = np.argmax(word_likelihood, 1) + 1
    # Construct the algo_group_vec; out-of-vocabulary words keep the group of the previous word
    algo_chunk_group_vec = []
    actual_g = 1
    for w in token_chunk_list:
        voc_indices = np.where(model_voc_array == w)[0]
        if len(voc_indices) > 0:
            actual_g = word_groups[voc_indices[0]]
        algo_chunk_group_vec.append(actual_g)
    algo_group_vec.extend(algo_chunk_group_vec)

# NMI
nmi = normalized_mutual_info_score(real_group_vec, algo_group_vec)

# Segmentation evaluation
pk, wd, pk_rdm, wd_rdm = seg_eval(algo_group_vec, real_group_vec)

# Writing results
with open(results_file_name, "a") as output_file:
    output_file.write(f"{input_text_file},{n_groups},{chunk_size},{nmi},"
                      f"{pk},{pk_rdm},{wd},{wd_rdm}\n")
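# Hedged sketch only: seg_eval is a project helper whose implementation is not shown in this
# section. Assuming it scores Pk and WindowDiff (plus a random-baseline pair) from group label
# vectors, one plausible version could look like the following. The name seg_eval_sketch and
# the boundary encoding are assumptions, not the original code.
from nltk.metrics.segmentation import pk as nltk_pk, windowdiff


def seg_eval_sketch(hyp_group_vec, ref_group_vec, seed=0):
    rng = np.random.default_rng(seed)

    def boundaries(vec):
        # "1" marks a position where the group label changes, "0" otherwise
        return "".join("1" if vec[i] != vec[i - 1] else "0" for i in range(1, len(vec)))

    ref, hyp = boundaries(ref_group_vec), boundaries(hyp_group_vec)
    # Random baseline: same number of boundaries as the hypothesis, placed uniformly at random
    rdm = np.full(len(ref), "0")
    rdm[rng.choice(len(ref), size=hyp.count("1"), replace=False)] = "1"
    rdm = "".join(rdm)
    # Window size: half the average reference segment length (the usual convention)
    k = max(1, round(len(ref) / (ref.count("1") + 1) / 2))
    return (nltk_pk(ref, hyp, k=k), windowdiff(ref, hyp, k),
            nltk_pk(ref, rdm, k=k), windowdiff(ref, rdm, k))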