Example #1
import numpy as np
from sklearn.metrics import normalized_mutual_info_score

def val_computation(alpha, beta, kappa):

    # Compute the membership matrix and the evaluation scores n_tests times
    # (spatial_clustering, seg_eval and the data below come from the
    # enclosing scope)
    nmi_list, pk_list, pk_rdm_list, wd_list, wd_rdm_list = [], [], [], [], []
    for _ in range(n_tests):
        # Compute the membership matrix
        res_matrix = spatial_clustering(d_ext_mat=d_ext_mat,
                                        exch_mat=exch_mat,
                                        w_mat=w_mat,
                                        n_groups=n_groups,
                                        alpha=alpha,
                                        beta=beta,
                                        kappa=kappa,
                                        known_labels=known_labels)
        # Assign each token to its most likely group (labels start at 1)
        alg_group_vec = np.argmax(res_matrix, 1) + 1
        # Drop the tokens whose labels were given to the algorithm
        rstr_alg_group_vec = np.delete(alg_group_vec, indices_for_known_label)
        # Compute the NMI score on the remaining tokens
        nmi = normalized_mutual_info_score(rstr_real_group_vec, rstr_alg_group_vec)
        nmi_list.append(nmi)
        # Segmentation evaluation
        pk, wd, pk_rdm, wd_rdm = seg_eval(alg_group_vec, real_group_vec)
        pk_list.append(pk)
        pk_rdm_list.append(pk_rdm)
        wd_list.append(wd)
        wd_rdm_list.append(wd_rdm)

    return np.mean(nmi_list), np.mean(pk_list), np.mean(pk_rdm_list), \
        np.mean(wd_list), np.mean(wd_rdm_list)
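
Neither example defines seg_eval. Below is a minimal sketch of such a helper, assuming it computes the standard Pk and WindowDiff segmentation metrics (here with nltk) together with shuffled-boundary random baselines; only the call signature and the (pk, wd, pk_rdm, wd_rdm) return order are taken from the examples, the body is an assumption.

    import numpy as np
    from nltk.metrics.segmentation import pk, windowdiff

    def seg_eval(alg_group_vec, real_group_vec):
        # Turn a group-label sequence into a boundary string: "1" where
        # the label changes between consecutive tokens, "0" elsewhere
        def to_boundaries(group_vec):
            group_vec = np.asarray(group_vec)
            return "".join("1" if a != b else "0"
                           for a, b in zip(group_vec[:-1], group_vec[1:]))

        ref = to_boundaries(real_group_vec)
        hyp = to_boundaries(alg_group_vec)
        # Usual window size: half the mean reference segment length
        k = max(1, round(len(ref) / (ref.count("1") + 1) / 2))
        # Random baseline: same number of boundaries, shuffled positions
        rdm = "".join(np.random.permutation(list(hyp)))
        return (pk(ref, hyp, k=k), windowdiff(ref, hyp, k=k),
                pk(ref, rdm, k=k), windowdiff(ref, rdm, k=k))
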
Example #2
    # Loop on chunks (np is numpy; normalized_mutual_info_score comes from
    # sklearn.metrics and seg_eval from the surrounding project)
    algo_group_vec = []
    for chunk_id, token_chunk_list in enumerate(token_list_list):

        # words x groups likelihoods for this chunk
        word_likelihood = (norm_word_array *
                           np.outer(norm_document_array[chunk_id, :],
                                    np.ones(norm_word_array.shape[1]))).T
        word_groups = np.argmax(word_likelihood, 1) + 1
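        # Note: the outer product above only repeats the chunk's group
        # weights across columns, so NumPy broadcasting yields the same
        # matrix: (norm_word_array * norm_document_array[chunk_id, :, None]).T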

        # Construct the algo_group_vec: each token takes the group of its
        # word; out-of-vocabulary tokens keep the last seen group
        algo_chunk_group_vec = []
        actual_g = 1
        for w in token_chunk_list:
            if len(np.where(np.array(model_voc) == w)[0]) > 0:
                actual_g = word_groups[np.where(
                    np.array(model_voc) == w)[0][0]]
            algo_chunk_group_vec.append(actual_g)

        algo_group_vec.extend(algo_chunk_group_vec)

    # NMI
    nmi = normalized_mutual_info_score(real_group_vec, algo_group_vec)
    # Segmentation evaluation
    pk, wd, pk_rdm, wd_rdm = seg_eval(algo_group_vec, real_group_vec)

    # Writing results
    with open(results_file_name, "a") as output_file:
        output_file.write(f"{input_text_file},{n_groups},{chunk_size},{nmi},"
                          f"{pk},{pk_rdm},{wd},{wd_rdm}\n")