コード例 #1
0
def plot_clusters(samples, targets, title, type="predicted"):
    """Scatter-plot samples grouped by cluster label, then save and show the figure.

    Args:
        samples: Indexable collection of points. Columns 0 and 1 of each
            group are used as the x and y coordinates.
        targets: One cluster label per sample; each label is passed through
            ``int()`` before grouping.
        title: Text used in the plot title and, with whitespace replaced by
            underscores, in the output file name.
        type: ``"actual"`` uses the fixed legend names
            silences/clicks/breathing with one marker per cluster (so at
            most three clusters are supported); ``"predicted"`` names each
            cluster ``cluster_<label>``. Any other value draws no points.
            The name shadows the builtin but is kept so existing keyword
            callers continue to work.

    The figure is written as ``<title>_clusters.png`` into the directory
    obtained from ``path_manipulations.create_or_return`` for
    ``[data_handling.results_root, "img", "unsupervised"]``.
    """
    unique_labels = np.unique(targets)

    # Group by label value instead of using the label as a list index, so a
    # negative (e.g. -1) or non-contiguous label ends up in the slot whose
    # legend entry is built from the same position of ``unique_labels``.
    slot_of = {int(label): slot for slot, label in enumerate(unique_labels)}
    clustered = [[] for _ in unique_labels]
    for i, sample in enumerate(samples):
        clustered[slot_of[int(targets[i])]].append(sample)

    markers = [".", "*", "+"]
    if type == "actual":
        labels = ["silences", "clicks", "breathing"]
    elif type == "predicted":
        labels = ["cluster_{}".format(label) for label in unique_labels]

    # No ``cmap`` is passed to scatter: without a ``c`` array matplotlib
    # ignores the colormap and takes colours from its default cycle.
    for i, cluster in enumerate(clustered):
        cluster = np.array(cluster)
        if type == "actual":
            plt.scatter(cluster[:, 0], cluster[:, 1],
                        marker=markers[i], label=labels[i])
        elif type == "predicted":
            plt.scatter(cluster[:, 0], cluster[:, 1],
                        label=labels[i], marker='.', linewidths=0)

    plt.legend()
    plt.title("{} clusters".format(title))

    output_filename = "{}_clusters.png".format("_".join(title.split()))
    output_dir = path_manipulations.create_or_return(
        [data_handling.results_root, "img", "unsupervised"])

    plt.savefig(os.path.join(output_dir, output_filename))

    plt.show()
コード例 #2
0
def cluster_scatter_2D(samples, targets, tsne_inst):
    """Plot labelled 2D samples per cluster, then save and show the figure.

    Args:
        samples: Indexable collection of points; columns 0 and 1 of each
            group are plotted as x and y.
        targets: One label per sample, used (after ``int()``) as the index
            of the group the sample is appended to.
        tsne_inst: Object whose ``get_params()`` returns a mapping with the
            keys ``perplexity``, ``learning_rate`` and
            ``early_exaggeration`` (presumably a t-SNE estimator - confirm
            against the caller).

    The current figure is cleared first. The parameter values and the
    feature-set size reported by ``data_handling`` are embedded in both the
    title and the output file name.
    """
    plt.clf()

    groups = [[] for _ in np.unique(targets)]
    for index, sample in enumerate(samples):
        groups[int(targets[index])].append(sample)

    # Fixed legend names and markers: at most three groups can be drawn.
    legend_names = ["silences", "clicks", "breathing"]
    marker_styles = [".", "*", "+"]

    for index, group in enumerate(groups):
        points = np.array(group)
        plt.scatter(points[:, 0], points[:, 1], cmap = "RdBu", marker = marker_styles[index], label = legend_names[index])

    params = tsne_inst.get_params()
    perplexity = params["perplexity"]
    learning_rate = params["learning_rate"]
    exaggeration = params["early_exaggeration"]
    n_features = data_handling.get_feature_set_size()
    run_details = (n_features, perplexity, learning_rate, exaggeration)

    plt.legend()
    plt.title("tsne_reduced_clusters. {}f, {}p, {}lr, {}ex".format(*run_details))

    file_name = "clusters_tsne_reduced_{}_{}_{}_{}.png".format(*run_details)
    target_dir = path_manipulations.create_or_return([
        data_handling.results_root,
        data_handling.conversation_directory,
        "img",
        "tsne_viz",
    ])
    plt.savefig(os.path.join(target_dir, file_name))
    plt.show()
コード例 #3
0
def plot_clusters(samples, targets, dataset, speakers, visually_best):
    """Plot each cluster split by speaker, then save and show the figure.

    Args:
        samples: Collection of samples; only its length is used here, the
            object itself is handed to ``_get_speaker_clusters``.
        targets: One label per sample, used (after ``int()``) as the index
            of the group that receives the sample's position.
        dataset: Name used in the plot title and, with whitespace replaced
            by underscores, in the output file name.
        speakers: Passed through unchanged to ``_get_speaker_clusters``.
        visually_best: Truthy selects the ``visually_best`` file name,
            otherwise the ``least_error`` one.
    """
    index_groups = [[] for _ in np.unique(targets)]
    for position in range(len(samples)):
        index_groups[int(targets[position])].append(position)

    palette = plt.get_cmap("Dark2")

    # More than three groups is taken to mean DBSCAN output; in that case
    # the last group is drawn separately as noise. (A label of -1 indexes
    # the last group through negative indexing.)
    has_noise_group = len(index_groups) > 3
    regular_groups = index_groups[:-1] if has_noise_group else index_groups

    for number, members in enumerate(regular_groups):
        by_speaker = _get_speaker_clusters(members, speakers, samples)
        last_plot = _create_cluster_by_speakers(by_speaker, number, palette)

    if has_noise_group:
        noise_by_speaker = _get_speaker_clusters(index_groups[-1], speakers,
                                                 samples)
        last_plot = _create_noise_cluster(noise_by_speaker, last_plot)

    plt.legend()

    conversation = data_handling.conversation_directory.split("_")[1]
    plt.title("{} - {} clusters".format(conversation, dataset))

    name_template = ("{}_visually_best_clusters.png"
                     if visually_best else "{}_least_error_clusters.png")
    output_filename = name_template.format("_".join(dataset.split()))

    target_dir = path_manipulations.create_or_return([
        data_handling.results_root,
        data_handling.conversation_directory,
        "img",
        "unsupervised",
    ])
    plt.tight_layout()
    plt.savefig(os.path.join(target_dir, output_filename), dpi=200)

    plt.show()
コード例 #4
0
def set_data_root(from_here):
    """Point the module-level data, dataset and results roots at a data set.

    Args:
        from_here: A single path component, or a list/tuple of components.
            The first component selects the directory under ``../data``
            and ``../results``. Any remaining components are appended to
            ``data_root`` only, one at a time, through
            ``path_manipulations.create_or_return``.

    Rebinds the globals ``data_root``, ``dataset_root`` and
    ``results_root``; ``dataset_root`` and ``results_root`` always stay at
    the first component.

    Raises:
        IndexError: If ``from_here`` is an empty list or tuple.
    """
    global data_root, dataset_root, results_root

    # isinstance (rather than an exact type match) lets tuples and list
    # subclasses be read as a sequence of components; previously they were
    # wrapped whole and os.path.join rejected them with a TypeError.
    if not isinstance(from_here, (list, tuple)):
        from_here = [from_here]

    data_root = os.path.join("../data", from_here[0])
    dataset_root = os.path.join("../data", from_here[0])
    results_root = os.path.join("../results", from_here[0])

    for subpath in from_here[1:]:
        data_root = path_manipulations.create_or_return([data_root, subpath])
コード例 #5
0
def scatter_2D(reduced_samples, tsne_inst):
    """Scatter-plot reduced samples in black on figure 1 and save the image.

    Args:
        reduced_samples: Two-dimensionally indexable array; columns 0 and 1
            are plotted as x and y.
        tsne_inst: Object whose ``get_params()`` returns a mapping with the
            keys ``perplexity``, ``learning_rate`` and
            ``early_exaggeration``.

    The parameter values and the feature-set size reported by
    ``data_handling`` are embedded in the title and the file name. The
    figure is saved but not shown.
    """
    plt.figure(1)
    plt.clf()

    xs = reduced_samples[:, 0]
    ys = reduced_samples[:, 1]
    plt.scatter(xs, ys, marker = '.', color = "black", linewidths = 0)

    params = tsne_inst.get_params()
    perplexity = params["perplexity"]
    learning_rate = params["learning_rate"]
    exaggeration = params["early_exaggeration"]
    n_features = data_handling.get_feature_set_size()
    run_details = (n_features, perplexity, learning_rate, exaggeration)

    plt.title("tsne_reduced_dimensionality. {}f, {}p, {}lr, {}ex".format(*run_details))

    # Images are grouped under the current conversation directory.
    file_name = "tsne_reduced_{}_{}_{}_{}.png".format(*run_details)
    target_dir = path_manipulations.create_or_return([
        data_handling.results_root,
        data_handling.conversation_directory,
        "img",
        "tsne_viz",
    ])
    plt.savefig(os.path.join(target_dir, file_name))
コード例 #6
0
def extract_to_file(names, samples, targets, algo = "DBSCAN"):
    """Write one comma-separated line per sample to ``<algo>_clustering_out.csv``.

    Each line holds the sample index, the sample name, fields 4 and 5 of
    the name split on ``_`` (field 5 with its last four characters
    removed), and the sample's target label.

    Args:
        names: Sample names, one per sample, each with at least six
            ``_``-separated fields.
        samples: The samples; only the size of the first dimension is used.
        targets: Label for each sample, indexed by sample position.
        algo: Prefix of the output file name.

    Raises:
        ValueError: If ``names`` and ``samples`` differ in their first
            dimension (both shapes are printed first).
    """
    names_shape = np.shape(names)
    samples_shape = np.shape(samples)
    if names_shape[0] != samples_shape[0]:
        print("names shape : {} \t samples shape: {}".format(names_shape, samples_shape))
        raise ValueError("length of names not equal to length of samples")

    file_name = "{}_clustering_out.csv".format(algo)
    target_dir = path_manipulations.create_or_return([data_handling.results_root,"output_files"])
    destination = os.path.join(target_dir, file_name)

    with open(destination, 'w') as out_file:
        # names and samples have the same length (checked above), so
        # walking the names visits every sample index exactly once.
        for index, sample_name in enumerate(names):
            fields = sample_name.split('_')
            out_file.write("{}, {}, {}, {}, {}\n".format(
                index, sample_name, fields[4], fields[5][:-4], targets[index]))

    print("Output file created")
コード例 #7
0
def store_log_entry(entry, file="log.json"):
    """Append ``entry`` to a JSON log file holding a list of entries.

    The log lives in the directory obtained from
    ``path_manipulations.create_or_return`` for
    ``[data_handling.results_root, data_handling.conversation_directory,
    "logs"]``. A missing log file is treated as an empty list.

    Args:
        entry: JSON-serialisable value to append.
        file: Name of the log file inside the logs directory.

    Raises:
        TypeError: If ``entry`` cannot be serialised by ``json.dumps``;
            the log on disk is left untouched in that case.
    """
    output_dir = path_manipulations.create_or_return([
        data_handling.results_root, data_handling.conversation_directory,
        "logs"
    ])
    log = os.path.join(output_dir, file)

    if os.path.isfile(log):
        with open(log, "r") as f:
            log_feed = json.load(f)
    else:
        log_feed = []

    log_feed.append(entry)

    # Serialise before opening for writing: mode "w" truncates the file
    # immediately, so a failing dumps() after the open would wipe the log.
    json_p = json.dumps(log_feed, indent=3)

    with open(log, "w") as f:
        f.write(json_p)
コード例 #8
0
def _get_path_to_reductions():
    """Return the ``tSNE_reduced_files`` location for the current conversation.

    The path parts (results root, conversation directory and the fixed
    ``tSNE_reduced_files`` name) are handed to
    ``path_manipulations.create_or_return``, whose result is returned
    unchanged.
    """
    path_parts = [
        data_handling.results_root,
        data_handling.conversation_directory,
        "tSNE_reduced_files",
    ]
    return path_manipulations.create_or_return(path_parts)