Ejemplo n.º 1
0
def dmdbscan_algorithm(data, folder):
    """
        Estimate the DBSCAN distance with the DMDBSCAN approach.

        The distance from every sample to its nearest neighbour is
        computed, the distances are sorted in ascending order and the knee
        of the resulting curve is reported as the optimal distance.
        param:
            1. data - samples passed to NearestNeighbors; the original
                notes describe a pandas DataFrame of shape (10000, 82) or
                (10000, 3) holding mean customer spendings per category
            2. folder - string path forwarded to plotting.line_plotting
                (originally described as the place to save the plot)
        return:
            The ``knee`` attribute of the fitted kneed.KneeLocator, i.e.
            the distance value at the detected knee.
            NOTE(review): kneed documents ``knee`` as None when no knee is
            found - confirm callers can cope with that.
    """
    # Two neighbours are requested because the samples are queried against
    # the very data the model was fitted on: column 0 is expected to be the
    # sample itself, column 1 the closest other sample.
    neighbor_search = sklearn.neighbors.NearestNeighbors(n_neighbors=2,
                                                         n_jobs=-1)
    neighbor_search.fit(data)
    neighbor_distances, _ = neighbor_search.kneighbors(data)

    # Ascending nearest-neighbour distances (second column only)
    distances = np.sort(neighbor_distances[:, 1])
    positions = np.arange(len(distances))

    # Distances are used as x and sample positions as y, so the knee that
    # kneed reports (an x value) is itself a distance.
    knee_loc = kneed.KneeLocator(distances,
                                 positions,
                                 curve="concave",
                                 direction="increasing",
                                 online=False,
                                 interp_method="polynomial")
    optimal_distance = knee_loc.knee

    # Plot distances and the chosen optimal distance
    plotting.line_plotting(
        [positions, distances, optimal_distance],
        ["Distance", ""], "Optimal distance", folder)

    return optimal_distance
Ejemplo n.º 2
0
def silhouette_method(data, folder, max_clusters=102):
    """
        Function to find clusters number for k-means using Silhouette method.

        K-means is fitted for every clusters number in
        range(2, max_clusters) (upper bound excluded, so 2..101 with the
        default) and the number with the highest silhouette score wins.
        param:
            1. data - samples passed to KMeans and silhouette_score; the
                original notes describe a pandas DataFrame of shape
                (10000, 82) or (10000, 3) holding mean customer spendings
                per category
            2. folder - string path forwarded to plotting.line_plotting
                (originally described as the place to save the plot)
            3. max_clusters - int, exclusive upper bound of the clusters
                numbers to try (102 as default)
        return:
            clusters_number - optimal clusters number (numpy integer, as
            produced by np.argmax)
        raise:
            ValueError - if max_clusters <= 2, because no clustering would
            be evaluated
    """
    # Without at least one candidate np.argmax would fail on an empty
    # array; fail early with a message that names the actual problem.
    if max_clusters <= 2:
        raise ValueError(
            "max_clusters must be greater than 2, got {}".format(
                max_clusters))

    silhouette_results = []

    # Do k-means clustering for every candidate clusters number and
    # compute the silhouette score of each clustering.
    # NOTE: KMeans no longer accepts ``n_jobs`` (deprecated in
    # scikit-learn 0.23, removed in 1.0), so it is not passed here.
    for clusters_number in range(2, max_clusters):
        kmeans_model = sklearn.cluster.KMeans(
            n_clusters=clusters_number).fit(data)

        silhouette_results.append(
            sklearn.metrics.silhouette_score(data, kmeans_model.labels_))

    silhouette_results = np.array(silhouette_results)

    # Scores start at 2 clusters, so the optimal clusters number is the
    # index of the max score plus 2
    clusters_number = np.argmax(silhouette_results) + 2

    # Plot scores and optimal number of clusters
    plotting.line_plotting(
        [silhouette_results,
         np.arange(2, max_clusters), clusters_number],
        ["Clusters number", "Score"], "Silhouette score", folder)

    return clusters_number
def distribution_thesis(data):
    """
        Plot the mean distribution of every category.

        For each distinct value in the 'categories' column the matching
        rows are selected, the 'y' and 'categories' columns are dropped and
        the column-wise mean of what remains is handed to
        plotting.line_plotting.
        param:
            data - pandas DataFrame of initial data; must contain the
                columns 'y' and 'categories'
        return:
            None
    """
    category_column = data['categories']

    for category in category_column.unique().tolist():
        # Rows of this category, reduced to the measurement columns
        category_rows = data.loc[category_column == category]
        measurements = category_rows.drop(columns=['y', 'categories'])

        # One mean value per remaining column
        mean_distribution = measurements.mean(axis=0)

        # Distribution plotting
        plotting.line_plotting(mean_distribution,
                               ["Brain activity", "Mean"],
                               "Distribution for " + category,
                               "modeling/distributions",
                               font_size=8,
                               distribution=True)
Ejemplo n.º 4
0
def elbow_method(data, folder, max_clusters=102):
    """
        Function to find clusters number for k-means using Elbow method.

        K-means is fitted for every clusters number in
        range(2, max_clusters) (upper bound excluded, so 2..101 with the
        default); the inertia of each fit is the score and the knee of the
        score curve is reported.
        param:
            1. data - samples passed to KMeans; the original notes describe
                a pandas DataFrame of shape (10000, 82) or (10000, 3)
                holding mean customer spendings per category
            2. folder - string path forwarded to plotting.line_plotting
                (originally described as the place to save the plot)
            3. max_clusters - int, exclusive upper bound of the clusters
                numbers to try (102 as default)
        return:
            The ``knee`` attribute of the fitted kneed.KneeLocator, i.e.
            the clusters number at the detected elbow.
            NOTE(review): kneed documents ``knee`` as None when no knee is
            found - confirm callers can cope with that.
    """
    elbow_results = []

    # Do k-means clustering for every candidate clusters number and use the
    # sum of squared distances of samples to their closest cluster center
    # (inertia) as the score.
    # NOTE: KMeans no longer accepts ``n_jobs`` (deprecated in
    # scikit-learn 0.23, removed in 1.0), so it is not passed here.
    for clusters_number in range(2, max_clusters):
        kmeans_model = sklearn.cluster.KMeans(
            n_clusters=clusters_number).fit(data)

        elbow_results.append(kmeans_model.inertia_)

    elbow_results = np.array(elbow_results)

    # Find the elbow (knee) on scores; x holds the clusters numbers, so the
    # reported knee is directly a clusters number
    knee_loc = kneed.KneeLocator(np.arange(2, max_clusters),
                                 elbow_results,
                                 curve="convex",
                                 direction="decreasing",
                                 online=False,
                                 interp_method="polynomial")

    # Plot scores and optimal number of clusters
    plotting.line_plotting(
        [elbow_results,
         np.arange(2, max_clusters), knee_loc.knee],
        ["Clusters number", "Score"], "Elbow score", folder)

    return knee_loc.knee
Ejemplo n.º 5
0
                 str(startTime.strftime("%Y_%m_%d %H_%M_%S"))).mkdir(
                     parents=True, exist_ok=True)
    with open(
            'simulation_results/' +
            str(startTime.strftime("%Y_%m_%d %H_%M_%S")) +
            '/mot_vel_distribution.txt', 'w') as mot_file:
        for i in range(len(vel_x_atoms_in_mot)):
            mot_file.write(
                str(vel_x_atoms_in_mot[i]) + ";" + str(vel_y_atoms_in_mot[i]) +
                ";" + str(vel_z_atoms_in_mot[i]) + "\n")
    print("Plotting...")
    # deprecated since line_plotting is a better way for plotting line plots
    # eval_plotting(number_of_atoms, v_min, v_max, bin_count, atoms_in_mot, observing_z_pos, observing_magnetic_field,
    #                         excitation_freq_development, excitation_probability_development,
    #                         vel_z_atoms_in_mot, start_z_vel, zeeman_shift, observing_z_velocity, startTime)
    line_plotting(observing_z_position, observing_z_velocity, 'z position',
                  'z velocity', 0.0, 0.55, 0.0, 1000.0, startTime, False)
    # line_plotting(observing_z_position, excitation_freq_development, 'z position', 'excitation_freq_development', 0.0, 0.7, -1E10, 1E10, startTime)
    line_plotting(observing_z_position, excitation_probability_development,
                  'z position', 'excitation probability', 0.0, 0.7, 0.0, 0.55,
                  startTime, False)
    print(laser_detuning)
    print("Average velocity of atoms in trap center: ",
          sum(vel_z_atoms_in_mot) / len(vel_z_atoms_in_mot))
    print(len(start_z_vel_atoms_in_mot), len(vel_z_atoms_in_mot))
    print(min(start_vel_upper_state))

    str_plane_slice_pos = sim_param_data['positions_for_slicing']

    slice_plotting(str_plane_slice_pos, vel_z_plane_slices_upper_gs,
                   vel_z_plane_slices_lower_gs, v_min, v_max, n, bin_count,
                   startTime)
Ejemplo n.º 6
0
                    sim_param_data, runtime, startTime, bin_count, v_max)
    pathlib.Path('simulation_results/' +
                 str(startTime.strftime("%Y_%m_%d %H_%M_%S"))).mkdir(
                     parents=True, exist_ok=True)
    with open(
            'simulation_results/' +
            str(startTime.strftime("%Y_%m_%d %H_%M_%S")) +
            '/mot_vel_distribution.txt', 'w') as mot_file:
        for i in range(len(vel_x_atoms_in_mot)):
            mot_file.write(
                str(vel_x_atoms_in_mot[i]) + ";" + str(vel_y_atoms_in_mot[i]) +
                ";" + str(vel_z_atoms_in_mot[i]) + "\n")

    #plot velocity evolutions of single atoms
    line_plotting(observing_z_position, observing_z_velocity, 'z position',
                  'z velocity', 0.0, target_center_z + 0.005, 0.0, 2000.0,
                  startTime, False)

    #plot dead atoms
    fig, ax = plt.subplots()
    ax.hist(dead_pos, bins=100)
    print("number of dead atoms", len(dead_pos))
    plt.xlim(target_center_z - 0.5, target_center_z + 0.001)
    plt.xlabel("Position in m", fontsize=22)
    plt.ylabel("Number of dead atoms", fontsize=22)
    plt.title("Total number of dead atoms: {} of {}".format(len(dead_pos), n),
              fontsize=22)
    plt.rcParams.update({'font.size': 22})
    plt.xticks(fontsize=22)
    plt.yticks(fontsize=22)
    plt.show()