def dmdbscan_algorithm(data, folder):
    """
    Estimate the neighbourhood distance for DBSCAN (DMDBSCAN approach).

    For every sample the distances to its two nearest neighbours in
    ``data`` are queried, the distance columns are sorted in ascending
    order and the knee of the sorted second column is taken as the
    optimal distance.

    param:
        1. data - samples handed unchanged to NearestNeighbors.fit and
           kneighbors (the original notes describe a pandas DataFrame of
           shape (10000, 82) or (10000, 3) holding mean customer
           spendings per category)
        2. folder - forwarded to plotting.line_plotting (per the original
           notes, the string path where the plot is saved)

    return:
        The ``knee`` attribute of the fitted kneed.KneeLocator, i.e. the
        distance at the detected knee
    """
    # Two neighbours are requested because the query points are the
    # training points themselves: the first column is normally each
    # sample's zero distance to itself, the second its closest neighbour.
    neighbours = sklearn.neighbors.NearestNeighbors(n_neighbors=2, n_jobs=-1)
    neighbours.fit(data)
    neighbour_distances, _ = neighbours.kneighbors(data)

    # Ascending nearest-neighbour distances
    sorted_distances = np.sort(neighbour_distances, axis=0)[:, 1]

    # The distances are used as the x axis so that the reported knee is
    # a distance rather than a sample rank.
    locator = kneed.KneeLocator(
        sorted_distances,
        np.arange(len(sorted_distances)),
        curve="concave",
        direction="increasing",
        online=False,
        interp_method="polynomial")
    optimal_distance = locator.knee

    # Plot distances and optimal distance
    plot_series = [
        np.arange(len(sorted_distances)),
        sorted_distances,
        optimal_distance,
    ]
    plotting.line_plotting(
        plot_series, ["Distance", ""], "Optimal distance", folder)

    return optimal_distance
def silhouette_method(data, folder, max_clusters=102):
    """
    Function to find clusters number for k-means using Silhouette method.

    k-means is fitted once for every cluster count in
    ``range(2, max_clusters)``; the count whose labelling yields the
    highest silhouette score is selected (the smallest count wins ties).

    param:
        1. data - samples handed unchanged to KMeans.fit and
           silhouette_score (the original notes describe a pandas
           DataFrame of shape (10000, 82) or (10000, 3) holding mean
           customer spendings per category)
        2. folder - forwarded to plotting.line_plotting (per the original
           notes, the string path where the plot is saved)
        3. max_clusters - int, exclusive upper bound of the cluster
           counts that are tried (102 as default, so 2..101 clusters)

    return:
        clusters_number - integer value of optimal clusters number

    raise:
        ValueError - if max_clusters is not greater than 2, because no
        cluster count would be evaluated
    """
    # Fail early with a clear message instead of the opaque error that
    # np.argmax raises on an empty score array.
    if max_clusters <= 2:
        raise ValueError(
            "max_clusters must be greater than 2, got {!r}".format(
                max_clusters))

    # NOTE: KMeans is built without n_jobs; that keyword was deprecated
    # in scikit-learn 0.23 and removed in 1.0, where passing it raises
    # TypeError.
    silhouette_results = np.array([
        sklearn.metrics.silhouette_score(
            data,
            sklearn.cluster.KMeans(n_clusters=candidate).fit(data).labels_)
        for candidate in range(2, max_clusters)
    ])

    # Scores start at 2 clusters, hence the offset on the best index
    clusters_number = np.argmax(silhouette_results) + 2

    # Plot scores and optimal number of clusters
    plotting.line_plotting(
        [silhouette_results, np.arange(2, max_clusters), clusters_number],
        ["Clusters number", "Score"], "Silhouette score", folder)

    return clusters_number
def distribution_thesis(data):
    """
    Plot the mean distribution of every category.

    For each distinct value of the 'categories' column the matching rows
    are selected, the 'y' and 'categories' columns are dropped and the
    column-wise mean of what remains is sent to plotting.line_plotting.

    param:
        data - pandas DataFrame of initial data; it must contain the
               columns 'categories' and 'y'
    """
    category_column = data['categories']

    for category in category_column.unique().tolist():
        # Distributions computing
        category_rows = data.loc[category_column == category]
        measurements = category_rows.drop(
            columns=['y', 'categories'], axis=1)
        mean_distribution = measurements.mean(axis=0)

        # Distribution plotting
        plotting.line_plotting(
            mean_distribution,
            ["Brain activity", "Mean"],
            "Distribution for " + category,
            "modeling/distributions",
            font_size=8,
            distribution=True)
def elbow_method(data, folder, max_clusters=102):
    """
    Function to find clusters number for k-means using Elbow method.

    k-means is fitted once for every cluster count in
    ``range(2, max_clusters)`` and the model inertia (sum of squared
    distances of samples to their closest cluster center) is recorded as
    the score; the elbow of the resulting curve is located with kneed.

    param:
        1. data - samples handed unchanged to KMeans.fit (the original
           notes describe a pandas DataFrame of shape (10000, 82) or
           (10000, 3) holding mean customer spendings per category)
        2. folder - forwarded to plotting.line_plotting (per the original
           notes, the string path where the plot is saved)
        3. max_clusters - int, exclusive upper bound of the cluster
           counts that are tried (102 as default, so 2..101 clusters)

    return:
        The ``knee`` attribute of the fitted kneed.KneeLocator, i.e. the
        clusters number at the detected elbow (kneed reports None when it
        finds no elbow)

    raise:
        ValueError - if max_clusters is not greater than 2, because no
        cluster count would be evaluated
    """
    # Fail early with a clear message instead of handing empty arrays to
    # the knee locator.
    if max_clusters <= 2:
        raise ValueError(
            "max_clusters must be greater than 2, got {!r}".format(
                max_clusters))

    # NOTE: KMeans is built without n_jobs; that keyword was deprecated
    # in scikit-learn 0.23 and removed in 1.0, where passing it raises
    # TypeError.
    elbow_results = np.array([
        sklearn.cluster.KMeans(n_clusters=candidate).fit(data).inertia_
        for candidate in range(2, max_clusters)
    ])

    # Find the elbow (knee) on scores
    knee_loc = kneed.KneeLocator(np.arange(2, max_clusters),
                                 elbow_results,
                                 curve="convex",
                                 direction="decreasing",
                                 online=False,
                                 interp_method="polynomial")

    # Plot scores and optimal number of clusters
    plotting.line_plotting(
        [elbow_results, np.arange(2, max_clusters), knee_loc.knee],
        ["Clusters number", "Score"], "Elbow score", folder)

    return knee_loc.knee
str(startTime.strftime("%Y_%m_%d %H_%M_%S"))).mkdir( parents=True, exist_ok=True) with open( 'simulation_results/' + str(startTime.strftime("%Y_%m_%d %H_%M_%S")) + '/mot_vel_distribution.txt', 'w') as mot_file: for i in range(len(vel_x_atoms_in_mot)): mot_file.write( str(vel_x_atoms_in_mot[i]) + ";" + str(vel_y_atoms_in_mot[i]) + ";" + str(vel_z_atoms_in_mot[i]) + "\n") print("Plotting...") # deprecated since line_plotting is a better way for plotting line plots # eval_plotting(number_of_atoms, v_min, v_max, bin_count, atoms_in_mot, observing_z_pos, observing_magnetic_field, # excitation_freq_development, excitation_probability_development, # vel_z_atoms_in_mot, start_z_vel, zeeman_shift, observing_z_velocity, startTime) line_plotting(observing_z_position, observing_z_velocity, 'z position', 'z velocity', 0.0, 0.55, 0.0, 1000.0, startTime, False) # line_plotting(observing_z_position, excitation_freq_development, 'z position', 'excitation_freq_development', 0.0, 0.7, -1E10, 1E10, startTime) line_plotting(observing_z_position, excitation_probability_development, 'z position', 'excitation probability', 0.0, 0.7, 0.0, 0.55, startTime, False) print(laser_detuning) print("Average velocity of atoms in trap center: ", sum(vel_z_atoms_in_mot) / len(vel_z_atoms_in_mot)) print(len(start_z_vel_atoms_in_mot), len(vel_z_atoms_in_mot)) print(min(start_vel_upper_state)) str_plane_slice_pos = sim_param_data['positions_for_slicing'] slice_plotting(str_plane_slice_pos, vel_z_plane_slices_upper_gs, vel_z_plane_slices_lower_gs, v_min, v_max, n, bin_count, startTime)
sim_param_data, runtime, startTime, bin_count, v_max) pathlib.Path('simulation_results/' + str(startTime.strftime("%Y_%m_%d %H_%M_%S"))).mkdir( parents=True, exist_ok=True) with open( 'simulation_results/' + str(startTime.strftime("%Y_%m_%d %H_%M_%S")) + '/mot_vel_distribution.txt', 'w') as mot_file: for i in range(len(vel_x_atoms_in_mot)): mot_file.write( str(vel_x_atoms_in_mot[i]) + ";" + str(vel_y_atoms_in_mot[i]) + ";" + str(vel_z_atoms_in_mot[i]) + "\n") #plot velocity evolutions of single atoms line_plotting(observing_z_position, observing_z_velocity, 'z position', 'z velocity', 0.0, target_center_z + 0.005, 0.0, 2000.0, startTime, False) #plot dead atoms fig, ax = plt.subplots() ax.hist(dead_pos, bins=100) print("number of dead atoms", len(dead_pos)) plt.xlim(target_center_z - 0.5, target_center_z + 0.001) plt.xlabel("Position in m", fontsize=22) plt.ylabel("Number of dead atoms", fontsize=22) plt.title("Total number of dead atoms: {} of {}".format(len(dead_pos), n), fontsize=22) plt.rcParams.update({'font.size': 22}) plt.xticks(fontsize=22) plt.yticks(fontsize=22) plt.show()