# NOTE(review): this top-level `__init__` is a byte-for-byte duplicate of
# SFCMdd.__init__ defined in the class below. At module scope it is never
# called by Python's object construction machinery, so it appears to be an
# accidental paste artifact — confirm with the author and remove.
def __init__(self, training_set, dissimilarity_matrix):
    self.__E = training_set          # training objects (list of elements to cluster)
    self.__D = dissimilarity_matrix  # precomputed dissimilarity matrix (stored, not read here)
    self.__K = 2                     # number of clusters
    self.__G = []                    # prototype (medoid) sets, one list per cluster
    self.__U = []                    # membership degrees, one row per object
    self.__n = 0                     # number of objects (filled in by compute)
    self.__m = 2                     # fuzziness exponent
    self.__J = 0.0                   # current adequacy-criterion value
    self.__q = 2                     # cardinality of each prototype set
    # Dissimilarity is a project-declared helper built over the training set —
    # presumably it exposes a pairwise dissimilarity(a, b); confirm against its definition.
    self.d = Dissimilarity(training_set)
class SFCMdd(object):
    """Fuzzy clustering with multiple medoids per cluster (SFCMdd).

    Iteratively alternates between (a) re-selecting the ``q`` best medoids
    for each of the ``K`` clusters and (b) recomputing fuzzy membership
    degrees, until the adequacy criterion J stabilizes or the iteration
    budget is exhausted.

    Fixes over the previous revision:
    * ``step1`` now keeps ``q`` medoids per cluster (was ``K`` — only
      coincidentally correct at the defaults K == q == 2).
    * ``compute`` now honours its ``m`` (fuzziness) parameter, which was
      previously accepted but silently ignored.
    """

    def __init__(self, training_set, dissimilarity_matrix):
        self.__E = training_set          # training objects
        self.__D = dissimilarity_matrix  # precomputed dissimilarity matrix (stored, unused here)
        self.__K = 2                     # number of clusters
        self.__G = []                    # prototype (medoid) sets, one list per cluster
        self.__U = []                    # membership degrees, one row per object
        self.__n = 0                     # number of objects (set by compute)
        self.__m = 2                     # fuzziness exponent
        self.__J = 0.0                   # current adequacy-criterion value
        self.__q = 2                     # medoids per cluster
        # Project-declared pairwise dissimilarity helper over the training set.
        self.d = Dissimilarity(training_set)

    def pick_prototypes(self):
        """Randomly draw K*q distinct objects and split them into K sets of q.

        Returns:
            list[list]: K prototype sets, each holding q distinct objects.
        """
        all_values = []
        # Rejection sampling until K*q distinct objects are collected;
        # assumes the training set has at least K*q distinct elements.
        while len(all_values) < (self.__K * self.__q):
            element = self.__E[randint(0, self.__n - 1)]
            if element not in all_values:
                all_values.append(element)
        i = 0
        newG = []
        for _ in range(self.__K):
            Gi = []
            while len(Gi) < self.__q:
                Gi.append(all_values[i])
                i += 1
            newG.append(Gi)
        return newG

    def membership_degree(self, element):
        """Compute the fuzzy membership of *element* in each cluster.

        Returns:
            list[float]: one membership value per cluster (higher = closer
            to that cluster's medoid set).
        """
        ui = []
        for ek in self.__G:
            uik = []
            # +1 offset keeps every ratio defined even when a dissimilarity
            # is exactly 0 (element coincides with a medoid).
            t1 = sum(self.d.dissimilarity(element, e) + 1 for e in ek)
            for eh in self.__G:
                t2 = sum(self.d.dissimilarity(element, e) + 1 for e in eh)
                uik.append((t1 / t2) ** (1 / (self.__m - 1)))
            ui.append(sum(uik) ** (-1))
        return ui

    def adequacy_criterion(self):
        """Return the objective J: membership-weighted total dissimilarity
        between every object and every cluster's medoid set."""
        j_values = []
        for k in range(self.__K):
            n_values = []
            for i in range(self.__n):
                uik = self.__U[i][k]
                ei = self.__E[i]
                sum_d = sum(self.d.dissimilarity(ei, e) for e in self.__G[k])
                n_values.append((uik ** self.__m) * sum_d)
            j_values.append(sum(n_values))
        return sum(j_values)

    def step1(self):
        """Re-select, for each cluster, the q objects minimizing the
        membership-weighted dissimilarity to all objects.

        Returns:
            list[list]: the K new prototype sets (q medoids each).
        """
        newG = []
        for k in range(self.__K):
            l_values = []
            for eh in self.__E:
                total = sum(
                    (self.__U[i][k] ** self.__m) * self.d.dissimilarity(self.__E[i], eh)
                    for i in range(self.__n)
                )
                l_values.append([total, eh])
            l_values.sort(key=lambda tup: tup[0])
            el = [eh for _, eh in l_values]
            # BUG FIX: keep q medoids per cluster (was self.__K, which only
            # matched by coincidence at the defaults K == q == 2).
            newG.append(el[:self.__q])
        return newG

    def compute(self, K=2, T=150, emax=(10.e-10), m=2, q=2):
        """Run the alternating optimization.

        Args:
            K: number of clusters.
            T: maximum number of iterations.
            emax: convergence threshold on |J_t - J_{t-1}|.
            m: fuzziness exponent (> 1).
            q: medoids per cluster.

        Returns:
            [U, G, J]: memberships, final prototype sets, final objective.
        """
        # Initialization
        error = 1.0
        t = 0
        self.__n = len(self.__E)
        self.__K = K
        self.__q = q
        self.__m = m  # BUG FIX: m was previously accepted but never stored.
        # Randomly select K distinct prototype sets Gk
        self.__G = self.pick_prototypes()
        # For each object ei compute its membership degree uik
        self.__U = [self.membership_degree(e) for e in self.__E]
        self.__J = self.adequacy_criterion()

        while error > emax and t < T:
            # Computation of the Best Prototypes
            t = t + 1
            self.__G = self.step1()
            # Definition of the Best Fuzzy Partition
            self.__U = [self.membership_degree(element) for element in self.__E]
            # Stopping Criterion
            J_t = self.adequacy_criterion()
            error = abs(J_t - self.__J)
            self.__J = J_t
            print("Iteration " + str(t) + "...")

        if error < emax:
            print("Stopped with error: " + str(error))
        elif t >= T:
            print("Stopped with " + str(t) + " Iterations")
        return [self.__U, self.__G, self.__J]
def main():
    """Streamlit entry point: load a character grid from a CSV file and run
    two segregation analyses — the Index of Dissimilarity and Schelling's
    segregation model.

    Side effects: renders Streamlit widgets/plots and, after a successful
    Schelling run, writes the best grid found to 'Output_data.csv'.
    """
    st.sidebar.subheader("Input Data")
    input_file_path = st.sidebar.text_input('CSV file path', 'Input_data.csv')
    st.sidebar.subheader("")

    try:
        # Gets the raw input data from the input csv file path from user
        raw_input_data = pd.read_csv(input_file_path).fillna('')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; any read/parse failure still exits with a message.
        sys.exit('Invalid path or csv file! Please input valid path or csv file.')

    if not validate_data_for_invalid_chars(raw_input_data):
        # Guard clause replaces the original deeply-nested if/else.
        st.error('ERROR: Invalid characters in the data. Please check dataset from Input_data.csv and retry.')
        return

    converted_input_data = convert_char_seq_to_numeric_grid(raw_input_data)
    population_size = int(converted_input_data.shape[0] * converted_input_data.shape[1])

    #####################################
    # Dissimilarity (Segregation Model) #
    #####################################
    st.title("Dissimilarity : Segregation Model")
    st.sidebar.subheader("Dissimilarity : Segregation Model Inputs")
    input_row = st.sidebar.number_input("Number of Rows per Tract", 1)
    input_col = st.sidebar.number_input("Number of Columns per Tract", 1)

    st.header('Original Data Grid')
    st.dataframe(raw_input_data.values)

    if st.sidebar.button('Calculate Index of Dissimilarity'):
        is_valid_row_col_input = validate_row_column_inputs(raw_input_data, input_row, input_col)
        if is_valid_row_col_input[0]:
            dissimilarity = DissimilaritySegregationModel(raw_input_data)
            partial_indices = []
            data_tracts = dissimilarity.get_splitted_data(input_row, input_col)
            tract_number = 1
            for data_per_tract in data_tracts:
                partial_index = dissimilarity.calculate_partial_index(data_per_tract)
                partial_indices.append(partial_index)
                st.text('Data Grid for Tract ' + str(tract_number)
                        + ' with Partial Index: ' + str(round(partial_index, 2)))
                st.dataframe(data_per_tract)
                tract_number += 1
            # The Index of Dissimilarity is half the sum of the partial indices.
            D = round(0.5 * sum(partial_indices), 2)
            st.sidebar.subheader("Index of Dissimilarity: " + str(D))
        else:
            if is_valid_row_col_input[1] == "NOT_MULTIPLE":
                st.error("The population per tract (No. of Row x No. of Column) is: "
                         + str(input_row * input_col)
                         + ". It should be a multiple of the total population: "
                         + str(population_size))
            else:
                # BUG FIX: corrected "characterss" typo in the user-facing message.
                st.error("Cannot split the data grid with equal number of characters per tract/splice based on the input row or column.")
                st.error("Please enter valid data.")

    ##################################
    # Schelling's Segregation Model  #
    ##################################
    st.title("Schelling's Segregation Model")
    st.sidebar.subheader("")
    st.sidebar.subheader("Schelling's Segregation Model Inputs")
    similarity_threshold = st.sidebar.slider("Similarity Threshold", 0., 1., .4)
    n_iterations = st.sidebar.number_input("Number of Iterations", 20)

    schelling = SchellingModel(converted_input_data, similarity_threshold, 3)
    mean_similarity_ratio = [schelling.get_average_similarity_ratio()]

    # Plot the graphs at initial stage
    plt.style.use("ggplot")
    plt.figure(figsize=(8, 4))

    # Left hand side graph with Schelling simulation plot
    cmap = ListedColormap(['royalblue', 'white', 'red'])
    plt.subplot(121)
    plt.axis('off')
    plt.title("X - Red \nO - Blue", fontsize=10)
    plt.pcolor(schelling.data_grid, cmap=cmap, edgecolors='w', linewidths=1)
    plt.gca().invert_yaxis()

    # Right hand side graph with Mean Similarity Ratio graph
    plt.subplot(122)
    plt.xlabel("Iterations")
    plt.xlim([0, n_iterations])
    plt.ylim([0.4, 1])
    plt.title("Mean Similarity Ratio", fontsize=12)
    plt.text(1, 0.95, "Similarity Ratio: %.4f" % schelling.get_average_similarity_ratio(), fontsize=10)

    data_grid_plot = st.pyplot(plt)
    progress_bar = st.progress(0)
    new_satisfied_data_grid = np.array([])

    if st.sidebar.button('Run Schelling Simulation'):
        current_highest_mean_sim_ratio = schelling.get_average_similarity_ratio()
        for i in range(n_iterations):
            # Starts running the Schelling Model Simulation
            schelling.run_simulation()
            latest_sim_ratio = schelling.get_average_similarity_ratio()
            # Remember the grid with the best mean similarity ratio seen so far.
            if current_highest_mean_sim_ratio < latest_sim_ratio:
                current_highest_mean_sim_ratio = latest_sim_ratio
                new_satisfied_data_grid = schelling.data_grid
            mean_similarity_ratio.append(latest_sim_ratio)

            plt.figure(figsize=(8, 4))
            # Plotting the current Data Grid
            plt.subplot(121)
            plt.axis('off')
            plt.title("X - Red \nO - Blue", fontsize=10)
            plt.pcolor(schelling.data_grid, cmap=cmap, edgecolors='w', linewidths=1)
            plt.gca().invert_yaxis()

            plt.subplot(122)
            plt.xlabel("Iterations")
            plt.xlim([0, n_iterations])
            plt.ylim([0.4, 1])
            plt.title("Mean Similarity Ratio", fontsize=15)
            plt.plot(range(1, len(mean_similarity_ratio) + 1), mean_similarity_ratio)
            plt.text(1, 0.95, "Similarity Ratio: %.4f" % latest_sim_ratio, fontsize=10)

            data_grid_plot.pyplot(plt)
            # Close figures each iteration so long runs don't leak memory.
            plt.close("all")
            progress_bar.progress((i + 1.) / n_iterations)

        if new_satisfied_data_grid.size != 0:
            # Display the new data grid with satisfied neighboring characters
            new_data_grid_df = convert_numeric_grid_to_char_seq_grid(new_satisfied_data_grid)
            st.header("New Data Grid with Satisfied Neighboring Characters")
            st.dataframe(new_data_grid_df)
            # Save output to Output_data.csv file
            pd.DataFrame(new_data_grid_df).to_csv('Output_data.csv', index=False)
            st.warning("Output_data.csv file has been created.")