Example #1
0
 def __init__(self, training_set, dissimilarity_matrix):
     """Store the training data, the dissimilarity matrix and default hyper-parameters.

     Args:
         training_set: the collection of objects to be clustered.
         dissimilarity_matrix: precomputed pairwise dissimilarities for the set.
     """
     self.__E = training_set            # objects to cluster
     self.__D = dissimilarity_matrix    # precomputed pairwise dissimilarities
     self.__K = 2                       # number of clusters
     self.__G = []                      # prototype sets, one per cluster
     self.__U = []                      # membership-degree vectors, one per object
     self.__n = 0                       # number of objects (filled in later)
     self.__m = 2                       # fuzziness exponent
     self.__J = 0.0                     # adequacy-criterion value
     self.__q = 2                       # prototypes per cluster
     # Dissimilarity is defined elsewhere in the project; presumably it
     # computes pairwise dissimilarities on demand — TODO confirm.
     self.d = Dissimilarity(training_set)
Example #2
0
class SFCMdd(object):
    """Fuzzy clustering of a training set around sets of medoid prototypes.

    Alternates between recomputing the best q-element prototype set for each
    of the K clusters (``step1``) and recomputing the fuzzy membership degrees
    (``membership_degree``) until the adequacy criterion J stops improving by
    more than ``emax`` or T iterations elapse. Dissimilarities come from a
    project-defined ``Dissimilarity`` helper built over the training set.
    """

    def __init__(self, training_set, dissimilarity_matrix):
        """Store the training data, the dissimilarity matrix and default hyper-parameters."""
        self.__E = training_set            # objects to cluster
        self.__D = dissimilarity_matrix    # precomputed pairwise dissimilarities
        self.__K = 2                       # number of clusters
        self.__G = []                      # prototype sets, one list of q objects per cluster
        self.__U = []                      # membership degrees, one K-vector per object
        self.__n = 0                       # number of objects (set in compute())
        self.__m = 2                       # fuzziness exponent
        self.__J = 0.0                     # current adequacy-criterion value
        self.__q = 2                       # prototypes per cluster
        self.d = Dissimilarity(training_set)

    def pick_prototypes(self):
        """Randomly draw K disjoint groups of q distinct objects as initial prototypes.

        NOTE(review): loops forever if the training set contains fewer than
        K*q distinct elements — assumed not to happen for valid inputs.
        """
        all_values = []
        while len(all_values) < (self.__K * self.__q):
            element = self.__E[randint(0, self.__n - 1)]
            if element not in all_values:
                all_values.append(element)
        # Split the K*q distinct picks into K consecutive groups of q.
        return [all_values[k * self.__q:(k + 1) * self.__q]
                for k in range(self.__K)]

    def membership_degree(self, element):
        """Return the K membership degrees u_ik of `element` w.r.t. the current prototypes."""
        ui = []
        for ek in self.__G:
            # Dissimilarity of `element` to cluster k's prototypes
            # (+1 keeps every term strictly positive, avoiding division by zero).
            t1 = sum([self.d.dissimilarity(element, e) + 1 for e in ek])
            uik = []
            for eh in self.__G:
                t2 = sum([self.d.dissimilarity(element, e) + 1 for e in eh])
                uik.append((t1 / t2) ** (1 / (self.__m - 1)))
            ui.append(sum(uik) ** (-1))
        return ui

    def adequacy_criterion(self):
        """Compute J: sum over clusters and objects of u_ik^m times the
        object's total dissimilarity to cluster k's prototypes."""
        j_values = []
        for k in range(self.__K):
            n_values = []
            for i in range(self.__n):
                uik = self.__U[i][k]
                ei = self.__E[i]
                sum_d = sum([self.d.dissimilarity(ei, e) for e in self.__G[k]])
                n_values.append((uik ** self.__m) * sum_d)
            j_values.append(sum(n_values))
        return sum(j_values)

    def step1(self):
        """Return, per cluster, the q candidate objects minimizing the
        membership-weighted sum of dissimilarities (the best prototypes)."""
        newG = []
        for k in range(self.__K):
            l_values = []
            for eh in self.__E:
                cost = sum((self.__U[i][k] ** self.__m) *
                           self.d.dissimilarity(self.__E[i], eh)
                           for i in range(self.__n))
                l_values.append((cost, eh))
            # Sort by cost only (stable, never compares the objects themselves).
            l_values.sort(key=lambda tup: tup[0])
            # BUG FIX: keep the q best candidates per cluster — the original
            # sliced [:K], which only coincided with the intent when K == q.
            newG.append([eh for _, eh in l_values[:self.__q]])
        return newG

    def compute(self, K=2, T=150, emax=(10.e-10), m=2, q=2):
        """Run the alternating optimization.

        Args:
            K: number of clusters.
            T: maximum number of iterations.
            emax: convergence threshold on |J_t - J_{t-1}|.
            m: fuzziness exponent (> 1).
            q: number of prototypes per cluster.

        Returns:
            [U, G, J]: membership degrees, prototype sets and final criterion.
        """
        # Initialization
        error = 1.0
        t = 0
        self.__n = len(self.__E)
        self.__K = K
        self.__q = q
        # BUG FIX: `m` was accepted but never stored, so the fuzziness
        # exponent passed by the caller was silently ignored.
        self.__m = m
        # Randomly select K distinct prototype sets Gk.
        self.__G = self.pick_prototypes()
        # For each object ei compute its membership degrees uik.
        self.__U = [self.membership_degree(element) for element in self.__E]
        self.__J = self.adequacy_criterion()

        while error > emax and t < T:
            t = t + 1
            # Step 1: computation of the best prototypes for the current partition.
            self.__G = self.step1()
            # Step 2: definition of the best fuzzy partition for the new prototypes.
            self.__U = [self.membership_degree(element) for element in self.__E]
            # Stopping criterion: improvement of the adequacy criterion.
            J_t = self.adequacy_criterion()
            error = abs(J_t - self.__J)
            self.__J = J_t
            print("Iteration "+str(t)+"...")

        if error < emax:
            print("Stopped with error: "+str(error))
        elif t >= T:
            print("Stopped with "+str(t)+" Iterations")

        return [self.__U, self.__G, self.__J]
Example #3
0
def main():
    """Streamlit entry point: run the Dissimilarity and Schelling segregation apps.

    Reads a character grid from a user-supplied CSV, computes the index of
    dissimilarity over user-sized tracts, then runs a Schelling segregation
    simulation, re-plotting every iteration and saving the most-satisfied
    grid reached to Output_data.csv.
    """
    st.sidebar.subheader("Input Data")
    input_file_path = st.sidebar.text_input('CSV file path' , 'Input_data.csv')
    st.sidebar.subheader("")

    try:
        # Gets the raw input data from the input csv file path from user
        raw_input_data = pd.read_csv(input_file_path).fillna('')
    except Exception:
        # Narrowed from a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit. Any read/parse failure exits here.
        sys.exit('Invalid path or csv file! Please input valid path or csv file.')

    if (validate_data_for_invalid_chars(raw_input_data)):
        converted_input_data = convert_char_seq_to_numeric_grid(raw_input_data)
        population_size = int(converted_input_data.shape[0] * converted_input_data.shape[1])

        # Streamlit Apps
        #####################################
        # Dissimilarity (Segregation Model) #
        #####################################
        st.title("Dissimilarity : Segregation Model")
        st.sidebar.subheader("Dissimilarity : Segregation Model Inputs")
        input_row = st.sidebar.number_input("Number of Rows per Tract", 1)
        input_col = st.sidebar.number_input("Number of Columns per Tract", 1)
        st.header('Original Data Grid')
        st.dataframe(raw_input_data.values)

        if st.sidebar.button('Calculate Index of Dissimilarity'):
            is_valid_row_col_input = validate_row_column_inputs(raw_input_data, input_row, input_col)
            if is_valid_row_col_input[0]:
                dissimilarity = DissimilaritySegregationModel(raw_input_data)
                total_number_of_tracts = int(population_size/(input_row*input_col))
                partial_indices = []

                # Show each tract with its partial index of dissimilarity.
                data_tracts = dissimilarity.get_splitted_data(input_row, input_col)
                tract_number = 1
                for data_per_tract in data_tracts:
                    partial_index = dissimilarity.calculate_partial_index(data_per_tract)
                    partial_indices.append(partial_index)

                    st.text('Data Grid for Tract ' + str(tract_number) + ' with Partial Index: ' + str(round(partial_index, 2)))
                    st.dataframe(data_per_tract)
                    tract_number += 1

                # Index of dissimilarity D = 1/2 the sum of the partial indices.
                D = round(0.5*sum(partial_indices), 2)
                st.sidebar.subheader("Index of Dissimilarity: " + str(D))

            else:
                if is_valid_row_col_input[1] == "NOT_MULTIPLE":
                    st.error("The population per tract (No. of Row x No. of Column) is: " + str(input_row*input_col) + ". It should be a multiple of the total population: " + str(population_size))
                else:
                    # BUG FIX: user-facing typo "characterss" corrected.
                    st.error("Cannot split the data grid with equal number of characters per tract/splice based on the input row or column.")
                st.error("Please enter valid data.")

        ##################################
        # Schelling's Segregation Model  #
        ##################################
        st.title("Schelling's Segregation Model")
        st.sidebar.subheader("")
        st.sidebar.subheader("Schelling's Segregation Model Inputs")
        similarity_threshold = st.sidebar.slider("Similarity Threshold", 0., 1., .4)
        n_iterations = st.sidebar.number_input("Number of Iterations", 20)

        schelling = SchellingModel(converted_input_data, similarity_threshold, 3)
        mean_similarity_ratio = []
        mean_similarity_ratio.append(schelling.get_average_similarity_ratio())

        # Plot the graphs at initial stage
        plt.style.use("ggplot")
        plt.figure(figsize=(8, 4))

        # Left hand side graph with Schelling simulation plot
        cmap = ListedColormap(['royalblue', 'white', 'red'])
        plt.subplot(121)
        plt.axis('off')
        plt.title("X - Red \nO - Blue", fontsize=10)
        plt.pcolor(schelling.data_grid, cmap=cmap, edgecolors='w', linewidths=1)
        plt.gca().invert_yaxis()

        # Right hand side graph with Mean Similarity Ratio graph
        plt.subplot(122)
        plt.xlabel("Iterations")
        plt.xlim([0, n_iterations])
        plt.ylim([0.4, 1])
        plt.title("Mean Similarity Ratio", fontsize=12)
        plt.text(1, 0.95, "Similarity Ratio: %.4f" % schelling.get_average_similarity_ratio(), fontsize=10)

        data_grid_plot = st.pyplot(plt)
        progress_bar = st.progress(0)

        new_satisfied_data_grid = np.array([])
        if st.sidebar.button('Run Schelling Simulation'):
            current_highest_mean_sim_ratio = schelling.get_average_similarity_ratio()
            for i in range(n_iterations):
                # Starts running the Schelling Model Simulation
                schelling.run_simulation()
                latest_sim_ratio = schelling.get_average_similarity_ratio()
                # Remember the grid with the highest mean similarity seen so far.
                if current_highest_mean_sim_ratio < latest_sim_ratio:
                    current_highest_mean_sim_ratio = latest_sim_ratio
                    new_satisfied_data_grid = schelling.data_grid
                mean_similarity_ratio.append(schelling.get_average_similarity_ratio())
                plt.figure(figsize=(8, 4))

                # Plotting the current Data Grid
                plt.subplot(121)
                plt.axis('off')
                plt.title("X - Red \nO - Blue", fontsize=10)
                plt.pcolor(schelling.data_grid, cmap=cmap, edgecolors='w', linewidths=1)
                plt.gca().invert_yaxis()

                plt.subplot(122)
                plt.xlabel("Iterations")
                plt.xlim([0, n_iterations])
                plt.ylim([0.4, 1])
                plt.title("Mean Similarity Ratio", fontsize=15)
                plt.plot(range(1, len(mean_similarity_ratio)+1), mean_similarity_ratio)
                plt.text(1, 0.95, "Similarity Ratio: %.4f" % schelling.get_average_similarity_ratio(), fontsize=10)

                data_grid_plot.pyplot(plt)
                plt.close("all")
                progress_bar.progress((i+1.)/n_iterations)

        if new_satisfied_data_grid.size != 0:
            # Display the new data grid with satisfied neighboring characters
            new_data_grid_df = convert_numeric_grid_to_char_seq_grid(new_satisfied_data_grid)
            st.header("New Data Grid with Satisfied Neighboring Characters")
            st.dataframe(new_data_grid_df)

            # Save output to Output.csv file
            pd.DataFrame(new_data_grid_df).to_csv('Output_data.csv', index=False)
            st.warning("Output_data.csv file has been created.")

    else:
        st.error('ERROR: Invalid characters in the data. Please check dataset from Input_data.csv and retry.')