Ejemplo n.º 1
0
 def calculate_silhoutte(self, location, labels):
     """Return the silhouette score of ``labels`` for the glass data set.

     The data set is read through ``Utilty.read_file`` and stored on
     ``self.result``; a Euclidean pairwise-distance matrix is then built
     from it and handed to ``silhouette_score`` as a precomputed metric.

     Args:
         location: Unused; kept so existing callers keep working.
         labels: Cluster labels, one per row of the loaded data.

     Returns:
         The value returned by ``silhouette_score``.
     """
     util = Utilty()
     # Raw string: the Windows path is full of backslashes that are not
     # valid escape sequences in a normal string literal.
     self.result = util.read_file(
         r"C:\personal\PhD\Dataset\sonar-data-set\glass-classification\glass.csv",
         ",")
     D = pairwise_distances(self.result, metric='euclidean')
     print('silhoutte labels', labels)
     # ``metric`` is keyword-only in current scikit-learn releases; passing
     # it positionally raises a TypeError.
     return silhouette_score(D, labels, metric="precomputed")
Ejemplo n.º 2
0
    def delete_identical_row(self, iterator):
        """Return ``iterator`` without the rows flagged by ``Utilty.checkEqual``.

        Every row index for which ``checkEqual`` compares equal to ``True``
        is reported on stdout and removed along axis 0 with ``np.delete``
        (presumably rows whose elements are all identical -- confirm
        against ``Utilty.checkEqual``).

        Args:
            iterator: Indexable collection of rows accepted by ``np.delete``.

        Returns:
            The array produced by ``np.delete`` with the flagged rows removed.
        """
        checker = Utilty()
        flagged_rows = []
        for row_idx in range(len(iterator)):
            if checker.checkEqual(iterator[row_idx]) == True:
                print('is_identical is true need to delete', row_idx)
                flagged_rows.append(row_idx)
        return np.delete(iterator, flagged_rows, axis=0)
Ejemplo n.º 3
0
 def calculate_dunn(self, location, labels):
     """Return the Davies-Bouldin score of ``labels`` over ``self.result``.

     Despite its name, this method computes the Davies-Bouldin index.

     ``davies_bouldin_score`` expects the samples themselves (one row per
     sample, one column per feature), not a distance matrix, so the data in
     ``self.result`` is passed directly.

     Args:
         location: Unused; kept so existing callers keep working.
         labels: Cluster labels, one per row of ``self.result``.

     Returns:
         The value returned by ``davies_bouldin_score``.
     """
     return davies_bouldin_score(self.result, labels)
Ejemplo n.º 4
0
 def calculate_silhoutte(self, location, labels):
     """Return the silhouette score of ``labels`` over ``self.result``.

     ``self.result`` must already hold the data set; this method does not
     load it. A Euclidean pairwise-distance matrix is built from it and
     handed to ``silhouette_score`` as a precomputed metric.

     Args:
         location: Unused; kept so existing callers keep working.
         labels: Cluster labels, one per row of ``self.result``.

     Returns:
         The value returned by ``silhouette_score``.
     """
     D = pairwise_distances(self.result, metric='euclidean')
     print('silhoutte labels', labels)
     # ``metric`` is keyword-only in current scikit-learn releases; passing
     # it positionally raises a TypeError.
     return silhouette_score(D, labels, metric="precomputed")
Ejemplo n.º 5
0
 def calculate_silhoutte(self, location, labels):
     """Return the silhouette score of ``labels`` on the iris data.

     The ``'data'`` entry of ``load_iris()`` is stored on ``self.result``
     as a NumPy array; a Euclidean pairwise-distance matrix is built from
     it and handed to ``silhouette_score`` as a precomputed metric.

     Args:
         location: Unused; kept so existing callers keep working.
         labels: Cluster labels, one per row of the loaded data.

     Returns:
         The value returned by ``silhouette_score``.
     """
     self.result = np.asarray(load_iris()['data'])
     D = pairwise_distances(self.result, metric='euclidean')
     print('silhoutte labels', labels)
     # ``metric`` is keyword-only in current scikit-learn releases; passing
     # it positionally raises a TypeError.
     return silhouette_score(D, labels, metric="precomputed")
Ejemplo n.º 6
0
    def calculate_silhoutte(self, location, labels):
        """Return the silhouette score of ``labels`` for the ``real_6.csv`` data.

        The comma-separated file is parsed with ``np.loadtxt`` and stored on
        ``self.result``; a Euclidean pairwise-distance matrix is built from
        it and handed to ``silhouette_score`` as a precomputed metric.

        Args:
            location: Unused; kept so existing callers keep working.
            labels: Cluster labels, one per row of the loaded data.

        Returns:
            The value returned by ``silhouette_score``.
        """
        # Raw string: the Windows path is full of backslashes that are not
        # valid escape sequences (and ``\r`` would be a carriage return).
        data_path = r"C:\personal\PhD\Dataset\Anomaly\Libras\real_6.csv"
        # Context manager so the file handle is closed once parsing is done.
        with open(data_path, "r") as handle:
            self.result = np.loadtxt(handle, delimiter=",")
        D = pairwise_distances(self.result, metric='euclidean')
        print('silhoutte labels', labels)
        # ``metric`` is keyword-only in current scikit-learn releases;
        # passing it positionally raises a TypeError.
        return silhouette_score(D, labels, metric="precomputed")
Ejemplo n.º 7
0
            if len(tree_dic[key]) > small_cluster_threshold:
                large_clusters[key] = tree_dic[key]

    for key in large_clusters:
        array = large_clusters[key]
        centroid = np.mean(array, axis=0)
        for itr in range(len(large_clusters[key])):
            anomalyscore_largeclusters = np.linalg.norm(array[itr] - centroid)
            anomaly_largeclusters[anomalyscore_largeclusters] = array[itr]

    return anomaly_largeclusters


# In[4]:

# Read the pre-processed data set.
util = Utilty()
result = util.read_file("processed_dataset_D1.csv", ",")
obj = MSMA(2, len(result), '', 12, result)
print('dataset shape', result.shape)

# Obtain the best population sample.
final_dict = obj.start_clustering()
print('final dict')
print(final_dict, '\n')

# Keep the entry stored under the largest key of the result.
final_cluster = final_dict[max(final_dict)]

# Initial state for the recursive (binary tree) step.
print('running binary tree\n')
count, leaf_nodes, tree_dic = 0, [], {}
Ejemplo n.º 8
0
    def start_clustering(self):
        """Run the two-level evolutionary search and collect per-stage winners.

        A random population is created, rows flagged by
        ``delete_identical_row`` are dropped and the rest is ranked with
        ``get_silhoutte_score``. Each of the 100 outer stages breeds a new
        generation, ranks it by silhouette score and records its top entry.
        That generation is then refined for 40 inner stages, ranked through
        ``get_dunn_score`` and ``Utilty.sortDictionaryForDavis``.

        Returns:
            dict: for every outer stage that produced a new generation, the
            largest key of the silhouette-ranked dictionary mapped to its
            value.
        """
        util = Utilty()

        def split_by_rank(ranked, parent_count):
            # The first ``parent_count`` entries (in iteration order) become
            # parents; the remaining entries survive unchanged.
            parents, survivors = {}, {}
            for position, key in enumerate(ranked):
                if position < parent_count:
                    parents[key] = ranked[key]
                else:
                    survivors[key] = ranked[key]
            return parents, survivors

        def breed(ranked, parent_count):
            # Build the next generation array, or return None when crossover
            # or mutation produced nothing to work with.
            parents, survivors = split_by_rank(ranked, parent_count)
            offspring = self.get_crossover(parents)
            mutants = self.get_mutation(offspring)
            # The original ``len(a)==0 | len(b)==0`` parsed as the chained
            # comparison ``len(a) == (0 | len(b)) == 0`` because ``|`` binds
            # tighter than ``==``; the intended either-empty test is spelled
            # out here.
            if len(offspring) == 0 or len(mutants) == 0:
                print('cannot proceed further as the offspring and mutant list is empty')
                return None
            merged = util.merge_dicts(mutants, survivors)
            as_array = util.convert_dict_to_list(
                merged, len(merged), self.datapoint_number)
            return self.delete_identical_row(as_array)

        final_dict = {}
        population = self.create_population()
        print('new random created population', population)
        population = self.delete_identical_row(population)
        sorted_dict = util.sortDictionary(self.get_silhoutte_score(population))

        # Stays None until some stage actually produces a generation.
        newpopluation_array = None
        for _ in range(100):
            elitismRate = math.floor(len(sorted_dict) / 2)
            if elitismRate % 2 == 1:
                print('elitismRate', elitismRate)
                elitismRate = elitismRate + 1

            generation = breed(sorted_dict, elitismRate)
            if generation is None:
                continue
            newpopluation_array = generation
            sorted_dict = util.sortDictionary(
                self.get_silhoutte_score(newpopluation_array))
            best_key = max(sorted_dict)
            print('max val', best_key)
            final_dict[best_key] = sorted_dict[best_key]

            for _ in range(40):
                elitismRate = math.floor(len(sorted_dict) / 2)
                if elitismRate % 2 == 1:
                    elitismRate = elitismRate + 1
                    print('elitismRate', elitismRate)

                generation = breed(sorted_dict, elitismRate)
                if generation is None:
                    continue
                newpopluation_array = generation
                sorted_dict = util.sortDictionaryForDavis(
                    self.get_dunn_score(newpopluation_array))

        # Final re-score of the last generation. Its result is not used here;
        # retained in case get_silhoutte_score has side effects that callers
        # rely on -- TODO confirm and remove. Guarded because no generation
        # exists when every stage was skipped (previously a NameError).
        if newpopluation_array is not None:
            silhoutte_dict = self.get_silhoutte_score(newpopluation_array)
            sorted_dict = util.sortDictionary(silhoutte_dict)

        return final_dict
Ejemplo n.º 9
0
 def calculate_silhoutte(self, location, labels, D):
     """Return the silhouette score of ``labels`` for a precomputed matrix.

     Args:
         location: Unused; kept so existing callers keep working.
         labels: Cluster labels, one per row of ``D``.
         D: Matrix passed to ``silhouette_score`` with
             ``metric="precomputed"``.

     Returns:
         The value returned by ``silhouette_score``.
     """
     # ``metric`` is keyword-only in current scikit-learn releases; passing
     # it positionally raises a TypeError.
     return silhouette_score(D, labels, metric="precomputed")
Ejemplo n.º 10
0
    def start_clustering(self):
        """Run the two-level evolutionary search and collect per-stage winners.

        A random population is created, rows flagged by
        ``delete_identical_row`` are dropped and the rest is ranked with
        ``get_silhoutte_score`` against the Euclidean pairwise-distance
        matrix of ``self.result``, which is computed once. Each of the 10
        outer stages breeds a new generation, ranks it by silhouette score
        and records its top entry. That generation is then refined for 10
        inner stages, ranked through ``get_dunn_score`` and
        ``Utilty.sortDictionaryForDavis``.

        Returns:
            dict: for every outer stage that produced a new generation, the
            largest key of the silhouette-ranked dictionary mapped to its
            value.
        """
        util = Utilty()

        def breed(ranked):
            # Build the next generation array from ``ranked``, or return None
            # when crossover or mutation produced nothing to work with.
            elitismRate = math.floor(len(ranked) / 2)
            if elitismRate % 2 == 1:
                # Bump odd parent counts to the next even number.
                elitismRate = elitismRate + 1

            # The first ``elitismRate`` entries (in iteration order) become
            # parents; the remaining entries survive unchanged.
            parents, survivors = {}, {}
            for position, key in enumerate(ranked):
                if position < elitismRate:
                    parents[key] = ranked[key]
                else:
                    survivors[key] = ranked[key]

            offspring = self.get_crossover(parents)
            mutants = self.get_mutation(offspring)
            # The original ``len(a) == 0 | len(b) == 0`` parsed as the chained
            # comparison ``len(a) == (0 | len(b)) == 0`` because ``|`` binds
            # tighter than ``==``; the intended either-empty test is spelled
            # out here.
            if len(offspring) == 0 or len(mutants) == 0:
                print(
                    'cannot proceed further as the offspring and mutant list is empty'
                )
                return None
            merged = util.merge_dicts(mutants, survivors)
            as_array = util.convert_dict_to_list(
                merged, len(merged), self.datapoint_number)
            return self.delete_identical_row(as_array)

        final_dict = {}
        population = self.create_population()
        population = self.delete_identical_row(population)
        print('population shape', population.shape)

        # The distance matrix depends only on the data, so compute it once.
        D = pairwise_distances(self.result, metric='euclidean')
        sorted_dict = util.sortDictionary(
            self.get_silhoutte_score(population, D))

        # Stays None until some stage actually produces a generation.
        newpopluation_array = None
        for _ in range(10):
            generation = breed(sorted_dict)
            if generation is None:
                continue
            newpopluation_array = generation
            sorted_dict = util.sortDictionary(
                self.get_silhoutte_score(newpopluation_array, D))
            best_key = max(sorted_dict)
            final_dict[best_key] = sorted_dict[best_key]

            for _ in range(10):
                generation = breed(sorted_dict)
                if generation is None:
                    continue
                newpopluation_array = generation
                sorted_dict = util.sortDictionaryForDavis(
                    self.get_dunn_score(newpopluation_array))

        # Final re-score of the last generation. Its result is not used here;
        # retained in case get_silhoutte_score has side effects that callers
        # rely on -- TODO confirm and remove. Guarded because no generation
        # exists when every stage was skipped (previously a NameError).
        if newpopluation_array is not None:
            silhoutte_dict = self.get_silhoutte_score(newpopluation_array, D)
            sorted_dict = util.sortDictionary(silhoutte_dict)

        return final_dict