def doMutation(self, chromosomeBeforeM, generationAfterM, flagMutation, fitnessList, i):
    Pm = self.Pm
    dice = []
    length = len(chromosomeBeforeM.genes)
    chromosome = Chromosome([], length)
    geneFlag = []
    for j in range(length):
        # Roll a die in [0, 1] per gene; a gene mutates when the roll is <= Pm.
        dice.append(float('%.2f' % random.uniform(0.0, 1.0)))
        if dice[j] > Pm:
            chromosome.genes.append(chromosomeBeforeM.genes[j])
            geneFlag.append(0)
        else:
            chromosome.genes.append(float('%.2f' % random.uniform(0.0, 1.0)))
            geneFlag.append(1)
    check = sum(geneFlag)
    if check == 0:
        # No gene mutated: reuse the already-computed fitness.
        flagMutation[i] = 0
        chromosome.fitness = fitnessList[i]
    else:
        flagMutation[i] = 1
        # ---- clustering ----
        clustering = Clustering(chromosomeBeforeM, self.data, self.kmax)
        chromosome = clustering.calcChildFit(chromosome)
        # --------------------
    generationAfterM.chromosomes.append(chromosome)
    return generationAfterM
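# A minimal, self-contained sketch of the per-gene mutation rule used above:
# each gene is resampled with probability Pm, otherwise copied through.
# Chromosome/Clustering are project classes; plain lists stand in for them here.
import random

def mutate_genes(genes, Pm, rng=random):
    mutated, flags = [], []
    for g in genes:
        if rng.uniform(0.0, 1.0) <= Pm:
            mutated.append(round(rng.uniform(0.0, 1.0), 2))  # resampled gene
            flags.append(1)
        else:
            mutated.append(g)  # gene passes through unchanged
            flags.append(0)
    return mutated, any(flags)

# Example: mutate a 5-gene chromosome with Pm = 0.1.
genes, changed = mutate_genes([0.12, 0.55, 0.91, 0.33, 0.78], Pm=0.1)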
def doCrossover(self, generation, i, index):
    chromo = generation.chromosomes
    length = chromo[0].length
    # Single-point crossover: pick a cut point strictly inside the chromosome.
    cut = random.randint(1, length - 1)
    parent1 = chromo[index[i]]
    parent2 = chromo[index[i + 1]]
    genesChild1 = parent1.genes[0:cut] + parent2.genes[cut:length]
    genesChild2 = parent1.genes[cut:length] + parent2.genes[0:cut]
    child1 = Chromosome(genesChild1, len(genesChild1))
    child2 = Chromosome(genesChild2, len(genesChild2))
    # ---- clustering ----
    clustering = Clustering(generation, self.data, self.kmax)
    child1 = clustering.calcChildFit(child1)
    child2 = clustering.calcChildFit(child2)
    # --------------------
    # Elitist replacement: sort parents and children by fitness (descending)
    # and keep the two fittest in the parents' slots.
    listA = [parent1, parent2, child1, child2]
    listA = sorted(listA, reverse=True, key=lambda elem: elem.fitness)
    generation.chromosomes[index[i]] = listA[0]
    generation.chromosomes[index[i + 1]] = listA[1]
    return generation
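# Single-point crossover in isolation (illustrative; plain lists instead of
# the project's Chromosome class). Note that child2 above is built tail+head,
# so the two children together use every parent gene exactly once.
import random

def single_point_crossover(p1, p2, rng=random):
    cut = rng.randint(1, len(p1) - 1)  # cut strictly inside the chromosome
    child1 = p1[:cut] + p2[cut:]
    child2 = p1[cut:] + p2[:cut]
    return child1, child2

c1, c2 = single_point_crossover([1, 1, 1, 1], [0, 0, 0, 0])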
def createModel(self):
    result = Model(self.parameters)
    result.initialize(
        case_clustering=Clustering(copyFrom=self.case_clustering),
        event_clustering=Clustering(copyFrom=self.event_clustering),
        rng=self.rng)
    return result
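# Clustering(copyFrom=...) suggests a copy-constructor style: the new model
# gets fresh clustering objects cloned from this one's configuration. A
# minimal, hypothetical illustration of the pattern (not the project's class):
class CopyableConfig:
    def __init__(self, value=None, copyFrom=None):
        # Copy settings from an existing instance when copyFrom is given.
        self.value = copyFrom.value if copyFrom is not None else value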
def performCrossValidationRun(self, fullTestData, trainIndex, testIndex, parameters):
    maxNumCases = parameters["max_num_cases_in_training"]
    cvRunIndex = parameters["cross-validation-run"]
    nSplits = parameters["cross-validation-splits"]
    writeLog("Starting cross-validation run %d of %d" % (cvRunIndex, nSplits))
    if (maxNumCases is not None) and (maxNumCases < len(trainIndex)):
        # Subsample the training fold down to the configured maximum.
        writeLog("Filtering out %d cases out of %d" % (maxNumCases, len(trainIndex)))
        trainIndex = np.random.choice(trainIndex, maxNumCases, replace=False)
    # Build the training event log for this fold.
    runEventLog = self.createEmptyCopy()
    runEventLog.data["cases"] = fullTestData[trainIndex]
    runEventLog.pTraining = parameters["test_data_percentage"]
    runEventLog.setTrainingSize(parameters, runEventLog.pTraining)
    runEventLog.initializationReport()
    m = ModelCluster(runEventLog.rng)
    m.initialize(
        parameters=parameters,
        case_clustering=Clustering(
            parameters["case_clustering_method"], parameters, {
                "num_clusters": parameters["num_case_clusters"],
                "max_num_clusters": parameters["max_num_case_clusters"],
                "ignore_values_threshold": parameters["ignore_values_threshold_for_case_attributes"]
            }),
        event_clustering=Clustering(
            parameters["event_clustering_method"], parameters, {
                "num_clusters": parameters["num_event_clusters"],
                "max_num_clusters": parameters["max_num_event_clusters"],
                "ignore_values_threshold": parameters["ignore_values_threshold_for_event_attributes"]
            }),
        rng=runEventLog.rng)
    trainResult = m.train(runEventLog)
    writeLog("Starting cross-validation test for run %d" % cvRunIndex)
    # Build the test event log for this fold.
    runEventLog = self.createEmptyCopy()
    runEventLog.data["cases"] = fullTestData[testIndex]
    runEventLog.testData = fullTestData[testIndex]
    runEventLog.trainingData = []
    runEventLog.pTraining = 0.0
    runEventLog.initializeDerivedData()
    runEventLog.initializationReport()
    maxNumTraces = parameters.get("max_num_traces_in_testing")
    m.test(runEventLog, 1.0, trainResult, maxNumTraces)
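# trainIndex/testIndex look like per-fold index arrays; a typical (assumed,
# not confirmed by the source) way to produce them is sklearn's KFold:
from sklearn.model_selection import KFold

# for trainIndex, testIndex in KFold(n_splits=5).split(fullTestData):
#     eventLog.performCrossValidationRun(fullTestData, trainIndex, testIndex, parameters)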
def get_clusters():
    html = '<html><body><h3>HN Cluster groups</h3>'
    r = Retriever()
    allwords, index, doc_to_title = r.retrieve()
    c = Clustering()
    root, cluster_doc_map = c.hcluster(allwords, index)
    relevant_clusters = c.subclusters(root, 0.90)
    singles = []
    for cluster in relevant_clusters:
        item_c = c.subcluster_items(cluster, cluster_doc_map, doc_to_title)
        if len(item_c) == 1:
            # Clusters with a single item are listed separately at the end.
            singles.append(item_c[0])
            continue
        for item in item_c:
            html += '<a href="%s">%s</a><br>' % (
                doc_to_title[cluster_doc_map[item]][1],
                doc_to_title[cluster_doc_map[item]][0])
        html += '<hr><br><br>'
    html += '<h3>Single clusters</h3>'
    for item in singles:
        html += '<a href="%s">%s</a><br>' % (
            doc_to_title[cluster_doc_map[item]][1],
            doc_to_title[cluster_doc_map[item]][0])
    html += '</body></html>'
    return html
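# A common alternative to repeated `html += ...`: collect fragments in a list
# and join once at the end. Purely illustrative; the names mirror the function
# above, and doc_to_title values are assumed to be (title, url) pairs as used there.
def render_links(items, cluster_doc_map, doc_to_title):
    parts = []
    for item in items:
        title, url = doc_to_title[cluster_doc_map[item]][0], doc_to_title[cluster_doc_map[item]][1]
        parts.append('<a href="%s">%s</a><br>' % (url, title))
    return ''.join(parts)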
def fit(self, verbose=0):
    clustering = Clustering(self.X_train, self.config['clustering'], verbose=verbose)
    clustering.model_init()
    clustering.fit()
    self.model_names = clustering.model_names
    self.y_preds = clustering.y_preds
    return self
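# Hypothetical caller of fit(); the X_train attribute and the {'clustering': ...}
# config key are taken from the method above, everything else here is assumed.
# model = SomeEstimator(X_train=X, config={'clustering': cfg})
# model = model.fit(verbose=1)
# predictions_per_model = dict(zip(model.model_names, model.y_preds))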
def cluster(self):
    self.logger.info('Clustering NPs and relation phrases')
    fname1 = self.p.out_path + self.p.file_entClust
    fname2 = self.p.out_path + self.p.file_relClust
    if not checkFile(fname1) or not checkFile(fname2):
        # Clustering only subjects: map each subject entity to its embedding.
        self.sub2embed, self.sub2id = {}, {}
        for sub_id, eid in enumerate(self.side_info.isSub.keys()):
            self.sub2id[eid] = sub_id
            self.sub2embed[sub_id] = self.ent2embed[eid]
        self.side_info.id2sub = invertDic(self.sub2id)
        clust = Clustering(self.sub2embed, self.rel2embed, self.side_info, self.p)
        self.ent_clust = clust.ent_clust
        self.rel_clust = clust.rel_clust
        # Cache the clustering results for subsequent runs.
        dumpCluster(fname1, self.ent_clust, self.side_info.id2ent)
        dumpCluster(fname2, self.rel_clust, self.side_info.id2rel)
    else:
        self.logger.info('\tLoading cached Clustering')
        self.ent_clust = loadCluster(fname1, self.side_info.ent2id)
        self.rel_clust = loadCluster(fname2, self.side_info.rel2id)
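# invertDic presumably swaps keys and values; for the one-to-one sub2id map
# above, an equivalent inline version would be (illustrative assumption; the
# project's invertDic may handle duplicate values differently):
def invert_dict(d):
    return {v: k for k, v in d.items()}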
# chromosome layout: kmax blocks of dim genes, one block per cluster center
chromosome_length = kmax * dim

# -------------------------------------------------------#
#                          main                           #
# -------------------------------------------------------#
print('Setting Generation Class')
initial = Generation(numOfInd, 0)
print('Generating random initial chromosomes')
initial.randomGenerateChromosomes(chromosome_length)
print('Setting Clustering Class')
clustering = Clustering(initial, data, kmax)

# ------------------ calc fitness ------------------#
print('Calculating initial fitness')
generation = clustering.calcChromosomesFit()

# ------------------------ GA ----------------------#
print('Looping through each generation')
while generationCount <= budget:
    print('Generation ' + str(generationCount) + ':')
    print('\tSetting up Genetic class')
    GA = Genetic(numOfInd, Ps, Pm, Pc, budget, data, generationCount, kmax)
    print('\tExecuting genetic process')
    generation, generationCount = GA.geneticProcess(generation)
    iBest = generation.chromosomes[0]
    clustering.printIBest(iBest)
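# Sketch of how a flat chromosome of length kmax * dim can be decoded into
# kmax candidate cluster centers. This decoding is an assumption consistent
# with chromosome_length = kmax * dim above; the project may decode differently.
def decode_centers(genes, kmax, dim):
    return [genes[i * dim:(i + 1) * dim] for i in range(kmax)]

centers = decode_centers([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], kmax=3, dim=2)
# -> [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]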
def get_cluster():
    cl = Clustering()
    cl.cluster_data()
    return jsonify({'cluster': "clustering done!"})
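# get_cluster() returns flask.jsonify(...), so it is presumably a Flask view.
# A minimal wiring sketch; the route path '/cluster' is an assumption.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/cluster', view_func=get_cluster)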
def post(self, request, format=None):
    try:
        param = int(request.POST['param'])
        problem = request.POST['problem']
        inputType = request.POST['input_type']

        # Get selected arch id's (payload looks like "[1,2,3]": strip the
        # brackets, then split on commas)
        selected = request.POST['selected']
        selected = selected[1:-1]
        selected_arch_ids = selected.split(',')
        # Convert strings to ints
        behavioral = [int(s) for s in selected_arch_ids]

        # Get non-selected arch id's
        non_selected = request.POST['non_selected']
        non_selected = non_selected[1:-1]
        non_selected_arch_ids = non_selected.split(',')
        # Convert strings to ints
        non_behavioral = [int(s) for s in non_selected_arch_ids]

        # Load architecture data from the session info
        architectures = request.session['data']
        data = [arch['outputs'] for arch in architectures if arch['id'] in behavioral]
        id_list = behavioral

        # dir_path = os.path.dirname(os.path.realpath(__file__))
        # with open(os.path.join(dir_path, "data.csv"), "w") as file:
        #     for i, row in enumerate(data):
        #         out = []
        #         out.append(str(id_list[i]))
        #         for val in row:
        #             out.append(str(val))
        #         out = ",".join(out)
        #         file.write(out + "\n")

        from cluster import Clustering
        clustering = Clustering(data)
        labels = clustering.kMeans(param)

        out = {"id": id_list, "labels": labels}
        return Response(out)
    except Exception as detail:
        logger.exception('Exception in clustering: ' + str(detail))
        return Response('')
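# The bracket-strip-and-split parsing above assumes a payload like "[1,2,3]".
# json.loads is a more robust alternative for the same payload (illustrative):
import json

selected_arch_ids = json.loads("[1,2,3]")  # -> [1, 2, 3], already ints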
def run(parameters):
    rng = np.random.RandomState(random_seed)
    writeLog("Running test using parameters: " + json.dumps(parameters))

    inputJson = None
    if opts.input_data_from_standard_input:
        writeLog("Reading from standard input")
        inputJson = sys.stdin.readline()
        writeLog("Standard input reading finished")
        if parameters["write_input_to_file"]:
            filename = get_filename(
                "testdata_", "%s_%s_%s" % (parameters["file_handle"], "", ""), "json")
            with open(filename, "w") as f:
                f.write(inputJson)

    if parameters["model_filename"] is not None:
        # Prediction mode: load a previously trained model and run the test set.
        m = ModelCluster(rng)
        m.load(parameters["model_filename"], parameters)
        inputFilename = parameters["test_filename"]
        if inputFilename is not None:
            writeLog("Reading test data from file: " + inputFilename)
        el = EventLog(parameters, rng, inputFilename, modelCluster=m, inputJson=inputJson)
        jsonResult = "{}"
        if len(el.testData) > 0:
            writeLog("Test set contains %d cases." % (len(el.testData)))
            result = m.test(el)
            jsonResult = json.dumps(result)
            filename = get_filename(
                "predict_result",
                "%s_%s_%s" % (parameters["file_handle"], m.case_name, m.eventlog.filename),
                "json")
            with open(filename, "w") as f:
                f.write(jsonResult)
            writeLog("Generated results saved into file: %s" % filename)
        else:
            writeLog("Test set is empty. No results created.")
        print(jsonResult)
    elif (parameters["input_filename"] is not None) or (inputJson is not None):
        # Training mode: optionally cross-validate, otherwise train and save a model.
        if parameters["cross-validation-splits"] is not None:
            EventLog.performCrossValidatedTests(parameters, inputJson, rng)
            return
        e = EventLog(parameters, rng, parameters["input_filename"],
                     parameters["test_data_percentage"], inputJson=inputJson)
        m = ModelCluster(rng)
        m.initialize(
            parameters=parameters,
            case_clustering=Clustering(
                parameters["case_clustering_method"], parameters, {
                    "num_clusters": parameters["num_case_clusters"],
                    "max_num_clusters": parameters["max_num_case_clusters"],
                    "ignore_values_threshold": parameters["ignore_values_threshold_for_case_attributes"]
                }),
            event_clustering=Clustering(
                parameters["event_clustering_method"], parameters, {
                    "num_clusters": parameters["num_event_clusters"],
                    "max_num_clusters": parameters["max_num_event_clusters"],
                    "ignore_values_threshold": parameters["ignore_values_threshold_for_event_attributes"]
                }),
            rng=rng)
        trainResult = m.train(e)
        filename = m.save(parameters["file_handle"], parameters)
        writeLog("Generated model saved into file: %s" % filename)
        print(filename)
        if parameters["test_filename"] is not None:
            # Immediately evaluate the freshly trained model on the test file.
            m = ModelCluster(rng)
            m.load(filename, parameters)
            el = EventLog(parameters, rng, parameters["test_filename"], modelCluster=m)
            result = m.test(el, 1.0, trainResult)
            jsonResult = json.dumps(result)
            filename = get_filename(
                "predict_result",
                "%s_%s_%s" % (parameters["file_handle"], m.case_name, m.eventlog.filename),
                "json")
            with open(filename, "w") as f:
                f.write(jsonResult)
            writeLog("Generated results saved into file: %s" % filename)
            print(jsonResult)
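# A minimal parameters dict for run(), using only keys that run() itself
# reads. The values are placeholders, not project defaults, and "kmeans" is
# an assumed method name.
params = {
    "model_filename": None,
    "input_filename": "eventlog.csv",
    "test_filename": None,
    "test_data_percentage": 0.25,
    "cross-validation-splits": None,
    "file_handle": "demo",
    "write_input_to_file": False,
    "case_clustering_method": "kmeans",
    "event_clustering_method": "kmeans",
    "num_case_clusters": 5,
    "max_num_case_clusters": 10,
    "num_event_clusters": 5,
    "max_num_event_clusters": 10,
    "ignore_values_threshold_for_case_attributes": 0.0,
    "ignore_values_threshold_for_event_attributes": 0.0,
}
# run(params)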