Ejemplo n.º 1
0
    def doMutation(self, chromosomeBeforeM, generationAfterM, flagMutation, fitnessList, i):
        """Apply per-gene mutation to one chromosome and append the child.

        Each gene mutates (is replaced by a fresh random value, rounded to
        two decimals) with probability Pm; otherwise it is copied unchanged.
        If nothing mutated, the parent's fitness is reused; otherwise the
        child's fitness is recomputed via clustering.
        """
        mutation_rate = self.Pm
        n_genes = len(chromosomeBeforeM.genes)
        child = Chromosome([], n_genes)
        mutated_flags = []

        for gene in chromosomeBeforeM.genes:
            roll = float('%.2f' % random.uniform(0.0, 1.0))
            if roll <= mutation_rate:
                # mutate: replace this gene with a fresh random value
                child.genes.append(float('%.2f' % random.uniform(0.0, 1.0)))
                mutated_flags.append(1)
            else:
                # keep the parent's gene as-is
                child.genes.append(gene)
                mutated_flags.append(0)

        if sum(mutated_flags) == 0:
            # no gene changed -> the previously computed fitness still holds
            flagMutation[i] = 0
            child.fitness = fitnessList[i]
        else:
            flagMutation[i] = 1
            # ---clustering: re-evaluate the mutated child's fitness---
            child = Clustering(chromosomeBeforeM, self.data,
                               self.kmax).calcChildFit(child)

        generationAfterM.chromosomes.append(child)
        return generationAfterM
Ejemplo n.º 2
0
    def doCrossover(self, generation, i, index):
        """Single-point crossover between the two parents selected by `index`.

        Creates two children by swapping gene segments at a random cut point,
        evaluates their fitness via clustering, and writes the two fittest of
        {parent1, parent2, child1, child2} back into the generation.
        """
        pool = generation.chromosomes
        n_genes = pool[0].length
        # cut point strictly inside the chromosome (never at either end)
        cut = random.randint(1, n_genes - 1)

        p1 = pool[index[i]]
        p2 = pool[index[i + 1]]
        genes_a = p1.genes[:cut] + p2.genes[cut:n_genes]
        genes_b = p1.genes[cut:n_genes] + p2.genes[:cut]
        child_a = Chromosome(genes_a, len(genes_a))
        child_b = Chromosome(genes_b, len(genes_b))

        # ----clustering: evaluate the children's fitness----
        clustering = Clustering(generation, self.data, self.kmax)
        child_a = clustering.calcChildFit(child_a)
        child_b = clustering.calcChildFit(child_b)
        # ---------------------------------------------------

        # keep the two fittest among parents and children (fitness descending)
        candidates = sorted([p1, p2, child_a, child_b],
                            key=lambda c: c.fitness,
                            reverse=True)
        pool[index[i]] = candidates[0]
        pool[index[i + 1]] = candidates[1]

        return generation
Ejemplo n.º 3
0
 def createModel(self):
     """Build a fresh Model from this instance's parameters and clusterings."""
     model = Model(self.parameters)
     model.initialize(
         case_clustering=Clustering(copyFrom=self.case_clustering),
         event_clustering=Clustering(copyFrom=self.event_clustering),
         rng=self.rng)
     return model
Ejemplo n.º 4
0
    def performCrossValidationRun(self, fullTestData, trainIndex, testIndex,
                                  parameters):
        """Train and test a ModelCluster for one cross-validation fold.

        Trains on the cases selected by `trainIndex` (optionally down-sampled
        to at most `max_num_cases_in_training` cases) and then evaluates the
        trained model on the held-out cases selected by `testIndex`.
        """
        maxNumCases = parameters["max_num_cases_in_training"]
        cvRunIndex = parameters["cross-validation-run"]
        nSplits = parameters["cross-validation-splits"]

        writeLog("Starting cross-validation run %d of %d" %
                 (cvRunIndex, nSplits))

        # Optionally cap the number of training cases by random sub-sampling.
        # (`is not None` instead of `!= None`: identity test is the correct
        # way to detect an unset parameter.)
        if (maxNumCases is not None) and (maxNumCases < len(trainIndex)):
            writeLog("Filtering out %d cases out of %d" %
                     (maxNumCases, len(trainIndex)))
            trainIndex = np.random.choice(trainIndex,
                                          maxNumCases,
                                          replace=False)

        # --- training phase: event log restricted to the training fold ---
        runEventLog = self.createEmptyCopy()
        runEventLog.data["cases"] = fullTestData[trainIndex]
        runEventLog.pTraining = parameters["test_data_percentage"]
        runEventLog.setTrainingSize(parameters, runEventLog.pTraining)
        runEventLog.initializationReport()

        m = ModelCluster(runEventLog.rng)
        m.initialize(
            parameters=parameters,
            case_clustering=Clustering(
                parameters["case_clustering_method"], parameters, {
                    "num_clusters":
                    parameters["num_case_clusters"],
                    "max_num_clusters":
                    parameters["max_num_case_clusters"],
                    "ignore_values_threshold":
                    parameters["ignore_values_threshold_for_case_attributes"]
                }),
            event_clustering=Clustering(
                parameters["event_clustering_method"], parameters, {
                    "num_clusters":
                    parameters["num_event_clusters"],
                    "max_num_clusters":
                    parameters["max_num_event_clusters"],
                    "ignore_values_threshold":
                    parameters["ignore_values_threshold_for_event_attributes"]
                }),
            rng=runEventLog.rng)
        trainResult = m.train(runEventLog)

        writeLog("Starting cross-validation test for run %d" % (cvRunIndex))

        # --- testing phase: a fresh log holding only the held-out fold ---
        runEventLog = self.createEmptyCopy()
        runEventLog.data["cases"] = fullTestData[testIndex]
        runEventLog.testData = fullTestData[testIndex]
        runEventLog.trainingData = []
        runEventLog.pTraining = 0.0
        runEventLog.initializeDerivedData()
        runEventLog.initializationReport()
        # A missing key means "no cap on the number of traces tested".
        maxNumTraces = parameters.get("max_num_traces_in_testing")
        m.test(runEventLog, 1.0, trainResult, maxNumTraces)
Ejemplo n.º 5
0
def get_clusters():
  """Build and return an HTML page of hierarchical cluster groups.

  Clusters with more than one item are rendered as link groups; clusters
  containing a single item are collected and listed separately at the end.
  """
  html = '<html><body><h3>HN Cluster groups</h3>'
  r = Retriever()
  allwords, index, doc_to_title = r.retrieve()
  c = Clustering()
  root, cluster_doc_map = c.hcluster(allwords, index)
  # keep only clusters whose similarity exceeds the 0.90 threshold
  relevant_clusters = c.subclusters(root, 0.90)
  singles = []
  for cluster in relevant_clusters:
    item_c = c.subcluster_items(cluster, cluster_doc_map, doc_to_title)
    if len(item_c) == 1:
      # one-item clusters are grouped together in a trailing section
      singles.append(item_c[0])
      continue
    for item in item_c:
      # doc_to_title[...] is presumably (title, url); verify against Retriever
      html += '<a href="%s">%s</a><br>' % (doc_to_title[cluster_doc_map[item]][1],
                                            doc_to_title[cluster_doc_map[item]][0])
    html += '<hr><br><br>'
  html += '<h3>Single clusters</h3>'
  for item in singles:
    html += '<a href="%s">%s</a><br>' % (doc_to_title[cluster_doc_map[item]][1],
                                          doc_to_title[cluster_doc_map[item]][0])
  html += '</body></html>'
  # BUG FIX: the assembled page was built and then silently discarded;
  # return it so callers actually receive the HTML.
  return html
Ejemplo n.º 6
0
def get_clusters():
    """Build and return an HTML page of hierarchical cluster groups.

    Clusters with more than one item are rendered as link groups; clusters
    containing a single item are collected and listed separately at the end.
    """
    html = '<html><body><h3>HN Cluster groups</h3>'
    r = Retriever()
    allwords, index, doc_to_title = r.retrieve()
    c = Clustering()
    root, cluster_doc_map = c.hcluster(allwords, index)
    # keep only clusters whose similarity exceeds the 0.90 threshold
    relevant_clusters = c.subclusters(root, 0.90)
    singles = []
    for cluster in relevant_clusters:
        item_c = c.subcluster_items(cluster, cluster_doc_map, doc_to_title)
        if len(item_c) == 1:
            # one-item clusters are grouped together in a trailing section
            singles.append(item_c[0])
            continue
        for item in item_c:
            # doc_to_title[...] is presumably (title, url); verify in Retriever
            html += '<a href="%s">%s</a><br>' % (
                doc_to_title[cluster_doc_map[item]][1],
                doc_to_title[cluster_doc_map[item]][0])
        html += '<hr><br><br>'
    html += '<h3>Single clusters</h3>'
    for item in singles:
        html += '<a href="%s">%s</a><br>' % (doc_to_title[
            cluster_doc_map[item]][1], doc_to_title[cluster_doc_map[item]][0])
    html += '</body></html>'
    # BUG FIX: the assembled page was built and then silently discarded;
    # return it so callers actually receive the HTML.
    return html
Ejemplo n.º 7
0
 def fit(self, verbose=0):
     """Fit the configured clustering models on the training data.

     Stores the fitted model names and their predictions on this estimator
     and returns self so calls can be chained.
     """
     clusterer = Clustering(self.X_train,
                            self.config['clustering'],
                            verbose=verbose)
     clusterer.model_init()
     clusterer.fit()
     self.model_names = clusterer.model_names
     self.y_preds = clusterer.y_preds
     return self
Ejemplo n.º 8
0
	def cluster(self):
		"""Cluster noun phrases and relation phrases, caching the results on disk."""
		self.logger.info('Clustering NPs and relation phrases')

		ent_file = self.p.out_path + self.p.file_entClust
		rel_file = self.p.out_path + self.p.file_relClust

		if checkFile(ent_file) and checkFile(rel_file):
			# Both cluster files already exist -> load the cached results.
			self.logger.info('\tLoading cached Clustering')
			self.ent_clust = loadCluster(ent_file, self.side_info.ent2id)
			self.rel_clust = loadCluster(rel_file, self.side_info.rel2id)
		else:
			# Cluster only subject NPs: build id and embedding maps
			# restricted to entities that occur as subjects.
			self.sub2embed, self.sub2id = {}, {}
			for sub_id, eid in enumerate(self.side_info.isSub.keys()):
				self.sub2id[eid] = sub_id
				self.sub2embed[sub_id] = self.ent2embed[eid]
			self.side_info.id2sub = invertDic(self.sub2id)

			clust = Clustering(self.sub2embed, self.rel2embed, self.side_info, self.p)
			self.ent_clust = clust.ent_clust
			self.rel_clust = clust.rel_clust

			# Persist both clusterings so later runs can reuse them.
			dumpCluster(ent_file, self.ent_clust, self.side_info.id2ent)
			dumpCluster(rel_file, self.rel_clust, self.side_info.id2rel)
Ejemplo n.º 9
0
    # dim or pattern id
    # 11k 11k 11k
    # A chromosome encodes up to kmax candidate centroids of `dim` values each.
    chromosome_length = kmax * dim

    #-------------------------------------------------------#
    # 							main 						#
    #-------------------------------------------------------#
    print('Setting Generation Class')
    initial = Generation(numOfInd, 0)
    print('Generating random initial chromosomes')
    initial.randomGenerateChromosomes(
        chromosome_length)  # initial generate chromosome

    print('Setting Clustering Class')
    clustering = Clustering(initial, data, kmax)  # eval fit of chromosomes

    # ------------------calc fitness------------------#
    print('Calculating initial fitness')
    generation = clustering.calcChromosomesFit()

    # ------------------------GA----------------------#
    # Evolve the population until the generation budget is exhausted;
    # geneticProcess returns the next generation and the updated counter.
    print('Looping through each generation')
    while generationCount <= budget:
        print('Generation ' + str(generationCount) + ':')
        print('\tSetting up Genetic class')
        GA = Genetic(numOfInd, Ps, Pm, Pc, budget, data, generationCount, kmax)
        print('\tExecuting genetic process')
        generation, generationCount = GA.geneticProcess(generation)
        # NOTE(review): assumes chromosomes are sorted best-first after
        # geneticProcess, so index 0 is the incumbent best — TODO confirm.
        iBest = generation.chromosomes[0]
        clustering.printIBest(iBest)
Ejemplo n.º 10
0
def get_cluster():
    """Run the clustering job and report completion as a JSON response."""
    Clustering().cluster_data()
    return jsonify({'cluster': "clustering done!"})
Ejemplo n.º 11
0
    def post(self, request, format=None):
        """Cluster the outputs of the selected architectures with k-means.

        Reads the k-means parameter plus selected / non-selected architecture
        id lists from the POST body, clusters the session-stored outputs of
        the selected architectures, and returns ``{"id": [...], "labels":
        [...]}``. Any failure is logged and answered with an empty response.
        """
        try:
            param = int(request.POST['param'])

            # Read these keys even though they are unused below: a missing
            # key must still raise KeyError, preserving the request contract.
            problem = request.POST['problem']
            inputType = request.POST['input_type']

            # Selected arch ids arrive as a bracketed comma-separated string,
            # e.g. "[1,2,3]" -> strip brackets, split, convert to ints.
            selected = request.POST['selected'][1:-1]
            behavioral = [int(s) for s in selected.split(',')]

            # Same wire format for the non-selected ids (currently unused).
            non_selected = request.POST['non_selected'][1:-1]
            non_behavioral = [int(s) for s in non_selected.split(',')]

            # Load architecture data from the session info and keep only the
            # outputs of the selected (behavioral) architectures, preserving
            # their original order. Set membership keeps the filter O(1).
            architectures = request.session['data']
            selected_ids = set(behavioral)
            data = [
                arch['outputs'] for arch in architectures
                if arch['id'] in selected_ids
            ]

            id_list = behavioral

            from cluster import Clustering

            clustering = Clustering(data)
            labels = clustering.kMeans(param)

            return Response({"id": id_list, "labels": labels})

        except Exception as detail:
            # View boundary: log the full traceback, never let errors escape.
            logger.exception('Exception in clustering: ' + str(detail))
            return Response('')
Ejemplo n.º 12
0
def run(parameters):
    """Train and/or test a ModelCluster according to `parameters`.

    Modes:
      * ``model_filename`` set: load the saved model and predict on the
        given test data, writing the JSON result to a file and to stdout.
      * otherwise, with ``input_filename`` or standard-input data: run
        cross-validated tests if requested, or train a new model, save it,
        and optionally evaluate it on ``test_filename``.
    """
    rng = np.random.RandomState(random_seed)

    writeLog("Running test using parameters: " + json.dumps(parameters))

    inputJson = None
    if opts.input_data_from_standard_input:
        writeLog("Reading from standard input")
        inputJson = sys.stdin.readline()
        writeLog("Standard input reading finished")
        if parameters["write_input_to_file"]:
            # Optionally persist the raw stdin payload for later inspection.
            filename = get_filename(
                "testdata_", "%s_%s_%s" % (parameters["file_handle"], "", ""),
                "json")
            with open(filename, "w") as f:
                f.write(inputJson)

    # `is (not) None` replaces `== / != None` throughout: identity is the
    # correct test for an unset parameter.
    if parameters["model_filename"] is not None:
        # --- prediction mode: load an existing model and test it ---
        m = ModelCluster(rng)
        m.load(parameters["model_filename"], parameters)
        # (The original `None if x == None else x` was a no-op.)
        inputFilename = parameters["test_filename"]
        if inputFilename is not None:
            writeLog("Reading test data from file: " + inputFilename)
        el = EventLog(parameters,
                      rng,
                      inputFilename,
                      modelCluster=m,
                      inputJson=inputJson)
        jsonResult = "{}"
        if len(el.testData) > 0:
            writeLog("Test set contains %d cases." % (len(el.testData)))
            result = m.test(el)
            jsonResult = json.dumps(result)
            filename = get_filename(
                "predict_result", "%s_%s_%s" %
                (parameters["file_handle"], m.case_name, m.eventlog.filename),
                "json")
            with open(filename, "w") as f:
                f.write(jsonResult)
            writeLog("Generated results saved into file: %s" % filename)
        else:
            writeLog("Test set is empty. No results created.")
        print(jsonResult)
    elif (parameters["input_filename"] is not None) or (inputJson is not None):
        # --- training mode ---
        if parameters["cross-validation-splits"] is not None:
            EventLog.performCrossValidatedTests(parameters, inputJson, rng)
            return
        e = EventLog(parameters,
                     rng,
                     parameters["input_filename"],
                     parameters["test_data_percentage"],
                     inputJson=inputJson)
        m = ModelCluster(rng)
        m.initialize(
            parameters=parameters,
            case_clustering=Clustering(
                parameters["case_clustering_method"], parameters, {
                    "num_clusters":
                    parameters["num_case_clusters"],
                    "max_num_clusters":
                    parameters["max_num_case_clusters"],
                    "ignore_values_threshold":
                    parameters["ignore_values_threshold_for_case_attributes"]
                }),
            event_clustering=Clustering(
                parameters["event_clustering_method"], parameters, {
                    "num_clusters":
                    parameters["num_event_clusters"],
                    "max_num_clusters":
                    parameters["max_num_event_clusters"],
                    "ignore_values_threshold":
                    parameters["ignore_values_threshold_for_event_attributes"]
                }),
            rng=rng)
        trainResult = m.train(e)
        filename = m.save(parameters["file_handle"], parameters)
        writeLog("Generated model saved into file: %s" % filename)
        print(filename)

        if parameters["test_filename"] is not None:
            # Reload the just-saved model and evaluate it on the test file.
            m = ModelCluster(rng)
            m.load(filename, parameters)
            el = EventLog(parameters,
                          rng,
                          parameters["test_filename"],
                          modelCluster=m)
            result = m.test(el, 1.0, trainResult)
            jsonResult = json.dumps(result)
            filename = get_filename(
                "predict_result", "%s_%s_%s" %
                (parameters["file_handle"], m.case_name, m.eventlog.filename),
                "json")
            with open(filename, "w") as f:
                f.write(jsonResult)
            writeLog("Generated results saved into file: %s" % filename)
            print(jsonResult)