Python Partition.get_membership 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: clustering

클래스/타입: Partition

메소드/함수: get_membership

hotexamples.com에서의 예제들: 4

Python Partition.get_membership 예제 4개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python clustering.Partition.get_membership의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Partition(17)

get_membership(2)

자주 사용되는 메소드들

Partition (17)

get_membership (2)

예제 #1

파일 보기

파일: algorithms.py 프로젝트: mgperry/treeCl

 def clusters_init(self):
     """Seed ``self.nclusters`` clusters and build the initial partition.

     One distinct, randomly chosen record is labelled for each cluster
     (labels ``1..k``); every other record starts with label ``0``.  Each
     record is then moved to the cluster with the highest ``self.ml``
     score, unless it is currently the only member of its cluster.

     Sets ``self.partition`` to the resulting ``Partition`` and ``self.L``
     to its score as reported by ``self.scorer.score``.

     Raises:
         ValueError: if there are fewer records than clusters, so that
             distinct seeds cannot be drawn.
     """
     k = self.nclusters
     nrecords = len(self.scorer.records)
     if k > nrecords:
         raise ValueError(
             'Cannot seed %d clusters from %d records.' % (k, nrecords))

     assignment = [0] * nrecords
     # Draw seeds without replacement: independent random draws can pick
     # the same record twice, leaving fewer than k seeded clusters and
     # making assign_clusters() fail on members[n].
     seeds = np.random.permutation(nrecords)[:k]
     for label, seed in enumerate(seeds, 1):
         assignment[seed] = label

     partition = Partition(assignment)
     clusters = [0] * k
     # NOTE(review): the [1:] slice presumably drops the group of still
     # unassigned (label 0) records -- confirm against
     # Partition.get_membership.
     members = partition.get_membership()[1:]
     self.assign_clusters(clusters, members)

     for index, record in enumerate(self.scorer.records):
         label = assignment[index]
         # A record that is the sole member of its cluster stays put so the
         # cluster does not become empty; only score records that can move.
         if label == 0 or assignment.count(label) > 1:
             scores = [self.ml(record, clusters[n]) for n in range(k)]
             assignment[index] = scores.index(max(scores)) + 1

     self.partition = Partition(assignment)
     self.L = self.scorer.score(self.partition)

예제 #2

파일 보기

파일: algorithms.py 프로젝트: mgperry/treeCl

 def clusters_init(self):
     """Seed ``self.nclusters`` clusters and build the initial partition.

     One distinct, randomly chosen record is labelled for each cluster
     (labels ``1..k``); every other record starts with label ``0``.  Each
     record is then moved to the cluster with the highest ``self.ml``
     score, unless it is currently the only member of its cluster.

     Sets ``self.partition`` to the resulting ``Partition`` and ``self.L``
     to its score as reported by ``self.scorer.score``.

     Raises:
         ValueError: if there are fewer records than clusters, so that
             distinct seeds cannot be drawn.
     """
     k = self.nclusters
     nrecords = len(self.scorer.records)
     if k > nrecords:
         raise ValueError(
             'Cannot seed %d clusters from %d records.' % (k, nrecords))

     assignment = [0] * nrecords
     # Draw seeds without replacement: independent random draws can pick
     # the same record twice, leaving fewer than k seeded clusters and
     # making assign_clusters() fail on members[n].
     seeds = np.random.permutation(nrecords)[:k]
     for label, seed in enumerate(seeds, 1):
         assignment[seed] = label

     partition = Partition(assignment)
     clusters = [0] * k
     # NOTE(review): the [1:] slice presumably drops the group of still
     # unassigned (label 0) records -- confirm against
     # Partition.get_membership.
     members = partition.get_membership()[1:]
     self.assign_clusters(clusters, members)

     for index, record in enumerate(self.scorer.records):
         label = assignment[index]
         # A record that is the sole member of its cluster stays put so the
         # cluster does not become empty; only score records that can move.
         if label == 0 or assignment.count(label) > 1:
             scores = [
                 self.ml(record, clusters[n]) for n in range(k)
             ]
             assignment[index] = scores.index(max(scores)) + 1

     self.partition = Partition(assignment)
     self.L = self.scorer.score(self.partition)

예제 #3

파일 보기

파일: algorithms.py 프로젝트: mgperry/treeCl

class EMTrees(object):
    """Cluster records by repeatedly moving each to its best-scoring cluster.

    A partition of ``collection.records`` into ``nclusters`` groups is
    kept in ``self.partition`` together with its score ``self.L`` (from
    ``Scorer.score``).  ``clusters_init`` or ``random_partition`` must be
    called before any of the ``maximise*`` methods, since those read
    ``self.partition`` and ``self.L``.
    """

    def __init__(
        self,
        collection,
        nclusters,
        metric='euc',
        tmpdir=None,
    ):
        """Set up the scorer and clustering parameters.

        Args:
            collection: object providing ``records``, ``analysis``,
                ``datatype`` and ``tmpdir`` attributes.
            nclusters: number of clusters; must be an int greater than 1.
            metric: metric name handed to ``DistanceMatrix`` in ``dist``.
            tmpdir: working directory for temporary files.  When omitted,
                an already existing ``tmpdir`` attribute is kept,
                otherwise ``collection.tmpdir`` is used.

        Raises:
            Exception: if ``nclusters`` is not an int greater than 1.
        """
        if not isinstance(nclusters, int) or nclusters <= 1:
            raise Exception('Need appropriate value for number of clusters.')

        self.nclusters = nclusters
        self.scorer = Scorer(collection.records,
                             collection.analysis)  # Could check for entries
        self.datatype = collection.datatype
        self.metric = metric

        # Honour an explicitly passed tmpdir (it used to be silently
        # ignored); otherwise keep the previous fallback behaviour.
        if tmpdir is not None:
            self.tmpdir = tmpdir
        elif not hasattr(self, 'tmpdir'):
            self.tmpdir = collection.tmpdir

    def clusters_init(self):
        """Seed ``self.nclusters`` clusters and build the initial partition.

        One distinct, randomly chosen record is labelled for each cluster
        (labels ``1..k``); every other record starts with label ``0``.
        Each record is then moved to the cluster with the highest
        ``self.ml`` score, unless it is currently the only member of its
        cluster.  Sets ``self.partition`` and ``self.L``.

        Raises:
            ValueError: if there are fewer records than clusters, so that
                distinct seeds cannot be drawn.
        """
        k = self.nclusters
        nrecords = len(self.scorer.records)
        if k > nrecords:
            raise ValueError(
                'Cannot seed %d clusters from %d records.' % (k, nrecords))

        assignment = [0] * nrecords
        # Draw seeds without replacement: independent random draws can pick
        # the same record twice, leaving fewer than k seeded clusters and
        # making assign_clusters() fail on members[n].
        seeds = np.random.permutation(nrecords)[:k]
        for label, seed in enumerate(seeds, 1):
            assignment[seed] = label

        partition = Partition(assignment)
        clusters = [0] * k
        # NOTE(review): the [1:] slice presumably drops the group of still
        # unassigned (label 0) records -- confirm against
        # Partition.get_membership.
        members = partition.get_membership()[1:]
        self.assign_clusters(clusters, members)

        for index, record in enumerate(self.scorer.records):
            label = assignment[index]
            # A sole member stays put so its cluster does not become empty;
            # only score records that can actually move.
            if label == 0 or assignment.count(label) > 1:
                scores = [self.ml(record, clusters[n]) for n in range(k)]
                assignment[index] = scores.index(max(scores)) + 1

        self.partition = Partition(assignment)
        self.L = self.scorer.score(self.partition)

    def random_partition(self):
        """Set ``self.partition`` to a uniformly random labelling and score it.

        NOTE(review): labels drawn here are ``0..nclusters-1`` whereas the
        other methods write labels ``1..nclusters`` and treat ``0`` as
        reassignable -- confirm this is what ``Partition`` expects.
        """
        self.partition = Partition(
            tuple(
                np.random.randint(self.nclusters,
                                  size=len(self.scorer.records))))
        self.L = self.scorer.score(self.partition)

    def assign_clusters(self, clusters, members):
        """Rebuild, in place, every cluster whose membership has changed.

        Args:
            clusters: list of length ``self.nclusters`` holding ``Cluster``
                objects or falsy placeholders.
            members: per-cluster membership, indexed like ``clusters``.

        Returns:
            The same ``clusters`` list, after updating it in place.
        """
        for n in range(self.nclusters):
            if not clusters[n] or clusters[n].members != members[n]:
                clusters[n] = Cluster(members[n], self.scorer.records,
                                      self.scorer.analysis)

        return clusters

    def maximise(self, method):
        """Sweep over all records until the partition score stops improving.

        Args:
            method: name of the scoring method on this object (looked up
                with ``getattr``), called as ``alg(record, cluster)``.

        Each sweep moves every movable record to its best-scoring cluster.
        A sweep that raises the score replaces ``self.partition`` and
        ``self.L``; the loop ends after the second sweep that does not.
        """
        clusters = [0] * self.nclusters
        alg = getattr(self, method)
        count = 0

        while True:
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            for index, record in enumerate(self.scorer.records):
                label = assignment[index]
                # Sole members stay put; skip scoring them altogether.
                if label == 0 or assignment.count(label) > 1:
                    scores = [
                        alg(record, clusters[n])
                        for n in range(self.nclusters)
                    ]
                    assignment[index] = scores.index(max(scores)) + 1

            assignment = Partition(assignment)
            score = self.scorer.score(assignment)

            if score > self.L:
                self.L = score
                self.partition = assignment
            else:
                count += 1
                if count > 1:
                    break  # Algorithm is deterministic so no need for more iterations

    def maximise_random(self, method):
        """Improve the partition by reassigning one random record at a time.

        Args:
            method: name of the scoring method on this object (looked up
                with ``getattr``), called as ``alg(record, cluster)``.

        Records are drawn at random without repetition.  An improving move
        is accepted and resets the bookkeeping; the loop ends once the
        number of consecutive non-improving moves equals
        ``len(assignment)``.
        """
        clusters = [0] * self.nclusters
        alg = getattr(self, method)
        count = 0
        sampled = []

        while True:
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            index = randint(0, len(self.scorer.records) - 1)

            # Skip records already tried since the last improvement.
            if index in sampled:
                continue
            record = self.scorer.records[index]
            sampled.append(index)

            label = assignment[index]
            if label == 0 or assignment.count(label) > 1:
                scores = [
                    alg(record, clusters[n]) for n in range(self.nclusters)
                ]
                assignment[index] = scores.index(max(scores)) + 1

            assignment = Partition(assignment)
            score = self.scorer.score(assignment)

            if score > self.L:
                self.L = score
                self.partition = assignment
                sampled = []
                count = 0
            else:
                count += 1
                if count == len(assignment):
                    break

    def maximise_heuristic(self):
        """Try 1000 stochastic single-record moves between the top two clusters.

        For a random record the two clusters with the highest ``self.ml``
        log-likelihoods are found, and the better one is chosen with
        probability ``exp(ll_best - logsum(ll_best, ll_second))``.  Every
        tenth candidate partition is scored; one that beats ``self.L`` is
        stored in ``self.max_L`` / ``self.max_partition``.

        NOTE(review): ``self.partition`` and ``self.L`` are never updated
        here, so every iteration starts from the same partition -- confirm
        that this is intended.
        """
        clusters = [0] * self.nclusters

        for i in range(1000):
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            index = randint(0, len(self.scorer.records) - 1)
            record = self.scorer.records[index]

            lls = [self.ml(record, clusters[n]) for n in range(self.nclusters)]

            # Rank cluster indices by log-likelihood, best first.  Ranking
            # indices (instead of popping the maximum and searching the
            # shortened list) keeps the runner-up index valid for `clusters`.
            # The sort is stable, so ties resolve to the lowest index.
            ranked = sorted(range(len(lls)), key=lls.__getitem__,
                            reverse=True)
            best, second = ranked[0], ranked[1]

            # np.exp replaces the non-existent `np.maths.exp`.
            p_best = np.exp(lls[best] - logsum(lls[best], lls[second]))

            # Pick the best cluster with probability p_best (the comparison
            # used to be inverted, favouring the runner-up).
            if np.random.uniform() < p_best:
                choice = best
            else:
                choice = second

            label = assignment[index]
            if label == 0 or assignment.count(label) > 1:
                assignment[index] = choice + 1

            assignment = Partition(assignment)

            if i % 10 == 0:
                score = self.scorer.score(assignment)

                if score > self.L:
                    self.max_L = score
                    self.max_partition = assignment

    def dist(self, obj1, obj2):
        """Return the negated distance between the trees of two objects.

        The ``[0][1]`` entry of a ``DistanceMatrix`` built from both trees
        with ``self.metric`` is negated, so that a larger value means a
        closer pair, matching the ``max(scores)`` selection in callers.
        """
        distance = DistanceMatrix([obj1.tree, obj2.tree], self.metric)[0][1]
        return -distance

    def ml(self, record, cluster, verbose=1):
        """Return the Phyml score of ``record`` given the tree of ``cluster``.

        The cluster tree is written to ``<tmpdir>/input_tree`` and passed
        to Phyml as its input tree.

        Args:
            record: record handed to ``Phyml``.
            cluster: object whose ``tree`` is used as the input tree.
            verbose: verbosity level forwarded to ``Phyml.run``.
        """
        p = Phyml(record, tmpdir=self.tmpdir)
        input_tree = os.path.join(self.tmpdir, 'input_tree')
        cluster.tree.write_to_file(input_tree)
        p.add_tempfile(input_tree)
        p.add_flag('--inputtree', input_tree)
        p.add_flag('-o', 'r')  # Optimise only on substitutions
        p.add_flag('-a', 'e')
        p.add_flag('-b', 0)
        p.add_flag('-c', 4)
        p.add_flag('--quiet', '')

        # No data-type flag is added for any other datatype value.
        if self.datatype == 'protein':
            p.add_flag('-d', 'aa')
        elif self.datatype == 'dna':
            p.add_flag('-d', 'nt')

        score = p.run(verbosity=verbose).score
        return score

예제 #4

파일 보기

파일: algorithms.py 프로젝트: mgperry/treeCl

class EMTrees(object):
    """Cluster records by repeatedly moving each to its best-scoring cluster.

    A partition of ``collection.records`` into ``nclusters`` groups is
    kept in ``self.partition`` together with its score ``self.L`` (from
    ``Scorer.score``).  ``clusters_init`` or ``random_partition`` must be
    called before any of the ``maximise*`` methods, since those read
    ``self.partition`` and ``self.L``.
    """

    def __init__(
        self,
        collection,
        nclusters,
        metric='euc',
        tmpdir=None,
    ):
        """Set up the scorer and clustering parameters.

        Args:
            collection: object providing ``records``, ``analysis``,
                ``datatype`` and ``tmpdir`` attributes.
            nclusters: number of clusters; must be an int greater than 1.
            metric: metric name handed to ``DistanceMatrix`` in ``dist``.
            tmpdir: working directory for temporary files.  When omitted,
                an already existing ``tmpdir`` attribute is kept,
                otherwise ``collection.tmpdir`` is used.

        Raises:
            Exception: if ``nclusters`` is not an int greater than 1.
        """
        if not isinstance(nclusters, int) or nclusters <= 1:
            raise Exception('Need appropriate value for number of clusters.')

        self.nclusters = nclusters
        self.scorer = Scorer(collection.records, collection.analysis)  # Could check for entries
        self.datatype = collection.datatype
        self.metric = metric

        # Honour an explicitly passed tmpdir (it used to be silently
        # ignored); otherwise keep the previous fallback behaviour.
        if tmpdir is not None:
            self.tmpdir = tmpdir
        elif not hasattr(self, 'tmpdir'):
            self.tmpdir = collection.tmpdir

    def clusters_init(self):
        """Seed ``self.nclusters`` clusters and build the initial partition.

        One distinct, randomly chosen record is labelled for each cluster
        (labels ``1..k``); every other record starts with label ``0``.
        Each record is then moved to the cluster with the highest
        ``self.ml`` score, unless it is currently the only member of its
        cluster.  Sets ``self.partition`` and ``self.L``.

        Raises:
            ValueError: if there are fewer records than clusters, so that
                distinct seeds cannot be drawn.
        """
        k = self.nclusters
        nrecords = len(self.scorer.records)
        if k > nrecords:
            raise ValueError(
                'Cannot seed %d clusters from %d records.' % (k, nrecords))

        assignment = [0] * nrecords
        # Draw seeds without replacement: independent random draws can pick
        # the same record twice, leaving fewer than k seeded clusters and
        # making assign_clusters() fail on members[n].
        seeds = np.random.permutation(nrecords)[:k]
        for label, seed in enumerate(seeds, 1):
            assignment[seed] = label

        partition = Partition(assignment)
        clusters = [0] * k
        # NOTE(review): the [1:] slice presumably drops the group of still
        # unassigned (label 0) records -- confirm against
        # Partition.get_membership.
        members = partition.get_membership()[1:]
        self.assign_clusters(clusters, members)

        for index, record in enumerate(self.scorer.records):
            label = assignment[index]
            # A sole member stays put so its cluster does not become empty;
            # only score records that can actually move.
            if label == 0 or assignment.count(label) > 1:
                scores = [self.ml(record, clusters[n]) for n in range(k)]
                assignment[index] = scores.index(max(scores)) + 1

        self.partition = Partition(assignment)
        self.L = self.scorer.score(self.partition)

    def random_partition(self):
        """Set ``self.partition`` to a uniformly random labelling and score it.

        NOTE(review): labels drawn here are ``0..nclusters-1`` whereas the
        other methods write labels ``1..nclusters`` and treat ``0`` as
        reassignable -- confirm this is what ``Partition`` expects.
        """
        self.partition = Partition(tuple(np.random.randint(self.nclusters,
                                   size=len(self.scorer.records))))
        self.L = self.scorer.score(self.partition)

    def assign_clusters(self, clusters, members):
        """Rebuild, in place, every cluster whose membership has changed.

        Args:
            clusters: list of length ``self.nclusters`` holding ``Cluster``
                objects or falsy placeholders.
            members: per-cluster membership, indexed like ``clusters``.

        Returns:
            The same ``clusters`` list, after updating it in place.
        """
        for n in range(self.nclusters):
            if not clusters[n] or clusters[n].members != members[n]:
                clusters[n] = Cluster(members[n], self.scorer.records, self.scorer.analysis)

        return clusters

    def maximise(self, method):
        """Sweep over all records until the partition score stops improving.

        Args:
            method: name of the scoring method on this object (looked up
                with ``getattr``), called as ``alg(record, cluster)``.

        Each sweep moves every movable record to its best-scoring cluster.
        A sweep that raises the score replaces ``self.partition`` and
        ``self.L``; the loop ends after the second sweep that does not.
        """
        clusters = [0] * self.nclusters
        alg = getattr(self, method)
        count = 0

        while True:
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            for index, record in enumerate(self.scorer.records):
                label = assignment[index]
                # Sole members stay put; skip scoring them altogether.
                if label == 0 or assignment.count(label) > 1:
                    scores = [alg(record, clusters[n]) for n in range(self.nclusters)]
                    assignment[index] = scores.index(max(scores)) + 1

            assignment = Partition(assignment)
            score = self.scorer.score(assignment)

            if score > self.L:
                self.L = score
                self.partition = assignment
            else:
                count += 1
                if count > 1:
                    break  # Algorithm is deterministic so no need for more iterations

    def maximise_random(self, method):
        """Improve the partition by reassigning one random record at a time.

        Args:
            method: name of the scoring method on this object (looked up
                with ``getattr``), called as ``alg(record, cluster)``.

        Records are drawn at random without repetition.  An improving move
        is accepted and resets the bookkeeping; the loop ends once the
        number of consecutive non-improving moves equals
        ``len(assignment)``.
        """
        clusters = [0] * self.nclusters
        alg = getattr(self, method)
        count = 0
        sampled = []

        while True:
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            index = randint(0, len(self.scorer.records) - 1)

            # Skip records already tried since the last improvement.
            if index in sampled:
                continue
            record = self.scorer.records[index]
            sampled.append(index)

            label = assignment[index]
            if label == 0 or assignment.count(label) > 1:
                scores = [alg(record, clusters[n]) for n in range(self.nclusters)]
                assignment[index] = scores.index(max(scores)) + 1

            assignment = Partition(assignment)
            score = self.scorer.score(assignment)

            if score > self.L:
                self.L = score
                self.partition = assignment
                sampled = []
                count = 0
            else:
                count += 1
                if count == len(assignment):
                    break

    def maximise_heuristic(self):
        """Try 1000 stochastic single-record moves between the top two clusters.

        For a random record the two clusters with the highest ``self.ml``
        log-likelihoods are found, and the better one is chosen with
        probability ``exp(ll_best - logsum(ll_best, ll_second))``.  Every
        tenth candidate partition is scored; one that beats ``self.L`` is
        stored in ``self.max_L`` / ``self.max_partition``.

        NOTE(review): ``self.partition`` and ``self.L`` are never updated
        here, so every iteration starts from the same partition -- confirm
        that this is intended.
        """
        clusters = [0] * self.nclusters

        for i in range(1000):
            self.assign_clusters(clusters, self.partition.get_membership())
            assignment = list(self.partition.partition_vector)

            index = randint(0, len(self.scorer.records) - 1)
            record = self.scorer.records[index]

            lls = [self.ml(record, clusters[n]) for n in range(self.nclusters)]

            # Rank cluster indices by log-likelihood, best first.  Ranking
            # indices (instead of popping the maximum and searching the
            # shortened list) keeps the runner-up index valid for `clusters`.
            # The sort is stable, so ties resolve to the lowest index.
            ranked = sorted(range(len(lls)), key=lls.__getitem__, reverse=True)
            best, second = ranked[0], ranked[1]

            # np.exp replaces the non-existent `np.maths.exp`.
            p_best = np.exp(lls[best] - logsum(lls[best], lls[second]))

            # Pick the best cluster with probability p_best (the comparison
            # used to be inverted, favouring the runner-up).
            if np.random.uniform() < p_best:
                choice = best
            else:
                choice = second

            label = assignment[index]
            if label == 0 or assignment.count(label) > 1:
                assignment[index] = choice + 1

            assignment = Partition(assignment)

            if i % 10 == 0:
                score = self.scorer.score(assignment)

                if score > self.L:
                    self.max_L = score
                    self.max_partition = assignment

    def dist(self, obj1, obj2):
        """Return the negated distance between the trees of two objects.

        The ``[0][1]`` entry of a ``DistanceMatrix`` built from both trees
        with ``self.metric`` is negated, so that a larger value means a
        closer pair, matching the ``max(scores)`` selection in callers.
        """
        distance = DistanceMatrix([obj1.tree, obj2.tree], self.metric)[0][1]
        return -distance

    def ml(self, record, cluster, verbose=1):
        """Return the Phyml score of ``record`` given the tree of ``cluster``.

        The cluster tree is written to ``<tmpdir>/input_tree`` and passed
        to Phyml as its input tree.

        Args:
            record: record handed to ``Phyml``.
            cluster: object whose ``tree`` is used as the input tree.
            verbose: verbosity level forwarded to ``Phyml.run``.
        """
        p = Phyml(record, tmpdir=self.tmpdir)
        input_tree = os.path.join(self.tmpdir, 'input_tree')
        cluster.tree.write_to_file(input_tree)
        p.add_tempfile(input_tree)
        p.add_flag('--inputtree', input_tree)
        p.add_flag('-o', 'r')  # Optimise only on substitutions
        p.add_flag('-a', 'e')
        p.add_flag('-b', 0)
        p.add_flag('-c', 4)
        p.add_flag('--quiet', '')

        # No data-type flag is added for any other datatype value.
        if self.datatype == 'protein':
            p.add_flag('-d', 'aa')
        elif self.datatype == 'dna':
            p.add_flag('-d', 'nt')

        score = p.run(verbosity=verbose).score
        return score