Example 1
import numpy
from numpy import exp, log
from operator import itemgetter
import scipy.spatial.distance
import scipy.cluster.hierarchy
import scipy.stats.mstats
import clusterdetect  # external helper; a stand-in sketch is given after this example

def mode_importance(weights):
	"""
	Detects separate modes in the posterior by clustering the top 99% of the posterior mass.
	
	weights is an array of entries [u, x, L, logwidth]

	Returns: a list of modes with their mean, standard deviation, and
	local evidence.
	"""
	
	# sort by likelihood, ascending, so the lowest 1% of the posterior can be stripped
	weights = sorted(weights, key=itemgetter(2))
	_, _, L, lw = weights[-1]
	# use the largest weight as normalization to avoid underflow
	offset = L + lw
	total = sum(exp(L + lw - offset) for u, x, L, lw in weights)
	logtotal = log(total) + offset  # overall log-evidence
	
	# accumulate from the smallest weights up; once past 1% of the total,
	# the remaining entries form the top 99% of the posterior
	bottom_sum = 0
	high_parts = []
	for i, (u, x, L, lw) in enumerate(weights):
		bottom_sum += exp(L + lw - offset)
		if bottom_sum > total * 0.01:
			high_parts.append(i)
	high_weights = [weights[i] for i in high_parts]
	
	# perform the clustering on the top points, in prior (unit cube) space:
	# the transformed space can have a very distorted geometry
	pos = [u for u, x, L, lw in high_weights]

	distances = scipy.spatial.distance.cdist(pos, pos)
	cluster = scipy.cluster.hierarchy.single(distances)
	clusterdists = cluster[:, 2]  # merge distances of the linkage tree

	# the idea: distances within a cluster must be much smaller than the
	# distances between clusters. Small merge distances multiplied by 10
	# stay small if there is a well-separated cluster; if there is none,
	# the threshold is dominated by clusterdists.max()/3 and no splitting occurs
	threshold = scipy.stats.mstats.mquantiles(clusterdists, 0.1)[0] * 10 + clusterdists.max() / 3

	assigned = clusterdetect.cut_cluster(cluster, distances, threshold)
	# now we have clusters with some members
	
	clusterids = sorted(set(assigned))
	results = []
	for i in clusterids:
		inweights = [w for w, a in zip(high_weights, assigned) if a == i]
		membersu = [u for u, x, L, lw in inweights]
		membersx = [x for u, x, L, lw in inweights]
		membersw = [L + lw for u, x, L, lw in inweights]
		probs = exp(numpy.array(membersw) - offset)
		# weighted mean and weighted standard deviation of this mode
		umean, ustdev = _weighted_avg_and_std(values=membersu, weights=probs)
		xmean, xstdev = _weighted_avg_and_std(values=membersx, weights=probs)
		# local evidence of this mode, and its share of the posterior
		local_evidence = log(sum(probs)) + offset
		relative_evidence = sum(probs) / total
		results.append(dict(
			members = [membersu, membersx, membersw],
			mean = xmean,
			stdev = xstdev,
			untransformed_mean = umean,
			untransformed_stdev = ustdev,
			relative_posterior_probability = relative_evidence,
			local_evidence = local_evidence,
		))
	return results
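
Two helpers used above are not part of the listing. _weighted_avg_and_std presumably returns the weighted mean and the square root of the weighted variance; a minimal sketch:

import numpy

def _weighted_avg_and_std(values, weights):
	# minimal sketch of the missing helper: weighted mean and
	# weighted standard deviation along the first axis
	values = numpy.asarray(values, dtype=float)
	mean = numpy.average(values, weights=weights, axis=0)
	variance = numpy.average((values - mean) ** 2, weights=weights, axis=0)
	return mean, numpy.sqrt(variance)

clusterdetect.cut_cluster is external as well. Judging from the call sites, it cuts the single-linkage tree at the given merge distance and returns one cluster label per point, which scipy.cluster.hierarchy.fcluster can emulate. This is a guess at the interface, not the original implementation:

import scipy.cluster.hierarchy

def cut_cluster(cluster, distances, threshold):
	# assumed behavior: cut the linkage tree at `threshold`.
	# `distances` is ignored in this stand-in; labels are shifted
	# to start at 0 so they can double as rectangle indices later.
	return scipy.cluster.hierarchy.fcluster(
		cluster, t=threshold, criterion='distance') - 1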
Example 2
    # requires at module level: numpy, scipy.spatial.distance,
    # scipy.cluster.hierarchy, scipy.stats.mstats, scipy.interpolate,
    # exp and log from numpy, plus the helpers clusterdetect.cut_cluster
    # and svm_classify (stand-in sketches accompany these examples)
    def draw_constrained(self, Lmin, priortransform, loglikelihood, previous,
                         ndim, **kwargs):
        # previous is a list of [u, x, L] entries from earlier iterations
        previousL = numpy.array([L for _, _, L in previous])
        previousu = numpy.array([u for u, _, _ in previous])
        assert previousu.shape[1] == ndim, previousu.shape
        self.iter += 1
        rebuild = self.iter % 50 == 1
        if rebuild:
            high = previousL > Lmin
            u = previousu[high]
            L = previousL[high]

            # detect clusters using hierarchical clustering
            assert len(u.shape) == 2, u.shape
            distances = scipy.spatial.distance.cdist(u, u)
            cluster = scipy.cluster.hierarchy.single(distances)

            clusterdists = cluster[:, 2]
            threshold = scipy.stats.mstats.mquantiles(
                clusterdists, 0.1)[0] * 20 + clusterdists.max() / 2
            assigned = clusterdetect.cut_cluster(cluster, distances, threshold)
            # now we have clusters with some members

            # find rough boundaries for each cluster,
            # making sure they enclose all of its points
            clusterids = sorted(set(assigned))
            rects = []
            for i in clusterids:
                inside = assigned == i
                ulow = u[inside].min(axis=0)
                uhigh = u[inside].max(axis=0)
                width = uhigh - ulow
                # expand, to avoid over-shrinkage
                ulow -= width * 0.2
                uhigh += width * 0.2
                rects.append((i, (ulow, uhigh, log(uhigh - ulow).sum())))
            rects = dict(rects)

            # now that we are a little more familiar with our clusters,
            # we want to sample from them. For this we build a boundary
            # between the high and the low region: a multi-class SVM,
            # one class per cluster (-1 for points below Lmin)
            rectid = numpy.zeros(len(previous), dtype=int) - 1
            rectid[high] = assigned
            clf, svmtransform = None, None
            # only worth training the SVM once a sizable fraction of the
            # points lies below Lmin, and only every 200th iteration
            if high.mean() < 0.9 and self.iter % 200 == 1:
                clf, svmtransform = svm_classify(previousu, rectid)
            self.clf = clf
            self.svmtransform = svmtransform
            self.rects = rects

        if len(self.rects) == 1:

            def get_rect_id(x):
                return 0
        else:
            # build an inverse-CDF lookup over the rectangles, so that
            # larger rectangles (by log-volume) are drawn proportionally
            # more often; cluster ids are assumed to be 0..n-1 here
            x = range(len(self.rects))
            y = numpy.array([self.rects[i][2] for i in x])
            minlogsize = y.min()
            y = exp(y - minlogsize)
            y /= y.sum()
            y = [0] + y.cumsum().tolist() + [1]
            x = [x[0]] + list(x) + [x[-1]]
            get_rect_id = scipy.interpolate.interp1d(y, x, kind='zero')

        ntoaccept = 0
        while True:
            # draw from the rectangles, then filter through the SVM
            dice = numpy.random.random()
            i = int(get_rect_id(dice))
            ulow, uhigh, logsize = self.rects[i]

            u = numpy.random.uniform(ulow, uhigh, size=ndim)
            if len(self.rects) != 1:
                # volume-weighted count of the rectangles containing u
                # (loop variables renamed so they do not shadow ulow/uhigh/logsize)
                nrect = sum(
                    exp(rlogsize - minlogsize)
                    for rlow, rhigh, rlogsize in self.rects.values()
                    if (u >= rlow).all() and (u <= rhigh).all())
                # accept with probability V_i / (sum of containing volumes);
                # this makes the combined proposal uniform over the union
                coin = numpy.random.uniform(0, 1)
                accept = coin < exp(logsize - minlogsize) / nrect
                if not accept:
                    continue

            # if the point survives the rectangle step, ask the SVM;
            # predict_proba(...)[0][0] is the probability of the first
            # class (-1, i.e. below Lmin, in sklearn's sorted ordering)
            if self.clf is not None:
                prob = self.clf.predict_proba(self.svmtransform(u))[0][0]
                # allow roughly one false positive, and evaluate anyway
                # every 100th candidate so the SVM cannot starve us
                if prob > 1 - 1. / len(previous) and ntoaccept % 100 != 95:
                    continue

            x = priortransform(u)
            L = loglikelihood(x)
            ntoaccept += 1
            if L >= Lmin:
                # yay, we win
                if ntoaccept > 5:
                    print('%d samples before accept' % ntoaccept, u, x, L)
                return u, x, L, ntoaccept
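
svm_classify is another external helper; the code above only requires that it returns a classifier with predict_proba and a matching input transform. A plausible stand-in, assuming scikit-learn (not the original implementation):

import numpy
from sklearn.svm import SVC

def svm_classify(points, labels):
    # standardize the coordinates, then fit a probabilistic multi-class
    # SVM so that clf.predict_proba(transform(u)) can be queried later
    mean = points.mean(axis=0)
    std = points.std(axis=0) + 1e-10  # guard against zero spread
    def transform(u):
        return numpy.atleast_2d((u - mean) / std)
    clf = SVC(probability=True)
    clf.fit(transform(points), labels)
    return clf, transform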
Example 3
    # same module-level requirements as Example 2; additionally uses
    # self.optimizer, self.maxima and self.sampler set up elsewhere
    def draw_constrained(self, Lmin, priortransform, loglikelihood, previous,
                         ndim, **kwargs):
        # previous is a list of [u, x, L] entries from earlier iterations
        previousL = numpy.array([L for _, _, L in previous])
        previousu = numpy.array([u for u, _, _ in previous])
        assert previousu.shape[1] == ndim, previousu.shape
        self.iter += 1
        rebuild = self.iter % 50 == 1
        if rebuild:
            high = previousL > Lmin
            u = previousu[high]
            L = previousL[high]

            # detect clusters using hierarchical clustering
            assert len(u.shape) == 2, u.shape
            distances = scipy.spatial.distance.cdist(u, u)
            cluster = scipy.cluster.hierarchy.single(distances)

            clusterdists = cluster[:, 2]
            threshold = scipy.stats.mstats.mquantiles(
                clusterdists, 0.1)[0] * 20 + clusterdists.max() / 2
            assigned = clusterdetect.cut_cluster(cluster, distances, threshold)
            # now we have clusters with some members

            # find rough boundaries for each cluster,
            # making sure they enclose all of its points
            clusterids = sorted(set(assigned))
            rects = []
            for i in clusterids:
                inside = assigned == i
                ulow = u[inside].min(axis=0)
                uhigh = u[inside].max(axis=0)
                j = L[inside].argmax()
                ustart = u[inside][j]
                Lstart = L[inside][j]
                assert len(ulow) == ndim
                assert len(uhigh) == ndim
                assert len(ustart) == ndim

                # find the maximum within this cluster's bounding box
                def isinside(ui):
                    return (ui >= ulow).all() and (ui <= uhigh).all()
                assert isinside(ustart)
                clustermaxima = [[mu, mL] for mu, mL in self.maxima
                                 if isinside(mu)]
                if len(clustermaxima) == 0:
                    print('optimizing in cluster', i, ulow, uhigh)

                    # penalize proposals outside the box; otherwise return
                    # the negative log-likelihood for minimization
                    def minfunc(ui):
                        if not isinside(ui):
                            return 1e300
                        return -loglikelihood(priortransform(ui))

                    ubest = self.optimizer(minfunc, ustart)
                    assert len(ubest) == ndim
                    Lbest = loglikelihood(priortransform(ubest))
                    print('new best:', ubest, Lbest)
                    if self.sampler:
                        self.sampler.Lmax = max(self.sampler.Lmax, Lbest)
                    self.maxima.append([ubest, Lbest])
                else:
                    if len(clustermaxima) > 1:
                        print('WARNING: multiple maxima fitted already', clustermaxima)
                    ubest, Lbest = clustermaxima[0]

                rects.append((i, (ulow, uhigh, ubest, Lbest)))
                print('adding new rectangle:', (i, (ulow, uhigh, ubest, Lbest)))
            rects = dict(rects)

            # now that we are a little more familiar with our clusters,
            # we want to sample from them. For this we build a boundary
            # between the high and the low region: a multi-class SVM,
            # one class per cluster (-1 for points below Lmin)
            rectid = numpy.zeros(len(previous), dtype=int) - 1
            rectid[high] = assigned
            try:
                if high.mean() >= 0.9:
                    raise ValueError('not worth it yet')
                clf, svmtransform = svm_classify(previousu, rectid)
            except ValueError as e:
                clf, svmtransform = None, None
                print('WARNING: SVM step failed:', e)
            self.clf = clf
            self.svmtransform = svmtransform
            self.rects = rects

        ntoaccept = 0
        while True:
            # pick a rectangle uniformly at random (cluster ids are
            # assumed to be 0..n-1), then draw a point from it
            i = numpy.random.randint(0, len(self.rects))
            ulow, uhigh, ubest, Lbest = self.rects[i]

            assert len(ulow) == ndim
            assert len(uhigh) == ndim
            u = numpy.random.uniform(ulow, uhigh, size=ndim)
            assert len(u) == ndim

            # count how many rectangles contain the point (loop variables
            # renamed so that ulow/uhigh above are not shadowed)
            nrect = sum((u >= rlow).all() and (u <= rhigh).all()
                        for rlow, rhigh, rbest, rLbest in self.rects.values())

            # reject with probability 1 - 1/nrect to compensate for overlap
            if nrect > 1 and numpy.random.uniform(0, 1) > 1. / nrect:
                continue

            # if the point survives the rectangle step, ask the SVM;
            # predict_proba(...)[0][0] is the probability of the first
            # class (-1, i.e. below Lmin, in sklearn's sorted ordering)
            if self.clf is not None:
                prob = self.clf.predict_proba(self.svmtransform(u))[0][0]
                # allow roughly one false positive, and evaluate anyway
                # every 100th candidate so the SVM cannot starve us
                if prob > 1 - 1. / len(previous) and ntoaccept % 100 != 95:
                    continue

            x = priortransform(u)
            L = loglikelihood(x)
            ntoaccept += 1
            if L > Lmin:
                # yay, we win
                return u, x, L, ntoaccept
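
The rectangle-overlap correction shared by Examples 2 and 3 can be checked in isolation. A self-contained demo (names are illustrative, not from the source): with equal-volume boxes picked uniformly, accepting a point with probability 1/nrect yields a uniform density over the union; Example 2 generalizes this to unequal volumes by volume-weighting both the pick and the acceptance.

import numpy

def sample_union(rects, ndim):
    # draw one point uniformly from a union of equal-volume boxes:
    # pick a box at random, draw inside it, then accept with
    # probability 1/(number of boxes containing the point)
    while True:
        low, high = rects[numpy.random.randint(len(rects))]
        u = numpy.random.uniform(low, high, size=ndim)
        nrect = sum((u >= lo).all() and (u <= hi).all() for lo, hi in rects)
        if numpy.random.uniform() < 1. / nrect:
            return u

rects = [(numpy.zeros(2), numpy.ones(2) * 0.6),
         (numpy.ones(2) * 0.4, numpy.ones(2))]
pts = numpy.array([sample_union(rects, 2) for _ in range(10000)])
# a 2-D histogram of pts is flat over the union, including the
# overlap square [0.4, 0.6] x [0.4, 0.6]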