Example #1
def calc_avg_rcmks(parser):
    options,args= parser.parse_args()
    njks= 101
    nmks= 101
    jks= numpy.linspace(0.5,0.8,njks)
    mks= numpy.linspace(-0.5,-3.,nmks)
    if options.basti:
        zs= numpy.array([0.004,0.008,0.01,0.0198,0.03,0.04])
        zsolar= 0.019
    elif options.parsec:
        zs= numpy.arange(0.0005,0.06005,0.0005)
#        zs= numpy.array([0.01,0.02])
        zsolar= 0.019
    else:
        zs= numpy.arange(0.0005,0.03005,0.0005)
#        zs= numpy.array([0.01,0.02])
        zsolar= 0.019
    if not os.path.exists(options.outfilename):
        logpz= localzdist(zs,zsolar=zsolar)
        logmkp= numpy.zeros((len(zs),njks,nmks))
        logp= numpy.zeros((len(zs),njks,nmks))      
        funcargs= (zs,options,njks,jks,nmks,mks,logpz)
        multOut= multi.parallel_map((lambda x: indiv_calc(x,
                                                          *funcargs)),
                                    range(len(zs)),
                                    numcores=numpy.amin([64,len(zs),
                                                         multiprocessing.cpu_count()]))
        for ii in range(len(zs)):
            logmkp[ii,:,:]= multOut[ii][0,:,:]
            logp[ii,:,:]= multOut[ii][1,:,:]
        save_pickles(options.outfilename,logmkp,logp)
    else:
        savefile= open(options.outfilename,'rb')
        logmkp= pickle.load(savefile)
        logp= pickle.load(savefile)
        savefile.close()
    indx= numpy.isnan(logp)
    logp[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
    logmkp[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
    #Average the peak mk, so first locate the peak
    for ii in range(len(zs)):
        for jj in range(njks):
            maxmkindx= numpy.argmax(logp[ii,jj,:])
            totlogp= maxentropy.logsumexp(logp[ii,jj,:])
            logmkp[ii,jj,:]= logmkp[ii,jj,maxmkindx]-logp[ii,jj,maxmkindx]+totlogp
            logp[ii,jj,:]= totlogp
    avgmk= numpy.exp(maxentropy.logsumexp(logmkp.flatten())\
                         -maxentropy.logsumexp(logp.flatten()))
    solindx= numpy.argmin(numpy.fabs(zs-0.017))
    avgmksolar= numpy.exp(maxentropy.logsumexp(logmkp[solindx,:,:].flatten())\
                              -maxentropy.logsumexp(logp[solindx,:,:].flatten()))
    print "Average mk: %f" % (-avgmk)
    print "Average mk if solar: %f" % (-avgmksolar)
    return -avgmk
Example #2
def calc_model(params,options,data,logpiso,logpisodwarf,df,nlocs,locations,iso):
    avg_plate_model= numpy.zeros(nlocs)
    for ii in range(nlocs):
        #Calculate vlos | los
        indx= (data['LOCATION'] == locations[ii])
        thesedata= data[indx]
        thislogpiso= logpiso[indx,:]
        if options.dwarf:
            thislogpisodwarf= logpisodwarf[indx,:]
        else:
            thislogpisodwarf= None
        vlos= numpy.linspace(-200.,200.,options.nvlos)
        pvlos= numpy.zeros(options.nvlos)
        if not options.multi is None:
            pvlos= multi.parallel_map((lambda x: pvlosplate(params,vlos[x],
                                                            thesedata,
                                                            df,options,
                                                            thislogpiso,
                                                            thislogpisodwarf,iso)),
                                      range(options.nvlos),
                                      numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi]))
        else:
            for jj in range(options.nvlos):
                print(jj)
                pvlos[jj]= pvlosplate(params,vlos[jj],thesedata,df,options,
                                      thislogpiso,thislogpisodwarf,iso)
        pvlos-= logsumexp(pvlos)
        pvlos= numpy.exp(pvlos)
        #Calculate mean and velocity dispersion
        avg_plate_model[ii]= numpy.sum(vlos*pvlos)
    return avg_plate_model
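The normalization above (subtract logsumexp, then exponentiate) is the standard way to turn unnormalized log-probabilities into probabilities without underflow. A minimal standalone sketch of the same pattern, assuming scipy.special provides logsumexp (the values are illustrative):

import numpy as np
from scipy.special import logsumexp

log_p = np.array([-1050.0, -1052.0, -1049.5])  # unnormalized log-probabilities
p = np.exp(log_p - logsumexp(log_p))           # normalize in log space first
print(p, p.sum())  # [0.359 0.049 0.592] 1.0; naive np.exp(log_p) would underflow to 0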
Example #3
 def call_polymorphism(self, obs, post):
     """Get the polymorphism probability.
     This is the posterior probability that the strain is homozygous
     for the non-reference base with the highest count at this position.
     @param obs: one ref base count and three non-ref base counts
     @param post: the posterior hidden state probabilities
     @return: the polymorphism probability
     """
     # unpack the posterior state distribution
     p_recent, p_ancient, p_garbage, p_misaligned = post
     # get the prior probability of polymorphism conditional on state
     p_recent_AA = self.states[0].get_posterior_distribution(obs)[2]
     p_ancient_AA = self.states[1].get_posterior_distribution(obs)[2]
     # compute the posterior probability of a polymorphism
     posterior_polymorphism = 0
     posterior_polymorphism += p_recent * p_recent_AA
     posterior_polymorphism += p_ancient * p_ancient_AA
     # Given that a polymorphism occurred,
     # get the probability distribution over the
     # three non-reference nucleotides.
     r = self.seqerr
     log_Pr = math.log(r/4.0)
     log_PA = math.log(1 - 3*r/4.0)
     logs = [
             obs[1]*log_PA + obs[2]*log_Pr + obs[3]*log_Pr,
             obs[1]*log_Pr + obs[2]*log_PA + obs[3]*log_Pr,
             obs[1]*log_Pr + obs[2]*log_Pr + obs[3]*log_PA]
     condmaxpost = math.exp(max(logs) - logsumexp(logs))
      # combine into the posterior probability of the most likely polymorphism
     maxpost = posterior_polymorphism * condmaxpost
     return maxpost
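The condmaxpost line above evaluates max_i p_i / sum_i p_i entirely in log space, so the three alternative-base likelihoods can be arbitrarily small without underflowing. The same pattern in isolation (a sketch, assuming scipy.special's logsumexp; the numbers are illustrative):

import math
from scipy.special import logsumexp

logs = [-40.0, -42.0, -45.0]                   # unnormalized log-likelihoods of the alternatives
p_best = math.exp(max(logs) - logsumexp(logs))
print(p_best)                                  # ~0.88, probability of the most likely alternative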
Example #4
    def word_bound(self, Elogtheta, Elogbeta, doc_ids=None):
        """
        Note that this is not strictly speaking a likelihood.

        Compute the expectation of the log conditional likelihood of the data,

            E_q[log p(w_d | theta, beta, A_d)],

        where p(w_d | theta, beta, A_d) is the log conditional likelihood of the data.
        """

        if doc_ids is None:
            docs = self.corpus
        else:
            docs = [self.corpus[d] for d in doc_ids]

        bound = 0.0
        for d, doc in enumerate(docs):
            ids = numpy.array([id for id, _ in doc])  # Word IDs in doc.
            cts = numpy.array([cnt for _, cnt in doc])  # Word counts.
            bound_d = 0.0
            for vi, v in enumerate(ids):
                bound_d += cts[vi] * logsumexp(Elogtheta[d, :] +
                                               Elogbeta[:, v])
            bound += bound_d

            # Above is the same as:
            #Elogthetad = Elogtheta[d, :]
            #likelihood += numpy.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id]) for id, cnt in doc)

        return bound
Example #5
def logsumexp(a, axis=None):
    """
    Evaluates :math:`\log(\sum_i \exp(a_i) )` in a bit smarter manner.
    If axis is None (default) then tries to use scipy's logsumexp, otherwise
    computed logsumexp manually along first axis.

    :param a: positions where logsumexp will be evaluated
    :type a:  numpy.array
    :param axis: along which axis logsumexp shall be computed, default=None
    :type axis: int
    :returns: function values
    :rtype:   numpy.array
    """
    if axis is None:
        # Use the scipy.maxentropy version.
        if hasattr(misc, 'logsumexp'):
            return misc.logsumexp(a)
        elif hasattr(maxentropy, 'logsumexp'):
            return maxentropy.logsumexp(a)
        else:
            axis = 0
    a = asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    a_max = a.max(axis=axis)
    s = log(exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse
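A quick sanity check of the max-shift trick used above (a minimal sketch, assuming only numpy; naive_logsumexp is an illustrative name):

import numpy as np

def naive_logsumexp(a):
    # Overflows for large inputs: np.exp(1000.) is inf.
    return np.log(np.sum(np.exp(a)))

def stable_logsumexp(a):
    # Shift by the max so the largest term is exp(0) == 1.
    a = np.asarray(a, dtype=float)
    a_max = a.max()
    return a_max + np.log(np.sum(np.exp(a - a_max)))

a = np.array([1000.0, 1000.5, 999.0])
print(naive_logsumexp(a))   # inf, with an overflow warning
print(stable_logsumexp(a))  # ~1001.10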
Example #6
def run(*args):
	dprintn(8, "# Generating data")
	
	global hypotheses
	RANK = str(MPI.COMM_WORLD.Get_rank())
	
	data_size = args[0]
	
	p_representation = defaultdict(int) # how often do you get the right representation
	p_response = defaultdict(int) # how often do you get the right response?
	p_representation_literal = defaultdict(int) # how often do you get the right representation
	p_response_literal = defaultdict(int)  # how often do you get the right response?
	p_representation_presup = defaultdict(int) # how often do you get the right representation
	p_response_presup = defaultdict(int) # how often do you get the right response?
	
	dprintn(8, "# Generating data")
	data = generate_data(data_size)
	
	# recompute these
	dprintn(8, "# Computing posterior")
	#[ x.unclear_functions() for x in hypotheses ]
	[ x.compute_posterior(data) for x in hypotheses ]
	
	# normalize the posterior in fs
	dprintn(8, "# Computing normalizer")
	Z = logsumexp([x.posterior_score for x in hypotheses])
	
	# and output the top hypotheses
	qq = FiniteBestSet(max=True, N=25)
	for h in hypotheses: qq.push(h, h.posterior_score) # get the tops
	for i, h in enumerate(qq.get_sorted()):
		for w in h.all_words():
			fprintn(8, data_size, i, w, h.posterior_score, q(h.lex[w]), f=options.OUT_PATH+"-hypotheses."+RANK+".txt")
	
	# and compute the probability of being correct
	dprintn(8, "# Computing correct probability")
	for h in hypotheses:
		hstr = str(h)
		#print data_size, len(data), exp(h.posterior_score), correct[ str(h)+":"+w ]
		for w in words:
			p = exp(h.posterior_score - Z)
			key = w + ":" + hstr
			
			p_representation[w] += p * (agree_pct[key] == 1.)
			p_representation_presup[w]  += p * (agree_pct_presup[key] == 1.) # if we always agree with the target, then we count as the right rep.
			p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)
			
			# and just how often does the hypothesis agree?
			p_response[w] += p * agree_pct[key]
			p_response_presup[w]  += p * agree_pct_presup[key]
			p_response_literal[w] += p * agree_pct_literal[key]
			
	dprintn(8, "# Outputting")
	

	for w in words:
		fprintn(10, rank, q(w), data_size, p_representation[w], p_representation_presup[w], p_representation_literal[w], p_response[w], p_response_presup[w], p_response_literal[w], f=options.OUT_PATH+"-stats."+RANK+".txt")
	
	return 0
Example #7
def testErrs(options,args):
    ndfehs, ndafes= 201,201
    dfehs= numpy.linspace(0.01,0.4,ndfehs)
    dafes= numpy.linspace(0.01,0.3,ndafes)
    if os.path.exists(args[0]):
        savefile= open(args[0],'rb')
        loglike= pickle.load(savefile)
        ii= pickle.load(savefile)
        jj= pickle.load(savefile)
        savefile.close()
    else:
        loglike= numpy.zeros((ndfehs,ndafes))
        ii, jj= 0, 0
    while ii < ndfehs:
        while jj < ndafes:
            sys.stdout.write('\r'+"Working on %i / %i" %(ii*ndafes+jj+1,ndafes*ndfehs))
            sys.stdout.flush()
            loglike[ii,jj]= errsLogLike(dfehs[ii],dafes[jj],options)
            jj+= 1
        ii+= 1
        jj= 0
        save_pickles(args[0],loglike,ii,jj)
    save_pickles(args[0],loglike,ii,jj)
    sys.stdout.write('\r'+_ERASESTR+'\r')
    sys.stdout.flush()
    if options.prior:
        prior= numpy.zeros((ndfehs,ndafes))
        for ii in range(ndfehs):
            prior[ii,:]= -0.5*(dafes-0.1)**2./0.1**2.-0.5*(dfehs[ii]-0.2)**2./0.1**2.
        loglike+= prior
    loglike-= maxentropy.logsumexp(loglike)
    loglike= numpy.exp(loglike)
    loglike/= numpy.sum(loglike)*(dfehs[1]-dfehs[0])*(dafes[1]-dafes[0])
    #Plot
    bovy_plot.bovy_print()
    bovy_plot.bovy_dens2d(loglike.T,origin='lower',
                          cmap='gist_yarg',
                          xlabel=r'$\delta_{[\mathrm{Fe/H}]}$',
                          ylabel=r'$\delta_{[\alpha/\mathrm{Fe}]}$',
                          xrange=[dfehs[0],dfehs[-1]],
                          yrange=[dafes[0],dafes[-1]],
                          contours=True,
                          cntrmass=True,
                          onedhists=True,
                          levels= special.erf(0.5*numpy.arange(1,4)))
    if options.prior:
        bovy_plot.bovy_text(r'$\mathrm{with\ Gaussian\ prior:}$'+
                            '\n'+r'$\delta_{[\mathrm{Fe/H}]}= 0.2 \pm 0.1$'
                            +'\n'+r'$\delta_{[\alpha/\mathrm{Fe}]}= 0.1 \pm 0.1$',
                            top_right=True)
    bovy_plot.bovy_end_print(options.plotfile)
Example #8
    def bound(self, corpus, gamma=None, subsample_ratio=1.0):
        """
        Estimate the variational bound of documents from `corpus`:
        E_q[log p(corpus)] - E_q[log q(corpus)]

        `gamma` are the variational parameters on topic weights for each `corpus`
        document (=2d matrix=what comes out of `inference()`).
        If not supplied, will be inferred from the model.

        """
        score = 0.0
        _lambda = self.state.get_lambda()
        Elogbeta = dirichlet_expectation(_lambda)

        for d, doc in enumerate(corpus):  # stream the input doc-by-doc, in case it's too large to fit in RAM
            if d % self.chunksize == 0:
                logger.debug("bound: at document #%i", d)
            if gamma is None:
                gammad, _ = self.inference([doc])
            else:
                gammad = gamma[d]
            Elogthetad = dirichlet_expectation(gammad)

            # E[log p(doc | theta, beta)]
            score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)])
                         for id, cnt in doc)

            # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
            score += np.sum((self.alpha - gammad) * Elogthetad)
            score += np.sum(gammaln(gammad) - gammaln(self.alpha))
            score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))

        # Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
        # that the likelihood is always roughly on the same scale.
        score *= subsample_ratio

        # E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
        score += np.sum((self.eta - _lambda) * Elogbeta)
        score += np.sum(gammaln(_lambda) - gammaln(self.eta))

        if np.ndim(self.eta) == 0:
            sum_eta = self.eta * self.num_terms
        else:
            sum_eta = np.sum(self.eta)

        score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))

        return score
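The per-word term cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) above reduces over topics: it is log sum_k exp(E[log theta_k] + E[log beta_{k,w}]), weighted by the word count. A standalone sketch of that reduction (assuming scipy.special's logsumexp; the Dirichlet draws are illustrative stand-ins for the variational expectations):

import numpy as np
from scipy.special import logsumexp

num_topics, num_terms = 4, 10
Elogtheta = np.log(np.random.dirichlet(np.ones(num_topics)))            # stand-in for E[log theta_k]
Elogbeta = np.log(np.random.dirichlet(np.ones(num_terms), num_topics))  # stand-in for E[log beta_{k,w}]

word_id, count = 3, 2
contribution = count * logsumexp(Elogtheta + Elogbeta[:, word_id])      # one word's term of the bound
print(contribution)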
Example #9
    def neg_log_likelihood(theta_sparse, hb = None):
        if not hb is None:
            h, b = hb
        else:
            h, b = dp(theta_sparse)
        
        log_kappa = logsumexp(h[0] + b[1])

        nll = log_kappa
        nll -= h[0][0]
        for k in range(1, params['M']):
            nll -= h[k][0,0]
        for ind in theta_sparse:
            nll += params['lambda'] * np.abs(theta_sparse[ind])
        return nll
Example #10
def pvlosplate(params,vhelio,data,df,options,logpiso,logpisodwarf,iso):
    """
    NAME:
       pvlosplate
    PURPOSE:
       calculate the vlos probability for a given location
    INPUT:
       params - parameters of the model
       vhelio - heliocentric los velocity to evaluate
       data - data array for this location
       df - df object(s) (?)
       options - options
       logpiso, logpisodwarf - precalculated isochrones
    OUTPUT:
       log of the probability
    HISTORY:
       2012-02-20 - Written - Bovy (IAS)
    """
    #Output is sum over data l,b,jk,h
    l= data['GLON']*_DEGTORAD
    b= data['GLAT']*_DEGTORAD
    sinl= numpy.sin(l)
    cosl= numpy.cos(l)
    sinb= numpy.sin(b)
    cosb= numpy.cos(b)
    jk= data['J0MAG']-data['K0MAG']
    try:
        jk[(jk < 0.5)]= 0.5 #BOVY: FIX THIS HACK BY EMAILING GAIL
    except TypeError:
        pass #HACK
    h= data['H0MAG']
    options.multi= 1 #To avoid conflict
    out= -mloglike(params,numpy.zeros(len(data))+vhelio,
                   l,
                   b,
                   jk,
                   h,
                   df,options,
                   sinl,
                   cosl,
                   cosb,
                   sinb,
                   logpiso,
                   logpisodwarf,True,None,iso,data['FEH']) #None iso for now
    #indx= (out >= -0.1)*(out <= 0.1)
    #print out[indx], jk[indx], h[indx]
    return logsumexp(out)
Example #11
    def bound(self, corpus, gamma=None, subsample_ratio=1.0):
        """
        Estimate the variational bound of documents from `corpus`:
        E_q[log p(corpus)] - E_q[log q(corpus)]

        `gamma` are the variational parameters on topic weights for each `corpus`
        document (=2d matrix=what comes out of `inference()`).
        If not supplied, will be inferred from the model.

        """
        score = 0.0
        _lambda = self.state.get_lambda()
        Elogbeta = dirichlet_expectation(_lambda)

        for d, doc in enumerate(corpus):  # stream the input doc-by-doc, in case it's too large to fit in RAM
            if d % self.chunksize == 0:
                logger.debug("bound: at document #%i", d)
            if gamma is None:
                gammad, _ = self.inference([doc])
            else:
                gammad = gamma[d]
            Elogthetad = dirichlet_expectation(gammad)

            # E[log p(doc | theta, beta)]
            score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)

            # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
            score += np.sum((self.alpha - gammad) * Elogthetad)
            score += np.sum(gammaln(gammad) - gammaln(self.alpha))
            score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))

        # Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
        # that the likelihood is always roughly on the same scale.
        score *= subsample_ratio

        # E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
        score += np.sum((self.eta - _lambda) * Elogbeta)
        score += np.sum(gammaln(_lambda) - gammaln(self.eta))

        if np.ndim(self.eta) == 0:
            sum_eta = self.eta * self.num_terms
        else:
            sum_eta = np.sum(self.eta)

        score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))

        return score
Example #12
    def inference(self, doc):
        """
        Perform inference on a single document.
        
        Return 3-tuple of (likelihood of this document, word-topic distribution
        phi, expected word counts gamma (~topic distribution)).
        
        A document is simply a bag-of-words collection which supports len() and 
        iteration over (wordIndex, wordCount) 2-tuples.
        
        The model itself is not affected in any way (this function is read-only aka 
        const).
        """
        # init help structures
        totalWords = sum(wordCount for _, wordCount in doc)
        gamma = numpy.zeros(
            self.numTopics) + self.alpha + 1.0 * totalWords / self.numTopics
        phi = numpy.zeros(shape=(len(doc),
                                 self.numTopics)) + 1.0 / self.numTopics
        likelihood = likelihoodOld = converged = numpy.nan

        # variational estimate
        for i in range(self.VAR_MAX_ITER):
            #            logging.debug("inference step #%s, converged=%s, likelihood=%s, likelikelihoodOld=%s" %
            #                          (i, converged, likelihood, likelihoodOld))

            if numpy.isfinite(converged) and converged <= self.VAR_CONVERGED:
                logging.debug("document converged in %i iterations" % i)
                break

            for n, (wordIndex, wordCount) in enumerate(doc):
                # compute phi vars, in log space, to prevent numerical nastiness
                tmp = digamma(
                    gamma) + self.logProbW[:, wordIndex]  # vector operation

                # convert phi and update gamma
                newPhi = numpy.exp(tmp - logsumexp(tmp))
                gamma += wordCount * (newPhi - phi[n])
                phi[n] = newPhi

            likelihood = self.computeLikelihood(doc, phi, gamma)
            assert numpy.isfinite(likelihood)
            converged = numpy.divide(likelihoodOld - likelihood, likelihoodOld)
            likelihoodOld = likelihood
        return likelihood, phi, gamma
Example #13
 def _parse_hz_dict_indiv(self,hz):
     htype= hz.get('type','exp')
     if htype == 'exp':
         zd= hz.get('h',0.0375)
         th= lambda z, tzd=zd: 1./2./tzd*numpy.exp(-numpy.fabs(z)/tzd)
         tH= lambda z, tzd= zd: (numpy.exp(-numpy.fabs(z)/tzd)-1.
                                 +numpy.fabs(z)/tzd)*tzd/2.
         tdH= lambda z, tzd= zd: 0.5*numpy.sign(z)\
             *(1.-numpy.exp(-numpy.fabs(z)/tzd))
     elif htype == 'sech2':
         zd= hz.get('h',0.0375)
         th= lambda z, tzd=zd: 1./numpy.cosh(z/2./tzd)**2./4./tzd
         # Avoid overflow in cosh
         tH= lambda z, tzd= zd: \
             tzd*(logsumexp(numpy.array([z/2./tzd,-z/2./tzd]),axis=0)\
                      -numpy.log(2.))
         tdH= lambda z, tzd= zd: numpy.tanh(z/2./tzd)/2.
     return (th,tH,tdH)
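The sech2 branch above rewrites log(2*cosh(u)) as logsumexp([u, -u]), which never overflows, whereas numpy.cosh(u) is inf for u beyond roughly 710. A small check of that identity (a sketch, assuming scipy.special's logsumexp):

import numpy as np
from scipy.special import logsumexp

u = 800.0
print(np.log(np.cosh(u)))                          # inf, with an overflow warning
print(logsumexp(np.array([u, -u])) - np.log(2.0))  # ~799.3069, the correct log(cosh(u))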
Example #15
 def bound(self, corpus, gamma=None, subsample_ratio=1.0):
     score = 0.0
     _lambda = self.state.get_lambda()
     Elogbeta = dirichlet_expectation(_lambda)
     for d, doc in enumerate(corpus):
         if gamma is None:
             gammad, _ = self.inference([doc])
         else:
             gammad = gamma[d]
         Elogthetad = dirichlet_expectation(gammad)
         score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id]) for id, cnt in doc)
         score += numpy.sum((self.alpha - gammad) * Elogthetad)
         score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
         score += gammaln(numpy.sum(self.alpha)) - gammaln(numpy.sum(gammad))
     score *= subsample_ratio
     score += numpy.sum((self.eta - _lambda) * Elogbeta)
     score += numpy.sum(gammaln(_lambda) - gammaln(self.eta))
     score += numpy.sum(gammaln(self.eta * self.num_terms) - gammaln(numpy.sum(_lambda, 1)))
     return score
Example #16
def _eval_sumgaussians(x,xamp,xmean,xcovar):
    """x array [ndata,ndim], return log"""
    ndata= x.shape[0]
    da= x.shape[1]
    out= numpy.zeros(ndata)
    ngauss= len(xamp)
    loglike= numpy.zeros(ngauss)
    for ii in range(ndata):
        for kk in range(ngauss):
            if xamp[kk] == 0.:
                loglike[kk]= numpy.finfo(numpy.dtype(numpy.float64)).min
                continue
            tinv= linalg.inv(xcovar[kk,:,:])
            delta= x[ii,:]-xmean[kk,:]
            loglike[kk]= numpy.log(xamp[kk])+0.5*numpy.log(linalg.det(tinv))\
                -0.5*numpy.dot(delta,numpy.dot(tinv,delta))+\
                da*_SQRTTWOPI
        out[ii]= maxentropy.logsumexp(loglike)
    return out
Example #17
    def bound(self, corpus, gamma=None):
        """
        Estimate the variational bound of documents from `corpus`.

        `gamma` are the variational parameters on topic weights (one for each
        document in `corpus`). If not supplied, will be automatically inferred
        from the model.
        """
        score = 0.0
        Elogbeta = numpy.log(self.expElogbeta)

        for d, doc in enumerate(corpus):
            if d % self.chunks == 0:
                logger.info("PROGRESS: at document #%i" % d)
            if gamma is None:
                gammad, _ = self.inference([doc])
            else:
                gammad = gamma[d, :]
            Elogthetad = dirichlet_expectation(gammad)
            expElogthetad = numpy.exp(Elogthetad)
            ids = [id for id, _ in doc]
            cts = numpy.array([cnt for _, cnt in doc])
            phinorm = numpy.zeros(len(ids))
            for i in range(len(ids)):
                phinorm[i] = logsumexp(Elogthetad + Elogbeta[:, ids[i]])

            # E[log p(docs | theta, beta)]
            score += numpy.sum(cts * phinorm)

            # E[log p(theta | alpha) - log q(theta | gamma)]
            score += numpy.sum((self.alpha - gammad) * Elogthetad)
            score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
            score += gammaln(self.alpha * self.numTopics) - gammaln(
                numpy.sum(gammad))

        # E[log p(beta | eta) - log q (beta | lambda)]
        score += numpy.sum((self.eta - self._lambda) * Elogbeta)
        score += numpy.sum(gammaln(self._lambda) - gammaln(self.eta))
        score += numpy.sum(
            gammaln(self.eta * self.numTerms) -
            gammaln(numpy.sum(self._lambda, 1)))

        return score
Example #18
    def inference(self, doc):
        """
        Perform inference on a single document.
        
        Return 3-tuple of `(likelihood of this document, word-topic distribution
        phi, expected word counts gamma (~topic distribution))`.
        
        A document is simply a bag-of-words collection which supports len() and 
        iteration over (wordIndex, wordCount) 2-tuples.
        
        The model itself is not affected in any way (this function is read-only aka 
        const).
        """
        # init help structures
        totalWords = sum(wordCount for _, wordCount in doc)
        gamma = numpy.zeros(self.numTopics) + self.alpha + 1.0 * totalWords / self.numTopics
        phi = numpy.zeros(shape = (len(doc), self.numTopics)) + 1.0 / self.numTopics
        likelihood = likelihoodOld = converged = numpy.nan

        # variational estimate
        for i in range(self.VAR_MAX_ITER):
#            logger.debug("inference step #%s, converged=%s, likelihood=%s, likelikelihoodOld=%s" % 
#                          (i, converged, likelihood, likelihoodOld))
            
            if numpy.isfinite(converged) and converged <= self.VAR_CONVERGED:
                logger.debug("document converged in %i iterations" % i)
                break
            
            for n, (wordIndex, wordCount) in enumerate(doc):
                # compute phi vars, in log space, to prevent numerical nastiness
                tmp = digamma(gamma) + self.logProbW[:, wordIndex] # vector operation

                # convert phi and update gamma
                newPhi = numpy.exp(tmp - logsumexp(tmp))
                gamma += wordCount * (newPhi - phi[n])
                phi[n] = newPhi
            
            likelihood = self.computeLikelihood(doc, phi, gamma)
            assert numpy.isfinite(likelihood)
            converged = numpy.divide(likelihoodOld - likelihood, likelihoodOld)
            likelihoodOld = likelihood
        return likelihood, phi, gamma
Example #19
    def bound(self, corpus, gamma=None):
        """
        Estimate the variational bound of documents from `corpus`.

        `gamma` are the variational parameters on topic weights (one for each
        document in `corpus`). If not supplied, will be automatically inferred
        from the model.
        """
        score = 0.0
        Elogbeta = numpy.log(self.expElogbeta)

        for d, doc in enumerate(corpus):
            if d % self.chunks == 0:
                logger.info("PROGRESS: at document #%i" % d)
            if gamma is None:
                gammad, _ = self.inference([doc])
            else:
                gammad = gamma[d, :]
            Elogthetad = dirichlet_expectation(gammad)
            expElogthetad = numpy.exp(Elogthetad)
            ids = [id for id, _ in doc]
            cts = numpy.array([cnt for _, cnt in doc])
            phinorm = numpy.zeros(len(ids))
            for i in range(len(ids)):
                phinorm[i] = logsumexp(Elogthetad + Elogbeta[:, ids[i]])

            # E[log p(docs | theta, beta)]
            score += numpy.sum(cts * phinorm)

            # E[log p(theta | alpha) - log q(theta | gamma)]
            score += numpy.sum((self.alpha - gammad) * Elogthetad)
            score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
            score += gammaln(self.alpha * self.numTopics) - gammaln(numpy.sum(gammad))

        # E[log p(beta | eta) - log q (beta | lambda)]
        score += numpy.sum((self.eta - self._lambda) * Elogbeta)
        score += numpy.sum(gammaln(self._lambda) - gammaln(self.eta))
        score += numpy.sum(gammaln(self.eta * self.numTerms) -
                              gammaln(numpy.sum(self._lambda, 1)))

        return score
Example #20
    def dp(theta_sparse):
        theta = theta_dense(theta_sparse)

        h = [None] * params['M']
        h[0] = np.empty(n_w[0])
        for w in range(n_w[0]):
            h[0][w] = np.sum(theta * hits_pre[0][w])
        for k in range(1, params['M']):
            h[k] = np.empty((n_w[k-1], n_w[k]))
            for w_prev in range(n_w[k-1]):
                for w in range(n_w[k]):
                    h[k][w_prev,w] = np.sum(theta * hits_pre[k][w_prev,w])

        b = [None] * (params['M']+1)
        b[params['M']] = np.zeros(n_w[params['M']-1])
        for k in range(params['M']-1, 0, -1):
            b[k] = np.empty(n_w[k-1])
            for w_prev in range(n_w[k-1]):
                b[k][w_prev] = logsumexp(h[k][w_prev] + b[k+1])

        return h, b
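The b arrays above implement a backward pass over a linear-chain log-linear model: b[k][w_prev] = logsumexp(h[k][w_prev] + b[k+1]) accumulates the log-partition over all suffixes, and logsumexp(h[0] + b[1]) (log_kappa in Example #9) is the full log partition function. A toy version of the recursion (a sketch, assuming scipy.special's logsumexp; the two-state, two-position chain is illustrative):

import numpy as np
from scipy.special import logsumexp

h0 = np.array([0.1, -0.2])                # log-potentials for position 1
h1 = np.array([[0.3, -0.1], [0.0, 0.2]])  # pairwise log-potentials, position 1 -> 2
b2 = np.zeros(2)                          # b[M] = 0
b1 = np.array([logsumexp(h1[w_prev] + b2) for w_prev in range(2)])
log_Z = logsumexp(h0 + b1)                # log partition function
print(log_Z)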
Example #21
def _eval_gauss_grid(x,y,xamp,xmean,xcovar):
    nx= len(x)
    ny= len(y)
    out= numpy.zeros((nx,ny))
    ngauss= len(xamp)
    dim= xmean.shape[1]
    loglike= numpy.zeros(ngauss)
    for ii in range(nx):
        for jj in range(ny):
            a= numpy.array([x[ii],y[jj]])
            for kk in range(ngauss):
                if xamp[kk] == 0.:
                    loglike[kk]= numpy.finfo(numpy.dtype(numpy.float64)).min
                    continue
                tinv= numpy.linalg.inv(xcovar[kk,:,:])
                delta= a-xmean[kk,:]
                loglike[kk]= numpy.log(xamp[kk])+0.5*numpy.log(numpy.linalg.det(tinv))\
                    -0.5*numpy.dot(delta,numpy.dot(tinv,delta))+\
                    dim*_SQRTTWOPI
            out[ii,jj]= logsumexp(loglike)
    return out
def pvlosplate(params, vhelio, data, df, options, logpiso, logpisodwarf, iso):
    """
    NAME:
       pvlosplate
    PURPOSE:
       calculate the vlos probability for a given location
    INPUT:
       params - parameters of the model
       vhelio - heliocentric los velocity to evaluate
       data - data array for this location
       df - df object(s) (?)
       options - options
       logpiso, logpisodwarf - precalculated isochrones
    OUTPUT:
       log of the probability
    HISTORY:
       2012-02-20 - Written - Bovy (IAS)
    """
    #Output is sum over data l,b,jk,h
    l = data['GLON'] * _DEGTORAD
    b = data['GLAT'] * _DEGTORAD
    sinl = numpy.sin(l)
    cosl = numpy.cos(l)
    sinb = numpy.sin(b)
    cosb = numpy.cos(b)
    jk = data['J0MAG'] - data['K0MAG']
    try:
        jk[(jk < 0.5)] = 0.5  #BOVY: FIX THIS HACK BY EMAILING GAIL
    except TypeError:
        pass  #HACK
    h = data['H0MAG']
    options.multi = 1  #To avoid conflict
    out = -mloglike(params,
                    numpy.zeros(len(data)) + vhelio, l, b, jk, h, df, options,
                    sinl, cosl, cosb, sinb, logpiso, logpisodwarf, True, None,
                    iso, data['FEH'])  #None iso for now
    #indx= (out >= -0.1)*(out <= 0.1)
    #print out[indx], jk[indx], h[indx]
    return logsumexp(out)
Example #23
 def bound(self, corpus, gamma=None, subsample_ratio=1.0):
     score = 0.0
     _lambda = self.state.get_lambda()
     Elogbeta = dirichlet_expectation(_lambda)
     for d, doc in enumerate(corpus):
         if gamma is None:
             gammad, _ = self.inference([doc])
         else:
             gammad = gamma[d]
         Elogthetad = dirichlet_expectation(gammad)
         score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id])
                      for id, cnt in doc)
         score += numpy.sum((self.alpha - gammad) * Elogthetad)
         score += numpy.sum(gammaln(gammad) - gammaln(self.alpha))
         score += gammaln(numpy.sum(self.alpha)) - gammaln(
             numpy.sum(gammad))
     score *= subsample_ratio
     score += numpy.sum((self.eta - _lambda) * Elogbeta)
     score += numpy.sum(gammaln(_lambda) - gammaln(self.eta))
     score += numpy.sum(
         gammaln(self.eta * self.num_terms) -
         gammaln(numpy.sum(_lambda, 1)))
     return score
Example #24
def run(*args):
	dprintn(8, "# Generating data")
	
	global hypotheses
	
	data_size = args[0]
	
	here_correct = dict() # how often is each word right?
	for w in words: here_correct[w] = 0.0
	
	dprintn(8, "# Generating data")
	data = generate_data(data_size)
	
	# recompute these
	dprintn(8, "# Computing posterior")
	[ x.compute_posterior(data) for x in hypotheses ]
	
	# normalize the posterior in fs
	dprintn(8, "# Computing normalizer")
	Z = logsumexp([x.lp for x in hypotheses])
	
	# and compute the probability of being correct
	dprintn(8, "# Computing correct probability")
	for h in hypotheses:
		#print data_size, len(data), exp(h.lp), correct[ str(h)+":"+w ]
		for w in words:
			# the posterior times the prob of agreement with the right one, weighted by number of iterations
			here_correct[w] += exp(h.lp-Z) * correct[ str(h)+":"+w ] 
	
	dprintn(8, "# Outputting")
	o = open(OUT_PATH+str(rank), 'a')
	for w in words:
		print(rank, data_size, here_correct[w], q(w), file=o)
	o.close()
	
	return 0
Example #25
    def func_bg(index):
        """
        Author: Marusa Zerjal, 2019 - 07 - 18

        Multiprocessing function should be pickable

        :param index:
        :return:
        """
        star_mean = star_means[index]
        star_cov = star_covs[index]
        try:
            bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                     background_means, nstars)
            bg_lnol = logsumexp(bg_lnol)  # sum in linear space

        # Do we really want to make exceptions here? If the sum fails then
        # there's something wrong with the data.
        except:
            # TC: Changed sign to negative (surely if it fails, we want it to
            # have a negligible background overlap?)
            print('bg ln overlap failed, setting it to -inf')
            bg_lnol = -np.inf
        return bg_lnol
Example #26
 #Calculate vlos | los
 vlos= numpy.linspace(-200.,200.,options.nvlos)
 pvlos= numpy.zeros(options.nvlos)
 if not options.multi is None:
     pvlos= multi.parallel_map((lambda x: pvlosplate(params,vlos[x],
                                                     thesedata,df,options,
                                                     thislogpiso,
                                                     thislogpisodwarf,iso)),
                               range(options.nvlos),
                               numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi]))
 else:
     for ii in range(options.nvlos):
         print(ii)
         pvlos[ii]= pvlosplate(params,vlos[ii],thesedata,df,options,
                               thislogpiso,thislogpisodwarf,iso)
 pvlos-= logsumexp(pvlos)
 pvlos= numpy.exp(pvlos)
 if _PLOTZERO:
     pvloszero= numpy.zeros(options.nvlos)
     params[2]= -3.8
     if not options.multi is None:
         pvloszero= multi.parallel_map((lambda x: pvlosplate(params,vlos[x],
                                                             thesedata,df,options,
                                                             thislogpiso,
                                                             thislogpisodwarf,iso)),
                                       range(options.nvlos),
                                       numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi]))
     else:
         for ii in range(options.nvlos):
             print(ii)
             pvloszero[ii]= pvlosplate(params,vlos[ii],thesedata,df,options,
                                       thislogpiso,thislogpisodwarf,iso)
Example #27
def plot_rovo(filename, plotfilename):
    if not os.path.exists(filename):
        raise IOError("given filename does not exist")
    savefile = open(filename, 'rb')
    params = pickle.load(savefile)
    savefile.close()
    if _ANALYTIC:  #Calculate by fixing everything except for Ro and vo
        options = plot_pdfs.set_options(None)
        nros = 15
        noos = 15
        ros = numpy.linspace(7., 13., nros)
        oos = numpy.linspace(20., 30., noos)
        ll = numpy.zeros((noos, nros))
        for ii in range(noos):
            if not _MULTI is None:
                theseparamss = []
                for jj in range(nros):
                    theseparams = copy.copy(params)
                    theseparams[0] = oos[ii] * ros[jj] / _REFV0
                    theseparams[1] = ros[jj] / _REFR0
                    theseparamss.append(theseparams)
                thisll = multi.parallel_map(
                    (lambda x: numpy.sum(
                        logl.logl(init=theseparamss[x], options=options))),
                    range(nros),
                    numcores=numpy.amin(
                        [nros, _MULTI,
                         multiprocessing.cpu_count()]))
                ll[ii, :] = thisll
            else:
                for jj in range(nros):
                    theseparams = copy.copy(params)
                    theseparams[0] = oos[ii] * ros[jj] / _REFV0
                    theseparams[1] = ros[jj] / _REFR0
                    ll[ii, jj] = numpy.sum(
                        logl.logl(init=theseparams, options=options))
        #Normalize
        ll -= logsumexp(ll)
        ll = numpy.exp(ll)
        levels = list(special.erf(0.5 * numpy.arange(1, 4)))
        bovy_plot.bovy_dens2d(
            ll.T,
            origin='lower',
            levels=levels,
            xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
            ylabel=r'$R_0\ [\mathrm{kpc}]$',
            xrange=[20., 35.],
            yrange=[7., 13.],
            contours=True,
            cntrcolors='k',
            onedhists=True,
            cmap='gist_yarg')
    else:
        vos = numpy.array([s[0] for s in params]) * _REFV0
        ros = numpy.array([s[1] for s in params]) * _REFR0
        bovy_plot.bovy_print()
        levels = list(special.erf(0.5 * numpy.arange(1, 4)))
        levels.append(1.01)  #HACK to not plot outliers
        bovy_plot.scatterplot(
            vos / ros,
            ros,
            'k,',
            levels=levels,
            xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
            ylabel=r'$R_0\ [\mathrm{kpc}]$',
            bins=31,
            xrange=[200. / 8., 250. / 8.],
            yrange=[7., 9.],
            contours=True,
            cntrcolors='k',
            onedhists=True,
            cmap='gist_yarg')
    bovy_plot.bovy_end_print(plotfilename)
Example #28
def plot_rovo(filename,plotfilename):
    if not os.path.exists(filename):
        raise IOError("given filename does not exist")
    savefile= open(filename,'rb')
    params= pickle.load(savefile)
    savefile.close()
    if _ANALYTIC: #Calculate by fixing everything except for Ro and vo
        options= plot_pdfs.set_options(None)
        nros= 15
        noos= 15
        ros= numpy.linspace(7.,13.,nros)
        oos= numpy.linspace(20.,30.,noos)
        ll= numpy.zeros((noos,nros))
        for ii in range(noos):
            if not _MULTI is None:
                theseparamss= []
                for jj in range(nros):
                    theseparams= copy.copy(params)
                    theseparams[0]= oos[ii]*ros[jj]/_REFV0
                    theseparams[1]= ros[jj]/_REFR0
                    theseparamss.append(theseparams)
                thisll= multi.parallel_map((lambda x: numpy.sum(logl.logl(init=theseparamss[x],options=options))),
                                           range(nros),
                                           numcores=numpy.amin([nros,_MULTI,multiprocessing.cpu_count()]))
                ll[ii,:]= thisll
            else:
                for jj in range(nros):
                    theseparams= copy.copy(params)
                    theseparams[0]= oos[ii]*ros[jj]/_REFV0
                    theseparams[1]= ros[jj]/_REFR0
                    ll[ii,jj]= numpy.sum(logl.logl(init=theseparams,
                                                   options=options))
        #Normalize
        ll-= logsumexp(ll)
        ll= numpy.exp(ll)
        levels= list(special.erf(0.5*numpy.arange(1,4)))
        bovy_plot.bovy_dens2d(ll.T,origin='lower',levels=levels,
                              xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
                              ylabel=r'$R_0\ [\mathrm{kpc}]$',
                              xrange=[20.,35.],
                              yrange=[7.,13.],
                              contours=True,
                              cntrcolors='k',
                              onedhists=True,
                              cmap='gist_yarg')
    else:
        vos= numpy.array([s[0] for s in params])*_REFV0
        ros= numpy.array([s[1] for s in params])*_REFR0
        bovy_plot.bovy_print()
        levels= list(special.erf(0.5*numpy.arange(1,4)))
        levels.append(1.01) #HACK to not plot outliers
        bovy_plot.scatterplot(vos/ros,ros,'k,',levels=levels,
                              xlabel=r'$\Omega_0\ [\mathrm{km\ s}^{-1}\ \mathrm{kpc}^{-1}]$',
                              ylabel=r'$R_0\ [\mathrm{kpc}]$',
                              bins=31,
                              xrange=[200./8.,250./8.],
                              yrange=[7.,9.],
                              contours=True,
                              cntrcolors='k',
                              onedhists=True,
                              cmap='gist_yarg')
    bovy_plot.bovy_end_print(plotfilename)
Example #29
def _fit_orbit_mlogl(new_vxvv,vxvv,vxvv_err,pot,radec,lb,tmockAA,
                     ro,vo,obs):
    """The log likelihood for fitting an orbit"""
    #Use this _parse_args routine, which does forward and backward integration
    iR,ivR,ivT,iz,ivz,iphi= tmockAA._parse_args(True,False,
                                                new_vxvv[0],
                                                new_vxvv[1],
                                                new_vxvv[2],
                                                new_vxvv[3],
                                                new_vxvv[4],
                                                new_vxvv[5])
    if radec or lb:
        #Need to transform to ra,dec
        #First transform to X,Y,Z,vX,vY,vZ (Galactic)
        X,Y,Z = coords.galcencyl_to_XYZ(iR.flatten(),iphi.flatten(),
                                        iz.flatten(),
                                        Xsun=obs[0]/ro,
                                        Ysun=obs[1]/ro,
                                        Zsun=obs[2]/ro)
        vX,vY,vZ = coords.galcencyl_to_vxvyvz(ivR.flatten(),ivT.flatten(),
                                              ivz.flatten(),iphi.flatten(),
                                              vsun=nu.array(\
                obs[3:6])/vo)
        bad_indx= (X == 0.)*(Y == 0.)*(Z == 0.)
        if True in bad_indx: X[bad_indx]+= ro/10000.
        lbdvrpmllpmbb= coords.rectgal_to_sphergal(X*ro,Y*ro,Z*ro,
                                                  vX*vo,vY*vo,vZ*vo,
                                                  degree=True)
        if lb:
            orb_vxvv= nu.array([lbdvrpmllpmbb[:,0],
                                lbdvrpmllpmbb[:,1],
                                lbdvrpmllpmbb[:,2],
                                lbdvrpmllpmbb[:,4],
                                lbdvrpmllpmbb[:,5],
                                lbdvrpmllpmbb[:,3]]).T
        else:
            #Further transform to ra,dec,pmra,pmdec
            radec= coords.lb_to_radec(lbdvrpmllpmbb[:,0],
                                      lbdvrpmllpmbb[:,1],degree=True)
            pmrapmdec= coords.pmllpmbb_to_pmrapmdec(lbdvrpmllpmbb[:,4],
                                                    lbdvrpmllpmbb[:,5],
                                                    lbdvrpmllpmbb[:,0],
                                                    lbdvrpmllpmbb[:,1],
                                                    degree=True)
            orb_vxvv= nu.array([radec[:,0],radec[:,1],
                                lbdvrpmllpmbb[:,2],
                                pmrapmdec[:,0],pmrapmdec[:,1],
                                lbdvrpmllpmbb[:,3]]).T
    else:
        #shape=(2tintJ-1,6)
        orb_vxvv= nu.array([iR.flatten(),ivR.flatten(),ivT.flatten(),
                            iz.flatten(),ivz.flatten(),iphi.flatten()]).T 
    out= 0.
    for ii in range(vxvv.shape[0]):
        sub_vxvv= (orb_vxvv-vxvv[ii,:].flatten())**2.
        #print sub_vxvv[nu.argmin(nu.sum(sub_vxvv,axis=1))]
        if not vxvv_err is None:
            sub_vxvv/= vxvv_err[ii,:]**2.
        else:
            sub_vxvv/= 0.01**2.
        out+= logsumexp(-0.5*nu.sum(sub_vxvv,axis=1))
    return -out
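The per-datum term logsumexp(-0.5*nu.sum(sub_vxvv,axis=1)) above marginalizes each observation over every point along the integrated orbit, so the best-matching orbit point dominates its likelihood. The same soft-min behavior in isolation (a sketch, assuming scipy.special's logsumexp; the residuals are illustrative):

import numpy as np
from scipy.special import logsumexp

chi2 = np.array([400.0, 9.0, 2.0, 150.0])  # squared residuals of one datum against each orbit point
print(logsumexp(-0.5*chi2))                # ~-0.97, dominated by the closest point (chi2 = 2)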
Example #30
def localzdist(z,zsolar=0.019):
    #From 2 Gaussian XD fit to Casagrande et al. (2011)
    feh= isodist.Z2FEH(z,zsolar=zsolar)
    logfehdist= maxentropy.logsumexp([numpy.log(0.8)-numpy.log(0.15)-0.5*(feh-0.016)**2./0.15**2.,
                                      numpy.log(0.2)-numpy.log(0.22)-0.5*(feh+0.15)**2./0.22**2.])
    return logfehdist-numpy.log(z)
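localzdist evaluates a two-component Gaussian mixture by adding log-weights to per-component log-densities and reducing with logsumexp. The same pattern with the normalization constant included (a sketch, assuming scipy.special's logsumexp; weights and parameters are illustrative, not the fitted values above):

import numpy as np
from scipy.special import logsumexp

def log_mixture_pdf(x, weights, means, sigmas):
    # log sum_k w_k N(x | mu_k, sigma_k^2), computed stably in log space
    log_comp = (np.log(weights) - np.log(sigmas) - 0.5*np.log(2.0*np.pi)
                - 0.5*((x - means)/sigmas)**2)
    return logsumexp(log_comp)

print(log_mixture_pdf(0.0, np.array([0.8, 0.2]),
                      np.array([0.016, -0.15]), np.array([0.15, 0.22])))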
Example #31
def get_background_overlaps_with_covariances(background_means, star_means,
                                             star_covs):
    """
    author: Marusa Zerjal 2019 - 05 - 25

    Determine background overlaps using means and covariances for both
    background and stars.
    Covariance matrices for the background are Identity*bandwidth.

    Parameters
    ----------
    background_means: [nstars,6] float array_like
        Phase-space positions of some star set that greatly envelops points
        in question. Typically contents of gaia_xyzuvw.npy, or the output of
        >> tabletool.build_data_dict_from_table(
                   '../data/gaia_cartesian_full_6d_table.fits',
                    historical=True)['means']
    star_means: [npoints,6] float array_like
        Phase-space positions of stellar data that we are fitting components to
    star_covs: [npoints,6,6] float array_like
        Phase-space covariances of stellar data that we are fitting components to

    Returns
    -------
    bg_lnols: [nstars] float array_like
        Background log overlaps of stars with background probability density
        function.

    Notes
    -----
    We invert the vertical values (Z and U) because the typical background
    density should be symmetric along the vertical axis, and this distances
    stars from their siblings. I.e. association stars aren't assigned
    higher background overlaps by virtue of being an association star.

    Edits
    -----
    TC 2019-05-28: changed signature such that it follows similar usage as
                   get_kernel_densitites
    """
    # Inverting the vertical values
    star_means = np.copy(star_means)
    star_means[:, 2] *= -1
    star_means[:, 5] *= -1

    # Background covs with bandwidth using Scott's rule
    d = 6.0  # number of dimensions
    nstars = background_means.shape[0]
    bandwidth = nstars**(-1.0 / (d + 4.0))
    background_cov = np.cov(background_means.T) * bandwidth**2
    background_covs = np.array(nstars *
                               [background_cov])  # same cov for every star

    # shapes of the c_get_lnoverlaps input must be: (6, 6), (6,), (120, 6, 6), (120, 6)
    # So I do it in a loop for every star
    bg_lnols = []
    for i, (star_mean, star_cov) in enumerate(zip(star_means, star_covs)):
        print('bgols', i)
        #print('{} of {}'.format(i, len(star_means)))
        #print(star_cov)
        #print('det', np.linalg.det(star_cov))
        #bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
        #                         background_means, nstars)
        try:
            #print('***********', nstars, star_cov, star_mean, background_covs, background_means)
            bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                     background_means, nstars)
            #print('intermediate', bg_lnol)
            # bg_lnol = np.log(np.sum(np.exp(bg_lnol))) # sum in linear space
            bg_lnol = logsumexp(bg_lnol)  # sum in linear space

        # Do we really want to make exceptions here? If the sum fails then
        # there's something wrong with the data.
        except:
            # TC: Changed sign to negative (surely if it fails, we want it to
            # have a negligible background overlap?)
            print('bg ln overlap failed, setting it to -inf')
            bg_lnol = -np.inf
        bg_lnols.append(bg_lnol)
        #print(bg_lnol)
        #print('')

    # This should be parallelized
    #bg_lnols = [np.sum(get_lnoverlaps(star_cov, star_mean, background_covs, background_means, nstars)) for star_mean, star_cov in zip(star_means, star_covs)]
    #print(bg_lnols)

    return bg_lnols
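logsumexp(bg_lnol) above turns the per-background-star log overlaps into the log of their linear-space sum, which is what the "# sum in linear space" comment means; the individual overlaps are far too small to exponentiate directly. The same reduction in isolation (a sketch, assuming scipy.special's logsumexp; values are illustrative):

import numpy as np
from scipy.special import logsumexp

ln_overlaps = np.array([-720.0, -718.5, -725.0])  # log overlap with each background star
bg_lnol = logsumexp(ln_overlaps)                  # log(sum_i exp(...)), no underflow
print(bg_lnol)                                    # ~-718.30; naive np.exp would underflow to 0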
Example #32
def createFakeData(parser):
    options, args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if os.path.exists(options.plotfile):
        print "Outfile " + options.plotfile + " exists ..."
        print "Returning ..."
        return None
    #Read the data
    numpy.random.seed(options.seed)
    print "Reading the data ..."
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        validfeh=options.indivfeh,  #if indivfeh, we need validfeh
        ak=True,
        cutmultiples=options.cutmultiples,
        jkmax=options.jkmax)
    #HACK
    indx = (data['J0MAG'] - data['K0MAG'] < 0.5)
    data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx]
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile = open(options.isofile, 'rb')
        iso = pickle.load(isofile)
        if options.indivfeh:
            zs = pickle.load(isofile)
        elif options.varfeh:
            locl = pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for ii in range(len(locs)):
                indx = (data['LOCATION'] == locs[ii])
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      marginalizefeh=True,
                                      glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [
                iso,
                isomodel.isomodel(imfmodel=options.imfmodel,
                                  Z=options.Z,
                                  dwarf=True,
                                  expsfh=options.expsfh)
            ]
        else:
            iso = [iso]
        if not options.isofile is None:
            isofile = open(options.isofile, 'wb')
            pickle.dump(iso, isofile)
            if options.indivfeh:
                pickle.dump(zs, isofile)
            elif options.varfeh:
                pickle.dump(locl, isofile)
            isofile.close()
    df = None
    print "Pre-calculating isochrone distance prior ..."
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    for ii in range(len(data)):
        mh = data['H0MAG'][ii] - dm
        if options.indivfeh:
            #Find closest Z
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          (data['J0MAG'] - data['K0MAG'])[ii],
                                          mh)
        elif options.varfeh:
            #Find correct iso
            indx = (locl == data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          (data['J0MAG'] - data['K0MAG'])[ii],
                                          mh)
        else:
            logpiso[ii, :] = iso[0](numpy.zeros(_BINTEGRATENBINS) +
                                    (data['J0MAG'] - data['K0MAG'])[ii], mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF, _BINTEGRATEDMAX_DWARF,
                                 _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data['H0MAG'][ii] - dm
            logpisodwarf[ii, :] = iso[1](numpy.zeros(_BINTEGRATENBINS) +
                                         (data['J0MAG'] - data['K0MAG'])[ii],
                                         mh)
    else:
        logpisodwarf = None
    #Load initial parameters from file
    savefile = open(args[0], 'rb')
    params = pickle.load(savefile)
    savefile.close()
    #Prep data
    l = data['GLON'] * _DEGTORAD
    b = data['GLAT'] * _DEGTORAD
    sinl = numpy.sin(l)
    cosl = numpy.cos(l)
    sinb = numpy.sin(b)
    cosb = numpy.cos(b)
    jk = data['J0MAG'] - data['K0MAG']
    jk[(jk < 0.5)] = 0.5  #BOVY: FIX THIS HACK BY EMAILING GAIL
    h = data['H0MAG']
    #Re-sample
    vlos = numpy.linspace(-200., 200., options.nvlos)
    pvlos = numpy.zeros((len(data), options.nvlos))
    if options.dwarf:
        thislogpisodwarf = logpisodwarf
    else:
        thislogpisodwarf = None
    if not options.multi is None and options.multi > 1:
        thismulti = options.multi
        options.multi = 1  #To avoid conflict
        thispvlos = multi.parallel_map(
            (lambda x: -mloglike(params,
                                 numpy.zeros(len(data)) + vlos[x], l, b, jk, h,
                                 df, options, sinl, cosl, cosb, sinb, logpiso,
                                 thislogpisodwarf, True, None, None, None)),
            range(options.nvlos),
            numcores=numpy.amin(
                [len(vlos), multiprocessing.cpu_count(), thismulti]))
        for jj in range(options.nvlos):
            pvlos[:, jj] = thispvlos[jj]
    else:
        for jj in range(options.nvlos):
            pvlos[:, jj] = -mloglike(
                params,
                numpy.zeros(len(data)) + vlos[jj], l, b, jk, h, df, options,
                sinl, cosl, cosb, sinb, logpiso, thislogpisodwarf, True, None,
                None, None)
    """
    for jj in range(options.nvlos):
        pvlos[:,jj]= -mloglike(params,numpy.zeros(len(data))+vlos[jj],
                               l,
                               b,
                               jk,
                               h,
                               df,options,
                               sinl,
                               cosl,
                               cosb,
                               sinb,
                               logpiso,
                               thislogpisodwarf,True,None,None,None)
    """
    for ii in range(len(data)):
        pvlos[ii, :] -= logsumexp(pvlos[ii, :])
        pvlos[ii, :] = numpy.exp(pvlos[ii, :])
        pvlos[ii, :] = numpy.cumsum(pvlos[ii, :])
        pvlos[ii, :] /= pvlos[ii, -1]
        #Draw
        randindx = numpy.random.uniform()
        kk = 0
        while pvlos[ii, kk] < randindx:
            kk += 1
        data['VHELIO'][ii] = vlos[kk]
    #Dump raw
    fitsio.write(options.plotfile, data, clobber=True)
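The resampling loop near the end implements inverse-CDF sampling: normalize the log-probabilities with logsumexp, build the cumulative distribution, then invert a uniform draw. A compact equivalent (a sketch, assuming numpy and scipy; numpy.searchsorted replaces the while loop):

import numpy as np
from scipy.special import logsumexp

log_p = np.array([-3.0, -1.0, -2.0, -0.5])       # unnormalized log-probabilities over grid points
p = np.exp(log_p - logsumexp(log_p))
cdf = np.cumsum(p)
cdf /= cdf[-1]
idx = np.searchsorted(cdf, np.random.uniform())  # index of the sampled grid point
print(idx)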
Example #33
def eval_distpdf(ds,mdict=None,mivardict=None,logg=None,logg_ivar=None,
                 teff=None,teff_ivar=None,logage=None,logage_ivar=None,
                 Z=None,Z_ivar=None,feh=None,feh_ivar=None,
                 afe=None,afe_ivar=None,
                 padova=None,padova_type=None,
                 normalize=False,
                 ageprior=None):
    """
    NAME:
       eval_distpdf
    PURPOSE:
       evaluate the distance PDF for an object
    INPUT:
       ds- list or ndarray of distance (or a single distance), in kpc
       mdict= dictionary of apparent magnitudes (e.g., {'J':12.,'Ks':13.})
       mivardict= dictionary of magnitude inverse variances (matched to mdict)
       logg= observed logg
       logg_ivar= inverse variance of logg measurement
       teff= observed T_eff [K]
       teff_ivar= inverse variance of T_eff measurement
       logage= observed log_10 age [Gyr]
       logage_ivar= inverse variance of log_10 age measurement
       Z= observed metallicity
       Z_ivar= inverse variance of Z measurement
       feh= observed metallicity (alternative to Z)
       feh_ivar= inverse variance of FeH measurement
       afe= observed [\alpha/Fe]
       afe_ivar= [\alpha/Fe] inverse variance
       padova= if True, use Padova isochrones;
               if set to a PadovaIsochrone object, use it
       padova_type= type of PadovaIsochrone to use (e.g., 2mass-spitzer-wise)
       normalize= if True, normalize output PDF (default: False)
       ageprior= - None: flat in log age
                 - flat: flat in age
    OUTPUT:
       log of probability
    HISTORY:
       2011-04-28 - Written - Bovy (NYU)
    """
    #load isochrones
    if not padova is None and isinstance(padova,PadovaIsochrone):
        iso= padova
    elif not padova is None and isinstance(padova,bool) and padova:
        iso= PadovaIsochrone(type=padova_type)
    else: #otherwise iso would be unbound below
        raise NotImplementedError("set padova=True or pass a PadovaIsochrone instance")
    #Parse metallicity info
    if not feh is None: raise NotImplementedError("'feh' not yet implemented")
    #set up output
    if isinstance(ds,(list,nu.ndarray)):
        scalarOut= False
        if isinstance(ds,list):
            _ds= nu.array(ds)
        else: _ds= ds
    elif isinstance(ds,(int,float)):
        scalarOut= True
        _ds= nu.array([ds])
    #Pre-calculate all absolute magnitudes
    absmagdict= {}
    for key in mdict.keys():
        absmagdict[key]= -_distmodulus(_ds)+mdict[key]        
    #loop through isochrones
    ZS= iso.Zs()
    logages= iso.logages()
    allout= nu.zeros((len(_ds),len(ZS),len(logages)))
    for zz in range(len(ZS)):
        for aa in range(len(logages)):
            thisiso= iso(logages[aa],Z=ZS[zz])
            dmpm= nu.roll(thisiso['M_ini'],-1)-thisiso['M_ini']
            loglike= nu.zeros((len(_ds),len(thisiso['M_ini'])-1))
            loglike-= nu.log(thisiso['M_ini'][-1])
            for ii in range(1,len(thisiso['M_ini'])-1):
                if dmpm[ii] > 0.: 
                    loglike[:,ii]+= nu.log(dmpm[ii])
                else: 
                    loglike[:,ii]= nu.finfo(nu.dtype(nu.float64)).min
                    continue #no use in continuing here
                if not teff is None:
                    loglike[:,ii]-= (teff-10**thisiso['logTe'][ii])**2.*teff_ivar
                if not logg is None:
                    loglike[:,ii]-= (logg-thisiso['logg'][ii])**2.*logg_ivar
                for key in mdict.keys():
                    #print absmagdict[key][2], thisiso[key][ii]
                    loglike[:,ii]-= (absmagdict[key]-thisiso[key][ii])**2.\
                        *mivardict[key]
            #marginalize over mass
            for jj in range(len(_ds)):
                allout[jj,zz,aa]= logsumexp(loglike[jj,:])
            #add age constraint and prior
            if not logage is None:
                allout[:,zz,aa]+= -(logage-logages[aa])**2.*logage_ivar
            if not ageprior is None:
                if isinstance(ageprior,str) and ageprior.lower() == 'flat':
                    allout[:,zz,aa]+= logages[aa]*_LOGTOLN
        #add Z constraint and prior
        if not Z is None:
            allout[:,zz,:]+= -(Z-ZS[zz])**2.*Z_ivar
    #prepare final output
    out= nu.zeros(len(_ds))
    for jj in range(len(_ds)):
        out[jj]= logsumexp(allout[jj,:,:])
    if normalize and not scalarOut:
        out-= logsumexp(out)+nu.log(ds[1]-ds[0])
    #return
    if scalarOut: return out[0]
    else: return out
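A minimal call sketch for eval_distpdf, assuming a 2MASS-type PadovaIsochrone is available; the magnitude values, error bars, and distance grid are illustrative only:

import numpy as nu

ds= nu.linspace(0.1,10.,101) #distance grid in kpc
lnpdf= eval_distpdf(ds,
                    mdict={'J':12.,'Ks':11.4},
                    mivardict={'J':1./0.02**2.,'Ks':1./0.02**2.},
                    logg=2.4,logg_ivar=1./0.1**2.,
                    teff=4750.,teff_ivar=1./100.**2.,
                    padova=True,padova_type='2mass-spitzer-wise',
                    normalize=True)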
Example #34
0
    if not options.multi is None:
        pvlos = multi.parallel_map(
            (lambda x: pvlosplate(params, vlos[x], thesedata, df, options,
                                  thislogpiso, thislogpisodwarf, iso)),
            range(options.nvlos),
            numcores=numpy.amin(
                [len(vlos), multiprocessing.cpu_count(), options.multi]))
    else:
        for ii in range(options.nvlos):
            print ii
            pvlos[ii] = pvlosplate(params, vlos[ii], thesedata, df,
                                   options, thislogpiso,
                                   thislogpisodwarf, iso)
    pvlos -= logsumexp(pvlos)
    pvlos = numpy.exp(pvlos)
    if _PLOTZERO:
        pvloszero = numpy.zeros(options.nvlos)
        params[2] = -3.8
        if not options.multi is None:
            pvloszero = multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df, options,
                                      thislogpiso, thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin(
                    [len(vlos), multiprocessing.cpu_count(), options.multi]))
        else:
            for ii in range(options.nvlos):
                pvloszero[ii] = pvlosplate(params, vlos[ii], thesedata, df,
                                           options, thislogpiso,
                                           thislogpisodwarf, iso)
Example #35
0
def map_vc_like_simple(parser):
    """
    NAME:
       map_vc_like_simple
    PURPOSE:
       map the vc likelihood assuming knowledge of the DF
    INPUT:
       parser - from optparse
    OUTPUT:
       stuff as specified by the options
    HISTORY:
       2011-04-20 - Written - Bovy (NYU)
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        sys.exit(-1)
    #Set up DF
    dfc = dehnendf(beta=0.,
                   profileParams=(options.rd, options.rs, options.so),
                   correct=True,
                   niter=20)
    #Load data
    picklefile = open(args[0], 'rb')
    out = pickle.load(picklefile)
    picklefile.close()
    ndata = len(out)
    if options.linearfit:
        plot_linear(out, options.los * _DEGTORAD, options, dfc)
        return None
    #Map likelihood
    vcirc = nu.linspace(options.vmin, options.vmax, options.nvcirc)
    if not options.nbeta is None:
        betas = nu.linspace(options.betamin, options.betamax, options.nbeta)
        like = nu.zeros((options.nvcirc, options.nbeta))
        for ii in range(options.nvcirc):
            for kk in range(options.nbeta):
                thislike = 0.
                for jj in range(ndata):
                    thislike += single_vlos_loglike(vcirc[ii],
                                                    out[jj],
                                                    dfc,
                                                    options,
                                                    options.los * _DEGTORAD,
                                                    beta=betas[kk])
                like[ii, kk] = thislike
        like -= logsumexp(like.flatten()) + m.log(vcirc[1] - vcirc[0])
        bovy_plot.bovy_print()
        bovy_plot.bovy_dens2d(nu.exp(like).T,
                              origin='lower',
                              xrange=[options.vmin,options.vmax],
                              yrange=[options.betamin,options.betamax],
                              aspect=(options.vmax-options.vmin)/\
                                  (options.betamax-options.betamin),
                              cmap='gist_yarg',
                              xlabel=r'$v_c / v_0$',
                              ylabel=r'$\beta$',
                              contours=True,cntrmass=True,
                              levels=[0.682,0.954,0.997])
        bovy_plot.bovy_text(r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so\
                                +'\n'+\
                                r'$l  = %i^\circ$' % round(options.los),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
    else:
        like = nu.zeros(options.nvcirc)
        for ii in range(options.nvcirc):
            thislike = 0.
            for jj in range(ndata):
                thislike += single_vlos_loglike(vcirc[ii], out[jj], dfc,
                                                options,
                                                options.los * _DEGTORAD)
            like[ii] = thislike
        like -= logsumexp(like) + m.log(vcirc[1] - vcirc[0])
        #Calculate mean and sigma
        vcmean = nu.sum(vcirc * nu.exp(like) * (vcirc[1] - vcirc[0]))
        vc2mean = nu.sum(vcirc**2. * nu.exp(like) * (vcirc[1] - vcirc[0]))
        #Plot
        bovy_plot.bovy_print()
        bovy_plot.bovy_plot(vcirc,
                            nu.exp(like),
                            'k-',
                            xlabel=r'$v_c / v_0$',
                            ylabel=r'$p(\mathrm{data} | v_c)$')
        bovy_plot.bovy_text(r'$\langle v_c \rangle = %4.2f \ v_0$' % vcmean +'\n'+
                            r'$\sqrt{\langle v_c^2 \rangle - \langle v_c \rangle^2} = %4.2f \ v_0$' % (m.sqrt(vc2mean-vcmean**2.)) +'\n'+\
                                r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so+'\n'+\
                                r'$l  = %i^\circ$' % round(options.los),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
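The normalization like-= logsumexp(like)+m.log(dv) used above turns the log-likelihood grid into a discrete posterior that integrates to one, so the mean and dispersion follow as simple Riemann sums. A self-contained numeric check; the Gaussian toy likelihood and the local _lse helper are illustrative stand-ins:

import math as m
import numpy as nu

def _lse(x): #local stand-in for the logsumexp used above
    mx= nu.amax(x)
    return mx+nu.log(nu.sum(nu.exp(x-mx)))

vcirc= nu.linspace(0.8,1.2,101)
like= -0.5*(vcirc-1.)**2./0.05**2. #toy Gaussian log-likelihood
like-= _lse(like)+m.log(vcirc[1]-vcirc[0])
print(nu.sum(nu.exp(like))*(vcirc[1]-vcirc[0])) #~= 1.0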
Example #36
0
def plot_bestfit(parser):
    (options, args) = parser.parse_args()
    if len(args) == 0 or options.plotfilename is None:
        parser.print_help()
        return
    # Read the data
    print "Reading the data ..."
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  # if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata,
    )
    # HACK
    indx = data["J0MAG"] - data["K0MAG"] < 0.5
    data["J0MAG"][indx] = 0.5 + data["K0MAG"][indx]
    # Cut inner disk locations
    # data= data[(data['GLON'] > 75.)]
    # Cut outliers
    # data= data[(data['VHELIO'] < 200.)*(data['VHELIO'] > -200.)]
    print "Using %i data points ..." % len(data)
    # Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile = open(options.isofile, "rb")
        iso = pickle.load(isofile)
        if options.indivfeh:
            zs = pickle.load(isofile)
        if options.varfeh:
            locl = pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            # Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel, expsfh=options.expsfh, Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data["LOCATION"]))
            iso = []
            for ii in range(len(locs)):
                indx = data["LOCATION"] == locs[ii]
                locl = numpy.mean(data["GLON"][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel, expsfh=options.expsfh, marginalizefeh=True, glon=locl)
                )
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z, expsfh=options.expsfh)
        if options.dwarf:
            iso = [iso, isomodel.isomodel(imfmodel=options.imfmodel, Z=options.Z, dwarf=True, expsfh=options.expsfh)]
        else:
            iso = [iso]
        if not options.isofile is None:
            isofile = open(options.isofile, "wb")
            pickle.dump(iso, isofile)
            if options.indivfeh:
                pickle.dump(zs, isofile)
            elif options.varfeh:
                pickle.dump(locl, isofile)
            isofile.close()
    df = None
    print "Pre-calculating isochrone distance prior ..."
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    for ii in range(len(data)):
        mh = data["H0MAG"][ii] - dm
        if options.indivfeh:
            # Find closest Z
            thisZ = isodist.FEH2Z(data[ii]["FEH"])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) + (data["J0MAG"] - data["K0MAG"])[ii], mh)
        elif options.varfeh:
            # Find correct iso
            indx = locl == data[ii]["LOCATION"]
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) + (data["J0MAG"] - data["K0MAG"])[ii], mh)
        else:
            logpiso[ii, :] = iso[0](numpy.zeros(_BINTEGRATENBINS) + (data["J0MAG"] - data["K0MAG"])[ii], mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF, _BINTEGRATEDMAX_DWARF, _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data["H0MAG"][ii] - dm
            logpisodwarf[ii, :] = iso[1](numpy.zeros(_BINTEGRATENBINS) + (data["J0MAG"] - data["K0MAG"])[ii], mh)
    else:
        logpisodwarf = None
    # Calculate data means etc.
    # Calculate means
    locations = list(set(data["LOCATION"]))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    avg_plate = numpy.zeros(nlocs)
    sig_plate = numpy.zeros(nlocs)
    siga_plate = numpy.zeros(nlocs)
    sigerr_plate = numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx = data["LOCATION"] == locations[ii]
        l_plate[ii] = numpy.mean(data["GLON"][indx])
        avg_plate[ii] = numpy.mean(data["VHELIO"][indx])
        sig_plate[ii] = numpy.std(data["VHELIO"][indx])
        siga_plate[ii] = numpy.std(data["VHELIO"][indx]) / numpy.sqrt(numpy.sum(indx))
        sigerr_plate[ii] = bootstrap_sigerr(data["VHELIO"][indx])
    # Calculate plate means and variances from the model
    # Load initial parameters from file
    savefile = open(args[0], "rb")
    params = pickle.load(savefile)
    if not options.index is None:
        params = params[options.index]
    savefile.close()
    # params[0]= 245./235.
    # params[1]= 8.5/8.
    avg_plate_model = numpy.zeros(nlocs)
    sig_plate_model = numpy.zeros(nlocs)
    for ii in range(nlocs):
        # Calculate vlos | los
        indx = data["LOCATION"] == locations[ii]
        thesedata = data[indx]
        thislogpiso = logpiso[indx, :]
        if options.dwarf:
            thislogpisodwarf = logpisodwarf[indx, :]
        else:
            thislogpisodwarf = None
        vlos = numpy.linspace(-200.0, 200.0, options.nvlos)
        pvlos = numpy.zeros(options.nvlos)
        if not options.multi is None:
            pvlos = multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df, options, thislogpiso, thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin([len(vlos), multiprocessing.cpu_count(), options.multi]),
            )
        else:
            for jj in range(options.nvlos):
                print jj
                pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df, options, thislogpiso, thislogpisodwarf, iso)
        pvlos -= logsumexp(pvlos)
        pvlos = numpy.exp(pvlos)
        # Calculate mean and velocity dispersion
        avg_plate_model[ii] = numpy.sum(vlos * pvlos)
        sig_plate_model[ii] = numpy.sqrt(numpy.sum(vlos ** 2.0 * pvlos) - avg_plate_model[ii] ** 2.0)
    # Plot everything
    left, bottom, width, height = 0.1, 0.4, 0.8, 0.5
    axTop = pyplot.axes([left, bottom, width, height])
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.3
    axMean = pyplot.axes([left, bottom, width, height])
    # left, bottom, width, height= 0.1, 0.1, 0.8, 0.2
    # axSig= pyplot.axes([left,bottom,width,height])
    fig = pyplot.gcf()
    fig.sca(axTop)
    pyplot.ylabel(r"$\mathrm{Heliocentric\ velocity}\ [\mathrm{km\ s}^{-1}]$")
    pyplot.xlabel(r"$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$")
    pyplot.xlim(0.0, 360.0)
    pyplot.ylim(-200.0, 200.0)
    nullfmt = NullFormatter()  # no labels
    axTop.xaxis.set_major_formatter(nullfmt)
    bovy_plot.bovy_plot(data["GLON"], data["VHELIO"], "k,", yrange=[-200.0, 200.0], xrange=[0.0, 360.0], overplot=True)
    ndata_t = int(math.floor(len(data) / 1000.0))
    ndata_h = len(data) - ndata_t * 1000
    bovy_plot.bovy_plot(l_plate, avg_plate, "o", overplot=True, mfc="0.5", mec="none")
    bovy_plot.bovy_plot(l_plate, avg_plate_model, "x", overplot=True, ms=10.0, mew=1.5, color="0.7")
    # Legend
    bovy_plot.bovy_plot([260.0], [150.0], "k,", overplot=True)
    bovy_plot.bovy_plot([260.0], [120.0], "o", mfc="0.5", mec="none", overplot=True)
    bovy_plot.bovy_plot([260.0], [90.0], "x", ms=10.0, mew=1.5, color="0.7", overplot=True)
    bovy_plot.bovy_text(270.0, 145.0, r"$\mathrm{data}$")
    bovy_plot.bovy_text(270.0, 115.0, r"$\mathrm{data\ mean}$")
    bovy_plot.bovy_text(270.0, 85.0, r"$\mathrm{model\ mean}$")
    bovy_plot._add_ticks()
    # Now plot the difference
    fig.sca(axMean)
    bovy_plot.bovy_plot([0.0, 360.0], [0.0, 0.0], "-", color="0.5", overplot=True)
    bovy_plot.bovy_plot(l_plate, avg_plate - avg_plate_model, "ko", overplot=True)
    pyplot.errorbar(
        l_plate, avg_plate - avg_plate_model, yerr=siga_plate, marker="o", color="k", linestyle="none", elinestyle="-"
    )
    pyplot.ylabel(r"$\bar{V}_{\mathrm{data}}-\bar{V}_{\mathrm{model}}$")
    pyplot.ylim(-14.5, 14.5)
    pyplot.xlim(0.0, 360.0)
    bovy_plot._add_ticks()
    # axMean.xaxis.set_major_formatter(nullfmt)
    pyplot.xlabel(r"$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$")
    pyplot.xlim(0.0, 360.0)
    bovy_plot._add_ticks()
    # Save
    bovy_plot.bovy_end_print(options.plotfilename)
    return None
    # Sigma (unreachable: the function returns above and axSig is commented out)
    fig.sca(axSig)
    pyplot.plot([0.0, 360.0], [1.0, 1.0], "-", color="0.5")
    bovy_plot.bovy_plot(l_plate, sig_plate / sig_plate_model, "ko", overplot=True)
    pyplot.errorbar(
        l_plate,
        sig_plate / sig_plate_model,
        yerr=sigerr_plate / sig_plate_model,
        marker="o",
        color="k",
        linestyle="none",
        elinestyle="-",
    )
    pyplot.ylabel(r"$\sigma_{\mathrm{los}}^{\mathrm{data}}/ \sigma_{\mathrm{los}}^{\mathrm{model}}$")
    pyplot.ylim(0.5, 1.5)
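The model means and dispersions in the example above are just the first two moments of the tabulated p(vlos); a minimal sketch with an artificial Gaussian PDF, values illustrative:

import numpy

vlos= numpy.linspace(-200.,200.,201)
pvlos= numpy.exp(-0.5*(vlos-10.)**2./30.**2.)
pvlos/= numpy.sum(pvlos) #normalize like the code above
avg= numpy.sum(vlos*pvlos) #~= 10
sig= numpy.sqrt(numpy.sum(vlos**2.*pvlos)-avg**2.) #~= 30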
Example #37
0
def classQSO(parser):
    (options,args)= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if os.path.exists(options.outfile):
        print options.outfile+" exists"
        print "Remove this file before running ..."
        print "Returning ..."
        return None
    #Load fit params: Quasars
    if os.path.exists(options.qsomodel):
        qsofile= open(options.qsomodel,'rb')
        try:
            xamp_qso= pickle.load(qsofile)
            xmean_qso= pickle.load(qsofile)
            xcovar_qso= pickle.load(qsofile)
        finally:
            qsofile.close()
    else:
        print "Input to 'qsomodel' not recognized ..."
        print "Returning ..."
        return
    #Stars
    if os.path.exists(options.starmodel):
        starfile= open(options.starmodel,'rb')
        try:
            xamp_star= pickle.load(starfile)
            xmean_star= pickle.load(starfile)
            xcovar_star= pickle.load(starfile)
        finally:
            starfile.close()
    else:
        print "Input to 'starmodel' not recognized ..."
        print "Returning ..."
        return
    ##RR Lyrae
    if os.path.exists(options.rrlyraemodel):
        rrlyraefile= open(options.rrlyraemodel,'rb')
        try:
            xamp_rrlyrae= pickle.load(rrlyraefile)
            xmean_rrlyrae= pickle.load(rrlyraefile)
            xcovar_rrlyrae= pickle.load(rrlyraefile)
        finally:
            rrlyraefile.close()
    else:
        print "Input to 'rrlyraemodel' not recognized ..."
        print "Returning ..."
        return
    #Restore samples
    savefilename= args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile= open(savefilename,'rb')
        samples= pickle.load(savefile)
        type= pickle.load(savefile)
        band= pickle.load(savefile)
        mean= pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    #Restore samples
    savefilename= args[1]
    print "Reading best fits ..."
    if os.path.exists(savefilename):
        savefile= open(savefilename,'rb')
        params= pickle.load(savefile)
        type= pickle.load(savefile)
        band= pickle.load(savefile)
        mean= pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    #Load the overall data, to later match back to ra and dec
    if 'nuvx' in args[0].lower():
        sources= fitsio.read('../data/nUVX_woname.fit')
    elif 'uvx' in args[0].lower():
        sources= fitsio.read('../data/uvx_woname.fit')
    sourcesDict= {}
    for ii in range(len(sources)):
        sourcesDict[sources[ii]['ONAME'].strip().replace(' ', '')+'.fit']= ii
    #Classify each source
    ndata= len(samples)
    print ndata
    logpxagamma_qso= numpy.zeros(ndata)
    logpxagamma_star= numpy.zeros(ndata)
    logpxagamma_rrlyrae= numpy.zeros(ndata)
    ras= numpy.zeros(ndata)
    decs= numpy.zeros(ndata)
    outgammas= numpy.zeros(ndata)
    outlogAs= numpy.zeros(ndata)
    for ii, key in enumerate(samples.keys()):
        sys.stdout.write('\r'+_ERASESTR+'\r')
        sys.stdout.flush()
        sys.stdout.write('\rWorking on %i / %i\r' % (ii+1,ndata))
        sys.stdout.flush()
        outgammas[ii]= params[key]['gamma'][0]
        outlogAs[ii]= params[key]['logA'][0]/2.
        if type == 'powerlawSF':
            #Stack as A,g,Ac,gc
            loggammas= []
            logAs= []
            try:
                for sample in samples[key]:
                    loggammas.append(numpy.log(sample['gamma'][0]))
                    logAs.append(sample['logA'][0]) #RITABAN
            except TypeError:
                loggammas.append(numpy.log(samples[key]['gamma'][0]))
                logAs.append(samples[key]['logA'][0])
            loggammas= numpy.array(loggammas)
            logAs= numpy.array(logAs)
            weights= -loggammas #1/gamma weight for a flat prior in log gamma, expressed as log(1/gamma)
            weights-= maxentropy.logsumexp(weights) #so exp(weights) sums to 1
            #Stack the data
            thisydata= numpy.reshape(loggammas,(len(loggammas),1))
            thisydata2= numpy.reshape(logAs,(len(logAs),1))
            thisydata= numpy.column_stack([thisydata,thisydata2])
            #Evaluate quasar/star/RR lyrae distributions
            logpxagamma_qso[ii]= maxentropy.logsumexp(weights+_eval_sumgaussians(thisydata,
                                                                 xamp_qso,
                                                                 xmean_qso,
                                                                 xcovar_qso))
            logpxagamma_star[ii]= maxentropy.logsumexp(weights+_eval_sumgaussians(thisydata,
                                                                 xamp_star,
                                                                 xmean_star,
                                                                 xcovar_star))
            logpxagamma_rrlyrae[ii]= maxentropy.logsumexp(weights+_eval_sumgaussians(thisydata,
                                                                 xamp_rrlyrae,
                                                                 xmean_rrlyrae,
                                                                 xcovar_rrlyrae))
            #Find RA and Dec
            try:
                ratmp= sources[sourcesDict[key]]['RA']
                dectmp= sources[sourcesDict[key]]['DEC']
            except KeyError:
                print "Failed to match for RA and Dec ..."
                continue
            else:
                ras[ii]= ratmp
                decs[ii]= dectmp
    sys.stdout.write('\r'+_ERASESTR+'\r')
    sys.stdout.flush()
    #Save
    saveClass(logpxagamma_qso,
              logpxagamma_star,
              logpxagamma_rrlyrae,
              ras,decs,
              outgammas,outlogAs,
              options.outfile)
    return None
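classQSO re-weights the variability posterior samples to impose a prior flat in log gamma: each sample gets weight 1/gamma, i.e. -log(gamma) in log space, normalized so the weights sum to one. A self-contained sketch of that re-weighting; the uniform toy samples and the _lse helper are illustrative:

import numpy

def _lse(x): #local stand-in for maxentropy.logsumexp
    mx= numpy.amax(x)
    return mx+numpy.log(numpy.sum(numpy.exp(x-mx)))

gammas= numpy.random.uniform(0.1,1.,size=1000) #toy samples, flat in gamma
weights= -numpy.log(gammas) #log of the 1/gamma re-weighting
weights-= _lse(weights) #now exp(weights) sums to one
print(numpy.sum(numpy.exp(weights))) #1.0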
Example #38
0
def plot_distanceprior(parser):
    (options,args)= parser.parse_args()
    #Read the data
    print "Reading the data ..."
    data= readVclosData(postshutdown=options.postshutdown,
                        fehcut=options.fehcut,
                        cohort=options.cohort,
                        lmin=options.lmin,
                        bmax=options.bmax,
                        ak=True,
                        cutmultiples=options.cutmultiples,
                        validfeh=options.indivfeh, #if indivfeh, we need validfeh
                        jkmax=options.jkmax,
                        datafilename=options.fakedata)
    l= data['GLON']*_DEGTORAD
    b= data['GLAT']*_DEGTORAD
    sinl= numpy.sin(l)
    cosl= numpy.cos(l)
    sinb= numpy.sin(b)
    cosb= numpy.cos(b)
    jk= data['J0MAG']-data['K0MAG']
    jk[(jk < 0.5)]= 0.5 #BOVY: FIX THIS HACK BY EMAILING GAIL
    h= data['H0MAG']
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile= open(options.isofile,'rb')
        iso= pickle.load(isofile)
        if options.indivfeh:
            zs= pickle.load(isofile)
        elif options.varfeh:
            locl= pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso= []
            zs= numpy.arange(0.0005,0.03005,0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             Z=zs[ii]))
        elif options.varfeh:
            locs= list(set(data['LOCATION']))
            iso= []
            for ii in range(len(locs)):
                indx= (data['LOCATION'] == locs[ii])
                locl= numpy.mean(data['GLON'][indx]*_DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso= isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                   expsfh=options.expsfh)
    #Set up polar grid
    res= 51
    xgrid= numpy.linspace(0.,2.*math.pi*(1.-1./res/2.),
                          2*res)
    ygrid= numpy.linspace(0.5,2.8,res)
    plotxgrid= numpy.linspace(xgrid[0]-(xgrid[1]-xgrid[0])/2.,
                              xgrid[-1]+(xgrid[1]-xgrid[0])/2.,
                              len(xgrid)+1)
    plotygrid= numpy.linspace(ygrid[0]-(ygrid[1]-ygrid[0])/2.,
                              ygrid[-1]+(ygrid[1]-ygrid[0])/2.,
                              len(ygrid)+1)
    plotthis= numpy.zeros((2*res,res,len(data)))-numpy.finfo(numpy.dtype(numpy.float64)).max
    #_BINTEGRATENBINS= 11 #For quick testing
    ds= numpy.linspace(_BINTEGRATEDMIN,_BINTEGRATEDMAX,_BINTEGRATENBINS)
    logpiso= numpy.zeros((len(data),_BINTEGRATENBINS))
    dm= _dm(ds)
    for ii in range(len(data)):
        mh= h[ii]-dm 
        if options.indivfeh:
            #Find closest Z
            thisZ= isodist.FEH2Z(data[ii]['FEH'])
            indx= numpy.argmin(numpy.fabs(thisZ-zs))
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+jk[ii],mh)
        elif options.varfeh:
            #Find correct iso
            indx= (locl == data[ii]['LOCATION'])
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+jk[ii],mh)
        else:
            logpiso[ii,:]= iso(numpy.zeros(_BINTEGRATENBINS)+jk[ii],mh)
    for jj in range(_BINTEGRATENBINS):
        d= ds[jj]/_REFR0
        R= numpy.sqrt(1.+d**2.-2.*d*cosl)
        indx= (R == 0.)
        R[indx]+= 0.0001
        theta= numpy.arcsin(d/R*sinl)
        indx= (1./cosl < d)*(cosl > 0.)
        theta[indx]= numpy.pi-theta[indx]
        indx= (theta < 0.)
        theta[indx]+= 2.*math.pi
        thisout= _logpd([0.,1.],d,None,None,
                        None,None,None,
                        options,R,theta,
                        1.,0.,logpiso[:,jj])
        #Find bin to which these contribute
        thetabin= numpy.floor((theta-xgrid[0])/(xgrid[1]-xgrid[0])+0.5)
        Rbin= numpy.floor((R-plotygrid[0])/(plotygrid[1]-plotygrid[0]))
        indx= (thetabin < 0)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx= (thetabin >= 2*res)
        thetabin[indx]= 0 #wrap around: theta= 2pi lands in the theta= 0 bin
        #Rbin[indx]= 0
        #thisout[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx= (Rbin < 0)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx= (Rbin >= res)
        thetabin[indx]= 0
        Rbin[indx]= 0
        thisout[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
        thetabin= thetabin.astype('int')
        Rbin= Rbin.astype('int')
        for ii in range(len(data)):
            plotthis[thetabin[ii],Rbin[ii],ii]= thisout[ii] #each star deposits at its own bin
    #Normalize
    for ii in range(2*res):
        for jj in range(res):
            plotthis[ii,jj,0]= logsumexp(plotthis[ii,jj,:])
    plotthis= plotthis[:,:,0]
    plotthis-= numpy.amax(plotthis)
    plotthis= numpy.exp(plotthis)
    plotthis[(plotthis == 0.)]= numpy.nan
    #Get los
    locations= list(set(data['LOCATION']))
    nlocs= len(locations)
    l_plate= numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx= (data['LOCATION'] == locations[ii])
        l_plate[ii]= numpy.mean(data['GLON'][indx])
    bovy_plot.bovy_print()
    ax= pyplot.subplot(111,projection='galpolar')#galpolar is in bovy_plot
    vmin, vmax= 0., 1.
    out= ax.pcolor(plotxgrid,plotygrid,plotthis.T,cmap='gist_yarg',
                   vmin=vmin,vmax=vmax,zorder=2)
    #Overlay los
    for ii in range(nlocs):
        lds= numpy.linspace(0.,2.95,501)
        lt= numpy.zeros(len(lds))
        lr= numpy.zeros(len(lds))
        lr= numpy.sqrt(1.+lds**2.-2.*lds*numpy.cos(l_plate[ii]*_DEGTORAD))
        lt= numpy.arcsin(lds/lr*numpy.sin(l_plate[ii]*_DEGTORAD))
        indx= (1./numpy.cos(l_plate[ii]*_DEGTORAD) < lds)*(numpy.cos(l_plate[ii]*_DEGTORAD) > 0.)
        lt[indx]= numpy.pi-lt[indx]
        ax.plot(lt,lr,
               ls='--',color='w',zorder=3)      
    from matplotlib.patches import Arrow, FancyArrowPatch
    arr= FancyArrowPatch(posA=(-math.pi/2.,1.8),
                         posB=(-math.pi/4.,1.8),
                         arrowstyle='->', 
                         connectionstyle='arc3,rad=%4.2f' % (-math.pi/16.),
                         shrinkA=2.0, shrinkB=2.0, mutation_scale=20.0, 
                         mutation_aspect=None,fc='k')
    ax.add_patch(arr)
    bovy_plot.bovy_text(-math.pi/2.,1.97,r'$\mathrm{Galactic\ rotation}$',
                         rotation=-22.5)
    radii= numpy.array([0.5,1.,1.5,2.,2.5])
    labels= []
    for r in radii:
        ax.plot(numpy.linspace(0.,2.*math.pi,501,),
                numpy.zeros(501)+r,ls='-',color='0.65',zorder=1,lw=0.5)
        labels.append(r'$%i$' % int(r*8.))
    pyplot.rgrids(radii,labels=labels,angle=-32.5)
    bovy_plot.bovy_text(5.785,2.82,r'$\mathrm{kpc}$')
    azs= numpy.array([0.,45.,90.,135.,180.,225.,270.,315.])*_DEGTORAD
    for az in azs:
        ax.plot(numpy.zeros(501)+az,
                numpy.linspace(0.,2.8,501),'-',color='0.6',lw=0.5,zorder=1)
    #Sun
    bovy_plot.bovy_text(0.065,.9075,r'$\odot$')
    pyplot.ylim(0.,2.8)
    bovy_plot.bovy_end_print(options.plotfile)
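The (d,l) to (R,theta) conversion above follows from the law of cosines with all distances in units of R0: R = sqrt(1+d^2-2*d*cos(l)) and theta = arcsin(d*sin(l)/R), with theta reflected to pi-theta once d*cos(l) > 1, where arcsin alone returns the wrong quadrant. A scalar sketch of the same geometry, values illustrative:

import math

d= 0.5 #distance in units of R0
l= math.radians(30.)
R= math.sqrt(1.+d**2.-2.*d*math.cos(l))
theta= math.asin(d/R*math.sin(l))
if math.cos(l) > 0. and d > 1./math.cos(l):
    theta= math.pi-theta #line of sight has crossed theta= pi/2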
Example #39
0
def _fit_orbit_mlogl(new_vxvv, vxvv, vxvv_err, pot, radec, lb, customsky,
                     lb_to_customsky, pmllpmbb_to_customsky, tmockAA, ro, vo,
                     obs):
    """The log likelihood for fitting an orbit"""
    #Use this _parse_args routine, which does forward and backward integration
    iR, ivR, ivT, iz, ivz, iphi = tmockAA._parse_args(True, False, new_vxvv[0],
                                                      new_vxvv[1], new_vxvv[2],
                                                      new_vxvv[3], new_vxvv[4],
                                                      new_vxvv[5])
    if radec or lb or customsky:
        #Need to transform to (l,b), (ra,dec), or a custom set
        #First transform to X,Y,Z,vX,vY,vZ (Galactic)
        X, Y, Z = coords.galcencyl_to_XYZ(iR.flatten(),
                                          iphi.flatten(),
                                          iz.flatten(),
                                          Xsun=obs[0] / ro,
                                          Zsun=obs[2] / ro).T
        vX, vY, vZ = coords.galcencyl_to_vxvyvz(ivR.flatten(), ivT.flatten(),
                                                ivz.flatten(), iphi.flatten(),
                                                vsun=nu.array(obs[3:6]) / vo,
                                                Xsun=obs[0] / ro,
                                                Zsun=obs[2] / ro).T
        bad_indx = (X == 0.) * (Y == 0.) * (Z == 0.)
        if True in bad_indx: X[bad_indx] += ro / 10000.
        lbdvrpmllpmbb = coords.rectgal_to_sphergal(X * ro,
                                                   Y * ro,
                                                   Z * ro,
                                                   vX * vo,
                                                   vY * vo,
                                                   vZ * vo,
                                                   degree=True)
        if lb:
            orb_vxvv = nu.array([
                lbdvrpmllpmbb[:, 0], lbdvrpmllpmbb[:, 1], lbdvrpmllpmbb[:, 2],
                lbdvrpmllpmbb[:, 4], lbdvrpmllpmbb[:, 5], lbdvrpmllpmbb[:, 3]
            ]).T
        elif radec:
            #Further transform to ra,dec,pmra,pmdec
            radec = coords.lb_to_radec(lbdvrpmllpmbb[:, 0],
                                       lbdvrpmllpmbb[:, 1],
                                       degree=True,
                                       epoch=None)
            pmrapmdec = coords.pmllpmbb_to_pmrapmdec(lbdvrpmllpmbb[:, 4],
                                                     lbdvrpmllpmbb[:, 5],
                                                     lbdvrpmllpmbb[:, 0],
                                                     lbdvrpmllpmbb[:, 1],
                                                     degree=True,
                                                     epoch=None)
            orb_vxvv = nu.array([
                radec[:, 0], radec[:, 1], lbdvrpmllpmbb[:, 2], pmrapmdec[:, 0],
                pmrapmdec[:, 1], lbdvrpmllpmbb[:, 3]
            ]).T
        elif customsky:
            #Further transform to ra,dec,pmra,pmdec
            customradec = lb_to_customsky(lbdvrpmllpmbb[:, 0],
                                          lbdvrpmllpmbb[:, 1],
                                          degree=True)
            custompmrapmdec = pmllpmbb_to_customsky(lbdvrpmllpmbb[:, 4],
                                                    lbdvrpmllpmbb[:, 5],
                                                    lbdvrpmllpmbb[:, 0],
                                                    lbdvrpmllpmbb[:, 1],
                                                    degree=True)
            orb_vxvv = nu.array([
                customradec[:, 0], customradec[:, 1], lbdvrpmllpmbb[:, 2],
                custompmrapmdec[:, 0], custompmrapmdec[:, 1],
                lbdvrpmllpmbb[:, 3]
            ]).T
    else:
        #shape=(2tintJ-1,6)
        orb_vxvv = nu.array([
            iR.flatten(),
            ivR.flatten(),
            ivT.flatten(),
            iz.flatten(),
            ivz.flatten(),
            iphi.flatten()
        ]).T
    out = 0.
    for ii in range(vxvv.shape[0]):
        sub_vxvv = (orb_vxvv - vxvv[ii, :].flatten())**2.
        #print(sub_vxvv[nu.argmin(nu.sum(sub_vxvv,axis=1))])
        if not vxvv_err is None:
            sub_vxvv /= vxvv_err[ii, :]**2.
        else:
            sub_vxvv /= 0.01**2.
        out += logsumexp(-0.5 * nu.sum(sub_vxvv, axis=1))
    return -out
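Each observed point contributes logsumexp over the orbit track of -chi^2/2: the point is effectively matched against every integrated phase-space point and the closest track points dominate the sum. A toy version of that per-point likelihood; the random track and the _lse helper are illustrative, and 0.01 is the same fallback error as in the code above:

import numpy as nu

def _lse(x): #local stand-in for the logsumexp used above
    mx= nu.amax(x)
    return mx+nu.log(nu.sum(nu.exp(x-mx)))

orb_track= nu.random.randn(50,6) #stands in for orb_vxvv
point= nu.zeros(6) #one observed phase-space point
chi2= nu.sum((orb_track-point)**2.,axis=1)/0.01**2.
loglike= _lse(-0.5*chi2)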
Example #40
0
File: crf.py Project: 52nlp/iir
def logdotexp_vec_mat(loga, logM):
    return numpy.array([maxentropy.logsumexp(loga + x) for x in logM.T], copy=False)
Example #41
0
File: crf.py Project: zzmjohn/iir
def logdotexp_mat_vec(logM, logb):
    return numpy.array([maxentropy.logsumexp(x + logb) for x in logM],
                       copy=False)
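Examples #40 and #41 (and the same helpers from another project below) are the log-space matrix-vector products used in CRF forward/backward recursions: logdotexp_vec_mat(loga,logM) computes log(exp(loga).exp(logM)) without leaving log space. A quick self-contained check against the naive linear-space product; the toy stochastic matrix is illustrative and _lse stands in for maxentropy.logsumexp:

import numpy

def _lse(x):
    mx= numpy.amax(x)
    return mx+numpy.log(numpy.sum(numpy.exp(x-mx)))

loga= numpy.log(numpy.array([0.2,0.8]))
logM= numpy.log(numpy.array([[0.5,0.5],
                             [0.1,0.9]]))
direct= numpy.log(numpy.dot(numpy.exp(loga),numpy.exp(logM)))
logspace= numpy.array([_lse(loga+x) for x in logM.T])
print(numpy.allclose(direct,logspace)) #True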
Example #42
0
def plot_chi2(parser):
    (options,args)= parser.parse_args()
    if len(args) == 0 or options.plotfilename is None:
        parser.print_help()
        return
    #Read the data
    print "Reading the data ..."
    data= readVclosData(postshutdown=options.postshutdown,
                        fehcut=options.fehcut,
                        cohort=options.cohort,
                        lmin=options.lmin,
                        bmax=options.bmax,
                        ak=True,
                        cutmultiples=options.cutmultiples,
                        validfeh=options.indivfeh, #if indivfeh, we need validfeh
                        jkmax=options.jkmax,
                        datafilename=options.fakedata)
    #HACK
    indx= (data['J0MAG']-data['K0MAG'] < 0.5)
    data['J0MAG'][indx]= 0.5+data['K0MAG'][indx]
    #Cut inner disk locations
    #data= data[(data['GLON'] > 75.)]
    #Cut outliers
    #data= data[(data['VHELIO'] < 200.)*(data['VHELIO'] > -200.)]
    print "Using %i data points ..." % len(data)
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile= open(options.isofile,'rb')
        iso= pickle.load(isofile)
        if options.indivfeh:
            zs= pickle.load(isofile)
        if options.varfeh:
            locl= pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso= []
            zs= numpy.arange(0.0005,0.03005,0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             Z=zs[ii]))
        elif options.varfeh:
            locs= list(set(data['LOCATION']))
            iso= []
            for ii in range(len(locs)):
                indx= (data['LOCATION'] == locs[ii])
                locl= numpy.mean(data['GLON'][indx]*_DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso= isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                   expsfh=options.expsfh)
        if options.dwarf:
            iso= [iso, 
                  isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                    dwarf=True,expsfh=options.expsfh)]
        else:
            iso= [iso]
        if not options.isofile is None:
            isofile= open(options.isofile,'wb')
            pickle.dump(iso,isofile)
            if options.indivfeh:
                pickle.dump(zs,isofile)
            elif options.varfeh:
                pickle.dump(locl,isofile)
            isofile.close()
    df= None
    print "Pre-calculating isochrone distance prior ..."
    logpiso= numpy.zeros((len(data),_BINTEGRATENBINS))
    ds= numpy.linspace(_BINTEGRATEDMIN,_BINTEGRATEDMAX,
                       _BINTEGRATENBINS)
    dm= _dm(ds)
    for ii in range(len(data)):
        mh= data['H0MAG'][ii]-dm
        if options.indivfeh:
            #Find closest Z
            thisZ= isodist.FEH2Z(data[ii]['FEH'])
            indx= numpy.argmin(numpy.fabs(thisZ-zs))
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+(data['J0MAG']-data['K0MAG'])[ii],mh)
        elif options.varfeh:
            #Find correct iso
            indx= (locl == data[ii]['LOCATION'])
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+(data['J0MAG']-data['K0MAG'])[ii],mh)
        else:
            logpiso[ii,:]= iso[0](numpy.zeros(_BINTEGRATENBINS)
                                  +(data['J0MAG']-data['K0MAG'])[ii],mh)
    if options.dwarf:
        logpisodwarf= numpy.zeros((len(data),_BINTEGRATENBINS))
        dwarfds= numpy.linspace(_BINTEGRATEDMIN_DWARF,_BINTEGRATEDMAX_DWARF,
                                    _BINTEGRATENBINS)
        dm= _dm(dwarfds)
        for ii in range(len(data)):
            mh= data['H0MAG'][ii]-dm
            logpisodwarf[ii,:]= iso[1](numpy.zeros(_BINTEGRATENBINS)
                                       +(data['J0MAG']-data['K0MAG'])[ii],mh)
    else:
        logpisodwarf= None
    #Load initial parameters from file
    savefile= open(args[0],'rb')
    params= pickle.load(savefile)
    if not options.index is None:
        params= params[options.index]
    savefile.close()
    #params[0]= 245./235.
    #params[1]= 8.5/8.
    #Calculate data means etc.
    #Calculate means
    locations= list(set(data['LOCATION']))
    nlocs= len(locations)
    l_plate= numpy.zeros(nlocs)
    avg_plate= numpy.zeros(nlocs)
    sig_plate= numpy.zeros(nlocs)
    siga_plate= numpy.zeros(nlocs)
    sigerr_plate= numpy.zeros(nlocs)
    fidlogl= logl.logl(init=params,data=data,options=options)
    logl_plate= numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx= (data['LOCATION'] == locations[ii])
        l_plate[ii]= numpy.mean(data['GLON'][indx])
        avg_plate[ii]= numpy.mean(data['VHELIO'][indx])
        sig_plate[ii]= numpy.std(data['VHELIO'][indx])
        siga_plate[ii]= numpy.std(data['VHELIO'][indx])/numpy.sqrt(numpy.sum(indx))
        sigerr_plate[ii]= bootstrap_sigerr(data['VHELIO'][indx])
        #Logl: Delta chi^2 of this plate = twice the difference between the
        #plate's summed log-likelihood and its expected share of the total
        logl_plate[ii]= -2.*(numpy.sum(fidlogl[indx])-numpy.sum(fidlogl)/len(indx)*numpy.sum(indx))
    #Calculate plate means and variances from the model
    avg_plate_model= numpy.zeros(nlocs)
    sig_plate_model= numpy.zeros(nlocs)
    for ii in range(nlocs):
        #Calculate vlos | los
        indx= (data['LOCATION'] == locations[ii])
        thesedata= data[indx]
        thislogpiso= logpiso[indx,:]
        if options.dwarf:
            thislogpisodwarf= logpisodwarf[indx,:]
        else:
            thislogpisodwarf= None
        vlos= numpy.linspace(-200.,200.,options.nvlos)
        pvlos= numpy.zeros(options.nvlos)
        if not options.multi is None:
            pvlos= multi.parallel_map((lambda x: pvlosplate(params,vlos[x],
                                                            thesedata,
                                                            df,options,
                                                            thislogpiso,
                                                            thislogpisodwarf,iso)),
                                      range(options.nvlos),
                                      numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),options.multi]))
        else:
            for jj in range(options.nvlos):
                print jj
                pvlos[jj]= pvlosplate(params,vlos[jj],thesedata,df,options,
                                      thislogpiso,thislogpisodwarf,iso)
        pvlos-= logsumexp(pvlos)
        pvlos= numpy.exp(pvlos)
        #Calculate mean and velocity dispersion
        avg_plate_model[ii]= numpy.sum(vlos*pvlos)
        sig_plate_model[ii]= numpy.sqrt(numpy.sum(vlos**2.*pvlos)\
                                            -avg_plate_model[ii]**2.)
    #Plot everything
    left, bottom, width, height= 0.1, 0.4, 0.8, 0.3
    axTop= pyplot.axes([left,bottom,width,height])
    left, bottom, width, height= 0.1, 0.1, 0.8, 0.3
    axChi2= pyplot.axes([left,bottom,width,height])
    #left, bottom, width, height= 0.1, 0.1, 0.8, 0.2
    #axSig= pyplot.axes([left,bottom,width,height])
    fig= pyplot.gcf()
    #Plot the difference
    fig.sca(axTop)
    bovy_plot.bovy_plot([0.,360.],[0.,0.],'-',color='0.5',overplot=True)
    bovy_plot.bovy_plot(l_plate,
                        avg_plate-avg_plate_model,
                        'ko',overplot=True)
    pyplot.errorbar(l_plate,avg_plate-avg_plate_model,
                    yerr=siga_plate,marker='o',color='k',linestyle='none',elinestyle='-')
    pyplot.ylabel(r'$\langle v_{\mathrm{los}}\rangle_{\mathrm{data}}-\langle v_{\mathrm{los}}\rangle_{\mathrm{model}}$')
    pyplot.ylim(-14.5,14.5)
    pyplot.xlim(0.,360.)
    bovy_plot._add_ticks()
    nullfmt   = NullFormatter()         # no labels
    axTop.xaxis.set_major_formatter(nullfmt)
    #pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$')
    pyplot.xlim(0.,360.)
    bovy_plot._add_ticks()
    #Plot the chi2
    fig.sca(axChi2)
    bovy_plot.bovy_plot([0.,360.],[0.,0.],'-',color='0.5',overplot=True)
    bovy_plot.bovy_plot(l_plate,
                        logl_plate,
                        'ko',overplot=True)
    pyplot.ylabel(r'$\Delta \chi^2$')
    #pyplot.ylim(numpy.amin(logl_plate),numpy.amax(logl_plate))
    pyplot.ylim(-150.,150.)
    pyplot.xlim(0.,360.)
    bovy_plot._add_ticks()
    pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$')
    pyplot.xlim(0.,360.)
    bovy_plot._add_ticks()
    #Save
    bovy_plot.bovy_end_print(options.plotfilename)
    return None
Example #43
0
File: crf.py Project: 52nlp/iir
def logdotexp_mat_vec(logM, logb):
    return numpy.array([maxentropy.logsumexp(x + logb) for x in logM], copy=False)
Example #44
0
background_means = comm.bcast(background_means, root=0)
background_covs = comm.bcast(background_covs, root=0)

# SCATTER DATA
star_means = comm.scatter(star_means, root=0)
star_covs = comm.scatter(star_covs, root=0)

#print(rank, len(star_means))

# EVERY PROCESS DOES THIS FOR ITS DATA
bg_ln_ols = []
for star_cov, star_mean in zip(star_covs, star_means):
    try:
        bg_lnol = get_lnoverlaps(star_cov, star_mean, background_covs,
                                 background_means, nstars)
        bg_lnol = logsumexp(bg_lnol)  # sum in linear space
    except Exception:
        # TC: Changed sign to negative (surely if it fails, we want it to
        # have a negligible background overlap?)
        print('bg ln overlap failed, setting it to -inf')
        bg_lnol = -np.inf

    bg_ln_ols.append(bg_lnol)
#print(rank, bg_ln_ols)

# GATHER DATA
bg_ln_ols_result = comm.gather(bg_ln_ols, root=0)
if rank == 0:
    bg_ln_ols_result = list(itertools.chain.from_iterable(bg_ln_ols_result))
    np.savetxt('bgols_multiprocessing_%d.dat' % NI, bg_ln_ols_result)
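The snippet above is the standard mpi4py pattern: broadcast shared arrays to every rank, scatter per-star chunks, compute locally, then gather the partial results on the root rank. A minimal self-contained sketch of the same pattern, to be run under mpirun; all names and the toy workload are illustrative:

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

shared = np.arange(10.) if rank == 0 else None
shared = comm.bcast(shared, root=0)  #every rank gets the full array

chunks = [np.arange(5.) + i for i in range(size)] if rank == 0 else None
chunk = comm.scatter(chunks, root=0)  #one chunk per rank

partial = [float(np.sum(shared) + x) for x in chunk]  #local work

gathered = comm.gather(partial, root=0)  #root collects all partial lists
if rank == 0:
    flat = [y for part in gathered for y in part]
    print(len(flat))  #size*5 results, in rank order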
Example #45
0
def map_vc_like_simple(parser):
    """
    NAME:
       map_vc_like_simple
    PURPOSE:
       map the vc likelihood assuming knowledge of the DF
    INPUT:
       parser - from optparse
    OUTPUT:
       stuff as specified by the options
    HISTORY:
       2011-04-20 - Written - Bovy (NYU)
    """
    (options,args)= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        sys.exit(-1)
    #Set up DF
    dfc= dehnendf(beta=0.,profileParams=(options.rd,options.rs,options.so),
                  correct=True,niter=20)
    #Load data
    picklefile= open(args[0],'rb')
    out= pickle.load(picklefile)
    picklefile.close()
    ndata= len(out)
    if options.linearfit:
        plot_linear(out,options.los*_DEGTORAD,options,dfc)
        return None
    #Map likelihood
    vcirc= nu.linspace(options.vmin,options.vmax,options.nvcirc)
    if not options.nbeta is None:
        betas= nu.linspace(options.betamin,options.betamax,options.nbeta)
        like= nu.zeros((options.nvcirc,options.nbeta))
        for ii in range(options.nvcirc):
            for kk in range(options.nbeta):
                thislike= 0.
                for jj in range(ndata):
                    thislike+= single_vlos_loglike(vcirc[ii],out[jj],dfc,
                                                   options,
                                                   options.los*_DEGTORAD,
                                                   beta=betas[kk])
                like[ii,kk]= thislike
        like-= logsumexp(like.flatten())+m.log(vcirc[1]-vcirc[0])
        bovy_plot.bovy_print()
        bovy_plot.bovy_dens2d(nu.exp(like).T,
                              origin='lower',
                              xrange=[options.vmin,options.vmax],
                              yrange=[options.betamin,options.betamax],
                              aspect=(options.vmax-options.vmin)/\
                                  (options.betamax-options.betamin),
                              cmap='gist_yarg',
                              xlabel=r'$v_c / v_0$',
                              ylabel=r'$\beta$',
                              contours=True,cntrmass=True,
                              levels=[0.682,0.954,0.997])
        bovy_plot.bovy_text(r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so\
                                +'\n'+\
                                r'$l  = %i^\circ$' % round(options.los),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
    else:
        like= nu.zeros(options.nvcirc)           
        for ii in range(options.nvcirc):
            thislike= 0.
            for jj in range(ndata):
                thislike+= single_vlos_loglike(vcirc[ii],out[jj],dfc,options,
                                               options.los*_DEGTORAD)
            like[ii]= thislike
        like-= logsumexp(like)+m.log(vcirc[1]-vcirc[0])
        #Calculate mean and sigma
        vcmean= nu.sum(vcirc*nu.exp(like)*(vcirc[1]-vcirc[0]))
        vc2mean= nu.sum(vcirc**2.*nu.exp(like)*(vcirc[1]-vcirc[0]))
        #Plot
        bovy_plot.bovy_print()
        bovy_plot.bovy_plot(vcirc,nu.exp(like),'k-',xlabel=r'$v_c / v_0$',
                            ylabel=r'$p(\mathrm{data} | v_c)$')
        bovy_plot.bovy_text(r'$\langle v_c \rangle = %4.2f \ v_0$' % vcmean +'\n'+
                            r'$\sqrt{\langle v_c^2 \rangle - \langle v_c \rangle^2} = %4.2f \ v_0$' % (m.sqrt(vc2mean-vcmean**2.)) +'\n'+\
                                r'$\sigma_R(R_0) = %4.2f \ v_0$' % options.so+'\n'+\
                                r'$l  = %i^\circ$' % round(options.los),
                            top_left=True)
        bovy_plot.bovy_end_print(options.plotfilename)
Example #46
0
File: crf.py Project: zzmjohn/iir
def logdotexp_vec_mat(loga, logM):
    return numpy.array([maxentropy.logsumexp(loga + x) for x in logM.T],
                       copy=False)
Example #47
0
def createFakeData(parser):
    options, args= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if os.path.exists(options.plotfile):
        print "Outfile "+options.plotfile+" exists ..."
        print "Returning ..."
        return None
    #Read the data
    numpy.random.seed(options.seed)
    print "Reading the data ..."
    data= readVclosData(postshutdown=options.postshutdown,
                        fehcut=options.fehcut,
                        cohort=options.cohort,
                        lmin=options.lmin,
                        bmax=options.bmax,
                        validfeh=options.indivfeh, #if indivfeh, we need validfeh
                        ak=True,
                        cutmultiples=options.cutmultiples,
                        jkmax=options.jkmax)
    #HACK
    indx= (data['J0MAG']-data['K0MAG'] < 0.5)
    data['J0MAG'][indx]= 0.5+data['K0MAG'][indx]
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile= open(options.isofile,'rb')
        iso= pickle.load(isofile)
        if options.indivfeh:
            zs= pickle.load(isofile)
        elif options.varfeh:
            locl= pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso= []
            zs= numpy.arange(0.0005,0.03005,0.0005)
            for ii in range(len(zs)):
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             Z=zs[ii]))
        elif options.varfeh:
            locs= list(set(data['LOCATION']))
            iso= []
            for ii in range(len(locs)):
                indx= (data['LOCATION'] == locs[ii])
                locl= numpy.mean(data['GLON'][indx]*_DEGTORAD)
                iso.append(isomodel.isomodel(imfmodel=options.imfmodel,
                                             expsfh=options.expsfh,
                                             marginalizefeh=True,
                                             glon=locl))
        else:
            iso= isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                   expsfh=options.expsfh)
        if options.dwarf:
            iso= [iso,
                  isomodel.isomodel(imfmodel=options.imfmodel,Z=options.Z,
                                    dwarf=True,expsfh=options.expsfh)]
        else:
            iso= [iso]
        if not options.isofile is None:
            isofile= open(options.isofile,'wb')
            pickle.dump(iso,isofile)
            if options.indivfeh:
                pickle.dump(zs,isofile)
            elif options.varfeh:
                pickle.dump(locl,isofile)
            isofile.close()
    df= None
    print "Pre-calculating isochrone distance prior ..."
    logpiso= numpy.zeros((len(data),_BINTEGRATENBINS))
    ds= numpy.linspace(_BINTEGRATEDMIN,_BINTEGRATEDMAX,
                       _BINTEGRATENBINS)
    dm= _dm(ds)
    for ii in range(len(data)):
        mh= data['H0MAG'][ii]-dm
        if options.indivfeh:
            #Find closest Z
            thisZ= isodist.FEH2Z(data[ii]['FEH'])
            indx= numpy.argmin(numpy.fabs(thisZ-zs))
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+(data['J0MAG']-data['K0MAG'])[ii],mh)
        elif options.varfeh:
            #Find correct iso
            indx= (locl == data[ii]['LOCATION'])
            logpiso[ii,:]= iso[0][indx](numpy.zeros(_BINTEGRATENBINS)+(data['J0MAG']-data['K0MAG'])[ii],mh)
        else:
            logpiso[ii,:]= iso[0](numpy.zeros(_BINTEGRATENBINS)
                                  +(data['J0MAG']-data['K0MAG'])[ii],mh)
    if options.dwarf:
        logpisodwarf= numpy.zeros((len(data),_BINTEGRATENBINS))
        dwarfds= numpy.linspace(_BINTEGRATEDMIN_DWARF,_BINTEGRATEDMAX_DWARF,
                                    _BINTEGRATENBINS)
        dm= _dm(dwarfds)
        for ii in range(len(data)):
            mh= data['H0MAG'][ii]-dm
            logpisodwarf[ii,:]= iso[1](numpy.zeros(_BINTEGRATENBINS)
                                       +(data['J0MAG']-data['K0MAG'])[ii],mh)
    else:
        logpisodwarf= None
    #Load initial parameters from file
    savefile= open(args[0],'rb')
    params= pickle.load(savefile)
    savefile.close()
    #Prep data
    l= data['GLON']*_DEGTORAD
    b= data['GLAT']*_DEGTORAD
    sinl= numpy.sin(l)
    cosl= numpy.cos(l)
    sinb= numpy.sin(b)
    cosb= numpy.cos(b)
    jk= data['J0MAG']-data['K0MAG']
    jk[(jk < 0.5)]= 0.5 #BOVY: FIX THIS HACK BY EMAILING GAIL
    h= data['H0MAG']
    #Re-sample
    vlos= numpy.linspace(-200.,200.,options.nvlos)
    pvlos= numpy.zeros((len(data),options.nvlos))
    if options.dwarf:
        thislogpisodwarf= logpisodwarf
    else:
        thislogpisodwarf= None
    if not options.multi is None and options.multi > 1:
        thismulti= options.multi
        options.multi= 1 #avoid nested multiprocessing inside mloglike
        thispvlos= multi.parallel_map((lambda x: -mloglike(params,
                                                           numpy.zeros(len(data))+vlos[x],
                                                           l,
                                                           b,
                                                           jk,
                                                           h,
                                                           df,options,
                                                           sinl,
                                                           cosl,
                                                           cosb,
                                                           sinb,
                                                           logpiso,
                                                           thislogpisodwarf,
                                                           True,
                                                           None,None,None)),
                                      range(options.nvlos),
                                      numcores=numpy.amin([len(vlos),multiprocessing.cpu_count(),thismulti]))
        for jj in range(options.nvlos):
            pvlos[:,jj]= thispvlos[jj]
    else:
        for jj in range(options.nvlos):
            pvlos[:,jj]= -mloglike(params,numpy.zeros(len(data))+vlos[jj],
                                   l,
                                   b,
                                   jk,
                                   h,
                                   df,options,
                                   sinl,
                                   cosl,
                                   cosb,
                                   sinb,
                                   logpiso,
                                   thislogpisodwarf,True,None,None,None)
    """
    for jj in range(options.nvlos):
        pvlos[:,jj]= -mloglike(params,numpy.zeros(len(data))+vlos[jj],
                               l,
                               b,
                               jk,
                               h,
                               df,options,
                               sinl,
                               cosl,
                               cosb,
                               sinb,
                               logpiso,
                               thislogpisodwarf,True,None,None,None)
    """
    for ii in range(len(data)):
        pvlos[ii,:]-= logsumexp(pvlos[ii,:])
        pvlos[ii,:]= numpy.exp(pvlos[ii,:])
        pvlos[ii,:]= numpy.cumsum(pvlos[ii,:])
        pvlos[ii,:]/= pvlos[ii,-1]
        #Draw
        randindx= numpy.random.uniform()
        kk= 0
        while pvlos[ii,kk] < randindx:
            kk+= 1
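        #(equivalently: kk= numpy.searchsorted(pvlos[ii,:],randindx))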
        data['VHELIO'][ii]= vlos[kk]
    #Dump raw
    fitsio.write(options.plotfile,data,clobber=True)
Example #48
0
def plot_distanceprior(parser):
    (options, args) = parser.parse_args()
    #Read the data
    print "Reading the data ..."
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  #if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata)
    l = data['GLON'] * _DEGTORAD
    b = data['GLAT'] * _DEGTORAD
    sinl = numpy.sin(l)
    cosl = numpy.cos(l)
    sinb = numpy.sin(b)
    cosb = numpy.cos(b)
    jk = data['J0MAG'] - data['K0MAG']
    jk[(jk < 0.5)] = 0.5  #BOVY: FIX THIS HACK BY EMAILING GAIL
    h = data['H0MAG']
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile = open(options.isofile, 'rb')
        iso = pickle.load(isofile)
        if options.indivfeh:
            zs = pickle.load(isofile)
        elif options.varfeh:
            locs = pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for ii in range(len(locs)):
                indx = (data['LOCATION'] == locs[ii])
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      marginalizefeh=True,
                                      glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        #Wrap as [iso] so the indexing below matches the pickled format loaded above
        iso = [iso]
    #Set up polar grid
    res = 51
    xgrid = numpy.linspace(0., 2. * math.pi * (1. - 1. / res / 2.), 2 * res)
    ygrid = numpy.linspace(0.5, 2.8, res)
    plotxgrid = numpy.linspace(xgrid[0] - (xgrid[1] - xgrid[0]) / 2.,
                               xgrid[-1] + (xgrid[1] - xgrid[0]) / 2.,
                               len(xgrid) + 1)
    plotygrid = numpy.linspace(ygrid[0] - (ygrid[1] - ygrid[0]) / 2.,
                               ygrid[-1] + (ygrid[1] - ygrid[0]) / 2.,
                               len(ygrid) + 1)
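    #plotxgrid/plotygrid are the cell edges pcolor expects (one more than the centers)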
    plotthis = numpy.zeros((2 * res, res, len(data))) - numpy.finfo(
        numpy.dtype(numpy.float64)).max
    #_BINTEGRATENBINS= 11 #For quick testing
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    dm = _dm(ds)
    for ii in range(len(data)):
        mh = h[ii] - dm
        if options.indivfeh:
            #Find closest Z
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          jk[ii], mh)
        elif options.varfeh:
            #Find correct iso by plate location
            indx = locs.index(data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          jk[ii], mh)
        else:
            logpiso[ii, :] = iso[0](numpy.zeros(_BINTEGRATENBINS) + jk[ii], mh)
    for jj in range(_BINTEGRATENBINS):
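        #Convert heliocentric (d,l) to Galactocentric (R,theta), distances in
        #units of _REFR0; past the tangent point the arcsin branch is flipped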
        d = ds[jj] / _REFR0
        R = numpy.sqrt(1. + d**2. - 2. * d * cosl)
        indx = (R == 0.)
        R[indx] += 0.0001
        theta = numpy.arcsin(d / R * sinl)
        indx = (1. / cosl < d) * (cosl > 0.)
        theta[indx] = numpy.pi - theta[indx]
        indx = (theta < 0.)
        theta[indx] += 2. * math.pi
        thisout = _logpd([0., 1.], d, None, None, None, None, None, options, R,
                         theta, 1., 0., logpiso[:, jj])
        #Find bin to which these contribute
        thetabin = numpy.floor((theta - xgrid[0]) / (xgrid[1] - xgrid[0]) +
                               0.5)
        Rbin = numpy.floor((R - plotygrid[0]) / (plotygrid[1] - plotygrid[0]))
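        #(theta bins round to the nearest grid center, hence the +0.5;
        # R bins use the plotygrid edges directly)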
        indx = (thetabin < 0)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx = (thetabin >= 2 * res)
        thetabin[indx] = 0  #theta is periodic: bins past the top wrap to the first
        #Rbin[indx]= 0
        #thisout[indx]= -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx = (Rbin < 0)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = -numpy.finfo(numpy.dtype(numpy.float64)).max
        indx = (Rbin >= res)
        thetabin[indx] = 0
        Rbin[indx] = 0
        thisout[indx] = -numpy.finfo(numpy.dtype(numpy.float64)).max
        thetabin = thetabin.astype('int')
        Rbin = Rbin.astype('int')
        for ii in range(len(data)):
            plotthis[thetabin, Rbin, ii] = thisout[ii]
    #Sum over stars in log space (stashed in the [:, :, 0] slice), then normalize
    for ii in range(2 * res):
        for jj in range(res):
            plotthis[ii, jj, 0] = logsumexp(plotthis[ii, jj, :])
    plotthis = plotthis[:, :, 0]
    plotthis -= numpy.amax(plotthis)
    plotthis = numpy.exp(plotthis)
    plotthis[(plotthis == 0.)] = numpy.nan
    #Get los
    locations = list(set(data['LOCATION']))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    for ii in range(nlocs):
        indx = (data['LOCATION'] == locations[ii])
        l_plate[ii] = numpy.mean(data['GLON'][indx])
    bovy_plot.bovy_print()
    ax = pyplot.subplot(111, projection='galpolar')  #galpolar is in bovy_plot
    vmin, vmax = 0., 1.
    out = ax.pcolor(plotxgrid,
                    plotygrid,
                    plotthis.T,
                    cmap='gist_yarg',
                    vmin=vmin,
                    vmax=vmax,
                    zorder=2)
    #Overlay los
    for ii in range(nlocs):
        lds = numpy.linspace(0., 2.95, 501)
        lr = numpy.sqrt(1. + lds**2. -
                        2. * lds * numpy.cos(l_plate[ii] * _DEGTORAD))
        lt = numpy.arcsin(lds / lr * numpy.sin(l_plate[ii] * _DEGTORAD))
        indx = (1. / numpy.cos(l_plate[ii] * _DEGTORAD) < lds) * (numpy.cos(
            l_plate[ii] * _DEGTORAD) > 0.)
        lt[indx] = numpy.pi - lt[indx]
        ax.plot(lt, lr, ls='--', color='w', zorder=3)
    from matplotlib.patches import Arrow, FancyArrowPatch
    arr = FancyArrowPatch(posA=(-math.pi / 2., 1.8),
                          posB=(-math.pi / 4., 1.8),
                          arrowstyle='->',
                          connectionstyle='arc3,rad=%4.2f' % (-math.pi / 16.),
                          shrinkA=2.0,
                          shrinkB=2.0,
                          mutation_scale=20.0,
                          mutation_aspect=None,
                          fc='k')
    ax.add_patch(arr)
    bovy_plot.bovy_text(-math.pi / 2.,
                        1.97,
                        r'$\mathrm{Galactic\ rotation}$',
                        rotation=-22.5)
    radii = numpy.array([0.5, 1., 1.5, 2., 2.5])
    labels = []
    for r in radii:
        ax.plot(numpy.linspace(0., 2. * math.pi, 501),
                numpy.zeros(501) + r,
                ls='-',
                color='0.65',
                zorder=1,
                lw=0.5)
        labels.append(r'$%i$' % int(r * 8.))
    pyplot.rgrids(radii, labels=labels, angle=-32.5)
    bovy_plot.bovy_text(5.785, 2.82, r'$\mathrm{kpc}$')
    azs = numpy.array([0., 45., 90., 135., 180., 225., 270., 315.]) * _DEGTORAD
    for az in azs:
        ax.plot(numpy.zeros(501) + az,
                numpy.linspace(0., 2.8, 501),
                '-',
                color='0.6',
                lw=0.5,
                zorder=1)
    #Sun
    bovy_plot.bovy_text(0.065, .9075, r'$\odot$')
    pyplot.ylim(0., 2.8)
    bovy_plot.bovy_end_print(options.plotfile)
Example #49
0
def plot_bestfit(parser):
    (options, args) = parser.parse_args()
    if len(args) == 0 or options.plotfilename is None:
        parser.print_help()
        return
    #Read the data
    print "Reading the data ..."
    data = readVclosData(
        postshutdown=options.postshutdown,
        fehcut=options.fehcut,
        cohort=options.cohort,
        lmin=options.lmin,
        bmax=options.bmax,
        ak=True,
        cutmultiples=options.cutmultiples,
        validfeh=options.indivfeh,  #if indivfeh, we need validfeh
        jkmax=options.jkmax,
        datafilename=options.fakedata)
    #HACK: clamp J0-K0 colors bluer than 0.5
    indx = (data['J0MAG'] - data['K0MAG'] < 0.5)
    data['J0MAG'][indx] = 0.5 + data['K0MAG'][indx]
    #Cut inner disk locations
    #data= data[(data['GLON'] > 75.)]
    #Cut outliers
    #data= data[(data['VHELIO'] < 200.)*(data['VHELIO'] > -200.)]
    print "Using %i data points ..." % len(data)
    #Set up the isochrone
    if not options.isofile is None and os.path.exists(options.isofile):
        print "Loading the isochrone model ..."
        isofile = open(options.isofile, 'rb')
        iso = pickle.load(isofile)
        if options.indivfeh:
            zs = pickle.load(isofile)
        elif options.varfeh:
            locs = pickle.load(isofile)
        isofile.close()
    else:
        print "Setting up the isochrone model ..."
        if options.indivfeh:
            #Load all isochrones
            iso = []
            zs = numpy.arange(0.0005, 0.03005, 0.0005)
            for ii in range(len(zs)):
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      Z=zs[ii]))
        elif options.varfeh:
            locs = list(set(data['LOCATION']))
            iso = []
            for ii in range(len(locs)):
                indx = (data['LOCATION'] == locs[ii])
                locl = numpy.mean(data['GLON'][indx] * _DEGTORAD)
                iso.append(
                    isomodel.isomodel(imfmodel=options.imfmodel,
                                      expsfh=options.expsfh,
                                      marginalizefeh=True,
                                      glon=locl))
        else:
            iso = isomodel.isomodel(imfmodel=options.imfmodel,
                                    Z=options.Z,
                                    expsfh=options.expsfh)
        if options.dwarf:
            iso = [
                iso,
                isomodel.isomodel(imfmodel=options.imfmodel,
                                  Z=options.Z,
                                  dwarf=True,
                                  expsfh=options.expsfh)
            ]
        else:
            iso = [iso]
        if not options.isofile is None:
            isofile = open(options.isofile, 'wb')
            pickle.dump(iso, isofile)
            if options.indivfeh:
                pickle.dump(zs, isofile)
            elif options.varfeh:
                pickle.dump(locs, isofile)
            isofile.close()
    df = None
    print "Pre-calculating isochrone distance prior ..."
    logpiso = numpy.zeros((len(data), _BINTEGRATENBINS))
    ds = numpy.linspace(_BINTEGRATEDMIN, _BINTEGRATEDMAX, _BINTEGRATENBINS)
    dm = _dm(ds)
    for ii in range(len(data)):
        mh = data['H0MAG'][ii] - dm
        if options.indivfeh:
            #Find closest Z
            thisZ = isodist.FEH2Z(data[ii]['FEH'])
            indx = numpy.argmin(numpy.fabs(thisZ - zs))
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          (data['J0MAG'] - data['K0MAG'])[ii],
                                          mh)
        elif options.varfeh:
            #Find correct iso by plate location
            indx = locs.index(data[ii]['LOCATION'])
            logpiso[ii, :] = iso[0][indx](numpy.zeros(_BINTEGRATENBINS) +
                                          (data['J0MAG'] - data['K0MAG'])[ii],
                                          mh)
        else:
            logpiso[ii, :] = iso[0](numpy.zeros(_BINTEGRATENBINS) +
                                    (data['J0MAG'] - data['K0MAG'])[ii], mh)
    if options.dwarf:
        logpisodwarf = numpy.zeros((len(data), _BINTEGRATENBINS))
        dwarfds = numpy.linspace(_BINTEGRATEDMIN_DWARF, _BINTEGRATEDMAX_DWARF,
                                 _BINTEGRATENBINS)
        dm = _dm(dwarfds)
        for ii in range(len(data)):
            mh = data['H0MAG'][ii] - dm
            logpisodwarf[ii, :] = iso[1](numpy.zeros(_BINTEGRATENBINS) +
                                         (data['J0MAG'] - data['K0MAG'])[ii],
                                         mh)
    else:
        logpisodwarf = None
    #Calculate data means etc.
    #Calculate means
    locations = list(set(data['LOCATION']))
    nlocs = len(locations)
    l_plate = numpy.zeros(nlocs)
    avg_plate = numpy.zeros(nlocs)
    sig_plate = numpy.zeros(nlocs)
    siga_plate = numpy.zeros(nlocs)
    sigerr_plate = numpy.zeros(nlocs)
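    #Per-plate statistics: mean VHELIO, dispersion, error on the mean,
    #and a bootstrap error on the dispersion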
    for ii in range(nlocs):
        indx = (data['LOCATION'] == locations[ii])
        l_plate[ii] = numpy.mean(data['GLON'][indx])
        avg_plate[ii] = numpy.mean(data['VHELIO'][indx])
        sig_plate[ii] = numpy.std(data['VHELIO'][indx])
        siga_plate[ii] = numpy.std(data['VHELIO'][indx]) / numpy.sqrt(
            numpy.sum(indx))
        sigerr_plate[ii] = bootstrap_sigerr(data['VHELIO'][indx])
    #Calculate plate means and variances from the model
    #Load initial parameters from file
    savefile = open(args[0], 'rb')
    params = pickle.load(savefile)
    if not options.index is None:
        params = params[options.index]
    savefile.close()
    #params[0]= 245./235.
    #params[1]= 8.5/8.
    avg_plate_model = numpy.zeros(nlocs)
    sig_plate_model = numpy.zeros(nlocs)
    for ii in range(nlocs):
        #Calculate vlos | los
        indx = (data['LOCATION'] == locations[ii])
        thesedata = data[indx]
        thislogpiso = logpiso[indx, :]
        if options.dwarf:
            thislogpisodwarf = logpisodwarf[indx, :]
        else:
            thislogpisodwarf = None
        vlos = numpy.linspace(-200., 200., options.nvlos)
        pvlos = numpy.zeros(options.nvlos)
        if not options.multi is None:
            pvlos = multi.parallel_map(
                (lambda x: pvlosplate(params, vlos[x], thesedata, df, options,
                                      thislogpiso, thislogpisodwarf, iso)),
                range(options.nvlos),
                numcores=numpy.amin(
                    [len(vlos),
                     multiprocessing.cpu_count(), options.multi]))
        else:
            for jj in range(options.nvlos):
                print jj
                pvlos[jj] = pvlosplate(params, vlos[jj], thesedata, df,
                                       options, thislogpiso, thislogpisodwarf,
                                       iso)
        pvlos -= logsumexp(pvlos)
        pvlos = numpy.exp(pvlos)
        #Calculate mean and velocity dispersion
        avg_plate_model[ii] = numpy.sum(vlos * pvlos)
        sig_plate_model[ii] = numpy.sqrt(
            numpy.sum(vlos**2. * pvlos) - avg_plate_model[ii]**2.)
    #Plot everything
    left, bottom, width, height = 0.1, 0.4, 0.8, 0.5
    axTop = pyplot.axes([left, bottom, width, height])
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.3
    axMean = pyplot.axes([left, bottom, width, height])
    #left, bottom, width, height= 0.1, 0.1, 0.8, 0.2
    #axSig= pyplot.axes([left,bottom,width,height])
    fig = pyplot.gcf()
    fig.sca(axTop)
    pyplot.ylabel(r'$\mathrm{Heliocentric\ velocity}\ [\mathrm{km\ s}^{-1}]$')
    pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$')
    pyplot.xlim(0., 360.)
    pyplot.ylim(-200., 200.)
    nullfmt = NullFormatter()  # no labels
    axTop.xaxis.set_major_formatter(nullfmt)
    bovy_plot.bovy_plot(data['GLON'],
                        data['VHELIO'],
                        'k,',
                        yrange=[-200., 200.],
                        xrange=[0., 360.],
                        overplot=True)
    bovy_plot.bovy_plot(l_plate,
                        avg_plate,
                        'o',
                        overplot=True,
                        mfc='0.5',
                        mec='none')
    bovy_plot.bovy_plot(l_plate,
                        avg_plate_model,
                        'x',
                        overplot=True,
                        ms=10.,
                        mew=1.5,
                        color='0.7')
    #Legend
    bovy_plot.bovy_plot([260.], [150.], 'k,', overplot=True)
    bovy_plot.bovy_plot([260.], [120.],
                        'o',
                        mfc='0.5',
                        mec='none',
                        overplot=True)
    bovy_plot.bovy_plot([260.], [90.],
                        'x',
                        ms=10.,
                        mew=1.5,
                        color='0.7',
                        overplot=True)
    bovy_plot.bovy_text(270., 145., r'$\mathrm{data}$')
    bovy_plot.bovy_text(270., 115., r'$\mathrm{data\ mean}$')
    bovy_plot.bovy_text(270., 85., r'$\mathrm{model\ mean}$')
    bovy_plot._add_ticks()
    #Now plot the difference
    fig.sca(axMean)
    bovy_plot.bovy_plot([0., 360.], [0., 0.], '-', color='0.5', overplot=True)
    bovy_plot.bovy_plot(l_plate,
                        avg_plate - avg_plate_model,
                        'ko',
                        overplot=True)
    pyplot.errorbar(l_plate,
                    avg_plate - avg_plate_model,
                    yerr=siga_plate,
                    marker='o',
                    color='k',
                    linestyle='none')
    pyplot.ylabel(r'$\bar{V}_{\mathrm{data}}-\bar{V}_{\mathrm{model}}$')
    pyplot.ylim(-14.5, 14.5)
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    #axMean.xaxis.set_major_formatter(nullfmt)
    pyplot.xlabel(r'$\mathrm{Galactic\ longitude}\ [\mathrm{deg}]$')
    pyplot.xlim(0., 360.)
    bovy_plot._add_ticks()
    #Save
    bovy_plot.bovy_end_print(options.plotfilename)
    return None
    #Sigma (unreachable: the early return above skips it and axSig is commented out)
    fig.sca(axSig)
    pyplot.plot([0., 360.], [1., 1.], '-', color='0.5')
    bovy_plot.bovy_plot(l_plate,
                        sig_plate / sig_plate_model,
                        'ko',
                        overplot=True)
    pyplot.errorbar(l_plate,
                    sig_plate / sig_plate_model,
                    yerr=sigerr_plate / sig_plate_model,
                    marker='o',
                    color='k',
                    linestyle='none')
    pyplot.ylabel(
        r'$\sigma_{\mathrm{los}}^{\mathrm{data}}/ \sigma_{\mathrm{los}}^{\mathrm{model}}$'
    )
    pyplot.ylim(0.5, 1.5)