예제 #1
0
def rhmc_worker_theta(comm, block_width, start, y, template, theta, mu, sigmasq,
                      region_types, prop_df=5., eps_max=0.1, eps_min=0.001,
                      n_steps=100, sigmasq_p=1., adj=10, verbose=0):
    # Compute needed data properties
    chrom_length = y.size
    w = template.size/2 + 1

    # Calculate subset of data to work on
    end = min(chrom_length, start + block_width)
    block = slice(max(start-w, 0), min(end+w, chrom_length))
    size_block = block.stop - block.start

    original = slice(start-block.start, size_block - (block.stop-end))

    subset = slice(w*(start!=0)+start-block.start,
                   size_block-w*(end!=chrom_length) - (block.stop-end))
    size_subset = subset.stop - subset.start
    
    theta_block = theta[:size_block]
    theta_subset = theta_block[subset]

    # Setup initial return value
    ret_val = np.empty(block_width)

    # Calculate diagonal of Hessian if requested (by setting sigma.p to None)
    if sigmasq_p is None:
        result = lib.deconvolve(lib.loglik_convolve, lib.dloglik_convolve,
                                y[block], region_types[block], template,
                                mu, sigmasq,
                                subset=subset, theta0=theta_block,
                                log=True,
                                messages=0)
        sigmasq_p = lib.ddloglik_diag_convolve(theta=result[0], y=y[block],
                                               region_types=region_types[block],
                                               template=template, mu=mu,
                                               sigmasq=sigmasq,
                                               theta0=theta_block,
                                               subset=subset, log=True)
    sigma_p = np.sqrt(sigmasq_p)

    # Draw momentum variables
    p = np.random.randn(size_subset)*sigma_p
    p_0 = p.copy()

    # Repeat leapfrog process until valid result is obtained
    leapfrog_done = False
    eps = np.random.uniform(eps_min, eps_max)
    
    while not leapfrog_done:
        # Initialize new draw of theta
        theta_draw = theta_subset.copy()

        # Run leapfrog iterations
        grad = lib.dloglik_convolve(theta=theta_draw, y=y[block],
                                    region_types=region_types[block],
                                    template=template, mu=mu, sigmasq=sigmasq,
                                    theta0=theta_block, subset=subset, log=True)
        
        # Start with half step for momentum
        p -= eps*grad / 2.
        
        # Alternate full steps for position and momentum
        for i in xrange(n_steps):
            # Full step for position
            theta_draw += eps*p/sigmasq_p
            # Update gradient
            grad = lib.dloglik_convolve(theta=theta_draw, y=y[block],
                                        region_types=region_types[block],
                                        template=template, mu=mu,
                                        sigmasq=sigmasq, theta0=theta_block,
                                        subset=subset, log=True)
            # Full step for momentum, except at the end of the trajectory
            if i<(n_steps - 1): p -= eps*grad

        # Half step for momentum at the end
        p -= eps*grad/2.

        if np.min(np.isfinite(theta_draw)):
            leapfrog_done = True
        else:
            # Restart with smaller step size
            eps /= adj
            p[:] = p_0
            theta_draw[:] = theta_subset

    # Reverse momentum at end of trajectory to make the proposal symmetric.
    p = -p

    # Construct complete proposal for theta
    theta_prop = theta_block.copy()
    theta_prop[subset] = theta_draw

    # Compute log target and kinetic energy differences
    log_target_ratio = -lib.loglik_convolve(theta=theta_prop, y=y[block],
                                            region_types=region_types[block],
                                            template=template, mu=mu,
                                            sigmasq=sigmasq, subset=None,
                                            theta0=theta_prop, log=True)
    log_target_ratio -= -lib.loglik_convolve(theta=theta_block, y=y[block],
                                             region_types=region_types[block],
                                             template=template, mu=mu,
                                             sigmasq=sigmasq, subset=None,
                                             theta0=theta_block, log=True)

    log_kinetic_diff = 0.5*np.sum((p**2 - p_0**2)/sigmasq_p)

    # Execute MH step
    log_accept_prob = log_target_ratio - log_kinetic_diff
    if np.log(np.random.uniform()) < log_accept_prob:
        accept = 1
        ret_val[:end-start] = theta_prop[original]
    else:
        accept = 0
        ret_val[:end-start] = theta_block[original]
    
    if verbose > 0:
        print np.mean(sigma_p), np.std(sigma_p), eps, log_accept_prob, accept, start, end

    if verbose > 1 and np.isnan(log_accept_prob):
        print sigmasq_p.min(), sigmasq_p.mean(), sigmasq_p.max(), sigmasq_p.std()
        print >> sys.stderr, -lib.loglik_convolve(theta=theta_prop, y=y[block],
                                                  region_types=region_types[block],
                                                  template=template, mu=mu,
                                                  sigmasq=sigmasq, subset=None,
                                                  theta0=theta_prop, log=True)
        print >> sys.stderr, -lib.loglik_convolve(theta=theta_block, y=y[block],
                                                  region_types=region_types[block],
                                                  template=template, mu=mu,
                                                  sigmasq=sigmasq, subset=None,
                                                  theta0=theta_block, log=True)
        print >> sys.stderr, log_kinetic_diff
        print >> sys.stderr, log_target_ratio
        print >> sys.stderr, log_accept_prob, accept

    # Transmit result
    comm.Send(ret_val, dest=MPIROOT, tag=accept)
예제 #2
0
def rhmc_worker_beta(comm, block_width, start, y, template, theta, mu, sigmasq,
                     region_types, prop_df=5., eps=0.01, n_steps=100,
                     sigmasq_p=1., adj=2., verbose=0):
    # Compute needed data properties
    chrom_length = y.size
    w = template.size/2 + 1

    # Calculate subset of data to work on
    end = min(chrom_length, start + block_width)
    block = slice(max(start-w, 0), min(end+w, chrom_length))
    size_block = block.stop - block.start

    original = slice(start-block.start, size_block - (block.stop-end))

    subset = slice(w*(start!=0)+start-block.start,
                   size_block-w*(end!=chrom_length) - (block.stop-end))
    size_subset = subset.stop - subset.start
    
    beta_block = np.exp(theta[:size_block])
    beta_subset = beta_block[subset]

    # Setup initial return value
    ret_val = np.empty(block_width)

    # Calculate diagonal of Hessian if requested (by setting sigma.p to None)
    if sigmasq_p is None:
        result = lib.deconvolve(lib.loglik_convolve, lib.dloglik_convolve,
                                y[block], region_types[block], template,
                                mu, sigmasq,
                                subset=subset, theta0=beta_block,
                                log=False,
                                messages=0)
        sigmasq_p = lib.ddloglik_diag_convolve(theta=result[0], y=y[block],
                                               region_types=region_types[block],
                                               template=template, mu=mu,
                                               sigmasq=sigmasq,
                                               theta0=beta_block,
                                               subset=subset, log=False)
    sigma_p = np.sqrt(sigmasq_p)

    ## Repeat leapfrog process until valid result is obtained
    #leapfrog_done = False
    #
    #while not leapfrog_done:
    # Draw momentum variables
    p = np.random.randn(size_subset)*sigma_p
    p_0 = p.copy()

    # Initialize new draw of theta
    beta_draw = beta_subset.copy()
    
    # Initial gradient computation
    grad = lib.dloglik_convolve(theta=beta_draw, y=y[block],
                                region_types=region_types[block],
                                template=template, mu=mu, sigmasq=sigmasq,
                                theta0=beta_block, subset=subset, log=False)

    # Run leapfrog iterations
    for i in xrange(n_steps):
        # Half step for momentum
        p -= eps*grad/2.

        # Full step for position
        beta_draw += eps*p/sigmasq_p

        # Reflect to satisfy constraints
        p[beta_draw <= 0] *= -1.
        beta_draw[beta_draw <= 0] *= -1.

        # Update gradient
        grad = lib.dloglik_convolve(theta=beta_draw, y=y[block],
                                    region_types=region_types[block],
                                    template=template, mu=mu,
                                    sigmasq=sigmasq, theta0=beta_block,
                                    subset=subset, log=False)

        # Half step for momentum after reflection
        p -= eps*grad/2.

    # Reverse momentum at end of trajectory to make the proposal symmetric.
    p *= -1.

    # Construct complete proposal for theta
    beta_prop = beta_block.copy()
    beta_prop[subset] = beta_draw

    # Compute log target and kinetic energy differences
    log_target_ratio = -lib.loglik_convolve(theta=beta_prop, y=y[block],
                                            region_types=region_types[block],
                                            template=template, mu=mu,
                                            sigmasq=sigmasq, subset=None,
                                            theta0=beta_prop, log=False)
    log_target_ratio -= -lib.loglik_convolve(theta=beta_block, y=y[block],
                                             region_types=region_types[block],
                                             template=template, mu=mu,
                                             sigmasq=sigmasq, subset=None,
                                             theta0=beta_block, log=False)

    log_kinetic_diff = 0.5*np.sum((p**2 - p_0**2)/sigmasq_p)

    # Execute MH step
    log_accept_prob = log_target_ratio - log_kinetic_diff
    if np.log(np.random.uniform()) < log_accept_prob:
        accept = 1
        ret_val[:end-start] = np.log(beta_prop[original])
    else:
        accept = 0
        ret_val[:end-start] = np.log(beta_block[original])
    
    if verbose > 0:
        print np.mean(sigma_p), np.std(sigma_p), eps, log_accept_prob, accept, start, end

    # Transmit result
    comm.Send(ret_val, dest=MPIROOT, tag=accept)
예제 #3
0
def rmh_worker_theta(comm, block_width, start, y, template, theta, mu, sigmasq,
                     region_types, prop_df=5.):
    '''
    Worker-side Metropolis-Hastings update of one block of theta.

    Finds a conditional mode with lib.deconvolve, factors the information
    matrix from lib.ddloglik at that mode, proposes from Student-t draws
    transformed through that factor, and sends either the proposal or the
    current values for [start, end) to MPIROOT with the accept indicator
    (0 or 1) as message tag. The proposal is always rejected when the
    factorization fails or when the proposed values are too large.

    Parameters
    ----------
        - comm : MPI communicator
            Only its Send method is used here.
        - block_width : int
            Nominal number of positions per block; also the length of the
            buffer that is sent.
        - start : int
            First position of the block to update.
        - y, region_types, theta : array-like
            Sliced with the padded window, so all three are indexed over the
            same positions as y.
        - template : array-like
            Its size sets the padding width w = template.size // 2 + 1 and it
            supplies the diagonals of the banded matrix X.
            NOTE(review): range(-w + 1, w) has template.size entries only for
            an odd-length template -- confirm with caller.
        - mu, sigmasq
            Passed through unchanged to the lib routines.
        - prop_df : float
            Degrees of freedom of the Student-t proposal.

    Returns
    -------
        None. The result is transmitted via comm.Send.
    '''
    # Compute needed data properties
    chrom_length = y.size
    # Floor division keeps w an integer whatever the division semantics are
    w = template.size // 2 + 1

    # Padded window of data: [start, end) extended by up to w on each side,
    # clipped to the chromosome
    end = min(chrom_length, start + block_width)
    block = slice(max(start - w, 0), min(end + w, chrom_length))
    size_block = block.stop - block.start

    # Location of [start, end) inside the padded window
    original = slice(start - block.start, size_block - (block.stop - end))

    # Entries that are actually updated: `original` pulled in by w on every
    # side that is not a chromosome boundary
    subset = slice(original.start + w * (start != 0),
                   original.stop - w * (end != chrom_length))
    size_subset = subset.stop - subset.start

    y_block = y[block]
    types_block = region_types[block]
    theta_block = theta[block]
    theta_subset = theta_block[subset]

    # Setup initial return value; entries past end - start are never filled
    ret_val = np.empty(block_width)

    def transmit(values, accept):
        # Copy the [start, end) part of `values` into the buffer and send it
        ret_val[:end - start] = values[original]
        comm.Send(ret_val, dest=MPIROOT, tag=accept)

    def log_target(theta_full):
        # Negated lib objective evaluated on the whole padded block
        return -lib.loglik_convolve(theta=theta_full, y=y_block,
                                    region_types=types_block,
                                    template=template, mu=mu, sigmasq=sigmasq,
                                    subset=None, theta0=theta_full, log=True)

    # Run optimization to obtain conditional posterior mode
    theta_hat = lib.deconvolve(lib.loglik_convolve,
                               lib.dloglik_convolve,
                               y_block, types_block, template,
                               mu, sigmasq,
                               subset=subset, theta0=theta_block,
                               log=True,
                               messages=0)[0]

    # Compute (sparse) conditional observed information
    X = sparse.spdiags((np.ones((template.size, size_block)).T *
                        template).T, diags=range(-w + 1, w),
                       m=size_block, n=size_block, format='csr')

    info = lib.ddloglik(theta=theta_hat,
                        theta0=theta_block,
                        X=X, Xt=X, y=y_block,
                        mu=mu, sigmasq=sigmasq,
                        region_types=types_block,
                        subset=subset, log=True)
    # Restrict to the updated entries (rows as CSR, then columns as CSC)
    info = info[subset, :]
    info = info.tocsc()
    info = info[:, subset]

    # Propose from multivariate t distribution
    try:
        info_factor = cholmod.cholesky(info)
    except Exception:
        # Always reject when the factorization fails. Only Exception is
        # caught so that KeyboardInterrupt / SystemExit still propagate.
        transmit(theta_block, 0)
        return

    L, D = info_factor.L_D()
    D = D.diagonal()

    z = np.random.standard_t(df=prop_df, size=size_subset)

    theta_draw = info_factor.solve_Lt(z / np.sqrt(D))
    theta_draw = info_factor.solve_Pt(theta_draw)
    theta_draw = theta_draw.flatten()
    theta_draw += theta_hat

    theta_prop = theta_block.copy()
    theta_prop[subset] = theta_draw

    # Check for overflow issues: at this threshold exp(2 * theta) reaches the
    # largest representable double. The builtin float is used because the
    # np.float alias no longer exists in current NumPy.
    if np.max(theta_prop) >= np.log(np.finfo(float).max) / 2.:
        # Always reject for these cases
        transmit(theta_block, 0)
        return

    # Demean and decorrelate previous draw (inverse of the proposal map)
    z_prev = L.T * info_factor.solve_P(theta_subset - theta_hat)
    z_prev = z_prev.flatten()
    z_prev *= np.sqrt(D)

    # Compute log target and proposal ratios
    log_target_ratio = log_target(theta_prop)
    log_target_ratio -= log_target(theta_block)

    log_prop_ratio = -0.5 * (prop_df + 1) * np.sum(
        np.log(1. + z**2 / prop_df) - np.log(1. + z_prev**2 / prop_df))

    # Execute MH step (a NaN acceptance probability compares False: reject)
    log_accept_prob = log_target_ratio - log_prop_ratio
    if np.log(np.random.uniform()) < log_accept_prob:
        transmit(theta_prop, 1)
    else:
        transmit(theta_block, 0)
예제 #4
0
def rhmc_worker_beta(comm,
                     block_width,
                     start,
                     y,
                     template,
                     theta,
                     mu,
                     sigmasq,
                     region_types,
                     prop_df=5.,
                     eps=0.01,
                     n_steps=100,
                     sigmasq_p=1.,
                     adj=2.,
                     verbose=0):
    # Compute needed data properties
    chrom_length = y.size
    w = template.size / 2 + 1

    # Calculate subset of data to work on
    end = min(chrom_length, start + block_width)
    block = slice(max(start - w, 0), min(end + w, chrom_length))
    size_block = block.stop - block.start

    original = slice(start - block.start, size_block - (block.stop - end))

    subset = slice(w * (start != 0) + start - block.start,
                   size_block - w * (end != chrom_length) - (block.stop - end))
    size_subset = subset.stop - subset.start

    beta_block = np.exp(theta[:size_block])
    beta_subset = beta_block[subset]

    # Setup initial return value
    ret_val = np.empty(block_width)

    # Calculate diagonal of Hessian if requested (by setting sigma.p to None)
    if sigmasq_p is None:
        result = lib.deconvolve(lib.loglik_convolve,
                                lib.dloglik_convolve,
                                y[block],
                                region_types[block],
                                template,
                                mu,
                                sigmasq,
                                subset=subset,
                                theta0=beta_block,
                                log=False,
                                messages=0)
        sigmasq_p = lib.ddloglik_diag_convolve(
            theta=result[0],
            y=y[block],
            region_types=region_types[block],
            template=template,
            mu=mu,
            sigmasq=sigmasq,
            theta0=beta_block,
            subset=subset,
            log=False)
    sigma_p = np.sqrt(sigmasq_p)

    ## Repeat leapfrog process until valid result is obtained
    #leapfrog_done = False
    #
    #while not leapfrog_done:
    # Draw momentum variables
    p = np.random.randn(size_subset) * sigma_p
    p_0 = p.copy()

    # Initialize new draw of theta
    beta_draw = beta_subset.copy()

    # Initial gradient computation
    grad = lib.dloglik_convolve(theta=beta_draw,
                                y=y[block],
                                region_types=region_types[block],
                                template=template,
                                mu=mu,
                                sigmasq=sigmasq,
                                theta0=beta_block,
                                subset=subset,
                                log=False)

    # Run leapfrog iterations
    for i in xrange(n_steps):
        # Half step for momentum
        p -= eps * grad / 2.

        # Full step for position
        beta_draw += eps * p / sigmasq_p

        # Reflect to satisfy constraints
        p[beta_draw <= 0] *= -1.
        beta_draw[beta_draw <= 0] *= -1.

        # Update gradient
        grad = lib.dloglik_convolve(theta=beta_draw,
                                    y=y[block],
                                    region_types=region_types[block],
                                    template=template,
                                    mu=mu,
                                    sigmasq=sigmasq,
                                    theta0=beta_block,
                                    subset=subset,
                                    log=False)

        # Half step for momentum after reflection
        p -= eps * grad / 2.

    # Reverse momentum at end of trajectory to make the proposal symmetric.
    p *= -1.

    # Construct complete proposal for theta
    beta_prop = beta_block.copy()
    beta_prop[subset] = beta_draw

    # Compute log target and kinetic energy differences
    log_target_ratio = -lib.loglik_convolve(theta=beta_prop,
                                            y=y[block],
                                            region_types=region_types[block],
                                            template=template,
                                            mu=mu,
                                            sigmasq=sigmasq,
                                            subset=None,
                                            theta0=beta_prop,
                                            log=False)
    log_target_ratio -= -lib.loglik_convolve(theta=beta_block,
                                             y=y[block],
                                             region_types=region_types[block],
                                             template=template,
                                             mu=mu,
                                             sigmasq=sigmasq,
                                             subset=None,
                                             theta0=beta_block,
                                             log=False)

    log_kinetic_diff = 0.5 * np.sum((p**2 - p_0**2) / sigmasq_p)

    # Execute MH step
    log_accept_prob = log_target_ratio - log_kinetic_diff
    if np.log(np.random.uniform()) < log_accept_prob:
        accept = 1
        ret_val[:end - start] = np.log(beta_prop[original])
    else:
        accept = 0
        ret_val[:end - start] = np.log(beta_block[original])

    if verbose > 0:
        print np.mean(sigma_p), np.std(
            sigma_p), eps, log_accept_prob, accept, start, end

    # Transmit result
    comm.Send(ret_val, dest=MPIROOT, tag=accept)
예제 #5
0
def rhmc_worker_theta(comm,
                      block_width,
                      start,
                      y,
                      template,
                      theta,
                      mu,
                      sigmasq,
                      region_types,
                      prop_df=5.,
                      eps_max=0.1,
                      eps_min=0.001,
                      n_steps=100,
                      sigmasq_p=1.,
                      adj=10,
                      verbose=0):
    # Compute needed data properties
    chrom_length = y.size
    w = template.size / 2 + 1

    # Calculate subset of data to work on
    end = min(chrom_length, start + block_width)
    block = slice(max(start - w, 0), min(end + w, chrom_length))
    size_block = block.stop - block.start

    original = slice(start - block.start, size_block - (block.stop - end))

    subset = slice(w * (start != 0) + start - block.start,
                   size_block - w * (end != chrom_length) - (block.stop - end))
    size_subset = subset.stop - subset.start

    theta_block = theta[:size_block]
    theta_subset = theta_block[subset]

    # Setup initial return value
    ret_val = np.empty(block_width)

    # Calculate diagonal of Hessian if requested (by setting sigma.p to None)
    if sigmasq_p is None:
        result = lib.deconvolve(lib.loglik_convolve,
                                lib.dloglik_convolve,
                                y[block],
                                region_types[block],
                                template,
                                mu,
                                sigmasq,
                                subset=subset,
                                theta0=theta_block,
                                log=True,
                                messages=0)
        sigmasq_p = lib.ddloglik_diag_convolve(
            theta=result[0],
            y=y[block],
            region_types=region_types[block],
            template=template,
            mu=mu,
            sigmasq=sigmasq,
            theta0=theta_block,
            subset=subset,
            log=True)
    sigma_p = np.sqrt(sigmasq_p)

    # Draw momentum variables
    p = np.random.randn(size_subset) * sigma_p
    p_0 = p.copy()

    # Repeat leapfrog process until valid result is obtained
    leapfrog_done = False
    eps = np.random.uniform(eps_min, eps_max)

    while not leapfrog_done:
        # Initialize new draw of theta
        theta_draw = theta_subset.copy()

        # Run leapfrog iterations
        grad = lib.dloglik_convolve(theta=theta_draw,
                                    y=y[block],
                                    region_types=region_types[block],
                                    template=template,
                                    mu=mu,
                                    sigmasq=sigmasq,
                                    theta0=theta_block,
                                    subset=subset,
                                    log=True)

        # Start with half step for momentum
        p -= eps * grad / 2.

        # Alternate full steps for position and momentum
        for i in xrange(n_steps):
            # Full step for position
            theta_draw += eps * p / sigmasq_p
            # Update gradient
            grad = lib.dloglik_convolve(theta=theta_draw,
                                        y=y[block],
                                        region_types=region_types[block],
                                        template=template,
                                        mu=mu,
                                        sigmasq=sigmasq,
                                        theta0=theta_block,
                                        subset=subset,
                                        log=True)
            # Full step for momentum, except at the end of the trajectory
            if i < (n_steps - 1): p -= eps * grad

        # Half step for momentum at the end
        p -= eps * grad / 2.

        if np.min(np.isfinite(theta_draw)):
            leapfrog_done = True
        else:
            # Restart with smaller step size
            eps /= adj
            p[:] = p_0
            theta_draw[:] = theta_subset

    # Reverse momentum at end of trajectory to make the proposal symmetric.
    p = -p

    # Construct complete proposal for theta
    theta_prop = theta_block.copy()
    theta_prop[subset] = theta_draw

    # Compute log target and kinetic energy differences
    log_target_ratio = -lib.loglik_convolve(theta=theta_prop,
                                            y=y[block],
                                            region_types=region_types[block],
                                            template=template,
                                            mu=mu,
                                            sigmasq=sigmasq,
                                            subset=None,
                                            theta0=theta_prop,
                                            log=True)
    log_target_ratio -= -lib.loglik_convolve(theta=theta_block,
                                             y=y[block],
                                             region_types=region_types[block],
                                             template=template,
                                             mu=mu,
                                             sigmasq=sigmasq,
                                             subset=None,
                                             theta0=theta_block,
                                             log=True)

    log_kinetic_diff = 0.5 * np.sum((p**2 - p_0**2) / sigmasq_p)

    # Execute MH step
    log_accept_prob = log_target_ratio - log_kinetic_diff
    if np.log(np.random.uniform()) < log_accept_prob:
        accept = 1
        ret_val[:end - start] = theta_prop[original]
    else:
        accept = 0
        ret_val[:end - start] = theta_block[original]

    if verbose > 0:
        print np.mean(sigma_p), np.std(
            sigma_p), eps, log_accept_prob, accept, start, end

    if verbose > 1 and np.isnan(log_accept_prob):
        print sigmasq_p.min(), sigmasq_p.mean(), sigmasq_p.max(
        ), sigmasq_p.std()
        print >> sys.stderr, -lib.loglik_convolve(
            theta=theta_prop,
            y=y[block],
            region_types=region_types[block],
            template=template,
            mu=mu,
            sigmasq=sigmasq,
            subset=None,
            theta0=theta_prop,
            log=True)
        print >> sys.stderr, -lib.loglik_convolve(
            theta=theta_block,
            y=y[block],
            region_types=region_types[block],
            template=template,
            mu=mu,
            sigmasq=sigmasq,
            subset=None,
            theta0=theta_block,
            log=True)
        print >> sys.stderr, log_kinetic_diff
        print >> sys.stderr, log_target_ratio
        print >> sys.stderr, log_accept_prob, accept

    # Transmit result
    comm.Send(ret_val, dest=MPIROOT, tag=accept)
예제 #6
0
def rmh_worker_theta(comm,
                     block_width,
                     start,
                     y,
                     template,
                     theta,
                     mu,
                     sigmasq,
                     region_types,
                     prop_df=5.):
    '''
    Worker-side Metropolis-Hastings update of one block of theta.

    Finds a conditional mode with lib.deconvolve, factors the information
    matrix from lib.ddloglik at that mode, proposes from Student-t draws
    transformed through that factor, and sends either the proposal or the
    current values for [start, end) to MPIROOT with the accept indicator
    (0 or 1) as message tag. The proposal is always rejected when the
    factorization fails or when the proposed values are too large.

    Parameters
    ----------
        - comm : MPI communicator
            Only its Send method is used here.
        - block_width : int
            Nominal number of positions per block; also the length of the
            buffer that is sent.
        - start : int
            First position of the block to update.
        - y, region_types, theta : array-like
            Sliced with the padded window, so all three are indexed over the
            same positions as y.
        - template : array-like
            Its size sets the padding width w = template.size // 2 + 1 and it
            supplies the diagonals of the banded matrix X.
            NOTE(review): range(-w + 1, w) has template.size entries only for
            an odd-length template -- confirm with caller.
        - mu, sigmasq
            Passed through unchanged to the lib routines.
        - prop_df : float
            Degrees of freedom of the Student-t proposal.

    Returns
    -------
        None. The result is transmitted via comm.Send.
    '''
    # Compute needed data properties
    chrom_length = y.size
    # Floor division keeps w an integer whatever the division semantics are
    w = template.size // 2 + 1

    # Padded window of data: [start, end) extended by up to w on each side,
    # clipped to the chromosome
    end = min(chrom_length, start + block_width)
    block = slice(max(start - w, 0), min(end + w, chrom_length))
    size_block = block.stop - block.start

    # Location of [start, end) inside the padded window
    original = slice(start - block.start, size_block - (block.stop - end))

    # Entries that are actually updated: `original` pulled in by w on every
    # side that is not a chromosome boundary
    subset = slice(original.start + w * (start != 0),
                   original.stop - w * (end != chrom_length))
    size_subset = subset.stop - subset.start

    y_block = y[block]
    types_block = region_types[block]
    theta_block = theta[block]
    theta_subset = theta_block[subset]

    # Setup initial return value; entries past end - start are never filled
    ret_val = np.empty(block_width)

    def transmit(values, accept):
        # Copy the [start, end) part of `values` into the buffer and send it
        ret_val[:end - start] = values[original]
        comm.Send(ret_val, dest=MPIROOT, tag=accept)

    def log_target(theta_full):
        # Negated lib objective evaluated on the whole padded block
        return -lib.loglik_convolve(theta=theta_full,
                                    y=y_block,
                                    region_types=types_block,
                                    template=template,
                                    mu=mu,
                                    sigmasq=sigmasq,
                                    subset=None,
                                    theta0=theta_full,
                                    log=True)

    # Run optimization to obtain conditional posterior mode
    theta_hat = lib.deconvolve(lib.loglik_convolve,
                               lib.dloglik_convolve,
                               y_block,
                               types_block,
                               template,
                               mu,
                               sigmasq,
                               subset=subset,
                               theta0=theta_block,
                               log=True,
                               messages=0)[0]

    # Compute (sparse) conditional observed information
    X = sparse.spdiags((np.ones((template.size, size_block)).T * template).T,
                       diags=range(-w + 1, w),
                       m=size_block,
                       n=size_block,
                       format='csr')

    info = lib.ddloglik(theta=theta_hat,
                        theta0=theta_block,
                        X=X,
                        Xt=X,
                        y=y_block,
                        mu=mu,
                        sigmasq=sigmasq,
                        region_types=types_block,
                        subset=subset,
                        log=True)
    # Restrict to the updated entries (rows as CSR, then columns as CSC)
    info = info[subset, :]
    info = info.tocsc()
    info = info[:, subset]

    # Propose from multivariate t distribution
    try:
        info_factor = cholmod.cholesky(info)
    except Exception:
        # Always reject when the factorization fails. Only Exception is
        # caught so that KeyboardInterrupt / SystemExit still propagate.
        transmit(theta_block, 0)
        return

    L, D = info_factor.L_D()
    D = D.diagonal()

    z = np.random.standard_t(df=prop_df, size=size_subset)

    theta_draw = info_factor.solve_Lt(z / np.sqrt(D))
    theta_draw = info_factor.solve_Pt(theta_draw)
    theta_draw = theta_draw.flatten()
    theta_draw += theta_hat

    theta_prop = theta_block.copy()
    theta_prop[subset] = theta_draw

    # Check for overflow issues: at this threshold exp(2 * theta) reaches the
    # largest representable double. The builtin float is used because the
    # np.float alias no longer exists in current NumPy.
    if np.max(theta_prop) >= np.log(np.finfo(float).max) / 2.:
        # Always reject for these cases
        transmit(theta_block, 0)
        return

    # Demean and decorrelate previous draw (inverse of the proposal map)
    z_prev = L.T * info_factor.solve_P(theta_subset - theta_hat)
    z_prev = z_prev.flatten()
    z_prev *= np.sqrt(D)

    # Compute log target and proposal ratios
    log_target_ratio = log_target(theta_prop)
    log_target_ratio -= log_target(theta_block)

    log_prop_ratio = -0.5 * (prop_df + 1) * np.sum(
        np.log(1. + z**2 / prop_df) - np.log(1. + z_prev**2 / prop_df))

    # Execute MH step (a NaN acceptance probability compares False: reject)
    log_accept_prob = log_target_ratio - log_prop_ratio
    if np.log(np.random.uniform()) < log_accept_prob:
        transmit(theta_prop, 1)
    else:
        transmit(theta_block, 0)
예제 #7
0
def worker(comm, rank, n_proc, data, init, cfg):
    '''
    Worker-node process for parallel approximate EM algorithm. Receives
    parameters and commands from master node, sends updated estimates.

    The worker loops on messages from MPIROOT and dispatches on the message
    tag:

        - STOPTAG : leave the loop and return.
        - SYNCTAG : refresh theta and (mu, sigmasq) via broadcasts rooted at
          MPIROOT.
        - WORKTAG : run lib.deconvolve on the block beginning at the received
          start position and send the updated values back to MPIROOT, using
          start as the message tag.
        - UPDATETAG : receive a new theta from MPIROOT.

    Every message payload is unpacked as a (start, log) pair, regardless of
    its tag.

    Parameters
    ----------
        - comm : mpi4py.MPI.COMM
            Initialized MPI communicator.
        - rank : int
            Rank (>= MPIROOT) of worker. Not used inside this function.
        - n_proc : int
            Number of processes in communicator. All but one are treated as
            workers when deriving the default block width.
        - data : dictionary
            Data as output from load_data. The entries 'y', 'template', and
            'region_types' are read.
        - init : dictionary
            Initial parameter values as output from initialize. The entries
            'theta', 'mu', and 'sigmasq' are read.
        - cfg : dictionary
            Dictionary containing (at least) prior and estimation_params
            sections with appropriate entries. Only
            cfg['estimation_params']['block_width'] is read here; None means
            the chromosome length is split evenly across the workers.

    Returns
    -------
        None.
    '''
    # Create references to relevant data entries in local namespace
    y = data['y']
    template = data['template']
    region_types = data['region_types']

    # Compute needed data properties
    chrom_length = y.size
    # Floor division keeps the padding width an integer, so it remains a
    # valid slice bound regardless of the division semantics in effect.
    w = template.size // 2 + 1

    # Extract needed initializations for parameters
    theta = init['theta']
    mu = init['mu']
    sigmasq = init['sigmasq']
    # Packed into a single array so both values can be refreshed with one Bcast
    params = np.array([mu, sigmasq])

    # Compute block width for parallel approximate E-step
    n_workers = n_proc - 1
    block_width = cfg['estimation_params']['block_width']
    if block_width is None:
        # Floor division: the width is used as an array length and in slices
        block_width = chrom_length // n_workers

    # Prepare to receive tasks
    status = MPI.Status()
    # Fixed-size send buffer; `float` is the dtype the removed `np.float`
    # alias used to refer to.
    ret_val = np.empty(block_width, dtype=float)
    working = True
    while working:
        # Receive task information
        start, log = comm.recv(source=MPIROOT, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()

        if tag == STOPTAG:
            working = False
        elif tag == SYNCTAG:
            # Synchronize parameters (conditioning information)
            comm.Bcast(theta, root=MPIROOT)
            comm.Bcast(params, root=MPIROOT)
            mu, sigmasq = params
        elif tag == WORKTAG:
            # Calculate subset of data to work on: the block is the assigned
            # range [start, end) padded by up to w positions on each side.
            end = min(chrom_length, start + block_width)
            block = slice(max(start - w, 0), min(end + w, chrom_length))
            size_block = block.stop - block.start

            # Positions (relative to the block) handed to lib.deconvolve as
            # its subset; narrowed by w on every side that is not a
            # chromosome boundary.
            subset = slice(
                w * (start != 0) + start - block.start,
                size_block - w * (end != chrom_length) - (block.stop - end))

            # Positions (relative to the block) of the assigned range itself
            original = slice(start - block.start,
                             size_block - (block.stop - end))

            # Setup initial return value; zero the tail that a short final
            # block leaves unused.
            ret_val[end - start:] = 0

            # Run optimization
            result = lib.deconvolve(lib.loglik_convolve, lib.dloglik_convolve,
                                    y[block], region_types[block], template,
                                    mu, sigmasq,
                                    subset=subset, theta0=theta[block],
                                    log=log,
                                    messages=0)

            # Build resulting subset of new theta
            # NOTE(review): if theta is a NumPy array, theta[block] is a view,
            # so the assignment below also updates the local theta in place.
            # Kept as in the original -- confirm this is intended.
            theta_new = theta[block]
            theta_new[subset] = result[0]
            ret_val[:end - start] = theta_new[original]

            # Transmit result
            comm.Send(ret_val, dest=MPIROOT, tag=start)
        elif tag == UPDATETAG:
            # Update value of theta for next job within given outer loop
            comm.Recv(theta, source=MPIROOT, tag=MPI.ANY_TAG)