def main(run_name, pool, overwrite=False, seed=None, limit=None):
    """Run The Joker rejection sampling for all unfinished stars in a run.

    Loads the run configuration, removes already-processed stars (unless
    ``overwrite``), splits the remaining APOGEE IDs into task chunks, and
    maps ``worker`` over the pool, merging per-worker temp files into the
    results file via ``tmpdir_combine``.

    Parameters
    ----------
    run_name : str
        Name of the run; resolved via ``Config.from_run_name``.
    pool : pool-like
        Processing pool exposing ``.size`` and ``.map(func, tasks, callback=)``.
    overwrite : bool, optional
        If True, reprocess stars that already have results.
    seed : int, optional
        Seed for the global random number generator passed to the workers.
    limit : int, optional
        If set, only process the first ``limit`` stars (useful for testing).

    Raises
    ------
    IOError
        If the prior cache file or the tasks file does not exist.
    """
    c = Config.from_run_name(run_name)

    if not os.path.exists(c.prior_cache_file):
        raise IOError(f"Prior cache file {c.prior_cache_file} does not exist! "
                      "Did you run make_prior_cache.py?")

    if not os.path.exists(c.tasks_path):
        # BUG FIX: the original message contained a bare "{0}" placeholder
        # with no .format() call, so the offending path never appeared in
        # the error. Use an f-string, consistent with the check above.
        raise IOError(f"Tasks file '{c.tasks_path}' does not exist! "
                      "Did you run make_tasks.py?")

    # Make directory for temp. files, one per worker:
    tmpdir = os.path.join(c.run_path, 'thejoker')
    if os.path.exists(tmpdir):
        # A stale temp directory means a previous run died before its
        # results were merged — merge them before continuing.
        logger.warning(f"Stale temp. file directory found at {tmpdir}: "
                       "combining files first...")
        tmpdir_combine(tmpdir, c.joker_results_path)

    # Ensure the results file exists and collect already-finished stars:
    logger.debug("Loading past results...")
    with h5py.File(c.joker_results_path, 'a') as f:
        done_apogee_ids = list(f.keys())
    if overwrite:
        done_apogee_ids = list()

    # Get data files out of config file:
    logger.debug("Loading data...")
    allstar, _ = c.load_alldata()
    allstar = allstar[~np.isin(allstar['APOGEE_ID'], done_apogee_ids)]

    # Create the global random state with the specified seed; each worker
    # derives its own state from this one.
    rnd = np.random.RandomState(seed=seed)
    logger.debug(f"Processing pool has size = {pool.size}")

    apogee_ids = np.unique(allstar['APOGEE_ID'])
    if limit is not None:
        apogee_ids = apogee_ids[:limit]

    if done_apogee_ids:
        logger.info(f"{len(done_apogee_ids)} already completed - "
                    f"{len(apogee_ids)} left to process")

    # Load the prior:
    logger.debug("Creating JokerPrior instance...")
    prior = c.get_prior()

    # Fresh temp directory (any stale one was merged away above); register
    # the merge step so results are combined even on abnormal exit.
    os.makedirs(tmpdir)
    atexit.register(tmpdir_combine, tmpdir, c.joker_results_path)

    logger.debug("Preparing tasks...")
    # Over-partition relative to the pool size when there is plenty of work,
    # so faster workers can pick up extra chunks (simple load balancing).
    if len(apogee_ids) > 10 * pool.size:
        n_tasks = min(16 * pool.size, len(apogee_ids))
    else:
        n_tasks = pool.size

    tasks = batch_tasks(len(apogee_ids), n_tasks, arr=apogee_ids,
                        args=(c, prior, tmpdir, rnd))

    logger.info(f'Done preparing tasks: split into {len(tasks)} task chunks')
    # Drain the map iterator; results are handled by the callback.
    for r in pool.map(worker, tasks, callback=callback):
        pass
def worker(task):
    """Run The Joker's iterative rejection sampling for a chunk of stars.

    For each APOGEE ID in the chunk: load its RV data, subtract the orbit of
    the MAP sample from the metadata table, rejection-sample the residuals,
    and write the samples to this worker's private HDF5 temp file (merged
    into the main results file later by the parent process).

    Parameters
    ----------
    task : tuple
        ``(apogee_ids, worker_id, c, results_path, prior, tmpdir, global_rnd)``
        as produced by ``batch_tasks``.

    Returns
    -------
    dict
        Bookkeeping info for the callback: temp filename, results path,
        hostname, and worker id.
    """
    apogee_ids, worker_id, c, results_path, prior, tmpdir, global_rnd = task

    # This worker's results:
    results_filename = os.path.join(tmpdir, f'worker-{worker_id}.hdf5')
    metadata = QTable.read(c.metadata_path)

    # BUG FIX: RandomState.seed() reseeds in place and returns None, so the
    # original `rnd = global_rnd.seed(worker_id)` always left rnd = None.
    # Seed the worker's copy of the state in place, then use it.
    global_rnd.seed(worker_id)
    rnd = global_rnd
    logger.log(1, f"Worker {worker_id}: Creating TheJoker instance with {rnd}")

    # Re-create the prior locally (shadows the instance passed in the task):
    prior = c.get_prior()
    joker = tj.TheJoker(prior, random_state=rnd)
    logger.debug(f"Worker {worker_id} on node {socket.gethostname()}: "
                 f"{len(apogee_ids)} stars left to process")

    # Initialize to get packed column order:
    logger.log(1, f"Worker {worker_id}: Loading prior samples from cache "
                  f"{c.prior_cache_file}")
    with h5py.File(c.tasks_path, 'r') as tasks_f:
        data = tj.RVData.from_timeseries(tasks_f[apogee_ids[0]])
    joker_helper = joker._make_joker_helper(data)

    # Load the full batch of prior samples once, shared across all stars:
    _slice = slice(0, c.max_prior_samples, 1)
    batch = read_batch(c.prior_cache_file, joker_helper.packed_order,
                       slice_or_idx=_slice,
                       units=joker_helper.internal_units)
    ln_prior = read_batch(c.prior_cache_file, ['ln_prior'], _slice)[:, 0]
    logger.log(1, f"Worker {worker_id}: Loaded {len(batch)} prior samples")

    for apogee_id in apogee_ids:
        if apogee_id not in metadata['APOGEE_ID']:
            logger.debug(f"{apogee_id} not found in metadata file!")
            continue

        with h5py.File(c.tasks_path, 'r') as tasks_f:
            data = tj.RVData.from_timeseries(tasks_f[apogee_id])

        # Subtract out MAP sample, run on residual:
        metadata_row = metadata[metadata['APOGEE_ID'] == apogee_id]
        MAP_sample = extract_MAP_sample(metadata_row)
        orbit = MAP_sample.get_orbit(0)
        new_rv = data.rv - orbit.radial_velocity(data.t)
        data = tj.RVData(t=data.t, rv=new_rv, rv_err=data.rv_err)

        logger.debug(f"Worker {worker_id}: Running {apogee_id} "
                     f"({len(data)} visits)")

        t0 = time.time()
        try:
            samples = joker.iterative_rejection_sample(
                data=data, n_requested_samples=c.requested_samples_per_star,
                prior_samples=batch, init_batch_size=250_000,
                growth_factor=32,
                randomize_prior_order=c.randomize_prior_order,
                return_logprobs=ln_prior, in_memory=True)
        except Exception as e:
            # Best-effort per star: log the failure and move on rather than
            # killing the whole chunk.
            logger.warning(f"\t Failed sampling for star {apogee_id} "
                           f"\n Error: {e}")
            continue

        dt = time.time() - t0
        logger.debug(f"Worker {worker_id}: {apogee_id} ({len(data)} visits): "
                     f"done sampling - {len(samples)} raw samples returned "
                     f"({dt:.2f} seconds)")

        # Ensure only positive K values
        samples.wrap_K()

        # Overwrite any stale group for this star in the temp file:
        with h5py.File(results_filename, 'a') as results_f:
            if apogee_id in results_f:
                del results_f[apogee_id]
            g = results_f.create_group(apogee_id)
            samples.write(g)

    result = {'tmp_filename': results_filename,
              'joker_results_path': results_path,
              'hostname': socket.gethostname(),
              'worker_id': worker_id}
    return result
def main(c, prior, metadata_row, overwrite=False):
    """Run MCMC for a single star, initialized from its Joker MAP sample.

    Sets up the pymc3 model from the prior, adds ``ln_prior`` and ``logp``
    Deterministic variables if missing, samples with a dense NUTS step, and
    saves the trace to a per-star cache directory under ``<run_path>/mcmc``.

    Parameters
    ----------
    c : Config-like
        Run configuration; provides ``run_path``, ``load_alldata()``,
        ``tune``, and ``draws``.
    prior :
        The Joker prior instance whose pymc3 model is sampled.
    metadata_row : table row
        Metadata row for one star; must contain ``'APOGEE_ID'`` and the
        columns read by ``extract_MAP_sample``.
    overwrite : bool, optional
        If False (default), skip stars whose cache directory already exists.
    """
    mcmc_cache_path = os.path.join(c.run_path, 'mcmc')
    os.makedirs(mcmc_cache_path, exist_ok=True)

    apogee_id = metadata_row['APOGEE_ID']
    this_cache_path = os.path.join(mcmc_cache_path, apogee_id)
    if os.path.exists(this_cache_path) and not overwrite:
        logger.info(f"{apogee_id} already done!")
        # Assume it's already done
        return

    # Set up The Joker:
    joker = tj.TheJoker(prior)

    # Load the data:
    logger.debug(f"{apogee_id}: Loading all data")
    allstar, allvisit = c.load_alldata()
    # Narrow both tables down to this one star before building the RV data:
    allstar = allstar[np.isin(allstar['APOGEE_ID'].astype(str), apogee_id)]
    allvisit = allvisit[np.isin(allvisit['APOGEE_ID'].astype(str),
                                allstar['APOGEE_ID'].astype(str))]
    visits = allvisit[allvisit['APOGEE_ID'] == apogee_id]
    data = get_rvdata(visits)

    t0 = time.time()

    # Read MAP sample:
    MAP_sample = extract_MAP_sample(metadata_row)
    logger.log(1, f"{apogee_id}: MAP sample loaded")

    # Run MCMC:
    with joker.prior.model as model:
        logger.log(1, f"{apogee_id}: Setting up MCMC...")
        mcmc_init = joker.setup_mcmc(data, MAP_sample)
        logger.log(1, f"{apogee_id}: ...setup complete")

        # Add a 'ln_prior' Deterministic by summing the logp of each
        # nonlinear parameter, unless the model already defines one.
        if 'ln_prior' not in model.named_vars:
            ln_prior_var = None
            for k in joker.prior._nonlinear_equiv_units:
                var = model.named_vars[k]
                try:
                    if ln_prior_var is None:
                        ln_prior_var = var.distribution.logp(var)
                    else:
                        ln_prior_var = ln_prior_var + var.distribution.logp(
                            var)
                except Exception as e:
                    # Best-effort: parameters whose logp can't be evaluated
                    # are skipped.
                    logger.warning("Cannot auto-compute log-prior value for "
                                   f"parameter {var}.")
                    print(e)
                    continue

            # NOTE(review): if every parameter's logp failed above,
            # ln_prior_var is still None here and pm.Deterministic will
            # raise — confirm whether that case can occur in practice.
            pm.Deterministic('ln_prior', ln_prior_var)
            logger.log(1, f"{apogee_id}: setting up ln_prior in pymc3 model")

        # Also record the total model log-probability per sample:
        if 'logp' not in model.named_vars:
            pm.Deterministic('logp', model.logpt)
            logger.log(1, f"{apogee_id}: setting up logp in pymc3 model")

        logger.debug(f"{apogee_id}: Starting MCMC sampling")
        trace = pm.sample(start=mcmc_init, chains=4, cores=1,
                          step=xo.get_dense_nuts_step(target_accept=0.95),
                          tune=c.tune, draws=c.draws)

    # Cache the trace so a re-run skips this star (see check above):
    pm.save_trace(trace, directory=this_cache_path, overwrite=True)
    logger.debug(
        "{apogee_id}: Finished MCMC sampling ({time:.2f} seconds)".format(
            apogee_id=apogee_id, time=time.time() - t0))
def worker(task):
    """Build a metadata summary table for a chunk of stars.

    For each APOGEE ID: load its RV data and its Joker samples, compute the
    MAP sample, completeness/unimodality flags, phase-coverage statistics,
    the maximum unmarginalized likelihood, and model evidences, and collect
    one row per star into a QTable.

    Parameters
    ----------
    task : tuple
        ``(apogee_ids, worker_id, c)``.

    Returns
    -------
    QTable
        One row per processed star, with units re-attached to columns.
        NOTE(review): on a star with no samples this instead returns the
        tuple ``(None, None)`` — inconsistent with the single-value QTable
        return; confirm the caller handles both shapes.
    """
    apogee_ids, worker_id, c = task

    logger.debug(
        f"Worker {worker_id}: {len(apogee_ids)} stars left to process")

    rows = []
    units = None  # column-name -> unit mapping, captured from the first row
    for apogee_id in apogee_ids:
        with h5py.File(c.tasks_path, 'r') as tasks_f:
            data = tj.RVData.from_timeseries(tasks_f[apogee_id])

        with h5py.File(c.joker_results_path, 'r') as results_f:
            if apogee_id not in results_f:
                # NOTE(review): this abandons the remaining stars in the
                # chunk (return, not continue) — confirm intended.
                logger.warning("No samples for: {}".format(apogee_id))
                return None, None

            # Load samples from The Joker and probabilities
            samples = tj.JokerSamples.read(results_f[apogee_id])

        if len(samples) < 1:
            logger.warning("No samples for: {}".format(apogee_id))
            return None, None

        row = dict()
        row['APOGEE_ID'] = apogee_id
        row['n_visits'] = len(data)

        # MAP = sample maximizing the (unnormalized) posterior:
        MAP_idx = (samples['ln_prior'] + samples['ln_likelihood']).argmax()
        MAP_sample = samples[MAP_idx]
        for k in MAP_sample.par_names:
            row['MAP_' + k] = MAP_sample[k]
        row['t0_bmjd'] = MAP_sample.t0.tcb.mjd

        row['MAP_ln_likelihood'] = samples['ln_likelihood'][MAP_idx]
        row['MAP_ln_prior'] = samples['ln_prior'][MAP_idx]

        # "Completed" = the sampler returned the full requested sample count:
        if len(samples) == c.requested_samples_per_star:
            row['joker_completed'] = True
        else:
            row['joker_completed'] = False

        if unimodal_P(samples, data):
            row['unimodal'] = True
        else:
            row['unimodal'] = False

        # Time-baseline and phase-coverage diagnostics:
        row['baseline'] = (data.t.mjd.max() - data.t.mjd.min()) * u.day
        row['max_phase_gap'] = max_phase_gap(MAP_sample, data)
        row['phase_coverage'] = phase_coverage(MAP_sample, data)
        row['periods_spanned'] = periods_spanned(MAP_sample, data)
        row['phase_coverage_per_period'] = phase_coverage_per_period(
            MAP_sample, data)

        # Use the max marginal likelihood sample
        _unit = data.rv.unit
        max_ll_sample = samples[samples['ln_likelihood'].argmax()]
        orbit = max_ll_sample.get_orbit()
        # Per-visit variance includes the extra "jitter" parameter s:
        var = data.rv_err**2 + max_ll_sample['s']**2
        ll = ln_normal(
            orbit.radial_velocity(data.t).to_value(_unit),
            data.rv.to_value(_unit),
            var.to_value(_unit**2)).sum()
        row['max_unmarginalized_ln_likelihood'] = ll

        # Compute the evidence p(D) for the Kepler model and for the constant RV
        row['constant_ln_evidence'] = constant_model_evidence(data)
        row['kepler_ln_evidence'] = (
            logsumexp(samples['ln_likelihood'] + samples['ln_prior'])
            - np.log(len(samples)))

        # Capture units once (from the first row), then strip them so the
        # rows hold plain values; units are re-applied to the table below.
        if units is None:
            units = dict()
            for k in row.keys():
                if hasattr(row[k], 'unit'):
                    units[k] = row[k].unit

        for k in units:
            row[k] = row[k].value

        rows.append(row)

    # NOTE(review): if apogee_ids is empty, units is still None here and
    # iterating it raises TypeError — confirm chunks are never empty.
    tbl = QTable(rows)
    for k in units:
        tbl[k] = tbl[k] * units[k]

    return tbl