# Module-level imports used below.  Project-local names (utils, npm, stan,
# config, model, and shared data structures such as X, Y, S, kdt, npm_indices,
# npm_results and done) are assumed to be defined elsewhere in the
# surrounding script.
import logging
import multiprocessing as mp
import os
import pickle
import warnings

import corner
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as op
import tqdm

logger = logging.getLogger(__name__)


def _get_1d_initialisation_point(y, scalar, bounds=None):

    N = y.size
    init = dict(
        theta=0.75,
        mu_single=np.min([np.median(y, axis=0), 10]),
        sigma_single=0.2,
        sigma_multiple=0.5)

    if bounds is not None:
        for k, (lower, upper) in bounds.items():
            if not (upper >= init[k] >= lower):
                init[k] = np.mean([upper, lower])
        bounds = bounds.copy()

    else:
        bounds = dict()

    lower_mu_multiple = np.log(init["mu_single"] + scalar * init["sigma_single"]) \
                      + init["sigma_multiple"]**2
    init["mu_multiple"] = 1.1 * lower_mu_multiple

    bounds.update(mu_multiple=[lower_mu_multiple, 1000])

    op_kwds = dict(x0=utils._pack_params(**init), args=(y, 1))

    nlp = lambda params, y, L: -utils.ln_prob(y, L, *params, bounds=bounds)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p_opt = op.minimize(nlp, **op_kwds)

    keys = ("theta", "mu_single", "sigma_single", "mu_multiple",
            "sigma_multiple")

    init_dict = utils._check_params_dict(init)
    op_dict = utils._check_params_dict(
        dict(zip(keys, utils._unpack_params(p_opt.x))))

    # Only return valid initialisation points.
    valid_inits = []
    for init in (init_dict, op_dict):
        if np.isfinite(nlp(utils._pack_params(**init), y, 1)):
            valid_inits.append(init)

    valid_inits.append("random")

    return valid_inits
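# Usage sketch (not part of the original module; the data and scalar value
# are invented).  The returned list is typically passed straight to
# optimize_mixture_model below as `inits`, with the literal "random" acting
# as a fallback initialisation for Stan.
#
#   >>> y = np.abs(np.random.normal(0.5, 0.1, size=100))
#   >>> inits = _get_1d_initialisation_point(y, scalar=1.0)
#   >>> inits[-1]
#   'random'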
def sp_swarm(*sp_indices, **kwargs):

    logger.info("Running single processor swarm")

    with tqdm.tqdm(sp_indices, total=len(sp_indices)) as pbar:

        for index in sp_indices:
            # Map the source index onto its position in the results array.
            j = np.where(npm_indices == index)[0][0]
            if done[j]:
                continue

            _, result, meta = optimize_mixture_model(index, **kwargs)

            pbar.update()

            done[j] = True
            if result is not None:
                npm_results[j] = utils._pack_params(**result)

    return None
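# Usage sketch (an illustration, not from the original source): keyword
# arguments are forwarded to optimize_mixture_model.
#
#   >>> sp_swarm(*npm_indices)             # process every source
#   >>> sp_swarm(1234, 5678, debug=True)   # a hand-picked subset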
def label_excess(y, p_opt, label_index):

    y = np.atleast_1d(y)
    _, s_mu, s_sigma, __, ___ = utils._unpack_params(utils._pack_params(**p_opt))

    assert s_mu.size == y.size, "The size of y should match the size of mu"

    excess = np.sqrt(y[label_index]**2 - s_mu[label_index]**2)
    significance = excess / s_sigma[label_index]

    return (excess, significance)
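# Worked example (a sketch; the parameter values are invented).  For a
# measured jitter y = 0.8 against a single-star model with mu = 0.5 and
# sigma = 0.1, the excess is sqrt(0.8**2 - 0.5**2) ~ 0.62, which is ~6.2
# sigma significant.
#
#   >>> p_opt = dict(theta=0.7, mu_single=0.5, sigma_single=0.1,
#   ...              mu_multiple=0.2, sigma_multiple=0.5)
#   >>> excess, significance = label_excess([0.8], p_opt, label_index=0)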
def get_initialization_point(y):

    N, D = y.shape
    ok = y <= np.mean(y)

    init_dict = dict(
        theta=0.5,
        mu_single=np.median(y[ok], axis=0),
        sigma_single=0.1 * np.median(y[ok], axis=0),
        sigma_multiple=0.1 * np.ones(D),
    )

    # mu_multiple is *highly* constrained.  Select the mid-point of the
    # acceptable range:
    mu_multiple_ranges = np.array([
        np.log(init_dict["mu_single"] + 1 * init_dict["sigma_single"]) \
            + init_dict["sigma_multiple"]**2,
        np.log(init_dict["mu_single"] + 5 * init_dict["sigma_single"]) \
            + init_dict["sigma_multiple"]**2
    ])

    init_dict["mu_multiple"] = np.mean(mu_multiple_ranges, axis=0)

    x0 = utils._pack_params(**init_dict)
    op_kwds = dict(x0=x0, args=(y, D))

    # NOTE: `nlp` (the negative log-probability callable) is not defined in
    # this scope; it is assumed to exist at the module level, analogous to
    # the local `nlp` in _get_1d_initialisation_point above.
    p_opt = op.minimize(nlp, **op_kwds)

    init_dict = dict(zip(
        ("theta", "mu_single", "sigma_single", "mu_multiple", "sigma_multiple"),
        utils._unpack_params(p_opt.x)))

    init_dict["mu_multiple_uv"] = 0.5 * np.ones(D)
    init_dict = utils._check_params_dict(init_dict)

    return init_dict
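# Usage sketch (illustrative only): y is an (N, D) array of jitter-like
# measurements, one column per label dimension.
#
#   >>> y = np.abs(np.random.normal(0.5, 0.1, size=(1000, 3)))
#   >>> init_dict = get_initialization_point(y)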
def optimize_mixture_model(index, inits=None, debug=False):

    suppress = config.get("suppress_stan_output", True)

    # Select indices and get data.
    d, nearby_idx, meta = npm.query_around_point(kdt, X[index], **kdt_kwds)

    y = Y[nearby_idx]
    ball = X[nearby_idx]

    if inits is None:
        inits = npm._get_1d_initialisation_point(
            y, scalar=mu_multiple_scalar, bounds=bounds)

    # Update the meta dictionary with summary statistics of the data.
    meta = dict(
        max_log_y=np.log(np.max(y)),
        N=nearby_idx.size,
        y_percentiles=np.percentile(y, [16, 50, 84]),
        ball_ptps=np.ptp(ball, axis=0),
        ball_medians=np.median(ball, axis=0),
        init_points=inits,
        kdt_indices=nearby_idx)

    data_dict = dict(y=y, N=y.size, scalar=mu_multiple_scalar)
    data_dict.update(stan_bounds)

    p_opts = []
    ln_probs = []
    for j, init_dict in enumerate(inits):

        opt_kwds = dict(init=init_dict, data=data_dict, as_vector=False)
        opt_kwds.update(default_opt_kwds)

        # Do optimization.
        # TODO: Suppressing output is always dangerous.
        with stan.suppress_output(suppress) as sm:
            try:
                p_opt = model.optimizing(**opt_kwds)

            except Exception:
                logger.exception(f"Exception occurred when optimizing index "
                                 f"{index} from {init_dict}:")

            else:
                if p_opt is not None:
                    p_opts.append(p_opt["par"])
                    ln_probs.append(utils.ln_prob(
                        y, 1, *utils._pack_params(**p_opt["par"]),
                        bounds=bounds))

                    assert abs(ln_probs[-1] - p_opt["value"]) < 1e-8

        try:
            p_opt

        except UnboundLocalError:
            logger.warning("Stan failed. STDOUT & STDERR:")
            logger.warning("\n".join(sm.outputs))

        else:
            if p_opt is None:
                stdout, stderr = sm.outputs
                logger.warning("Stan only returned p_opt = None")
                logger.warning(f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}")

    if len(p_opts) < 1:
        logger.warning(f"Optimization on index {index} did not converge "
                       "from any initial point trialled. Consider relaxing "
                       "the optimization tolerances! If this occurs "
                       "regularly then something is very wrong!")

        return (index, None, meta)

    else:
        # Evaluate the best optimization result.
        idx = np.argmax(ln_probs)
        p_opt = p_opts[idx]

        meta["init_idx"] = idx

        """
        # Calculate uncertainties.

        op_bounds = ()
        def nlp(p):
            w, mu_s, sigma_s, sigma_m = p
            mu_m = np.log(mu_s + mu_multiple_scalar * sigma_s) + sigma_m**2

            if not (bounds["theta"][1] >= w >= bounds["theta"][0]) \
            or not (bounds["mu_single"][1] >= mu_s >= bounds["mu_single"][0]) \
            or not (bounds["sigma_multiple"][1] >= sigma_m >= bounds["sigma_multiple"][0]):
                return np.inf

            return -utils.ln_likelihood(y, w, mu_s, sigma_s, mu_m, sigma_m)

        op_bounds = [bounds["theta"],
                     bounds["mu_single"],
                     bounds["sigma_single"],
                     bounds["sigma_multiple"]]

        #x0 = utils._pack_params(**p_opt)
        x0 = (p_opt["theta"], p_opt["mu_single"], p_opt["sigma_single"],
              p_opt["sigma_multiple"])

        p_opt2 = op.minimize(nlp, x0, bounds=op_bounds, method="L-BFGS-B")
        """

        # Create a three-panel figure showing:
        # (1) a log-density of the HRD + the selected ball points;
        # (2) a log-density of colour vs apparent magnitude + the selected
        #     ball points;
        # (3) the jitter + fitted parameters.

        if sampling:
            # TODO: Move the number of chains to the config file.
            chains = 2
            sampling_kwds = dict(
                data=opt_kwds["data"], init=[p_opt] * chains, chains=chains)

            try:
                samples = model.sampling(**sampling_kwds)

            except Exception:
                logger.exception(f"Sampling failed on index {index}")

            else:
                extracted = samples.extract()
                chains = np.array(
                    [extracted[k] for k in samples.flatnames]).T

                latex_labels = dict(
                    theta=r"$w$",
                    mu_single=r"$\mu_\mathrm{single}$",
                    sigma_single=r"$\sigma_\mathrm{single}$",
                    mu_multiple=r"$\mu_\mathrm{multiple}$",
                    sigma_multiple=r"$\sigma_\mathrm{multiple}$")

                corner_fig = corner.corner(
                    chains, labels=[latex_labels[k] for k in samples.flatnames])

                source_id = S[index]

                figure_path = os.path.join(
                    figures_dir, f"{model_name}-{source_id}-samples.png")
                corner_fig.savefig(figure_path, dpi=150)

                chains_path = os.path.join(
                    figures_dir, f"{model_name}-{source_id}-chains.pkl")

                dump = dict(names=samples.flatnames, chains=chains,
                            y=y, ball=ball, X=X[index])

                with open(chains_path, "wb") as fp:
                    pickle.dump(dump, fp)

                plt.close("all")

        if plot_mixture_model_figures:

            source_id = S[index]
            figure_path = os.path.join(
                figures_dir, f"{model_name}-{source_id}.png")

            x_upper = 2 * config["models"][model_name]["bounds"]["mu_single"][1]
            bins = np.linspace(0, x_upper, 51)

            xi = np.linspace(0, x_upper, 1000)
            y_s = utils.norm_pdf(
                xi, p_opt["mu_single"], p_opt["sigma_single"], p_opt["theta"])
            y_m = utils.lognorm_pdf(
                xi, p_opt["mu_multiple"], p_opt["sigma_multiple"],
                p_opt["theta"])

            # NOTE: `fig` and `axes` are assumed to be created once at the
            # module level and re-used across calls; only the per-source
            # artists are added and removed here.
            items_for_deletion = [
                axes[0].scatter(ball.T[0], ball.T[1],
                                c="tab:blue", s=1, zorder=10, alpha=0.5),
                axes[1].scatter(ball.T[0], ball.T[2],
                                c="tab:blue", s=1, zorder=10, alpha=0.5),
                axes[2].hist(y, bins=bins, facecolor="#cccccc",
                             density=True, zorder=-1)[-1],
                axes[2].axvline(Y[index], c="#666666"),
                axes[2].plot(xi, y_s, c="tab:blue"),
                axes[2].fill_between(xi, np.zeros_like(y_s), y_s,
                                     facecolor="tab:blue", alpha=0.25),
                axes[2].plot(xi, y_m, c="tab:red"),
                axes[2].fill_between(xi, np.zeros_like(y_m), y_m,
                                     facecolor="tab:red", alpha=0.25),
            ]

            # Axis limits.
            axes[0].set_xlim(-0.5, 5)
            axes[0].set_ylim(10, -15)

            axes[1].set_xlim(-0.5, 5)
            axes[1].set_ylim(15, 3)

            axes[2].set_xlim(0, x_upper)
            axes[2].set_yticks([])

            fig.tight_layout()
            fig.savefig(figure_path, dpi=150)

            # Hide the per-source artists so the axes can be re-used.
            for item in items_for_deletion:
                try:
                    item.set_visible(False)

                except AttributeError:
                    for _ in item:
                        if hasattr(_, "set_visible"):
                            _.set_visible(False)

        if debug:
            # Create
            raise a

        return (index, p_opt, meta)
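# Usage sketch (assumes the module-level data structures X, Y, S, kdt and the
# compiled Stan `model` have been set up by the surrounding script):
#
#   >>> index, result, meta = optimize_mixture_model(0)
#   >>> if result is not None:
#   ...     print(result["theta"], meta["init_idx"])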
while not np.all(done):

    # Check for output.
    try:
        r = out_queue.get(timeout=30)

    except mp.queues.Empty:
        logger.info("No npm_results")
        break

    else:
        j, index, result, meta = r
        done[j] = True
        if result is not None:
            npm_results[j] = utils._pack_params(**result)

        pbar.update(1)


# Do not use bad results.  Bad results include:
#   - things that are clearly discrepant in every parameter;
#   - things that are on the edge of the boundaries of parameter space.
tol_sigma = model_config["tol_sum_sigma"]
tol_proximity = model_config["tol_proximity"]

parameter_names = (
    "theta", "mu_single", "sigma_single", "mu_multiple", "sigma_multiple")
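# The filtering that consumes these tolerances is not shown above.  A minimal
# sketch of the likely intent (flagging parameters that sit within
# `tol_proximity` of either edge of their allowed range) is given below; the
# helper name and the exact criterion are assumptions, not original code.

def _is_on_bound(value, lower, upper, tol_proximity):
    # A parameter glued to either edge of its bounds suggests the optimizer
    # ran into the constraint rather than converging to an interior optimum.
    return (abs(value - lower) < tol_proximity) \
        or (abs(upper - value) < tol_proximity)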