# Baseline 1: total log-likelihood of the held-out set under the KDE that was
# fit on X (``score_samples`` yields one log-density per test row).
kde_on_X_test_log_likelihood = np.sum(kde_on_X.score_samples(X_test))

# Baseline 2: a single multivariate normal whose mean and covariance are the
# sample moments of the training set.
train_mean = np.mean(X_train, axis=0)
train_cov = np.cov(X_train.T)
normal_dist_on_X = stats.multivariate_normal(mean=train_mean, cov=train_cov)
normal_dist_on_X_test_log_likelihood = np.sum(
    normal_dist_on_X.logpdf(X_test)
)

print("X shape: ", X.shape)

# One random starting DAG over the columns of X, degree-capped by max_deg.
initial_dags = [random_graph.random_dag(X.shape[1], max_deg) for _ in range(1)]
kernel = 'gaussian'

print("kde_on_X_test_log_likelihood: ", kde_on_X_test_log_likelihood)
print("normal dist test log likelihood: ",
      normal_dist_on_X_test_log_likelihood)

# TODO(Phillip): the annealing schedule below is a placeholder; pick good
# values for these.
# The starting temperature is the objective evaluated at the initial DAG
# (presumably the negated log-likelihood, given negative=True -- confirm).
initial_temp = kde_bayesian_net_log_likelihood.bayesian_net_log_likelihood(
    X_train, X_test, kernel, 0, 0, negative=True
)(initial_dags[0])

# Empirically the baseline log-likelihood is at least half of the initial
# temperature, so stop annealing once the temperature has halved.
final_temp = initial_temp / 2
alpha = (initial_temp - final_temp) / 5000

opt_dag = simulated_annealing.simulated_annealing(
    initial_dags[0],
    initial_temp,
    final_temp,
    alpha,
    kde_bayesian_net_log_likelihood.bayesian_net_log_likelihood(
        X_train, X_test, kernel, 0, 0, negative=True
    ),
    simulated_annealing_dag.degree_constrained_neighbors_func(max_deg),
    print_iters=10,
)

# NOTE(review): this overwrites the value computed at the top and calls
# ``evaluate`` on the same ``kde_on_X`` object that was used with
# ``score_samples`` above -- looks like it expects a different KDE type
# (scipy-style rather than sklearn-style); confirm this statement belongs here.
kde_on_X_test_log_likelihood = np.sum(np.log(kde_on_X.evaluate(X_test.T)))
def get_P_binary(proj_sep, delta_v_trans, num_sys=100000, method='kde',
                 kde_method='sklearn'):
    """Evaluate the binary-population density at (proj_sep, delta_v_trans).

    A 2D kernel density estimate is fit to ``log10(proj_sep)`` and
    ``log10(delta_v_trans)`` of the module-level ``binary_set`` and cached in
    the module-level ``binary_kde``.  The density is evaluated in log10-space
    and converted back to linear space before being returned.

    Parameters
    ----------
    proj_sep : float or 1-D array_like
        Projected separation between two stars (must be positive).
    delta_v_trans : float or 1-D array_like
        Transverse velocity difference between two stars (must be positive).
        ``proj_sep`` and ``delta_v_trans`` are broadcast against each other,
        so either one may be a scalar while the other is an array.
    num_sys : int
        Number of systems passed to ``generate_binary_set`` when
        ``binary_set`` has not been populated yet.
    method : string
        Method to perform 2D interpolation (options: kde)
    kde_method : string
        Which KDE algorithm to use (options: scipy, sklearn)

    Returns
    -------
    P(proj_sep, delta_v_trans) : ndarray or None
        Density of genuine binaries at the requested point(s); scalar inputs
        give an array of length 1.  ``None`` is returned (after printing a
        message) when ``method`` or ``kde_method`` is not a supported option.
    """
    global binary_set, binary_kde

    # Catalog check
    # (generate_binary_set presumably populates the global binary_set --
    # it is defined outside this function)
    if binary_set is None:
        generate_binary_set(num_sys=num_sys)

    # Strings are compared by value: ``is`` tests object identity and only
    # appears to work for literals that CPython happens to intern.
    if method != 'kde':
        print("You must input an appropriate method.")
        print("Options: 'kde' only")
        return

    if kde_method not in ('sklearn', 'scipy'):
        print("Must use a valid kde algorithm: options are 'sklearn' and 'scipy'")
        print("NOTE: sklean's KDE is the Lotus to scipy's 3-cylinder Pinto")
        return

    use_sklearn = kde_method == 'sklearn'

    # The cached estimator is only reusable if it exposes the API of the
    # requested backend; otherwise a previous call with the other
    # ``kde_method`` left an incompatible object behind and we must refit.
    required_api = 'score_samples' if use_sklearn else 'evaluate'
    if binary_kde is None or not hasattr(binary_kde, required_api):
        # We work in log space for the set of binaries; shape (2, n_binaries)
        log_sample = np.array([np.log10(binary_set['proj_sep']),
                               np.log10(binary_set['delta_v_trans'])])
        if use_sklearn:
            binary_kde = KernelDensity(bandwidth=0.1, kernel='tophat')
            binary_kde.fit(log_sample.T)
        else:
            binary_kde = gaussian_kde(log_sample)

    # Broadcast so that scalar/scalar, scalar/array, array/scalar and
    # array/array inputs all go through one code path.
    sep, dv = np.broadcast_arrays(np.atleast_1d(proj_sep),
                                  np.atleast_1d(delta_v_trans))
    log_points = np.array([np.log10(sep), np.log10(dv)])  # shape (2, n)

    if use_sklearn:
        # sklearn wants (n_samples, n_features) and returns log-densities
        prob_binary = np.exp(binary_kde.score_samples(log_points.T))
    else:
        # scipy wants (n_features, n_samples) and returns densities
        prob_binary = binary_kde.evaluate(log_points)

    # Convert back from log10-space to linear-space
    # the log(10) terms convert from log10 to ln
    prob_binary = prob_binary / (sep * np.log(10.)) / (dv * np.log(10.))

    return prob_binary
def get_P_binary(proj_sep, delta_v_trans, num_sys=100000, method='kde',
                 kde_method='sklearn'):
    """Evaluate the binary-population density at (proj_sep, delta_v_trans).

    A 2D kernel density estimate is fit to ``log10(proj_sep)`` and
    ``log10(delta_v_trans)`` of the module-level ``binary_set`` and cached in
    the module-level ``binary_kde``.  The density is evaluated in log10-space
    and converted back to linear space before being returned.

    Parameters
    ----------
    proj_sep : float or 1-D array_like
        Projected separation between two stars (must be positive).
    delta_v_trans : float or 1-D array_like
        Transverse velocity difference between two stars (must be positive).
        ``proj_sep`` and ``delta_v_trans`` are broadcast against each other,
        so either one may be a scalar while the other is an array.
    num_sys : int
        Number of systems passed to ``generate_binary_set`` when
        ``binary_set`` has not been populated yet.
    method : string
        Method to perform 2D interpolation (options: kde)
    kde_method : string
        Which KDE algorithm to use (options: scipy, sklearn)

    Returns
    -------
    P(proj_sep, delta_v_trans) : ndarray or None
        Density of genuine binaries at the requested point(s); scalar inputs
        give an array of length 1.  ``None`` is returned (after printing a
        message) when ``method`` or ``kde_method`` is not a supported option.
    """
    global binary_set, binary_kde

    # Catalog check
    # (generate_binary_set presumably populates the global binary_set --
    # it is defined outside this function)
    if binary_set is None:
        generate_binary_set(num_sys=num_sys)

    # Strings are compared by value: ``is`` tests object identity and only
    # appears to work for literals that CPython happens to intern.
    if method != 'kde':
        print("You must input an appropriate method.")
        print("Options: 'kde' only")
        return

    if kde_method not in ('sklearn', 'scipy'):
        print(
            "Must use a valid kde algorithm: options are 'sklearn' and 'scipy'"
        )
        print(
            "NOTE: sklean's KDE is the Lotus to scipy's 3-cylinder Pinto")
        return

    use_sklearn = kde_method == 'sklearn'

    # The cached estimator is only reusable if it exposes the API of the
    # requested backend; otherwise a previous call with the other
    # ``kde_method`` left an incompatible object behind and we must refit.
    required_api = 'score_samples' if use_sklearn else 'evaluate'
    if binary_kde is None or not hasattr(binary_kde, required_api):
        # We work in log space for the set of binaries; shape (2, n_binaries)
        log_sample = np.array([
            np.log10(binary_set['proj_sep']),
            np.log10(binary_set['delta_v_trans'])
        ])
        if use_sklearn:
            binary_kde = KernelDensity(bandwidth=0.1, kernel='tophat')
            binary_kde.fit(log_sample.T)
        else:
            binary_kde = gaussian_kde(log_sample)

    # Broadcast so that scalar/scalar, scalar/array, array/scalar and
    # array/array inputs all go through one code path.
    sep, dv = np.broadcast_arrays(
        np.atleast_1d(proj_sep), np.atleast_1d(delta_v_trans))
    log_points = np.array([np.log10(sep), np.log10(dv)])  # shape (2, n)

    if use_sklearn:
        # sklearn wants (n_samples, n_features) and returns log-densities
        prob_binary = np.exp(binary_kde.score_samples(log_points.T))
    else:
        # scipy wants (n_features, n_samples) and returns densities
        prob_binary = binary_kde.evaluate(log_points)

    # Convert back from log10-space to linear-space
    # the log(10) terms convert from log10 to ln
    prob_binary = prob_binary / (sep * np.log(10.)) / (dv * np.log(10.))

    return prob_binary