def __init__(self, α: np.ndarray, z: np.ndarray, bor: float):
    """
    :param α: sufficient statistics of the posterior Dirichlet density on model/family frequencies
    :param z: posterior probabilities for each subject to belong to each model/family
    :param bor: Bayesian omnibus risk p(y|H0)/(p(y|H0)+p(y|H1))
    """
    self.attribution = z.copy()
    self.frequency_mean = dirichlet.mean(α)
    self.frequency_var = dirichlet.var(α)
    self.exceedance_probability = exceedance_probability(dirichlet(α))
    self.protected_exceedance_probability = self.exceedance_probability * (1 - bor) + bor / len(α)  # (7)
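# A minimal Monte Carlo sketch of the `exceedance_probability` helper the
# constructor above calls but does not define here. The sampling approach,
# the sample count, and the example alpha/bor values are assumptions for
# illustration, not the author's implementation.
import numpy as np
from scipy.stats import dirichlet


def exceedance_probability(rv, n_samples=100000):
    # P(r_k > r_j for all j != k): the frequency with which component k is
    # the largest across draws from the frozen Dirichlet rv
    samples = rv.rvs(size=n_samples)              # shape (n_samples, K)
    winners = np.argmax(samples, axis=1)          # index of largest frequency
    return np.bincount(winners, minlength=samples.shape[1]) / n_samples


alpha = np.array([8.5, 3.2, 1.3])                 # hypothetical posterior counts
ep = exceedance_probability(dirichlet(alpha))
bor = 0.12                                        # hypothetical omnibus risk
pep = ep * (1 - bor) + bor / len(alpha)           # protected EP, as in (7)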
import numpy as np
from numpy.testing import assert_equal
from scipy.stats import dirichlet


def test_frozen_dirichlet():
    np.random.seed(2846)
    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)

    # Frozen methods must agree exactly with the non-frozen equivalents
    assert_equal(d.var(), dirichlet.var(alpha))
    assert_equal(d.mean(), dirichlet.mean(alpha))
    assert_equal(d.entropy(), dirichlet.entropy(alpha))
    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
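# Why the test passes x[:-1]: scipy's dirichlet accepts either all K
# components or just the first K - 1, inferring the last component as
# 1 - sum of the rest. A small illustration with made-up values:
import numpy as np
from scipy.stats import dirichlet

alpha = np.array([2.0, 3.0, 4.0])
x = np.array([0.2, 0.3, 0.5])
assert np.isclose(dirichlet.pdf(x, alpha), dirichlet.pdf(x[:-1], alpha))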
def splitCargoes(alpha_s, n_customer, N, V, Cat, IT, IT_num, nv,
                 Container_vehicle, locationtuples, RP, A, nmaps, dist,
                 historical_routes, historical_routes_tuples):
    # Dirichlet distribution over cargo plans: draw one multinomial sample
    # of 300 trials around each Dirichlet mean
    multinomial_ed_s = []
    for alpha_s_inside in alpha_s:
        dirichlet_mean_s = dirichlet.mean(alpha_s_inside)
        multinomial_ed_s.append(multinomial.rvs(300, dirichlet_mean_s, size=1))

    # Choose cargoes: collect indices in order of descending sampled count
    final_set = []
    for cargo_class in range(len(multinomial_ed_s)):
        sorted_cargoes = []
        for count in range(max(multinomial_ed_s[cargo_class][0]), -1, -1):
            found_cargoes = np.where(multinomial_ed_s[cargo_class][0] == count)
            sorted_cargoes.append(found_cargoes[0])
        first_set = []
        for group in sorted_cargoes:
            for idx in group:
                try:
                    first_set.append(IT[cargo_class][idx])
                except IndexError:  # skip indices beyond this cargo list
                    pass
        final_set.append([first_set])

    # Cargoes for vehicles: split each customer's ranked cargo list into
    # nv contiguous chunks, one chunk per vehicle
    set_for_vehicles = []
    for vehicle_num in range(nv):
        init_set = []
        for customer in range(n_customer):
            chunk = round(len(final_set[customer][0]) / nv)
            init_set.append(
                final_set[customer][0][vehicle_num * chunk:(vehicle_num + 1) * chunk])
        set_for_vehicles.append(init_set)
    return set_for_vehicles, final_set
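# Standalone sketch of the ranking trick splitCargoes relies on: draw one
# multinomial sample around the Dirichlet mean, then order items by their
# sampled counts, highest first. The alpha values below are illustrative.
import numpy as np
from scipy.stats import dirichlet, multinomial

alpha = np.array([5.0, 1.0, 3.0, 2.0])
counts = multinomial.rvs(300, dirichlet.mean(alpha), size=1)[0]
ranked = np.argsort(counts)[::-1]  # item indices, most-preferred first
print(counts, ranked)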
def findRoutPlans(alpha, n_customer, N, V, Cat, IT, IT_num, nv,
                  Container_vehicle, locationtuples, RP, A, nmaps, dist,
                  historical_routes, historical_routes_tuples):
    # Dirichlet distribution over routes: draw RP * 2 multinomial samples
    # of 300 trials around each Dirichlet mean
    multinomial_ed = []
    for alpha_inside in alpha:
        dirichlet_mean = dirichlet.mean(alpha_inside)
        multinomial_ed.append(multinomial.rvs(300, dirichlet_mean, size=RP * 2))

    # Finding edges: build candidate paths by taking edges in order of
    # descending sampled count
    init_final_path = []
    for eachroute in multinomial_ed:
        semi_final_path = []
        for plan in range(len(eachroute)):
            sorted_edges = []
            for count in range(max(eachroute[plan]), -1, -1):
                found = np.where(eachroute[plan] == count)
                sorted_edges.append(found[0])
            first_path = []
            for edge_group in sorted_edges:
                for edge_idx in edge_group:
                    # Skip edges that would revisit a collection point
                    if len(first_path) > 0:
                        first_elements = [i for i, j in first_path]
                        second_elements = [j for i, j in first_path]
                        if (A[edge_idx][0] not in first_elements
                                and A[edge_idx][1] not in second_elements):
                            first_path.append(A[edge_idx])
                    else:
                        first_path.append(A[edge_idx])
            if len(first_path) > n_customer:
                semi_final_path.append(first_path[0:(n_customer + 2)])

        # Remove invalid routes that traverse the same edge in both directions
        numfailed = []
        for rp in range(len(semi_final_path)):
            starting_point = 0
            for jkl in semi_final_path[rp]:
                starting_point += 1
                for lkj in semi_final_path[rp][starting_point:]:
                    if jkl[1] == lkj[0] and jkl[0] == lkj[1]:
                        numfailed.append(rp)
        clear_paths = [path for n, path in enumerate(semi_final_path)
                       if n not in numfailed]
        init_final_path.append(clear_paths)

    # Choose a different path for every vehicle among the existing route
    # plans: pick the candidate sharing the fewest edges with the previous pick
    d = 0
    final_path = []
    for i in range(len(init_final_path)):
        a = init_final_path[i][d]
        final_path.append(a)
        for j in range(i + 1, min(i + 2, len(init_final_path))):
            p = []
            for k in range(len(init_final_path[j])):
                p.append(len(set(a) & set(init_final_path[j][k])))
            d = p.index(min(p))
    return final_path
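# Sketch of the dissimilarity rule used at the end of findRoutPlans: from a
# candidate pool, pick the path sharing the fewest edges with the already
# chosen path `a`. The edge data below is illustrative.
a = [(0, 1), (1, 2), (2, 3)]
candidates = [[(0, 1), (1, 2), (2, 3)], [(0, 2), (2, 1), (1, 3)]]
overlaps = [len(set(a) & set(c)) for c in candidates]
best = candidates[overlaps.index(min(overlaps))]  # -> the second candidate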
def compute_LPV_from_parameters(alpha_vector):
    M = dlt.mean(alpha_vector)
    V = dlt.var(alpha_vector)
    # Lower plausible values: approximate 5th percentile of each marginal
    # via the Gaussian rule mean - 1.65 * sd, clipped at zero
    LPV = M - 1.65 * np.sqrt(V)
    return np.where(LPV < 0, 0, LPV)
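# Usage sketch, assuming `dlt` aliases scipy.stats.dirichlet; the alpha
# values are illustrative pseudo-counts. For an exact bound one could use
# the marginal Beta quantile instead of the Gaussian rule, since each
# Dirichlet marginal is Beta(alpha_i, alpha_0 - alpha_i).
import numpy as np
from scipy.stats import beta, dirichlet as dlt

alpha = np.array([88.0, 35.0, 2.0])
print(compute_LPV_from_parameters(alpha))          # Gaussian approximation
print(beta.ppf(0.05, alpha, alpha.sum() - alpha))  # exact marginal 5th percentiles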
# =============================================================================
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import dirichlet as dlt

# Settings
plot_priors = False
n_samples = 10000

# Observed data
count_obs = OrderedDict({'id1': 87, 'id2': 34, 'id3': 1})
counts = np.array(list(count_obs.values()), dtype=int)

dirichlet_prior = np.ones_like(counts)  # uninformative prior based on pseudo-counts
dirichlet_posterior = dirichlet_prior + counts

prior_samples = get_samples(dirichlet_prior)
posterior_samples = get_samples(dirichlet_posterior)

print('prior means: %s' % str(dlt.mean(dirichlet_prior)))
PoM = dlt.mean(dirichlet_posterior)
print('posterior means: %s' % str(PoM))
PoV = dlt.var(dirichlet_posterior)
print('posterior variances: %s' % str(PoV))
print('naive posterior means: %s' %
      ((counts + 1) / np.sum(counts + 1)))  # expected from value counts plus assumed prior counts
print('Entropy DLT prior:', dlt.entropy(dirichlet_prior))
print('Entropy DLT posterior:', dlt.entropy(dirichlet_posterior))

if plot_priors:
    plt.figure(figsize=(9, 6))
    for i, label in enumerate(count_obs.keys()):
        ax = plt.hist(prior_samples[:, i], bins=50, density=True, label=label)
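# Hedged sketch of the `get_samples` helper the script above assumes but
# does not define here: draw n_samples frequency vectors from a Dirichlet
# with the given pseudo-counts. The rvs-based body and the default sample
# count (mirroring the script's n_samples setting) are assumptions.
def get_samples(alpha, n_samples=10000):
    return dlt.rvs(alpha, size=n_samples)  # shape (n_samples, len(alpha))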
def mean(self):
    # Thin wrapper: delegate to scipy so callers get the expected category
    # frequencies E[x_i] = alpha_i / sum(alpha) without touching self.alpha
    return dirichlet.mean(self.alpha)
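# Minimal runnable sketch of the pattern above; the class name and
# constructor are hypothetical, added only to give the method context.
import numpy as np
from scipy.stats import dirichlet


class PosteriorSketch:
    def __init__(self, alpha):
        self.alpha = np.asarray(alpha, dtype=float)

    def mean(self):
        # Expected category frequencies E[x_i] = alpha_i / sum(alpha)
        return dirichlet.mean(self.alpha)


print(PosteriorSketch([3.0, 1.0]).mean())  # -> [0.75 0.25]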