def test_alpha_values_single_pair(alpha_cr: float, alpha_cp: float, label_cr, label_cp):
    """Sum third-phase MineCore costs for one label pair over every fold.

    For each split produced by ``k_fold.split(train_x)`` this trains the
    classifiers for the two labels, computes posteriors on the held-out
    rows, runs ``MineCore.run_plusplus`` with the supplied alpha values and
    adds the first element returned by ``Costs.get_third_phase_costs`` to a
    running total.

    Args:
        alpha_cr: First alpha value stored for the pair.
        alpha_cp: Second alpha value stored for the pair.
        label_cr: First label of the pair.
        label_cp: Second label of the pair.

    Returns:
        A dict mapping each cost key to ``{(alpha_cr, alpha_cp): total}``,
        where ``total`` is the sum of that key's value across the folds.
    """
    alpha_key = (alpha_cr, alpha_cp)
    pair = (label_cr, label_cp)
    pairs = [pair]
    labels = [label_cr, label_cp]
    alpha_labels = {pair: [alpha_cr, alpha_cp]}
    run_costs = {}

    for train_index, test_index in k_fold.split(train_x):
        fold_train = train_x[train_index]
        fold_test = train_x[test_index]

        classifiers = learn_classifiers(
            dataset, fold_train, labels, 10, train_index=train_index
        )
        posterior_probabilities = compute_posterior_probabilities(
            dataset, fold_test, labels, classifiers
        )

        # Dense ground-truth vectors per label for the two sides of the split.
        train_y = {}
        test_y = {}
        for label in labels:
            column = dataset.target_names.searchsorted(label)
            train_y[label] = np.asarray(
                dataset.target[train_index, column].todense()
            ).squeeze()
            test_y[label] = np.asarray(
                dataset.target[test_index, column].todense()
            ).squeeze()

        prior_probabilities, _ = compute_prevalence(labels, train_y)

        costs = Costs(cost_structure_1, pairs, posterior_probabilities, test_y)
        minecore = MineCore(
            pairs,
            prior_probabilities,
            posterior_probabilities,
            test_y,
            alpha_labels,
            1.0,
            1.0,
        )
        tau_rs, tau_ps, _, cm_3 = minecore.run_plusplus(costs)

        # Only the first element of the third-phase result is accumulated.
        fold_costs = costs.get_third_phase_costs(cm_3, tau_rs, tau_ps)[0]
        for key, value in fold_costs.items():
            totals = run_costs.setdefault(key, {alpha_key: 0})
            totals[alpha_key] = totals[alpha_key] + value

    # NOTE(review): assumed to run once after all folds, not per fold - confirm.
    logging.info(f"\nRun costs for alpha {(alpha_cr, alpha_cp)}:\n{run_costs}\n")
    return run_costs
def show_automatic_costs():
    """Plot and print automatic costs for classifier vs. EMQ posteriors.

    Builds one ``Costs`` object from ``classifier_posteriors`` and one from
    ``emq_posteriors`` (both against ``quarter_y_arr``), asks each for its
    automatic costs, passes the first element of each result to
    ``show_delta_costs_graph`` and prints both full results.
    """
    before_emq_costs = Costs(
        cost_structure_1, pairs, classifier_posteriors, quarter_y_arr
    ).get_automatic_costs()
    after_emq_costs = Costs(
        cost_structure_1, pairs, emq_posteriors, quarter_y_arr
    ).get_automatic_costs()

    show_delta_costs_graph(before_emq_costs[0], after_emq_costs[0])
    print(
        f"Before EMQ Costs: {before_emq_costs}\nAfter EMQ Costs: {after_emq_costs}"
    )
labels.add(cr) labels.add(cp) pos_prevalences, neg_prevalences = compute_prevalence( labels, training_y_arr) def save(mc, costs, name): tau_rs, tau_ps, cm_2, cm_3 = mc.run(costs) with open(name, 'wb') as f: pickle.dump([tau_rs, tau_ps, cm_2, cm_3], f) # Before EMQ # costs = Costs(cost_structure_1, pairs, posterior_probs, quarter_y_arr) # mc = MineCore(pairs, posterior_probs, quarter_y_arr) # t1 = threading.Thread(target=save, args=(mc, costs, "before_emq.pkl")) # t1.start() # After EMQ new_posteriors, pos_priors = emq_new_attempt(posterior_probs, pos_prevalences, labels) with open('./pickles/newemq_posteriors_0607.pkl', 'wb') as f: pickle.dump(new_posteriors, f) # emq_better_posteriors = get_emq_better_posteriors(labels, quarter_y_arr, posterior_probs, new_posteriors, lambda m: m['TNM'] + m['TPM']) costs = Costs(cost_structure_1, pairs, new_posteriors, quarter_y_arr) mc = MineCore(pairs, None, new_posteriors, quarter_y_arr, None, 1, 1) # # # t1.join() save(mc, costs, "after_newemq_0607.pkl")
# classifiers = learn_classifiers(dataset, train_x, labels, 10, training_set_end=TRAINING_SET_END) # posterior_probabilities = compute_posterior_probabilities(dataset, dataset.data[TEST_SET_START:TEST_SET_END, :], labels, classifiers) prior_probabilities, neg_priors = compute_prevalence(labels, train_y) # new_priors = dict() # for label in labels: # print(f"Updating probabilities for label: {label}") # _, em_pos, em_neg = emq_attempt(posterior_probabilities[label], prior_probabilities[label], neg_priors[label]) # new_priors[label] = em_pos # alphas = dict(map(lambda kv: (kv[0], (1.0, 1.0)), alphas.items())) ro_r = 0.50 ro_p = 0.99 costs = Costs(cost_structure_1, pairs, posterior_probabilities, quarter_y_arr, alphas=alphas, prior_probabilities=prior_probabilities, ro_r=ro_r, ro_p=ro_p) minecore = MineCore(pairs, prior_probabilities, posterior_probabilities, quarter_y_arr, alphas, ro_r, ro_p) def run_and_save(run_func, costs, file_name): tau_rs, tau_ps, cm_2, cm_3 = run_func(costs) with open(file_name, 'wb') as f: pickle.dump([tau_rs, tau_ps, cm_2, cm_3], f) p1 = Process( target=run_and_save,
('C31', 'C15'), ('C31', 'ECAT'), ('C31', 'C21'), ('C31', 'M14'), ('C181', 'C151'), ('C181', 'GCAT'), ('C181', 'C152'), ('C181', 'C15'), ('C181', 'C17'), ('M141', 'ECAT'), ('M141', 'GCAT'), ('M141', 'C24'), ('M141', 'C31'), ('M141', 'C21'), ('M11', 'ECAT'), ('M11', 'C152'), ('M11', 'M132'), ('M11', 'M13'), ('M11', 'CCAT'), ('E21', 'C31'), ('E21', 'M12'), ('E21', 'MCAT'), ('E21', 'E12'), ('E21', 'GPOL'), ('C17', 'MCAT'), ('C17', 'C152'), ('C17', 'C15'), ('C17', 'C18'), ('C17', 'ECAT'), ('M13', 'E21'), ('M13', 'M11'), ('M13', 'GCAT'), ('M13', 'E12'), ('M13', 'ECAT'), ('C18', 'E12'), ('C18', 'GCAT'), ('C18', 'C152'), ('C18', 'C15'), ('C18', 'C17'), ('GPOL', 'MCAT'), ('GPOL', 'CCAT'), ('GPOL', 'GCRIM'), ('GPOL', 'E21'), ('GPOL', 'GVIO'), ('C152', 'M11'), ('C152', 'C17'), ('C152', 'C31'), ('C152', 'C181'), ('C152', 'C18'), ('M14', 'M132'), ('M14', 'M13'), ('M14', 'GCAT'), ('M14', 'C24'), ('M14', 'C31'), ('C151', 'C181'), ('C151', 'C18'), ('C151', 'C17'), ('C151', 'C31'), ('C151', 'C152'), ('ECAT', 'GVIO'), ('ECAT', 'C17'), ('ECAT', 'M13'), ('ECAT', 'GPOL'), ('ECAT', 'MCAT')] labels = set() for cr, cp in pairs: labels.add(cr) labels.add(cp) fake_post = fake_posteriors() costs = Costs(cost_structure_1, pairs, fake_post, quarter_y_arr) mc = MineCore(pairs, fake_post, quarter_y_arr) def save(mc, costs, name): tau_rs, tau_ps, cm_2, cm_3 = mc.run_plusplus(costs) with open(name, 'wb') as f: pickle.dump([tau_rs, tau_ps, cm_2, cm_3], f) save(mc, costs, "with_fk_posteriors.pkl")
('E21', 'E12'), ('E21', 'GPOL'), ('C17', 'MCAT'), ('C17', 'C152'), ('C17', 'C15'), ('C17', 'C18'), ('C17', 'ECAT'), ('M13', 'E21'), ('M13', 'M11'), ('M13', 'GCAT'), ('M13', 'E12'), ('M13', 'ECAT'), ('C18', 'E12'), ('C18', 'GCAT'), ('C18', 'C152'), ('C18', 'C15'), ('C18', 'C17'), ('GPOL', 'MCAT'), ('GPOL', 'CCAT'), ('GPOL', 'GCRIM'), ('GPOL', 'E21'), ('GPOL', 'GVIO'), ('C152', 'M11'), ('C152', 'C17'), ('C152', 'C31'), ('C152', 'C181'), ('C152', 'C18'), ('M14', 'M132'), ('M14', 'M13'), ('M14', 'GCAT'), ('M14', 'C24'), ('M14', 'C31'), ('C151', 'C181'), ('C151', 'C18'), ('C151', 'C17'), ('C151', 'C31'), ('C151', 'C152'), ('ECAT', 'GVIO'), ('ECAT', 'C17'), ('ECAT', 'M13'), ('ECAT', 'GPOL'), ('ECAT', 'MCAT')] labels = set() for cr, cp in pairs: labels.add(cr) labels.add(cp) with open('./pickles/post_prob.pkl', 'rb') as f: posterior_probabilities = pickle.load(f) with open('./pickles/emq_posteriors.pkl', 'rb') as f: emq_posteriors = pickle.load(f) costs = Costs(cost_structure_1, pairs, posterior_probabilities, quarter_y_arr) mc = MineCore(pairs, emq_posteriors, posterior_probabilities, quarter_y_arr) result = mc.run_plusplus(costs) with open('./pickles/test120319_autoonly.pkl', 'wb') as f: pickle.dump(result, f)
i].todense()).squeeze() labels = set() for cr, cp in pairs: labels.add(cr) labels.add(cp) prior_probabilities, neg_priors = compute_prevalence( labels, training_y_arr) ro_r = 0.50 ro_p = 0.99 # alphas = dict(map(lambda kv: (kv[0], (1.0, 1.0)), alphas.items())) costs = Costs(cost_structure_1, pairs, classifier_posteriors, quarter_y_arr, alphas=alphas, prior_probabilities=prior_probabilities, ro_r=ro_r, ro_p=ro_p) minecore = MineCore(pairs, prior_probabilities, classifier_posteriors, quarter_y_arr, alphas, ro_r, ro_p) costs_results_after = costs.get_third_phase_costs(res_after[3], res_after[0], res_after[1]) costs_results_before = costs.get_third_phase_costs(res_before[3], res_before[0], res_before[1]) overall_costs_after = costs_results_after[0] overall_costs_before = costs_results_before[0]
y_arr[label] = np.asarray(dataset.target[ mask, dataset.target_names.searchsorted(label)].todense()).squeeze() training_y[label] = np.asarray(dataset.target[ train_idxs, dataset.target_names.searchsorted(label)].todense()).squeeze() prob[label] = prob[label][mask] pos_prev, neg_prev = compute_prevalence(labels, training_y) print("Computing EMQ") for label in labels: post, pos_prior, neg_prior = emq_attempt(prob[label], pos_prev[label], neg_prev[label]) emq_posteriors[label] = post costs = Costs(cost_structure_1, pairs, emq_posteriors, y_arr) minecore = MineCore(pairs, None, emq_posteriors, y_arr, None, 1.0, 1.0) print("Running standard Minecore") # standard_results = minecore.run(costs) # minecore.posterior_probabilities = emq_posteriors print("Running EMQ Minecore") emq_results = minecore.run(costs) print("Saving results") # with open('./pickles/minecore_al_standard_0307.pkl', 'wb') as f: # pickle.dump(standard_results, f) with open('./pickles/minecore_al_emq_0307.pkl', 'wb') as f: