Ejemplo n.º 1
0
def show_automatic_costs():
    """Plot and print the automatic costs for two sets of posteriors.

    One ``Costs`` object is built from ``classifier_posteriors`` and one from
    ``emq_posteriors`` (both module-level names, as are ``cost_structure_1``,
    ``pairs`` and ``quarter_y_arr``).  The first element of each
    ``get_automatic_costs()`` result is handed to ``show_delta_costs_graph``;
    both full results are then printed.
    """
    # Evaluate the classifier posteriors first, then the EMQ ones.
    costs_pre_emq = Costs(
        cost_structure_1, pairs, classifier_posteriors, quarter_y_arr
    ).get_automatic_costs()
    costs_post_emq = Costs(
        cost_structure_1, pairs, emq_posteriors, quarter_y_arr
    ).get_automatic_costs()

    # Only element 0 of each result feeds the delta graph.
    show_delta_costs_graph(costs_pre_emq[0], costs_post_emq[0])

    print(f"Before EMQ Costs: {costs_pre_emq}\nAfter EMQ Costs: {costs_post_emq}")
Ejemplo n.º 2
0
def test_alpha_values_single_pair(alpha_cr: float, alpha_cp: float, label_cr, label_cp):
    """Accumulate third-phase costs over all folds for a single label pair.

    For every split produced by ``k_fold.split(train_x)`` the function learns
    classifiers on the training rows, computes posteriors on the held-out
    rows, runs ``MineCore.run_plusplus`` and adds element 0 of
    ``Costs.get_third_phase_costs(...)`` into a running total.

    Args:
        alpha_cr: Alpha value stored first in the pair's alpha list.
        alpha_cp: Alpha value stored second in the pair's alpha list.
        label_cr: First label of the pair.
        label_cp: Second label of the pair.

    Returns:
        A dict mapping each cost key to ``{(alpha_cr, alpha_cp): total}``,
        where ``total`` is the sum of that key's value across the folds.
    """
    alpha_key = (alpha_cr, alpha_cp)
    labels = [label_cr, label_cp]
    pairs = [(label_cr, label_cp)]
    alpha_labels = {pairs[0]: [alpha_cr, alpha_cp]}
    run_costs = {}

    def _targets_for(row_index):
        # One dense, squeezed target column per label, restricted to the given rows.
        return {
            label: np.asarray(
                dataset.target[row_index, dataset.target_names.searchsorted(label)].todense()
            ).squeeze()
            for label in labels
        }

    for train_index, test_index in k_fold.split(train_x):
        fold_train, fold_test = train_x[train_index], train_x[test_index]
        classifiers = learn_classifiers(dataset, fold_train, labels, 10, train_index=train_index)
        posterior_probabilities = compute_posterior_probabilities(dataset, fold_test, labels, classifiers)

        train_y = _targets_for(train_index)
        test_y = _targets_for(test_index)

        # Only the first value returned by compute_prevalence is used as priors.
        prior_probabilities, _ = compute_prevalence(labels, train_y)
        costs = Costs(cost_structure_1, pairs, posterior_probabilities, test_y)
        minecore = MineCore(pairs, prior_probabilities, posterior_probabilities, test_y, alpha_labels, 1.0, 1.0)
        tau_rs, tau_ps, _, cm_3 = minecore.run_plusplus(costs)

        fold_costs = costs.get_third_phase_costs(cm_3, tau_rs, tau_ps)[0]
        for key, value in fold_costs.items():
            # The first fold seeds the entry with 0; later folds add to it.
            totals = run_costs.setdefault(key, {alpha_key: 0})
            totals[alpha_key] = totals[alpha_key] + value

    logging.info(f"\nRun costs for alpha {alpha_key}:\n{run_costs}\n")
    return run_costs
Ejemplo n.º 3
0
        labels.add(cr)
        labels.add(cp)

    pos_prevalences, neg_prevalences = compute_prevalence(
        labels, training_y_arr)

    def save(mc, costs, name):
        """Run ``mc.run(costs)`` and pickle its four results to ``name``.

        The call must yield exactly four values; they are written, in order,
        as one list to the file ``name`` opened in binary write mode.
        """
        thresholds_r, thresholds_p, matrix_2, matrix_3 = mc.run(costs)
        payload = [thresholds_r, thresholds_p, matrix_2, matrix_3]
        with open(name, 'wb') as sink:
            pickle.dump(payload, sink)

    # --- Before EMQ (disabled) ---
    # The run on the unmodified posteriors is kept below, commented out, for
    # reference only; it used to be started in a background thread.
    # costs = Costs(cost_structure_1, pairs, posterior_probs, quarter_y_arr)
    # mc = MineCore(pairs, posterior_probs, quarter_y_arr)
    # t1 = threading.Thread(target=save, args=(mc, costs, "before_emq.pkl"))
    # t1.start()

    # --- After EMQ ---
    # Derive new posteriors from the existing ones and the positive
    # prevalences. `pos_priors` is not used again in the visible code.
    new_posteriors, pos_priors = emq_new_attempt(posterior_probs,
                                                 pos_prevalences, labels)

    # Persist the new posteriors so they can be reloaded without recomputing.
    with open('./pickles/newemq_posteriors_0607.pkl', 'wb') as f:
        pickle.dump(new_posteriors, f)

    # Disabled: selection of the better posteriors per label.
    # emq_better_posteriors = get_emq_better_posteriors(labels, quarter_y_arr, posterior_probs, new_posteriors, lambda m: m['TNM'] + m['TPM'])
    # Both the cost model and MineCore are built on the new posteriors.
    costs = Costs(cost_structure_1, pairs, new_posteriors, quarter_y_arr)
    # The 2nd and 5th positional arguments are None and the last two are 1.
    # NOTE(review): presumably priors, alphas, ro_r and ro_p - confirm
    # against the MineCore signature.
    mc = MineCore(pairs, None, new_posteriors, quarter_y_arr, None, 1, 1)
    #
    # Disabled together with the background thread above:
    # # t1.join()
    # Runs synchronously and pickles the four results to a relative path.
    save(mc, costs, "after_newemq_0607.pkl")
Ejemplo n.º 4
0
# Disabled: classifier training and posterior computation. Because these two
# lines are commented out, `posterior_probabilities` (used below) has to be
# defined elsewhere.
# classifiers = learn_classifiers(dataset, train_x, labels, 10, training_set_end=TRAINING_SET_END)
# posterior_probabilities = compute_posterior_probabilities(dataset, dataset.data[TEST_SET_START:TEST_SET_END, :], labels, classifiers)
# The first value returned is used as the prior probabilities; the second
# (`neg_priors`) is only referenced by the disabled EMQ loop below.
prior_probabilities, neg_priors = compute_prevalence(labels, train_y)
# Disabled: per-label EMQ update of the priors.
# new_priors = dict()
# for label in labels:
#     print(f"Updating probabilities for label: {label}")
#     _, em_pos, em_neg = emq_attempt(posterior_probabilities[label], prior_probabilities[label], neg_priors[label])
#     new_priors[label] = em_pos

# Disabled: override that would replace every alpha value with (1.0, 1.0).
# alphas = dict(map(lambda kv: (kv[0], (1.0, 1.0)), alphas.items()))
# ro_r and ro_p are defined once and passed to both Costs and MineCore so the
# two objects receive the same values.
ro_r = 0.50
ro_p = 0.99
costs = Costs(cost_structure_1,
              pairs,
              posterior_probabilities,
              quarter_y_arr,
              alphas=alphas,
              prior_probabilities=prior_probabilities,
              ro_r=ro_r,
              ro_p=ro_p)
# MineCore takes the same inputs positionally: pairs, priors, posteriors,
# labels array, alphas, ro_r, ro_p.
minecore = MineCore(pairs, prior_probabilities, posterior_probabilities,
                    quarter_y_arr, alphas, ro_r, ro_p)


def run_and_save(run_func, costs, file_name):
    """Call ``run_func(costs)`` and pickle its four results to ``file_name``.

    Args:
        run_func: Callable taking ``costs`` and returning exactly four values.
        costs: Argument forwarded unchanged to ``run_func``.
        file_name: Path of the file to (over)write in binary mode.

    The four values are stored, in order, as a single list.  The file is only
    opened after ``run_func`` has returned and its result has been unpacked.
    """
    thresholds_r, thresholds_p, matrix_2, matrix_3 = run_func(costs)
    payload = [thresholds_r, thresholds_p, matrix_2, matrix_3]
    with open(file_name, 'wb') as sink:
        pickle.dump(payload, sink)


p1 = Process(
    target=run_and_save,
         ('C31', 'C15'), ('C31', 'ECAT'), ('C31', 'C21'), ('C31', 'M14'), ('C181', 'C151'), ('C181', 'GCAT'),
         ('C181', 'C152'), ('C181', 'C15'), ('C181', 'C17'), ('M141', 'ECAT'), ('M141', 'GCAT'), ('M141', 'C24'),
         ('M141', 'C31'), ('M141', 'C21'), ('M11', 'ECAT'), ('M11', 'C152'), ('M11', 'M132'), ('M11', 'M13'),
         ('M11', 'CCAT'), ('E21', 'C31'), ('E21', 'M12'), ('E21', 'MCAT'), ('E21', 'E12'), ('E21', 'GPOL'),
         ('C17', 'MCAT'), ('C17', 'C152'), ('C17', 'C15'), ('C17', 'C18'), ('C17', 'ECAT'), ('M13', 'E21'),
         ('M13', 'M11'), ('M13', 'GCAT'), ('M13', 'E12'), ('M13', 'ECAT'), ('C18', 'E12'), ('C18', 'GCAT'),
         ('C18', 'C152'), ('C18', 'C15'), ('C18', 'C17'), ('GPOL', 'MCAT'), ('GPOL', 'CCAT'), ('GPOL', 'GCRIM'),
         ('GPOL', 'E21'), ('GPOL', 'GVIO'), ('C152', 'M11'), ('C152', 'C17'), ('C152', 'C31'), ('C152', 'C181'),
         ('C152', 'C18'), ('M14', 'M132'), ('M14', 'M13'), ('M14', 'GCAT'), ('M14', 'C24'), ('M14', 'C31'), ('C151', 'C181'),
         ('C151', 'C18'), ('C151', 'C17'), ('C151', 'C31'), ('C151', 'C152'), ('ECAT', 'GVIO'), ('ECAT', 'C17'),
         ('ECAT', 'M13'), ('ECAT', 'GPOL'), ('ECAT', 'MCAT')]

# Collect every label that occurs on either side of a pair.
labels = set()
for cr, cp in pairs:
    labels.add(cr)
    labels.add(cp)

# Build the cost model and MineCore on whatever fake_posteriors() returns;
# the same object is handed to both.
fake_post = fake_posteriors()
costs = Costs(cost_structure_1, pairs, fake_post, quarter_y_arr)
# NOTE(review): this call passes three positional arguments, while other call
# sites in this file pass seven - confirm which MineCore signature applies.
mc = MineCore(pairs, fake_post, quarter_y_arr)


def save(mc, costs, name):
    """Run ``mc.run_plusplus(costs)`` and pickle its four results to ``name``.

    The call must yield exactly four values; they are written, in order, as
    one list to the file ``name`` opened in binary write mode.  The file is
    only opened after the run has returned and its result has been unpacked.
    """
    thresholds_r, thresholds_p, matrix_2, matrix_3 = mc.run_plusplus(costs)
    payload = [thresholds_r, thresholds_p, matrix_2, matrix_3]
    with open(name, 'wb') as sink:
        pickle.dump(payload, sink)


# Execute the run at import time and pickle its results to a path relative
# to the current working directory.
save(mc, costs, "with_fk_posteriors.pkl")

Ejemplo n.º 6
0
         ('E21', 'E12'), ('E21', 'GPOL'), ('C17', 'MCAT'), ('C17', 'C152'),
         ('C17', 'C15'), ('C17', 'C18'), ('C17', 'ECAT'), ('M13', 'E21'),
         ('M13', 'M11'), ('M13', 'GCAT'), ('M13', 'E12'), ('M13', 'ECAT'),
         ('C18', 'E12'), ('C18', 'GCAT'), ('C18', 'C152'), ('C18', 'C15'),
         ('C18', 'C17'), ('GPOL', 'MCAT'), ('GPOL', 'CCAT'), ('GPOL', 'GCRIM'),
         ('GPOL', 'E21'), ('GPOL', 'GVIO'), ('C152', 'M11'), ('C152', 'C17'),
         ('C152', 'C31'), ('C152', 'C181'), ('C152', 'C18'), ('M14', 'M132'),
         ('M14', 'M13'), ('M14', 'GCAT'), ('M14', 'C24'), ('M14', 'C31'),
         ('C151', 'C181'), ('C151', 'C18'), ('C151', 'C17'), ('C151', 'C31'),
         ('C151', 'C152'), ('ECAT', 'GVIO'), ('ECAT', 'C17'), ('ECAT', 'M13'),
         ('ECAT', 'GPOL'), ('ECAT', 'MCAT')]

# Collect every label that occurs on either side of a pair.
labels = set()
for cr, cp in pairs:
    labels.add(cr)
    labels.add(cp)

# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that were produced by this project.
with open('./pickles/post_prob.pkl', 'rb') as f:
    posterior_probabilities = pickle.load(f)

with open('./pickles/emq_posteriors.pkl', 'rb') as f:
    emq_posteriors = pickle.load(f)

# The cost model is built on `posterior_probabilities` only.
costs = Costs(cost_structure_1, pairs, posterior_probabilities, quarter_y_arr)
# NOTE(review): `emq_posteriors` is passed in the second position, where
# other call sites in this file pass prior probabilities - confirm against
# the MineCore signature this script targets.
mc = MineCore(pairs, emq_posteriors, posterior_probabilities, quarter_y_arr)

result = mc.run_plusplus(costs)

# Store whatever run_plusplus returned, unmodified.
with open('./pickles/test120319_autoonly.pkl', 'wb') as f:
    pickle.dump(result, f)
Ejemplo n.º 7
0
    y_arr[label] = np.asarray(dataset.target[
        mask, dataset.target_names.searchsorted(label)].todense()).squeeze()
    training_y[label] = np.asarray(dataset.target[
        train_idxs,
        dataset.target_names.searchsorted(label)].todense()).squeeze()
    prob[label] = prob[label][mask]

# Positive and negative prevalences, indexed per label below.
pos_prev, neg_prev = compute_prevalence(labels, training_y)

print("Computing EMQ")
for label in labels:
    # Only the first returned value (the posteriors) is kept; the two
    # returned priors are not used afterwards in the visible code.
    post, pos_prior, neg_prior = emq_attempt(prob[label], pos_prev[label],
                                             neg_prev[label])
    emq_posteriors[label] = post

# Both the cost model and MineCore are built on the EMQ posteriors.
costs = Costs(cost_structure_1, pairs, emq_posteriors, y_arr)
# The 2nd and 5th positional arguments are None and the last two are 1.0.
# NOTE(review): presumably priors, alphas, ro_r and ro_p - confirm against
# the MineCore signature.
minecore = MineCore(pairs, None, emq_posteriors, y_arr, None, 1.0, 1.0)

# NOTE(review): this message is still printed although the standard run
# below is commented out, so no standard run actually happens.
print("Running standard Minecore")
# standard_results = minecore.run(costs)

# Disabled: swapping the posteriors on the existing instance.
# minecore.posterior_probabilities = emq_posteriors

print("Running EMQ Minecore")
emq_results = minecore.run(costs)

print("Saving results")
# Disabled together with the standard run above:
# with open('./pickles/minecore_al_standard_0307.pkl', 'wb') as f:
#     pickle.dump(standard_results, f)

with open('./pickles/minecore_al_emq_0307.pkl', 'wb') as f: