def skeptical_prediction(pid, tasks, queue, results, class_model, class_model_challenger=None):
    """Worker loop: learn one skeptic model per training payload and answer inference tasks.

    Pulls training payloads from ``queue`` (``None`` is the poison pill), trains a
    dynamically instantiated model, then drains per-instance inference tasks from
    ``tasks`` (``None`` ends one batch), appending one result dict per task to the
    shared ``results`` list.

    Parameters:
        pid: worker identifier, used only in log/print output.
        tasks: queue of inference tasks ({'kwargs': ..., 'y_test': ...}).
        queue: joinable queue of training payload dicts.
        results: shared list collecting result dicts.
        class_model: dotted class name handed to ``__create_dynamic_class``.
        class_model_challenger: unused in this variant (kept for API symmetry
            with the sibling worker functions).
    """
    # export LD_PRELOAD=/usr/local/MATLAB/R2018b/sys/os/glnxa64/libstdc++.so.6.0.22
    # QPBB_PATH_SERVER = ['/home/lab/ycarranz/QuadProgBB', '/opt/cplex128/cplex/matlab/x86-64_linux']
    # QPBB_PATH_SERVER = ['/volper/users/ycarranz/QuadProgBB', '/volper/users/ycarranz/cplex128/cplex/matlab/x86-64_linux']
    QPBB_PATH_SERVER = []  # executed in host
    try:
        # Solver configured to run without MATLAB, using the "nda" GDA method.
        model_skeptic = __create_dynamic_class(class_model,
                                               solver_matlab=False,
                                               gda_method="nda",
                                               add_path_matlab=QPBB_PATH_SERVER,
                                               DEBUG=False)
        while True:
            training = queue.get()
            if training is None:
                break
            # Optionally degrade the training labels (missing and/or noisy),
            # driven by the percentages carried in the payload.
            MLCNCC.missing_labels_learn_data_set(learn_data_set=training["learn_data_set"],
                                                 nb_labels=training["nb_labels"],
                                                 missing_pct=training["missing_pct"])
            MLCNCC.noise_labels_learn_data_set(learn_data_set=training["learn_data_set"],
                                               nb_labels=training["nb_labels"],
                                               noise_label_pct=training["noise_label_pct"],
                                               noise_label_type=training["noise_label_type"],
                                               noise_label_prob=training["noise_label_prob"])
            # remove some keys of dict unused for learn method
            del training['noise_label_pct']
            del training['noise_label_type']
            del training['noise_label_prob']
            del training["missing_pct"]
            model_skeptic.learn(learn_data_set=training["learn_data_set"],
                                nb_labels=training["nb_labels"],
                                ell_imprecision=training["ell_imprecision"])
            while True:
                task = tasks.get()
                if task is None:
                    break
                skeptical_inference, precise_inference, prec_prob_marginal = \
                    model_skeptic.evaluate(**task['kwargs'])[0]
                # probabilities Yi =1
                # NOTE(review): column 1 is read as P(Yi=1) here, while sibling
                # workers read column 0 of a scores matrix — confirm layout.
                probabilities_yi_eq_1 = np.array(prec_prob_marginal)[:, 1].copy()
                # print partial prediction results
                print("(pid, skeptical, precise, ground-truth, probabilities_yi_eq_1) ",
                      pid, skeptical_inference, precise_inference, task['y_test'],
                      probabilities_yi_eq_1, flush=True)
                results.append(dict({'skeptical': skeptical_inference,
                                     'precise': precise_inference,
                                     'y_eq_1_probabilities': probabilities_yi_eq_1,
                                     'ground_truth': task['y_test']}))
            # one task_done per training payload pulled from the joinable queue
            queue.task_done()
    except Exception as e:
        raise Exception(e, "Error in job of PID " + str(pid))
    finally:
        print("Worker PID finished", pid, flush=True)
data.discretize(discmet='eqfreq', numint=5) dataset = 'yeast' nblab = 14 # We start by creating an instance of the base classifier we want to use print("Model creation and learning \n") from classifip.models.mlc import nccbr from classifip.models.mlc.mlcncc import MLCNCC model = nccbr.NCCBR() # Learning missing_pct = 0.0 # missing % percentage of values of label MLCNCC.missing_labels_learn_data_set(learn_data_set=data, nb_labels=nblab, missing_pct=missing_pct) model.learn(data, nblab) import numpy as np for s in np.arange(0.1, 1.5, 0.1): # Evaluation : we can set the parameters of the classifier test = model.evaluate([row[0:len(row) - nblab] for row in data.data[0:1]], ncc_epsilon=0.001, ncc_s_param=s) # The output is a list of probability intervals, we can print each instance : print( "Probability intervals obtained for each label on the first test instance \n", s)
def experiments_chaining_imprecise(in_path=None,
                                   out_path=None,
                                   seed=None,
                                   nb_kFold=10,
                                   nb_process=1,
                                   min_ncc_s_param=0.5,
                                   max_ncc_s_param=6.0,
                                   step_ncc_s_param=1.0,
                                   missing_pct=0.0,
                                   noise_label_pct=0.0,
                                   noise_label_type=-1,
                                   noise_label_prob=0.5,
                                   remove_features=None,
                                   scaling=False,
                                   strategy_chaining=IMLCStrategy.IMPRECISE_BRANCHING,
                                   safety_chaining=False):
    """Run the imprecise-chaining benchmark over discretisations, folds and s values.

    For each discretisation level (5 and 6 intervals), each of ``nb_kFold``
    repetitions, and each s value on the given grid, the cumulative metrics
    (incorrectness, completeness, accuracies, avg. solutions) are computed over
    the folds and appended as one CSV row to ``out_path``.

    Parameters:
        in_path: existing ARFF training file.
        out_path: existing CSV file that results are appended to.
        seed: list of ``nb_kFold`` seeds, or None to draw them randomly.
        nb_kFold: number of cross-validation folds (and repetitions).
        nb_process: worker processes for the ManagerWorkers pool.
        min_ncc_s_param, max_ncc_s_param, step_ncc_s_param: grid of the NCC
            imprecision parameter s.
        missing_pct, noise_label_pct, noise_label_type, noise_label_prob:
            label-degradation settings forwarded to each fold.
        remove_features: iterable of feature names to drop, or None.
        scaling: normalize features before discretisation when True.
        strategy_chaining, safety_chaining: chaining strategy options forwarded
            to ``computing_training_testing_step``.
    """
    assert os.path.exists(in_path), "Without training data, not testing"
    assert os.path.exists(out_path), "File for putting results does not exist"

    logger = create_logger("computing_best_imprecise_mean", True)
    logger.info('Training dataset (%s, %s)', in_path, out_path)
    logger.info("(min_ncc_s_param, max_ncc_s_param, step_ncc_s_param) (%s, %s, %s)",
                min_ncc_s_param, max_ncc_s_param, step_ncc_s_param)
    logger.info("(scaling, remove_features, process) (%s, %s, %s)",
                scaling, remove_features, nb_process)
    logger.info("(missing_pct, noise_label_pct, noise_label_type, noise_label_prob) (%s, %s, %s, %s)",
                missing_pct, noise_label_pct, noise_label_type, noise_label_prob)
    logger.info("(strategy_chaining, safety_chaining) (%s, %s)",
                strategy_chaining, safety_chaining)

    # Seeding a random value for k-fold top learning-testing data
    if seed is None:
        seed = [random.randrange(sys.maxsize) for _ in range(nb_kFold)]
    logger.debug("[FIRST-STEP-SEED] SEED: %s", seed)

    # Create a CSV file for saving results
    file_csv = open(out_path, 'a')
    writer = csv.writer(file_csv)
    manager = ManagerWorkers(nb_process=nb_process)
    manager.executeAsync(class_model="classifip.models.mlc.chainncc.MLChaining")
    ich, cph, acc, acc_trans, avg_sols = dict(), dict(), dict(), dict(), dict()
    min_discretize, max_discretize = 5, 7
    # BUGFIX: previously the worker pool and the CSV handle were only released
    # after a fully successful run; any exception inside the loops leaked both.
    # The try/finally now guarantees the poison pill is sent and the file closed.
    try:
        for nb_disc in range(min_discretize, max_discretize):
            data_learning = arff.ArffFile()
            data_learning.load(in_path)
            if remove_features is not None:
                for r_feature in remove_features:
                    try:
                        data_learning.remove_col(r_feature)
                    except Exception as err:
                        print("Remove feature error: {0}".format(err))
            nb_labels = get_nb_labels_class(data_learning)
            if scaling:
                normalize(data_learning, n_labels=nb_labels)
            data_learning.discretize(discmet="eqfreq", numint=nb_disc)
            for time in range(nb_kFold):  # 10-10 times cross-validation
                logger.info("Number interval for discreteness and labels (%1d, %1d)." %
                            (nb_disc, nb_labels))
                cv_kfold = k_fold_cross_validation(data_learning,
                                                   nb_kFold,
                                                   randomise=True,
                                                   random_seed=seed[time])
                splits_s = list([])
                for training, testing in cv_kfold:
                    # clone before shuffling so the source folds stay untouched
                    train_clone_data = training.make_clone()
                    test_clone_data = testing.make_clone()
                    MLCNCC.shuffle_labels_train_testing(train_clone_data,
                                                        test_clone_data,
                                                        nb_labels=nb_labels)
                    logger.info("Splits %s train %s", len(training.data), training.data[0])
                    logger.info("Splits %s test %s", len(testing.data), testing.data[0])
                    splits_s.append((train_clone_data, test_clone_data))
                disc = str(nb_disc) + "-" + str(time)
                ich[disc], cph[disc] = dict(), dict()
                acc_trans[disc], acc[disc] = dict(), dict()
                avg_sols[disc] = dict()
                for s_ncc in np.arange(min_ncc_s_param, max_ncc_s_param, step_ncc_s_param):
                    ks_ncc = str(s_ncc)
                    # cumulative metrics over the folds of this (disc, s) cell
                    ich[disc][ks_ncc], cph[disc][ks_ncc] = 0, 0
                    acc[disc][ks_ncc], acc_trans[disc][ks_ncc] = 0, 0
                    avg_sols[disc][ks_ncc] = 0
                    for idx_fold, (training, testing) in enumerate(splits_s):
                        res = computing_training_testing_step(
                            training, testing, nb_labels, s_ncc, manager,
                            strategy_chaining, safety_chaining,
                            missing_pct, noise_label_pct, noise_label_type,
                            noise_label_prob,
                            ich[disc][ks_ncc], cph[disc][ks_ncc],
                            acc[disc][ks_ncc], acc_trans[disc][ks_ncc],
                            avg_sols[disc][ks_ncc])
                        ich[disc][ks_ncc], cph[disc][ks_ncc] = res[0], res[1]
                        acc[disc][ks_ncc], acc_trans[disc][ks_ncc] = res[2], res[3]
                        avg_sols[disc][ks_ncc] = res[4]
                        logger.debug(
                            "Partial-step-cumulative (acc, ich, acc_trans, avg_sols) (%s, %s, %s, %s)",
                            acc[disc][ks_ncc], ich[disc][ks_ncc],
                            acc_trans[disc][ks_ncc], avg_sols[disc][ks_ncc])
                    writer.writerow([
                        str(nb_disc), s_ncc, time,
                        ich[disc][ks_ncc] / nb_kFold,
                        cph[disc][ks_ncc] / nb_kFold,
                        acc[disc][ks_ncc] / nb_kFold,
                        acc_trans[disc][ks_ncc] / nb_kFold,
                        avg_sols[disc][ks_ncc] / nb_kFold
                    ])
                    file_csv.flush()
                    logger.debug("Partial-s-k_step (%s, %s, %s, %s, %s, %s)",
                                 disc, s_ncc, time,
                                 ich[disc][ks_ncc] / nb_kFold,
                                 cph[disc][ks_ncc] / nb_kFold,
                                 acc_trans[disc][ks_ncc] / nb_kFold)
    finally:
        # Always stop the worker processes and release the results file.
        manager.poisonPillTraining()
        file_csv.close()
    logger.debug("Results Final: %s, %s", ich, cph)
def skeptical_prediction(pid, tasks, queue, results, class_model, class_model_challenger=None):
    """Worker loop mixing a continuous and a discretized view of the data.

    Each training payload carries the discretized learn set plus the keys
    'p_dimension' and 'data_continuous', used to rebuild, per row, the matching
    raw (continuous) instance via an index column stored after the label columns.
    Supports an epsilon-reject option on the precise per-label probabilities.
    NOTE(review): ``class_model_challenger`` is unused in this variant.
    """
    try:
        model_skeptic = __create_dynamic_class(class_model)
        while True:
            training = queue.get()
            if training is None:
                break
            nb_labels = training["nb_labels"]
            # position of the raw-row index column: after features and labels
            p_dimension_all = training.pop('p_dimension') + nb_labels
            global_data_continuous = training.pop('data_continuous')
            new_continuous_data = global_data_continuous.make_clone()
            new_continuous_data.data = list()
            # Rebuild a continuous learn set aligned row-by-row with the
            # discretized one, consuming the index column in the process.
            for row_instance in training["learn_data_set"].data:
                row_index = row_instance.pop(
                    p_dimension_all)  # delete index raw instance by reference
                new_continuous_data.data.append(
                    global_data_continuous.data[int(row_index)].copy())
            # Missing and Noise labels if the percentage is greater than 0
            MLCNCC.missing_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                missing_pct=training["missing_pct"])
            MLCNCC.noise_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                noise_label_pct=training["noise_label_pct"],
                noise_label_type=training["noise_label_type"],
                noise_label_prob=training["noise_label_prob"])
            # remove some keys of dict unused for learn method
            del training['noise_label_pct']
            del training['noise_label_type']
            del training['noise_label_prob']
            del training["missing_pct"]
            model_skeptic.learn(learn_data_set=new_continuous_data,
                                nb_labels=training["nb_labels"],
                                learn_disc_set=training["learn_data_set"])
            while True:
                task = tasks.get()
                if task is None:
                    break
                # procedure to skeptic inference
                instance_test = task['kwargs']['test_dataset'][0]
                # same index-column trick as in training: pop the raw-row index
                index_test = instance_test.pop(p_dimension_all)
                raw_instance_test = global_data_continuous.data[int(index_test)]
                inferences = model_skeptic.evaluate(
                    test_dataset=[(raw_instance_test, instance_test)],
                    ncc_s_param=task['kwargs']['ncc_s_param'],
                    k=task['k_nearest'],
                    laplace_smoothing=task['kwargs']['laplace_smoothing'])[0]
                set_prob_marginal = inferences[0]
                prec_prob_marginal = inferences[1]
                skeptical_inference = set_prob_marginal.multilab_dom()
                precise_inference = prec_prob_marginal.multilab_dom()
                # procedure to reject option
                epsilon_rejects = task["epsilon_rejects"]
                precise_rejects = dict()
                if epsilon_rejects is not None and len(epsilon_rejects) > 0:
                    for epsilon_reject in epsilon_rejects:
                        # encode each label as 1/0, or -1 (abstain) when the
                        # probability falls inside [0.5-eps, 0.5+eps]
                        precise_reject = -2 * np.ones(nb_labels, dtype=int)
                        all_idx = set(range(nb_labels))
                        # NOTE(review): column 0 of .scores is read as P(Yi=1)
                        # — confirm the score-matrix layout.
                        probabilities_yi_eq_1 = prec_prob_marginal.scores[:, 0].copy(
                        )
                        ones = set(
                            np.where(probabilities_yi_eq_1 >= 0.5 + epsilon_reject)[0])
                        zeros = set(
                            np.where(probabilities_yi_eq_1 <= 0.5 - epsilon_reject)[0])
                        stars = all_idx - ones - zeros
                        precise_reject[list(stars)] = -1
                        precise_reject[list(zeros)] = 0
                        precise_reject[list(ones)] = 1
                        precise_rejects[str(epsilon_reject)] = precise_reject
                # print partial prediction results
                print(
                    "(pid, skeptical, precise, precise_reject ground-truth) ",
                    pid, skeptical_inference, precise_inference,
                    precise_rejects, task['y_test'], flush=True)
                results.append(
                    dict({
                        'skeptical': skeptical_inference,
                        'precise': precise_inference,
                        'reject': precise_rejects,
                        'ground_truth': task['y_test']
                    }))
            # one task_done per training payload pulled from the joinable queue
            queue.task_done()
    except Exception as e:
        raise Exception(e, "Error in job of PID " + str(pid))
    finally:
        print("Worker PID finished", pid, flush=True)
def prediction(pid, tasks, queue, results, class_model, class_model_challenger=None):
    """Worker loop: train a main model and a challenger, then answer inference tasks.

    When ``class_model_challenger`` is None, the challenger falls back to a
    precise version of ``class_model`` itself: ``ncc_s_param`` and
    ``ncc_epsilon`` are forced to 0.0 (and ``is_dynamic_context`` to False) for
    the challenger's evaluation.

    Parameters:
        pid: worker identifier, used only in log/print output.
        tasks: queue of inference tasks ({'kwargs': ..., 'y_test': ...}).
        queue: joinable queue of training payload dicts (None is the poison pill).
        results: shared list collecting result dicts.
        class_model: dotted class name handed to ``__create_dynamic_class``.
        class_model_challenger: optional challenger class name.
    """
    try:
        model = __create_dynamic_class(class_model)
        is_compared_with_precise = False
        # if model_challenger is None, thus comparing with precise version
        if class_model_challenger is None:
            class_model_challenger = class_model
            is_compared_with_precise = True
        model_challenger = __create_dynamic_class(class_model_challenger)
        while True:
            # training models
            training = queue.get()
            if training is None:
                break
            MLCNCC.missing_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                missing_pct=training["missing_pct"])
            MLCNCC.noise_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                noise_label_pct=training["noise_label_pct"],
                noise_label_type=training["noise_label_type"],
                noise_label_prob=training["noise_label_prob"])
            # drop keys that model.learn(**training) does not accept
            del training['noise_label_pct']
            del training['noise_label_type']
            del training['noise_label_prob']
            del training["missing_pct"]
            model.learn(**training)
            if class_model_challenger is not None:
                model_challenger.learn(**training)
            while True:
                task = tasks.get()
                if task is None:
                    break
                # prediction of main model
                prediction = model.evaluate(**task['kwargs'])
                # prediction challenger
                prediction_challenger = None
                if class_model_challenger is not None:
                    if 'is_dynamic_context' in task['kwargs'] and is_compared_with_precise:
                        task['kwargs']['is_dynamic_context'] = False
                    task['kwargs']['ncc_s_param'] = 0.0 if is_compared_with_precise \
                        else task['kwargs']['ncc_s_param']
                    task['kwargs']['ncc_epsilon'] = 0.0 if is_compared_with_precise \
                        else task['kwargs']['ncc_epsilon']
                    prediction_challenger = model_challenger.evaluate(**task['kwargs'])
                # print and save predictions
                print("(pid, prediction, ground-truth) ", pid,
                      prediction[0] if len(prediction) > 1 else prediction,
                      task['y_test'], flush=True)
                results.append(
                    dict({
                        'prediction': prediction,
                        'challenger': prediction_challenger,
                        'ground_truth': task['y_test']
                    }))
            # one task_done per training payload pulled from the joinable queue
            queue.task_done()
    except Exception as e:
        # BUGFIX: was `"Error in job of PID " + pid`, which raises TypeError when
        # pid is an int; str(pid) matches every sibling worker function.
        raise Exception(e, "Error in job of PID " + str(pid))
    finally:
        print("Worker PID finished", pid, flush=True)
def skeptical_prediction(pid, tasks, queue, results, class_model, class_model_challenger=None):
    """Worker loop: exact skeptical inference plus an outer-approximation and a
    precise baseline on each task.

    ``class_model`` instantiates the exact-maximality model; ``class_model_challenger``
    instantiates the outer-approximation model with imprecise marginals.
    NOTE(review): unlike the sibling workers, ``class_model_challenger`` may not
    be None here — it is passed directly to ``__create_dynamic_class``.
    """
    try:
        model_outer = __create_dynamic_class(class_model_challenger, has_imprecise_marginal=True)
        model_exact = __create_dynamic_class(class_model)
        while True:
            training = queue.get()
            if training is None:
                break
            MLCNCC.missing_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                missing_pct=training["missing_pct"])
            MLCNCC.noise_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                noise_label_pct=training["noise_label_pct"],
                noise_label_type=training["noise_label_type"],
                noise_label_prob=training["noise_label_prob"])
            # remove some keys of dict unused for learn method
            del training['noise_label_pct']
            del training['noise_label_type']
            del training['noise_label_prob']
            del training['missing_pct']
            model_outer.learn(**training)
            model_exact.learn(**training)
            while True:
                task = tasks.get()
                if task is None:
                    break
                # naive and improvement exact maximality inference
                # (skipped with a -1-per-label placeholder when the task opts out)
                skeptical_inference = model_exact.evaluate(**task['kwargs'])[0] \
                    if task['do_inference_exact'] \
                    else [-1] * len(task['y_test'])
                # outer-approximation binary relevance
                set_prob_marginal = model_outer.evaluate(**task['kwargs'])[0]
                # precise and e-precise inference with s equal 0, epsilon > 0.0
                # (note: mutates task['kwargs'] before the second evaluate call)
                task['kwargs']['ncc_s_param'] = 0.0
                prec_prob_marginal = model_outer.evaluate(**task['kwargs'])[0]
                outer_inference = set_prob_marginal.multilab_dom()
                precise_inference = prec_prob_marginal.multilab_dom()
                # NOTE(review): column 0 of .scores is read as P(Yi=1) — confirm layout.
                probabilities_yi_eq_1 = prec_prob_marginal.scores[:, 0].copy()
                # print partial prediction results
                print(
                    "(pid, skeptical, outer, precise, ground-truth, probabilities_yi_eq_1) ",
                    pid, len(skeptical_inference), outer_inference,
                    precise_inference, task['y_test'], probabilities_yi_eq_1,
                    flush=True)
                results.append(
                    dict({
                        'skeptical': skeptical_inference,
                        'outer': outer_inference,
                        'precise': precise_inference,
                        'y_eq_1_probabilities': probabilities_yi_eq_1,
                        'ground_truth': task['y_test']
                    }))
            # one task_done per training payload pulled from the joinable queue
            queue.task_done()
    except Exception as e:
        raise Exception(e, "Error in job of PID " + str(pid))
    finally:
        print("Worker PID finished", pid, flush=True)
def skeptical_prediction(pid, tasks, queue, results, class_model, class_model_challenger=None):
    """Worker loop: lasso-based skeptic model; credal-set vs. per-label precise inference.

    Trains with a fixed grid of 41 lasso models over gamma in [0.01, 1], then for
    each task reports the credal-set skeptical prediction alongside per-label
    maximal decisions and P(Yi=1) from the precise probabilities.
    NOTE(review): ``class_model_challenger`` is unused in this variant.
    """
    try:
        model_skeptic = __create_dynamic_class(class_model, DEBUG=True)
        while True:
            training = queue.get()
            if training is None:
                break
            MLCNCC.missing_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                missing_pct=training["missing_pct"])
            MLCNCC.noise_labels_learn_data_set(
                learn_data_set=training["learn_data_set"],
                nb_labels=training["nb_labels"],
                noise_label_pct=training["noise_label_pct"],
                noise_label_type=training["noise_label_type"],
                noise_label_prob=training["noise_label_prob"])
            # remove some keys of dict unused for learn method
            del training['noise_label_pct']
            del training['noise_label_type']
            del training['noise_label_prob']
            del training["missing_pct"]
            # Half as many learner processes as labels.
            # NOTE(review): yields 0 processes when nb_labels < 2 — confirm the
            # learn() implementation tolerates that.
            model_skeptic.learn(learn_data_set=training["learn_data_set"],
                                nb_labels=training["nb_labels"],
                                nb_lassos_models=41,
                                min_gamma=0.01,
                                max_gamma=1,
                                nb_process=int(training["nb_labels"] / 2))
            while True:
                task = tasks.get()
                if task is None:
                    break
                # skeptical inference with binary relevance
                credal_set, precise_probability = model_skeptic.evaluate(
                    **task['kwargs'])[0]
                skeptical_inference = credal_set.multilab_dom()
                # precise inference by label
                precise_inference = []
                probabilities_yi_eq_1 = []
                for prob_label in precise_probability:
                    precise_inference.append(prob_label.getmaximaldecision())
                    probabilities_yi_eq_1.append(prob_label.proba[1])
                # print("==================================================", flush=True)
                # print("----->", skeptical_inference, precise_inference, flush=True)
                # print("----->", credal_set, flush=True)
                # print("----->", probabilities_yi_eq_1, flush=True)
                # print("==================================================", flush=True)
                # print partial prediction results
                print(
                    "(pid, skeptical, precise, ground-truth, probabilities_yi_eq_1) ",
                    pid, skeptical_inference, precise_inference, task['y_test'],
                    probabilities_yi_eq_1, flush=True)
                results.append(
                    dict({
                        'skeptical': skeptical_inference,
                        'precise': precise_inference,
                        'y_eq_1_probabilities': np.array(probabilities_yi_eq_1),
                        'ground_truth': task['y_test']
                    }))
            # one task_done per training payload pulled from the joinable queue
            queue.task_done()
    except Exception as e:
        raise Exception(e, "Error in job of PID " + str(pid))
    finally:
        print("Worker PID finished", pid, flush=True)