# Boosting-round budget: the 'random_' weak-learner type gets 2000 rounds,
# every other type gets 100.
max_round = 2000 if wl_type == 'random_' else 100

# Train one boosted binary classifier per entry of best_ecoc[1].
for ind, c_ecoc in enumerate(best_ecoc[1]):
    print('Training function {}...'.format(ind))

    # Relabel the training labels for this ECOC function ({0, 1} -> {-1, 1}).
    bin_label = util.generate_bin_label_from_ecoc(tr_data[1], c_ecoc)

    # Initial distribution over the tr_n training points.
    d = util.init_distribution(tr_n)

    # The threshold cheat sheet is only precomputed for decision stumps;
    # any other weak learner is handed None.
    thresh_cs = (
        util.pre_compute_threshes_8news(tr_data[0], bin_label, threshes)
        if wl == ds.DecisionStump
        else None
    )

    boost = b.Boosting(d)
    # Running per-sample prediction, a plain list of tr_n floats starting at 0.0.
    training_predict = np.zeros(tr_n).tolist()
    round_tr_err = []

    # NOTE(review): `round` shadows the builtin of the same name; the name is
    # kept because code outside this fragment may read it.
    round = 0
    for round in range(1, max_round + 1):
        st = time.time()  # start ts
        boost.add_model(wl, tr_data[0], bin_label, threshes, thresh_cs, True)
        boost.update_predict(tr_data[0], training_predict)

        # Stats of the weak learner added this round.
        newest_model = boost.model[-1]
        c_model_err = newest_model.w_err
        c_tr_err = util.get_err_from_predict(training_predict, bin_label)
        round_tr_err.append(c_tr_err)
        c_f_ind = newest_model.f_ind
        c_thresh = newest_model.thresh
        print('Time used: {}'.format(time.time() - st))
# Serialize the best ECOC to ecoc_path.
with open(ecoc_path, 'wb+') as f:
    pickle.dump(best_ecoc, f)
print('Init ecoc done!')

# train 20 boosts
boosts = []
function_tr_err = []
max_round = 10

# Threshold cheat sheets, keyed by the index of the ECOC function.
cs_dict = {}
for ind, c_ecoc in enumerate(best_ecoc[1]):
    print('Generating cheat sheet {}...'.format(ind))

    # Relabel so that labels match this ECOC function: a class whose code bit
    # is 1 becomes +1, any other class becomes -1.
    # Comparisons are made against the untouched original labels. Comparing
    # against the list being rewritten would let an item already mapped to 1
    # be mistaken for class 1 and remapped a second time.
    # NOTE(review): util.generate_bin_label_from_ecoc is used elsewhere in
    # this file for what looks like the same mapping; consider calling it here.
    orig_label = tr_data[1]
    bin_label = copy.deepcopy(orig_label)
    for cls, code in enumerate(c_ecoc):
        sign = 1 if code == 1 else -1
        for j, lbl in enumerate(orig_label):
            if lbl == cls:
                bin_label[j] = sign

    # Precompute the cheat sheet from the binarized labels, so each sheet
    # depends on its own ECOC function rather than the raw multiclass labels.
    thresh_cs = util.pre_compute_threshes_8news(tr_data[0], bin_label, threshes)
    cs_dict[ind] = thresh_cs

    # early terminating
    # NOTE(review): this stops after the first ECOC function, so only
    # cs_dict[0] is ever written; confirm this is intentional.
    if ind == 0:
        break

# Serialize the cheat sheets to thresh_cs_path.
with open(thresh_cs_path, 'wb+') as f:
    pickle.dump(cs_dict, f)