def _stump_vote_accuracy(samples, indices, thresholds, positions, weights):
    """Return the fraction of samples the weighted threshold ensemble gets right.

    Each classifier is a (threshold, position, weight) triple. It adds its
    weight to the vote when ``sample[position] >= threshold`` and subtracts it
    when ``sample[position] < threshold``. A non-negative vote predicts class
    index 1, a negative vote predicts class index 0.

    Args:
        samples: Non-empty list of feature rows.
        indices: Class index (0 or 1) for each row, in the same order.
        thresholds: Threshold of each classifier.
        positions: Feature position each classifier inspects.
        weights: Vote weight of each classifier.

    Returns:
        Number of correctly classified rows divided by ``len(samples)``.
    """
    # Materialize once; the same classifiers are replayed for every sample.
    stumps = list(zip(thresholds, positions, weights))
    correct = 0
    for sample, index in zip(samples, indices):
        vote = 0
        for threshold, position, weight in stumps:
            value = sample[position]
            if value >= threshold:
                vote += weight
            elif value < threshold:
                # Kept as an explicit comparison so that a value that is
                # neither >= nor < the threshold (e.g. NaN) casts no vote.
                vote -= weight
        if (vote >= 0 and index == 1) or (vote < 0 and index == 0):
            correct += 1
    return correct / len(samples)


def run(filename, train_sample, train_label, test_sample, test_label, title,
        M, thresh, CART_step):
    """Train a boosted ensemble of threshold classifiers and score it per round.

    The data is loaded through ``Load``, normalized through
    ``Preprocess.normalize`` and the labels are mapped to integer indices.
    For each of the ``M`` rounds, ``Calc.CART`` supplies a threshold, a
    feature position and per-sample errors; the classifier weight and the new
    sample weights are derived through ``Calc``. After every round the
    ensemble built so far is evaluated on the test set, the accuracy is
    logged, and finally the accuracy curve is drawn with ``Graph.draw``.

    Args:
        filename: Passed to ``Log.log`` and ``Graph.draw``.
        train_sample: Passed to ``Load.loadSample`` (presumably a path or
            source identifier; confirm against ``Load``).
        train_label: Passed to ``Load.loadLabel``; the loaded object must
            expose an ``'x'`` column holding the labels.
        test_sample: Same as ``train_sample``, for the test set.
        test_label: Same as ``train_label``, for the test set.
        title: Passed to ``Graph.draw``.
        M: Number of boosting rounds; must be at least 1.
        thresh: Forwarded unchanged to ``Calc.CART``.
        CART_step: Forwarded unchanged to ``Calc.CART``.

    Returns:
        A list with one test accuracy per round, each rounded to 3 decimals.

    Raises:
        AssertionError: If a sample set and its label set differ in size.
        ValueError: If ``M < 1`` or the train or test set is empty.
    """
    if M < 1:
        raise ValueError('M must be at least 1, got {}'.format(M))

    train_sample, train_sample_size = Load.loadSample(train_sample)
    train_label, train_label_size = Load.loadLabel(train_label)
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if train_sample_size != train_label_size:
        raise AssertionError(
            'train_sample_size does not match train_label_size')

    test_sample, test_sample_size = Load.loadSample(test_sample)
    test_label, test_label_size = Load.loadLabel(test_label)
    if test_sample_size != test_label_size:
        raise AssertionError(
            'test_sample_size does not match test_label_size')

    train_sample = Preprocess.normalize(train_sample, True).values.tolist()
    test_sample = Preprocess.normalize(test_sample, True).values.tolist()
    if not train_sample:
        raise ValueError('training set is empty')
    if not test_sample:
        raise ValueError('test set is empty')

    # Sort the distinct labels so the label -> index mapping is reproducible.
    # Bare set iteration order depends on hashing and, for string labels,
    # changes between interpreter runs.
    distinct_labels = sorted(set(train_label['x'].tolist()))
    label_to_index = {
        label: index for index, label in enumerate(distinct_labels)
    }
    train_index = Preprocess.labelMap(train_label, label_to_index)
    test_index = Preprocess.labelMap(test_label, label_to_index)

    sample_size = len(train_sample)
    # Start from uniform sample weights.
    sample_weights = [1 / sample_size] * sample_size

    classifier_weights = []
    classifier_thresholds = []
    threshold_positions = []
    test_corrs = []
    test_times = list(range(1, M + 1))

    for round_number in test_times:
        threshold, position, errors = Calc.CART(
            train_sample, train_index, sample_weights, thresh, CART_step)
        total_error = Calc.gentleError(
            np.array(sample_weights), np.array(errors))

        classifier_weights.append(
            round(Calc.classifierError(total_error), 3))
        classifier_thresholds.append(threshold)
        threshold_positions.append(position)
        sample_weights = Calc.updateVariableWeights(
            np.array(sample_weights), total_error, errors)

        print('total_error: {}'.format(total_error))
        print('threshold_positions: {}'.format(threshold_positions))
        print('classifier_thresholds: {}'.format(classifier_thresholds))
        print('classifier_weights: {}'.format(classifier_weights))

        # Score the ensemble built so far on the held-out test set.
        accuracy = _stump_vote_accuracy(
            test_sample, test_index,
            classifier_thresholds, threshold_positions, classifier_weights)
        test_corrs.append(round(accuracy, 3))

        # NOTE(review): this logs the total round count M on every round,
        # not the current round number; confirm that is intended.
        Log.log(filename, 'M: {}; correction: {}\n'.format(M, test_corrs[-1]))
        print(
            '-----------------thresh: {}; CART_step: {}; iter: {}-----------------'
            .format(thresh, CART_step, round_number))

    Graph.draw(filename, test_times, test_corrs, test_times[-1], 1.0, title)
    return test_corrs
# print(list(label_to_index.keys())[1]) nn_decision = 1 else: # print(list(label_to_index.keys())[0]) nn_decision = 0 # print(test_label.values.tolist()[i][0]) if test_label.values.tolist()[i][0] == list( label_to_index.keys())[nn_decision]: # right correct_count += 1 test_correct = correct_count / test_sample_size Log.log(filename, 'k: {}; correct rate: {}\n'.format(k, test_correct)) return test_correct
if __name__ == '__main__':
    # Sweep k from 1 to 100, collect the value run() returns for each k,
    # then log summary statistics and plot the results against k.
    Log.clearLog(filename)

    k_values = list(range(1, 101))
    results = [
        run(train_sample, train_label, test_sample, test_label, k)
        for k in k_values
    ]

    # Sample standard deviation (ddof=1) across the sweep.
    summary = 'max: {}; min: {}; average: {}; std: {}'.format(
        max(results),
        min(results),
        sum(results) / len(results),
        np.std(results, ddof=1))
    Log.log(filename, summary)

    Graph.draw(filename, k_values, results, 100, 1.0, graph_name)