def nbest(metric, folder):
    """Sweep the N-best feature counts and record the classifier's F1 score.

    For each N in 500..4900 (step 100), keeps the N highest-scoring
    features from *metric*, classifies via the module-level ``classify``
    helper, and records the resulting F1.  All sweep results — and the
    feature indexes for the best-performing N — are dumped under
    ``<folder>/nbest/``.

    Args:
        metric: iterable of ``(score, feature_index)`` pairs.
        folder: output directory prefix for the dump files.

    NOTE(review): reads the module-level globals ``data``, ``data_valid``,
    ``labels``, ``labels_valid``, ``timer``, ``frame``, ``trim``, ``dump``
    and the ``classify`` helper — confirm they are initialised before call.
    """
    nbest_coefs = np.arange(500, 4999, 100)
    # Sort ascending by score so the best features sit at the tail.
    metric = sorted(metric, key=lambda x: x[0])
    metric_f1 = []
    metric_n_feat = []
    print(folder + ': classifying N BEST')
    timer.set_new()
    for i, n_best in enumerate(nbest_coefs):
        frame.progress((i + 1) / len(nbest_coefs))
        # Feature indexes of the n_best highest-scoring entries.
        indexes_metric = [x[1] for x in metric[-n_best:]]
        metric_data = trim.trim_data(data, indexes_metric)
        metric_data_valid = trim.trim_data(data_valid, indexes_metric)
        metric_f1.append(
            metrics.f1_score(labels_valid,
                             classify(metric_data, metric_data_valid, labels)))
        metric_n_feat.append(len(indexes_metric))
    print(' DONE in ' + timer.get_diff_str())
    dump.dump_object(nbest_coefs, folder + '/nbest/svm/coefs.dump')
    dump.dump_object(metric_f1, folder + '/nbest/svm/f1.dump')
    dump.dump_object(metric_n_feat, folder + '/nbest/svm/feat.dump')
    # Pair each N with its F1 and pick the N that maximised F1.
    metric_cls = list(zip(nbest_coefs, metric_f1))
    metric_coef_max = max(metric_cls, key=lambda x: x[1])[0]
    indexes_metric = [x[1] for x in metric[-metric_coef_max:]]
    # to eiler's diagram
    dump.dump_object(indexes_metric, folder + '/nbest/max/indexes.dump')
def cond_entropy_full(x, y):
    """Compute the conditional entropy of every feature in *x* against *y*.

    Delegates per-feature work to ``cond_entropy`` and prints a progress
    bar every 10 features.  Returns a numpy array with one value per
    feature row of *x*.
    """
    from util.frame import progress
    print('Information gain: computing conditional entropy:')
    total = len(x)
    entropies = []
    for idx, feature in enumerate(x):
        entropies.append(cond_entropy(feature, y))
        if idx % 10 == 0:
            progress((idx + 1) / total)
    progress(1)
    return np.asarray(entropies)
def correlation(x, y):
    """Compute the Pearson correlation of each feature row of *x* with *y*.

    Delegates per-feature work to ``feature_correlation`` and prints a
    progress bar every 10 features.  Returns a numpy array with one
    coefficient per feature row of *x*.
    """
    from util.frame import progress
    # BUG FIX: user-facing message previously misspelled "corellation".
    print('Pearson: computing correlation coefficients:')
    feat_len = len(x)
    result = []
    for i, feature in enumerate(x):
        result.append(feature_correlation(feature, y))
        if i % 10 == 0:
            progress((i + 1) / feat_len)
    progress(1)
    return np.asarray(result)
# Baseline: F1 score of the classifier on the full (untrimmed) feature set.
score = metrics.f1_score(labels_valid, classify(data, data_valid, labels))
print(score)
print()
dump.dump_object(score, 'score.dump')

# INFO GAIN
if INFO_GAIN:
    ig = dump.load_object('ig/ig.dump')
    # Sweep the information-gain threshold from 0.1 to 0.9 in 0.01 steps.
    ig_coefs = np.arange(0.1, 0.91, 0.01)
    ig_f1 = []
    ig_n_feat = []
    print('Information Gain: classifying on different coefficients')
    timer.set_new()
    for i, coef in enumerate(ig_coefs):
        frame.progress((i + 1) / len(ig_coefs))
        # Keep only features whose information gain exceeds the threshold.
        trimmed_ig = [pair for pair in ig if pair[0] > coef]
        indexes_ig = [pair[1] for pair in trimmed_ig]
        ig_data = trim.trim_data(data, indexes_ig)
        ig_data_valid = trim.trim_data(data_valid, indexes_ig)
        ig_f1.append(
            metrics.f1_score(labels_valid,
                             classify(ig_data, ig_data_valid, labels)))
        ig_n_feat.append(len(indexes_ig))
    print(' DONE in ' + timer.get_diff_str())
    dump.dump_object(ig_coefs, 'ig/svm/coefs.dump')
    dump.dump_object(ig_f1, 'ig/svm/f1.dump')
    dump.dump_object(ig_n_feat, 'ig/svm/feat.dump')
    # Pair each threshold with its F1 and keep the threshold that won.
    ig_cls = list(zip(ig_coefs, ig_f1))
    ig_coef_max = max(ig_cls, key=lambda x: x[1])[0]
images_test = images_to_np_array(test_images[2])
labels_test = labels_to_np_array(test_labels[1])
rang_test = len(images_test)


def classify():
    """Return the accuracy of the current global ``network`` on the test split."""
    predicted = network.predict(images_test)
    predicted = get_predicted(predicted)
    return accuracy_score(test_labels[1], predicted)


# Placeholder network; replaced per-configuration inside the loop below.
network = NeuralNetwork(1, 1, 1)
images_train = images_to_np_array(train_images[2])
labels_train = labels_to_np_array(train_labels[1])
cycles = 10
print('Training...')
progress(0)
timer = Timer()
# Sweep the number of random training samples per cycle from 150 to 240.
rang = list(range(150, 250, 10))
for j, sample_count in enumerate(rang):
    # Skip configurations that already have recorded stats.
    if sample_count not in stats_x:
        # Fixed seed so every configuration starts from identical weights.
        np.random.seed(1)
        network = NeuralNetwork(image_size[0] * image_size[1], 300, 10)
        for i in range(cycles):
            randoms = np.random.randint(0, 60000, sample_count)
            network.train(images_train[randoms], labels_train[randoms], 0.1)
            # BUG FIX: `if i % 1 == 0:` was vacuously true — report
            # progress unconditionally (identical behavior, dead test gone).
            progress((j * cycles + i + 1) / (cycles * len(rang)))
        stats_x.append(sample_count)
        stats_y.append(classify())
progress(1)
# NOTE(review): this chunk begins mid-`if`/`else` — the `if` header lies
# outside the visible source, so the leading indentation below is a
# reconstruction; verify against the full file.
    # Reuse a previously trained, dumped network and just report its accuracy.
    network = load_object('network.dump')
    print(classify())
else:
    # Build (or continue training) a network from scratch.
    images_train = images_to_np_array(train_images[2])
    labels_train = labels_to_np_array(train_labels[1])
    stats = []  # per-cycle accuracy history
    if NETWORK_CONTINUE:
        # Resume from the last checkpoint, including its accuracy history.
        network = load_object('network.dump')
        stats = load_object('stats.dump')
    else:
        network = NeuralNetwork(image_size[0] * image_size[1], 10, 10)
    rang_train = len(images_train)
    print('Training...')
    # NOTE(review): cycles == 0, so the training loop below never runs as
    # written — presumably left at 0 deliberately after training; confirm.
    cycles = 0
    timer = Timer()
    progress(0)
    for i in range(cycles):
        network.train(images_train, labels_train)
        # Checkpoint the network and its stats every cycle.
        dump_object(network, 'network.dump')
        dump_object(stats, 'stats.dump')
        progress((i+1) / cycles)
        stats.append(classify())
    print(' DONE in ', timer.get_diff_str())
    # Plot the accuracy history; `x`/`y` hold a smoothed curve averaged over
    # windows of `step` cycles.  NOTE(review): placement inside the `else`
    # branch is reconstructed — `stats` is only defined on this branch.
    import pylab as pt
    x, y = [0], [0]
    step = 25
    for i in range(len(stats) // step):
        x.append(i * step + step)
        selection = stats[i*step:i*step + step]
        y.append(sum(selection) / step)
    pt.plot(range(len(stats)), stats)