Esempio n. 1
0
def nbest(metric, folder):
    """Evaluate classification F1 while keeping only the N best features.

    For each N in 500..4900 (step 100), keeps the N features with the
    highest metric score, classifies, records the F1 score, then dumps the
    per-N results and the feature-index set of the best-performing N.

    metric: sequence of (score, feature_index) pairs.
    folder: directory prefix for the dump files.

    Relies on module-level data, data_valid, labels, labels_valid, trim,
    dump, frame, timer, metrics and classify().
    """
    nbest_coefs = np.arange(500, 4999, 100)
    # Ascending sort by score: the last N entries are the N best features.
    metric = sorted(metric, key=lambda x: x[0])
    metric_f1 = []
    metric_n_feat = []
    print(folder + ': classifying N BEST')
    timer.set_new()
    total = len(nbest_coefs)  # hoisted loop invariant
    for i, n_best in enumerate(nbest_coefs):
        frame.progress((i + 1) / total)
        indexes_metric = [x[1] for x in metric[-n_best:]]
        metric_data = trim.trim_data(data, indexes_metric)
        metric_data_valid = trim.trim_data(data_valid, indexes_metric)
        metric_f1.append(
            metrics.f1_score(labels_valid,
                             classify(metric_data, metric_data_valid, labels)))
        metric_n_feat.append(len(indexes_metric))
    print(' DONE in ' + timer.get_diff_str())
    dump.dump_object(nbest_coefs, folder + '/nbest/svm/coefs.dump')
    dump.dump_object(metric_f1, folder + '/nbest/svm/f1.dump')
    dump.dump_object(metric_n_feat, folder + '/nbest/svm/feat.dump')

    # N that achieved the highest F1; dump its feature indexes
    # (used for the Euler diagram, per the original comment).
    metric_coef_max = max(zip(nbest_coefs, metric_f1), key=lambda x: x[1])[0]
    indexes_metric = [x[1] for x in metric[-metric_coef_max:]]
    dump.dump_object(indexes_metric, folder + '/nbest/max/indexes.dump')
Esempio n. 2
0
 def cond_entropy_full(x, y):
     """Compute the conditional entropy of every feature in x given y.

     Reports progress every 10 features and returns the per-feature
     values as a numpy array.
     """
     from util.frame import progress
     print('Information gain: computing conditional entropy:')
     n_features = len(x)
     entropies = []
     for idx, feature in enumerate(x):
         entropies.append(cond_entropy(feature, y))
         if idx % 10 == 0:
             progress((idx + 1) / n_features)
     progress(1)
     return np.asarray(entropies)
Esempio n. 3
0
 def correlation(x, y):
     """Compute the correlation of every feature in x with y.

     Reports progress every 10 features and returns the per-feature
     coefficients as a numpy array.
     """
     from util.frame import progress
     print('Pearson: computing corellation coefficients:')
     n_features = len(x)
     coefficients = []
     for idx, feature in enumerate(x):
         coefficients.append(feature_correlation(feature, y))
         if idx % 10 == 0:
             progress((idx + 1) / n_features)
     progress(1)
     return np.asarray(coefficients)
Esempio n. 4
0
# Baseline: F1 score on the full, untrimmed feature set.
score = metrics.f1_score(labels_valid, classify(data, data_valid, labels))
print(score)
print()
dump.dump_object(score, 'score.dump')

# INFO GAIN
if INFO_GAIN:
    # Sweep the info-gain threshold, keeping only features scoring above it.
    ig = dump.load_object('ig/ig.dump')
    ig_coefs = np.arange(0.1, 0.91, 0.01)
    ig_f1 = []
    ig_n_feat = []
    print('Information Gain: classifying on different coefficients')
    timer.set_new()
    for i, coef in enumerate(ig_coefs):
        frame.progress((i + 1) / len(ig_coefs))
        trimmed_ig = [x for x in ig if x[0] > coef]
        indexes_ig = [x[1] for x in trimmed_ig]
        ig_data = trim.trim_data(data, indexes_ig)
        ig_data_valid = trim.trim_data(data_valid, indexes_ig)
        ig_f1.append(
            metrics.f1_score(labels_valid,
                             classify(ig_data, ig_data_valid, labels)))
        ig_n_feat.append(len(indexes_ig))
    print(' DONE in ' + timer.get_diff_str())
    dump.dump_object(ig_coefs, 'ig/svm/coefs.dump')
    dump.dump_object(ig_f1, 'ig/svm/f1.dump')
    dump.dump_object(ig_n_feat, 'ig/svm/feat.dump')

    # Threshold that achieved the highest F1.
    ig_cls = list(zip(ig_coefs, ig_f1))
    ig_coef_max = max(ig_cls, key=lambda x: x[1])[0]
Esempio n. 5
0
    # Convert the held-out split to numpy arrays.
    # NOTE(review): test_images/test_labels come from earlier in the file;
    # the [2]/[1] indexing presumably selects the payload field of a parsed
    # dataset tuple -- confirm against the loader.
    images_test = images_to_np_array(test_images[2])
    labels_test = labels_to_np_array(test_labels[1])
    rang_test = len(images_test)

    def classify():
        """Return the network's accuracy on the held-out test images."""
        raw_output = network.predict(images_test)
        return accuracy_score(test_labels[1], get_predicted(raw_output))

    # Placeholder network; replaced inside the sweep below before training.
    network = NeuralNetwork(1, 1, 1)
    images_train = images_to_np_array(train_images[2])
    labels_train = labels_to_np_array(train_labels[1])

    cycles = 10
    print('Training...')
    progress(0)
    timer = Timer()
    # Sweep sample sizes 150..240 in steps of 10.
    rang = list(range(150, 250, 10))
    for j in range(len(rang)):
        # Skip sizes already recorded (stats_x/stats_y are defined earlier
        # in the file -- presumably loaded from a previous run; confirm).
        if not rang[j] in stats_x:
            # Fixed seed so every sweep point trains from identical weights
            # and sees the same random sample sequence.
            np.random.seed(1)
            network = NeuralNetwork(image_size[0] * image_size[1], 300, 10)
            for i in range(cycles):
                # Draw rang[j] random training examples per cycle.
                randoms = np.random.randint(0, 60000, rang[j])
                network.train(images_train[randoms], labels_train[randoms],
                              0.1)
                # i % 1 == 0 is always true; kept as a throttling hook.
                if i % 1 == 0:
                    progress((j * cycles + i + 1) / (cycles * len(rang)))
            stats_x.append(rang[j])
            stats_y.append(classify())
    progress(1)
Esempio n. 6
0
    # Tail of the branch whose condition is above this chunk:
    # evaluate a previously saved network.
    network = load_object('network.dump')
    print(classify())
else:
    # Train a network (optionally resuming from a checkpoint).
    images_train = images_to_np_array(train_images[2])
    labels_train = labels_to_np_array(train_labels[1])
    stats = []
    if NETWORK_CONTINUE:
        # Resume from the dumped network and its accuracy history.
        network = load_object('network.dump')
        stats = load_object('stats.dump')
    else:
        network = NeuralNetwork(image_size[0] * image_size[1], 10, 10)
    rang_train = len(images_train)
    print('Training...')
    # NOTE(review): cycles = 0 makes the training loop below a no-op --
    # presumably a manual switch left off; confirm intent.
    cycles = 0
    timer = Timer()
    progress(0)
    for i in range(cycles):
        network.train(images_train, labels_train)
        # Checkpoint after every cycle so training can be resumed.
        dump_object(network, 'network.dump')
        dump_object(stats, 'stats.dump')
        progress((i+1) / cycles)
        stats.append(classify())
    print(' DONE in ', timer.get_diff_str())
    import pylab as pt
    # Plot raw per-cycle accuracy; x/y hold a step-wide moving average
    # (seeded with a (0, 0) origin point).
    x, y = [0], [0]
    step = 25
    for i in range(len(stats) // step):
        x.append(i * step + step)
        selection = stats[i*step:i*step + step]
        y.append(sum(selection) / step)
    pt.plot(range(len(stats)), stats)