Example #1
                    d = distance(point, cluster["mean"])
                    ds.append(d)

                # Choose the nearest cluster mean for this point
                minId = ds.index(min(ds))

                # If the point switches clusters, move it and keep iterating until assignments stabilize
                if point.clusterId != minId:
                    self.clusters[point.clusterId]["cluster"].remove(point)
                    self.clusters[minId]["cluster"].add(point)
                    point.clusterId = minId
                    stop = False

            self._reCalculateMean()


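# Sweep k from 5 to 25 and print the purity and F-score obtained for each k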
if __name__ == "__main__":

    ks, ps, fs = [], [], []

    for k in xrange(5, 26):
        points = readPoints("dataset1.dat", "dataset1-label.dat")
        kmeans = KmeansClustering(points, k)

        kmeans.clustering()
        ks.append(k)
        ps.append(purity(kmeans.clusters, kmeans.points))
        fs.append(fscore(kmeans.clusters, kmeans.points))

    print "\t".join([str(i) for i in ks])
    print "\t".join([str(i) for i in ps])
    print "\t".join([str(i) for i in fs])
Example #2
                    cluster['cluster'].add(p)
            self.clusters.append(cluster)


if __name__ == '__main__':

    es, ps, fs = [], [], []

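    # Vary the first DBscanClustering argument (presumably eps, given the list name 'es') from 1 to 9 while the second stays fixed at 100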
    for i in xrange(1, 10):
        points = readPoints('dataset2.dat', 'dataset2-label.dat')
        dbscan = DBscanClustering(points, i, 100)
        dbscan.clustering()

        es.append(i)
        ps.append(purity(dbscan.clusters, dbscan.points))
        fs.append(fscore(dbscan.clusters, dbscan.points))

    print '\t'.join([str(i) for i in es])
    print '\t'.join([str(i) for i in ps])
    print '\t'.join([str(i) for i in fs])

    print "\n\n"

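    # Vary the second DBscanClustering argument (presumably minPts, given the list name 'ms') from 10 to 190 in steps of 10, with the first fixed at 6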
    ms, ps, fs = [], [], []

    for i in xrange(10, 200, 10):
        points = readPoints('dataset2.dat', 'dataset2-label.dat')
        dbscan = DBscanClustering(points, 6, i)
        dbscan.clustering()

        ms.append(i)
        ps.append(purity(dbscan.clusters, dbscan.points))
        fs.append(fscore(dbscan.clusters, dbscan.points))

    print '\t'.join([str(i) for i in ms])
    print '\t'.join([str(i) for i in ps])
    print '\t'.join([str(i) for i in fs])
Example #3
def main(unused_argv):
    train_dataset, validate_dataset, test_dataset = input.input(
        shuffle_files=False)
    # Text information logged to TensorBoard
    info = tf.constant([
        "Batch size = %s" % f.FLAGS.batch_size,
        "Epochs = %s" % f.FLAGS.num_epochs,
        "Learning rate = %s" % f.FLAGS.learning_rate,
        "Batch normalization = No",
        "Window size = %s" % f.FLAGS.window_size, "Shuffle Files = No",
        "CNN model = %s" % f.FLAGS.cnn_model, "Shuffle Samples = YES"
    ])
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [
            None, input.SAMPLE_DEPTH, input.SAMPLE_HEIGHT, input.SAMPLE_WIDTH
        ])
        y_ = tf.placeholder(tf.float32, [None, 2])
        dropout_rate = tf.placeholder(tf.float32)
        is_training = tf.placeholder(tf.bool)

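    # NOTE: this excerpt assumes a module-level 'summaries' dict of lists, e.g. {'train': [], 'validate': [], 'test': []}, defined outside the shown code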
    with tf.name_scope('logits'):
        if f.FLAGS.cnn_model == "lenet5":
            logits = lenet5.model_fn(sample_input=x,
                                     is_training=is_training,
                                     summaries=summaries)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=logits)
        mean_cross_entropy_loss = tf.reduce_mean(cross_entropy)

        loss_summ = tf.summary.scalar('Mean_cross_entropy_loss',
                                      mean_cross_entropy_loss)
        summaries['train'].append(loss_summ)
        #summaries['validate'].append(loss_summ)

    with tf.name_scope('adam_optimizer'):
        optimizer = tf.train.AdamOptimizer(
            f.FLAGS.learning_rate).minimize(mean_cross_entropy_loss)

    with tf.name_scope('accuracy'):
        preds = tf.argmax(logits, 1)
        correct_preds = tf.argmax(y_, 1)
        equal = tf.equal(preds, correct_preds)
        training_accuracy_op = tf.reduce_mean(tf.cast(equal, tf.float32))
        summaries['train'].append(
            tf.summary.scalar('Training_Accuracy', training_accuracy_op))

    with tf.name_scope('Evaluation_Metrics'):
        tp_op = evaluate.tp(logits=logits, labels=y_)
        fp_op = evaluate.fp(logits=logits, labels=y_)
        tn_op = evaluate.tn(logits=logits, labels=y_)
        fn_op = evaluate.fn(logits=logits, labels=y_)

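        # The per-batch counts above are accumulated in Python; these placeholders feed the epoch totals back in so the metrics cover the whole set rather than one batch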
        tp_sum = tf.placeholder(tf.float32)
        tn_sum = tf.placeholder(tf.float32)
        fp_sum = tf.placeholder(tf.float32)
        fn_sum = tf.placeholder(tf.float32)

        precision_op = evaluate.precision(tp=tp_sum,
                                          fp=fp_sum,
                                          tn=tn_sum,
                                          fn=fn_sum)
        accuracy_op = evaluate.accuracy(tp=tp_sum,
                                        fp=fp_sum,
                                        tn=tn_sum,
                                        fn=fn_sum)
        recall_op = evaluate.recall(tp=tp_sum, fp=fp_sum, tn=tn_sum, fn=fn_sum)
        fscore_op = evaluate.fscore(tp=tp_sum, fp=fp_sum, tn=tn_sum, fn=fn_sum)

        precision_summ = tf.summary.scalar('Precision', precision_op)
        accuracy_summ = tf.summary.scalar('Accuracy', accuracy_op)
        recall_summ = tf.summary.scalar('Recall', recall_op)
        fscore_summ = tf.summary.scalar('Fscore', fscore_op)

        summaries['validate'].append(accuracy_summ)
        summaries['validate'].append(precision_summ)
        summaries['validate'].append(recall_summ)
        summaries['validate'].append(fscore_summ)

        summaries['test'].append(accuracy_summ)
        summaries['test'].append(precision_summ)
        summaries['test'].append(recall_summ)
        summaries['test'].append(fscore_summ)

    print("Saving graph to %s" % f.FLAGS.log_dir)
    train_writer = tf.summary.FileWriter(f.FLAGS.log_dir + "/train")
    validate_writer = tf.summary.FileWriter(f.FLAGS.log_dir + "/validate")
    test_writer = tf.summary.FileWriter(f.FLAGS.log_dir + "/test")
    train_writer.add_graph(tf.get_default_graph())

    train_summaries = tf.summary.merge(summaries['train'])
    validate_summaries = tf.summary.merge(summaries['validate'])
    test_summaries = tf.summary.merge(summaries['test'])

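    # Training loop: one pass over the training set per epoch, logging step summaries, then a full validation and test pass at the end of each epoch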
    with tf.Session() as sess:
        train_writer.add_summary(sess.run(tf.summary.text("Information",
                                                          info)))
        train_iter = train_dataset.make_initializable_iterator()
        train_next_elem = train_iter.get_next()
        sess.run(tf.global_variables_initializer())
        global_step = 0
        display_freq = 10
        validate_freq = 50
        test_freq = 50
        for epoch in range(1, f.FLAGS.num_epochs + 1):
            sess.run(train_iter.initializer)
            step_time = 0.0
            fetch_time = 0.0
            while True:
                try:
                    a = time.time()
                    global_step += 1
                    sample, label = sess.run(train_next_elem)
                    fetch_time += time.time() - a
                    #print (sample.shape, label.shape)
                    #print (label)
                    #for s in sample[0][0]:
                    #      print (s)
                    a = time.time()
                    _, summ = sess.run([optimizer, train_summaries],
                                       feed_dict={
                                           x: sample,
                                           y_: label,
                                           dropout_rate: 0.5,
                                           is_training: True
                                       })
                    train_writer.add_summary(summ, global_step)
                    step_time += time.time() - a
                except tf.errors.OutOfRangeError:
                    break

                if global_step % display_freq == 0:
                    batch_loss, batch_accuracy = sess.run(
                        [mean_cross_entropy_loss, training_accuracy_op],
                        feed_dict={
                            x: sample,
                            y_: label,
                            dropout_rate: 1.0,
                            is_training: False
                        })
                    print(
                        "Epoch {:3}\t Step {:5}:\t Loss={:.3f}, \tTraining Accuracy={:.5f} \tStep Time {:4.2f}m, Fetch Time {:4.2f}m"
                        .format(epoch, global_step, batch_loss, batch_accuracy,
                                step_time / 60, fetch_time / 60))
                    step_time = 0.0
                    fetch_time = 0.0

            # Validate and test after each epoch
            val_it = validate_dataset.make_one_shot_iterator()
            val_next_elem = val_it.get_next()
            tot_tp, tot_tn, tot_fp, tot_fn = 0, 0, 0, 0
            while True:
                try:
                    sample, label = sess.run(val_next_elem)
                    tp, fp, tn, fn = sess.run(
                        [tp_op, fp_op, tn_op, fn_op],
                        feed_dict={
                            x: sample,
                            y_: label,
                            dropout_rate: 1.0,
                            is_training: False
                        })
                except tf.errors.OutOfRangeError:
                    break
                tot_tp += tp
                tot_fp += fp
                tot_fn += fn
                tot_tn += tn
            precision, recall, accuracy, fscore, summ = sess.run(
                [precision_op, recall_op, accuracy_op, fscore_op, validate_summaries],
                feed_dict={
                    tp_sum: tot_tp,
                    tn_sum: tot_tn,
                    fp_sum: tot_fp,
                    fn_sum: tot_fn
                })
            validate_writer.add_summary(summ, global_step)
            print("Epoch %d, Step %d" % (epoch, global_step))
            print("=" * 10, "Validating Results", "=" * 10)
            print("TP: %g\nTN: %g\nFP: %g\nFN: %g" %
                  (tot_tp, tot_tn, tot_fp, tot_fn))
            print(
                "\tPrecision: %g\n\tRecall: %g\n\tF1_score: %g\n\tAccuracy: %g"
                % (precision, recall, fscore, accuracy))

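            # Repeat the same confusion-matrix accumulation and metric computation over the test set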
            test_it = test_dataset.make_one_shot_iterator()
            test_next_elem = test_it.get_next()
            tot_tp, tot_tn, tot_fp, tot_fn = 0, 0, 0, 0
            while True:
                try:
                    sample, label = sess.run(test_next_elem)
                    tp, fp, tn, fn = sess.run(
                        [tp_op, fp_op, tn_op, fn_op],
                        feed_dict={
                            x: sample,
                            y_: label,
                            dropout_rate: 1.0,
                            is_training: False
                        })
                except tf.errors.OutOfRangeError:
                    break
                tot_tp += tp
                tot_fp += fp
                tot_fn += fn
                tot_tn += tn
            precision, recall, accuracy, fscore, summ = sess.run(
                [precision_op, recall_op, accuracy_op, fscore_op, test_summaries],
                feed_dict={
                    tp_sum: tot_tp,
                    tn_sum: tot_tn,
                    fp_sum: tot_fp,
                    fn_sum: tot_fn
                })

            test_writer.add_summary(summ, global_step)

            print("=" * 10, "Testing Results", "=" * 10)
            print("TP: %g\nTN: %g\nFP: %g\nFN: %g" %
                  (tot_tp, tot_tn, tot_fp, tot_fn))
            print(
                "\tPrecision: %g\n\tRecall: %g\n\tF1_score: %g\n\tAccuracy: %g"
                % (precision, recall, fscore, accuracy))
            print("=" * 10, "===============", "=" * 10)