Example No. 1
def cv_evaluate():
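    """Evaluate the pretrained VisionModel on the CV test set and report running accuracy."""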
    path = 'pretrained/cv/VisionModel'

    reader = InstanceReader("data/cv/testing.csv")
    batch_size = 64
    epochs = reader.length // batch_size

    sess = tf.Session()
    model = VisionModel(num_classes=200)

    model.restore(sess, path)

    total_acc = 0.0
    total_cnt = 0.0

    for step, pi in etc.range(epochs):
        # Get a batch of test instances.
        _, batch_images, batch_labels = reader.read(lines=batch_size)

        # Calculate batch accuracy and loss
        acc, loss = model.evaluate(sess, batch_images, batch_labels)

        total_acc += acc
        total_cnt += 1.0

        print("Iter " + str(1 + step) + " / " + str(epochs) + \
              ", Validation Accuracy= " + \
              "{:.5f}".format(total_acc / total_cnt) + ", Time Remaining= " + \
              etc.format_seconds(pi.time_remaining()), file = sys.stderr)
Example No. 2
def cv_extract_features():
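    """Extract VisionModel features for every instance and cache them to data/cv/features.csv."""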
    path = 'pretrained/cv/VisionModel'

    reader = InstanceReader("data/cv/all.csv")
    batch_size = 1
    epochs = reader.length // batch_size

    sess = tf.Session()
    model = VisionModel(num_classes=200)

    model.restore(sess, path)

    cache = FeatureCache()

    for step, pi in etc.range(epochs):
        # Get the next instance (batch size is 1).
        batch_ids, batch_images, _ = reader.read(lines=batch_size)

        # Retrieve the image path from the batch.
        image_path = batch_images[0]

        # Predict the label and extract features for the image.
        label, features, confidence = model.predict(sess, image_path)

        cache.set(batch_ids[0], label[0], features)

        print("Iter " + str(1 + step) + " / " + str(epochs) +
              ", Time Remaining= " + \
              etc.format_seconds(pi.time_remaining()), file = sys.stderr)

    cache.save("data/cv/features.csv")
Example No. 3
def cv_train():
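    """Fine-tune the VisionModel from an Inception-ResNet-v2 checkpoint on the CV training set."""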
    path = 'data/inception_resnet_v2_2016_08_30.ckpt'
    batch_size = 128

    reader = InstanceReader("data/cv/training.csv")
    epochs = reader.length // batch_size * 10  # roughly 10 passes over the data

    def get_batch(batch_size):
        return reader.read(lines=batch_size)

    sess = tf.Session()
    model = VisionModel(num_classes=200)

    model.restore(sess, path, last_layer=False)

    for step, pi in etc.range(epochs):
        # Get a batch of training instances.
        batch_ids, batch_images, batch_labels = get_batch(batch_size)
        model.train(sess, batch_images, batch_labels)

        # Calculate batch accuracy and loss
        acc, loss = model.evaluate(sess, batch_images, batch_labels)

        print("Iter " + str(1 + step) + " / " + str(epochs) + \
              ", Minibatch Loss= " + \
              "{:.6f}".format(loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc) + ", Time Remaining= " + \
              etc.format_seconds(pi.time_remaining()), file = sys.stderr)

        model.save(sess, 1 + step)

    print("Optimization Finished!", file=sys.stderr)
Example No. 4
def sc_evaluate():
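    """Evaluate the pretrained SentenceClassifier on the test set and report running accuracy."""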
    max_length = 30
    num_classes = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))

    reader = SentenceReader("data/sc/testing.csv",
                            num_classes,
                            embedding_size=embedding_size)

    batch_size = 128
    epochs = len(reader) // batch_size

    # Network Parameters
    num_hidden = 512  # hidden layer num of features

    tf.reset_default_graph()
    model = SentenceClassifier(max_length, embedding_size, num_hidden,
                               num_classes)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        model.restore(sess, "pretrained/sc/SentenceClassifier")

        total_acc = 0.0
        total_cnt = 0.0

        for step, pi in etc.range(epochs):
            # Get a batch of test instances.
            batch_x, batch_y, batch_len = reader.read(lines=batch_size)

            # Calculate batch accuracy and loss
            acc, loss = model.evaluate(batch_x, batch_y, batch_len, sess)

            total_acc += acc
            total_cnt += 1.0

            print("Iter " + str(1 + step) + " / " + str(epochs) + \
                  ", Validation Accuracy= " + \
                  "{:.5f}".format(total_acc / total_cnt) + \
                  ", Time Remaining= " + \
                  etc.format_seconds(pi.time_remaining()), file = sys.stderr)
Example No. 5
def sc_train():
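    """Train the SentenceClassifier, checkpointing the model every 10 steps."""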
    max_length = 30
    num_classes = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))

    reader = SentenceReader("data/sc/training.csv",
                            num_classes,
                            embedding_size=embedding_size)

    batch_size = 128
    epochs = 2000

    # Network Parameters
    num_hidden = 512  # hidden layer num of features

    model = SentenceClassifier(max_length, embedding_size, num_hidden,
                               num_classes)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for step, pi in etc.range(epochs):
            # Get a batch of training instances.
            batch_x, batch_y, batch_len = reader.read(lines=batch_size)

            # Run optimization op (backprop)
            model.train(batch_x, batch_y, batch_len, sess)

            # Calculate batch accuracy and loss
            acc, loss = model.evaluate(batch_x, batch_y, batch_len, sess)

            print("Iter " + str(1 + step) + " / " + str(epochs) + \
                  ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc) + ", Time Remaining= " + \
                  etc.format_seconds(pi.time_remaining()), file = sys.stderr)

            if (1 + step) % 10 == 0:
                model.save(sess, 1 + step)

        print("Optimization Finished!", file=sys.stderr)
Example No. 6
def sc_extract_embeddings():
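    """Average SentenceClassifier activations per class and cache them to data/sc/embeddings.csv."""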
    max_length     = 30
    num_classes    = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))

    reader = SentenceReader("data/sc/training.csv",
                            num_classes, embedding_size=embedding_size)

    epochs     = num_classes

    # Network Parameters
    num_hidden = 512 # hidden layer num of features

    model = SentenceClassifier(max_length, embedding_size,
                               num_hidden, num_classes)

    init = tf.global_variables_initializer()

    cache = EmbeddingCache()

    with tf.Session() as sess:
        sess.run(init)

        model.restore(sess, "pretrained/sc/SentenceClassifier")

        for step, pi in etc.range(epochs):
            # Get all training sentences for this class label.
            batch_x, batch_y, batch_len = reader.query(label=step + 1)

            # Extract activations and average them into a class embedding.
            activations = model.extract(sess, batch_x, batch_y, batch_len)
            embedding   = np.mean(activations, axis=0)

            cache.set(step + 1, embedding)

            print("Iter " + str(1 + step) + " / " + str(epochs) + \
                  ", Time Remaining= " + \
                  etc.format_seconds(pi.time_remaining()), file = sys.stderr)

    cache.save("data/sc/embeddings.csv")
Example No. 7
def lm_train():
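    """Train the LanguageModel on sentences paired with cached class embeddings and visual features."""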
    max_length = 30

    dataset = Dataset()

    vocabulary = Vocabulary().restore("data/ds/vocabulary.csv")
    embedding_size = len(vocabulary)
    feature_size = 512 + 1536

    reader = lm.SentenceReader("data/lm/training.csv",
                               embedding_size=embedding_size)

    batch_size = 128
    epochs = 5000

    # Network Parameters
    num_hidden = 512  # hidden layer num of features

    model = lm.LanguageModel(max_length=max_length,
                             embedding_size=embedding_size,
                             feature_size=feature_size,
                             num_hidden=num_hidden,
                             num_classes=200)

    init = tf.global_variables_initializer()

    embedding_cache = EmbeddingCache()
    embedding_cache.restore("data/sc/embeddings.csv")

    feature_cache = FeatureCache()
    feature_cache.restore("data/cv/features.csv")

    try:
        os.makedirs("logs")
    except OSError:
        # The directory may already exist.
        pass

    writer = tf.summary.FileWriter("logs", graph=tf.get_default_graph())

    sc_path = "pretrained/sc/SentenceClassifier"

    with tf.Session() as sess:
        sess.run(init)

        model.sentence_classifier.restore(sess, sc_path)

        print("Restored")

        for step, pi in etc.range(epochs):
            # Get a batch of training instances.
            instances, labels, sentences, lengths = \
                reader.read(lines=batch_size)

            features = [feature_cache.get(index) for index in instances]

            features = [
                np.concatenate([embedding_cache.get(label), feature])
                for label, feature in features
            ]

            # Run optimization op (backprop)
            summary = model.train(sess, sentences, features, labels, lengths)
            writer.add_summary(summary, step)

            # Calculate batch accuracy and loss
            acc, loss, rel, dis = model.evaluate(sess, sentences, features,
                                                 labels, lengths)

            print("Iter " + str(1 + step) + " / " + str(epochs) + \
                  ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + \
                  " (rel: {:.6f}, dis: {:.6f})".format(rel, dis) + \
                  ", Training Accuracy= " + \
                  "{:.5f}".format(acc) + ", Time Remaining= " + \
                  etc.format_seconds(pi.time_remaining()), file = sys.stderr)

            # Generate a sample sentence every 10 iterations.
            if (1 + step) % 10 == 0:
                samples = model.generate(sess, features)

                for sentence in samples[0:1]:
                    print(vocabulary.sentence(list(sentence)),
                          file=sys.stderr)

                model.save(sess, step)

        print("Optimization Finished!", file=sys.stderr)
Example No. 8
def lm_generate():
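    """Generate captions with the pretrained LanguageModel and dump them to products/sentences.best.json."""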
    max_length = 30

    dataset = Dataset()

    vocabulary     = Vocabulary().restore("data/ds/vocabulary.csv")
    embedding_size = len(vocabulary)
    feature_size   = 512 + 1536

    reader = lm.SentenceReader("data/lm/training.csv",
                               embedding_size = embedding_size)

    batch_size = 128
    epochs     = len(reader) // batch_size

    # Network Parameters
    num_hidden = 512 # hidden layer num of features

    model = lm.LanguageModel(max_length     = max_length,
                             embedding_size = embedding_size,
                             feature_size   = feature_size,
                             num_hidden     = num_hidden,
                             num_classes    = 200)

    init = tf.global_variables_initializer()

    embedding_cache = EmbeddingCache()
    embedding_cache.restore("data/sc/embeddings.csv")

    feature_cache = FeatureCache()
    feature_cache.restore("data/cv/features.csv")

    try:
        os.makedirs("logs")
    except OSError:
        # The directory may already exist.
        pass

    writer = tf.summary.FileWriter("logs", graph = tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(init)

        model.restore(sess, "pretrained/lm/LanguageModel")

        results = []

        for step, pi in etc.range(epochs):
            # Get a batch of training instances.
            instances, labels, sentences, lengths = \
                reader.read(lines=batch_size)

            features = [
                feature_cache.get(index)
                for index in instances
            ]

            labels = [label for label, feature in features]

            features = [
                np.concatenate([embedding_cache.get(experiment(label)), feature])
                for label, feature in features
            ]

            sentences = model.generate(sess, features)

            print("Iter " + str(1 + step) + " / " + str(epochs) + \
                  ", Time Remaining= " + \
                  etc.format_seconds(pi.time_remaining()), file = sys.stderr)

            for i, sentence in enumerate(sentences):
                results.append({
                    "image_id": dataset.example(instances[i]).path + ".jpg",
                    "caption": vocabulary.sentence([
                        word for word in sentence
                    ], limit = True)
                })

            if step % 10 == 0:
                try:
                    os.makedirs("products")
                except OSError:
                    # The directory may already exist.
                    pass

                with open('products/sentences.best.json', 'w') as file:
                    json.dump(results, file)

        print("Optimization Finished!", file = sys.stderr)