Example #1
def rntn():
    global rntn_model

    # Parse one labeled tree per line of the uploaded file.
    trees = [marmot.datasets.Trees.parse(l.strip('\n')) for l in request.files['file'].readlines()]

    # Hold out the last 10% of the trees for validation.
    split = int(math.floor(len(trees) * 0.9))
    training_data = marmot.datasets.Trees(trees[:split], minibatch_size=25)
    validation_data = marmot.datasets.Trees(trees[split:], wordmap=training_data.wordmap, minibatch_size=25)

    # Compose the model: recursive layer -> softmax classifier -> L2 penalty.
    recursive = marmot.layers.Recursive(
        word_vec_length=30,
        wordmap=training_data.wordmap
    )
    softmax = marmot.layers.Softmax(prev_layer=recursive, n=5)
    l2reg = marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-4)

    # Define a learning strategy
    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-6)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    # Initialize and run the training loop
    marmot.train_loop(
        l2reg,
        strategy,
        training_data,
        validation_data,
        patience_factor=1.1,
        validation_frequency=3
    )

    # Keep the trained model and wordmap around for the companion test view.
    rntn_model['model'] = softmax
    rntn_model['wordmap'] = training_data.wordmap

    return redirect(url_for('test_rntn'))
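
Both rntn examples read like Flask upload handlers: the training file arrives via request.files['file'] and the browser is redirected to a test_rntn view once training finishes. A minimal sketch of the surrounding wiring, assuming Flask; the route paths, the app object, and the module-level rntn_model dict are inferred from the handler's globals, not shown in the original snippet:

import math

import marmot
from flask import Flask, request, redirect, url_for

app = Flask(__name__)
rntn_model = {}  # populated by rntn(), read back by test_rntn()


@app.route('/rntn', methods=['POST'])  # hypothetical route path
def rntn():
    ...  # body as in the example above


@app.route('/test_rntn')  # target of url_for('test_rntn')
def test_rntn():
    ...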
Example #2
def rntn():
    global rntn_model

    trees = [
        marmot.datasets.Trees.parse(l.strip('\n'))
        for l in request.files['file'].readlines()
    ]

    split = int(math.floor(len(trees) * 0.9))
    training_data = marmot.datasets.Trees(trees[:split], minibatch_size=25)
    validation_data = marmot.datasets.Trees(trees[split:],
                                            wordmap=training_data.wordmap,
                                            minibatch_size=25)

    recursive = marmot.layers.Recursive(word_vec_length=30,
                                        wordmap=training_data.wordmap)
    softmax = marmot.layers.Softmax(prev_layer=recursive, n=5)
    l2reg = marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-4)

    # Define a learning strategy
    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-6)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    # Initialize and run the training loop
    marmot.train_loop(l2reg,
                      strategy,
                      training_data,
                      validation_data,
                      patience_factor=1.1,
                      validation_frequency=3)

    rntn_model['model'] = softmax
    rntn_model['wordmap'] = training_data.wordmap

    return redirect(url_for('test_rntn'))
Example #3
def logistic():
    global logistic_model

    data = [
        l.strip('\n').split("\t") for l in request.files['file'].readlines()
    ]
    random.shuffle(data)

    labels = [d[0] for d in data]
    strings = [d[1] for d in data]

    # Split into training/validation sets
    split = int(math.floor(len(data) * 0.9))

    # Map each label seen in the training split to an integer index.
    labelmap = {}
    labelmap_len = 0
    for label in labels[:split]:
        if label not in labelmap:
            labelmap[label] = labelmap_len
            labelmap_len += 1

    # Map words to indices; slot 0 is reserved for unknown words.
    wordmap = {'UNK': 0}
    wordmap_len = 1  # start at 1 so the first new word doesn't collide with 'UNK'
    for string in strings[:split]:
        words = string.split(' ')
        for word in words:
            if word not in wordmap:
                wordmap[word] = wordmap_len
                wordmap_len += 1

    # Encode labels as indices (assumes every label also occurs in the
    # training split) and strings as bag-of-words vectors.
    labels = [labelmap[l] for l in labels]
    word_bags = to_wordbags(strings, wordmap)

    training_data = marmot.datasets.Simple(word_bags[:split],
                                           labels[:split],
                                           minibatch_size=128)
    validation_data = marmot.datasets.Simple(word_bags[split:], labels[split:])

    inputs = marmot.layers.Input(len(wordmap))
    softmax = marmot.layers.Softmax(prev_layer=inputs, n=len(labelmap))

    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-4)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    marmot.train_loop(softmax,
                      strategy,
                      training_data,
                      validation_data,
                      min_patience=100,
                      patience_factor=1.1,
                      validation_frequency=3)

    logistic_model['model'] = softmax
    logistic_model['wordmap'] = wordmap
    logistic_model['labelmap'] = labelmap

    return redirect(url_for('test_logistic'))
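
Both logistic examples call a to_wordbags helper that is not shown anywhere in this section. A minimal sketch under the obvious assumption that it turns each whitespace-split string into a count vector of length len(wordmap), with out-of-vocabulary words falling back to the reserved 'UNK' slot; the original may differ in detail (e.g. binary presence instead of counts):

def to_wordbags(strings, wordmap):
    """Encode each string as a bag-of-words count vector over wordmap."""
    bags = []
    for string in strings:
        bag = [0] * len(wordmap)
        for word in string.split(' '):
            # Unseen words fall back to the reserved 'UNK' index.
            bag[wordmap.get(word, wordmap['UNK'])] += 1
        bags.append(bag)
    return bags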
Example #4
def logistic():
    global logistic_model

    data = [l.strip('\n').split("\t") for l in request.files['file'].readlines()]
    random.shuffle(data)

    labels = [d[0] for d in data]
    strings = [d[1] for d in data]

    # Split into training/validation sets
    split = int(math.floor(len(data) * 0.9))

    labelmap = {}
    labelmap_len = 0
    for label in labels[:split]:
        if label not in labelmap:
            labelmap[label] = labelmap_len
            labelmap_len += 1

    wordmap = {'UNK': 0}
    wordmap_len = 1  # start at 1 so the first new word doesn't collide with 'UNK'
    for string in strings[:split]:
        words = string.split(' ')
        for word in words:
            if word not in wordmap:
                wordmap[word] = wordmap_len
                wordmap_len += 1

    labels = [labelmap[l] for l in labels]
    word_bags = to_wordbags(strings, wordmap)

    training_data = marmot.datasets.Simple(word_bags[:split], labels[:split], minibatch_size=128)
    validation_data = marmot.datasets.Simple(word_bags[split:], labels[split:])

    inputs = marmot.layers.Input(len(wordmap))
    softmax = marmot.layers.Softmax(prev_layer=inputs, n=len(labelmap))

    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-4)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    marmot.train_loop(
        softmax,
        strategy,
        training_data,
        validation_data,
        min_patience=100,
        patience_factor=1.1,
        validation_frequency=3
    )

    logistic_model['model'] = softmax
    logistic_model['wordmap'] = wordmap
    logistic_model['labelmap'] = labelmap

    return redirect(url_for('test_logistic'))
Example #5
# Build the model by composing layers
inputs = marmot.layers.Input(28 * 28)  # Each MNIST image has size 28*28
inputs = marmot.layers.BatchNormalize(inputs)

hidden = marmot.layers.Feedforward(prev_layer=inputs, n=500)
# hidden  = marmot.layers.BatchNormalize(hidden)

hidden = marmot.layers.Feedforward(prev_layer=hidden, n=500)
# hidden  = marmot.layers.BatchNormalize(hidden)

hidden = marmot.layers.Feedforward(prev_layer=hidden, n=500)
# hidden  = marmot.layers.BatchNormalize(hidden)

hidden = marmot.layers.Feedforward(prev_layer=hidden, n=500)
# hidden  = marmot.layers.BatchNormalize(hidden)

softmax = marmot.layers.Softmax(prev_layer=hidden, n=10)
# l2reg = marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-5)

# Define a learning strategy
learning_rule = marmot.sgd.Adadelta(decay=0.75, epsilon=1e-3)
strategy = marmot.sgd.SGD(learning_rule=learning_rule)

# Initialize and run the training loop
marmot.train_loop(softmax,
                  strategy,
                  training_data,
                  validation_data,
                  patience_factor=2,
                  validation_frequency=10)
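
This MNIST example composes the network but never shows where training_data and validation_data come from. A hedged sketch of the missing preparation step, reusing the marmot.datasets.Simple wrapper from the logistic examples; the placeholder arrays stand in for however you actually load MNIST:

import random

import marmot

# Placeholder data with the right shapes; swap in real MNIST pixel vectors
# (flattened to length 28 * 28) and integer class labels 0-9.
train_images = [[random.random() for _ in range(28 * 28)] for _ in range(1000)]
train_labels = [random.randrange(10) for _ in range(1000)]
test_images = [[random.random() for _ in range(28 * 28)] for _ in range(100)]
test_labels = [random.randrange(10) for _ in range(100)]

training_data = marmot.datasets.Simple(train_images, train_labels,
                                       minibatch_size=128)
validation_data = marmot.datasets.Simple(test_images, test_labels)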
Example #6
def load_file(filename):
    with open(filename) as f:
        return [
            marmot.datasets.Trees.parse(l.strip('\n')) for l in f.readlines()
        ]


print "Loading data..."

training_data = marmot.datasets.Trees(
    load_file('data/stanford_treebank/train.txt'), minibatch_size=25)

validation_data = marmot.datasets.Trees(
    load_file('data/stanford_treebank/dev.txt'), wordmap=training_data.wordmap)

print "Training..."

recursive = marmot.layers.Recursive(word_vec_length=30,
                                    wordmap=training_data.wordmap,
                                    tensor=True)
softmax = marmot.layers.Softmax(prev_layer=recursive, n=5)
l2reg = marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-4)

# Define a learning strategy
learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-6)
strategy = marmot.sgd.SGD(learning_rule=learning_rule)

# Initialize and run the training loop
marmot.train_loop(l2reg,
                  strategy,
                  training_data,
                  validation_data,
                  patience_factor=2,
                  validation_frequency=3)
Example #7
    training_data = marmot.datasets.Trees(trees[:split], minibatch_size=25)
    validation_data = marmot.datasets.Trees(trees[split:],
                                            wordmap=training_data.wordmap)

    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-4)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    model = build_model(training_data.wordmap, label_count)

    print "Starting training..."

    # Train!
    results = marmot.train_loop(
        model,
        strategy,
        training_data,
        validation_data,
        patience_factor=2,
        validation_frequency=3,
    )

    # Save the best params to a file specified in the third command line arg
    model_data = {
        'param_dump': results['param_dump'],
        'label_count': label_count,
        'wordmap': training_data.wordmap
    }
    with open(sys.argv[3], 'wb') as f:  # pickle needs a binary file handle
        pickle.dump(model_data, f, pickle.HIGHEST_PROTOCOL)

elif sys.argv[1] == 'test':
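
Examples #7 and #9 call a build_model helper that is defined outside the snippet. A plausible sketch, assuming it composes the same Recursive -> Softmax -> L2Reg stack as Examples #6 and #8 with the output width taken from label_count; the hyperparameter values are carried over from those examples, not confirmed for this one:

import marmot


def build_model(wordmap, label_count):
    # Same layer composition as the treebank examples above (assumed).
    recursive = marmot.layers.Recursive(word_vec_length=30,
                                        wordmap=wordmap,
                                        tensor=True)
    softmax = marmot.layers.Softmax(prev_layer=recursive, n=label_count)
    # Return the top of the stack; marmot.train_loop optimizes through it.
    return marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-4)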
Example #8
training_data = marmot.datasets.Trees(
    load_file('data/stanford_treebank/train.txt'),
    minibatch_size=25
)

validation_data = marmot.datasets.Trees(
    load_file('data/stanford_treebank/dev.txt'),
    wordmap=training_data.wordmap
)

print "Training..."

recursive = marmot.layers.Recursive(
    word_vec_length=30,
    wordmap=training_data.wordmap,
    tensor=True
)
softmax = marmot.layers.Softmax(prev_layer=recursive, n=5)
l2reg = marmot.layers.L2Reg(prev_layer=softmax, reg_weight=1e-4)

# Define a learning strategy
learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-6)
strategy = marmot.sgd.SGD(learning_rule=learning_rule)

# Initialize and run the training loop
marmot.train_loop(
    l2reg,
    strategy,
    training_data,
    validation_data,
    patience_factor=2,
    validation_frequency=3
)
Example #9
    validation_data = marmot.datasets.Trees(
        trees[split:],
        wordmap=training_data.wordmap
    )

    learning_rule = marmot.sgd.Adadelta(decay=0.90, epsilon=1e-4)
    strategy = marmot.sgd.SGD(learning_rule=learning_rule)

    model = build_model(training_data.wordmap, label_count)

    print "Starting training..."

    # Train!
    results = marmot.train_loop(
        model,
        strategy,
        training_data,
        validation_data,
        patience_factor=2,
        validation_frequency=3,
    )

    # Save the best params to a file specified in the third command line arg
    model_data = {
        'param_dump': results['param_dump'],
        'label_count': label_count,
        'wordmap': training_data.wordmap
    }
    with open(sys.argv[3], 'wb') as f:  # pickle needs a binary file handle
        pickle.dump(model_data, f, pickle.HIGHEST_PROTOCOL)

elif sys.argv[1] == 'test':