Code example #1
def functions(sequence_length):
    """
    Return two functions
     * The first function does prediction.
     * The second function does learning.
    """
    global cached_functions
    p = sequence_length  # compiled functions are cached per sequence length
    if len(cached_functions) > 1:
        # Model parameters live in module-level globals, so caching compiled
        # functions for more than one configuration risks silently mixing
        # parameters across models.
        assert 0
    if p not in cached_functions:
        print "Need to construct graph for sequence_length=%d..." % (sequence_length)
        # Create the sequence_length inputs.
        # Each is a t.xmatrix(), initial word embeddings (provided by
        # Jason + Ronan) to be transformed into an initial representation.
        # We could use a vector, but instead we use a matrix with one row.
        sequence = [t.xmatrix() for i in range(sequence_length)]
        correct_repr = t.xmatrix()
        noise_repr = t.xmatrix()
#        correct_scorebias = t.xscalar()
#        noise_scorebias = t.xscalar()
        correct_scorebias = t.xvector()
        noise_scorebias = t.xvector()

        stackedsequence = stack(sequence)
        predictrepr = dot(stackedsequence, output_weights) + output_biases

        correct_score = score(correct_repr, predictrepr) + correct_scorebias
        noise_score = score(noise_repr, predictrepr) + noise_scorebias
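        # Margin ranking (hinge) loss: max(0, 1 - correct_score + noise_score);
        # the clip's upper bound 1e999 evaluates to +inf, so only the lower
        # bound at 0 is effective.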
        loss = t.clip(1 - correct_score + noise_score, 0, 1e999)

        (doutput_weights, doutput_biases) = t.grad(loss, [output_weights, output_biases])
        dsequence = t.grad(loss, sequence)
        (dcorrect_repr, dnoise_repr) = t.grad(loss, [correct_repr, noise_repr])
        (dcorrect_scorebias, dnoise_scorebias) = t.grad(loss, [correct_scorebias, noise_scorebias])
        #print "REMOVEME", len(dcorrect_inputs)
        predict_inputs = sequence + [correct_repr, correct_scorebias, output_weights, output_biases]
        train_inputs = sequence + [correct_repr, noise_repr, correct_scorebias, noise_scorebias, output_weights, output_biases]
        predict_outputs = [predictrepr, correct_score]
        train_outputs = [loss, predictrepr, correct_score, noise_score] + dsequence + [dcorrect_repr, dnoise_repr, doutput_weights, doutput_biases, dcorrect_scorebias, dnoise_scorebias]
#        train_outputs = [loss, correct_repr, correct_score, noise_repr, noise_score]

        import theano.gof.graph

        nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
        print "About to compile predict function over %d ops [nodes]..." % nnodes
        predict_function = theano.function(predict_inputs, predict_outputs, mode=COMPILE_MODE)
        print "...done constructing graph for sequence_length=%d" % (sequence_length)

        nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
        print "About to compile train function over %d ops [nodes]..." % nnodes
        train_function = theano.function(train_inputs, train_outputs, mode=COMPILE_MODE)
        print "...done constructing graph for sequence_length=%d" % (sequence_length)

        cached_functions[p] = (predict_function, train_function)
    return cached_functions[p]
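
A minimal usage sketch for this variant, under the assumptions the snippet implies: the surrounding module defines cached_functions = {}, the Theano input variables output_weights and output_biases, and the stack and score helpers. Because the parameters are passed in as ordinary inputs and the gradients come back as outputs, the caller owns the update step. The embedding dimension, dtype, shapes, and learning rate below are illustrative assumptions, not values from the project.

import numpy

seq_len, dim = 2, 50  # hypothetical sizes
predict_function, train_function = functions(sequence_length=seq_len)
seq = [numpy.random.randn(1, dim).astype("float32") for _ in range(seq_len)]
correct_repr_v = numpy.random.randn(1, dim).astype("float32")
noise_repr_v = numpy.random.randn(1, dim).astype("float32")
correct_bias_v = numpy.zeros(1, dtype="float32")  # assumed (1,)-shaped score-bias vectors
noise_bias_v = numpy.zeros(1, dtype="float32")
W = numpy.random.randn(seq_len * dim, dim).astype("float32")  # assumes stack() concatenates the row matrices horizontally
b = numpy.zeros(dim, dtype="float32")

predictrepr_v, correct_score_v = predict_function(
    *(seq + [correct_repr_v, correct_bias_v, W, b]))
outs = train_function(
    *(seq + [correct_repr_v, noise_repr_v, correct_bias_v, noise_bias_v, W, b]))
# outs = [loss, predictrepr, correct_score, noise_score] + d(seq)
#        + [dcorrect_repr, dnoise_repr, dW, db, dcorrect_bias, dnoise_bias],
# so the caller applies its own SGD step, e.g. W -= 0.01 * outs[6 + seq_len]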
Code example #2
File: graphcw.py  Project: everwind/DNNNLP
def functions(sequence_length):
    """
    Return two functions
     * The first function does prediction.
     * The second function does learning.
    """
    global cached_functions
    cachekey = sequence_length  # compiled functions are cached per sequence length
    if len(cached_functions) > 1:
        # Model parameters live in module-level globals, so caching compiled
        # functions for more than one configuration risks silently mixing
        # parameters across models.
        assert 0
    if cachekey not in cached_functions:
        print "Need to construct graph for sequence_length=%d..." % (
            sequence_length)
        # Create the sequence_length inputs.
        # Each is a t.xmatrix(), initial word embeddings (provided by
        # Jason + Ronan) to be transformed into an initial representation.
        # We could use a vector, but instead we use a matrix with one row.
        correct_inputs = [t.xmatrix() for i in range(sequence_length)]
        noise_inputs = [t.xmatrix() for i in range(sequence_length)]
        learning_rate = t.xscalar()

        stacked_correct_inputs = stack(correct_inputs)
        stacked_noise_inputs = stack(noise_inputs)

        correct_score, correct_prehidden = score(stacked_correct_inputs)
        noise_score, noise_prehidden = score(stacked_noise_inputs)
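        # Margin ranking (hinge) loss: max(0, 1 - correct_score + noise_score);
        # the clip's upper bound 1e999 evaluates to +inf, so only the lower
        # bound at 0 is effective.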
        unpenalized_loss = t.clip(1 - correct_score + noise_score, 0, 1e999)

        from hyperparameters import HYPERPARAMETERS
        if HYPERPARAMETERS["CW_EMBEDDING_L1_PENALTY"] != 0:
            l1penalty = t.sum(
                t.abs_(stacked_correct_inputs) + t.abs_(stacked_noise_inputs),
                axis=1).T * HYPERPARAMETERS["CW_EMBEDDING_L1_PENALTY"]
        else:
            l1penalty = t.as_tensor_variable(numpy.asarray(0, dtype=floatX))
#            l1penalty = t.as_tensor_variable(numpy.asarray((0,), dtype=floatX))
        loss = (unpenalized_loss.T + l1penalty).T

        total_loss = t.sum(loss)

        (dhidden_weights,
         dhidden_biases, doutput_weights, doutput_biases) = t.grad(
             total_loss,
             [hidden_weights, hidden_biases, output_weights, output_biases])
        dcorrect_inputs = t.grad(total_loss, correct_inputs)
        dnoise_inputs = t.grad(total_loss, noise_inputs)
        #print "REMOVEME", len(dcorrect_inputs)
        predict_inputs = correct_inputs
        train_inputs = correct_inputs + noise_inputs + [learning_rate]
        verbose_predict_inputs = predict_inputs
        predict_outputs = [correct_score]
        train_outputs = dcorrect_inputs + dnoise_inputs + [
            loss, unpenalized_loss, l1penalty, correct_score, noise_score
        ]
        verbose_predict_outputs = [correct_score, correct_prehidden]

        import theano.gof.graph

        nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
        print "About to compile predict function over %d ops [nodes]..." % nnodes
        predict_function = pfunc(predict_inputs,
                                 predict_outputs,
                                 mode=COMPILE_MODE)
        print "...done constructing graph for sequence_length=%d" % (
            sequence_length)

        nnodes = len(
            theano.gof.graph.ops(verbose_predict_inputs,
                                 verbose_predict_outputs))
        print "About to compile predict function over %d ops [nodes]..." % nnodes
        verbose_predict_function = pfunc(verbose_predict_inputs,
                                         verbose_predict_outputs,
                                         mode=COMPILE_MODE)
        print "...done constructing graph for sequence_length=%d" % (
            sequence_length)

        nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
        print "About to compile train function over %d ops [nodes]..." % nnodes
        train_function = pfunc(
            train_inputs,
            train_outputs,
            mode=COMPILE_MODE,
            updates=[(p, p - learning_rate * gp) for p, gp in zip((
                hidden_weights, hidden_biases, output_weights,
                output_biases), (dhidden_weights, dhidden_biases,
                                 doutput_weights, doutput_biases))])
        print "...done constructing graph for sequence_length=%d" % (
            sequence_length)

        cached_functions[cachekey] = (predict_function, train_function,
                                      verbose_predict_function)
    return cached_functions[cachekey]
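
A minimal usage sketch for the graphcw.py variant, assuming cached_functions = {} and that hidden_weights, hidden_biases, output_weights, and output_biases are Theano shared variables initialized by the surrounding module (the updates= list requires shared variables). The embedding dimension, dtype, and learning rate are illustrative assumptions.

import numpy

seq_len, dim = 3, 50  # hypothetical sizes
predict_function, train_function, verbose_predict_function = functions(seq_len)
correct = [numpy.random.randn(1, dim).astype("float32") for _ in range(seq_len)]
noise = [numpy.random.randn(1, dim).astype("float32") for _ in range(seq_len)]

(correct_score_v,) = predict_function(*correct)            # score one window
score_v, prehidden_v = verbose_predict_function(*correct)  # score plus pre-hidden representation

# One training step: the updates= inside pfunc adjusts the shared parameters
# in place, so the caller never touches the weights directly.
outs = train_function(*(correct + noise + [numpy.float32(0.01)]))
# outs = d(correct_inputs) + d(noise_inputs)
#        + [loss, unpenalized_loss, l1penalty, correct_score, noise_score]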