Example #1
    def setup_generate(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()

        # dimensions: (batch, time)
        chord_roots = T.imatrix()

        n_batch, n_time = chord_roots.shape

        specs = [lstmstack.prepare_sample_scan(
                     start_pos=T.alloc(np.array(encoding.STARTING_POSITION, np.int32), (n_batch)),
                     start_out=T.tile(encoding.initial_encoded_form(), (n_batch, 1)),
                     timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                     cur_chord_type=chord_types,
                     cur_chord_root=chord_roots,
                     deterministic_dropout=True)
                 for lstmstack, encoding in zip(self.lstmstacks, self.encodings)]

        updates, all_chosen, all_probs, indiv_probs = helper_generate_from_spec(specs, self.lstmstacks, self.encodings, self.srng, n_batch, n_time, self.bounds, self.normalize_artic_only)

        self.generate_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=all_chosen,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))

        self.generate_visualize_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=[all_chosen, all_probs] + indiv_probs,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))
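Example #1 compiles with the guard only when `self.nanguard` is set, since NanGuardMode checks every intermediate value and slows execution noticeably. Below is a minimal, self-contained sketch of that toggle pattern; the `nanguard` flag and the tiny graph are illustrative assumptions, not code from the project above.

import numpy as np
import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode

nanguard = True  # hypothetical debug flag, e.g. parsed from CLI arguments

x = T.matrix('x')
y = T.exp(x)  # exp overflows to inf for large inputs

# Guard only when debugging; None falls back to Theano's default mode.
f = theano.function(
    [x], y,
    mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
          if nanguard else None))

f(np.zeros((2, 2), dtype=theano.config.floatX))  # fine
# f(np.full((2, 2), 1000.0, dtype=theano.config.floatX))  # would raise under the guard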
Example #2
def test_NanGuardMode():
    # Tests if NanGuardMode is working by feeding in numpy.inf and numpy.nans
    # intentionally. A working implementation should be able to capture all
    # the abnormalities.
    x = tt.matrix()
    w = theano.shared(np.random.randn(5, 7).astype(theano.config.floatX))
    y = tt.dot(x, w)

    fun = theano.function([x],
                          y,
                          mode=NanGuardMode(nan_is_error=True,
                                            inf_is_error=True))
    a = np.random.randn(3, 5).astype(theano.config.floatX)
    infa = np.tile((np.asarray(100.0)**1000000).astype(theano.config.floatX),
                   (3, 5))
    nana = np.tile(np.asarray(np.nan).astype(theano.config.floatX), (3, 5))
    biga = np.tile(np.asarray(1e20).astype(theano.config.floatX), (3, 5))

    fun(a)  # normal values

    # Temporarily silence logger
    _logger = logging.getLogger("theano.compile.nanguardmode")
    try:
        _logger.propagate = False
        with pytest.raises(AssertionError):
            fun(infa)  # INFs
        with pytest.raises(AssertionError):
            fun(nana)  # NANs
        with pytest.raises(AssertionError):
            fun(biga)  # big values
    finally:
        _logger.propagate = True

    # slices
    a = np.random.randn(3, 4, 5).astype(theano.config.floatX)
    infa = np.tile((np.asarray(100.0)**1000000).astype(theano.config.floatX),
                   (3, 4, 5))
    nana = np.tile(np.asarray(np.nan).astype(theano.config.floatX), (3, 4, 5))
    biga = np.tile(np.asarray(1e20).astype(theano.config.floatX), (3, 4, 5))

    x = tt.tensor3()
    y = x[:, tt.arange(2), tt.arange(2), None]
    fun = theano.function([x],
                          y,
                          mode=NanGuardMode(nan_is_error=True,
                                            inf_is_error=True))
    fun(a)  # normal values
    try:
        _logger.propagate = False
        with pytest.raises(AssertionError):
            fun(infa)  # INFs
        with pytest.raises(AssertionError):
            fun(nana)  # NANs
        with pytest.raises(AssertionError):
            fun(biga)  # big values
    finally:
        _logger.propagate = True
Example #3
def test_NanGuardMode():
    """
    Tests if NanGuardMode is working by feeding in numpy.inf and numpy.nans
    intentionally. A working implementation should be able to capture all
    the abnormalities.
    """
    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
    y = T.dot(x, w)

    fun = theano.function(
        [x], y,
        mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )
    a = numpy.random.randn(3, 5).astype(theano.config.floatX)
    infa = numpy.tile(
        (numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
    nana = numpy.tile(
        numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5))
    biga = numpy.tile(
        numpy.asarray(1e20).astype(theano.config.floatX), (3, 5))

    fun(a)  # normal values

    # Temporarily silence logger
    _logger = logging.getLogger("theano.compile.nanguardmode")
    try:
        _logger.propagate = False
        assert_raises(AssertionError, fun, infa)  # INFs
        assert_raises(AssertionError, fun, nana)  # NANs
        assert_raises(AssertionError, fun, biga)  # big values
    finally:
        _logger.propagate = True
Example #4
    def prepare_style(self, scale=1.0):
        """Called in each phase of the optimization: process the style image
        according to the scale, then run it through the model to extract
        intermediate outputs (e.g. sem4_1) and turn them into patches.
        """
        style_image = skimage.transform.rescale(self.style_img_original,
                                                scale) * 255.0
        self.style_image = self.model.prepare_image(style_image)

        style_map = skimage.transform.rescale(
            self.style_map_original * args.semantic_weight, scale) * 255.0
        self.style_map = style_map.transpose(
            (2, 0, 1))[np.newaxis].astype(np.float32)

        # Compile a function to run on the GPU to extract patches for all layers at once.
        extractor = theano.function(
            [self.model.tensor_img, self.model.tensor_map],
            self.extract_patches([
                self.model.tensor_outputs['sem' + l] for l in self.style_layers
            ]),
            mode=NanGuardMode(nan_is_error=True,
                              inf_is_error=True,
                              big_is_error=False))
        result = extractor(self.style_image, self.style_map)

        # For each layer, we now have a set of patches and their magnitude.
        for layer, patches, norms in zip(self.style_layers, result[::2],
                                         result[1::2]):
            l = self.model.network['nn' + layer]
            l.N = theano.shared(norms)
            l.W.set_value(patches)
            l.num_filters = patches.shape[0]
            print('  - Style layer sem{}: {} patches in {:,}kb.'.format(
                layer, patches.shape[0], patches.size // 1000))
Example #5
    def get_fns(self,
                input_dim=123,
                p_learning_rate=0.01,
                d_learning_rate=0.0001,
                p=0.23928176569346055):
        x = T.matrix('X')
        y = T.vector('y')

        mlp, updates, cost, probs = self.primal_step(x, y, p_learning_rate,
                                                     input_dim)
        train_fn = theano.function([x, y], [cost],
                                   updates=updates,
                                   mode=NanGuardMode(nan_is_error=True,
                                                     inf_is_error=True,
                                                     big_is_error=True))

        # Calculate Validation in batch_mode for speedup
        valid_th_fns = theano.function([x], probs)

        def valid_fn(x, y):
            probs = valid_th_fns(x)
            f_beta = self.get_cost(y, probs)
            return f_beta

        return train_fn, valid_fn
Example #6
def test_nan_guard_mode():
    # Also test that abs uint* and bool have c code.
    for dtype in ["uint8", "int64", "bool"]:
        x = tensor.vector(dtype=dtype)
        y = x + 1
        mode = NanGuardMode(nan_is_error=True, optimizer=mode_with_gpu.optimizer)
        f = theano.function([x], y, mode=mode)
        d = np.asarray([23, 7]).astype(dtype)
        assert np.allclose(f(d), d + 1)
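NaN and inf can only arise in floating-point data, so for integer and bool inputs the guard compiles and passes values through unchanged, which is what this test checks. A hedged CPU-only restatement follows; the original uses `mode_with_gpu.optimizer` from the GPU test suite, so relying on the default optimizer here is an assumption.

import numpy as np
import theano
import theano.tensor as tensor
from theano.compile.nanguardmode import NanGuardMode

for dtype in ["uint8", "int64", "bool"]:
    x = tensor.vector(dtype=dtype)
    f = theano.function([x], x + 1, mode=NanGuardMode(nan_is_error=True))
    d = np.asarray([23, 7]).astype(dtype)
    assert np.allclose(f(d), d + 1)  # integer data can never hold a NaN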
Example #7
    def prepare_style(self, scale=1.0):
        """Called in each phase of the optimization: process the style image
        according to the scale, then run it through the model to extract
        intermediate outputs (e.g. sem4_1) and turn them into patches.
        """
        style_image = skimage.transform.rescale(self.style_img_original,
                                                scale) * 255.0
        self.style_image = self.model.prepare_image(style_image)

        style_map = skimage.transform.rescale(self.style_map_original,
                                              scale) * 255.0
        self.style_map = style_map.transpose(
            (2, 0, 1))[np.newaxis].astype(np.float32)

        # Workaround for Issue #8. The cause is unclear; NaN seems to appear in a
        # convolution node on some OS X installations. https://github.com/alexjc/neural-doodle/issues/8
        if args.safe_mode:
            from theano.compile.nanguardmode import NanGuardMode
            flags = {
                'mode':
                NanGuardMode(nan_is_error=True,
                             inf_is_error=True,
                             big_is_error=False)
            }
        else:
            flags = {}

        # Compile a function to run on the GPU to extract patches for all layers at once.
        required_layers = ['conv' + l for l in self.style_layers
                           ] + ['map' + l for l in self.style_layers]
        extractor = theano.function(
            [self.model.tensor_img, self.model.tensor_map],
            self.extract_patches([
                self.model.tensor_outputs[l] for l in required_layers
            ]), **flags)
        result = extractor(self.style_image, self.style_map)

        # For each layer, build it from set of patches and their magnitude.
        def build(layer, prefix, name, patches, norms):
            l = self.model.network[prefix + layer]
            l.N = theano.shared(norms)
            l.W.set_value(patches)
            l.num_filters = patches.shape[0]
            print('  - {} layer {}: {} patches in {:,}kb.'.format(
                name, layer, patches.shape[0], patches.size // 1000))

        if args.style_weight > 0.0:
            result_nn = result[:len(self.style_layers) * 2]
            for layer, *data in zip(self.style_layers, result_nn[::2],
                                    result_nn[1::2]):
                build(layer, 'nn', 'Style', *data)

        if args.semantic_weight > 0.0:
            result_mm = result[len(self.style_layers) * 2:]
            for layer, *data in zip(self.style_layers, result_mm[::2],
                                    result_mm[1::2]):
                build(layer, 'mm', 'Semantic', *data)
Example #8
def rmsprop(lr, tparams, grads, inp, cost, opt_ret=None):
    """
    RMS prop optimizer

    :param lr:
    :param tparams:
    :param grads:
    :param inp:
    :param cost:
    :param opt_ret:
    :return f_grad_shared, f_update:
    """
    zipped_grads = [
        theano.shared(p.get_value() * numpy.float32(0.), name='%s_grad' % k)
        for k, p in iteritems(tparams)
    ]
    running_grads = [
        theano.shared(p.get_value() * numpy.float32(0.), name='%s_rgrad' % k)
        for k, p in iteritems(tparams)
    ]
    running_grads2 = [
        theano.shared(p.get_value() * numpy.float32(0.), name='%s_rgrad2' % k)
        for k, p in iteritems(tparams)
    ]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g**2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, [cost],
                                    updates=zgup + rgup + rg2up,
                                    profile=profile,
                                    mode=NanGuardMode(nan_is_error=True,
                                                      inf_is_error=True,
                                                      big_is_error=True))

    updir = [
        theano.shared(p.get_value() * numpy.float32(0.), name='%s_updir' % k)
        for k, p in iteritems(tparams)
    ]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg**2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
                                            running_grads2)]
    param_up = [(p, p + udn[1])
                for p, udn in zip(itervalues(tparams), updir_new)]
    f_update = theano.function([lr], [],
                               updates=updir_new + param_up,
                               on_unused_input='ignore',
                               profile=profile)

    return f_grad_shared, f_update
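A hedged usage sketch for the optimizer above; the toy least-squares model, the shapes, and the module-level `profile = False` are assumptions made for illustration, since real callers pass in their own `tparams` and cost graphs.

import numpy
import theano
import theano.tensor as tensor
from collections import OrderedDict

profile = False  # rmsprop() above reads this module-level flag when compiling

# Toy least-squares model; tparams maps names to shared variables, as callers do.
tparams = OrderedDict()
tparams['W'] = theano.shared(numpy.zeros((3,), dtype='float32'), name='W')

x = tensor.matrix('x', dtype='float32')
y = tensor.vector('y', dtype='float32')
cost = ((tensor.dot(x, tparams['W']) - y) ** 2).mean()
grads = tensor.grad(cost, wrt=list(tparams.values()))

lr = tensor.scalar(name='lr')
f_grad_shared, f_update = rmsprop(lr, tparams, grads, [x, y], cost)

c = f_grad_shared(numpy.ones((4, 3), dtype='float32'),
                  numpy.ones(4, dtype='float32'))
f_update(0.001)  # this variant takes a fixed 1e-4 step, so lr is accepted but unused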
Example #9
def compile(inputs, outputs, *args, mode=None, **kwargs):
    """
    Use as theano.function().
    TODO: do something useful with non-symbolic output?
    
    Parameters
    ----------
    ...
    mode: In addition to the values accepted by `theano.function`, this also
       accepts a string, to make it easier to use `NanGuardMode`.
       If a string, a `NanGuardMode` object is created; the string should contain
       comma-separated values indicating which values to guard against.
       For example, with the string ``"nan,inf"``, a `NanGuardMode` object is
       created with the options ``NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False)``.
    """
    if not any(
            core.is_theano_object(arg) for arg in itertools.chain(
                [inputs, outputs], args, kwargs.values())):
        raise ValueError(
            "`shim.graph.function()` is undefined for non-symbolic outputs")
    if mode:
        from theano.compile.nanguardmode import NanGuardMode
        if isinstance(mode, NanGuardMode):
            kwargs['mode'] = mode
        elif isinstance(mode, str):
            nanguard = 'nan' in mode
            infguard = 'inf' in mode
            bigguard = 'big' in mode
            kwargs['mode'] = NanGuardMode(nan_is_error=nanguard,
                                          inf_is_error=infguard,
                                          big_is_error=bigguard)
    # Replace dict with OrderedDict to silence Theano warnings; since Python 3.7,
    # dicts have guaranteed insertion order.
    if sys.version_info.major >= 3 and sys.version_info.minor >= 7:
        args = tuple(
            collections.OrderedDict(a) if type(a) is dict else a for a in args)
        kwargs = {
            k: collections.OrderedDict(v) if type(v) is dict else v
            for k, v in kwargs.items()
        }
    return core.theano.function(inputs, outputs, *args, **kwargs)
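For illustration, a short sketch of what the string shorthand expands to; the tiny graph is an assumption, and running `compile` itself also requires the surrounding `core`/`shim` helpers.

import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode

x = T.matrix('x')
y = x * 2

# By the convention documented above, compile([x], y, mode="nan,inf")
# builds its mode like this ("big" is absent, so big_is_error=False):
f = theano.function([x], y,
                    mode=NanGuardMode(nan_is_error=True,
                                      inf_is_error=True,
                                      big_is_error=False))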
Example #10
    def buildvalidfun(self, model):
        self.tt.tick("compiling validation function")
        inps, out = self.autobuild_model(model,
                                         *self.traindata,
                                         _trainmode=False)
        if issequence(out):
            out = out[0]
        metrics, newinp = self.buildlosses(out, self.validators)
        inputs = newinp if newinp is not None else inps
        ret = None
        if len(metrics) > 0:
            ret = theano.function(inputs=[x.d for x in inputs] + [self.goldvar],
                                  outputs=metrics,
                                  mode=NanGuardMode(nan_is_error=True,
                                                    inf_is_error=False,
                                                    big_is_error=False))
        else:
            self.tt.msg("NO VALIDATION METRICS DEFINED, RETURNS NONE")
        self.tt.tock("validation function compiled")
        return ret
Example #11
def test_NanGuardMode():
    """
    Tests if NanGuardMode is working by feeding in numpy.inf and numpy.nans
    intentionally. A working implementation should be able to capture all
    the abnormalities.
    """
    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
    y = T.dot(x, w)

    fun = theano.function([x],
                          y,
                          mode=NanGuardMode(nan_is_error=True,
                                            inf_is_error=True))
    a = numpy.random.randn(3, 5).astype(theano.config.floatX)
    infa = numpy.tile(
        (numpy.asarray(100.)**1000000).astype(theano.config.floatX), (3, 5))
    nana = numpy.tile(
        numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5))
    biga = numpy.tile(numpy.asarray(1e20).astype(theano.config.floatX), (3, 5))

    work = [False, False, False]

    fun(a)  # normal values
    try:
        fun(infa)  # INFs
    except AssertionError:
        work[0] = True
    try:
        fun(nana)  # NANs
    except AssertionError:
        work[1] = True
    try:
        fun(biga)  # big values
    except AssertionError:
        work[2] = True

    if not (work[0] and work[1] and work[2]):
        raise AssertionError("NanGuardMode not working.")
Example #12
    def setup_encode(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()
        # dimensions: (batch, time)
        chord_roots = T.imatrix()
        # dimensions: (batch, time)
        relative_posns = [T.imatrix() for _ in self.encodings]
        # dimensions: (batch, time, output_data)
        encoded_melodies = [T.btensor3() for _ in self.encodings]
        n_batch, n_time = chord_roots.shape

        all_activations = []
        for encoding, enc_lstmstack, encoded_melody, relative_pos in zip(
                self.encodings, self.enc_lstmstacks, encoded_melodies,
                relative_posns):
            activations = enc_lstmstack.do_preprocess_scan(
                timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                relative_position=relative_pos,
                cur_chord_type=chord_types,
                cur_chord_root=chord_roots,
                cur_input=encoded_melody,
                deterministic_dropout=True)
            all_activations.append(activations)
        reduced_activations = functools.reduce((lambda x, y: x + y),
                                               all_activations)
        strengths, vects = self.qman.get_strengths_and_vects(
            reduced_activations)

        self.encode_fun = theano.function(
            inputs=[chord_types, chord_roots] + relative_posns +
            encoded_melodies,
            outputs=[strengths, vects],
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True,
                               inf_is_error=True,
                               big_is_error=True) if self.nanguard else None))
Example #13
    def buildvalidfun(self, model, batsize):
        self.tt.tick("validation - autobuilding")
        inps, outps = self.autobuild_model(model,
                                           *self.traindata,
                                           _trainmode=False,
                                           _batsize=batsize)
        assert (len(outps) == 1)
        outp = outps[0]
        self.tt.tock("validation - autobuilt")
        self.tt.tick("compiling validation function")
        metrics, newinp = self.buildlosses(outp, self.validators)
        inputs = newinp if newinp is not None else inps
        ret = None
        if len(metrics) > 0:
            ret = theano.function(inputs=[x.d for x in inputs] + [self.goldvar],
                                  outputs=metrics,
                                  mode=NanGuardMode(nan_is_error=True,
                                                    inf_is_error=False,
                                                    big_is_error=True))
        else:
            self.tt.msg("NO VALIDATION METRICS DEFINED, RETURNS NONE")
        self.tt.tock("validation function compiled")
        return ret
Example #14
    def __init__(
        self, n_dim, n_out, n_chan=1, n_superbatch=12800, opt_alg='adam',
        opt_params={'lr': 1e-3, 'b1': 0.9, 'b2': 0.99}
    ):
        """RBM constructor.
        Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.
        """
        self.numpy_rng = np.random.RandomState(1234)
        self.theano_rng = RandomStreams(self.numpy_rng.randint(2 ** 30))
        self.create_mat = lambda x, y: self.numpy_rng.normal(0, 0.01, (x, y)).astype(theano.config.floatX)

        # save config
        n_batch = opt_params.get('nb')
        self.n_hidden = 100
        self.n_visible = n_chan*n_dim*n_dim  # size of visible layer
        self.n_batch = n_batch
        self.n_qk = 10  # number of components in the MoB used for q
        self.n_mc = 30  # number of Monte Carlo samples from each MoB component

        self.n_dim = n_dim
        self.n_out = n_out
        self.n_superbatch = n_superbatch
        self.alg = opt_alg

        # set up general RBM methods
        AbstractRBM.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params) 

        # create updates
        alpha = T.scalar(dtype=theano.config.floatX)  # learning rate

        # save config
        self.n_class = 2
        self.n_dim = n_dim
        self.n_out = n_out


        self.n_components = self.n_qk
        self.n_samples = self.n_mc
        self.n_tot_samples = self.n_samples*self.n_components


        # create input variables
        D, idx1, idx2 = self.create_inputs()

        # create model
        self.network = self.create_model()

        # create objectives
        loglik, plik = self.create_objectives(D)

        # create gradients
        dL_Theta, dE_Theta, dlogZ_Theta, dL_Phi = self.create_gradients()
        grads = dL_Theta, dE_Theta, dlogZ_Theta, dL_Phi

        # create updates
        uL_Theta, uL_Phi, avg_updates, avg_Theta_updates \
          = self.create_updates(grads, None, alpha, opt_alg, opt_params)
      
        # logF_avg, Z_avg = self.create_llik_estimate(D)
        
        mode = NanGuardMode(nan_is_error=True, inf_is_error=False, big_is_error=False)
        mode = None  # guard left disabled; remove this line to enable NanGuardMode

        common_update1 = OrderedDict(avg_updates.items() + uL_Phi.items())
        self.train_q = theano.function([idx1, idx2], [loglik, plik], 
          updates=common_update1, mode=mode,
          givens={D: self.train_set_x[idx1:idx2]})

        common_update2 = OrderedDict(avg_Theta_updates.items() + uL_Theta.items())
        self.train_p = theano.function([idx1, idx2], [loglik, plik], 
            updates=common_update2, mode=mode, on_unused_input='warn',
            givens={D: self.train_set_x[idx1:idx2]})
        # self.llik = theano.function([D], logF_avg - T.log(Z_avg), mode=mode)

        common_update3 = OrderedDict(common_update1.items() + common_update2.items())
        self.train = theano.function([idx1, idx2], [loglik, plik], 
            updates=common_update3, mode=mode,
            givens={D: self.train_set_x[idx1:idx2]})
Example #15
    def setup_train(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()

        # dimensions: (batch, time)
        chord_roots = T.imatrix()

        # dimensions: (batch, time)
        relative_posns = [T.imatrix() for _ in self.encodings]

        # dimensions: (batch, time, output_data)
        encoded_melodies = [T.btensor3() for _ in self.encodings]

        # dimensions: (batch, time)
        correct_notes = T.imatrix()

        n_batch, n_time = chord_roots.shape

        def _build(det_dropout):
            all_out_probs = []
            for encoding, lstmstack, encoded_melody, relative_pos in zip(self.encodings, self.lstmstacks, encoded_melodies, relative_posns):
                activations = lstmstack.do_preprocess_scan( timestep=T.tile(T.arange(n_time), (n_batch,1)) ,
                                                            relative_position=relative_pos,
                                                            cur_chord_type=chord_types,
                                                            cur_chord_root=chord_roots,
                                                            last_output=T.concatenate([T.tile(encoding.initial_encoded_form(), (n_batch,1,1)),
                                                                                encoded_melody[:,:-1,:] ], 1),
                                                            deterministic_dropout=det_dropout)

                out_probs = encoding.decode_to_probs(activations, relative_pos, self.bounds.lowbound, self.bounds.highbound)
                all_out_probs.append(out_probs)
            reduced_out_probs = functools.reduce((lambda x,y: x*y), all_out_probs)
            if self.normalize_artic_only:
                non_artic_probs = reduced_out_probs[:,:,:2]
                artic_probs = reduced_out_probs[:,:,2:]
                non_artic_sum = T.sum(non_artic_probs, 2, keepdims=True)
                artic_sum = T.sum(artic_probs, 2, keepdims=True)
                norm_artic_probs = artic_probs*(1-non_artic_sum)/artic_sum
                norm_out_probs = T.concatenate([non_artic_probs, norm_artic_probs], 2)
            else:
                normsum = T.sum(reduced_out_probs, 2, keepdims=True)
                normsum = T.maximum(normsum, constants.EPSILON)
                norm_out_probs = reduced_out_probs/normsum
            return Encoding.compute_loss(norm_out_probs, correct_notes, True)

        train_loss, train_info = _build(False)
        updates = Adam(train_loss, self.get_optimize_params(), lr=self.learning_rate_var)

        eval_loss, eval_info = _build(True)

        self.loss_info_keys = list(train_info.keys())

        self.update_fun = theano.function(
            inputs=[chord_types, chord_roots, correct_notes] + relative_posns + encoded_melodies,
            outputs=[train_loss]+list(train_info.values()),
            updates=updates,
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))

        self.eval_fun = theano.function(
            inputs=[chord_types, chord_roots, correct_notes] + relative_posns + encoded_melodies,
            outputs=[eval_loss]+list(eval_info.values()),
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))
Example #16
args = parser.parse_args()
if args.save != default_save and not args.overwrite and os.path.isfile(
        'interim/%s_model.pkl' % args.save):
    raise Exception(
        'A model with this name was already saved. Provide the --overwrite flag (-o) when trying to --save over an existing model.'
    )

import numpy as np
import scipy
import theano
from theano import tensor as T
from six.moves import cPickle
import sys
sys.setrecursionlimit(100000)
from theano.compile.nanguardmode import NanGuardMode
ngm = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False)
from sklearn.svm import SVC

from vcd import image_iter, models, util

# Configuration
cfg = {

    # General
    'patch_shape': (33, 33),
    'aug_noise_std': 0.05,
    'train_test_split':
    0.8,  # Proportion of dataset to use for the train+validation set if the test set is enabled.
    'train_valid_split':
    0.75,  # Proportion of train+validation set to use for the training set.
    # Note that the final training set size is (label_count * train_test_split * train_valid_split).
Example #17
def main():
    # At startup, the values are read from the configuration file
    trainingSize, validationSize, batchSize, testDataSize, nLayer, num_epochs, getFromFile = getConfigData()

    printAndSave("Loading data...", dt=False)

    # Depending on the choice in the configuration file, the metadata
    # is either generated or read from a file
    if (getFromFile):
        printAndSave("Getting metadata from file...", dt=False)
        getMetadata = getMetadataFromFile
    else:
        printAndSave("Calculating metadata...", dt=False)
        getMetadata = calculateMetadata

    # Get the data for the input collections and for the validation labels
    train, trainTargets, val, valTargets, test, \
        testTargets, metadata, colsToRemove = \
        getTrainingTestLists(traiSize=trainingSize,
                             valSize=validationSize,
                             testSize=testDataSize,
                             getMetadata=getMetadata)

    # Prepare the Theano variables for the input
    # and for the labels used to validate the results
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')

    # Create the FNN
    network = buid_MLP(input_var=input_var,
                       depth=nLayer,
                       drop_input=.2,
                       drop_hidden=.5,
                       nCols=len(metadata))

    # Get the prediction from the output of the MLP
    prediction = lasagne.layers.get_output(network)
    # Expression for the loss
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # Create the update expressions that modify the
    # parameters at each training step
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.momentum(loss,
                                       params,
                                       learning_rate=0.01,
                                       momentum=0.9)

    # Get the prediction from the MLP output for validation and testing;
    # unlike the previous one, the dropout layers are disabled here by passing
    # through the whole network with deterministic set to True
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()

    # Expression for the classification accuracy, computed from
    # the prediction obtained at the output of the MLP
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile the function that runs one training step on a small
    # batch of data; it returns the loss.
    # NanGuardMode is enabled so that an error is raised for numbers
    # that are too large; this serves to check the validity of the
    # data normalization
    train_fn = theano.function([input_var, target_var],
                               loss,
                               updates=updates,
                               name="TrainingFunc",
                               mode=NanGuardMode(nan_is_error=True,
                                                 inf_is_error=True,
                                                 big_is_error=True))

    # Returns the loss and accuracy;
    # used for both validation and testing
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             name="ValidationFunc")

    # Start the training
    printAndSave("*" * 53, dt=False)
    printAndSave("Starting training...", dt=False)
    training_start_time = time.time()

    for epoch in range(num_epochs):
        start_time = time.time()
        # Each epoch makes a complete pass over the training data
        train_err = 0
        train_batches = 0
        for batch in iterate_minibatches(train, trainTargets, batchSize,
                                         metadata, colsToRemove):
            inputs, targets = batch
            tmp = train_fn(inputs, targets)
            train_err += tmp
            train_batches += 1

        # Validation for this epoch
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(val, valTargets, batchSize, metadata,
                                         colsToRemove):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Print the results for this epoch
        printAndSave("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs,
            time.time() - start_time),
                     dt=False)
        printAndSave("  training loss:\t\t{:.6f}".format(train_err /
                                                         train_batches),
                     dt=False)
        printAndSave("  validation loss:\t\t{:.6f}".format(val_err /
                                                           val_batches),
                     dt=False)
        printAndSave("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100),
                     dt=False)

        # #######################################
        # Enable to compute the error and accuracy
        # on the test data at each epoch
        # #######################################
        # Run the test
        # start_time = time.time()
        # test_err = 0
        # test_acc = 0
        # test_batches = 0
        # for batch in iterate_minibatches(test,testTargets, batchSize, metadata, colsToRemove):
        #     inputs, targets = batch
        #     err, acc = val_fn(inputs, targets)
        #     test_err += err
        #     test_acc += acc
        #     test_batches += 1
        # # Print the test results
        # printAndSave("Final results:",dt=False)
        # printAndSave("  test loss:\t\t\t{:.6f}".format(test_err / test_batches),dt=False)
        # printAndSave("  test accuracy:\t\t{:.2f} %".format(
        #     test_acc / test_batches * 100),dt=False)
        # printAndSave("Tests in {}".format(time.time()-start_time),dt=False)

    printAndSave("Training in {}".format(time.time() - training_start_time),
                 dt=False)

    # Run the test
    start_time = time.time()
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(test, testTargets, batchSize, metadata,
                                     colsToRemove):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1

    # Print the test results
    printAndSave("Final results:", dt=False)
    printAndSave("  test loss:\t\t\t{:.6f}".format(test_err / test_batches),
                 dt=False)
    printAndSave("  test accuracy:\t\t{:.2f} %".format(test_acc /
                                                       test_batches * 100),
                 dt=False)
    printAndSave("Tests in {}".format(time.time() - start_time), dt=False)
Example #18
    def __init__(self,parameters=None):
        X = tensor.tensor4()
        Y = tensor.lvector()
        self.params = parameters
        if parameters is None:
            W1 = theano.shared(np.random.randn(32,3,5,5).astype(theano.config.floatX)*0.01)
            b1 = theano.shared(np.zeros(32,).astype(theano.config.floatX))
            W2 = theano.shared(np.random.randn(64,32,5,5).astype(theano.config.floatX)*0.01)
            b2 = theano.shared(np.zeros(64,).astype(theano.config.floatX))
            W3 = theano.shared(np.random.randn(128,64,5,5).astype(theano.config.floatX)*0.01)
            b3 = theano.shared(np.zeros(128,).astype(theano.config.floatX))
            W5 = theano.shared(np.random.randn(28800,1084).astype(theano.config.floatX)*0.01)
            # b5 = theano.shared(np.zeros(64*9*9,))
            W6 = theano.shared(np.random.randn(1084,2).astype(theano.config.floatX)*0.01)
            b6 = theano.shared(np.zeros(2,).astype(theano.config.floatX))
        else:
            W1 = theano.shared(parameters["W1"])
            b1 = theano.shared(parameters["b1"])
            W2 = theano.shared(parameters["W2"])
            b2 = theano.shared(parameters["b2"])
            W3 = theano.shared(parameters["W3"])
            b3 = theano.shared(parameters["b3"])
            W5 = theano.shared(parameters["W5"])
            W6 = theano.shared(parameters["W6"])
            b6 = theano.shared(parameters["b6"])

        layer_1 = conv2d(X,W1)
        layer_1_pool = pool_2d(layer_1,(2,2),ignore_border=True)
        layer_1_output = tensor.tanh(layer_1_pool+b1.dimshuffle('x', 0, 'x', 'x'))

        layer_2 = conv2d(layer_1_output, W2)
        layer_2_pool = pool_2d(layer_2,(2,2),ignore_border=True)
        layer_2_output = tensor.tanh(layer_2_pool+b2.dimshuffle('x', 0, 'x', 'x'))

        layer_3 = conv2d(layer_2_output, W3)
        layer_3_pool = pool_2d(layer_3,(2,2),ignore_border=True)
        layer_3_output = tensor.tanh(layer_3_pool+b3.dimshuffle('x', 0, 'x', 'x'))

        layer_4 = layer_3_output.flatten(2)

        layer_5 = tensor.dot(layer_4,W5)
        layer_5_output = layer_5.tanh()

        layer_6 = tensor.dot(layer_5_output, W6) + b6

        #softmax instead of sigmoid.
        layer_6_output = softmax(layer_6) + 0.0000001
        output = tensor.argmax(layer_6_output,axis=1)
        # cost = ((Y-layer_6_output)**2).sum()

        # Negative Log Likelihood
        cost = -tensor.mean(tensor.log(layer_6_output)[tensor.arange(Y.shape[0]), Y], dtype=theano.config.floatX)

        error = tensor.mean(tensor.neq(output, Y))

        parameters = [W1,b1,W2,b2,W3,b3,W5,W6,b6]

        updates = self.GradientDescent(cost,parameters)

        params = {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3, "W5": W5, "W6": W6, "b6": b6}
        self.parameters = theano.function([],params)
        mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True).excluding('local_elemwise_fusion','inplace')
        self.train = theano.function([X, Y], cost,updates=updates, mode=mode)
        self.test = theano.function([X, Y], error)
        self.predict = theano.function([X],output)
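The `.excluding(...)` call above stacks optimization exclusions on top of the guard, which NanGuardMode inherits from Theano's `Mode`. A minimal sketch of the same idea; the toy graph is an illustrative assumption.

import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode

x = T.vector('x')
# Excluding elemwise fusion and in-place optimizations keeps intermediate
# results as separate ops, making it easier to localize a misbehaving node.
mode = NanGuardMode(nan_is_error=True, inf_is_error=True,
                    big_is_error=True).excluding('local_elemwise_fusion',
                                                 'inplace')
f = theano.function([x], T.tanh(x) + 1, mode=mode)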
Example #19
    def __init__(self,
                 K,
                 vocab_size,
                 num_chars,
                 W_init,
                 S_init,
                 nhidden,
                 embed_dim,
                 dropout,
                 train_emb,
                 sub_dim,
                 use_feat,
                 gating_fn,
                 save_attn=False):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.sub_dim = sub_dim
        self.learning_rate = LEARNING_RATE
        self.num_chars = num_chars
        self.use_feat = use_feat
        self.save_attn = save_attn
        self.gating_fn = gating_fn

        self.use_subs = self.sub_dim != 0
        if W_init is None:
            W_init = lasagne.init.GlorotNormal().sample(
                (vocab_size, self.embed_dim))
        # W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))
        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
                T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')
        feat_var = T.imatrix('feat')
        doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
        tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
        cloze_var = T.ivector('cloze')
        self.inps = [
            doc_var, doc_toks, query_var, qry_toks, cand_var, target_var,
            docmask_var, qmask_var, tok_var, tok_mask, candmask_var, feat_var,
            cloze_var
        ]

        self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
            self.build_network(K, vocab_size, W_init, S_init))

        self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs,
                                                       target_var).mean()
        self.eval_fn = lasagne.objectives.categorical_accuracy(
            self.predicted_probs, target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val,
                                                      target_var).mean()
        eval_fn_val = lasagne.objectives.categorical_accuracy(
            predicted_probs_val, target_var).mean()

        self.params = L.get_all_params(self.network, trainable=True)

        updates = lasagne.updates.adam(self.loss_fn,
                                       self.params,
                                       learning_rate=self.learning_rate)

        self.train_fn = theano.function(
            self.inps, [self.loss_fn, self.eval_fn, self.predicted_probs],
            updates=updates,
            on_unused_input='ignore')
        self.validate_fn = theano.function(
            self.inps,
            [loss_fn_val, eval_fn_val, predicted_probs_val] + attentions,
            mode=NanGuardMode(nan_is_error=True,
                              inf_is_error=True,
                              big_is_error=True),
            on_unused_input='ignore')
Example #20
def train_loop(inputs,
               cost,
               train_data,
               times,
               prints=None,
               inject_total_iters=False,
               test_data=None,
               callback=None,
               optimizer=lasagne.updates.adam,
               save_params=False,
               nan_guard=False):

    params = lib.search(cost, lambda x: hasattr(x, 'param'))
    lib.print_params_info(params)

    grads = T.grad(cost, wrt=params, disconnected_inputs='warn')

    grads = [T.clip(g, lib.floatX(-1), lib.floatX(1)) for g in grads]

    updates = optimizer(grads, params)

    if prints is None:
        prints = [('cost', cost)]
    else:
        prints = [('cost', cost)] + prints

    print("Compiling train function...")
    if nan_guard:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True,
                            inf_is_error=True,
                            big_is_error=True)
    else:
        mode = None
    train_fn = theano.function(inputs, [p[1] for p in prints],
                               updates=updates,
                               on_unused_input='warn',
                               mode=mode)

    print("Compiling eval function...")
    eval_fn = theano.function(inputs, [p[1] for p in prints],
                              on_unused_input='warn')

    print("Training!")

    total_iters = 0
    total_seconds = 0.
    last_print = 0
    last_gen = 0

    if len(times) >= 4:
        gen_every = times[3]
    else:
        gen_every = times[1]

    if len(times) >= 5:
        early_stop = times[4]
        if len(times) >= 6:
            early_stop_min = times[5]
        else:
            early_stop_min = 0
    else:
        early_stop = None
        early_stop_min = None

    best_test_cost = np.inf
    best_test_cost_iter = 0.

    all_outputs = []
    all_stats = []
    for epoch in itertools.count():

        generator = train_data()
        while True:
            try:
                inputs = next(generator)
            except StopIteration:
                break

            if inject_total_iters:
                inputs = [np.int32(total_iters)] + list(inputs)

            start_time = time.time()
            outputs = train_fn(*inputs)
            total_seconds += time.time() - start_time
            total_iters += 1

            all_outputs.append(outputs)

            if total_iters == 1:
                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.register_crash_notifier()
                except ImportError:
                    pass

            if (times[0]=='iters' and total_iters-last_print == times[1]) or \
                (times[0]=='seconds' and total_seconds-last_print >= times[1]):

                mean_outputs = np.array(all_outputs).mean(axis=0)

                if test_data is not None:
                    if inject_total_iters:
                        test_outputs = [
                            eval_fn(np.int32(total_iters), *inputs)
                            for inputs in test_data()
                        ]
                    else:
                        test_outputs = [
                            eval_fn(*inputs) for inputs in test_data()
                        ]
                    test_mean_outputs = np.array(test_outputs).mean(axis=0)

                stats = collections.OrderedDict()
                stats['epoch'] = epoch
                stats['iters'] = total_iters
                for i, p in enumerate(prints):
                    stats['train ' + p[0]] = mean_outputs[i]
                if test_data is not None:
                    for i, p in enumerate(prints):
                        stats['test ' + p[0]] = test_mean_outputs[i]
                stats['secs'] = total_seconds
                stats['secs/iter'] = total_seconds / total_iters

                if test_data is not None and (
                        stats['test cost'] < best_test_cost or
                        (early_stop_min is not None and
                         total_iters <= early_stop_min)):
                    best_test_cost = stats['test cost']
                    best_test_cost_iter = total_iters

                print_str = ""
                for k, v in stats.items():
                    if isinstance(v, int):
                        print_str += "{}:{}\t".format(k, v)
                    else:
                        print_str += "{}:{:.4f}\t".format(k, v)
                print(print_str[:-1])  # omit the last \t

                all_stats.append(stats)

                all_outputs = []
                last_print += times[1]

            if (times[0]=='iters' and total_iters-last_gen==gen_every) or \
                (times[0]=='seconds' and total_seconds-last_gen >= gen_every):
                tag = "iters{}_time{}".format(total_iters, total_seconds)
                if callback is not None:
                    callback(tag)
                if save_params:
                    lib.save_params('params_{}.pkl'.format(tag))

                last_gen += gen_every

            if (times[0] == 'iters' and total_iters == times[2]) or \
                (times[0] == 'seconds' and total_seconds >= times[2]) or \
                (test_data is not None and early_stop is not None and total_iters > (3*early_stop) and (total_iters-best_test_cost_iter) > early_stop):

                if (test_data is not None and early_stop is not None
                        and total_iters > (3 * early_stop)
                        and (total_iters - best_test_cost_iter) > early_stop):
                    print('Early stop! Best test cost was {} at iter {}'.format(
                        best_test_cost, best_test_cost_iter))

                print('Done!')

                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.send_sms("done!")
                except ImportError:
                    pass

                return all_stats
Example #21
y = net.fprop(x**2 / 2.)
cost = y.mean()

parameters = net.params

from blocks.algorithms import Scale
from blocks.algorithms import GradientDescent
optimizer = Scale(0.)

print("Calling Algorithm")
algorithm = GradientDescent(
    #gradients=grads, parameters=parameters,
    cost=cost,
    parameters=parameters,
    step_rule=optimizer)

from theano.compile.nanguardmode import NanGuardMode
fun = theano.function(inputs=[x],
                      outputs=[cost],
                      updates=algorithm.updates,
                      mode=NanGuardMode(nan_is_error=True,
                                        inf_is_error=True,
                                        big_is_error=True))
#npx = getnumpyf32((5, batch_size, channels,)+image_size)
npx = np.random.random((5, 32, 50)).astype(np.float32)
out = fun(npx)
#for i,v in enumerate(parameters):
#    if 'U' in v.name:
#        theano.printing.debugprint(algorithm.updates[i][1])
#        break
Example #22
def build_model(tparams, options):
    opt_ret = dict()

    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')

    # time_steps
    n_timesteps = x_mask.shape[0]
    n_timesteps_trg = y_mask.shape[0]
    n_samples = x_mask.shape[1]

    # word embedding for forward rnn (source)
    emb = tparams['Wemb'][x.flatten()]
    emb = emb.reshape([n_timesteps, n_samples, options['dim_word']])
    proj = get_layer(options['encoder'])[1](tparams,
                                            emb,
                                            options,
                                            prefix='encoder',
                                            mask=x_mask)

    # for reverse RNN: bi-directional RNN encoder
    if options.get('birnn', False):
        xr = x[::-1]
        xr_mask = x_mask[::-1]

        embr = tparams['Wemb'][xr.flatten()]
        embr = embr.reshape([n_timesteps, n_samples, options['dim_word']])
        projr = get_layer(options['encoder'])[1](tparams,
                                                 embr,
                                                 options,
                                                 prefix='encoder_r',
                                                 mask=xr_mask)
        ctx = concatenate([proj[0], projr[0][::-1]], axis=proj[0].ndim - 1)

    else:
        ctx = proj[0]  # context vectors

    # mean of the context (across time) will be used to initialize decoder rnn
    ctx_mean = (ctx * x_mask[:, :, None]).sum(0) / x_mask.sum(0)[:, None]

    # or you can use the last state of forward + backward encoder rnns
    # ctx_mean = concatenate([proj[0][-1], projr[0][-1]], axis=proj[0].ndim-2)

    # initial decoder state
    init_state = get_layer('ff')[1](tparams,
                                    ctx_mean,
                                    options,
                                    prefix='ff_state',
                                    activ='tanh')

    # word embedding (target), we will shift the target sequence one time step
    # to the right. This is done because of the bi-gram connections in the
    # readout and decoder rnn. The first target will be all zeros and we will
    # not condition on the last output.
    emb = tparams['Wemb_dec'][y.flatten()]
    emb = emb.reshape([n_timesteps_trg, n_samples, options['dim_word']])
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    # decoder - pass through the decoder conditional gru with attention
    proj = get_layer(options['decoder'])[1](tparams,
                                            emb,
                                            options,
                                            prefix='decoder',
                                            mask=y_mask,
                                            context=ctx,
                                            context_mask=x_mask,
                                            one_step=False,
                                            init_state=init_state)
    # hidden states of the decoder gru
    proj_h = proj[0]

    # weighted averages of context, generated by attention module
    ctxs = proj[1]

    # weights (alignment matrix)
    opt_ret['dec_alphas'] = proj[2]  # --> to show the attention weights

    # compute word probabilities
    logit_lstm = get_layer('ff')[1](tparams,
                                    proj_h,
                                    options,
                                    prefix='ff_logit_lstm',
                                    activ='linear')
    logit_prev = get_layer('ff')[1](tparams,
                                    emb,
                                    options,
                                    prefix='ff_logit_prev',
                                    activ='linear')
    logit_ctx = get_layer('ff')[1](tparams,
                                   ctxs,
                                   options,
                                   prefix='ff_logit_ctx',
                                   activ='linear')
    logit = tensor.tanh(logit_lstm + logit_prev + logit_ctx)

    # dropout (noise)
    if options['use_dropout']:
        logit = dropout_layer(logit, use_noise, trng)
    logit = get_layer('ff')[1](tparams,
                               logit,
                               options,
                               prefix='ff_logit',
                               activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(
        logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # compute the cost (negative loglikelihood)
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words'] + y_flat

    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)

    # we will build an additional function for computing costs
    f_cost = theano.function([ctx, x_mask, y, y_mask],
                             cost,
                             mode=NanGuardMode(nan_is_error=True,
                                               inf_is_error=True,
                                               big_is_error=True))
    return trng, use_noise, x, x_mask, y, y_mask, opt_ret, cost, f_cost
Example #23
def trainer(
    r=5,
    dim_word=1000,
    dim=1000,
    trainpath=[
        '../datasets/simQA_test.txt', '../datasets/cand_ent_test.txt',
        '../datasets/cand_rel_test.txt'
    ],
    validpath=[
        '../datasets/simQA_test.txt', '../datasets/cand_ent_test.txt',
        '../datasets/cand_rel_test.txt'
    ],
    dict_character='../datasets/dict/dict.pkl',
    dict_relation='../datasets/dict/dict.pkl',
    dict_word='../datasets/dict/dict.pkl',
    relation_pattern='RWC',
    batch_size=16,
    valid_batch_size=16,
    maxlen=200,
    learning_rate=0.001,
    max_epochs=10,
    dispFreq=100,
    saveFreq=1000,
    validFreq=1000,
    saveto='model.npz',
    overwrite=True,
    patience=10,
    predicate_num=150,
    lstm_end='average',
    lstm_layers=2,
    word=False,
    word_dict_num=5000,
    relation_dict_num=8000,
    character_dict_num=200,
    cross=True,
    one_layer=False,
    en_decode_type='ff',
    qu_split=False,
    structure_number=3,
    en_pooling_type='average',  # only for pooling question when entity decoding
    relation_attention='target_attention'):
    # theano.config.warn_float64 = "raise"
    model_options = locals().copy()
    train = TextIterator(trainpath[0],
                         trainpath[1],
                         trainpath[2],
                         dict_character,
                         dict_word,
                         dict_relation,
                         predicate_num=predicate_num,
                         batch_size=model_options['batch_size'],
                         maxlen=model_options['maxlen'])

    valid = TextIterator(validpath[0],
                         validpath[1],
                         validpath[2],
                         dict_character,
                         dict_word,
                         dict_relation,
                         predicate_num=predicate_num,
                         batch_size=model_options['batch_size'],
                         maxlen=model_options['maxlen'])

    InitParamsIns = InitParams()
    tparams = InitParamsIns.inittparams(model_options)
    ModelIns = MODEL()
    print('Build Train and Valid Model...', end=' ')
    x, x_mask, y, y_mask, z_rel, z_mask_rel, z_wor, chz_mask_wor, z_cha, chz_mask_cha, t, cost, errors = ModelIns.BuildTrainModel(
        tparams, model_options)
    x_v, x_mask_v, y_v, y_mask_v, z_rel_v, z_mask_rel_v, z_wor_v, chz_mask_wor_v, z_cha_v, chz_mask_cha_v, t_v, errors_v, en_errors_v, pr_errors_v = ModelIns.BuildValidTestModel(
        tparams, model_options)
    print('Done')
    inputs_v = [
        x_v, x_mask_v, y_v, y_mask_v, z_rel_v, z_mask_rel_v, z_wor_v,
        chz_mask_wor_v, z_cha_v, chz_mask_cha_v, t_v
    ]
    inputs = [
        x, x_mask, y, y_mask, z_rel, z_mask_rel, z_wor, chz_mask_wor, z_cha,
        chz_mask_cha, t
    ]

    # alpha=[pr_alpha]
    outputs = [cost, errors]
    outputs_v = [errors_v, en_errors_v, pr_errors_v]
    func_ctx = theano.function(inputs,
                               outputs,
                               on_unused_input='ignore',
                               allow_input_downcast=True,
                               mode=NanGuardMode(nan_is_error=True,
                                                 inf_is_error=True,
                                                 big_is_error=True))
    func_valid_error = theano.function(inputs_v,
                                       outputs_v,
                                       on_unused_input='ignore',
                                       allow_input_downcast=True)

    # func_p = theano.function(inputs,p,on_unused_input = 'ignore',allow_input_downcast=True)
    # func_alpha = theano.function(inputs,pr_alpha,on_unused_input = 'ignore',allow_input_downcast=True)
    print 'Building grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'

    print 'Building optimizers...',
    lr = tensor.scalar(name='lr')
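    # Two-function optimizer pattern: f_grad_shared evaluates the cost and
    # stores the gradients in shared variables; f_update then applies the
    # accumulated adadelta step. (Assumption: this adadelta follows the common
    # dl4mt-style interface, where lr is kept only for a uniform signature.)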
    f_grad_shared, f_update = adadelta(lr, tparams, grads, inputs, cost)
    print 'Done'

    uidx = 0
    best_p = None
    estop = False
    bad_counter = 0
    history_right = []
    for epoch_idx in xrange(max_epochs):
        n_samples = 0
        for source, target, entity, predicate_relation, predicate_word, predicate_character in train:
            n_samples += len(source)
            uidx += 1

            if source is None:
                print 'Minibatch with zero sample'
                uidx -= 1
                continue

            prepare_layer = PrepareDate(source, entity, predicate_character)
            x, x_mask, y, y_mask, z_relation, \
            z_mask_relation, z_word, z_mask_word, z_character, \
            z_mask_character, t = prepare_layer.prepare_valid_test_date_for_cross(source, entity,
                                                                                  predicate_relation, predicate_word,
                                                                                  predicate_character, target)
            ud_start = time.time()

            cost = f_grad_shared(x, x_mask, y, y_mask, z_relation,
                                 z_mask_relation, z_word, z_mask_word,
                                 z_character, z_mask_character, t)
            # ctx_qu_rel,ctx_qu_wor,ctx_qu_cha,ctx_pr_rel,ctx_pr_wor,ctx_pr_cha=func_p(x, x_mask, y, y_mask, z_relation,
            # z_mask_relation,z_word, z_mask_word,z_character,
            # z_mask_character,t)
            f_update(learning_rate)
            ud = time.time() - ud_start
            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                break
            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', epoch_idx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud, 'learning_rate', learning_rate

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving the best model...',
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto,
                            history_errs=history_right,
                            uidx=uidx,
                            **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'
                # save with uidx
                if not overwrite:
                    print 'Saving the model at iteration {0}...'.format(uidx),
                    saveto_uidx = '{0}.iter{1}.npz'.format(
                        os.path.splitext(saveto)[0], uidx)
                    numpy.savez(saveto_uidx,
                                history_errs=history_right,
                                uidx=uidx,
                                **unzip(tparams))
                    print 'Done'
            # validate the model on the validation set and early stop if necessary
            if numpy.mod(uidx, validFreq) == 0:
                rights = []
                for source, target, entity, predicate_relation, predicate_word, predicate_character in valid:
                    valid_prepare_layer = PrepareDate(source, entity,
                                                      predicate_character)
                    x, x_mask, y, y_mask, z_relation, \
                    z_mask_relation,z_word, z_mask_word,z_character, \
                    z_mask_character,t= valid_prepare_layer.prepare_valid_test_date_for_cross(source, entity,
                                                                                predicate_relation,predicate_word,
                                                                                predicate_character,target)

                    right = func_valid_error(x, x_mask, y, y_mask, z_relation,
                                             z_mask_relation, z_word,
                                             z_mask_word, z_character,
                                             z_mask_character, t)

                    rights.append(right[0])

                right_arr = numpy.array(rights)
                valid_right = right_arr.mean() / valid_batch_size
                history_right.append(valid_right)

                if uidx == 0 or valid_right >= numpy.array(
                        history_right).max():
                    best_p = unzip(tparams)
                    bad_counter = 0
                if len(history_right) > patience and valid_right <= numpy.array(
                        history_right)[:-patience].max():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break
                # if numpy.isnan(valid_err):
                #     ipdb.set_trace()
                print 'Valid ', valid_right
        print 'seen %d samples' % n_samples
        if estop:
            break
        print 'Saving the model at epoch {0}...'.format(epoch_idx),
        saveto_uidx = '{0}.epoch{1}.npz'.format(
            os.path.splitext(saveto)[0], epoch_idx)
        numpy.savez(saveto_uidx,
                    history_errs=history_right,
                    uidx=uidx,
                    **unzip(tparams))
        print 'Done'
    if best_p is not None:
        zipp(best_p, tparams)

    rights = []
    for source, target, entity, predicate_relation, predicate_word, predicate_character in valid:
        valid_prepare_layer = PrepareDate(source, entity, predicate_character)
        x, x_mask, y, y_mask, z_relation, \
        z_mask_relation,z_word, z_mask_word,z_character, \
        z_mask_character,t= valid_prepare_layer.prepare_valid_test_date_for_cross(source, entity,
                                                                    predicate_relation,predicate_word,
                                                                    predicate_character,target)
        right = func_valid_error(x, x_mask, y, y_mask, z_relation,
                                 z_mask_relation, z_word, z_mask_word,
                                 z_character, z_mask_character, t)
        rights.append(right[0])

    right_arr = numpy.array(rights)
    valid_right = right_arr.mean() / valid_batch_size

    print 'Valid ', valid_right
    # train_err =numpy.array(p_train).mean()/batch_size
    params = copy.copy(best_p) if best_p is not None else unzip(tparams)
    numpy.savez(saveto,
                zipped_params=best_p,
                history_errs=history_right,
                uidx=uidx,
                **params)
    return valid_right
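
The validation block above implements patience-based early stopping: training halts once the score has failed to beat the best score recorded at least `patience` checks earlier for more than `patience` consecutive validations. A self-contained sketch of the same rule (the score sequence is made up):

import numpy as np

def no_recent_improvement(history, patience):
    # True once the latest score fails to beat the best score recorded
    # at least `patience` validations ago (higher is better).
    return (len(history) > patience
            and history[-1] <= np.array(history)[:-patience].max())

scores = []        # mirrors history_right above
bad_counter = 0
patience = 2
for score in [0.50, 0.55, 0.56, 0.55, 0.54, 0.53, 0.52]:
    scores.append(score)
    if score >= max(scores):  # new best: reset, as in the trainer
        bad_counter = 0
    elif no_recent_improvement(scores, patience):
        bad_counter += 1
        if bad_counter > patience:
            print('Early Stop!')
            break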
Example #24
0
    def setup(self):
        """
        Set up the model to train.
        """

        # input_words: shape (n_batch, n_sentence, sentence_len)
        input_words = T.itensor3()
        n_batch, n_sentences, sentence_len = input_words.shape
        # query_words: shape (n_batch, query_len)
        query_words = T.imatrix()
        # correct_output: shape (n_batch, ?, num_output_words)
        correct_output = T.ftensor3()

        # graph_num_new_nodes: shape(n_batch, n_sentence)
        graph_num_new_nodes = T.imatrix()
        # graph_new_node_strengths: shape(n_batch, n_sentence, new_nodes_per_iter)
        graph_new_node_strengths = T.ftensor3()
        # graph_new_node_ids: shape(n_batch, n_sentence, new_nodes_per_iter, num_node_ids)
        graph_new_node_ids = T.ftensor4()
        # graph_new_edges: shape(n_batch, n_sentence, pad_graph_size, pad_graph_size, num_edge_types)
        graph_new_edges = T.TensorType('floatX', (False, ) * 5)()

        def _build(with_correct_graph, snap_to_best, using_dropout,
                   evaluate_accuracy):
            info = {}
            # Process each sentence, flattened to (?, sentence_len)
            flat_input_words = input_words.reshape([-1, sentence_len])
            flat_input_reprs, flat_ref_matrices = self.input_transformer.process(
                flat_input_words)
            # flat_input_reprs of shape (?, input_repr_size)
            # flat_ref_matrices of shape (?, num_node_ids, input_repr_size)
            input_reprs = flat_input_reprs.reshape(
                [n_batch, n_sentences, self.input_repr_size])
            ref_matrices = flat_ref_matrices.reshape([
                n_batch, n_sentences, self.num_node_ids, self.input_repr_size
            ])

            query_repr, query_ref_matrix = self.input_transformer.process(
                query_words)

            if using_dropout:
                iter_dropouts = []
                states_mask = util.make_dropout_mask(
                    (self.node_state_size, ), self.dropout_keep, self.srng)
                if self.nodes_mutable:
                    iter_dropouts.extend(
                        self.node_state_updater.dropout_masks(
                            self.srng, states_mask))
                if len(self.word_node_mapping) > 0:
                    iter_dropouts.extend(
                        self.direct_reference_updater.dropout_masks(
                            self.srng, states_mask))
                if self.intermediate_propagate != 0:
                    iter_dropouts.extend(
                        self.intermediate_propagator.dropout_masks(
                            self.srng, states_mask))
                if self.dynamic_nodes:
                    iter_dropouts.extend(
                        self.new_node_adder.dropout_masks(self.srng))
                iter_dropouts.extend(
                    self.edge_state_updater.dropout_masks(self.srng))
            else:
                iter_dropouts = []
                states_mask = None

            def _iter_fn(input_repr,
                         ref_matrix,
                         gstate,
                         correct_num_new_nodes=None,
                         correct_new_strengths=None,
                         correct_new_node_ids=None,
                         correct_edges=None,
                         dropout_masks=None):
                # If necessary, update node state
                if self.nodes_mutable:
                    gstate, dropout_masks = self.node_state_updater.process(
                        gstate, input_repr, dropout_masks)

                if len(self.word_node_mapping) > 0:
                    gstate, dropout_masks = self.direct_reference_updater.process(
                        gstate, ref_matrix, dropout_masks)

                # If necessary, propagate node state
                if self.intermediate_propagate != 0:
                    gstate, dropout_masks = self.intermediate_propagator.process_multiple(
                        gstate, self.intermediate_propagate, dropout_masks)

                node_loss = None
                node_accuracy = None
                # Propose and vote on new nodes
                if self.dynamic_nodes:
                    new_strengths, new_ids, dropout_masks = self.new_node_adder.get_candidates(
                        gstate, input_repr, self.new_nodes_per_iter,
                        dropout_masks)
                    # new_strengths and correct_new_strengths are of shape (n_batch, new_nodes_per_iter)
                    # new_ids and correct_new_node_ids are of shape (n_batch, new_nodes_per_iter, num_node_ids)
                    if with_correct_graph:
                        perm_idxs = np.array(
                            list(
                                itertools.permutations(
                                    range(self.new_nodes_per_iter))))
                        permuted_correct_str = correct_new_strengths[:, perm_idxs]
                        permuted_correct_ids = correct_new_node_ids[:, perm_idxs]
                        # due to advanced indexing, we should have shape (n_batch, permutation, new_nodes_per_iter, num_node_ids)
                        ext_new_str = T.shape_padaxis(new_strengths, 1)
                        ext_new_ids = T.shape_padaxis(new_ids, 1)
                        strength_ll = permuted_correct_str * T.log(
                            ext_new_str +
                            util.EPSILON) + (1 - permuted_correct_str) * T.log(
                                1 - ext_new_str + util.EPSILON)
                        ids_ll = permuted_correct_ids * T.log(ext_new_ids +
                                                              util.EPSILON)
                        reduced_perm_lls = T.sum(strength_ll, axis=2) + T.sum(
                            ids_ll, axis=[2, 3])
                        if self.best_node_match_only:
                            node_loss = -T.max(reduced_perm_lls, 1)
                        else:
                            full_ll = util.reduce_log_sum(reduced_perm_lls, 1)
                            # Note that some of these permutations are identical, since we likely did not add the maximum
                            # amount of nodes. Thus we will have added repeated elements here.
                            # We have log(x+x+...+x) = log(kx), where k is the repetition factor and x is the probability we want
                            # log(kx) = log(k) + log(x)
                            # Our repetition factor k is given by (new_nodes_per_iter - correct_num_new_nodes)!
                            # Recall that n! = gamma(n+1)
                            # so log(x) = log(kx) - log(gamma(k+1))
                            log_rep_factor = T.gammaln(
                                T.cast(
                                    self.new_nodes_per_iter -
                                    correct_num_new_nodes + 1, 'floatX'))
                            scaled_ll = full_ll - log_rep_factor
                            node_loss = -scaled_ll
                        if evaluate_accuracy:
                            best_match_idx = T.argmax(reduced_perm_lls, 1)
                            # should be of shape (n_batch), indexing the best permutation
                            best_correct_str = permuted_correct_str[
                                T.arange(n_batch), best_match_idx]
                            best_correct_ids = permuted_correct_ids[
                                T.arange(n_batch), best_match_idx]
                            snapped_strengths = util.independent_best(
                                new_strengths)
                            snapped_ids = util.categorical_best(
                                new_ids) * T.shape_padright(snapped_strengths)
                            close_strengths = T.all(
                                T.isclose(best_correct_str, snapped_strengths),
                                (1))
                            close_ids = T.all(
                                T.isclose(best_correct_ids, snapped_ids),
                                (1, 2))
                            node_accuracy = T.and_(close_strengths, close_ids)
                        # now substitute in the correct nodes
                        gstate = gstate.with_additional_nodes(
                            correct_new_strengths, correct_new_node_ids)
                    elif snap_to_best:
                        snapped_strengths = util.independent_best(
                            new_strengths)
                        snapped_ids = util.categorical_best(new_ids)
                        gstate = gstate.with_additional_nodes(
                            snapped_strengths, snapped_ids)
                    else:
                        gstate = gstate.with_additional_nodes(
                            new_strengths, new_ids)

                # Update edge state
                gstate, dropout_masks = self.edge_state_updater.process(
                    gstate, input_repr, dropout_masks)
                if with_correct_graph:
                    cropped_correct_edges = correct_edges[:, :gstate.n_nodes, :gstate.n_nodes, :]
                    edge_lls = cropped_correct_edges * T.log(
                        gstate.edge_strengths +
                        util.EPSILON) + (1 - cropped_correct_edges) * T.log(
                            1 - gstate.edge_strengths + util.EPSILON)
                    # edge_lls currently penalizes for edges connected to nodes that do not exist
                    # we do not want it to do this, so we mask it with node strengths
                    mask_src = util.shape_padaxes(gstate.node_strengths,
                                                  [2, 3])
                    mask_dest = util.shape_padaxes(gstate.node_strengths,
                                                   [1, 3])
                    masked_edge_lls = edge_lls * mask_src * mask_dest
                    edge_loss = -T.sum(masked_edge_lls, axis=[1, 2, 3])
                    if evaluate_accuracy:
                        snapped_edges = util.independent_best(
                            gstate.edge_strengths)
                        close_edges = T.isclose(cropped_correct_edges,
                                                snapped_edges)
                        ok_mask = 1 - T.cast(
                            mask_src * mask_dest, 'int8'
                        )  # it's OK for things not to match if node strengths are NOT both 1
                        edge_accuracy = T.all(T.or_(close_edges, ok_mask),
                                              (1, 2, 3))
                        overall_accuracy = edge_accuracy if node_accuracy is None else T.and_(
                            node_accuracy, edge_accuracy)
                    else:
                        overall_accuracy = None
                    gstate = gstate.with_updates(
                        edge_strengths=cropped_correct_edges)
                    return gstate, node_loss, edge_loss, overall_accuracy
                elif snap_to_best:
                    snapped_edges = util.independent_best(
                        gstate.edge_strengths)
                    gstate = gstate.with_updates(edge_strengths=snapped_edges)
                    return gstate
                else:
                    return gstate

            # Scan over each sentence
            def _scan_fn(
                input_repr, *stuff
            ):  # (input_repr, [ref_matrix?], [*correct_graph_stuff?], [dropout_masks?], *flat_graph_state, pad_graph_size)
                stuff = list(stuff)

                if len(self.word_node_mapping) > 0:
                    ref_matrix = stuff[0]
                    stuff = stuff[1:]
                else:
                    ref_matrix = None

                if with_correct_graph:
                    c_num_new_nodes, c_new_strengths, c_new_node_ids, c_edges = stuff[:4]
                    stuff = stuff[4:]

                if using_dropout:
                    dropout_masks = stuff[:len(iter_dropouts)]
                    stuff = stuff[len(iter_dropouts):]
                else:
                    dropout_masks = None

                flat_graph_state = stuff[:-1]
                pad_graph_size = stuff[-1]
                gstate = GraphState.unflatten_from_const_size(flat_graph_state)

                if with_correct_graph:
                    gstate, node_loss, edge_loss, overall_accuracy = _iter_fn(
                        input_repr,
                        ref_matrix,
                        gstate,
                        c_num_new_nodes,
                        c_new_strengths,
                        c_new_node_ids,
                        c_edges,
                        dropout_masks=dropout_masks)
                else:
                    gstate = _iter_fn(input_repr,
                                      ref_matrix,
                                      gstate,
                                      dropout_masks=dropout_masks)

                retvals = gstate.flatten_to_const_size(pad_graph_size)
                if with_correct_graph:
                    if self.dynamic_nodes:
                        retvals.append(node_loss)
                    retvals.append(edge_loss)
                    if evaluate_accuracy:
                        retvals.append(overall_accuracy)
                return retvals

            if self.dynamic_nodes:
                initial_gstate = GraphState.create_empty(
                    n_batch, self.num_node_ids, self.node_state_size,
                    self.num_edge_types)
            else:
                initial_gstate = GraphState.create_full_unique(
                    n_batch, self.num_node_ids, self.node_state_size,
                    self.num_edge_types)

            # Account for all nodes, plus the extra padding node to prevent GPU unpleasantness
            if self.dynamic_nodes:
                pad_graph_size = n_sentences * self.new_nodes_per_iter + 1
            else:
                pad_graph_size = self.num_node_ids
            outputs_info = initial_gstate.flatten_to_const_size(pad_graph_size)
            prepped_input = input_reprs.dimshuffle([1, 0, 2])
            sequences = [prepped_input]
            if len(self.word_node_mapping) > 0:
                sequences.append(ref_matrices.dimshuffle([1, 0, 2, 3]))
            if with_correct_graph:
                sequences.append(graph_num_new_nodes.swapaxes(0, 1))
                sequences.append(graph_new_node_strengths.swapaxes(0, 1))
                sequences.append(graph_new_node_ids.swapaxes(0, 1))
                sequences.append(graph_new_edges.swapaxes(0, 1))

                if self.dynamic_nodes:
                    outputs_info.extend([None])
                if evaluate_accuracy:
                    outputs_info.extend([None])
                outputs_info.extend([None])
            if using_dropout:
                sequences.extend(iter_dropouts)
            all_scan_out, _ = theano.scan(_scan_fn,
                                          sequences=sequences,
                                          outputs_info=outputs_info,
                                          non_sequences=[pad_graph_size])
            graph_accurate_list = None
            if with_correct_graph:
                if evaluate_accuracy:
                    full_graph_accuracy = all_scan_out[-1]
                    all_scan_out = all_scan_out[:-1]
                    graph_accurate_list = T.all(full_graph_accuracy, 0)
                    info["graph_accuracy"] = T.sum(graph_accurate_list,
                                                   dtype='floatX') / T.cast(
                                                       n_batch, 'floatX')
                if self.dynamic_nodes:
                    all_flat_gstates = all_scan_out[:-2]
                    node_loss, edge_loss = all_scan_out[-2:]
                    reduced_node_loss = T.sum(node_loss) / T.cast(
                        n_batch, 'floatX')
                    reduced_edge_loss = T.sum(edge_loss) / T.cast(
                        n_batch, 'floatX')
                    avg_graph_loss = (reduced_node_loss +
                                      reduced_edge_loss) / T.cast(
                                          input_words.shape[1], 'floatX')
                    info["node_loss"] = reduced_node_loss
                    info["edge_loss"] = reduced_edge_loss
                else:
                    all_flat_gstates = all_scan_out[:-1]
                    edge_loss = all_scan_out[-1]
                    reduced_edge_loss = T.sum(edge_loss) / T.cast(
                        n_batch, 'floatX')
                    avg_graph_loss = reduced_edge_loss / T.cast(
                        input_words.shape[1], 'floatX')
                    info["edge_loss"] = reduced_edge_loss
            else:
                all_flat_gstates = all_scan_out

            if self.sequence_representation:
                # Each part of all_flat_gstates is of shape (n_sentences, n_batch, ...)
                # except for the last one, which we handle separately
                # Swap to (n_batch, n_sentences, ...)
                # Then flatten to (n_batch*n_sentences, ...) for further processing
                final_flat_gstate = [
                    x.swapaxes(0, 1).reshape(T.concatenate([[-1],
                                                            x.shape[2:]]),
                                             ndim=(x.ndim - 1))
                    for x in all_flat_gstates[:-1]
                ]
                # As for the last one, we need to get a single scalar value. The last one will be
                # the biggest, so we will take that. Note that this will introduce a bunch of
                # zero-nodes, but that's OK and we can process that later. (We REQUIRE that
                # padding in graph_state makes zero-strength nodes here!)
                final_flat_gstate.append(all_flat_gstates[-1][-1])
                # We also need to repeat query_repr and query_ref_matrix so that they broadcast together
                query_repr = T.extra_ops.repeat(query_repr, n_sentences, 0)
                query_ref_matrix = T.extra_ops.repeat(query_ref_matrix,
                                                      n_sentences, 0)
            else:
                # Extract last timestep
                final_flat_gstate = [x[-1] for x in all_flat_gstates]
            final_gstate = GraphState.unflatten_from_const_size(
                final_flat_gstate)

            if self.train_with_query:
                if self.wipe_node_state:
                    final_gstate = final_gstate.with_updates(
                        node_states=T.zeros_like(final_gstate.node_states))

                qnsu_dropout_masks = self.query_node_state_updater.dropout_masks(
                    self.srng, states_mask)
                query_gstate, _ = self.query_node_state_updater.process(
                    final_gstate, query_repr, qnsu_dropout_masks)

                if len(self.word_node_mapping) > 0:
                    qdru_dropout_masks = self.query_direct_reference_updater.dropout_masks(
                        self.srng, states_mask)
                    query_gstate, _ = self.query_direct_reference_updater.process(
                        query_gstate, query_ref_matrix, qdru_dropout_masks)

                fp_dropout_masks = self.final_propagator.dropout_masks(
                    self.srng, states_mask)
                propagated_gstate, _ = self.final_propagator.process_multiple(
                    query_gstate, self.final_propagate, fp_dropout_masks)

                agg_dropout_masks = self.aggregator.dropout_masks(self.srng)
                aggregated_repr, _ = self.aggregator.process(
                    propagated_gstate,
                    agg_dropout_masks)  # shape (n_batch, output_repr_size)

                if self.sequence_representation:
                    # aggregated_repr is of shape (n_batch*n_sentences, repr_width)
                    # We want to split back to timesteps: (n_batch, n_sentences, repr_width)
                    agg_repr_seq = aggregated_repr.reshape(
                        [n_batch, n_sentences, -1])
                    # Now collapse it to a summary representation
                    aggsum_dropout_masks = self.aggregate_summarizer.dropout_masks(
                        self.srng)
                    aggregated_repr, _ = self.aggregate_summarizer.process(
                        agg_repr_seq, aggsum_dropout_masks)
                    # At this point aggregated_repr is (n_batch, repr_width) as desired

                max_seq_len = correct_output.shape[1]
                if self.output_format == ModelOutputFormat.sequence:
                    final_output = self.output_processor.process(
                        aggregated_repr,
                        max_seq_len)  # shape (n_batch, ?, num_output_words)
                else:
                    final_output = self.output_processor.process(
                        aggregated_repr)

                if snap_to_best:
                    final_output = self.output_processor.snap_to_best(
                        final_output)

                if self.output_format == ModelOutputFormat.subset:
                    elemwise_loss = T.nnet.binary_crossentropy(
                        final_output, correct_output)
                    query_loss = T.sum(elemwise_loss)
                else:
                    flat_final_output = final_output.reshape(
                        [-1, self.num_output_words])
                    flat_correct_output = correct_output.reshape(
                        [-1, self.num_output_words])
                    timewise_loss = T.nnet.categorical_crossentropy(
                        flat_final_output, flat_correct_output)
                    query_loss = T.sum(timewise_loss)
                query_loss = query_loss / T.cast(n_batch, 'floatX')
                info["query_loss"] = query_loss
            else:
                final_output = T.zeros([])

            full_loss = np.array(0.0, np.float32)
            if with_correct_graph:
                full_loss = full_loss + avg_graph_loss
            if self.train_with_query:
                full_loss = full_loss + query_loss

            if self.train_with_query:
                adjusted_query_gstates = [
                    x.reshape(T.concatenate([[n_batch, n_sentences],
                                             x.shape[1:]]),
                              ndim=(x.ndim + 1))
                    if self.sequence_representation else T.shape_padaxis(x, 1)
                    for x in query_gstate.flatten()
                ]
                adjusted_prop_gstates = [
                    x.reshape(T.concatenate([[n_batch, n_sentences],
                                             x.shape[1:]]),
                              ndim=(x.ndim + 1))
                    if self.sequence_representation else T.shape_padaxis(x, 1)
                    for x in propagated_gstate.flatten()
                ]
                full_flat_gstates = [
                    T.concatenate([a.swapaxes(0, 1), b, c], 1) for a, b, c in
                    zip(all_flat_gstates[:-1], adjusted_query_gstates,
                        adjusted_prop_gstates)
                ]
            else:
                full_flat_gstates = [
                    a.swapaxes(0, 1) for a in all_flat_gstates[:-1]
                ]
                max_seq_len = T.iscalar()
            return full_loss, final_output, full_flat_gstates, graph_accurate_list, max_seq_len, info

        train_loss, _, _, _, _, train_info = _build(self.train_with_graph,
                                                    False, True, False)
        adam_updates = Adam(train_loss, self.params, lr=self.learning_rate_var)

        self.info_keys = list(train_info.keys())

        print("Compiling...")

        optimizer = theano.compile.predefined_optimizers[
            'fast_run' if self.check_mode == 'debug' else theano.config.optimizer]
        optimizer = optimizer.excluding(
            "scanOp_pushout_output", "remove_constants_and_unused_inputs_scan")
        if self.check_mode == 'nan':
            mode = NanGuardMode(optimizer=optimizer,
                                nan_is_error=True,
                                inf_is_error=True,
                                big_is_error=True)
        elif self.check_mode == 'debug':
            mode = DebugMode(optimizer=optimizer,
                             check_isfinite=False,
                             check_py_code=False,
                             stability_patience=1)
            theano.tensor.TensorType.filter_checks_isfinite = False
        else:
            mode = theano.Mode(optimizer=optimizer)
        self.train_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ], [train_loss] + list(train_info.values()),
                                        updates=adam_updates,
                                        allow_input_downcast=True,
                                        on_unused_input='ignore',
                                        mode=mode)

        eval_loss, _, full_flat_gstates, graph_accurate_list, _, eval_info = _build(
            self.train_with_graph, False, False, True)
        self.eval_info_keys = list(eval_info.keys())
        self.eval_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ], [eval_loss, graph_accurate_list] + list(eval_info.values()),
                                       allow_input_downcast=True,
                                       on_unused_input='ignore',
                                       mode=mode)

        self.debug_test_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ],
                                             full_flat_gstates,
                                             allow_input_downcast=True,
                                             on_unused_input='ignore',
                                             mode=mode)

        test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
            False, False, False, False)
        self.fuzzy_test_fn = theano.function(
            [input_words, query_words] +
            ([max_seq_len] if self.output_format == ModelOutputFormat.sequence
             else []), [final_output] + full_flat_gstates,
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=mode)

        test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
            False, True, False, False)
        self.snap_test_fn = theano.function(
            [input_words, query_words] +
            ([max_seq_len] if self.output_format == ModelOutputFormat.sequence
             else []), [final_output] + full_flat_gstates,
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=mode)
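
The permutation-matching loss in the middle of this example corrects for over-counted identical permutations with a log repetition factor computed via gammaln. A numeric check of that identity, with purely illustrative values:

import math
import numpy as np
from scipy.special import gammaln, logsumexp

# 4 node slots but only 2 real new nodes: every ordering of the 2 unused
# slots yields the same assignment, so each distinct assignment is counted
# k = (4 - 2)! = 2 times when summing over slot permutations.
new_nodes_per_iter, correct_num_new_nodes = 4, 2
k = math.factorial(new_nodes_per_iter - correct_num_new_nodes)

log_x = -3.7                          # log-prob of one distinct assignment
full_ll = logsumexp([log_x] * k)      # log(k * x): the over-counted total
# log(k), since k = n! and log(n!) = gammaln(n + 1)
log_rep_factor = gammaln(new_nodes_per_iter - correct_num_new_nodes + 1)

assert np.isclose(full_ll - log_rep_factor, log_x)  # log(x) = log(kx) - log(k)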
Example #25
0
    def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :return: No return value.
        """

        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        grads = theano.grad(loss, wrt=params)
        flat_grad = ext.flatten_tensor_variables(grads)

        constraint_grads = theano.grad(constraint_term, wrt=params)
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
        Hx_plain_splits = TT.grad(
            TT.sum([TT.sum(g * x) for g, x in itertools.izip(constraint_grads, xs)]),
            wrt=params,
        )
        Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name


        if self._debug_nan:
            from theano.compile.nanguardmode import NanGuardMode
            mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
        else:
            mode = None

        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                log_name="f_loss",
                mode=mode,
            ),
            f_grad=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=flat_grad,
                log_name="f_grad",
                mode=mode,
            ),
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + extra_inputs + xs,
                outputs=Hx_plain,
                log_name="f_Hx_plain",
                mode=mode,
            ),
            f_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
                mode=mode,
            ),
            f_loss_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=[loss, constraint_term],
                log_name="f_loss_constraint",
                mode=mode,
            ),
        )
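
Hx_plain above is the standard double-backprop trick for Hessian-vector products: differentiating the inner product of the gradient with a fixed vector yields Hx without ever materializing the Hessian. A minimal sketch verifying the identity on a quadratic (names and values are illustrative):

import numpy as np
import theano
import theano.tensor as TT

# f(w) = 0.5 * w^T A w has gradient A w and Hessian A (for symmetric A), so
# the identity H v = grad_w( sum(grad_w(f) * v) ) is easy to check.
A = np.array([[2.0, 1.0], [1.0, 3.0]], dtype=theano.config.floatX)

w = TT.vector('w')
v = TT.vector('v')
f = 0.5 * TT.dot(w, TT.dot(A, w))

g = TT.grad(f, wrt=w)                # symbolic gradient, g = A w
Hv = TT.grad(TT.sum(g * v), wrt=w)   # same trick as Hx_plain above

f_Hv = theano.function([w, v], Hv)
w0 = np.asarray([1.0, -2.0], dtype=theano.config.floatX)
v0 = np.asarray([0.5, 1.0], dtype=theano.config.floatX)
assert np.allclose(f_Hv(w0, v0), A.dot(v0))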
Example #26
0
def main(num_epochs=1,
         n_songs_train=1,
         n_songs_val=1,
         n_songs_test=1,
         batch_size=256,
         learning_rate=1e-4):
    """
    Main function
    """

    # Theano config
    theano.config.floatX = 'float32'

    train, val, test = None, None, None
    try:
        train, val, test = use_preparsed_data(outputdir='/zap/tsob/audio/', )
    except Exception:
        # fall back to parsing the data from scratch if no preparsed data exists
        train, val, test = get_data(n_songs_train=n_songs_train,
                                    n_songs_val=n_songs_val,
                                    n_songs_test=n_songs_test,
                                    outputdir='/zap/tsob/audio/',
                                    seed=None)

    # Save the returned metadata
    np.savez('/zap/tsob/audio/metadata', train, val, test)

    # Print the dimensions
    print "Data dimensions:"
    for datapt in [
            train['Xshape'], train['yshape'], val['Xshape'], val['yshape'],
            test['Xshape'], test['yshape']
    ]:
        print datapt

    # Parse dimensions
    n_train = train['yshape'][0]
    n_val = val['yshape'][0]
    n_test = test['yshape'][0]
    n_chan = train['Xshape'][1]
    n_feats = train['Xshape'][2]
    n_frames = train['Xshape'][3]

    print "n_train  = {0}".format(n_train)
    print "n_val    = {0}".format(n_val)
    print "n_test   = {0}".format(n_test)
    print "n_chan   = {0}".format(n_chan)
    print "n_feats  = {0}".format(n_feats)
    print "n_frames = {0}".format(n_frames)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4(name='inputs')
    target_var = T.fcol(name='targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions..."),
    network = build_cnn(input_var)
    print("Done.")

    # Create a loss expression for training, i.e., a scalar objective we want to minimize
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_hinge_loss(prediction, target_var)
    loss = loss.mean()

    # Create update expressions for training
    # Here, we'll use adam
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss,
                                   params,
                                   learning_rate=learning_rate,
                                   beta1=0.95,
                                   beta2=0.999,
                                   epsilon=1e-08)

    # Create a loss expression for validation/testing.
    # The crucial difference here is that we do a deterministic forward pass
    # through the network, disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    test_loss = lasagne.objectives.binary_hinge_loss(test_prediction,
                                                     target_var)
    test_loss = test_loss.mean()

    test_pred_fn = theano.function([input_var],
                                   test_prediction,
                                   allow_input_downcast=True)

    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input_var, target_var],
        loss,
        updates=updates,
        mode=NanGuardMode(  #TODO remove
            nan_is_error=True,
            inf_is_error=True,
            big_is_error=True  #TODO remove
        ),  #TODO remove
        allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    # Finally, launch the training loop.
    print("Starting training...")

    train_error_hist = []

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(train, batch_size, shuffle=True):
            inputs, targets = batch
            train_err_increment = train_fn(inputs, targets)
            train_err += train_err_increment
            train_error_hist.append(train_err_increment)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(val, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.8f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.8f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))
    print("Done training.")

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_predictions = []
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_predictions.append(test_pred_fn(inputs))
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    timestr = str(time.time())
    np.savez('/zap/tsob/audio/model' + timestr + '.npz',
             *lasagne.layers.get_all_param_values(network))
    np.save('/zap/tsob/audio/train_error_hist' + timestr + '.npy',
            train_error_hist)
    np.save('/zap/tsob/audio/test_predictions' + timestr + '.npy',
            test_predictions)
    print "Wrote model to {0}, test error histogram to {1}, and test predictions to {2}".format(
        'model' + timestr + '.npz', 'train_error_hist' + timestr + '.npy',
        'test_predictions' + timestr + '.npy')
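
The training loop above relies on an iterate_minibatches helper that is not shown in this example. A hypothetical stand-in, assuming the data dict exposes 'X' and 'y' arrays (the real helper in the source repo may differ):

import numpy as np

def iterate_minibatches(data, batch_size, shuffle=False):
    # Yield (inputs, targets) minibatches from a dict holding 'X' and 'y'
    # arrays; 'X' and 'y' are assumed keys, not confirmed by the source.
    X, y = data['X'], data['y']
    idxs = np.arange(len(y))
    if shuffle:
        np.random.shuffle(idxs)
    for start in range(0, len(y) - batch_size + 1, batch_size):
        batch = idxs[start:start + batch_size]
        yield X[batch], y[batch]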
Example #27
0
def main(exp_config, source_vocab, target_vocab, dev_stream, use_bokeh=True):

    # def setup_model_and_stream(exp_config, source_vocab, target_vocab):
    train_encoder, train_decoder, theano_sampling_source_input, theano_sampling_context_input, generated, masked_stream = setup_model_and_stream(
        exp_config, source_vocab, target_vocab)
    cost = create_model(train_encoder, train_decoder,
                        exp_config.get('imt_smoothing_constant', 0.005))

    # Set up training model
    logger.info("Building model")
    train_model = Model(cost)

    # Set the parameters from a trained models (.npz file)
    logger.info("Loading parameters from model: {}".format(
        exp_config['saved_parameters']))
    # Note the brick delimiter='-' is here for legacy reasons because Blocks changed the serialization API
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'],
        brick_delimiter=exp_config.get('brick_delimiter', None))
    LoadNMT.set_model_parameters(train_model, param_values)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # GRAPH TRANSFORMATIONS FOR BETTER TRAINING
    if exp_config.get('l2_regularization', False) is True:
        l2_reg_alpha = exp_config['l2_regularization_alpha']
        logger.info(
            'Applying l2 regularization with alpha={}'.format(l2_reg_alpha))
        model_weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        for W in model_weights:
            cost = cost + (l2_reg_alpha * (W**2).sum())

        # why do we need to rename the cost variable? Where did the original name come from?
        cost.name = 'decoder_cost_cost'

    cg = ComputationGraph(cost)

    # apply dropout for regularization
    # Note dropout variables are hard-coded here
    if exp_config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        # this is the probability of dropping out, so you probably want to make it <=0.5
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, exp_config['dropout'])

    # create the training directory, and copy this config there if directory doesn't exist
    if not os.path.isdir(exp_config['saveto']):
        os.makedirs(exp_config['saveto'])
        # TODO: mv the actual config file once we switch to .yaml for min-risk
        shutil.copy(exp_config['config_file'], exp_config['saveto'])

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=exp_config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(exp_config['saveto'],
                      every_n_batches=exp_config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    # TODO: change the if statement here
    if exp_config['hook_samples'] >= 1 or exp_config['bleu_script'] is not None:
        logger.info("Building sampling model")
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[train_decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling -- TODO: sampling is broken for min-risk
    #if config['hook_samples'] >= 1:
    #    logger.info("Building sampler")
    #    extensions.append(
    #        Sampler(model=search_model, data_stream=tr_stream,
    #                hook_samples=config['hook_samples'],
    #                every_n_batches=config['sampling_freq'],
    #                src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on bleu
    # TODO: use multimodal meteor and BLEU validator
    # TODO: add 'validator' key to IMT config
    if exp_config.get('bleu_script', None) is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(theano_sampling_source_input,
                          theano_sampling_context_input,
                          samples=samples,
                          config=exp_config,
                          model=search_model,
                          data_stream=dev_stream,
                          src_vocab=source_vocab,
                          trg_vocab=target_vocab,
                          normalize=exp_config['normalized_bleu'],
                          every_n_batches=exp_config['bleu_val_freq']))

    if exp_config.get('imt_f1_validation', False) is not False:
        logger.info("Building imt F1 validator")
        extensions.append(
            IMT_F1_Validator(theano_sampling_source_input,
                             theano_sampling_context_input,
                             samples=samples,
                             config=exp_config,
                             model=search_model,
                             data_stream=dev_stream,
                             src_vocab=source_vocab,
                             trg_vocab=target_vocab,
                             normalize=exp_config['normalized_bleu'],
                             every_n_batches=exp_config['bleu_val_freq']))

    # Add early stopping based on Meteor
    # if exp_config.get('meteor_directory', None) is not None:
    #     logger.info("Building meteor validator")
    #     extensions.append(
    #         MeteorValidator(theano_sampling_source_input, theano_sampling_context_input,
    #                         samples=samples,
    #                         config=config,
    #                         model=search_model, data_stream=dev_stream,
    #                         src_vocab=src_vocab,
    #                         trg_vocab=trg_vocab,
    #                         normalize=config['normalized_bleu'],
    #                         every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if exp_config['reload']:
        extensions.append(LoadNMT(exp_config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot(exp_config['model_save_directory'],
                 channels=[[
                     'decoder_cost_cost', 'validation_set_imt_f1_score',
                     'validation_set_bleu_score', 'validation_set_meteor_score'
                 ]],
                 every_n_batches=10))

    # Set up training algorithm
    logger.info("Initializing training algorithm")

    # if there is l2_regularization, dropout or random noise, we need to use the output of the modified graph
    # WORKING: try to catch and fix nan
    if exp_config['dropout'] < 1.0:
        if exp_config.get('nan_guard', False):
            from theano.compile.nanguardmode import NanGuardMode
            algorithm = GradientDescent(cost=cg.outputs[0],
                                        parameters=cg.parameters,
                                        step_rule=CompositeRule([
                                            StepClipping(
                                                exp_config['step_clipping']),
                                            eval(exp_config['step_rule'])()
                                        ]),
                                        on_unused_sources='warn',
                                        theano_func_kwargs={
                                            'mode':
                                            NanGuardMode(nan_is_error=True,
                                                         inf_is_error=True)
                                        })
        else:
            algorithm = GradientDescent(cost=cg.outputs[0],
                                        parameters=cg.parameters,
                                        step_rule=CompositeRule([
                                            StepClipping(
                                                exp_config['step_clipping']),
                                            eval(exp_config['step_rule'])()
                                        ]),
                                        on_unused_sources='warn')
    else:
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(
                                            exp_config['step_clipping']),
                                        eval(exp_config['step_rule'])()
                                    ]),
                                    on_unused_sources='warn')

    # enrich the logged information
    extensions.append(Timing(every_n_batches=100))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=train_model,
                         algorithm=algorithm,
                         data_stream=masked_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
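
The three GradientDescent calls above differ only in whether theano_func_kwargs carries a NanGuardMode. A sketch of one way to factor that out (not from the original source; assumes the same Blocks API the example already uses):

from blocks.algorithms import CompositeRule, GradientDescent, StepClipping

def make_algorithm(cost, cg, exp_config):
    # Build the (clipping + step-rule) composite once.
    step_rule = CompositeRule([
        StepClipping(exp_config['step_clipping']),
        eval(exp_config['step_rule'])(),
    ])
    kwargs = {}
    if exp_config.get('nan_guard', False):
        from theano.compile.nanguardmode import NanGuardMode
        # Compile the training function under NanGuardMode only on request.
        kwargs['theano_func_kwargs'] = {
            'mode': NanGuardMode(nan_is_error=True, inf_is_error=True)
        }
    return GradientDescent(cost=cost,
                           parameters=cg.parameters,
                           step_rule=step_rule,
                           on_unused_sources='warn',
                           **kwargs)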
Example #28
0
    def build(self):
        config = self.config
        processor = self.processor

        source_inputs = T.imatrix()
        target_inputs = T.imatrix()
        target_outputs = T.imatrix()
        source_mask_inputs = T.matrix()
        target_mask_inputs = T.matrix()
        # map_inputs = T.tensor3()

        l_source_inputs = lasagne.layers.InputLayer(shape=(None,
                                                           config.source_len),
                                                    input_var=source_inputs)
        l_target_inputs = lasagne.layers.InputLayer(shape=(None,
                                                           config.target_len),
                                                    input_var=target_inputs)
        l_output = lasagne.layers.InputLayer(shape=(None, config.target_len),
                                             input_var=target_outputs)
        l_source_mask_inputs = lasagne.layers.InputLayer(
            shape=(None, config.source_len), input_var=source_mask_inputs)
        l_target_mask_inputs = lasagne.layers.InputLayer(
            shape=(None, config.target_len), input_var=target_mask_inputs)
        # l_map_inputs = lasagne.layers.InputLayer(shape=(None, config.source_len, processor.target_vocab_size),
        #                                          input_var=map_inputs)

        l_source = lasagne.layers.EmbeddingLayer(l_source_inputs,
                                                 processor.source_vocab_size,
                                                 config.embedding_size)
        l_target = lasagne.layers.EmbeddingLayer(l_target_inputs,
                                                 processor.target_vocab_size,
                                                 config.embedding_size)
        self.W1 = l_source.W
        self.W2 = l_target.W
        # T.sum(l_source.W)
        # l_s_gru_fw = lasagne.layers.GRULayer(l_source, config.enc_units, mask_input=l_source_mask_inputs,
        #                                      grad_clipping=config.grad_clipping)
        # l_s_gru_bw = lasagne.layers.GRULayer(l_source, config.enc_units, mask_input=l_source_mask_inputs,
        #                                      grad_clipping=config.grad_clipping)
        # l_source = lasagne.layers.ConcatLayer([l_s_gru_fw, l_s_gru_bw], axis=2)
        # l_source = lasagne.layers.GRULayer(l_source, config.enc_units, mask_input=l_source_mask_inputs,
        #                                    grad_clipping=config.grad_clipping)
        # l_source_last = lasagne.layers.ElemwiseSumLayer(l_source) #lasagne.layers.SliceLayer(l_source, -1, axis=1)

        l_target_outputs = layers.GRUCoverageTrainLayer(
            l_target_inputs,
            config.dec_units,
            mask_input=l_target_mask_inputs,
            grad_clipping=config.grad_clipping,
            source_token_cnt=processor.source_vocab_size,
            target_token_cnt=processor.target_vocab_size,
            l_enc_feat=l_source,
            l_enc_mask=l_source_mask_inputs,
            l_output=l_output,
            W_emb=self.W2,
            unk_index=processor.get_char_index(
                'UNK', False))  #, hid_init=l_source_last)
        l_t = l_target_outputs
        l_target_outputs = lasagne.layers.ReshapeLayer(
            l_target_outputs, (-1, [2]))  # (batch * dec_len, vocab + extra)

        l_gen = layers.GRUCoverageTestLayer(
            config.dec_units,
            grad_clipping=config.grad_clipping,
            source_token_cnt=processor.source_vocab_size,
            target_token_cnt=processor.target_vocab_size,
            l_enc_feat=l_source,
            l_enc_mask=l_source_mask_inputs,
            W_emb=self.W2,
            resetgate=l_t.resetgate,
            updategate=l_t.updategate,
            hidden_update=l_t.hidden_update,  #hid_init=l_source_last,
            unk_index=processor.get_char_index('UNK', False),
            start_index=processor.get_char_index('START', False),
            W_gen=l_t.W_gen,
            gen_len=config.target_len)
        l_att = layers.GRUCoverageAttLayer(
            config.dec_units,
            grad_clipping=config.grad_clipping,
            source_token_cnt=processor.source_vocab_size,
            target_token_cnt=processor.target_vocab_size,
            l_enc_feat=l_source,
            l_enc_mask=l_source_mask_inputs,
            W_emb=self.W2,
            resetgate=l_t.resetgate,
            updategate=l_t.updategate,
            hidden_update=l_t.hidden_update,  #hid_init=l_source_last,
            unk_index=processor.get_char_index('UNK', False),
            start_index=processor.get_char_index('START', False),
            W_gen=l_t.W_gen,
            gen_len=config.target_len)
        self.l = l_target_outputs

        py = lasagne.layers.get_output(l_target_outputs)
        loss = (py * T.extra_ops.to_one_hot(target_outputs.flatten(),
                                            processor.target_vocab_size)).sum(
                                                axis=1)  # (batch * dec_len)
        loss = -(loss * target_mask_inputs.flatten()).mean()

        params = lasagne.layers.get_all_params(self.l, trainable=True)
        updates = lasagne.updates.adam(loss,
                                       params,
                                       learning_rate=config.learning_rate)

        gen_y = lasagne.layers.get_output(l_gen)

        gen_att = lasagne.layers.get_output(l_att)

        self.train_fn = theano.function([
            source_inputs, target_inputs, target_outputs, source_mask_inputs,
            target_mask_inputs
        ],
                                        None,
                                        updates=updates,
                                        on_unused_input='ignore',
                                        mode=NanGuardMode(nan_is_error=True,
                                                          inf_is_error=True,
                                                          big_is_error=True))
        self.loss_fn = theano.function([
            source_inputs, target_inputs, target_outputs, source_mask_inputs,
            target_mask_inputs
        ],
                                       loss,
                                       on_unused_input='ignore',
                                       mode=NanGuardMode(nan_is_error=True,
                                                         inf_is_error=True,
                                                         big_is_error=True))
        self.test_fn = theano.function([source_inputs, source_mask_inputs],
                                       gen_y,
                                       on_unused_input='ignore')
        self.att_fn = theano.function([source_inputs, source_mask_inputs],
                                      gen_att,
                                      on_unused_input='ignore')

        l_samp = layers.GRUCopyPureSampleLayer(
            config.dec_units,
            grad_clipping=config.grad_clipping,
            source_token_cnt=processor.source_vocab_size,
            target_token_cnt=processor.target_vocab_size,
            l_enc_feat=l_source,
            l_enc_mask=l_source_mask_inputs,
            W_emb=self.W2,
            resetgate=l_t.resetgate,
            updategate=l_t.updategate,
            hidden_update=l_t.hidden_update,  #hid_init=l_source_last,
            unk_index=processor.get_char_index('UNK', False),
            start_index=processor.get_char_index('START', False),
            gen_len=config.target_len,
            W_gen=l_t.W_gen,
            MRG_stream=self.MRG_stream)  # (batch, dec_len)
        samp_y = lasagne.layers.get_output(l_samp)
        self.sample_fn = theano.function([source_inputs, source_mask_inputs],
                                         samp_y,
                                         updates=l_samp.updates,
                                         on_unused_input='ignore')

        reward_inputs = T.matrix()  # (batch, dec_len)
        reinforce_loss = (py * T.extra_ops.to_one_hot(
            target_outputs.flatten(), processor.target_vocab_size)).sum(
                axis=1)  # (batch * dec_len)
        reinforce_loss = -(reinforce_loss * target_mask_inputs.flatten() *
                           reward_inputs.flatten()).mean()
        reinforce_updates = lasagne.updates.adam(
            reinforce_loss,
            params,
            learning_rate=config.reinforce_learning_rate)
        self.reinforce_fn = theano.function([
            source_inputs, target_inputs, target_outputs, source_mask_inputs,
            target_mask_inputs, reward_inputs
        ],
                                            None,
                                            updates=reinforce_updates,
                                            on_unused_input='ignore')

        print('params', lasagne.layers.count_params(self.l, trainable=True))
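Example #28 compiles its train and loss functions with NanGuardMode unconditionally, which catches NaNs, Infs, and huge values early but slows every call. Other examples in this collection gate the guard behind a flag; a minimal sketch of that pattern (the helper name and the enabled flag are illustrative):

import theano
from theano.compile.nanguardmode import NanGuardMode

def guarded_mode(enabled):
    # Return a NanGuardMode when debugging; otherwise None, so that
    # theano.function falls back to the default compilation mode.
    if not enabled:
        return None
    return NanGuardMode(nan_is_error=True, inf_is_error=True,
                        big_is_error=True)

# usage: theano.function(inputs, outputs, mode=guarded_mode(debug_flag))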
Example #29
0
def main(num_epochs=500, mode="run", batchsize=96):
    # Debug
    #theano.config.profile=True
    #theano.config.optimizer_profile=True
    #theano.config.warn_float64='warn'

    # Loading all preprocessed data
    global Ws, bs
    Xtr, Ytr, Xva, Yva, imgMean_vals, Ws, bs = data_prep()

    # Sanity check: try to overfit a tiny subset of the data (e.g. 40 instances)
    if mode == "toy":
        batchsize = 10
        np.random.seed(11)  # seed the global RNG (the original built and discarded a RandomState)
        idx = np.random.randint(0, Xtr.shape[0] // 10, batchsize * 4)
        Xtr = Xtr[idx, :, :, :]
        Ytr = Ytr[idx, :]
    """
    COMPILING THEANO FUNCTIONS
    """
    start_time = time.time()
    # Prepare Theano variables for inputs and targets
    input_var = T.ftensor4('inputs')
    target_var = T.imatrix('targets')

    # Center the input images
    imgMean = T.TensorType(dtype='float32',
                           broadcastable=(True, False, False,
                                          False))('imgMean')
    z = (input_var - imgMean)
    center_fn = theano.function([input_var, imgMean],
                                z,
                                mode=NanGuardMode(nan_is_error=True,
                                                  inf_is_error=True,
                                                  big_is_error=True))

    print "\nbuilding model... "
    net0 = build_model(input_var)

    print "\ncompiling functions... "
    '''
    # Build loss function
    prediction = lasagne.layers.get_output(net0)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var)
    loss = loss.mean(axis=0)

    # Create update expression for training
    # using RMSprop
    params = lasagne.layers.get_all_params(net0, 
                                           trainable=True)
    updates = lasagne.updates.rmsprop(loss, params, 
                                      learning_rate=0.01, rho=0.9, epsilon=1e-06)
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates,
                               mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                               )
    '''

    ## Building loss evaluation for validation set
    va_prediction = lasagne.layers.get_output(net0, deterministic=True)
    va_loss = lasagne.objectives.categorical_crossentropy(
        va_prediction, target_var)
    va_loss = va_loss.mean(axis=0)

    va_fn = theano.function(
        [input_var, target_var],
        #mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True),
        va_loss)

    print("compilation finished in {:.2f}").format(time.time() - start_time)
    """
    TRAINING - HAVEN'T SUBTRACTED THE IMAGE MEAN YET!!!
    """

    print "Starting training with batchsize of %d ..." % (batchsize)
    for epoch in range(num_epochs):
        '''
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(Xtr, Ytr, batchsize, shuffle=True):            
            inputs = center_fn(inputs, imgMean_vals)
            train_err += train_fn(inputs, targets)
            train_batches += 1
        '''
        # And a full pass over the validation data:
        if mode != "toy":
            va_err = 0
            va_batches = 0
            for inputs, targets in iterate_minibatches(Xtr,
                                                       Ytr,
                                                       batchsize,
                                                       shuffle=True):
                inputs = center_fn(inputs, imgMean_vals)
                va_err += va_fn(inputs, targets)
                va_batches += 1

        # Then we print the results for this epoch:
        '''
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}\t{:d}".format(train_err / train_batches, train_batches))
        '''
        if mode != "toy":
            print("  validation loss:\t\t{:.6f}".format(va_err / va_batches))

            # Save the model after every 5 epochs
            '''
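Example #29 relies on an iterate_minibatches helper whose definition is not shown in the snippet. A sketch of the conventional Lasagne-tutorial-style implementation matching the signature used above (an assumption, not the author's code):

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Yield (inputs, targets) slices of size batchsize, optionally shuffled;
    # a trailing partial batch is dropped.
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]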
Example #30
0
    def setup_train(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()
        # dimensions: (batch, time)
        chord_roots = T.imatrix()
        # dimensions: (batch, time)
        relative_posns = [T.imatrix() for _ in self.encodings]
        # dimensions: (batch, time, output_data)
        encoded_melodies = [T.btensor3() for _ in self.encodings]
        # dimensions: (batch, time)
        correct_notes = T.imatrix()
        n_batch, n_time = chord_roots.shape

        def _build(det_dropout):
            all_activations = []
            for encoding, enc_lstmstack, encoded_melody, relative_pos in zip(
                    self.encodings, self.enc_lstmstacks, encoded_melodies,
                    relative_posns):
                activations = enc_lstmstack.do_preprocess_scan(
                    timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                    relative_position=relative_pos,
                    cur_chord_type=chord_types,
                    cur_chord_root=chord_roots,
                    cur_input=encoded_melody,
                    deterministic_dropout=det_dropout)
                all_activations.append(activations)
            reduced_activations = functools.reduce((lambda x, y: x + y),
                                                   all_activations)
            queue_loss, feat_strengths, feat_vects, queue_info = self.qman.process(
                reduced_activations, extra_info=True)
            features = QueueManager.queue_transform(feat_strengths, feat_vects)

            all_out_probs = []
            for encoding, dec_lstmstack, encoded_melody, relative_pos in zip(
                    self.encodings, self.dec_lstmstacks, encoded_melodies,
                    relative_posns):
                activations = dec_lstmstack.do_preprocess_scan(
                    timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                    relative_position=relative_pos,
                    cur_chord_type=chord_types,
                    cur_chord_root=chord_roots,
                    cur_feature=features,
                    last_output=T.concatenate([
                        T.tile(encoding.initial_encoded_form(),
                               (n_batch, 1, 1)), encoded_melody[:, :-1, :]
                    ], 1),
                    deterministic_dropout=det_dropout)
                out_probs = encoding.decode_to_probs(activations, relative_pos,
                                                     self.bounds.lowbound,
                                                     self.bounds.highbound)
                all_out_probs.append(out_probs)

            reduced_out_probs = functools.reduce((lambda x, y: x * y),
                                                 all_out_probs)
            normsum = T.sum(reduced_out_probs, 2, keepdims=True)
            normsum = T.maximum(normsum, constants.EPSILON)
            norm_out_probs = reduced_out_probs / normsum
            reconstruction_loss, reconstruction_info = Encoding.compute_loss(
                norm_out_probs, correct_notes, extra_info=True)

            queue_surrogate_loss_parts = self.qman.surrogate_loss(
                reconstruction_loss, queue_info)

            updates = []
            full_info = queue_info.copy()
            full_info.update(reconstruction_info)
            full_info["queue_loss"] = queue_loss
            full_info["reconstruction_loss"] = reconstruction_loss

            float_n_batch = T.cast(n_batch, 'float32')
            if self.loss_mode == "add":  # '==', not 'is': identity comparison with string literals is unreliable
                full_loss = queue_loss + reconstruction_loss
            elif self.loss_mode is "priority":
                curviness = np.array(self.loss_mode_params[0],
                                     np.float32) * float_n_batch
                # ln( e^x + e^y - 1 )
                # ln( C(e^x + e^y - 1) ) - ln(C)
                # ln( e^c(e^x + e^y - 1) ) - c
                # ln( e^(x+c) + e^(y+c) - e^c ) - c
                # ln( e^(x-c) + e^(y-c) - e^(-c) ) + c
                # Now let c = maximum(x,y), d = minimum(x,y). WLOG replace x=c, y=d
                # ln( e^(c-c) + e^(d-c) - e^(-c) ) + c
                # ln( 1 + e^(d-c) - e^(-c) ) + c
                x = reconstruction_loss / curviness
                y = queue_loss / curviness
                c = T.maximum(x, y)
                d = T.minimum(x, y)
                full_loss = (T.log(1 + T.exp(d - c) - T.exp(-c)) +
                             c) * curviness
            elif self.loss_mode is "cutoff":
                cutoff_val = np.array(self.loss_mode_params[0], np.float32)
                full_loss = T.switch(
                    reconstruction_loss < cutoff_val * float_n_batch,
                    reconstruction_loss + queue_loss, reconstruction_loss)
            elif self.loss_mode is "trigger":
                trigger_val = np.array(self.loss_mode_params[0], np.float32)
                trigger_speed = np.array(1.0 / self.loss_mode_params[1],
                                         np.float32)
                trigger_is_on = theano.shared(np.array(0, np.int8))
                trigger_scale = theano.shared(np.array(0.0, np.float32))
                full_loss = reconstruction_loss + trigger_scale * queue_loss
                updates.append(
                    (trigger_is_on,
                     T.or_(trigger_is_on,
                           reconstruction_loss < trigger_val * float_n_batch)))
                updates.append((trigger_scale,
                                T.switch(
                                    trigger_is_on,
                                    T.minimum(trigger_scale + trigger_speed,
                                              np.array(1.0, np.float32)),
                                    np.array(0.0, np.float32))))
                full_info["trigger_scale"] = trigger_scale

            if queue_surrogate_loss_parts is not None:
                surrogate_loss, addtl_updates = queue_surrogate_loss_parts
                full_loss = full_loss + surrogate_loss
                updates.extend(addtl_updates)
                full_info["surrogate_loss"] = surrogate_loss

            return full_loss, full_info, updates

        train_loss, train_info, train_updates = _build(False)
        if self.train_decoder_only:
            params = list(
                itertools.chain(*(lstmstack.params
                                  for lstmstack in self.dec_lstmstacks)))
        else:
            params = self.params
        adam_updates = Adam(train_loss, params, lr=self.learning_rate_var)

        eval_loss, eval_info, _ = _build(True)

        self.loss_info_keys = list(train_info.keys())

        self.update_fun = theano.function(
            inputs=[chord_types, chord_roots, correct_notes] + relative_posns +
            encoded_melodies,
            outputs=[train_loss] + list(train_info.values()),
            updates=train_updates + adam_updates,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True,
                               inf_is_error=True,
                               big_is_error=True) if self.nanguard else None))

        self.eval_fun = theano.function(
            inputs=[chord_types, chord_roots, correct_notes] + relative_posns +
            encoded_melodies,
            outputs=[eval_loss] + list(eval_info.values()),
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True,
                               inf_is_error=True,
                               big_is_error=True) if self.nanguard else None))
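The comment block in the "priority" branch above derives a numerically stable form of ln(e^x + e^y - 1). A small NumPy check of that algebra, a sketch under the same definitions rather than part of the model code:

import numpy as np

def priority_loss(recon, queue, curviness):
    # Stable evaluation of curviness * ln(e^(recon/c) + e^(queue/c) - 1),
    # mirroring the "priority" branch above.
    x = recon / curviness
    y = queue / curviness
    c = np.maximum(x, y)
    d = np.minimum(x, y)
    return (np.log(1.0 + np.exp(d - c) - np.exp(-c)) + c) * curviness

# For moderate inputs this matches the naive form:
r, q, cv = 2.0, 3.0, 1.5
naive = cv * np.log(np.exp(r / cv) + np.exp(q / cv) - 1.0)
assert np.isclose(priority_loss(r, q, cv), naive)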