Example 1
def test_exclude_tags():
    lst = []
    pipe = L.list_sink(lst)
    pipe = L.exclude_tags(pipe, 'fun')
    pipe1 = L.taggify(pipe, 'warn')
    pipe2 = L.taggify(pipe, 'fun')

    pipe1.send({'message': 'take care'})
    pipe2.send({'message': 'haahaa'})

    assert len(lst) == 1, 'wrong number of messages got through'
    assert lst[0]['message'] == 'take care', 'wrong message got through'
Example 2
def test_tagging():
    lst = []
    pipe = L.list_sink(lst)
    pipe = L.taggify(pipe, tags=['bla', 'blubb'])
    pipe.send({})
    assert len(lst) == 1, 'nothing added to sink'
    tags = lst[0]['tags']
    assert tags == ['bla', 'blubb'], 'tags did not get through: %s' % tags

    pipe = L.taggify(pipe, tags='hopp')
    pipe.send({})
    tags = lst[-1]['tags']
    assert tags == ['hopp', 'bla', 'blubb'], 'tags did not get through: %s' % tags
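Taken together, the two tests above show the composable pipeline style exercised here: a list sink collects dictionaries, taggify prepends its tags to whatever tags a message already carries, and exclude_tags drops messages carrying a given tag. The standalone sketch below just recombines those calls; the import alias L for the munk module is an assumption carried over from the tests, whose own import line is not shown.

import munk as L    # assumed alias; the tests above do not show their import

records = []
sink = L.list_sink(records)               # final sink: append every surviving message
sink = L.exclude_tags(sink, 'debug')      # drop anything tagged 'debug'
pipe = L.taggify(sink, tags=['run1'])     # every message sent here gains the 'run1' tag

pipe.send({'message': 'training started'})                   # gets through
L.taggify(pipe, 'debug').send({'message': 'verbose noise'})  # filtered out by exclude_tags

assert [r['message'] for r in records] == ['training started']
assert records[0]['tags'] == ['run1']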
Example 3
    def pretrain(self, schedule):
        super(DAE, self).pretrain(schedule=schedule)

        p = self.params.as_numpy_array()

        pretrained = schedule["pretrained"]

        # How many parameters in the unrolled model?
        _dec = []
        _enc = [0]
        self.psize = 0
        for layer in self:
            _enc.append(layer.shape[0] * layer.shape[1] + layer.shape[1])
            _dec.append(layer.shape[0] * layer.shape[1] + layer.shape[0])
            self.psize += _enc[-1] + _dec[-1]
        self.enc = np.cumsum(_enc)
        _dec.append(0)
        _dec.reverse()
        self.dec = np.cumsum(_dec) + self.enc[-1]

        # Build up encoder and decoder
        self.encoder = []
        self.params = gzeros(self.psize)
        for layer, (c1, c2) in izip(self, izip(self.enc[:-1], self.enc[1:])):
            self.encoder.append(layer)
            self.params[c1:c2] = p[c1:c2]
            layer.p = self.params[c1:c2]
        self.decoder = []
        for layer, (c1, c2) in izip(self[-1::-1],
                                    izip(self.dec[:-1], self.dec[1:])):
            l = layer.transpose(self.params[c1:c2])
            if pretrained:
                l.p[:l.m_end] = layer.p[:layer.m_end].reshape(
                    layer.shape).T.ravel()
            self.decoder.append(l)

        # Fix missing activations of decoder
        for i, layer in enumerate(self[-2::-1]):
            self.decoder[i].activ = layer.activ
        self.decoder[-1].activ = idnty

        msg = {"msg": "DAE unrolled: %s" % self}
        munk.taggify(self.logging, "pretty").send(msg)
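The offset bookkeeping above is easier to follow with concrete numbers. The sketch below reproduces only the index computation for two hypothetical layer weight shapes, using plain numpy instead of the GPU buffers in the original (which is Python 2 code: izip comes from itertools, and gzeros presumably allocates a gnumpy array). Encoder slices come first, decoder slices in reversed layer order start right after them, and the last decoder offset equals the total unrolled parameter count psize.

import numpy as np

shapes = [(784, 256), (256, 64)]          # hypothetical (n_in, n_out) per layer

_enc, _dec = [0], []
for n_in, n_out in shapes:
    _enc.append(n_in * n_out + n_out)     # encoder weights + hidden biases
    _dec.append(n_in * n_out + n_in)      # transposed weights + visible biases

enc = np.cumsum(_enc)                     # array([     0, 200960, 217408])
_dec.append(0)
_dec.reverse()
dec = np.cumsum(_dec) + enc[-1]           # array([217408, 234048, 435536])

assert dec[-1] == sum(_enc) + sum(_dec)   # == psize, the unrolled parameter count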
Example 4
    def train(self, schedule):
        train = [schedule["train"][0], schedule["train"][1]]
        valid = None if not schedule.get("valid") else [schedule["valid"][0], schedule["valid"][1]]

        assert (valid is not None) == ("valid" in schedule["eval"]), "Confusion about validation set!"

        opt_schedule = schedule["opt"]
        
        pp = {"type" : str(self)}
        munk.taggify(self.logging, "pretty").send(pp)
        log = munk.add_keyvalue(self.logging, "layer", "Stack")
       
        epochs = opt_schedule["epochs"]
        if epochs > 0:
            opt_schedule["f"] = self.score
            opt_schedule["fprime"] = self.grad

            if "eval_score" in opt_schedule:
                self._eval_score = opt_schedule["eval_score"]
                opt_schedule["eval_score"] = self.evaluate_score

            opt, evals, peeks = prepare_opt(opt_schedule, self.params, schedule, train, valid)

            stop = opt_schedule["stop"]
            if "peeks" in opt_schedule:
                peek_iv = opt_schedule["peek_intervall"]
                peek_files = {}
                for p in opt_schedule["peeks"]:
                    peek_files[p] = p + ".peek"
            else:
                peek_iv = epochs + 1

            for i, info in enumerate(opt):
                if (i+1) % stop == 0:
                    for e in evals:
                        info[e] = evals[e](self.params)
                    info = replace_gnumpy_data(info)
                    log.send(info)

                if i+1 == epochs:
                    break
                
                if (i+1) % peek_iv == 0:
                    for p in peeks:
                        prediction, inputs = peeks[p](self.params)
                        np.savez(peek_files[p], prediction, inputs)
                        pp = {"msg": "Writing peek file %s"%peek_files[p]}
                        munk.taggify(self.logging, "pretty").send(pp)

        else:
            pp = {"msg": "NO FINETUNING of stack"}
            munk.taggify(self.logging, "pretty").send(pp)

        _params = self.params.as_numpy_array().tolist()
        info = dict(params=_params, shape=self.__repr__())
        log.send(info)
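For reference, train() uses two munk logging idioms: human-readable status messages are tagged "pretty", while structured training records go through a pipe annotated with add_keyvalue. A minimal standalone sketch, assuming the sink is a plain list and that list_sink, taggify and add_keyvalue are all reachable from the top-level munk namespace (the example itself only shows the latter two there):

import munk

records = []
logging = munk.list_sink(records)         # stand-in for self.logging

# human-readable status messages are tagged "pretty"
munk.taggify(logging, "pretty").send({"msg": "NO FINETUNING of stack"})

# structured per-epoch records carry an extra "layer" key/value pair
log = munk.add_keyvalue(logging, "layer", "Stack")
log.send({"epoch": 1, "loss": 0.42})      # illustrative payload only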
Example 5
    def pretrain(self, schedule):
        train = [schedule["train"][0], schedule["train"][1]]
        valid = None if not schedule.get("valid") else [schedule["valid"][0], schedule["valid"][1]]

        assert (valid is not None) == ("valid" in schedule["eval"]), "Confusion about validation set!"

        for i, (layer, sched) in enumerate(izip(self, self.stack)):
            pt_params = layer.pt_init(**sched)
            
            opt_schedule = sched["opt"]
            
            pp = {"layer":i, "type":str(layer)}
            munk.taggify(self.logging, "pretty").send(pp)
            log = munk.add_keyvalue(self.logging, "layer", i)
            
            epochs = opt_schedule["epochs"]
            if epochs > 0:
                opt_schedule["f"] = layer.pt_score
                opt_schedule["fprime"] = layer.pt_grad

                opt, evals, peeks = prepare_opt(opt_schedule, pt_params, schedule, train, valid)

                stop = opt_schedule["stop"]
                for j, info in enumerate(opt):
                    if (j+1) % stop == 0:
                        for e in evals:
                            info[e] = evals[e](pt_params)
                        info = replace_gnumpy_data(info)
                        log.send(info)
                        
                    if (j+1) == epochs:
                        break
            else:
                pp = {"msg": "NO PRETRAINING of layer %i"%i}
                munk.taggify(self.logging, "pretty").send(pp)

            info = layer.pt_done(pt_params, **sched)
            pt_params = None
            log.send(info)

            # move data forward, save in temporary hdf5
            if i < (len(self) - 1):
                nxt_name = strftime("%Y-%m-%d-%H:%M:%S") + "_L" + str(i+1) + "_TMP.h5"
                nxt = h5py.File(nxt_name)
                pp = {"msg": "Take care of temporary " + nxt_name}
                munk.taggify(self.logging, "pretty").send(pp)
                # if a validation set is available, move it forward, too.
                if valid:
                    valid[0] = self.next_hdf5(layer, valid[0], "validation", nxt, chunk=512)
                train[0] = self.next_hdf5(layer, train[0], "train", nxt, chunk=512)
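One practical detail from the snippet above: the activations forwarded to the next layer land in a throwaway HDF5 file whose name encodes a timestamp and the index of that next layer. A minimal sketch of just that naming, with an explicit h5py file mode (the original relies on h5py's default):

from time import strftime
import h5py

i = 0                                                             # layer just pretrained
nxt_name = strftime("%Y-%m-%d-%H:%M:%S") + "_L" + str(i + 1) + "_TMP.h5"
nxt = h5py.File(nxt_name, "a")                                    # temporary container for forwarded data
print("Take care of temporary " + nxt_name)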