Example #1
 def __call__(self):
     # for slow iterator
     while True:
         batch = self.data.next()
         if batch:
             self.prev_batch = batch
             break
         else:
             if self.prev_batch:
                 batch = self.prev_batch
                 break
     # Perturb the data (! and the model)
     if isinstance(batch, dict):
         batch = self.model.perturb(**batch)
     else:
         batch = self.model.perturb(*batch)
     # Load the dataset into GPU
     # Note: not the most efficient approach in general, as each
     # batch is copied to the GPU individually
     if isinstance(batch, dict):
         for gdata in self.gdata:
             gdata.set_value(batch[gdata.name], borrow=True)
     else:
         for gdata, data in zip(self.gdata, batch):
             gdata.set_value(data, borrow=True)
     # Run the training function
     g_st = time.time()
     rvals = self.train_fn()
     for schedule in self.schedules:
         schedule(self, rvals[-1])
     self.update_fn()
     g_ed = time.time()
     self.state["lr"] = float(self.lr)
     cost = rvals[-1]
     self.old_cost = cost
     whole_time = time.time() - self.step_timer
     if self.step % self.state["trainFreq"] == 0:
         msg = ".. iter %4d cost %.3f"
         vals = [self.step, cost]
         for dx, prop in enumerate(self.prop_names):
             msg += " " + prop + " %.2e"
             vals += [float(numpy.array(rvals[dx]))]
         msg += " step time %s whole time %s lr %.2e"
         vals += [print_time(g_ed - g_st), print_time(time.time() - self.step_timer), float(self.lr)]
         print msg % tuple(vals)
     self.step += 1
     ret = dict(
         [
             ("cost", float(cost)),
             ("error", float(cost)),
             ("lr", float(self.lr)),
             ("time_step", float(g_ed - g_st)),
             ("whole_time", float(whole_time)),
         ]
         + zip(self.prop_names, rvals)
     )
     return ret
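
The self.gdata loop above relies on Theano shared variables as persistent containers for the current batch. A minimal standalone sketch of that pattern, with made-up shapes and input names that are not taken from the example:

import numpy
import theano

# Hypothetical host-side batch; the shapes and names are invented here.
batch = {'x': numpy.zeros((50, 32), dtype='int64'),
         'x_mask': numpy.ones((50, 32), dtype='float32')}

# One shared variable per model input; with a GPU backend configured,
# Theano keeps these containers in device memory.
gdata = [theano.shared(batch[name], name=name) for name in ('x', 'x_mask')]

# Refresh the containers in place before each call to the compiled
# training function.  borrow=True avoids an extra host-side copy when the
# numpy array is not reused elsewhere.
for container in gdata:
    container.set_value(batch[container.name], borrow=True)
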
Example #2
    def __call__(self):
        df_st = time.time()
        batch = self.data.next()
        df_et = time.time()
        assert batch

        # Perturb the data (! and the model)
        if isinstance(batch, dict):
            batch = self.model.perturb(**batch)
        else:
            batch = self.model.perturb(*batch)
        # Load the dataset into GPU
        # Note: not the most efficient approach in general, as each
        # batch is copied to the GPU individually
        if isinstance(batch, dict):
            for gdata in self.gdata:
                gdata.set_value(batch[gdata.name], borrow=True)
        else:
            for gdata, data in zip(self.gdata, batch):
                gdata.set_value(data, borrow=True)
        # Run the training function
        g_st = time.time()
        lr_val = self.lr.get_value()
        cutoff_val = self.cutoff.get_value()
        rvals = self.train_fn()
        for schedule in self.schedules:
            schedule(self, rvals[-1])
        self.update_fn()
        g_ed = time.time()
        self.state['lr'] = lr_val
        cost = rvals[-1]
        self.old_cost = cost
        whole_time = time.time() - self.step_timer
        if self.step % self.state['trainFreq'] == 0:
            msg = '.. iter %4d cost %.3f'
            vals = [self.step, cost]
            for dx, prop in enumerate(self.prop_names):
                msg += ' '+prop+' %.2e'
                vals += [float(numpy.array(rvals[dx]))]
            msg += ' dload %s step time %s whole time %s lr %.2e co %.2e'
            vals += [print_time(df_et-df_st),
                     print_time(g_ed - g_st),
                     print_time(time.time() - self.step_timer),
                     lr_val, cutoff_val]
            print msg % tuple(vals)
        self.step += 1
        if self.state['cutoff_adapt']:
            print 'gn_log_ave ', self.gnorm_log_ave.get_value(), ' gn_log2_ave ', self.gnorm_log2_ave.get_value(), ' gs ', self.cutoff_adapt_steps.get_value()
        ret = dict([('cost', float(cost)),
                    ('error', float(cost)),
                    ('lr', lr_val),
                    ('cutoff', cutoff_val),
                    ('time_step', float(g_ed - g_st)),
                    ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
        return ret
Example #3
 def __call__(self):
     # for slow iterator
     while True:
         batch = self.data.next()
         if batch:
             self.prev_batch = batch
             break
         else:
             if self.prev_batch:
                 batch = self.prev_batch
                 break
     # Perturb the data (! and the model)
     if isinstance(batch, dict):
         batch = self.model.perturb(**batch)
     else:
         batch = self.model.perturb(*batch)
     # Load the dataset into GPU
     # Note: not the most efficient approach in general, as each
     # batch is copied to the GPU individually
     if isinstance(batch, dict):
         for gdata in self.gdata:
             gdata.set_value(batch[gdata.name], borrow=True)
     else:
         for gdata, data in zip(self.gdata, batch):
             gdata.set_value(data, borrow=True)
     # Run the training function
     g_st = time.time()
     rvals = self.train_fn()
     for schedule in self.schedules:
         schedule(self, rvals[-1])
     self.update_fn()
     g_ed = time.time()
     self.state['lr'] = float(self.lr)
     cost = rvals[-1]
     self.old_cost = cost
     whole_time = time.time() - self.step_timer
     if self.step % self.state['trainFreq'] == 0:
         msg = '.. iter %4d cost %.3f'
         vals = [self.step, cost]
         for dx, prop in enumerate(self.prop_names):
             msg += ' ' + prop + ' %.2e'
             vals += [float(numpy.array(rvals[dx]))]
         msg += ' step time %s whole time %s lr %.2e'
         vals += [
             print_time(g_ed - g_st),
             print_time(time.time() - self.step_timer),
             float(self.lr)
         ]
         print msg % tuple(vals)
     self.step += 1
     ret = dict([('cost', float(cost)), ('error', float(cost)),
                 ('lr', float(self.lr)), ('time_step', float(g_ed - g_st)),
                 ('whole_time', float(whole_time))] +
                zip(self.prop_names, rvals))
     return ret
Example #4
    def __call__(self):
        batch = self.data.next()
        assert batch

        # Perturb the data (! and the model)
        if isinstance(batch, dict):
            batch = self.model.perturb(**batch)
        else:
            batch = self.model.perturb(*batch)
        # Load the dataset into GPU
        # Note: not the most efficient approach in general, as each
        # batch is copied to the GPU individually

        # only suitable for this mode
        if isinstance(batch, dict):
            for gdata in self.gdata:
                gdata.set_value(batch[gdata.name], borrow=True)
        else:
            for gdata, data in zip(self.gdata, batch):
                gdata.set_value(data, borrow=True)
        # Run the training function
        g_st = time.time()
        rvals = self.train_fn()
        ############################################################
        #exported_grad = self.export_grad_fn()
        #print exported_grad
        ############################################################
        for schedule in self.schedules:
            schedule(self, rvals[-1])
        self.update_fn()
        g_ed = time.time()
        self.state['lr'] = float(self.lr)
        cost = rvals[-1]
        self.old_cost = cost
        whole_time = time.time() - self.step_timer
        if self.step % self.state['trainFreq'] == 0:
            msg = '.. iter %4d cost %.3f'
            vals = [self.step, cost]
            for dx, prop in enumerate(self.prop_names):
                msg += ' '+prop+' %.2e'
                vals += [float(numpy.array(rvals[dx]))]
            msg += ' step time %s whole time %s lr %.2e'
            vals += [print_time(g_ed - g_st),
                     print_time(time.time() - self.step_timer),
                     float(self.lr)]
            print msg % tuple(vals)
        self.step += 1
        ret = dict([('cost', float(cost)),
                    ('error', float(cost)),
                    ('lr', float(self.lr)),
                    ('time_step', float(g_ed - g_st)),
                    ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
        return ret
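
Every step function above formats its timings with print_time, which is not included in these excerpts. The stand-in below is only an assumption about its interface (pretty-print a duration given in seconds), not the library's actual helper:

def print_time(seconds):
    # Assumed behaviour: render a duration in seconds as a short string.
    if seconds < 60.0:
        return '%.1f sec' % seconds
    if seconds < 3600.0:
        return '%.1f min' % (seconds / 60.0)
    return '%.1f h' % (seconds / 3600.0)

Under that assumption, print_time(g_ed - g_st) would yield strings such as '0.3 sec'.
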
Example #5
    def validate(self):
        rvals = self.model.validate(self.valid_data)
        msg = '**  %d     validation:' % self.valid_id
        print_mem('validate')
        self.valid_id += 1
        self.batch_start_time = time.time()
        pos = self.step // self.state['validFreq']
        for k, v in rvals:
            msg = msg + ' ' + k + ':%f ' % float(v)
            self.timings['valid'+k][pos] = float(v)
            self.state['valid'+k] = float(v)
        msg += 'whole time %s' % print_time(time.time() - self.start_time)
        msg += ' patience %d' % self.patience
        print msg

        if self.train_cost:
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = '**  %d     train:' % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + ' ' + k + ':%6.3f ' % float(v)
                self.timings['fulltrain' + k] = float(v)
                self.state['fulltrain' + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state['validtime'] = float(time.time() - self.start_time)/60.
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state['bvalidcost'] > cost:
            self.state['bvalidcost'] = float(cost)
            for k, v in rvals:
                self.state['bvalid'+k] = float(v)
            self.state['bstep'] = int(self.step)
            self.state['btime'] = int(time.time() - self.start_time)
            self.test()
        elif numpy.random.rand(1) > self.state['rand_test_inclusion']:
            print "Shouldn't test, but you got lucky", cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
            self.test()
        else:
            print 'No testing', cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
        print_mem('validate')
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
Example #6
    def validate(self):
        rvals = self.model.validate(self.valid_data)
        msg = "**  %d     validation:" % self.valid_id
        self.valid_id += 1
        self.batch_start_time = time.time()
        pos = self.step // self.state["validFreq"]
        for k, v in rvals:
            msg = msg + " " + k + ":%f " % float(v)
            self.timings["valid" + k][pos] = float(v)
            self.state["valid" + k] = float(v)
        msg += "whole time %s" % print_time(time.time() - self.start_time)
        msg += " patience %d" % self.patience
        print msg

        if self.train_cost:
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = "**  %d     train:" % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + " " + k + ":%6.3f " % float(v)
                self.timings["fulltrain" + k] = float(v)
                self.state["fulltrain" + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state["validtime"] = float(time.time() - self.start_time) / 60.0
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state["bvalidcost"] > cost:
            self.state["bvalidcost"] = float(cost)
            for k, v in rvals:
                self.state["bvalid" + k] = float(v)
            self.state["bstep"] = int(self.step)
            self.state["btime"] = int(time.time() - self.start_time)
            self.test()
        else:
            print "No testing", cost, ">", self.state["bvalidcost"]
            for k, v in self.state.items():
                if "test" in k:
                    print k, v
        print_mem("validate")
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
Example #7
    def validate(self):
        rvals = self.model.validate(self.valid_data)
        msg = '**  %d     validation:' % self.valid_id
        self.valid_id += 1
        self.batch_start_time = time.time()
        pos = self.step // self.state['validFreq']
        for k, v in rvals:
            msg = msg + ' ' + k + ':%f ' % float(v)
            self.timings['valid' + k][pos] = float(v)
            self.state['valid' + k] = float(v)
        msg += 'whole time %s' % print_time(time.time() - self.start_time)
        msg += ' patience %d' % self.patience
        print msg

        if self.train_cost:
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = '**  %d     train:' % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + ' ' + k + ':%6.3f ' % float(v)
                self.timings['fulltrain' + k] = float(v)
                self.state['fulltrain' + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state['validtime'] = float(time.time() - self.start_time) / 60.
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state['bvalidcost'] > cost:
            self.state['bvalidcost'] = float(cost)
            for k, v in rvals:
                self.state['bvalid' + k] = float(v)
            self.state['bstep'] = int(self.step)
            self.state['btime'] = int(time.time() - self.start_time)
            self.test()
        else:
            print 'No testing', cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
        print_mem('validate')
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
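
Examples #5 to #7 show the validation side of the same trainer, but how the step callable (__call__) and validate() are driven is not part of these excerpts. A hypothetical outer loop, in which every name except step() and validate() is invented for illustration, might look like this:

def train_loop(step, validator, state, max_steps=100000):
    # Run SGD updates and validate every state['validFreq'] updates.
    for it in range(1, max_steps + 1):
        stats = step()                      # one update; returns cost, lr, timings
        if it % state['validFreq'] == 0:
            valid_cost = validator.validate()
            print('validation cost after %d updates: %f' % (it, valid_cost))
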
Example #8
    def __call__(self):
        batch = self.data.next()
        assert batch

        # Perturb the data (! and the model)
        if isinstance(batch, dict):
            batch = self.model.perturb(**batch)
        else:
            batch = self.model.perturb(*batch)
        # Load the dataset into GPU
        # Note: not the most efficient approach in general, as each
        # batch is copied to the GPU individually

        sampleN = self.state['sampleN']

        myL = int(1.5*len(batch['y']))

        xi = []
        for i in xrange(self.state['num_systems']):
            xi.append(batch['x'+str(i)].squeeze())
        samples, probs = self.sampler(sampleN,myL,1,*xi)

        #samples, probs = self.sampler(sampleN,myL,1,batch['x'].squeeze())

        y,b = getUnique(samples, batch['y'], self.state)

        b = numpy.array(b,dtype='float32')
#        p = probs.sum(axis=0)
#        p = [math.exp(-i) for i in p]
#        p = [i/sum(p) for i in p]

#        print p
#        print b.mean()
#        print (b*p).mean()
        Y,YM = getYM(y, self.state)
#        print b
#        print Y
#        print YM

        diffN = len(b)

        for i in xrange(self.state['num_systems']):
            X = numpy.zeros((batch['x'+str(i)].shape[0], diffN), dtype='int64')
            batch['x'+str(i)] = batch['x'+str(i)]+X
            X = numpy.zeros((batch['x'+str(i)].shape[0], diffN), dtype='float32')
            batch['x_mask'+str(i)] = batch['x_mask'+str(i)]+X

        batch['y'] = Y
        batch['y_mask'] = YM
        batch['b'] = b

#        if not hasattr(self,'Last'):
#            self.Last = True
#            self.lastbatch = batch
#        else:
#            if self.Last:
#                batch = self.lastbatch
#                self.Last = False
#            else:
#                self.lastbatch = batch
#                self.Last = True
#        print batch['y']

        if isinstance(batch, dict):
            for gdata in self.gdata:
                gdata.set_value(batch[gdata.name], borrow=True)
        else:
            for gdata, data in zip(self.gdata, batch):
                gdata.set_value(data, borrow=True)
        # Run the training function
        g_st = time.time()
        rvals = self.train_fn()
        ############################################################
        #exported_grad = self.export_grad_fn()
        #print exported_grad
        ############################################################
        for schedule in self.schedules:
            schedule(self, rvals[-1])
        self.update_fn()
        g_ed = time.time()
        self.state['lr'] = float(self.lr)
        cost = rvals[-1]
        self.old_cost = cost
        whole_time = time.time() - self.step_timer
        if self.step % self.state['trainFreq'] == 0:
            msg = '.. iter %4d cost %.3f'
            vals = [self.step, cost]
            for dx, prop in enumerate(self.prop_names):
                msg += ' '+prop+' %.2e'
                vals += [float(numpy.array(rvals[dx]))]
            msg += ' step time %s whole time %s lr %.2e'
            vals += [print_time(g_ed - g_st),
                     print_time(time.time() - self.step_timer),
                     float(self.lr)]
            print msg % tuple(vals)
        self.step += 1
        ret = dict([('cost', float(cost)),
                    ('error', float(cost)),
                    ('lr', float(self.lr)),
                    ('time_step', float(g_ed - g_st)),
                    ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
        return ret
Example #9
    def __call__(self):
        batch = self.data.next()
        assert batch

        null_inputs = sum(batch["x"].flatten() == self.null_word) / float(len(batch["x"][0]))

        # replace occurrences of <null> with </s> (<null> should be last word in sentence)
        for i in range(1, len(batch["x"]) - 1):
            batch["x_mask"][i + 1][batch["x"][i] == self.null_word] = 0
        batch["x"][batch["x"] == self.null_word] = 0

        # if <null> was only word in sentence, add it back in to prevent empty input
        batch["x"][0][batch["x"][0] == 0] = self.null_word

        if self.state["rolling_vocab"]:  # Assumes batch is a dictionary
            batch["x"] = replace_array(batch["x"], self.model.large2small_src)
            batch["y"] = replace_array(batch["y"], self.model.large2small_trgt)

        # Perturb the data (! and the model)
        if isinstance(batch, dict):
            batch = self.model.perturb(**batch)
        else:
            batch = self.model.perturb(*batch)
        # Load the dataset into GPU
        # Note: not the most efficient approach in general, as each
        # batch is copied to the GPU individually
        if isinstance(batch, dict):
            for gdata in self.gdata:
                gdata.set_value(batch[gdata.name], borrow=True)
        else:
            for gdata, data in zip(self.gdata, batch):
                gdata.set_value(data, borrow=True)
        # Run the training function
        g_st = time.time()
        rvals = self.train_fn()
        for schedule in self.schedules:
            schedule(self, rvals[-1])
        if null_inputs > 0.5:
            self.update_fn_lm()
        else:
            self.update_fn()
        g_ed = time.time()
        self.state["lr"] = float(self.lr)
        cost = rvals[-1]
        self.old_cost = cost
        whole_time = time.time() - self.step_timer
        if self.step % self.state["trainFreq"] == 0:
            msg = ".. iter %4d cost %.3f"
            vals = [self.step, cost]
            for dx, prop in enumerate(self.prop_names):
                msg += " " + prop + " %.2e"
                vals += [float(numpy.array(rvals[dx]))]
            msg += " step time %s whole time %s lr %.2e"
            vals += [print_time(g_ed - g_st), print_time(time.time() - self.step_timer), float(self.lr)]
            print msg % tuple(vals)
        self.step += 1
        ret = dict(
            [
                ("cost", float(cost)),
                ("error", float(cost)),
                ("lr", float(self.lr)),
                ("time_step", float(g_ed - g_st)),
                ("whole_time", float(whole_time)),
            ]
            + zip(self.prop_names, rvals)
        )
        return ret
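
The <null> handling at the top of Example #9 is plain NumPy boolean-mask assignment. A self-contained sketch of the two replacements, with made-up token ids:

import numpy

NULL_ID = 3   # hypothetical id of the <null> token
EOS_ID = 0    # id used for </s> in the example above

# x is laid out (time steps, sentences), as in the batches above.
x = numpy.array([[NULL_ID, 5, 7],
                 [EOS_ID, NULL_ID, 2]], dtype='int64')

# Replace every occurrence of <null> with </s>: the same boolean-mask
# assignment applied to batch['x'] in Example #9.
x[x == NULL_ID] = EOS_ID

# For sentences whose first word is now </s> (<null> was their only word),
# put <null> back so the input is not empty, mirroring the example's guard.
x[0][x[0] == EOS_ID] = NULL_ID
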