Example #1
0
 def main(self):
     """Run the test pass, then print aggregate timing statistics.

     NOTE(review): `timings` and `state` are referenced as bare names here
     (not `self.timings` / `self.state`) — presumably module-level globals;
     confirm they are in scope at the call site.
     """
     print_mem('start')

     #if test_data is not None:
     self.test()
     # Total wall-clock time for the run, in minutes (Python 2 print).
     print 'Took', (time.time() - self.start_time)/60., 'min'
     # Mean per-step wall time and mean log2 cost per example over the
     # first `self.step` recorded entries.
     avg_step = timings['time_step'][:self.step].mean()
     avg_cost2expl = timings['log2_p_expl'][:self.step].mean()
     print "Average step took {}".format(avg_step)
     # Throughput estimate: steps/sec * 86400 sec/day * batch size.
     print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * state['bs'])
     print "Average log2 per example is {}".format(avg_cost2expl)
Example #2
0
    def main(self):
        print_mem('start')

        #if test_data is not None:
        self.test()
        print 'Took', (time.time() - self.start_time) / 60., 'min'
        avg_step = timings['time_step'][:self.step].mean()
        avg_cost2expl = timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(
            1 / avg_step * 86400 * state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #3
0
    def validate(self):
        rvals = self.model.validate(self.valid_data)
        msg = '**  %d     validation:' % self.valid_id
        print_mem('validate')
        self.valid_id += 1
        self.batch_start_time = time.time()
        pos = self.step // self.state['validFreq']
        for k, v in rvals:
            msg = msg + ' ' + k + ':%f ' % float(v)
            self.timings['valid'+k][pos] = float(v)
            self.state['valid'+k] = float(v)
        msg += 'whole time %s' % print_time(time.time() - self.start_time)
        msg += ' patience %d' % self.patience
        print msg

        if self.train_cost:
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = '**  %d     train:' % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + ' ' + k + ':%6.3f ' % float(v)
                self.timings['fulltrain' + k] = float(v)
                self.state['fulltrain' + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state['validtime'] = float(time.time() - self.start_time)/60.
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state['bvalidcost'] > cost:
            self.state['bvalidcost'] = float(cost)
            for k, v in rvals:
                self.state['bvalid'+k] = float(v)
            self.state['bstep'] = int(self.step)
            self.state['btime'] = int(time.time() - self.start_time)
            self.test()
        elif numpy.random.rand(1) > self.state['rand_test_inclusion']:
            print 'Shouldn''t test, but you got lucky', cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
            self.test()
        else:
            print 'No testing', cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
        print_mem('validate')
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
Example #4
0
    def validate(self):
        rvals = self.model.validate(self.valid_data)
        msg = "**  %d     validation:" % self.valid_id
        self.valid_id += 1
        self.batch_start_time = time.time()
        pos = self.step // self.state["validFreq"]
        for k, v in rvals:
            msg = msg + " " + k + ":%f " % float(v)
            self.timings["valid" + k][pos] = float(v)
            self.state["valid" + k] = float(v)
        msg += "whole time %s" % print_time(time.time() - self.start_time)
        msg += " patience %d" % self.patience
        print msg

        if self.train_cost:
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = "**  %d     train:" % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + " " + k + ":%6.3f " % float(v)
                self.timings["fulltrain" + k] = float(v)
                self.state["fulltrain" + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state["validtime"] = float(time.time() - self.start_time) / 60.0
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state["bvalidcost"] > cost:
            self.state["bvalidcost"] = float(cost)
            for k, v in rvals:
                self.state["bvalid" + k] = float(v)
            self.state["bstep"] = int(self.step)
            self.state["btime"] = int(time.time() - self.start_time)
            self.test()
        else:
            print "No testing", cost, ">", self.state["bvalidcost"]
            for k, v in self.state.items():
                if "test" in k:
                    print k, v
        print_mem("validate")
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
Example #5
0
    def validate(self):
        """Validate the model and book-keep the results.

        Writes per-metric values into self.timings['valid'+k] and
        self.state['valid'+k], optionally also evaluates on the training
        data, tracks the best validation cost seen so far (running
        self.test() on improvement), and returns the validation cost —
        passed through self.validate_postprocess when that hook is set.
        """
        rvals = self.model.validate(self.valid_data)
        msg = '**  %d     validation:' % self.valid_id
        self.valid_id += 1
        self.batch_start_time = time.time()
        # Index of this validation round in the timings arrays.
        pos = self.step // self.state['validFreq']
        for k, v in rvals:
            msg = msg + ' ' + k + ':%f ' % float(v)
            self.timings['valid' + k][pos] = float(v)
            self.state['valid' + k] = float(v)
        msg += 'whole time %s' % print_time(time.time() - self.start_time)
        msg += ' patience %d' % self.patience
        print msg

        if self.train_cost:
            # Also evaluate on the training set; the validation results are
            # restored into rvals afterwards so `cost` below is validation.
            valid_rvals = rvals
            rvals = self.model.validate(self.train_data, True)
            msg = '**  %d     train:' % (self.valid_id - 1)
            for k, v in rvals:
                msg = msg + ' ' + k + ':%6.3f ' % float(v)
                self.timings['fulltrain' + k] = float(v)
                self.state['fulltrain' + k] = float(v)
            print msg
            rvals = valid_rvals

        self.state['validtime'] = float(time.time() - self.start_time) / 60.
        # Just pick the first thing that the cost returns
        cost = rvals[0][1]
        if self.state['bvalidcost'] > cost:
            # New best validation cost: record it and run a test pass.
            self.state['bvalidcost'] = float(cost)
            for k, v in rvals:
                self.state['bvalid' + k] = float(v)
            self.state['bstep'] = int(self.step)
            self.state['btime'] = int(time.time() - self.start_time)
            self.test()
        else:
            print 'No testing', cost, '>', self.state['bvalidcost']
            for k, v in self.state.items():
                if 'test' in k:
                    print k, v
        print_mem('validate')
        if self.validate_postprocess:
            return self.validate_postprocess(cost)
        return cost
Example #6
0
    def main(self):
        """Main training loop.

        Repeatedly runs self.algo(), records per-step timings/state,
        periodically saves and validates (with patience-based learning-rate
        division), and runs hooks. Exits on loopIters / minerr / timeStop /
        minlr limits or KeyboardInterrupt, then performs a final validation,
        save, and timing summary.
        """
        # This trainer variant does not support periodic iterator resets.
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume the step counter from saved timings (0 on a fresh run).
        self.step = int(self.timings['step'])
        self.algo.step = self.step

        self.save_iter = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        last_cost = 1.
        self.state['clr'] = self.state['lr']
        # Resume the data iterator where the previous run left off.
        self.train_data.start(self.timings['next_offset']
                if 'next_offset' in self.timings
                else -1)

        while (self.step < self.state['loopIters'] and
               last_cost > .1*self.state['minerr'] and
               (time.time() - start_time)/60. < self.state['timeStop'] and
               self.state['lr'] > self.state['minlr']):
            print 'step:', self.step
            # Wall-clock-based checkpointing every saveFreq minutes.
            if self.step > 0 and (time.time() - self.save_time)/60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            if self.state['save_by_iter'] and self.step % self.state['saveiter'] == 0:
                self.save_DIY()
            # NOTE(review): `st` is assigned but never read in this method.
            st = time.time()
            try:
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                for name in rvals.keys():
                    self.timings[name][self.step] = float(numpy.array(rvals[name]))
                if self.l2_params:
                    # RMS of each parameter, recorded for monitoring.
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                # NOTE(review): despite on_nan == 'raise' this branch does
                # not raise; it saves and sets last_cost = 0, which normally
                # makes the while condition fail on the next iteration.
                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: burn one unit of patience.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                                self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])
                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()
                self.step += 1
                # Persist progress so a reload resumes at the right place.
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset
            except KeyboardInterrupt:
                break

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time)/60., 'min'
        # Mean per-step wall time and mean log2 cost per example.
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #7
0
    def main(self):
        """Main training loop with optional rolling-vocabulary support.

        Each iteration runs self.algo() and records timings/state. When
        state['rolling_vocab'] is set, small/large vocabulary mappings are
        exchanged at the step boundaries keyed in
        self.model.rolling_vocab_dict; when the training iterator raises
        StopIteration and state['reprocess_each_iteration'] is set, the
        data is re-preprocessed and the iterator rebuilt. Also handles
        periodic saving, validation with patience-based learning-rate
        division, and hooks. Stops on loopIters / minerr / timeStop /
        minlr limits or KeyboardInterrupt, then runs a final validation,
        save, and timing summary.
        """
        # This trainer variant does not support periodic iterator resets.
        assert self.reset == -1

        print_mem("start")
        self.state["gotNaN"] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume the step counter from saved timings (0 on a fresh run).
        self.step = int(self.timings["step"])
        self.algo.step = self.step

        if self.state["save_iter"] < 0:
            # Fresh run: save immediately.
            self.save_iter = 0
            self.state["save_iter"] = 0
            self.save()
            if self.channel is not None:
                self.channel.save()
        else:  # Fake saving
            self.save_iter += 1
            self.state["save_iter"] = self.save_iter
        self.save_time = time.time()

        last_cost = 1.0
        self.state["clr"] = self.state["lr"]
        # Resume the data iterator where the previous run left off.
        self.train_data.start(self.timings["next_offset"] if "next_offset" in self.timings else -1)

        if self.state["rolling_vocab"]:
            # Replay the batches consumed since the last recorded
            # super-batch boundary (next_offset is saved at super_step).
            for i in xrange(self.timings["step"] - self.timings["super_step"]):
                self.train_data.next()

        if self.state["rolling_vocab"]:
            # Make sure dictionary is current.
            # If training is interrupted when the vocabularies are exchanged,
            # things may get broken.
            step_modulo = self.step % self.model.total_num_batches
            if step_modulo in self.model.rolling_vocab_dict:  # 0 always in.
                cur_key = step_modulo
            else:
                cur_key = 0
                for key in self.model.rolling_vocab_dict:
                    if (key < step_modulo) and (key > cur_key):  # Find largest key smaller than step_modulo
                        cur_key = key
            new_large2small_src = self.model.Dx_shelve[str(cur_key)]
            new_large2small_trgt = self.model.Dy_shelve[str(cur_key)]
            self.roll_vocab_update_dicts(new_large2small_src, new_large2small_trgt)
            self.zero_or_reload = True

        while (
            self.step < self.state["loopIters"]
            and last_cost > 0.1 * self.state["minerr"]
            and (time.time() - start_time) / 60.0 < self.state["timeStop"]
            and self.state["lr"] > self.state["minlr"]
        ):
            # Wall-clock-based checkpointing every saveFreq minutes.
            if self.step > 0 and (time.time() - self.save_time) / 60.0 >= self.state["saveFreq"]:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            # NOTE(review): `st` is assigned but never read in this method.
            st = time.time()
            try:
                if self.state["rolling_vocab"]:
                    step_modulo = self.step % self.model.total_num_batches
                    if step_modulo in self.model.rolling_vocab_dict:
                        # Vocabulary-exchange boundary reached.
                        if not self.zero_or_reload:
                            self.roll_vocab_small2large()  # Not necessary for 0 or when reloading a properly saved model
                            new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                            new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                            self.roll_vocab_update_dicts(
                                new_large2small_src, new_large2small_trgt
                            )  # Done above for 0 or reloaded model
                        self.roll_vocab_large2small()
                        try:
                            tmp_batch = self.train_data.next(peek=True)
                        except StopIteration:
                            # End of data while peeking: optionally
                            # re-preprocess and rebuild the iterator.
                            if self.state["reprocess_each_iteration"]:
                                logger.info("Reached end of file; re-preprocessing")
                                subprocess.check_call(self.state["reprocess_each_iteration"], shell=True)
                                if self.state["rolling_vocab"]:
                                    os.remove(self.state["Dx_file"])
                                    os.remove(self.state["Dy_file"])
                                    tmp_state = copy.deepcopy(self.state)
                                    rolling_dicts.main(tmp_state)
                                    with open(self.state["rolling_vocab_dict"], "rb") as f:
                                        self.model.rolling_vocab_dict = cPickle.load(f)
                                    self.model.total_num_batches = max(self.model.rolling_vocab_dict)
                                    self.model.Dx_shelve = shelve.open(self.state["Dx_file"])
                                    self.model.Dy_shelve = shelve.open(self.state["Dy_file"])
                                    # round up/down number of steps so modulo is 0 (hack because total_num_batches can change)
                                    logger.debug("step before restart: {0}".format(self.step))
                                    if self.step % self.model.total_num_batches < self.model.total_num_batches / 2:
                                        self.step -= self.step % self.model.total_num_batches
                                    else:
                                        self.step += self.model.total_num_batches - (
                                            self.step % self.model.total_num_batches
                                        )
                                    logger.debug("step after restart: {0}".format(self.step))

                                logger.debug("Load data")
                                self.train_data = get_batch_iterator(
                                    self.state, numpy.random.RandomState(self.state["seed"])
                                )
                                self.train_data.start(-1)
                                self.timings["next_offset"] = -1

                                step_modulo = self.step % self.model.total_num_batches
                                if step_modulo in self.model.rolling_vocab_dict:
                                    if not self.zero_or_reload:
                                        self.roll_vocab_small2large()  # Not necessary for 0 or when reloading a properly saved model
                                        new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                                        new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                                        self.roll_vocab_update_dicts(
                                            new_large2small_src, new_large2small_trgt
                                        )  # Done above for 0 or reloaded model
                                    self.roll_vocab_large2small()

                                self.algo.data = self.train_data
                                self.algo.step = self.step
                                tmp_batch = self.train_data.next(peek=True)

                                if self.hooks:
                                    self.hooks[0].train_iter = self.train_data
                            else:
                                self.save()
                                raise
                        # Sanity check: the peeked batch must match the first
                        # sentences recorded for this vocabulary boundary.
                        if (
                            tmp_batch["x"][:, 0].tolist(),
                            tmp_batch["y"][:, 0].tolist(),
                        ) == self.model.rolling_vocab_dict[step_modulo]:
                            logger.debug("Identical first sentences. OK")
                        else:
                            logger.error("Batches do not correspond.")
                    elif self.state["hookFreq"] > 0 and self.step % self.state["hookFreq"] == 0 and self.hooks:
                        [fn() for fn in self.hooks]
                    # Hook first so that the peeked batch is the same as the one used in algo
                    # Use elif not to peek twice
                try:
                    rvals = self.algo()
                except StopIteration:
                    # End of data during the actual step: same recovery as
                    # above when reprocess_each_iteration is configured.
                    if self.state["reprocess_each_iteration"]:
                        logger.info("Reached end of file; re-preprocessing")
                        subprocess.check_call(self.state["reprocess_each_iteration"], shell=True)
                        logger.debug("Load data")
                        self.train_data = get_batch_iterator(self.state, numpy.random.RandomState(self.state["seed"]))
                        self.train_data.start(-1)
                        self.timings["next_offset"] = -1
                        self.algo.data = self.train_data
                        self.algo.step = self.step

                        rvals = self.algo()

                        if self.hooks:
                            self.hooks[0].train_iter = self.train_data
                    else:
                        self.save()
                        raise
                self.state["traincost"] = float(rvals["cost"])
                self.state["step"] = self.step
                last_cost = rvals["cost"]
                for name in rvals.keys():
                    self.timings[name][self.step] = float(numpy.array(rvals[name]))
                if self.l2_params:
                    # RMS of each parameter, recorded for monitoring.
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] = numpy.mean(param.get_value() ** 2) ** 0.5

                # NOTE(review): despite on_nan == 'raise' this branch does
                # not raise; it saves and sets last_cost = 0, which normally
                # makes the while condition fail on the next iteration.
                if (numpy.isinf(rvals["cost"]) or numpy.isnan(rvals["cost"])) and self.state["on_nan"] == "raise":
                    self.state["gotNaN"] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print "Got NaN while training"
                    last_cost = 0
                if self.valid_data is not None and self.step % self.state["validFreq"] == 0 and self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state["cost_threshold"]:
                        # Validation got worse: burn one unit of patience.
                        self.patience -= 1
                        if "lr_start" in self.state and self.state["lr_start"] == "on_error":
                            self.state["lr_start"] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state["patience"]
                        self.old_cost = valcost

                    if self.state["divide_lr"] and self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state["divide_lr"]
                        bparams = dict(self.model.best_params)
                        self.patience = self.state["patience"]
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if not self.state["rolling_vocab"]:  # Standard use of hooks
                    if self.state["hookFreq"] > 0 and self.step % self.state["hookFreq"] == 0 and self.hooks:
                        [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and self.step % self.reset == 0:
                    print "Resetting the data iterator"
                    self.train_data.reset()

                self.step += 1
                if self.state["rolling_vocab"]:
                    self.zero_or_reload = False
                    self.timings["step"] = self.step  # Step now
                    if (self.step % self.model.total_num_batches) % self.state[
                        "sort_k_batches"
                    ] == 0:  # Start of a super_batch.
                        logger.debug("Set super_step and next_offset")
                        # This log shoud appear just before 'logger.debug("Start of a super batch")' in 'get_homogeneous_batch_iter()'
                        self.timings["super_step"] = self.step
                        # Step at start of superbatch. super_step < step
                        self.timings["next_offset"] = self.train_data.next_offset
                        # Where to start after reload. Will need to call next() a few times
                else:
                    # Persist progress so a reload resumes at the right place.
                    self.timings["step"] = self.step
                    self.timings["next_offset"] = self.train_data.next_offset

            except KeyboardInterrupt:
                break

        if self.state["rolling_vocab"]:
            # Restore the full vocabulary before the final validation/save.
            self.roll_vocab_small2large()
        self.state["wholetime"] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print "Took", (time.time() - start_time) / 60.0, "min"
        # Mean per-step wall time and mean log2 cost per example.
        avg_step = self.timings["time_step"][: self.step].mean()
        avg_cost2expl = self.timings["log2_p_expl"][: self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * self.state["bs"])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #8
0
    def main(self):
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        self.step = int(self.timings['step'])
        self.algo.step = self.step

        self.save_iter = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        print 'syscomb'
        last_cost = 1.
        self.state['clr'] = self.state['lr']
        self.train_data.start(self.timings['next_offset'] if 'next_offset' in
                              self.timings else -1)

        while (self.step < self.state['loopIters']
               and last_cost > .1 * self.state['minerr']
               and (time.time() - start_time) / 60. < self.state['timeStop']
               and self.state['lr'] > self.state['minlr']):
            print 'step:', self.step
            if self.step > 0 and (time.time() - self.save_time
                                  ) / 60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            if self.state['save_by_iter'] and self.step % self.state[
                    'saveiter'] == 0:
                self.save_DIY()
            st = time.time()
            try:
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                for name in rvals.keys():
                    self.timings[name][self.step] = float(
                        numpy.array(rvals[name]))
                if self.l2_params:
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                            self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    try:
                        [fn() for fn in self.hooks]
                    except:
                        print 'sample failed'
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset
            except KeyboardInterrupt:
                break

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time) / 60., 'min'
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(
            1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #9
0
    def main(self):
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        self.step = int(self.timings['step'])
        self.algo.step = self.step

        self.save_iter = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        last_cost = 1.
        self.state['clr'] = self.state['lr']
        self.train_data.start(self.timings['next_offset']
                if 'next_offset' in self.timings
                else -1)

        # added by Zhaopeng Tu, 2016-01-08
        # for halving the learning rate every full epoch after the 10th epoch
        # in our experiences, the best performance is achieved around 10th epoch, then decreases after that
        # that's why we introduce the strategy of havling the learning rate
        epoch_batch_number = int(self.train_data.data_len / self.state['bs'])
        print 'Iterations per epoch', epoch_batch_number

        while (self.step < self.state['loopIters'] and
               last_cost > .1*self.state['minerr'] and
               (time.time() - start_time)/60. < self.state['timeStop'] and
               self.state['lr'] > self.state['minlr']):
            if self.step > 0 and (time.time() - self.save_time)/60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value(), 'ori_cost:', self.model.cost_layer.ori_cost.get_value(), 'doubly_cost:', self.model.cost_layer.doubly_cost.get_value(), 'cost:', self.model.cost_layer.cost.get_value()
                # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value()

                # print_mem('iter%d'%self.step)
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                for name in rvals.keys():
                    self.timings[name][self.step] = float(numpy.array(rvals[name]))
                if self.l2_params:
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                                self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])
                
                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset

                # added by Zhaopeng Tu, 2016-01-08
                # for halving the learning rate every full epoch after the 10th epoch
                # if self.step % self.train_data.data_len == 0 and self.step / self.train_data.data_len >= 10:
                if self.step % epoch_batch_number  == 0 and self.step / epoch_batch_number >= 10:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']

            except KeyboardInterrupt:
                break

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time)/60., 'min'
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #10
0
    def main(self):
        """Run the training loop, with optional rolling-vocabulary support.

        Resumes the step counter (and data-iterator offset) from
        ``self.timings``, then iterates: one ``self.algo()`` optimizer step
        per loop, with periodic checkpointing, validation (patience-based
        learning-rate division), hook execution, and NaN detection.  The
        loop ends when any of the iteration / cost / wall-clock /
        learning-rate limits is reached, or on KeyboardInterrupt; a final
        validate + save is performed afterwards.
        """
        # Periodic data-iterator resets are incompatible with the
        # next_offset / super_step resume bookkeeping used below.
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume from the last recorded step so a reloaded run continues
        # where it left off.
        self.step = int(self.timings['step'])
        self.algo.step = self.step

        if self.state['save_iter'] < 0:
            # First run: write an initial checkpoint immediately.
            self.save_iter = 0
            self.state['save_iter'] = 0
            self.save()
            if self.channel is not None:
                self.channel.save()
        else: # Fake saving
            self.save_iter += 1
            self.state['save_iter'] = self.save_iter
        self.save_time = time.time()

        last_cost = 1.
        self.state['clr'] = self.state['lr']
        # Restart the data stream at the saved offset (-1 means from scratch).
        self.train_data.start(self.timings['next_offset']
                if 'next_offset' in self.timings
                else -1)

        if self.state['rolling_vocab']:
            # next_offset points at the start of a superbatch; advance the
            # iterator to the exact batch recorded by 'step'.
            for i in xrange(self.timings['step'] - self.timings['super_step']):
                self.train_data.next()

        if self.state['rolling_vocab']:
            # Make sure dictionary is current.
            # If training is interrupted when the vocabularies are exchanged,
            # things may get broken.
            step_modulo = self.step % self.model.total_num_batches
            if step_modulo in self.model.rolling_vocab_dict: # 0 always in.
                cur_key = step_modulo
            else:
                cur_key = 0
                for key in self.model.rolling_vocab_dict:
                    if (key < step_modulo) and (key > cur_key): # Find largest key smaller than step_modulo
                        cur_key = key
            # Load the large->small vocabulary mappings for the current
            # segment from the shelve files (keys are stringified step indices).
            new_large2small_src = self.model.Dx_shelve[str(cur_key)]
            new_large2small_trgt = self.model.Dy_shelve[str(cur_key)]
            self.roll_vocab_update_dicts(new_large2small_src, new_large2small_trgt)
            self.zero_or_reload = True

        while (self.step < self.state['loopIters'] and
               last_cost > .1*self.state['minerr'] and
               (time.time() - start_time)/60. < self.state['timeStop'] and
               self.state['lr'] > self.state['minlr']):
            # Periodic checkpoint; saveFreq is in minutes.
            if self.step > 0 and (time.time() - self.save_time)/60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                if self.state['rolling_vocab']:
                    step_modulo = self.step % self.model.total_num_batches
                    if step_modulo in self.model.rolling_vocab_dict:
                        if not self.zero_or_reload:
                            self.roll_vocab_small2large() # Not necessary for 0 or when reloading a properly saved model
                            new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                            new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                            self.roll_vocab_update_dicts(new_large2small_src, new_large2small_trgt) # Done above for 0 or reloaded model
                        self.roll_vocab_large2small()
                        # Sanity check: the peeked batch's first sentence pair
                        # must match the pair recorded for this switch point.
                        tmp_batch = self.train_data.next(peek=True)
                        if (tmp_batch['x'][:,0].tolist(), tmp_batch['y'][:,0].tolist()) == self.model.rolling_vocab_dict[step_modulo]:
                            logger.debug("Identical first sentences. OK")
                        else:
                            logger.error("Batches do not correspond.")
                    elif self.state['hookFreq'] > 0 and \
                       self.step % self.state['hookFreq'] == 0 and \
                       self.hooks:
                        [fn() for fn in self.hooks]
                    # Hook first so that the peeked batch is the same as the one used in algo
                    # Use elif not to peek twice
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                # Record every metric returned by the step at the current index.
                for name in rvals.keys():
                    self.timings[name][self.step] = float(numpy.array(rvals[name]))
                if self.l2_params:
                    # RMS of each parameter matrix, for monitoring.
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    # Save state for post-mortem; last_cost = 0 makes the
                    # while condition fail, ending training.
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: lose patience; optionally
                        # record the step at which errors began.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                                self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        # Roll parameters back to the best validated set.
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if not self.state['rolling_vocab']: # Standard use of hooks
                    if self.state['hookFreq'] > 0 and \
                       self.step % self.state['hookFreq'] == 0 and \
                       self.hooks:
                        [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                if self.state['rolling_vocab']:
                    self.zero_or_reload = False
                    self.timings['step'] = self.step # Step now
                    if (self.step % self.model.total_num_batches) % self.state['sort_k_batches'] == 0: # Start of a super_batch.
                        logger.debug("Set super_step and next_offset")
                        # This log should appear just before 'logger.debug("Start of a super batch")' in 'get_homogeneous_batch_iter()'
                        self.timings['super_step'] = self.step
                        # Step at start of superbatch. super_step < step
                        self.timings['next_offset'] = self.train_data.next_offset
                        # Where to start after reload. Will need to call next() a few times
                else:
                    self.timings['step'] = self.step
                    self.timings['next_offset'] = self.train_data.next_offset

            except KeyboardInterrupt:
                break

        if self.state['rolling_vocab']:
            self.roll_vocab_small2large()
        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time)/60., 'min'
        # Summary statistics over the per-step timing / cost records.
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #11
0
    def main(self):
        """Run a basic training loop (no data-offset resume bookkeeping).

        One ``self.algo()`` optimizer step per iteration, with periodic
        checkpointing, validation (patience-based learning-rate division),
        hooks, and NaN detection.  Any exception raised during a step saves
        state, prints the elapsed time, and re-raises.
        """
        print_mem('start')
        self.state['gotNaN'] = 0
        self.start_time = time.time()
        self.batch_start_time = time.time()
        # Resume the step counter from the recorded timings.
        self.step = int(self.timings['step'])
        self.algo.step = self.step
        self.save_iter = 0
        # Initial checkpoint before training starts.
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()
        last_cost = 1.
        start_time = time.time()
        self.start_time = start_time  # overwrites the value set a few lines above
        self.state['clr'] = self.state['lr']

        while (self.step < self.state['loopIters']
               and last_cost > .1 * self.state['minerr']
               and (time.time() - start_time) / 60. < self.state['timeStop']
               and self.state['lr'] > self.state['minlr']):
            # Periodic checkpoint; saveFreq is in minutes.
            if (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                for name in rvals.keys():
                    # Metrics are recorded once every 'trainFreq' steps.
                    pos = self.step // self.state['trainFreq']
                    self.timings[name][pos] = float(numpy.array(rvals[name]))

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    # Save state for post-mortem; last_cost = 0 makes the
                    # while condition fail, ending training.
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: lose patience; optionally
                        # record the step at which errors began.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                            self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        # Roll parameters back to the best validated set.
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                self.timings['step'] = self.step
                print "took {}".format(time.time() - st)
            # NOTE(review): bare 'except:' also catches KeyboardInterrupt and
            # SystemExit; state is saved and the exception re-raised, which
            # may be intentional (save-on-interrupt) — confirm before changing.
            except:
                self.state['wholetime'] = float(time.time() - start_time)
                self.save()
                if self.channel:
                    self.channel.save()

                last_cost = 0
                print 'Error in running algo (lr issue)'
                print 'Took', (time.time() - start_time) / 60., 'min'
                raise

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time) / 60., 'min'
Example #12
0
    def main(self):
        """Run a basic training loop starting from step 0 (no resume).

        One ``self.algo()`` optimizer step per iteration, with periodic
        checkpointing, validation (patience-based learning-rate division),
        hooks, and NaN detection.  Any exception raised during a step saves
        state, prints the elapsed time, and re-raises.
        """
        print_mem('start')
        self.state['gotNaN'] = 0
        self.start_time = time.time()
        self.batch_start_time = time.time()
        # Always a fresh run: step counter starts at zero.
        self.step = 0
        self.save_iter = 0
        # Initial checkpoint before training starts.
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()
        last_cost = 1.
        start_time = time.time()
        self.start_time = start_time  # overwrites the value set a few lines above
        self.state['clr'] = self.state['lr']

        while (self.step < self.state['loopIters'] and
               last_cost > .1*self.state['minerr'] and
               (time.time() - start_time)/60. < self.state['timeStop'] and
               self.state['lr'] > self.state['minlr']):
            # Periodic checkpoint; saveFreq is in minutes (strict '>' here,
            # unlike the '>=' used in sibling variants).
            if (time.time() - self.save_time)/60. > self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                for name in rvals.keys():
                    # Metrics are recorded once every 'trainFreq' steps.
                    pos = self.step // self.state['trainFreq']
                    self.timings[name][pos] = float(numpy.array(rvals[name]))

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    # Save state for post-mortem; last_cost = 0 makes the
                    # while condition fail, ending training.
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: lose patience; optionally
                        # record the step at which errors began.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                                self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        # Roll parameters back to the best validated set.
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
            # NOTE(review): bare 'except:' also catches KeyboardInterrupt and
            # SystemExit; state is saved and the exception re-raised, which
            # may be intentional (save-on-interrupt) — confirm before changing.
            except:
                self.state['wholetime'] = float(time.time() - start_time)
                self.save()
                if self.channel:
                    self.channel.save()

                last_cost = 0
                print 'Error in running algo (lr issue)'
                print 'Took', (time.time() - start_time)/60., 'min'
                raise

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time)/60., 'min'
Example #13
0
    def main(self):
        """Run the training loop with epoch-based learning-rate halving.

        Resumes the step counter and data offset from ``self.timings``,
        then iterates: one ``self.algo()`` optimizer step per loop, with
        periodic checkpointing, validation (patience-based learning-rate
        division), hooks, and NaN detection.  Additionally, after the 10th
        full epoch the learning rate is divided by ``state['divide_lr']``
        once per completed epoch.  Stops on the iteration / cost /
        wall-clock / learning-rate limits or KeyboardInterrupt.
        """
        # Periodic data-iterator resets are incompatible with the
        # next_offset resume bookkeeping used below.
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume from the last recorded step so a reloaded run continues
        # where it left off.
        self.step = int(self.timings['step'])
        self.algo.step = self.step

        self.save_iter = 0
        # Initial checkpoint before training starts.
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        last_cost = 1.
        self.state['clr'] = self.state['lr']
        # Restart the data stream at the saved offset (-1 means from scratch).
        self.train_data.start(self.timings['next_offset'] if 'next_offset' in
                              self.timings else -1)

        # added by Zhaopeng Tu, 2016-01-08
        # for halving the learning rate every full epoch after the 10th epoch
        # in our experiences, the best performance is achieved around 10th epoch, then decreases after that
        # that's why we introduce the strategy of halving the learning rate
        epoch_batch_number = int(self.train_data.data_len / self.state['bs'])
        print 'Iterations per epoch', epoch_batch_number

        while (self.step < self.state['loopIters']
               and last_cost > .1 * self.state['minerr']
               and (time.time() - start_time) / 60. < self.state['timeStop']
               and self.state['lr'] > self.state['minlr']):
            # Periodic checkpoint; saveFreq is in minutes.
            if self.step > 0 and (time.time() - self.save_time
                                  ) / 60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value(), 'ori_cost:', self.model.cost_layer.ori_cost.get_value(), 'doubly_cost:', self.model.cost_layer.doubly_cost.get_value(), 'cost:', self.model.cost_layer.cost.get_value()
                # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value()

                # print_mem('iter%d'%self.step)
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                # Record every metric returned by the step at the current index.
                for name in rvals.keys():
                    self.timings[name][self.step] = float(
                        numpy.array(rvals[name]))
                if self.l2_params:
                    # RMS of each parameter matrix, for monitoring.
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    # Save state for post-mortem; last_cost = 0 makes the
                    # while condition fail, ending training.
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: lose patience; optionally
                        # record the step at which errors began.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                            self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        # Roll parameters back to the best validated set.
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset

                # added by Zhaopeng Tu, 2016-01-08
                # for halving the learning rate every full epoch after the 10th epoch
                # if self.step % self.train_data.data_len == 0 and self.step / self.train_data.data_len >= 10:
                # Python 2: '/' on these ints is floor division, so the
                # right-hand test counts completed epochs (>= 10).
                if self.step % epoch_batch_number == 0 and self.step / epoch_batch_number >= 10:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']

            except KeyboardInterrupt:
                break

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time) / 60., 'min'
        # Summary statistics over the per-step timing / cost records.
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(
            1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #14
0
    def main(self):
        """Run the training loop with periodic model-directory snapshots.

        Resumes the step counter from ``self.timings`` and the data offset
        (with extra handling allowing ``state['next_offset'] <= 0`` to force
        a fresh start), then iterates: one ``self.algo()`` optimizer step
        per loop, with periodic checkpointing, validation (patience-based
        learning-rate division), hooks, and NaN detection.  Every
        ``state['copy_model_freq']`` steps the saved model files are also
        copied into a per-iteration directory via shell commands.  Stops on
        the iteration / cost / wall-clock / learning-rate limits or
        KeyboardInterrupt.
        """
        # Periodic data-iterator resets are incompatible with the
        # next_offset resume bookkeeping used below.
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume from the last recorded step so a reloaded run continues
        # where it left off.
        self.step = int(self.timings['step'])
        self.algo.step = self.step

        self.save_iter = 0
        # Initial checkpoint before training starts.
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        last_cost = 1.
        self.state['clr'] = self.state['lr']

        ###############################################################################
        # by He Wei
        # A non-positive state['next_offset'] explicitly forces starting the
        # data stream from scratch, even if timings recorded an offset.
        if 'next_offset' in self.state and self.state['next_offset'] <= 0:
            self.train_data.start(-1)
        elif 'next_offset' in self.timings:
            self.train_data.start(self.timings['next_offset'])
        else:
            self.train_data.start(-1)
        #self.train_data.start(self.timings['next_offset']
        #        if 'next_offset' in self.timings
        #        else -1)

        #if 'next_offset' in self.state and self.state['next_offset'] <= 0:
        #    self.train_data.reset()

        while (self.step < self.state['loopIters'] and
               last_cost > .1*self.state['minerr'] and
               (time.time() - start_time)/60. < self.state['timeStop'] and
               self.state['lr'] > self.state['minlr']):
            ####################################################################################
            #add by hewei
            # Every copy_model_freq steps, save and copy the checkpoint files
            # (state['prefix']*) into a per-iteration snapshot directory.
            # Copy failures are logged but never abort training.
            if self.step % int(self.state['copy_model_freq']) == 0:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                try:
                    copy_model_path = "%s/iter_%d" % (self.state['copy_model_path'], self.step)
                    cmd = "mkdir -p %s" % copy_model_path
                    if not os.path.exists(copy_model_path):
                        print >> sys.stderr, cmd
                        os.system(cmd)
                    cmd = "cp %s* %s" % (self.state['prefix'], copy_model_path)
                    print >> sys.stderr, cmd
                    os.system(cmd)
                except Exception:
                    print 'mainLoop: failed to copy model file to %s' % copy_model_path
                    traceback.print_exc()
            ####################################################################################
            # Periodic checkpoint; saveFreq is in minutes.
            if self.step > 0 and (time.time() - self.save_time)/60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                # Record every metric returned by the step at the current index.
                for name in rvals.keys():
                    self.timings[name][self.step] = float(numpy.array(rvals[name]))
                if self.l2_params:
                    # RMS of each parameter matrix, for monitoring.
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    # Save state for post-mortem; last_cost = 0 makes the
                    # while condition fail, ending training.
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation got worse: lose patience; optionally
                        # record the step at which errors began.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                                self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        # Roll parameters back to the best validated set.
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                self.step += 1
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset
            except KeyboardInterrupt:
                break

        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time)/60., 'min'
        # Summary statistics over the per-step timing / cost records.
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)
Example #15
0
    def main(self):
        """Run the full training loop.

        Resumes from ``self.timings['step']`` / ``next_offset`` if present,
        trains until one of the stopping criteria fires (iteration budget,
        cost threshold, wall-clock limit, or minimum learning rate),
        periodically saving/copying the model and validating.  Finishes with
        a final validation, a final save, and throughput statistics printed
        to stdout.  Python 2 code (print statements).
        """
        # This variant does not support periodic iterator resets; the
        # reset logic further below is therefore dead unless reset > 0.
        assert self.reset == -1

        print_mem('start')
        self.state['gotNaN'] = 0
        start_time = time.time()
        self.start_time = start_time
        self.batch_start_time = time.time()

        # Resume the step counter from saved timings (0 on a fresh run)
        # and keep the training algorithm's counter in sync.
        self.step = int(self.timings['step'])
        self.algo.step = self.step

        # Initial checkpoint before any training, so a crash on step 0
        # still leaves a loadable model on disk.
        self.save_iter = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
        self.save_time = time.time()

        # Sentinel > .1 * minerr so the while-condition passes on entry.
        last_cost = 1.
        self.state['clr'] = self.state['lr']

        ###############################################################################
        # by He Wei
        # Position the training iterator for resumption:
        #   - a non-positive next_offset in state forces a restart (-1),
        #   - otherwise resume from the offset recorded in timings,
        #   - falling back to a restart when no offset was saved.
        if 'next_offset' in self.state and self.state['next_offset'] <= 0:
            self.train_data.start(-1)
        elif 'next_offset' in self.timings:
            self.train_data.start(self.timings['next_offset'])
        else:
            self.train_data.start(-1)
        #self.train_data.start(self.timings['next_offset']
        #        if 'next_offset' in self.timings
        #        else -1)

        #if 'next_offset' in self.state and self.state['next_offset'] <= 0:
        #    self.train_data.reset()

        # Main loop: stop on iteration budget, near-zero cost, wall-clock
        # limit (minutes), or learning rate decayed below its floor.
        while (self.step < self.state['loopIters']
               and last_cost > .1 * self.state['minerr']
               and (time.time() - start_time) / 60. < self.state['timeStop']
               and self.state['lr'] > self.state['timeStop'] if False else self.state['lr'] > self.state['minlr']):
            ####################################################################################
            #add by hewei
            # Every copy_model_freq steps: checkpoint, then shell out to
            # archive the prefix files into copy_model_path/iter_<step>.
            # Best-effort: failures are logged and training continues.
            if self.step % int(self.state['copy_model_freq']) == 0:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                try:
                    copy_model_path = "%s/iter_%d" % (
                        self.state['copy_model_path'], self.step)
                    cmd = "mkdir -p %s" % copy_model_path
                    if not os.path.exists(copy_model_path):
                        print >> sys.stderr, cmd
                        os.system(cmd)
                    cmd = "cp %s* %s" % (self.state['prefix'], copy_model_path)
                    print >> sys.stderr, cmd
                    os.system(cmd)
                except Exception:
                    print 'mainLoop: failed to copy model file to %s' % copy_model_path
                    traceback.print_exc()
            ####################################################################################
            # Time-based checkpointing: save every saveFreq minutes.
            if self.step > 0 and (time.time() - self.save_time
                                  ) / 60. >= self.state['saveFreq']:
                self.save()
                if self.channel is not None:
                    self.channel.save()
                self.save_time = time.time()
            st = time.time()
            try:
                # One optimization step; rvals maps metric name -> value.
                rvals = self.algo()
                self.state['traincost'] = float(rvals['cost'])
                self.state['step'] = self.step
                last_cost = rvals['cost']
                # Record every returned metric into the per-step timings.
                for name in rvals.keys():
                    self.timings[name][self.step] = float(
                        numpy.array(rvals[name]))
                # Optionally track the RMS of each parameter tensor.
                if self.l2_params:
                    for param in self.model.params:
                        self.timings["l2_" + param.name][self.step] =\
                            numpy.mean(param.get_value() ** 2) ** 0.5

                # On NaN/inf cost (with on_nan == 'raise'): flag it, save,
                # and set last_cost = 0 so the while-condition exits.
                if (numpy.isinf(rvals['cost']) or
                   numpy.isnan(rvals['cost'])) and\
                   self.state['on_nan'] == 'raise':
                    self.state['gotNaN'] = 1
                    self.save()
                    if self.channel:
                        self.channel.save()
                    print 'Got NaN while training'
                    last_cost = 0
                # Periodic validation with patience-based LR scheduling.
                if self.valid_data is not None and\
                   self.step % self.state['validFreq'] == 0 and\
                   self.step > 1:
                    valcost = self.validate()
                    if valcost > self.old_cost * self.state['cost_threshold']:
                        # Validation regressed beyond tolerance: spend one
                        # unit of patience, and optionally arm the LR decay
                        # schedule on the first such error.
                        self.patience -= 1
                        if 'lr_start' in self.state and\
                           self.state['lr_start'] == 'on_error':
                            self.state['lr_start'] = self.step
                    elif valcost < self.old_cost:
                        # New best: reset patience and remember the cost.
                        self.patience = self.state['patience']
                        self.old_cost = valcost

                    if self.state['divide_lr'] and \
                       self.patience < 1:
                        # Divide lr by 2
                        # Patience exhausted: shrink the learning rate by
                        # the divide_lr factor and roll the parameters
                        # back to the best ones seen so far.
                        self.algo.lr = self.algo.lr / self.state['divide_lr']
                        bparams = dict(self.model.best_params)
                        self.patience = self.state['patience']
                        for p in self.model.params:
                            p.set_value(bparams[p.name])

                # User hooks, fired every hookFreq steps.
                if self.state['hookFreq'] > 0 and \
                   self.step % self.state['hookFreq'] == 0 and \
                   self.hooks:
                    [fn() for fn in self.hooks]
                # Dead under the assert above (reset == -1), kept for parity
                # with the sibling main() variants in this file.
                if self.reset > 0 and self.step > 1 and \
                   self.step % self.reset == 0:
                    print 'Resetting the data iterator'
                    self.train_data.reset()

                # Record progress only after the step fully succeeded, so a
                # resume replays an interrupted step instead of skipping it.
                self.step += 1
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset
            except KeyboardInterrupt:
                # Ctrl-C stops training gracefully; final save/validate below.
                break

        # Final bookkeeping: total time, last validation, last checkpoint.
        self.state['wholetime'] = float(time.time() - start_time)
        if self.valid_data is not None:
            self.validate()
        self.save()
        if self.channel:
            self.channel.save()
        print 'Took', (time.time() - start_time) / 60., 'min'
        # Throughput summary over the steps actually executed.
        avg_step = self.timings['time_step'][:self.step].mean()
        avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
        print "Average step took {}".format(avg_step)
        print "That amounts to {} sentences in a day".format(
            1 / avg_step * 86400 * self.state['bs'])
        print "Average log2 per example is {}".format(avg_cost2expl)