Esempio n. 1
0
    def fit(self, sess, saver, train_examples_raw, dev_set_raw):
        best_score = 0.

        train_examples = self.preprocess_sequence_data(train_examples_raw)
        dev_set = self.preprocess_sequence_data(dev_set_raw)

        for epoch in range(self.config.n_epochs):
            logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
            # You may use the progress bar to monitor the training progress
            # Addition of progress bar will not be graded, but may help when debugging
            prog = Progbar(target=1 +
                           int(len(train_examples) / self.config.batch_size))

            # The general idea is to loop over minibatches from train_examples,
            # and run train_on_batch inside the loop.
            # Hint: train_examples could be a list containing the feature data and label data
            # Read the doc for utils.get_minibatches to find out how to use it.
            # Note that get_minibatches could either return a list, or a list of list
            # [features, labels]. This makes expanding tuples into arguments (* operator) handy

            ### YOUR CODE HERE (2-3 lines)
            for minibatch in minibatches(train_examples,
                                         self.config.batch_size):
                self.train_on_batch(sess, *minibatch)
                prog.add(1)

            ### END YOUR CODE

            logger.info("Evaluating on development data")
            token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
            logger.debug("Token-level confusion matrix:\n" +
                         token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

            score = entity_scores[-1]

            if score > best_score:
                best_score = score
                if saver:
                    logger.info("New best score! Saving model in %s",
                                self.config.model_output)
                    saver.save(sess, self.config.model_output)
            print("")
            if self.report:
                self.report.log_epoch()
                self.report.save()
        return best_score
Esempio n. 2
0
def get_experiment_fitnesses(experiments, optimizer, config, logger):
    with model.get_db_provider() as db:
        progbar = Progbar(len(experiments), interval=0.0)
        logger.info("Waiting for fitnesses from %s experiments" %
                    len(experiments))

        bad_line_dicts = [dict() for x in range(len(experiments))]
        has_result = [False for i in range(len(experiments))]
        fitnesses = [0.0 for i in range(len(experiments))]
        behaviors = [None for i in range(len(experiments))]
        term_criterion = config['optimizer']['termination_criterion']
        skip_gen_thres = term_criterion['skip_gen_thres']
        skip_gen_timeout = term_criterion['skip_gen_timeout']
        result_timestamp = time.time()

        while sum(has_result) < len(experiments):
            for i, experiment in enumerate(experiments):
                if float(sum(has_result)) / len(experiments) >= skip_gen_thres\
                        and time.time() - result_timestamp > skip_gen_timeout:
                    logger.warn(
                        "Skipping to next gen with %s of solutions evaled" %
                        (float(
                            sum(has_result)) /
                            len(experiments)))
                    has_result = [True] * len(experiments)
                    break
                if has_result[i]:
                    continue
                returned_experiment = db.get_experiment(experiment.key,
                                                        getinfo=True)
                # try:
                #     experiment_output = returned_experiment.info['logtail']
                # except BaseException:
                #     logger.warn('Cannot access "logtail" in experiment.info')
                output = db._get_experiment_logtail(
                    returned_experiment)
                if output is None:
                    continue

                for j, line in enumerate(output):

                    if line.startswith(
                            "Traceback (most recent call last):") and \
                            j not in bad_line_dicts[i]:
                        logger.warn("Experiment %s: error"
                                    " discovered in output" %
                                    returned_experiment.key)
                        logger.warn("".join(output[j:]))
                        bad_line_dicts[i][j] = True

                    if line.startswith("Behavior") or \
                            line.startswith("behavior"):
                        try:
                            behavior = eval(line.rstrip().split(':')[1])
                            if isinstance(behavior, np.ndarray):
                                pass
                            elif isinstance(behavior, list):
                                behavior = np.array(behavior)
                            else:
                                raise

                        except BaseException:
                            if j not in bad_line_dicts[i]:
                                logger.warn(
                                    'Experiment %s: error parsing or invalid'
                                    ' behavior' %
                                    returned_experiment.key)
                                logger.warn(line)
                                bad_line_dicts[i][j] = True
                        else:
                            behaviors[i] = behavior

                    if line.startswith("Fitness") or \
                            line.startswith("fitness"):
                        try:
                            fitness = float(line.rstrip().split(':')[1])
                            # assert fitness >= 0.0
                        except BaseException:
                            if j not in bad_line_dicts[i]:
                                logger.warn(
                                    'Experiment %s: error parsing or invalid'
                                    ' fitness' %
                                    returned_experiment.key)
                                logger.warn(line)
                                bad_line_dicts[i][j] = True
                        else:
                            if fitness < 0.0:
                                logger.warn('Experiment %s: returned'
                                            ' fitness is less than zero,'
                                            ' setting it to zero' %
                                            returned_experiment.key)
                                fitness = 0.0

                            fitnesses[i] = fitness
                            has_result[i] = True
                            progbar.add(1)
                            result_timestamp = time.time()
                            break

            time.sleep(config['sleep_time'])
        print
        return fitnesses, behaviors