Exemple #1
0
    def next_action(self):
        """Fit the learner once, report its held out score and predictions, then stop.

        The score message is sent first, followed by one 'predictions' message
        for each of the valid and test partitions present in ``self.data`` and
        a final one for the held out rows of the training data.  Predictions
        are written to temporary ``.npy`` files and only the file names are
        sent to the parent.

        Raises:
            TerminationEx: always, once every message has been sent.
        """
        self.read_messages()
        # The clock is started before fitting
        self.time_checkpoint = time.clock()
        self.fit(self.train_idx, self.feature_subset)
        # Score the fitted learner on the held out rows
        held_out_guess = self.predict('X_train', self.test_idx, self.feature_subset)
        expected = self.test_truth
        score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                      truth=expected,
                                      predictions=held_out_guess,
                                      task=self.data_info['task'])

        elapsed = time.clock() - self.time_checkpoint + self.time_before_checkpoint
        self.score_times.append(elapsed)
        self.score_values.append(score)
        # Tell the parent how well we did and how long it took
        self.send_to_parent({'subject': 'score',
                             'sender': self.name,
                             'time': self.score_times[-1],
                             'score': self.score_values[-1]})
        # Bank the time used so far
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += time.clock() - self.time_checkpoint

        # FIXME - send all of this data at the same time to prevent gotchas

        # Valid and test predictions are optional - only made when the data is present
        for data_key, partition in (('X_valid', 'valid'), ('X_test', 'test')):
            if data_key not in self.data:
                continue
            guess = self.predict(data_key, 'all', self.feature_subset)
            path = util.random_temp_file_name('.npy')
            np.save(path, guess)
            files = getattr(self, partition + '_prediction_files')
            files.append(path)
            times = getattr(self, partition + '_prediction_times')
            times.append(self.time_before_checkpoint)
            self.send_to_parent({'subject': 'predictions',
                                 'sender': self.name,
                                 'partition': partition,
                                 'time': times[-1],
                                 'filename': files[-1]})

        # Held out predictions also carry the row indices they refer to
        guess = self.predict('X_train', self.test_idx, self.feature_subset)
        path = util.random_temp_file_name('.npy')
        np.save(path, guess)
        self.held_out_prediction_files.append(path)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent({'subject': 'predictions',
                             'sender': self.name,
                             'partition': 'held out',
                             'idx': self.test_idx,
                             'time': self.held_out_prediction_times[-1],
                             'filename': self.held_out_prediction_files[-1]})
        # And I'm spent
        raise TerminationEx
Exemple #2
0
    def next_action(self):
        """Combine the children's reports and forward the results to the parent.

        For every report index that all children have reached, the parent is
        sent: the mean of the children's non-NaN scores (NaN when no child
        has a usable score), the element-wise mean of their valid and test
        predictions, and their held out predictions written into a single
        array at the positions given by ``self.child_held_out_idx``.  Each
        child's prediction file is deleted as soon as it has been loaded.

        Raises:
            TerminationEx: when ``immortal_offspring`` is True and the number
                of child connections differs from the number of child states;
                or when ``immortal_offspring`` is False and either a child
                without a connection sent no test predictions or no child
                connections remain.
        """
        # Check mail
        self.read_messages()

        def fewest_reports(per_child):
            # min() raises ValueError on an empty sequence; with no children
            # registered there is simply nothing to collect.
            return min([len(reports) for reports in per_child.values()] or [0])

        # Collect up scores and predictions - even if paused, children may still be finishing tasks
        min_n_scores = fewest_reports(self.child_score_values)
        while len(self.score_values) < min_n_scores:
            n = len(self.score_values)
            # NaN scores are left out of the average
            usable_scores = [child_scores[n]
                             for child_scores in self.child_score_values.values()
                             if not np.isnan(child_scores[n])]
            if usable_scores:
                score = sum(usable_scores) / float(len(usable_scores))
            else:
                # Every child reported NaN - pass NaN on rather than dividing by zero
                score = np.nan
            maxtime = max(times[n] for times in self.child_score_times.values())
            self.score_values.append(score)
            self.score_times.append(maxtime)
            self.send_to_parent(dict(subject='score', sender=self.name,
                                     time=self.score_times[-1],
                                     score=self.score_values[-1]))

        # FIXME - send all of this data at the same time to prevent gotchas

        # Valid and test predictions are combined in exactly the same way: an element-wise mean
        averaged_partitions = (
            ('valid', self.child_valid_prediction_files, self.child_valid_prediction_times,
             self.valid_prediction_files, self.valid_prediction_times),
            ('test', self.child_test_prediction_files, self.child_test_prediction_times,
             self.test_prediction_files, self.test_prediction_times),
        )
        for partition, child_files, child_times, own_files, own_times in averaged_partitions:
            min_n = fewest_reports(child_times)
            while len(own_times) < min_n:
                n = len(own_times)
                predictions = None
                for child_name in self.child_score_times:
                    filename = child_files[child_name][n]
                    child_predictions = np.load(filename)
                    os.remove(filename)
                    if predictions is None:
                        predictions = child_predictions
                    else:
                        predictions += child_predictions
                predictions /= len(self.child_score_times)
                tmp_filename = util.random_temp_file_name('.npy')
                np.save(tmp_filename, predictions)
                maxtime = max(times[n] for times in child_times.values())
                own_files.append(tmp_filename)
                own_times.append(maxtime)
                self.send_to_parent(dict(subject='predictions', sender=self.name, partition=partition,
                                         time=own_times[-1],
                                         filename=own_files[-1]))

        min_n_held_out = fewest_reports(self.child_held_out_prediction_times)
        while len(self.held_out_prediction_times) < min_n_held_out:
            n = len(self.held_out_prediction_times)
            # FIXME - get rid of if else here
            if self.data_info['task'] == 'multiclass.classification':
                predictions = np.zeros(self.data['Y_train_1_of_k'].shape)
            else:
                predictions = np.zeros(self.data['Y_train'].shape)
            for child_name in self.child_score_times:
                filename = self.child_held_out_prediction_files[child_name][n]
                child_predictions = np.load(filename)
                os.remove(filename)
                # Each child fills in the rows recorded for it in child_held_out_idx
                predictions[self.child_held_out_idx[child_name]] = child_predictions
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            maxtime = max(times[n] for times in self.child_held_out_prediction_times.values())
            self.held_out_prediction_files.append(tmp_filename)
            self.held_out_prediction_times.append(maxtime)
            self.send_to_parent(dict(subject='predictions', sender=self.name, partition='held out',
                                     time=self.held_out_prediction_times[-1],
                                     filename=self.held_out_prediction_files[-1]))

        # Check to see if all children have terminated - if so, terminate this agent
        # immortal child dying is failure
        # mortal child dying without sending results is failure
        # any child failure should kill parent
        if self.immortal_offspring is True and len(self.conns_from_children) != len(self.child_states):
            logger.error("%s: Immortal child has died. Dying of grief", self.name)
            raise TerminationEx
        elif self.immortal_offspring is False:
            dead_kids = [x for x in self.child_states if x not in self.conns_from_children]
            for dk in dead_kids:
                if len(self.child_test_prediction_files[dk]) == 0:
                    logger.error("%s: Mortal child %s has died without sending results", self.name, dk)
                    raise TerminationEx
            if len(self.conns_from_children) == 0:
                logger.info("%s: No children remaining. Terminating.", self.name)
                raise TerminationEx
Exemple #3
0
    def next_action(self):
        """Grow the learner for up to one time quantum, then publish predictions.

        While the quantum has not elapsed, ``self.learner.n_estimators`` is
        increased by ``self.n_estimators_quantum`` and the learner is refit.
        Held out scores are sent to the parent along the way, at most
        ``self.n_samples`` of them per call.  Afterwards predictions for the
        valid and test partitions (when present in ``self.data``) and for the
        held out rows are saved to temporary ``.npy`` files whose names are
        sent to the parent.  The agent pauses itself when
        ``self.run_one_iteration`` is set.
        """
        # Read messages
        self.read_messages()
        # Start timing
        self.time_checkpoint = time.clock()
        # Elapsed time (since the checkpoint) after which the next score is due
        predict_time = self.time_quantum / self.n_samples
        scores_so_far = 0
        # Increase estimators and learn
        while time.clock() - self.time_checkpoint < self.time_quantum:
            # Read messages - maybe compute quantum has changed?
            self.get_parent_inbox()
            self.read_messages()
            # Do learning
            self.learner.n_estimators += self.n_estimators_quantum
            fit_began = time.clock()
            self.fit(self.train_idx, self.feature_subset)
            fit_cost = time.clock() - fit_began
            if global_data.exp['slowdown_factor'] > 1:
                # NOTE(review): presumably burns CPU to emulate a slower machine - confirm in util
                util.waste_cpu_time(fit_cost * (global_data.exp['slowdown_factor'] - 1))
            if time.clock() - self.time_checkpoint <= predict_time:
                # Not yet time for the next score - keep learning
                continue

            held_out_guess = self.predict('X_train', self.test_idx, self.feature_subset)
            expected = self.test_truth
            score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                          truth=expected,
                                          predictions=held_out_guess,
                                          task=self.data_info['task'])

            elapsed = time.clock() - self.time_checkpoint + self.time_before_checkpoint
            self.score_times.append(elapsed)
            self.score_values.append(score)
            # Send score and time to parent
            self.send_to_parent({'subject': 'score',
                                 'sender': self.name,
                                 'time': self.score_times[-1],
                                 'score': self.score_values[-1]})
            scores_so_far += 1
            if scores_so_far >= self.n_samples:
                # All the scores asked for have been sent
                break
            # Next time at which to make a prediction - spread the remaining
            # scores evenly over what is left of the quantum
            since_checkpoint = time.clock() - self.time_checkpoint
            quantum_left = self.time_quantum - (time.clock() - self.time_checkpoint)
            predict_time = since_checkpoint + quantum_left / (self.n_samples - scores_so_far)
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += time.clock() - self.time_checkpoint
        # Now make predictions

        # FIXME - send all of this data at the same time to prevent gotchas

        for data_key, partition in (('X_valid', 'valid'), ('X_test', 'test')):
            if data_key not in self.data:
                continue
            guess = self.predict(data_key, 'all', self.feature_subset)
            path = util.random_temp_file_name('.npy')
            np.save(path, guess)
            files = getattr(self, partition + '_prediction_files')
            files.append(path)
            times = getattr(self, partition + '_prediction_times')
            times.append(self.time_before_checkpoint)
            self.send_to_parent({'subject': 'predictions',
                                 'sender': self.name,
                                 'partition': partition,
                                 'time': times[-1],
                                 'filename': files[-1]})

        # Held out predictions also carry the row indices they refer to
        guess = self.predict('X_train', self.test_idx, self.feature_subset)
        path = util.random_temp_file_name('.npy')
        np.save(path, guess)
        self.held_out_prediction_files.append(path)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent({'subject': 'predictions',
                             'sender': self.name,
                             'partition': 'held out',
                             'idx': self.test_idx,
                             'time': self.held_out_prediction_times[-1],
                             'filename': self.held_out_prediction_files[-1]})

        if self.run_one_iteration:
            self.pause()
Exemple #4
0
    def next_action(self):
        """Combine the children's reports and forward the results to the parent.

        For every report index that all children have reached, the parent is
        sent: the mean of the children's non-NaN scores (NaN when no child
        has a usable score), the element-wise mean of their valid and test
        predictions, and their held out predictions written into a single
        array at the positions given by ``self.child_held_out_idx``.  Each
        child's prediction file is deleted as soon as it has been loaded.

        Raises:
            TerminationEx: when ``immortal_offspring`` is True and the number
                of child connections differs from the number of child states;
                or when ``immortal_offspring`` is False and either a child
                without a connection sent no test predictions or no child
                connections remain.
        """
        # Check mail
        self.read_messages()

        def fewest_reports(per_child):
            # min() raises ValueError on an empty sequence; with no children
            # registered there is simply nothing to collect.
            return min([len(reports) for reports in per_child.values()] or [0])

        # Collect up scores and predictions - even if paused, children may still be finishing tasks
        min_n_scores = fewest_reports(self.child_score_values)
        while len(self.score_values) < min_n_scores:
            n = len(self.score_values)
            # NaN scores are left out of the average
            usable_scores = [
                child_scores[n]
                for child_scores in self.child_score_values.values()
                if not np.isnan(child_scores[n])
            ]
            if usable_scores:
                score = sum(usable_scores) / float(len(usable_scores))
            else:
                # Every child reported NaN - pass NaN on rather than dividing by zero
                score = np.nan
            maxtime = max(times[n]
                          for times in self.child_score_times.values())
            self.score_values.append(score)
            self.score_times.append(maxtime)
            self.send_to_parent(
                dict(subject='score',
                     sender=self.name,
                     time=self.score_times[-1],
                     score=self.score_values[-1]))

        # FIXME - send all of this data at the same time to prevent gotchas

        # Valid and test predictions are combined in exactly the same way: an element-wise mean
        averaged_partitions = (
            ('valid', self.child_valid_prediction_files,
             self.child_valid_prediction_times, self.valid_prediction_files,
             self.valid_prediction_times),
            ('test', self.child_test_prediction_files,
             self.child_test_prediction_times, self.test_prediction_files,
             self.test_prediction_times),
        )
        for partition, child_files, child_times, own_files, own_times in averaged_partitions:
            min_n = fewest_reports(child_times)
            while len(own_times) < min_n:
                n = len(own_times)
                predictions = None
                for child_name in self.child_score_times:
                    filename = child_files[child_name][n]
                    child_predictions = np.load(filename)
                    os.remove(filename)
                    if predictions is None:
                        predictions = child_predictions
                    else:
                        predictions += child_predictions
                predictions /= len(self.child_score_times)
                tmp_filename = util.random_temp_file_name('.npy')
                np.save(tmp_filename, predictions)
                maxtime = max(times[n] for times in child_times.values())
                own_files.append(tmp_filename)
                own_times.append(maxtime)
                self.send_to_parent(
                    dict(subject='predictions',
                         sender=self.name,
                         partition=partition,
                         time=own_times[-1],
                         filename=own_files[-1]))

        min_n_held_out = fewest_reports(self.child_held_out_prediction_times)
        while len(self.held_out_prediction_times) < min_n_held_out:
            n = len(self.held_out_prediction_times)
            # FIXME - get rid of if else here
            if self.data_info['task'] == 'multiclass.classification':
                predictions = np.zeros(self.data['Y_train_1_of_k'].shape)
            else:
                predictions = np.zeros(self.data['Y_train'].shape)
            for child_name in self.child_score_times:
                filename = self.child_held_out_prediction_files[child_name][n]
                child_predictions = np.load(filename)
                os.remove(filename)
                # Each child fills in the rows recorded for it in child_held_out_idx
                predictions[
                    self.child_held_out_idx[child_name]] = child_predictions
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            maxtime = max(
                times[n]
                for times in self.child_held_out_prediction_times.values())
            self.held_out_prediction_files.append(tmp_filename)
            self.held_out_prediction_times.append(maxtime)
            self.send_to_parent(
                dict(subject='predictions',
                     sender=self.name,
                     partition='held out',
                     time=self.held_out_prediction_times[-1],
                     filename=self.held_out_prediction_files[-1]))

        # Check to see if all children have terminated - if so, terminate this agent
        # immortal child dying is failure
        # mortal child dying without sending results is failure
        # any child failure should kill parent
        if self.immortal_offspring is True and len(
                self.conns_from_children) != len(self.child_states):
            logger.error("%s: Immortal child has died. Dying of grief",
                         self.name)
            raise TerminationEx
        elif self.immortal_offspring is False:
            dead_kids = [
                x for x in self.child_states
                if x not in self.conns_from_children
            ]
            for dk in dead_kids:
                if len(self.child_test_prediction_files[dk]) == 0:
                    logger.error(
                        "%s: Mortal child %s has died without sending results",
                        self.name, dk)
                    raise TerminationEx
            if len(self.conns_from_children) == 0:
                logger.info("%s: No children remaining. Terminating.",
                            self.name)
                raise TerminationEx
Exemple #5
0
    def next_action(self):
        """Fit the learner once, report its held out score and predictions, then stop.

        The score message is sent first, followed by one 'predictions' message
        for each of the valid and test partitions present in ``self.data`` and
        a final one for the held out rows of the training data.  Predictions
        are written to temporary ``.npy`` files and only the file names are
        sent to the parent.

        Raises:
            TerminationEx: always, once every message has been sent.
        """
        self.read_messages()
        # The clock is started before fitting
        self.time_checkpoint = time.clock()
        self.fit(self.train_idx, self.feature_subset)
        # Score the fitted learner on the held out rows
        held_out_guess = self.predict('X_train', self.test_idx,
                                      self.feature_subset)
        expected = self.test_truth
        score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                      truth=expected,
                                      predictions=held_out_guess,
                                      task=self.data_info['task'])

        elapsed = (time.clock() - self.time_checkpoint +
                   self.time_before_checkpoint)
        self.score_times.append(elapsed)
        self.score_values.append(score)
        # Tell the parent how well we did and how long it took
        self.send_to_parent({
            'subject': 'score',
            'sender': self.name,
            'time': self.score_times[-1],
            'score': self.score_values[-1],
        })
        # Bank the time used so far
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += time.clock() - self.time_checkpoint

        # FIXME - send all of this data at the same time to prevent gotchas

        # Valid and test predictions are optional - only made when the data is present
        for data_key, partition in (('X_valid', 'valid'), ('X_test', 'test')):
            if data_key not in self.data:
                continue
            guess = self.predict(data_key, 'all', self.feature_subset)
            path = util.random_temp_file_name('.npy')
            np.save(path, guess)
            files = getattr(self, partition + '_prediction_files')
            files.append(path)
            times = getattr(self, partition + '_prediction_times')
            times.append(self.time_before_checkpoint)
            self.send_to_parent({
                'subject': 'predictions',
                'sender': self.name,
                'partition': partition,
                'time': times[-1],
                'filename': files[-1],
            })

        # Held out predictions also carry the row indices they refer to
        guess = self.predict('X_train', self.test_idx, self.feature_subset)
        path = util.random_temp_file_name('.npy')
        np.save(path, guess)
        self.held_out_prediction_files.append(path)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent({
            'subject': 'predictions',
            'sender': self.name,
            'partition': 'held out',
            'idx': self.test_idx,
            'time': self.held_out_prediction_times[-1],
            'filename': self.held_out_prediction_files[-1],
        })
        # And I'm spent
        raise TerminationEx
Exemple #6
0
    def next_action(self):
        """Grow the learner for up to one time quantum, then publish predictions.

        While the quantum has not elapsed, ``self.learner.n_estimators`` is
        increased by ``self.n_estimators_quantum`` and the learner is refit.
        Held out scores are sent to the parent along the way, at most
        ``self.n_samples`` of them per call.  Afterwards predictions for the
        valid and test partitions (when present in ``self.data``) and for the
        held out rows are saved to temporary ``.npy`` files whose names are
        sent to the parent.  The agent pauses itself when
        ``self.run_one_iteration`` is set.
        """
        # Read messages
        self.read_messages()
        # Start timing
        self.time_checkpoint = time.clock()
        # Elapsed time (since the checkpoint) after which the next score is due
        predict_time = self.time_quantum / self.n_samples
        scores_so_far = 0
        # Increase estimators and learn
        while time.clock() - self.time_checkpoint < self.time_quantum:
            # Read messages - maybe compute quantum has changed?
            self.get_parent_inbox()
            self.read_messages()
            # Do learning
            self.learner.n_estimators += self.n_estimators_quantum
            fit_began = time.clock()
            self.fit(self.train_idx, self.feature_subset)
            fit_cost = time.clock() - fit_began
            if global_data.exp['slowdown_factor'] > 1:
                # NOTE(review): presumably burns CPU to emulate a slower machine - confirm in util
                util.waste_cpu_time(
                    fit_cost * (global_data.exp['slowdown_factor'] - 1))
            if time.clock() - self.time_checkpoint <= predict_time:
                # Not yet time for the next score - keep learning
                continue

            held_out_guess = self.predict('X_train', self.test_idx,
                                          self.feature_subset)
            expected = self.test_truth
            score = libscores.eval_metric(
                metric=self.data_info['eval_metric'],
                truth=expected,
                predictions=held_out_guess,
                task=self.data_info['task'])

            elapsed = (time.clock() - self.time_checkpoint +
                       self.time_before_checkpoint)
            self.score_times.append(elapsed)
            self.score_values.append(score)
            # Send score and time to parent
            self.send_to_parent({
                'subject': 'score',
                'sender': self.name,
                'time': self.score_times[-1],
                'score': self.score_values[-1],
            })
            scores_so_far += 1
            if scores_so_far >= self.n_samples:
                # All the scores asked for have been sent
                break
            # Next time at which to make a prediction - spread the remaining
            # scores evenly over what is left of the quantum
            since_checkpoint = time.clock() - self.time_checkpoint
            quantum_left = self.time_quantum - (time.clock() -
                                                self.time_checkpoint)
            predict_time = since_checkpoint + quantum_left / (
                self.n_samples - scores_so_far)
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += time.clock() - self.time_checkpoint
        # Now make predictions

        # FIXME - send all of this data at the same time to prevent gotchas

        for data_key, partition in (('X_valid', 'valid'), ('X_test', 'test')):
            if data_key not in self.data:
                continue
            guess = self.predict(data_key, 'all', self.feature_subset)
            path = util.random_temp_file_name('.npy')
            np.save(path, guess)
            files = getattr(self, partition + '_prediction_files')
            files.append(path)
            times = getattr(self, partition + '_prediction_times')
            times.append(self.time_before_checkpoint)
            self.send_to_parent({
                'subject': 'predictions',
                'sender': self.name,
                'partition': partition,
                'time': times[-1],
                'filename': files[-1],
            })

        # Held out predictions also carry the row indices they refer to
        guess = self.predict('X_train', self.test_idx, self.feature_subset)
        path = util.random_temp_file_name('.npy')
        np.save(path, guess)
        self.held_out_prediction_files.append(path)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent({
            'subject': 'predictions',
            'sender': self.name,
            'partition': 'held out',
            'idx': self.test_idx,
            'time': self.held_out_prediction_times[-1],
            'filename': self.held_out_prediction_files[-1],
        })

        if self.run_one_iteration:
            self.pause()