Exemple #1
0
    def next_action(self):
        """Fit the learner once, report score and predictions, then terminate.

        Steps, in order:

        1. Process pending messages with ``read_messages``.
        2. Fit on ``self.train_idx`` and score the predictions for
           ``self.test_idx`` with ``libscores.eval_metric``; the score and the
           elapsed time are appended to ``self.score_values`` /
           ``self.score_times`` and sent to the parent.
        3. Save predictions for 'X_valid' and 'X_test' (when those keys are in
           ``self.data``) and for the held out rows to temporary ``.npy``
           files, and send each filename to the parent.

        Raises:
            TerminationEx: always, after every message has been sent.
        """
        # time.clock() was removed in Python 3.8.  time.process_time() reports
        # process CPU time, which is what clock() returned on Unix; fall back
        # to clock() only on interpreters that predate process_time.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        self.read_messages()
        # Start timing
        self.time_checkpoint = cpu_clock()
        # Fit learner
        self.fit(self.train_idx, self.feature_subset)
        # Make predictions on held out set and evaluate
        predictions = self.predict('X_train', self.test_idx, self.feature_subset)
        score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                      truth=self.test_truth,
                                      predictions=predictions,
                                      task=self.data_info['task'])

        # Elapsed time in this call plus the time accumulated by earlier calls
        self.score_times.append(cpu_clock() - self.time_checkpoint + self.time_before_checkpoint)
        self.score_values.append(score)
        # Send score and time to parent
        self.send_to_parent(dict(subject='score', sender=self.name,
                                 time=self.score_times[-1],
                                 score=self.score_values[-1]))
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += cpu_clock() - self.time_checkpoint
        # Now make predictions on valid, test and held out sets

        # FIXME - send all of this data at the same time to prevent gotchas

        if 'X_valid' in self.data:
            predictions = self.predict('X_valid', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.valid_prediction_files.append(tmp_filename)
            self.valid_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(dict(subject='predictions', sender=self.name, partition='valid',
                                     time=self.valid_prediction_times[-1],
                                     filename=self.valid_prediction_files[-1]))
        if 'X_test' in self.data:
            predictions = self.predict('X_test', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.test_prediction_files.append(tmp_filename)
            self.test_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(dict(subject='predictions', sender=self.name, partition='test',
                                     time=self.test_prediction_times[-1],
                                     filename=self.test_prediction_files[-1]))

        # Held out predictions are recomputed here rather than reusing the
        # array that was scored above.
        # NOTE(review): reuse would only be safe if eval_metric never modifies
        # its inputs - confirm before changing.
        predictions = self.predict('X_train', self.test_idx, self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        # The held out message also carries the row indices it refers to
        self.send_to_parent(dict(subject='predictions', sender=self.name, partition='held out',
                                 idx=self.test_idx,
                                 time=self.held_out_prediction_times[-1],
                                 filename=self.held_out_prediction_files[-1]))
        # And I'm spent
        raise TerminationEx
Exemple #2
0
    def next_action(self):
        """Fit the learner once, report score and predictions, then terminate.

        Steps, in order:

        1. Process pending messages with ``read_messages``.
        2. Fit on ``self.train_idx`` and score the predictions for
           ``self.test_idx`` with ``libscores.eval_metric``; the score and the
           elapsed time are appended to ``self.score_values`` /
           ``self.score_times`` and sent to the parent.
        3. Save predictions for 'X_valid' and 'X_test' (when those keys are in
           ``self.data``) and for the held out rows to temporary ``.npy``
           files, and send each filename to the parent.

        Raises:
            TerminationEx: always, after every message has been sent.
        """
        # time.clock() was removed in Python 3.8.  time.process_time() reports
        # process CPU time, which is what clock() returned on Unix; fall back
        # to clock() only on interpreters that predate process_time.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        self.read_messages()
        # Start timing
        self.time_checkpoint = cpu_clock()
        # Fit learner
        self.fit(self.train_idx, self.feature_subset)
        # Make predictions on held out set and evaluate
        predictions = self.predict('X_train', self.test_idx,
                                   self.feature_subset)
        score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                      truth=self.test_truth,
                                      predictions=predictions,
                                      task=self.data_info['task'])

        # Elapsed time in this call plus the time accumulated by earlier calls
        self.score_times.append(cpu_clock() - self.time_checkpoint +
                                self.time_before_checkpoint)
        self.score_values.append(score)
        # Send score and time to parent
        self.send_to_parent(
            dict(subject='score',
                 sender=self.name,
                 time=self.score_times[-1],
                 score=self.score_values[-1]))
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += cpu_clock() - self.time_checkpoint
        # Now make predictions on valid, test and held out sets

        # FIXME - send all of this data at the same time to prevent gotchas

        if 'X_valid' in self.data:
            predictions = self.predict('X_valid', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.valid_prediction_files.append(tmp_filename)
            self.valid_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(
                dict(subject='predictions',
                     sender=self.name,
                     partition='valid',
                     time=self.valid_prediction_times[-1],
                     filename=self.valid_prediction_files[-1]))
        if 'X_test' in self.data:
            predictions = self.predict('X_test', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.test_prediction_files.append(tmp_filename)
            self.test_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(
                dict(subject='predictions',
                     sender=self.name,
                     partition='test',
                     time=self.test_prediction_times[-1],
                     filename=self.test_prediction_files[-1]))

        # Held out predictions are recomputed here rather than reusing the
        # array that was scored above.
        # NOTE(review): reuse would only be safe if eval_metric never modifies
        # its inputs - confirm before changing.
        predictions = self.predict('X_train', self.test_idx,
                                   self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        # The held out message also carries the row indices it refers to
        self.send_to_parent(
            dict(subject='predictions',
                 sender=self.name,
                 partition='held out',
                 idx=self.test_idx,
                 time=self.held_out_prediction_times[-1],
                 filename=self.held_out_prediction_files[-1]))
        # And I'm spent
        raise TerminationEx
Exemple #3
0
    def next_action(self):
        """Refit with more estimators for one time quantum, reporting as it goes.

        While less than ``self.time_quantum`` has elapsed, each iteration
        re-reads messages, adds ``self.n_estimators_quantum`` to
        ``self.learner.n_estimators`` and refits.  Whenever the elapsed time
        passes the next scheduled prediction time, the held out rows are
        scored and the score is sent to the parent; the loop stops early once
        ``self.n_samples`` scores have been sent.

        Afterwards predictions for 'X_valid' and 'X_test' (when those keys are
        in ``self.data``) and for the held out rows are saved to temporary
        ``.npy`` files whose names are sent to the parent.  Finally
        ``self.pause()`` is called if ``self.run_one_iteration`` is truthy.
        """
        # time.clock() was removed in Python 3.8.  time.process_time() reports
        # process CPU time, which is what clock() returned on Unix; fall back
        # to clock() only on interpreters that predate process_time.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        # Read messages
        self.read_messages()
        # Start timing
        self.time_checkpoint = cpu_clock()
        # First score is due after an even share of the quantum
        predict_time = self.time_quantum / self.n_samples
        scores_so_far = 0
        # Increase estimators and learn
        while cpu_clock() - self.time_checkpoint < self.time_quantum:
            # Read messages - maybe compute quantum has changed?
            self.get_parent_inbox()
            self.read_messages()
            # Do learning
            self.learner.n_estimators += self.n_estimators_quantum
            start_time = cpu_clock()
            self.fit(self.train_idx, self.feature_subset)
            time_taken = cpu_clock() - start_time
            # Burn extra CPU time in proportion to the time the fit took
            # NOTE(review): presumably emulates slower hardware - confirm
            if global_data.exp['slowdown_factor'] > 1:
                util.waste_cpu_time(time_taken * (global_data.exp['slowdown_factor'] - 1))
            if cpu_clock() - self.time_checkpoint > predict_time:
                predictions = self.predict('X_train', self.test_idx, self.feature_subset)
                score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                              truth=self.test_truth,
                                              predictions=predictions,
                                              task=self.data_info['task'])

                self.score_times.append(cpu_clock() - self.time_checkpoint + self.time_before_checkpoint)
                self.score_values.append(score)
                # Send score and time to parent
                self.send_to_parent(dict(subject='score', sender=self.name,
                                         time=self.score_times[-1],
                                         score=self.score_values[-1]))
                scores_so_far += 1
                # Next time at which to make a prediction
                if self.n_samples > scores_so_far:
                    # Read the clock once so both terms use the same instant,
                    # then spread the remaining quantum over the scores left
                    elapsed = cpu_clock() - self.time_checkpoint
                    predict_time = elapsed + \
                                   (self.time_quantum - elapsed) / \
                                   (self.n_samples - scores_so_far)
                else:
                    break
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += cpu_clock() - self.time_checkpoint
        # Now make predictions

        # FIXME - send all of this data at the same time to prevent gotchas

        if 'X_valid' in self.data:
            predictions = self.predict('X_valid', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.valid_prediction_files.append(tmp_filename)
            self.valid_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(dict(subject='predictions', sender=self.name, partition='valid',
                                     time=self.valid_prediction_times[-1],
                                     filename=self.valid_prediction_files[-1]))
        if 'X_test' in self.data:
            predictions = self.predict('X_test', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.test_prediction_files.append(tmp_filename)
            self.test_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(dict(subject='predictions', sender=self.name, partition='test',
                                     time=self.test_prediction_times[-1],
                                     filename=self.test_prediction_files[-1]))

        predictions = self.predict('X_train', self.test_idx, self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        # The held out message also carries the row indices it refers to
        self.send_to_parent(dict(subject='predictions', sender=self.name, partition='held out',
                                 idx=self.test_idx,
                                 time=self.held_out_prediction_times[-1],
                                 filename=self.held_out_prediction_files[-1]))

        if self.run_one_iteration:
            self.pause()
Exemple #4
0
    def next_action(self):
        """Refit with more estimators for one time quantum, reporting as it goes.

        While less than ``self.time_quantum`` has elapsed, each iteration
        re-reads messages, adds ``self.n_estimators_quantum`` to
        ``self.learner.n_estimators`` and refits.  Whenever the elapsed time
        passes the next scheduled prediction time, the held out rows are
        scored and the score is sent to the parent; the loop stops early once
        ``self.n_samples`` scores have been sent.

        Afterwards predictions for 'X_valid' and 'X_test' (when those keys are
        in ``self.data``) and for the held out rows are saved to temporary
        ``.npy`` files whose names are sent to the parent.  Finally
        ``self.pause()`` is called if ``self.run_one_iteration`` is truthy.
        """
        # time.clock() was removed in Python 3.8.  time.process_time() reports
        # process CPU time, which is what clock() returned on Unix; fall back
        # to clock() only on interpreters that predate process_time.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        # Read messages
        self.read_messages()
        # Start timing
        self.time_checkpoint = cpu_clock()
        # First score is due after an even share of the quantum
        predict_time = self.time_quantum / self.n_samples
        scores_so_far = 0
        # Increase estimators and learn
        while cpu_clock() - self.time_checkpoint < self.time_quantum:
            # Read messages - maybe compute quantum has changed?
            self.get_parent_inbox()
            self.read_messages()
            # Do learning
            self.learner.n_estimators += self.n_estimators_quantum
            start_time = cpu_clock()
            self.fit(self.train_idx, self.feature_subset)
            time_taken = cpu_clock() - start_time
            # Burn extra CPU time in proportion to the time the fit took
            # NOTE(review): presumably emulates slower hardware - confirm
            if global_data.exp['slowdown_factor'] > 1:
                util.waste_cpu_time(time_taken *
                                    (global_data.exp['slowdown_factor'] - 1))
            if cpu_clock() - self.time_checkpoint > predict_time:
                predictions = self.predict('X_train', self.test_idx,
                                           self.feature_subset)
                score = libscores.eval_metric(
                    metric=self.data_info['eval_metric'],
                    truth=self.test_truth,
                    predictions=predictions,
                    task=self.data_info['task'])

                self.score_times.append(cpu_clock() - self.time_checkpoint +
                                        self.time_before_checkpoint)
                self.score_values.append(score)
                # Send score and time to parent
                self.send_to_parent(
                    dict(subject='score',
                         sender=self.name,
                         time=self.score_times[-1],
                         score=self.score_values[-1]))
                scores_so_far += 1
                # Next time at which to make a prediction
                if self.n_samples > scores_so_far:
                    # Read the clock once so both terms use the same instant,
                    # then spread the remaining quantum over the scores left
                    elapsed = cpu_clock() - self.time_checkpoint
                    predict_time = elapsed + \
                                   (self.time_quantum - elapsed) / \
                                   (self.n_samples - scores_so_far)
                else:
                    break
        # Save total time taken
        # TODO - this is ignoring the time taken to make valid and test predictions
        self.time_before_checkpoint += cpu_clock() - self.time_checkpoint
        # Now make predictions

        # FIXME - send all of this data at the same time to prevent gotchas

        if 'X_valid' in self.data:
            predictions = self.predict('X_valid', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.valid_prediction_files.append(tmp_filename)
            self.valid_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(
                dict(subject='predictions',
                     sender=self.name,
                     partition='valid',
                     time=self.valid_prediction_times[-1],
                     filename=self.valid_prediction_files[-1]))
        if 'X_test' in self.data:
            predictions = self.predict('X_test', 'all', self.feature_subset)
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            self.test_prediction_files.append(tmp_filename)
            self.test_prediction_times.append(self.time_before_checkpoint)
            self.send_to_parent(
                dict(subject='predictions',
                     sender=self.name,
                     partition='test',
                     time=self.test_prediction_times[-1],
                     filename=self.test_prediction_files[-1]))

        predictions = self.predict('X_train', self.test_idx,
                                   self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(self.time_before_checkpoint)
        # The held out message also carries the row indices it refers to
        self.send_to_parent(
            dict(subject='predictions',
                 sender=self.name,
                 partition='held out',
                 idx=self.test_idx,
                 time=self.held_out_prediction_times[-1],
                 filename=self.held_out_prediction_files[-1]))

        if self.run_one_iteration:
            self.pause()