def next_action(self):
    """One-shot worker step: fit the learner once, score it on the held
    out split, ship every set of predictions to the parent agent and then
    terminate this agent."""
    self.read_messages()

    # Time the fit and held-out evaluation
    self.time_checkpoint = time.clock()
    self.fit(self.train_idx, self.feature_subset)

    # Score on the held out rows of the training data
    held_out_preds = self.predict('X_train', self.test_idx,
                                  self.feature_subset)
    score = libscores.eval_metric(metric=self.data_info['eval_metric'],
                                  truth=self.test_truth,
                                  predictions=held_out_preds,
                                  task=self.data_info['task'])
    self.score_times.append(time.clock() - self.time_checkpoint +
                            self.time_before_checkpoint)
    self.score_values.append(score)
    # Report score and elapsed time to the parent
    self.send_to_parent(dict(subject='score', sender=self.name,
                             time=self.score_times[-1],
                             score=self.score_values[-1]))

    # Save total time taken
    # TODO - this is ignoring the time taken to make valid and test predictions
    self.time_before_checkpoint += time.clock() - self.time_checkpoint

    def ship(partition, preds, files, times, **extra):
        # Persist predictions to a scratch .npy file, record it and notify
        # the parent agent
        path = util.random_temp_file_name('.npy')
        np.save(path, preds)
        files.append(path)
        times.append(self.time_before_checkpoint)
        message = dict(subject='predictions', sender=self.name,
                       partition=partition, time=times[-1],
                       filename=files[-1])
        message.update(extra)
        self.send_to_parent(message)

    # Now make predictions on valid, test and held out sets
    # FIXME - send all of this data at the same time to prevent gotchas
    if 'X_valid' in self.data:
        ship('valid',
             self.predict('X_valid', 'all', self.feature_subset),
             self.valid_prediction_files, self.valid_prediction_times)
    if 'X_test' in self.data:
        ship('test',
             self.predict('X_test', 'all', self.feature_subset),
             self.test_prediction_files, self.test_prediction_times)
    # Held out predictions also carry the row indices they correspond to
    ship('held out',
         self.predict('X_train', self.test_idx, self.feature_subset),
         self.held_out_prediction_files, self.held_out_prediction_times,
         idx=self.test_idx)

    # And I'm spent
    raise TerminationEx
def next_action(self):
    """Aggregate scores and predictions from child agents and forward the
    combined results to this agent's parent.

    For each fully-reported round: scores are averaged over the children
    that produced a non-NaN value; valid/test predictions are averaged
    elementwise; held-out predictions are stitched together using each
    child's held-out index.  Terminates when a child fails or when all
    children are gone.
    """
    # Check mail
    self.read_messages()

    # Collect up scores - even if paused, children may still be finishing
    # tasks.  Only rounds that every child has reported are aggregated.
    min_n_scores = min(len(scores)
                       for scores in self.child_score_values.itervalues())
    while len(self.score_values) < min_n_scores:
        n = len(self.score_values)
        num_scores = 0
        sum_scores = 0
        for child_scores in self.child_score_values.itervalues():
            # noinspection PyUnresolvedReferences
            if not np.isnan(child_scores[n]):
                num_scores += 1
                sum_scores += child_scores[n]
        # FIX: previously this divided unconditionally and raised
        # ZeroDivisionError when every child reported NaN for round n.
        # Propagate NaN instead - NaN scores are clearly expected upstream.
        if num_scores > 0:
            score = sum_scores / num_scores
        else:
            score = float('nan')
        # Report the slowest child's time so the aggregate is conservative
        maxtime = max(times[n]
                      for times in self.child_score_times.itervalues())
        self.score_values.append(score)
        self.score_times.append(maxtime)
        self.send_to_parent(dict(subject='score', sender=self.name,
                                 time=self.score_times[-1],
                                 score=self.score_values[-1]))

    # FIXME - send all of this data at the same time to prevent gotchas

    def average_and_send(partition, child_files, child_times, own_files,
                         own_times):
        # Average one round of child prediction files (removing each file
        # once read), save the combined array and notify the parent.
        min_n = min(len(times) for times in child_times.itervalues())
        while len(own_times) < min_n:
            n = len(own_times)
            predictions = None
            for child_name in self.child_score_times.iterkeys():
                filename = child_files[child_name][n]
                child_predictions = np.load(filename)
                os.remove(filename)
                if predictions is None:
                    predictions = child_predictions
                else:
                    predictions += child_predictions
            # float divisor guards against integer truncation should a
            # child ever save an integer-dtype prediction array
            predictions = predictions / float(len(self.child_score_times))
            tmp_filename = util.random_temp_file_name('.npy')
            np.save(tmp_filename, predictions)
            maxtime = max(times[n] for times in child_times.itervalues())
            own_files.append(tmp_filename)
            own_times.append(maxtime)
            self.send_to_parent(dict(subject='predictions',
                                     sender=self.name,
                                     partition=partition,
                                     time=own_times[-1],
                                     filename=own_files[-1]))

    average_and_send('valid',
                     self.child_valid_prediction_files,
                     self.child_valid_prediction_times,
                     self.valid_prediction_files,
                     self.valid_prediction_times)
    average_and_send('test',
                     self.child_test_prediction_files,
                     self.child_test_prediction_times,
                     self.test_prediction_files,
                     self.test_prediction_times)

    # Held-out predictions are disjoint across children, so stitch them
    # into a single array instead of averaging
    min_n_held_out = min(
        len(times)
        for times in self.child_held_out_prediction_times.itervalues())
    while len(self.held_out_prediction_times) < min_n_held_out:
        n = len(self.held_out_prediction_times)
        # FIXME - get rid of if else here
        if self.data_info['task'] == 'multiclass.classification':
            predictions = np.zeros(self.data['Y_train_1_of_k'].shape)
        else:
            predictions = np.zeros(self.data['Y_train'].shape)
        for child_name in self.child_score_times.iterkeys():
            filename = self.child_held_out_prediction_files[child_name][n]
            child_predictions = np.load(filename)
            os.remove(filename)
            predictions[self.child_held_out_idx[child_name]] = \
                child_predictions
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        maxtime = max(
            times[n]
            for times in self.child_held_out_prediction_times.itervalues())
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(maxtime)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='held out',
                                 time=self.held_out_prediction_times[-1],
                                 filename=self.held_out_prediction_files[-1]))

    # Check to see if all children have terminated - if so, terminate this
    # agent:
    #   - an immortal child dying is failure
    #   - a mortal child dying without sending results is failure
    #   - any child failure should kill the parent
    if self.immortal_offspring is True and \
            len(self.conns_from_children) != len(self.child_states):
        logger.error("%s: Immortal child has died. Dying of grief",
                     self.name)
        raise TerminationEx
    elif self.immortal_offspring is False:
        dead_kids = [x for x in self.child_states
                     if x not in self.conns_from_children]
        for dk in dead_kids:
            if len(self.child_test_prediction_files[dk]) == 0:
                logger.error("%s: Mortal child %s has died without sending results",
                             self.name, dk)
                raise TerminationEx
    if len(self.conns_from_children) == 0:
        logger.info("%s: No children remaining. Terminating.", self.name)
        raise TerminationEx
def next_action(self):
    # Iterative worker step: repeatedly grow the ensemble and refit within
    # one compute quantum, scoring on the held-out split at (roughly)
    # self.n_samples evenly spaced moments, then send valid/test/held-out
    # predictions to the parent agent.  Pauses afterwards when
    # self.run_one_iteration is set.

    # Read messages
    self.read_messages()
    # Start timing
    self.time_checkpoint = time.clock()
    # First elapsed-time offset (within the quantum) at which to score
    predict_time = self.time_quantum / self.n_samples
    scores_so_far = 0
    # Increase estimators and learn until the quantum is exhausted
    while time.clock() - self.time_checkpoint < self.time_quantum:
        # Read messages - maybe compute quantum has changed?
        self.get_parent_inbox()
        self.read_messages()
        # Do learning: grow the ensemble, then refit
        self.learner.n_estimators += self.n_estimators_quantum
        start_time = time.clock()
        self.fit(self.train_idx, self.feature_subset)
        time_taken = time.clock() - start_time
        # Experiment knob: emulate a slower machine by burning extra CPU
        # proportional to the fit time
        if global_data.exp['slowdown_factor'] > 1:
            util.waste_cpu_time(time_taken *
                                (global_data.exp['slowdown_factor'] - 1))
        # Time to emit another score sample?
        if time.clock() - self.time_checkpoint > predict_time:
            predictions = self.predict('X_train', self.test_idx,
                                       self.feature_subset)
            truth = self.test_truth
            score = libscores.eval_metric(
                metric=self.data_info['eval_metric'],
                truth=truth,
                predictions=predictions,
                task=self.data_info['task'])
            # Times reported to the parent are cumulative across calls
            self.score_times.append(time.clock() - self.time_checkpoint +
                                    self.time_before_checkpoint)
            self.score_values.append(score)
            # Send score and time to parent
            self.send_to_parent(dict(subject='score', sender=self.name,
                                     time=self.score_times[-1],
                                     score=self.score_values[-1]))
            scores_so_far += 1
            # Next time at which to make a prediction: spread the remaining
            # samples evenly over what is left of the quantum
            if self.n_samples > scores_so_far:
                predict_time = time.clock() - self.time_checkpoint + \
                    (self.time_quantum - (time.clock() - self.time_checkpoint)) / \
                    (self.n_samples - scores_so_far)
            else:
                break
    # Save total time taken
    # TODO - this is ignoring the time taken to make valid and test predictions
    self.time_before_checkpoint += time.clock() - self.time_checkpoint
    # Now make predictions
    # FIXME - send all of this data at the same time to prevent gotchas
    if 'X_valid' in self.data:
        predictions = self.predict('X_valid', 'all', self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.valid_prediction_files.append(tmp_filename)
        self.valid_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='valid',
                                 time=self.valid_prediction_times[-1],
                                 filename=self.valid_prediction_files[-1]))
    if 'X_test' in self.data:
        predictions = self.predict('X_test', 'all', self.feature_subset)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        self.test_prediction_files.append(tmp_filename)
        self.test_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='test',
                                 time=self.test_prediction_times[-1],
                                 filename=self.test_prediction_files[-1]))
    # Held out predictions also carry the row indices they correspond to
    predictions = self.predict('X_train', self.test_idx,
                               self.feature_subset)
    # print('Held out')
    # print(predictions[0])
    tmp_filename = util.random_temp_file_name('.npy')
    np.save(tmp_filename, predictions)
    self.held_out_prediction_files.append(tmp_filename)
    self.held_out_prediction_times.append(self.time_before_checkpoint)
    self.send_to_parent(dict(subject='predictions', sender=self.name,
                             partition='held out', idx=self.test_idx,
                             time=self.held_out_prediction_times[-1],
                             filename=self.held_out_prediction_files[-1]))
    # Single-stepping mode: wait for the parent before iterating again
    if self.run_one_iteration:
        self.pause()
def next_action(self):
    """Aggregate scores and predictions from child agents and forward the
    combined results to this agent's parent.

    Scores for each fully-reported round are averaged over the children
    with a non-NaN value; valid/test predictions are averaged elementwise;
    held-out predictions are stitched together via each child's held-out
    index.  Terminates when a child fails or all children have finished.
    """
    # Check mail
    self.read_messages()
    # Collect up scores and predictions - even if paused, children may
    # still be finishing tasks.  Only rounds every child has reported on
    # are aggregated.
    min_n_scores = min(
        len(scores) for scores in self.child_score_values.itervalues())
    while len(self.score_values) < min_n_scores:
        n = len(self.score_values)
        num_scores = 0
        sum_scores = 0
        for child_scores in self.child_score_values.itervalues():
            # noinspection PyUnresolvedReferences
            if not np.isnan(child_scores[n]):
                num_scores += 1
                sum_scores += child_scores[n]
        # FIX: guard the division - previously this raised
        # ZeroDivisionError when every child reported NaN for round n.
        # Propagate NaN instead, matching the children's own convention.
        score = sum_scores / num_scores if num_scores > 0 else float('nan')
        # Report the slowest child's time so the aggregate is conservative
        maxtime = max(times[n]
                      for times in self.child_score_times.itervalues())
        self.score_values.append(score)
        self.score_times.append(maxtime)
        self.send_to_parent(
            dict(subject='score', sender=self.name,
                 time=self.score_times[-1], score=self.score_values[-1]))
    # FIXME - send all of this data at the same time to prevent gotchas
    # Average each fully-reported round of valid predictions, deleting
    # the child files once read
    min_n_valid = min(
        len(times)
        for times in self.child_valid_prediction_times.itervalues())
    while len(self.valid_prediction_times) < min_n_valid:
        n = len(self.valid_prediction_times)
        predictions = None
        for child_name in self.child_score_times.iterkeys():
            filename = self.child_valid_prediction_files[child_name][n]
            child_predictions = np.load(filename)
            os.remove(filename)
            if predictions is None:
                predictions = child_predictions
            else:
                predictions += child_predictions
        predictions /= len(self.child_score_times)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        maxtime = max(
            times[n]
            for times in self.child_valid_prediction_times.itervalues())
        self.valid_prediction_files.append(tmp_filename)
        self.valid_prediction_times.append(maxtime)
        self.send_to_parent(
            dict(subject='predictions', sender=self.name,
                 partition='valid', time=self.valid_prediction_times[-1],
                 filename=self.valid_prediction_files[-1]))
    # Same averaging for test predictions
    min_n_test = min(
        len(times)
        for times in self.child_test_prediction_times.itervalues())
    while len(self.test_prediction_times) < min_n_test:
        n = len(self.test_prediction_times)
        predictions = None
        for child_name in self.child_score_times.iterkeys():
            filename = self.child_test_prediction_files[child_name][n]
            child_predictions = np.load(filename)
            os.remove(filename)
            if predictions is None:
                predictions = child_predictions
            else:
                predictions += child_predictions
        predictions /= len(self.child_score_times)
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        maxtime = max(
            times[n]
            for times in self.child_test_prediction_times.itervalues())
        self.test_prediction_files.append(tmp_filename)
        self.test_prediction_times.append(maxtime)
        self.send_to_parent(
            dict(subject='predictions', sender=self.name,
                 partition='test', time=self.test_prediction_times[-1],
                 filename=self.test_prediction_files[-1]))
    # Held-out predictions are disjoint across children, so they are
    # stitched into one array rather than averaged
    min_n_held_out = min(
        len(times)
        for times in self.child_held_out_prediction_times.itervalues())
    while len(self.held_out_prediction_times) < min_n_held_out:
        n = len(self.held_out_prediction_times)
        # FIXME - get rid of if else here
        if self.data_info['task'] == 'multiclass.classification':
            predictions = np.zeros(self.data['Y_train_1_of_k'].shape)
        else:
            predictions = np.zeros(self.data['Y_train'].shape)
        for child_name in self.child_score_times.iterkeys():
            filename = self.child_held_out_prediction_files[child_name][n]
            child_predictions = np.load(filename)
            os.remove(filename)
            predictions[
                self.child_held_out_idx[child_name]] = child_predictions
        tmp_filename = util.random_temp_file_name('.npy')
        np.save(tmp_filename, predictions)
        maxtime = max(
            times[n]
            for times in self.child_held_out_prediction_times.itervalues())
        self.held_out_prediction_files.append(tmp_filename)
        self.held_out_prediction_times.append(maxtime)
        self.send_to_parent(
            dict(subject='predictions', sender=self.name,
                 partition='held out',
                 time=self.held_out_prediction_times[-1],
                 filename=self.held_out_prediction_files[-1]))
    # Check to see if all children have terminated - if so, terminate this
    # agent:
    #   - an immortal child dying is failure
    #   - a mortal child dying without sending results is failure
    #   - any child failure should kill the parent
    if self.immortal_offspring is True and len(
            self.conns_from_children) != len(self.child_states):
        logger.error("%s: Immortal child has died. Dying of grief",
                     self.name)
        raise TerminationEx
    elif self.immortal_offspring is False:
        dead_kids = [
            x for x in self.child_states
            if x not in self.conns_from_children
        ]
        for dk in dead_kids:
            if len(self.child_test_prediction_files[dk]) == 0:
                logger.error(
                    "%s: Mortal child %s has died without sending results",
                    self.name, dk)
                raise TerminationEx
    if len(self.conns_from_children) == 0:
        logger.info("%s: No children remaining. Terminating.", self.name)
        raise TerminationEx
def next_action(self):
    """Fit the learner once, report its held-out score, send valid, test
    and held-out predictions to the parent agent, then terminate."""
    self.read_messages()

    # Start timing
    self.time_checkpoint = time.clock()

    # Fit learner
    self.fit(self.train_idx, self.feature_subset)

    # Evaluate on the held out rows of the training data
    held_out_preds = self.predict('X_train', self.test_idx,
                                  self.feature_subset)
    held_out_score = libscores.eval_metric(
        metric=self.data_info['eval_metric'],
        truth=self.test_truth,
        predictions=held_out_preds,
        task=self.data_info['task'])
    self.score_times.append(time.clock() - self.time_checkpoint +
                            self.time_before_checkpoint)
    self.score_values.append(held_out_score)

    # Report score and cumulative time to the parent
    self.send_to_parent(dict(subject='score', sender=self.name,
                             time=self.score_times[-1],
                             score=self.score_values[-1]))

    # Save total time taken
    # TODO - this is ignoring the time taken to make valid and test predictions
    self.time_before_checkpoint += time.clock() - self.time_checkpoint

    # Now make predictions on valid, test and held out sets
    # FIXME - send all of this data at the same time to prevent gotchas
    if 'X_valid' in self.data:
        valid_preds = self.predict('X_valid', 'all', self.feature_subset)
        valid_file = util.random_temp_file_name('.npy')
        np.save(valid_file, valid_preds)
        self.valid_prediction_files.append(valid_file)
        self.valid_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='valid',
                                 time=self.valid_prediction_times[-1],
                                 filename=self.valid_prediction_files[-1]))
    if 'X_test' in self.data:
        test_preds = self.predict('X_test', 'all', self.feature_subset)
        test_file = util.random_temp_file_name('.npy')
        np.save(test_file, test_preds)
        self.test_prediction_files.append(test_file)
        self.test_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='test',
                                 time=self.test_prediction_times[-1],
                                 filename=self.test_prediction_files[-1]))

    # Held out predictions also carry the row indices they correspond to
    held_out_preds = self.predict('X_train', self.test_idx,
                                  self.feature_subset)
    held_out_file = util.random_temp_file_name('.npy')
    np.save(held_out_file, held_out_preds)
    self.held_out_prediction_files.append(held_out_file)
    self.held_out_prediction_times.append(self.time_before_checkpoint)
    self.send_to_parent(dict(subject='predictions', sender=self.name,
                             partition='held out', idx=self.test_idx,
                             time=self.held_out_prediction_times[-1],
                             filename=self.held_out_prediction_files[-1]))

    # And I'm spent
    raise TerminationEx
def next_action(self):
    """Grow and refit the learner for one compute quantum, emitting
    held-out scores at evenly spaced moments within the quantum, then
    send valid/test/held-out predictions to the parent agent (pausing
    afterwards when run_one_iteration is set)."""
    self.read_messages()

    # Begin the quantum
    self.time_checkpoint = time.clock()
    # First elapsed-time offset at which to emit a score
    next_score_at = self.time_quantum / self.n_samples
    n_scores_sent = 0

    while time.clock() - self.time_checkpoint < self.time_quantum:
        # The parent may have changed our compute quantum mid-flight
        self.get_parent_inbox()
        self.read_messages()

        # Grow the ensemble and refit
        self.learner.n_estimators += self.n_estimators_quantum
        fit_started = time.clock()
        self.fit(self.train_idx, self.feature_subset)
        fit_elapsed = time.clock() - fit_started
        # Experiment knob: emulate a slower machine by burning extra CPU
        if global_data.exp['slowdown_factor'] > 1:
            util.waste_cpu_time(
                fit_elapsed * (global_data.exp['slowdown_factor'] - 1))

        if time.clock() - self.time_checkpoint > next_score_at:
            # Score on the held out split and report to the parent
            preds = self.predict('X_train', self.test_idx,
                                 self.feature_subset)
            current_score = libscores.eval_metric(
                metric=self.data_info['eval_metric'],
                truth=self.test_truth,
                predictions=preds,
                task=self.data_info['task'])
            self.score_times.append(time.clock() - self.time_checkpoint +
                                    self.time_before_checkpoint)
            self.score_values.append(current_score)
            self.send_to_parent(dict(subject='score', sender=self.name,
                                     time=self.score_times[-1],
                                     score=self.score_values[-1]))
            n_scores_sent += 1
            if n_scores_sent >= self.n_samples:
                break
            # Spread the remaining score reports evenly over what is left
            # of the quantum
            elapsed = time.clock() - self.time_checkpoint
            next_score_at = elapsed + ((self.time_quantum - elapsed) /
                                       (self.n_samples - n_scores_sent))

    # Save total time taken
    # TODO - this is ignoring the time taken to make valid and test predictions
    self.time_before_checkpoint += time.clock() - self.time_checkpoint

    # Now make predictions
    # FIXME - send all of this data at the same time to prevent gotchas
    if 'X_valid' in self.data:
        valid_preds = self.predict('X_valid', 'all', self.feature_subset)
        valid_file = util.random_temp_file_name('.npy')
        np.save(valid_file, valid_preds)
        self.valid_prediction_files.append(valid_file)
        self.valid_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='valid',
                                 time=self.valid_prediction_times[-1],
                                 filename=self.valid_prediction_files[-1]))
    if 'X_test' in self.data:
        test_preds = self.predict('X_test', 'all', self.feature_subset)
        test_file = util.random_temp_file_name('.npy')
        np.save(test_file, test_preds)
        self.test_prediction_files.append(test_file)
        self.test_prediction_times.append(self.time_before_checkpoint)
        self.send_to_parent(dict(subject='predictions', sender=self.name,
                                 partition='test',
                                 time=self.test_prediction_times[-1],
                                 filename=self.test_prediction_files[-1]))

    # Held out predictions also carry the row indices they correspond to
    held_out_preds = self.predict('X_train', self.test_idx,
                                  self.feature_subset)
    held_out_file = util.random_temp_file_name('.npy')
    np.save(held_out_file, held_out_preds)
    self.held_out_prediction_files.append(held_out_file)
    self.held_out_prediction_times.append(self.time_before_checkpoint)
    self.send_to_parent(dict(subject='predictions', sender=self.name,
                             partition='held out', idx=self.test_idx,
                             time=self.held_out_prediction_times[-1],
                             filename=self.held_out_prediction_files[-1]))

    # Single-stepping mode: wait for the parent before iterating again
    if self.run_one_iteration:
        self.pause()