def select_data(self, neural_net: AbstractAI) -> DataModel:
    """
    Return the data set of the model that this builder is configured to use.

    :param neural_net: model providing ``get_training_data()`` and
        ``get_evaluation_data()``.
    :return: the training data when ``data_source()`` equals
        ``TRAINING_DATA``, the evaluation data when it equals
        ``EVALUATION_DATA``.
    :raises RuntimeError: when ``data_source()`` matches neither constant.
    """
    source = self.data_source()

    if source == self.TRAINING_DATA:
        return neural_net.get_training_data()

    if source == self.EVALUATION_DATA:
        return neural_net.get_evaluation_data()

    raise RuntimeError(
        'Unknown data_source ({}) found in data splitter builder.'.format(source))
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Build an estimator and attach it to the model.

    The strategy is requested from ``EstimatorStrategyFactory`` using the
    model, ``estimator_type()`` and ``kwargs()``. The built estimator is stored
    on ``self.estimator`` and passed to ``ml_model.set_estimator``.

    :param ml_model: model that receives the estimator.
    :return: the same ``ml_model`` instance.
    :raises AssertionError: when the factory returns ``None``.
    """
    strategy: EstimatorStrategy = EstimatorStrategyFactory.get_strategy(
        ml_model, self.estimator_type(), self.kwargs())

    # Raised explicitly rather than via `assert`: assert statements are removed
    # under `python -O`, which would let a missing strategy fail later with an
    # unrelated AttributeError. The exception type is kept for existing callers.
    if strategy is None:
        raise AssertionError(
            'Strategy for building Estimator of type {} not found.'
            .format(self.estimator_type()))

    self.estimator = strategy.build()
    ml_model.set_estimator(self.estimator)
    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Build an optimizer and attach it to the model.

    The strategy is requested from ``OptimizerStrategyFactory`` using the
    model, ``optimizer_type()``, ``learning_rate()``, ``gradient_clipping()``
    and ``kwargs()``. The built optimizer is passed to
    ``ml_model.set_optimizer``.

    :param ml_model: model that receives the optimizer.
    :return: the same ``ml_model`` instance.
    :raises AssertionError: when the factory returns ``None``.
    """
    strategy: OptimizerStrategy = OptimizerStrategyFactory.get_strategy(
        ml_model,
        self.optimizer_type(),
        self.learning_rate(),
        self.gradient_clipping(),
        self.kwargs())

    # Raised explicitly rather than via `assert`: assert statements are removed
    # under `python -O`, which would let a missing strategy fail later with an
    # unrelated AttributeError. The exception type is kept for existing callers.
    if strategy is None:
        raise AssertionError(
            'Strategy for building Optimizer of type {} not found.'
            .format(self.optimizer_type()))

    optimizer = strategy.build()
    ml_model.set_optimizer(optimizer)
    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Split the selected data set into training and evaluation data.

    The data returned by ``select_data`` is optionally passed to
    ``randomize_data`` (when ``randomize()`` is truthy) and then split by the
    ratios ``[training_data_percentage(), evaluation_data_perc()]``. The first
    part becomes the model's training data, the second its evaluation data.

    :param ml_model: model whose data is split and reassigned.
    :return: the same ``ml_model`` instance.
    """
    source_data = self.select_data(ml_model)

    if self.randomize():
        self.randomize_data(source_data, self.seed())

    data_splitter = DataSetSplitter(data_model=source_data)
    ratios = [self.training_data_percentage(), self.evaluation_data_perc()]
    parts = data_splitter.split_by_ratio(ratios)

    ml_model.set_training_data(parts[0])
    ml_model.set_evaluation_data(parts[1])
    return ml_model
def _get_logged_names(ml_model: AbstractAI) -> List[str]:
    """
    Gets the dir names from the tensor_board dir.

    The directory is ``tensor_board`` inside ``ml_model.get_log_dir()``. When
    it does not exist a warning is issued and the directory is created, so the
    result is then an empty list.

    NOTE(review): this takes no ``self`` but is invoked through an instance
    elsewhere in the file - presumably it is declared as a static method;
    confirm the decorator is present in the class.

    :param ml_model: model providing ``get_log_dir()``.
    :return: names of the immediate sub-directories of the tensor_board dir.
    """
    # os.path.join also accepts path-like log dirs, which `+` does not.
    tensor_board_path = os.path.join(ml_model.get_log_dir(), 'tensor_board')

    if not os.path.isdir(tensor_board_path):
        warnings.warn(f'Creating missing tensor board dir {tensor_board_path}.')
        # exist_ok: another process may create the directory between the
        # isdir() check above and this call.
        os.makedirs(tensor_board_path, exist_ok=True)

    # First item yielded by os.walk is (dirpath, dirnames, filenames) of the root.
    return next(os.walk(tensor_board_path))[1]
def determine(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Assign name to model, if required.

    ``self.existing_names`` is refreshed from ``_get_logged_names`` first.
    Then:

    * a model without a name, or whose name equals its project name, gets the
      name produced by ``_generate_name``;
    * a model whose name is already in ``existing_names`` gets a numeric
      suffix (``_1``, ``_2``, ...) - the first one that is not in use;
    * any other model keeps its name.

    :param ml_model: model to name.
    :return: the same ``ml_model`` instance.
    """
    self.existing_names = self._get_logged_names(ml_model)
    current_name = ml_model.get_name()

    # Compare by value: two equal strings are not guaranteed to be the same
    # object, so an identity check can miss a name that equals the project name.
    if current_name is None or current_name == ml_model.get_project_name():
        ml_model.set_name(self._generate_name(ml_model))
        return ml_model

    if current_name in self.existing_names:
        # Keep counting until the suffixed name is free, so the new name
        # cannot collide with an already logged one.
        suffix = 1
        candidate = current_name + '_' + str(suffix)
        while candidate in self.existing_names:
            suffix += 1
            candidate = current_name + '_' + str(suffix)
        ml_model.set_name(candidate)

    # At this point the name is known to be set (not None).
    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Build feature columns for every data set present on the model.

    Training, evaluation and prediction data are each read from the model;
    every one that is not ``None`` is passed to ``build_feature_columns`` and
    then written back to the slot it was read from.

    :param ml_model: model whose data sets are processed.
    :return: the same ``ml_model`` instance.
    """
    training_data: Optional[DataModel] = ml_model.get_training_data()
    evaluation_data: Optional[DataModel] = ml_model.get_evaluation_data()
    prediction_data: Optional[DataModel] = ml_model.get_prediction_data()

    if training_data is not None:
        self.build_feature_columns(training_data)
        ml_model.set_training_data(training_data)

    if evaluation_data is not None:
        self.build_feature_columns(evaluation_data)
        ml_model.set_evaluation_data(evaluation_data)

    if prediction_data is not None:
        self.build_feature_columns(prediction_data)
        # Prediction data goes back to the prediction slot; writing it to the
        # evaluation slot would overwrite the evaluation data set above.
        ml_model.set_prediction_data(prediction_data)

    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Split the selected data into training and evaluation data by category.

    The rows of the selected data frame are filtered on the column named by
    ``column_name()``: rows whose value is in the training categories become
    the training data, rows whose value is in the evaluation categories become
    the evaluation data. The final category lists come from
    ``set_categories``, which receives all unique column values plus the
    configured evaluation and training categories. Each filtered frame is
    wrapped through ``load_data`` before it is set on the model.

    When ``self.verbosity`` is greater than 0 the sizes of both subsets are
    printed.

    :param ml_model: model whose data is split and reassigned.
    :return: the same ``ml_model`` instance.
    """
    source_model = self.select_data(ml_model)
    frame = source_model.get_dataframe()

    configured_training = self.training_categories()
    configured_eval = self.eval_categories()
    present = frame[self.column_name()].unique()
    # set_categories returns (evaluation categories, training categories).
    eval_cats, training_cats = self.set_categories(
        present, configured_eval, configured_training)

    training_mask = frame[self.column_name()].isin(training_cats)
    training_rows = frame[training_mask]
    ml_model.set_training_data(self.load_data(source_model, training_rows))

    evaluation_mask = frame[self.column_name()].isin(eval_cats)
    evaluation_rows = frame[evaluation_mask]
    ml_model.set_evaluation_data(self.load_data(source_model, evaluation_rows))

    if self.verbosity > 0:
        print(f'{len(training_rows)} items in training data.')
        print(f'{len(evaluation_rows)} items in evaluation data.')

    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Build meta data for every data set present on the model.

    Training, evaluation and prediction data are read from the model first.
    Each one that is not ``None`` is replaced by the result of
    ``build_meta_data`` for that data set.

    :param ml_model: model whose data sets are processed.
    :return: the same ``ml_model`` instance.
    """
    training = ml_model.get_training_data()
    evaluation = ml_model.get_evaluation_data()
    prediction = ml_model.get_prediction_data()

    if training is not None:
        with_meta = self.build_meta_data(training)
        ml_model.set_training_data(with_meta)

    if evaluation is not None:
        with_meta = self.build_meta_data(evaluation)
        ml_model.set_evaluation_data(with_meta)

    if prediction is not None:
        with_meta = self.build_meta_data(prediction)
        ml_model.set_prediction_data(with_meta)

    return ml_model
def _generate_name(self, ml_model: AbstractAI) -> str:
    """
    Generates name based on existing dirs in tensor_board dir.

    The version of every entry in ``self.existing_names`` (as returned by
    ``_get_version``) is appended to ``self.versions``. The new name is the
    project name, an underscore, and the highest collected version plus one
    (``1`` when no versions are known).

    :param ml_model: model providing ``get_project_name()``.
    :return: the generated name.
    :raises AssertionError: when the generated name is already in
        ``self.existing_names``.
    """
    self.versions.extend(
        self._get_version(name=existing) for existing in self.existing_names)

    next_version = max(self.versions, default=0) + 1
    candidate = ml_model.get_project_name() + '_' + str(next_version)

    assert candidate not in self.existing_names, \
        f'New model name not unique, {candidate} already in tensor_board folder.'
    return candidate
def print_ai_description(self, ai: AbstractAI, time_stamp: str = None, ai_hash: str = None):
    """
    Write a description of the AI through ``self.line``, one line per call.

    Output order: a header with the AI name, an optional time line, an
    optional description-hash line, then for every entry of
    ``ai.description`` the builder name followed by its description. A dict
    description (including dict subclasses) is written as one
    ``' - key: value'`` line per item; anything else is written as a single
    ``' - value'`` line.

    :param ai: object providing ``get_name()`` and a ``description`` mapping.
    :param time_stamp: text for the time line; the line is skipped when None.
    :param ai_hash: value for the hash line; the line is skipped when None.
    """
    self.line('--- AI: ' + ai.get_name() + ' ---')

    if time_stamp is not None:
        self.line('--- time: ' + time_stamp + ' ---')

    if ai_hash is not None:
        self.line('--- description hash: ' + str(ai_hash))

    for builder_name, description in ai.description.items():
        self.line(builder_name)

        # isinstance rather than an exact type check, so dict subclasses such
        # as OrderedDict are expanded item by item as well.
        if not isinstance(description, dict):
            # str() so non-string descriptions (numbers, lists, None) print
            # instead of failing on string concatenation.
            self.line(' - ' + str(description))
            continue

        for element, value in description.items():
            self.line(' - ' + str(element) + ': ' + str(value))
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Load the configured data sources and set them on the model.

    For each of ``data_source()``, ``eval_data_source()`` and
    ``prediction_data_source()`` that is not ``None``, the source is loaded
    with ``load_data`` and stored as the model's training, evaluation or
    prediction data respectively.

    :param ml_model: model that receives the loaded data.
    :return: the same ``ml_model`` instance.
    """
    training_source = self.data_source()
    if training_source is not None:
        ml_model.set_training_data(self.load_data(training_source))

    evaluation_source = self.eval_data_source()
    if evaluation_source is not None:
        ml_model.set_evaluation_data(self.load_data(evaluation_source))

    prediction_source = self.prediction_data_source()
    if prediction_source is not None:
        ml_model.set_prediction_data(self.load_data(prediction_source))

    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Scrub every data set present on the model.

    Training, evaluation and prediction data are handled by
    ``and_scrubber_training``, ``and_scrubber_validation`` and
    ``and_scrubber_prediction`` respectively. For each data set that is not
    ``None`` the scrubber first validates a deep copy of the data's metadata
    and then scrubs the data itself.

    :param ml_model: model whose data sets are scrubbed.
    :return: the same ``ml_model`` instance.
    """

    def _validate_and_scrub(scrubber, data_model):
        # The scrubber validates a copy of the metadata, not the original.
        scrubber.validate_metadata(deepcopy(data_model.metadata))
        scrubber.scrub(data_model)

    training = ml_model.training_data
    evaluation = ml_model.evaluation_data
    prediction = ml_model.get_prediction_data()

    if training is not None:
        _validate_and_scrub(self.and_scrubber_training, training)

    if evaluation is not None:
        _validate_and_scrub(self.and_scrubber_validation, evaluation)

    if prediction is not None:
        _validate_and_scrub(self.and_scrubber_prediction, prediction)

    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Randomize the model's training and evaluation data, when enabled.

    Nothing happens unless ``randomize()`` is truthy. Otherwise the training
    data is passed to ``randomize_data`` with ``seed()`` and set back on the
    model; the evaluation data gets the same treatment when it is not
    ``None``.

    NOTE(review): the training data is not checked for ``None`` while the
    evaluation data is - confirm training data is always present here.

    :param ml_model: model whose data is randomized.
    :return: the same ``ml_model`` instance.
    """
    if self.randomize():
        training = ml_model.get_training_data()
        self.randomize_data(training, self.seed())
        ml_model.set_training_data(training_data=training)

        evaluation = ml_model.get_evaluation_data()
        if evaluation is not None:
            self.randomize_data(evaluation, self.seed())
            ml_model.set_evaluation_data(evaluation)

    return ml_model
def build(self, ml_model: AbstractAI) -> AbstractAI:
    """
    Create the enabled input functions and set them on the model.

    Depending on the flags ``build_train``, ``build_eval`` and
    ``build_predict``:

    * training and evaluation functions are created by ``assign_fn`` from the
      model's ``training_data`` / ``evaluation_data``, the configured function
      name and the matching kwargs;
    * the prediction function is created by ``assign_prediction_fn`` from the
      model's ``prediction_data``, ``prediction_fn_name()`` and
      ``prediction_kwargs``.

    :param ml_model: model that receives the functions.
    :return: the same ``ml_model`` instance.
    """
    if self.build_train:
        ml_model.set_training_fn(
            self.assign_fn(ml_model.training_data,
                           self.train_fn_name(),
                           self.train_kwargs))

    if self.build_eval:
        ml_model.set_evaluation_fn(
            self.assign_fn(ml_model.evaluation_data,
                           self.evaluation_fn_name(),
                           self.evaluation_kwargs))

    if self.build_predict:
        ml_model.set_prediction_fn(
            self.assign_prediction_fn(ml_model.prediction_data,
                                      self.prediction_fn_name(),
                                      self.prediction_kwargs))

    return ml_model