class AnalyzeJobConf(Bean):
    """Configuration bean for an analyze job.

    The class only declares fields; it carries no behaviour of its own.
    """

    job_name = StringField()
    dataset_name = StringField()
    # Nested bean; the available sampling options are declared on SampleConf.
    sample_conf = BeanField(SampleConf)
    path = StringField()
    temporary_dataset = BooleanField()
    label_col = StringField()
class Feature(Bean):
    """Bean describing one feature together with its statistics beans."""

    name = StringField()
    type = StringField()
    data_type = StringField()

    # Per-feature statistics, each stored as its own nested bean.
    correlation = BeanField(bean_cls=FeatureCorrelation)
    missing = BeanField(bean_cls=FeatureMissing)
    unique = BeanField(bean_cls=FeatureUnique)

    # Free-form dictionary; its schema is not defined in this class.
    extension = DictField()
class JobStep(Bean):
    """Bean recording one step of a job.

    ``Status`` lists the string constants declared for step outcomes.
    """

    type = StringField()
    status = StringField()
    took = FloatField()
    datetime = IntegerField()
    # Free-form dictionary; its schema is not defined in this class.
    extension = DictField()

    class Status:
        """String constants for step outcomes."""

        Succeed = "succeed"
        Failed = "failed"
class DatasetStats(Bean):
    """Bean holding dataset-level statistics and the list of its features."""

    label_col = StringField()
    file_path = StringField()
    has_header = BooleanField()
    n_rows = IntegerField()
    n_cols = IntegerField()
    features = ListBeanField(Feature)
    feature_summary = BeanField(FeatureTypeStats)
    create_datetime = IntegerField()

    @property
    def features_names(self):
        """Return the ``name`` of every entry in ``features``, in order."""
        return [feature.name for feature in self.features]
class SampleConf(Bean):
    """Bean describing how a dataset is sampled.

    ``Strategy`` lists the string constants declared for sampling
    strategies.
    """

    sample_strategy = StringField()
    # NOTE(review): presumably only one of the two fields below is relevant
    # for a given strategy - confirm against the code that reads this bean.
    percentage = IntegerField()
    n_rows = IntegerField()

    class Strategy:
        """String constants for sampling strategies."""

        RandomRows = "random_rows"
        Percentage = "percentage"
        WholeData = "whole_data"
class TrainJobConf(Bean):
    """Configuration bean for a train job.

    ``SearchSpace`` and ``Searcher`` list the string constants declared
    for search spaces and searchers.
    """

    framework = StringField()
    name = StringField()
    model_name = StringField()
    searcher = StringField()
    # The spelling "trails" is part of the public field name; keep it as is.
    max_trails = IntegerField()
    search_space = StringField()

    class SearchSpace:
        """String constants for search spaces."""

        Complex = "complex"
        Basic = "basic"
        Minimal = "minimal"

    class Searcher:
        """String constants for searchers."""

        RandomSearcher = "random_searcher"
        EvolutionSearcher = "evolution_searcher"
        MCTSSearcher = "MCTS_searcher"
        EnasSearcher = "Enas_searcher"
class ExperimentConf(Bean):
    """Configuration bean for an experiment.

    Covers the dataset, the task and the partitioning of the data.
    ``PartitionStrategy`` lists the string constants declared for
    partition strategies.
    """

    # Dataset description.
    dataset_name = StringField()
    dataset_has_header = BooleanField()
    dataset_default_headers = ListObjectField()

    # Training setup.
    train_mode = StringField()
    engine = StringField()
    label_col = StringField()
    pos_label = ObjectField()
    task_type = StringField()  # calculated in the frontend

    # Data partitioning.
    partition_strategy = StringField()
    cross_validation = BeanField(CrossValidation)
    train_validation_holdout = BeanField(TrainValidationHoldout)
    datetime_series_col = StringField()
    partition_col = StringField()
    # Disabled field, kept for reference:
    # feature_series_name = StringField()

    # File locations.
    file_path = StringField()
    test_file_path = StringField()

    class PartitionStrategy:
        """String constants for partition strategies."""

        CrossValidation = "cross_validation"
        TrainValidationHoldout = "train_validation_holdout"
        Manual = "manual"
class FeatureMissing(Bean):
    """Bean holding the missing-value statistics of a feature."""

    value = IntegerField()
    percentage = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal missing-value status."""

        TooHigh = "too_high"

    @staticmethod
    def calc_status(percentage):
        """Classify a missing-value percentage.

        Returns ``Status.TooHigh`` when ``percentage`` is strictly greater
        than 70, otherwise the module-level ``FeatureNormalStatus``.
        """
        exceeds_limit = percentage > 70
        return FeatureMissing.Status.TooHigh if exceeds_limit else FeatureNormalStatus
class FeatureUnique(Bean):
    """Bean holding the unique-value statistics of a feature."""

    value = IntegerField()
    percentage = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal uniqueness statuses."""

        ID_ness = "ID-ness"
        Stable = "stable"

    @staticmethod
    def calc_status(n_uniques, percentage):
        """Classify a feature by its uniqueness.

        Returns ``Status.Stable`` when there is exactly one unique value,
        ``Status.ID_ness`` when ``percentage`` is strictly greater than 90,
        and the module-level ``FeatureNormalStatus`` otherwise.
        """
        # The single-value check takes precedence over the percentage check.
        if n_uniques == 1:
            return FeatureUnique.Status.Stable
        if percentage > 90:
            return FeatureUnique.Status.ID_ness
        return FeatureNormalStatus
class FeatureCorrelation(Bean):
    """Bean holding the correlation statistic of a feature."""

    value = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal correlation statuses."""

        TooHigh = "too_high"
        TooLow = "too_low"

    @staticmethod
    def calc_status(correlation, is_target_col):
        """Classify a correlation value by its absolute magnitude.

        * magnitude > 0.5: ``FeatureNormalStatus`` when ``is_target_col``
          is exactly ``True``, otherwise ``Status.TooHigh``;
        * magnitude < 0.01: ``Status.TooLow``;
        * anything else: ``FeatureNormalStatus``.
        """
        magnitude = abs(correlation)
        if magnitude > 0.5:
            # Identity check on purpose: only the literal True counts here.
            if is_target_col is True:
                return FeatureNormalStatus
            return FeatureCorrelation.Status.TooHigh
        if magnitude < 0.01:
            return FeatureCorrelation.Status.TooLow
        return FeatureNormalStatus
class RespPreviewDataset(Bean):
    """Bean for a dataset preview: headers, rows, a count and a file path."""

    headers = ListObjectField()
    rows = ListObjectField()
    # NOTE(review): whether this counts the preview rows or the whole
    # dataset is not visible from this class - confirm with the producer.
    count = IntegerField()
    file_path = StringField()
class FeatureMode(Bean):
    """Bean pairing a value with its count and percentage."""

    value = StringField()
    count = IntegerField()
    percentage = FloatField()
class Model(Bean):
    """Bean describing a model, its training state and its artifact paths.

    Besides the declared fields it offers helpers to compute the time
    spent on training, the default metric for the task type, and the
    relative paths of files located under ``model_path``.
    """

    name = StringField()
    framework = StringField()
    dataset_name = StringField()
    model_file_size = IntegerField()
    no_experiment = IntegerField()
    inputs = ListBeanField(ModelFeature)
    task_type = StringField()
    performance = BeanField(Performance)
    model_path = StringField()
    status = StringField()
    pid = IntegerField()
    score = FloatField()
    progress = StringField()
    train_job_name = StringField()
    train_trail_no = IntegerField()
    # NOTE(review): `TrainTrail` must resolve to a bean class defined or
    # imported elsewhere in this module; a similarly named `TrainTrial`
    # bean also exists in this file - confirm which one is intended.
    trails = ListBeanField(TrainTrail)
    # Free-form dictionary; its schema is not defined in this class.
    extension = DictField()
    create_datetime = DatetimeField()
    finish_datetime = DatetimeField()
    last_update_datetime = DatetimeField()

    def _train_end_datetime(self, *, name_in_error):
        """Return the datetime up to which training time is measured.

        For a model whose status is ``Succeed`` or ``Failed`` this is
        ``finish_datetime``; for any other status it is the current time
        as reported by ``util.get_now_datetime()``.

        Args:
            name_in_error: When true, the error message includes the
                model name.

        Raises:
            Exception: If training has finished but ``finish_datetime``
                is ``None``.
        """
        if self.status not in [ModelStatusType.Succeed, ModelStatusType.Failed]:
            # Not finished yet: measure up to "now".
            return util.get_now_datetime()

        if self.finish_datetime is None:
            if name_in_error:
                message = (
                    f"Internal error, model name = {self.name} train finished "
                    "but has no finish_datetime. \n"
                )
            else:
                message = "Internal error, train finished but has no finish_datetime. "
            raise Exception(message)

        return self.finish_datetime

    def escaped_time(self):
        """Return the training time so far in a human-readable format.

        The value is whatever
        ``util.datetime_diff_human_format_by_minute`` returns for the
        end datetime and ``create_datetime``.

        Raises:
            Exception: If training has finished but ``finish_datetime``
                is ``None``.
        """
        end = self._train_end_datetime(name_in_error=False)
        return util.datetime_diff_human_format_by_minute(end, self.create_datetime)

    def escaped_time_by_seconds(self):
        """Return the training time so far as computed by ``util.datetime_diff``.

        Raises:
            Exception: If training has finished but ``finish_datetime``
                is ``None``; the message includes the model name.
        """
        end = self._train_end_datetime(name_in_error=True)
        return util.datetime_diff(end, self.create_datetime)

    def default_metric(self):
        """Return the default metric name for ``task_type``.

        Raises:
            KeyError: If ``task_type`` is not one of the known task types.
        """
        metric_by_task = {
            'multi_classification': "logloss",
            'regression': "mae",
            'binary_classification': "auc",
        }
        return metric_by_task[self.task_type]

    def log_file_path(self):
        """Return the relative path of ``train.log`` under ``model_path``.

        Per the original note, the file exists from the start of training.
        """
        return util.relative_path(P.join(str(self.model_path), 'train.log'))

    def train_source_code_path(self):
        """Return the relative path of ``train.py`` under ``model_path``.

        Per the original note, the file exists from the start of training.
        """
        return util.relative_path(P.join(str(self.model_path), 'train.py'))

    def train_notebook_uri(self):
        """Return the relative path of ``train.ipynb`` under ``model_path``.

        Per the original note, the file exists from the start of training.
        """
        train_notebook_path = P.join(str(self.model_path), 'train.ipynb')
        return util.relative_path(train_notebook_path)
class ModelFeature(Bean):
    """Bean describing a feature by name, type and data type."""

    name = StringField()
    type = StringField()
    data_type = StringField()
class TrainTrial(Bean):
    """Bean describing one trial: its number, status and extra data."""

    trial_no = IntegerField()
    status = StringField()
    # Free-form dictionary; its schema is not defined in this class.
    extension = DictField()