コード例 #1
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class AnalyzeJobConf(Bean):
    """Bean declaring the configuration fields of an analyze job.

    NOTE(review): the meaning of each field is inferred from its name only;
    confirm against the code that builds and consumes this bean.
    """

    job_name = StringField()
    dataset_name = StringField()
    # Nested bean of type SampleConf.
    sample_conf = BeanField(SampleConf)
    path = StringField()
    # Boolean flag; presumably marks the dataset as temporary -- TODO confirm.
    temporary_dataset = BooleanField()
    label_col = StringField()
コード例 #2
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class Feature(Bean):
    """Bean declaring the fields recorded for a single feature.

    Holds three string fields, three nested statistic beans and a free-form
    dict. NOTE(review): field semantics are inferred from names; confirm
    against the code that populates this bean.
    """
    name = StringField()
    type = StringField()
    data_type = StringField()
    # Nested beans, one per statistic family.
    correlation = BeanField(bean_cls=FeatureCorrelation)
    missing = BeanField(bean_cls=FeatureMissing)
    unique = BeanField(bean_cls=FeatureUnique)
    # Dict field; its key schema is not visible in this block.
    extension = DictField()
コード例 #3
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class JobStep(Bean):
    """Bean declaring the fields recorded for one step of a job.

    NOTE(review): field semantics are inferred from names; confirm against
    the code that writes job steps.
    """
    type = StringField()
    # String field; presumably one of the JobStep.Status values -- TODO confirm.
    status = StringField()
    # Float field; presumably the step duration, unit not visible here.
    took = FloatField()
    # Declared as an integer field, not a datetime field; presumably a
    # timestamp whose unit is not visible here -- TODO confirm.
    datetime = IntegerField()
    # Dict field; its key schema is not visible in this block.
    extension = DictField()

    class Status:
        """String constants naming the step outcomes."""
        Succeed = "succeed"
        Failed = "failed"
コード例 #4
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class DatasetStats(Bean):
    """Bean declaring dataset-level statistics fields.

    NOTE(review): field semantics are inferred from names; confirm against
    the code that computes these statistics.
    """
    label_col = StringField()
    file_path = StringField()
    has_header = BooleanField()
    n_rows = IntegerField()
    n_cols = IntegerField()
    # List of nested Feature beans.
    features = ListBeanField(Feature)
    # Nested bean of type FeatureTypeStats.
    feature_summary = BeanField(FeatureTypeStats)
    # Declared as an integer field; presumably a timestamp -- TODO confirm unit.
    create_datetime = IntegerField()

    @property
    def features_names(self):
        """Return a list with the ``name`` of every item in ``self.features``.

        The list follows the iteration order of ``self.features``.
        """
        return [f.name for f in self.features]
コード例 #5
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class SampleConf(Bean):
    """Bean declaring the fields of a sampling configuration.

    NOTE(review): how ``percentage`` and ``n_rows`` relate to the chosen
    strategy is not visible in this block; confirm against the sampler.
    """
    # String field; presumably one of the SampleConf.Strategy values.
    sample_strategy = StringField()
    percentage = IntegerField()
    n_rows = IntegerField()

    class Strategy:
        """String constants naming the sampling strategies."""
        RandomRows = "random_rows"
        Percentage = "percentage"
        WholeData = "whole_data"
コード例 #6
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class TrainJobConf(Bean):
    """Bean declaring the fields of a train-job configuration.

    NOTE(review): field semantics are inferred from names; confirm against
    the code that launches training jobs.
    """
    framework = StringField()
    name = StringField()
    model_name = StringField()
    # String field; presumably one of the TrainJobConf.Searcher values.
    searcher = StringField()
    # NOTE(review): "trails" looks like a misspelling of "trials"; the name is
    # part of the bean's public field set, so it is left unchanged here.
    max_trails = IntegerField()
    # String field; presumably one of the TrainJobConf.SearchSpace values.
    search_space = StringField()

    class SearchSpace:
        """String constants naming the search spaces."""
        Complex = "complex"
        Basic = "basic"
        Minimal = "minimal"

    class Searcher:
        """String constants naming the searchers."""
        RandomSearcher = 'random_searcher'
        EvolutionSearcher = 'evolution_searcher'
        MCTSSearcher = 'MCTS_searcher'
        EnasSearcher = 'Enas_searcher'
コード例 #7
0
ファイル: model.py プロジェクト: zerocurve/Cooka
class ExperimentConf(Bean):
    """Bean declaring the fields of an experiment configuration.

    NOTE(review): field semantics are inferred from names; confirm against
    the code that creates experiments.
    """
    dataset_name = StringField()
    dataset_has_header = BooleanField()
    dataset_default_headers = ListObjectField()
    train_mode = StringField()
    engine = StringField()
    label_col = StringField()
    # Untyped object field; the accepted value types are not visible here.
    pos_label = ObjectField()
    task_type = StringField()  # computed by the frontend
    # String field; presumably one of the PartitionStrategy values below.
    partition_strategy = StringField()
    # Nested beans, named after two of the PartitionStrategy constants.
    cross_validation = BeanField(CrossValidation)
    train_validation_holdout = BeanField(TrainValidationHoldout)
    datetime_series_col = StringField()
    partition_col = StringField()
    # feature_series_name = StringField()
    file_path = StringField()
    test_file_path = StringField()

    class PartitionStrategy:
        """String constants naming the partition strategies."""
        CrossValidation = 'cross_validation'
        TrainValidationHoldout = 'train_validation_holdout'
        Manual = 'manual'
コード例 #8
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class FeatureMissing(Bean):
    """Bean declaring the missing-value statistics of a feature.

    NOTE(review): ``value`` is presumably the count of missing entries and
    ``percentage`` their share; confirm against the code filling this bean.
    """
    value = IntegerField()
    percentage = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal missing-value statuses."""
        TooHigh = 'too_high'

    @staticmethod
    def calc_status(percentage):
        """Classify a missing-value percentage.

        Args:
            percentage: number compared against the threshold 70
                (presumably on a 0-100 scale -- TODO confirm).

        Returns:
            ``FeatureMissing.Status.TooHigh`` when ``percentage`` is strictly
            greater than 70, otherwise ``FeatureNormalStatus``.
        """
        # Strict comparison: exactly 70 still counts as normal.
        if percentage > 70:
            return FeatureMissing.Status.TooHigh
        return FeatureNormalStatus
コード例 #9
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class FeatureUnique(Bean):
    """Bean declaring the unique-value statistics of a feature.

    NOTE(review): ``value`` is presumably the number of distinct values and
    ``percentage`` their share; confirm against the code filling this bean.
    """
    value = IntegerField()
    percentage = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal uniqueness statuses."""
        ID_ness = 'ID-ness'
        Stable = 'stable'

    @staticmethod
    def calc_status(n_uniques, percentage):
        """Classify a feature by its number and share of unique values.

        Args:
            n_uniques: number of unique values; compared against 1.
            percentage: number compared against the threshold 90
                (presumably on a 0-100 scale -- TODO confirm).

        Returns:
            ``FeatureUnique.Status.Stable`` when ``n_uniques == 1``;
            otherwise ``FeatureUnique.Status.ID_ness`` when ``percentage`` is
            strictly greater than 90; otherwise ``FeatureNormalStatus``.
        """
        # The single-value check takes precedence over the percentage check.
        if n_uniques == 1:
            return FeatureUnique.Status.Stable
        if percentage > 90:
            return FeatureUnique.Status.ID_ness
        return FeatureNormalStatus
コード例 #10
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class FeatureCorrelation(Bean):
    """Bean declaring the correlation statistic of a feature."""
    value = FloatField()
    status = StringField()

    class Status:
        """String constants for the non-normal correlation statuses."""
        TooHigh = 'too_high'
        TooLow = 'too_low'

    @staticmethod
    def calc_status(correlation, is_target_col):
        """Classify a correlation value by its absolute magnitude.

        Args:
            correlation: signed correlation value; only ``abs(correlation)``
                is inspected.
            is_target_col: only the exact value ``True`` suppresses the
                "too high" classification.

        Returns:
            * magnitude > 0.5: ``FeatureNormalStatus`` if ``is_target_col is
              True``, else ``FeatureCorrelation.Status.TooHigh``;
            * magnitude < 0.01: ``FeatureCorrelation.Status.TooLow``;
            * anything else: ``FeatureNormalStatus``.
        """
        magnitude = abs(correlation)
        if magnitude > 0.5:
            # Identity check on purpose: truthy non-bool values do not count.
            return (FeatureNormalStatus if is_target_col is True
                    else FeatureCorrelation.Status.TooHigh)
        if magnitude < 0.01:
            return FeatureCorrelation.Status.TooLow
        return FeatureNormalStatus
コード例 #11
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class RespPreviewDataset(Bean):
    """Bean declaring the fields of a dataset-preview response.

    NOTE(review): the element types of ``headers`` and ``rows`` are not
    visible in this block; confirm against the preview endpoint.
    """
    headers = ListObjectField()
    rows = ListObjectField()
    count = IntegerField()
    file_path = StringField()
コード例 #12
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class FeatureMode(Bean):
    """Bean declaring a value together with its count and percentage.

    NOTE(review): presumably describes the most frequent value (mode) of a
    feature, judging by the class name; confirm against the caller.
    """
    value = StringField()
    count = IntegerField()
    percentage = FloatField()
コード例 #13
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class Model(Bean):
    """Bean declaring the fields of a model record, plus derived helpers.

    The helper methods compute the elapsed time between ``create_datetime``
    and either ``finish_datetime`` or the current time, look up a default
    metric name from ``task_type``, and build paths under ``model_path``.
    """
    name = StringField()
    framework = StringField()
    dataset_name = StringField()
    model_file_size = IntegerField()
    no_experiment = IntegerField()
    inputs = ListBeanField(ModelFeature)
    task_type = StringField()
    performance = BeanField(Performance)
    model_path = StringField()
    status = StringField()
    pid = IntegerField()
    score = FloatField()
    progress = StringField()
    train_job_name = StringField()
    train_trail_no = IntegerField()
    trails = ListBeanField(TrainTrail)
    extension = DictField()
    create_datetime = DatetimeField()
    finish_datetime = DatetimeField()
    last_update_datetime = DatetimeField()

    def escaped_time(self):
        """Return the elapsed time formatted by
        ``util.datetime_diff_human_format_by_minute``.

        For a model whose status is Succeed or Failed the span runs from
        ``create_datetime`` to ``finish_datetime``; for any other status it
        runs from ``create_datetime`` to ``util.get_now_datetime()``.

        Raises:
            Exception: status is Succeed/Failed but ``finish_datetime`` is None.
        """
        finished = self.status in (ModelStatusType.Succeed,
                                   ModelStatusType.Failed)
        if not finished:
            # Not finished yet: measure up to the current time.
            return util.datetime_diff_human_format_by_minute(
                util.get_now_datetime(), self.create_datetime)

        if self.finish_datetime is None:
            raise Exception(
                "Internal error, train finished but has no finish_datetime. "
            )
        return util.datetime_diff_human_format_by_minute(
            self.finish_datetime, self.create_datetime)

    def escaped_time_by_seconds(self):
        """Return the elapsed time as computed by ``util.datetime_diff``.

        Uses the same end-point selection as ``escaped_time``:
        ``finish_datetime`` for Succeed/Failed models, the current time
        otherwise. NOTE(review): the unit is presumably seconds, judging by
        the method name -- confirm in ``util.datetime_diff``.

        Raises:
            Exception: status is Succeed/Failed but ``finish_datetime`` is None.
        """
        finished = self.status in (ModelStatusType.Succeed,
                                   ModelStatusType.Failed)
        if not finished:
            return util.datetime_diff(util.get_now_datetime(),
                                      self.create_datetime)

        if self.finish_datetime is None:
            raise Exception(
                f"Internal error, model name = {self.name} train finished but has no finish_datetime. "
            )
        return util.datetime_diff(self.finish_datetime, self.create_datetime)

    def default_metric(self):
        """Return the default metric name for ``self.task_type``.

        Raises:
            KeyError: ``task_type`` is not one of the three known task types.
        """
        metric_by_task_type = {
            'multi_classification': "logloss",
            'regression': "mae",
            'binary_classification': "auc",
        }
        return metric_by_task_type[self.task_type]

    def log_file_path(self):
        """Return ``util.relative_path`` of ``train.log`` under ``model_path``."""
        # Per the original note, the file exists from the start of training.
        log_path = P.join(str(self.model_path), 'train.log')
        return util.relative_path(log_path)

    def train_source_code_path(self):
        """Return ``util.relative_path`` of ``train.py`` under ``model_path``."""
        # Per the original note, the file exists from the start of training.
        source_path = P.join(str(self.model_path), 'train.py')
        return util.relative_path(source_path)

    def train_notebook_uri(self):
        """Return ``util.relative_path`` of ``train.ipynb`` under ``model_path``."""
        # Per the original note, the file exists from the start of training.
        return util.relative_path(
            P.join(str(self.model_path), 'train.ipynb'))
コード例 #14
0
ファイル: model.py プロジェクト: highboo52na/Cooka
class ModelFeature(Bean):
    """Bean declaring the name and type information of a model feature.

    NOTE(review): the difference between ``type`` and ``data_type`` is not
    visible in this block; confirm against the code that builds model inputs.
    """
    name = StringField()
    type = StringField()
    data_type = StringField()
コード例 #15
0
ファイル: model.py プロジェクト: zerocurve/Cooka
class TrainTrial(Bean):
    """Bean declaring the fields recorded for one training trial.

    NOTE(review): the allowed ``status`` values and the ``extension`` schema
    are not visible in this block; confirm against the training code.
    """
    trial_no = IntegerField()
    status = StringField()
    # Dict field; its key schema is not visible in this block.
    extension = DictField()