Beispiel #1
0
 def experiment(self):
     """Build the experiment object described by the CLI arguments.

     Returns a MultiCoreExperiment when parallelism was requested
     (more than one worker process or DB process), otherwise a
     SingleThreadedExperiment.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     config = self._load_config()
     db_engine = create_engine(db_url)
     # Keyword arguments shared by both experiment flavors.
     shared = dict(
         db_engine=db_engine,
         project_path=self.args.project_path,
         config=config,
         replace=self.args.replace,
         materialize_subquery_fromobjs=self.args.materialize_fromobjs,
         features_ignore_cohort=self.args.features_ignore_cohort,
         matrix_storage_class=self.matrix_storage_map[self.args.matrix_format],
         profile=self.args.profile,
         save_predictions=self.args.save_predictions,
         skip_validation=not self.args.validate,
     )
     parallel = self.args.n_db_processes > 1 or self.args.n_processes > 1
     if not parallel:
         return SingleThreadedExperiment(**shared)
     return MultiCoreExperiment(
         n_db_processes=self.args.n_db_processes,
         n_processes=self.args.n_processes,
         **shared,
     )
Beispiel #2
0
 def runner(self):
     """Create an AuditionRunner from the CLI arguments.

     Returns:
         AuditionRunner wired to the configured database engine and the
         output directory for plots.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     dir_plot = self.args.directory
     # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
     # TypeError in 6.0) and unsafe on untrusted input; safe_load is
     # sufficient for a plain YAML config file.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     return AuditionRunner(config, db_engine, dir_plot)
Beispiel #3
0
 def experiment(self):
     """Build a triage Experiment from the CLI arguments.

     Uses MultiCoreExperiment when more than one process or DB process
     is requested, otherwise SingleThreadedExperiment.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
     # TypeError in 6.0) and unsafe on untrusted input; safe_load is
     # sufficient for a plain YAML config file.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     common_kwargs = {
         "db_engine": db_engine,
         "project_path": self.args.project_path,
         "config": config,
         "replace": self.args.replace,
         "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
     }
     if self.args.n_db_processes > 1 or self.args.n_processes > 1:
         experiment = MultiCoreExperiment(
             n_db_processes=self.args.n_db_processes,
             n_processes=self.args.n_processes,
             **common_kwargs,
         )
     else:
         experiment = SingleThreadedExperiment(**common_kwargs)
     return experiment
Beispiel #4
0
 def __call__(self, args):
     """Retrain the given model group, then predict, for one prediction date."""
     engine = create_engine(self.root.db_url)
     retrainer = Retrainer(engine, args.project_path, args.model_group_id)
     retrainer.retrain(args.prediction_date)
     retrainer.predict(args.prediction_date)
Beispiel #5
0
    def __call__(self, args):
        """Generate pre-imputation test features for a single as-of date."""
        db_engine = create_engine(self.root.db_url)
        # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
        # TypeError in 6.0) and unsafe on untrusted input; safe_load is
        # sufficient for a plain feature-config file.
        feature_config = yaml.safe_load(args.feature_config_file)

        FeatureGenerator(db_engine, 'features_test').create_features_before_imputation(
            feature_aggregation_config=feature_config,
            feature_dates=[args.as_of_date]
        )
        logging.info('Features created for feature_config %s and date %s', feature_config, args.as_of_date)
Beispiel #6
0
    def __call__(self):
        """Read the YAML config file and backfill predictions for its model groups."""
        config = yaml.full_load(self.args.configfile)
        db_engine = create_engine(self.root.db_url)

        add_predictions(
            db_engine=db_engine,
            model_groups=config["model_group_ids"],
            project_path=config["project_path"],
            # Optional filters; absent keys yield None (no filtering).
            experiment_hashes=config.get("experiments"),
            train_end_times_range=config.get("train_end_times"),
        )
Beispiel #7
0
    def __init__(
        self,
        config,
        db_engine,
        model_storage_class=FSModelStorageEngine,
        project_path=None,
        replace=True,
        cleanup=False,
        cleanup_timeout=None,
    ):
        """Set up an experiment: validate the config, normalize the DB
        engine, build storage, ensure the results schema exists, and
        record the experiment hash in the database.

        Args:
            config: experiment configuration dict; its version is checked
                before anything else runs.
            db_engine: SQLAlchemy engine, or anything usable as one.
            model_storage_class: factory for the model storage engine;
                a falsy value skips model storage setup entirely.
            project_path: base directory for artifacts; when set, a
                'matrices' subdirectory is created under it.
            replace: whether downstream components overwrite existing output.
            cleanup: drop intermediate tables (labels/states) after
                matrix creation.
            cleanup_timeout: override for the cleanup timeout; None keeps
                the existing default.
        """
        self._check_config_version(config)
        self.config = config

        if isinstance(db_engine, Engine):
            # A live Engine can't be serialized into worker processes;
            # keep only its URL and rebuild, at the cost of any
            # non-URL engine options.
            logging.warning(
                'Raw, unserializable SQLAlchemy engine passed. URL will be used, other options may be lost in multi-process environments'
            )
            self.db_engine = create_engine(db_engine.url)
        else:
            self.db_engine = db_engine

        if model_storage_class:
            self.model_storage_engine = model_storage_class(
                project_path=project_path)
        self.matrix_store_class = CSVMatrixStore  # can't be configurable until Architect obeys
        self.project_path = project_path
        self.replace = replace
        ensure_db(self.db_engine)

        self.features_schema_name = 'features'
        if project_path:
            self.matrices_directory = os.path.join(self.project_path,
                                                   'matrices')
            if not os.path.exists(self.matrices_directory):
                os.makedirs(self.matrices_directory)

        # The hash identifies this configuration in the DB; the labels
        # table name is derived from it so experiments don't collide.
        self.experiment_hash = save_experiment_and_get_hash(
            self.config, self.db_engine)
        self.labels_table_name = 'labels_{}'.format(self.experiment_hash)
        self.initialize_components()

        self.cleanup = cleanup
        if self.cleanup:
            logging.info(
                'cleanup is set to True, so intermediate tables (labels and states) will be removed after matrix creation'
            )
        else:
            logging.info(
                'cleanup is set to False, so intermediate tables (labels and states) will not be removed after matrix creation'
            )
        # NOTE(review): this reads self.cleanup_timeout before __init__ has
        # set it — presumably a class-level default attribute exists; if
        # not, passing cleanup_timeout=None raises AttributeError. Confirm
        # against the class definition.
        self.cleanup_timeout = (self.cleanup_timeout if cleanup_timeout is None
                                else cleanup_timeout)
Beispiel #8
0
    def __call__(self, args):
        """Generate pre-imputation test features for a single as-of date."""
        self.root.setup()  # Loading configuration (if exists)
        db_engine = create_engine(self.root.db_url)
        # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
        # TypeError in 6.0) and unsafe on untrusted input; safe_load is
        # sufficient for a plain feature-config file.
        feature_config = yaml.safe_load(args.feature_config_file)

        FeatureGenerator(db_engine, "features_test").create_features_before_imputation(
            feature_aggregation_config=feature_config, feature_dates=[args.as_of_date]
        )
        logging.info(
            "Features created for feature_config %s and date %s",
            feature_config,
            args.as_of_date,
        )
Beispiel #9
0
 def experiment(self):
     """Build a triage Experiment from the CLI arguments, logging the setup.

     Returns:
         MultiCoreExperiment when more than one process or DB process is
         requested, otherwise SingleThreadedExperiment.

     Raises:
         Any exception thrown during construction is logged and re-raised
         (previously it was swallowed and the method implicitly returned
         None, deferring the failure to the caller).
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     config = self._load_config()
     db_engine = create_engine(db_url)
     common_kwargs = {
         "db_engine": db_engine,
         "project_path": self.args.project_path,
         "config": config,
         "replace": self.args.replace,
         "materialize_subquery_fromobjs": self.args.materialize_fromobjs,
         "features_ignore_cohort": self.args.features_ignore_cohort,
         "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
         "profile": self.args.profile,
         "save_predictions": self.args.save_predictions,
         "skip_validation": not self.args.validate,
     }
     logger.info(f"Setting up the experiment")
     logger.info(f"Configuration file: {self.args.config}")
     logger.info(f"Results will be stored in DB: {self.root.db_url}")
     logger.info(f"Artifacts will be saved in {self.args.project_path}")
     try:
         if self.args.n_db_processes > 1 or self.args.n_processes > 1:
             experiment = MultiCoreExperiment(
                 n_db_processes=self.args.n_db_processes,
                 n_processes=self.args.n_processes,
                 **common_kwargs,
             )
             logger.info(
                 f"Experiment will run in multi core  mode using {self.args.n_processes} processes and {self.args.n_db_processes} db processes"
             )
         else:
             experiment = SingleThreadedExperiment(**common_kwargs)
             logger.info("Experiment will run in serial fashion")
         return experiment
     except Exception:
         logger.exception("Error occurred while creating the experiment!")
         logger.info(
             f"Experiment [config file: {self.args.config}] failed at creation"
         )
         # Surface the failure instead of silently returning None.
         raise
Beispiel #10
0
    def __init__(
        self,
        config,
        db_engine,
        project_path=None,
        matrix_storage_class=CSVMatrixStore,
        replace=True,
        cleanup=False,
        cleanup_timeout=None,
    ):
        """Set up an experiment: validate the config, normalize the DB
        engine, build project/model/matrix storage, upgrade the results
        schema, and record the experiment hash in the database.

        Args:
            config: experiment configuration dict; its version is checked
                before anything else runs.
            db_engine: SQLAlchemy engine, or anything usable as one.
            project_path: base directory/prefix for project artifacts.
            matrix_storage_class: storage backend for matrices.
            replace: whether downstream components overwrite existing output.
            cleanup: drop intermediate tables (labels/states) after
                matrix creation.
            cleanup_timeout: override for the cleanup timeout; None keeps
                the existing default.
        """
        self._check_config_version(config)
        self.config = config

        if isinstance(db_engine, Engine):
            # A live Engine can't be serialized into worker processes;
            # keep only its URL and rebuild, at the cost of any
            # non-URL engine options.
            logging.warning(
                "Raw, unserializable SQLAlchemy engine passed. "
                "URL will be used, other options may be lost in multi-process environments"
            )
            self.db_engine = create_engine(db_engine.url)
        else:
            self.db_engine = db_engine

        self.project_storage = ProjectStorage(project_path)
        self.model_storage_engine = ModelStorageEngine(self.project_storage)
        self.matrix_storage_engine = MatrixStorageEngine(
            self.project_storage, matrix_storage_class)
        self.project_path = project_path
        self.replace = replace
        upgrade_db(db_engine=self.db_engine)

        self.features_schema_name = "features"
        # The hash identifies this configuration in the DB; the labels
        # table name is derived from it so experiments don't collide.
        self.experiment_hash = save_experiment_and_get_hash(
            self.config, self.db_engine)
        self.labels_table_name = "labels_{}".format(self.experiment_hash)
        self.initialize_components()

        self.cleanup = cleanup
        if self.cleanup:
            logging.info(
                "cleanup is set to True, so intermediate tables (labels and states) "
                "will be removed after matrix creation")
        else:
            logging.info(
                "cleanup is set to False, so intermediate tables (labels and states) "
                "will not be removed after matrix creation")
        # NOTE(review): this reads self.cleanup_timeout before __init__ has
        # set it — presumably a class-level default attribute exists; if
        # not, passing cleanup_timeout=None raises AttributeError. Confirm
        # against the class definition.
        self.cleanup_timeout = (self.cleanup_timeout if cleanup_timeout is None
                                else cleanup_timeout)
Beispiel #11
0
 def experiment(self):
     """Build a triage Experiment from the CLI arguments.

     Uses MultiCoreExperiment when more than one process or DB process
     is requested, otherwise SingleThreadedExperiment.
     """
     db_url = self.root.db_url
     # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
     # TypeError in 6.0) and unsafe on untrusted input; safe_load is
     # sufficient for a plain YAML config file.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     common_kwargs = {
         'db_engine': db_engine,
         'project_path': self.args.project_path,
         'config': config,
         'replace': self.args.replace,
     }
     if self.args.n_db_processes > 1 or self.args.n_processes > 1:
         experiment = MultiCoreExperiment(
             n_db_processes=self.args.n_db_processes,
             n_processes=self.args.n_processes,
             **common_kwargs
         )
     else:
         experiment = SingleThreadedExperiment(**common_kwargs)
     return experiment
Beispiel #12
0
    def __call__(self, args):
        """Build test-schema features for one as-of date, optionally over a cohort.

        When the config defines a cohort, its entity-date table is
        (re)generated first and used as the state table for feature creation.
        """
        self.root.setup()  # Loading configuration (if exists)
        db_engine = create_engine(self.root.db_url)
        full_config = yaml.full_load(args.feature_config_file)
        feature_config = full_config['feature_aggregations']
        cohort_config = full_config.get('cohort_config', None)

        if cohort_config:
            table_generator = EntityDateTableGenerator(
                entity_date_table_name="features_test.test_cohort",
                db_engine=db_engine,
                query=cohort_config["query"],
                replace=True,
            )
            table_generator.generate_entity_date_table(
                as_of_dates=[args.as_of_date]
            )

        generator = FeatureGenerator(db_engine, "features_test")
        generator.create_features_before_imputation(
            feature_aggregation_config=feature_config,
            feature_dates=[args.as_of_date],
            state_table="features_test.test_cohort",
        )
        logger.success(
            f"Features created for feature_config {feature_config} and date {args.as_of_date}"
        )
Beispiel #13
0
# Assemble the database URL from the standard Postgres environment variables.
host = os.environ['POSTGRES_HOST']
user = os.environ['POSTGRES_USER']
db = os.environ['POSTGRES_DB']
password = os.environ['POSTGRES_PASSWORD']
port = os.environ['POSTGRES_PORT']

db_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"

# Configure logging BEFORE the first log call; otherwise INFO records are
# dropped by the default last-resort handler (WARNING level).
logging.basicConfig(level=logging.INFO)

# Mask the password when echoing the connection string.
logging.info(f"Using the database: postgresql://{user}:XXXXX@{host}:{port}/{db}")

# create a db_engine from the URL assembled above (previously this was
# clobbered by a 'your db url here' placeholder, discarding the env-based URL)
db_engine = create_engine(db_url)

feature_config = [{
    'prefix': 'aprefix',
    'aggregates': [
        {
        'quantity': 'quantity_one',
        'metrics': ['sum', 'count'],
    ],
    'categoricals': [
        {
            'column': 'cat_one',
            'choices': ['good', 'bad'],
            'metrics': ['sum']
        },
    ],
Beispiel #14
0
 def __call__(self, args):
     """Load the crosstabs configuration and run the crosstabs job."""
     db_engine = create_engine(self.root.db_url)
     store = Store.factory(args.config)
     with store.open() as fd:
         raw_config = yaml.full_load(fd)
     config = CrosstabsConfigLoader(config=raw_config)
     run_crosstabs(db_engine, config)
Beispiel #15
0
 def __call__(self, args):
     """Predict forward for one as-of date using an existing trained model."""
     engine = create_engine(self.root.db_url)
     predict_forward_with_existed_model(
         engine,
         args.project_path,
         args.model_id,
         args.as_of_date,
     )
Beispiel #16
0
 def runner(self):
     """Create an AuditionRunner from the CLI arguments.

     Returns:
         AuditionRunner wired to the configured database engine and the
         output directory for plots.
     """
     db_url = self.root.db_url
     dir_plot = self.args.directory
     # yaml.load without an explicit Loader is deprecated (PyYAML 5.1+,
     # TypeError in 6.0) and unsafe on untrusted input; safe_load is
     # sufficient for a plain YAML config file.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     return AuditionRunner(config, db_engine, dir_plot)