def experiment(self):
    """Construct the Experiment described by the CLI arguments.

    Returns a MultiCoreExperiment when either process-count argument asks
    for parallelism, otherwise a SingleThreadedExperiment.
    """
    self.root.setup()  # Loading configuration (if exists)
    config = self._load_config()
    engine = create_engine(self.root.db_url)

    # Keyword arguments shared by both experiment flavors.
    shared = {
        "db_engine": engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "materialize_subquery_fromobjs": self.args.materialize_fromobjs,
        "features_ignore_cohort": self.args.features_ignore_cohort,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
        "profile": self.args.profile,
        "save_predictions": self.args.save_predictions,
        "skip_validation": not self.args.validate,
    }

    wants_parallelism = (
        self.args.n_db_processes > 1 or self.args.n_processes > 1
    )
    if not wants_parallelism:
        return SingleThreadedExperiment(**shared)
    return MultiCoreExperiment(
        n_db_processes=self.args.n_db_processes,
        n_processes=self.args.n_processes,
        **shared,
    )
def runner(self):
    """Build an AuditionRunner from the CLI arguments.

    Returns:
        AuditionRunner wired to the root DB engine, the parsed audition
        config, and the directory where plots will be written.
    """
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    dir_plot = self.args.directory
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    config = yaml.full_load(self.args.config)
    db_engine = create_engine(db_url)
    return AuditionRunner(config, db_engine, dir_plot)
def experiment(self):
    """Construct the Experiment described by the CLI arguments.

    Returns a MultiCoreExperiment when either process-count argument asks
    for parallelism, otherwise a SingleThreadedExperiment.
    """
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    config = yaml.full_load(self.args.config)
    db_engine = create_engine(db_url)
    common_kwargs = {
        "db_engine": db_engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
    }
    if self.args.n_db_processes > 1 or self.args.n_processes > 1:
        experiment = MultiCoreExperiment(
            n_db_processes=self.args.n_db_processes,
            n_processes=self.args.n_processes,
            **common_kwargs,
        )
    else:
        experiment = SingleThreadedExperiment(**common_kwargs)
    return experiment
def __call__(self, args):
    """Retrain the given model group, then predict as of the given date."""
    engine = create_engine(self.root.db_url)
    retrainer = Retrainer(engine, args.project_path, args.model_group_id)
    retrainer.retrain(args.prediction_date)
    retrainer.predict(args.prediction_date)
def __call__(self, args):
    """Create pre-imputation test features for a single as-of date.

    Args:
        args: parsed CLI namespace providing ``feature_config_file`` (a
            stream/path accepted by yaml) and ``as_of_date``.
    """
    db_engine = create_engine(self.root.db_url)
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    feature_config = yaml.full_load(args.feature_config_file)
    FeatureGenerator(db_engine, 'features_test').create_features_before_imputation(
        feature_aggregation_config=feature_config,
        feature_dates=[args.as_of_date]
    )
    logging.info('Features created for feature_config %s and date %s',
                 feature_config, args.as_of_date)
def __call__(self):
    """Backfill predictions for the model groups listed in the config file."""
    engine = create_engine(self.root.db_url)
    cfg = yaml.full_load(self.args.configfile)
    add_predictions(
        db_engine=engine,
        model_groups=cfg["model_group_ids"],
        project_path=cfg["project_path"],
        # Optional filters; .get() yields None when absent from the config.
        experiment_hashes=cfg.get("experiments"),
        train_end_times_range=cfg.get("train_end_times"),
    )
def __init__(
    self,
    config,
    db_engine,
    model_storage_class=FSModelStorageEngine,
    project_path=None,
    replace=True,
    cleanup=False,
    cleanup_timeout=None,
):
    """Set up an experiment: validate config, normalize the DB engine,
    prepare storage, ensure the DB schema, and record the experiment hash.

    Args:
        config: experiment configuration dict; its version is checked first.
        db_engine: SQLAlchemy engine or a serializable stand-in; a raw
            Engine is rebuilt from its URL for multi-process safety.
        model_storage_class: storage engine class for trained models
            (skipped entirely when falsy).
        project_path: root path for artifacts; matrices go in a
            'matrices' subdirectory created on demand.
        replace: whether downstream components overwrite existing output.
        cleanup: drop intermediate labels/states tables after matrix
            creation when True.
        cleanup_timeout: override for the cleanup wait; None keeps the
            pre-existing default.
    """
    self._check_config_version(config)
    self.config = config
    if isinstance(db_engine, Engine):
        # A live Engine can't be pickled across processes; keep only its URL.
        logging.warning(
            'Raw, unserializable SQLAlchemy engine passed. URL will be used, other options may be lost in multi-process environments'
        )
        self.db_engine = create_engine(db_engine.url)
    else:
        self.db_engine = db_engine
    if model_storage_class:
        self.model_storage_engine = model_storage_class(
            project_path=project_path)
    self.matrix_store_class = CSVMatrixStore  # can't be configurable until Architect obeys
    self.project_path = project_path
    self.replace = replace
    # Make sure the results schema exists before anything writes to it.
    ensure_db(self.db_engine)
    self.features_schema_name = 'features'
    if project_path:
        self.matrices_directory = os.path.join(self.project_path, 'matrices')
        if not os.path.exists(self.matrices_directory):
            os.makedirs(self.matrices_directory)
    # Persist the config and derive a stable hash used to namespace tables.
    self.experiment_hash = save_experiment_and_get_hash(
        self.config, self.db_engine)
    self.labels_table_name = 'labels_{}'.format(self.experiment_hash)
    self.initialize_components()
    self.cleanup = cleanup
    if self.cleanup:
        logging.info(
            'cleanup is set to True, so intermediate tables (labels and states) will be removed after matrix creation'
        )
    else:
        logging.info(
            'cleanup is set to False, so intermediate tables (labels and states) will not be removed after matrix creation'
        )
    # NOTE(review): reads self.cleanup_timeout before any instance
    # assignment — presumably a class-level default exists outside this
    # view; otherwise this raises AttributeError when cleanup_timeout is
    # None. TODO confirm.
    self.cleanup_timeout = (self.cleanup_timeout if cleanup_timeout is None
                            else cleanup_timeout)
def __call__(self, args):
    """Create pre-imputation test features for a single as-of date.

    Args:
        args: parsed CLI namespace providing ``feature_config_file`` (a
            stream/path accepted by yaml) and ``as_of_date``.
    """
    self.root.setup()  # Loading configuration (if exists)
    db_engine = create_engine(self.root.db_url)
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    feature_config = yaml.full_load(args.feature_config_file)
    FeatureGenerator(db_engine, "features_test").create_features_before_imputation(
        feature_aggregation_config=feature_config, feature_dates=[args.as_of_date]
    )
    logging.info(
        "Features created for feature_config %s and date %s",
        feature_config,
        args.as_of_date,
    )
def experiment(self):
    """Construct the Experiment described by the CLI arguments.

    Returns a MultiCoreExperiment when either process-count argument asks
    for parallelism, otherwise a SingleThreadedExperiment.

    Raises:
        Exception: any error raised during experiment construction is
        logged and re-raised.
    """
    self.root.setup()  # Loading configuration (if exists)
    db_url = self.root.db_url
    config = self._load_config()
    db_engine = create_engine(db_url)
    common_kwargs = {
        "db_engine": db_engine,
        "project_path": self.args.project_path,
        "config": config,
        "replace": self.args.replace,
        "materialize_subquery_fromobjs": self.args.materialize_fromobjs,
        "features_ignore_cohort": self.args.features_ignore_cohort,
        "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
        "profile": self.args.profile,
        "save_predictions": self.args.save_predictions,
        "skip_validation": not self.args.validate,
    }
    logger.info(f"Setting up the experiment")
    logger.info(f"Configuration file: {self.args.config}")
    logger.info(f"Results will be stored in DB: {self.root.db_url}")
    logger.info(f"Artifacts will be saved in {self.args.project_path}")
    try:
        if self.args.n_db_processes > 1 or self.args.n_processes > 1:
            experiment = MultiCoreExperiment(
                n_db_processes=self.args.n_db_processes,
                n_processes=self.args.n_processes,
                **common_kwargs,
            )
            logger.info(
                f"Experiment will run in multi core mode using {self.args.n_processes} processes and {self.args.n_db_processes} db processes"
            )
        else:
            experiment = SingleThreadedExperiment(**common_kwargs)
            logger.info("Experiment will run in serial fashion")
        return experiment
    except Exception:
        logger.exception("Error occurred while creating the experiment!")
        logger.info(
            f"Experiment [config file: {self.args.config}] failed at creation"
        )
        # FIX: the original swallowed the exception and implicitly returned
        # None, causing a confusing downstream failure; propagate it instead.
        raise
def __init__(
    self,
    config,
    db_engine,
    project_path=None,
    matrix_storage_class=CSVMatrixStore,
    replace=True,
    cleanup=False,
    cleanup_timeout=None,
):
    """Set up an experiment: validate config, normalize the DB engine,
    wire project/model/matrix storage, migrate the DB schema, and record
    the experiment hash.

    Args:
        config: experiment configuration dict; its version is checked first.
        db_engine: SQLAlchemy engine or a serializable stand-in; a raw
            Engine is rebuilt from its URL for multi-process safety.
        project_path: root path handed to ProjectStorage for artifacts.
        matrix_storage_class: storage class for design matrices.
        replace: whether downstream components overwrite existing output.
        cleanup: drop intermediate labels/states tables after matrix
            creation when True.
        cleanup_timeout: override for the cleanup wait; None keeps the
            pre-existing default.
    """
    self._check_config_version(config)
    self.config = config
    if isinstance(db_engine, Engine):
        # A live Engine can't be pickled across processes; keep only its URL.
        logging.warning(
            "Raw, unserializable SQLAlchemy engine passed. "
            "URL will be used, other options may be lost in multi-process environments"
        )
        self.db_engine = create_engine(db_engine.url)
    else:
        self.db_engine = db_engine
    self.project_storage = ProjectStorage(project_path)
    self.model_storage_engine = ModelStorageEngine(self.project_storage)
    self.matrix_storage_engine = MatrixStorageEngine(
        self.project_storage, matrix_storage_class)
    self.project_path = project_path
    self.replace = replace
    # Run any pending schema migrations before components touch the DB.
    upgrade_db(db_engine=self.db_engine)
    self.features_schema_name = "features"
    # Persist the config and derive a stable hash used to namespace tables.
    self.experiment_hash = save_experiment_and_get_hash(
        self.config, self.db_engine)
    self.labels_table_name = "labels_{}".format(self.experiment_hash)
    self.initialize_components()
    self.cleanup = cleanup
    if self.cleanup:
        logging.info(
            "cleanup is set to True, so intermediate tables (labels and states) "
            "will be removed after matrix creation")
    else:
        logging.info(
            "cleanup is set to False, so intermediate tables (labels and states) "
            "will not be removed after matrix creation")
    # NOTE(review): reads self.cleanup_timeout before any instance
    # assignment — presumably a class-level default exists outside this
    # view; otherwise this raises AttributeError when cleanup_timeout is
    # None. TODO confirm.
    self.cleanup_timeout = (self.cleanup_timeout if cleanup_timeout is None
                            else cleanup_timeout)
def experiment(self):
    """Construct the Experiment described by the CLI arguments.

    Returns a MultiCoreExperiment when either process-count argument asks
    for parallelism, otherwise a SingleThreadedExperiment.
    """
    db_url = self.root.db_url
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    config = yaml.full_load(self.args.config)
    db_engine = create_engine(db_url)
    common_kwargs = {
        'db_engine': db_engine,
        'project_path': self.args.project_path,
        'config': config,
        'replace': self.args.replace,
    }
    if self.args.n_db_processes > 1 or self.args.n_processes > 1:
        experiment = MultiCoreExperiment(
            n_db_processes=self.args.n_db_processes,
            n_processes=self.args.n_processes,
            **common_kwargs
        )
    else:
        experiment = SingleThreadedExperiment(**common_kwargs)
    return experiment
def __call__(self, args):
    """Create pre-imputation test features, scoped to a cohort when one
    is configured."""
    self.root.setup()  # Loading configuration (if exists)
    db_engine = create_engine(self.root.db_url)
    full_config = yaml.full_load(args.feature_config_file)
    feature_config = full_config['feature_aggregations']
    cohort_config = full_config.get('cohort_config', None)
    if cohort_config:
        # Materialize the cohort entity/date table the features are built on.
        table_generator = EntityDateTableGenerator(
            entity_date_table_name="features_test.test_cohort",
            db_engine=db_engine,
            query=cohort_config["query"],
            replace=True,
        )
        table_generator.generate_entity_date_table(
            as_of_dates=[args.as_of_date],
        )
    generator = FeatureGenerator(db_engine, "features_test")
    generator.create_features_before_imputation(
        feature_aggregation_config=feature_config,
        feature_dates=[args.as_of_date],
        state_table="features_test.test_cohort",
    )
    logger.success(
        f"Features created for feature_config {feature_config} and date {args.as_of_date}"
    )
# Build the DB URL from the environment; never log the real password.
host = os.environ['POSTGRES_HOST']
user = os.environ['POSTGRES_USER']
db = os.environ['POSTGRES_DB']
password = os.environ['POSTGRES_PASSWORD']
port = os.environ['POSTGRES_PORT']
db_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"
logging.info(f"Using the database: postgresql://{user}:XXXXX@{host}:{port}/{db}")
logging.basicConfig(level=logging.INFO)

# create a db_engine
# FIX: the original re-assigned db_url to the placeholder string
# 'your db url here' at this point, clobbering the URL built from the
# environment above; the placeholder assignment is removed.
db_engine = create_engine(db_url)

# FIX: the original feature_config literal was syntactically invalid —
# the aggregates dict was never closed and the outer dict/list were left
# open; closed here with the evidently intended structure.
feature_config = [{
    'prefix': 'aprefix',
    'aggregates': [
        {
            'quantity': 'quantity_one',
            'metrics': ['sum', 'count'],
        },
    ],
    'categoricals': [
        {
            'column': 'cat_one',
            'choices': ['good', 'bad'],
            'metrics': ['sum'],
        },
    ],
}]
def __call__(self, args):
    """Load the crosstabs configuration and run the crosstabs computation."""
    engine = create_engine(self.root.db_url)
    store = Store.factory(args.config)
    with store.open() as fd:
        loaded = CrosstabsConfigLoader(config=yaml.full_load(fd))
    run_crosstabs(engine, loaded)
def __call__(self, args):
    """Predict forward with an already-trained model as of the given date."""
    engine = create_engine(self.root.db_url)
    predict_forward_with_existed_model(
        engine,
        args.project_path,
        args.model_id,
        args.as_of_date,
    )
def runner(self):
    """Build an AuditionRunner from the CLI arguments.

    Returns:
        AuditionRunner wired to the root DB engine, the parsed audition
        config, and the directory where plots will be written.
    """
    db_url = self.root.db_url
    dir_plot = self.args.directory
    # FIX: yaml.load without an explicit Loader is deprecated and raises
    # TypeError on PyYAML >= 6; full_load matches the loader already used
    # elsewhere in this file.
    config = yaml.full_load(self.args.config)
    db_engine = create_engine(db_url)
    return AuditionRunner(config, db_engine, dir_plot)