def init_db(options):
    from tkp.config import initialize_pipeline_config, get_database_config

    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)

    if 'engine' not in dbconfig or not dbconfig['engine']:
        dbconfig['engine'] = 'postgresql'

    if 'port' not in dbconfig or not dbconfig['port']:
        if dbconfig['engine'] == 'monetdb':
            dbconfig['port'] = 50000
        else:
            dbconfig['port'] = 5432

    if 'database' not in dbconfig or not dbconfig['database']:
        dbconfig['database'] = getpass.getuser()

    if 'user' not in dbconfig or not dbconfig['user']:
        dbconfig['user'] = dbconfig['database']

    if 'password' not in dbconfig or not dbconfig['password']:
        dbconfig['password'] = dbconfig['user']

    if 'host' not in dbconfig or not dbconfig['host']:
        dbconfig['host'] = 'localhost'

    dbconfig['yes'] = options.yes
    dbconfig['destroy'] = options.destroy

    populate(dbconfig)

def get_db_config():
    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)
    return dbconfig

def init_db(options):
    from tkp.config import initialize_pipeline_config, get_database_config

    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)

    dbconfig['yes'] = options.yes
    dbconfig['destroy'] = options.destroy

    populate(dbconfig)

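# Illustration only: a minimal sketch of how init_db might be invoked from a
# command-line entry point. The argparse wiring and flag names here are
# assumptions; init_db only requires that the passed object exposes `yes` and
# `destroy` attributes.
import argparse

parser = argparse.ArgumentParser(description="Initialise a TraP database")
parser.add_argument("-y", "--yes", action="store_true",
                    help="answer yes to any confirmation prompt")
parser.add_argument("-d", "--destroy", action="store_true",
                    help="destroy existing tables before populating")
init_db(parser.parse_args())
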
def setup(pipe_config, supplied_mon_coords=None):
    """
    Initialises the pipeline run.
    """
    if not supplied_mon_coords:
        supplied_mon_coords = []

    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_logging(log_dir,
                  debug=pipe_config.logging.debug,
                  use_colorlog=pipe_config.logging.colorlog)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    sync_rejectreasons(tkp.db.Database().Session())

    job_config, dataset_id = initialise_dataset(job_config, supplied_mon_coords)

    return job_dir, job_config, dataset_id

def test_use_username_as_default(self):
    # Database name and password default to the username
    os.environ["TKP_DBUSER"] = DUMMY_VALUE
    os.environ["TKP_DBENGINE"] = "monetdb"
    os.environ["TKP_DBHOST"] = DUMMY_VALUE
    os.environ["TKP_DBPORT"] = DUMMY_INT
    db_config = get_database_config(self.pipeline_cfg['database'])
    self._test_for_dummy_values(db_config)

def test_env_vars(self):
    # Demonstrate that we correctly read the environment
    os.environ["TKP_DBENGINE"] = "monetdb"
    os.environ["TKP_DBNAME"] = DUMMY_VALUE
    os.environ["TKP_DBUSER"] = DUMMY_VALUE
    os.environ["TKP_DBPASSWORD"] = DUMMY_VALUE
    os.environ["TKP_DBHOST"] = DUMMY_VALUE
    os.environ["TKP_DBPORT"] = DUMMY_INT
    db_config = get_database_config(self.pipeline_cfg['database'])
    self._test_for_dummy_values(db_config)

def test_defaults_monetdb(self):
    # Demonstrate that we get the expected default values
    os.environ["TKP_DBENGINE"] = "monetdb"
    username = getpass.getuser()
    db_config = get_database_config()
    self.assertEqual(db_config['engine'], "monetdb")
    self.assertEqual(db_config['database'], username)
    self.assertEqual(db_config['user'], "monetdb")
    self.assertEqual(db_config['password'], "monetdb")
    self.assertEqual(db_config['host'], "localhost")
    self.assertEqual(db_config['port'], 50000)

def test_defaults_monetdb(self):
    # Demonstrate that we get the expected default values
    os.environ["TKP_DBENGINE"] = "monetdb"
    username = getpass.getuser()
    db_config = get_database_config()
    self.assertEqual(db_config['engine'], "monetdb")
    self.assertEqual(db_config['database'], username)
    self.assertEqual(db_config['user'], username)
    self.assertEqual(db_config['password'], username)
    self.assertEqual(db_config['host'], "localhost")
    self.assertEqual(db_config['port'], 50000)

def test_database_dump_monet(self):
    dbconfig = get_database_config()
    with NamedTemporaryFile() as dumpfile:
        dump_monetdb(dbconfig['host'], dbconfig['port'],
                     dbconfig['database'], dbconfig['user'],
                     dbconfig['password'], dumpfile.name)
        # Output should start with "START TRANSACTION;" and end with
        # "COMMIT;"
        dumpfile.seek(0)
        self.assertEqual(dumpfile.readline().strip(), "START TRANSACTION;")
        dumpfile.seek(-8, os.SEEK_END)
        self.assertEqual(dumpfile.readline().strip(), "COMMIT;")

class TestDump(unittest.TestCase):

    @requires_database()
    @unittest.skipUnless(get_database_config()['engine'] == "postgresql",
                         "Postgres disabled")
    def test_database_dump_pg(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_pg(dbconfig['host'], dbconfig['port'], dbconfig['database'],
                    dbconfig['user'], dbconfig['password'], dumpfile.name)
            # Output should start with "--\n-- PostgreSQL database dump\n":
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "--")
            self.assertEqual(dumpfile.readline().strip(),
                             "-- PostgreSQL database dump")
            # And end with "-- PostgreSQL database dump complete\n--\n\n"
            dumpfile.seek(-41, os.SEEK_END)
            self.assertEqual(dumpfile.read().strip(),
                             "-- PostgreSQL database dump complete\n--")

    @requires_database()
    @unittest.skipUnless(get_database_config()['engine'] == "monetdb",
                         "Monet disabled")
    def test_database_dump_monet(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_monetdb(dbconfig['host'], dbconfig['port'],
                         dbconfig['database'], dbconfig['user'],
                         dbconfig['password'], dumpfile.name)
            # Output should start with "START TRANSACTION;" and end with
            # "COMMIT;"
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "START TRANSACTION;")
            dumpfile.seek(-8, os.SEEK_END)
            self.assertEqual(dumpfile.readline().strip(), "COMMIT;")

    def test_database_dump_unknown(self):
        self.assertRaises(NotImplementedError, dump_db, "dummy_engine", None,
                          None, None, None, None, None)

def test_database_dump_pg(self):
    dbconfig = get_database_config()
    with NamedTemporaryFile() as dumpfile:
        dump_pg(dbconfig['host'], dbconfig['port'], dbconfig['database'],
                dbconfig['user'], dbconfig['password'], dumpfile.name)
        # Output should start with "--\n-- PostgreSQL database dump\n":
        dumpfile.seek(0)
        self.assertEqual(dumpfile.readline().strip(), "--")
        self.assertEqual(dumpfile.readline().strip(),
                         "-- PostgreSQL database dump")
        # And end with "-- PostgreSQL database dump complete\n--\n\n"
        dumpfile.seek(-41, os.SEEK_END)
        self.assertEqual(dumpfile.read().strip(),
                         "-- PostgreSQL database dump complete\n--")

def populate(dbconfig):
    """
    Populates a database with TRAP tables.

    args:
        dbconfig: a dict containing db connection settings

    Raises an exception when one of the tables already exists.
    """
    if not dbconfig['yes']:
        verify(dbconfig)

    # configure the database before we do anything else
    get_database_config(dbconfig, apply=True)

    database = tkp.db.database.Database()
    database.connect(check=False)

    if dbconfig['destroy']:
        destroy(dbconfig)

    if dbconfig['engine'] == 'postgresql':
        # make sure plpgsql is enabled
        try:
            database.session.execute("CREATE LANGUAGE plpgsql;")
        except ProgrammingError:
            database.session.rollback()

    if dbconfig['engine'] == 'monetdb':
        set_monetdb_schema(database.session, dbconfig)
        # reconnect to switch to the new schema
        database.session.commit()
        database.reconnect()

    batch_file = os.path.join(sql_repo, 'batch')

    error = "\nproblem processing \"%s\".\nMaybe the DB is already populated. "\
            "Try -d/--destroy argument for initdb cmd.\n\n"

    tkp.db.model.Base.metadata.create_all(database.alchemy_engine)

    version = tkp.db.model.Version(name='revision',
                                   value=tkp.db.model.SCHEMA_VERSION)
    database.session.add(version)

    tkp.db.quality.sync_rejectreasons(database.session)

    for line in [l.strip() for l in open(batch_file) if not l.startswith("#")]:
        if not line:  # skip empty lines
            continue
        print("processing %s" % line)
        sql_file = os.path.join(sql_repo, line)
        with open(sql_file) as sql_handler:
            sql = sql_handler.read()
            dialected = dialectise(sql, dbconfig['engine']).strip()

            if not dialected:  # empty query, can happen
                continue

            try:
                database.session.execute(dialected)
            except Exception:
                sys.stderr.write(error % sql_file)
                raise

    database.session.commit()
    database.close()

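# Illustration only: a hand-rolled dbconfig dict of the shape populate expects,
# assuming a local PostgreSQL instance. In normal use this dict comes from
# get_database_config / init_db rather than being written by hand.
example_dbconfig = {
    'engine': 'postgresql',
    'host': 'localhost',
    'port': 5432,
    'database': getpass.getuser(),
    'user': getpass.getuser(),
    'password': getpass.getuser(),
    'yes': True,       # skip the interactive verify() confirmation
    'destroy': False,  # do not drop existing tables first
}
populate(example_dbconfig)
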
def run(job_name, mon_coords, local=False):
    setup_event_listening(celery_app)
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    debug = pipe_config.logging.debug
    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]
    metadatas = runner(tasks.persistence_node_step, imgs,
                       [image_cache_params], local)
    metadatas = [m[0] for m in metadatas]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner(tasks.quality_reject_check, urls, arguments, local)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n + 1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]
        extract_sources = runner(tasks.extract_sources, urls, arguments, local)

        logger.info("storing extracted sources to database")
        for image, sources in zip(images, extract_sources):
            dbgen.insert_extracted_sources(image.id, sources, 'blind')

        logger.info("performing database operations")
        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius)

            logger.info("performing null detections")
            null_detections = dbnd.get_nulldetections(image.id)
            logger.info("Found %s null detections" % len(null_detections))

            # The following steps are only necessary if we found null detections
            if len(null_detections) > 0:
                logger.info("performing forced fits")
                ff_nd = forced_fits(image.url, null_detections, se_parset)
                dbgen.insert_extracted_sources(image.id, ff_nd, 'ff_nd')
                logger.info("adding null detections")
                dbnd.associate_nd(image.id)

            if len(mon_coords) > 0:
                logger.info("performing monitoringlist")
                ff_ms = forced_fits(image.url, mon_coords, se_parset)
                dbgen.insert_extracted_sources(image.id, ff_ms, 'ff_ms')
                logger.info("adding monitoring sources")
                dbmon.associate_ms(image.id)

            transients = search_transients(image.id,
                                           job_config['transient_search'])

    dbgen.update_dataset_process_end_ts(dataset_id)

def test_invalid_dbengine(self):
    # Should *not* raise; database_config does not sanity check.
    os.environ["TKP_DBENGINE"] = DUMMY_VALUE
    get_database_config()

def test_unconfigured(self):
    # Should *not* raise.
    get_database_config()

def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # Get parallelise properties; defaults to multiproc with an autodetected
    # number of cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE',
                                 parallelise.get('method', 'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id, supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]
    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma,
                            rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n + 1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]
        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # We also set the image max and min RMS values, which are calculated
        # during source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min,
                         rms_max=results.rms_max,
                         detection_thresh=se_parset['detection_threshold'],
                         analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")
        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            expiration = job_config.source_extraction.expiration
            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(
                image, expiration)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)
                steps_ff.insert_and_associate_forced_fits(image.id,
                                                          successful_fits,
                                                          successful_ids)

    dbgen.update_dataset_process_end_ts(dataset_id)

    logger.info("calculating variability metrics")
    execute_store_varmetric(dataset_id)

def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # Get parallelise properties; defaults to multiproc with an autodetected
    # number of cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE',
                                 parallelise.get('method', 'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id, supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]
    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma,
                            rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n + 1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]
        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # We also set the image max and min RMS values, which are calculated
        # during source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min,
                         rms_max=results.rms_max,
                         detection_thresh=se_parset['detection_threshold'],
                         analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")
        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)
                steps_ff.insert_and_associate_forced_fits(image.id,
                                                          successful_fits,
                                                          successful_ids)

    dbgen.update_dataset_process_end_ts(dataset_id)