Beispiel #1
0
def initialise_dataset(job_config, supplied_mon_coords):
    """
    sets up a dataset in the database.

    if the dataset already exists it will return the job_config from the
    previous dataset run.

    args:
        job_config: a job configuration object
        supplied_mon_coords (list): a list of monitoring positions

    returns:
        tuple: job_config and dataset ID
    """
    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id, supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")
    return job_config, dataset_id
Beispiel #2
0
def initialise_dataset(job_config, supplied_mon_coords):
    """
    sets up a dataset in the database.

    if the dataset already exists it will return the job_config from the
    previous dataset run.

    args:
        job_config: a job configuration object
        supplied_mon_coords (tuple): a list of monitoring positions

    returns:
        tuple: job_config and dataset ID
    """
    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id, supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")
    return job_config, dataset_id
    def test_basic_case(self):
        im_params = self.im_params

        blind_src = db_subs.example_extractedsource_tuple(
            ra=im_params[0]['centre_ra'],
            dec=im_params[0]['centre_decl'],
        )
        superimposed_mon_src = blind_src
        mon_src_in_field = blind_src._replace(ra=blind_src.ra + 0.001)
        # Simulate a source that does not get fit, for good measure:
        mon_src_out_of_field = blind_src._replace(ra=blind_src.ra + 90.)

        #Sorted by increasing RA:
        mon_srcs = [
            superimposed_mon_src, mon_src_in_field, mon_src_out_of_field
        ]
        mon_posns = [(m.ra, m.dec) for m in mon_srcs]
        dbgen.insert_monitor_positions(self.dataset.id, mon_posns)

        images = []
        for img_pars in self.im_params:
            img = tkp.db.Image(dataset=self.dataset, data=img_pars)
            dbgen.insert_extracted_sources(img.id, [blind_src], 'blind')
            associate_extracted_sources(img.id, deRuiter_r=5.68)
            nd_requests = get_nulldetections(img.id)
            self.assertEqual(len(nd_requests), 0)
            mon_requests = dbmon.get_monitor_entries(self.dataset.id)
            self.assertEqual(len(mon_requests), len(mon_srcs))
            # mon requests is a list of tuples [(id,ra,decl)]
            # Ensure sorted by RA for cross-checking:
            mon_requests = sorted(mon_requests, key=lambda s: s[1])

            for idx in range(len(mon_srcs)):
                self.assertAlmostEqual(mon_requests[idx][1], mon_srcs[idx].ra)
                self.assertAlmostEqual(mon_requests[idx][2], mon_srcs[idx].dec)

            #Insert fits for the in-field sources and then associate
            dbgen.insert_extracted_sources(
                img.id, [superimposed_mon_src, mon_src_in_field],
                'ff_ms',
                ff_monitor_ids=[mon_requests[0][0], mon_requests[1][0]])
            dbmon.associate_ms(img.id)

        query = """\
        SELECT r.id
              ,r.mon_src
              ,rf.f_datapoints
          FROM runningcatalog r
              ,runningcatalog_flux rf
         WHERE r.dataset = %(dataset_id)s
           AND rf.runcat = r.id
        ORDER BY r.wm_ra
                ,r.mon_src
        """
        cursor = tkp.db.execute(query, {'dataset_id': self.dataset.id})
        runcat_flux = get_db_rows_as_dicts(cursor)

        self.assertEqual(len(runcat_flux), 3)
        # First entry (lowest RA, mon_src = False) is the regular one;
        self.assertEqual(runcat_flux[0]['mon_src'], False)
        # The higher RA source is the monitoring one
        self.assertEqual(runcat_flux[1]['mon_src'], True)
        self.assertEqual(runcat_flux[2]['mon_src'], True)

        for entry in runcat_flux:
            self.assertEqual(entry['f_datapoints'], len(self.im_params))

        #Let's verify the association types
        blind_src_assocs = get_assoc_entries(self.dataset.database,
                                             runcat_flux[0]['id'])

        superimposed_mon_src_assocs = get_assoc_entries(
            self.dataset.database, runcat_flux[1]['id'])
        offset_mon_src_assocs = get_assoc_entries(self.dataset.database,
                                                  runcat_flux[2]['id'])

        assoc_lists = [
            blind_src_assocs, superimposed_mon_src_assocs,
            offset_mon_src_assocs
        ]

        for al in assoc_lists:
            self.assertEqual(len(al), 3)

        # The individual light-curve datapoints for the "normal" source
        # It was new at first timestep
        self.assertEqual(blind_src_assocs[0]['type'], 4)
        self.assertEqual(superimposed_mon_src_assocs[0]['type'], 8)
        self.assertEqual(offset_mon_src_assocs[0]['type'], 8)

        for idx, img_pars in enumerate(self.im_params):
            if idx != 0:
                self.assertEqual(blind_src_assocs[idx]['type'], 3)
                self.assertEqual(superimposed_mon_src_assocs[idx]['type'], 9)
                self.assertEqual(offset_mon_src_assocs[idx]['type'], 9)

            #And the extraction types:
            self.assertEqual(blind_src_assocs[idx]['extract_type'], 0)
            self.assertEqual(superimposed_mon_src_assocs[idx]['extract_type'],
                             2)
            self.assertEqual(offset_mon_src_assocs[idx]['extract_type'], 2)

            #Sanity check the timestamps while we're at it
            for al in assoc_lists:
                self.assertEqual(al[idx]['taustart_ts'],
                                 img_pars['taustart_ts'])
 def test_insert(self):
     dataset1 = DataSet(data=self.description)
     monitor_positions = [(5., 5), (123, 85.)]
     dbgen.insert_monitor_positions(dataset1.id, monitor_positions)
Beispiel #5
0
def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # get parallelise props. Defaults to multiproc with autodetect num cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE', parallelise.get('method',
                                                                    'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    #Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id,supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]

    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma, rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n+1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]

        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # we also set the image max,min RMS values which calculated during
        # source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min, rms_max=results.rms_max,
                detection_thresh=se_parset['detection_threshold'],
                analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")

        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)

                steps_ff.insert_and_associate_forced_fits(image.id,successful_fits,
                                                          successful_ids)


        dbgen.update_dataset_process_end_ts(dataset_id)
Beispiel #6
0
def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # get parallelise props. Defaults to multiproc with autodetect num cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE', parallelise.get('method',
                                                                    'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    #Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id,supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]

    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma, rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n+1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]

        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # we also set the image max,min RMS values which calculated during
        # source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min, rms_max=results.rms_max,
                detection_thresh=se_parset['detection_threshold'],
                analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")

        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            expiration = job_config.source_extraction.expiration
            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image,
                                                                          expiration)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)

                steps_ff.insert_and_associate_forced_fits(image.id,successful_fits,
                                                          successful_ids)


        dbgen.update_dataset_process_end_ts(dataset_id)

    logger.info("calculating variability metrics")
    execute_store_varmetric(dataset_id)
Beispiel #7
0
    def test_basic_case(self):
        im_params = self.im_params

        blind_src = db_subs.example_extractedsource_tuple(
                 ra=im_params[0]['centre_ra'],
                 dec=im_params[0]['centre_decl'],
             )
        superimposed_mon_src = blind_src
        mon_src_in_field = blind_src._replace(ra = blind_src.ra+0.001)
        # Simulate a source that does not get fit, for good measure:
        mon_src_out_of_field = blind_src._replace(ra = blind_src.ra+90.)

        #Sorted by increasing RA:
        mon_srcs = [superimposed_mon_src, mon_src_in_field, mon_src_out_of_field]
        mon_posns = [(m.ra, m.dec) for m in mon_srcs]
        dbgen.insert_monitor_positions(self.dataset.id,mon_posns)

        images = []
        for img_pars in self.im_params:
            img = tkp.db.Image(dataset=self.dataset, data=img_pars)
            dbgen.insert_extracted_sources(img.id, [blind_src], 'blind')
            associate_extracted_sources(img.id, deRuiter_r=5.68)
            nd_requests = get_nulldetections(img.id)
            self.assertEqual(len(nd_requests),0)
            mon_requests = dbmon.get_monitor_entries(self.dataset.id)
            self.assertEqual(len(mon_requests),len(mon_srcs))
            # mon requests is a list of tuples [(id,ra,decl)]
            # Ensure sorted by RA for cross-checking:
            mon_requests = sorted(mon_requests, key = lambda s: s[1])

            for idx in range(len(mon_srcs)):
                self.assertAlmostEqual(mon_requests[idx][1],mon_srcs[idx].ra)
                self.assertAlmostEqual(mon_requests[idx][2],mon_srcs[idx].dec)

            #Insert fits for the in-field sources and then associate
            dbgen.insert_extracted_sources(img.id,
                       [superimposed_mon_src, mon_src_in_field], 'ff_ms',
                       ff_monitor_ids=[mon_requests[0][0],
                                       mon_requests[1][0]])
            dbmon.associate_ms(img.id)

        query = """\
        SELECT r.id
              ,r.mon_src
              ,rf.f_datapoints
          FROM runningcatalog r
              ,runningcatalog_flux rf
         WHERE r.dataset = %(dataset_id)s
           AND rf.runcat = r.id
        ORDER BY r.wm_ra
                ,r.mon_src
        """
        cursor = tkp.db.execute(query, {'dataset_id': self.dataset.id})
        runcat_flux = get_db_rows_as_dicts(cursor)


        self.assertEqual(len(runcat_flux), 3)
        # First entry (lowest RA, mon_src = False) is the regular one;
        self.assertEqual(runcat_flux[0]['mon_src'], False)
        # The higher RA source is the monitoring one
        self.assertEqual(runcat_flux[1]['mon_src'], True)
        self.assertEqual(runcat_flux[2]['mon_src'], True)

        for entry in runcat_flux:
            self.assertEqual(entry['f_datapoints'], len(self.im_params))



        #Let's verify the association types
        blind_src_assocs = get_assoc_entries(self.dataset.database,
                                             runcat_flux[0]['id'])

        superimposed_mon_src_assocs = get_assoc_entries(self.dataset.database,
                                             runcat_flux[1]['id'])
        offset_mon_src_assocs = get_assoc_entries(self.dataset.database,
                                             runcat_flux[2]['id'])

        assoc_lists = [blind_src_assocs,
                       superimposed_mon_src_assocs,
                       offset_mon_src_assocs]

        for al in assoc_lists:
            self.assertEqual(len(al), 3)


        # The individual light-curve datapoints for the "normal" source
        # It was new at first timestep
        self.assertEqual(blind_src_assocs[0]['type'], 4)
        self.assertEqual(superimposed_mon_src_assocs[0]['type'], 8)
        self.assertEqual(offset_mon_src_assocs[0]['type'], 8)

        for idx, img_pars in enumerate(self.im_params):
            if idx != 0:
                self.assertEqual(blind_src_assocs[idx]['type'], 3)
                self.assertEqual(superimposed_mon_src_assocs[idx]['type'], 9)
                self.assertEqual(offset_mon_src_assocs[idx]['type'], 9)

            #And the extraction types:
            self.assertEqual(blind_src_assocs[idx]['extract_type'],0)
            self.assertEqual(superimposed_mon_src_assocs[idx]['extract_type'],2)
            self.assertEqual(offset_mon_src_assocs[idx]['extract_type'],2)

            #Sanity check the timestamps while we're at it
            for al in assoc_lists:
                self.assertEqual(al[idx]['taustart_ts'],
                             img_pars['taustart_ts'])
Beispiel #8
0
 def test_insert(self):
     dataset1 = DataSet(data=self.description)
     monitor_positions = [ (5., 5), (123,85.)]
     dbgen.insert_monitor_positions(dataset1.id, monitor_positions)