def test_taskmanager_meta_classifier(PRIVATE_TODO_FILE):
    """Check the task handed out for the 'meta' classifier after saving SLOSH results.

    A fake 'slosh' result is saved for priority 17, after which the task
    requested for classifier 'meta' must carry those probabilities in its
    ``other_classifiers`` table.
    """
    # Probabilities of the fake SLOSH result. Kept in an immutable tuple so the
    # expectations checked below cannot be altered by the TaskManager.
    slosh_probabilities = (
        (StellarClassesLevel1.SOLARLIKE, 0.2),
        (StellarClassesLevel1.DSCT_BCEP, 0.1),
        (StellarClassesLevel1.ECLIPSE, 0.7),
    )
    fake_slosh_result = {
        'priority': 17,
        'classifier': 'slosh',
        'status': STATUS.OK,
        'starclass_results': dict(slosh_probabilities),
    }

    with TaskManager(PRIVATE_TODO_FILE, overwrite=True, classes=StellarClassesLevel1) as tm:
        tm.save_results(fake_slosh_result)

        # Ask for a task on behalf of the MetaClassifier:
        meta_task = tm.get_task(classifier='meta')
        print(meta_task)

        # The task must be the one the fake result was saved for:
        assert meta_task['priority'] == 17
        assert meta_task['classifier'] == 'meta'

        others = meta_task['other_classifiers']
        assert isinstance(others, Table)
        print(others)

        # Every class entry must be a member of the class enum:
        assert all(isinstance(entry['class'], StellarClassesLevel1) for entry in others)

        # The stored probabilities must come back unchanged:
        for stellar_class, probability in slosh_probabilities:
            assert others[others['class'] == stellar_class]['prob'] == probability
def test_metaclassifier_load_star(PRIVATE_INPUT_DIR):
    """Load the priority-17 star twice through MetaClassifier and inspect the features.

    On both passes the features cache directory is asserted to be empty before
    loading, and the returned features must contain ``other_classifiers`` but
    none of the light curve derived entries.
    """
    # Training set used as input:
    tset = testing_tset()

    # Dummy features cache inside the private input directory:
    features_cache = os.path.join(PRIVATE_INPUT_DIR, 'features_cache')
    os.makedirs(features_cache, exist_ok=True)

    # Entries that must not be present in features from the MetaClassifier:
    excluded_keys = ('lightcurve', 'powerspectrum', 'frequencies')

    with TaskManager(PRIVATE_INPUT_DIR, classes=tset.StellarClasses) as tm:
        for _attempt in range(2):
            with MetaClassifier(tset=tset, features_cache=features_cache) as cl:
                # The cache directory is expected to be empty on every pass:
                assert not os.listdir(features_cache)

                task = tm.get_task(priority=17, classifier='meta')
                feat = cl.load_star(task)
                print(feat)

                # Values carried over from the task:
                assert feat['priority'] == 17
                assert feat['priority'] == task['priority']
                assert feat['starid'] == task['starid']

                # The table of results from the other classifiers:
                assert 'other_classifiers' in feat
                assert isinstance(feat['other_classifiers'], Table)

                for key in excluded_keys:
                    assert key not in feat
def test_taskmanager_switch_classifier(PRIVATE_TODO_FILE, chunk):
    """Test of TaskManager - Automatic switching between classifiers.

    A 'slosh' diagnostics row with status 1 is inserted for every entry in the
    todolist and the row for priority 17 is deleted again. The first request
    for 'slosh' must therefore return priority 17 with 'slosh'; once that task
    has been started, the next request must return priority 17 again, but with
    a different classifier.
    """
    def _unwrap(tasks, expected_count):
        # With chunk > 1 the tasks are checked to come as a sequence of the
        # expected length and its first element is used; otherwise the
        # returned object is used as-is.
        if chunk > 1:
            assert len(tasks) == expected_count
            return tasks[0]
        return tasks

    with TaskManager(PRIVATE_TODO_FILE, overwrite=True) as tm:
        tm.cursor.execute("INSERT INTO starclass_diagnostics (priority,classifier,status) SELECT priority,'slosh',1 FROM todolist;")
        tm.cursor.execute("DELETE FROM starclass_diagnostics WHERE priority=17;")
        tm.conn.commit()

        # Only priority 17 is left without a SLOSH diagnostics row:
        first = tm.get_task(classifier='slosh', chunk=chunk)
        print(first)
        first = _unwrap(first, 1)
        assert first['priority'] == 17
        assert first['classifier'] == 'slosh'

        # Mark that task as started:
        tm.start_task(first)

        # Asking for SLOSH again must now hand out another classifier:
        second = tm.get_task(classifier='slosh', chunk=chunk)
        print(second)
        second = _unwrap(second, chunk)
        assert second['priority'] == 17
        assert second['classifier'] is not None
        assert second['classifier'] != 'slosh'
def test_taskmanager_invalid():
    """TaskManager must raise FileNotFoundError for a non-existing TODO-file path."""
    # Path below the 'input' directory next to this file that does not exist:
    here = os.path.dirname(__file__)
    missing_todo = os.path.join(here, 'input', 'does-not-exists')

    with pytest.raises(FileNotFoundError):
        TaskManager(missing_todo)
def test_taskmanager_save_and_settings(PRIVATE_TODO_FILE):
    """Test of TaskManager saving results and settings.

    Saves a fake 'meta' result and checks that the training set setting, the
    diagnostics row and the per-class probabilities can be read back from the
    TODO-file. Finally verifies that saving a result with a different training
    set raises ValueError.
    """
    with TaskManager(PRIVATE_TODO_FILE, overwrite=True, classes=StellarClassesLevel1) as tm:
        # Before anything is saved, no training set is registered:
        assert tm.tset is None
        tm.cursor.execute("SELECT * FROM starclass_settings;")
        settings = tm.cursor.fetchone()
        assert settings is None

        # Start a task for the 'meta' classifier:
        task = tm.get_task(classifier='meta')
        print(task)
        tm.start_task(task)

        # Make a fake result we can save:
        starclass_results = {
            StellarClassesLevel1.SOLARLIKE: 0.8,
            StellarClassesLevel1.APERIODIC: 0.2
        }
        result = task.copy()
        result['tset'] = 'keplerq9v3'
        result['classifier'] = 'meta'
        result['status'] = STATUS.OK
        result['elaptime'] = 3.14
        result['worker_wait_time'] = 1.0
        result['details'] = {'errors': ['There was actually no error']}
        result['starclass_results'] = starclass_results

        # Save the result:
        tm.save_results(result)

        # Check the setting again - it should now have changed:
        assert tm.tset == 'keplerq9v3'
        tm.cursor.execute("SELECT * FROM starclass_settings;")
        settings = tm.cursor.fetchone()
        assert settings['tset'] == 'keplerq9v3'

        # Check that the additional diagnostic was saved correctly:
        tm.cursor.execute("SELECT * FROM starclass_diagnostics WHERE priority=?;", [result['priority']])
        row = tm.cursor.fetchone()
        print(dict(row))
        assert row['status'] == STATUS.OK.value
        assert row['classifier'] == 'meta'
        assert row['elaptime'] == 3.14
        assert row['worker_wait_time'] == 1.0
        assert row['errors'] == 'There was actually no error'

        # Check that the results were saved correctly:
        tm.cursor.execute("SELECT class,prob FROM starclass_results WHERE priority=? AND classifier=?;", [result['priority'], 'meta'])
        rows = tm.cursor.fetchall()
        # Without this check the loop below would pass vacuously if no
        # probabilities had been stored at all:
        assert len(rows) == len(starclass_results), "Unexpected number of rows in starclass_results"
        for row in rows:
            assert row['prob'] == starclass_results[StellarClassesLevel1[row['class']]]

        # This should fail when we try to save it, since the training set differs:
        result['tset'] = 'another'
        with pytest.raises(ValueError) as e:
            tm.save_results(result)
        assert str(e.value) == "Attempting to mix results from multiple training sets. Previous='keplerq9v3', New='another'."
def test_taskmanager_get_tasks_priority(PRIVATE_TODO_FILE):
    """Request tasks by priority: an existing one is returned, an unknown one gives None."""
    existing_priority = 17
    unknown_priority = -1234567890

    with TaskManager(PRIVATE_TODO_FILE, overwrite=True) as tm:
        found = tm.get_task(priority=existing_priority)
        assert found['priority'] == existing_priority

        # A priority that is not in the TODO-file:
        missing = tm.get_task(priority=unknown_priority)
        assert missing is None
def test_taskmanager_moat_create_wrong(PRIVATE_TODO_FILE):
    """moat_create must raise ValueError for an unknown classifier and for an empty feature list."""
    with TaskManager(PRIVATE_TODO_FILE, overwrite=True, classes=StellarClassesLevel1) as tm:
        # Unknown classifier name:
        with pytest.raises(ValueError) as excinfo:
            tm.moat_create('nonsense', ['freq1', 'freq2'])
        assert str(excinfo.value) == 'Invalid classifier: nonsense'

        # Valid name, but no features given (only the exception type is checked):
        with pytest.raises(ValueError):
            tm.moat_create('common', [])
def test_taskmanager_no_diagnostics(PRIVATE_TODO_FILE):
    """TaskManager must refuse a TODO-file without the diagnostics_corr table."""
    expected_message = "The TODO-file does not contain diagnostics_corr. Are you sure corrections have been run?"

    # Remove the table directly from the TODO-file:
    conn = sqlite3.connect(PRIVATE_TODO_FILE)
    try:
        conn.execute('DROP TABLE IF EXISTS diagnostics_corr;')
        conn.commit()
    finally:
        conn.close()

    # Opening the file with the TaskManager should now fail:
    with pytest.raises(ValueError) as excinfo:
        TaskManager(PRIVATE_TODO_FILE)
    assert str(excinfo.value) == expected_message
def test_taskmanager_moat_wrong_existing_table(PRIVATE_TODO_FILE):
    """TaskManager must reject a TODO-file holding a features table for an unknown classifier.

    A table named ``starclass_features_dummy`` is created in the TODO-file
    before the TaskManager is opened; opening must raise ValueError naming
    the classifier 'dummy' as invalid.
    """
    # Table following the features-table naming scheme, but for a classifier
    # called "dummy":
    create_dummy_table = """CREATE TABLE starclass_features_dummy ( priority integer not null, dummy_var real );"""

    with closing(sqlite3.connect(PRIVATE_TODO_FILE)) as conn:
        conn.execute(create_dummy_table)
        conn.commit()

    # Opening the file with the TaskManager should now fail:
    with pytest.raises(ValueError) as excinfo:
        TaskManager(PRIVATE_TODO_FILE)
    assert str(excinfo.value) == "Invalid classifier: dummy"
def test_taskmanager_chunks(PRIVATE_TODO_FILE):
    """Test TaskManager, getting chunks of tasks at a time.

    A request without ``chunk`` must give a single dict, a request with
    ``chunk=10`` a list of ten dicts. Starting that list must leave ten 'rfgc'
    rows with status STARTED in the diagnostics table.
    """
    chunk_size = 10

    # Note that the TaskManager is opened without overwrite here.
    with TaskManager(PRIVATE_TODO_FILE) as tm:
        single = tm.get_task(classifier='rfgc')
        assert isinstance(single, dict)

        batch = tm.get_task(classifier='rfgc', chunk=chunk_size)
        assert isinstance(batch, list)
        assert len(batch) == chunk_size
        assert all(isinstance(item, dict) for item in batch)

        # Start the whole batch in one call and count the started rows:
        tm.start_task(batch)
        tm.cursor.execute("SELECT COUNT(*) FROM starclass_diagnostics WHERE classifier='rfgc' AND status=?;", [STATUS.STARTED.value])
        started_count = tm.cursor.fetchone()[0]
        assert started_count == chunk_size
def test_taskmanager_get_tasks(PRIVATE_TODO_FILE):
    """Walk through the first two 'slosh' tasks handed out by the TaskManager.

    Also checks that get_number_tasks is not implemented, that get_task
    without arguments raises ValueError, and that starting a task changes its
    status in the diagnostics table to STARTED.
    """
    input_dir = os.path.dirname(PRIVATE_TODO_FILE)

    # What the first two tasks are known to contain:
    expected_first = (
        ('priority', 17),
        ('starid', 29281992),
        ('lightcurve', os.path.join(input_dir, 'tess00029281992-s01-c1800-dr01-v04-tasoc-cbv_lc.fits.gz')),
        ('classifier', 'slosh'),
    )
    expected_second = (
        ('priority', 26),
        ('starid', 29859905),
        ('lightcurve', os.path.join(input_dir, 'ffi/00029/tess00029859905-s01-c1800-dr01-v04-tasoc-cbv_lc.fits.gz')),
        ('classifier', 'slosh'),
    )

    with TaskManager(PRIVATE_TODO_FILE, overwrite=True) as tm:
        # Counting the tasks is expected to be unsupported:
        with pytest.raises(NotImplementedError):
            tm.get_number_tasks()

        # A priority or a classifier has to be given when asking for a task:
        with pytest.raises(ValueError):
            tm.get_task()

        # First task for SLOSH:
        first = tm.get_task(classifier='slosh')
        print(first)
        for key, value in expected_first:
            assert first[key] == value

        # Mark the first task as started:
        tm.start_task(first)

        # The next request must move on to the following task:
        second = tm.get_task(classifier='slosh')
        print(second)
        for key, value in expected_second:
            assert second[key] == value

        # The status of the first task must have changed in the diagnostics table:
        tm.cursor.execute("SELECT status FROM starclass_diagnostics WHERE priority=?;", (first['priority'],))
        first_status = tm.cursor.fetchone()['status']
        print(first_status)
        assert first_status == STATUS.STARTED.value
def test_baseclassifier_load_star(PRIVATE_INPUT_DIR, linfit):
    """Load the priority-17 star twice through BaseClassifier and validate the features.

    The features cache directory must be empty before the first pass and hold
    exactly ``features-17.pickle`` before the second pass.

    Parameters:
        PRIVATE_INPUT_DIR: Input directory handed to the TaskManager. The
            features cache directory is created inside it.
        linfit: Passed on to the training set. When truthy, a separate cache
            directory is used and ``detrend_coeff`` is expected in the features.
    """
    # Use the following training set as input:
    tsetclass = get_trainingset()
    tset = tsetclass(linfit=linfit)

    # Set a dummy features cache inside the private input dir:
    features_cache_name = 'features_cache'
    if linfit:
        features_cache_name += '_linfit'
    features_cache = os.path.join(PRIVATE_INPUT_DIR, features_cache_name)
    os.makedirs(features_cache, exist_ok=True)

    # The features cache should be empty to begin with:
    assert len(os.listdir(features_cache)) == 0

    with TaskManager(PRIVATE_INPUT_DIR) as tm:
        # Load twice; the second pass starts with a populated cache directory.
        for attempt in range(2):
            with BaseClassifier(tset=tset, features_cache=features_cache) as cl:
                # Check that the second time there is something in the features cache:
                if attempt > 0:
                    assert os.listdir(features_cache) == ['features-17.pickle']

                task = tm.get_task(priority=17)
                print(task)
                feat = cl.load_star(task)
                print(feat)

                # Check the complex objects:
                assert isinstance(feat['lightcurve'], TessLightCurve)
                assert isinstance(feat['powerspectrum'], powerspectrum)
                assert isinstance(feat['frequencies'], Table)

                # Check "transferred" features:
                assert feat['priority'] == 17
                assert feat['priority'] == task['priority']
                assert feat['starid'] == task['starid']
                assert feat['tmag'] == task['tmag']
                assert feat['variance'] == task['variance']
                assert feat['rms_hour'] == task['rms_hour']
                assert feat['ptp'] == task['ptp']

                # Check FliPer:
                assert np.isfinite(feat['Fp07'])
                assert np.isfinite(feat['Fp7'])
                assert np.isfinite(feat['Fp20'])
                assert np.isfinite(feat['Fp50'])
                assert np.isfinite(feat['FpWhite'])
                assert np.isfinite(feat['Fphi'])
                assert np.isfinite(feat['Fplo'])

                # Check frequencies. The loop variable is deliberately distinct
                # from the outer attempt counter so it is not overwritten.
                freqtab = feat['frequencies']
                for num in np.unique(freqtab['num']):
                    assert np.isfinite(feat['freq%d' % num]) or np.isnan(feat['freq%d' % num]), "Invalid frequency"
                    assert np.isfinite(feat['amp%d' % num]) or np.isnan(feat['amp%d' % num]), "Invalid amplitude"
                    assert np.isfinite(feat['phase%d' % num]) or np.isnan(feat['phase%d' % num]), "Invalid phase"

                    # The scalar features must match the table rows with harmonic == 0:
                    peak = freqtab[(freqtab['num'] == num) & (freqtab['harmonic'] == 0)]
                    np.testing.assert_allclose(feat['freq%d' % num], peak['frequency'])
                    np.testing.assert_allclose(feat['amp%d' % num], peak['amplitude'])
                    np.testing.assert_allclose(feat['phase%d' % num], peak['phase'])

                # Check details about lightkurve object:
                lc = feat['lightcurve']
                lc.show_properties()
                assert lc.targetid == feat['starid']
                assert lc.label == 'TIC %d' % feat['starid']
                assert lc.mission == 'TESS'
                assert lc.time_format == 'btjd'
                assert lc.camera == 1
                assert lc.ccd == 4
                assert lc.sector == 1

                # When running with linfit enabled, the features should contain
                # an extra set of coefficients from the detrending:
                if linfit:
                    assert 'detrend_coeff' in feat
                    assert len(feat['detrend_coeff']) == 2
                    assert np.all(np.isfinite(feat['detrend_coeff']))
                else:
                    assert 'detrend_coeff' not in feat
def test_taskmanager_moat(PRIVATE_TODO_FILE, classifier):
    """Save features through save_results and read them back from the MOAT tables.

    Covers reading the stored features straight from the database and through
    moat_query, their presence in later tasks, their survival when the
    TaskManager is reopened with overwrite, and their removal by moat_clear.
    """
    # Dummy features which will be saved and restored:
    common_feats = {'freq1': 42.0, 'amp1': 43.0, 'phase1': 4.0}
    specific_feats = {'unique_feature': 2187.0, 'special_feature': 1234.0}

    with TaskManager(PRIVATE_TODO_FILE, overwrite=True, classes=StellarClassesLevel1) as tm:
        # Pick a task for the classifier under test:
        task = tm.get_task(classifier=classifier)
        print(task)
        priority = task['priority']

        # Fake result built from the task, carrying both feature sets:
        result = task.copy()
        result.update({
            'tset': 'keplerq9v3',
            'classifier': classifier,
            'status': STATUS.OK,
            'elaptime': 3.14,
            'starclass_results': {
                StellarClassesLevel1.SOLARLIKE: 0.8,
                StellarClassesLevel1.APERIODIC: 0.2
            },
            'features': specific_feats,
            'features_common': common_feats,
        })
        tm.save_results(result)

        # The common features must be in their table:
        tm.cursor.execute("SELECT * FROM starclass_features_common WHERE priority=?;", [priority])
        stored_common = dict(tm.cursor.fetchone())
        stored_common.pop('priority')
        assert stored_common == common_feats

        # ... and the classifier specific ones in the table of that classifier:
        tm.cursor.execute("SELECT * FROM starclass_features_%s WHERE priority=?;" % classifier, [priority])
        stored_specific = dict(tm.cursor.fetchone())
        stored_specific.pop('priority')
        assert stored_specific == specific_feats

        # Querying the MOAT must give back what was put in:
        assert tm.moat_query('common', priority) == common_feats
        assert tm.moat_query(classifier, priority) == specific_feats

        # Asking for the same target again must give another classifier,
        # with the common features attached:
        task2 = tm.get_task(priority=priority, classifier=classifier)
        print('TASK2: %s' % task2)
        assert task2['classifier'] != classifier
        assert task2['features_common'] == common_feats

    # Reopen with overwrite: the same classifier is handed out again for the
    # target, and the features stored above are still attached to the task.
    with TaskManager(PRIVATE_TODO_FILE, overwrite=True, classes=StellarClassesLevel1) as tm:
        task3 = tm.get_task(priority=priority, classifier=classifier)
        print('TASK3: %s' % task3)
        assert task3['classifier'] == classifier
        assert task3['features_common'] == common_feats
        assert task3['features'] == specific_feats

        # Clearing the MOAT must remove all the features tables:
        tm.moat_clear()
        tm.cursor.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name LIKE 'starclass_features_%';")
        remaining_tables = tm.cursor.fetchone()[0]
        assert remaining_tables == 0

        # Queries must now come back empty:
        assert tm.moat_query('common', priority) is None
        assert tm.moat_query(classifier, priority) is None