class TestDataReading(unittest.TestCase):
    """Tests that strain data can be read via DataGenerationInput._gwpy_read
    from each supported on-disk format (gwf, txt, hdf5)."""

    def setUp(self):
        """Build a shared DataGenerationInput fixture without creating data."""
        self.outdir = "test_outdir"
        self.data_dir = "tests/DATA/"
        self.default_args_list = [
            "--ini", "tests/test_data_generation.ini",
            "--outdir", self.outdir,
            "--data-label", "TEST",
        ]
        self.parser = create_generation_parser()
        self.inputs = DataGenerationInput(
            *parse_args(self.default_args_list, self.parser), create_data=False
        )
        self.det = "H1"
        self.channel = "H1:DCS-CALIB_STRAIN_C02"
        self.start_time = 1126259356.0
        self.end_time = 1126259357.0

    def tearDown(self):
        """Drop the fixture and remove the scratch output directory."""
        del self.inputs
        if os.path.isdir(self.outdir):
            shutil.rmtree(self.outdir)

    def _read_and_check(self, extension):
        """Read tests/DATA/test_data.<extension> through _gwpy_read and check
        that the data starts at the requested GPS time and holds one second
        of 16384 Hz samples."""
        self.inputs.data_dict = {self.det: f"{self.data_dir}/test_data.{extension}"}
        data = self.inputs._gwpy_read(
            self.det, self.channel, self.start_time, self.end_time
        )
        self.assertEqual(data.times[0].value, self.start_time)
        self.assertEqual(len(data), 16384)

    def test_read_data_gwf(self):
        self._read_and_check("gwf")

    def test_read_data_txt(self):
        self._read_and_check("txt")

    def test_read_data_hdf5(self):
        self._read_and_check("hdf5")
def perform_ini_save_load_cycle(self, args):
    """Round-trip the given args through an ini file.

    The args are serialised to an ini string, parsed back into args, and
    then used to build a DataGenerationInput whose data is created.

    Returns the resulting DataGenerationInput instance.
    """
    from core.submit import bilby_ini_to_args
    from bilby_pipe.data_generation import DataGenerationInput

    # Serialise to ini bytes, then parse back into an args namespace
    ini_bytes = args_to_bilby_ini(args)
    round_tripped = bilby_ini_to_args(ini_bytes.decode('utf-8'))
    round_tripped.idx = 1
    round_tripped.ini = None

    generation_input = DataGenerationInput(round_tripped, [], create_data=False)
    generation_input.create_data(round_tripped)
    return generation_input
def test_generation_seed_increases_with_injection_index(self):
    """Assert that generation seed increments for each job.

    ie  JOB 0 -- seed X
        JOB 1 -- seed X + 1
        ...
        JOB N -- seed X + N

    This is so that the gaussian data for each job will be different.
    """
    # The two cases previously duplicated the whole body; a subTest loop
    # keeps them independent while reporting each case separately.
    generation_seed = 0
    for idx, trigger_time in [(0, "2"), (2, "1126259462")]:
        with self.subTest(idx=idx):
            args_list = [
                "--ini", "tests/test_data_generation.ini",
                f"--generation-seed={generation_seed}",
                f"--idx={idx}",
                "--gaussian-noise",
                "--trigger-time", trigger_time,
                "--outdir", self.outdir,
                "--label", "TEST",
            ]
            self.inputs = DataGenerationInput(
                *parse_args(args_list, self.parser))
            self.assertEqual(self.inputs.generation_seed,
                             idx + generation_seed)
def test_inject_signal_into_gaussian_noise(self):
    """The gaussian-noise injection ini should yield an injection with a
    geocentric time of zero."""
    ini_args = [
        "tests/test_injection_in_gaussian_noise.ini",
        "--outdir",
        self.outdir,
    ]
    generated = DataGenerationInput(*parse_args(ini_args, self.parser))
    self.assertEqual(generated.injection_parameters["geocent_time"], 0)
def setUp(self):
    """Create a DataGenerationInput fixture (no data creation) shared by
    the tests in this case."""
    self.outdir = "test_outdir"
    self.data_dir = "tests/DATA/"
    self.default_args_list = [
        "--ini", "tests/test_data_generation.ini",
        "--outdir", self.outdir,
        "--data-label", "TEST",
    ]
    self.parser = create_generation_parser()
    parsed = parse_args(self.default_args_list, self.parser)
    self.inputs = DataGenerationInput(*parsed, create_data=False)
    # Detector / channel / time window used when reading strain files
    self.det = "H1"
    self.channel = "H1:DCS-CALIB_STRAIN_C02"
    self.start_time = 1126259356.0
    self.end_time = 1126259357.0
def test_data_quality_ignore_flag(self, mock_logs, is_data_good, get_data_method):
    """With the ignore-gwpy-data-quality-check flag set (the ini default
    here) bad data must not raise; with the flag cleared, construction
    must raise BilbyPipeError.

    Parameters
    ----------
    mock_logs: the mocked logging module used inside data generation
    is_data_good: mocked data-quality check, forced to return False
    get_data_method: mocked data fetch, returning the test timeseries
    """
    timeseries, _ = load_test_strain_data()
    is_data_good.return_value = False
    get_data_method.return_value = timeseries
    args_list = [
        "tests/test_basic_ini.ini",
        "--detectors", "[H1, L1]",
        "--channel-dict", "{'H1': 'GDS-CALIB_STRAIN', 'L1': 'GDS-CALIB_STRAIN'}",
        "--duration", " 1",
        "--prior_file", "tests/example_prior.prior",
        "--waveform-approximant", "IMRPhenomPv2",
        "--idx", "0",
        "--trigger_time", "1126259462.4",
        "--label", "QUALITY_TEST",
    ]

    # make sure that when the flag is present, no error
    args, unknown = parse_args(args_list, create_generation_parser())
    args.trigger_time = 1126259462.4
    # Renamed from `input` to avoid shadowing the builtin
    inputs = DataGenerationInput(args, unknown)
    self.assertFalse(inputs._is_gwpy_data_good())
    self.assertTrue(inputs.ignore_gwpy_data_quality_check)

    # make sure that when the flag is not present, error present
    args, unknown = parse_args(args_list, create_generation_parser())
    args.trigger_time = 1126259462.4
    args.ignore_gwpy_data_quality_check = False
    with self.assertRaises(BilbyPipeError):
        DataGenerationInput(args, unknown)
    # NOTE(review): these trailing asserts reference the first instance
    # (the second construction raised); the second looks like a
    # copy-paste remnant asserting the opposite of the earlier
    # assertTrue — confirm intent
    self.assertFalse(inputs._is_gwpy_data_good())
    self.assertFalse(inputs.ignore_gwpy_data_quality_check)
def test_injections_no_file(self):
    """Pointing --injection-file at a missing path must raise
    FileNotFoundError during input construction."""
    bad_args = [
        "--ini", "tests/test_data_generation.ini",
        "--outdir", self.outdir,
        "--injection-file", "not_a_file",
        "--data-label", "TEST",
    ]
    with self.assertRaises(FileNotFoundError):
        self.inputs = DataGenerationInput(*parse_args(bad_args, self.parser))
def test_data_quality_exception(self, mock_logs, quality_query):
    """Test the data quality function's exception handling: a GWpy query
    failure returns None (unknown quality) and logs a warning rather
    than propagating.

    (Docstring previously said "PASS state" — a copy-paste from the
    pass-case test; this test exercises the exception path.)

    Parameters
    ----------
    mock_logs: the logging module being used inside this function
    quality_query: mocked GWpy quality query, set to raise
    """
    start_time_good, end_time_good = 1241725028.9, 1241725029
    quality_query.side_effect = Exception("Some exception from GWpy")
    data_is_good = DataGenerationInput._is_gwpy_data_good(
        start_time=start_time_good, end_time=end_time_good, det="H1")
    self.assertTrue(data_is_good is None)
    self.assertTrue(mock_logs.warning.called)
def setUp(self):
    """Create a DataGenerationInput fixture (no data creation) and record
    the path of the gps file used by these tests."""
    self.outdir = "test_outdir"
    self.default_args_list = [
        "--ini", "tests/test_data_generation.ini",
        "--outdir", self.outdir,
        "--data-label", "TEST",
    ]
    self.parser = create_generation_parser()
    parsed = parse_args(self.default_args_list, self.parser)
    self.inputs = DataGenerationInput(*parsed, create_data=False)
    self.gps_file = "tests/gps_file.txt"
def test_data_quality_pass(self, mock_logs, quality_query):
    """Test the data quality function's PASS state.

    Parameters
    ----------
    mock_logs: the logging module being used inside this function
    quality_query: mocked GWpy quality query fed from a fixture file
    """
    quality_query.return_value = gwpy.segments.DataQualityFlag.read(
        "tests/DATA/data_quality.hdf5")
    good_start, good_end = 1241725028.9, 1241725029
    data_is_good = DataGenerationInput._is_gwpy_data_good(
        start_time=good_start, end_time=good_end, det="H1")
    self.assertTrue(data_is_good)
    self.assertFalse(mock_logs.warning.called)
def test_data_quality_fail(self, mock_logs, quality_query):
    """Test the data quality check function's FAIL state.

    Parameters
    ----------
    mock_logs: the logging module being used inside this function
    quality_query: mocked GWpy quality query fed from a fixture file
    """
    quality_query.return_value = gwpy.segments.DataQualityFlag.read(
        "tests/DATA/data_quality.hdf5")
    bad_start, bad_end = 1241725028.9, 1241725029.1
    data_is_good = DataGenerationInput._is_gwpy_data_good(
        start_time=bad_start, end_time=bad_end, det="H1")
    self.assertFalse(data_is_good)
    # The failure must also be logged as a warning
    self.assertTrue(mock_logs.warning.called)
    warning_log_str = mock_logs.warning.call_args.args[0]
    self.assertIn("Data quality check: FAILED", warning_log_str)
def test_generation_seed_is_random_if_none_provided(self):
    """Assert that the generation seed is some random value if not provided."""
    # "--generation-seed=None" / "--idx=0" match the strings the original
    # f-strings produced for generation_seed=None, idx=0
    seed_args = [
        "--ini", "tests/test_data_generation.ini",
        "--generation-seed=None",
        "--idx=0",
        "--gaussian-noise",
        "--trigger-time", "1126259462",
        "--outdir", self.outdir,
        "--label", "TEST",
    ]
    self.inputs = DataGenerationInput(*parse_args(seed_args, self.parser))
    self.assertTrue(1 <= self.inputs.generation_seed <= 1e6)
def test_data_generation_data_get_with_timeslide_values(self, mock_logger):
    """Test timeslide values configured in bilby_pipe.data_generation._get_data()"""
    gps_times = np.loadtxt(self.gps_file)
    timeslides = np.loadtxt(self.timeslide_file)
    idx = 0
    self.generate_ini(
        self.ini,
        extra_lines=[
            f"gps-file={self.gps_file}",
            f"timeslide-file={self.timeslide_file}\n",
            f"idx={idx}",
            f"trigger-time={gps_times[idx] - 2}",
            "channel-dict={'H1': 'GDS-CALIB_STRAIN', 'L1': 'GDS-CALIB_STRAIN'}",
            "data-dict={'H1':tests/DATA/strain.hdf5, 'L1':tests/DATA/strain.hdf5}",
            "psd-dict={'H1':tests/DATA/psd.txt, 'L1':tests/DATA/psd.txt}",
            "psd-duration=4",
            "create-plots=True",
        ],
    )
    inputs = DataGenerationInput(
        *bilby_pipe.main.parse_args([self.ini], create_generation_parser()))

    # The parsed timeslide dict must match the fixture file row for idx
    expected_dict = dict(H1=timeslides[idx][0], L1=timeslides[idx][1])
    self.assertDictEqual(inputs.timeslide_dict, expected_dict)

    logs = [ll.args[0] for ll in mock_logger.info.call_args_list]
    t_log = "Applying timeshift of {tval}. Time range {t0} - {t1} => {nt0} - {nt1}"
    for ifo_num, ifo in enumerate(inputs.interferometers):
        # make sure timeslide was applied
        tval = timeslides[idx][ifo_num]
        t1 = gps_times[idx]
        t0 = t1 - inputs.duration
        nt0, nt1 = t0 + tval, t1 + tval
        ifo_log = t_log.format(tval=tval, t0=t0, t1=t1, nt0=nt0, nt1=nt1)
        self.assertTrue(ifo_log in logs, msg=f"log '{ifo_log}' not in {logs}")
        # Check that the ifo's start time is reset to match after timeslides applied
        self.assertEqual(ifo.strain_data.start_time, t0)
def test_inject_signal_into_time_domain_data(
        self, inject_signal_into_timeseries_method, get_data_method):
    """Each detector should trigger one SIGNAL fetch and one PSD fetch,
    and the injection should sit at geocent_time == 0."""
    timeseries, metadata = load_test_strain_data()
    get_data_method.return_value = timeseries
    inject_signal_into_timeseries_method.return_value = (timeseries, metadata)

    ini_args = ["tests/test_injection.ini", "--outdir", self.outdir]
    inputs = DataGenerationInput(*parse_args(ini_args, self.parser))

    self.assertEqual(inputs.injection_parameters["geocent_time"], 0)
    self.assertEqual(inject_signal_into_timeseries_method.call_count, 2)
    self.assertTrue(get_data_method.called)

    # Expected fetch windows: 1 s of signal, preceded by 32 s of PSD data
    t0 = 1126259463.4
    t1 = t0 + 1
    t0_psd = t0 - 32
    t1_psd = t0
    for det in ("H1", "L1"):
        get_data_method.assert_any_call(det, "GWOSC", t0, t1)  # SIGNAL
        get_data_method.assert_any_call(det, "GWOSC", t0_psd, t1_psd)  # PSD
def test_plot_data(self, data_get, is_data_good):
    """Data generation with --create-plots should write the expected
    plot files into <outdir>/data.

    Parameters
    ----------
    data_get: mocked data fetch, yielding the PSD then the strain
    is_data_good: mocked data-quality check, forced to pass
    """
    data_get.side_effect = [self.psd, self.strain]
    is_data_good.return_value = True
    args_list = [
        "--ini", "tests/test_basic_ini.ini",
        "--detectors", "[H1]",
        "--channel-dict", "{'H1': 'GWOSC',}",
        "--outdir", self.outdir,
        "--trigger-time", "1126259462.4",
        # BUGFIX: was a bare "idx" positional (missing the "--" prefix),
        # matching no option; every sibling test uses the flag form
        "--idx", "0",
        "--data-label", "TEST",
        "--label", "TEST",
        "--create-plots",
    ]
    parser = create_generation_parser()
    plot_filenames = [
        "H1_TEST_D4_data.png",
        "H1_TEST_D32_data.png",
        "H1_TEST_frequency_domain_data.png",
    ]
    plot_dir = os.path.join(self.outdir, "data")
    plot_filenames = [os.path.join(plot_dir, p) for p in plot_filenames]
    DataGenerationInput(*bilby_pipe.main.parse_args(args_list, parser))
    for p in plot_filenames:
        self.assertTrue(os.path.isfile(p), p)
def test_script_inputs_detectors_from_command_line(self):
    """Every accepted command-line spelling of the detector list should
    parse to ["H1", "L1"].

    BUGFIX: the original duplicated the body per spelling and omitted
    the assertion for the "[L1, H1]" case entirely; the loop restores it.
    """
    for detector_args in (
        ["--detectors", "H1", "--detectors", "L1"],
        ["--detectors", "H1 L1"],
        ["--detectors", "L1 H1"],
        ["--detectors", "[L1, H1]"],
        ["--detectors", "[L1 H1]"],
        ["--detectors", '["L1", "H1"]'],
        ["--detectors", "['L1', 'H1']"],
    ):
        with self.subTest(detector_args=detector_args):
            args_list = self.default_args_list + detector_args
            inputs = DataGenerationInput(
                *parse_args(args_list, self.parser), create_data=False)
            self.assertEqual(inputs.detectors, ["H1", "L1"])
def generate_parameter_output(job):
    """
    Generates a complete JobParameterOutput for a job

    :input job: The BilbyJob instance to generate the JobParameterOutput for
    :result: The complete JobParameterOutput
    """
    # Parse the job ini file and create a bilby input class that can be used
    # to read values from the ini
    args = bilby_ini_string_to_args(job.ini_string.encode('utf-8'))
    args.idx = None
    args.ini = None

    # Sanitize the output directory
    if args.outdir == '.':
        args.outdir = "./"

    parser = DataGenerationInput(args, [], create_data=False)

    # Channels
    channels = ChannelsOutput()
    if parser.channel_dict:
        for attr, key in (
            ('hanford_channel', 'H1'),
            ('livingston_channel', 'L1'),
            ('virgo_channel', 'V1'),
        ):
            setattr(channels, attr, parser.channel_dict.get(key, None))

    # Data
    data = DataOutput(
        data_choice="simulated" if args.n_simulation else "real",
        # trigger_time = None or str representing the decimal value
        trigger_time=to_dec(args.trigger_time),
        channels=channels)

    # Detector
    # Trigger the duration setter
    parser.duration = args.duration
    detector = DetectorOutput(
        duration=to_dec(parser.duration),
        sampling_frequency=to_dec(parser.sampling_frequency),
    )
    # FIX: a tuple (was a set literal) keeps the iteration order
    # deterministic; each pair is independent, but determinism aids
    # debugging and reproducibility
    for k, v in (('hanford', 'H1'), ('livingston', 'L1'), ('virgo', 'V1')):
        if v in parser.detectors:
            setattr(detector, k, True)
            setattr(detector, f"{k}_minimum_frequency",
                    to_dec(parser.minimum_frequency_dict[v]))
            setattr(detector, f"{k}_maximum_frequency",
                    to_dec(parser.maximum_frequency_dict[v]))
        else:
            setattr(detector, k, False)
            setattr(detector, f"{k}_minimum_frequency",
                    to_dec(parser.minimum_frequency))
            setattr(detector, f"{k}_maximum_frequency",
                    to_dec(parser.maximum_frequency))

    # Prior
    prior = PriorOutput(
        # args.prior_file is correct here rather than parser.prior_file:
        # the parser fills out the entire path to the prior file
        prior_default=args.prior_file)

    # Sampler
    # Trigger sampler setter in the parser
    parser.sampler = args.sampler
    parser.request_cpus = args.request_cpus
    parser.sampler_kwargs = args.sampler_kwargs

    sampler = SamplerOutput(
        sampler_choice=parser.sampler,
        cpus=args.request_cpus)
    for k, v in parser.sampler_kwargs.items():
        setattr(sampler, k, to_dec(v))

    # Waveform
    model = "unknown"
    if parser.frequency_domain_source_model == "lal_binary_black_hole":
        model = "binaryBlackHole"
    elif parser.frequency_domain_source_model == "lal_binary_neutron_star":
        model = "binaryNeutronStar"

    waveform = WaveformOutput(model=model)

    return JobParameterOutput(
        details=JobDetailsOutput(
            name=job.name,
            description=job.description,
            private=job.private),
        data=data,
        detector=detector,
        prior=prior,
        sampler=sampler,
        waveform=waveform)
def upload_bilby_job(upload_token, details, job_file):
    """Validate and ingest an uploaded bilby job archive.

    The archive is unpacked into a staging directory, its structure and
    config-complete ini are validated, LIGO permissions are checked, and
    the job is recorded and moved into its final directory.

    Returns the created BilbyJob instance.
    """
    # Check that the uploaded file is a tar.gz file
    if not job_file.name.endswith('tar.gz'):
        raise Exception("Job upload should be a tar.gz file")

    # Check that the job upload directory exists
    os.makedirs(settings.JOB_UPLOAD_STAGING_DIR, exist_ok=True)

    # Write out the uploaded job to disk and unpack the archive to a
    # temporary staging directory
    with TemporaryDirectory(dir=settings.JOB_UPLOAD_STAGING_DIR) as job_staging_dir, \
            NamedTemporaryFile(dir=settings.JOB_UPLOAD_STAGING_DIR, suffix='.tar.gz') as job_upload_file, \
            UploadedFile(job_file) as django_job_file:
        # Write the uploaded file to the temporary file
        for chunk in django_job_file.chunks():
            job_upload_file.write(chunk)
        job_upload_file.flush()

        # Unpack the archive to the temporary directory
        p = subprocess.Popen(
            ['tar', '-xvf', job_upload_file.name, '.'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=job_staging_dir)
        out, err = p.communicate()

        logging.info(
            f"Unpacking uploaded job archive {job_file.name} had return code {p.returncode}"
        )
        logging.info(f"stdout: {out}")
        logging.info(f"stderr: {err}")

        if p.returncode != 0:
            raise Exception("Invalid or corrupt tar.gz file")

        # Validate the directory structure, this should include 'data',
        # 'result', and 'results_page' at minimum
        for directory in ['data', 'result', 'results_page']:
            if not os.path.isdir(os.path.join(job_staging_dir, directory)):
                raise Exception(
                    f"Invalid directory structure, expected directory ./{directory} to exist."
                )

        # Find the config complete ini (exactly one file must match)
        ini_candidates = [
            name for name in os.listdir(job_staging_dir)
            if os.path.isfile(os.path.join(job_staging_dir, name))
            and name.endswith("_config_complete.ini")
        ]
        if len(ini_candidates) != 1:
            raise Exception(
                "Invalid number of ini files ending in `_config_complete.ini`. There should be exactly one."
            )
        ini_file = ini_candidates[0]

        # Read the ini file
        with open(os.path.join(job_staging_dir, ini_file), 'r') as f:
            ini_content = f.read()

        # Parse the ini file to check it's validity
        args = bilby_ini_string_to_args(ini_content.encode('utf-8'))
        args.idx = None
        args.ini = None

        # Override the output directory - since in the supported directory
        # structure the output is always relative to the current working
        # directory (root of the job)
        args.outdir = "./"

        parser = DataGenerationInput(args, [], create_data=False)

        # Verify that a non-ligo user can't upload a ligo job, and check if
        # this job is a ligo job or not
        uses_non_gwosc_channel = any(
            channel != 'GWOSC'
            for channel in (parser.channel_dict or {}).values())
        if args.n_simulation == 0 and uses_non_gwosc_channel:
            # This is a real job, with a channel that is not GWOSC
            if not upload_token.is_ligo:
                # User is not a ligo user, so they may not submit this job
                raise Exception(
                    "Non-LIGO members may only upload real jobs on GWOSC channels"
                )
            is_ligo_job = True
        else:
            is_ligo_job = False

        # Convert the modified arguments back to an ini string
        ini_string = bilby_args_to_ini_string(args)

        with transaction.atomic():
            # This is in an atomic block in case:-
            # * The ini file somehow ends up broken
            # * The final move of the staging directory to the job directory
            #   raises an exception (Disk full etc)
            # * The generation of the archive.tar.gz file fails (Disk full etc)

            # Create the bilby job
            bilby_job = BilbyJob(
                user_id=upload_token.user_id,
                name=args.label,
                description=details.description,
                private=details.private,
                ini_string=ini_string,
                is_ligo_job=is_ligo_job,
                is_uploaded_job=True)
            bilby_job.save()

            # Now we have the bilby job id, we can move the staging directory
            # to the actual job directory
            job_dir = bilby_job.get_upload_directory()
            shutil.move(job_staging_dir, job_dir)

            # Finally generate the archive.tar.gz file
            # NOTE(review): `tar -cvf` does not gzip, so archive.tar.gz may
            # actually be an uncompressed tar — confirm this is intended
            p = subprocess.Popen(
                ['tar', '-cvf', 'archive.tar.gz', '.'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=job_dir)
            out, err = p.communicate()

            logging.info(
                f"Packing uploaded job archive for {job_file.name} had return code {p.returncode}"
            )
            logging.info(f"stdout: {out}")
            logging.info(f"stderr: {err}")

            if p.returncode != 0:
                raise Exception("Unable to repack the uploaded job")

    # Job is validated and uploaded, return the job
    return bilby_job
def test_set_reference_frequency(self):
    """--reference-frequency should be parsed onto the inputs object."""
    freq_args = self.default_args_list + ["--reference-frequency", "10"]
    inputs = DataGenerationInput(
        *parse_args(freq_args, self.parser), create_data=False)
    self.assertEqual(inputs.reference_frequency, 10)
def create_bilby_job_from_ini_string(user, params):
    """Create and save a BilbyJob from an ini string, enforcing LIGO
    channel permissions; returns (bilby_job, supporting_file_details)."""
    # Parse the job ini file and create a bilby input class that can be used
    # to read values from the ini
    args = bilby_ini_string_to_args(
        params.ini_string.ini_string.encode('utf-8'))
    args.idx = None
    args.ini = None

    if args.outdir == '.':
        args.outdir = "./"

    # Strip the prior, gps, timeslide, and injection file as
    # DataGenerationInput has trouble without the actual file existing
    prior_file, args.prior_file = args.prior_file, None
    gps_file, args.gps_file = args.gps_file, None
    timeslide_file, args.timeslide_file = args.timeslide_file, None
    injection_file, args.injection_file = args.injection_file, None

    parser = DataGenerationInput(args, [], create_data=False)

    # Parse any supporting files
    supporting_files = parse_supporting_files(
        parser, args, prior_file, gps_file, timeslide_file, injection_file)

    uses_non_gwosc_channel = any(
        channel != 'GWOSC'
        for channel in (parser.channel_dict or {}).values())
    if args.n_simulation == 0 and uses_non_gwosc_channel:
        # This is a real job, with a channel that is not GWOSC
        if not user.is_ligo:
            # User is not a ligo user, so they may not submit this job
            raise Exception(
                "Non-LIGO members may only run real jobs on GWOSC channels")
        is_ligo_job = True
    else:
        is_ligo_job = False

    # Override any required fields
    args.label = params.details.name

    # Convert the modified arguments back to an ini string
    ini_string = bilby_args_to_ini_string(args)

    bilby_job = BilbyJob(
        user_id=user.user_id,
        name=params.details.name,
        description=params.details.description,
        private=params.details.private,
        ini_string=ini_string,
        is_ligo_job=is_ligo_job,
        cluster=params.details.cluster)
    bilby_job.save()

    # Save any supporting file records
    supporting_file_details = SupportingFile.save_from_parsed(
        bilby_job, supporting_files)

    # Submit the job to the job controller if there are no supporting files
    if not bilby_job.has_supporting_files():
        bilby_job.submit()

    return bilby_job, supporting_file_details