def _str_to_bool(value):
    """Convert a command-line flag value into a real boolean.

    ``bool('False')`` is ``True`` because any non-empty string is truthy,
    so the flag text has to be interpreted explicitly. Values that are
    already booleans (the argparse defaults) are passed through unchanged.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised
            boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string is kept falsy, as it was under plain bool().
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got {!r}.'.format(value))


def _draw_trim_windows(n_windows, cutoff_low, cutoff_high, max_shift):
    """Draw `n_windows` randomly shifted (low, high) trim boundaries.

    Each window is shifted by one integer drawn uniformly from
    ``[-max_shift, max_shift]``; the same shift is applied to both ends.

    Returns:
        A tuple ``(lows, highs)`` of two lists of length `n_windows`.
    """
    lows, highs = [], []
    for _ in range(n_windows):
        shift = random.randint(-max_shift, max_shift)
        lows.append(cutoff_low + shift)
        highs.append(cutoff_high + shift)
    return lows, highs


def one_det_main(input_file_name="default.hdf",
                 template_file_name="default_templates.hdf",
                 output_file_name="default_snrs.hdf",
                 snr_range="default"):
    """Generate SNR time-series for the samples stored in an HDF file.

    The function parses the command line (``--config-file``,
    ``--filter-injection-samples``, ``--filter-templates``,
    ``--trim-output``), reads the JSON and INI configuration files from
    ``./config_files``, opens the sample file and then, depending on the
    flags, runs `InjectionsBuildFiles` and / or `FiltersBuildFiles`.

    Args:
        input_file_name: Name of the sample HDF file. The default value
            ``"default.hdf"`` means "use ``config['output_file_name']``".
        template_file_name: Name of the templates HDF file. The default
            value means "use ``config['template_output_file_name']``".
        output_file_name: Name of the SNR output file. The default value
            means "use ``config['snr_output_file_name']``".
        snr_range: Sub-folder of ``./output`` holding the samples, or
            ``"default"`` to use ``./output`` itself.

    Raises:
        SystemExit: If the samples folder does not exist or the
            configured approximant is not a time-domain approximant.
    """

    # -------------------------------------------------------------------------
    # Preliminaries
    # -------------------------------------------------------------------------

    # Start the stopwatch
    script_start = time.time()

    print('')
    print('GENERATE A GW SAMPLE SNR TIME-SERIES')
    print('')

    # -------------------------------------------------------------------------
    # Parse the command line arguments
    # -------------------------------------------------------------------------

    # Set up the parser and add arguments
    parser = argparse.ArgumentParser(description='Generate a GW data sample.')

    # Add arguments (and set default values where applicable). The boolean
    # flags use an explicit converter so that e.g. "False" really disables
    # the corresponding step.
    parser.add_argument('--config-file',
                        help='Name of the JSON configuration file which '
                             'controls the sample generation process.',
                        default='default.json')
    parser.add_argument('--filter-injection-samples',
                        help='Boolean expression for whether to '
                             'calculate SNRs of injection signals. '
                             'Default: True',
                        type=_str_to_bool,
                        default=True)
    parser.add_argument('--filter-templates',
                        help='Boolean expression for whether to calculate '
                             'SNRs of all signals using a set of templates. '
                             'Default: True',
                        type=_str_to_bool,
                        default=True)
    parser.add_argument('--trim-output',
                        help='Boolean expression for whether to trim the '
                             'SNR time series output. '
                             'Default: True',
                        type=_str_to_bool,
                        default=True)

    # Parse the arguments that were passed when calling this script
    print('Parsing command line arguments...', end=' ')
    arguments = vars(parser.parse_args())
    print('Done!')

    # Set up shortcut for the command line arguments
    filter_injection_samples = arguments['filter_injection_samples']
    filter_templates = arguments['filter_templates']
    trim_output = arguments['trim_output']

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating in JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the static_arguments (the variable arguments are not needed)
    print('Reading and validating in INI configuration file...', end=' ')
    _, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')

    # Check output file directory exists
    output_dir = os.path.join('.', 'output')
    if snr_range == "default":
        samples_output_dir = output_dir
    else:
        samples_output_dir = os.path.join(output_dir, snr_range)
    if not os.path.exists(samples_output_dir):
        print("Output folder cannot be found. Please create a folder",
              "named 'output' to store data in.")
        # Exit with a non-zero status; quit() is only a `site` convenience
        # and would report success.
        raise SystemExit(1)

    # Get file names from config file
    if template_file_name == "default_templates.hdf":
        templates_file_path = os.path.join(
            output_dir, config['template_output_file_name'])
    else:
        templates_file_path = os.path.join(output_dir, template_file_name)

    if input_file_name == "default.hdf":
        input_file_path = os.path.join(samples_output_dir,
                                       config['output_file_name'])
    else:
        input_file_path = os.path.join(samples_output_dir, input_file_name)

    # NOTE(review): the default output name is placed in `output_dir`,
    # a custom one in `samples_output_dir`. Kept as-is; confirm intent.
    if output_file_name == "default_snrs.hdf":
        output_file_path = os.path.join(output_dir,
                                        config['snr_output_file_name'])
    else:
        output_file_path = os.path.join(samples_output_dir, output_file_name)

    # -------------------------------------------------------------------------
    # Read in the sample file
    # -------------------------------------------------------------------------

    print('Reading in samples HDF file...', end=' ')
    # The context manager guarantees the file handle is released, also when
    # the function exits early or a build step raises.
    # NOTE(review): assumes the `run()` calls below have finished using
    # the file by the time they return - confirm.
    with h5py.File(input_file_path, 'r') as df:
        print('Done!')

        # ---------------------------------------------------------------------
        # Collect the parameters needed to compute the SNR time-series
        # ---------------------------------------------------------------------

        # Get approximant for generating matched filter templates
        apx = static_arguments["approximant"]
        if apx not in td_approximants():
            print("Invalid waveform approximant. Please put a valid "
                  "time-series approximant in the waveform params file..")
            raise SystemExit(1)

        sampling_rate = static_arguments["target_sampling_rate"]
        sample_length = (static_arguments["seconds_before_event"] +
                         static_arguments["seconds_after_event"])
        delta_f = 1.0 / sample_length

        # Get f-lower and delta-t from config files
        f_low = static_arguments["f_lower"]
        delta_t = 1.0 / sampling_rate

        # Initialise list of all parameters required for generating
        # template waveforms
        param_dict = dict(injections=dict(mass1=[],
                                          mass2=[],
                                          spin1z=[],
                                          spin2z=[],
                                          ra=[],
                                          dec=[],
                                          coa_phase=[],
                                          inclination=[],
                                          polarization=[],
                                          injection_snr=[],
                                          f_lower=f_low,
                                          approximant=apx,
                                          delta_t=delta_t))

        # Number of injection / noise samples and number of templates
        n_injection_samples = config['n_injection_samples']
        n_noise_samples = config['n_noise_samples']
        n_templates = config['n_template_samples']

        # Get trim cutoffs, expressed in numbers of time steps
        trim_cutoff_low = config['snr_output_cutoff_low'] * sampling_rate
        trim_cutoff_high = config['snr_output_cutoff_high'] * sampling_rate
        # random.randint() needs integer bounds, but `/ 2` yields a float
        # under true division, so truncate to an integer number of steps.
        max_shift = int(
            config['snr_output_cutoff_variation'] * sampling_rate / 2)

        # Generate random time shifts and apply them to the start and end
        # times for each type of sample. The three draws are made in the
        # same order as before, so seeded runs consume the RNG identically.
        inj_low, inj_high = _draw_trim_windows(
            n_injection_samples, trim_cutoff_low, trim_cutoff_high,
            max_shift)
        noise_low, noise_high = _draw_trim_windows(
            n_noise_samples * n_templates, trim_cutoff_low,
            trim_cutoff_high, max_shift)
        temp_inj_low, temp_inj_high = _draw_trim_windows(
            n_injection_samples * n_templates, trim_cutoff_low,
            trim_cutoff_high, max_shift)

        # ---------------------------------------------------------------------
        # Compute SNR time-series
        # ---------------------------------------------------------------------

        # Generate optimal SNR time series
        if filter_injection_samples:
            print('Generating OMF SNR time-series for injection samples...')
            if n_injection_samples > 0:
                injections_build_files = InjectionsBuildFiles(
                    output_file_path=output_file_path,
                    param_dict=param_dict,
                    df=df,
                    n_samples=n_injection_samples,
                    trim_output=trim_output,
                    inj_low=inj_low,
                    inj_high=inj_high)
                injections_build_files.run()
                print('Done!')
            else:
                print('Done! (n-samples = 0)\n')
        else:
            print('No SNR time-series generated for injections. '
                  'Please set filter-injection-samples to True.')

        # Generate SNR time series with template bank, using injection and
        # noise samples
        if filter_templates:
            print('Reading in the templates HDF file...', end=' ')
            with h5py.File(templates_file_path, 'r') as templates_df:
                print('Done!')

                print("Generating SNR time-series for injection and noise "
                      "samples using a template set...")
                if n_templates == 0:
                    print('Done! (n-templates = 0) '
                          'Please generate templates before running.\n')
                elif (n_noise_samples > 0) or (n_injection_samples > 0):
                    filters_build_files = FiltersBuildFiles(
                        output_file_path=output_file_path,
                        df=df,
                        templates_df=templates_df,
                        n_noise_samples=n_noise_samples,
                        n_injection_samples=n_injection_samples,
                        n_templates=n_templates,
                        f_low=f_low,
                        delta_t=delta_t,
                        filter_injection_samples=filter_injection_samples,
                        delta_f=delta_f,
                        trim_output=trim_output,
                        inj_low=temp_inj_low,
                        inj_high=temp_inj_high,
                        noise_low=noise_low,
                        noise_high=noise_high)
                    filters_build_files.run()
                    print('Done!')
                else:
                    print('Done! (n-noise-samples = 0)\n')
        else:
            print('No SNR time-series generated for injections. '
                  'Please set filter-templates to True.')

    # Get file size in MB and print the result. The file may be missing
    # when neither build step wrote anything.
    if os.path.exists(output_file_path):
        sample_file_size = os.path.getsize(output_file_path) / 1024**2
        print('Size of resulting HDF file: {:.2f}MB'.format(sample_file_size))
    else:
        print('No output HDF file was written.')
    print('')

    # -------------------------------------------------------------------------
    # Postliminaries
    # -------------------------------------------------------------------------

    # Print the total run time
    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
    print('')
# NOTE(review): this run of statements sits between two function definitions
# and reads keys from `arguments` ('template_id', 'seconds_before',
# 'seconds_after', 'plot_path') that no parser visible in this file defines.
# It looks like a fragment of another script whose enclosing definition is
# not shown here - confirm where it belongs before relying on it.

# Convert the raw argument values to the types used below
template_id = int(arguments['template_id'])
seconds_before = float(arguments['seconds_before'])
seconds_after = float(arguments['seconds_after'])
plot_path = str(arguments['plot_path'])

# -------------------------------------------------------------------------
# Read in JSON config file specifying the sample generation process
# -------------------------------------------------------------------------

# Build the full path to the config file
json_config_name = arguments['config_file']
json_config_path = os.path.join('.', 'config_files', json_config_name)

# Read the JSON configuration into a dict
print('Reading and validating in JSON configuration file...', end=' ')
config = read_json_config(json_config_path)
print('Done!')

# -------------------------------------------------------------------------
# Read in INI config file specifying the static_args and variable_args
# -------------------------------------------------------------------------

# Build the full path to the waveform params file
ini_config_name = config['waveform_params_file_name']
ini_config_path = os.path.join('.', 'config_files', ini_config_name)

# Read in the variable_arguments and static_arguments
print('Reading and validating in INI configuration file...', end=' ')
variable_arguments, static_arguments = read_ini_config(ini_config_path)
print('Done!\n')
def _run_sample_workers(arguments_generator, n_samples, n_processes,
                        max_runtime):
    """Generate `n_samples` results using a pool of worker processes.

    One `queue_worker` process is started per set of arguments, with at
    most `n_processes` running at a time. A process that runs longer than
    `max_runtime` seconds is terminated, and a process that exits with a
    non-zero exit code counts as failed; in both cases a fresh set of
    arguments is drawn from `arguments_generator` as a replacement.

    Returns:
        The list of results collected from the workers' results queue.
    """

    # Fill a queue with as many arguments as we want to generate samples
    arguments_queue = Queue()
    for _ in range(n_samples):
        arguments_queue.put(next(arguments_generator))

    # Initialize a Queue and a list to store the generated samples
    results_queue = Queue()
    results_list = []

    # Use a tqdm context manager for the progress bar
    tqdm_args = dict(total=n_samples, ncols=80, unit='sample')
    with tqdm(**tqdm_args) as progressbar:

        # Keep track of all running processes
        list_of_processes = []

        # While we haven't produced as many results as desired, keep going
        while len(results_list) < n_samples:

            # -------------------------------------------------------------
            # Loop over processes to see if anything finished or got stuck
            # -------------------------------------------------------------

            # Iterate over a snapshot: entries are removed from
            # `list_of_processes` inside the loop, and removing from the
            # list being iterated would skip the following entry.
            for process_dict in list(list_of_processes):

                # Get the process object and its current runtime
                process = process_dict['process']
                runtime = time.time() - process_dict['start_time']

                # Check if the process is still running when it should
                # have terminated already (according to max_runtime)
                if process.is_alive() and (runtime > max_runtime):

                    # Kill process that's been running too long
                    process.terminate()
                    process.join()
                    list_of_processes.remove(process_dict)

                    # Add new arguments to queue to replace the failed ones
                    arguments_queue.put(next(arguments_generator))

                # If process has terminated already
                elif not process.is_alive():

                    # If the process failed, add new arguments to queue
                    if process.exitcode != 0:
                        arguments_queue.put(next(arguments_generator))

                    # Remove process from the list of running processes
                    list_of_processes.remove(process_dict)

            # -------------------------------------------------------------
            # Start new processes if necessary
            # -------------------------------------------------------------

            # Start new processes until the arguments_queue is empty, or
            # we have reached the maximum number of processes
            while (arguments_queue.qsize() > 0 and
                   len(list_of_processes) < n_processes):

                # Get arguments from queue and start new process
                arguments = arguments_queue.get()
                p = Process(target=queue_worker,
                            kwargs=dict(arguments=arguments,
                                        results_queue=results_queue))

                # Remember this process and its starting time
                process_dict = dict(process=p, start_time=time.time())
                list_of_processes.append(process_dict)

                # Finally, start the process
                p.start()

            # -------------------------------------------------------------
            # Move results from results_queue to results_list
            # -------------------------------------------------------------

            # Without this part, the results_queue blocks the worker
            # processes so that they won't terminate
            while results_queue.qsize() > 0:
                results_list.append(results_queue.get())

            # Update the progress bar based on the number of results
            progressbar.update(len(results_list) - progressbar.n)

            # Sleep for some time before we check the processes again
            time.sleep(0.5)

    return results_list


def two_det_main(random_seed=-1, noise_random_seed=-1,
                 output_file_name='default.hdf'):
    """Generate a GW data sample file and save it as an HDF file.

    The function parses ``--config-file`` from the command line, reads the
    JSON and INI configuration files from ``./config_files``, generates
    samples with and without an injection in parallel worker processes,
    computes normalization parameters and writes everything to
    ``./output/<output_file_name>`` through a `SampleFile` object.

    Note that `sys.stdout` is replaced by an `Unbuffered` wrapper and is
    not restored afterwards.

    Args:
        random_seed: Seed for NumPy and for the parameter / noise-time
            generators; ``-1`` means "use ``config['random_seed']``".
        noise_random_seed: Seed passed on to the sample workers; ``-1``
            means "use ``config['noise_random_seed']``".
        output_file_name: Name of the output file; ``'default.hdf'``
            means "use ``config['output_file_name']``".
    """

    # -------------------------------------------------------------------------
    # Preliminaries
    # -------------------------------------------------------------------------

    # Disable output buffering ('flush' option is not available for Python 2)
    sys.stdout = Unbuffered(sys.stdout)

    # Start the stopwatch
    script_start = time.time()

    print('')
    print('GENERATE A GW DATA SAMPLE FILE')
    print('')

    # -------------------------------------------------------------------------
    # Parse the command line arguments
    # -------------------------------------------------------------------------

    # Set up the parser and add arguments
    parser = argparse.ArgumentParser(description='Generate a GW data sample.')
    parser.add_argument('--config-file',
                        help='Name of the JSON configuration file which '
                             'controls the sample generation process.',
                        default='default.json')

    # Parse the arguments that were passed when calling this script
    print('Parsing command line arguments...', end=' ')
    command_line_arguments = vars(parser.parse_args())
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = command_line_arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating in JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the variable_arguments and static_arguments
    print('Reading and validating in INI configuration file...', end=' ')
    variable_arguments, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')

    # -------------------------------------------------------------------------
    # Shortcuts and random seed
    # -------------------------------------------------------------------------

    # Set the random seed for this script; -1 selects the configured seed
    if random_seed == -1:
        np.random.seed(config['random_seed'])
        random_seed = config['random_seed']
    else:
        np.random.seed(random_seed)

    if noise_random_seed == -1:
        noise_random_seed = config['noise_random_seed']

    # Define some useful shortcuts
    max_runtime = config['max_runtime']
    bkg_data_dir = config['background_data_directory']

    # -------------------------------------------------------------------------
    # Construct a generator for sampling waveform parameters
    # -------------------------------------------------------------------------

    # Initialize a waveform parameter generator that can sample injection
    # parameters from the distributions specified in the config file
    waveform_parameter_generator = \
        WaveformParameterGenerator(config_file=ini_config_path,
                                   random_seed=random_seed)

    # Wrap it in a generator expression so that we can easily sample
    # from it by calling next(waveform_parameters). `iter(int, 1)` never
    # terminates, because int() returns 0 and therefore never hits the
    # sentinel value 1.
    waveform_parameters = \
        (waveform_parameter_generator.draw() for _ in iter(int, 1))

    # -------------------------------------------------------------------------
    # Construct a generator for sampling valid noise times
    # -------------------------------------------------------------------------

    # If the 'background_data_directory' is None, we will use synthetic noise
    if config['background_data_directory'] is None:

        print('Using synthetic noise! (background_data_directory = None)\n')

        # Create a iterator that returns a fake "event time", which we will
        # use as a seed for the RNG to ensure the reproducibility of the
        # generated synthetic noise.
        # For the HDF file path that contains that time, we always yield
        # None, so that we know that we need to generate synthetic noise.
        noise_times = ((1000000000 + _, None) for _ in count())

    # Otherwise, we set up a timeline object for the background noise, that
    # is, we read in all HDF files in the raw_data_directory and figure out
    # which parts of it are useable (i.e., have the right data quality and
    # injection bits set as specified in the config file).
    else:

        print('Using real noise from LIGO recordings! '
              '(background_data_directory = {})'.format(bkg_data_dir))
        print('Reading in raw data. This may take several minutes...', end=' ')

        # Create a timeline object by running over all HDF files once
        noise_timeline = NoiseTimeline(background_data_directory=bkg_data_dir,
                                       random_seed=random_seed)

        # Create a noise time generator so that can sample valid noise times
        # simply by calling next(noise_time_generator)
        delta_t = int(static_arguments['noise_interval_width'] / 2)
        noise_times = (noise_timeline.sample(delta_t=delta_t,
                                             dq_bits=config['dq_bits'],
                                             inj_bits=config['inj_bits'],
                                             return_paths=True)
                       for _ in iter(int, 1))

        print('Done!\n')

    # -------------------------------------------------------------------------
    # Define a convenience function to generate arguments for the simulation
    # -------------------------------------------------------------------------

    # Prevent waveform parameter variable from generating new parameter values
    # for every generated sample (ie. here we set the parameters for the whole
    # file)
    sample_params = next(waveform_parameters)

    def generate_arguments(injection=True):
        """Return the keyword arguments for simulating one sample."""

        # Only pass waveform parameters if we are making an injection
        waveform_params = sample_params if injection else None

        # Return all necessary arguments as a dictionary
        return dict(static_arguments=static_arguments,
                    event_tuple=next(noise_times),
                    waveform_params=waveform_params,
                    noise_random_seed=noise_random_seed)

    # -------------------------------------------------------------------------
    # Finally: Create our samples!
    # -------------------------------------------------------------------------

    # Keep track of all the samples (and parameters) we have generated
    samples = dict(injection_samples=[], noise_samples=[])
    injection_parameters = dict(injection_samples=[], noise_samples=[])

    # The procedure for generating samples with and without injections is
    # mostly the same; the only real difference is which arguments_generator
    # we have to use:
    for sample_type in ('injection_samples', 'noise_samples'):

        # ---------------------------------------------------------------------
        # Define some sample_type-specific shortcuts
        # ---------------------------------------------------------------------

        if sample_type == 'injection_samples':
            print('Generating samples containing an injection...')
            n_samples = config['n_injection_samples']
            arguments_generator = \
                (generate_arguments(injection=True) for _ in iter(int, 1))
        else:
            print('Generating samples *not* containing an injection...')
            n_samples = config['n_noise_samples']
            arguments_generator = \
                (generate_arguments(injection=False) for _ in iter(int, 1))

        # ---------------------------------------------------------------------
        # If we do not need to generate any samples, skip ahead:
        # ---------------------------------------------------------------------

        if n_samples == 0:
            print('Done! (n_samples=0)\n')
            continue

        # ---------------------------------------------------------------------
        # Use process-based multiprocessing to generate samples in parallel
        # ---------------------------------------------------------------------

        results_list = _run_sample_workers(
            arguments_generator=arguments_generator,
            n_samples=n_samples,
            n_processes=config['n_processes'],
            max_runtime=max_runtime)

        # ---------------------------------------------------------------------
        # Process results in the results_list
        # ---------------------------------------------------------------------

        # Separate the samples and the injection parameters
        samples[sample_type], injection_parameters[sample_type] = \
            zip(*results_list)

        # Sort all results by the event_time
        idx = np.argsort([_['event_time'] for _ in samples[sample_type]])
        samples[sample_type] = [samples[sample_type][i] for i in idx]
        injection_parameters[sample_type] = \
            [injection_parameters[sample_type][i] for i in idx]

        print('Sample generation completed!\n')

    # -------------------------------------------------------------------------
    # Compute the normalization parameters for this file
    # -------------------------------------------------------------------------

    print('Computing normalization parameters for sample...', end=' ')

    # Gather all samples (with and without injection) in one list
    all_samples = list(samples['injection_samples'] + samples['noise_samples'])

    # Group all samples by detector
    h1_samples = [_['h1_strain'] for _ in all_samples]
    l1_samples = [_['l1_strain'] for _ in all_samples]

    # Stack recordings along first axis
    h1_samples = np.vstack(h1_samples)
    l1_samples = np.vstack(l1_samples)

    # Compute the mean and standard deviation for both detectors as the median
    # of the means / standard deviations for each sample. This is more robust
    # towards outliers than computing "global" parameters by concatenating all
    # samples and treating them as a single, long time series.
    normalization_parameters = \
        dict(h1_mean=np.median(np.mean(h1_samples, axis=1), axis=0),
             l1_mean=np.median(np.mean(l1_samples, axis=1), axis=0),
             h1_std=np.median(np.std(h1_samples, axis=1), axis=0),
             l1_std=np.median(np.std(l1_samples, axis=1), axis=0))

    print('Done!\n')

    # -------------------------------------------------------------------------
    # Create a SampleFile dict from list of samples and save it as an HDF file
    # -------------------------------------------------------------------------

    print('Saving the results to HDF file ...', end=' ')

    # Initialize the dictionary that we use to create a SampleFile object
    sample_file_dict = dict(command_line_arguments=command_line_arguments,
                            injection_parameters=dict(),
                            injection_samples=dict(),
                            noise_samples=dict(),
                            normalization_parameters=normalization_parameters,
                            static_arguments=static_arguments)

    # Collect and add samples (with and without injection)
    for sample_type in ('injection_samples', 'noise_samples'):
        for key in ('event_time', 'h1_strain', 'l1_strain'):
            if samples[sample_type]:
                value = np.array([_[key] for _ in samples[sample_type]])
            else:
                value = None
            sample_file_dict[sample_type][key] = value

    # Collect and add injection_parameters (ignore noise samples here, because
    # for those, the injection_parameters are always None)
    other_keys = ['h1_signal', 'h1_snr', 'l1_signal', 'l1_snr', 'scale_factor']
    for key in list(variable_arguments + other_keys):
        if injection_parameters['injection_samples']:
            value = np.array([_[key] for _ in
                              injection_parameters['injection_samples']])
        else:
            value = None
        sample_file_dict['injection_parameters'][key] = value

    # Construct the path for the output HDF file
    if output_file_name == 'default.hdf':
        output_file_name = config['output_file_name']
    output_dir = os.path.join('.', 'output')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    sample_file_path = os.path.join(output_dir, output_file_name)

    # Create the SampleFile object and save it to the specified output file
    sample_file = SampleFile(data=sample_file_dict)
    sample_file.to_hdf(file_path=sample_file_path)

    print('Done!')

    # Get file size in MB and print the result
    sample_file_size = os.path.getsize(sample_file_path) / 1024**2
    print('Size of resulting HDF file: {:.2f}MB'.format(sample_file_size))
    print('')

    # -------------------------------------------------------------------------
    # Postliminaries
    # -------------------------------------------------------------------------

    # PyCBC always create a copy of the waveform parameters file, which we
    # can delete at the end of the sample generation process
    duplicate_path = os.path.join('.', config['waveform_params_file_name'])
    if os.path.exists(duplicate_path):
        os.remove(duplicate_path)

    # Print the total run time
    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
    print('')