def _str_to_bool(value):
    """Convert a command line flag value into a real boolean.

    ``bool('False')`` is ``True`` because any non-empty string is truthy, so
    the flags must be parsed explicitly. Actual ``bool`` objects (such as the
    argparse defaults) are passed through unchanged.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised
            spelling of true / false.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string is kept as "false" because it was the only value
    # that evaluated to False under the previous bool(...) conversion.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value (e.g. True or False), '
        'got {!r}.'.format(value))


def _draw_trim_windows(n_windows, cutoff_low, cutoff_high, max_shift):
    """Draw randomly shifted trim boundaries.

    For each window one integer shift is drawn uniformly from
    ``[-max_shift, max_shift]`` and added to both ``cutoff_low`` and
    ``cutoff_high``.

    Returns:
        A tuple ``(lows, highs)`` of two lists with ``n_windows`` entries.
    """
    lows, highs = [], []
    for _ in range(n_windows):
        shift = random.randint(-max_shift, max_shift)
        lows.append(cutoff_low + shift)
        highs.append(cutoff_high + shift)
    return lows, highs


def one_det_main(input_file_name="default.hdf",
                 template_file_name="default_templates.hdf",
                 output_file_name="default_snrs.hdf",
                 snr_range="default"):
    """Generate SNR time-series for the samples stored in an HDF file.

    The function parses the command line (``--config-file``,
    ``--filter-injection-samples``, ``--filter-templates``,
    ``--trim-output``), reads the JSON and INI configuration files from
    ``./config_files``, opens the sample HDF file and then runs
    ``InjectionsBuildFiles`` and / or ``FiltersBuildFiles`` depending on the
    flags and on the sample / template counts given in the JSON config.

    Args:
        input_file_name: Name of the sample HDF file. The default value
            ``"default.hdf"`` means "use ``config['output_file_name']``".
        template_file_name: Name of the templates HDF file inside
            ``./output``. The default value ``"default_templates.hdf"``
            means "use ``config['template_output_file_name']``".
        output_file_name: Name of the SNR output file. The default value
            ``"default_snrs.hdf"`` means "use
            ``config['snr_output_file_name']``".
        snr_range: Name of a sub-directory of ``./output`` in which the
            samples are looked up; ``"default"`` means ``./output`` itself.

    Raises:
        SystemExit: With exit status 1 if the samples directory does not
            exist or the configured approximant is not a time-domain
            approximant.
    """

    # -------------------------------------------------------------------------
    # Preliminaries
    # -------------------------------------------------------------------------

    # Start the stopwatch
    script_start = time.time()

    print('')
    print('GENERATE A GW SAMPLE SNR TIME-SERIES')
    print('')

    # -------------------------------------------------------------------------
    # Parse the command line arguments
    # -------------------------------------------------------------------------

    # Set up the parser and add arguments
    parser = argparse.ArgumentParser(description='Generate a GW data sample.')

    # Add arguments (and set default values where applicable). The boolean
    # flags use an explicit converter, because bool('False') would be True.
    parser.add_argument('--config-file',
                        help='Name of the JSON configuration file which '
                        'controls the sample generation process.',
                        default='default.json')
    parser.add_argument('--filter-injection-samples',
                        help='Boolean expression for whether to '
                        'calculate SNRs of injection signals. '
                        'Default: True',
                        type=_str_to_bool,
                        default=True)
    parser.add_argument('--filter-templates',
                        help='Boolean expression for whether to calculate '
                        'SNRs of all signals using a set of templates. '
                        'Default: True',
                        type=_str_to_bool,
                        default=True)
    parser.add_argument('--trim-output',
                        help='Boolean expression for whether to trim the '
                        'SNR time series output. '
                        'Default: True',
                        type=_str_to_bool,
                        default=True)

    # Parse the arguments that were passed when calling this script
    print('Parsing command line arguments...', end=' ')
    arguments = vars(parser.parse_args())
    print('Done!')

    # Set up shortcut for the command line arguments
    filter_injection_samples = arguments['filter_injection_samples']
    filter_templates = arguments['filter_templates']
    trim_output = arguments['trim_output']

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating in JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the static_arguments (the variable arguments are not needed)
    print('Reading and validating in INI configuration file...', end=' ')
    _, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')

    # -------------------------------------------------------------------------
    # Resolve directories and file paths
    # -------------------------------------------------------------------------

    # Check output file directory exists
    output_dir = os.path.join('.', 'output')
    if snr_range == "default":
        samples_output_dir = output_dir
    else:
        samples_output_dir = os.path.join(output_dir, snr_range)
    if not os.path.exists(samples_output_dir):
        print("Output folder cannot be found. Please create the folder",
              "'{}' to store data in.".format(samples_output_dir))
        raise SystemExit(1)

    # Get file names from config file
    if template_file_name == "default_templates.hdf":
        templates_file_path = os.path.join(output_dir,
                                           config['template_output_file_name'])
    else:
        templates_file_path = os.path.join(output_dir, template_file_name)

    if input_file_name == "default.hdf":
        input_file_path = os.path.join(samples_output_dir,
                                       config['output_file_name'])
    else:
        input_file_path = os.path.join(samples_output_dir, input_file_name)

    # NOTE(review): the default output name is placed in ./output while an
    # explicit name is placed in the snr_range sub-directory. This is kept
    # as in the original code -- confirm that it is intended.
    if output_file_name == "default_snrs.hdf":
        output_file_path = os.path.join(output_dir,
                                        config['snr_output_file_name'])
    else:
        output_file_path = os.path.join(samples_output_dir, output_file_name)

    # -------------------------------------------------------------------------
    # Read in the sample file
    # -------------------------------------------------------------------------

    print('Reading in samples HDF file...', end=' ')

    # The context manager guarantees that the HDF file handle is released,
    # also when the run is aborted further below.
    with h5py.File(input_file_path, 'r') as df:

        print('Done!')

        # ---------------------------------------------------------------------
        # Collect the parameters needed for the matched filtering
        # ---------------------------------------------------------------------

        # Get approximant for generating matched filter templates
        apx = static_arguments["approximant"]
        if apx not in td_approximants():
            print("Invalid waveform approximant. Please put a valid "
                  "time-series approximant in the waveform params file.")
            raise SystemExit(1)

        sample_length = (static_arguments["seconds_before_event"] +
                         static_arguments["seconds_after_event"])
        delta_f = 1.0 / sample_length

        # Get f-lower and delta-t from config files
        sampling_rate = static_arguments["target_sampling_rate"]
        f_low = static_arguments["f_lower"]
        delta_t = 1.0 / sampling_rate

        # Initialise dict of all parameters required for generating the
        # template waveforms
        param_dict = dict(injections=dict(mass1=[],
                                          mass2=[],
                                          spin1z=[],
                                          spin2z=[],
                                          ra=[],
                                          dec=[],
                                          coa_phase=[],
                                          inclination=[],
                                          polarization=[],
                                          injection_snr=[],
                                          f_lower=f_low,
                                          approximant=apx,
                                          delta_t=delta_t))

        # Number of injection samples, noise samples and templates
        n_injection_samples = config['n_injection_samples']
        n_noise_samples = config['n_noise_samples']
        n_templates = config['n_template_samples']

        # Get trim cutoffs (config value multiplied by the sampling rate)
        trim_cutoff_low = config['snr_output_cutoff_low'] * sampling_rate
        trim_cutoff_high = config['snr_output_cutoff_high'] * sampling_rate

        # random.randint() needs integer bounds, but true division by two
        # produces a float, so the maximum shift is converted explicitly.
        trim_cutoff_variation = int(
            config['snr_output_cutoff_variation'] * sampling_rate / 2)

        # Generate random time shifts and apply them to the start and end
        # times for each type of sample. The order of the three calls fixes
        # the order in which random numbers are consumed.
        inj_low, inj_high = _draw_trim_windows(
            n_injection_samples,
            trim_cutoff_low, trim_cutoff_high, trim_cutoff_variation)
        noise_low, noise_high = _draw_trim_windows(
            n_noise_samples * n_templates,
            trim_cutoff_low, trim_cutoff_high, trim_cutoff_variation)
        temp_inj_low, temp_inj_high = _draw_trim_windows(
            n_injection_samples * n_templates,
            trim_cutoff_low, trim_cutoff_high, trim_cutoff_variation)

        # ---------------------------------------------------------------------
        # Compute SNR time-series
        # ---------------------------------------------------------------------

        # Generate optimal SNR time series
        if filter_injection_samples:

            print('Generating OMF SNR time-series for injection samples...')

            if n_injection_samples > 0:
                injections_build_files = InjectionsBuildFiles(
                    output_file_path=output_file_path,
                    param_dict=param_dict,
                    df=df,
                    n_samples=n_injection_samples,
                    trim_output=trim_output,
                    inj_low=inj_low,
                    inj_high=inj_high)
                injections_build_files.run()

                print('Done!')
            else:
                print('Done! (n-samples = 0)\n')
        else:
            print('No SNR time-series generated for injections. '
                  'Please set filter-injection-samples to True.')

        # Generate SNR time series with template bank, using injection and
        # noise samples
        if filter_templates:

            print('Reading in the templates HDF file...', end=' ')

            with h5py.File(templates_file_path, 'r') as templates_df:

                print('Done!')

                print("Generating SNR time-series for injection and noise "
                      "samples using a template set...")

                if n_templates == 0:
                    print('Done! (n-templates = 0) '
                          'Please generate templates before running.\n')
                elif (n_noise_samples > 0) or (n_injection_samples > 0):
                    filters_build_files = FiltersBuildFiles(
                        output_file_path=output_file_path,
                        df=df,
                        templates_df=templates_df,
                        n_noise_samples=n_noise_samples,
                        n_injection_samples=n_injection_samples,
                        n_templates=n_templates,
                        f_low=f_low,
                        delta_t=delta_t,
                        filter_injection_samples=filter_injection_samples,
                        delta_f=delta_f,
                        trim_output=trim_output,
                        inj_low=temp_inj_low,
                        inj_high=temp_inj_high,
                        noise_low=noise_low,
                        noise_high=noise_high)
                    filters_build_files.run()

                    print('Done!')
                else:
                    print('Done! (n-noise-samples = 0)\n')
        else:
            print('No SNR time-series generated using templates. '
                  'Please set filter-templates to True.')

    # Get file size in MB and print the result. The file may be missing when
    # neither of the two filtering steps has written anything.
    if os.path.exists(output_file_path):
        sample_file_size = os.path.getsize(output_file_path) / 1024**2
        print('Size of resulting HDF file: {:.2f}MB'.format(sample_file_size))
    else:
        print('No output HDF file was written.')
    print('')

    # -------------------------------------------------------------------------
    # Postliminaries
    # -------------------------------------------------------------------------

    # Print the total run time
    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
    print('')
コード例 #2
0
    template_id = int(arguments['template_id'])
    seconds_before = float(arguments['seconds_before'])
    seconds_after = float(arguments['seconds_after'])
    plot_path = str(arguments['plot_path'])

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating in JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the variable_arguments and static_arguments
    print('Reading and validating in INI configuration file...', end=' ')
    variable_arguments, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')
コード例 #3
0
def two_det_main(random_seed=-1,noise_random_seed=-1,output_file_name='default.hdf'):
    """Generate a sample file with H1 / L1 strain data and save it as HDF.

    The function parses ``--config-file`` from the command line, reads the
    JSON and INI configuration files from ``./config_files``, generates the
    configured number of samples with and without an injection in worker
    processes (``queue_worker``), computes normalization parameters and
    writes everything to ``./output/<output_file_name>`` through
    ``SampleFile``.

    Args:
        random_seed: Seed for ``np.random`` and for the parameter / noise
            time generators. ``-1`` means "use ``config['random_seed']``".
        noise_random_seed: Seed forwarded to every worker as
            ``noise_random_seed``. ``-1`` means "use
            ``config['noise_random_seed']``".
        output_file_name: Name of the HDF file inside ``./output``. The
            default ``'default.hdf'`` means "use
            ``config['output_file_name']``".

    Side effects:
        Replaces ``sys.stdout`` with an ``Unbuffered`` wrapper, creates
        ``./output`` if it is missing, and removes a copy of the waveform
        params file from the current directory if one exists.
    """

    # -------------------------------------------------------------------------
    # Preliminaries
    # -------------------------------------------------------------------------

    # Disable output buffering ('flush' option is not available for Python 2)
    sys.stdout = Unbuffered(sys.stdout)

    # Start the stopwatch
    script_start = time.time()

    print('')
    print('GENERATE A GW DATA SAMPLE FILE')
    print('')

    # -------------------------------------------------------------------------
    # Parse the command line arguments
    # -------------------------------------------------------------------------

    # Set up the parser and add arguments
    parser = argparse.ArgumentParser(description='Generate a GW data sample.')
    parser.add_argument('--config-file',
                        help='Name of the JSON configuration file which '
                             'controls the sample generation process.',
                        default='default.json')

    # Parse the arguments that were passed when calling this script
    print('Parsing command line arguments...', end=' ')
    command_line_arguments = vars(parser.parse_args())
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in JSON config file specifying the sample generation process
    # -------------------------------------------------------------------------

    # Build the full path to the config file
    json_config_name = command_line_arguments['config_file']
    json_config_path = os.path.join('.', 'config_files', json_config_name)

    # Read the JSON configuration into a dict
    print('Reading and validating in JSON configuration file...', end=' ')
    config = read_json_config(json_config_path)
    print('Done!')

    # -------------------------------------------------------------------------
    # Read in INI config file specifying the static_args and variable_args
    # -------------------------------------------------------------------------

    # Build the full path to the waveform params file
    ini_config_name = config['waveform_params_file_name']
    ini_config_path = os.path.join('.', 'config_files', ini_config_name)

    # Read in the variable_arguments and static_arguments
    print('Reading and validating in INI configuration file...', end=' ')
    variable_arguments, static_arguments = read_ini_config(ini_config_path)
    print('Done!\n')

    # -------------------------------------------------------------------------
    # Shortcuts and random seed
    # -------------------------------------------------------------------------

    # Set the random seed for this script (-1 selects the configured seed)
    if random_seed==-1:
        np.random.seed(config['random_seed'])
        random_seed = config['random_seed']
    else:
        np.random.seed(random_seed)

    if noise_random_seed==-1:
        noise_random_seed = config['noise_random_seed']

    # Define some useful shortcuts
    max_runtime = config['max_runtime']
    bkg_data_dir = config['background_data_directory']

    # -------------------------------------------------------------------------
    # Construct a generator for sampling waveform parameters
    # -------------------------------------------------------------------------

    # Initialize a waveform parameter generator that can sample injection
    # parameters from the distributions specified in the config file
    waveform_parameter_generator = \
        WaveformParameterGenerator(config_file=ini_config_path,
                                   random_seed=random_seed)

    # Wrap it in a generator expression so that we can easily sample
    # from it by calling next(waveform_parameters). iter(int, 1) never
    # terminates, because int() returns 0 and never the sentinel 1.
    waveform_parameters = \
        (waveform_parameter_generator.draw() for _ in iter(int, 1))

    # -------------------------------------------------------------------------
    # Construct a generator for sampling valid noise times
    # -------------------------------------------------------------------------

    # If the 'background_data_directory' is None, we will use synthetic noise
    if config['background_data_directory'] is None:

        print('Using synthetic noise! (background_data_directory = None)\n')

        # Create a iterator that returns a fake "event time", which we will
        # use as a seed for the RNG to ensure the reproducibility of the
        # generated synthetic noise.
        # For the HDF file path that contains that time, we always yield
        # None, so that we know that we need to generate synthetic noise.
        noise_times = ((1000000000 + _, None) for _ in count())

    # Otherwise, we set up a timeline object for the background noise, that
    # is, we read in all HDF files in the raw_data_directory and figure out
    # which parts of it are useable (i.e., have the right data quality and
    # injection bits set as specified in the config file).
    else:

        print('Using real noise from LIGO recordings! '
              '(background_data_directory = {})'.format(bkg_data_dir))
        print('Reading in raw data. This may take several minutes...', end=' ')

        # Create a timeline object by running over all HDF files once
        noise_timeline = NoiseTimeline(background_data_directory=bkg_data_dir,
                                       random_seed=random_seed)

        # Create a noise time generator so that can sample valid noise times
        # simply by calling next(noise_time_generator)
        delta_t = int(static_arguments['noise_interval_width'] / 2)
        noise_times = (noise_timeline.sample(delta_t=delta_t,
                                             dq_bits=config['dq_bits'],
                                             inj_bits=config['inj_bits'],
                                             return_paths=True)
                       for _ in iter(int, 1))

        print('Done!\n')

    # -------------------------------------------------------------------------
    # Define a convenience function to generate arguments for the simulation
    # -------------------------------------------------------------------------

    # Prevent waveform parameter variable from generating new parameter values
    # for every generated sample (ie. here we set the parameters for the whole
    # file)
    sample_params = next(waveform_parameters)

    def generate_arguments(injection=True):
        """Return the keyword arguments for one simulation run."""

        # Only pass waveform parameters if we are making an injection
        waveform_params = sample_params if injection else None

        # Return all necessary arguments as a dictionary
        return dict(static_arguments=static_arguments,
                    event_tuple=next(noise_times),
                    waveform_params=waveform_params,
                    noise_random_seed=noise_random_seed)

    # -------------------------------------------------------------------------
    # Finally: Create our samples!
    # -------------------------------------------------------------------------

    # Keep track of all the samples (and parameters) we have generated
    samples = dict(injection_samples=[], noise_samples=[])
    injection_parameters = dict(injection_samples=[], noise_samples=[])

    # The procedure for generating samples with and without injections is
    # mostly the same; the only real difference is which arguments_generator
    # we have to use:
    for sample_type in ('injection_samples', 'noise_samples'):

        # ---------------------------------------------------------------------
        # Define some sample_type-specific shortcuts
        # ---------------------------------------------------------------------

        if sample_type == 'injection_samples':
            print('Generating samples containing an injection...')
            n_samples = config['n_injection_samples']
            arguments_generator = \
                (generate_arguments(injection=True) for _ in iter(int, 1))

        else:
            print('Generating samples *not* containing an injection...')
            n_samples = config['n_noise_samples']
            arguments_generator = \
                (generate_arguments(injection=False) for _ in iter(int, 1))

        # ---------------------------------------------------------------------
        # If we do not need to generate any samples, skip ahead:
        # ---------------------------------------------------------------------

        if n_samples == 0:
            print('Done! (n_samples=0)\n')
            continue

        # ---------------------------------------------------------------------
        # Initialize queues for the simulation arguments and the results
        # ---------------------------------------------------------------------

        # Initialize a Queue and fill it with as many arguments as we
        # want to generate samples
        arguments_queue = Queue()
        for i in range(n_samples):
            arguments_queue.put(next(arguments_generator))

        # Initialize a Queue and a list to store the generated samples
        results_queue = Queue()
        results_list = []

        # ---------------------------------------------------------------------
        # Use process-based multiprocessing to generate samples in parallel
        # ---------------------------------------------------------------------

        # Use a tqdm context manager for the progress bar
        tqdm_args = dict(total=n_samples, ncols=80, unit='sample')
        with tqdm(**tqdm_args) as progressbar:

            # Keep track of all running processes
            list_of_processes = []

            # While we haven't produced as many results as desired, keep going
            while len(results_list) < n_samples:

                # -------------------------------------------------------------
                # Loop over processes to see if anything finished or got stuck
                # -------------------------------------------------------------

                # Iterate over a snapshot: entries are removed from
                # list_of_processes inside the loop, and removing from the
                # list that is being iterated would skip the next entry.
                for process_dict in list(list_of_processes):

                    # Get the process object and its current runtime
                    process = process_dict['process']
                    runtime = time.time() - process_dict['start_time']

                    # Check if the process is still running when it should
                    # have terminated already (according to max_runtime)
                    if process.is_alive() and (runtime > max_runtime):

                        # Kill process that's been running too long
                        process.terminate()
                        process.join()
                        list_of_processes.remove(process_dict)

                        # Add new arguments to queue to replace the failed ones
                        new_arguments = next(arguments_generator)
                        arguments_queue.put(new_arguments)

                    # If process has terminated already
                    elif not process.is_alive():

                        # If the process failed, add new arguments to queue
                        if process.exitcode != 0:
                            new_arguments = next(arguments_generator)
                            arguments_queue.put(new_arguments)

                        # Remove process from the list of running processes
                        list_of_processes.remove(process_dict)

                # -------------------------------------------------------------
                # Start new processes if necessary
                # -------------------------------------------------------------

                # Start new processes until the arguments_queue is empty, or
                # we have reached the maximum number of processes
                while (arguments_queue.qsize() > 0 and
                       len(list_of_processes) < config['n_processes']):

                    # Get arguments from queue and start new process
                    arguments = arguments_queue.get()
                    p = Process(target=queue_worker,
                                kwargs=dict(arguments=arguments,
                                            results_queue=results_queue))

                    # Remember this process and its starting time
                    process_dict = dict(process=p, start_time=time.time())
                    list_of_processes.append(process_dict)

                    # Finally, start the process
                    p.start()

                # -------------------------------------------------------------
                # Move results from results_queue to results_list
                # -------------------------------------------------------------

                # Without this part, the results_queue blocks the worker
                # processes so that they won't terminate
                while results_queue.qsize() > 0:
                    results_list.append(results_queue.get())

                # Update the progress bar based on the number of results
                progressbar.update(len(results_list) - progressbar.n)

                # Sleep for some time before we check the processes again
                time.sleep(0.5)

        # ---------------------------------------------------------------------
        # Process results in the results_list
        # ---------------------------------------------------------------------

        # Separate the samples and the injection parameters (every result is
        # unpacked as a pair of sample and injection parameters)
        samples[sample_type], injection_parameters[sample_type] = \
            zip(*results_list)

        # Sort all results by the event_time
        idx = np.argsort([_['event_time'] for _ in list(samples[sample_type])])
        samples[sample_type] = \
            list([samples[sample_type][i] for i in idx])
        injection_parameters[sample_type] = \
            list([injection_parameters[sample_type][i] for i in idx])

        print('Sample generation completed!\n')

    # -------------------------------------------------------------------------
    # Compute the normalization parameters for this file
    # -------------------------------------------------------------------------

    print('Computing normalization parameters for sample...', end=' ')

    # Gather all samples (with and without injection) in one list
    all_samples = list(samples['injection_samples'] + samples['noise_samples'])

    # Group all samples by detector
    h1_samples = [_['h1_strain'] for _ in all_samples]
    l1_samples = [_['l1_strain'] for _ in all_samples]

    # Stack recordings along first axis
    # NOTE(review): np.vstack() raises for an empty list, i.e. when both
    # sample counts in the config are 0 -- confirm whether that case matters.
    h1_samples = np.vstack(h1_samples)
    l1_samples = np.vstack(l1_samples)

    # Compute the mean and standard deviation for both detectors as the median
    # of the means / standard deviations for each sample. This is more robust
    # towards outliers than computing "global" parameters by concatenating all
    # samples and treating them as a single, long time series.
    normalization_parameters = \
        dict(h1_mean=np.median(np.mean(h1_samples, axis=1), axis=0),
             l1_mean=np.median(np.mean(l1_samples, axis=1), axis=0),
             h1_std=np.median(np.std(h1_samples, axis=1), axis=0),
             l1_std=np.median(np.std(l1_samples, axis=1), axis=0))

    print('Done!\n')

    # -------------------------------------------------------------------------
    # Create a SampleFile dict from list of samples and save it as an HDF file
    # -------------------------------------------------------------------------

    print('Saving the results to HDF file ...', end=' ')

    # Initialize the dictionary that we use to create a SampleFile object
    sample_file_dict = dict(command_line_arguments=command_line_arguments,
                            injection_parameters=dict(),
                            injection_samples=dict(),
                            noise_samples=dict(),
                            normalization_parameters=normalization_parameters,
                            static_arguments=static_arguments)

    # Collect and add samples (with and without injection)
    for sample_type in ('injection_samples', 'noise_samples'):
        for key in ('event_time', 'h1_strain', 'l1_strain'):
            if samples[sample_type]:
                value = np.array([_[key] for _ in list(samples[sample_type])])
            else:
                value = None
            sample_file_dict[sample_type][key] = value

    # Collect and add injection_parameters (ignore noise samples here, because
    # for those, the injection_parameters are always None)
    other_keys = ['h1_signal', 'h1_snr', 'l1_signal', 'l1_snr', 'scale_factor']
    for key in list(variable_arguments + other_keys):
        if injection_parameters['injection_samples']:
            value = np.array([_[key] for _ in
                              injection_parameters['injection_samples']])
        else:
            value = None
        sample_file_dict['injection_parameters'][key] = value

    # Construct the path for the output HDF file
    if output_file_name=='default.hdf':
        output_file_name = config['output_file_name']
    output_dir = os.path.join('.', 'output')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    sample_file_path = os.path.join(output_dir, output_file_name)

    # Create the SampleFile object and save it to the specified output file
    sample_file = SampleFile(data=sample_file_dict)
    sample_file.to_hdf(file_path=sample_file_path)

    print('Done!')

    # Get file size in MB and print the result
    sample_file_size = os.path.getsize(sample_file_path) / 1024**2
    print('Size of resulting HDF file: {:.2f}MB'.format(sample_file_size))
    print('')

    # -------------------------------------------------------------------------
    # Postliminaries
    # -------------------------------------------------------------------------

    # PyCBC always create a copy of the waveform parameters file, which we
    # can delete at the end of the sample generation process
    duplicate_path = os.path.join('.', config['waveform_params_file_name'])
    if os.path.exists(duplicate_path):
        os.remove(duplicate_path)

    # Print the total run time
    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
    print('')