Esempio n. 1
0
    def generate_mg_process(mg_directory,
                            temp_directory,
                            proc_card_file,
                            mg_process_directory,
                            initial_command=None,
                            log_file=None):
        """
        Lets MadGraph create the process folder.

        Makes sure the required folders exist and then forwards all arguments to the generate_mg_process() callable
        found in the enclosing scope (NOTE(review): presumably the imported MadGraph wrapper of the same name --
        confirm against the file's imports).

        :param mg_directory: MadGraph 5 directory.
        :param temp_directory: A temporary directory.
        :param proc_card_file: Path to the process card that tells MadGraph how to generate the process.
        :param mg_process_directory: Path to the MG process directory.
        :param initial_command: Initial shell commands that have to be executed before MG is run (e.g. loading a virtual
                                environment).
        :param log_file: Path to a log file in which the MadGraph output is saved. May be None, in which case no log
                         folder is created and None is passed on unchanged.
        """

        logging.info('Generating MadGraph process folder from %s at %s',
                     proc_card_file, mg_process_directory)

        # log_file is optional: os.path.dirname(None) raises a TypeError, so the log folder is only derived when a
        # log file was actually requested.
        required_folders = [temp_directory, mg_process_directory]
        if log_file is not None:
            required_folders.append(os.path.dirname(log_file))
        create_missing_folders(required_folders)

        generate_mg_process(mg_directory,
                            temp_directory,
                            proc_card_file,
                            mg_process_directory,
                            initial_command=initial_command,
                            log_file=log_file)
Esempio n. 2
0
    def extract_samples_test(self,
                             theta,
                             n_samples,
                             folder,
                             filename,
                             test_split=0.3):
        """
        Extracts evaluation samples x ~ p(x|theta).

        :param theta: tuple (type, value) that defines the parameter point or prior over parameter points used for the
                      sampling. Use the helper functions constant_benchmark_theta(), multiple_benchmark_thetas(),
                      constant_morphing_theta(), multiple_morphing_thetas(), or random_morphing_thetas().
        :param n_samples: Total number of samples to be drawn.
        :param folder: Folder for the resulting samples.
        :param filename: Label for the filenames. The actual filenames will add a prefix such as 'x_', and the extension
                         '.npy'.
        :param test_split: Fraction of events reserved for this evaluation sample (will not be used for any training
                           samples). If None, <= 0 or >= 1, all events are used.
        :return: Tuple (x, theta) as obtained from self.extract_sample(). Both are also saved to
                 folder/x_<filename>.npy and folder/theta_<filename>.npy.
        :raises ValueError: If the computed index of the first test event lies outside [0, self.n_samples].
        """

        logging.info('Extracting evaluation sample. Sampling according to %s',
                     theta)

        create_missing_folders([folder])

        # Thetas
        theta_types, theta_values, n_samples_per_theta = parse_theta(
            theta, n_samples)

        # Train / test split: the test sample starts right after the last training event
        if test_split is None or test_split <= 0. or test_split >= 1.:
            first_test_index = 0
        else:
            first_test_index = int(round(
                (1. - test_split) * self.n_samples, 0)) + 1

            if first_test_index < 0 or first_test_index > self.n_samples:
                # The placeholders have to be filled explicitly; passing the values as extra exception arguments
                # would leave the literal '{}' in the message.
                raise ValueError(
                    "Irregular in train / test split: sample {} / {}".format(
                        first_test_index, self.n_samples))

        # Extract information
        x, _, (theta, ) = self.extract_sample(
            theta_sets_types=[theta_types],
            theta_sets_values=[theta_values],
            n_samples_per_theta=n_samples_per_theta,
            start_event=first_test_index,
            end_event=None)

        # Save data
        np.save(folder + '/theta_' + filename + '.npy', theta)
        np.save(folder + '/x_' + filename + '.npy', x)

        return x, theta
Esempio n. 3
0
    def save(self, filename):
        """
        Saves the current setup to a file via save_madminer_settings().

        Parameters and benchmarks are always written. The morphing components and the morphing matrix are added only
        when a morpher is set on this instance.

        :param filename: Path to the output file. Its parent folder is passed to create_missing_folders() first.
        """

        create_missing_folders([os.path.dirname(filename)])

        has_morphing = self.morpher is not None

        if has_morphing:
            logging.info('Saving setup (including morphing) to %s', filename)
        else:
            logging.info('Saving setup (without morphing) to %s', filename)

        # Collect the keyword arguments once so that a single call covers both cases
        settings = {
            'filename': filename,
            'parameters': self.parameters,
            'benchmarks': self.benchmarks,
        }
        if has_morphing:
            settings['morphing_components'] = self.morpher.components
            settings['morphing_matrix'] = self.morpher.morphing_matrix

        save_madminer_settings(**settings)
Esempio n. 4
0
    def run_mg_pythia(mg_directory,
                      mg_process_directory,
                      temp_directory,
                      run_card_file=None,
                      param_card_file=None,
                      reweight_card_file=None,
                      pythia8_card_file=None,
                      initial_command=None,
                      log_file=None):
        """
        Runs the event generation with MadGraph and Pythia.

        Makes sure the required folders exist and then forwards all arguments to the run_mg_pythia() callable found
        in the enclosing scope (NOTE(review): presumably the imported MadGraph wrapper of the same name -- confirm
        against the file's imports).

        :param mg_directory: Path to the MadGraph 5 base directory.
        :param mg_process_directory: Path to the MG process directory.
        :param temp_directory: Path to a temporary directory.
        :param run_card_file: Path to the MadGraph run card. If None, the card present in the process folder is used.
        :param param_card_file: Path to the MadGraph parameter card. If None, the card present in the process folder
                                is used.
        :param reweight_card_file: Path to the MadGraph reweight card. If None, the card present in the process folder
                                   is used.
        :param pythia8_card_file: Path to the MadGraph Pythia8 card. If None, the card present in the process folder
                                  is used.
        :param initial_command: Initial shell commands that have to be executed before MG is run (e.g. loading a virtual
                                environment).
        :param log_file: Path to a log file in which the MadGraph output is saved. May be None, in which case no log
                         folder is created and None is passed on unchanged.
        """

        logging.info('Starting MadGraph and Pythia in %s',
                     mg_process_directory)

        # log_file is optional: os.path.dirname(None) raises a TypeError, so the log folder is only derived when a
        # log file was actually requested.
        required_folders = [temp_directory, mg_process_directory]
        if log_file is not None:
            required_folders.append(os.path.dirname(log_file))
        create_missing_folders(required_folders)

        run_mg_pythia(mg_directory,
                      mg_process_directory,
                      temp_directory,
                      run_card_file,
                      param_card_file,
                      reweight_card_file,
                      pythia8_card_file,
                      initial_command=initial_command,
                      log_file=log_file)
Esempio n. 5
0
    def extract_samples_train_more_ratios(self,
                                          theta0,
                                          theta1,
                                          n_samples,
                                          folder,
                                          filename,
                                          additional_thetas=None,
                                          test_split=0.3):
        """
        Extracts training samples x ~ p(x|theta0) and x ~ p(x|theta1) together with the class label y, the joint
        likelihood ratio r(x,z|theta0, theta1), and the joint scores t(x,z|theta0) as well as t(x,z|theta1) for methods
        such as CARL, ROLR, CASCAL, and RASCAL.

        :param theta0: tuple (type, value) that defines the numerator parameter point or prior over parameter points.
                       Use the helper functions constant_benchmark_theta(), multiple_benchmark_thetas(),
                       constant_morphing_theta(), multiple_morphing_thetas(), or random_morphing_thetas().
        :param theta1: tuple (type, value) that defines the denominator parameter point or prior over parameter
                       points. Use the helper functions constant_benchmark_theta(), multiple_benchmark_thetas(),
                       constant_morphing_theta(), multiple_morphing_thetas(), or random_morphing_thetas().
        :param n_samples: Total number of samples to be drawn. Half of them are requested for each of the two
                          sampling runs.
        :param folder: Folder for the resulting samples.
        :param filename: Label for the filenames. The actual filenames will add a prefix such as 'x_', and the extension
                         '.npy'.
        :param additional_thetas: list of tuples (type, value) that defines additional theta points at which ratio and
                                  score are evaluated, and which are then used to create additional training data
                                  points. Use the helper functions constant_benchmark_theta(),
                                  multiple_benchmark_thetas(), constant_morphing_theta(), multiple_morphing_thetas(), or
                                  random_morphing_thetas().
        :param test_split: Fraction of events reserved for the test sample (will not be used for any training samples).
                           If None, <= 0 or >= 1, all events are used for training.
        :return: Tuple (x, theta0, theta1, y, r_xz, t_xz0, t_xz1) after all arrays have been passed jointly through
                 shuffle(). y is a column vector that is 0 for events from the theta0 run and 1 for events from the
                 theta1 run. Every array is also saved as folder/<prefix>_<filename>.npy.
        :raises RuntimeError: If no morphing setup is loaded.
        :raises ValueError: If the computed index of the last training event lies outside [0, self.n_samples].
        """

        logging.info(
            'Extracting training sample for ratio-based methods. Numerator hypothesis: %s, denominator '
            'hypothesis: %s', theta0, theta1)

        if self.morpher is None:
            raise RuntimeError(
                'No morphing setup loaded. Cannot calculate score.')

        create_missing_folders([folder])

        if additional_thetas is None:
            additional_thetas = []
        n_additional_thetas = len(additional_thetas)
        n_copies = 1 + n_additional_thetas

        # Theta set 0 is the numerator, set 1 the denominator, sets 2, 3, ... are the additional evaluation points
        all_thetas = [theta0, theta1] + list(additional_thetas)

        # Augmented data (gold)
        augmented_data_definitions_0 = [('ratio', 0, 1), ('score', 0),
                                        ('score', 1)]
        augmented_data_definitions_1 = [('ratio', 0, 1), ('score', 0),
                                        ('score', 1)]
        for i in range(n_additional_thetas):
            # Events sampled from theta0 are additionally compared against each extra theta as denominator ...
            augmented_data_definitions_0.append(('ratio', 0, i + 2))
            augmented_data_definitions_0.append(('score', i + 2))
            # ... and events sampled from theta1 against each extra theta as numerator
            augmented_data_definitions_1.append(('ratio', i + 2, 1))
            augmented_data_definitions_1.append(('score', i + 2))

        # Train / test split
        if test_split is None or test_split <= 0. or test_split >= 1.:
            last_train_index = None
        else:
            last_train_index = int(round((1. - test_split) * self.n_samples,
                                         0))

            if last_train_index < 0 or last_train_index > self.n_samples:
                # The placeholders have to be filled explicitly; passing the values as extra exception arguments
                # would leave the literal '{}' in the message.
                raise ValueError(
                    "Irregular train / test split: sample {} / {}".format(
                        last_train_index, self.n_samples))

        def parse_all_thetas():
            # Parses every theta set anew and returns (types, values, smallest number of samples per theta).
            # Called once per sampling run because parse_theta() may yield different points on each call
            # (e.g. for random priors).
            parsed_types = []
            parsed_values = []
            # NOTE(review): the initial value also acts as an upper bound of 1000000 samples per theta
            min_samples_per_theta = 1000000

            for this_theta in all_thetas:
                this_types, this_values, this_n_samples = parse_theta(
                    this_theta, n_samples // 2)
                parsed_types.append(this_types)
                parsed_values.append(this_values)
                min_samples_per_theta = min(this_n_samples,
                                            min_samples_per_theta)

            return parsed_types, parsed_values, min_samples_per_theta

        # Parse thetas for theta0 sampling
        theta_types, theta_values, n_samples_per_theta = parse_all_thetas()

        # Start for theta0
        x_0, augmented_data_0, thetas_0 = self.extract_sample(
            theta_sets_types=theta_types,
            theta_sets_values=theta_values,
            n_samples_per_theta=n_samples_per_theta,
            augmented_data_definitions=augmented_data_definitions_0,
            sampling_theta_index=0,
            start_event=0,
            end_event=last_train_index)
        n_actual_samples = x_0.shape[0]

        # Analyse theta values from theta0 run
        theta0_0 = thetas_0[0]
        theta1_0 = thetas_0[1]
        # list() keeps the concatenations below valid even if extract_sample() returns a tuple
        thetas_eval = list(thetas_0[2:])

        # Analyse augmented data from theta0 run: entries 3, 5, ... are ratios and 4, 6, ... scores, following the
        # order of augmented_data_definitions_0
        r_xz_0 = augmented_data_0[0]
        t_xz0_0 = augmented_data_0[1]
        t_xz1_0 = augmented_data_0[2]
        r_xz_eval = [augmented_data_0[3 + 2 * i]
                     for i in range(len(thetas_eval))]
        t_xz_eval = [augmented_data_0[4 + 2 * i]
                     for i in range(len(thetas_eval))]

        # Each event is repeated once per denominator; numerator quantities are simply copied
        x_0 = np.vstack([x_0] * n_copies)
        r_xz_0 = np.vstack([r_xz_0] + r_xz_eval)
        t_xz0_0 = np.vstack([t_xz0_0] * n_copies)
        t_xz1_0 = np.vstack([t_xz1_0] + t_xz_eval)
        theta0_0 = np.vstack([theta0_0] * n_copies)
        theta1_0 = np.vstack([theta1_0] + thetas_eval)

        # Parse thetas for theta1 sampling
        theta_types, theta_values, n_samples_per_theta = parse_all_thetas()

        # Start for theta1
        x_1, augmented_data_1, thetas_1 = self.extract_sample(
            theta_sets_types=theta_types,
            theta_sets_values=theta_values,
            n_samples_per_theta=n_samples_per_theta,
            augmented_data_definitions=augmented_data_definitions_1,
            sampling_theta_index=1,
            start_event=0,
            end_event=last_train_index)
        n_actual_samples += x_1.shape[0]

        # Analyse theta values from theta1 run
        theta0_1 = thetas_1[0]
        theta1_1 = thetas_1[1]
        thetas_eval = list(thetas_1[2:])

        # Analyse augmented data from theta1 run (same layout as above, following augmented_data_definitions_1)
        r_xz_1 = augmented_data_1[0]
        t_xz0_1 = augmented_data_1[1]
        t_xz1_1 = augmented_data_1[2]
        r_xz_eval = [augmented_data_1[3 + 2 * i]
                     for i in range(len(thetas_eval))]
        t_xz_eval = [augmented_data_1[4 + 2 * i]
                     for i in range(len(thetas_eval))]

        # Each event is repeated once per numerator; denominator quantities are simply copied
        x_1 = np.vstack([x_1] * n_copies)
        r_xz_1 = np.vstack([r_xz_1] + r_xz_eval)
        t_xz0_1 = np.vstack([t_xz0_1] + t_xz_eval)
        t_xz1_1 = np.vstack([t_xz1_1] * n_copies)
        theta0_1 = np.vstack([theta0_1] + thetas_eval)
        theta1_1 = np.vstack([theta1_1] * n_copies)

        # Combine
        x = np.vstack([x_0, x_1])
        r_xz = np.vstack([r_xz_0, r_xz_1])
        t_xz0 = np.vstack([t_xz0_0, t_xz0_1])
        t_xz1 = np.vstack([t_xz1_0, t_xz1_1])
        theta0 = np.vstack([theta0_0, theta0_1])
        theta1 = np.vstack([theta1_0, theta1_1])
        y = np.zeros(x.shape[0])
        y[x_0.shape[0]:] = 1.

        if n_additional_thetas > 0:
            logging.info(
                'Oversampling: created %s training samples from %s original unweighted events',
                x.shape[0], n_actual_samples)

        # Shuffle
        x, r_xz, t_xz0, t_xz1, theta0, theta1, y = shuffle(
            x, r_xz, t_xz0, t_xz1, theta0, theta1, y)

        # y shape
        y = y.reshape((-1, 1))

        # Save data
        np.save(folder + '/theta0_' + filename + '.npy', theta0)
        np.save(folder + '/theta1_' + filename + '.npy', theta1)
        np.save(folder + '/x_' + filename + '.npy', x)
        np.save(folder + '/y_' + filename + '.npy', y)
        np.save(folder + '/r_xz_' + filename + '.npy', r_xz)
        np.save(folder + '/t_xz0_' + filename + '.npy', t_xz0)
        np.save(folder + '/t_xz1_' + filename + '.npy', t_xz1)

        return x, theta0, theta1, y, r_xz, t_xz0, t_xz1
Esempio n. 6
0
    def extract_samples_train_ratio(self,
                                    theta0,
                                    theta1,
                                    n_samples,
                                    folder,
                                    filename,
                                    test_split=0.3):
        """
        Extracts training samples x ~ p(x|theta0) and x ~ p(x|theta1) together with the class label y, the joint
        likelihood ratio r(x,z|theta0, theta1), and the joint score t(x,z|theta0) for methods such as CARL, ROLR,
        CASCAL, and RASCAL.

        :param theta0: tuple (type, value) that defines the numerator parameter point or prior over parameter points.
                       Use the helper functions constant_benchmark_theta(), multiple_benchmark_thetas(),
                       constant_morphing_theta(), multiple_morphing_thetas(), or random_morphing_thetas().
        :param theta1: tuple (type, value) that defines the denominator parameter point or prior over parameter
                       points. Use the helper functions constant_benchmark_theta(), multiple_benchmark_thetas(),
                       constant_morphing_theta(), multiple_morphing_thetas(), or random_morphing_thetas().
        :param n_samples: Total number of samples to be drawn. Half of them are requested for each of the two
                          sampling runs.
        :param folder: Folder for the resulting samples.
        :param filename: Label for the filenames. The actual filenames will add a prefix such as 'x_', and the extension
                         '.npy'.
        :param test_split: Fraction of events reserved for the test sample (will not be used for any training samples).
                           If None, <= 0 or >= 1, all events are used for training.
        :return: Tuple (x, theta0, theta1, y, r_xz, t_xz) after all arrays have been passed jointly through
                 shuffle(). y is a column vector that is 0 for events from the theta0 run and 1 for events from the
                 theta1 run. Every array is also saved as folder/<prefix>_<filename>.npy.
        :raises RuntimeError: If no morphing setup is loaded.
        :raises ValueError: If the computed index of the last training event lies outside [0, self.n_samples].
        """

        logging.info(
            'Extracting training sample for ratio-based methods. Numerator hypothesis: %s, denominator '
            'hypothesis: %s', theta0, theta1)

        if self.morpher is None:
            raise RuntimeError(
                'No morphing setup loaded. Cannot calculate score.')

        create_missing_folders([folder])

        # Augmented data (gold)
        augmented_data_definitions = [('ratio', 0, 1), ('score', 0)]

        # Train / test split
        if test_split is None or test_split <= 0. or test_split >= 1.:
            last_train_index = None
        else:
            last_train_index = int(round((1. - test_split) * self.n_samples,
                                         0))

            if last_train_index < 0 or last_train_index > self.n_samples:
                # The placeholders have to be filled explicitly; passing the values as extra exception arguments
                # would leave the literal '{}' in the message.
                raise ValueError(
                    "Irregular train / test split: sample {} / {}".format(
                        last_train_index, self.n_samples))

        # Thetas for theta0 sampling
        theta0_types, theta0_values, n_samples_per_theta0 = parse_theta(
            theta0, n_samples // 2)
        theta1_types, theta1_values, n_samples_per_theta1 = parse_theta(
            theta1, n_samples // 2)

        n_samples_per_theta = min(n_samples_per_theta0, n_samples_per_theta1)

        # Start for theta0
        x0, (r_xz0, t_xz0), (theta0_0, theta1_0) = self.extract_sample(
            theta_sets_types=[theta0_types, theta1_types],
            theta_sets_values=[theta0_values, theta1_values],
            sampling_theta_index=0,
            n_samples_per_theta=n_samples_per_theta,
            augmented_data_definitions=augmented_data_definitions,
            start_event=0,
            end_event=last_train_index)

        # Thetas for theta1 sampling (could be different if num or denom are random)
        theta0_types, theta0_values, n_samples_per_theta0 = parse_theta(
            theta0, n_samples // 2)
        theta1_types, theta1_values, n_samples_per_theta1 = parse_theta(
            theta1, n_samples // 2)

        n_samples_per_theta = min(n_samples_per_theta0, n_samples_per_theta1)

        # Start for theta1
        x1, (r_xz1, t_xz1), (theta0_1, theta1_1) = self.extract_sample(
            theta_sets_types=[theta0_types, theta1_types],
            theta_sets_values=[theta0_values, theta1_values],
            sampling_theta_index=1,
            n_samples_per_theta=n_samples_per_theta,
            augmented_data_definitions=augmented_data_definitions,
            start_event=0,
            end_event=last_train_index)

        # Combine: events from the theta0 run come first and get label 0, events from the theta1 run get label 1
        x = np.vstack([x0, x1])
        r_xz = np.vstack([r_xz0, r_xz1])
        t_xz = np.vstack([t_xz0, t_xz1])
        theta0 = np.vstack([theta0_0, theta0_1])
        theta1 = np.vstack([theta1_0, theta1_1])
        y = np.zeros(x.shape[0])
        y[x0.shape[0]:] = 1.

        # Shuffle
        x, r_xz, t_xz, theta0, theta1, y = shuffle(x, r_xz, t_xz, theta0,
                                                   theta1, y)

        # y shape
        y = y.reshape((-1, 1))

        # Save data
        np.save(folder + '/theta0_' + filename + '.npy', theta0)
        np.save(folder + '/theta1_' + filename + '.npy', theta1)
        np.save(folder + '/x_' + filename + '.npy', x)
        np.save(folder + '/y_' + filename + '.npy', y)
        np.save(folder + '/r_xz_' + filename + '.npy', r_xz)
        np.save(folder + '/t_xz_' + filename + '.npy', t_xz)

        return x, theta0, theta1, y, r_xz, t_xz
Esempio n. 7
0
    def extract_samples_train_local(self,
                                    theta,
                                    n_samples,
                                    folder,
                                    filename,
                                    test_split=0.3):
        """
        Extracts training samples x ~ p(x|theta) as well as the joint score t(x, z|theta) for SALLY and SALLINO.

        :param theta: tuple (type, value) that defines the parameter point or prior over parameter points for the
                      sampling. This is also where the score is evaluated. Use the helper functions, in particular
                      constant_benchmark_theta() and constant_morphing_theta().
        :param n_samples: Total number of samples to be drawn.
        :param folder: Folder for the resulting samples.
        :param filename: Label for the filenames. The actual filenames will add a prefix such as 'x_', and the extension
                         '.npy'.
        :param test_split: Fraction of events reserved for the test sample (will not be used for any training samples).
                           If None, <= 0 or >= 1, all events are used for training.
        :return: Tuple (x, theta, t_xz) as obtained from self.extract_sample(). All three are also saved as
                 folder/<prefix>_<filename>.npy.
        :raises RuntimeError: If no morphing setup is loaded.
        :raises ValueError: If the computed index of the last training event lies outside [0, self.n_samples].
        """

        logging.info(
            'Extracting training sample for local score regression. Sampling and score evaluation according to'
            ' %s', theta)

        # Validate before touching the file system, consistent with the ratio-based extraction methods
        if self.morpher is None:
            raise RuntimeError(
                'No morphing setup loaded. Cannot calculate score.')

        create_missing_folders([folder])

        # Thetas
        theta_types, theta_values, n_samples_per_theta = parse_theta(
            theta, n_samples)

        # Augmented data (gold)
        augmented_data_definitions = [('score', 0)]

        # Train / test split
        if test_split is None or test_split <= 0. or test_split >= 1.:
            last_train_index = None
        else:
            last_train_index = int(round((1. - test_split) * self.n_samples,
                                         0))

            if last_train_index < 0 or last_train_index > self.n_samples:
                # The placeholders have to be filled explicitly; passing the values as extra exception arguments
                # would leave the literal '{}' in the message.
                raise ValueError(
                    "Irregular train / test split: sample {} / {}".format(
                        last_train_index, self.n_samples))

        # Start
        x, (t_xz, ), (theta, ) = self.extract_sample(
            theta_sets_types=[theta_types],
            theta_sets_values=[theta_values],
            n_samples_per_theta=n_samples_per_theta,
            augmented_data_definitions=augmented_data_definitions,
            start_event=0,
            end_event=last_train_index)

        # Save data
        np.save(folder + '/theta_' + filename + '.npy', theta)
        np.save(folder + '/x_' + filename + '.npy', x)
        np.save(folder + '/t_xz_' + filename + '.npy', t_xz)

        return x, theta, t_xz