예제 #1
0
    def test_higher_dimensional_search_vwafr_with_third_param(self):
        """Integration test with the V-WAfr probe set data.

        This adds a third parameter, 'p3', to the data, which dominates
        the probe set count. Every original count is replicated for
        p3 in {0, 10, 20, 30, 40} with a penalty of
        100000 * (p3 / 20 - 1)^2 added, so the counts are minimized
        when the third parameter is equal to 20. The loss function
        will still try to push this parameter down, but if it has
        a big enough impact on the barrier when it deviates from 20
        (i.e., by exceeding the max probe count) then the minimizer
        should pick a value of roughly 20.
        """
        param_names = ('mismatches', 'cover_extension', 'p3')
        max_total_count = 150000

        # Extend each 2-parameter entry into five 3-parameter entries
        pc = {}
        for dataset, counts in self.probe_counts_vwafr.items():
            pc[dataset] = {}
            for param_vals, count in counts.items():
                for k in [0, 10, 20, 30, 40]:
                    # Make a new count that is smallest when k=20
                    new_count = count + 100000 * (k / 20.0 - 1)**2
                    pc[dataset][tuple(param_vals) + (k,)] = new_count

        ss = param_search.higher_dimensional_search(param_names,
                                                    pc,
                                                    max_total_count,
                                                    loss_coeffs=(1.0, 1.0,
                                                                 1.0))
        opt_params, opt_params_count, opt_params_loss = ss

        self.assertLess(opt_params_count, max_total_count)

        # Verify that the optimal value of the third parameter is
        # around 20 (namely, 15-25); separate bound checks report the
        # offending value on failure
        for dataset, param_vals in opt_params.items():
            # Unpacking also checks that exactly three values came back
            _, _, p3 = param_vals
            self.assertGreaterEqual(p3, 15, msg="dataset %s" % dataset)
            self.assertLessEqual(p3, 25, msg="dataset %s" % dataset)
예제 #2
0
 def search_fn(max_total_count):
     """Run the higher dimensional search on the V-WAfr fixture.

     Passes max_total_count through as the third positional argument
     and returns whatever the search returns.
     """
     # One loss coefficient per parameter; the second is 1/100 of
     # the first
     coeffs = (1.0, 1.0 / 100.0)
     result = param_search.higher_dimensional_search(
         self.param_names_vwafr, self.probe_counts_vwafr,
         max_total_count, loss_coeffs=coeffs)
     return result
예제 #3
0
def main(args):
    """Search for optimal parameter values and report the result.

    Reads the table of probe counts (and, when given, the table of
    dataset weights), runs either the higher dimensional search or the
    standard search, hands the optimal parameter values to
    pool_probes_io.write_param_values_across_datasets() together with
    args.param_vals_tsv, and prints the number of probes and the loss.

    Args:
        args: parsed arguments; the attributes read here are
            probe_count_tsv, dataset_weights_tsv, loss_coeffs, use_nd,
            round_params, target_probe_count, and param_vals_tsv

    Raises:
        ValueError: if the number of loss coefficients differs from
            the number of parameters; if both use_nd and round_params
            are set; or if a standard search is requested and the
            parameters are not, in order, 'mismatches' and
            'cover_extension'
    """
    # Read the table of probe counts
    param_names, probe_counts = pool_probes_io.read_table_of_probe_counts(
        args.probe_count_tsv)

    if args.dataset_weights_tsv:
        dataset_weights = pool_probes_io.read_table_of_dataset_weights(
            args.dataset_weights_tsv, probe_counts.keys())
    else:
        dataset_weights = None

    # Check that, if loss coefficients were provided, there are the
    # same number of them as parameters
    if args.loss_coeffs and len(args.loss_coeffs) != len(param_names):
        raise ValueError(("If using --loss-coeffs, the number of "
            "coefficients (%d) must be the same as the number of "
            "parameters provided in the input table (%d)") %
            (len(args.loss_coeffs), len(param_names)))

    if args.use_nd:
        # This does not round parameters after searching over the
        # dimensional space
        if args.round_params:
            raise ValueError(("The arguments '--use-nd' and '--round-params' "
                "cannot both be used; this does not round parameters "
                "after searching over a space with n > 2"))

        # Perform a higher dimensional search for optimal values of
        # the parameters
        s_results = param_search.higher_dimensional_search(
            param_names, probe_counts, args.target_probe_count,
            loss_coeffs=args.loss_coeffs,
            dataset_weights=dataset_weights)
        write_type = 'float'
    else:
        # For the standard search, the only parameters must be (in order):
        # 'mismatches' and 'cover_extension'. Verify this. Compare as a
        # tuple so that a list holding the same names is accepted too.
        if tuple(param_names) != ('mismatches', 'cover_extension'):
            raise ValueError(("For a standard search, the only parameters "
                "in the input table must be, in order: 'mismatches' and "
                "'cover_extension'. Consider using the '--use-nd' argument "
                "to search over additional parameters."))

        # Perform a standard search for optimal values of mismatches and
        # cover extension
        s_results = param_search.standard_search(
            probe_counts, args.target_probe_count,
            round_params=args.round_params,
            loss_coeffs=args.loss_coeffs,
            dataset_weights=dataset_weights)
        write_type = 'int'

    opt_params, opt_params_count, opt_params_loss = s_results

    # Write a table of the optimal parameter values; values are written
    # as 'float' after the higher dimensional search and as 'int' after
    # the standard search
    pool_probes_io.write_param_values_across_datasets(param_names, opt_params,
        args.param_vals_tsv, type=write_type)

    # Print the total number of probes and loss
    print("Number of probes: %d" % opt_params_count)
    print("Loss: %f" % opt_params_loss)