def test_higher_dimensional_search_vwafr_with_third_param(self):
    """Integration test with the V-WAfr probe set data.

    This adds a third parameter to the data, which dominates the probe
    set count. The counts are set so that they are minimized when the
    third parameter is equal to 20. The loss function will still try
    to push this parameter down, but if it has a big enough impact on
    the barrier when it deviates from 20 (i.e., by exceeding the max
    probe count) then the minimizer should pick a value of roughly 20.
    """
    param_names = ('mismatches', 'cover_extension', 'p3')
    # Build a new probe-count table that extends each existing
    # (mismatches, cover_extension) key with a third parameter value k
    pc = {}
    for dataset in self.probe_counts_vwafr.keys():
        pc[dataset] = {}
        for param_vals in self.probe_counts_vwafr[dataset]:
            count = self.probe_counts_vwafr[dataset][param_vals]
            for k in [0, 10, 20, 30, 40]:
                # Make a new count that is small when k=20 (the
                # quadratic penalty is zero at k=20 and grows as k
                # moves away from 20)
                new_count = count + 100000 * (k / 20.0 - 1)**2
                param_vals_with_k = tuple(list(param_vals) + [k])
                pc[dataset][param_vals_with_k] = new_count
    ss = param_search.higher_dimensional_search(param_names, pc, 150000,
        loss_coeffs=(1.0, 1.0, 1.0))
    opt_params, opt_params_count, opt_params_loss = ss
    # The search must respect the max probe count barrier
    self.assertLess(opt_params_count, 150000)
    # Verify that the optimal value of the third parameter is
    # around 20 (namely, 15-25)
    for dataset, param_vals in opt_params.items():
        mismatches, cover_extension, p3 = param_vals
        self.assertTrue(15 <= p3 <= 25)
def search_fn(max_total_count):
    # Run the n-dimensional parameter search over the V-WAfr probe
    # set data, constrained by the given total probe count
    coeffs = (1.0, 1.0 / 100.0)
    return param_search.higher_dimensional_search(
        self.param_names_vwafr,
        self.probe_counts_vwafr,
        max_total_count,
        loss_coeffs=coeffs)
def main(args):
    """Find optimal parameter values for pooling probes.

    Reads a table of probe counts, runs either the standard
    two-parameter search or an n-dimensional search (per args.use_nd),
    writes the optimal parameter values to a TSV, and prints the
    resulting probe count and loss.
    """
    # Read the table of probe counts
    param_names, probe_counts = pool_probes_io.read_table_of_probe_counts(
        args.probe_count_tsv)

    # Read per-dataset weights, if a table of them was given
    dataset_weights = None
    if args.dataset_weights_tsv:
        dataset_weights = pool_probes_io.read_table_of_dataset_weights(
            args.dataset_weights_tsv, probe_counts.keys())

    # Check that, if loss coefficients were provided, there are the
    # same number of them as parameters
    if args.loss_coeffs and len(args.loss_coeffs) != len(param_names):
        raise Exception(("If using --loss-coeffs, the number of "
            "coefficients (%d) must be the same as the number of "
            "parameters provided in the input table (%d)") %
            (len(args.loss_coeffs), len(param_names)))

    if args.use_nd:
        # The n-dimensional search does not round parameters after
        # searching over the space, so --round-params is incompatible
        if args.round_params:
            raise Exception(("The arguments '--use-nd' and '--round-params' "
                "cannot both be used; this does not round parameters "
                "after searching over a space with n > 2"))
        # Perform a higher dimensional search for optimal values of
        # the parameters
        search_results = param_search.higher_dimensional_search(
            param_names, probe_counts, args.target_probe_count,
            loss_coeffs=args.loss_coeffs,
            dataset_weights=dataset_weights)
        write_type = 'float'
    else:
        # For the standard search, the only parameters must be (in
        # order): 'mismatches' and 'cover_extension'. Verify this.
        if param_names != ('mismatches', 'cover_extension'):
            raise Exception(("For a standard search, the only parameters "
                "in the input table must be, in order: 'mismatches' and "
                "'cover_extension'. Consider using the '--use-nd' argument "
                "to search over additional parameters."))
        # Perform a standard search for optimal values of mismatches
        # and cover extension
        search_results = param_search.standard_search(
            probe_counts, args.target_probe_count,
            round_params=args.round_params,
            loss_coeffs=args.loss_coeffs,
            dataset_weights=dataset_weights)
        write_type = 'int'

    opt_params, opt_params_count, opt_params_loss = search_results

    # Write a table of the optimal parameter values
    pool_probes_io.write_param_values_across_datasets(param_names,
        opt_params, args.param_vals_tsv, type=write_type)

    # Print the total number of probes and loss
    print("Number of probes: %d" % opt_params_count)
    print("Loss: %f" % opt_params_loss)