def main(argv): num_threads = 1 num_runs = 30 try: opts, args = getopt.getopt(argv,"f:z:a:b:c:s:m:r:t:") except getopt.GetoptError: print "Bad Arguments to python script" sys.exit(2) settings = Elastic_Net_Settings() for opt, arg in opts: if opt == '-f': settings.num_features = int(arg) elif opt == '-z': settings.num_nonzero_features = int(arg) elif opt == '-a': settings.train_size = int(arg) elif opt == '-b': settings.validate_size = int(arg) elif opt == '-c': settings.test_size = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert(arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) settings.print_settings() sys.stdout.flush() data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.make_correlated(settings.num_features, settings.num_nonzero_features) run_data.append(Iteration_Data(i, observed_data, settings)) if settings.method not in ["SP", "SP0"] and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def main(argv): seed = 10 print "seed", seed np.random.seed(seed) num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv, "m:t:r:") except getopt.GetoptError: print "Bad argument given to realdata_eval.py" sys.exit(2) settings = RealDataSettings() for opt, arg in opts: if opt == "-m": assert (arg in ["HC", "GS"]) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) print "TOTAL NUM RUNS %d" % num_runs sys.stdout.flush() geneset_dict = read_geneset_file() X_genesets, y, genesets = read_gene_expr_data(geneset_dict) print "num features", sum( [X_genesets[i].shape[1] for i in range(0, len(X_genesets))]) print "total genesets ever", len(X_genesets) X_genesets = normalize_data(X_genesets) run_data = [] for i in range(num_runs): data = Shuffled_Gene_Data(X_genesets, y, genesets) run_data.append(Iteration_Data(i, data, settings)) if num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def fit_penalized(self, train_set, penalty_params, max_em_iters, val_set_evaluator=None, init_theta=None, reference_pen_param=None, pool=None):
    """Fit a penalized model with EM and package it for comparison.

    @param train_set: data the EM algorithm fits against
    @param penalty_params: penalty parameter for fitting penalized model
    @param max_em_iters: cap on the number of EM iterations
    @param val_set_evaluator: LikelihoodComparer with a given reference model
    @param init_theta: warm-start theta; a fresh one is drawn when None
    @param reference_pen_param: the penalty parameters for the reference model
    @param pool: optional worker pool forwarded to the EM run
    @return the fitted model after the 2-step procedure
    """
    # No warm start supplied: initialize theta subject to the model masks.
    if init_theta is None:
        init_theta = initialize_theta(self.theta_shape, self.possible_theta_mask, self.zero_theta_mask)

    fitted_theta, _, _, _ = self.em_algo.run(
        train_set,
        self.feat_generator,
        theta=init_theta,
        possible_theta_mask=self.possible_theta_mask,
        zero_theta_mask=self.zero_theta_mask,
        burn_in=self.burn_in,
        penalty_params=penalty_params,
        max_em_iters=max_em_iters,
        max_e_samples=self.num_e_samples * 4,
        pool=pool,
    )

    model_results = MethodResults(penalty_params)

    # Validation log likelihood (EM surrogate) decides whether this model is
    # any good relative to the reference.
    ratio_lower_bound, ratio = self._do_validation_set_checks(
        fitted_theta,
        val_set_evaluator,
    )
    model_results.set_penalized_theta(
        fitted_theta,
        ratio_lower_bound,
        ratio,
        model_masks=ModelTruncation(fitted_theta, self.feat_generator),
        reference_penalty_param=reference_pen_param,
    )

    log.info("==== Penalized theta, %s, nonzero %d ====" % (penalty_params, model_results.penalized_num_nonzero))
    log.info(
        get_nonzero_theta_print_lines(fitted_theta, self.feat_generator))
    return model_results
def main(argv): seed = 10 print "seed", seed np.random.seed(seed) num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv, "g:f:a:b:c:s:m:t:r:i") except getopt.GetoptError: print "Bad argument given to sgl_eval.py" sys.exit(2) settings = SGL_Settings() for opt, arg in opts: if opt == '-g': settings.expert_num_groups = int(arg) elif opt == '-f': settings.num_features = int(arg) elif opt == '-a': settings.train_size = int(arg) elif opt == '-b': settings.validate_size = int(arg) elif opt == '-c': settings.test_size = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert (arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) print "TOTAL NUM RUNS %d" % num_runs settings.print_settings() sys.stdout.flush() data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.sparse_groups() run_data.append(Iteration_Data(i, observed_data, settings)) if settings.method != "SP" and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def main(argv): seed = 10 print "seed", seed np.random.seed(seed) num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv, "d:z:f:a:v:s:m:t:r:i") except getopt.GetoptError: print "Bad argument given to Matrix_Completion_eval.py" sys.exit(2) settings = Matrix_Completion_Settings() for opt, arg in opts: if opt == '-d': arg_split = arg.split(",") settings.num_rows = int(arg_split[0]) settings.num_cols = int(arg_split[1]) elif opt == '-z': arg_split = arg.split(",") settings.num_nonzero_row_features = int(arg_split[0]) settings.num_nonzero_col_features = int(arg_split[1]) elif opt == '-f': arg_split = arg.split(",") settings.num_row_features = int(arg_split[0]) settings.num_col_features = int(arg_split[1]) elif opt == '-a': arg_split = arg.split(",") settings.train_perc = float(arg_split[0]) settings.validate_perc = float(arg_split[1]) settings.test_perc = float(arg_split[2]) assert (settings.train_perc + settings.validate_perc + settings.test_perc < 1) elif opt == "-v": settings.num_nonzero_s = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert (arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) elif opt == "-i": settings.big_init_set = True assert (settings.num_nonzero_s <= settings.num_rows and settings.num_nonzero_s <= settings.num_cols) # SP does not care about initialization assert (not (settings.big_init_set == True and settings.method in ["SP", "SP0"])) settings.matrix_size = settings.num_rows * settings.num_cols settings.train_size = int(settings.train_perc * settings.matrix_size) settings.validate_size = int(settings.validate_perc * settings.matrix_size) settings.test_size = int(settings.test_perc * settings.matrix_size) print "TOTAL NUM RUNS %d" % num_runs settings.print_settings() sys.stdout.flush() data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.matrix_completion() run_data.append(Iteration_Data(i, 
observed_data, settings)) if settings.method != "SP" and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def main(argv): num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv, "d:z:f:g:a:v:s:m:t:r:i:") except getopt.GetoptError: print "Bad argument given" sys.exit(2) settings = Matrix_Completion_Group_Settings() for opt, arg in opts: if opt == '-d': arg_split = arg.split(",") settings.num_rows = int(arg_split[0]) settings.num_cols = int(arg_split[1]) elif opt == '-z': arg_split = arg.split(",") settings.num_nonzero_row_groups = int(arg_split[0]) settings.num_nonzero_col_groups = int(arg_split[1]) elif opt == '-f': arg_split = arg.split(",") settings.num_row_features = int(arg_split[0]) settings.num_col_features = int(arg_split[1]) elif opt == '-g': arg_split = arg.split(",") settings.num_row_groups = int(arg_split[0]) settings.num_col_groups = int(arg_split[1]) elif opt == '-a': arg_split = arg.split(",") settings.train_perc = float(arg_split[0]) settings.validate_perc = float(arg_split[1]) assert (settings.train_perc + settings.validate_perc <= 1.0) elif opt == "-v": settings.num_nonzero_s = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert (arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) elif opt == "-i": settings.gamma_to_row_col_m = float(arg) assert (settings.num_nonzero_s <= settings.num_rows and settings.num_nonzero_s <= settings.num_cols) settings.matrix_size = settings.num_rows * settings.num_cols print "TOTAL NUM RUNS %d" % num_runs settings.print_settings() sys.stdout.flush() data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.matrix_completion_groups( gamma_to_row_col_m=settings.gamma_to_row_col_m, feat_factor=settings.feat_factor) run_data.append(Iteration_Data(i, observed_data, settings)) if settings.method != "SP" and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = 
map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes
def main(argv): num_threads = 1 num_runs = 1 try: opts, args = getopt.getopt(argv, "f:z:a:b:c:s:m:t:r:") except getopt.GetoptError: sys.exit(2) settings = Sparse_Add_Models_Settings() for opt, arg in opts: if opt == '-f': settings.num_funcs = int(arg) elif opt == '-z': settings.num_zero_funcs = int(arg) elif opt == '-a': settings.train_size = int(arg) elif opt == '-b': settings.validate_size = int(arg) elif opt == '-c': settings.test_size = int(arg) elif opt == "-s": settings.snr = float(arg) elif opt == "-m": assert (arg in METHODS) settings.method = arg elif opt == "-t": num_threads = int(arg) elif opt == "-r": num_runs = int(arg) print "TOTAL NUM RUNS %d" % num_runs settings.print_settings() sys.stdout.flush() assert (settings.num_funcs <= len(settings.smooth_fcns)) smooth_fcn_list = settings.smooth_fcns[:settings.num_funcs] + [ const_zero ] * settings.num_zero_funcs data_gen = DataGenerator(settings) run_data = [] for i in range(num_runs): observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list) run_data.append(Iteration_Data(i, observed_data, settings)) if settings.method != "SP" and num_threads > 1: print "Do multiprocessing" pool = Pool(num_threads) results = pool.map(fit_data_for_iter_safe, run_data) else: print "Avoiding multiprocessing" results = map(fit_data_for_iter_safe, run_data) method_results = MethodResults(settings.method, settings.method_result_keys) num_crashes = 0 for r in results: if r is not None: method_results.append(r) else: num_crashes += 1 print "==========TOTAL RUNS %d============" % method_results.get_num_runs() method_results.print_results() print "num crashes %d" % num_crashes