def main(argv):
    """Fit one method on repeated simulated datasets and print a summary.

    Options recognised in ``argv`` (each takes a value):
        -f  settings.num_features (int)
        -z  settings.num_nonzero_features (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  number of worker processes (default 1)
        -r  number of runs (default 30)

    Exits with status 2 when getopt rejects ``argv``.
    """
    num_threads = 1
    num_runs = 30

    try:
        opts, _ = getopt.getopt(argv, "f:z:a:b:c:s:m:r:t:")
    except getopt.GetoptError:
        print("Bad Arguments to python script")
        sys.exit(2)

    settings = Elastic_Net_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-z':
            settings.num_nonzero_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in METHODS, "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    settings.print_settings()
    sys.stdout.flush()

    # Generate every dataset up front so the fits can be farmed out together.
    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_correlated(
            settings.num_features, settings.num_nonzero_features)
        run_data.append(Iteration_Data(i, observed_data, settings))

    # The "SP" and "SP0" methods are always fitted serially.
    if settings.method not in ["SP", "SP0"] and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one method on shuffled gene-expression data and print a summary.

    Seeds numpy's global RNG with 10 before doing anything else.

    Options recognised in ``argv`` (each takes a value):
        -m  settings.method; must be "HC" or "GS"
        -t  number of worker processes (default 1)
        -r  number of runs (default 1)

    Exits with status 2 when getopt rejects ``argv``.
    """
    seed = 10
    print("seed %d" % seed)
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "m:t:r:")
    except getopt.GetoptError:
        print("Bad argument given to realdata_eval.py")
        sys.exit(2)

    settings = RealDataSettings()
    for opt, arg in opts:
        if opt == "-m":
            assert arg in ["HC", "GS"], "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    sys.stdout.flush()

    geneset_dict = read_geneset_file()
    X_genesets, y, genesets = read_gene_expr_data(geneset_dict)
    # Total column count across all per-geneset matrices.
    num_features = sum(
        X_genesets[i].shape[1] for i in range(len(X_genesets)))
    print("num features %s" % (num_features,))
    print("total genesets ever %s" % (len(X_genesets),))
    X_genesets = normalize_data(X_genesets)

    run_data = []
    for i in range(num_runs):
        data = Shuffled_Gene_Data(X_genesets, y, genesets)
        run_data.append(Iteration_Data(i, data, settings))

    if num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one method on repeated sparse-group simulations and print a summary.

    Seeds numpy's global RNG with 10 before doing anything else.

    Options recognised in ``argv``:
        -g  settings.expert_num_groups (int)
        -f  settings.num_features (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  number of worker processes (default 1)
        -r  number of runs (default 1)
        -i  flag accepted by getopt; no branch below acts on it

    Exits with status 2 when getopt rejects ``argv``.
    """
    seed = 10
    print("seed %d" % seed)
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "g:f:a:b:c:s:m:t:r:i")
    except getopt.GetoptError:
        print("Bad argument given to sgl_eval.py")
        sys.exit(2)

    settings = SGL_Settings()
    for opt, arg in opts:
        if opt == '-g':
            settings.expert_num_groups = int(arg)
        elif opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in METHODS, "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    # Generate every dataset up front so the fits can be farmed out together.
    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.sparse_groups()
        run_data.append(Iteration_Data(i, observed_data, settings))

    # The "SP" method is always fitted serially.
    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one method on repeated matrix-completion simulations and summarise.

    Seeds numpy's global RNG with 10 before doing anything else.

    Options recognised in ``argv``:
        -d  "rows,cols"        -> settings.num_rows, settings.num_cols (int)
        -z  "row,col"          -> settings.num_nonzero_row_features,
                                  settings.num_nonzero_col_features (int)
        -f  "row,col"          -> settings.num_row_features,
                                  settings.num_col_features (int)
        -a  "train,val,test"   -> settings.train_perc, validate_perc,
                                  test_perc (float); their sum must be < 1
        -v  settings.num_nonzero_s (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  number of worker processes (default 1)
        -r  number of runs (default 1)
        -i  flag; sets settings.big_init_set = True

    Exits with status 2 when getopt rejects ``argv``.
    """
    seed = 10
    print("seed %d" % seed)
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "d:z:f:a:v:s:m:t:r:i")
    except getopt.GetoptError:
        print("Bad argument given to Matrix_Completion_eval.py")
        sys.exit(2)

    settings = Matrix_Completion_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_features = int(arg_split[0])
            settings.num_nonzero_col_features = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            settings.test_perc = float(arg_split[2])
            assert (settings.train_perc + settings.validate_perc
                    + settings.test_perc < 1), \
                "train/validate/test fractions must sum to less than 1"
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in METHODS, "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.big_init_set = True

    # Cross-option validation, done once every option has been applied.
    assert (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols), \
        "num_nonzero_s cannot exceed the matrix dimensions"
    # SP does not care about initialization
    assert (not (settings.big_init_set == True
                 and settings.method in ["SP", "SP0"])), \
        "-i cannot be combined with the SP/SP0 methods"

    # Turn the split fractions into absolute entry counts.
    settings.matrix_size = settings.num_rows * settings.num_cols
    settings.train_size = int(settings.train_perc * settings.matrix_size)
    settings.validate_size = int(settings.validate_perc * settings.matrix_size)
    settings.test_size = int(settings.test_perc * settings.matrix_size)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    # Generate every dataset up front so the fits can be farmed out together.
    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion()
        run_data.append(Iteration_Data(i, observed_data, settings))

    # The "SP" method is always fitted serially.
    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one method on repeated grouped matrix-completion simulations.

    Options recognised in ``argv`` (each takes a value):
        -d  "rows,cols"   -> settings.num_rows, settings.num_cols (int)
        -z  "row,col"     -> settings.num_nonzero_row_groups,
                             settings.num_nonzero_col_groups (int)
        -f  "row,col"     -> settings.num_row_features,
                             settings.num_col_features (int)
        -g  "row,col"     -> settings.num_row_groups,
                             settings.num_col_groups (int)
        -a  "train,val"   -> settings.train_perc, settings.validate_perc
                             (float); their sum must be <= 1.0
        -v  settings.num_nonzero_s (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  number of worker processes (default 1)
        -r  number of runs (default 1)
        -i  settings.gamma_to_row_col_m (float)

    Exits with status 2 when getopt rejects ``argv``.
    """
    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "d:z:f:g:a:v:s:m:t:r:i:")
    except getopt.GetoptError:
        print("Bad argument given")
        sys.exit(2)

    settings = Matrix_Completion_Group_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_groups = int(arg_split[0])
            settings.num_nonzero_col_groups = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-g':
            arg_split = arg.split(",")
            settings.num_row_groups = int(arg_split[0])
            settings.num_col_groups = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            assert settings.train_perc + settings.validate_perc <= 1.0, \
                "train/validate fractions must sum to at most 1"
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in METHODS, "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.gamma_to_row_col_m = float(arg)

    # Cross-option validation, done once every option has been applied.
    assert (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols), \
        "num_nonzero_s cannot exceed the matrix dimensions"

    settings.matrix_size = settings.num_rows * settings.num_cols

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    # Generate every dataset up front so the fits can be farmed out together.
    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion_groups(
            gamma_to_row_col_m=settings.gamma_to_row_col_m,
            feat_factor=settings.feat_factor)
        run_data.append(Iteration_Data(i, observed_data, settings))

    # The "SP" method is always fitted serially.
    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one method on repeated additive-smooth simulations and summarise.

    Options recognised in ``argv`` (each takes a value):
        -f  settings.num_funcs (int); at most len(settings.smooth_fcns)
        -z  settings.num_zero_funcs (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  number of worker processes (default 1)
        -r  number of runs (default 1)

    Exits with status 2 when getopt rejects ``argv``.
    """
    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "f:z:a:b:c:s:m:t:r:")
    except getopt.GetoptError:
        # Report the failure instead of exiting without any output.
        print("Bad argument given")
        sys.exit(2)

    settings = Sparse_Add_Models_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in METHODS, "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    assert settings.num_funcs <= len(settings.smooth_fcns), \
        "num_funcs exceeds the number of available smooth functions"
    # The first num_funcs smooth functions followed by num_zero_funcs
    # copies of const_zero.
    smooth_fcn_list = (settings.smooth_fcns[:settings.num_funcs]
                       + [const_zero] * settings.num_zero_funcs)

    # Generate every dataset up front so the fits can be farmed out together.
    data_gen = DataGenerator(settings)
    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)
        run_data.append(Iteration_Data(i, observed_data, settings))

    # The "SP" method is always fitted serially.
    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    # A None result is counted as a crashed run.
    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1

    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Fit one dataset from a grid of initial lambdas and pickle the results.

    Seeds numpy's global RNG with 20, generates a single additive-smooth
    dataset, fits it once per initial-lambda vector (visited in a random
    order), accumulates the results in a CumulativeInitializationResults
    and writes them to a pickle file under ``settings.results_folder``.

    Options recognised in ``argv`` (each takes a value):
        -t  number of worker processes (default 1)
        -f  settings.num_funcs (int); at most len(settings.smooth_fcns)
        -z  settings.num_zero_funcs (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be "HC" or "NM"
        -i  settings.init_size (int); overwritten below by the grid size
        -r  accepted by getopt; no branch below acts on it

    Exits with status 2 when getopt rejects ``argv``.
    """
    num_threads = 1

    seed = 20
    print("seed %d" % seed)
    np.random.seed(seed)

    try:
        opts, _ = getopt.getopt(argv, "t:r:f:z:a:b:c:s:m:i:")
    except getopt.GetoptError:
        # Report the failure instead of exiting without any output.
        print("Bad argument given")
        sys.exit(2)

    settings = Sparse_Add_Models_Multiple_Starts_Settings()
    for opt, arg in opts:
        if opt == '-t':
            num_threads = int(arg)
        elif opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            assert arg in ["HC", "NM"], "unknown method: %s" % arg
            settings.method = arg
        elif opt == "-i":
            settings.init_size = int(arg)

    assert settings.num_funcs <= len(settings.smooth_fcns), \
        "num_funcs exceeds the number of available smooth functions"
    # The first num_funcs smooth functions followed by num_zero_funcs
    # copies of const_zero.
    smooth_fcn_list = (settings.smooth_fcns[:settings.num_funcs]
                       + [const_zero] * settings.num_zero_funcs)

    data_gen = DataGenerator(settings)
    observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)

    # Create initial lambdas
    num_lambdas = 1 + settings.num_funcs + settings.num_zero_funcs

    # Pool the last lambdas together: every candidate is one value for the
    # first lambda and one shared value for all remaining lambdas, both
    # drawn from the same grid of powers of ten.
    init_values = np.power(
        10.0,
        np.arange(settings.min_init_log_lambda,
                  settings.max_init_log_lambda + 1))
    initial_lambdas_set = []
    for l1 in init_values:
        for l2 in init_values:
            full_init_l = np.array([l1] + [l2] * (num_lambdas - 1))
            initial_lambdas_set.append(full_init_l)

    # Shuffle the possibilities
    permute_idxs = np.random.permutation(
        np.arange(0, len(init_values) * len(init_values)))

    # The number of initialisations is the full grid size; this replaces
    # any value supplied through -i.
    settings.init_size = permute_idxs.size
    settings.print_settings()
    sys.stdout.flush()

    run_data = []
    for i, idx in enumerate(permute_idxs):
        init_lambdas = initial_lambdas_set[idx]
        print("init_lambdas %s" % (init_lambdas,))
        run_data.append(
            Iteration_Data(i, observed_data, settings,
                           init_lambdas=[init_lambdas]))

    if num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        except BaseException:
            # Do not wait on outstanding work if the map itself failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the workers instead of leaking them.
            pool.join()
    else:
        print("Avoiding multiprocessing")
        results = [fit_data_for_iter_safe(run) for run in run_data]

    print("results %s" % (results,))

    # NOTE(review): every result is handed to update() without a None
    # check -- confirm fit_data_for_iter_safe cannot return None here, or
    # that update() tolerates it.
    cum_results = CumulativeInitializationResults(observed_data, settings)
    for r in results:
        print(r)
        cum_results.update(r)

    print("==========RUNS============")
    print("cumulative_val_cost %s" % (cum_results.cumulative_val_cost,))
    print("cumulative_test_cost %s" % (cum_results.cumulative_test_cost,))

    # NOTE(review): settings.snr is formatted with %d, so a fractional snr
    # is truncated in the file name. Left unchanged because other code may
    # rely on this naming scheme -- confirm before altering.
    pickle_file_name = "%s/tmp/%s_many_inits_%d_%d_%d_%d_%d_%d_%d.pkl" % (
        settings.results_folder,
        settings.method,
        settings.num_funcs,
        settings.num_zero_funcs,
        settings.train_size,
        settings.validate_size,
        settings.test_size,
        settings.snr,
        settings.init_size,
    )
    print("pickle_file_name %s" % (pickle_file_name,))
    with open(pickle_file_name, "wb") as f:
        pickle.dump(
            {
                "initial_lambdas_set": initial_lambdas_set,
                "cum_results": cum_results,
            },
            f)

    print("DONE!")