예제 #1
0
def main(argv):
    """Run repeated elastic-net fits configured from command-line options.

    Recognized options in ``argv`` (getopt short options, each takes a value):
        -f  settings.num_features (int)
        -z  settings.num_nonzero_features (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  size of the worker pool (default 1)
        -r  number of runs (default 30)

    One dataset is generated per run, every dataset is passed to
    fit_data_for_iter_safe, and the non-None results are collected into a
    MethodResults object and printed. A None result is counted as a crash.
    Methods "SP" and "SP0" are always fitted serially, whatever -t says.

    Exits with status 2 when the options cannot be parsed or when the
    requested method is not in METHODS.
    """
    num_threads = 1
    num_runs = 30

    try:
        opts, _ = getopt.getopt(argv, "f:z:a:b:c:s:m:r:t:")
    except getopt.GetoptError:
        # A single parenthesized argument prints the same bytes on
        # Python 2 and Python 3.
        print("Bad Arguments to python script")
        sys.exit(2)

    settings = Elastic_Net_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-z':
            settings.num_nonzero_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            # Explicit check rather than assert: asserts vanish under -O.
            if arg not in METHODS:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    settings.print_settings()
    sys.stdout.flush()
    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_correlated(
            settings.num_features, settings.num_nonzero_features)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method not in ["SP", "SP0"] and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method, settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
def main(argv):
    """Run repeated fits on the gene-expression data set.

    The numpy random generator is seeded with 10 before anything else.

    Recognized options in ``argv`` (getopt short options, each takes a value):
        -m  settings.method; must be "HC" or "GS"
        -t  size of the worker pool (default 1)
        -r  number of runs (default 1)

    The gene sets and expression data are loaded with read_geneset_file and
    read_gene_expr_data, normalized with normalize_data, and wrapped in one
    Shuffled_Gene_Data per run. Every run is passed to
    fit_data_for_iter_safe; non-None results are collected into a
    MethodResults object and printed, and None results are counted as
    crashes.

    Exits with status 2 when the options cannot be parsed or when the
    requested method is not one of the two allowed values.
    """
    seed = 10
    # Formatting into one string prints the same bytes as the two-item
    # print statement and is valid on both Python 2 and Python 3.
    print("%s %s" % ("seed", seed))
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "m:t:r:")
    except getopt.GetoptError:
        print("Bad argument given to realdata_eval.py")
        sys.exit(2)

    settings = RealDataSettings()
    for opt, arg in opts:
        if opt == "-m":
            # Explicit check rather than assert: asserts vanish under -O.
            if arg not in ["HC", "GS"]:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    sys.stdout.flush()

    geneset_dict = read_geneset_file()
    X_genesets, y, genesets = read_gene_expr_data(geneset_dict)
    # Total feature count is the sum of the second dimension of each entry.
    num_features = sum(geneset.shape[1] for geneset in X_genesets)
    print("%s %s" % ("num features", num_features))
    print("%s %s" % ("total genesets ever", len(X_genesets)))
    X_genesets = normalize_data(X_genesets)

    run_data = []
    for i in range(num_runs):
        data = Shuffled_Gene_Data(X_genesets, y, genesets)
        run_data.append(Iteration_Data(i, data, settings))

    if num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
예제 #3
0
    def fit_penalized(self,
                      train_set,
                      penalty_params,
                      max_em_iters,
                      val_set_evaluator=None,
                      init_theta=None,
                      reference_pen_param=None,
                      pool=None):
        """
        Run the penalized EM fit and package its outcome.

        @param train_set: data handed to the EM algorithm
        @param penalty_params: penalty parameter for fitting penalized model
        @param max_em_iters: forwarded to the EM run as max_em_iters
        @param val_set_evaluator: LikelihoodComparer with a given reference model;
                                  forwarded to self._do_validation_set_checks
        @param init_theta: starting theta; when None, one is built with
                           initialize_theta from this object's shape and masks
        @param reference_pen_param: the penalty parameters for the reference model
        @param pool: forwarded to the EM run

        @return a MethodResults carrying the penalized theta, both
                log-likelihood-ratio values and the model masks
        """
        if init_theta is not None:
            start_theta = init_theta
        else:
            start_theta = initialize_theta(self.theta_shape,
                                           self.possible_theta_mask,
                                           self.zero_theta_mask)

        # Only the first of the four values returned by the EM run is used.
        penalized_theta, _, _, _ = self.em_algo.run(
            train_set,
            self.feat_generator,
            theta=start_theta,
            possible_theta_mask=self.possible_theta_mask,
            zero_theta_mask=self.zero_theta_mask,
            burn_in=self.burn_in,
            penalty_params=penalty_params,
            max_em_iters=max_em_iters,
            max_e_samples=self.num_e_samples * 4,
            pool=pool,
        )
        fit_results = MethodResults(penalty_params)

        # Validation log likelihood (EM surrogate); used to judge whether
        # the model is any good.
        ratio_checks = self._do_validation_set_checks(penalized_theta,
                                                      val_set_evaluator)
        log_lik_ratio_lower_bound, log_lik_ratio = ratio_checks

        masks = ModelTruncation(penalized_theta, self.feat_generator)
        fit_results.set_penalized_theta(
            penalized_theta,
            log_lik_ratio_lower_bound,
            log_lik_ratio,
            model_masks=masks,
            reference_penalty_param=reference_pen_param,
        )

        summary_line = "==== Penalized theta, %s, nonzero %d ====" % (
            penalty_params, fit_results.penalized_num_nonzero)
        log.info(summary_line)
        theta_lines = get_nonzero_theta_print_lines(penalized_theta,
                                                    self.feat_generator)
        log.info(theta_lines)
        return fit_results
예제 #4
0
def main(argv):
    """Run repeated sparse-group-lasso fits configured from the command line.

    The numpy random generator is seeded with 10 before anything else.

    Recognized options in ``argv`` (getopt short options):
        -g  settings.expert_num_groups (int)
        -f  settings.num_features (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  size of the worker pool (default 1)
        -r  number of runs (default 1)
        -i  accepted as a bare flag but has no effect in this function

    One dataset is generated per run with data_gen.sparse_groups(), every
    dataset is passed to fit_data_for_iter_safe, and the non-None results
    are collected into a MethodResults object and printed. A None result is
    counted as a crash. Method "SP" is always fitted serially.

    Exits with status 2 when the options cannot be parsed or when the
    requested method is not in METHODS.
    """
    seed = 10
    # Formatting into one string prints the same bytes as the two-item
    # print statement and is valid on both Python 2 and Python 3.
    print("%s %s" % ("seed", seed))
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "g:f:a:b:c:s:m:t:r:i")
    except getopt.GetoptError:
        print("Bad argument given to sgl_eval.py")
        sys.exit(2)

    settings = SGL_Settings()
    for opt, arg in opts:
        if opt == '-g':
            settings.expert_num_groups = int(arg)
        elif opt == '-f':
            settings.num_features = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            # Explicit check rather than assert: asserts vanish under -O.
            if arg not in METHODS:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.sparse_groups()
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
예제 #5
0
def main(argv):
    """Run repeated matrix-completion fits configured from the command line.

    The numpy random generator is seeded with 10 before anything else.

    Recognized options in ``argv`` (getopt short options):
        -d  "rows,cols"        -> settings.num_rows, settings.num_cols
        -z  "row,col"          -> settings.num_nonzero_row_features,
                                  settings.num_nonzero_col_features
        -f  "row,col"          -> settings.num_row_features,
                                  settings.num_col_features
        -a  "train,val,test"   -> settings.train_perc, validate_perc,
                                  test_perc (floats; their sum must be < 1)
        -v  settings.num_nonzero_s (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  size of the worker pool (default 1)
        -r  number of runs (default 1)
        -i  bare flag; sets settings.big_init_set to True

    After parsing, num_nonzero_s must not exceed num_rows or num_cols, and
    -i may not be combined with methods "SP" or "SP0". The train, validate
    and test sizes are then derived as int(percentage * rows * cols).

    One dataset is generated per run with data_gen.matrix_completion(),
    every dataset is passed to fit_data_for_iter_safe, and the non-None
    results are collected into a MethodResults object and printed. A None
    result is counted as a crash. Method "SP" is always fitted serially.

    Exits with status 2 when the options cannot be parsed or fail any of
    the checks above.
    """
    seed = 10
    # Formatting into one string prints the same bytes as the two-item
    # print statement and is valid on both Python 2 and Python 3.
    print("%s %s" % ("seed", seed))
    np.random.seed(seed)

    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "d:z:f:a:v:s:m:t:r:i")
    except getopt.GetoptError:
        print("Bad argument given to Matrix_Completion_eval.py")
        sys.exit(2)

    # Validation below uses explicit checks rather than assert, because
    # asserts vanish when Python runs with -O.
    settings = Matrix_Completion_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_features = int(arg_split[0])
            settings.num_nonzero_col_features = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            settings.test_perc = float(arg_split[2])
            if not (settings.train_perc + settings.validate_perc +
                    settings.test_perc < 1):
                print("train, validate and test percentages must sum to "
                      "less than 1")
                sys.exit(2)
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            if arg not in METHODS:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.big_init_set = True

    if not (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols):
        print("num_nonzero_s may not exceed the number of rows or columns")
        sys.exit(2)
    # SP does not care about initialization
    if (settings.big_init_set == True
            and settings.method in ["SP", "SP0"]):
        print("-i cannot be combined with method %s" % settings.method)
        sys.exit(2)

    settings.matrix_size = settings.num_rows * settings.num_cols
    settings.train_size = int(settings.train_perc * settings.matrix_size)
    settings.validate_size = int(settings.validate_perc * settings.matrix_size)
    settings.test_size = int(settings.test_perc * settings.matrix_size)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion()
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
예제 #6
0
def main(argv):
    """Run repeated grouped matrix-completion fits from command-line options.

    Recognized options in ``argv`` (getopt short options, each takes a value):
        -d  "rows,cols"   -> settings.num_rows, settings.num_cols
        -z  "row,col"     -> settings.num_nonzero_row_groups,
                             settings.num_nonzero_col_groups
        -f  "row,col"     -> settings.num_row_features,
                             settings.num_col_features
        -g  "row,col"     -> settings.num_row_groups, settings.num_col_groups
        -a  "train,val"   -> settings.train_perc, settings.validate_perc
                             (floats; their sum must be <= 1.0)
        -v  settings.num_nonzero_s (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  size of the worker pool (default 1)
        -r  number of runs (default 1)
        -i  settings.gamma_to_row_col_m (float)

    After parsing, num_nonzero_s must not exceed num_rows or num_cols.

    One dataset is generated per run with
    data_gen.matrix_completion_groups(), every dataset is passed to
    fit_data_for_iter_safe, and the non-None results are collected into a
    MethodResults object and printed. A None result is counted as a crash.
    Method "SP" is always fitted serially.

    Exits with status 2 when the options cannot be parsed or fail any of
    the checks above.
    """
    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "d:z:f:g:a:v:s:m:t:r:i:")
    except getopt.GetoptError:
        # A single parenthesized argument prints the same bytes on
        # Python 2 and Python 3.
        print("Bad argument given")
        sys.exit(2)

    # Validation below uses explicit checks rather than assert, because
    # asserts vanish when Python runs with -O.
    settings = Matrix_Completion_Group_Settings()
    for opt, arg in opts:
        if opt == '-d':
            arg_split = arg.split(",")
            settings.num_rows = int(arg_split[0])
            settings.num_cols = int(arg_split[1])
        elif opt == '-z':
            arg_split = arg.split(",")
            settings.num_nonzero_row_groups = int(arg_split[0])
            settings.num_nonzero_col_groups = int(arg_split[1])
        elif opt == '-f':
            arg_split = arg.split(",")
            settings.num_row_features = int(arg_split[0])
            settings.num_col_features = int(arg_split[1])
        elif opt == '-g':
            arg_split = arg.split(",")
            settings.num_row_groups = int(arg_split[0])
            settings.num_col_groups = int(arg_split[1])
        elif opt == '-a':
            arg_split = arg.split(",")
            settings.train_perc = float(arg_split[0])
            settings.validate_perc = float(arg_split[1])
            if not (settings.train_perc + settings.validate_perc <= 1.0):
                print("train and validate percentages must sum to at most 1")
                sys.exit(2)
        elif opt == "-v":
            settings.num_nonzero_s = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            if arg not in METHODS:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)
        elif opt == "-i":
            settings.gamma_to_row_col_m = float(arg)

    if not (settings.num_nonzero_s <= settings.num_rows
            and settings.num_nonzero_s <= settings.num_cols):
        print("num_nonzero_s may not exceed the number of rows or columns")
        sys.exit(2)

    settings.matrix_size = settings.num_rows * settings.num_cols

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.matrix_completion_groups(
            gamma_to_row_col_m=settings.gamma_to_row_col_m,
            feat_factor=settings.feat_factor)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)
예제 #7
0
def main(argv):
    """Run repeated sparse-additive-model fits from command-line options.

    Recognized options in ``argv`` (getopt short options, each takes a value):
        -f  settings.num_funcs (int)
        -z  settings.num_zero_funcs (int)
        -a  settings.train_size (int)
        -b  settings.validate_size (int)
        -c  settings.test_size (int)
        -s  settings.snr (float)
        -m  settings.method; must be a member of METHODS
        -t  size of the worker pool (default 1)
        -r  number of runs (default 1)

    The generating functions are the first num_funcs entries of
    settings.smooth_fcns followed by num_zero_funcs copies of const_zero, so
    num_funcs may not exceed len(settings.smooth_fcns).

    One dataset is generated per run with
    data_gen.make_additive_smooth_data(), every dataset is passed to
    fit_data_for_iter_safe, and the non-None results are collected into a
    MethodResults object and printed. A None result is counted as a crash.
    Method "SP" is always fitted serially.

    Exits with status 2 when the options cannot be parsed or fail any of
    the checks above.
    """
    num_threads = 1
    num_runs = 1

    try:
        opts, _ = getopt.getopt(argv, "f:z:a:b:c:s:m:t:r:")
    except getopt.GetoptError:
        # Say why we are exiting instead of leaving silently. A single
        # parenthesized argument prints the same on Python 2 and Python 3.
        print("Bad argument given")
        sys.exit(2)

    settings = Sparse_Add_Models_Settings()
    for opt, arg in opts:
        if opt == '-f':
            settings.num_funcs = int(arg)
        elif opt == '-z':
            settings.num_zero_funcs = int(arg)
        elif opt == '-a':
            settings.train_size = int(arg)
        elif opt == '-b':
            settings.validate_size = int(arg)
        elif opt == '-c':
            settings.test_size = int(arg)
        elif opt == "-s":
            settings.snr = float(arg)
        elif opt == "-m":
            # Explicit check rather than assert: asserts vanish under -O.
            if arg not in METHODS:
                print("Unknown method %s" % arg)
                sys.exit(2)
            settings.method = arg
        elif opt == "-t":
            num_threads = int(arg)
        elif opt == "-r":
            num_runs = int(arg)

    print("TOTAL NUM RUNS %d" % num_runs)
    settings.print_settings()
    sys.stdout.flush()

    if not (settings.num_funcs <= len(settings.smooth_fcns)):
        print("num_funcs may not exceed %d" % len(settings.smooth_fcns))
        sys.exit(2)
    smooth_fcn_list = settings.smooth_fcns[:settings.num_funcs] + [
        const_zero
    ] * settings.num_zero_funcs
    data_gen = DataGenerator(settings)

    run_data = []
    for i in range(num_runs):
        observed_data = data_gen.make_additive_smooth_data(smooth_fcn_list)
        run_data.append(Iteration_Data(i, observed_data, settings))

    if settings.method != "SP" and num_threads > 1:
        print("Do multiprocessing")
        pool = Pool(num_threads)
        try:
            results = pool.map(fit_data_for_iter_safe, run_data)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        print("Avoiding multiprocessing")
        # Evaluated eagerly, so every fit finishes before results are tallied.
        results = [fit_data_for_iter_safe(d) for d in run_data]

    method_results = MethodResults(settings.method,
                                   settings.method_result_keys)
    num_crashes = 0
    for r in results:
        if r is not None:
            method_results.append(r)
        else:
            num_crashes += 1
    print("==========TOTAL RUNS %d============" % method_results.get_num_runs())
    method_results.print_results()
    print("num crashes %d" % num_crashes)