Ejemplo n.º 1
0
 def simulate_relationship(self,
                           sample_size=100000,
                           sparsity_factor=0.001,
                           gen_exp=1.4,
                           init_keep_rate=0.8,
                           **kwargs):
     """
     Simulate a pedigree, write it to disk and compute its relationships.

     The arguments are validated and forwarded to ``simulate_tree``; the
     simulated pedigree is written with ``write_fam`` to
     ``<self.output_folder>/rel.fam`` and that file is then passed to
     ``self.compute_relationships``.

     :param sample_size: Size of the cohort; must be positive.
     :param sparsity_factor: Sparsity setting forwarded to ``simulate_tree``;
         must lie strictly between 0 and 1.
     :param gen_exp: Gen size = gen_exp X prev gen size; must be positive.
     :param init_keep_rate: 1 - number of edges to remove before iteration
         begins; must lie strictly between 0 and 1.
     :param kwargs: Accepted for call compatibility; not used here.
     :return: The result of ``self.compute_relationships`` on the written
         ``rel.fam`` file.
     :raises ValueError: If any argument is outside its valid range.
     """
     # Explicit raises instead of ``assert``: assertions are removed when
     # Python runs with -O, which would silently disable this validation.
     if not sample_size > 0:
         raise ValueError("Sample size should be a positive number")
     if not 0 < sparsity_factor < 1:
         raise ValueError("Sparsity factor must be within the range (0, 1)")
     if not gen_exp > 0:
         raise ValueError("gen_exp should be a positive number")
     if not 0 < init_keep_rate < 1:
         raise ValueError("init_keep_rate must be within the range (0, 1)")

     rel, sex, _ = simulate_tree(sample_size, sparsity_factor, gen_exp,
                                 init_keep_rate)

     # Build the path once so the file written and the file read back
     # cannot drift apart.
     fam_path = os.path.join(self.output_folder, "rel.fam")
     write_fam(fam_path, rel, sex, None)
     return self.compute_relationships(fam_path)
Ejemplo n.º 2
0
def SciLMM(
    simulate=False,
    sample_size=100000,
    sparsity_factor=0.001,
    gen_exp=1.4,
    init_keep_rate=0.8,
    fam=None,
    ibd=False,
    epis=False,
    dom=False,
    ibd_path=False,
    epis_path=False,
    dom_path=False,
    gen_y=False,
    y=None,
    cov=None,
    he=False,
    lmm=False,
    reml=False,
    sim_num=100,
    intercept=False,
    verbose=False,
    output_folder=".",
    remove_cycles=False,
    check_num_parents=False,
):
    """
    Build pedigree covariance matrices and optionally run HE / LMM estimation.

    The pedigree comes either from a .fam file (``fam``) or from a simulation
    (``simulate``). Covariance matrices (IBD, epistasis, dominance) are either
    computed and saved to ``output_folder`` or loaded from it. Estimation
    results are printed; nothing is returned.

    :param simulate: Simulate a pedigree with ``simulate_tree`` (only
        consulted when ``fam`` is not given) and write it to ``rel.fam``.
    :param sample_size: Simulation cohort size; must be positive.
    :param sparsity_factor: Simulation sparsity; must be within (0, 1).
    :param gen_exp: Simulation generation growth factor; must be positive.
    :param init_keep_rate: Simulation keep rate; must be within (0, 1].
    :param fam: Path of a .fam file read with ``read_fam``.
    :param ibd: Compute the IBD matrix (plus L and D) and save them.
    :param epis: Compute the pairwise-epistasis matrix and save it.
    :param dom: Compute the dominance matrix and save it.
    :param ibd_path: Load ``IBD.npz`` from ``output_folder`` instead of
        computing it.
    :param epis_path: Load ``Epistasis.npz`` from ``output_folder``.
    :param dom_path: Load ``Dominance.npz`` from ``output_folder``.
    :param gen_y: Simulate a phenotype and save it as ``y.npy``.
    :param y: Path of a phenotype file loaded with ``np.load``; when given
        together with ``gen_y`` the loaded phenotype is the one used.
    :param cov: Path of a covariates file loaded with ``np.load``; its columns
        are appended after the sex column when a pedigree is available.
    :param he: Print the result of ``compute_HE``.
    :param lmm: Print the result of ``LMM``.
    :param reml: Forwarded to ``LMM``.
    :param sim_num: Forwarded to ``LMM``.
    :param intercept: Forwarded to the phenotype simulation, HE and LMM.
    :param verbose: Currently unused.
    :param output_folder: Folder that matrices and arrays are saved to and
        loaded from.
    :param remove_cycles: Forwarded to ``organize_rel``.
    :param check_num_parents: Forwarded to ``organize_rel``.
    :raises Exception: On a missing output folder, missing phenotype,
        invalid simulation arguments, or missing prerequisite matrices or
        covariates.
    """
    if (ibd or epis or dom) and not os.path.exists(output_folder):
        raise Exception("The output folder does not exists")

    if (he or lmm) and y is None and gen_y is False:
        raise Exception("Can't estimate without a target value (--y)")

    # Remember the caller-supplied phenotype path; ``y`` is reused below for
    # the phenotype array itself.
    y_path = y

    # ``sex`` stays None when neither a .fam file nor a simulation is used.
    rel, sex, interest_in_relevant = None, None, None
    if fam:
        rel_org, sex, interest, entries_dict = read_fam(fam_file_path=fam)
        rel, interest_in_relevant = organize_rel(
            rel_org,
            interest,
            remove_cycles=remove_cycles,
            check_num_parents=check_num_parents,
        )
        # TODO: have to do sex as well in this version
        entries_list = np.array(list(entries_dict.values()))[
            interest_in_relevant
        ]
        np.save(os.path.join(output_folder, "entries_ids.npy"), entries_list)
    elif simulate:
        if sample_size <= 0:
            raise Exception("Sample size should be a positive number")
        if (sparsity_factor <= 0) or (sparsity_factor >= 1):
            raise Exception("Sparsity factor is within the range (0, 1)")
        if gen_exp <= 0:
            raise Exception("gen_exp is a positive number")
        if (init_keep_rate <= 0) or (init_keep_rate > 1):
            raise Exception("init_keep_rate is within the range (0, 1)")
        rel, sex, _ = simulate_tree(
            sample_size, sparsity_factor, gen_exp, init_keep_rate
        )
        write_fam(os.path.join(output_folder, "rel.fam"), rel, sex, None)

    # If no subset of interest has been specified, keep all indices.
    # ``np.bool`` no longer exists in current NumPy, so use the builtin.
    if interest_in_relevant is None and rel is not None:
        interest_in_relevant = np.ones(rel.shape[0], dtype=bool)

    if ibd_path:
        ibd = load_sparse_csr(os.path.join(output_folder, "IBD.npz"))
    elif ibd:
        if rel is None:
            raise Exception("No relationship matrix given")
        ibd, L, D = simple_numerator(rel)
        # keep the original L and D because they are useless otherwise
        save_sparse_csr(os.path.join(output_folder, "IBD.npz"), ibd)
        save_sparse_csr(os.path.join(output_folder, "L.npz"), L)
        save_sparse_csr(os.path.join(output_folder, "D.npz"), D)
    else:
        ibd = None

    if epis_path:
        epis = load_sparse_csr(os.path.join(output_folder, "Epistasis.npz"))
    elif epis:
        if ibd is None:
            raise Exception("Pairwise-epistasis requires an ibd matrix")
        epis = pairwise_epistasis(ibd)
        save_sparse_csr(os.path.join(output_folder, "Epistasis.npz"), epis)
    else:
        epis = None

    if dom_path:
        dom = load_sparse_csr(os.path.join(output_folder, "Dominance.npz"))
    elif dom:
        if ibd is None or rel is None:
            raise Exception(
                "Dominance requires both an ibd matrix and a relationship matrix"
            )
        dom = dominance(rel, ibd)
        save_sparse_csr(os.path.join(output_folder, "Dominance.npz"), dom)
    else:
        dom = None

    covariance_matrices = [
        mat for mat in (ibd, epis, dom) if mat is not None
    ]

    # Covariates: the sex column (when a pedigree was read or simulated)
    # followed by the columns of the user-supplied file. ``cov`` arrives as a
    # path, so it must be loaded before it can be stacked.
    covariate_blocks = []
    if sex is not None:
        covariate_blocks.append(sex[:, np.newaxis])
    if cov is not None:
        covariate_blocks.append(np.load(cov))
    cov = np.hstack(covariate_blocks) if covariate_blocks else None

    if cov is None and (gen_y or he or lmm):
        raise Exception(
            "No covariates available: provide --fam, --simulate or --cov"
        )

    y = None
    if gen_y:
        sigs = np.random.rand(len(covariance_matrices) + 1)
        sigs /= sigs.sum()
        fe = np.random.rand(cov.shape[1] + intercept) / 100
        print(
            "Generating y with fixed effects: {} and sigmas : {}".format(
                fe, sigs
            )
        )
        y = simulate_phenotype(covariance_matrices, cov, sigs, fe, intercept)
        np.save(os.path.join(output_folder, "y.npy"), y)
    # A phenotype file given by the caller takes precedence over a generated
    # one; load it from the saved path, never from the generated array.
    if y_path is not None:
        y = np.load(y_path)

    if he:
        print(compute_HE(y, cov, covariance_matrices, intercept))

    if lmm:
        print(
            LMM(
                SparseCholesky(),
                covariance_matrices,
                cov,
                y,
                with_intercept=intercept,
                reml=reml,
                sim_num=sim_num,
            )
        )