Beispiel #1
0
    def __init__(
        self,
        entries_path,
        phenotype_path=None,
        covariate_path=None,
        ibd_path=None,
        one_hot_covariates=None,
        bool_covariates=None,
        drop_covariates=None,
    ):
        """
        Creates a Population class instance.

        :param entries_path: Handed to ``_load_entries`` to build
            ``self.entries``
        :param phenotype_path: Handed to ``_load_phenotype``; may be None
        :param covariate_path: Handed to ``_load_covariates``; may be None
        :param ibd_path: File read with ``load_sparse_csr`` into ``self.ibd``;
            when falsy, ``self.ibd`` is None
        :param one_hot_covariates: Stored as ``self.one_hot_covariates``;
            None (the default) means a fresh empty list
        :param bool_covariates: Stored as ``self.bool_covariates``;
            None (the default) means a fresh empty list
        :param drop_covariates: Stored as ``self.drop_covariates``;
            None (the default) means a fresh empty list
        """
        self.entries = self._load_entries(entries_path)
        # Reverse lookup: indexed by the values of `entries`, yielding the
        # corresponding index labels of `entries`.
        self.entry_map = pd.Series(self.entries.index,
                                   index=self.entries.values,
                                   name='id')
        self.phenotype = self._load_phenotype(phenotype_path)
        self.covariate_info = None
        # Build a new list per instance instead of sharing one mutable default
        # object between every Population created without these arguments.
        self.one_hot_covariates = (
            [] if one_hot_covariates is None else one_hot_covariates
        )
        self.drop_covariates = [] if drop_covariates is None else drop_covariates
        self.bool_covariates = [] if bool_covariates is None else bool_covariates
        # NOTE(review): the covariate option lists are assigned before this
        # call, presumably because _load_covariates reads them - keep the order.
        self.covariates = self._load_covariates(covariate_path)
        self.ibd = load_sparse_csr(ibd_path) if ibd_path else None
        self.he = None
        self.results = None

        self.informative_indices = self._informative_indices()
Beispiel #2
0
    def _load_ibd(self, ibd_path):
        """
        Reads the IBD matrix and indexes it by this instance's entries.

        :param ibd_path: Path to IBD file, or None when there is none
        :return: None when ``ibd_path`` is None; otherwise the matrix read by
            ``load_sparse_csr``, indexed on rows and then on columns with
            ``self.entries.to_numpy()``
        """
        if ibd_path is not None:
            full_matrix = load_sparse_csr(ibd_path)
            # One selector, applied to the rows first and then to the columns.
            selector = self.entries.to_numpy()
            row_subset = full_matrix[selector]
            return row_subset[:, selector]

        return None
Beispiel #3
0
def SciLMM(
    simulate=False,
    sample_size=100000,
    sparsity_factor=0.001,
    gen_exp=1.4,
    init_keep_rate=0.8,
    fam=None,
    ibd=False,
    epis=False,
    dom=False,
    ibd_path=False,
    epis_path=False,
    dom_path=False,
    gen_y=False,
    y=None,
    cov=None,
    he=False,
    lmm=False,
    reml=False,
    sim_num=100,
    intercept=False,
    verbose=False,
    output_folder=".",
    remove_cycles=False,
    check_num_parents=False,
):
    """
    Run the pedigree pipeline: obtain a relationship matrix, build covariance
    matrices, assemble covariates and a phenotype, then optionally estimate.

    :param simulate: When true (and ``fam`` is not given), simulate a pedigree
        with ``simulate_tree`` and write it to ``<output_folder>/rel.fam``
    :param sample_size: Simulation parameter; must be > 0
    :param sparsity_factor: Simulation parameter; must be strictly between
        0 and 1
    :param gen_exp: Simulation parameter; must be > 0
    :param init_keep_rate: Simulation parameter; must be > 0 and <= 1
    :param fam: Path of a pedigree file read with ``read_fam``; takes
        precedence over ``simulate``
    :param ibd: Compute the IBD matrix from the relationship matrix and save
        ``IBD.npz``, ``L.npz`` and ``D.npz`` in ``output_folder``
    :param epis: Compute the pairwise-epistasis matrix from the IBD matrix and
        save ``Epistasis.npz``
    :param dom: Compute the dominance matrix and save ``Dominance.npz``
    :param ibd_path: Flag (not a path): when truthy, load
        ``<output_folder>/IBD.npz`` instead of computing it
    :param epis_path: Flag: when truthy, load ``<output_folder>/Epistasis.npz``
    :param dom_path: Flag: when truthy, load ``<output_folder>/Dominance.npz``
    :param gen_y: Simulate a phenotype with random fixed effects and variance
        components and save it to ``<output_folder>/y.npy``
    :param y: Path of a ``.npy`` phenotype file; when given it is loaded and
        used even if ``gen_y`` also produced a phenotype
    :param cov: Path of a ``.npy`` covariate file
    :param he: Print the result of ``compute_HE``
    :param lmm: Print the result of ``LMM``
    :param reml: Forwarded to ``LMM``
    :param sim_num: Forwarded to ``LMM``
    :param intercept: Forwarded to the phenotype simulation and estimators
    :param verbose: Currently unused
    :param output_folder: Folder that every file above is read from or
        written to
    :param remove_cycles: Forwarded to ``organize_rel``
    :param check_num_parents: Forwarded to ``organize_rel``
    :raises Exception: On invalid arguments or when a required input
        (output folder, relationship matrix, IBD matrix, covariates,
        phenotype) is missing
    """
    if ibd or epis or dom:
        if not os.path.exists(output_folder):
            raise Exception("The output folder does not exists")

    if he or lmm:
        if y is None and gen_y is False:
            raise Exception("Can't estimate without a target value (--y)")

    # Remember the caller's phenotype file separately: the name `y` is reused
    # below for the phenotype vector itself.
    y_path = y

    # `sex` starts as None so that a run without a pedigree can be detected
    # later instead of failing with a NameError.
    rel, sex, interest_in_relevant = None, None, None
    if fam:
        rel_org, sex, interest, entries_dict = read_fam(fam_file_path=fam)
        rel, interest_in_relevant = organize_rel(
            rel_org,
            interest,
            remove_cycles=remove_cycles,
            check_num_parents=check_num_parents,
        )
        # TODO: have to do sex as well in this version
        # NOTE(review): `sex` is not subset with `interest_in_relevant` here,
        # so it may not line up with `rel` - confirm against organize_rel.
        entries_list = np.array(list(entries_dict.values()))[
            interest_in_relevant
        ]
        np.save(os.path.join(output_folder, "entries_ids.npy"), entries_list)
    elif simulate:
        if sample_size <= 0:
            raise Exception("Sample size should be a positive number")
        if (sparsity_factor <= 0) or (sparsity_factor >= 1):
            raise Exception("Sparsity factor is within the range (0, 1)")
        if gen_exp <= 0:
            raise Exception("gen_exp is a positive number")
        if (init_keep_rate <= 0) or (init_keep_rate > 1):
            raise Exception("init_keep_rate is within the range (0, 1)")
        rel, sex, _ = simulate_tree(
            sample_size, sparsity_factor, gen_exp, init_keep_rate
        )
        write_fam(os.path.join(output_folder, "rel.fam"), rel, sex, None)

    # if no subset of interest has been specified, keep all indices
    # (`np.bool` no longer exists in current NumPy, so use the builtin, and
    # only touch `rel` when a relationship matrix was actually produced).
    if interest_in_relevant is None and rel is not None:
        interest_in_relevant = np.ones(rel.shape[0], dtype=bool)

    if ibd_path:
        ibd = load_sparse_csr(os.path.join(output_folder, "IBD.npz"))
    elif ibd:
        if rel is None:
            raise Exception("No relationship matrix given")
        ibd, L, D = simple_numerator(rel)
        # keep the original L and D because they are useless otherwise
        save_sparse_csr(os.path.join(output_folder, "IBD.npz"), ibd)
        save_sparse_csr(os.path.join(output_folder, "L.npz"), L)
        save_sparse_csr(os.path.join(output_folder, "D.npz"), D)
    else:
        ibd = None

    if epis_path:
        epis = load_sparse_csr(os.path.join(output_folder, "Epistasis.npz"))
    elif epis:
        if ibd is None:
            raise Exception("Pairwise-epistasis requires an ibd matrix")
        epis = pairwise_epistasis(ibd)
        save_sparse_csr(os.path.join(output_folder, "Epistasis.npz"), epis)
    else:
        epis = None

    if dom_path:
        dom = load_sparse_csr(os.path.join(output_folder, "Dominance.npz"))
    elif dom:
        if ibd is None or rel is None:
            raise Exception(
                "Dominance requires both an ibd matrix and a relationship matrix"
            )
        dom = dominance(rel, ibd)
        save_sparse_csr(os.path.join(output_folder, "Dominance.npz"), dom)
    else:
        dom = None

    covariance_matrices = [
        mat for mat in (ibd, epis, dom) if mat is not None
    ]

    # Covariates are the sex column (when a pedigree exists) followed by the
    # columns loaded from the covariate file (when one is given).
    # NOTE(review): the previous code stacked the path string itself with the
    # loaded array; sex column + loaded covariates is the assumed intent.
    covariate_blocks = []
    if sex is not None:
        covariate_blocks.append(sex[:, np.newaxis])
    if cov is not None:
        covariate_blocks.append(np.load(cov))
    if not covariate_blocks:
        raise Exception(
            "No covariates available: provide a pedigree (fam or simulate) "
            "or a covariate file (cov)"
        )
    cov = np.hstack(covariate_blocks)

    y = None
    if gen_y:
        sigs = np.random.rand(len(covariance_matrices) + 1)
        sigs /= sigs.sum()
        fe = np.random.rand(cov.shape[1] + intercept) / 100
        print(
            "Generating y with fixed effects: {} and sigmas : {}".format(
                fe, sigs
            )
        )
        y = simulate_phenotype(covariance_matrices, cov, sigs, fe, intercept)
        np.save(os.path.join(output_folder, "y.npy"), y)
    # Load from the caller's file path, never from the generated array.
    if y_path is not None:
        y = np.load(y_path)

    if he:
        print(compute_HE(y, cov, covariance_matrices, intercept))

    if lmm:
        print(
            LMM(
                SparseCholesky(),
                covariance_matrices,
                cov,
                y,
                with_intercept=intercept,
                reml=reml,
                sim_num=sim_num,
            )
        )
Beispiel #4
0
                        help='Gen size = gen_exp X prev gen size')
    parser.add_argument(
        '--init_keep_rate',
        dest='init_keep_rate',
        type=float,
        default=0.8,
        help='1 - number of edges to remove before iteration begins')
    parser.add_argument('--save_folder',
                        dest='save_folder',
                        type=str,
                        default='.',
                        help='which folder it should save the output to.')

    args = parser.parse_args()
    if args.sample_size <= 0:
        raise Exception("Sample size should be a positive number")
    if (args.sparsity_factor <= 0) or (args.sparsity_factor >= 1):
        raise Exception("Sparsity factor is within the range (0, 1)")
    if args.gen_exp <= 0:
        raise Exception("gen_exp is a positive number")
    if (args.init_keep_rate <= 0) or (args.init_keep_rate > 1):
        raise Exception("init_keep_rate is within the range (0, 1)")

    rel = load_sparse_csr('rel.npz')
    print(count_IBD_nonzero(rel))

    # rel, sex, gen_ind = simulate_tree(args.sample_size, args.sparsity_factor, args.gen_exp, args.init_keep_rate)
    # save_sparse_csr(os.path.join(args.save_folder, 'rel.npz'), rel)
    # np.save(os.path.join(args.save_folder, 'sex.npy'), sex)
    # np.save(os.path.join(args.save_folder, 'gen_ind.npy'), gen_ind)
Beispiel #5
0
        dest="init_keep_rate",
        type=float,
        default=0.8,
        help="1 - number of edges to remove before iteration begins",
    )
    parser.add_argument(
        "--save_folder",
        dest="save_folder",
        type=str,
        default=".",
        help="which folder it should save the output to.",
    )

    args = parser.parse_args()
    if args.sample_size <= 0:
        raise Exception("Sample size should be a positive number")
    if (args.sparsity_factor <= 0) or (args.sparsity_factor >= 1):
        raise Exception("Sparsity factor is within the range (0, 1)")
    if args.gen_exp <= 0:
        raise Exception("gen_exp is a positive number")
    if (args.init_keep_rate <= 0) or (args.init_keep_rate > 1):
        raise Exception("init_keep_rate is within the range (0, 1)")

    rel = load_sparse_csr("rel.npz")
    print(count_IBD_nonzero(rel))

    # rel, sex, gen_ind = simulate_tree(args.sample_size, args.sparsity_factor, args.gen_exp, args.init_keep_rate)
    # save_sparse_csr(os.path.join(args.save_folder, 'rel.npz'), rel)
    # np.save(os.path.join(args.save_folder, 'sex.npy'), sex)
    # np.save(os.path.join(args.save_folder, 'gen_ind.npy'), gen_ind)