Ejemplo n.º 1
0
    def test_merge_chrom(self):
        """Check that ``merge`` restricted to ``chrom="Y"`` only touches Y SNPs.

        Two SNPs objects are built from the same frame (the contents of
        ``tests/input/generic.csv`` plus four extra chromosome Y rows). The
        second object is then perturbed both on the Y rows and on other rows
        from the generic file; after ``s1.merge([s2], chrom="Y")`` the test
        expects only the Y perturbations to be reported / applied.
        """
        s1 = SNPs("tests/input/generic.csv")
        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
        # equivalent way to stack the extra chromosome Y rows under the
        # existing SNPs while keeping the "rsid" index.
        df = pd.concat(
            [
                s1.snps,
                self.create_snp_df(
                    rsid=["rs100", "rs101", "rs102", "rs103"],
                    chrom=["Y", "Y", "Y", "Y"],
                    pos=[100, 101, 102, 103],
                    genotype=["A", np.nan, "A", "A"],
                ),
            ]
        )
        # Both objects start from independent copies of the same frame so the
        # edits to s2 below do not leak into s1 or into the expected ``df``.
        s1._snps = df.copy()
        s2 = SNPs()
        s2._build = 37
        s2._snps = df.copy()

        # set values for chrom that will be ignored (that would otherwise result in
        # identification of discrepant SNPs or updating genotype)
        # NOTE(review): rs3/rs4/rs5 are presumably rows of generic.csv on
        # chromosomes other than Y -- confirm against the fixture.
        s2._snps.loc["rs3", "pos"] = 1003  # discrepant position
        s2._snps.loc["rs4", "genotype"] = "AA"  # discrepant genotype
        s2._snps.loc["rs5", "genotype"] = "AA"

        # set values for chrom to be merged
        s2._snps.loc["rs100", "genotype"] = "T"  # discrepant genotype
        s2._snps.loc["rs101", "genotype"] = "A"
        s2._snps.loc["rs102", "pos"] = 1002  # discrepant position

        # set expected values for merge result
        df.loc["rs100", "genotype"] = np.nan  # discrepant genotype sets to np.nan
        df.loc["rs101", "genotype"] = "A"  # updates np.nan

        results = s1.merge([s2], chrom="Y")

        # The merged frame must match the expected frame exactly.
        pd.testing.assert_frame_equal(s1.snps, df, check_exact=True)

        # Only the four Y SNPs are expected to be reported as common, with one
        # position discrepancy (rs102) and one genotype discrepancy (rs100).
        self.assert_results(
            results,
            [
                {
                    "merged": True,
                    "common_rsids": pd.Index(
                        ["rs100", "rs101", "rs102", "rs103"], name="rsid"
                    ),
                    "discrepant_position_rsids": pd.Index(["rs102"], name="rsid"),
                    "discrepant_genotype_rsids": pd.Index(["rs100"], name="rsid"),
                }
            ],
        )

        self.assertEqual(len(s1.discrepant_merge_positions), 1)
        self.assertEqual(len(s1.discrepant_merge_genotypes), 1)
Ejemplo n.º 2
0
    def simulate_snps(
        self,
        chrom="1",
        pos_start=1,
        pos_max=248140902,
        pos_step=100,
        genotype="AA",
        insert_nulls=True,
        null_snp_step=101,
        complement_genotype_one_chrom=False,
        complement_genotype_two_chroms=False,
        complement_snp_step=50,
    ):
        """Build a ``SNPs`` object holding a synthetic table for one chromosome.

        Positions run from ``pos_start`` up to (not including) ``pos_max`` in
        increments of ``pos_step``; rsids are numbered ``rs1``, ``rs2``, ...
        in position order, and every row gets ``chrom`` and ``genotype``.

        If ``insert_nulls`` is true, every ``null_snp_step``-th row (starting
        with the first) has its genotype set to NaN. If one of the complement
        flags is set, every ``complement_snp_step``-th row (starting with the
        first) has its genotype passed through ``self.complement_two_chroms``
        or ``self.complement_one_chrom``; the two-chromosome flag wins when
        both are set.

        Returns the ``SNPs`` object with ``_build`` set to 37 and ``_snps`` set
        to the generated frame.
        """
        simulated = SNPs()
        simulated._build = 37

        pos_values = np.arange(pos_start, pos_max, pos_step, dtype=np.uint32)
        rsid_index = pd.Index(
            [f"rs{number}" for number in range(1, len(pos_values) + 1)],
            name="rsid",
        )

        frame = pd.DataFrame({"chrom": chrom}, index=rsid_index)
        frame["pos"] = pos_values
        frame["genotype"] = genotype

        if insert_nulls:
            null_rows = frame.index[0::null_snp_step]
            frame.loc[null_rows, "genotype"] = np.nan

        # Computed unconditionally so an invalid step fails the same way
        # regardless of which complement flag (if any) is set.
        complement_rows = frame.index[0::complement_snp_step]

        # Two-chromosome complementing takes precedence over one-chromosome.
        if complement_genotype_two_chroms:
            complement = self.complement_two_chroms
        elif complement_genotype_one_chrom:
            complement = self.complement_one_chrom
        else:
            complement = None

        if complement is not None:
            selected = frame.loc[complement_rows, "genotype"]
            frame.loc[complement_rows, "genotype"] = selected.apply(complement)

        simulated._snps = frame
        return simulated