def test_merge_chrom(self):
    """Check that ``merge(..., chrom="Y")`` only considers chromosome Y SNPs.

    Both objects start from the same frame (``generic.csv`` plus four extra
    Y SNPs). ``s2`` is then altered both on rows meant to be ignored by the
    merge and on the Y rows; only the Y-row differences are expected to
    show up in the merge results and in the merged frame.
    """
    s1 = SNPs("tests/input/generic.csv")

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # supported way to stack the extra chromosome Y rows under the loaded
    # SNPs.
    df = pd.concat(
        [
            s1.snps,
            self.create_snp_df(
                rsid=["rs100", "rs101", "rs102", "rs103"],
                chrom=["Y", "Y", "Y", "Y"],
                pos=[100, 101, 102, 103],
                genotype=["A", np.nan, "A", "A"],
            ),
        ]
    )

    s1._snps = df.copy()

    s2 = SNPs()
    s2._build = 37
    s2._snps = df.copy()

    # set values for chrom that will be ignored (that would otherwise result in
    # identification of discrepant SNPs or updating genotype)
    s2._snps.loc["rs3", "pos"] = 1003  # discrepant position
    s2._snps.loc["rs4", "genotype"] = "AA"  # discrepant genotype
    s2._snps.loc["rs5", "genotype"] = "AA"

    # set values for chrom to be merged
    s2._snps.loc["rs100", "genotype"] = "T"  # discrepant genotype
    s2._snps.loc["rs101", "genotype"] = "A"
    s2._snps.loc["rs102", "pos"] = 1002  # discrepant position

    # set expected values for merge result
    df.loc["rs100", "genotype"] = np.nan  # discrepant genotype sets to np.nan
    df.loc["rs101", "genotype"] = "A"  # updates np.nan

    results = s1.merge([s2], chrom="Y")

    pd.testing.assert_frame_equal(s1.snps, df, check_exact=True)

    self.assert_results(
        results,
        [
            {
                "merged": True,
                "common_rsids": pd.Index(
                    ["rs100", "rs101", "rs102", "rs103"], name="rsid"
                ),
                "discrepant_position_rsids": pd.Index(["rs102"], name="rsid"),
                "discrepant_genotype_rsids": pd.Index(["rs100"], name="rsid"),
            }
        ],
    )

    # Exactly one discrepancy of each kind should be recorded, both on Y.
    self.assertEqual(len(s1.discrepant_merge_positions), 1)
    self.assertEqual(len(s1.discrepant_merge_genotypes), 1)
def simulate_snps(
    self,
    chrom="1",
    pos_start=1,
    pos_max=248140902,
    pos_step=100,
    genotype="AA",
    insert_nulls=True,
    null_snp_step=101,
    complement_genotype_one_chrom=False,
    complement_genotype_two_chroms=False,
    complement_snp_step=50,
):
    """Build a ``SNPs`` object holding synthetic, evenly spaced SNPs.

    Parameters
    ----------
    chrom
        Value stored in the ``chrom`` column of every row.
    pos_start, pos_max, pos_step
        Arguments forwarded to ``np.arange`` to generate the ``pos``
        column (``pos_max`` is exclusive); positions are ``uint32``.
    genotype
        Value stored in the ``genotype`` column of every row.
    insert_nulls
        When true, every ``null_snp_step``-th row (starting with the
        first) gets a ``np.nan`` genotype.
    null_snp_step
        Row stride used when inserting null genotypes.
    complement_genotype_one_chrom, complement_genotype_two_chroms
        When set, every ``complement_snp_step``-th row (starting with the
        first) has its genotype passed through
        ``self.complement_one_chrom`` / ``self.complement_two_chroms``.
        The two-chromosome flag wins if both are set.
    complement_snp_step
        Row stride used when complementing genotypes.

    Returns
    -------
    SNPs
        Object with ``_build`` set to 37 and ``_snps`` set to the
        generated frame, indexed by ``rsid`` (``rs1``, ``rs2``, ...).
    """
    simulated = SNPs()
    simulated._build = 37

    pos_values = np.arange(pos_start, pos_max, pos_step, dtype=np.uint32)
    rsids = pd.Index(
        [f"rs{number}" for number in range(1, len(pos_values) + 1)],
        name="rsid",
    )

    frame = pd.DataFrame({"chrom": chrom}, index=rsids)
    frame["pos"] = pos_values
    frame["genotype"] = genotype

    if insert_nulls:
        null_rows = frame.index[::null_snp_step]
        frame.loc[null_rows, "genotype"] = np.nan

    # Computed unconditionally so an invalid step fails the same way
    # whether or not a complement option is enabled.
    complement_rows = frame.index[::complement_snp_step]

    if complement_genotype_two_chroms:
        complement = self.complement_two_chroms
    elif complement_genotype_one_chrom:
        complement = self.complement_one_chrom
    else:
        complement = None

    if complement is not None:
        selected = frame.loc[complement_rows, "genotype"]
        frame.loc[complement_rows, "genotype"] = selected.apply(complement)

    simulated._snps = frame
    return simulated