def test_nextSNPs(self, b):
    '''
    Check that nextSNPs(b) returns a standardized block of genotypes.

    Opens test/vcf_test/test.bin, requests a block of size b and asserts
    that the result has shape (5, b) and that every column has mean ~0 and
    standard deviation ~1 (within 0.01).

    b is the block size handed to nextSNPs; it is supplied by the caller
    (how the test is parameterized is not visible in this block).
    '''
    bin_file = ld.VcfBINFile('test/vcf_test/test.bin', self.N, self.snp)
    block = bin_file.nextSNPs(b)
    assert block.shape == (5, b)

    column_means = np.mean(block, axis=0)
    # Single-argument parenthesized print behaves the same on Python 2 and 3.
    print(column_means)

    # roundoff error --> can't test equality to zero directly
    assert np.all(np.abs(column_means) < 0.01)
    assert np.all(np.abs(np.std(block, axis=0) - 1) < 0.01)
def setUp(self):
    '''
    Build the shared fixture.

    Sets M = 10 and N = 2, a 10 x 2 float64 annotation matrix, the SNP
    file handle for test.ldscore.snp and the BIN file handle for
    test.ldscore.bin (opened with self.N and self.snp).
    '''
    self.M = 10
    self.N = 2

    # Flat vector written two values per line so that each line is one row
    # of the reshaped (10, 2) matrix.
    flat_annot = np.array(
        (1, 0,
         0, 1,
         0, 1,
         1, 0,
         0, 1,
         0, 1,
         1, 0,
         0, 1,
         1, 0,
         0, 1),
        dtype='float64')
    self.annot = flat_annot.reshape((10, 2))

    self.snp = ps.VcfSNPFile('test/vcf_test/test.ldscore.snp')
    self.bin = ld.VcfBINFile(
        'test/vcf_test/test.ldscore.bin',
        self.N,
        self.snp)
def test_nextSNPs_maf_ref(self, b):
    '''
    Check nextSNPs(b, minorRef=True) against the default call and against
    a copy of the data with the reference alleles swapped.

    Steps:
      1. read a block with the default arguments (x);
      2. rewind and read the same block with minorRef=True (y); x == y;
      3. invert the genotype bits and replace freq by 1 - freq, rewind and
         read with minorRef=True again (z); y == z.

    b is the block size handed to nextSNPs; it is supplied by the caller
    (how the test is parameterized is not visible in this block).
    '''
    bin_file = ld.VcfBINFile('test/vcf_test/test.bin', self.N, self.snp)

    default_block = bin_file.nextSNPs(b)

    # Rewind the cursor so the next read covers the same SNPs.
    bin_file._currentSNP -= b
    minor_ref_block = bin_file.nextSNPs(b, minorRef=True)
    assert np.all(default_block == minor_ref_block)

    # switch reference alleles
    bin_file.geno = ~bin_file.geno
    bin_file.freq = np.ones(bin_file.m) - bin_file.freq
    # Single-argument parenthesized print behaves the same on Python 2 and 3.
    print(bin_file.geno)

    bin_file._currentSNP -= b
    flipped_block = bin_file.nextSNPs(b, minorRef=True)
    assert np.all(minor_ref_block == flipped_block)
def test_bin(self):
    '''
    Check the basic attributes of a BIN file opened with mafMin=0.2.

    Asserts m == 4, n == self.N, that geno holds n * m entries and equals
    self.geno, and that every entry of freq equals 0.4.
    '''
    bin_file = ld.VcfBINFile('test/vcf_test/test.bin', self.N, self.snp,
                             mafMin=0.2)

    # Debug output; single-argument parenthesized print behaves the same on
    # Python 2 and 3.
    print(bin_file.geno)
    print(len(bin_file.geno))
    print(bin_file.freq)

    assert bin_file.m == 4
    assert bin_file.n == self.N
    assert len(bin_file.geno) == bin_file.n * bin_file.m
    # NOTE(review): self.geno has to be provided by the fixture; the setUp
    # visible in this file does not assign it -- confirm.
    assert bin_file.geno == self.geno
    assert np.all(bin_file.freq == 0.4 * np.ones(4))
def test_filter_indivs(self):
    '''
    Filter individuals only (keep_indivs = [1, 2, 3]).

    The genotype matrix should look like this after filtering individuals

        011111
        011100
        010011

    The monomorphic SNPs at left should be removed by _filter_monomorphic,
    leaving 3 individuals and 4 SNPs.
    '''
    kept_indivs = [1, 2, 3]
    bin_file = ld.VcfBINFile(
        'test/vcf_test/test.bin',
        self.N,
        self.snp,
        keep_indivs=kept_indivs)

    expected_geno = ba.bitarray('110110101101')
    assert bin_file.n == 3
    assert bin_file.m == 4
    assert bin_file.geno == expected_geno
def test_filter_snps_and_indivs(self):
    '''
    Filter SNPs (keep_snps = [1, 3, 5]) and individuals
    (keep_indivs = [1, 2, 3]) at the same time.

    The genotype matrix should look like this after filtering SNPs and
    individuals

        111
        110
        101

    The monomorphic SNP at left should be removed by _filter_monomorphic,
    leaving 3 individuals and 2 SNPs.
    '''
    kept_snps = [1, 3, 5]
    kept_indivs = [1, 2, 3]
    bin_file = ld.VcfBINFile(
        'test/vcf_test/test.bin',
        self.N,
        self.snp,
        keep_snps=kept_snps,
        keep_indivs=kept_indivs)

    expected_geno = ba.bitarray('110101')
    assert bin_file.n == 3
    assert bin_file.m == 2
    assert bin_file.geno == expected_geno
def test_filter_snps(self):
    '''
    Filter SNPs only (keep_snps = [1, 3, 4]).

    The genotype matrix should look like this after filtering SNPs

        100
        111
        110
        101
        100

    The monomorphic SNP at left should be removed by _filter_monomorphic,
    leaving 5 individuals and 2 SNPs.
    '''
    kept_snps = [1, 3, 4]
    bin_file = ld.VcfBINFile('test/vcf_test/test.bin', self.N, self.snp,
                             keep_snps=kept_snps)
    # Debug output; single-argument parenthesized print behaves the same on
    # Python 2 and 3.
    print(bin_file.geno)

    assert bin_file.m == 2
    assert bin_file.n == 5
    assert bin_file.geno == ba.bitarray('0110001010')
def test_nextSNPs_errors2(self):
    '''
    Open test/vcf_test/test.bin and request 7 SNPs in a single call.

    The body contains no assertion.
    NOTE(review): judging by the name this call is presumably expected to
    raise; the mechanism that checks for the exception is not visible in
    this block -- confirm.
    '''
    bin_file = ld.VcfBINFile(
        'test/vcf_test/test.bin',
        self.N,
        self.snp)
    bin_file.nextSNPs(7)
def test_bad_filename(self):
    '''
    Try to open test/vcf_test/test.ind as a BIN file.

    The body contains no assertion; only the construction matters.
    NOTE(review): presumably the constructor is expected to raise for this
    file name; the mechanism that checks for the exception is not visible
    in this block -- confirm.
    '''
    # The constructed object is never used, so it is not bound to a name.
    ld.VcfBINFile(
        'test/vcf_test/test.ind',
        self.N,
        self.snp)
def test_mafMin(self):
    '''
    Open test/vcf_test/test.bin with mafMin=0.45.

    The body contains no assertion; only the construction matters.
    NOTE(review): presumably the constructor is expected to raise with
    this threshold; the mechanism that checks for the exception is not
    visible in this block -- confirm.
    '''
    # The constructed object is never used, so it is not bound to a name.
    ld.VcfBINFile(
        'test/vcf_test/test.bin',
        self.N,
        self.snp,
        mafMin=0.45)