def test_read_sumstats():
    """Check ps.sumstats on the parse_test fixtures.

    The summary statistics fixture, read with dropna=True and alleles=True,
    must produce exactly one row whose SNP is 'rs1'. Calling ps.sumstats on
    the 'test.l2.ldscore.gz' fixture path must raise ValueError.
    """
    sumstats_path = os.path.join(DIR, 'parse_test/test.sumstats')
    parsed = ps.sumstats(sumstats_path, dropna=True, alleles=True)

    # Exactly one row is expected, and its SNP id is 'rs1'.
    assert_equal(len(parsed), 1)
    assert_array_equal(parsed.SNP, 'rs1')

    # This fixture is not a summary statistics file; parsing it must fail.
    ldscore_path = os.path.join(DIR, 'parse_test/test.l2.ldscore.gz')
    assert_raises(ValueError, ps.sumstats, ldscore_path)
def _read_sumstats(args, log, fh, alleles=False, dropna=False):
    """Read summary statistics via ps.sumstats and drop duplicated SNPs.

    Progress is reported through ``log.log``: one message before reading,
    one with the number of rows read, and, only when rows were removed,
    one with the number of rows dropped for having a duplicated SNP value.

    Args:
        args: Not used by this function.
        log: Object exposing a ``log(message)`` method.
        fh: Passed as the first argument to ``ps.sumstats`` and interpolated
            into the first log message (presumably a file path -- confirm
            against callers).
        alleles: Forwarded to ``ps.sumstats``.
        dropna: Forwarded to ``ps.sumstats``.

    Returns:
        The object returned by ``ps.sumstats`` after
        ``drop_duplicates(subset='SNP')`` (presumably a pandas DataFrame).
    """
    log.log('Reading summary statistics from {S} ...'.format(S=fh))
    parsed = ps.sumstats(fh, alleles=alleles, dropna=dropna)

    n_read = len(parsed)
    log.log('Read summary statistics for {N} SNPs.'.format(N=n_read))

    deduplicated = parsed.drop_duplicates(subset='SNP')
    n_dropped = n_read - len(deduplicated)
    # Stay silent when deduplication removed nothing.
    if n_dropped > 0:
        log.log(
            'Dropped {M} SNPs with duplicated rs numbers.'.format(M=n_dropped))

    return deduplicated