def test_hist_metrics(insert_metrics):
    """Parse an insert-size metrics file as both a metrics and a histogram table.

    ``insert_metrics`` is unpacked as ``(module, command, version, end, pdir)``;
    only ``pdir`` (the directory holding the output file) is used here.
    """
    _module, _command, _version, _end, outdir = insert_metrics
    path = str(outdir.join("medium.insert_size_metrics"))
    # Default parse yields the metrics table; key="hist" selects the histogram.
    metrics_table = odo(path, DataFrame)
    histogram = odo(path, DataFrame, key="hist")
    assert all(metrics_table["MEDIAN_INSERT_SIZE"] == [367])
    assert all(histogram["insert_size"][0:3] == [19, 22, 23])
def test_vsearch_fastq_stats(data):
    """Check column and index names of two tables parsed from a fastq stats file.

    ``data`` is unpacked as ``(module, command, version, end, pdir)``; only
    ``pdir`` is used to locate ``medium.fastq_stats.txt``.
    """
    _module, _command, _version, _end, outdir = data
    path = str(outdir.join("medium.fastq_stats.txt"))

    # Table returned when no key is given.
    default_table = odo(path, DataFrame)
    assert list(default_table.columns) == ["N", "Pct", "AccPct"]
    assert default_table.index.name == "L"

    # Table selected by its section title.
    truncate_table = odo(path, DataFrame, key="Truncate at first Q")
    assert list(truncate_table.columns) == ["Q=5", "Q=10", "Q=15", "Q=20"]
    assert truncate_table.index.name == "Len"
def test_qualimap(data):
    """Parse the first qualimap output file found in the fixture directory.

    For commands starting with ``qualimap_bamqc_genome_results`` the
    ``Coverage_per_contig`` table is checked; for any other command the
    default table must not have ``#`` in its first column name.
    """
    _module, command, _version, _end, outdir = data
    is_genome_results = command.startswith("qualimap_bamqc_genome_results")
    path = str(outdir.listdir()[0])

    if not is_genome_results:
        table = odo(path, DataFrame)
        assert "#" not in table.columns[0]
        return

    coverage = odo(path, DataFrame, key='Coverage_per_contig')
    assert list(coverage.columns) == ['chrlen', 'mapped_bases',
                                      'mean_coverage', 'sd']
    assert list(coverage.index)[0] == 'scaffold1'
def test_QUAL(bcftools_stats):
    """Check the ``QUAL`` table parsed from a bcftools stats file.

    The expected SNP count in row 3 depends on ``end``: 83 for ``"pe"``,
    90 otherwise.
    """
    _module, _command, _version, end, outdir = bcftools_stats
    path = str(outdir.join("medium.call.stats"))
    qual = odo(path, DataFrame, key="QUAL")
    assert "number_of_transitions_(1st_ALT)" in list(qual.columns)
    if end == "pe":
        expected_snps = 83
    else:
        expected_snps = 90
    assert qual.loc[3]["number_of_SNPs"] == expected_snps
def test_basic_statistics(bcftools_stats):
    """Check the default table parsed from a bcftools stats file.

    The first index label must be ``number of samples`` and the record count
    must be 10667 for ``end == "pe"`` and 7400 otherwise.
    """
    _module, _command, _version, end, outdir = bcftools_stats
    path = str(outdir.join("medium.call.stats"))
    summary = odo(path, DataFrame)
    assert list(summary.index)[0] == 'number of samples'
    if end == "pe":
        expected_records = 10667
    else:
        expected_records = 7400
    assert summary.loc["number of records", "value"] == expected_records
def test_cutadapt(cutadapt_metrics):
    """Check the adapter read count parsed from a cutadapt metrics file.

    The row label differs between single-end (``"se"``) and paired-end
    (``"pe"``) runs; any other ``end`` value results in no assertion.
    """
    _module, _command, _version, end, outdir = cutadapt_metrics
    path = str(outdir.join("cutadapt_metrics.txt"))
    table = odo(path, DataFrame)
    if end == "se":
        adapters = table.loc["Reads with adapters"]["value"]
        assert adapters == 792
    elif end == "pe":
        adapters = table.loc["Read 1 with adapter"]["value"]
        assert adapters == 792
def test_metrics(align_metrics):
    """Check the mean read length parsed from an alignment metrics file.

    ``align_metrics`` is unpacked as ``(module, command, version, end, pdir)``.
    For ``end == "pe"`` the ``FIRST_OF_PAIR`` row is compared against 92.29
    (tolerance 0.01); otherwise the ``UNPAIRED`` row is compared against
    92.29975 (tolerance 0.001).
    """
    module, command, version, end, pdir = align_metrics
    fn = pdir.join("medium.align_metrics")
    metrics = odo(str(fn), DataFrame)
    if end == "pe":
        category, expected, tolerance = "FIRST_OF_PAIR", 92.29, 0.01
    else:
        category, expected, tolerance = "UNPAIRED", 92.29975, 0.001
    observed = metrics.loc[category]["MEAN_READ_LENGTH"]
    # Compare the absolute difference: a signed difference would let any
    # value smaller than the reference pass the tolerance check.
    assert abs(observed - expected) < tolerance
def test_bamtools_pivot(bamtools_data):
    """Parse bamtools stats into a pivoted table and check the mapped reads.

    The first file in the fixture directory is parsed with pivot arguments
    (``values``, ``columns``, ``index``) and a regex that captures ``sample``
    from the path. The expected count is 59499 for ``end == "se"`` and 119413
    otherwise.
    """
    _module, _command, _version, end, outdir = bamtools_data
    stats_file = str(outdir.listdir()[0])
    pivoted = odo(
        stats_file,
        DataFrame,
        values=["value", "percent"],
        columns="statistic",
        index="sample",
        regex=".*/(?P<sample>medium.*)"
    )
    if end == "se":
        expected_mapped = 59499
    else:
        expected_mapped = 119413
    assert pivoted["value", "Mapped reads"].loc["medium.stats"] == expected_mapped
def test_per_base_sequence_quality(fastqc_data):
    """Check the shape of the ``Per_base_sequence_quality`` fastqc table.

    The expected row count depends on the minor component of ``version``:
    28 rows when it is at most 10, 55 rows otherwise. Six columns are
    expected in either case.
    """
    _module, _command, version, _end, outdir = fastqc_data
    archive = str(outdir.join("medium_fastqc.zip"))
    quality = odo(archive, DataFrame, key="Per_base_sequence_quality")
    # version must have exactly three dot-separated parts.
    _major, minor, _patch = version.split(".")
    expected_rows = 28 if int(minor) <= 10 else 55
    assert quality.shape[0] == expected_rows
    assert quality.shape[1] == 6
def test_rseqc_parse(data):
    """Smoke test: rseqc output files must parse without raising.

    The first file in the fixture directory is always parsed; for the
    ``rseqc_read_duplication`` command the second file is parsed as well.
    No assertions are made on the parsed content.
    """
    _module, command, _version, _end, outdir = data
    first_file = outdir.listdir()[0]
    has_second_output = command == "rseqc_read_duplication"
    odo(str(first_file), DataFrame)
    if has_second_output:
        second_file = outdir.listdir()[1]
        odo(str(second_file), DataFrame)
def test_basic_statistics(fastqc_data):
    """Check the default (basic statistics) table parsed from a fastqc archive.

    The fifth index label depends on the minor component of ``version``:
    ``Sequences flagged as poor quality`` from 11 onwards, ``Filtered
    Sequences`` before that. The reported filename must be ``medium.bam``.
    """
    _module, _command, version, _end, outdir = fastqc_data
    archive = str(outdir.join("medium_fastqc.zip"))
    stats = odo(archive, DataFrame)
    # version must have exactly three dot-separated parts.
    _major, minor, _patch = version.split(".")
    if int(minor) >= 11:
        filtered_label = 'Sequences flagged as poor quality'
    else:
        filtered_label = 'Filtered Sequences'
    expected_index = ['Filename', 'File type', 'Encoding', 'Total Sequences',
                      filtered_label, 'Sequence length', '%GC']
    assert list(stats.index) == expected_index
    assert stats.loc["Filename", "Value"] == "medium.bam"
def test_GCC(samtools_stats):
    """Check column ``A`` of row 1 in the ``GCC`` samtools stats table.

    Expected values are looked up by samtools ``version`` and by ``end``
    (``"se"`` or ``"pe"``); an unknown version or end raises ``KeyError``.
    """
    expected_by_version = {
        '1.2': {'se': 30.12, 'pe': 30.21},
        '1.3.1': {'se': 30.19, 'pe': 30.27},
        '1.4.1': {'se': 30.19, 'pe': 30.27},
    }
    _module, _command, version, end, outdir = samtools_stats
    path = str(outdir.join("medium.stats.txt"))
    resource = samtools.resource_samtools_stats(path, key="GCC")
    gcc = odo(resource, DataFrame)
    assert gcc.loc[1]["A"] == expected_by_version[version][end]
def test_FFQ(samtools_stats):
    """Check column ``33`` of row 1 in the ``FFQ`` samtools stats table.

    Expected values are looked up by samtools ``version`` and by ``end``
    (``"se"`` or ``"pe"``); an unknown version or end raises ``KeyError``.
    """
    expected_by_version = {
        '1.2': {'se': 27624, 'pe': 27630},
        '1.3.1': {'se': 27598, 'pe': 27598},
        '1.4.1': {'se': 27598, 'pe': 27598},
    }
    _module, _command, version, end, outdir = samtools_stats
    path = str(outdir.join("medium.stats.txt"))
    resource = samtools.resource_samtools_stats(path, key="FFQ")
    ffq = odo(resource, DataFrame)
    assert ffq.loc[1][33] == expected_by_version[version][end]
def test_basic_statistics(samtools_stats):
    """Check the default table parsed from a samtools stats file.

    The first index label must be ``raw total sequences`` and the
    ``sequences`` value must match the count expected for the given
    samtools ``version`` and ``end``.
    """
    expected_by_version = {
        '1.2': {'se': 60037, 'pe': 120110},
        '1.3.1': {'se': 60000, 'pe': 120000},
        '1.4.1': {'se': 60000, 'pe': 120000},
    }
    _module, _command, version, end, outdir = samtools_stats
    path = str(outdir.join("medium.stats.txt"))
    resource = samtools.resource_samtools_stats(path)
    summary = odo(resource, DataFrame)
    assert list(summary.index)[0] == 'raw total sequences'
    assert summary.loc["sequences", "value"] == expected_by_version[version][end]
def test_mapdamage_5pCtoT(mapdamage_data):
    """Check that ``5pCtoT_freq.txt`` parses into a table indexed by ``pos``."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("5pCtoT_freq.txt"))
    freq = odo(path, DataFrame)
    assert freq.index.name == "pos"
def test_mapdamage_dnacomp_genome(mapdamage_data):
    """Check the first ``A`` value parsed from ``dnacomp_genome.csv``.

    ``mapdamage_data`` is unpacked as ``(module, command, version, end, pdir)``;
    only ``pdir`` is used. The value must lie within 0.0001 of 0.265786.
    """
    module, command, version, end, pdir = mapdamage_data
    fn = pdir.join("dnacomp_genome.csv")
    df = odo(str(fn), DataFrame)
    observed = list(df["A"])[0]
    # Compare the absolute difference: a signed difference would let any
    # value smaller than the reference pass the tolerance check.
    assert abs(observed - 0.265786) < 0.0001
def test_bamtools(bamtools_data):
    """Check the mapped read count parsed from a bamtools stats file.

    The first file in the fixture directory is parsed; the expected count is
    59499 for ``end == "se"`` and 119413 otherwise.
    """
    _module, _command, _version, end, outdir = bamtools_data
    stats_file = str(outdir.listdir()[0])
    table = odo(stats_file, DataFrame)
    if end == "se":
        expected_mapped = 59499
    else:
        expected_mapped = 119413
    assert table.loc["Mapped reads", "value"] == expected_mapped
def test_mapdamage_lgdistribution(mapdamage_data):
    """Check the column names parsed from ``lgdistribution.txt``."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("lgdistribution.txt"))
    distribution = odo(path, DataFrame)
    # The 'Occurences' spelling is the literal column name being asserted.
    expected_columns = ['Std', 'Length', 'Occurences']
    assert list(distribution.columns) == expected_columns
def test_sga_filter(sga_filter_data):
    """Check the failed-kmer-check read count parsed from sga filter output.

    The expected count is looked up by ``version`` and ``end``; only version
    ``0.10.13`` is known, so any other version raises ``KeyError``.
    """
    expected_by_version = {
        '0.10.13': {'se': 9400, 'pe': 16670},
    }
    _module, _command, version, end, outdir = sga_filter_data
    log_file = str(outdir.listdir()[0])
    table = odo(log_file, DataFrame)
    failed_kmer = table.loc["Reads failed kmer check", "value"]
    assert failed_kmer == expected_by_version[version][end]
def test_sga_preprocess(sga_preprocess_data):
    """Check the parsed read count from sga preprocess output.

    The first file in the fixture directory is parsed; the expected count is
    10000 for ``end == "se"`` and 20000 otherwise.
    """
    _module, _command, _version, end, outdir = sga_preprocess_data
    log_file = str(outdir.listdir()[0])
    table = odo(log_file, DataFrame)
    if end == "se":
        expected_parsed = 10000
    else:
        expected_parsed = 20000
    assert table.loc["Reads parsed", "value"] == expected_parsed
def test_star_final_log(data):
    """Check the input read count parsed from ``medium.Log.final.out``."""
    _module, _command, _version, _end, outdir = data
    path = str(outdir.join("medium.Log.final.out"))
    log_table = odo(path, DataFrame)
    input_reads = log_table.loc["Number of input reads", "value"]
    assert input_reads == 30483
def test_mapdamage_misincorporation(mapdamage_data):
    """Check that ``misincorporation.txt`` parses into a 30-column table."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("misincorporation.txt"))
    table = odo(path, DataFrame)
    column_count = table.shape[1]
    assert column_count == 30
def test_IDD(bcftools_stats):
    """Check the ``IDD`` table parsed from a bcftools stats file.

    The ``count`` at index label ``-1`` must be 123 for ``end == "pe"`` and
    95 otherwise.
    """
    _module, _command, _version, end, outdir = bcftools_stats
    path = str(outdir.join("medium.call.stats"))
    indel_dist = odo(path, DataFrame, key="IDD")
    if end == "pe":
        expected_count = 123
    else:
        expected_count = 95
    assert indel_dist.loc[-1]["count"] == expected_count
def test_TSTV(bcftools_stats):
    """Check the ``TSTV`` table parsed from a bcftools stats file.

    The ``ts/tv`` value in row 0 must equal 2.12 for ``end == "pe"`` and
    2.19 otherwise.
    """
    _module, _command, _version, end, outdir = bcftools_stats
    path = str(outdir.join("medium.call.stats"))
    tstv_table = odo(path, DataFrame, key="TSTV")
    if end == "pe":
        expected_ratio = 2.12
    else:
        expected_ratio = 2.19
    assert tstv_table.loc[0]["ts/tv"] == expected_ratio
def test_mapdamage_dnacomp(mapdamage_data):
    """Check the first ``Chr`` entry parsed from ``dnacomp.txt``."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("dnacomp.txt"))
    composition = odo(path, DataFrame)
    first_chromosome = composition["Chr"][0]
    assert first_chromosome == "scaffold1"
def test_summary(fastqc_data):
    """Check that the fastqc ``Summary`` table reports a basic-statistics pass."""
    _module, _command, _version, _end, outdir = fastqc_data
    archive = str(outdir.join("medium_fastqc.zip"))
    summary = odo(archive, DataFrame, key="Summary")
    status = summary.loc['Basic_Statistics', 'Value']
    assert status == "pass"
def test_mapdamage_mcmc_correct_prob(mapdamage_data):
    """Check the index name and column count of the MCMC correct-prob table."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("Stats_out_MCMC_correct_prob.csv"))
    table = odo(path, DataFrame)
    assert table.index.name == "Position"
    column_count = table.shape[1]
    assert column_count == 2
def test_wrong_key(fastqc_data):
    """Requesting an unknown key from a fastqc archive must raise ``KeyError``."""
    _module, _command, _version, _end, outdir = fastqc_data
    archive = str(outdir.join("medium_fastqc.zip"))
    unknown_key = "foo"
    with pytest.raises(KeyError):
        odo(archive, DataFrame, key=unknown_key)
def test_idxstats(samtools_idxstats):
    """Check the first cell parsed from a samtools idxstats file."""
    _module, _command, _version, _end, outdir = samtools_idxstats
    path = str(outdir.join("medium.idxstats.txt"))
    table = odo(path, DataFrame)
    first_cell = table.loc[0][0]
    assert first_cell == "scaffold1"
def test_mapdamage_mcmc_iter_summ(mapdamage_data):
    """Check that the MCMC iteration summary parses into a 6-column table."""
    _module, _command, _version, _end, outdir = mapdamage_data
    path = str(outdir.join("Stats_out_MCMC_iter_summ_stat.csv"))
    table = odo(path, DataFrame)
    column_count = table.shape[1]
    assert column_count == 6