def test_phenos_agree_on_features():
    """Check phenos_agree_on_features against one reference phenotype.

    The constraints hold an "equal" feature (d0) and a "range" feature
    (d1, low=1, high=2). The test expects agreement with an identical
    phenotype and disagreement when either d0 or d1 is changed.
    """
    comparer = phenotype_compare.PhenotypeCompare(
        {
            "d0": {"must_be_same": True, "method": "equal", "params": {}},
            "d1": {
                "must_be_same": False,
                "method": "range",
                "params": {"low": 1, "high": 2},
            },
        }
    )
    feature_names = {"d0", "d1"}

    reference = {"d0": True, "d1": 1.5}
    identical = {"d0": True, "d1": 1.5}
    d1_changed = {"d0": True, "d1": 0.5}
    d0_changed = {"d0": False, "d1": 1.5}

    def agrees_with_reference(other):
        # Always compare against the same reference, over both features.
        return comparer.phenos_agree_on_features(reference, other, feature_names)

    assert agrees_with_reference(identical)
    assert not agrees_with_reference(d1_changed)
    assert not agrees_with_reference(d0_changed)
def run(options):
    """Run the strain-triples analysis described by ``options``.

    Reads the case sample names from ``options.case_names_file`` (one per
    line, trailing whitespace stripped), builds the genotype, phenotype and
    phenotype-comparison objects from the files named in ``options``, and
    then calls ``StrainTriples.run_analysis`` with ``options.out`` and
    ``options.mask_bed_file``.
    """
    with open(options.case_names_file) as names_handle:
        case_sample_names = list(map(str.rstrip, names_handle))

    genotype_data = genotypes.Genotypes(
        file_of_vcf_filenames=options.vcfs_tsv,
        distance_matrix_file=options.distance_matrix,
        variant_counts_file=options.var_counts_file,
    )
    phenotype_data = phenotypes.Phenotypes(options.phenos_tsv)

    with open(options.pheno_constraints_json) as constraints_handle:
        comparer = phenotype_compare.PhenotypeCompare(json.load(constraints_handle))

    analysis = strain_triples.StrainTriples(
        genotype_data,
        phenotype_data,
        comparer,
        top_n_genos=options.top_n_genos,
        max_pheno_diffs=options.max_pheno_diffs,
        processes=options.processes,
    )
    analysis.run_analysis(
        case_sample_names,
        options.out,
        mask_file=options.mask_bed_file,
    )
def test_satisfy_required_differences_two_the_same():
    """Check satisfy_required_differences with two must-be-same features.

    d0 and d1 use "equal" with must_be_same=True; d2 uses "range"
    (low=1, high=2) with must_be_same=False. Every pair is checked in both
    argument orders, and only the pair whose d2 values are 1.5 and 2.5 is
    expected to satisfy the required differences.
    """

    def must_match():
        # Fresh dict per feature so the constraints never share an object.
        return {"must_be_same": True, "method": "equal", "params": {}}

    comparer = phenotype_compare.PhenotypeCompare(
        {
            "d0": must_match(),
            "d1": must_match(),
            "d2": {
                "must_be_same": False,
                "method": "range",
                "params": {"low": 1, "high": 2},
            },
        }
    )

    base = {"d0": True, "d1": True, "d2": 1.5}
    rejected = [
        {"d0": True, "d1": True, "d2": 1.1},
        {"d0": True, "d1": False, "d2": 1.5},
        {"d0": True, "d1": True, "d2": 1.5},
    ]
    accepted = {"d0": True, "d1": True, "d2": 2.5}

    for other in rejected:
        assert not comparer.satisfy_required_differences(base, other)
        assert not comparer.satisfy_required_differences(other, base)

    assert comparer.satisfy_required_differences(base, accepted)
    assert comparer.satisfy_required_differences(accepted, base)
def test_satisfy_required_differences_one_the_same():
    """Check satisfy_required_differences with one must-differ feature.

    d1 uses "equal" with must_be_same=False. The phenotypes only carry a d1
    value (True, False or None). Only the True/False pair is expected to
    satisfy the required differences; equal values and any pair involving
    None are expected not to.
    """
    comparer = phenotype_compare.PhenotypeCompare(
        {
            "d0": {"must_be_same": True, "method": "equal", "params": {}},
            "d1": {"must_be_same": False, "method": "equal", "params": {}},
        }
    )

    true_a = {"d1": True}
    true_b = {"d1": True}
    false_a = {"d1": False}
    none_a = {"d1": None}
    none_b = {"d1": None}

    # Ordered (left, right) argument pairs, listed in the order they are run.
    satisfied_pairs = [(true_a, false_a), (false_a, true_a)]
    unsatisfied_pairs = [
        (true_a, true_a),
        (true_a, true_b),
        (true_b, true_a),
        (true_a, none_a),
        (none_a, true_a),
        (none_a, none_b),
        (none_b, none_a),
    ]

    for left, right in satisfied_pairs:
        assert comparer.satisfy_required_differences(left, right)
    for left, right in unsatisfied_pairs:
        assert not comparer.satisfy_required_differences(left, right)
def run(options):
    """Write the samples matching the wanted phenotypes to ``options.outfile``.

    Converts ``options.wanted_pheno`` to a dict, loads the phenotypes and the
    JSON phenotype constraints, and asks ``Phenotypes.find_matching_cases``
    for the matching samples. The samples are written one per line.

    Raises:
        RuntimeError: if no matching samples are found.
    """
    wanted = utils.command_line_wanted_phenos_to_dict(options.wanted_pheno)
    phenotype_data = phenotypes.Phenotypes(options.phenos_tsv)

    with open(options.pheno_constraints_json) as constraints_handle:
        constraints = json.load(constraints_handle)
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    matching = phenotype_data.find_matching_cases(wanted, comparer)
    if len(matching) == 0:
        raise RuntimeError("No matching samples found")

    with open(options.outfile, "w") as out_handle:
        print(*matching, sep="\n", file=out_handle)
def test_ranked_neighbours_for_one_sample(genos, phenos, constraints):
    """Check ranked_neighbours_for_one_sample for sample "s1".

    Three calls are made: with default options, with max_pheno_dist=2
    (expected to drop s5, whose pheno_dist is 3), and with top_n_genos=2
    (expected to return only s5, with all ranks 0).
    """
    finder = sample_neighbours_finding
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    def neighbours_of_s1(**options):
        return finder.ranked_neighbours_for_one_sample(
            genos, phenos, comparer, "s1", **options
        )

    s5_ranked = finder.RankData(
        sample="s5", rank_sum=2, geno_rank=0, pheno_rank=2, geno_dist=1, pheno_dist=3
    )
    s4_ranked = finder.RankData(
        sample="s4", rank_sum=2, geno_rank=1, pheno_rank=1, geno_dist=2, pheno_dist=2
    )
    s3_ranked = finder.RankData(
        sample="s3", rank_sum=2, geno_rank=2, pheno_rank=0, geno_dist=3, pheno_dist=1
    )

    # Default options: all three neighbours.
    assert neighbours_of_s1() == [s5_ranked, s4_ranked, s3_ranked]

    # Same expectation minus its first entry (s5).
    assert neighbours_of_s1(max_pheno_dist=2) == [s4_ranked, s3_ranked]

    # With top_n_genos=2 only s5 is expected, and its ranks change to 0.
    s5_only = finder.RankData(
        sample="s5", rank_sum=0, geno_rank=0, pheno_rank=0, geno_dist=1, pheno_dist=3
    )
    assert neighbours_of_s1(top_n_genos=2) == [s5_only]
def test_differences():
    """Check PhenotypeCompare.differences in both argument orders.

    d0, d1 and d2 use "equal" with must_be_same=True; d3 uses "range"
    (low=1, high=2) with must_be_same=False and has the same value (2.5) in
    every phenotype. The phenotypes differ from the reference in 0, 1, 2 and
    3 of d0-d2, and the expected result is that count.
    """

    def must_match():
        # Fresh dict per feature so the constraints never share an object.
        return {"must_be_same": True, "method": "equal", "params": {}}

    comparer = phenotype_compare.PhenotypeCompare(
        {
            "d0": must_match(),
            "d1": must_match(),
            "d2": must_match(),
            "d3": {
                "must_be_same": False,
                "method": "range",
                "params": {"low": 1, "high": 2},
            },
        }
    )

    reference = {"d0": True, "d1": True, "d2": True, "d3": 2.5}
    # (phenotype, expected number of differences from the reference)
    cases = [
        ({"d0": True, "d1": True, "d2": True, "d3": 2.5}, 0),
        ({"d0": True, "d1": True, "d2": False, "d3": 2.5}, 1),
        ({"d0": True, "d1": False, "d2": False, "d3": 2.5}, 2),
        ({"d0": False, "d1": False, "d2": False, "d3": 2.5}, 3),
    ]

    assert comparer.differences(reference, reference) == 0
    for other, expected_count in cases:
        assert comparer.differences(reference, other) == expected_count
        assert comparer.differences(other, reference) == expected_count
def test_run_analysis(genos, phenos, constraints, caplog):
    """Smoke test: run the whole ``StrainTriples.run_analysis`` pipeline.

    Only checks that every filename in the values of the returned mapping
    exists on disk.

    The scratch directory is handled with ``os``/``shutil`` instead of
    shelling out to ``rm``, so the test does not depend on a POSIX shell.
    As before, the directory is removed at the end only when all assertions
    have passed.
    """
    import shutil  # local import: only needed for scratch-directory handling

    caplog.set_level(logging.INFO)
    pheno_compare = phenotype_compare.PhenotypeCompare(constraints)
    triples = strain_triples.StrainTriples(
        genos, phenos, pheno_compare, top_n_genos=10
    )
    case_sample_names = ["s1", "s2"]

    tmp_dir = "tmp.strain_triples.run_analysis"
    # Clear anything left by an earlier run; a missing directory is fine.
    shutil.rmtree(tmp_dir, ignore_errors=True)
    os.mkdir(tmp_dir)
    outprefix = os.path.join(tmp_dir, "out")
    mask_file = os.path.join(data_dir, "run_analysis.mask.bed")

    got = triples.run_analysis(case_sample_names, outprefix, mask_file=mask_file)

    # The contents of these files are checked elsewhere. This test is just to
    # check that a run of the whole pipeline doesn't crash
    for filename in got.values():
        assert os.path.exists(filename)

    shutil.rmtree(tmp_dir)
def test_find_strain_triples(genos, phenos, constraints, caplog):
    """Check find_strain_triples for case samples "s1" and "s2".

    One StrainTriple is expected per case: s1 with neighbours s3 and s4, and
    s2 with neighbours s6 and s7.
    """
    caplog.set_level(logging.INFO)
    comparer = phenotype_compare.PhenotypeCompare(constraints)
    finder = strain_triples.StrainTriples(genos, phenos, comparer, top_n_genos=10)

    def nearest(sample_name):
        # Rank data with every rank 0, geno_dist 1 and pheno_dist 0.
        return sample_neighbours_finding.RankData(
            sample=sample_name,
            rank_sum=0,
            geno_rank=0,
            pheno_rank=0,
            geno_dist=1,
            pheno_dist=0,
        )

    s7_ranked = sample_neighbours_finding.RankData(
        sample="s7",
        rank_sum=1,
        geno_rank=1,
        pheno_rank=0,
        geno_dist=2,
        pheno_dist=0,
    )
    expected_triples = [
        strain_triple.StrainTriple("s1", nearest("s3"), nearest("s4")),
        strain_triple.StrainTriple("s2", nearest("s6"), s7_ranked),
    ]

    assert finder.find_strain_triples(["s1", "s2"]) == expected_triples
def test_geno_and_pheno_distances_for_one_sample(genos, phenos, constraints):
    """Check _geno_and_pheno_distances_for_one_sample for sample "s1".

    With default options, distances to s3, s4 and s5 are expected. With
    top_n_genos=3, only s4 and s5 are expected.
    """
    comparer = phenotype_compare.PhenotypeCompare(constraints)
    distances_for = sample_neighbours_finding._geno_and_pheno_distances_for_one_sample

    # Default options.
    geno_distances, pheno_distances = distances_for(genos, phenos, comparer, "s1")
    assert geno_distances == {"s3": 3, "s4": 2, "s5": 1}
    assert pheno_distances == {"s3": 1, "s4": 2, "s5": 3}

    # Limited with top_n_genos.
    geno_distances, pheno_distances = distances_for(
        genos, phenos, comparer, "s1", top_n_genos=3
    )
    assert geno_distances == {"s4": 2, "s5": 1}
    assert pheno_distances == {"s4": 2, "s5": 3}
def test_init_in_particular_sanity_check_constraints():
    """Check that PhenotypeCompare accepts or rejects constraints on init.

    Each case builds a single-feature ("d1") constraints dict from a method
    name and its params. Valid cases must construct without error; invalid
    cases must raise RuntimeError. Cases run in the order listed.
    """
    # (method, params, expected to be accepted)
    cases = [
        ("equal", {}, True),
        # Uses an unknown method
        ("WRONG", {}, False),
        # equal method should not have any params
        ("equal", {"low": 1}, False),
        # Range method should have low and high params
        ("range", {}, False),
        ("range", {"low": 1}, False),
        ("range", {"low": 1, "high": 2}, True),
        # abs_distance should have max_dist param
        ("abs_distance", {}, False),
        ("abs_distance", {"max_dist": 1}, True),
        # percent_distance should have max_percent param
        ("percent_distance", {}, False),
        ("percent_distance", {"max_percent": 42}, True),
    ]

    for method, params, is_valid in cases:
        constraints = {
            "d1": {"must_be_same": True, "method": method, "params": params}
        }
        if is_valid:
            phenotype_compare.PhenotypeCompare(constraints)
        else:
            with pytest.raises(RuntimeError):
                phenotype_compare.PhenotypeCompare(constraints)