예제 #1
0
def create_test_snpit():
    """Build a small in-memory ``SnpIt`` fixture for the tests.

    The fixture holds four lineages (``L1``..``L4``) and seven positions.
    ``lineage_positions`` maps each position to a dict of
    ``variant -> [lineage names]``; names are appended in the order they are
    listed for that position.

    Returns:
        A ``SnpIt`` created with ``threshold=10`` whose ``lineages`` and
        ``lineage_positions`` attributes were replaced by the data above.
    """
    positions = [2, 5, 8, 11, 14, 16, 20]
    # One entry per position: the (lineage name, variant) pairs defined there.
    calls_per_position = [
        [("L1", "A")],
        [("L2", "C")],
        [("L1", "A"), ("L4", "C")],
        [("L2", "T"), ("L3", "G")],
        [("L3", "A")],
        [("L4", "G")],
        [("L1", "C"), ("L3", "T"), ("L4", "G")],
    ]
    lineages = {
        "L1": Lineage(name="L1", species="S1", lineage="Lin1", sublineage="SL1"),
        "L2": Lineage(name="L2", species="S2", lineage="Lin2", sublineage="SL2"),
        "L3": Lineage(name="L3", species="S3", lineage="Lin3", sublineage="SL3"),
        "L4": Lineage(name="L4", species="S4", lineage="Lineage 4", sublineage="SL4"),
    }

    position_map = {}
    for position, calls in zip(positions, calls_per_position):
        variants = position_map[position] = {}
        for lineage_name, base in calls:
            # Group lineage names under the variant they carry at this position.
            variants.setdefault(base, []).append(lineage_name)

    snpit = SnpIt(threshold=10)
    snpit.lineages = lineages
    snpit.lineage_positions = position_map
    return snpit
예제 #2
0
def test_fromCsvEntry_emptyEntryReturnsEmptyLineage():
    """``Lineage.from_csv_entry`` on an empty row equals a default ``Lineage()``."""
    empty_row = {}

    parsed = Lineage.from_csv_entry(empty_row)
    default_lineage = Lineage()

    assert parsed == default_lineage
예제 #3
0
def test_addSnps_emptyFileSnpsRemainEmpty():
    """After ``add_snps`` on ``empty.tsv`` the lineage's ``snps`` equals ``{}``."""
    lineage = Lineage()
    lineage.add_snps(TEST_CASE_DIR / "empty.tsv")

    assert lineage.snps == {}
예제 #4
0
def test_fromCsvEntry_realEntryEntryReturnsLineage():
    """The first row of ``test_library.csv`` parses into the expected Lineage.

    The library file is opened in a ``with`` block so the handle is closed as
    soon as the first row has been read, instead of being left open until
    garbage collection.
    """
    with TEST_CASE_DIR.joinpath("test_library.csv").open() as library_handle:
        # Only the first data row is needed for this test.
        entry = next(csv.DictReader(library_handle))

    actual = Lineage.from_csv_entry(entry)
    expected = Lineage(
        species="M. tuberculosis",
        lineage="Lineage 1",
        sublineage="Sublineage 7",
        name="Indo_Oceanic",
    )

    assert actual == expected
예제 #5
0
def test_addSnps_realFileSnpsContainAllEntries():
    """``add_snps`` on ``test_lineage.tsv`` yields the five expected position/base pairs."""
    expected_snps = {
        1011511: "C",
        1022003: "C",
        1028217: "A",
        1034758: "T",
        1071966: "G",
    }

    lineage = Lineage()
    lineage.add_snps(TEST_CASE_DIR / "test_lineage.tsv")

    assert lineage.snps == expected_snps
예제 #6
0
def test_classifyFasta_multiSampleFastaReturnMultiSample():
    """``classify_fasta`` on a two-sample FASTA returns one result per sample.

    Each fixture lineage is given a dummy SNP table of a chosen size (every
    position mapped to ``"A"``) before classification.
    """
    fasta_path = TEST_CASE_DIR / "multi_sample.fa"
    snpit = create_test_snpit()
    # (lineage name, number of dummy SNPs assigned to it)
    snp_counts = (("L1", 10), ("L2", 20), ("L3", 20), ("L4", 2))
    for lineage_name, count in snp_counts:
        snpit.lineages[lineage_name].snps = dict.fromkeys(range(count), "A")

    actual = snpit.classify_fasta(fasta_path)
    expected = {
        "Sample1": (50.0, Lineage(name="L4")),
        "Sample2": (20.0, Lineage(name="L1")),
    }

    assert actual == expected
예제 #7
0
def test_outputResults_twoSamplesResultsWritesTwoSamples():
    """``output_results`` writes a header plus one tab-separated row per sample.

    The expected text shows unset lineage fields as ``N/A`` and percentages
    with two decimal places.
    """
    buffer = StringIO()
    sample_results = {
        "Sample1": (25.2533333, Lineage(name="L1", sublineage="SL1")),
        "Sample2": (75.75899, Lineage(name="L2", species="S2", lineage="Lin2")),
    }

    output_results(buffer, sample_results)
    written = buffer.getvalue()

    header = "Sample\tSpecies\tLineage\tSublineage\tName\tPercentage\n"
    first_row = "Sample1\tN/A\tN/A\tSL1\tL1\t25.25\n"
    second_row = "Sample2\tS2\tLin2\tN/A\tL2\t75.76\n"

    assert written == header + first_row + second_row
예제 #8
0
def test_determineLineage_emptyCountsInReturnsEmptyResults():
    """With no lineages and an empty counter, ``determine_lineage`` returns ``(0, Lineage())``.

    The lineage assertion compares against ``expected_lineage``; it previously
    compared ``actual_lineage`` with itself and therefore could never fail.
    """
    snpit = create_test_snpit()
    snpit.lineages = {}

    actual_percentage, actual_lineage = snpit.determine_lineage(Counter())
    expected_percentage, expected_lineage = (0, Lineage())

    assert actual_percentage == expected_percentage
    assert actual_lineage == expected_lineage
예제 #9
0
def test_loadLineagesFromCsv_fileWithTwoEntriesReturnsDictWithTwoLineages():
    """``load_lineages_from_csv`` returns both library lineages and their position map.

    Checks the total number of mapped positions, one known position for each
    lineage, and that each lineage is stored under its name.
    """
    library_path = TEST_CASE_DIR / "test_library.csv"
    expected_indo = Lineage(
        name="Indo_Oceanic",
        species="M. tuberculosis",
        lineage="Lineage 1",
        sublineage="Sublineage 7",
    )
    expected_beijing = Lineage(
        name="beijing", species="M. tuberculosis", lineage="Lineage 2", sublineage=""
    )

    lineages, position_map = load_lineages_from_csv(library_path)

    assert len(position_map) == 714
    assert position_map[1083755] == {"G": [expected_indo.name]}
    assert position_map[1288698] == {"A": [expected_beijing.name]}

    assert lineages["beijing"] == expected_beijing
    assert lineages["Indo_Oceanic"] == expected_indo
예제 #10
0
def test_outputResults_emptySampleResultsWritesSampleWithNAs():
    """A sample with a default ``Lineage()`` is written with ``N/A`` in every lineage column."""
    buffer = StringIO()
    sample_results = {"Sample1": (0, Lineage())}

    output_results(buffer, sample_results)
    written = buffer.getvalue()

    header = "Sample\tSpecies\tLineage\tSublineage\tName\tPercentage\n"
    row = "Sample1\tN/A\tN/A\tN/A\tN/A\t0\n"

    assert written == header + row
예제 #11
0
def test_classifyVcf_exampleVcfReturnsCorrectClassification():
    """``classify_vcf`` on ``example.vcf`` reports the ``beijing`` lineage for sample ``example``."""
    classifier = SnpIt(10, True)
    expected_lineage = Lineage(
        lineage="Lineage 2", species="M. tuberculosis", name="beijing"
    )

    actual = classifier.classify_vcf(TEST_CASE_DIR / "example.vcf")
    expected = {"example": (97.2972972972973, expected_lineage)}

    assert actual == expected
예제 #12
0
def test_determineLineage_L4MostCountsReturnsL4():
    """``determine_lineage`` picks L4 when it has the most counts.

    L4 and L1 share the same ten dummy SNPs; the counter holds L4 twice and
    L1 once. The lineage assertion compares against ``expected_lineage``; it
    previously compared ``actual_lineage`` with itself and could never fail.
    """
    snpit = create_test_snpit()
    dummy_snps = {x: "A" for x in range(10)}
    snpit.lineages["L4"].snps = dummy_snps
    snpit.lineages["L1"].snps = dummy_snps

    actual_percentage, actual_lineage = snpit.determine_lineage(
        Counter(["L4", "L1", "L4"])
    )
    # Expected metadata mirrors the L4 entry defined in create_test_snpit().
    expected_percentage, expected_lineage = (
        20.0,
        Lineage(name="L4", species="S4", lineage="Lineage 4", sublineage="SL4"),
    )

    assert actual_percentage == expected_percentage
    assert actual_lineage == expected_lineage
예제 #13
0
def test_determineLineage_L4NoSublineageMostCountsNextBestL4WithSublineageReturnsNextBest():
    """The runner-up is expected when the top hit is Lineage 4 without a sublineage.

    L4 has its sublineage cleared, while L1 is re-labelled as "Lineage 4" with
    a sublineage. The counter holds L4 twice and L1 once, yet L1 is the
    expected result. The lineage assertion compares against
    ``expected_lineage``; it previously compared ``actual_lineage`` with
    itself and could never fail.
    """
    snpit = create_test_snpit()
    dummy_snps = {x: "A" for x in range(10)}
    snpit.lineages["L4"].snps = dummy_snps
    snpit.lineages["L1"].snps = dummy_snps
    snpit.lineages["L4"].sublineage = ""
    snpit.lineages["L1"].lineage = "Lineage 4"
    snpit.lineages["L1"].sublineage = "corner case"

    actual_percentage, actual_lineage = snpit.determine_lineage(
        Counter(["L4", "L1", "L4"])
    )
    expected_percentage, expected_lineage = (
        20.0,
        Lineage(name="L1", species="S1", lineage="Lineage 4", sublineage="corner case"),
    )

    assert actual_percentage == expected_percentage
    assert actual_lineage == expected_lineage
예제 #14
0
def test_inequalityOperator_twoNonEqualReturnsTrue():
    """Lineages with different names (and lineages) compare unequal via ``!=``."""
    first = Lineage(name="foo", lineage="Lineage 5")
    second = Lineage(name="test", lineage="Lineage 6")

    assert first != second
예제 #15
0
def test_lessThanOperator_xEqualsYReturnsFalse():
    """``<`` is false for two lineages that share the same name."""
    left = Lineage(name="bar")
    right = Lineage(name="bar")

    assert not (left < right)
예제 #16
0
def test_inequalityOperator_twoEqualReturnsFalse():
    """``!=`` is false for lineages that share a name but differ in ``lineage``."""
    first = Lineage(name="test", lineage="Lineage 5")
    second = Lineage(name="test", lineage="Lineage 6")

    assert not (first != second)
예제 #17
0
def test_greaterThanOperator_xLessThanYReturnsFalse():
    """``>`` is false when the left lineage is named "bar" and the right "foo"."""
    left = Lineage(name="bar")
    right = Lineage(name="foo")

    assert not (left > right)
예제 #18
0
def test_lessThanOperator_xLessThanYReturnsTrue():
    """``<`` is true when the left lineage is named "bar" and the right "foo"."""
    left = Lineage(name="bar")
    right = Lineage(name="foo")

    assert left < right
예제 #19
0
def test_greaterThanOperator_xGreaterThanYReturnsTrue():
    """``>`` is true when the left lineage is named "foo" and the right "bar"."""
    left = Lineage(name="foo")
    right = Lineage(name="bar")

    assert left > right
예제 #20
0
def test_equalityOperator_twoEqualReturnsTrue():
    """``==`` is true for lineages that share a name but differ in ``lineage``."""
    first = Lineage(name="test", lineage="Lineage 5")
    second = Lineage(name="test", lineage="Lineage 6")

    assert first == second
예제 #21
0
def test_greaterThanOperator_xEqualsYReturnsFalse():
    """``>`` is false for two lineages that share the same name."""
    left = Lineage(name="bar")
    right = Lineage(name="bar")

    assert not (left > right)