Beispiel #1
0
def test_write_warnings_with_limit():
    summary = StudySummary()
    summary.unmapped_samples.extend(
        ((f'run{i // 10}', f'sample{i}') for i in range(50)))
    expected_warnings = """\
WARNING, some samples did not map to participant ids:
run0:
  sample0
  sample1
  sample2
  sample3
  sample4
  sample5
  sample6
  sample7
  sample8
  sample9
run1:
  sample10
  sample11
  [8 more samples...]
[30 more samples in 3 more runs...]
"""
    report = StringIO()

    summary.write_warnings(report, limit=12)

    assert report.getvalue() == expected_warnings
Beispiel #2
0
def test_load_multiple_run_outcomes():
    summary = StudySummary()
    outcome1_summary_csv = StringIO("""\
sample,run,passed,error
P1-NFLHIVDNA_S1,200101_M11111,True,
P2-NFLHIVDNA_S2,200101_M11111,False,primer error
""")
    outcome2_summary_csv = StringIO("""\
sample,run,passed,error
P2-NFLHIVDNA_S1,200108_M11111,True,
P3-NFLHIVDNA_S2,200108_M11111,False,multiple contigs
""")
    expected_run_counts = {
        '200101_M11111': dict(samples=2, passed=1, errors=1, no_primer=1),
        '200108_M11111': dict(samples=2,
                              passed=1,
                              errors=1,
                              multiple_contigs=1)
    }
    expected_participant_counts = {
        'P1': dict(samples=1, passed=1),
        'P2': dict(samples=2, passed=1, errors=1, no_primer=1),
        'P3': dict(samples=1, errors=1, multiple_contigs=1)
    }

    summary.load_outcome(outcome1_summary_csv)
    summary.load_outcome(outcome2_summary_csv)

    assert summary.run_counts == expected_run_counts
    assert summary.participant_counts == expected_participant_counts
Beispiel #3
0
def test_load_runs():
    summary = StudySummary()
    requested_runs = [
        '/fake/path/200101_M11111_0001_0000000-J1HRQ',
        '/fake/path/200108_M11111_0002_0000000-P3RS4'
    ]

    summary.load_runs(requested_runs)

    assert summary.run_paths == tuple(
        Path(run_path) for run_path in requested_runs)
Beispiel #4
0
def test_combined_error_types():
    summary = StudySummary()
    outcome_summary_csv = StringIO("""\
sample,run,passed,error
P1-NFLHIVDNA_S1,200101_M11111,True,
P2-NFLHIVDNA_S2,200101_M11111,False,low end read coverage
P3-NFLHIVDNA_S3,200101_M11111,False,low internal read coverage
""")
    expected_run_counts = {
        '200101_M11111': dict(samples=3, passed=1, errors=2, low_cov=2)
    }

    summary.load_outcome(outcome_summary_csv)

    assert summary.run_counts == expected_run_counts
Beispiel #5
0
def test_load_outcome():
    summary = StudySummary()
    outcome_summary_csv = StringIO("""\
sample,run,passed,error
P1-NFLHIVDNA_S1,200101_M11111,True,
P2-NFLHIVDNA_S2,200101_M11111,False,primer error
P3-NFLHIVDNA_S3,200101_M11111,False,multiple contigs
""")
    expected_run_counts = {
        '200101_M11111':
        dict(samples=3, passed=1, errors=2, no_primer=1, multiple_contigs=1)
    }

    summary.load_outcome(outcome_summary_csv)

    assert summary.run_counts == expected_run_counts
Beispiel #6
0
def test_write_warnings():
    summary = StudySummary()
    summary.unmapped_samples.extend((('200101_M11111', 'E0003-NFLHIVDNA_S3'),
                                     ('200101_M11111', 'E0007-NFLHIVDNA_S5'),
                                     ('200108_M11111', 'E0009-NFLHIVDNA_S2')))
    expected_warnings = """\
WARNING, some samples did not map to participant ids:
200101_M11111:
  E0003-NFLHIVDNA_S3
  E0007-NFLHIVDNA_S5
200108_M11111:
  E0009-NFLHIVDNA_S2
"""
    report = StringIO()

    summary.write_warnings(report)

    assert report.getvalue() == expected_warnings
Beispiel #7
0
def test_load_samples(tmp_path):
    runs_root = tmp_path
    run1_path = runs_root / '200101_M11111_0001_0000000-J1HRQ'
    run1_path.mkdir()
    run2_path = runs_root / '200115_M22222_0003_0000000-Y8E4T'
    run2_path.mkdir()

    samples_csv = StringIO("""\
sample,run,pid
E0001_S1,200101_M11111,P1
E0002_S2,200101_M11111,P2
E0003_S1,200115_M22222,P2
""")
    summary = StudySummary()

    summary.load_samples(samples_csv, runs_root)

    assert summary.run_paths == (run1_path, run2_path)
Beispiel #8
0
def test_find_unmapped_samples(tmp_path):
    runs_root = tmp_path
    run1_path = runs_root / '200101_M11111_0001_0000000-J1HRQ'
    run1_path.mkdir()

    samples_csv = StringIO("""\
sample,run,pid
E0001-NFLHIVDNA_S1,200101_M11111,P1
E0002-NFLHIVDNA_S2,200101_M11111,P2
""")
    outcome_summary_csv = StringIO("""\
sample,run,passed,error
E0001-NFLHIVDNA_S1,200101_M11111,True,
E0002-NFLHIVDNA_S2,200101_M11111,False,primer error
E0003-NFLHIVDNA_S3,200101_M11111,False,multiple contigs
""")
    expected_participant_counts = {
        'P1': dict(samples=1, passed=1),
        'P2': dict(samples=1, errors=1, no_primer=1),
        'E0003': dict(samples=1, errors=1, multiple_contigs=1)
    }
    expected_unmapped_samples = [('200101_M11111', 'E0003-NFLHIVDNA_S3')]
    summary = StudySummary()

    summary.load_samples(samples_csv, runs_root)
    summary.load_outcome(outcome_summary_csv)

    assert summary.participant_counts == expected_participant_counts
    assert summary.unmapped_samples == expected_unmapped_samples
Beispiel #9
0
def test_load_runs_not_in_samples(tmp_path):
    runs_root = tmp_path
    run1_path = runs_root / '200101_M11111_0001_0000000-J1HRQ'
    run1_path.mkdir()
    run2_path = runs_root / '200115_M22222_0003_0000000-Y8E4T'
    run2_path.mkdir()

    samples_csv = StringIO("""\
sample,run,pid
E0001-NFLHIVDNA_S1,200101_M11111,P1
E0002-NFLHIVDNA_S2,200101_M11111,P2
""")
    requested_runs = [str(run1_path), str(run2_path)]
    summary = StudySummary()

    summary.load_runs(requested_runs)
    summary.load_samples(samples_csv, runs_root)

    assert summary.run_paths == (run1_path, run2_path)
Beispiel #10
0
def test_write():
    summary = StudySummary()
    outcome_summary_csv = StringIO("""\
sample,run,passed,error
P1-NFLHIVDNA_S1,200101_M11111,True,
P2-NFLHIVDNA_S2,200101_M11111,False,primer error
P3-NFLHIVDNA_S3,200101_M11111,False,multiple contigs
""")
    expected_study_summary_csv = """\
type,name,samples,passed,errors,no_sequence,non_hiv,\
no_primer,low_cov,multiple_contigs,hiv_but_failed
run,200101_M11111,3,1,2,0,0,1,0,1,0
participant,P1,1,1,0,0,0,0,0,0,0
participant,P2,1,0,1,0,0,1,0,0,0
participant,P3,1,0,1,0,0,0,0,1,0
total,total,3,1,2,0,0,1,0,1,0
"""
    study_summary_csv = StringIO()

    summary.load_outcome(outcome_summary_csv)
    summary.write(study_summary_csv)

    assert study_summary_csv.getvalue() == expected_study_summary_csv