Ejemplo n.º 1
0
def test_no_reads():
    """get_consensus() raises IndexError when the builder was fed no reads."""
    empty_reads = prepare_reads()
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(empty_reads):
        pass

    with pytest.raises(IndexError):
        builder.get_consensus()
Ejemplo n.º 2
0
def test_build_by_lengths_no_reads():
    """get_consensus_by_lengths() yields nothing when no reads were built."""
    empty_reads = prepare_reads()
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(empty_reads):
        pass

    assert [] == list(builder.get_consensus_by_lengths())
Ejemplo n.º 3
0
def test_build_by_lengths_with_no_neighbours():
    """Two isolated read lengths: only the 220-base consensus is expected.

    The input holds 9 reads of 120 'A's and 10 reads of 220 'G's, with no
    reads at any other length.
    """
    short_reads = ['A' * 120] * 9
    long_reads = ['G' * 220] * 10
    merged_reads = prepare_reads(*(short_reads + long_reads))
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(merged_reads):
        pass

    assert ['G' * 220] == list(builder.get_consensus_by_lengths())
Ejemplo n.º 4
0
def test_different_lengths():
    """The consensus comes from the three 18-base reads.

    The other four reads each have a different, shorter length (17, 16, 15
    and 14 bases) and must not influence the reported consensus.
    """
    raw_reads = [
        "AAACCCTTTGGGAAACCC",
        "ATACCCTTTGGGAAACCC",
        "AAACCCTTTGGGAAACCC",
        "CATGAGACATCACACAC",
        "CATGAGACATCACACA",
        "CATGAGACATCACAC",
        "CATGAGACATCACA",
    ]
    merged_reads = prepare_reads(*raw_reads)
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(merged_reads):
        pass

    assert "AAACCCTTTGGGAAACCC" == builder.get_consensus()
Ejemplo n.º 5
0
def test_mixture():
    """Majority base wins at each position, and build() passes reads through.

    One of the three equal-length reads differs from the other two at two
    positions; the consensus must match the two agreeing reads.
    """
    raw_reads = ["AAACCCTTTGGGAAACCC",
                 "ATACCCTATGGGAAACCC",
                 "AAACCCTTTGGGAAACCC"]
    merged_reads = list(prepare_reads(*raw_reads))
    builder = ConsensusBuilder()

    returned_reads = [read for read in builder.build(merged_reads)]

    # build() must hand back exactly the reads it was given.
    assert merged_reads == returned_reads
    assert "AAACCCTTTGGGAAACCC" == builder.get_consensus()
Ejemplo n.º 6
0
def test_tie():
    """An exact 50/50 split at a position resolves to the alphabetically
    first base.

    Two reads carry one variant and two carry the other, so the two
    differing positions (A vs. T) are both tied and both resolve to 'A'.
    """
    raw_reads = ["AAACCCTTTGGGAAACCC",
                 "ATACCCTATGGGAAACCC",
                 "ATACCCTATGGGAAACCC",
                 "AAACCCTTTGGGAAACCC"]
    merged_reads = list(prepare_reads(*raw_reads))
    builder = ConsensusBuilder()

    returned_reads = [read for read in builder.build(merged_reads)]

    # build() must hand back exactly the reads it was given.
    assert merged_reads == returned_reads
    assert "AAACCCTATGGGAAACCC" == builder.get_consensus()
Ejemplo n.º 7
0
def test_build_by_lengths_when_spike_too_short():
    """No consensus is reported when the peak length is not prominent enough.

    Length 120 has 99 reads of 'A's, while every other length from 100 to
    140 has 5 reads of 'C's.
    """
    raw_reads = []
    for length in range(100, 141):
        base, copies = ('A', 99) if length == 120 else ('C', 5)
        raw_reads.extend([base * length] * copies)
    merged_reads = prepare_reads(*raw_reads)
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(merged_reads):
        pass

    assert [] == list(builder.get_consensus_by_lengths())
Ejemplo n.º 8
0
def fastq_g2p(pssm,
              fastq1,
              fastq2,
              g2p_csv,
              g2p_summary_csv=None,
              unmapped1=None,
              unmapped2=None,
              aligned_csv=None,
              min_count=1,
              min_valid=1,
              min_valid_percent=0.0,
              merged_contigs_csv=None):
    """Chain the read-processing stages for a FASTQ pair and write G2P output.

    The reads flow through FastqReader -> merge_reads ->
    ConsensusBuilder.build -> trim_reads -> write_unmapped_reads ->
    count_reads -> (optionally) write_aligned_reads, and the result is
    handed to write_rows.

    :param pssm: passed straight through to write_rows
    :param fastq1: first input given to FastqReader
    :param fastq2: second input given to FastqReader
    :param g2p_csv: output passed to write_rows; when it has a ``name``
        attribute, that path's directory is used to build the
        ``read_counts`` prefix given to count_reads
    :param g2p_summary_csv: passed straight through to write_rows
    :param unmapped1: passed to write_unmapped_reads
    :param unmapped2: passed to write_unmapped_reads
    :param aligned_csv: when not None, write_aligned_reads is added to the
        chain with this as its output
    :param min_count: passed straight through to write_rows
    :param min_valid: passed straight through to write_rows
    :param min_valid_percent: passed straight through to write_rows
    :param merged_contigs_csv: when not None, receives a one-column
        ``contig`` CSV with a row for each consensus reported by
        get_consensus_by_lengths() that holds something besides 'N' and '-'
    """
    if getattr(g2p_csv, 'name', None) is None:
        # Nothing to derive a working folder from.
        count_prefix = None
    else:
        count_prefix = os.path.join(os.path.dirname(g2p_csv.name),
                                    'read_counts')

    project_config = ProjectConfig.loadDefault()
    hiv_seed = project_config.getReference(G2P_SEED_NAME)
    v3loop_ref = extract_target(
        hiv_seed,
        project_config.getReference(COORDINATE_REF_NAME))

    # Each stage takes the previous stage's output as its input.
    reads = merge_reads(FastqReader(fastq1, fastq2))
    consensus_builder = ConsensusBuilder()
    reads = consensus_builder.build(reads)
    reads = trim_reads(reads, v3loop_ref)
    reads = write_unmapped_reads(reads, unmapped1, unmapped2)
    read_counts = count_reads(reads, count_prefix)
    if aligned_csv is not None:
        read_counts = write_aligned_reads(read_counts,
                                          aligned_csv,
                                          hiv_seed,
                                          v3loop_ref)

    write_rows(pssm,
               read_counts,
               g2p_csv,
               g2p_summary_csv,
               min_count,
               min_valid=min_valid,
               min_valid_percent=min_valid_percent)

    if merged_contigs_csv is None:
        return

    # The builder is only queried after write_rows has been given the reads.
    contig_writer = DictWriter(merged_contigs_csv, ['contig'])
    contig_writer.writeheader()
    for consensus in consensus_builder.get_consensus_by_lengths():
        # Skip a consensus made up entirely of 'N' and '-' characters, but
        # write the original (unstripped) sequence otherwise.
        if consensus.replace('N', '').replace('-', ''):
            contig_writer.writerow({'contig': consensus})
Ejemplo n.º 9
0
def test_build_by_lengths():
    """Two prominent length peaks each produce their own consensus.

    Around length 120 there are 100 reads of 'A's against 2 reads of 'C's
    at every other length from 100 to 140; around length 220 there are
    1000 reads of 'G's against 20 reads of 'T's at every other length from
    200 to 240.
    """
    raw_reads = []
    for length in range(100, 141):
        base, copies = ('A', 100) if length == 120 else ('C', 2)
        raw_reads.extend([base * length] * copies)
    for length in range(200, 241):
        base, copies = ('G', 1000) if length == 220 else ('T', 20)
        raw_reads.extend([base * length] * copies)
    merged_reads = prepare_reads(*raw_reads)
    builder = ConsensusBuilder()

    # Drain whatever build() yields before querying the builder.
    for _ in builder.build(merged_reads):
        pass

    assert ['A' * 120, 'G' * 220] == list(builder.get_consensus_by_lengths())