예제 #1
0
def poagen(groups, gpu_percent=0.8):
    """Yield consensus results for ``groups`` from one reusable CudaPoaBatch.

    The batch is sized from the largest group and the longest sequence found
    in ``groups`` and is handed ``gpu_percent`` of the device memory that is
    currently reported as free. Groups are added one at a time; whenever
    adding a group returns status 1, or the final group has just been added,
    the batch is processed, its consensus entries are yielded and the batch
    is reset so it can be refilled.

    Args:
        groups: Sized, indexable collection of groups. Each group must be a
            non-empty collection of sequences that support ``len``.
        gpu_percent: Fraction of the free device memory given to the batch.

    Yields:
        Every item of the consensus collection returned by
        ``batch.get_consensus()``, batch after batch.

    Sequences or groups that cannot be added, and groups for which no
    consensus could be obtained, are reported on stderr; processing then
    continues with the next group.
    """
    free_mem, _total_mem = cuda.cuda_get_mem_info(cuda.cuda_get_device())
    batch_mem = gpu_percent * free_mem

    # Scan all groups once to find the dimensions the batch must support.
    longest_read = 0
    largest_group = 0
    for members in groups:
        longest_read = max(longest_read, len(max(members, key=len)))
        largest_group = max(largest_group, len(members))

    batch = CudaPoaBatch(
        largest_group,
        longest_read,
        batch_mem,
        output_type="consensus",
        cuda_banded_alignment=True,
        alignment_band_width=256,
    )

    cursor = 0        # index of the next group to hand to the batch
    batch_first = 0   # index of the first group held by the current batch

    while cursor < len(groups):
        add_status, member_statuses = batch.add_poa_group(groups[cursor])
        added = add_status == 0

        if added:
            # The group is in the batch; report the sequences that were
            # rejected individually, then move on to the next group.
            for member_idx, member_status in enumerate(member_statuses):
                if member_status == 0:
                    continue
                print("Could not add sequence {} to POA {} - error {}".
                      format(member_idx, cursor, status_to_str(member_status)),
                      file=sys.stderr)
            cursor += 1

        # Status 1 leaves ``cursor`` untouched, so the same group is retried
        # against the freshly reset batch on the next iteration.
        flush_now = add_status == 1 or (added and cursor == len(groups))

        if flush_now:
            batch.generate_poa()
            consensus, _coverage, consensus_statuses = batch.get_consensus()
            for offset, consensus_status in enumerate(consensus_statuses):
                if consensus_status == 0:
                    continue
                print(
                    "Could not get consensus for POA group {} - {}".format(
                        batch_first + offset,
                        status_to_str(consensus_status)),
                    file=sys.stderr)
            yield from consensus
            batch_first = cursor
            batch.reset()
        elif add_status != 0:
            # Any other failure: report the group and skip it.
            print("Could not add POA group {} to batch - {}".format(
                cursor, status_to_str(add_status)),
                  file=sys.stderr)
            cursor += 1
def test_cudapoa_complex_batch():
    """Check that POA over noisy copies of a random reference recovers it.

    A 500-base random reference is generated, 100 reads are derived from it
    by re-drawing each base with probability 0.02, and the consensus of the
    single resulting POA group is expected to equal the reference.
    """
    random.seed(2)  # fixed seed keeps the reference and the reads reproducible
    bases = ['A', 'C', 'G', 'T']
    read_len = 500
    num_reads = 100
    mutation_rate = 0.02

    ref = ''.join(random.choice(bases) for _ in range(read_len))

    def mutate(base):
        # One draw decides whether to keep the base; only a re-drawn base
        # consumes a second random value (which may equal the original).
        if random.random() > mutation_rate:
            return base
        return random.choice(bases)

    reads = [''.join(mutate(base) for base in ref) for _ in range(num_reads)]

    device = cuda.cuda_get_device()
    free_mem, _total_mem = cuda.cuda_get_mem_info(device)
    stream = cuda.CudaStream()
    batch = CudaPoaBatch(1000, 1024, 0.9 * free_mem,
                         stream=stream, device_id=device)
    _add_status, _seq_status = batch.add_poa_group(reads)
    batch.generate_poa()

    all_consensus, _coverage, _status = batch.get_consensus()
    first_consensus = all_consensus[0]

    assert len(first_consensus) == len(ref)
    assert SequenceMatcher(None, ref, first_consensus).ratio() == 1.0
def test_cudaaligner_long_alignments(ref_length, num_alignments):
    """Align ``num_alignments`` simulated read pairs and verify batch reset.

    Each pair consists of two reads generated from the same freshly built
    reference of length ``ref_length``. After aligning, the batch is reset
    and must report no alignments.
    """
    device = cuda.cuda_get_device()
    genome_sim = PoissonGenomeSimulator()
    read_sim = NoisyReadSimulator()

    batch = CudaAlignerBatch(ref_length, ref_length, num_alignments,
                             device_id=device)

    def simulate_read(reference):
        # generate_read returns (read, start, end); only the read is used.
        read, _start, _end = read_sim.generate_read(
            reference, ref_length, insertion_error_rate=0.0)
        return read

    for _ in range(num_alignments):
        reference = genome_sim.build_reference(ref_length)
        query = simulate_read(reference)
        target = simulate_read(reference)
        batch.add_alignment(query, target)

    batch.align_all()
    batch.get_alignments()

    # A reset batch must no longer hold any alignments.
    batch.reset()
    assert len(batch.get_alignments()) == 0
def test_cudaaligner_various_arguments(max_seq_len, max_alignments, seq_len,
                                       num_alignments, should_succeed):
    """Check that add_alignment status codes match ``should_succeed``.

    A batch sized for ``max_seq_len`` and ``max_alignments`` receives
    ``num_alignments`` simulated read pairs of length ``seq_len``. The run
    counts as successful only if every ``add_alignment`` call returned 0,
    and that outcome must be identical to ``should_succeed``.
    """
    device = cuda.cuda_get_device()
    genome_sim = PoissonGenomeSimulator()
    read_sim = NoisyReadSimulator()

    batch = CudaAlignerBatch(max_seq_len, max_seq_len, max_alignments,
                             device_id=device)

    def simulate_read(reference):
        # generate_read returns (read, start, end); only the read is used.
        read, _start, _end = read_sim.generate_read(
            reference, seq_len, insertion_error_rate=0.0)
        return read

    every_add_ok = True
    for _ in range(num_alignments):
        reference = genome_sim.build_reference(seq_len)
        query = simulate_read(reference)
        target = simulate_read(reference)
        if batch.add_alignment(query, target) != 0:
            every_add_ok = False

    batch.align_all()

    assert every_add_ok is should_succeed
def test_cudapoa_valid_output_type():
    """Check that CudaPoaBatch accepts ``output_type='consensus'``.

    The test fails with an AssertionError if constructing the batch raises
    ``RuntimeError``.
    """
    device = cuda.cuda_get_device()
    free, _total = cuda.cuda_get_mem_info(device)
    try:
        # Pass the queried device explicitly so the batch is created on the
        # same device whose free memory was measured above.
        CudaPoaBatch(10,
                     1024,
                     0.9 * free,
                     device_id=device,
                     output_type='consensus')
    except RuntimeError as err:
        raise AssertionError(
            "CudaPoaBatch rejected output_type='consensus'") from err
def test_cudapoa_incorrect_output_type():
    """Check that CudaPoaBatch rejects an unknown ``output_type``.

    Constructing the batch with ``output_type='error_input'`` is expected to
    raise ``RuntimeError``; the test fails with an AssertionError otherwise.
    """
    device = cuda.cuda_get_device()
    free, _total = cuda.cuda_get_mem_info(device)
    try:
        # Pass the queried device explicitly so the batch is created on the
        # same device whose free memory was measured above.
        CudaPoaBatch(10,
                     1024,
                     0.9 * free,
                     device_id=device,
                     output_type='error_input')
    except RuntimeError:
        return  # expected: the invalid output type was rejected
    raise AssertionError(
        "CudaPoaBatch accepted output_type='error_input' "
        "without raising RuntimeError")
def test_cudapoa_reset_batch():
    """Check that ``reset`` drops the POAs held by a CudaPoaBatch.

    One group is added and processed, after which ``total_poas`` must be 1;
    following ``reset`` it must be 0.
    """
    device = cuda.cuda_get_device()
    free_mem, _total_mem = cuda.cuda_get_mem_info(device)
    batch = CudaPoaBatch(10, 1024, 0.9 * free_mem, device_id=device)

    group = ["ACTGACTG", "ACTTACTG", "ACGGACTG", "ATCGACTG"]
    batch.add_poa_group(group)
    batch.generate_poa()
    _consensus, _coverage, _status = batch.get_consensus()
    assert batch.total_poas == 1

    batch.reset()
    assert batch.total_poas == 0
def test_cudapoa_simple_batch():
    """Check that two POA groups in one batch produce two consensus entries.

    Two groups are added to a single batch; after processing, the consensus
    collection must have two entries and ``total_poas`` must be 2.
    """
    device = cuda.cuda_get_device()
    free, _total = cuda.cuda_get_mem_info(device)
    # Keyword names follow the other CudaPoaBatch call sites in this file
    # (``device_id`` and ``output_type``).
    batch = CudaPoaBatch(10,
                         1024,
                         0.9 * free,
                         device_id=device,
                         output_type='consensus')
    poa_1 = ["ACTGACTG", "ACTTACTG", "ACGGACTG", "ATCGACTG"]
    poa_2 = ["ACTGAC", "ACTTAC", "ACGGAC", "ATCGAC"]
    batch.add_poa_group(poa_1)
    batch.add_poa_group(poa_2)
    batch.generate_poa()
    consensus, _coverage, _status = batch.get_consensus()

    assert len(consensus) == 2
    assert batch.total_poas == 2
def test_cudaaligner_simple_batch(query, target, cigar, get_stream):
    """Align one query/target pair globally and compare the CIGAR string.

    The batch is sized exactly for the given pair and a single alignment.
    The result must contain exactly one alignment whose ``cigar`` equals
    the expected ``cigar``.
    """
    device = cuda.cuda_get_device()
    batch = CudaAlignerBatch(
        len(query),
        len(target),
        1,
        alignment_type="global",
        stream=get_stream,
        device_id=device,
    )

    batch.add_alignment(query, target)
    batch.align_all()
    results = batch.get_alignments()

    assert len(results) == 1
    assert results[0].cigar == cigar
def test_cudapoa_graph():
    """Check the size of the graph built for one three-sequence POA group.

    The three sequences differ only in their fourth base, so the graph
    returned by ``get_graphs`` is expected to have 10 nodes and 11 edges.
    """
    device = cuda.cuda_get_device()
    free_mem, _total_mem = cuda.cuda_get_mem_info(device)
    batch = CudaPoaBatch(10, 1024, 0.9 * free_mem, device_id=device)

    batch.add_poa_group(["ACTGACTG", "ACTTACTG", "ACTCACTG"])
    batch.generate_poa()
    _consensus, _coverage, _status = batch.get_consensus()
    assert batch.total_poas == 1

    # Expected graph: the fourth base branches into G, T and C and the
    # branches rejoin at the following A.
    #
    #           - -> G -> -
    #           |         |
    # A -> C -> T -> T -> A -> C -> T -> G
    #           |         |
    #           - -> C -> -

    all_graphs, _graph_status = batch.get_graphs()
    assert len(all_graphs) == 1

    poa_graph = all_graphs[0]
    assert poa_graph.number_of_nodes() == 10
    assert poa_graph.number_of_edges() == 11
예제 #11
0
def test_cuda_device_selection():
    """Check that every reported device can be selected.

    Each device index below the reported device count is set as the current
    device and must then be returned by ``cuda_get_device``. With no devices
    reported, the loop body never runs and the test passes trivially.
    """
    for device_id in range(cuda.cuda_get_device_count()):
        cuda.cuda_set_device(device_id)
        assert cuda.cuda_get_device() == device_id