def test_cudapoa_complex_batch(): random.seed(2) read_len = 500 ref = ''.join( [random.choice(['A', 'C', 'G', 'T']) for _ in range(read_len)]) num_reads = 100 mutation_rate = 0.02 reads = [] for _ in range(num_reads): new_read = ''.join([ r if random.random() > mutation_rate else random.choice( ['A', 'C', 'G', 'T']) for r in ref ]) reads.append(new_read) device = cuda.cuda_get_device() free, total = cuda.cuda_get_mem_info(device) stream = cuda.CudaStream() batch = CudaPoaBatch(1000, 1024, 0.9 * free, stream=stream, device_id=device) (add_status, seq_status) = batch.add_poa_group(reads) batch.generate_poa() consensus, coverage, status = batch.get_consensus() consensus = consensus[0] assert (len(consensus) == len(ref)) match_ratio = SequenceMatcher(None, ref, consensus).ratio() assert (match_ratio == 1.0)
def poagen(groups, gpu_percent=0.8): free, total = cuda.cuda_get_mem_info(cuda.cuda_get_device()) gpu_mem_per_batch = gpu_percent * free max_seq_sz = 0 max_sequences_per_poa = 0 for group in groups: longest_seq = len(max(group, key=len)) max_seq_sz = longest_seq if longest_seq > max_seq_sz else max_seq_sz seq_in_poa = len(group) max_sequences_per_poa = seq_in_poa if seq_in_poa > max_sequences_per_poa else max_sequences_per_poa batch = CudaPoaBatch( max_sequences_per_poa, max_seq_sz, gpu_mem_per_batch, output_type="consensus", cuda_banded_alignment=True, alignment_band_width=256, ) poa_index = 0 initial_count = 0 while poa_index < len(groups): group = groups[poa_index] group_status, seq_status = batch.add_poa_group(group) # If group was added and more space is left in batch, continue onto next group. if group_status == 0: for seq_index, status in enumerate(seq_status): if status != 0: print("Could not add sequence {} to POA {} - error {}". format(seq_index, poa_index, status_to_str(status)), file=sys.stderr) poa_index += 1 # Once batch is full or no groups are left, run POA processing. if ((group_status == 1) or ((group_status == 0) and (poa_index == len(groups)))): batch.generate_poa() consensus, coverage, con_status = batch.get_consensus() for p, status in enumerate(con_status): if status != 0: print( "Could not get consensus for POA group {} - {}".format( initial_count + p, status_to_str(status)), file=sys.stderr) yield from consensus initial_count = poa_index batch.reset() # In the case where POA group wasn't processed correctly. elif group_status != 0: print("Could not add POA group {} to batch - {}".format( poa_index, status_to_str(group_status)), file=sys.stderr) poa_index += 1
def test_cudapoa_reset_batch(): device = cuda.cuda_get_device() free, total = cuda.cuda_get_mem_info(device) batch = CudaPoaBatch(10, 1024, 0.9 * free, device_id=device) poa_1 = ["ACTGACTG", "ACTTACTG", "ACGGACTG", "ATCGACTG"] batch.add_poa_group(poa_1) batch.generate_poa() consensus, coverage, status = batch.get_consensus() assert (batch.total_poas == 1) batch.reset() assert (batch.total_poas == 0)
def test_cudapoa_simple_batch(): device = cuda.cuda_get_device() free, total = cuda.cuda_get_mem_info(device) batch = CudaPoaBatch(10, 1024, 0.9 * free, deivce_id=device, output_mask='consensus') poa_1 = ["ACTGACTG", "ACTTACTG", "ACGGACTG", "ATCGACTG"] poa_2 = ["ACTGAC", "ACTTAC", "ACGGAC", "ATCGAC"] batch.add_poa_group(poa_1) batch.add_poa_group(poa_2) batch.generate_poa() consensus, coverage, status = batch.get_consensus() assert (len(consensus) == 2) assert (batch.total_poas == 2)
def test_cudapoa_graph(): device = cuda.cuda_get_device() free, total = cuda.cuda_get_mem_info(device) batch = CudaPoaBatch(10, 1024, 0.9 * free, device_id=device) poa_1 = ["ACTGACTG", "ACTTACTG", "ACTCACTG"] batch.add_poa_group(poa_1) batch.generate_poa() consensus, coverage, status = batch.get_consensus() assert (batch.total_poas == 1) # Expected graph # - -> G -> - # | | # A -> C -> T -> T -> A -> C -> T -> G # | | # - -> C -> - graphs, status = batch.get_graphs() assert (len(graphs) == 1) digraph = graphs[0] assert (digraph.number_of_nodes() == 10) assert (digraph.number_of_edges() == 11)