def test_selection_with_preferred_sources():
    """Check that marking a source as preferred changes the selected reads.

    A read set built with ``source_id=3`` is extended with every read of a
    second set built with ``source_id=1``.  Selection then runs twice with
    ``max_cov=2`` and bridging enabled: once without preferred sources and
    once with source 3 preferred.
    """
    combined = string_to_readset(""" 1 1 """, source_id=3)
    extra = string_to_readset(""" 1111 111 1111 """, source_id=1)
    for extra_read in extra:
        combined.add(extra_read)

    # No preferred sources.
    chosen = readselection(
        combined, max_cov=2, preferred_source_ids=None, bridging=True
    )
    assert chosen == {1, 2, 3}, str(chosen)

    # Source 3 is preferred.
    chosen = readselection(
        combined, max_cov=2, preferred_source_ids={3}, bridging=True
    )
    assert chosen == {0, 1, 3}, str(chosen)
def test_selection():
    """Check the selected read indices for max_cov 1, 2 and 3, and with bridging."""
    readset = string_to_readset(""" 1 1 00 0 1 10 1 1 1 11 0 1 1 1 """)

    picked = readselection(
        readset, max_cov=1, preferred_source_ids=None, bridging=False
    )
    assert picked == {1, 5}

    picked = readselection(
        readset, max_cov=2, preferred_source_ids=None, bridging=False
    )
    assert picked == {1, 3, 5}, str(picked)

    picked = readselection(
        readset, max_cov=3, preferred_source_ids=None, bridging=False
    )
    assert picked == {1, 3, 5, 7}, str(picked)

    picked = readselection(
        readset, max_cov=3, preferred_source_ids=None, bridging=True
    )
    # Author's note: bridging has no effect here.  Every position has to be
    # covered at least once before bridging starts, and by then the reads
    # {1, 3, 5, 7} already reach coverage 3, so the expected set is the same
    # as in the non-bridging case above.
    assert picked == {1, 3, 5, 7}, str(picked)
def test_components_of_readselection():
    """Compare the selection at max_cov=2 without and with bridging."""
    readset = string_to_readset(""" 111 000 00 00 1 1 """)

    without_bridging = readselection(readset, max_cov=2, bridging=False)
    assert without_bridging == {0, 1, 2, 3}, str(without_bridging)

    with_bridging = readselection(readset, max_cov=2, bridging=True)
    assert with_bridging == {0, 1, 4}, str(with_bridging)
def test_bridging():
    """Check that enabling bridging changes the selection at max_cov=2."""
    readset = string_to_readset(""" 11 00 11 00 11 00 1 1 """)

    without_bridging = readselection(readset, max_cov=2, bridging=False)
    assert without_bridging == {0, 1, 2, 3, 4, 5}

    with_bridging = readselection(readset, max_cov=2, bridging=True)
    # NOTE(review): the original author was unsure why read 0 is selected
    # here rather than read 1.
    assert with_bridging == {0, 3, 5, 6}
def test_selection2():
    """Check the selection at max_cov=4 without bridging."""
    readset = string_to_readset(""" 1111 111 1 111 1 11 1 11 """)
    picked = readselection(readset, max_cov=4, bridging=False)
    assert picked == {0, 1, 2, 3}, str(picked)
def select_reads(readset, max_coverage, preferred_source_ids):
    """Return the subset of *readset* chosen by ``readselection``.

    The indices returned by ``readselection(readset, max_coverage,
    preferred_source_ids)`` are passed to ``readset.subset``.  The coverage
    limit is logged before selecting; the number of selected reads and of
    their positions is logged afterwards.
    """
    logger.info(
        "Reducing coverage to at most %dX by selecting most informative reads ...",
        max_coverage,
    )
    kept = readset.subset(
        readselection(readset, max_coverage, preferred_source_ids)
    )
    n_reads = len(kept)
    n_positions = len(kept.get_positions())
    logger.info("Selected %d reads covering %d variants", n_reads, n_positions)
    return kept
def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) #print('INPUT READ SET') gfa_filename = sys.argv[1] max_gap = int(sys.argv[2]) max_coverage = int(sys.argv[3]) readset = gfa_to_readset(gfa_filename, max_gap) readset = readset.subset( [i for i, read in enumerate(readset) if len(read) >= 2]) selected_indices = readselection(readset, max_coverage) selected_reads = readset.subset(selected_indices) readset_length = 0 for read in selected_reads: readset_length += len(read) #print(selected_reads) def bipartition(reads): positions = reads.get_positions() # create genotypes over your variants: all heterozygous (=1) genotypes = canonic_index_list_to_biallelic_gt_list([1] * len(positions)) # genotype likelihoods are None genotype_likelihoods = [None] * len(positions)