def test_n_labels():
    """Consuming 'test-labels.fa' should leave exactly four labels."""
    labeled_graph = GraphLabels(20, 1e7, 4)
    labeled_graph.consume_fasta_and_tag_with_labels(
        utils.get_test_data('test-labels.fa'))

    # Echo the count so it shows up in captured output on failure.
    print(labeled_graph.n_labels())
    assert labeled_graph.n_labels() == 4
def test_consume_fasta_and_tag_with_labels():
    """Consume 'test-transcript.fa' and check read and label counts.

    After consumption the first 20 bases of ``read_1`` must be present in
    the underlying graph, three reads must have been reported, and
    ``n_labels()`` must equal 3. The tag set, the label dict and the
    per-record sweep results are printed for debugging only.

    NOTE(review): a second function with this exact name is defined later
    in this file (it prints ``get_all_labels()`` instead of
    ``get_label_dict()``). If both live in the same module, the later
    binding wins and this version is never collected -- confirm which one
    is intended to run.
    """
    lb = GraphLabels(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    filename = utils.get_test_data('test-transcript.fa')

    # The second element of the result is not checked by this test.
    total_reads, _ = lb.consume_fasta_and_tag_with_labels(filename)

    print("doing get")
    assert lb.graph.get(read_1[:20])
    assert total_reads == 3

    print("doing n_labels")
    print(lb.n_labels())
    print("doing label dict")
    print(lb.get_label_dict())

    print("get tagset")
    for tag in lb.graph.get_tagset():
        print("forward hash")
        print(tag, khmer.forward_hash(tag, 20))

    for record in screed.open(filename):
        print("Sweeping tags")
        print(lb.sweep_tag_neighborhood(record.sequence, 40))
        print("Sweeping labels...")
        print(lb.sweep_label_neighborhood(record.sequence, 40))

    assert lb.n_labels() == 3
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Sweeping every record of 'real-partition-small.fa' yields one label.

    The union of the labels swept from all records must contain a single
    value, 2, and ``n_labels()`` must report 1.

    NOTE(review): a function with this same name is defined again later in
    this file; if both are in one module the later definition replaces this
    one -- confirm which is intended.
    """
    labeled_graph = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('real-partition-small.fa')
    labeled_graph.consume_partitioned_fasta_and_tag_with_labels(fasta_path)

    seen_labels = set()
    for read in screed.open(fasta_path):
        swept = labeled_graph.sweep_label_neighborhood(
            read.sequence, 0, False, False)
        seen_labels.update(swept)

    assert len(seen_labels) == 1
    assert seen_labels.pop() == 2
    assert labeled_graph.n_labels() == 1
def test_consume_fasta_and_tag_with_labels():
    """Consume 'test-transcript.fa' and verify read and label counts.

    Asserts that the leading 20 bases of ``read_1`` are found in the graph,
    that three reads were reported, and that three labels exist afterwards.
    Everything printed along the way (label listing, tag hashes, sweep
    results) is diagnostic output only.
    """
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    labeler = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-transcript.fa')

    reads_seen, _ = labeler.consume_fasta_and_tag_with_labels(fasta_path)

    print("doing get")
    assert labeler.graph.get(read_1[:20])
    assert reads_seen == 3

    print("doing n_labels")
    print(labeler.n_labels())
    print("doing all labels")
    print(labeler.get_all_labels())

    print("get tagset")
    for tagged in labeler.graph.get_tagset():
        print("forward hash")
        print(tagged, khmer.forward_hash(tagged, 20))

    for entry in screed.open(fasta_path):
        print("Sweeping tags")
        print(labeler.sweep_tag_neighborhood(entry.sequence, 40))
        print("Sweeping labels...")
        print(labeler.sweep_label_neighborhood(entry.sequence, 40))

    assert labeler.n_labels() == 3
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Consume 'real-partition-small.fa' and check the swept labels.

    Every record in the file is swept with
    ``sweep_label_neighborhood(seq, 0, False, False)``; the union of the
    results must be the single label 2, and ``n_labels()`` must be 1.
    """
    lb = GraphLabels(20, 1e7, 4)
    filename = utils.get_test_data('real-partition-small.fa')

    # Neither count is asserted here; the unpacking is kept so the call is
    # still required to return a two-item result.
    _total_reads, _n_consumed = (
        lb.consume_partitioned_fasta_and_tag_with_labels(filename))

    labels = set()
    for record in screed.open(filename):
        labels.update(
            lb.sweep_label_neighborhood(record.sequence, 0, False, False))

    assert len(labels) == 1
    assert labels.pop() == 2
    assert lb.n_labels() == 1