def test_sweep_tag_neighborhood():
    """Sweep the tag neighborhood of a single query sequence.

    Builds a GraphLabels, runs ``graph.consume_fasta_and_tag`` on the
    ``single-read.fq`` test file, and expects the sweep to return exactly
    one tag whose value is 173473779682.
    """
    # NOTE(review): a later definition with the same name appears in this
    # file; if both live in one module the later one rebinds the name and
    # this version is presumably never collected -- confirm and drop one.
    labeler = GraphLabels(20, 1e7, 4)
    reads_path = utils.get_test_data('single-read.fq')
    labeler.graph.consume_fasta_and_tag(reads_path)

    query = 'CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT'
    found = labeler.sweep_tag_neighborhood(query)

    assert len(found) == 1
    # pop() removes and returns the only remaining element.
    only_tag = found.pop()
    assert only_tag == 173473779682
def test_sweep_tag_neighborhood():
    """Sweep the tag neighborhood of a single query sequence.

    After ``graph.consume_fasta_and_tag`` has been run on the
    ``single-read.fq`` test file, the sweep must yield exactly one tag,
    173473779682. The result is compared as a list so it is not mutated.
    """
    expected_tags = [173473779682]
    query_sequence = 'CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT'

    graph_labels = GraphLabels(20, 1e7, 4)
    fastq_path = utils.get_test_data('single-read.fq')
    graph_labels.graph.consume_fasta_and_tag(fastq_path)

    swept = graph_labels.sweep_tag_neighborhood(query_sequence)
    assert len(swept) == 1
    assert list(swept) == expected_tags
def test_label_tag_correctness_save_load():
    """Check label sweeps on a GraphLabels restored from a saved file.

    One GraphLabels consumes ``test-labels.fa`` and saves its labels and
    tags; a second, fresh GraphLabels loads that file and is then queried
    with four reads (A-D), each of which must return the expected labels.
    """
    # NOTE(review): a later definition with the same name appears in this
    # file; if both live in one module the later one rebinds the name and
    # this version is presumably never collected -- confirm and drop one.
    original = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    original.consume_fasta_and_tag_with_labels(fasta_path)

    # save labels to a file
    labels_path = utils.get_temp_filename('saved.labels')
    original.save_labels_and_tags(labels_path)

    # trash the old GraphLabels
    del original

    # create new, load labels & tags
    restored = GraphLabels(20, 1e7, 4)
    restored.load_labels_and_tags(labels_path)

    # read A -- handled separately because of its extra diagnostic prints
    found = restored.sweep_label_neighborhood(
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    tags_near_probe = restored.sweep_tag_neighborhood(
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    print(tags_near_probe)
    print(found)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(found) == 2
    for wanted in (0, 1):
        assert wanted in found

    # reads B, C and D share one shape: sweep, print, check size + members
    remaining_reads = (
        # read B
        ('GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
         'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA',
         (0, 1, 2)),
        # read C
        ('TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
         'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
         'ACAACACATACA',
         (1, 2)),
        # read D
        ('TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC',
         (3,)),
    )
    for sequence, expected in remaining_reads:
        found = restored.sweep_label_neighborhood(sequence)
        print(found)
        assert len(found) == len(expected)
        for wanted in expected:
            assert wanted in found
def test_label_tag_correctness_save_load():
    """Round-trip labels and tags through a file, then verify label sweeps.

    A GraphLabels that consumed ``test-labels.fa`` is saved with
    ``save_labels_and_tags`` and deleted; a new instance loads the file and
    must report the expected labels for reads A, B, C and D.
    """
    read_a = ('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
              'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    # Sequence used only for the diagnostic tag-sweep print below.
    tag_probe = ('TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
                 'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    read_b = ('GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
              'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    read_c = ('TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
              'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
              'ACAACACATACA')
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'

    def check(found, expected):
        # Same checks as the inline asserts: size first, then each member.
        assert len(found) == len(expected)
        for wanted in expected:
            assert wanted in found

    source_labels = GraphLabels(20, 1e7, 4)
    input_path = utils.get_test_data('test-labels.fa')
    source_labels.consume_fasta_and_tag_with_labels(input_path)

    # save labels to a file
    dump_path = utils.get_temp_filename('saved.labels')
    source_labels.save_labels_and_tags(dump_path)

    # trash the old GraphLabels
    del source_labels

    # create new, load labels & tags
    loaded = GraphLabels(20, 1e7, 4)
    loaded.load_labels_and_tags(dump_path)

    # read A
    hits = loaded.sweep_label_neighborhood(read_a)
    print(loaded.sweep_tag_neighborhood(tag_probe))
    print(hits)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    check(hits, (0, 1))

    # read B
    hits = loaded.sweep_label_neighborhood(read_b)
    print(hits)
    check(hits, (0, 1, 2))

    # read C
    hits = loaded.sweep_label_neighborhood(read_c)
    print(hits)
    check(hits, (1, 2))

    # read D
    hits = loaded.sweep_label_neighborhood(read_d)
    print(hits)
    check(hits, (3,))
def test_label_tag_correctness():
    """Check the labels returned by sweeping four reads over test-labels.fa.

    A GraphLabels consumes ``test-labels.fa`` via
    ``consume_fasta_and_tag_with_labels``; reads A-D are then swept with
    ``sweep_label_neighborhood`` and each must return the expected labels.
    """
    # NOTE(review): a later definition with the same name appears in this
    # file; if both live in one module the later one rebinds the name and
    # this version is presumably never collected -- confirm and drop one.
    labeler = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    labeler.consume_fasta_and_tag_with_labels(fasta_path)

    # read A -- handled separately because of its extra diagnostic prints
    found = labeler.sweep_label_neighborhood(
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    tags_near_probe = labeler.sweep_tag_neighborhood(
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    print(tags_near_probe)
    print(found)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(found) == 2
    for wanted in (0, 1):
        assert wanted in found

    # reads B, C and D share one shape: sweep, print, check size + members
    remaining_reads = (
        # read B
        ('GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
         'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA',
         (0, 1, 2)),
        # read C
        ('TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
         'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
         'ACAACACATACA',
         (1, 2)),
        # read D
        ('TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC',
         (3,)),
    )
    for sequence, expected in remaining_reads:
        found = labeler.sweep_label_neighborhood(sequence)
        print(found)
        assert len(found) == len(expected)
        for wanted in expected:
            assert wanted in found
def test_label_tag_correctness():
    """Verify label sweeps for reads A-D against a freshly built GraphLabels.

    The GraphLabels is populated from ``test-labels.fa`` with
    ``consume_fasta_and_tag_with_labels``; each read is swept with
    ``sweep_label_neighborhood`` and checked for its expected labels.
    """
    read_a = ('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
              'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    # Sequence used only for the diagnostic tag-sweep print below.
    tag_probe = ('TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
                 'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    read_b = ('GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
              'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    read_c = ('TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
              'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
              'ACAACACATACA')
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'

    def check(found, expected):
        # Same checks as the inline asserts: size first, then each member.
        assert len(found) == len(expected)
        for wanted in expected:
            assert wanted in found

    graph_labels = GraphLabels(20, 1e7, 4)
    input_path = utils.get_test_data('test-labels.fa')
    graph_labels.consume_fasta_and_tag_with_labels(input_path)

    # read A
    hits = graph_labels.sweep_label_neighborhood(read_a)
    print(graph_labels.sweep_tag_neighborhood(tag_probe))
    print(hits)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    check(hits, (0, 1))

    # read B
    hits = graph_labels.sweep_label_neighborhood(read_b)
    print(hits)
    check(hits, (0, 1, 2))

    # read C
    hits = graph_labels.sweep_label_neighborhood(read_c)
    print(hits)
    check(hits, (1, 2))

    # read D
    hits = graph_labels.sweep_label_neighborhood(read_d)
    print(hits)
    check(hits, (3,))
def test_consume_fasta_and_tag_with_labels():
    """Consume test-transcript.fa with labels and check the resulting counts.

    Asserts that the first 20 bases of ``read_1`` are present in the graph,
    that ``consume_fasta_and_tag_with_labels`` reports 3 total reads, and
    that ``n_labels()`` is 3 afterwards. The intermediate prints (label
    dict, tagset hashes, per-record sweeps) are diagnostics only; nothing
    is asserted on them.
    """
    # NOTE(review): a later definition with the same name appears in this
    # file; if both live in one module the later one rebinds the name and
    # this version is presumably never collected -- confirm and drop one.
    lb = GraphLabels(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    filename = utils.get_test_data('test-transcript.fa')

    # Only the read count is checked; the second returned value is unused,
    # so it is bound to the conventional throwaway name.
    total_reads, _ = lb.consume_fasta_and_tag_with_labels(filename)

    print("doing get")
    assert lb.graph.get(read_1[:20])
    assert total_reads == 3

    print("doing n_labels")
    print(lb.n_labels())
    print("doing label dict")
    print(lb.get_label_dict())

    print("get tagset")
    for tag in lb.graph.get_tagset():
        print("forward hash")
        print(tag, khmer.forward_hash(tag, 20))

    for record in screed.open(filename):
        print("Sweeping tags")
        print(lb.sweep_tag_neighborhood(record.sequence, 40))
        print("Sweeping labels...")
        print(lb.sweep_label_neighborhood(record.sequence, 40))

    assert lb.n_labels() == 3
def test_consume_fasta_and_tag_with_labels():
    """Consume test-transcript.fa with labels and check the resulting counts.

    Asserts that the leading 20 bases of the first read are found in the
    graph, that 3 total reads are reported, and that ``n_labels()`` is 3 at
    the end. Everything printed in between is diagnostic output.
    """
    first_read = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'

    labeler = GraphLabels(20, 1e7, 4)
    transcript_path = utils.get_test_data('test-transcript.fa')
    consumed = labeler.consume_fasta_and_tag_with_labels(transcript_path)
    # Two values come back; only the first (total reads) is checked.
    total_reads, _ = consumed

    print("doing get")
    leading_kmer = first_read[:20]
    assert labeler.graph.get(leading_kmer)
    assert total_reads == 3

    print("doing n_labels")
    print(labeler.n_labels())
    print("doing all labels")
    print(labeler.get_all_labels())

    print("get tagset")
    for tag in labeler.graph.get_tagset():
        print("forward hash")
        hashed = khmer.forward_hash(tag, 20)
        print(tag, hashed)

    for record in screed.open(transcript_path):
        sequence = record.sequence
        print("Sweeping tags")
        print(labeler.sweep_tag_neighborhood(sequence, 40))
        print("Sweeping labels...")
        print(labeler.sweep_label_neighborhood(sequence, 40))

    assert labeler.n_labels() == 3