Example #1
0
def test_sweep_tag_neighborhood():
    """Sweep tags around a fixed sequence after tagging 'single-read.fq'.

    Exactly one tag is expected back, and its value must be 173473779682.
    """
    labeller = GraphLabels(20, 1e7, 4)
    reads_path = utils.get_test_data('single-read.fq')
    labeller.graph.consume_fasta_and_tag(reads_path)

    query = 'CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT'
    found = labeller.sweep_tag_neighborhood(query)

    assert len(found) == 1
    # pop() removes and returns the single remaining tag
    assert found.pop() == 173473779682
Example #2
0
def test_sweep_tag_neighborhood():
    """Tag 'single-read.fq' and sweep the tag neighborhood of one sequence.

    The sweep must return a single tag; the collection is compared as a
    list against [173473779682] without being modified.
    """
    graph_labels = GraphLabels(20, 1e7, 4)
    fastq_path = utils.get_test_data('single-read.fq')
    graph_labels.graph.consume_fasta_and_tag(fastq_path)

    seed_sequence = 'CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT'
    swept = graph_labels.sweep_tag_neighborhood(seed_sequence)

    assert len(swept) == 1
    assert list(swept) == [173473779682]
Example #3
0
def test_label_tag_correctness_save_load():
    """Round-trip labels and tags through a file, then verify label sweeps.

    A GraphLabels is filled from 'test-labels.fa', written out with
    save_labels_and_tags, deleted, and read back into a fresh instance.
    Four reads (A-D) are then swept on the fresh instance and the returned
    labels are compared with the expected label ids.
    """
    original = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    original.consume_fasta_and_tag_with_labels(fasta_path)

    # write labels and tags out, then drop the instance that produced them
    saved_to = utils.get_temp_filename('saved.labels')
    original.save_labels_and_tags(saved_to)
    del original

    # everything below runs against a new instance loaded from the file
    reloaded = GraphLabels(20, 1e7, 4)
    reloaded.load_labels_and_tags(saved_to)

    read_a = (
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    # a slightly different sequence; its tag sweep is printed, not asserted
    tag_probe = (
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    read_b = (
        'GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
        'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    read_c = (
        'TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
        'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
        'ACAACACATACA')
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'

    # read A
    found = reloaded.sweep_label_neighborhood(read_a)
    print(reloaded.sweep_tag_neighborhood(tag_probe))
    print(found)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(found) == 2
    for label_id in (0, 1):
        assert label_id in found

    # read B
    found = reloaded.sweep_label_neighborhood(read_b)
    print(found)
    assert len(found) == 3
    for label_id in (0, 1, 2):
        assert label_id in found

    # read C
    found = reloaded.sweep_label_neighborhood(read_c)
    print(found)
    assert len(found) == 2
    for label_id in (1, 2):
        assert label_id in found

    # read D
    found = reloaded.sweep_label_neighborhood(read_d)
    print(found)
    assert len(found) == 1
    assert 3 in found
Example #4
0
def test_label_tag_correctness_save_load():
    """Check label sweeps on a GraphLabels restored from a saved file.

    Labels and tags built from 'test-labels.fa' are saved, the source
    instance is deleted, and a second instance loads the saved file. Reads
    A through D are swept on the second instance; each sweep must return
    exactly the expected label ids.
    """
    source_labels = GraphLabels(20, 1e7, 4)
    input_path = utils.get_test_data('test-labels.fa')
    source_labels.consume_fasta_and_tag_with_labels(input_path)

    # save labels to a file
    labels_file = utils.get_temp_filename('saved.labels')
    source_labels.save_labels_and_tags(labels_file)

    # discard the instance the file was written from
    del source_labels

    # build a new instance and load labels & tags from the file
    restored = GraphLabels(20, 1e7, 4)
    restored.load_labels_and_tags(labels_file)

    # read A: printed diagnostics sit between the sweep and the assertions
    labels = restored.sweep_label_neighborhood(
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    probe_tags = restored.sweep_tag_neighborhood(
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    print(probe_tags)
    print(labels)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(labels) == 2
    assert 0 in labels
    assert 1 in labels

    read_b = (
        'GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
        'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    read_c = (
        'TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
        'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
        'ACAACACATACA')
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'

    # reads B, C and D share one shape: sweep, print, check size and members
    remaining = (
        (read_b, (0, 1, 2)),
        (read_c, (1, 2)),
        (read_d, (3,)),
    )
    for sequence, expected_ids in remaining:
        labels = restored.sweep_label_neighborhood(sequence)
        print(labels)
        assert len(labels) == len(expected_ids)
        for label_id in expected_ids:
            assert label_id in labels
Example #5
0
def test_label_tag_correctness():
    """Verify the labels returned when sweeping four reads (A-D).

    The GraphLabels is populated from 'test-labels.fa' with
    consume_fasta_and_tag_with_labels; each read's label sweep must then
    contain exactly the expected label ids.
    """
    labeller = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    labeller.consume_fasta_and_tag_with_labels(fasta_path)

    def check_labels(found, expected_ids):
        # same number of labels, and every expected id is present
        assert len(found) == len(expected_ids)
        for label_id in expected_ids:
            assert label_id in found

    # read A
    found = labeller.sweep_label_neighborhood(
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    # tag sweep of a slightly different sequence; printed only, not asserted
    probe_tags = labeller.sweep_tag_neighborhood(
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    print(probe_tags)
    print(found)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    check_labels(found, (0, 1))

    # read B
    found = labeller.sweep_label_neighborhood(
        'GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
        'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    print(found)
    check_labels(found, (0, 1, 2))

    # read C
    found = labeller.sweep_label_neighborhood(
        'TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
        'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
        'ACAACACATACA')
    print(found)
    check_labels(found, (1, 2))

    # read D
    found = labeller.sweep_label_neighborhood(
        'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC')
    print(found)
    check_labels(found, (3,))
Example #6
0
def test_label_tag_correctness():
    """Sweep labels for reads A-D and compare them with the expected ids.

    Labels come from consuming 'test-labels.fa' with
    consume_fasta_and_tag_with_labels. For every read both the number of
    labels returned and the membership of each expected id are asserted.
    """
    graph_labels = GraphLabels(20, 1e7, 4)
    source_path = utils.get_test_data('test-labels.fa')
    graph_labels.consume_fasta_and_tag_with_labels(source_path)

    read_a = (
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    # only used for a printed tag sweep; nothing is asserted about it
    tag_probe = (
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    read_b = (
        'GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
        'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    read_c = (
        'TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
        'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
        'ACAACACATACA')
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'

    # read A
    swept = graph_labels.sweep_label_neighborhood(read_a)
    print(graph_labels.sweep_tag_neighborhood(tag_probe))
    print(swept)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(swept) == 2
    assert all(label_id in swept for label_id in (0, 1))

    # read B
    swept = graph_labels.sweep_label_neighborhood(read_b)
    print(swept)
    assert len(swept) == 3
    assert all(label_id in swept for label_id in (0, 1, 2))

    # read C
    swept = graph_labels.sweep_label_neighborhood(read_c)
    print(swept)
    assert len(swept) == 2
    assert all(label_id in swept for label_id in (1, 2))

    # read D
    swept = graph_labels.sweep_label_neighborhood(read_d)
    print(swept)
    assert len(swept) == 1
    assert 3 in swept
Example #7
0
def test_consume_fasta_and_tag_with_labels():
    """Consume 'test-transcript.fa' with labels and check the basic counts.

    Asserts that the first 20 bases of a known read are present in the
    graph, that three reads were reported as consumed from the file, and
    that three labels exist afterwards. The remaining calls only print
    diagnostic output (label dict, tagset hashes, per-record sweeps) and
    assert nothing beyond completing without an exception.
    """
    lb = GraphLabels(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    filename = utils.get_test_data('test-transcript.fa')

    # The call returns a pair; only the first element (the read count) is
    # checked here, so the second is discarded instead of bound to a name.
    total_reads, _ = lb.consume_fasta_and_tag_with_labels(filename)
    print("doing get")
    assert lb.graph.get(read_1[:20])
    assert total_reads == 3
    print("doing n_labels")
    print(lb.n_labels())
    print("doing label dict")
    print(lb.get_label_dict())
    print("get tagset")
    for tag in lb.graph.get_tagset():
        print("forward hash")
        print(tag, khmer.forward_hash(tag, 20))
    # Sweep every record of the input file for both tags and labels.
    for record in screed.open(filename):
        print("Sweeping tags")
        print(lb.sweep_tag_neighborhood(record.sequence, 40))
        print("Sweeping labels...")
        print(lb.sweep_label_neighborhood(record.sequence, 40))
    assert lb.n_labels() == 3
Example #8
0
def test_consume_fasta_and_tag_with_labels():
    """Load 'test-transcript.fa' with labels and verify read/label counts.

    Checks that the leading 20 bases of a known read are found in the
    graph, that three reads were reported, and that three labels exist at
    the end. All other calls just print diagnostics.
    """
    # 20 is the first GraphLabels argument and is reused below for slicing
    # and forward_hash; presumably the k-mer size (confirm against the API).
    ksize = 20
    graph_labels = GraphLabels(ksize, 1e7, 4)
    known_read = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    transcript_path = utils.get_test_data('test-transcript.fa')

    result = graph_labels.consume_fasta_and_tag_with_labels(transcript_path)
    # the call yields a pair; only the read count is examined
    reads_seen, _ = result

    print("doing get")
    leading_kmer = known_read[:ksize]
    assert graph_labels.graph.get(leading_kmer)
    assert reads_seen == 3

    print("doing n_labels")
    print(graph_labels.n_labels())
    print("doing all labels")
    print(graph_labels.get_all_labels())

    print("get tagset")
    tagset = graph_labels.graph.get_tagset()
    for tag in tagset:
        print("forward hash")
        print(tag, khmer.forward_hash(tag, ksize))

    # sweep each record of the input file for tags, then for labels
    for record in screed.open(transcript_path):
        sequence = record.sequence
        print("Sweeping tags")
        print(graph_labels.sweep_tag_neighborhood(sequence, 40))
        print("Sweeping labels...")
        print(graph_labels.sweep_label_neighborhood(sequence, 40))

    assert graph_labels.n_labels() == 3