Beispiel #1
0
def test_counting_label_tag_correctness():
    """Check the labels a neighborhood sweep reports for four query reads.

    A ``GraphLabels.CountGraphLabels(20, 1e7, 4)`` instance consumes and tags
    the ``test-labels.fa`` test data file.  ``sweep_label_neighborhood`` is
    then run on each query sequence (reads A-D) and the resulting list of
    labels is checked for its length and for the expected label ids.
    """
    lb = GraphLabels.CountGraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    lb.consume_seqfile_and_tag_with_labels(fasta_path)

    def swept_labels(sequence):
        # Materialise the sweep result as a list before inspecting it.
        return list(lb.sweep_label_neighborhood(sequence))

    # read A
    read_a = (
        'ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCCTAGAGCTAGGCTAGGTGT')
    labels = swept_labels(read_a)
    # Diagnostic output only.  NOTE(review): this tag-sweep query is not
    # byte-identical to read A (e.g. it starts with 'T' instead of 'A') --
    # confirm whether that is intentional.
    tag_query = (
        'TTCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGGCTCTGCCTAGAG'
        'CTAGGCTAGGTGTGCTCTGCTAGAGCTAGGCTAGGTGT')
    print(lb.sweep_tag_neighborhood(tag_query))
    print(labels)
    print(len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19)
    assert len(labels) == 2
    for expected in (0, 1):
        assert expected in labels

    # read B
    read_b = (
        'GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAG'
        'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA')
    labels = swept_labels(read_b)
    print(labels)
    assert len(labels) == 3
    for expected in (0, 1, 2):
        assert expected in labels

    # read C
    read_c = (
        'TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAG'
        'CTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCA'
        'ACAACACATACA')
    labels = swept_labels(read_c)
    print(labels)
    assert len(labels) == 2
    for expected in (1, 2):
        assert expected in labels

    # read D
    read_d = 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC'
    labels = swept_labels(read_d)
    print(labels)
    assert len(labels) == 1
    assert 3 in labels