Example #1
0
def test_n_labels():
    """Check the label count after consuming the ``test-labels.fa`` fixture.

    Builds a ``GraphLabels(20, 1e7, 4)``, feeds it the fixture through
    ``consume_fasta_and_tag_with_labels`` and expects ``n_labels()`` to
    report exactly 4.
    """
    labeller = GraphLabels(20, 1e7, 4)
    labeller.consume_fasta_and_tag_with_labels(
        utils.get_test_data('test-labels.fa'))

    # Echo the count so a failing run shows the observed value.
    print(labeller.n_labels())
    assert labeller.n_labels() == 4
Example #2
0
def test_n_labels():
    """Consume ``test-labels.fa`` and verify that 4 labels are reported."""
    fasta_path = utils_path = None  # placeholders overwritten below
    label_graph = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('test-labels.fa')
    label_graph.consume_fasta_and_tag_with_labels(fasta_path)

    # n_labels() is queried once for the diagnostic print and once more for
    # the assertion itself.
    printed_count = label_graph.n_labels()
    print(printed_count)
    checked_count = label_graph.n_labels()
    assert checked_count == 4
Example #3
0
def test_consume_fasta_and_tag_with_labels():
    """Exercise labelled FASTA consumption on ``test-transcript.fa``.

    Asserts that 3 reads are reported, that ``graph.get`` is truthy for the
    first 20 bases of ``read_1``, and that 3 labels exist afterwards. The
    label dict, tagset and neighborhood sweeps are printed as diagnostics
    only; nothing is asserted about them.
    """
    labeller = GraphLabels(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    leading_kmer = read_1[:20]
    fasta_path = utils.get_test_data('test-transcript.fa')

    # The second element of the returned pair is not checked by this test.
    total_reads, _ = labeller.consume_fasta_and_tag_with_labels(fasta_path)

    print("doing get")
    assert labeller.graph.get(leading_kmer)
    assert total_reads == 3

    print("doing n_labels")
    label_count = labeller.n_labels()
    print(label_count)

    print("doing label dict")
    label_dict = labeller.get_label_dict()
    print(label_dict)

    print("get tagset")
    for tag in labeller.graph.get_tagset():
        print("forward hash")
        hashed = khmer.forward_hash(tag, 20)
        print(tag, hashed)

    for record in screed.open(fasta_path):
        sequence = record.sequence
        print("Sweeping tags")
        swept_tags = labeller.sweep_tag_neighborhood(sequence, 40)
        print(swept_tags)
        print("Sweeping labels...")
        swept_labels = labeller.sweep_label_neighborhood(sequence, 40)
        print(swept_labels)

    assert labeller.n_labels() == 3
Example #4
0
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Verify labelling of the partitioned ``real-partition-small.fa`` file.

    After consuming the file, every record is swept with
    ``sweep_label_neighborhood(seq, 0, False, False)`` and the results are
    pooled. The test expects the pool to hold the single label ``2`` and
    ``n_labels()`` to report 1.
    """
    labeller = GraphLabels(20, 1e7, 4)
    fasta_path = utils.get_test_data('real-partition-small.fa')
    labeller.consume_partitioned_fasta_and_tag_with_labels(fasta_path)

    # Pool every label seen across all records into one set.
    seen_labels = {
        label
        for record in screed.open(fasta_path)
        for label in labeller.sweep_label_neighborhood(
            record.sequence, 0, False, False)
    }

    assert len(seen_labels) == 1
    assert seen_labels.pop() == 2
    assert labeller.n_labels() == 1
Example #5
0
def test_consume_fasta_and_tag_with_labels():
    """Consume ``test-transcript.fa`` with labelling and check the totals.

    Asserts 3 reads were reported, that ``graph.get`` is truthy for the
    first 20 bases of ``read_1``, and that ``n_labels()`` ends at 3. The
    output of ``get_all_labels``, the tagset hashes and the neighborhood
    sweeps is printed for inspection but not asserted on.
    """
    label_graph = GraphLabels(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    transcript_path = utils.get_test_data('test-transcript.fa')

    consumed = label_graph.consume_fasta_and_tag_with_labels(transcript_path)
    # Unpack the pair; only the read total is checked.
    total_reads, _ = consumed

    print("doing get")
    prefix = read_1[:20]
    assert label_graph.graph.get(prefix)
    assert total_reads == 3

    print("doing n_labels")
    print(label_graph.n_labels())
    print("doing all labels")
    print(label_graph.get_all_labels())

    print("get tagset")
    tagset = label_graph.graph.get_tagset()
    for tag in tagset:
        print("forward hash")
        print(tag, khmer.forward_hash(tag, 20))

    for entry in screed.open(transcript_path):
        print("Sweeping tags")
        print(label_graph.sweep_tag_neighborhood(entry.sequence, 40))
        print("Sweeping labels...")
        print(label_graph.sweep_label_neighborhood(entry.sequence, 40))

    final_count = label_graph.n_labels()
    assert final_count == 3
Example #6
0
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Check labels produced from the partitioned ``real-partition-small.fa``.

    The consume call's result is unpacked as a pair (its values are not
    inspected). Each record is then swept with
    ``sweep_label_neighborhood(seq, 0, False, False)``; the union of the
    results must be exactly one label equal to ``2``, and ``n_labels()``
    must report 1.
    """
    label_graph = GraphLabels(20, 1e7, 4)
    partition_path = utils.get_test_data('real-partition-small.fa')

    # Unpacking still requires the call to return a two-element result.
    _total_reads, _n_consumed = (
        label_graph.consume_partitioned_fasta_and_tag_with_labels(
            partition_path))

    found = set()
    for entry in screed.open(partition_path):
        swept = label_graph.sweep_label_neighborhood(
            entry.sequence, 0, False, False)
        found.update(swept)

    assert len(found) == 1
    assert found.pop() == 2
    assert label_graph.n_labels() == 1