Ejemplo n.º 1
0
def test_dory_extract_reads_fq(location):
    """Check that FASTQ input is passed through ``extract_reads`` properly.

    Runs ``make_bgzf`` on ``dory-subset.fq`` and then ``label_cdbg`` as
    setup, runs ``extract_reads``, and asserts that the output holds two
    records and that the first record has a non-empty ``quality``.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    labels_path = 'dory_k21_r1/reads.bgz.labels'
    extracted_path = 'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz'

    copy_dory_catlas()
    copy_dory_catlas_search()
    copy_dory_subset()

    # setup step: run make_bgzf on the FASTQ reads
    print('** running make_bgzf')
    make_bgzf.main(['dory-subset.fq', '-o',
                    relative_file('dory/dory.reads.bgz')])

    # setup step: run label_cdbg
    print('** running label_cdbg')
    label_cdbg.main(['dory_k21_r1',
                     relative_file('dory/dory.reads.bgz'),
                     labels_path])

    # step under test: run extract_reads
    print('** running extract_reads')
    extract_reads.main([
        relative_file('dory/dory.reads.bgz'),
        labels_path,
        'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
        '-o',
        extracted_path,
    ])

    extracted = list(screed.open(extracted_path))
    assert len(extracted) == 2
    # a non-empty quality string means the FASTQ data was preserved
    assert len(extracted[0].quality)
Ejemplo n.º 2
0
def test_dory_extract_reads(location):
    """Run ``extract_reads`` on FASTA input and check the record count.

    Runs ``make_bgzf`` on ``dory-subset.fa`` and then ``label_cdbg`` as
    setup, runs ``extract_reads``, and asserts that the output file holds
    exactly two records.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    labels_path = 'dory_k21_r1/reads.bgz.labels'
    extracted_path = 'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz'

    copy_dory_catlas()
    copy_dory_catlas_search()
    copy_dory_subset()

    # setup step: run make_bgzf on the FASTA reads
    print('** running make_bgzf')
    make_bgzf.main(['dory-subset.fa', '-o',
                    relative_file('dory/dory.reads.bgz')])

    # setup step: run label_cdbg
    print('** running label_cdbg')
    label_cdbg.main(['dory_k21_r1',
                     relative_file('dory/dory.reads.bgz'),
                     labels_path])

    # step under test: run extract_reads
    print('** running extract_reads')
    extract_reads.main([
        relative_file('dory/dory.reads.bgz'),
        labels_path,
        'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
        '-o',
        extracted_path,
    ])

    extracted = list(screed.open(extracted_path))
    assert len(extracted) == 2
Ejemplo n.º 3
0
def test_dory_label_cdbg(location):
    """Run ``label_cdbg`` on the dory data; passes if nothing raises.

    ``make_bgzf`` is run first as setup to produce the reads file that
    ``label_cdbg`` is given.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    copy_dory_catlas()
    copy_dory_subset()

    # setup step: run make_bgzf
    print('** running make_bgzf')
    make_bgzf.main(['dory-subset.fa', '-o',
                    relative_file('dory/dory.reads.bgz')])

    # step under test: run label_cdbg
    print('** running label_cdbg')
    label_cdbg.main([
        'dory_k21_r1',
        relative_file('dory/dory.reads.bgz'),
        'dory_k21_r1/reads.bgz.labels',
    ])
Ejemplo n.º 4
0
def test_dory_make_bgzf(location):
    """Run ``make_bgzf`` on ``dory-subset.fa``; passes if nothing raises.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    copy_dory_subset()

    # step under test: run make_bgzf
    print('** running make_bgzf')
    make_bgzf.main(['dory-subset.fa', '-o',
                    relative_file('dory/dory.reads.bgz')])
Ejemplo n.º 5
0
def test_dory_query_workflow(location):
    """Run the query workflow end to end and check its outputs.

    Steps, in order: convert the bcalm unitigs to gxt, build the catlas,
    build the k-mer search index, then search with ``dory-head.fa``.
    Afterwards the expected output files must exist and the last line of
    ``results.csv`` must match a fixed expected row.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    from spacegraphcats.cdbg import bcalm_to_gxt

    copy_dory_head()
    copy_dory_subset()

    # make the output directory; one that already exists is fine
    try:
        os.mkdir('dory_k21_r1')
    except FileExistsError:
        pass

    # convert the bcalm file to gxt
    bcalm_to_gxt.main([
        '-k', '21', '-P',
        relative_file('dory/bcalm.dory.k21.unitigs.fa'),
        'dory_k21_r1/cdbg.gxt',
        'dory_k21_r1/contigs.fa.gz',
    ])

    # build catlas
    catlas_args = pytest_utils.Args()
    catlas_args.no_checkpoint = True
    catlas_args.level = 0
    catlas_args.radius = 1
    catlas_args.project = 'dory_k21_r1'
    print('** running catlas')
    catlas.main(catlas_args)

    # make k-mer search index
    index_args = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_args)

    # do search!!
    search_args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query dory-head.fa -k 21'.split()
    try:
        query_by_sequence.main(search_args)
    except SystemExit as e:
        # a SystemExit is tolerated only when it carries exit code 0
        assert e.code == 0, str(e)

    # check output!
    output_path = 'dory_k21_r1_search_oh0/'
    expected_outputs = (
        'command.txt',
        'dory-head.fa.frontier.txt.gz',
        'dory-head.fa.cdbg_ids.txt.gz',
        'dory-head.fa.response.txt',
        'dory-head.fa.contigs.sig',
        'results.csv',
    )
    for output_name in expected_outputs:
        assert os.path.exists(output_path + output_name)

    with open(output_path + 'results.csv') as fp:
        lines = fp.readlines()

    assert len(lines) == 2
    last_line = lines[-1].strip()
    assert last_line == 'dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0,dory_k21_r1'
Ejemplo n.º 6
0
def test_dory_query_by_hashval(location):
    """Index the dory contigs by minhash, then query by hash value.

    Passes when ``dory_k21_r1_hashval_k31/hashval_results.csv`` exists
    after ``query_by_hashval`` has run.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    shutil.copyfile(relative_file('data/dory-k31-hashval-queries.txt'),
                    'dory-k31-hashval-queries.txt')

    copy_dory_catlas()

    # index by hashval
    index_args = '-k 31 dory_k21_r1/contigs.fa.gz dory_k21_r1_mh.pickle'
    index_cdbg_by_minhash.main(index_args.split())

    # query using the hashvals copied in above
    query_args = '-k 31 dory_k21_r1 dory_k21_r1_mh.pickle dory-k31-hashval-queries.txt dory_k21_r1_hashval_k31'
    query_by_hashval.main(query_args.split())

    assert os.path.exists('dory_k21_r1_hashval_k31/hashval_results.csv')
Ejemplo n.º 7
0
def test_dory_search_nomatch(location):
    """Search with a query where zero k-mers match; this should not fail.

    The test passes if ``query_by_sequence.main`` returns normally or
    raises ``SystemExit`` with code 0.

    ``location`` is not used in the body; presumably a pytest fixture that
    prepares the working directory -- TODO confirm.
    """
    copy_dory_catlas()

    shutil.copyfile(relative_file('data/random-query-nomatch.fa'),
                    'random-query.fa')

    # make k-mer search index
    index_args = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_args)

    # do search!!
    search_args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query random-query.fa -k 21'.split()
    try:
        query_by_sequence.main(search_args)
    except SystemExit as e:
        # a SystemExit is tolerated only when it carries exit code 0
        assert e.code == 0, str(e)
Ejemplo n.º 8
0
def copy_dory_subset():
    """Copy the dory-subset FASTA and FASTQ test data to relative paths.

    Each source is located with ``relative_file`` and copied to a file of
    the same base name, resolved against the current working directory.
    """
    copies = (
        ('data/dory-subset.fa', 'dory-subset.fa'),
        ('data/dory-subset.fq', 'dory-subset.fq'),
    )
    for source, destination in copies:
        shutil.copyfile(relative_file(source), destination)
Ejemplo n.º 9
0
def copy_dory_head():
    """Copy ``data/dory-head.fa`` (via ``relative_file``) to ``dory-head.fa``."""
    shutil.copyfile(relative_file('data/dory-head.fa'), 'dory-head.fa')
Ejemplo n.º 10
0
def copy_dory_sig():
    """Copy ``data/dory-subset.fq.sig`` (via ``relative_file``) to ``dory-subset.fq.sig``."""
    shutil.copyfile(relative_file('data/dory-subset.fq.sig'),
                    'dory-subset.fq.sig')