Beispiel #1
0
def test_dory_multifasta_query(location):
    """Run the multifasta index/query pipeline against ``dory_k21_r1``.

    Steps, in order: build the k-mer index, index the cDBG by multifasta
    (query ``dory-head.fa``), index the cDBG by minhash, query the
    multifasta index with ``dory-subset.fq.sig``, and extract cDBG records.
    The test passes when both CSV outputs exist afterwards.

    ``location`` is not referenced in the body; presumably it is a pytest
    fixture that prepares the working directory -- TODO confirm.
    """
    # stage the inputs (helpers are defined elsewhere in the test module)
    copy_dory_head()
    copy_dory_catlas()
    copy_dory_sig()

    # make k-mer search index - FIXTURE
    kmer_argv = '-k 21 dory_k21_r1'.split()
    index_contigs_by_kmer.main(kmer_argv)

    # index by multifasta
    os.mkdir('dory_k21_r1_multifasta')
    multifasta_argv = 'dory_k21_r1 dory_k21_r1_multifasta/multifasta.pickle --query dory-head.fa -k 21'.split()
    index_cdbg_by_multifasta.main(multifasta_argv)

    # index by minhash
    minhash_argv = '-k 21 --scaled 100 dory_k21_r1/contigs.fa.gz dory_k21_r1_multifasta/hashval.pickle'.split()
    index_cdbg_by_minhash.main(minhash_argv)

    # query the multifasta index with the signature
    query_argv = '--hashvals dory_k21_r1_multifasta/hashval.pickle --multi-idx dory_k21_r1_multifasta/multifasta.pickle  --query-sig dory-subset.fq.sig --output dory_k21_r1_multifasta/query-results.csv -k 21 --scaled 100'.split()
    query_multifasta_by_sig.main(query_argv)

    # extract the cDBG nodes per multifasta record
    extract_argv = '--multi-idx dory_k21_r1_multifasta/multifasta.pickle --output dory_k21_r1_multifasta/multifasta.cdbg_by_record.csv --info-csv dory_k21_r1/contigs.fa.gz.info.csv'.split()
    extract_cdbg_by_multifasta.main(extract_argv)

    # both result files must have been written
    expected_outputs = (
        'dory_k21_r1_multifasta/multifasta.cdbg_by_record.csv',
        'dory_k21_r1_multifasta/query-results.csv',
    )
    for csv_path in expected_outputs:
        assert os.path.exists(csv_path)
Beispiel #2
0
def test_dory_query_workflow(location):
    """Build a catlas from the dory bcalm unitigs and query it by sequence.

    Converts the bcalm output to gxt, builds the catlas (radius 1, no
    checkpointing), builds the k-mer index, runs ``query_by_sequence`` with
    ``dory-head.fa``, then checks the search output files and the single
    data row of ``results.csv``.

    ``location`` is not referenced in the body; presumably it is a pytest
    fixture that prepares the working directory -- TODO confirm.
    """
    from spacegraphcats.cdbg import bcalm_to_gxt

    # stage the inputs (helpers are defined elsewhere in the test module)
    copy_dory_head()
    copy_dory_subset()

    # make the output directory; an already existing one is acceptable
    try:
        os.mkdir('dory_k21_r1')
    except FileExistsError:
        pass

    # convert the bcalm file to gxt
    unitigs_path = relative_file('dory/bcalm.dory.k21.unitigs.fa')
    gxt_argv = ['-k', '21', '-P', unitigs_path,
                'dory_k21_r1/cdbg.gxt', 'dory_k21_r1/contigs.fa.gz']
    bcalm_to_gxt.main(gxt_argv)

    # build catlas
    catlas_args = pytest_utils.Args()
    catlas_args.no_checkpoint = True
    catlas_args.level = 0
    catlas_args.radius = 1
    catlas_args.project = 'dory_k21_r1'
    print('** running catlas')
    catlas.main(catlas_args)

    # make k-mer search index
    index_argv = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_argv)

    # do search!!  A SystemExit is tolerated only with exit code 0.
    search_argv = 'dory_k21_r1 dory_k21_r1_search_oh0 --query dory-head.fa -k 21'.split()
    try:
        query_by_sequence.main(search_argv)
    except SystemExit as exit_exc:
        assert exit_exc.code == 0, str(exit_exc)

    # check output!
    output_path = 'dory_k21_r1_search_oh0/'
    expected_files = (
        'command.txt',
        'dory-head.fa.frontier.txt.gz',
        'dory-head.fa.cdbg_ids.txt.gz',
        'dory-head.fa.response.txt',
        'dory-head.fa.contigs.sig',
        'results.csv',
    )
    for filename in expected_files:
        assert os.path.exists(output_path + filename)

    with open(output_path + 'results.csv') as results_fp:
        rows = results_fp.readlines()

    # two lines in total; the last one is the result row for the query
    assert len(rows) == 2
    assert rows[-1].strip() == 'dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0,dory_k21_r1'
Beispiel #3
0
def test_dory_extract_unassembled_nodes(location):
    """Run ``extract_unassembled_nodes`` on the dory catlas.

    Builds the k-mer index for ``dory_k21_r1`` and then runs the extraction
    with ``dory-head.fa`` as input and ``dory.regions`` as output name.
    The test has no assertions; it passes when neither step raises.

    ``location`` is not referenced in the body; presumably it is a pytest
    fixture that prepares the working directory -- TODO confirm.
    """
    # stage the inputs (helpers are defined elsewhere in the test module)
    copy_dory_catlas()
    copy_dory_head()

    # make k-mer search index - FIXTURE
    index_argv = '-k 21 dory_k21_r1'.split()
    index_contigs_by_kmer.main(index_argv)

    # run extract_unassembled_regions
    extract_argv = 'dory_k21_r1 dory-head.fa dory.regions -k 21'.split()
    extract_unassembled_nodes.main(extract_argv)
Beispiel #4
0
def test_dory_estimate_query_abundance(location):
    """Run ``estimate_query_abundance`` on the dory catlas.

    Builds the k-mer index for ``dory_k21_r1``, estimates abundances for
    ``dory-head.fa`` into ``abundances.csv``, and reads that file back.
    Opening the file fails the test if the output was not written.

    ``location`` is not referenced in the body; presumably it is a pytest
    fixture that prepares the working directory -- TODO confirm.
    """
    # stage the inputs (helpers are defined elsewhere in the test module)
    copy_dory_catlas()
    copy_dory_head()

    # make k-mer search index - FIXTURE
    args = '-k 21 dory_k21_r1'.split()
    index_contigs_by_kmer.main(args)

    # calculate query abundances
    args = 'dory_k21_r1 dory-head.fa -o abundances.csv -k 21'.split()
    print('** running estimate_query_abundance')
    estimate_query_abundance.main(args)

    # Read the output back to prove it exists and is readable.  The context
    # manager closes the handle, which the bare open(...).read() did not.
    with open('abundances.csv', 'rt') as fp:
        fp.read()
Beispiel #5
0
def test_dory_search_nomatch(location):
    """Check that a query with zero matching k-mers does not fail.

    Copies ``data/random-query-nomatch.fa`` to ``random-query.fa``, builds
    the k-mer index for ``dory_k21_r1`` and runs ``query_by_sequence``.
    A ``SystemExit`` is tolerated only when its exit code is 0.

    ``location`` is not referenced in the body; presumably it is a pytest
    fixture that prepares the working directory -- TODO confirm.
    """
    # test situations where zero k-mers match - should not fail.
    copy_dory_catlas()

    nomatch_source = relative_file('data/random-query-nomatch.fa')
    shutil.copyfile(nomatch_source, 'random-query.fa')

    # make k-mer search index
    index_argv = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_argv)

    # do search!!
    search_argv = 'dory_k21_r1 dory_k21_r1_search_oh0 --query random-query.fa -k 21'.split()
    try:
        query_by_sequence.main(search_argv)
    except SystemExit as exit_exc:
        assert exit_exc.code == 0, str(exit_exc)
Beispiel #6
0
def test_dory():
    """Run the dory workflow end to end inside a ``TempDirectory``.

    Steps, in order: bcalm -> gxt conversion, catlas build, k-mer indexing,
    node extraction by query (with output checks), catlas region
    characterization, unassembled-node extraction, catlas info, contig
    extraction, bgzf creation, cDBG labeling and read extraction.

    NOTE(review): a second ``test_dory`` is defined later in this file; at
    import time the later definition replaces this one, so this version is
    presumably never collected -- confirm and rename or delete.
    """
    with TempDirectory():
        from spacegraphcats.cdbg import bcalm_to_gxt

        # make the output directory; an already existing one is acceptable
        try:
            os.mkdir('dory_k21_r1')
        except FileExistsError:
            pass

        # convert the bcalm file to gxt
        # NOTE(review): '-21' is passed as the value of -k here, while the
        # later steps in this test use '-k 21' -- confirm the sign is
        # intentional.
        args = [
            '-k', '-21', '-P',
            relative_filename('dory/bcalm.dory.k21.unitigs.fa'),
            'dory_k21_r1/cdbg.gxt', 'dory_k21_r1/contigs.fa.gz'
        ]

        bcalm_to_gxt.main(args)

        # build catlas
        args = Args()
        args.no_checkpoint = True
        args.level = 0
        args.radius = 1
        args.project = 'dory_k21_r1'

        catlas.main(args)

        # make k-mer search index
        args = '-k 21 dory_k21_r1'.split()
        index_contigs_by_kmer.main(args)

        # do search!!  A SystemExit is tolerated only with exit code 0.
        args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query data/dory-head.fa -k 21 --overhead=0.0'.split()

        try:
            extract_nodes_by_query.main(args)
        except SystemExit as e:
            assert e.code == 0, str(e)

        # check output!
        output_path = 'dory_k21_r1_search_oh0/'
        assert os.path.exists(output_path + 'command.txt')
        assert os.path.exists(output_path + 'dory-head.fa.frontier.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.cdbg_ids.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.response.txt')
        assert os.path.exists(output_path + 'dory-head.fa.contigs.sig')
        assert os.path.exists(output_path + 'results.csv')

        with open(output_path + 'results.csv') as fp:
            lines = fp.readlines()
            assert len(lines) == 2

            last_line = lines[-1].strip()
            assert last_line == 'data/dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0'

        # run characterize_catlas_regions
        # NOTE(review): the output name says 'k1' while the project is
        # 'dory_k21_r1' -- possibly a typo; confirm before changing.
        args = 'dory_k21_r1 dory_k1_r1.vec'.split()
        characterize_catlas_regions.main(args)

        # run extract_unassembled_regions
        args = 'dory_k21_r1 data/dory-head.fa dory.regions -k 21'.split()
        extract_unassembled_nodes.main(args)

        # run catlas info
        catlas_info.main(['dory_k21_r1'])

        # run extract_contigs
        args = [
            'dory_k21_r1',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz', '-o',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz'
        ]
        extract_contigs.main(args)

        assert os.path.exists(
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz')

        # run make_bgzf
        args = ['data/dory-subset.fa', '-o', 'dory/dory.reads.bgz']
        make_bgzf.main(args)

        # run label_cdbg
        args = [
            'dory_k21_r1', 'dory/dory.reads.bgz',
            'dory_k21_r1/reads.bgz.labels'
        ]
        label_cdbg.main(args)

        # run extract_reads
        args = [
            'dory/dory.reads.bgz', 'dory_k21_r1/reads.bgz.labels',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz', '-o',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz'
        ]
        extract_reads.main(args)
def test_dory():
    """Run the dory workflow end to end inside a ``TempDirectory``.

    Steps, in order: bcalm -> gxt conversion, catlas build, k-mer indexing,
    node extraction by query (with output checks), catlas region
    characterization, unassembled-node extraction, catlas info, contig
    extraction, bgzf creation, cDBG labeling, read extraction and query
    abundance estimation (checked against an expected CSV fragment).
    """
    with TempDirectory():
        from spacegraphcats.cdbg import bcalm_to_gxt

        # make the output directory; an already existing one is acceptable
        try:
            os.mkdir('dory_k21_r1')
        except FileExistsError:
            pass

        # convert the bcalm file to gxt
        # NOTE(review): '-21' is passed as the value of -k here, while the
        # later steps in this test use '-k 21' -- confirm the sign is
        # intentional.
        args = ['-k', '-21', '-P',
                relative_filename('dory/bcalm.dory.k21.unitigs.fa'),
                'dory_k21_r1/cdbg.gxt',
                'dory_k21_r1/contigs.fa.gz']

        bcalm_to_gxt.main(args)

        # build catlas
        args = Args()
        args.no_checkpoint = True
        args.level = 0
        args.radius = 1
        args.project = 'dory_k21_r1'

        catlas.main(args)

        # make k-mer search index
        args = '-k 21 dory_k21_r1'.split()
        index_contigs_by_kmer.main(args)

        # do search!!  A SystemExit is tolerated only with exit code 0.
        args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query data/dory-head.fa -k 21 --overhead=0.0'.split()

        try:
            extract_nodes_by_query.main(args)
        except SystemExit as e:
            assert e.code == 0, str(e)

        # check output!
        output_path = 'dory_k21_r1_search_oh0/'
        assert os.path.exists(output_path + 'command.txt')
        assert os.path.exists(output_path + 'dory-head.fa.frontier.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.cdbg_ids.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.response.txt')
        assert os.path.exists(output_path + 'dory-head.fa.contigs.sig')
        assert os.path.exists(output_path + 'results.csv')

        with open(output_path + 'results.csv') as fp:
            lines = fp.readlines()
            assert len(lines) == 2

            last_line = lines[-1].strip()
            assert last_line == 'data/dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0'

        # run characterize_catlas_regions
        args = 'dory_k21_r1 dory_k1_r1.vec'.split()
        characterize_catlas_regions.main(args)

        # run extract_unassembled_regions
        args = 'dory_k21_r1 data/dory-head.fa dory.regions -k 21'.split()
        extract_unassembled_nodes.main(args)

        # run catlas info
        catlas_info.main(['dory_k21_r1'])

        # run extract_contigs
        args = ['dory_k21_r1',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
                '-o',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz']
        extract_contigs.main(args)

        assert os.path.exists('dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz')

        # run make_bgzf
        args = ['data/dory-subset.fa', '-o', 'dory/dory.reads.bgz']
        make_bgzf.main(args)

        # run label_cdbg
        args = ['dory_k21_r1',
                'dory/dory.reads.bgz', 'dory_k21_r1/reads.bgz.labels']
        label_cdbg.main(args)

        # run extract_reads
        args = ['dory/dory.reads.bgz',
                'dory_k21_r1/reads.bgz.labels',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
                '-o',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz']
        extract_reads.main(args)

        # calculate query abundances
        args = 'dory_k21_r1 data/dory-head.fa -o abundances.csv -k 21'.split()
        estimate_query_abundance.main(args)

        # The context manager closes the handle, which the bare
        # open(...).read() did not.
        with open('abundances.csv', 'rt') as fp:
            abunds = fp.read()
        assert 'data/dory-head.fa,1.0,1.05' in abunds