def test_default():
    '''
    Build `Params` with `conf=0.8` and `gene='silva_138_ssu'` passed explicitly
    (the same values `test_no_user_supplied_values` sees when nothing is passed)

    Checks item access, `getd`, the stringified prose args, and that the only
    CLI flag emitted is `--train_propfile`
    '''
    raw_params = {
        'workspace_id': 'ws_id',
        'amp_mat_upa': '1/2/3',
        'output_name': 'out_name',
        'rdp_clsf': {
            'conf': 0.8,
            'gene': 'silva_138_ssu',
        },
    }
    params = Params(raw_params)

    # Top-level keys are read with [] and nested classifier options with getd()
    assert params['amp_mat_upa'] == '1/2/3'
    assert params['output_name'] == 'out_name'
    assert params.getd('conf') == 0.8
    assert params.getd('gene') == 'silva_138_ssu'

    # Prose args are compared as strings
    expected_prose = {
        'conf': '0.8',
        'gene': 'silva_138_ssu',
    }
    assert params.get_prose_args() == expected_prose, \
        json.dumps(params.get_prose_args(), indent=4)

    expected_cli = ['--train_propfile', Var.propfile['silva_138_ssu']]
    assert params.cli_args == expected_cli, params.cli_args

    str(params)  # should not throw
def test_non_default():
    '''
    Build `Params` with `conf=0.99999` and `gene='fungallsu'`

    Checks item access, `getd`, the stringified prose args, and that both
    `--conf` and `--gene` are emitted in `cli_args`
    '''
    raw_params = {
        'workspace_id': 'ws_id',
        'amp_mat_upa': '5/5/5',
        'output_name': 'out_name',
        'rdp_clsf': {
            'conf': 0.99999,
            'gene': 'fungallsu',
        },
    }
    params = Params(raw_params)

    # Top-level keys are read with [] and nested classifier options with getd()
    assert params['amp_mat_upa'] == '5/5/5'
    assert params['output_name'] == 'out_name'
    assert params.getd('conf') == 0.99999
    assert params.getd('gene') == 'fungallsu'

    # Prose args are compared as strings
    expected_prose = {
        'conf': '0.99999',
        'gene': 'fungallsu',
    }
    assert params.get_prose_args() == expected_prose, \
        json.dumps(params.get_prose_args(), indent=4)

    expected_cli = [
        '--conf', '0.99999',
        '--gene', 'fungallsu',
    ]
    assert params.cli_args == expected_cli, params.cli_args

    str(params)  # should not throw
def test_no_user_supplied_values():
    '''
    Build `Params` without any `rdp_clsf` section

    `getd` is expected to fall back to `conf=0.8` and `gene='silva_138_ssu'`,
    and the only CLI flag emitted is `--train_propfile`
    '''
    raw_params = {
        'workspace_id': 'ws_id',
        'amp_mat_upa': '6/6/6',
        'output_name': 'out_name',
    }
    params = Params(raw_params)

    assert params['amp_mat_upa'] == '6/6/6'
    assert params['output_name'] == 'out_name'

    # Nothing was supplied for these, so the values come from Params itself
    assert params.getd('conf') == 0.8
    assert params.getd('gene') == 'silva_138_ssu'

    # Prose args are compared as strings
    expected_prose = {
        'conf': '0.8',
        'gene': 'silva_138_ssu',
    }
    assert params.get_prose_args() == expected_prose, \
        json.dumps(params.get_prose_args(), indent=4)

    expected_cli = ['--train_propfile', Var.propfile['silva_138_ssu']]
    assert params.cli_args == expected_cli, params.cli_args

    str(params)  # should not throw
def test_get_fix_filtered_id2tax():
    '''
    Spot-check `app_file.get_fix_filtered_id2tax()` against hand-picked entries
    covering: demangling, skipped ranks, filtered ranks
    '''
    # Leading entries; several are cut short and one is empty
    id2tax_first = {
        '000b0b88ceb19d19678f6c3a39f2db73': 'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Xanthobacteraceae;uncultured;',
        '0013aacacdba8495d504237c6334ddfb': 'Bacteria;',
        '00209e5d2aadb8c762613b92f7f4b21c': 'Bacteria;Verrucomicrobiota;Omnitrophia;Omnitrophales;Omnitrophaceae;Candidatus Omnitrophus;',
        '0022fa9d411c3edb1b584d7fb5193749': 'Bacteria;Verrucomicrobiota;Verrucomicrobiae;Pedosphaerales;Pedosphaeraceae;',
        '00250d6b968f2ad29019c64d89ece283': 'Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Nitrosomonadaceae;MND1;',
        '002597380ec40da68af2362f47542f89': '',
        '0030f073f1f67fa8c7f01795583706e6': 'Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Comamonadaceae;Rhodoferax;',
        '0032ab5da12fadd744023dc9bbba5169': 'Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;',
        '00360e9b675235e3ba5f1e6cf402cd73': 'Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;',
        '0037cbd3eb6eaf9cba6a4c2b7d866d1f': 'Bacteria;Proteobacteria;Gammaproteobacteria;Diplorickettsiales;Diplorickettsiaceae;Aquicella;',
    }
    # Entries with one or more empty ranks in the middle of the lineage
    id2tax_skipRanks = {
        '0134a9074c4afc9ac763f0482578f9e5': 'Bacteria;Firmicutes;Limnochordia;;;Hydrogenispora;',
        '18a980ed927822f60eebf93600ee8ff4': 'Bacteria;Acidobacteriota;Acidobacteriae;;;Paludibaculum;',
        '1a9c864d1f706bf185928fbf666f6880': 'Bacteria;Firmicutes;Clostridia;;Hungateiclostridiaceae;',
        '1d1f284b1a3fc4545be7430ae7365c91': 'Bacteria;Firmicutes;Desulfotomaculia;Desulfotomaculales;;Pelotomaculum;',
        '1ec12548cafe627d1212ffa9f9ef24c3': 'Bacteria;Acidobacteriota;Acidobacteriae;;;Paludibaculum;',
        '209d2a7b32e4fd9500896c1910760c01': 'Bacteria;Acidobacteriota;Acidobacteriae;;;Paludibaculum;',
        '22b3c70cdfb1714224e1bf8a6314bee9': 'Bacteria;Firmicutes;Clostridia;Peptostreptococcales-Tissierellales;;Finegoldia;',
        '24a0b0f872dcff5625c7e671b86607bc': 'Bacteria;Firmicutes;Clostridia;;Hungateiclostridiaceae;HN-HF0106;',
        'faec42549c32f5c311164c00ff260f72': 'Bacteria;Acidobacteriota;Acidobacteriae;;;Paludibaculum;',
        'ff48ea2b9d30c737b53aeb9841179d57': 'Bacteria;Firmicutes;Clostridia;Peptostreptococcales-Tissierellales;;Parvimonas;',
    }
    # Entries whose final rank is 'uncultured' (the demangling cases)
    id2tax_demangle = {
        '000b0b88ceb19d19678f6c3a39f2db73': 'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Xanthobacteraceae;uncultured;',
        '006344897e91aa92c15114a635558e4c': 'Bacteria;Firmicutes;Dethiobacteria;Dethiobacterales;Dethiobacteraceae;uncultured;',
        '0063886685aeb8abd4643ca43d63c6fd': 'Bacteria;Gemmatimonadota;Gemmatimonadetes;Gemmatimonadales;Gemmatimonadaceae;uncultured;',
        '00d4306620fb956f4cb8ea6616da13a3': 'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiales Incertae Sedis;uncultured;',
        '010a64540ce2e484b2177289a377246b': 'Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Xanthobacteraceae;uncultured;',
        '017740e814c004489ef488d9ba7e6bba': 'Bacteria;Chloroflexi;Anaerolineae;Anaerolineales;Anaerolineaceae;uncultured;',
        '018e02f503aa9297bdf4fa8a3a5caf0d': 'Bacteria;Spirochaetota;Spirochaetia;Spirochaetales;Spirochaetaceae;uncultured;',
        '019a8e93cde9dfce660c99097f2bc8ef': 'Bacteria;Proteobacteria;Alphaproteobacteria;Reyranellales;Reyranellaceae;uncultured;',
        'ffdb83232218318beacfa5b9d2802d23': 'Bacteria;Gemmatimonadota;Gemmatimonadetes;Gemmatimonadales;Gemmatimonadaceae;uncultured;',
        'ffe9568c3bbd5838f866595074ba1148': 'Bacteria;Desulfobacterota;Desulfuromonadia;Geobacterales;Geobacteraceae;uncultured;',
    }

    # Point the app at the example classifier output
    # (an older file, without 'Genera Incertae Sedis')
    Var.out_allRank_flpth = os.path.join(TEST_DATA_DIR, 'example_allRank.tsv')
    conf_params = {**req, 'conf': 0.8}
    Var.params = Params(conf_params)

    id2tax = app_file.get_fix_filtered_id2tax()

    id2tax_expected = _add_dicts(
        id2tax_first,
        id2tax_skipRanks,
        id2tax_demangle,
    )
    dprint('len(id2tax_expected) # should be about 30')

    # Only the hand-picked ids are compared, not the whole mapping
    for key, expected_tax in id2tax_expected.items():
        assert id2tax[key] == expected_tax, \
            '%s:\n%s\n%s' % (key, id2tax[key], expected_tax)
def test_small():
    '''
    Write an HTML report from the enigma50by30 classifier output with conf=0.8

    Sets `Var.out_allRank_flpth`, `Var.report_dir` and `Var.params`, and
    creates the report directory under `run_dir`
    '''
    Var.out_allRank_flpth = os.path.join(
        TEST_DATA_DIR,
        'return/enigma50by30/return/RDP_Classifier_output',
        'out_allRank.tsv',
    )

    report_dir = os.path.join(run_dir, 'report_enigma50by30')
    Var.report_dir = report_dir
    os.mkdir(report_dir)

    param_d = dict(**req, conf=0.8)
    Var.params = Params(param_d)

    writer = report.HTMLReportWriter(cmd_l=['test,', 'test,', 'small'])
    writer.write()
def test_validation():
    '''
    `Params` accepts the known key names (even with all-None values) and
    raises when given an unrecognized key
    '''
    recognized = {
        'workspace_id': None,
        'amp_mat_upa': None,
        'rdp_clsf': {
            'gene': None,
            'conf': None,
        },
        'output_name': None,
    }
    Params(recognized)  # should not throw

    unrecognized = {
        'workspace_id': None,
        'amp_mat_upa': None,
        'rdp_clsf': {
            'gene': None,
            'conf': None,
        },
        # presumably a deliberate misspelling of `output_name` to trigger the error
        'outptu_name': None,
    }
    with raises(Exception):
        Params(unrecognized)
def test_dummy(i, conf):
    '''
    Write an HTML report from the dummy10by8 classifier output at the given `conf`

    `i` is not used in the body; `conf` goes into the report directory name,
    the params, and the command list. How the arguments are supplied is not
    visible here (presumably a parametrize decorator -- TODO confirm)
    '''
    Var.out_allRank_flpth = os.path.join(
        TEST_DATA_DIR,
        'return/dummy10by8/return/RDP_Classifier_output',
        'out_allRank.tsv',
    )

    # One report directory per confidence value
    report_dir = os.path.join(run_dir, 'report_dummy10by8_conf%g' % conf)
    Var.report_dir = report_dir
    os.mkdir(report_dir)

    param_d = dict(**req, conf=conf)
    Var.params = Params(param_d)

    cmd_l = ['test,', 'test,', 'dummy10by8', 'conf=%g' % conf]
    report.HTMLReportWriter(cmd_l=cmd_l).write()
def test_small_linspace():
    '''
    Write one HTML report from the enigma50by30 classifier output for each of
    11 evenly spaced `conf` values from 0 to 1 (inclusive)
    '''
    Var.out_allRank_flpth = os.path.join(
        TEST_DATA_DIR,
        'return/enigma50by30/return/RDP_Classifier_output',
        'out_allRank.tsv',
    )

    cmd_prefix = ['test,', 'test,', 'small']

    for conf in np.linspace(0, 1, 11):
        # One report directory per confidence value
        report_dir = os.path.join(run_dir, 'report_enigma50by30_conf%g' % conf)
        Var.report_dir = report_dir
        os.mkdir(report_dir)

        Var.params = Params(dict(**req, conf=conf))

        cmd_l = cmd_prefix + ['conf=%g' % conf]
        report.HTMLReportWriter(cmd_l=cmd_l).write()
def test_tiny(i):
    '''
    Write an HTML report from the `i`-th dummyTiny classifier output with conf=0.55555

    The raw lines of the input file are appended to the command list handed to
    the report writer. How `i` is supplied is not visible here (presumably a
    parametrize decorator -- TODO confirm)
    '''
    Var.out_allRank_flpth = os.path.join(
        TEST_DATA_DIR,
        'return/dummyTiny/return/RDP_Classifier_output',
        'out_allRank%d.tsv' % i,
    )

    report_dir = os.path.join(run_dir, 'report_dummyTiny_%d' % i)
    Var.report_dir = report_dir
    os.mkdir(report_dir)

    param_d = dict(**req, conf=0.55555)
    Var.params = Params(param_d)

    # Keep line endings, exactly as read from the file
    with open(Var.out_allRank_flpth) as fh:
        allRank_lines = list(fh)

    cmd_l = ['test,', 'test,', 'dummyTiny', 'i=%d' % i, *allRank_lines]
    report.HTMLReportWriter(cmd_l=cmd_l).write()
def test_large():
    '''
    Write an HTML report from the enigma17770by511 classifier output with conf=0.7777

    Globals set on `Var` here: out_allRank_flpth, report_dir, params
    (the original note also lists report_template_flpth as used; it is not
    referenced in this function -- presumably read by the report writer)
    '''
    Var.out_allRank_flpth = os.path.join(
        TEST_DATA_DIR,
        'return/enigma17770by511/return/RDP_Classifier_output/',
        'out_allRank.tsv',
    )

    report_dir = os.path.join(RUN_DIR, 'report_enigma17770by511')
    Var.report_dir = report_dir
    os.mkdir(report_dir)

    param_d = dict(**req, conf=0.7777)
    Var.params = Params(param_d)

    # The three-token command is repeated 10 times to make a long command list
    cmd_l = ['test,', 'test,', 'large'] * 10
    report.HTMLReportWriter(cmd_l=cmd_l).write()
def test_noRowAttrMap():
    '''
    Row AttributeMapping behavior when the AmpliconMatrix has no row
    AttributeMapping: add an attribute slot, fill it, and check that every
    instance row ends up with one value per attribute
    '''
    unique_suffix = str(uuid.uuid4())
    Var.run_dir = os.path.join(
        scratch,
        'test_AmpliconMatix_noRowAttributeMapping_AttributeMapping_' + unique_suffix,
    )

    param_d = dict(
        workspace_id='ws_id',
        workspace_name='ws_name',
        amp_mat_upa='amp/mat/upa',
        output_name='out_name',
    )
    Var.params = Params(param_d)

    amp_mat = AmpliconMatrix(dummy10by8_AmpMat_noRowAttrMap)
    row_attr_map_ref = amp_mat.obj.get('row_attributemapping_ref')
    attr_map = AttributeMapping(row_attr_map_ref, amp_mat)

    ##
    ## write new attribute/source
    ind_0, attr_name_0 = attr_map.add_attribute_slot('biome', 'testing')

    assert ind_0 == 0
    assert attr_name_0 == 'biome', json.dumps(attr_map.obj, indent=3)

    # amplicon_id_0 .. amplicon_id_9 all get the same placeholder value
    id2attr = {'amplicon_id_%d' % n: 'dummy0' for n in range(10)}
    attr_map.update_attribute(ind_0, id2attr)

    assert attr_map.obj['instances']['amplicon_id_4'][ind_0] == 'dummy0'

    ##
    ## all same length
    num_attr = len(attr_map.obj['attributes'])
    for instance_values in attr_map.obj['instances'].values():
        assert len(instance_values) == num_attr
def test_flatten():
    '''
    `Params.flatten` lifts the entries of nested dicts to the top level and
    drops the nesting keys themselves
    '''
    nested = {
        'key0': 'hat',
        'key1': 'bat',
        'nest0': {
            'key2': 'cat',
            'key3': 'sat',
        },
        'key4': 'chat',
        'nest1': {
            'key5': 'gnat',
        },
        'key6': 'mat',
    }

    flatd = Params.flatten(nested)

    # key0..key6 survive; nest0/nest1 do not
    assert len(flatd) == 7
    assert all('key%d' % i in flatd for i in range(7))
    assert all('nest%d' % i not in flatd for i in range(2))
    assert flatd['key5'] == 'gnat'