def test_chain_init(): foo_var = SimpleNamespace(draw_random=lambda: True, draw_value=0.1, name="foo", current_value=0.15) bar_var = SimpleNamespace(draw_random=lambda: True, draw_value=0.5, name="bar", current_value=0.51) walker1 = SimpleNamespace(variables=[foo_var, bar_var]) walker2 = SimpleNamespace(variables=[foo_var, bar_var]) tmp_file = br.TempFile() chain = mcmcmc._Chain(walkers=[walker1, walker2], outfile=tmp_file.path, cold_heat=0.01, hot_heat=0.2) assert chain.walkers == [walker1, walker2] assert chain.outfile == tmp_file.path assert chain.cold_heat == 0.01 assert chain.hot_heat == 0.2 assert chain.step_counter == 0 assert chain.best_score_ever_seen == 0 assert tmp_file.read() == """\
def test_chain_apply_dump(capsys):
    """_Chain._apply_dump() should load a dump dictionary back into a chain.

    The method is attached to a SimpleNamespace stand-in, so the "chain" is
    passed explicitly as the first argument.
    """
    def make_walker(label):
        # Each stub walker only reports that its own _apply_dump() was called.
        return SimpleNamespace(
            _apply_dump=lambda *_: print("Applying dump to %s" % label))

    first_walker = make_walker("walker1")
    second_walker = make_walker("walker2")
    results_file = br.TempFile()

    # Every restorable attribute starts out as None.
    blank_state = dict.fromkeys(
        ["cold_heat", "hot_heat", "step_counter", "best_score_ever_seen"])
    chain = SimpleNamespace(walkers=[first_walker, second_walker],
                            outfile=results_file.path,
                            _apply_dump=mcmcmc._Chain._apply_dump,
                            **blank_state)

    dumped_state = {
        "walkers": [None, None],
        "cold_heat": 0.1,
        "hot_heat": 0.2,
        "step_count": 20,
        "best_score": 100,
        "results": "Some results",
    }
    chain._apply_dump(chain, dumped_state)

    # The walker objects are kept; each one is handed its part of the dump.
    assert chain.walkers == [first_walker, second_walker]
    out, err = capsys.readouterr()
    assert out == "Applying dump to walker1\nApplying dump to walker2\n"

    assert chain.cold_heat == 0.1
    assert chain.hot_heat == 0.2
    assert chain.step_counter == 20
    assert chain.best_score_ever_seen == 100
    assert results_file.read() == "Some results"
def test_mcmcmc_mc_step_run():
    """MCMCMC.mc_step_run() should write the walker function's result to the score file.

    Covers a walker function called without extra params and one called with them.
    """
    def score_without_params(func_args):
        return 1234

    def score_with_params(func_args, params):
        return 4321

    score_file = br.TempFile()
    walker = SimpleNamespace(function=score_without_params,
                             params=[],
                             proposed_score_file=score_file)

    mcmcmc.MCMCMC.mc_step_run(walker, ["foo"])
    assert score_file.read() == "1234"

    # Start from an empty file, then repeat with a non-empty params list.
    score_file.clear()
    walker.params = ["bar", "baz"]
    walker.function = score_with_params
    mcmcmc.MCMCMC.mc_step_run(walker, ["foo"])
    assert score_file.read() == "4321"
def test_chain_get_results():
    """_Chain.get_results() should load the chain's outfile into a DataFrame.

    The rendered frame is compared after collapsing runs of whitespace, so the
    check pins the index, column names and cell text without depending on the
    column alignment pandas happens to choose.
    """
    tmp_file = br.TempFile()
    tmp_file.write("""rec_id1,rec_id2,r_square
BOL-PanxαB,Bab-PanxαA,0.016894041431
BOL-PanxαB,Bch-PanxαA,0.087311057754
BOL-PanxαB,Bfo-PanxαE,0.274041115357""")
    chain = SimpleNamespace(outfile=tmp_file.path,
                            get_results=mcmcmc._Chain.get_results)

    # Read the file once and reuse the frame for every assertion.
    results = chain.get_results(chain)
    assert isinstance(results, pd.DataFrame)

    rendered = " ".join(str(results).split())
    expected = ("rec_id1 rec_id2 r_square "
                "0 BOL-PanxαB Bab-PanxαA 0.016894041431 "
                "1 BOL-PanxαB Bch-PanxαA 0.087311057754 "
                "2 BOL-PanxαB Bfo-PanxαE 0.274041115357")
    assert rendered == expected, str(results)
def test_main_strip_taxa(monkeypatch, hf, capsys):
    """group_by_cluster.main() run with "-s" should print the expected output.

    The sequence file handed to main() is a renamed copy of
    Cteno_pannexins.fa; the printed output is checked through its hash.
    """
    tmp_file = br.TempFile()
    seqbuddy = Sb.SeqBuddy(os.path.join(hf.resource_path, "Cteno_pannexins.fa"))
    # Raw string: "\-" is not a valid str escape, so the non-raw spelling only
    # works by accident and triggers an invalid-escape warning.
    # NOTE(review): presumably this removes the leading "<taxa>-" prefix from
    # each record ID -- confirm against Sb.rename().
    seqbuddy = Sb.rename(seqbuddy, r"^.*?\-")
    tmp_file.write(str(seqbuddy))

    argv = [
        'rdmcl.py',
        os.path.join(hf.resource_path, "final_clusters.txt"),
        tmp_file.path,
        "-s",
    ]
    monkeypatch.setattr(sys, "argv", argv)
    group_by_cluster.main()

    out, err = capsys.readouterr()
    # Show the captured output itself if the hash does not match.
    assert hf.string2hash(out) == "3020ea067affd21c77b7446f35689a6a", out
def test_markov_clustering_write():
    """MarkovClustering.write() should write the clusters found by run() to a file."""
    # Pairwise scores, one (seq1, seq2, score) record per line, tab separated.
    edges = [
        ("Bab", "Cfu", "0.3"),
        ("Bab", "Oma", "0.5"),
        ("Bab", "Mle", "0"),
        ("Cfu", "Mle", "0.7"),
        ("Cfu", "Oma", "0.7"),
        ("Oma", "Mle", "0"),
    ]
    data = "\n".join("\t".join(edge) for edge in edges)
    df = pd.read_csv(StringIO(data), sep="\t", header=None, index_col=False,
                     names=["seq1", "seq2", "score"])

    mcl = helpers.MarkovClustering(df, 2)
    mcl.run()

    cluster_file = br.TempFile()
    mcl.write(cluster_file.path)
    assert cluster_file.read() == "Bab Cfu Mle Oma\n"
def test_mcmcmc_resume(capsys):
    """MCMCMC.resume() should return False without a dump file, else restore chains and run."""
    mc_obj = SimpleNamespace(dumpfile="does_not_exist",
                             resume=mcmcmc.MCMCMC.resume)
    # Nothing to resume from when the dump file is missing.
    assert mc_obj.resume(mc_obj) is False

    # Provide a real dump holding one entry per chain.
    dump_file = br.TempFile(byte_mode=True)
    dill.dump(["a", "b", "c"], dump_file)
    mc_obj.dumpfile = dump_file.path

    def make_chain(label):
        return SimpleNamespace(
            _apply_dump=lambda *_: print("applying %s" % label))

    mc_obj.chains = [make_chain("chain%s" % number) for number in (1, 2, 3)]
    mc_obj.run = lambda *_: print("Running")

    assert mc_obj.resume(mc_obj) is True
    out, err = capsys.readouterr()
    expected = "applying chain1\napplying chain2\napplying chain3\nRunning\n"
    assert out == expected, print(out)
def test_logger():
    """helpers.Logger should configure the root logger, and move_log() should relocate the log file."""
    log_file = br.TempFile()
    logger = helpers.Logger(log_file.path)
    log_mod = helpers.logging
    root = logger.logger

    # Exact type checks on purpose: a subclass must not satisfy them.
    assert type(root) is log_mod.RootLogger
    assert type(logger.console) is log_mod.StreamHandler
    assert root.level == 20

    handler_types = [type(handler) for handler in root.handlers]
    assert len(root.handlers) == 2, print(handler_types)
    assert type(root.handlers[1]) is log_mod.StreamHandler, print(handler_types)
    assert logger.console.level == 30

    # Emit one record through the logger object and one through the module.
    root.log(log_mod.WARNING, "Some info")
    log_mod.warning("Some Warnings")

    moved_path = "%sfirst.log" % log_file.path
    logger.move_log(moved_path)
    with open(moved_path, "r") as ifile:
        assert ifile.read() == "Some info\nSome Warnings\n"
def mc_blast(records_list, args):
    """BLAST a batch of records against a database and append the hits to a shared file.

    :param records_list: Records passed to sb.SeqBuddy to build the query
    :param args: Two-item sequence: (blast database, output file path)
    :return: None

    The query is written in FASTA format to a temporary file, blastp is run
    with tabular output (-outfmt 6), and its stdout is appended to the output
    file while holding the module-level ``lock``.
    NOTE(review): ``lock`` is defined outside this block; presumably it is
    shared between worker processes -- confirm.
    """
    database, outfile = args

    # Write this batch of records to its own temporary FASTA query file.
    temp_file = br.TempFile()
    sub_input_seqs = sb.SeqBuddy(records_list, out_format='fasta')
    sub_input_seqs.write(temp_file.path)

    # Pass the command as an argument list (no shell) so paths containing
    # spaces or shell metacharacters reach blastp unchanged.
    blast_cmd = ["blastp",
                 "-query", str(temp_file.path),
                 "-db", str(database),
                 "-num_threads", "3",
                 "-max_target_seqs", "1",
                 "-outfmt", "6"]
    stdout, _ = Popen(blast_cmd, stdout=PIPE).communicate()
    output = stdout.decode()

    # Append while locked so no other process writes at the same time.
    with lock:
        with open(outfile, 'a') as ofile:
            ofile.write(output)
    return
def test_chain_dump_obj():
    """_Chain._dump_obj() should return the chain state, walker dumps and outfile text."""
    def make_walker(label):
        # A stub walker whose dump is just its own label.
        return SimpleNamespace(_dump_obj=lambda *_: label)

    results_file = br.TempFile()
    results_file.write("outfile results")

    chain = SimpleNamespace(
        walkers=[make_walker("walker1"), make_walker("walker2")],
        outfile=results_file.path,
        cold_heat=0.1,
        hot_heat=0.2,
        step_counter=20,
        best_score_ever_seen=100,
        _dump_obj=mcmcmc._Chain._dump_obj)

    dump = chain._dump_obj(chain)

    assert dump["walkers"] == ["walker1", "walker2"]
    assert dump["cold_heat"] == 0.1
    assert dump["hot_heat"] == 0.2
    assert dump["step_count"] == 20
    assert dump["best_score"] == 100
    assert dump["results"] == "outfile results"
def mc_run_blast(records, args):
    """BLAST a batch of records against each database and append the hits to a shared file.

    :param records: Records written as the FASTA query via SeqIO.write
    :param args: Three-item sequence: (iterable of blast databases, evalue, thread count)
    :return: None

    Output from every database is appended to ``temp_blast_hits.csv`` inside
    ``in_args.outdir`` while holding the module-level ``file_lock``.
    NOTE(review): ``in_args`` and ``file_lock`` are defined outside this
    block -- confirm they are available in the worker processes.
    """
    blastdbs, evalue, threads = args

    tmp_file = br.TempFile()
    with open(tmp_file.path, "w") as _ofile:
        SeqIO.write(records, _ofile, "fasta")

    # Tabular output with an explicit column list; passed to blastp as ONE argument.
    outfmt = ("6 qacc sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore "
              "qlen slen nident")

    for blastdb in blastdbs:
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters reach blastp unchanged.
        _cmd = ["blastp",
                "-query", str(tmp_file.path),
                "-db", str(blastdb),
                "-evalue", str(evalue),
                "-max_target_seqs", "1000",
                "-num_threads", str(threads),
                "-dbsize", "1000000000",
                "-outfmt", outfmt]
        _stdout, _ = Popen(_cmd, stdout=PIPE).communicate()
        _output = _stdout.decode()

        # Serialise appends to the shared hits file.
        with file_lock:
            with open("%s/temp_blast_hits.csv" % in_args.outdir, "a") as _ofile:
                _ofile.write(_output)
    return
def test_chain_write_sample():
    """_Chain.write_sample() should append step, variable values and score as one tab-separated line."""
    def make_var(name, current_value, draw_value):
        return SimpleNamespace(draw_random=lambda: True,
                               draw_value=draw_value,
                               name=name,
                               current_value=current_value)

    cold_walker = SimpleNamespace(
        variables=[make_var("foo", 0.15, 0.1), make_var("bar", 0.51, 0.5)],
        lava=False,
        ice=False,
        current_score=35,
        heat=0.1)

    sample_file = br.TempFile()
    chain = SimpleNamespace(step_counter=2,
                            get_cold_walker=lambda *_: cold_walker,
                            outfile=sample_file.path,
                            write_sample=mcmcmc._Chain.write_sample)
    chain.write_sample(chain)

    assert sample_file.read() == "2\t0.15\t0.51\t35\n", print(sample_file.read())
def test_mcmcmc_run(capsys):
    """Drive MCMCMC.run() with stub chains/walkers and count the calls it makes.

    Every stub announces itself with print(), so the captured stdout doubles
    as a call log. The run is exercised twice: once ending on the step limit
    and once ending on convergence with a lava walker included.
    """
    rand_gen = random.Random(1)

    def make_variable(name, current_value, draw_value):
        return SimpleNamespace(
            name=name,
            draw_random=lambda: print("%s_var draw_raindom()" % name),
            current_value=current_value,
            draw_new_value=lambda heat: print("%s_var draw_new_value()" % name),
            draw_value=draw_value)

    foo_var = make_variable("foo", 0.98, 0.12)
    bar_var = make_variable("bar", 0.87, 0.23)

    def make_walker(name):
        # Every walker shares the two variables but owns its lists.
        return SimpleNamespace(
            name=name,
            variables=[foo_var, bar_var],
            lava=False,
            current_score=3,
            heat=0.25,
            score_history=[1.12, 3.42, 7.9, 0.91, 3.3, 0.1, 3.4])

    def make_chain(number):
        return SimpleNamespace(
            walkers=[make_walker("%s_%s" % (number, position)) for position in (1, 2, 3)],
            step_counter=99,
            _dump_obj=lambda: ("chain%s_obj\n" % number).encode(),
            swap_hot_cold=lambda: print("Chain%s swap_hot_cold()" % number),
            write_sample=lambda: print("Chain%s write_sample()" % number))

    chain1, chain2, chain3 = (make_chain(number) for number in (1, 2, 3))
    walker1_1 = chain1.walkers[0]

    # The convergence mock counts its own calls in a module-level global.
    global convergence_counter
    convergence_counter = 0

    def mock_check_convergence():
        global convergence_counter
        convergence_counter += 1
        return convergence_counter > 5

    dump_path = br.TempFile()
    mc_obj = SimpleNamespace(
        run=mcmcmc.MCMCMC.run,
        _check_convergence=mock_check_convergence,
        steps=1,
        dumpfile=dump_path.path,
        chains=[chain1, chain2, chain3],
        rand_gen=rand_gen,
        mc_step_run=lambda *args: print("mc_step_run", args),
        step_parse=lambda *args: print("step_parse:", args),
        best={"score": None, "variables": {}},
        sample_rate=1)

    # Break out when counter > steps
    mc_obj.run(mc_obj)
    out, err = capsys.readouterr()
    call_log = out.split("\n")

    expected_counts = [
        ("foo_var draw_raindom()", 0),
        ("foo_var draw_new_value()", 18),
        ("bar_var draw_raindom()", 0),
        ("bar_var draw_new_value()", 18),
        ("Chain1 swap_hot_cold()", 2),
        ("Chain2 swap_hot_cold()", 2),
        ("Chain3 swap_hot_cold()", 2),
        ("Chain1 write_sample()", 2),
        ("Chain2 write_sample()", 2),
        ("Chain3 write_sample()", 2),
    ]
    for marker, count in expected_counts:
        assert call_log.count(marker) == count

    # The matching count check on "mc_step_run:" lines is disabled in this test.
    assert len([None for line in call_log if "step_parse:" in line]) == 18

    # The dump file holds one _dump_obj() result per chain.
    with open(dump_path.path, "br") as ifile:
        dump_file = dill.load(ifile)
    assert dump_file == [b'chain1_obj\n', b'chain2_obj\n', b'chain3_obj\n']

    # Break when _check_convergence() pops, and include a lava walker
    mc_obj.steps = 0
    walker1_1.lava = True
    mc_obj.run(mc_obj)
    out, err = capsys.readouterr()
    call_log = out.split("\n")

    assert call_log.count("foo_var draw_raindom()") == 2
    assert call_log.count("foo_var draw_new_value()") == 16
    assert call_log.count("bar_var draw_raindom()") == 2
    assert call_log.count("bar_var draw_new_value()") == 16
def test_mcmcmc_step_parse(capsys):
    """MCMCMC.step_parse() should record each proposed score and call accept() when appropriate.

    The walker's accept() is a stub that prints a marker, so whether a
    proposal was accepted is read back from the captured stdout.
    """
    accepted = "Calling accept() method\n"
    rand_gen = random.Random(4)
    score_file = br.TempFile()
    walker = SimpleNamespace(
        name="qwerty",
        proposed_score=None,
        score_history=[1.12, 3.42],
        current_score=3.42,
        accept=lambda *_: print("Calling accept() method"),
        rand_gen=rand_gen,
        heat=0.25,
        ice=False,
        lava=False,
        proposed_score_file=score_file)

    def propose(score_text, std):
        # Stage the proposed score on disk, parse it, and return what was printed.
        score_file.write(score_text, mode="w")
        mcmcmc.MCMCMC.step_parse(walker=walker, std=std)
        printed, _ = capsys.readouterr()
        return printed

    # Accept higher score
    out = propose("7.90", 1.5)
    assert walker.score_history == [1.12, 3.42, 7.9]
    assert walker.proposed_score == 7.9
    assert out == accepted

    # Reject lower score
    out = propose("0.91", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91]
    assert walker.proposed_score == 0.91
    assert out == ""

    # Accept lower score
    out = propose("3.3", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3]
    assert walker.proposed_score == 3.3
    assert out == accepted, print(out)

    # Lava walker accepts any score
    score_file.write("0.1", mode="w")
    walker.lava = True
    mcmcmc.MCMCMC.step_parse(walker=walker, std=3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3, 0.1]
    out, err = capsys.readouterr()
    assert out == accepted

    # Ice walker rejects any lower scores
    score_file.write("3.4", mode="w")
    walker.lava = False
    walker.ice = True
    mcmcmc.MCMCMC.step_parse(walker=walker, std=3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3, 0.1, 3.4]
    out, err = capsys.readouterr()
    assert out == ""

    # Do not allow history to grow over 1000 items long
    walker.score_history = [1] * 1000
    assert len(walker.score_history) == 1000
    mcmcmc.MCMCMC.step_parse(walker, 3.1)
    assert len(walker.score_history) == 1000
    assert walker.score_history[-1] == 3.4
def test_chain_swap_hot_cold(monkeypatch, capsys):
    """_Chain.swap_hot_cold() should reassign heats and track the best score ever seen.

    The walker lookups on _Chain are monkeypatched so each scenario controls
    which walker is reported as best, cold, or ice.
    """
    def make_var(name, current_value, draw_value=0.1):
        return SimpleNamespace(draw_random=lambda: True,
                               draw_value=draw_value,
                               name=name,
                               current_value=current_value)

    foo_var = make_var("foo", 0.15)
    bar_var = make_var("bar", 0.51, draw_value=0.5)
    lava_foo_var = make_var("foo", 0.222)
    lava_bar_var = make_var("bar", 0.999)
    ice_foo_var = make_var("foo", 0.123)
    ice_bar_var = make_var("bar", 0.321)

    walker1 = SimpleNamespace(
        variables=[foo_var, bar_var],
        lava=False,
        ice=False,
        current_score=35,
        set_heat=lambda heat: print("Setting walker1 heat = %s" % heat))
    walker2 = SimpleNamespace(
        variables=[foo_var, bar_var],
        lava=False,
        ice=False,
        current_score=15,
        set_heat=lambda heat: print("Setting walker2 heat = %s" % heat))
    # The lava and ice walkers complain loudly if their heat is ever touched.
    lavawalker = SimpleNamespace(
        variables=[lava_foo_var, lava_bar_var],
        lava=True,
        ice=False,
        current_score=45,
        set_heat=lambda heat: print("Changing lava_walker heat! Oh Nos!"))
    ice_walker = SimpleNamespace(
        variables=[ice_foo_var, ice_bar_var],
        lava=False,
        ice=True,
        current_score=10,
        set_heat=lambda heat: print("Changing ice_walker heat! Oh Nos!"))

    def patch_lookup(method_name, result):
        # Make the named _Chain lookup return a fixed result.
        monkeypatch.setattr(mcmcmc._Chain, method_name, lambda *_: result)

    tmp_file = br.TempFile()
    patch_lookup("get_best_walker", walker1)
    patch_lookup("get_cold_walker", walker2)
    patch_lookup("get_ice_walker", False)
    chain = mcmcmc._Chain(walkers=[walker1, walker2],
                          outfile=tmp_file.path,
                          cold_heat=0.01,
                          hot_heat=0.2)

    def swap_and_capture():
        chain.swap_hot_cold()
        printed, _ = capsys.readouterr()
        return printed

    # Best and cold walkers differ, so their heats are exchanged.
    out = swap_and_capture()
    assert "Setting walker1 heat = 0.01" in out
    assert "Setting walker2 heat = 0.2" in out
    assert chain.best_score_ever_seen == 35

    # Lava walker reported as best.
    patch_lookup("get_best_walker", lavawalker)
    patch_lookup("get_cold_walker", walker1)
    chain.walkers.append(lavawalker)
    out = swap_and_capture()
    assert not out
    assert chain.best_score_ever_seen == 45
    assert foo_var.current_value == 0.222
    assert bar_var.current_value == 0.999

    # Add an ice walker and raise the lava walker's score.
    patch_lookup("get_ice_walker", ice_walker)
    lavawalker.current_score = 55
    chain.walkers.append(ice_walker)
    out = swap_and_capture()
    assert not out
    assert chain.best_score_ever_seen == 55
    assert ice_foo_var.current_value == 0.222
    assert ice_bar_var.current_value == 0.999

    # Ice chain returned as best, but is lower than best ever, so do not copy values
    patch_lookup("get_best_walker", ice_walker)
    ice_foo_var.current_value = 0.01
    ice_bar_var.current_value = 0.10101
    out = swap_and_capture()
    assert not out
    assert chain.best_score_ever_seen == 55
    assert foo_var.current_value == 0.222
    assert bar_var.current_value == 0.999

    # Now give ice walker the best score ever
    patch_lookup("get_best_walker", ice_walker)
    ice_walker.current_score = 100
    out = swap_and_capture()
    assert not out
    assert chain.best_score_ever_seen == 100
    assert foo_var.current_value == 0.01
    assert bar_var.current_value == 0.10101