Exemple #1
0
def test_chain_init():
    foo_var = SimpleNamespace(draw_random=lambda: True,
                              draw_value=0.1,
                              name="foo",
                              current_value=0.15)
    bar_var = SimpleNamespace(draw_random=lambda: True,
                              draw_value=0.5,
                              name="bar",
                              current_value=0.51)
    walker1 = SimpleNamespace(variables=[foo_var, bar_var])
    walker2 = SimpleNamespace(variables=[foo_var, bar_var])

    tmp_file = br.TempFile()

    chain = mcmcmc._Chain(walkers=[walker1, walker2],
                          outfile=tmp_file.path,
                          cold_heat=0.01,
                          hot_heat=0.2)
    assert chain.walkers == [walker1, walker2]
    assert chain.outfile == tmp_file.path
    assert chain.cold_heat == 0.01
    assert chain.hot_heat == 0.2
    assert chain.step_counter == 0
    assert chain.best_score_ever_seen == 0
    assert tmp_file.read() == """\
Exemple #2
0
def test_chain_apply_dump(capsys):
    """Check that _Chain._apply_dump restores a stub chain from a dump dict.

    The stub starts with every restorable attribute set to None; after the
    call the attributes must match the dump, each walker's own _apply_dump
    must have been invoked, and the "results" text must be in the outfile.
    """
    first_walker = SimpleNamespace(
        _apply_dump=lambda *_: print("Applying dump to walker1"))
    second_walker = SimpleNamespace(
        _apply_dump=lambda *_: print("Applying dump to walker2"))
    results_file = br.TempFile()

    # The unbound method is stored on the stub, so the stub itself is passed
    # explicitly as `self` when it is called below.
    chain = SimpleNamespace(
        walkers=[first_walker, second_walker],
        outfile=results_file.path,
        cold_heat=None,
        hot_heat=None,
        step_counter=None,
        best_score_ever_seen=None,
        _apply_dump=mcmcmc._Chain._apply_dump,
    )

    dump = dict(walkers=[None, None],
                cold_heat=0.1,
                hot_heat=0.2,
                step_count=20,
                best_score=100,
                results="Some results")
    chain._apply_dump(chain, dump)

    captured, _ = capsys.readouterr()
    assert chain.walkers == [first_walker, second_walker]
    assert captured == "Applying dump to walker1\nApplying dump to walker2\n"

    # Scalar state restored from the dump.
    assert chain.cold_heat == 0.1
    assert chain.hot_heat == 0.2
    assert chain.step_counter == 20
    assert chain.best_score_ever_seen == 100
    assert results_file.read() == "Some results"
Exemple #3
0
def test_mcmcmc_mc_step_run():
    """Check that MCMCMC.mc_step_run records the scoring function's result.

    Covers both call shapes used by the test: a walker without extra params
    (function takes one argument) and a walker with params (two arguments).
    """
    score_file = br.TempFile()

    def score_without_params(func_args):
        return 1234

    def score_with_params(func_args, params):
        return 4321

    walker = SimpleNamespace(function=score_without_params,
                             params=[],
                             proposed_score_file=score_file)

    # No params: the returned score should land in the walker's score file.
    mcmcmc.MCMCMC.mc_step_run(walker, ["foo"])
    assert score_file.read() == "1234"

    # With params: start from an empty file and swap in the 2-arg function.
    score_file.clear()
    walker.params = ["bar", "baz"]
    walker.function = score_with_params
    mcmcmc.MCMCMC.mc_step_run(walker, ["foo"])
    assert score_file.read() == "4321"
Exemple #4
0
def test_chain_get_results():
    """Check that _Chain.get_results returns the outfile as a DataFrame.

    A three-row CSV is written to the chain's outfile and the rendered
    DataFrame is compared against its expected string form.
    """
    tmp_file = br.TempFile()
    tmp_file.write("""rec_id1,rec_id2,r_square
BOL-PanxαB,Bab-PanxαA,0.016894041431
BOL-PanxαB,Bch-PanxαA,0.087311057754
BOL-PanxαB,Bfo-PanxαE,0.274041115357""")

    chain = SimpleNamespace(outfile=tmp_file.path,
                            get_results=mcmcmc._Chain.get_results)

    # Fetch once and reuse; the unbound method needs the stub passed as self.
    results = chain.get_results(chain)
    assert isinstance(results, pd.DataFrame)

    # Use the rendered frame as the failure message. The previous
    # `print(...)` expression always evaluated to None.
    rendered = str(results)
    assert rendered == """\
      rec_id1     rec_id2        r_square
0  BOL-PanxαB  Bab-PanxαA  0.016894041431
1  BOL-PanxαB  Bch-PanxαA  0.087311057754
2  BOL-PanxαB  Bfo-PanxαE  0.274041115357""", rendered
def test_main_strip_taxa(monkeypatch, hf, capsys):
    """Run group_by_cluster.main() with the "-s" flag and hash its stdout.

    The sequence file is first passed through Sb.rename with the pattern
    ``^.*?\\-`` (presumably stripping everything up to the first hyphen of
    each record id -- confirm against Sb.rename) and written to a temp file
    that is handed to main() via a patched sys.argv.
    """
    tmp_file = br.TempFile()
    seqbuddy = Sb.SeqBuddy(os.path.join(hf.resource_path,
                                        "Cteno_pannexins.fa"))
    # Raw string: "\-" is not a valid str escape and triggers an
    # invalid-escape warning in a plain literal. The regex value is unchanged.
    seqbuddy = Sb.rename(seqbuddy, r"^.*?\-")
    tmp_file.write(str(seqbuddy))

    argv = [
        'rdmcl.py',
        os.path.join(hf.resource_path, "final_clusters.txt"),
        tmp_file.path,
        "-s",
    ]
    monkeypatch.setattr(sys, "argv", argv)
    group_by_cluster.main()

    out, err = capsys.readouterr()
    # Show the actual output on failure (print() as a message is always None).
    assert hf.string2hash(out) == "3020ea067affd21c77b7446f35689a6a", out
Exemple #6
0
def test_markov_clustering_write():
    """Run MarkovClustering on a small scored graph and check write() output.

    Six tab-separated pairwise scores are loaded into a DataFrame, clustered,
    and the file produced by write() is compared to the expected single line.
    """
    data = """\
Bab\tCfu\t0.3
Bab\tOma\t0.5
Bab\tMle\t0
Cfu\tMle\t0.7
Cfu\tOma\t0.7
Oma\tMle\t0"""
    df = pd.read_csv(StringIO(data), sep="\t", header=None, index_col=False)
    df.columns = ["seq1", "seq2", "score"]

    mcl = helpers.MarkovClustering(df, 2)
    mcl.run()

    tmp_file = br.TempFile()
    mcl.write(tmp_file.path)
    # Tabs are written as escapes (as in `data` above) rather than literal
    # tab characters, which are invisible and easily mangled by editors.
    assert tmp_file.read() == "Bab\tCfu\tMle\tOma\n"
Exemple #7
0
def test_mcmcmc_resume(capsys):
    """Check MCMCMC.resume with a missing dump file and with a valid one.

    With a non-existent dumpfile the call must return False. With a dill
    dump of three items it must return True after each stub chain's
    _apply_dump and the stub run() have printed their markers, in order.
    """
    def stub_chain(message):
        # Each stub chain only announces that its dump was applied.
        return SimpleNamespace(_apply_dump=lambda *_: print(message))

    mc_obj = SimpleNamespace(dumpfile="does_not_exist",
                             resume=mcmcmc.MCMCMC.resume)
    assert mc_obj.resume(mc_obj) is False

    # Provide a real dump holding one entry per chain.
    dump_file = br.TempFile(byte_mode=True)
    dill.dump(["a", "b", "c"], dump_file)

    mc_obj.chains = [
        stub_chain("applying chain1"),
        stub_chain("applying chain2"),
        stub_chain("applying chain3"),
    ]
    mc_obj.run = lambda *_: print("Running")
    mc_obj.dumpfile = dump_file.path

    assert mc_obj.resume(mc_obj) is True
    out, err = capsys.readouterr()
    expected = "applying chain1\napplying chain2\napplying chain3\nRunning\n"
    assert out == expected, print(out)
Exemple #8
0
def test_logger():
    """Check helpers.Logger's handler configuration and move_log().

    Verifies the logger/console types and levels, then logs two warnings
    and expects both messages in the file at the path given to move_log().
    """
    log_file = br.TempFile()
    log = helpers.Logger(log_file.path)
    root = log.logger

    # Exact type checks (not isinstance) are kept as in the original test.
    assert type(root) == helpers.logging.RootLogger
    assert type(log.console) == helpers.logging.StreamHandler
    assert root.level == 20  # logging.INFO

    handlers = [type(handler) for handler in root.handlers]
    assert len(root.handlers) == 2, print(handlers)
    assert type(root.handlers[1]) == helpers.logging.StreamHandler, print(
        handlers)
    assert log.console.level == 30  # logging.WARNING

    # One record through the wrapped logger, one through the module function.
    root.log(helpers.logging.WARNING, "Some info")
    helpers.logging.warning("Some Warnings")

    moved_path = "%sfirst.log" % log_file.path
    log.move_log(moved_path)
    with open(moved_path, "r") as moved:
        contents = moved.read()
        assert contents == "Some info\nSome Warnings\n"
def mc_blast(records_list, args):
    """Run blastp on a batch of records and append the hits to a shared file.

    The records are written to a temporary FASTA file, queried against
    ``database`` with tabular output (``-outfmt 6``), and blastp's stdout is
    appended to ``outfile`` while holding the module-level ``lock``.

    Args:
        records_list: Records handed to ``sb.SeqBuddy`` as the query set.
        args: Two-item sequence ``(database, outfile)``.

    Returns:
        None.

    Raises:
        OSError: If the ``blastp`` executable cannot be started. (Previously
            the shell swallowed this and the function appended nothing.)
    """
    database, outfile = args

    # Write the query sequences to a temporary FASTA file.
    temp_file = br.TempFile()
    sub_input_seqs = sb.SeqBuddy(records_list, out_format='fasta')
    sub_input_seqs.write(temp_file.path)

    # Pass an argument list instead of a shell string, so paths containing
    # spaces or shell metacharacters reach blastp verbatim.
    blast_cmd = [
        "blastp",
        "-query", str(temp_file.path),
        "-db", str(database),
        "-num_threads", "3",
        "-max_target_seqs", "1",
        "-outfmt", "6",
    ]
    # communicate() returns (stdout, stderr); only stdout is piped.
    output = Popen(blast_cmd, stdout=PIPE).communicate()[0].decode()

    # Hold the lock so no other process appends at the same time.
    with lock:
        with open(outfile, 'a') as ofile:
            ofile.write(output)
    return
Exemple #10
0
def test_chain_dump_obj():
    """Check the dictionary built by _Chain._dump_obj for a stub chain.

    Each expected key must carry the chain's current state, the walkers'
    own dumps, and the text currently stored in the outfile.
    """
    results_file = br.TempFile()
    results_file.write("outfile results")

    stub_walkers = [
        SimpleNamespace(_dump_obj=lambda *_: "walker1"),
        SimpleNamespace(_dump_obj=lambda *_: "walker2"),
    ]
    chain = SimpleNamespace(
        walkers=stub_walkers,
        outfile=results_file.path,
        cold_heat=0.1,
        hot_heat=0.2,
        step_counter=20,
        best_score_ever_seen=100,
        _dump_obj=mcmcmc._Chain._dump_obj,
    )

    # Unbound method stored on the stub: pass the stub explicitly as self.
    snapshot = chain._dump_obj(chain)

    expected = {
        "walkers": ["walker1", "walker2"],
        "cold_heat": 0.1,
        "hot_heat": 0.2,
        "step_count": 20,
        "best_score": 100,
        "results": "outfile results",
    }
    for key, value in expected.items():
        assert snapshot[key] == value
def mc_run_blast(records, args):
    """Blast a batch of records against several databases and collect hits.

    The records are written once to a temporary FASTA file. For every
    database in ``blastdbs`` blastp is run with a 15-column tabular format
    and its stdout is appended to ``temp_blast_hits.csv`` inside
    ``in_args.outdir`` while holding the module-level ``file_lock``.

    Args:
        records: Sequence records written with ``SeqIO.write`` as FASTA.
        args: Three-item sequence ``(blastdbs, evalue, threads)``.

    Returns:
        None.

    Raises:
        OSError: If the ``blastp`` executable cannot be started. (Previously
            the shell swallowed this and nothing was appended.)
    """
    blastdbs, evalue, threads = args

    tmp_file = br.TempFile()
    with open(tmp_file.path, "w") as _ofile:
        SeqIO.write(records, _ofile, "fasta")

    # A single argv element; the shell quoting of the old command string is
    # no longer needed because no shell is involved.
    _outfmt = "6 qacc sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore " \
              "qlen slen nident"
    # Same destination for every database, so build the path once.
    _hits_path = "%s/temp_blast_hits.csv" % in_args.outdir

    for blastdb in blastdbs:
        # Argument list instead of a shell string: paths with spaces or
        # shell metacharacters are passed to blastp verbatim.
        _cmd = [
            "blastp",
            "-query", str(tmp_file.path),
            "-db", str(blastdb),
            "-evalue", str(evalue),
            "-max_target_seqs", "1000",
            "-num_threads", str(threads),
            "-dbsize", "1000000000",
            "-outfmt", _outfmt,
        ]
        _output = Popen(_cmd, stdout=PIPE).communicate()[0].decode()

        # Serialize appends from concurrent workers.
        with file_lock:
            with open(_hits_path, "a") as _ofile:
                _ofile.write(_output)

    return
Exemple #12
0
def test_chain_write_sample():
    """Check the line _Chain.write_sample appends for the cold walker.

    The expected line holds the step counter, each variable's current
    value, and the walker's current score, tab-separated.
    """
    def stub_variable(name, draw_value, current_value):
        return SimpleNamespace(draw_random=lambda: True,
                               draw_value=draw_value,
                               name=name,
                               current_value=current_value)

    cold_walker = SimpleNamespace(
        variables=[stub_variable("foo", 0.1, 0.15),
                   stub_variable("bar", 0.5, 0.51)],
        lava=False,
        ice=False,
        current_score=35,
        heat=0.1,
    )

    sample_file = br.TempFile()
    chain = SimpleNamespace(
        step_counter=2,
        get_cold_walker=lambda *_: cold_walker,
        outfile=sample_file.path,
        write_sample=mcmcmc._Chain.write_sample,
    )

    # Unbound method stored on the stub: pass the stub explicitly as self.
    chain.write_sample(chain)
    assert sample_file.read() == "2\t0.15\t0.51\t35\n", print(
        sample_file.read())
Exemple #13
0
def test_mcmcmc_run(capsys):
    """Drive MCMCMC.run with stub chains and walkers and count its calls.

    The stubs announce themselves through print(), so call counts are read
    back from captured stdout. Two passes run over the same stub object:
    first with ``steps=1``, then with ``steps=0`` and one walker flagged as
    lava. The convergence counter is deliberately NOT reset between passes.
    """
    rand_gen = random.Random(1)

    # Both variables are shared by every walker in every chain.
    foo_var = SimpleNamespace(
        name="foo",
        draw_random=lambda: print("foo_var draw_raindom()"),
        current_value=0.98,
        draw_new_value=lambda heat: print("foo_var draw_new_value()"),
        draw_value=0.12)
    bar_var = SimpleNamespace(
        name="bar",
        draw_random=lambda: print("bar_var draw_raindom()"),
        current_value=0.87,
        draw_new_value=lambda heat: print("bar_var draw_new_value()"),
        draw_value=0.23)

    def make_walker(name):
        # Fresh list objects per walker so no mutable state is shared.
        return SimpleNamespace(
            name=name,
            variables=[foo_var, bar_var],
            lava=False,
            current_score=3,
            heat=0.25,
            score_history=[1.12, 3.42, 7.9, 0.91, 3.3, 0.1, 3.4])

    def make_chain(index):
        # `index` is bound per call, so each lambda reports its own chain.
        return SimpleNamespace(
            walkers=[make_walker("%s_%s" % (index, pos)) for pos in (1, 2, 3)],
            step_counter=99,
            _dump_obj=lambda: ("chain%s_obj\n" % index).encode(),
            swap_hot_cold=lambda: print("Chain%s swap_hot_cold()" % index),
            write_sample=lambda: print("Chain%s write_sample()" % index))

    chains = [make_chain(index) for index in (1, 2, 3)]

    # Closure state instead of a module-level global: nothing leaks out of
    # this test into the module namespace.
    convergence_calls = 0

    def mock_check_convergence():
        nonlocal convergence_calls
        convergence_calls += 1
        return convergence_calls > 5

    tmp_file = br.TempFile()
    mc_obj = SimpleNamespace(
        run=mcmcmc.MCMCMC.run,
        _check_convergence=mock_check_convergence,
        steps=1,
        dumpfile=tmp_file.path,
        chains=chains,
        rand_gen=rand_gen,
        mc_step_run=lambda *args: print("mc_step_run", args),
        step_parse=lambda *args: print("step_parse:", args),
        best={
            "score": None,
            "variables": {}
        },
        sample_rate=1)

    # Break out when counter > steps
    mc_obj.run(mc_obj)
    out, err = capsys.readouterr()
    lines = out.split("\n")

    assert lines.count("foo_var draw_raindom()") == 0
    assert lines.count("foo_var draw_new_value()") == 18

    assert lines.count("bar_var draw_raindom()") == 0
    assert lines.count("bar_var draw_new_value()") == 18

    for label in ("Chain1", "Chain2", "Chain3"):
        assert lines.count("%s swap_hot_cold()" % label) == 2
    for label in ("Chain1", "Chain2", "Chain3"):
        assert lines.count("%s write_sample()" % label) == 2

    # NOTE(review): disabled in the original; reason not visible here.
    # assert len([None for x in out if "mc_step_run:" in x]) == 18, print(out)
    assert len([None for x in lines if "step_parse:" in x]) == 18

    # run() must have dill-dumped one _dump_obj() result per chain.
    with open(tmp_file.path, "br") as ifile:
        dump_file = dill.load(ifile)
    assert dump_file == [b'chain1_obj\n', b'chain2_obj\n', b'chain3_obj\n']

    # Break when _check_convergence() pops, and include a lava walker
    mc_obj.steps = 0
    chains[0].walkers[0].lava = True
    mc_obj.run(mc_obj)
    out, err = capsys.readouterr()
    lines = out.split("\n")

    assert lines.count("foo_var draw_raindom()") == 2
    assert lines.count("foo_var draw_new_value()") == 16

    assert lines.count("bar_var draw_raindom()") == 2
    assert lines.count("bar_var draw_new_value()") == 16
Exemple #14
0
def test_mcmcmc_step_parse(capsys):
    """Walk MCMCMC.step_parse through accept/reject scenarios on a stub walker.

    Each scenario stages a proposed score in the walker's score file, runs
    step_parse, and checks the score history plus whether the stub accept()
    printed its marker. The seeded generator makes the outcomes repeatable.
    """
    score_file = br.TempFile()
    walker = SimpleNamespace(
        name="qwerty",
        proposed_score=None,
        score_history=[1.12, 3.42],
        current_score=3.42,
        accept=lambda *_: print("Calling accept() method"),
        rand_gen=random.Random(4),
        heat=0.25,
        ice=False,
        lava=False,
        proposed_score_file=score_file)

    def propose(score_text, std):
        # Stage the score, run one parse step, and hand back captured stdout.
        score_file.write(score_text, mode="w")
        mcmcmc.MCMCMC.step_parse(walker=walker, std=std)
        return capsys.readouterr()[0]

    # Accept higher score
    out = propose("7.90", 1.5)
    assert walker.score_history == [1.12, 3.42, 7.9]
    assert walker.proposed_score == 7.9
    assert out == "Calling accept() method\n"

    # Reject lower score
    out = propose("0.91", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91]
    assert walker.proposed_score == 0.91
    assert out == ""

    # Accept lower score
    out = propose("3.3", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3]
    assert walker.proposed_score == 3.3
    assert out == "Calling accept() method\n", print(out)

    # Lava walker accepts any score
    walker.lava = True
    out = propose("0.1", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3, 0.1]
    assert out == "Calling accept() method\n"

    # Ice walker rejects any lower scores
    walker.lava = False
    walker.ice = True
    out = propose("3.4", 3.1)
    assert walker.score_history == [1.12, 3.42, 7.9, 0.91, 3.3, 0.1, 3.4]
    assert out == ""

    # Do not allow history to grow over 1000 items long
    # (the score file still holds "3.4" from the previous scenario).
    walker.score_history = [1] * 1000
    assert len(walker.score_history) == 1000
    mcmcmc.MCMCMC.step_parse(walker, 3.1)
    assert len(walker.score_history) == 1000
    assert walker.score_history[-1] == 3.4
Exemple #15
0
def test_chain_swap_hot_cold(monkeypatch, capsys):
    """Exercise _Chain.swap_hot_cold through five successive scenarios.

    get_best_walker / get_cold_walker / get_ice_walker are monkeypatched on
    the _Chain class so each scenario controls which stub walker is returned.
    The stub set_heat() callables print a marker, so heat changes are detected
    through captured stdout. State (the chain, the shared variable objects and
    best_score_ever_seen) carries over from one scenario to the next, so the
    statement order below matters.
    """
    # foo_var / bar_var are shared by walker1 AND walker2 (same objects).
    foo_var = SimpleNamespace(draw_random=lambda: True,
                              draw_value=0.1,
                              name="foo",
                              current_value=0.15)
    bar_var = SimpleNamespace(draw_random=lambda: True,
                              draw_value=0.5,
                              name="bar",
                              current_value=0.51)
    lava_foo_var = SimpleNamespace(draw_random=lambda: True,
                                   draw_value=0.1,
                                   name="foo",
                                   current_value=0.222)
    lava_bar_var = SimpleNamespace(draw_random=lambda: True,
                                   draw_value=0.1,
                                   name="bar",
                                   current_value=0.999)
    ice_foo_var = SimpleNamespace(draw_random=lambda: True,
                                  draw_value=0.1,
                                  name="foo",
                                  current_value=0.123)
    ice_bar_var = SimpleNamespace(draw_random=lambda: True,
                                  draw_value=0.1,
                                  name="bar",
                                  current_value=0.321)

    # Regular walkers report the heat they are given; the lava and ice
    # walkers print an alarm instead, which the "assert not out" checks
    # below would catch.
    walker1 = SimpleNamespace(
        variables=[foo_var, bar_var],
        lava=False,
        ice=False,
        current_score=35,
        set_heat=lambda heat: print("Setting walker1 heat = %s" % heat))
    walker2 = SimpleNamespace(
        variables=[foo_var, bar_var],
        lava=False,
        ice=False,
        current_score=15,
        set_heat=lambda heat: print("Setting walker2 heat = %s" % heat))
    lavawalker = SimpleNamespace(
        variables=[lava_foo_var, lava_bar_var],
        lava=True,
        ice=False,
        current_score=45,
        set_heat=lambda heat: print("Changing lava_walker heat! Oh Nos!"))
    ice_walker = SimpleNamespace(
        variables=[ice_foo_var, ice_bar_var],
        lava=False,
        ice=True,
        current_score=10,
        set_heat=lambda heat: print("Changing ice_walker heat! Oh Nos!"))

    tmp_file = br.TempFile()

    # Scenario 1: walker1 is best, walker2 is cold, no ice walker.
    monkeypatch.setattr(mcmcmc._Chain, "get_best_walker", lambda *_: walker1)
    monkeypatch.setattr(mcmcmc._Chain, "get_cold_walker", lambda *_: walker2)
    monkeypatch.setattr(mcmcmc._Chain, "get_ice_walker", lambda *_: False)

    chain = mcmcmc._Chain(walkers=[walker1, walker2],
                          outfile=tmp_file.path,
                          cold_heat=0.01,
                          hot_heat=0.2)
    chain.swap_hot_cold()
    out, err = capsys.readouterr()
    # The best walker receives cold_heat, the former cold walker hot_heat.
    assert "Setting walker1 heat = 0.01" in out
    assert "Setting walker2 heat = 0.2" in out
    assert chain.best_score_ever_seen == 35

    # Scenario 2: the lava walker is best (score 45) and walker1 is cold.
    monkeypatch.setattr(mcmcmc._Chain, "get_best_walker",
                        lambda *_: lavawalker)
    monkeypatch.setattr(mcmcmc._Chain, "get_cold_walker", lambda *_: walker1)

    chain.walkers.append(lavawalker)
    chain.swap_hot_cold()
    out, err = capsys.readouterr()
    # No set_heat() call is expected; instead the cold walker's variables
    # must now hold the lava walker's values.
    assert not out
    assert chain.best_score_ever_seen == 45
    assert foo_var.current_value == 0.222
    assert bar_var.current_value == 0.999

    # Scenario 3: an ice walker exists and the lava walker improves to 55.
    monkeypatch.setattr(mcmcmc._Chain, "get_ice_walker", lambda *_: ice_walker)

    lavawalker.current_score = 55
    chain.walkers.append(ice_walker)
    chain.swap_hot_cold()
    out, err = capsys.readouterr()
    # The ice walker's variables must now hold the lava walker's values.
    assert not out
    assert chain.best_score_ever_seen == 55
    assert ice_foo_var.current_value == 0.222
    assert ice_bar_var.current_value == 0.999

    # Ice chain returned as best, but is lower than best ever, so do not copy values
    monkeypatch.setattr(mcmcmc._Chain, "get_best_walker",
                        lambda *_: ice_walker)

    ice_foo_var.current_value = 0.01
    ice_bar_var.current_value = 0.10101

    chain.swap_hot_cold()
    out, err = capsys.readouterr()
    assert not out
    assert chain.best_score_ever_seen == 55
    assert foo_var.current_value == 0.222
    assert bar_var.current_value == 0.999

    # Now give ice walker the best score ever
    # NOTE(review): this re-applies the same get_best_walker patch as the
    # previous scenario; it looks redundant but is left untouched.
    monkeypatch.setattr(mcmcmc._Chain, "get_best_walker",
                        lambda *_: ice_walker)

    ice_walker.current_score = 100

    chain.swap_hot_cold()
    out, err = capsys.readouterr()
    # With a new best-ever score the ice walker's values are copied across.
    assert not out
    assert chain.best_score_ever_seen == 100
    assert foo_var.current_value == 0.01
    assert bar_var.current_value == 0.10101