Ejemplo n.º 1
0
    def test_get_mit_pc1(self):
        """get_mit with pseudocount=1 reproduces the reference MI terms"""
        expect_mit = array([
            [0.02377, 0.02377],
            [0, -0.02388],
            [0.09895, -0.01639],
            [0.09895, 0.22805],
        ])

        # the expected values are quoted to 5 decimal places, so the computed
        # terms are rounded to the same precision before comparing
        raw_terms = get_mit(self.ref_data, pseudocount=1, check_valid=True)
        mit = raw_terms.round(decimals=5)

        numpy.testing.assert_equal(mit, expect_mit)
Ejemplo n.º 2
0
    def test_get_mit_pc0(self):
        """get_mit with pseudocount=0 reproduces the reference MI terms"""
        expect_mit = array([
            [0.03561, 0.03561],
            [0.05782, 0],
            [0.16781, -0.02109],
            [0.16781, 0.0],
        ])

        # the expected values are quoted to 5 decimal places, so the computed
        # terms are rounded to the same precision before comparing
        raw_terms = get_mit(self.ref_data, pseudocount=0, check_valid=True)
        mit = raw_terms.round(decimals=5)

        numpy.testing.assert_equal(mit, expect_mit)
Ejemplo n.º 3
0
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects"""
    # NOTE(review): the docstring is left as a single line because this
    # function presumably doubles as a CLI command whose docstring is shown as
    # help text -- confirm before expanding it.
    #
    # Arguments, as used by the body below:
    #   json_path  path handed to util.load_loglin_stats
    #   plot_cfg   optional plot config path; logged as an input file when
    #              given and always handed to util.get_plot_configs
    #   no_type3   when truthy, util.exclude_type3_fonts() is called
    #   figpath    output figure path; when falsy the figure is written as
    #              "MI.<format>" (and the log as "MI.log") beside json_path
    #   format     figure suffix, only consulted when figpath is falsy
    # NOTE(review): sample_size, force_overwrite and dry_run are accepted but
    # never read in this function -- confirm whether dry_run is meant to
    # suppress writing the figure.

    # the following is for logging
    json_path = util.abspath(json_path)
    # snapshot the call arguments before any further local is bound, so the
    # logged 'vars' entry contains the arguments only
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a suffix from the final path component, so a
        # figpath without an extension, or one inside a directory whose name
        # contains a dot, still produces a log path next to the figure
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    LOGGER.log_file_path = log_file_path

    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data.keys())
    # one extra column is reserved in the middle of the array; it receives no
    # counts (presumably the mutated position itself -- confirm)
    num_pos = len(positions) + 1
    mp = num_pos // 2
    counts_array = numpy.zeros((4, num_pos), int)
    for i, pos in enumerate(positions):
        # positions at or beyond the midpoint are shifted right by one so the
        # reserved middle column is skipped
        col = i + 1 if i >= mp else i
        pos_stats = data[pos]['stats']
        # keep only the rows flagged 'M' in the 'mut' column
        counts = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        for base, count in zip(counts['base'], counts['count']):
            counts_array[DNA.alphabet.index(base), col] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    # total per column, summed over the 4 rows of terms
    mi_totals = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_totals)

    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = plot_cfg.get('1-way plot', 'figsize')
    ytick_font = plot_cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = plot_cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = plot_cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = plot_cfg.get('1-way plot', 'xlabel_fontsize')
    fig = logo.draw_multi_position(char_hts.T,
                                   characters=[list(DNA)] * num_pos,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figsize[0],
                                   xtick_fontsize=xtick_font,
                                   ytick_fontsize=ytick_font,
                                   sort_data=True)

    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")