Esempio n. 1
0
def get_qscore_dataframe(run_folder: Path) -> pd.DataFrame:
    """
    Build the Q-score histogram of a run as a two-column pandas DataFrame.

    Only the Q metric is flagged for loading from ``run_folder``; the histogram
    is produced by ``py_interop_plot.plot_qscore_histogram`` with a boundary
    of 30.

    :param run_folder: run folder, converted to ``str`` before being read
    :return: DataFrame with columns ``x`` (Q-score) and ``y`` (Total in millions)
    """
    metrics = py_interop_run_metrics.run_metrics()

    # Start with every metric disabled, then switch on only the Q metric
    load_flags = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    load_flags[py_interop_run.Q] = 1
    metrics.read(str(run_folder), load_flags)

    histogram = py_interop_plot.bar_plot_data()
    q_boundary = 30
    naming_method = metrics.run_info().flowcell().naming_method()
    filter_opts = py_interop_plot.filter_options(naming_method)
    py_interop_plot.plot_qscore_histogram(metrics, filter_opts, histogram,
                                          q_boundary)

    scores = []
    totals = []
    # NOTE(review): the last series is left out on purpose by the "- 1";
    # presumably it does not hold histogram bars -- confirm against InterOp.
    for series_index in range(histogram.size() - 1):
        series = histogram.at(series_index)
        point_count = series.size()
        scores.extend(series.at(k).x() for k in range(point_count))
        totals.extend(series.at(k).y() for k in range(point_count))

    return pd.DataFrame.from_dict({'x': scores, 'y': totals})
Esempio n. 2
0
    def test_invalid_filter_option(self):
        """
        Check that invalid_filter_option raised by filter_options.validate can
        be caught and that the first line of its message is the expected one
        """

        options = py_interop_plot.filter_options(py_interop_run.FourDigit)
        run_info = py_interop_run.info()
        try:
            options.validate(py_interop_run.Intensity, run_info)
        except py_interop_plot.invalid_filter_option as ex:
            # Only the first line of the exception text is compared
            first_line = str(ex).partition('\n')[0]
            self.assertEqual(first_line, "Invalid tile naming method: does not match RunInfo.xml")
        else:
            # validate() returned normally, so nothing was raised
            self.fail("invalid_filter_option should have been thrown")
Esempio n. 3
0
    def test_plot_qscore_heatmap(self):
        """
        Test that plot_qscore_heatmap is properly wrapped
        """

        # No run folder is read into the metrics object
        metrics = py_interop_run_metrics.run_metrics()
        naming_method = metrics.run_info().flowcell().naming_method()
        options = py_interop_plot.filter_options(naming_method)

        # Size the value buffer from the row/column counts reported for the heatmap
        row_count = py_interop_plot.count_rows_for_heatmap(metrics)
        col_count = py_interop_plot.count_columns_for_heatmap(metrics)
        values = numpy.zeros((row_count, col_count), dtype=numpy.float32)

        heatmap = py_interop_plot.heatmap_data()
        try:
            # The buffer is handed to the wrapper flattened
            py_interop_plot.plot_qscore_heatmap(metrics, options, heatmap, values.ravel())
        except py_interop_plot.invalid_filter_option:
            # An invalid_filter_option from the call is ignored
            pass
Esempio n. 4
0
    def test_plot_flowcell_map(self):
        """
        Test that the flowcell map plot (plot_flowcell_map2) is properly wrapped
        """

        # No run folder is read into the metrics object
        metrics = py_interop_run_metrics.run_metrics()
        naming_method = metrics.run_info().flowcell().naming_method()
        options = py_interop_plot.filter_options(naming_method)

        # Both buffers share the size computed by the library for this run/options pair
        buffer_size = py_interop_plot.calculate_flowcell_buffer_size(metrics, options)
        values = numpy.zeros(buffer_size, dtype=numpy.float32)
        tile_ids = numpy.zeros(buffer_size, dtype=numpy.uint32)

        flowcell = py_interop_plot.flowcell_data()
        try:
            py_interop_plot.plot_flowcell_map2(metrics, py_interop_run.Intensity, options, flowcell, values, tile_ids)
        except py_interop_plot.invalid_filter_option:
            # An invalid_filter_option from the call is ignored
            pass
Esempio n. 5
0
def plot_percent_base(run_folder: str, output_svg="percent_base.svg"):
    """
    Plots the base % across each cycle. Each line represents a different base.
    Reference lines are added for each read.

    Base %: The percentage of clusters for which the selected base (A, C, T, or G) has been called.

    The plot is drawn through the pyplot state interface, i.e. onto whatever
    figure is current; this function neither creates nor closes a figure.

    :param run_folder: run folder passed to ``run_metrics.read``
    :param output_svg: destination passed to ``plt.savefig``
    """
    # Initialize interop objects
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)

    # Read from the run folder
    run_metrics.read(run_folder, valid_to_load)

    logger.info('Generating % base plot')
    plot_data = py_interop_plot.candle_stick_plot_data()
    options = py_interop_plot.filter_options(run_metrics.run_info().flowcell().naming_method())
    py_interop_plot.plot_by_cycle(run_metrics, "BasePercent", options, plot_data)

    # Plot each base
    for base_index in range(plot_data.size()):
        line_data = plot_data.at(base_index)
        point_count = line_data.size()
        x = [line_data.at(i).x() for i in range(point_count)]
        y = [line_data.at(i).y() for i in range(point_count)]
        plt.plot(x, y, color=line_data.color(), linewidth=0.5, label=line_data.title())

    # Apply the final axis limits BEFORE annotating the reads: the read labels
    # are anchored to the top y-limit, and reading that limit while the axes
    # are still autoscaled would pin them to a stale value instead of the top
    # edge of the finished plot.
    axes_data = plot_data.xyaxes()
    plt.ylim([axes_data.y().min(), axes_data.y().max()])
    plt.xlim([axes_data.x().min(), axes_data.x().max()])
    label_y = plt.gca().get_ylim()[1]

    # Plot reference lines for reads
    read_vector = run_metrics.run_info().reads()
    for read_index in range(read_vector.size()):
        read = read_vector[read_index]
        read_name = f'R{read.number()}'
        cycle_start = read.first_cycle()
        plt.axvline(x=cycle_start, color='purple', linestyle='--', linewidth=0.35)
        plt.text(cycle_start, label_y, read_name, fontsize=8, color='purple')

    # Plot settings
    plt.xlabel(axes_data.x().label(), fontsize=10)
    plt.ylabel(axes_data.y().label(), fontsize=10)
    plt.title(plot_data.title(), fontsize=10)
    plt.legend()

    # Save figure
    plt.savefig(output_svg)