Esempio n. 1
0
    def test_count_imaging_table_columns(self):
        """
        Test if imaging logic is properly wrapped
        """

        columns = py_interop_table.imaging_column_vector()
        self.assertEqual(py_interop_table.count_table_columns(columns), 0)
Esempio n. 2
0
    def test_populate_imaging_table(self):
        """
        Test if imaging logic can be properly used
        """

        tmp = numpy.asarray([2,38
            ,7,0,90,4,1,0,-12,-56,15,64,-98,35,12,64,0,0,0,0,0,0,0,0,46,1,17,1,0,0,0,0,96,-41,-104,36,122,-86,-46,-120
            ,7,0,-66,4,1,0,96,-43,14,64,-63,49,13,64,0,0,0,0,0,0,0,0,56,1,17,1,0,0,0,0,112,125,77,38,122,-86,-46,-120
            ,7,0,66,8,1,0,74,-68,6,64,-118,-7,8,64,0,0,0,0,0,0,0,0,93,1,46,1,0,0,0,0,-47,-104,2,40,122,-86,-46,-120],
                            dtype=numpy.uint8)
        run = py_interop_run_metrics.run_metrics()
        py_interop_comm.read_interop_from_buffer(tmp, run.extraction_metric_set())
        self.assertEqual(run.extraction_metric_set().size(), 3)

        reads = py_interop_run.read_info_vector()
        reads.append(py_interop_run.read_info(1, 1, 26))
        reads.append(py_interop_run.read_info(2, 27, 76))
        run.run_info(py_interop_run.info(
            py_interop_run.flowcell_layout(2, 2, 2, 16),
            reads
        ))
        run.legacy_channel_update(py_interop_run.HiSeq)
        columns = py_interop_table.imaging_column_vector()
        py_interop_table.create_imaging_table_columns(run, columns)
        row_offsets = py_interop_table.map_id_offset()
        py_interop_table.count_table_rows(run, row_offsets)
        column_count = py_interop_table.count_table_columns(columns)
        data = numpy.zeros((len(row_offsets), column_count), dtype=numpy.float32)
        py_interop_table.populate_imaging_table_data(run, columns, row_offsets, data.ravel())
        self.assertEqual(data[0, 0], 7)
Esempio n. 3
0
    def test_invalid_column_type(self):
        """
        Test that exceptions can be caught and they have the expected message
        """

        channels = py_interop_run.string_vector()
        filled = py_interop_run.bool_vector()
        columns = py_interop_table.imaging_column_vector()
        try:
            py_interop_table.create_imaging_table_columns(channels, filled, columns, 8, py_interop_run.SquareLayout)
            self.fail("invalid_column_type should have been thrown")
        except py_interop_table.invalid_column_type as ex:
            self.assertEqual(str(ex).split('\n')[0], "Filled vector does not match number of column names")
Esempio n. 4
0
def get_percent_occupied_by_lane(run_folder_path):
    df = pd.DataFrame
    for item in NOVASEQ:
        if 'myrun' not in run_folder_path.lower() and item.lower(
        ) in run_folder_path.lower():
            valid_to_load = py_interop_run.uchar_vector(
                py_interop_run.MetricCount, 0)
            valid_to_load[py_interop_run.ExtendedTile] = 1
            valid_to_load[py_interop_run.Tile] = 1
            valid_to_load[py_interop_run.Extraction] = 1
            run_metrics = py_interop_run_metrics.run_metrics()
            run_metrics.read(run_folder_path, valid_to_load)
            columns = py_interop_table.imaging_column_vector()
            py_interop_table.create_imaging_table_columns(run_metrics, columns)
            headers = get_headers(columns, run_folder_path)
            column_count = py_interop_table.count_table_columns(columns)
            row_offsets = py_interop_table.map_id_offset()
            py_interop_table.count_table_rows(run_metrics, row_offsets)
            data = np.zeros((row_offsets.size(), column_count),
                            dtype=np.float32)
            py_interop_table.populate_imaging_table_data(
                run_metrics, columns, row_offsets, data.ravel())

            header_subset = ["Lane", "Tile", "Cycle", "% Occupied"]
            header_index = [(header, headers.index(header))
                            for header in header_subset]
            ids = np.asarray(
                [headers.index(header) for header in header_subset[:3]])

            data_for_selected_header_subset = []
            for label, col in header_index:
                data_for_selected_header_subset.append(
                    (label,
                     pd.Series([val for val in data[:, col]],
                               index=[tuple(r) for r in data[:, ids]])))

            df = pd.DataFrame.from_dict(dict(data_for_selected_header_subset))
    return df
Esempio n. 5
0
def plot_occupancy(run_folder: str, output_jpg_prefix="occupancy"):
    """
    To optimize loading concentrations on the NovaSeq platform, the % Occupied and % Pass Filter
    metrics can be plotted to determine if a run was underloaded, optimally loaded, or overloaded.

    More information:
    https://support.illumina.com/bulletins/2020/03/plotting---occupied-by---pass-filter-to-optimize-loading-concent.html
    """

    # Initialize interop objects
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    valid_to_load[py_interop_run.ExtendedTile] = 1
    valid_to_load[py_interop_run.Tile] = 1
    valid_to_load[py_interop_run.Extraction] = 1

    # Read from the run folder
    run_metrics.read(run_folder, valid_to_load)

    # Create the columns
    columns = py_interop_table.imaging_column_vector()
    py_interop_table.create_imaging_table_columns(run_metrics, columns)

    headers = []
    for i in range(columns.size()):
        column = columns[i]
        if column.has_children():
            headers.extend(
                [f"{column.name()} ({subname})" for subname in column.subcolumns()])
        else:
            headers.append(column.name())

    column_count = py_interop_table.count_table_columns(columns)
    row_offsets = py_interop_table.map_id_offset()
    py_interop_table.count_table_rows(run_metrics, row_offsets)
    data = np.zeros((row_offsets.size(), column_count), dtype=np.float32)
    py_interop_table.populate_imaging_table_data(
        run_metrics, columns, row_offsets, data.ravel()
    )

    # Make a DataFrame
    df = pd.DataFrame(data, columns=headers)

    # Skip if there is no data (% Occupied only available on NovaSeq)
    if df.shape[0] == 0 or "% Occupied" not in df:
        # Stop
        print("Occupancy plot skipped, no data available")
        return

    x = "% Occupied"
    y = "% Pass Filter"
    hues = ["Tile", "Lane", "Cycle"]

    # Make a few different types of plots
    for hue in hues:
        sns.scatterplot(
            data=df,
            x=x,
            y=y,
            hue=hue,
            alpha=0.5,
            linewidth=0,
        )
        plt.xlim([0, 100])
        plt.ylim([0, 100])
        plt.legend(title=hue, bbox_to_anchor=[1.2, 0.9])
        plt.tight_layout()
        plt.savefig(f"{output_jpg_prefix}_{hue.lower()}.jpg", dpi=600)
        plt.close()