Example #1
import logging
import os
import sys

import numpy

from interop import py_interop_run, py_interop_run_metrics, py_interop_summary


def main():
    """ Retrieve run folder paths from the command line
     Ensure only metrics required for summary are loaded
     Load the run metrics
     Calculate the summary metrics
     Display error by lane, read
     """
    logging.basicConfig(level=logging.INFO)

    run_metrics = py_interop_run_metrics.run_metrics()
    summary = py_interop_summary.run_summary()

    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)

    for run_folder_path in sys.argv[1:]:
        run_folder = os.path.basename(run_folder_path)
        try:
            run_metrics.read(run_folder_path, valid_to_load)
        except Exception as ex:
            logging.warning("Skipping - cannot read RunInfo.xml: %s - %s" %
                            (run_folder, str(ex)))
            continue
        py_interop_summary.summarize_run_metrics(run_metrics, summary)
        logging.info("Run Folder: " + run_folder)
        error_rate_read_lane_surface = numpy.zeros(
            (summary.size(), summary.lane_count(), summary.surface_count()))
        for read_index in range(summary.size()):
            for lane_index in range(summary.lane_count()):
                for surface_index in range(summary.surface_count()):
                    error_rate_read_lane_surface[read_index, lane_index, surface_index] = \
                        summary.at(read_index).at(lane_index).at(surface_index).error_rate().mean()
        for read_index in range(summary.size()):
            read_summary = summary.at(read_index)
            logging.info("Read " + str(read_summary.read().number()))
Example #2
    def run(self):
        run_metrics = py_interop_run_metrics.run_metrics()

        valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount,
                                                    0)
        py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
        run_metrics.read(self.runfolder, valid_to_load)

        summary = py_interop_summary.run_summary()
        py_interop_summary.summarize_run_metrics(run_metrics, summary)

        lanes = summary.lane_count()
        reads = self.get_non_index_reads(summary)
        for lane in range(lanes):
            # The interop library uses zero-based indexing, but most people use
            # read 1/2 to denote the different reads; this enumeration maps the
            # zero-based index to that form. /JD 2017-10-27
            for new_read_nbr, original_read_nbr in enumerate(reads):
                read = summary.at(original_read_nbr).at(lane)
                error_rate = read.error_rate().mean()
                percent_q30 = read.percent_gt_q30()
                self._send_to_subscribers(("error_rate", {
                    "lane": lane + 1,
                    "read": new_read_nbr + 1,
                    "error_rate": error_rate
                }))
                self._send_to_subscribers(("percent_q30", {
                    "lane": lane + 1,
                    "read": new_read_nbr + 1,
                    "percent_q30": percent_q30
                }))
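This method depends on a get_non_index_reads helper that the excerpt does not show. A plausible sketch, assuming it collects the indices of non-index (template) reads with the same is_index() call used in Example #9:

    def get_non_index_reads(self, summary):
        # Hypothetical reconstruction: indices of all non-index reads.
        return [index for index in range(summary.size())
                if not summary.at(index).read().is_index()]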
Example #3
def parse_single_json(run_folder_path, xrange=None):
    run_folder = os.path.basename(run_folder_path)
    run_metrics = py_interop_run_metrics.run_metrics()
    summary = py_interop_summary.run_summary()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    percent_occupied_df = pd.DataFrame()
    if 'MyRun' not in str(run_folder_path):
        percent_occupied_df = get_percent_occupied_by_lane(run_folder_path)
    summary_dict = {}
    try:
        run_metrics.read(run_folder_path, valid_to_load)
        py_interop_summary.summarize_run_metrics(run_metrics, summary)
        logging.info("Read: {}, Lane: {}".format(summary.size(),
                                                 summary.lane_count()))
        for read_index in range(summary.size()):
            logging.info("Read {}".format(read_index + 1))
            summary_dict.setdefault("run", run_folder)
            for lane_index in range(summary.lane_count()):
                read_summary = summary.at(read_index)
                lane_summary = read_summary.at(lane_index)
                summary_dict.setdefault("data", []).append({
                    "runname":
                    parse_runid(run_folder),
                    "read":
                    read_summary.read().number(),
                    "lane":
                    lane_summary.lane(),
                    "density":
                    parse_float(lane_summary.density().mean() / 1000),
                    "density_stddev":
                    parse_float(lane_summary.density().stddev() / 1000),
                    "clusterpf":
                    parse_float(lane_summary.percent_pf().mean()),
                    "clusterpf_stddev":
                    parse_float(lane_summary.percent_pf().stddev()),
                    "readsm":
                    parse_float(lane_summary.reads() / 1000000),
                    "readspfm":
                    parse_float(lane_summary.reads_pf() / 1000000),
                    "q30":
                    parse_float(lane_summary.percent_gt_q30()),
                    "aligned":
                    parse_float(lane_summary.percent_aligned().mean()),
                    "aligned_stddev":
                    parse_float(lane_summary.percent_aligned().stddev()),
                    "errorrate":
                    parse_float(lane_summary.error_rate().mean()),
                    "errorrate_stddev":
                    parse_float(lane_summary.error_rate().stddev()),
                    "percent_occupied":
                    parse_float(
                        parse_lane_occupancy(lane_index, percent_occupied_df))
                })
        return summary_dict
    except Exception as ex:
        logging.warn("Skipping - ERROR: %s - %s" % (run_folder, str(ex)))
Example #4
def get_qc_run_summary(ngs_folder_path):

	run_folder = ngs_folder_path

	run_metrics = py_interop_run_metrics.run_metrics()
	valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
	py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)

	run_folder = run_metrics.read(run_folder, valid_to_load)

	summary = py_interop_summary.run_summary()
	py_interop_summary.summarize_run_metrics(run_metrics, summary)

	return summary
Example #5
    def __init__(self, run_folder_path):
        # Initialize interop objects
        self.run_metrics = py_interop_run_metrics.run_metrics()
        valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
        py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)

        # Read from run folder
        self.run_metrics.read(run_folder_path, valid_to_load)

        # Load up summary metrics
        self.summary = py_interop_summary.run_summary()
        py_interop_summary.summarize_run_metrics(self.run_metrics, self.summary)

        # Cached result tables for subsequent calls
        self.run_summary_df = None
        self.read_summary_dfs = {}
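The enclosing class is not shown in the excerpt. A usage sketch, assuming the class is named RunFolderSummary (the name is hypothetical):

summary_obj = RunFolderSummary("/path/to/run_folder")  # hypothetical class name
print(summary_obj.summary.total_summary().error_rate())
print(summary_obj.summary.total_summary().percent_gt_q30())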
Example #6
def parse(run_folder, dictionary):
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    # Parse data from interop files -- % reads over Q30, cluster density, clusters passing filter
    dictionary["Percent Q30"] = round(summary.total_summary().percent_gt_q30(),
                                      2)
    dictionary["Cluster density"] = round(
        summary.at(0).at(0).density().mean() / 1000, 2)
    dictionary["Percent PF"] = round(
        summary.at(0).at(0).percent_pf().mean(), 2)
    dictionary["Phasing"] = round(summary.at(0).at(0).phasing().mean(), 2)
    dictionary["Prephasing"] = round(
        summary.at(0).at(0).prephasing().mean(), 2)
    dictionary["Error rate"] = round(summary.total_summary().error_rate(), 2)
    dictionary["Aligned"] = round(summary.total_summary().percent_aligned(), 2)
Example #7
def get_qc_run_summary(ngs_folder_path):
	"""
	Creates the InterOp summary object.

	Input:

	ngs_folder_path = The Illumina folder containing the QC data. Should contain the following:

		1) InterOp/ containing *MetricsOut.bin files.
		2) RunInfo.xml
		3) RunParameters.xml

	Output:

	Returns False if an error occurs, otherwise:

	summary = The InterOp summary object.

	See https://github.com/Illumina/interop/blob/master/docs/src/Tutorial_01_Intro.ipynb
	"""

	run_metrics = py_interop_run_metrics.run_metrics()
	valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
	py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)

	try:
		run_metrics.read(ngs_folder_path, valid_to_load)
	except Exception:
		return False  # An error has occurred.

	summary = py_interop_summary.run_summary()
	py_interop_summary.summarize_run_metrics(run_metrics, summary)

	return summary
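A usage sketch for the function above (the path is a placeholder):

summary = get_qc_run_summary("/path/to/qc_run_folder")
if summary is not False:
    print(summary.total_summary().percent_gt_q30())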
Example #8
File: phix.py Project: carden24/OLCTools
    def interop_parse(self):
        """
        Use interop to parse the files in the InterOp folder to extract the
        number of reads mapping to PhiX as well as the error rate
        """
        # Parse the files and load the data
        run_metrics = py_interop_run_metrics.run_metrics()
        valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
        py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
        run_metrics.read(self.path, valid_to_load)
        summary = py_interop_summary.run_summary()
        py_interop_summary.summarize_run_metrics(run_metrics, summary)
        # PhiX error rate for run over all "usable cycles"
        errorrate = summary.total_summary().error_rate()
        # Percent aligned PhiX
        pctaligned = summary.total_summary().percent_aligned()
        # Add the error rate and the percent of reads that align to PhiX to
        # the metadata object
        for sample in self.metadata:
            sample.run.error_rate = '{:.2f}'.format(errorrate)
            sample.run.phix_aligned = '{:.2f}'.format(pctaligned)
Example #9
def parse_illumina_interop(run_dir):
    # Read interop
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    try:
        run_metrics.read(run_dir, valid_to_load)
    except Exception:
        sys.stderr.write("Cannot parse information in InterOp\n")
        sys.exit(2)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    lanes = summary.lane_count()
    reads = summary.size()
    # Parse the interop stats lane by lane for non-index reads
    run_stats_summary = dict()
    for lane in range(lanes):
        lane_nbr = summary.at(0).at(lane).lane()
        for read in range(reads):
            if not summary.at(read).read().is_index():
                read_lane = summary.at(read).at(lane)
                stats = {
                    "density": read_lane.density().mean() / 1000,
                    "error_rate": read_lane.error_rate().mean(),
                    "first_cycle_intensity": read_lane.first_cycle_intensity().mean(),
                    "percent_aligned": read_lane.percent_aligned().mean(),
                    "percent_gt_q30": read_lane.percent_gt_q30(),
                    "percent_pf": read_lane.percent_pf().mean(),
                    "phasing": read_lane.phasing().mean(),
                    "prephasing": read_lane.prephasing().mean(),
                    "reads_pf": read_lane.reads_pf(),
                    "yield_g": read_lane.yield_g(),
                }
                if lane_nbr not in run_stats_summary:
                    run_stats_summary[lane_nbr] = {read: stats}
                else:
                    run_stats_summary[lane_nbr][read] = stats
    return run_stats_summary
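A minimal usage sketch; the run directory is a placeholder. Keys of the outer dict are 1-based lane numbers, keys of the inner dict are 0-based read indices:

stats = parse_illumina_interop("/data/runs/RunA")
for lane_nbr, reads in stats.items():
    for read_index, metrics in reads.items():
        print(lane_nbr, read_index + 1, metrics["error_rate"])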
Example #10
#    run_dir_all = glob.glob("/data/runScratch.boston/demultiplexed/*/*/{}".format(run_id))
    run_dir_all = glob.glob("/data/runScratch.boston/nova-interop-temp-marius/{}".format(run_id))
    if not run_dir_all:
        print("No run folder for", run_id)
    else:
        run_dir = run_dir_all[0]
        try:  # Ignore parsing errors, so as not to disturb the sequencer integrations

            # Parse InterOp data
            valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
            py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
            valid_to_load[py_interop_run.ExtendedTile] = 1
            run_metrics = py_interop_run_metrics.run_metrics()
            run_metrics.read(run_dir, valid_to_load)
            summary = py_interop_summary.run_summary()
            py_interop_summary.summarize_run_metrics(run_metrics, summary)
            extended_tile_metrics = run_metrics.extended_tile_metric_set()

            read_count = summary.size()
            lane_count = summary.lane_count()

            if lane_count != len(lane_artifacts):
                raise RuntimeError("Number of lanes in InterOp data ({}) does not match "
                    "the number of lanes in LIMS ({}).".format(lane_count, len(lane_artifacts)))

            result = {}
            phix_pct = [] # We report PhiX % per read R1 / R2 (non-index)
            for lane_number, artifact in lane_artifacts.items():
                lane_index = lane_number - 1
                nonindex_read_count = 0
                for read in range(read_count):
Example #11
def parsing_run_metrics_files(local_run_metric_folder, run_process_obj,
                              experiment_name):
    '''
    Description:
        The function parses the information from the run metric files
    Input:
        local_run_metric_folder   # local folder with the run metric files
        run_process_obj           # RunProcess object for this run
        experiment_name           # experiment name
    Import:
        py_interop_run
        py_interop_run_metrics
    Variables:
        bin_run_stats_summary_list # list of dictionaries with the summary
                                    information
        run_stats_read_list  # list of dictionaries with the read information
    Return:
        bin_run_stats_summary_list, run_stats_read_list
    '''
    logger = logging.getLogger(__name__)
    logger.debug('%s : Starting function parsing_run_metrics', experiment_name)
    run_param_obj = RunningParameters.objects.get(runName_id=run_process_obj)
    # get the number of lanes for the run
    number_of_lanes = int(run_param_obj.get_number_of_lanes())
    # get number of reads for the run
    num_of_reads = run_param_obj.get_number_of_reads()
    logger.info('%s : Fetched run information needed for running metrics',
                experiment_name)

    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_metric_folder = run_metrics.read(local_run_metric_folder, valid_to_load)

    summary = py_interop_summary.run_summary()

    py_interop_summary.summarize_run_metrics(run_metrics, summary)

    bin_run_stats_summary_list = []
    logger.info('%s : Starts collecting data for run metric ', experiment_name)
    # get the Run Summary for each Read
    for read_level in range(num_of_reads):
        run_summary_stats_level = {}
        # summary yield total
        run_summary_stats_level['yieldTotal'] = format(
            summary.at(read_level).summary().yield_g(), '.3f')
        # summary projected total yield
        run_summary_stats_level['projectedTotalYield'] = format(
            summary.at(read_level).summary().projected_yield_g(), '.3f')

        # percent yield
        run_summary_stats_level['aligned'] = format(
            summary.at(read_level).summary().percent_aligned(), '.3f')
        # Error rate
        run_summary_stats_level['errorRate'] = format(
            summary.at(read_level).summary().error_rate(), '.3f')
        # intensity cycle 1
        run_summary_stats_level['intensityCycle'] = str(
            round(summary.at(read_level).summary().first_cycle_intensity()))
        # Q30
        run_summary_stats_level['biggerQ30'] = format(
            summary.at(read_level).summary().percent_gt_q30(), '.3f')

        run_summary_stats_level['level'] = str(read_level + 1)

        bin_run_stats_summary_list.append(run_summary_stats_level)
    logger.info('%s : Parsed run Metrics on summary level ', experiment_name)

    # get the run summary for Total
    run_summary_stats_level = {}
    # total summary
    run_summary_stats_level['yieldTotal'] = format(
        summary.total_summary().yield_g(), '.3f')
    # total projected_yield_g
    run_summary_stats_level['projectedTotalYield'] = format(
        summary.total_summary().projected_yield_g(), '.3f')
    # total percent aligned
    run_summary_stats_level['aligned'] = format(
        summary.total_summary().percent_aligned(), '.3f')
    # total error rate
    run_summary_stats_level['errorRate'] = format(
        summary.total_summary().error_rate(), '.3f')
    # total intensity cycle
    run_summary_stats_level['intensityCycle'] = str(
        round(summary.total_summary().first_cycle_intensity()))
    # total Q 30
    run_summary_stats_level['biggerQ30'] = format(
        summary.total_summary().percent_gt_q30(), '.3f')

    run_summary_stats_level['level'] = 'Total'

    logger.info('%s : Parsed run Metrics on Total lane', experiment_name)

    bin_run_stats_summary_list.append(run_summary_stats_level)

    # get the run summary for non index
    run_summary_stats_level = {}
    # non index yield
    run_summary_stats_level['yieldTotal'] = format(
        summary.nonindex_summary().yield_g(), '.3f')
    #  non index projected yield
    run_summary_stats_level['projectedTotalYield'] = format(
        summary.nonindex_summary().projected_yield_g(), '.3f')

    # non index percent aligned
    run_summary_stats_level['aligned'] = format(
        summary.nonindex_summary().percent_aligned(), '.3f')
    # non index percent error rate
    run_summary_stats_level['errorRate'] = format(
        summary.nonindex_summary().error_rate(), '.3f')
    # non index intensity cycle
    run_summary_stats_level['intensityCycle'] = str(
        round(summary.nonindex_summary().first_cycle_intensity()))
    # non index Q 30
    run_summary_stats_level['biggerQ30'] = format(
        summary.nonindex_summary().percent_gt_q30(), '.3f')

    run_summary_stats_level['level'] = 'Non Index'
    logger.info('%s : Parsed run metric for Non Index lane', experiment_name)

    bin_run_stats_summary_list.append(run_summary_stats_level)

    ### information per reads
    run_stats_read_list = []
    #lan_summary= py_interop_summary.lane_summary()
    # Tiles
    for read_number in range(num_of_reads):
        for lane_number in range(number_of_lanes):
            logger.info(
                '%s : Processing run metrics stats on Read %s and on Lane %s',
                experiment_name, read_number, lane_number)
            run_read_stats_level = {}
            run_read_stats_level['tiles'] = str(
                int(summary.at(read_number).at(lane_number).tile_count()) * 2)
            # Density (K/mm2): divide the value by 1000 to report it in K/mm2
            # and report the mean +/- the stddev
            try:
                read_lane_density_mean = str(
                    round(
                        float(
                            summary.at(read_number).at(
                                lane_number).density().mean()) / 1000))
                read_lane_density_stddev = str(
                    round(
                        float(
                            summary.at(read_number).at(
                                lane_number).density().stddev()) / 1000))
            except Exception:
                read_lane_density_mean = 'NaN'
                read_lane_density_stddev = 'NaN'
                string_message = experiment_name + ' : Unable to convert to float '
                logging_warnings(string_message, False)
            run_read_stats_level[
                'density'] = read_lane_density_mean + '  ' + chr(
                    177) + '  ' + read_lane_density_stddev
            # cluster_pf in %
            try:
                read_lane_percent_pf_mean = format(
                    summary.at(read_number).at(
                        lane_number).percent_pf().mean(), '.3f')
                read_lane_percent_pf_stddev = format(
                    summary.at(read_number).at(
                        lane_number).percent_pf().stddev(), '.3f')
            except Exception:
                read_lane_percent_pf_mean = 'NaN'
                read_lane_percent_pf_stddev = 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_percent_pf'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'cluster_PF'] = read_lane_percent_pf_mean + '  ' + chr(
                    177) + '  ' + read_lane_percent_pf_stddev
            # phasing / prephasing in %
            try:
                read_lane_phasing_mean = format(
                    summary.at(read_number).at(lane_number).phasing().mean(),
                    '.3f')
                read_lane_phasing_dev = format(
                    summary.at(read_number).at(lane_number).phasing().stddev(),
                    '.3f')
                read_lane_prephasing_mean = format(
                    summary.at(read_number).at(
                        lane_number).prephasing().mean(), '.3f')
                read_lane_prephasing_stddev = format(
                    summary.at(read_number).at(
                        lane_number).prephasing().stddev(), '.3f')
            except Exception:
                read_lane_phasing_mean, read_lane_phasing_dev, read_lane_prephasing_mean, read_lane_prephasing_stddev = 'NaN', 'NaN', 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_phasing'
                logging_warnings(string_message, False)
            run_read_stats_level['phas_prephas'] = (
                read_lane_phasing_mean + '  ' + chr(177) + '  ' +
                read_lane_phasing_dev + '  /  ' + read_lane_prephasing_mean +
                '  ' + chr(177) + '  ' + read_lane_prephasing_stddev)
            # reads (M)
            try:
                run_read_stats_level['reads'] = format(
                    float(summary.at(read_number).at(lane_number).reads()) /
                    1000000, '.3f')
            except Exception:
                run_read_stats_level['reads'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[reads]'
                logging_warnings(string_message, False)
            #reads PF (M)
            try:
                run_read_stats_level['reads_PF'] = format(
                    float(summary.at(read_number).at(lane_number).reads_pf()) /
                    1000000, '.3f')
            except Exception:
                run_read_stats_level['reads_PF'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[reads_PF]'
                logging_warnings(string_message, False)
            # percent q30
            try:
                run_read_stats_level['q30'] = format(
                    summary.at(read_number).at(lane_number).percent_gt_q30(),
                    '.3f')
            except Exception:
                run_read_stats_level['q30'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[q30]'
                logging_warnings(string_message, False)
            # yield _g
            try:
                run_read_stats_level['yields'] = format(
                    summary.at(read_number).at(lane_number).yield_g(), '.3f')
            except Exception:
                run_read_stats_level['yields'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[yields]'
                logging_warnings(string_message, False)
            # cycles err Rate
            try:
                run_read_stats_level['cyclesErrRated'] = str(
                    summary.at(read_number).at(lane_number).cycle_state().
                    error_cycle_range().first_cycle())
            except Exception:
                run_read_stats_level['cyclesErrRated'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[cyclesErrRated]'
                logging_warnings(string_message, False)
            #percent_aligned
            try:
                read_lane_percent_aligned_mean = format(
                    summary.at(read_number).at(
                        lane_number).percent_aligned().mean(), '.3f')
                read_lane_percent_aligned_stddev = format(
                    summary.at(read_number).at(
                        lane_number).percent_aligned().stddev(), '.3f')
            except Exception:
                read_lane_percent_aligned_mean, read_lane_percent_aligned_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_percent_aligned_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'aligned'] = read_lane_percent_aligned_mean + '  ' + chr(
                    177) + '  ' + read_lane_percent_aligned_stddev
            #error rate
            try:
                read_lane_error_rate_mean = format(
                    summary.at(read_number).at(
                        lane_number).error_rate().mean(), '.3f')
                read_lane_error_rate_stddev = format(
                    summary.at(read_number).at(
                        lane_number).error_rate().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_mean, read_lane_error_rate_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'errorRate'] = read_lane_error_rate_mean + '  ' + chr(
                    177) + '  ' + read_lane_error_rate_stddev
            #error rate_35
            try:
                read_lane_error_rate_35_mean = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_35().mean(), '.3f')
                read_lane_error_rate_35_stddev = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_35().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_35_mean, read_lane_error_rate_35_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_35_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'errorRate35'] = read_lane_error_rate_35_mean + '  ' + chr(
                    177) + '  ' + read_lane_error_rate_35_stddev
            #error rate 50
            try:
                read_lane_error_rate_50_mean = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_50().mean(), '.3f')
                read_lane_error_rate_50_stddev = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_50().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_50_mean, read_lane_error_rate_50_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_50_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'errorRate50'] = read_lane_error_rate_50_mean + '  ' + chr(
                    177) + '  ' + read_lane_error_rate_50_stddev
            #error rate 75
            try:
                read_lane_error_rate_75_mean = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_75().mean(), '.3f')
                read_lane_error_rate_75_stddev = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_75().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_75_mean, read_lane_error_rate_75_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_75_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'errorRate75'] = read_lane_error_rate_75_mean + '  ' + chr(
                    177) + '  ' + read_lane_error_rate_75_stddev
            #error rate 100
            try:
                read_lane_error_rate_100_mean = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_100().mean(), '.3f')
                read_lane_error_rate_100_stddev = format(
                    summary.at(read_number).at(
                        lane_number).error_rate_100().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_100_mean, read_lane_error_rate_100_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_100_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'errorRate100'] = read_lane_error_rate_100_mean + '  ' + chr(
                    177) + '  ' + read_lane_error_rate_100_stddev
            # intensity cycle 1
            try:
                read_lane_intensity_cycle_mean = format(
                    summary.at(read_number).at(
                        lane_number).first_cycle_intensity().mean(),
                    '.3f')  # get tiles for read 1 and lane 1
                read_lane_intensity_cycle_stddev = format(
                    summary.at(read_number).at(
                        lane_number).first_cycle_intensity().stddev(), '.3f')
            except Exception:
                read_lane_intensity_cycle_mean, read_lane_intensity_cycle_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_intensity_cycle_mean'
                logging_warnings(string_message, False)
            run_read_stats_level[
                'intensityCycle'] = read_lane_intensity_cycle_mean + '  ' + chr(
                    177) + '  ' + read_lane_intensity_cycle_stddev

            run_read_stats_level['read'] = str(read_number + 1)
            run_read_stats_level['lane'] = str(lane_number + 1)
            # append run_read_stats_level information to run_stats_read_list
            run_stats_read_list.append(run_read_stats_level)

    logger.debug('%s : End function parsing_run_metrics', experiment_name)
    return bin_run_stats_summary_list, run_stats_read_list
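Each metric block in the loop above repeats one pattern: format a stat's mean and stddev to three decimals, join them with ± (chr(177)), and fall back to 'NaN' when formatting fails. A sketch of a helper that would collapse that repetition (hypothetical, not part of the original module):

def format_mean_stddev(stat, spec='.3f'):
    # Hypothetical helper: render an interop stat as "mean  ±  stddev",
    # falling back to NaN placeholders when the values cannot be formatted.
    try:
        return '{}  {}  {}'.format(format(stat.mean(), spec), chr(177),
                                   format(stat.stddev(), spec))
    except Exception:
        return 'NaN  ' + chr(177) + '  NaN'

With it, a block such as the error-rate one reduces to run_read_stats_level['errorRate'] = format_mean_stddev(summary.at(read_number).at(lane_number).error_rate()).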
Example #12
def parse_interop_data(run_folder_dir, num_reads, num_lanes):
    """
	Parses summary statistics out of interops data using the Illumina interops package
	"""

    # make empty dict to store output
    interop_dict = {'read_summaries': {}}

    # taken from the Illumina interop package documentation; all of this is
    # required even though only the summary variable is used further on
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder_dir, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)

    for read in range(num_reads):

        new_read = read + 1

        if new_read not in interop_dict['read_summaries']:
            interop_dict['read_summaries'][new_read] = {}

        for lane in range(num_lanes):

            new_lane = lane + 1

            if new_lane not in interop_dict['read_summaries'][new_read]:
                interop_dict['read_summaries'][new_read][new_lane] = {}

            lane_summary = summary.at(read).at(lane)
            lane_dict = interop_dict['read_summaries'][new_read][new_lane]

            lane_dict['percent_q30'] = lane_summary.percent_gt_q30()
            lane_dict['density'] = lane_summary.density().mean()
            lane_dict['density_pf'] = lane_summary.density_pf().mean()
            lane_dict['cluster_count'] = lane_summary.cluster_count().mean()
            lane_dict['cluster_count_pf'] = lane_summary.cluster_count_pf().mean()
            lane_dict['error_rate'] = lane_summary.error_rate().mean()
            lane_dict['percent_aligned'] = lane_summary.percent_aligned().mean()
            lane_dict['percent_pf'] = lane_summary.percent_pf().mean()
            lane_dict['phasing'] = lane_summary.phasing().mean()
            lane_dict['prephasing'] = lane_summary.prephasing().mean()
            lane_dict['reads'] = lane_summary.reads()
            lane_dict['reads_pf'] = lane_summary.reads_pf()
            lane_dict['yield_g'] = lane_summary.yield_g()

            # Replace NaN values with None so the results serialise cleanly
            for key in lane_dict:
                if math.isnan(lane_dict[key]):
                    lane_dict[key] = None

    # Reload the metrics with index metrics flagged for loading; the excerpt
    # returns without using them further
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_index_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder_dir, valid_to_load)

    return interop_dict
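A minimal usage sketch for parse_interop_data(); the path and the read/lane counts are placeholders:

interop_data = parse_interop_data("/data/runs/RunA", num_reads=2, num_lanes=4)
print(interop_data['read_summaries'][1][1]['percent_q30'])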