def main():
    """
    Retrieve run folder paths from the command line
    Ensure only metrics required for summary are loaded
    Load the run metrics
    Calculate the summary metrics
    Display error by lane, read
    """
    logging.basicConfig(level=logging.INFO)
    run_metrics = py_interop_run_metrics.run_metrics()
    summary = py_interop_summary.run_summary()
    # Restrict loading to the metric files needed for a run summary
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    for run_folder_path in sys.argv[1:]:
        run_folder = os.path.basename(run_folder_path)
        try:
            run_metrics.read(run_folder_path, valid_to_load)
        except Exception as ex:
            # FIX: original `except ex:` is a syntax error (NameError at best);
            # also logging.warn is a deprecated alias of logging.warning.
            logging.warning("Skipping - cannot read RunInfo.xml: %s - %s" % (run_folder, str(ex)))
            continue
        py_interop_summary.summarize_run_metrics(run_metrics, summary)
        # Mean PhiX error rate for every (read, lane, surface) combination
        error_rate_read_lane_surface = numpy.zeros(
            (summary.size(), summary.lane_count(), summary.surface_count()))
        for read_index in range(summary.size()):
            for lane_index in range(summary.lane_count()):
                for surface_index in range(summary.surface_count()):
                    error_rate_read_lane_surface[read_index, lane_index, surface_index] = \
                        summary.at(read_index).at(lane_index).at(surface_index).error_rate().mean()
        logging.info("Run Folder: " + run_folder)
        for read_index in range(summary.size()):
            read_summary = summary.at(read_index)
            logging.info("Read " + str(read_summary.read().number()))
def run(self):
    """Summarize InterOp metrics for self.runfolder and publish per-lane,
    per-read ``error_rate`` and ``percent_q30`` events to subscribers."""
    metrics = py_interop_run_metrics.run_metrics()
    metrics.run_info()
    load_flags = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(load_flags)
    metrics.read(self.runfolder, load_flags)
    run_summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(metrics, run_summary)
    non_index_reads = self.get_non_index_reads(run_summary)
    for lane_index in range(run_summary.lane_count()):
        # The interop library uses zero based indexing, however most people uses
        # read 1/2 to denote the different reads; enumerate(..., start=1) does
        # that zero-based -> one-based transformation. /JD 2017-10-27
        for read_nbr, interop_read_index in enumerate(non_index_reads, start=1):
            lane_read = run_summary.at(interop_read_index).at(lane_index)
            self._send_to_subscribers(("error_rate", {
                "lane": lane_index + 1,
                "read": read_nbr,
                "error_rate": lane_read.error_rate().mean()
            }))
            self._send_to_subscribers(("percent_q30", {
                "lane": lane_index + 1,
                "read": read_nbr,
                "percent_q30": lane_read.percent_gt_q30()
            }))
def parse_single_json(run_folder_path, xrange=None):
    """Parse the InterOp summary for one run folder into a JSON-ready dict.

    Returns a dict with keys "run" and "data" (one entry per read/lane), or
    None when the InterOp files cannot be read (the error is logged).
    `xrange` is accepted for backward compatibility and is unused here.
    """
    run_folder = os.path.basename(run_folder_path)
    run_metrics = py_interop_run_metrics.run_metrics()
    summary = py_interop_summary.run_summary()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    # FIX: original bound the DataFrame *class* (pd.DataFrame) instead of an
    # empty instance, so downstream lane-occupancy lookups would misbehave.
    percent_occupied_df = pd.DataFrame()
    if 'MyRun' not in str(run_folder_path):
        percent_occupied_df = get_percent_occupied_by_lane(run_folder_path)
    summary_dict = {}
    try:
        run_metrics.read(run_folder_path, valid_to_load)
        py_interop_summary.summarize_run_metrics(run_metrics, summary)
        logging.info("Read: {}, Lane: {}".format(summary.size(), summary.lane_count()))
        for read_index in range(summary.size()):
            logging.info("Read {}".format(read_index + 1))
            summary_dict.setdefault("run", run_folder)
            for lane_index in range(summary.lane_count()):
                read_summary = summary.at(read_index)
                lane_summary = read_summary.at(lane_index)
                summary_dict.setdefault("data", []).append({
                    "runname": parse_runid(run_folder),
                    "read": read_summary.read().number(),
                    "lane": lane_summary.lane(),
                    # densities reported in K/mm^2
                    "density": parse_float(lane_summary.density().mean() / 1000),
                    "density_stddev": parse_float(lane_summary.density().stddev() / 1000),
                    "clusterpf": parse_float(lane_summary.percent_pf().mean()),
                    "clusterpf_stddev": parse_float(lane_summary.percent_pf().stddev()),
                    # read counts reported in millions
                    "readsm": parse_float(lane_summary.reads() / 1000000),
                    "readspfm": parse_float(lane_summary.reads_pf() / 1000000),
                    "q30": parse_float(lane_summary.percent_gt_q30()),
                    "aligned": parse_float(lane_summary.percent_aligned().mean()),
                    "aligned_stddev": parse_float(lane_summary.percent_aligned().stddev()),
                    "errorrate": parse_float(lane_summary.error_rate().mean()),
                    "errorrate_stddev": parse_float(lane_summary.error_rate().stddev()),
                    "percent_occupied": parse_float(
                        parse_lane_occupancy(lane_index, percent_occupied_df))
                })
        return summary_dict
    except Exception as ex:
        # FIX: logging.warn is a deprecated alias of logging.warning
        logging.warning("Skipping - ERROR: %s - %s" % (run_folder, str(ex)))
def get_qc_run_summary(ngs_folder_path):
    """Load InterOp summary metrics from *ngs_folder_path* and return the
    populated run_summary object.

    Only the metric files required for a run summary are loaded.
    """
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    # FIX: read() is called for its side effect of loading metrics; the
    # original rebound the folder path to its return value, which was
    # misleading and served no purpose.
    run_metrics.read(ngs_folder_path, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    return summary
def __init__(self, run_folder_path):
    """Load summary metrics from *run_folder_path* and precompute the
    InterOp run summary; result tables are built lazily and cached."""
    # Interop parser restricted to the metrics needed for a run summary
    self.run_metrics = py_interop_run_metrics.run_metrics()
    metrics_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(metrics_to_load)
    self.run_metrics.read(run_folder_path, metrics_to_load)
    # Aggregate the loaded metrics into a summary object
    self.summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(self.run_metrics, self.summary)
    # Cached result tables for subsequent calls
    self.run_summary_df = None
    self.read_summary_dfs = {}
def parse(run_folder, dictionary):
    """Populate *dictionary* in place with headline QC metrics parsed from
    the InterOp files in *run_folder*.

    Keys written: "Percent Q30", "Cluster density", "Percent PF", "Phasing",
    "Prephasing", "Error rate", "Aligned" (all rounded to 2 decimals).
    """
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    # FIX: read() is called for its side effect; the original rebound
    # run_folder to its return value for no reason.
    run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    # Parse data from interop files -- % reads over Q30, cluster density, clusters passing filter
    first_read_lane = summary.at(0).at(0)  # read 1, lane 1
    dictionary["Percent Q30"] = round(summary.total_summary().percent_gt_q30(), 2)
    # density reported in K/mm^2
    dictionary["Cluster density"] = round(first_read_lane.density().mean() / 1000, 2)
    dictionary["Percent PF"] = round(first_read_lane.percent_pf().mean(), 2)
    dictionary["Phasing"] = round(first_read_lane.phasing().mean(), 2)
    dictionary["Prephasing"] = round(first_read_lane.prephasing().mean(), 2)
    dictionary["Error rate"] = round(summary.total_summary().error_rate(), 2)
    dictionary["Aligned"] = round(summary.total_summary().percent_aligned(), 2)
def get_qc_run_summary(ngs_folder_path):
    """
    Creates the InterOp summary object.
    Input:
        ngs_folder_path = The Illumina folder containing the QC data.
        Should contain the following:
            1) InterOp/ containing *MetricsOut.bin files.
            2) RunInfo.xml
            3) RunParameters.xml
    Output:
        Returns False if an error occurs otherwise:
        summary = The InterOp summary object.
        See https://github.com/Illumina/interop/blob/master/docs/src/Tutorial_01_Intro.ipynb
    """
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    # FIX: the original guarded the infallible assignment
    # `run_folder = ngs_folder_path` with a bare except, so the documented
    # "return False on error" path was dead code. Guard the operation that
    # can actually fail: reading the InterOp data from disk.
    try:
        run_metrics.read(ngs_folder_path, valid_to_load)
    except Exception:
        return False  # An error has occured.
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    return summary
def interop_parse(self):
    """
    Use interop to parse the files in the InterOp folder to extract the
    number of reads mapping to PhiX as well as the error rate
    """
    # Restrict loading to summary metrics, then parse the InterOp folder
    metrics = py_interop_run_metrics.run_metrics()
    to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(to_load)
    metrics.read(self.path, to_load)
    run_summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(metrics, run_summary)
    totals = run_summary.total_summary()
    # PhiX error rate for run over all "usable cycles"
    errorrate = totals.error_rate()
    # Percent aligned PhiX
    pctaligned = totals.percent_aligned()
    # Add the error rate and the percent of reads that align to PhiX to the metadata object
    for sample in self.metadata:
        sample.run.error_rate = '{:.2f}'.format(errorrate)
        sample.run.phix_aligned = '{:.2f}'.format(pctaligned)
def parse_illumina_interop(run_dir):
    """Parse per-lane, per-read InterOp summary stats for non-index reads.

    Returns a nested dict {lane_number: {read_index: {metric: value}}}.
    Exits with status 2 if the InterOp data cannot be read.
    """
    # Read interop
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    try:
        run_metrics.read(run_dir, valid_to_load)
    except Exception:
        sys.stderr.write("Cannot parse information in InterOp")
        sys.exit(2)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    lanes = summary.lane_count()
    reads = summary.size()
    # Parse the interop stats lane by lane for non-index reads
    # FIX: replaced the getattr(obj, "name")() indirection with direct
    # attribute calls, hoisted the repeated summary.at(read).at(lane)
    # lookup, and used setdefault instead of the keys()-membership dance.
    run_stats_summary = dict()
    for lane in range(lanes):
        lane_nbr = summary.at(0).at(lane).lane()
        for read in range(reads):
            if not summary.at(read).read().is_index():
                lane_summary = summary.at(read).at(lane)
                stats = {
                    "density": lane_summary.density().mean() / 1000,  # K/mm^2
                    "error_rate": lane_summary.error_rate().mean(),
                    "first_cycle_intensity": lane_summary.first_cycle_intensity().mean(),
                    "percent_aligned": lane_summary.percent_aligned().mean(),
                    "percent_gt_q30": lane_summary.percent_gt_q30(),
                    "percent_pf": lane_summary.percent_pf().mean(),
                    "phasing": lane_summary.phasing().mean(),
                    "prephasing": lane_summary.prephasing().mean(),
                    "reads_pf": lane_summary.reads_pf(),
                    "yield_g": lane_summary.yield_g(),
                }
                run_stats_summary.setdefault(lane_nbr, {})[read] = stats
    return run_stats_summary
# run_dir_all = glob.glob("/data/runScratch.boston/demultiplexed/*/*/{}".format(run_id)) run_dir_all = glob.glob("/data/runScratch.boston/nova-interop-temp-marius/{}".format(run_id)) if not run_dir_all: print("No run folder for", run_id) else: run_dir = run_dir_all[0] try: # Ignore parsing error, to not disturb the sequencer integrations # Parse InterOp data valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0) py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load) valid_to_load[py_interop_run.ExtendedTile] = 1 run_metrics = py_interop_run_metrics.run_metrics() run_metrics.read(run_dir, valid_to_load) summary = py_interop_summary.run_summary() py_interop_summary.summarize_run_metrics(run_metrics, summary) extended_tile_metrics = run_metrics.extended_tile_metric_set() read_count = summary.size() lane_count = summary.lane_count() if lane_count != len(lane_artifacts): raise RuntimeError("Error: Number of lanes in InterOp data: {}, does not match the number " "of lanes in LIMS: {}.".format(lane_count, len(lane_artifacts))) result = {} phix_pct = [] # We report PhiX % per read R1 / R2 (non-index) for lane_number, artifact in lane_artifacts.items(): lane_index = lane_number - 1 nonindex_read_count = 0
def parsing_run_metrics_files(local_run_metric_folder, run_process_obj, experiment_name):
    '''
    Description:
        The function parse the information from the run metric files
    Input:
        local_run_metric_folder   # local folder with the run metric files
        run_process_obj           # RunProcess object for this run
        experiment_name           # experiment name
    Import:
        py_interop_run
        py_interop_run_metrics
    Variables:
        bin_run_stats_summary_list # list of dictionnary with the summary information
        run_stats_read_list # list of dictionnary with the read information
    Return:
        bin_run_stats_summary_list, run_stats_read_list
    '''
    logger = logging.getLogger(__name__)
    logger.debug('%s : Starting function parsing_run_metrics', experiment_name)
    run_param_obj = RunningParameters.objects.get(runName_id=run_process_obj)
    # get the number of lanes for the run
    number_of_lanes = int(run_param_obj.get_number_of_lanes())
    # get number of reads for the run
    num_of_reads = run_param_obj.get_number_of_reads()
    logger.info('%s : Fetched run information needed for running metrics', experiment_name)
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    # FIX: valid_to_load was built but never passed to read(), so every metric
    # type was loaded instead of only the ones needed for the summary.
    run_metric_folder = run_metrics.read(local_run_metric_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    bin_run_stats_summary_list = []
    logger.info('%s : Starts collecting data for run metric ', experiment_name)
    # get the Run Summary for each Read
    for read_level in range(num_of_reads):
        run_summary_stats_level = {}
        # summary yield total
        run_summary_stats_level['yieldTotal'] = format(
            summary.at(read_level).summary().yield_g(), '.3f')
        # summary projected total yield
        run_summary_stats_level['projectedTotalYield'] = format(
            summary.at(read_level).summary().projected_yield_g(), '.3f')
        # percent yield
        run_summary_stats_level['aligned'] = format(
            summary.at(read_level).summary().percent_aligned(), '.3f')
        # Error rate
        run_summary_stats_level['errorRate'] = format(
            summary.at(read_level).summary().error_rate(), '.3f')
        # intensity cycle 1
        run_summary_stats_level['intensityCycle'] = str(
            round(summary.at(read_level).summary().first_cycle_intensity()))
        # Q30
        run_summary_stats_level['biggerQ30'] = format(
            summary.at(read_level).summary().percent_gt_q30(), '.3f')
        run_summary_stats_level['level'] = str(read_level + 1)
        bin_run_stats_summary_list.append(run_summary_stats_level)
    logger.info('%s : Parsed run Metrics on summary level ', experiment_name)
    # get the run summary for Total
    run_summary_stats_level = {}
    # total summary
    run_summary_stats_level['yieldTotal'] = format(
        summary.total_summary().yield_g(), '.3f')
    # total projected_yield_g
    run_summary_stats_level['projectedTotalYield'] = format(
        summary.total_summary().projected_yield_g(), '.3f')
    # total percent aligned
    run_summary_stats_level['aligned'] = format(
        summary.total_summary().percent_aligned(), '.3f')
    # total error rate
    run_summary_stats_level['errorRate'] = format(
        summary.total_summary().error_rate(), '.3f')
    # total intensity cycle
    run_summary_stats_level['intensityCycle'] = str(
        round(summary.total_summary().first_cycle_intensity()))
    # total Q 30
    run_summary_stats_level['biggerQ30'] = format(
        summary.total_summary().percent_gt_q30(), '.3f')
    run_summary_stats_level['level'] = 'Total'
    logger.info('%s : Parsed run Metrics on Total lane', experiment_name)
    bin_run_stats_summary_list.append(run_summary_stats_level)
    # get the run summary for non index
    run_summary_stats_level = {}
    # non index yield
    run_summary_stats_level['yieldTotal'] = format(
        summary.nonindex_summary().yield_g(), '.3f')
    # non index projected yield
    run_summary_stats_level['projectedTotalYield'] = format(
        summary.nonindex_summary().projected_yield_g(), '.3f')
    # non index percent aligned
    run_summary_stats_level['aligned'] = format(
        summary.nonindex_summary().percent_aligned(), '.3f')
    # non index percent error rate
    run_summary_stats_level['errorRate'] = format(
        summary.nonindex_summary().error_rate(), '.3f')
    # non index intensity cycle
    run_summary_stats_level['intensityCycle'] = str(
        round(summary.nonindex_summary().first_cycle_intensity()))
    # non index Q 30
    run_summary_stats_level['biggerQ30'] = format(
        summary.nonindex_summary().percent_gt_q30(), '.3f')
    run_summary_stats_level['level'] = 'Non Index'
    logger.info('%s : Parsed run metric for Non Index lane', experiment_name)
    bin_run_stats_summary_list.append(run_summary_stats_level)
    ### information per reads
    run_stats_read_list = []
    #lan_summary= py_interop_summary.lane_summary()
    # Tiles
    for read_number in range(num_of_reads):
        for lane_number in range(number_of_lanes):
            logger.info(
                '%s : Processing run metrics stats on Read %s and on Lane %s',
                experiment_name, read_number, lane_number)
            run_read_stats_level = {}
            run_read_stats_level['tiles'] = str(
                int(summary.at(read_number).at(lane_number).tile_count()) * 2)
            # Density (k/mm2) divide the value by 1000 to have it K/mm2
            # get the +/- with the steddev
            # NOTE: bare except clauses narrowed to `except Exception` below so
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            try:
                read_lane_density_mean = str(
                    round(
                        float(
                            summary.at(read_number).at(
                                lane_number).density().mean()) / 1000))
                read_lane_density_stddev = str(
                    round(
                        float(
                            summary.at(read_number).at(
                                lane_number).density().stddev()) / 1000))
            except Exception:
                read_lane_density_mean = 'NaN'
                read_lane_density_stddev = 'NaN'
                string_message = experiment_name + ' : Unable to convert to float '
                logging_warnings(string_message, False)
            run_read_stats_level['density'] = read_lane_density_mean + ' ' + chr(
                177) + ' ' + read_lane_density_stddev
            # cluster _pf in %
            try:
                read_lane_percent_pf_mean = format(
                    summary.at(read_number).at(lane_number).percent_pf().mean(), '.3f')
                read_lane_percent_pf_stddev = format(
                    summary.at(read_number).at(lane_number).percent_pf().stddev(), '.3f')
            except Exception:
                read_lane_percent_pf_mean = 'NaN'
                read_lane_percent_pf_stddev = 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_percent_pf'
                logging_warnings(string_message, False)
            run_read_stats_level['cluster_PF'] = read_lane_percent_pf_mean + ' ' + chr(
                177) + ' ' + read_lane_percent_pf_stddev
            # phas/ prepas in %
            try:
                read_lane_phasing_mean = format(
                    summary.at(read_number).at(lane_number).phasing().mean(), '.3f')
                read_lane_phasing_dev = format(
                    summary.at(read_number).at(lane_number).phasing().stddev(), '.1f')
                read_lane_prephasing_mean = format(
                    summary.at(read_number).at(lane_number).prephasing().mean(), '.3f')
                read_lane_prephasing_stddev = format(
                    summary.at(read_number).at(lane_number).prephasing().stddev(), '.3f')
            except Exception:
                read_lane_phasing_mean, read_lane_phasing_dev, read_lane_prephasing_mean, read_lane_prephasing_stddev = 'NaN', 'NaN', 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_phasing'
                logging_warnings(string_message, False)
            run_read_stats_level['phas_prephas'] = read_lane_phasing_mean + ' ' + chr(
                177) + ' ' + read_lane_phasing_dev + ' / ' + read_lane_prephasing_mean + ' ' + chr(
                177) + ' ' + read_lane_prephasing_stddev
            # reads (M)
            try:
                run_read_stats_level['reads'] = format(
                    float(summary.at(read_number).at(lane_number).reads()) / 1000000, '.3f')
            except Exception:
                run_read_stats_level['reads'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[reads]'
                logging_warnings(string_message, False)
            # reads PF (M)
            try:
                run_read_stats_level['reads_PF'] = format(
                    float(summary.at(read_number).at(lane_number).reads_pf()) / 1000000, '.3f')
            except Exception:
                run_read_stats_level['reads_PF'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[reads_PF]'
                logging_warnings(string_message, False)
            # percent q30
            try:
                run_read_stats_level['q30'] = format(
                    summary.at(read_number).at(lane_number).percent_gt_q30(), '.3f')
            except Exception:
                run_read_stats_level['q30'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[q30]'
                logging_warnings(string_message, False)
            # yield_g
            try:
                run_read_stats_level['yields'] = format(
                    summary.at(read_number).at(lane_number).yield_g(), '.3f')
            except Exception:
                run_read_stats_level['yields'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[yields]'
                logging_warnings(string_message, False)
            # cycles err Rate
            try:
                run_read_stats_level['cyclesErrRated'] = str(
                    summary.at(read_number).at(lane_number).cycle_state().
                    error_cycle_range().first_cycle())
            except Exception:
                run_read_stats_level['cyclesErrRated'] = 'NaN'
                string_message = experiment_name + ' : Unable to format to float run_read_stats_level[cyclesErrRated]'
                logging_warnings(string_message, False)
            # percent_aligned
            try:
                read_lane_percent_aligned_mean = format(
                    summary.at(read_number).at(lane_number).percent_aligned().mean(), '.3f')
                # FIX: format spec was '3f' (minimum field width 3, default 6
                # decimals) instead of '.3f' (3 decimals) like every sibling
                read_lane_percent_aligned_stddev = format(
                    summary.at(read_number).at(lane_number).percent_aligned().stddev(), '.3f')
            except Exception:
                read_lane_percent_aligned_mean, read_lane_percent_aligned_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_percent_aligned_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['aligned'] = read_lane_percent_aligned_mean + ' ' + chr(
                177) + ' ' + read_lane_percent_aligned_stddev
            # error rate
            try:
                read_lane_error_rate_mean = format(
                    summary.at(read_number).at(lane_number).error_rate().mean(), '.3f')
                read_lane_error_rate_stddev = format(
                    summary.at(read_number).at(lane_number).error_rate().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_mean, read_lane_error_rate_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['errorRate'] = read_lane_error_rate_mean + ' ' + chr(
                177) + ' ' + read_lane_error_rate_stddev
            # error rate_35
            try:
                read_lane_error_rate_35_mean = format(
                    summary.at(read_number).at(lane_number).error_rate_35().mean(), '.3f')
                read_lane_error_rate_35_stddev = format(
                    summary.at(read_number).at(lane_number).error_rate_35().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_35_mean, read_lane_error_rate_35_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_35_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['errorRate35'] = read_lane_error_rate_35_mean + ' ' + chr(
                177) + ' ' + read_lane_error_rate_35_stddev
            # error rate 50
            try:
                read_lane_error_rate_50_mean = format(
                    summary.at(read_number).at(lane_number).error_rate_50().mean(), '.3f')
                read_lane_error_rate_50_stddev = format(
                    summary.at(read_number).at(lane_number).error_rate_50().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_50_mean, read_lane_error_rate_50_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_50_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['errorRate50'] = read_lane_error_rate_50_mean + ' ' + chr(
                177) + ' ' + read_lane_error_rate_50_stddev
            # error rate 75
            try:
                read_lane_error_rate_75_mean = format(
                    summary.at(read_number).at(lane_number).error_rate_75().mean(), '.3f')
                read_lane_error_rate_75_stddev = format(
                    summary.at(read_number).at(lane_number).error_rate_75().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_75_mean, read_lane_error_rate_75_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_75_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['errorRate75'] = read_lane_error_rate_75_mean + ' ' + chr(
                177) + ' ' + read_lane_error_rate_75_stddev
            # error rate 100
            try:
                read_lane_error_rate_100_mean = format(
                    summary.at(read_number).at(lane_number).error_rate_100().mean(), '.3f')
                read_lane_error_rate_100_stddev = format(
                    summary.at(read_number).at(lane_number).error_rate_100().stddev(), '.3f')
            except Exception:
                read_lane_error_rate_100_mean, read_lane_error_rate_100_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_error_rate_100_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['errorRate100'] = read_lane_error_rate_100_mean + ' ' + chr(
                177) + ' ' + read_lane_error_rate_100_stddev
            # intensity cycle 1
            try:
                read_lane_intensity_cycle_mean = format(
                    summary.at(read_number).at(lane_number).first_cycle_intensity().mean(), '.3f')
                read_lane_intensity_cycle_stddev = format(
                    summary.at(read_number).at(lane_number).first_cycle_intensity().stddev(), '.3f')
            except Exception:
                read_lane_intensity_cycle_mean, read_lane_intensity_cycle_stddev = 'NaN', 'NaN'
                string_message = experiment_name + ' : Unable to format to float read_lane_intensity_cycle_mean'
                logging_warnings(string_message, False)
            run_read_stats_level['intensityCycle'] = read_lane_intensity_cycle_mean + ' ' + chr(
                177) + ' ' + read_lane_intensity_cycle_stddev
            run_read_stats_level['read'] = str(read_number + 1)
            run_read_stats_level['lane'] = str(lane_number + 1)
            # append run_read_stats_level information to run_stats_read_list
            run_stats_read_list.append(run_read_stats_level)
    logger.debug('%s : End function parsing_run_metrics', experiment_name)
    return bin_run_stats_summary_list, run_stats_read_list
def parse_interop_data(run_folder_dir, num_reads, num_lanes):
    """
    Parses summary statistics out of interops data using the Illumina
    interops package.

    Returns {'read_summaries': {read: {lane: {metric: value-or-None}}}}
    with reads/lanes keyed 1-based; NaN metric values are mapped to None.
    """
    # make empty dict to store output
    interop_dict = {'read_summaries': {}}
    # taken from illumina interops package documentation, all of this is required,
    # even though only the summary variable is used further on
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_metrics.read(run_folder_dir, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    for read in range(num_reads):
        for lane in range(num_lanes):
            lane_summary = summary.at(read).at(lane)
            metrics = {
                'percent_q30': lane_summary.percent_gt_q30(),
                'density': lane_summary.density().mean(),
                'density_pf': lane_summary.density_pf().mean(),
                # FIX: 'cluster_count' was a copy/paste of density_pf();
                # report the actual cluster count metric instead
                'cluster_count': lane_summary.cluster_count().mean(),
                'cluster_count_pf': lane_summary.cluster_count_pf().mean(),
                'error_rate': lane_summary.error_rate().mean(),
                'percent_aligned': lane_summary.percent_aligned().mean(),
                'percent_pf': lane_summary.percent_pf().mean(),
                'phasing': lane_summary.phasing().mean(),
                'prephasing': lane_summary.prephasing().mean(),
                'reads': lane_summary.reads(),
                'reads_pf': lane_summary.reads_pf(),
                'yield_g': lane_summary.yield_g(),
            }
            # Normalise NaN values to None for downstream serialisation
            interop_dict['read_summaries'].setdefault(read + 1, {})[lane + 1] = {
                key: (None if math.isnan(value) else value)
                for key, value in metrics.items()
            }
    # Reload with index metrics (side effect kept from the original;
    # NOTE(review): the reloaded object is never used afterwards — confirm
    # whether this block is still needed before removing it)
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_index_metrics_to_load(valid_to_load)
    run_metrics.read(run_folder_dir, valid_to_load)
    return interop_dict