def demultiplex(self):
    """Build the bcl2fastq command line from the configuration and launch it.

    The executable is read from ``CONFIG['analysis']['bcl2fastq']`` under the
    key ``self.run_type``. Each entry of the optional ``options`` list is
    appended to the command: a plain entry (e.g. ``no-lane-splitting``)
    becomes ``--<entry>``, while a mapping entry (an option that needs a
    value, e.g. ``{'use-bases-mask': 'Y8,I8,Y8'}``) becomes
    ``--<name> <value>``.

    The command is handed to ``misc.call_external_command_detached`` with log
    files enabled while the working directory is ``self.run_dir``. Nothing is
    returned.
    """
    logger.info('Building bcl2fastq command')
    config = CONFIG['analysis']
    with chdir(self.run_dir):
        bcl2fastq_config = config.get('bcl2fastq')
        cl = [bcl2fastq_config.get(self.run_type)]
        # A missing or empty 'options' entry simply adds nothing to the command
        for option in bcl2fastq_config.get('options') or []:
            if isinstance(option, dict):
                # Read the mapping instead of popitem()-ing it: the option
                # dictionaries live in the shared CONFIG and must stay intact
                # for any later call
                for opt, val in option.items():
                    cl.extend(['--{}'.format(opt), str(val)])
            else:
                cl.append('--{}'.format(option))
        logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                     " run {} on {}".format(os.path.basename(self.id), datetime.now())))
        misc.call_external_command_detached(cl, with_log_files=True)
def demultiplex_run(self):
    """Create SampleSheet.csv if needed and launch bcl2fastq for the flowcell.

    Steps:
        - locate the original samplesheet and parse it
        - unless ``<run_dir>/SampleSheet.csv`` already exists, write a cleaned
          copy of the samplesheet there
        - compute the base masks per lane; a lane with more than one base mask
          (different index sizes in the same lane) aborts the demultiplexing
        - build the bcl2fastq command from ``self.CONFIG['bcl2fastq']``, add one
          ``--use-bases-mask <lane>:<mask>`` per lane and hand it to
          ``misc.call_external_command_detached`` from within ``run_dir``

    Returns:
        True once the command has been launched; False when SampleSheet.csv
        could not be written or a lane has more than one base mask.
    """
    ssname = self._get_samplesheet()
    ssparser = SampleSheetParser(ssname)
    # The samplesheet must sit in the flowcell directory under the name
    # SampleSheet.csv (Illumina default); create it only when it is missing
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if not os.path.exists(samplesheet_dest):
        try:
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(_generate_clean_samplesheet(ssparser,
                                                      fields_to_remove=['index2'],
                                                      rename_samples=True,
                                                      rename_qPCR_suffix=True,
                                                      fields_qPCR=['SampleName']))
        except Exception as e:
            # Do not leave a partial file behind: the next call would find it,
            # skip the generation and demultiplex with a truncated samplesheet
            if os.path.exists(samplesheet_dest):
                os.remove(samplesheet_dest)
            # Log the exception itself; generic exceptions have no .text attribute
            logger.error(e)
            return False
        logger.info(("Created SampleSheet.csv for Flowcell {} in {} ".format(self.id, samplesheet_dest)))
    # From here on the SampleSheet.csv in the run directory is the one to use
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)

    per_lane_base_masks = self._generate_per_lane_base_mask()
    max_different_base_masks = max([len(per_lane_base_masks[lane]) for lane in per_lane_base_masks])
    if max_different_base_masks > 1:
        # In a HiSeqX run different index sizes cannot share the SAME lane
        logger.error("In FC {} found one or more lane with more than one base mask "
                     "(i.e., different index sizes in the same lane)".format(self.id))
        return False

    logger.info('Building bcl2fastq command')
    with chdir(self.run_dir):
        bcl2fastq_config = self.CONFIG.get('bcl2fastq')
        cl = [bcl2fastq_config['bin']]
        # Append all options that appear in the configuration file: a mapping
        # yields "--name value", anything else a bare "--flag"
        for option in bcl2fastq_config.get('options') or []:
            if isinstance(option, dict):
                for opt, val in option.items():
                    cl.extend(['--{}'.format(opt), str(val)])
            else:
                cl.append('--{}'.format(option))
        # Add the base mask of each lane; the check above guarantees that no
        # lane has more than one
        for lane in sorted(per_lane_base_masks):
            base_mask = list(per_lane_base_masks[lane].values())[0]['base_mask']
            base_mask_expr = "{}:".format(lane) + ",".join(base_mask)
            cl.extend(["--use-bases-mask", base_mask_expr])
        logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                     " run {} on {}".format(os.path.basename(self.id), datetime.now())))
        misc.call_external_command_detached(cl, with_log_files=True)
    return True
def demultiplex_run(self):
    """Write the sub-samplesheets and launch bcl2fastq for normal and 10X lanes.

    Steps:
        - when the flowcell has both normal and 10X lanes, write
          SampleSheet_0.csv (normal lanes) and SampleSheet_1.csv (10X lanes) in
          the run directory; otherwise write a single SampleSheet_0.csv for
          whichever kind of lane is present
        - compute the base masks per lane; a lane with more than one base mask
          (different index sizes in the same lane) aborts the demultiplexing
        - create the "Demultiplexing" directory if missing and launch one
          detached bcl2fastq command for the normal lanes and/or one for the
          10X lanes, numbered in that order

    Returns:
        True once the commands have been launched; False when a lane has more
        than one base mask.
    """
    # Pair every samplesheet to write with the lanes it has to describe
    if len(self.lanes_10X) and len(self.lanes_not_10X):
        samplesheets = [("SampleSheet_0.csv", self.lanes_not_10X),
                        ("SampleSheet_1.csv", self.lanes_10X)]
    else:
        samplesheets = [("SampleSheet_0.csv", (self.lanes_10X or self.lanes_not_10X))]
    with chdir(self.run_dir):
        for samplesheet_dest, lanes in samplesheets:
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(_generate_samplesheet_subset(self.runParserObj.samplesheet, lanes))

    per_lane_base_masks = self._generate_per_lane_base_mask()
    max_different_base_masks = max([len(per_lane_base_masks[lane]) for lane in per_lane_base_masks])
    if max_different_base_masks > 1:
        # In a HiSeqX run different index sizes cannot share the SAME lane
        logger.error("In FC {} found one or more lane with more than one base mask "
                     "(i.e., different index sizes in the same lane)".format(self.id))
        return False

    bcl2fastq_cmd_counter = 0
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir changes the run status to IN_PROGRESS
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        if self.lanes_not_10X:
            cmd_normal = self.generate_bcl_command(self.lanes_not_10X, bcl2fastq_cmd_counter)
            misc.call_external_command_detached(cmd_normal,
                                                with_log_files=True,
                                                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                         "normal run {} on {}".format(os.path.basename(self.id), datetime.now())))
            bcl2fastq_cmd_counter += 1
        if self.lanes_10X:
            cmd_10X = self.generate_bcl_command(self.lanes_10X, bcl2fastq_cmd_counter, is_10X=True)
            misc.call_external_command_detached(cmd_10X,
                                                with_log_files=True,
                                                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                         "10X run {} on {}".format(os.path.basename(self.id), datetime.now())))
            bcl2fastq_cmd_counter += 1
    return True
def test_call_external_command_detached(self):
    """Call external command detached.

    Runs ``touch`` on a file below ``self.rootdir`` through
    ``misc.call_external_command_detached`` and checks that both the touched
    file and the ``test_det_touch`` log files show up. The log files are
    removed even when an assertion fails, so a broken run leaves nothing
    behind in the working directory.
    """
    new_file = os.path.join(self.rootdir, 'test_call_external_det')
    log_files = ('test_det_touch.out', 'test_det_touch.err')
    command = 'touch ' + new_file
    try:
        misc.call_external_command_detached(command, with_log_files=True, prefix='test_det')
        # Give the detached process a moment to run
        time.sleep(0.1)
        self.assertTrue(os.path.isfile(new_file))
        for log_file in log_files:
            self.assertTrue(os.path.isfile(log_file))
    finally:
        for log_file in log_files:
            if os.path.exists(log_file):
                os.remove(log_file)
def demultiplex_run(self):
    """Create SampleSheet.csv if needed and launch bcl2fastq for the run.

    Steps:
        - give up when the samplesheet ``self.ssname`` does not exist
        - unless ``<run_dir>/SampleSheet.csv`` already exists, write a cleaned
          copy of the samplesheet there (a partial file is removed on failure)
        - build the bcl2fastq command from ``self.CONFIG['bcl2fastq']`` and hand
          it to ``misc.call_external_command_detached`` from within ``run_dir``

    Returns:
        True once the command has been launched; False when the samplesheet
        is missing or SampleSheet.csv could not be written.
    """
    if not os.path.exists(self.ssname):
        # We should not get here really and this run should be defined as NON NGI-RUN
        return False
    # TODO SampleSheetParser may throw an exception
    ssparser = SampleSheetParser(self.ssname)
    # The samplesheet must sit in the flowcell directory under the name
    # SampleSheet.csv (Illumina default); create it only when it is missing
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if not os.path.exists(samplesheet_dest):
        try:
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(self._generate_clean_samplesheet(ssparser))
        except Exception as e:
            # Never leave a half-written samplesheet behind
            if os.path.exists(samplesheet_dest):
                os.remove(samplesheet_dest)
            logger.error(e)
            return False
        logger.info(("Created SampleSheet.csv for Flowcell {} in {} ".format(self.id, samplesheet_dest)))
    # SampleSheet.csv generated to be used in bcl2fastq
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)
    # Make the demux call
    with chdir(self.run_dir):
        bcl2fastq_config = self.CONFIG.get('bcl2fastq')
        cl = [bcl2fastq_config['bin']]
        # Append all options that appear in the configuration file: a mapping
        # yields "--name value" for each of its entries, anything else a bare
        # "--flag". A missing or empty 'options' entry adds nothing.
        for option in bcl2fastq_config.get('options') or []:
            if isinstance(option, dict):
                for opt, val in option.items():
                    cl.extend(['--{}'.format(opt), str(val)])
            else:
                cl.append('--{}'.format(option))
        logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                     " run {} on {}".format(os.path.basename(self.id), datetime.now())))
        misc.call_external_command_detached(cl, with_log_files=True)
    return True
def demultiplex_run(self):
    """Create SampleSheet.csv if needed and launch bcl2fastq for the run.

    Steps:
        - give up when the samplesheet ``self.ssname`` does not exist
        - unless ``<run_dir>/SampleSheet.csv`` already exists, write a cleaned
          copy of the samplesheet there (a partial file is removed on failure)
        - build the bcl2fastq command from ``self.CONFIG['bcl2fastq']`` and hand
          it to ``misc.call_external_command_detached`` from within ``run_dir``

    Returns:
        True once the command has been launched; False when the samplesheet
        is missing or SampleSheet.csv could not be written.
    """
    if not os.path.exists(self.ssname):
        # We should not get here really and this run should be defined as NON NGI-RUN
        return False
    # TODO SampleSheetParser may throw an exception
    ssparser = SampleSheetParser(self.ssname)
    # The samplesheet must sit in the flowcell directory under the name
    # SampleSheet.csv (Illumina default); create it only when it is missing
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if not os.path.exists(samplesheet_dest):
        try:
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(self._generate_clean_samplesheet(ssparser))
        except Exception as e:
            # Never leave a half-written samplesheet behind
            if os.path.exists(samplesheet_dest):
                os.remove(samplesheet_dest)
            logger.error(e)
            return False
        logger.info(("Created SampleSheet.csv for Flowcell {} in {} "
                     .format(self.id, samplesheet_dest)))
    # SampleSheet.csv generated to be used in bcl2fastq
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)
    # Make the demux call
    with chdir(self.run_dir):
        bcl2fastq_config = self.CONFIG.get('bcl2fastq')
        cl = [bcl2fastq_config['bin']]
        # Append all options that appear in the configuration file: a mapping
        # yields "--name value" for each of its entries, anything else a bare
        # "--flag". A missing or empty 'options' entry adds nothing.
        for option in bcl2fastq_config.get('options') or []:
            if isinstance(option, dict):
                for opt, val in option.items():
                    cl.extend(['--{}'.format(opt), str(val)])
            else:
                cl.append('--{}'.format(option))
        logger.info(("BCL to FASTQ conversion and demultiplexing started for "
                     " run {} on {}".format(os.path.basename(self.id), datetime.now())))
        misc.call_external_command_detached(cl, with_log_files=True)
    return True
def demultiplex_run(self):
    """Launch one bcl2fastq command per sample type and index length.

    ``self.sample_table`` maps each lane to a list of samples; a sample is
    indexed as ``sample[0]`` (its name) and ``sample[1]`` (a dict read for the
    keys 'sample_type' and 'index_length').

    For every sample type, in sorted order, the distinct index lengths seen in
    each lane are collected in first-seen order. The i-th command of a sample
    type covers, in every lane that has an i-th index length, the samples of
    that type with that index length. For each command a SampleSheet_<n>.csv
    is written in the run directory, the "Demultiplexing" directory is created
    if missing and the command built by ``generate_bcl_command`` is launched
    detached with the log prefix demux_<n>.

    Returns True.
    """
    # Distinct sample types, in order of first appearance
    sample_types = []
    for lane_samples in self.sample_table.values():
        for entry in lane_samples:
            entry_type = entry[1]['sample_type']
            if entry_type not in sample_types:
                sample_types.append(entry_type)

    bcl2fastq_cmd_counter = 0
    for sample_type in sorted(sample_types):
        # Index lengths used by this sample type, lane by lane
        index_lengths_by_lane = {}
        for lane_id, lane_samples in self.sample_table.items():
            for entry in lane_samples:
                info = entry[1]
                entry_type = info['sample_type']
                entry_index_length = info['index_length']
                if entry_type != sample_type:
                    continue
                known_lengths = index_lengths_by_lane.setdefault(lane_id, [])
                if entry_index_length not in known_lengths:
                    known_lengths.append(entry_index_length)

        # The lane with the most index lengths dictates how many commands
        # this sample type needs
        rounds = max(len(lengths) for lengths in index_lengths_by_lane.values())

        for round_no in range(rounds):
            # lane -> names of the samples to put in the sub-samplesheet
            samples_to_include = {}
            # lane -> index length used to generate the masks
            mask_table = {}
            for lane_id, lane_samples in self.sample_table.items():
                try:
                    wanted_length = index_lengths_by_lane[lane_id][round_no]
                    mask_table[lane_id] = wanted_length
                    for entry in lane_samples:
                        entry_name = entry[0]
                        info = entry[1]
                        entry_type = info['sample_type']
                        entry_index_length = info['index_length']
                        if entry_type == sample_type and entry_index_length == wanted_length:
                            samples_to_include.setdefault(lane_id, []).append(entry_name)
                except (KeyError, IndexError):
                    logger.info(('No corresponding mask in lane {}. Skip it.'.format(lane_id)))

            with chdir(self.run_dir):
                # Sub-samplesheet for this command
                samplesheet_dest = 'SampleSheet_{}.csv'.format(bcl2fastq_cmd_counter)
                with open(samplesheet_dest, 'w') as fcd:
                    fcd.write(_generate_samplesheet_subset(self.runParserObj.samplesheet,
                                                           samples_to_include))
                # Creating the Demultiplexing dir changes the status to IN_PROGRESS
                if not os.path.exists('Demultiplexing'):
                    os.makedirs('Demultiplexing')
                # Build and launch the demultiplexing command
                cmd = self.generate_bcl_command(sample_type, mask_table, bcl2fastq_cmd_counter)
                misc.call_external_command_detached(cmd,
                                                    with_log_files=True,
                                                    prefix='demux_{}'.format(bcl2fastq_cmd_counter))
                logger.info(('BCL to FASTQ conversion and demultiplexing '
                             'started for run {} on {}'.format(os.path.basename(self.id),
                                                               datetime.now())))

            # One command launched for this mask; the next one gets the next number
            bcl2fastq_cmd_counter += 1
    return True
def compute_undetermined(self):
    """Make sure the undetermined-index statistics exist for the NoIndex lanes.

    Returns True when nothing (more) has to be done and False when the caller
    has to wait or the run cannot be processed:

        - no samplesheet row has "NoIndex" in its index: return True
        - ``<run_dir>/Demultiplexing_NoIndex`` does not exist yet: either launch
          a dedicated bcl2fastq command for the NoIndex lanes and return False,
          or (run without any index) symlink the per-lane stats files found in
          ``Demultiplexing_0/Stats`` into the demux Stats folder and return True
        - ``Demultiplexing_NoIndex/Stats/DemultiplexingStats.xml`` is not there
          yet: return False
        - an "ongoing" flag file is present: return False
        - every NoIndex lane already has its DemuxSummaryF1L<lane>.txt:
          return True
        - otherwise count the index pairs of every NoIndex lane, write the
          DemuxSummary files, regenerate lane.html and laneBarcode.html and
          return True (False if a lane index is not exactly "NoIndex")
    """
    NoIndexLanes = [lane["Lane"] for lane in self.runParserObj.samplesheet.data if "NoIndex" in lane["index"]]
    if len(NoIndexLanes) == 0:
        return True  # everything is fine, QC can proceed

    NoIndex_Undetermiend = os.path.join(self.run_dir, "Demultiplexing_NoIndex")
    if not os.path.exists(NoIndex_Undetermiend):
        # These lanes were demultiplexed without index, so they have no
        # undetermined reads yet. Generate the base masks per lane and keep
        # only those of the NoIndex lanes.
        per_lane_base_masks = self._generate_per_lane_base_mask()
        per_lane_base_masks_NoIndex = {}
        run_with_no_index = False  # flags the C.Daub case
        for NoIndexLane in NoIndexLanes:
            per_lane_base_masks_NoIndex[NoIndexLane] = per_lane_base_masks[NoIndexLane]
            base_mask_key = list(per_lane_base_masks[NoIndexLane])[0]
            lane_base_mask = per_lane_base_masks_NoIndex[NoIndexLane][base_mask_key]["base_mask"]
            # NOTE(review): this is true for every non-empty base mask, so the
            # mask inversion below only runs for an empty mask -- confirm the
            # intended condition
            if len(lane_base_mask):
                # C.Daub_15_01 case, only one sample per lane and no index at all
                run_with_no_index = True
            else:
                # Swap the roles of the reads: Y elements become N and vice versa
                new_base_mask = []
                for baseMask_element in lane_base_mask:
                    if baseMask_element.startswith("Y"):
                        new_base_mask.append(baseMask_element.replace("Y", "N"))
                    elif baseMask_element.startswith("N"):
                        new_base_mask.append(baseMask_element.replace("N", "Y"))
                per_lane_base_masks_NoIndex[NoIndexLane][base_mask_key]["base_mask"] = new_base_mask

        if not run_with_no_index:
            os.makedirs(NoIndex_Undetermiend)
            command = self._generate_bcl2fastq_command(
                per_lane_base_masks_NoIndex, True, "NoIndex", mask_short_adapter_reads=True
            )
            with chdir(self.run_dir):
                misc.call_external_command_detached(command, with_log_files=True, prefix="demux_NoIndex")
            # The NoIndex demultiplexing has to finish before anything else can be done
            return False

        # There is no index at all, so no extra demultiplexing is started.
        # Softlink what is already in Stats instead of recomputing it.
        missingStatsFiles = glob.glob(os.path.join(self.run_dir, "Demultiplexing_0", "Stats", "*F*L*.txt"))
        destination = os.path.join(self.run_dir, self.demux_dir, "Stats")
        for source in missingStatsFiles:
            source_file_name = os.path.basename(source)
            if not os.path.exists(os.path.join(destination, source_file_name)):
                os.symlink(source, os.path.join(destination, source_file_name))
        return True

    # The NoIndex demultiplexing has already been started
    if not os.path.exists(
        os.path.join(self.run_dir, "Demultiplexing_NoIndex", "Stats", "DemultiplexingStats.xml")
    ):
        # demultiplexing of undetermined is still ongoing
        logger.info("Demux of NoIndex lanes ongoing")
        return False
    logger.info("Demux of NoIndex lanes done.")

    # Now produce the files needed for the QC
    flag_file = os.path.join(NoIndex_Undetermiend, "ongoing")
    if os.path.exists(flag_file):
        # a previous instance of TACA is running and still processing this FC
        logger.info("Counting of undetermined indexes for NoIndex lanes ongoing")
        return False

    stats_dir = os.path.join(self.run_dir, self.demux_dir, "Stats")
    # Nothing to do when every NoIndex lane already has its summary
    computed = True
    for lane_id in NoIndexLanes:
        if not os.path.exists(os.path.join(stats_dir, "DemuxSummaryF1L{}.txt".format(lane_id))):
            computed = False
    if computed:
        return True

    # Create the flag file indicating that this flowcell is being worked on.
    # NOTE(review): the flag is only removed on success; the error return
    # below and any exception leave it behind, which makes every later call
    # report "ongoing" -- confirm that this is intended
    open(flag_file, "a").close()
    for lane_id in NoIndexLanes:
        # Count the index occurrences; each lane corresponds to one project,
        # a project might have multiple lanes
        current_lane = [lane for lane in self.runParserObj.samplesheet.data if lane_id == lane["Lane"]][0]
        if current_lane["index"] != "NoIndex":
            logger.error(
                "while processing run {} NoIndex lane {}, index {} found in SampleSheet".format(
                    self.id, lane_id, current_lane["index"]
                )
            )
            return False
        sample_dir = os.path.join(
            NoIndex_Undetermiend,
            current_lane[self.runParserObj.samplesheet.dfield_proj],
            current_lane[self.runParserObj.samplesheet.dfield_sid],
        )
        # Both files are assumed to be always present
        indexes_fastq1 = glob.glob(
            os.path.join(
                sample_dir,
                "{}_S?_L00{}_R2_001.fastq.gz".format(
                    current_lane[self.runParserObj.samplesheet.dfield_snm], lane_id
                ),
            )
        )[0]
        indexes_fastq2 = glob.glob(
            os.path.join(
                sample_dir,
                "{}_S?_L00{}_R3_001.fastq.gz".format(
                    current_lane[self.runParserObj.samplesheet.dfield_snm], lane_id
                ),
            )
        )[0]
        logger.info("Computing Undetermiend indexes for NoIndex lane {}".format(lane_id))
        zcat = subprocess.Popen(["zcat", indexes_fastq1], stdout=subprocess.PIPE)
        # Stream the two files together: every line of the first file is
        # printed followed by a plus and the matching line of the second file
        awk = subprocess.Popen(
            [
                "awk",
                'BEGIN {{OFS="+"}}{{ ("zcat " "{0} " ) | getline line ; print $0,line }}'.format(
                    indexes_fastq2
                ),
            ],
            stdout=subprocess.PIPE,
            stdin=zcat.stdout,
        )
        # Keep only the 2nd line every 4 (i.e., only the index1+index2 line)
        sed = subprocess.Popen(["sed", "-n", "2~4p"], stdout=subprocess.PIPE, stdin=awk.stdout)
        zcat.stdout.close()
        awk.stdout.close()
        output = sed.communicate()[0]
        zcat.wait()
        awk.wait()
        index_counter = {}
        for barcode in output.split("\n")[:-1]:
            index_counter[barcode] = index_counter.get(barcode, 0) + 1
        demuxSummary_file = os.path.join(stats_dir, "DemuxSummaryF1L{}.txt".format(lane_id))
        with open(demuxSummary_file, "w") as demuxSummary_file_fh:
            demuxSummary_file_fh.write("### Most Popular Unknown Index Sequences\n")
            demuxSummary_file_fh.write("### Columns: Index_Sequence Hit_Count\n")
            for (index, occ) in sorted(index_counter.items(), key=operator.itemgetter(1), reverse=True):
                demuxSummary_file_fh.write("{}\t{}\n".format(index, occ))

    # Demultiplexing with NoIndex leaves many values of the lane and
    # laneBarcode html reports empty: fill them in
    undeterminedStats = DemuxSummaryParser(stats_dir)
    demux_folder = os.path.join(self.run_dir, "Demultiplexing")
    new_html_report_dir = _create_folder_structure(
        demux_folder, ["Reports", "html", self.flowcell_id, "all", "all", "all"]
    )

    sample_data_new = []
    for lane in self.runParserObj.lanes.sample_data:
        if lane["Lane"] in NoIndexLanes:
            PF_clusters = undeterminedStats.TOTAL[lane["Lane"]]
            lane["% One mismatchbarcode"] = "0"
            lane["% Perfectbarcode"] = "100"
            lane["% of thelane"] = "100"
            lane["PF Clusters"] = str(PF_clusters)
        sample_data_new.append(lane)
    self.runParserObj.lanes.sample_data = sample_data_new
    new_html_report_lane = os.path.join(new_html_report_dir, "lane.html")
    _generate_lane_html(new_html_report_lane, self.runParserObj.lanes)

    # Now do the same for laneBarcode
    sampleBarcode_data_new = []
    for sample in self.runParserObj.lanebarcodes.sample_data:
        if sample["Lane"] in NoIndexLanes:
            # Look the clusters up with the lane of THIS sample, not with the
            # variable left over from the loop over the lanes above
            PF_clusters = undeterminedStats.TOTAL[sample["Lane"]]
            sample["% One mismatchbarcode"] = "0"
            sample["% Perfectbarcode"] = "100"
            sample["% of thelane"] = "100"
            sample["PF Clusters"] = str(PF_clusters)
        sampleBarcode_data_new.append(sample)
    self.runParserObj.lanebarcodes.sample_data = sampleBarcode_data_new
    new_html_report_sampleBarcode = os.path.join(new_html_report_dir, "laneBarcode.html")
    _generate_lane_html(new_html_report_sampleBarcode, self.runParserObj.lanebarcodes)

    os.remove(flag_file)  # remove the flag file to allow future iterations on this FC
    return True  # everything that had to be done is done
def demultiplex_run(self):
    """Prepare SampleSheet.csv and launch the bcl2fastq commands for the run.

    Steps:
        - find the samplesheet; stop when there is none
        - copy it into the run directory as ``<flowcell_id>.csv`` (copied again
          on every call, as the samplesheet might have changed)
        - (over)write ``<run_dir>/SampleSheet.csv`` with the cleaned samplesheet
        - generate the base masks per lane: lanes with a single base mask share
          one bcl2fastq command, lanes with several base masks get one command
          per distinct mask
        - create the "Demultiplexing" directory if missing and launch every
          command detached, with the log prefix demux_<n>

    Returns:
        None when no samplesheet is found; False when SampleSheet.csv could
        not be written. NOTE(review): there is no explicit return after the
        commands have been launched, so success also yields None -- confirm
        what the callers expect.

    Raises:
        RuntimeError: when the samplesheet cannot be copied to the run directory.
    """
    ssname = self._get_samplesheet()
    if ssname is None:
        return None
    ssparser = SampleSheetParser(ssname)
    # Copy the original samplesheet locally. Copy again if already done as
    # there might have been changes to the samplesheet
    try:
        shutil.copy(ssname, os.path.join(self.run_dir, "{}.csv".format(self.flowcell_id)))
    except Exception:
        raise RuntimeError("unable to copy file {} to destination {}".format(ssname, self.run_dir))

    # This samplesheet has been created by the LIMS and copied by a
    # sequencing operator: it needs some editing before it can be used.
    # SampleSheet.csv will contain all the renaming needed by bcl2fastq-2.17
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if os.path.exists(samplesheet_dest):
        logger.info("SampleSheet.csv found ... overwriting it")
    try:
        with open(samplesheet_dest, "wb") as fcd:
            fcd.write(self._generate_clean_samplesheet(ssparser))
    except Exception as e:
        # Log the exception itself; generic exceptions have no .text attribute
        logger.error(e)
        return False
    logger.info(("Created SampleSheet.csv for Flowcell {} in {} ".format(self.id, samplesheet_dest)))
    # When demultiplexing, SampleSheet.csv is the samplesheet to use
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)

    # Generate the base masks per lane and decide how to demultiplex
    per_lane_base_masks = self._generate_per_lane_base_mask()
    # Not used below, but max() raises ValueError when there is no lane at
    # all, which stops the run before the Demultiplexing dir is created
    max_different_base_masks = max([len(per_lane_base_masks[lane]) for lane in per_lane_base_masks])

    # Simple lanes have exactly one base mask, complex lanes have several
    simple_lanes = {}
    complex_lanes = {}
    for lane in per_lane_base_masks:
        if len(per_lane_base_masks[lane]) == 1:
            simple_lanes[lane] = per_lane_base_masks[lane]
        else:
            complex_lanes[lane] = per_lane_base_masks[lane]

    bcl2fastq_commands = []
    bcl2fastq_command_num = 0
    # All the simple lanes go into a single command
    if len(simple_lanes) > 0:
        bcl2fastq_commands.append(self._generate_bcl2fastq_command(simple_lanes, True, bcl2fastq_command_num))
        bcl2fastq_command_num += 1

    # The complex lanes need one bcl2fastq command per distinct mask
    different_masks = set()
    for lane in complex_lanes:
        different_masks.update(complex_lanes[lane])
    for mask in different_masks:
        # Keep, for every lane that uses this mask, only this mask
        base_masks_complex_to_demux = {}
        for lane in complex_lanes:
            if mask in complex_lanes[lane]:
                base_masks_complex_to_demux[lane] = {mask: complex_lanes[lane][mask]}
        bcl2fastq_commands.append(
            self._generate_bcl2fastq_command(base_masks_complex_to_demux, True, bcl2fastq_command_num)
        )
        bcl2fastq_command_num += 1

    # bcl2fastq_commands now holds every command to execute
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir makes the status of this run IN_PROGRESS
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        for execution, bcl2fastq_command in enumerate(bcl2fastq_commands):
            misc.call_external_command_detached(
                bcl2fastq_command, with_log_files=True, prefix="demux_{}".format(execution)
            )
def demultiplex_run(self):
    """Create SampleSheet.csv if needed and launch bcl2fastq for the flowcell.

    Steps:
        - locate the original samplesheet and parse it
        - unless ``<run_dir>/SampleSheet.csv`` already exists, write a cleaned
          copy of the samplesheet there
        - compute the base masks per lane; a lane with more than one base mask
          (different index sizes in the same lane) aborts the demultiplexing
        - build the bcl2fastq command from ``self.CONFIG['bcl2fastq']``, add one
          ``--use-bases-mask <lane>:<mask>`` per lane and hand it to
          ``misc.call_external_command_detached`` from within ``run_dir``

    Returns:
        True once the command has been launched; False when SampleSheet.csv
        could not be written or a lane has more than one base mask.
    """
    ssname = self._get_samplesheet()
    ssparser = SampleSheetParser(ssname)
    # The samplesheet must sit in the flowcell directory under the name
    # SampleSheet.csv (Illumina default); create it only when it is missing
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if not os.path.exists(samplesheet_dest):
        try:
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(
                    _generate_clean_samplesheet(
                        ssparser,
                        fields_to_remove=['index2'],
                        rename_samples=True,
                        rename_qPCR_suffix=True,
                        fields_qPCR=[ssparser.dfield_snm]))
        except Exception as e:
            # Do not leave a partial file behind: the next call would find it,
            # skip the generation and demultiplex with a truncated samplesheet
            if os.path.exists(samplesheet_dest):
                os.remove(samplesheet_dest)
            # Log the exception itself; generic exceptions have no .text attribute
            logger.error(e)
            return False
        logger.info(
            ("Created SampleSheet.csv for Flowcell {} in {} ".format(
                self.id, samplesheet_dest)))
    # From here on the SampleSheet.csv in the run directory is the one to use
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)

    per_lane_base_masks = self._generate_per_lane_base_mask()
    max_different_base_masks = max([
        len(per_lane_base_masks[lane]) for lane in per_lane_base_masks
    ])
    if max_different_base_masks > 1:
        # In a HiSeqX run different index sizes cannot share the SAME lane
        logger.error(
            "In FC {} found one or more lane with more than one base mask "
            "(i.e., different index sizes in the same lane)".format(self.id))
        return False

    logger.info('Building bcl2fastq command')
    with chdir(self.run_dir):
        bcl2fastq_config = self.CONFIG.get('bcl2fastq')
        cl = [bcl2fastq_config['bin']]
        # Append all options that appear in the configuration file: a mapping
        # yields "--name value", anything else a bare "--flag"
        for option in bcl2fastq_config.get('options') or []:
            if isinstance(option, dict):
                for opt, val in option.items():
                    cl.extend(['--{}'.format(opt), str(val)])
            else:
                cl.append('--{}'.format(option))
        # Add the base mask of each lane; the check above guarantees that no
        # lane has more than one
        for lane in sorted(per_lane_base_masks):
            base_mask = list(per_lane_base_masks[lane].values())[0]['base_mask']
            base_mask_expr = "{}:".format(lane) + ",".join(base_mask)
            cl.extend(["--use-bases-mask", base_mask_expr])
        logger.info(
            ("BCL to FASTQ conversion and demultiplexing started for "
             " run {} on {}".format(os.path.basename(self.id), datetime.now())))
        misc.call_external_command_detached(cl, with_log_files=True)
    return True
def demultiplex_run(self):
    """Launch the bcl2fastq commands needed by the base masks of the run.

    The base masks are generated per lane. Lanes with a single base mask share
    one bcl2fastq command; lanes with several base masks get one command per
    distinct mask, each covering the lanes that use that mask. The
    "Demultiplexing" directory is created in the run directory if missing and
    every command is launched detached, with the log prefix demux_<n>.

    Nothing is returned explicitly.
    """
    # Generate the base masks per lane and decide how to demultiplex
    per_lane_base_masks = self._generate_per_lane_base_mask()
    # Not used below, but max() raises ValueError when there is no lane at
    # all, which stops the run before the Demultiplexing dir is created
    max_different_base_masks = max([
        len(per_lane_base_masks[lane]) for lane in per_lane_base_masks
    ])

    # Simple lanes have exactly one base mask, complex lanes have several
    simple_lanes = {}
    complex_lanes = {}
    for lane in per_lane_base_masks:
        if len(per_lane_base_masks[lane]) == 1:
            simple_lanes[lane] = per_lane_base_masks[lane]
        else:
            complex_lanes[lane] = per_lane_base_masks[lane]

    bcl2fastq_commands = []
    bcl2fastq_command_num = 0
    # All the simple lanes go into a single command
    if len(simple_lanes) > 0:
        bcl2fastq_commands.append(
            self._generate_bcl2fastq_command(simple_lanes, True,
                                             bcl2fastq_command_num))
        bcl2fastq_command_num += 1

    # The complex lanes need one bcl2fastq command per distinct mask
    different_masks = set()
    for lane in complex_lanes:
        different_masks.update(complex_lanes[lane])
    for mask in different_masks:
        # Keep, for every lane that uses this mask, only this mask
        base_masks_complex_to_demux = {}
        for lane in complex_lanes:
            if mask in complex_lanes[lane]:
                base_masks_complex_to_demux[lane] = {mask: complex_lanes[lane][mask]}
        bcl2fastq_commands.append(
            self._generate_bcl2fastq_command(base_masks_complex_to_demux,
                                             True, bcl2fastq_command_num))
        bcl2fastq_command_num += 1

    # bcl2fastq_commands now holds every command to execute
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir makes the status of this run IN_PROGRESS
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        for execution, bcl2fastq_command in enumerate(bcl2fastq_commands):
            misc.call_external_command_detached(
                bcl2fastq_command,
                with_log_files=True,
                prefix="demux_{}".format(execution))
def demultiplex_run(self):
    """Demultiplex a Xten run, handling lanes with 10X indexes separately.

    Steps:
    - find the samplesheet and the 10X index file path from the configuration
    - create ``SampleSheet.csv`` in the run directory unless already present
    - write ``SampleSheet_0.csv`` (non-10X lanes) and ``SampleSheet_1.csv``
      (10X lanes) when both kinds of lanes exist, otherwise copy
      ``SampleSheet.csv`` to ``SampleSheet_0.csv``
    - refuse runs where a lane has more than one base mask
    - start one bcl2fastq conversion per kind of lane

    :returns: True once the conversion(s) have been started, False if the
        samplesheet could not be created or a lane has several base masks
    :raises RuntimeError: if the 10X index path is missing from the configuration
    """
    ssname = self._get_samplesheet()
    ssparser = SampleSheetParser(ssname)
    try:
        indexfile = self.CONFIG['bcl2fastq']['index_path']
    except KeyError:
        message = "Path to index file (10X) not found in the config file"
        logger.error(message)
        raise RuntimeError(message)
    # The samplesheet needs to be in the FC directory with name SampleSheet.csv
    # (Illumina default); if it is not there, create it.
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    # Find which lanes contain 10X samples.
    (lanes_10X, lanes_not_10X) = look_for_lanes_with_10X_indicies(
        indexfile, ssparser)
    if not os.path.exists(samplesheet_dest):
        try:
            # Build the content BEFORE opening the destination: otherwise a
            # failure here leaves an empty SampleSheet.csv behind, and the
            # next invocation would skip generation and use the empty file.
            clean_samplesheet = _generate_clean_samplesheet(
                ssparser,
                indexfile,
                fields_to_remove=['index2'],
                rename_samples=True,
                rename_qPCR_suffix=True,
                fields_qPCR=[ssparser.dfield_snm])
            with open(samplesheet_dest, 'wb') as fcd:
                fcd.write(clean_samplesheet)
        except Exception as e:
            logger.error(
                "encountered the following exception '{}'".format(e))
            return False
        logger.info(
            ("Created SampleSheet.csv for Flowcell {} in {} ".format(
                self.id, samplesheet_dest)))
    # When demultiplexing, SampleSheet.csv is the samplesheet to use.
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)

    # With both 10X and non-10X lanes the samplesheet must be split, as the
    # 10X lanes need their own bcl2fastq command.
    is_mixed_run = bool(len(lanes_10X) and len(lanes_not_10X))
    with chdir(self.run_dir):
        if is_mixed_run:
            with open("SampleSheet_0.csv", 'wb') as fcd:
                fcd.write(
                    _generate_samplesheet_subset(
                        self.runParserObj.samplesheet, lanes_not_10X))
            with open("SampleSheet_1.csv", 'wb') as fcd:
                fcd.write(
                    _generate_samplesheet_subset(
                        self.runParserObj.samplesheet, lanes_10X))
        else:
            shutil.copy("SampleSheet.csv", "SampleSheet_0.csv")

    per_lane_base_masks = self._generate_per_lane_base_mask()
    max_different_base_masks = max(
        len(masks) for masks in per_lane_base_masks.values())
    if max_different_base_masks > 1:
        # In a HiSeqX run there cannot be different index sizes in the SAME lane.
        logger.error(
            "In FC {} found one or more lane with more than one base mask "
            "(i.e., different index sizes in the same lane)".format(self.id))
        return False

    bcl2fastq_cmd_counter = 0
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir changes the status to IN_PROGRESS.
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        if lanes_not_10X:
            cmd_normal = self.generate_bcl_command(lanes_not_10X,
                                                   bcl2fastq_cmd_counter)
            misc.call_external_command_detached(
                cmd_normal,
                with_log_files=True,
                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(
                ("BCL to FASTQ conversion and demultiplexing started for "
                 "normal run {} on {}".format(os.path.basename(self.id),
                                              datetime.now())))
            bcl2fastq_cmd_counter += 1
        if lanes_10X:
            cmd_10X = self.generate_bcl_command(lanes_10X,
                                                bcl2fastq_cmd_counter,
                                                is_10X=True)
            misc.call_external_command_detached(
                cmd_10X,
                with_log_files=True,
                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(
                ("BCL to FASTQ conversion and demultiplexing started for "
                 "10X run {} on {}".format(os.path.basename(self.id),
                                           datetime.now())))
            bcl2fastq_cmd_counter += 1
    return True
def compute_undetermined(self):
    """Tell whether all demultiplexing steps are done so QC can proceed.

    Lanes whose samplesheet index is not ``NoIndex`` need no extra work.
    For ``NoIndex`` lanes the undetermined-index statistics are not produced
    by the main demultiplexing, so this method either starts a dedicated
    ``Demultiplexing_NoIndex`` conversion, waits for it, or counts the index
    reads itself and rewrites the lane / laneBarcode html reports.

    :returns: True when nothing (more) has to be done: no NoIndex lane, the
        run has no index at all (stats files are symlinked), or the
        statistics are available. False when the caller has to wait (NoIndex
        demultiplexing just started or still running, another instance is
        counting) or when the samplesheet is inconsistent.
    """
    NoIndexLanes = [
        lane["Lane"] for lane in self.runParserObj.samplesheet.data
        if "NoIndex" in lane["index"]
    ]
    if not NoIndexLanes:
        return True  # everything is fine, QC can proceed

    NoIndex_Undetermiend = os.path.join(self.run_dir, "Demultiplexing_NoIndex")
    if not os.path.exists(NoIndex_Undetermiend):
        # These lanes have no undetermined reads, as they were demultiplexed
        # without index. Generate the base masks and keep the NoIndex lanes.
        per_lane_base_masks = self._generate_per_lane_base_mask()
        per_lane_base_masks_NoIndex = {}
        run_with_no_index = False  # flags the "no index at all" case
        for NoIndexLane in NoIndexLanes:
            lane_masks = per_lane_base_masks[NoIndexLane]
            per_lane_base_masks_NoIndex[NoIndexLane] = lane_masks
            base_mask_key = list(lane_masks)[0]
            # NOTE(review): as written, any NON-EMPTY base mask takes the
            # "no index at all" branch, so the Y<->N swap below only ever
            # runs on an empty mask. Confirm the intended condition.
            if len(lane_masks[base_mask_key]['base_mask']):
                # C.Daub_15_01 case: only one sample per lane and no index at all
                run_with_no_index = True
            else:
                new_base_mask = []
                for baseMask_element in lane_masks[base_mask_key]['base_mask']:
                    if baseMask_element.startswith("Y"):
                        new_base_mask.append(
                            baseMask_element.replace("Y", "N"))
                    elif baseMask_element.startswith("N"):
                        new_base_mask.append(
                            baseMask_element.replace("N", "Y"))
                lane_masks[base_mask_key]['base_mask'] = new_base_mask

        if not run_with_no_index:
            os.makedirs(NoIndex_Undetermiend)
            command = self._generate_bcl2fastq_command(
                per_lane_base_masks_NoIndex,
                True,
                "NoIndex",
                mask_short_adapter_reads=True)
            with chdir(self.run_dir):
                misc.call_external_command_detached(command,
                                                    with_log_files=True,
                                                    prefix="demux_NoIndex")
            # The NoIndex demultiplexing has to finish before going on.
            return False

        # No index at all: do not start a demux, only soft-link what is in
        # Demultiplexing_0/Stats to avoid recomputing it.
        missingStatsFiles = glob.glob(
            os.path.join(self.run_dir, "Demultiplexing_0", "Stats",
                         "*F*L*.txt"))
        destination = os.path.join(self.run_dir, self.demux_dir, "Stats")
        for source in missingStatsFiles:
            link_name = os.path.join(destination, os.path.basename(source))
            if not os.path.exists(link_name):
                os.symlink(source, link_name)
        return True

    # From here on the NoIndex demultiplexing has already been started.
    if not os.path.exists(
            os.path.join(self.run_dir, "Demultiplexing_NoIndex", 'Stats',
                         'DemultiplexingStats.xml')):
        logger.info("Demux of NoIndex lanes ongoing")
        return False
    logger.info("Demux of NoIndex lanes done.")

    # Now produce the files needed by the QC.
    flag_file = os.path.join(NoIndex_Undetermiend, "ongoing")
    if os.path.exists(flag_file):
        # A previous instance of TACA is still processing this FC.
        logger.info(
            "Counting of undetermined indexes for NoIndex lanes ongoing")
        return False

    stats_dir = os.path.join(self.run_dir, self.demux_dir, "Stats")
    summary_files = [
        os.path.join(stats_dir, "DemuxSummaryF1L{}.txt".format(lane_id))
        for lane_id in NoIndexLanes
    ]
    if all(os.path.exists(summary) for summary in summary_files):
        # All the needed demux stats have already been computed.
        return True

    # Create the flag file indicating this instance is working on the FC.
    # NOTE(review): the flag is still left behind if an exception escapes the
    # code below; only the explicit failure path removes it.
    open(flag_file, 'a').close()
    samplesheet = self.runParserObj.samplesheet
    for lane_id in NoIndexLanes:
        # Count the index occurrences; each lane corresponds to one project,
        # a project might have multiple lanes.
        current_lane = [
            lane for lane in samplesheet.data if lane_id == lane["Lane"]
        ][0]
        if current_lane["index"] != "NoIndex":
            logger.error(
                "while processing run {} NoIndex lane {}, index {} found in SampleSheet"
                .format(self.id, lane_id, current_lane["index"]))
            # Do not leave the flag behind, otherwise every later invocation
            # would report the counting as "ongoing" forever.
            os.remove(flag_file)
            return False
        sample_dir = os.path.join(NoIndex_Undetermiend,
                                  current_lane[samplesheet.dfield_proj],
                                  current_lane[samplesheet.dfield_sid])
        sample_name = current_lane[samplesheet.dfield_snm]
        # Both files are assumed to be always present (IndexError otherwise).
        indexes_fastq1 = glob.glob(
            os.path.join(
                sample_dir,
                "{}_S?_L00{}_R2_001.fastq.gz".format(sample_name,
                                                     lane_id)))[0]
        indexes_fastq2 = glob.glob(
            os.path.join(
                sample_dir,
                "{}_S?_L00{}_R3_001.fastq.gz".format(sample_name,
                                                     lane_id)))[0]
        logger.info(
            "Computing Undetermiend indexes for NoIndex lane {}".format(
                lane_id))
        zcat = subprocess.Popen(['zcat', indexes_fastq1],
                                stdout=subprocess.PIPE)
        # Stream the two files together, printing them line after line
        # separated by a plus.
        awk = subprocess.Popen([
            'awk',
            'BEGIN {{OFS="+"}}{{ ("zcat " "{0} " ) | getline line ; print $0,line }}'
            .format(indexes_fastq2)
        ],
                               stdout=subprocess.PIPE,
                               stdin=zcat.stdout)
        # Select only the 2nd line every 4 (i.e., the index1+index2 line).
        sed = subprocess.Popen(['sed', '-n', "2~4p"],
                               stdout=subprocess.PIPE,
                               stdin=awk.stdout)
        zcat.stdout.close()
        awk.stdout.close()
        output = sed.communicate()[0]
        zcat.wait()
        awk.wait()
        index_counter = {}
        for barcode in output.split('\n')[:-1]:
            index_counter[barcode] = index_counter.get(barcode, 0) + 1
        demuxSummary_file = os.path.join(
            stats_dir, "DemuxSummaryF1L{}.txt".format(lane_id))
        with open(demuxSummary_file, 'w') as demuxSummary_file_fh:
            demuxSummary_file_fh.write(
                "### Most Popular Unknown Index Sequences\n")
            demuxSummary_file_fh.write(
                "### Columns: Index_Sequence Hit_Count\n")
            for (index, occ) in sorted(index_counter.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True):
                demuxSummary_file_fh.write("{}\t{}\n".format(index, occ))

    # Fill in the lane and laneBarcode html reports: demultiplexing with
    # NoIndex leaves many of their values empty.
    undeterminedStats = DemuxSummaryParser(stats_dir)
    sample_data_new = []
    for lane in self.runParserObj.lanes.sample_data:
        if lane["Lane"] in NoIndexLanes:
            PF_clusters = undeterminedStats.TOTAL[lane["Lane"]]
            lane["% One mismatchbarcode"] = '0'
            lane["% Perfectbarcode"] = '100'
            lane["% of thelane"] = '100'
            lane["PF Clusters"] = str(PF_clusters)
        sample_data_new.append(lane)
    self.runParserObj.lanes.sample_data = sample_data_new
    demux_folder = os.path.join(self.run_dir, "Demultiplexing")
    new_html_report_lane_dir = _create_folder_structure(
        demux_folder,
        ["Reports", "html", self.flowcell_id, "all", "all", "all"])
    new_html_report_lane = os.path.join(new_html_report_lane_dir,
                                        "lane.html")
    _generate_lane_html(new_html_report_lane, self.runParserObj.lanes)

    # Now do the same for laneBarcode.
    sampleBarcode_data_new = []
    for sample in self.runParserObj.lanebarcodes.sample_data:
        if sample["Lane"] in NoIndexLanes:
            # Look the total up with the lane of THIS sample (the previous
            # code reused the last lane of the loop above).
            PF_clusters = undeterminedStats.TOTAL[sample["Lane"]]
            sample["% One mismatchbarcode"] = '0'
            sample["% Perfectbarcode"] = '100'
            sample["% of thelane"] = '100'
            sample["PF Clusters"] = str(PF_clusters)
        sampleBarcode_data_new.append(sample)
    self.runParserObj.lanebarcodes.sample_data = sampleBarcode_data_new
    new_html_report_sampleBarcode_dir = _create_folder_structure(
        demux_folder,
        ["Reports", "html", self.flowcell_id, "all", "all", "all"])
    new_html_report_sampleBarcode = os.path.join(
        new_html_report_sampleBarcode_dir, "laneBarcode.html")
    _generate_lane_html(new_html_report_sampleBarcode,
                        self.runParserObj.lanebarcodes)

    # Remove the flag file to allow future iterations on this FC.
    os.remove(flag_file)
    return True
def demultiplex_run(self):
    """Demultiplex a HiSeq run.

    Steps:
    - find the samplesheet and copy it into the run directory as
      ``<flowcell_id>.csv``
    - (re)generate ``SampleSheet.csv`` in the run directory from it
    - generate the base masks per lane; lanes with a single base mask share
      one bcl2fastq command, lanes with several base masks (multiple index
      lengths) get one command per distinct base mask
    - start the bcl2fastq conversion(s)

    :returns: None when no samplesheet is found or once the conversions have
        been started, False when ``SampleSheet.csv`` could not be written
    :raises RuntimeError: if the original samplesheet cannot be copied
    :raises ValueError: if no base mask was generated for any lane
    """
    ssname = self._get_samplesheet()
    if ssname is None:
        return None
    ssparser = SampleSheetParser(ssname)
    # Copy the original samplesheet locally. Copy again if already done, as
    # there might have been changes to the samplesheet.
    try:
        shutil.copy(
            ssname,
            os.path.join(self.run_dir, "{}.csv".format(self.flowcell_id)))
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must pass through.
        raise RuntimeError(
            "unable to copy file {} to destination {}".format(
                ssname, self.run_dir))

    # The samplesheet created by the LIMS is not ready to be used: the edited
    # version to be used with bcl2fastq-2.17 goes into SampleSheet.csv.
    samplesheet_dest = os.path.join(self.run_dir, "SampleSheet.csv")
    if os.path.exists(samplesheet_dest):
        logger.info("SampleSheet.csv found ... overwriting it")
    try:
        # Build the content first, so that a failure does not truncate an
        # already existing SampleSheet.csv.
        clean_samplesheet = self._generate_clean_samplesheet(ssparser)
        with open(samplesheet_dest, 'wb') as fcd:
            fcd.write(clean_samplesheet)
    except Exception as e:
        # Generic exceptions have no ``text`` attribute: log the exception itself.
        logger.error("encountered the following exception '{}'".format(e))
        return False
    logger.info(("Created SampleSheet.csv for Flowcell {} in {} ".format(
        self.id, samplesheet_dest)))
    # When demultiplexing, SampleSheet.csv is the samplesheet to use.
    self.runParserObj.samplesheet = SampleSheetParser(samplesheet_dest)

    # Generate the base masks per lane and decide how to demultiplex.
    per_lane_base_masks = self._generate_per_lane_base_mask()
    if not per_lane_base_masks:
        # Fail loudly instead of flagging the run as IN_PROGRESS with nothing
        # to execute.
        raise ValueError(
            "no per-lane base mask generated for run {}".format(self.id))

    # A lane with exactly one base mask is "simple"; any other lane is
    # "complex" and needs one bcl2fastq instance per base mask.
    simple_lanes = {}
    complex_lanes = {}
    for lane, masks in per_lane_base_masks.items():
        if len(masks) == 1:
            simple_lanes[lane] = masks
        else:
            complex_lanes[lane] = masks

    # Each command is numbered with its position in the list.
    bcl2fastq_commands = []
    if simple_lanes:
        bcl2fastq_commands.append(
            self._generate_bcl2fastq_command(simple_lanes, True,
                                             len(bcl2fastq_commands)))

    # Compute the different masks: there will be one bcl2fastq command per mask.
    different_masks = set(
        mask for masks in complex_lanes.values() for mask in masks)
    for mask in different_masks:
        # Keep, for every complex lane using this mask, only this mask.
        base_masks_complex_to_demux = {
            lane: {mask: masks[mask]}
            for lane, masks in complex_lanes.items() if mask in masks
        }
        bcl2fastq_commands.append(
            self._generate_bcl2fastq_command(base_masks_complex_to_demux, True,
                                             len(bcl2fastq_commands)))

    # NOTE(review): the original comment states the commands should run one at
    # a time to avoid overloading the machine; whether
    # call_external_command_detached waits for completion is not visible here.
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir makes the status of this run IN_PROGRESS.
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        for execution, bcl2fastq_command in enumerate(bcl2fastq_commands):
            misc.call_external_command_detached(
                bcl2fastq_command,
                with_log_files=True,
                prefix="demux_{}".format(execution))
def demultiplex_run(self):
    """Demultiplex a Xten run, handling lanes with 10X indexes separately.

    Relies on ``self.lanes_10X``, ``self.lanes_not_10X`` and
    ``self.runParserObj.samplesheet`` being already set.

    Steps:
    - write ``SampleSheet_0.csv`` (non-10X lanes) and ``SampleSheet_1.csv``
      (10X lanes) when both kinds of lanes exist, otherwise write the only
      kind present to ``SampleSheet_0.csv``
    - refuse runs where a lane has more than one base mask
    - start one bcl2fastq conversion per kind of lane

    :returns: True once the conversion(s) have been started, False if a lane
        has more than one base mask
    """
    # With both 10X and non-10X lanes the samplesheet must be split, as the
    # 10X lanes need their own bcl2fastq command.
    is_mixed_run = bool(len(self.lanes_10X) and len(self.lanes_not_10X))
    with chdir(self.run_dir):
        if is_mixed_run:
            with open("SampleSheet_0.csv", 'wb') as fcd:
                fcd.write(
                    _generate_samplesheet_subset(
                        self.runParserObj.samplesheet, self.lanes_not_10X))
            with open("SampleSheet_1.csv", 'wb') as fcd:
                fcd.write(
                    _generate_samplesheet_subset(
                        self.runParserObj.samplesheet, self.lanes_10X))
        else:
            with open("SampleSheet_0.csv", 'wb') as fcd:
                fcd.write(
                    _generate_samplesheet_subset(
                        self.runParserObj.samplesheet,
                        (self.lanes_10X or self.lanes_not_10X)))

    per_lane_base_masks = self._generate_per_lane_base_mask()
    max_different_base_masks = max(
        len(masks) for masks in per_lane_base_masks.values())
    if max_different_base_masks > 1:
        # In a HiSeqX run there cannot be different index sizes in the SAME lane.
        logger.error(
            "In FC {} found one or more lane with more than one base mask "
            "(i.e., different index sizes in the same lane)".format(self.id))
        return False

    bcl2fastq_cmd_counter = 0
    with chdir(self.run_dir):
        # Creating the Demultiplexing dir changes the status to IN_PROGRESS.
        if not os.path.exists("Demultiplexing"):
            os.makedirs("Demultiplexing")
        if self.lanes_not_10X:
            cmd_normal = self.generate_bcl_command(self.lanes_not_10X,
                                                   bcl2fastq_cmd_counter)
            misc.call_external_command_detached(
                cmd_normal,
                with_log_files=True,
                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(
                ("BCL to FASTQ conversion and demultiplexing started for "
                 "normal run {} on {}".format(os.path.basename(self.id),
                                              datetime.now())))
            bcl2fastq_cmd_counter += 1
        if self.lanes_10X:
            cmd_10X = self.generate_bcl_command(self.lanes_10X,
                                                bcl2fastq_cmd_counter,
                                                is_10X=True)
            misc.call_external_command_detached(
                cmd_10X,
                with_log_files=True,
                prefix="demux_{}".format(bcl2fastq_cmd_counter))
            logger.info(
                ("BCL to FASTQ conversion and demultiplexing started for "
                 "10X run {} on {}".format(os.path.basename(self.id),
                                           datetime.now())))
            bcl2fastq_cmd_counter += 1
    return True