def __init__(self, sample_sheet_file):
    """
    Create a new SampleSheetBarcodes instance

    Loads the sample sheet and builds two per-lane lookup tables:
    one mapping index sequence to sample, and one mapping sample
    to index sequence. The index sequences are passed through
    'normalise_barcode' before being stored. When the sample
    sheet has no lanes, all lines are filed under the lane 'None'.

    Arguments:
      sample_sheet_file (str): path of a SampleSheet.csv file
    """
    self._sample_sheet = SampleSheet(sample_sheet_file)
    self._sample_lookup = {}
    self._barcode_lookup = {}
    self._lanes = []
    id_column = self._sample_sheet.sample_id_column
    for row in self._sample_sheet.data:
        # Lines are grouped by lane (or under None if the
        # sample sheet doesn't define lanes)
        lane = row['Lane'] if self._sample_sheet.has_lanes else None
        if lane not in self._lanes:
            # First line seen for this lane: set up empty lookups
            self._lanes.append(lane)
            self._sample_lookup[lane] = {}
            self._barcode_lookup[lane] = {}
        sample_name = row[id_column]
        barcode = normalise_barcode(samplesheet_index_sequence(row))
        # Store both directions of the sample <-> barcode mapping
        self._sample_lookup[lane][barcode] = sample_name
        self._barcode_lookup[lane][sample_name] = barcode
def __init__(self, sample_sheet_file):
    """
    Create a new SampleSheetBarcodes instance

    Loads the sample sheet and builds two per-lane lookup tables:
    one mapping index sequence to sample, and one mapping sample
    to index sequence. Any '-' in an index sequence is replaced
    with '+', and a missing (None) index sequence is stored as an
    empty string. When the sample sheet has no lanes, all lines
    are filed under the lane 'None'.

    Arguments:
      sample_sheet_file (str): path of a SampleSheet.csv file
    """
    self._sample_sheet = SampleSheet(sample_sheet_file)
    self._sample_lookup = {}
    self._barcode_lookup = {}
    self._lanes = []
    id_column = self._sample_sheet.sample_id_column
    for row in self._sample_sheet.data:
        # Lines are grouped by lane (or under None if the
        # sample sheet doesn't define lanes)
        lane = row['Lane'] if self._sample_sheet.has_lanes else None
        if lane not in self._lanes:
            # First line seen for this lane: set up empty lookups
            self._lanes.append(lane)
            self._sample_lookup[lane] = {}
            self._barcode_lookup[lane] = {}
        sample_name = row[id_column]
        barcode = samplesheet_index_sequence(row)
        # Use '+' as the separator; no index becomes empty string
        barcode = "" if barcode is None else barcode.replace('-', '+')
        # Store both directions of the sample <-> barcode mapping
        self._sample_lookup[lane][barcode] = sample_name
        self._barcode_lookup[lane][sample_name] = barcode
def _check_sample_sheet_indexes(self, sample_sheet_file):
    """
    Check that empty indexes are correctly specified in samplesheet

    A line with an empty index sequence is only accepted when it
    is the sole line in its lane (or the sole line in the whole
    sample sheet, if the sample sheet has no lanes).

    Arguments:
      sample_sheet_file (str): sample sheet file to check

    Raises:
      Exception: if a line with an empty index is found alongside
        other lines in the same lane (or sample sheet)
    """
    # Build the list of sheets to examine: one sub-sheet per lane
    # when lanes are defined, otherwise just the full sheet
    full_sheet = SampleSheet(sample_sheet_file)
    if full_sheet.has_lanes:
        lane_ids = list(set(row['Lane'] for row in full_sheet))
        sub_sheets = [make_custom_sample_sheet(sample_sheet_file,
                                               lanes=(lane_id,))
                      for lane_id in lane_ids]
    else:
        sub_sheets = [full_sheet]
    # Look for empty indexes in each sheet
    for sub_sheet in sub_sheets:
        for row in sub_sheet:
            if samplesheet_index_sequence(row):
                # Index is present, nothing to check
                continue
            if len(sub_sheet.data) <= 1:
                # Empty index is valid when it's the only line
                continue
            if sub_sheet.has_lanes:
                raise Exception("Invalid sample sheet: "
                                "empty index must be the "
                                "only line for this lane")
            raise Exception("Invalid sample sheet: "
                            "empty index must be the "
                            "only line")
def get_bases_mask_icell8(bases_mask, sample_sheet=None):
    """
    Reset the supplied bases mask string so that only the
    bases containing the inline barcode and UMIs are kept,
    and any remaining bases are ignored.

    The R1 component (the first comma-separated element) is
    expected to be a single character followed by the number
    of cycles (e.g. 'y76'); it is replaced by 'yN' (where N is
    INLINE_BARCODE_LENGTH + UMI_LENGTH) followed by 'nM' for
    any remaining M cycles. Other components are left as-is.

    If a sample sheet is also supplied then an additional
    update will be made to ensure that the bases mask
    respects the barcode lengths given there (using the index
    sequence from the first line of the sample sheet).

    Arguments:
      bases_mask (str): initial bases mask string to update
      sample_sheet (str): path to optional sample sheet

    Returns:
      String: updated bases mask string

    Raises:
      AssertionError: if the R1 mask has fewer cycles than
        are needed for the inline barcode plus UMI
    """
    # Extract R1 mask
    mask_parts = bases_mask.split(',')
    r1_mask = mask_parts[0]
    # Update to restrict to the inline barcode + UMI bases
    num_cycles = int(r1_mask[1:])
    icell8_inline_length = (INLINE_BARCODE_LENGTH + UMI_LENGTH)
    # Explicit check rather than an 'assert' statement, so that
    # the validation isn't stripped out when running with -O
    # (same exception type is raised as before)
    if num_cycles < icell8_inline_length:
        raise AssertionError("R1 mask '%s' has %d cycles but at "
                             "least %d are required" %
                             (r1_mask,
                              num_cycles,
                              icell8_inline_length))
    discard_length = (num_cycles - icell8_inline_length)
    r1_mask = "y%d" % icell8_inline_length
    if discard_length > 0:
        # Mask out the cycles after the barcode and UMI
        r1_mask += "n%d" % discard_length
    mask_parts[0] = r1_mask
    # Rebuild full bases mask
    updated_mask = ','.join(mask_parts)
    # Handle sample sheet
    if sample_sheet is not None:
        index_seq = samplesheet_index_sequence(
            SampleSheet(sample_sheet).data[0])
        if index_seq is None:
            # No index in sample sheet: treat as empty barcode
            index_seq = ""
        updated_mask = fix_bases_mask(updated_mask, index_seq)
    return updated_mask
lanes=(lane, )) else: s = make_custom_sample_sheet(sample_sheet, fp.name) if has_chromium_sc_indices(fp.name): logging.warning("Lane %s has 10xGenomics Chromium " "indices in sample sheet; not " "matching against samplesheet for " "this lane" % lane) continue # If mismatches not set then determine from # the barcode lengths in the temporary # samplesheet if mismatches is None: barcode_length = None for line in s: length = len(samplesheet_index_sequence(line)) if barcode_length is None: barcode_length = length elif length != barcode_length: logging.error("Lane %s has a mixture of " "barcode lengths" % lane) barcode_length = min(barcode_length, length) if barcode_length >= 6: mismatches = 1 else: mismatches = 0 # Check for collisions while mismatches and check_barcode_collisions( fp.name, mismatches): mismatches = mismatches - 1 # Check mismatches
# Don't pass the sample sheet to the reporter # for lanes with 10x indices use_sample_sheet = None logging.warning("Lane %s has 10xGenomics-style " "indices in sample sheet; not " "matching against samplesheet for " "this lane" % lane) else: # If mismatches not set then determine from # the barcode lengths in the temporary # samplesheet if mismatches is None: barcode_length = None for line in s: index_sequence = \ samplesheet_index_sequence(line) if index_sequence is None: # Empty barcode sequence in # samplesheet length = 0 else: length = len(index_sequence) if barcode_length is None: barcode_length = length elif length != barcode_length: logging.error("Lane %s has a mixture " "of barcode lengths" % lane) barcode_length = min( barcode_length, length) if barcode_length >= 6: mismatches = 1