class ICell8WellList:
    """
    Class representing an ICELL8 well list file

    The file is tab-delimited and consists of an uncommented header
    line which lists the fields ('Row','Col','Candidate',...),
    followed by lines of data.

    The key columns are 'Sample' (gives the cell type) and 'Barcode'
    (the inline barcode sequence).
    """
    def __init__(self,well_list_file):
        """
        Create a new ICell8WellList instance.

        Arguments:
          well_list_file (str): path to the well list file.
        """
        # The first line of the file supplies the column names used
        # for the 'Barcode' and 'Sample' lookups below
        self._data = TabFile(filen=well_list_file,
                             first_line_is_header=True)

    def barcodes(self):
        """
        Return a list of barcodes

        The list holds the 'Barcode' value of every data line, in the
        order in which the lines are iterated (duplicates are kept).
        """
        return [line['Barcode'] for line in self._data]

    def samples(self):
        """
        Return a list of samples

        The list holds each distinct 'Sample' value once, in sorted
        order.
        """
        # sorted() accepts any iterable and always returns a new list,
        # so no intermediate list is needed on either side of the set
        return sorted(set(line['Sample'] for line in self._data))

    def sample(self,barcode):
        """
        Return sample (=cell type) corresponding to barcode

        Arguments:
          barcode (str): barcode sequence to look up.

        Returns:
          The 'Sample' value from the first data line whose 'Barcode'
          matches.

        Raises:
          KeyError: if no data line matches the barcode.
        """
        matches = self._data.lookup('Barcode',barcode)
        try:
            return matches[0]['Sample']
        except IndexError:
            # No matching line: report it as a failed key lookup
            raise KeyError("Failed to locate sample for '%s'" % barcode)
# Deal with 'unassigned' reads
# NOTE(review): block nesting below was reconstructed from the statement
# order (the source layout was lost) -- confirm against the original file
if args.unassigned:
    # Count reads and distinct UMIs for barcodes not in the expected list
    unassigned_reads = 0
    unassigned_umis = set()
    # Build the expected barcodes as a set once, so that each membership
    # test in the loop below is a hash lookup rather than a list scan
    # (assumes barcode values are hashable, e.g. strings)
    if well_list is not None:
        expected_barcodes = set(well_list.barcodes())
    else:
        # No well list: fall back to the barcodes already in the stats data
        expected_barcodes = set(line['Barcode'] for line in stats_data)
    for barcode in stats.barcodes():
        if barcode not in expected_barcodes:
            unassigned_reads += stats.nreads(barcode=barcode)
            unassigned_umis.update(
                stats.distinct_umis(barcode=barcode))
    # Check if 'unassigned' is already in stats file
    unassigned = stats_data.lookup('Barcode', 'Unassigned')
    try:
        data_line = unassigned[0]
    except IndexError:
        # No existing line: append a new one for the unassigned counts
        data_line = stats_data.append()
        data_line['Barcode'] = 'Unassigned'
    # Set the counts whether the line was found or newly appended
    data_line[nreads_col] = unassigned_reads
    data_line[umis_col] = len(unassigned_umis)
# Write to file
stats_data.write(filen=stats_file, include_header=True)
# Report summary
# (single-argument parenthesised form prints the same text under the
# Python 2 print statement and the Python 3 print function)
print("#barcodes : %s" % len(stats.barcodes()))
print("#reads : %s" % stats.nreads())
print("[%s] ICell8 stats completed" % time.strftime("%Y/%m/%d-%H:%M:%S"))