def __init__(self, config, barcode_map=None):
    """
    Set up the library from *config*, optionally sharing a barcode map.

    Both parent initializers are run first. A barcode map is then built
    from ``config['barcodes']['map file']`` when that key is present;
    otherwise the *barcode_map* argument is used. Filters are configured
    from ``config['filters']`` with permissive defaults.

    Raises :py:class:`EnrichError` if a required config key is missing or
    if no barcode map is available from either source.
    """
    VariantSeqLib.__init__(self, config)
    BarcodeSeqLib.__init__(self, config, barcodevariant=True)

    try:
        barcode_cfg = config['barcodes']
        self.barcode_map = (BarcodeMap(barcode_cfg['map file'])
                            if 'map file' in barcode_cfg else None)

        # Look up the filter section before building the defaults so a
        # missing key is reported ahead of any problem with ``wt_dna``.
        filter_cfg = config['filters']
        filter_defaults = {
            'min quality': 0,
            'avg quality': 0,
            'chastity': False,
            'max mutations': len(self.wt_dna),
        }
        self.set_filters(filter_cfg, filter_defaults)
    except KeyError as missing:
        raise EnrichError("Missing required config value %s" % missing,
                          self.name)

    if self.barcode_map is None:
        # Nothing in the local config: fall back to the map handed in at
        # object creation, which must then be present.
        if barcode_map is None:
            raise EnrichError("Barcode map not specified", self.name)
        self.barcode_map = barcode_map

    self.counts['barcodes_unmapped'] = None
    self.filter_unmapped = True
def __init__(self, config, barcode_map=None):
    """
    Initialize a barcoded-variant library.

    Runs ``VariantSeqLib.__init__`` and ``BarcodeSeqLib.__init__`` (the
    latter with ``barcodevariant=True``), then resolves the barcode map:
    a ``'map file'`` entry under ``config['barcodes']`` takes precedence
    over the *barcode_map* argument. Filter options are read from
    ``config['filters']``, with defaults supplied for any not given.

    Raises :py:class:`EnrichError` when a required config key is absent
    or when neither the config nor the caller provides a barcode map.
    """
    VariantSeqLib.__init__(self, config)
    BarcodeSeqLib.__init__(self, config, barcodevariant=True)

    try:
        barcodes_section = config['barcodes']
        if 'map file' not in barcodes_section:
            self.barcode_map = None
        else:
            self.barcode_map = BarcodeMap(barcodes_section['map file'])

        # ``config['filters']`` is read first so that its absence is the
        # error reported, matching the order of evaluation of the call.
        filters_section = config['filters']
        defaults = dict()
        defaults['min quality'] = 0
        defaults['avg quality'] = 0
        defaults['chastity'] = False
        defaults['max mutations'] = len(self.wt_dna)
        self.set_filters(filters_section, defaults)
    except KeyError as key_error:
        message = "Missing required config value %s" % key_error
        raise EnrichError(message, self.name)

    map_in_config = self.barcode_map is not None
    if not map_in_config:
        map_from_caller = barcode_map is not None
        if not map_from_caller:
            raise EnrichError("Barcode map not specified", self.name)
        self.barcode_map = barcode_map

    self.counts['barcodes_unmapped'] = None
    self.filter_unmapped = True
def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and
    combines them into variant counts using the :py:class:`BarcodeMap`.

    When ``filter_unmapped`` is set, barcodes that are not in the barcode
    map are moved to ``df_dict['barcodes_unmapped']`` and dumped to disk
    before variants are counted. Variants rejected by
    :py:meth:`count_variant` (it returns ``None``) are added to the
    ``'max mutations'`` and ``'total'`` filter statistics; accepted ones
    are recorded in the barcode map's ``variants`` and
    ``bc_variant_strings`` lookups.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # presumably filled in by count_variant() below -- confirm in that method
    self.df_dict['variants'] = dict()

    logging.info(
        "Converting barcodes to variants [{name}]".format(name=self.name))
    if self.filter_unmapped:
        map_mask = self.df_dict['barcodes'].index.isin(self.barcode_map)
        # ``~`` is the logical NOT of a boolean mask. Unary ``-`` on a
        # boolean array raises TypeError in current NumPy releases.
        self.df_dict['barcodes_unmapped'] = \
            self.df_dict['barcodes'][~map_mask]
        self.df_dict['barcodes'] = self.df_dict['barcodes'][map_mask]
        del map_mask
        logging.info(
            "Writing counts for {n} unique unmapped barcodes to disk [{name}]"
            .format(n=len(self.df_dict['barcodes_unmapped']),
                    name=self.name))
        self.dump_data(keys=['barcodes_unmapped'])  # save memory

    # count variants associated with the barcodes
    for bc, count in self.df_dict['barcodes'].iterrows():
        count = count['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.report_filtered:
                self.report_filtered_variant(variant, count)
            # keep any variant string already recorded for this barcode
            if bc not in self.barcode_map.bc_variant_strings:
                self.barcode_map.bc_variant_strings[bc] = FILTERED_VARIANT
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = set()
            self.barcode_map.variants[mutations].add(bc)
            self.barcode_map.bc_variant_strings[bc] = mutations

    self.df_dict['variants'] = \
        pd.DataFrame.from_dict(self.df_dict['variants'],
                               orient="index", dtype="int32")
    # truthiness of a DataFrame is ambiguous, so test the length explicitly
    if len(self.df_dict['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.df_dict['variants'].columns = ['count']
    # DataFrame.sort() no longer exists in current pandas; use sort_values()
    # where it is available and fall back to sort() on old installations.
    if hasattr(self.df_dict['variants'], 'sort_values'):
        self.df_dict['variants'].sort_values('count', ascending=False,
                                             inplace=True)
    else:
        self.df_dict['variants'].sort('count', ascending=False,
                                      inplace=True)

    logging.info(
        "Retained counts for {n} variants ({u} unique) [{name}]".format(
            n=self.df_dict['variants']['count'].sum(),
            u=len(self.df_dict['variants'].index),
            name=self.name))
    if self.aligner is not None:
        logging.info("Aligned {n} variants [{name}]".format(
            n=self.aligner.calls, name=self.name))
        self.aligner_cache = None
    self.report_filter_stats()
def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and
    combines them into variant counts using the :py:class:`BarcodeMap`.

    When ``filter_unmapped`` is set, barcodes that are not in the barcode
    map are moved from ``counts['barcodes']`` to
    ``counts['barcodes_unmapped']`` before variants are counted. Variants
    rejected by :py:meth:`count_variant` (it returns ``None``) are added
    to the ``'max mutations'`` and ``'total'`` filter statistics; for
    accepted ones the barcode is appended to the barcode map's
    ``variants`` list for that variant.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # presumably filled in by count_variant() below -- confirm in that method
    self.counts['variants'] = dict()

    if self.filter_unmapped:
        map_mask = self.counts['barcodes'].index.isin(self.barcode_map)
        # ``~`` is the logical NOT of a boolean mask. Unary ``-`` on a
        # boolean array raises TypeError in current NumPy releases.
        self.counts['barcodes_unmapped'] = self.counts['barcodes'][~map_mask]
        self.counts['barcodes'] = self.counts['barcodes'][map_mask]
        del map_mask

    # count variants associated with the barcodes
    for bc, count in self.counts['barcodes'].iterrows():
        count = count['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.verbose:
                self.report_filtered_variant(variant, count)
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = list()
            # each barcode is listed at most once per variant
            if bc not in self.barcode_map.variants[mutations]:
                self.barcode_map.variants[mutations].append(bc)

    self.counts['variants'] = \
        pd.DataFrame.from_dict(self.counts['variants'],
                               orient="index", dtype="int32")
    # truthiness of a DataFrame is ambiguous, so test the length explicitly
    if len(self.counts['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.counts['variants'].columns = ['count']

    logging.info("Counted %d variants (%d unique) [%s]",
                 self.counts['variants']['count'].sum(),
                 len(self.counts['variants'].index),
                 self.name)
    if self.aligner is not None:
        logging.info("Aligned %d variants [%s]",
                     self.aligner.calls, self.name)
    self.report_filter_stats()
def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and
    combines them into variant counts using the :py:class:`BarcodeMap`.

    Steps, in order:

    1. Run the parent barcode count.
    2. If ``filter_unmapped`` is set, split ``counts['barcodes']`` into
       mapped barcodes (kept) and ``counts['barcodes_unmapped']``.
    3. For every remaining barcode, count its variant via
       :py:meth:`count_variant`. A ``None`` result means the variant was
       filtered and is added to the ``'max mutations'`` and ``'total'``
       filter statistics; otherwise the barcode is appended to the
       barcode map's ``variants`` list for that variant.
    4. Convert ``counts['variants']`` to a single-column data frame.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # presumably filled in by count_variant() below -- confirm in that method
    self.counts['variants'] = dict()

    if self.filter_unmapped:
        barcode_counts = self.counts['barcodes']
        map_mask = barcode_counts.index.isin(self.barcode_map)
        # Invert with ``~``: unary ``-`` on a boolean mask is rejected
        # with TypeError by current NumPy releases.
        self.counts['barcodes_unmapped'] = barcode_counts[~map_mask]
        self.counts['barcodes'] = barcode_counts[map_mask]
        del map_mask, barcode_counts

    # count variants associated with the barcodes
    for bc, count in self.counts['barcodes'].iterrows():
        count = count['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.verbose:
                self.report_filtered_variant(variant, count)
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = list()
            # each barcode is listed at most once per variant
            if bc not in self.barcode_map.variants[mutations]:
                self.barcode_map.variants[mutations].append(bc)

    self.counts['variants'] = \
        pd.DataFrame.from_dict(self.counts['variants'],
                               orient="index", dtype="int32")
    # truthiness of a DataFrame is ambiguous, so test the length explicitly
    if len(self.counts['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.counts['variants'].columns = ['count']

    logging.info("Counted %d variants (%d unique) [%s]",
                 self.counts['variants']['count'].sum(),
                 len(self.counts['variants'].index),
                 self.name)
    if self.aligner is not None:
        logging.info("Aligned %d variants [%s]",
                     self.aligner.calls, self.name)
    self.report_filter_stats()