def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and combines
    them into variant counts using the :py:class:`BarcodeMap`.

    When ``self.filter_unmapped`` is set, barcodes that are not present in
    ``self.barcode_map`` are moved to ``self.df_dict['barcodes_unmapped']``
    and written out with :py:meth:`dump_data` before variants are counted.

    For every remaining barcode the mapped variant is passed to
    :py:meth:`count_variant`. A ``None`` result is recorded in
    ``self.filter_stats`` under ``'max mutations'`` and ``'total'``;
    otherwise the barcode is registered under its mutation string in
    ``self.barcode_map.variants`` and ``self.barcode_map.bc_variant_strings``.

    Finally ``self.df_dict['variants']`` is converted to a single-column
    (``'count'``) data frame sorted by descending count.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # NOTE(review): presumably filled in by count_variant() below -- confirm.
    self.df_dict['variants'] = dict()

    logging.info(
        "Converting barcodes to variants [{name}]".format(name=self.name))
    if self.filter_unmapped:
        map_mask = self.df_dict['barcodes'].index.isin(self.barcode_map)
        # Invert the boolean mask with ``~``; unary ``-`` on a boolean
        # array is rejected by current NumPy releases.
        self.df_dict['barcodes_unmapped'] = \
            self.df_dict['barcodes'][~map_mask]
        self.df_dict['barcodes'] = self.df_dict['barcodes'][map_mask]
        del map_mask
        logging.info(
            "Writing counts for {n} unique unmapped barcodes to disk [{name}]"
            .format(n=len(self.df_dict['barcodes_unmapped']),
                    name=self.name))
        self.dump_data(keys=['barcodes_unmapped'])  # save memory

    # count variants associated with the barcodes
    # NOTE(review): without filter_unmapped, a barcode missing from
    # barcode_map presumably fails on the lookup below -- confirm.
    for bc, row in self.df_dict['barcodes'].iterrows():
        count = row['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.report_filtered:
                self.report_filtered_variant(variant, count)
            if bc not in self.barcode_map.bc_variant_strings:
                self.barcode_map.bc_variant_strings[bc] = FILTERED_VARIANT
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = set()
            self.barcode_map.variants[mutations].add(bc)
            self.barcode_map.bc_variant_strings[bc] = mutations

    self.df_dict['variants'] = pd.DataFrame.from_dict(
        self.df_dict['variants'], orient="index", dtype="int32")
    # Check before renaming columns: an empty frame has no column to rename.
    if len(self.df_dict['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.df_dict['variants'].columns = ['count']
    # DataFrame.sort() was removed from pandas; sort_values() is the
    # replacement with the same arguments.
    self.df_dict['variants'].sort_values(
        'count', ascending=False, inplace=True)

    logging.info(
        "Retained counts for {n} variants ({u} unique) [{name}]".format(
            n=self.df_dict['variants']['count'].sum(),
            u=len(self.df_dict['variants'].index),
            name=self.name))
    if self.aligner is not None:
        logging.info("Aligned {n} variants [{name}]".format(
            n=self.aligner.calls, name=self.name))
        # NOTE(review): presumably releases cached alignments -- confirm.
        self.aligner_cache = None
    self.report_filter_stats()
def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and combines
    them into variant counts using the :py:class:`BarcodeMap`.

    When ``self.filter_unmapped`` is set, barcodes that are not present in
    ``self.barcode_map`` are moved to ``self.counts['barcodes_unmapped']``
    before variants are counted.

    For every remaining barcode the mapped variant is passed to
    :py:meth:`count_variant`. A ``None`` result is recorded in
    ``self.filter_stats`` under ``'max mutations'`` and ``'total'``;
    otherwise the barcode is appended (once) to the list stored under its
    mutation string in ``self.barcode_map.variants``.

    Finally ``self.counts['variants']`` is converted to a single-column
    (``'count'``) data frame.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # NOTE(review): presumably filled in by count_variant() below -- confirm.
    self.counts['variants'] = dict()

    if self.filter_unmapped:
        map_mask = self.counts['barcodes'].index.isin(self.barcode_map)
        # Invert the boolean mask with ``~``; unary ``-`` on a boolean
        # array is rejected by current NumPy releases.
        self.counts['barcodes_unmapped'] = self.counts['barcodes'][~map_mask]
        self.counts['barcodes'] = self.counts['barcodes'][map_mask]
        del map_mask

    # count variants associated with the barcodes
    # NOTE(review): without filter_unmapped, a barcode missing from
    # barcode_map presumably fails on the lookup below -- confirm.
    for bc, row in self.counts['barcodes'].iterrows():
        count = row['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.verbose:
                self.report_filtered_variant(variant, count)
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = list()
            # Kept as a list (not a set) so the stored container type that
            # other code may rely on is unchanged.
            if bc not in self.barcode_map.variants[mutations]:
                self.barcode_map.variants[mutations].append(bc)

    self.counts['variants'] = pd.DataFrame.from_dict(
        self.counts['variants'], orient="index", dtype="int32")
    # Check before renaming columns: an empty frame has no column to rename.
    if len(self.counts['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.counts['variants'].columns = ['count']

    # Lazy logging arguments: same rendered message, formatted only when
    # the record is actually emitted.
    logging.info("Counted %d variants (%d unique) [%s]",
                 self.counts['variants']['count'].sum(),
                 len(self.counts['variants'].index),
                 self.name)
    if self.aligner is not None:
        logging.info("Aligned %d variants [%s]",
                     self.aligner.calls, self.name)
    self.report_filter_stats()
def calculate(self):
    """
    Counts the barcodes using :py:meth:`BarcodeSeqLib.calculate` and combines
    them into variant counts using the :py:class:`BarcodeMap`.

    Steps, in order:

    1. Barcode counts are produced by the parent ``calculate``.
    2. If ``self.filter_unmapped`` is set, barcodes absent from
       ``self.barcode_map`` are split off into
       ``self.counts['barcodes_unmapped']``.
    3. Each remaining barcode's variant is passed to
       :py:meth:`count_variant`. A ``None`` result increments the
       ``'max mutations'`` and ``'total'`` entries of ``self.filter_stats``;
       any other result is used as the key in ``self.barcode_map.variants``
       under which the barcode is listed (without duplicates).
    4. ``self.counts['variants']`` becomes a data frame with one ``'count'``
       column.

    Raises :py:class:`EnrichError` if no variants were counted.
    """
    BarcodeSeqLib.calculate(self)  # count the barcodes
    # NOTE(review): presumably filled in by count_variant() below -- confirm.
    self.counts['variants'] = dict()

    if self.filter_unmapped:
        map_mask = self.counts['barcodes'].index.isin(self.barcode_map)
        # Boolean masks are inverted with ``~``; the original unary ``-``
        # raises TypeError on current NumPy releases.
        self.counts['barcodes_unmapped'] = self.counts['barcodes'][~map_mask]
        self.counts['barcodes'] = self.counts['barcodes'][map_mask]
        del map_mask

    # count variants associated with the barcodes
    # NOTE(review): without filter_unmapped, a barcode missing from
    # barcode_map presumably fails on the lookup below -- confirm.
    for bc, row in self.counts['barcodes'].iterrows():
        count = row['count']
        variant = self.barcode_map[bc]
        mutations = self.count_variant(variant, copies=count)
        if mutations is None:  # variant has too many mutations
            self.filter_stats['max mutations'] += count
            self.filter_stats['total'] += count
            if self.verbose:
                self.report_filtered_variant(variant, count)
        else:
            if mutations not in self.barcode_map.variants:
                self.barcode_map.variants[mutations] = list()
            # The container stays a list so its type is unchanged for any
            # other code that reads barcode_map.variants.
            if bc not in self.barcode_map.variants[mutations]:
                self.barcode_map.variants[mutations].append(bc)

    self.counts['variants'] = pd.DataFrame.from_dict(
        self.counts['variants'], orient="index", dtype="int32")
    # The emptiness check must precede the column rename, which needs a
    # column to exist.
    if len(self.counts['variants']) == 0:
        raise EnrichError("Failed to count variants", self.name)
    self.counts['variants'].columns = ['count']

    # Arguments are handed to logging for deferred formatting; the rendered
    # text is the same as with eager ``%`` interpolation.
    logging.info("Counted %d variants (%d unique) [%s]",
                 self.counts['variants']['count'].sum(),
                 len(self.counts['variants'].index),
                 self.name)
    if self.aligner is not None:
        logging.info("Aligned %d variants [%s]",
                     self.aligner.calls, self.name)
    self.report_filter_stats()