Esempio n. 1
0
    def __init__(self, config, barcode_map=None):
        """
        Initialise the library from ``config``, optionally with a barcode map.

        Both parent initialisers are run first. The barcode map is then built
        from ``config['barcodes']['map file']`` when that key is present;
        otherwise the ``barcode_map`` argument is used. The filters are set
        from ``config['filters']`` on top of the defaults listed below.

        Raises :py:class:`EnrichError` if a required config key is missing,
        or if no barcode map is available from either source.
        """
        VariantSeqLib.__init__(self, config)
        BarcodeSeqLib.__init__(self, config, barcodevariant=True)

        try:
            barcode_config = config['barcodes']
            if 'map file' in barcode_config:
                self.barcode_map = BarcodeMap(barcode_config['map file'])
            else:
                self.barcode_map = None

            # Look up the filter section before building the defaults so a
            # missing key is reported first.
            filter_config = config['filters']
            filter_defaults = {
                'min quality': 0,
                'avg quality': 0,
                'chastity': False,
                'max mutations': len(self.wt_dna),
            }
            self.set_filters(filter_config, filter_defaults)
        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key,
                              self.name)

        # Fall back to the map supplied by the caller when the local config
        # did not name one.
        if self.barcode_map is None:
            if barcode_map is None:
                raise EnrichError("Barcode map not specified", self.name)
            self.barcode_map = barcode_map

        self.counts['barcodes_unmapped'] = None
        self.filter_unmapped = True
Esempio n. 2
0
    def __init__(self, config, barcode_map=None):
        """
        Configure the library, resolving which barcode map it will use.

        After running both parent initialisers, a map file named under
        ``config['barcodes']['map file']`` is loaded into a
        :py:class:`BarcodeMap`. If the config names no map file, the
        ``barcode_map`` argument is used instead. Filters are set from
        ``config['filters']`` with the default values given below.

        Raises :py:class:`EnrichError` when a required config key is absent
        or when neither the config nor the caller supplies a barcode map.
        """
        VariantSeqLib.__init__(self, config)
        BarcodeSeqLib.__init__(self, config, barcodevariant=True)

        try:
            barcodes_section = config['barcodes']
            self.barcode_map = (
                BarcodeMap(barcodes_section['map file'])
                if 'map file' in barcodes_section
                else None
            )

            # The filter section is read before the defaults are built, so a
            # missing section is the first error reported.
            filters_section = config['filters']
            defaults = dict([
                ('min quality', 0),
                ('avg quality', 0),
                ('chastity', False),
                ('max mutations', len(self.wt_dna)),
            ])
            self.set_filters(filters_section, defaults)
        except KeyError as key:
            raise EnrichError("Missing required config value %s" % key,
                              self.name)

        map_in_config = self.barcode_map is not None
        if not map_in_config:
            # not in local config, so it must be provided on object creation
            if barcode_map is None:
                raise EnrichError("Barcode map not specified", self.name)
            self.barcode_map = barcode_map

        self.counts['barcodes_unmapped'] = None
        self.filter_unmapped = True
Esempio n. 3
0
    def calculate(self):
        """
        Count the barcodes and combine them into variant counts.

        Barcodes are counted by :py:meth:`BarcodeSeqLib.calculate`. Each
        retained barcode is then looked up in the :py:class:`BarcodeMap` and
        its count is passed to ``count_variant``.

        When ``self.filter_unmapped`` is set, barcodes that are not in the
        barcode map are moved to ``self.df_dict['barcodes_unmapped']`` and
        handed to ``dump_data`` before the variants are counted.

        The variant counts end up in ``self.df_dict['variants']`` as a
        single-column (``'count'``) data frame sorted in descending order.

        Raises :py:class:`EnrichError` if no variants were counted.
        """
        BarcodeSeqLib.calculate(self)  # count the barcodes
        self.df_dict['variants'] = dict()

        logging.info(
            "Converting barcodes to variants [{name}]".format(name=self.name))
        if self.filter_unmapped:
            map_mask = self.df_dict['barcodes'].index.isin(self.barcode_map)
            # Invert with ``~``: unary minus on a boolean array is rejected
            # by current NumPy releases, while ``~`` works on old and new.
            self.df_dict['barcodes_unmapped'] = \
                self.df_dict['barcodes'][~map_mask]
            self.df_dict['barcodes'] = self.df_dict['barcodes'][map_mask]
            del map_mask
            logging.info(
                "Writing counts for {n} unique unmapped barcodes to disk [{name}]"
                .format(n=len(self.df_dict['barcodes_unmapped']),
                        name=self.name))
            self.dump_data(keys=['barcodes_unmapped'])  # save memory

        # count variants associated with the barcodes
        for bc, row in self.df_dict['barcodes'].iterrows():
            count = row['count']
            variant = self.barcode_map[bc]
            mutations = self.count_variant(variant, copies=count)
            if mutations is None:  # variant has too many mutations
                self.filter_stats['max mutations'] += count
                self.filter_stats['total'] += count
                if self.report_filtered:
                    self.report_filtered_variant(variant, count)
                if bc not in self.barcode_map.bc_variant_strings:
                    self.barcode_map.bc_variant_strings[bc] = FILTERED_VARIANT
            else:
                # record which barcodes map to each counted variant
                if mutations not in self.barcode_map.variants:
                    self.barcode_map.variants[mutations] = set()
                self.barcode_map.variants[mutations].update([bc])
                self.barcode_map.bc_variant_strings[bc] = mutations

        self.df_dict['variants'] = \
                pd.DataFrame.from_dict(self.df_dict['variants'],
                                       orient="index", dtype="int32")
        if len(self.df_dict['variants']) == 0:
            raise EnrichError("Failed to count variants", self.name)
        self.df_dict['variants'].columns = ['count']
        # DataFrame.sort() has been removed from pandas; sort_values() is
        # the replacement for sorting by a column.
        self.df_dict['variants'].sort_values('count', ascending=False,
                                             inplace=True)

        logging.info(
            "Retained counts for {n} variants ({u} unique) [{name}]".format(
                n=self.df_dict['variants']['count'].sum(),
                u=len(self.df_dict['variants'].index),
                name=self.name))
        if self.aligner is not None:
            logging.info("Aligned {n} variants [{name}]".format(
                n=self.aligner.calls, name=self.name))
            self.aligner_cache = None
        self.report_filter_stats()
Esempio n. 4
0
    def calculate(self):
        """
        Count the barcodes and combine them into variant counts.

        Barcodes are counted by :py:meth:`BarcodeSeqLib.calculate`. Each
        retained barcode is then looked up in the :py:class:`BarcodeMap` and
        its count is passed to ``count_variant``.

        When ``self.filter_unmapped`` is set, barcodes that are not in the
        barcode map are moved to ``self.counts['barcodes_unmapped']`` and
        excluded from variant counting.

        The variant counts end up in ``self.counts['variants']`` as a
        single-column (``'count'``) data frame.

        Raises :py:class:`EnrichError` if no variants were counted.
        """
        BarcodeSeqLib.calculate(self)  # count the barcodes
        self.counts['variants'] = dict()

        if self.filter_unmapped:
            map_mask = self.counts['barcodes'].index.isin(self.barcode_map)
            # Invert with ``~``: unary minus on a boolean array is rejected
            # by current NumPy releases, while ``~`` works on old and new.
            self.counts['barcodes_unmapped'] = \
                self.counts['barcodes'][~map_mask]
            self.counts['barcodes'] = self.counts['barcodes'][map_mask]
            del map_mask

        # count variants associated with the barcodes
        for bc, row in self.counts['barcodes'].iterrows():
            count = row['count']
            variant = self.barcode_map[bc]
            mutations = self.count_variant(variant, copies=count)
            if mutations is None:  # variant has too many mutations
                self.filter_stats['max mutations'] += count
                self.filter_stats['total'] += count
                if self.verbose:
                    self.report_filtered_variant(variant, count)
            else:
                # record which barcodes map to each counted variant,
                # listing every barcode at most once
                if mutations not in self.barcode_map.variants:
                    self.barcode_map.variants[mutations] = list()
                if bc not in self.barcode_map.variants[mutations]:
                    self.barcode_map.variants[mutations].append(bc)

        self.counts['variants'] = \
                pd.DataFrame.from_dict(self.counts['variants'],
                                       orient="index", dtype="int32")
        if len(self.counts['variants']) == 0:
            raise EnrichError("Failed to count variants", self.name)
        self.counts['variants'].columns = ['count']

        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        logging.info("Counted %d variants (%d unique) [%s]",
                     self.counts['variants']['count'].sum(),
                     len(self.counts['variants'].index),
                     self.name)
        if self.aligner is not None:
            logging.info("Aligned %d variants [%s]",
                         self.aligner.calls, self.name)
        self.report_filter_stats()
Esempio n. 5
0
    def calculate(self):
        """
        Count the barcodes and combine them into variant counts.

        Barcodes are counted by :py:meth:`BarcodeSeqLib.calculate`. Each
        retained barcode is then looked up in the :py:class:`BarcodeMap` and
        its count is passed to ``count_variant``.

        When ``self.filter_unmapped`` is set, barcodes that are not in the
        barcode map are moved to ``self.counts['barcodes_unmapped']`` and
        excluded from variant counting.

        The variant counts end up in ``self.counts['variants']`` as a
        single-column (``'count'``) data frame.

        Raises :py:class:`EnrichError` if no variants were counted.
        """
        BarcodeSeqLib.calculate(self)  # count the barcodes
        self.counts['variants'] = dict()

        if self.filter_unmapped:
            map_mask = self.counts['barcodes'].index.isin(self.barcode_map)
            # Invert with ``~``: unary minus on a boolean array is rejected
            # by current NumPy releases, while ``~`` works on old and new.
            self.counts['barcodes_unmapped'] = \
                self.counts['barcodes'][~map_mask]
            self.counts['barcodes'] = self.counts['barcodes'][map_mask]
            del map_mask

        # count variants associated with the barcodes
        for bc, row in self.counts['barcodes'].iterrows():
            count = row['count']
            variant = self.barcode_map[bc]
            mutations = self.count_variant(variant, copies=count)
            if mutations is None:  # variant has too many mutations
                self.filter_stats['max mutations'] += count
                self.filter_stats['total'] += count
                if self.verbose:
                    self.report_filtered_variant(variant, count)
            else:
                # record which barcodes map to each counted variant,
                # listing every barcode at most once
                if mutations not in self.barcode_map.variants:
                    self.barcode_map.variants[mutations] = list()
                if bc not in self.barcode_map.variants[mutations]:
                    self.barcode_map.variants[mutations].append(bc)

        self.counts['variants'] = \
                pd.DataFrame.from_dict(self.counts['variants'],
                                       orient="index", dtype="int32")
        if len(self.counts['variants']) == 0:
            raise EnrichError("Failed to count variants", self.name)
        self.counts['variants'].columns = ['count']

        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        logging.info("Counted %d variants (%d unique) [%s]",
                     self.counts['variants']['count'].sum(),
                     len(self.counts['variants'].index),
                     self.name)
        if self.aligner is not None:
            logging.info("Aligned %d variants [%s]",
                         self.aligner.calls, self.name)
        self.report_filter_stats()