Esempio n. 1
0
    def test_3DEC_UmiBarcodeDemuxMethod_matching_barcode(self):
        """Demultiplex a read pair whose first read carries a known barcode.

        The first read starts with three bases that are configured as the UMI
        (``ATC``), directly followed by the eight bases ``ACACACTA`` that are
        configured as the barcode. The test expects that barcode in the ``BC``
        tag of the first returned record and the value 1 in its ``bi`` tag.
        """
        # Barcode definition files shipped inside the package.
        parser = BarcodeParser(
            pkg_resources.resource_filename(
                'singlecellmultiomics',
                'modularDemultiplexer/barcodes/'))

        first_read = FastqRecord(
            '@Cluster_s_1_1101_1000',
            'ATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGTGGTTTTGAGTGAGTTTTTTAAT',
            '+',
            'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE')
        second_read = FastqRecord(
            '@Cluster_s_1_1101_1002',
            'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
            '+',
            '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#')

        # UMI: bases 0-2 of the first read; barcode: the 8 bases after it.
        settings = {
            'umiRead': 0,
            'umiStart': 0,
            'umiLength': 3,
            'barcodeRead': 0,
            'barcodeStart': 3,
            'barcodeLength': 8,
            'barcodeFileParser': parser,
            'barcodeFileAlias': 'maya_384NLA',
            'indexFileParser': None,
            'indexFileAlias': 'illumina_merged_ThruPlex48S_RP',
            'random_primer_read': None,
            'random_primer_length': 6,
        }
        demultiplexer = UmiBarcodeDemuxMethod(**settings)

        tagged = demultiplexer.demultiplex([first_read, second_read])
        first_tags = tagged[0].tags
        # The barcode sequence is ACACACTA (first barcode)
        self.assertEqual(first_tags['BC'], 'ACACACTA')
        self.assertEqual(first_tags['bi'], 1)
Esempio n. 2
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the MSPJI demultiplexer: 3bp UMI, then 8bp cell barcode.

     Both the UMI and the barcode are read from read index 0, using the
     barcode set registered under the alias ``maya_mspj1``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'maya_mspj1'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=3,
         barcodeRead=0, barcodeStart=3, barcodeLength=8,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'MSPJIC8U3'
     self.longName = 'MSPJI, CB: 8bp UMI: 3bp'
     self.description = 'MSPJI barcoded fragments. 3bp umi followed by 8bp cell barcode.'
Esempio n. 3
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the Scartrace demultiplexer: 8bp cell barcode, no UMI.

     The barcode is taken from the start of read index 0 and the UMI length
     is zero. The barcode set alias is ``scartrace``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'scartrace'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=0,
         barcodeRead=0, barcodeStart=0, barcodeLength=8,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'SCARC8R1'
     self.longName = 'Scartrace, CB: 8bp'
     self.description = '384 well format. Scar amplicon demultiplexing, cell barcode in read 1'
Esempio n. 4
0
    def demultiplex(self, records, **kwargs):
        """Demultiplex ``records`` and attach the ligation bases as tags.

        The two bases of the first record that directly follow the barcode
        and UMI (offset ``barcodeLength + umiLength``) are stored on the first
        two tagged records: the bases under the ``lh`` tag and their
        qualities under the ``lq`` tag.

        Args:
            records: sequence of reads; only ``records[0]`` is inspected here
                (its ``sequence`` and ``qual`` attributes).
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the base right after barcode + UMI must
                be ``'T'``.

        Returns:
            The tagged records returned by
            ``UmiBarcodeDemuxMethod.demultiplex``, with the extra tags added.

        Raises:
            NonMultiplexable: when probing and the base after barcode + UMI
                is not ``'T'``, including when the read is too short to
                contain that base.
        """
        ligation_start = self.barcodeLength + self.umiLength

        # A one-base slice is used instead of an index so that a read shorter
        # than barcode + UMI + 1 is reported as NonMultiplexable rather than
        # failing with an IndexError.
        if kwargs.get('probe') and \
                records[0].sequence[ligation_start:ligation_start + 1] != 'T':
            raise NonMultiplexable

        # add first 2 bases as ligation tag:
        ligation_end = ligation_start + 2
        ligation_sequence = records[0].sequence[ligation_start:ligation_end]
        ligation_qualities = records[0].qual[ligation_start:ligation_end]

        taggedRecords = UmiBarcodeDemuxMethod.demultiplex(
            self, records, **kwargs)

        # Both mates receive the same ligation tags.
        for mate_index in (0, 1):
            tagged = taggedRecords[mate_index]
            tagged.addTagByTag('lh',
                               ligation_sequence,
                               isPhred=False,
                               make_safe=False)
            tagged.addTagByTag('lq',
                               ligation_qualities,
                               isPhred=True,
                               make_safe=False)
        return taggedRecords
    def demultiplex(self, records, **kwargs):
        """Demultiplex ``records``, optionally probing for a ``CATG`` site.

        Args:
            records: sequence of reads; ``records[0].sequence`` is inspected
                only when probing.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the four bases following barcode + UMI
                in the first record must equal ``'CATG'``.

        Returns:
            The result of ``UmiBarcodeDemuxMethod.demultiplex``.

        Raises:
            NonMultiplexable: when probing and the four bases differ from
                ``'CATG'``.
        """
        if kwargs.get('probe'):
            site_start = self.barcodeLength + self.umiLength
            site = records[0].sequence[site_start: site_start + 4]
            if site != 'CATG':
                raise NonMultiplexable

        return UmiBarcodeDemuxMethod.demultiplex(self, records, **kwargs)
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the single-end 96 well NLAIII demultiplexer.

     A 3bp UMI is followed by an 8bp barcode, both on read index 0. The
     random primer read and length are passed as ``None``. The barcode set
     alias is ``lennart96NLA``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'lennart96NLA'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=3,
         barcodeRead=0, barcodeStart=3, barcodeLength=8,
         random_primer_read=None, random_primer_length=None,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'NLAIII96C8U3SE'
     self.longName = 'NLAIII, 96 well CB: 8bp UMI: 3bp RP:6bp, single ended'
     self.description = '96 well format. 3bp umi followed by 8bp barcode. Single end: R2 is missing'
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the paired-end 96 well NLAIII demultiplexer.

     A 3bp UMI is followed by an 8bp barcode, both on read index 0. A 6bp
     random primer is configured on read index 1. The barcode set alias is
     ``lennart96NLA``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'lennart96NLA'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=3,
         random_primer_read=1, random_primer_length=6,
         barcodeRead=0, barcodeStart=3, barcodeLength=8,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'NLAIII96C8U3'
     self.longName = 'NLAIII, 96well CB: 8bp UMI: 3bp RP: 6bp'
     self.description = '96 well format. 3bp umi followed by 8bp barcode. R2 starts with a 6bp random primer'
Esempio n. 8
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the CELSeq 1 demultiplexer: 8bp barcode, then 4bp UMI.

     The barcode occupies offset 0 and the UMI offset 8 of read index 0. A
     6bp random primer is configured on read index 1. The barcode set alias
     is ``celseq1``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'celseq1'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=8, umiLength=4,
         barcodeRead=0, barcodeStart=0, barcodeLength=8,
         random_primer_read=1, random_primer_length=6,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'CS1C8U4'
     self.longName = 'CELSeq 1, CB: 8bp, UMI: 4bp'
     self.description = 'R1 starts with a 8bp cell barcode followed by a 4bp UMI. R2 ends with a 6bp random primer'
Esempio n. 9
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the CELSeq 2 demultiplexer with UMI and barcode on read 2.

     An 8bp UMI at offset 0 is followed by an 8bp barcode at offset 8, both
     on read index 1. A 6bp random primer is configured on read index 0.
     The barcode set alias is ``celseq2``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'celseq2'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=1, umiStart=0, umiLength=8,
         barcodeRead=1, barcodeStart=8, barcodeLength=8,
         random_primer_read=0, random_primer_length=6,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'CS2C8U8S'
     self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp'
     self.description = 'R2 starts with a longer 8bp UMI  followed by a 8bp cell barcode. R1 ends with a 6bp primer'
Esempio n. 10
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the CELSeq 2 demultiplexer without a random primer.

     A 6bp UMI at offset 0 is followed by an 8bp barcode at offset 6, both
     on read index 0. The random primer read and length are passed as
     ``None``. This method is not auto detectable. The barcode set alias is
     ``celseq2``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'celseq2'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=6,
         barcodeRead=0, barcodeStart=6, barcodeLength=8,
         random_primer_read=None, random_primer_length=None,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = False
     self.shortName = 'CS2C8U6NH'
     self.longName = 'CELSeq 2, CB: 8bp, UMI: 6bp, NO random primer'
     self.description = 'R1 starts with a 6bp UMI  followed by a 8bp cell barcode. R2 has no random primer. Use this demultiplexing method for VASA'
Esempio n. 11
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the CELSeq 2 demultiplexer for the NLAIII free barcodes.

     An 8bp UMI at offset 0 is followed by an 8bp barcode at offset 8, both
     on read index 0. A 6bp random primer is configured on read index 1.
     The barcode set alias is ``celseq2_noNla``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'celseq2_noNla'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=8,
         barcodeRead=0, barcodeStart=8, barcodeLength=8,
         random_primer_read=1, random_primer_length=6,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'CS2C8U8NNLA'
     self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp, NLAIII free'
     self.description = 'CEL-Seq2 without NLAIII digestable barcodes '
Esempio n. 12
0
    def demultiplex(self, records, **kwargs):
        """Demultiplex ``records``, optionally probing for a fixed sequence.

        Args:
            records: sequence of reads; ``records[0].sequence`` is inspected
                only when probing.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the first record must continue with
                ``CCTTGAACTTCTGGTTGTAG`` from offset 4 onwards.

        Returns:
            The result of ``UmiBarcodeDemuxMethod.demultiplex``.

        Raises:
            NonMultiplexable: when probing and the expected sequence is not
                found at offset 4 of the first record.
        """
        if kwargs.get('probe'):
            after_prefix = records[0].sequence[4:]
            if not after_prefix.startswith('CCTTGAACTTCTGGTTGTAG'):
                raise NonMultiplexable

        return UmiBarcodeDemuxMethod.demultiplex(self, records, **kwargs)
Esempio n. 13
0
 def __init__(self, barcodeFileParser, **kwargs):
     """Configure the Scartrace demultiplexer with the barcode on read 2.

     The 8bp barcode is taken from the start of read index 1 and the UMI
     length is zero. A 4bp random sequence is configured on read index 0
     with ``random_primer_end=False``. The barcode set alias is
     ``scartrace``.

     Args:
         barcodeFileParser: passed through to
             ``UmiBarcodeDemuxMethod.__init__``.
         **kwargs: additional keyword arguments, passed through as well.
     """
     alias = 'scartrace'
     self.barcodeFileAlias = alias
     UmiBarcodeDemuxMethod.__init__(
         self,
         umiRead=0, umiStart=0, umiLength=0,
         barcodeRead=1, barcodeStart=0, barcodeLength=8,
         barcodeFileAlias=alias,
         barcodeFileParser=barcodeFileParser,
         random_primer_end=False,
         random_primer_read=0,
         random_primer_length=4,
         **kwargs)

     # Descriptive attributes are set after the parent initialiser has run.
     self.autoDetectable = True
     self.shortName = 'SCARC8R2R4'
     self.longName = 'Scartrace, CB: 8bp, with 4bp random sequence in read 1'
     # NOTE(review): the text below reads "in read ," - the read number seems
     # to be missing; left unchanged here, confirm the intended wording.
     self.description = '384 well format. Scar amplicon demultiplexing, cell barcode in read , 4bp random sequence in R1'
Esempio n. 14
0
    def __init__(self, barcodeFileParser, **kwargs):
        """Configure the 384 well single cell CHIC demultiplexer.

        A 3bp UMI is followed by an 8bp barcode, both on read index 0. A 6bp
        random primer is configured on read index 1. The barcode set alias is
        ``maya_384NLA``. After the parent initialiser has run, the capture
        slice of read index 0 is replaced so that it starts one base after
        the UMI and barcode.

        Args:
            barcodeFileParser: passed through to
                ``UmiBarcodeDemuxMethod.__init__``.
            **kwargs: additional keyword arguments, passed through as well.
        """
        alias = 'maya_384NLA'
        self.barcodeFileAlias = alias
        UmiBarcodeDemuxMethod.__init__(
            self,
            umiRead=0, umiStart=0, umiLength=3,
            barcodeRead=0, barcodeStart=3, barcodeLength=8,
            random_primer_read=1, random_primer_length=6,
            barcodeFileAlias=alias,
            barcodeFileParser=barcodeFileParser,
            **kwargs)

        # Descriptive attributes are set after the parent initialiser has run.
        self.autoDetectable = True
        self.shortName = 'scCHIC384C8U3'
        self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, RP: 6BP'
        self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer'

        # dont capture the first base after the barcode and UMI
        # (presumably the single A mentioned in the description - confirm)
        capture_start = self.barcodeLength + self.umiLength + 1
        self.sequenceCapture[0] = slice(capture_start, None)
Esempio n. 15
0
    def __init__(self,
                 # default settings UMI
                 umiRead=0,
                 umiStart=0,
                 umiLength=8,
                 # default settings Barcode
                 barcodeRead=0,
                 barcodeStart=8,
                 barcodeLength=8,
                 # default settings Enzyme ID
                 enzymeRead=0,
                 enzymeStart=16,
                 enzymeLength=3,
                 # default settings ISPCR
                 ispcrRead=0,
                 ispcrStart=19,
                 ispcrLength=15,
                 ispcrSeq="CAGTGGTATCAGAGT",
                 barcodeFileParser=None,
                 barcodeFileAlias=None,
                 indexFileParser=None,
                 **kwargs):
        """Set up the base demultiplexer for restriction bisulfite reads.

        Stores the UMI, barcode, enzyme and ISPCR layout on the instance,
        creates an ``IlluminaBaseDemultiplexer`` and derives which part of
        each read is captured as sequence.

        Args:
            umiRead, umiStart, umiLength: read index (0: read 1, 1: read 2,
                ...), first base and length of the UMI.
            barcodeRead, barcodeStart, barcodeLength: same for the barcode.
            enzymeRead, enzymeStart, enzymeLength: same for the enzyme ID.
            ispcrRead, ispcrStart, ispcrLength: same for the ISPCR segment.
            ispcrSeq: accepted, but neither stored nor used by this
                constructor.
            barcodeFileParser: barcode parser, stored and passed on to
                ``UmiBarcodeDemuxMethod.__init__``.
            barcodeFileAlias: barcode set alias, stored and passed on to
                ``UmiBarcodeDemuxMethod.__init__``.
            indexFileParser: handed to ``IlluminaBaseDemultiplexer``.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.__init__``.

        Raises:
            NotImplementedError: when the UMI and barcode are on different
                reads, or when the layout would require capturing sequence
                around a region instead of after a leading prefix.
        """
        self.description = 'base class for restriction bisulfite'
        self.barcodeFileAlias = barcodeFileAlias
        self.barcodeFileParser = barcodeFileParser
        UmiBarcodeDemuxMethod.__init__(
            self,
            umiRead=umiRead, umiStart=umiStart, umiLength=umiLength,
            barcodeRead=barcodeRead, barcodeStart=barcodeStart,
            barcodeLength=barcodeLength,
            barcodeFileAlias=self.barcodeFileAlias,
            barcodeFileParser=barcodeFileParser,
            **kwargs)

        # Identification; the alias is read back from the instance after the
        # parent initialiser has run.
        self.barcodeSummary = self.barcodeFileAlias
        self.shortName = 'RB'
        self.longName = 'base class for restriction bisulfite'
        self.autoDetectable = False

        self.illumina_mux = IlluminaBaseDemultiplexer(
            indexFileParser=indexFileParser,
            indexFileAlias='illumina_merged_ThruPlex48S_RP')

        # Read layout: which read each segment is on, where it starts and
        # how long it is.
        self.umiRead, self.umiStart, self.umiLength = (
            umiRead, umiStart, umiLength)
        self.barcodeRead, self.barcodeStart, self.barcodeLength = (
            barcodeRead, barcodeStart, barcodeLength)
        self.enzymeRead, self.enzymeStart, self.enzymeLength = (
            enzymeRead, enzymeStart, enzymeLength)
        self.ispcrRead, self.ispcrStart, self.ispcrLength = (
            ispcrRead, ispcrStart, ispcrLength)

        # By default every read is captured completely; the read holding the
        # barcode is then trimmed by the length of its leading segments.
        self.sequenceCapture = [slice(None), slice(None)]
        if umiLength == 0:
            # Barcode only: it has to sit at the very start of its read.
            if barcodeStart != 0:
                raise NotImplementedError(
                    'Complicated slice where we need to capture around a region')
            prefix_length = barcodeLength
        else:
            if umiRead != barcodeRead:
                raise NotImplementedError()
            # Either the UMI or the barcode has to start the read.
            if umiStart != 0 and barcodeStart != 0:
                raise NotImplementedError(
                    'Complicated slice where we need to capture around a region')
            prefix_length = (
                barcodeLength + umiLength + enzymeLength + ispcrLength)
        self.sequenceCapture[barcodeRead] = slice(prefix_length, None)