def setUp(self):
    """Create paired fastq test files and a samplesheet in a temp directory.

    Writes 1000 records produced by ``td.generate_fastq_record`` (lane 1,
    index picked at random from the generated barcodes); elements 0-3 of
    each record go to the R1 file and the remaining elements to the R2
    file. A samplesheet is then written with one row per barcode except
    the last, plus one extra row on lane 2 that is intended not to match.

    Sets ``self.rootdir``, ``self.samplesheet``, ``self.indexes`` (barcode
    -> list of the first element of every record written with it),
    ``self.fastq_1`` and ``self.fastq_2``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files
    indexes = {td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): []}
    # Snapshot the keys once as a real list: random.choice() and slicing
    # both need a sequence, and dict.keys() is only a list on Python 2.
    index_keys = list(indexes)
    args = {'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True}

    r1_suffix = "_R1.fastq.gz"
    fd, f1 = tempfile.mkstemp(suffix=r1_suffix, dir=self.rootdir)
    os.close(fd)
    # Derive sibling paths from the known suffix instead of str.replace():
    # replace() would also rewrite any "_R1" that happens to occur in the
    # random part of the temp directory or file name.
    stem = f1[:-len(r1_suffix)]
    f2 = stem + "_R2.fastq.gz"

    f1h = fu.FastQWriter(f1)
    f2h = fu.FastQWriter(f2)
    try:
        for _ in range(1000):
            args['index'] = random.choice(index_keys)
            record = td.generate_fastq_record(**args)
            # Remember which records were written for each index
            indexes[args['index']].append(record[0])
            f1h.write(record[0:4])
            f2h.write(record[4:])
    finally:
        # Close the writers even if record generation or writing fails
        f1h.close()
        f2h.close()

    # Create a samplesheet to use for demultiplexing using all but the
    # last index
    samplesheet = stem + ".csv"
    sdata = []
    for n, index in enumerate(index_keys[:-1]):
        sdata.append([args['fcid'],
                      str(args['lane']),
                      "Sample_{}".format(n),
                      "unknown",
                      index,
                      "DemuxTest",
                      "0",
                      "",
                      "",
                      "DemuxTestProject"])

    # Add an entry that will not match: a copy of the last row with the
    # lane field changed to lane + 1 (every record above uses lane 1)
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
    def setUp(self):
        """Create paired fastq test files and a samplesheet in a temp directory.

        Writes 1000 records produced by ``td.generate_fastq_record`` (lane 1,
        index picked at random from the generated barcodes); elements 0-3 of
        each record go to the R1 file and the remaining elements to the R2
        file. A samplesheet is then written with one row per barcode except
        the last, plus one extra row on lane 2 that is intended not to match.

        Sets ``self.rootdir``, ``self.samplesheet``, ``self.indexes`` (barcode
        -> list of the first element of every record written with it),
        ``self.fastq_1`` and ``self.fastq_2``.
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

        # Generate some test fastq_files
        indexes = {
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): []
        }
        # Snapshot the keys once as a real list: random.choice() and slicing
        # both need a sequence, and dict.keys() is only a list on Python 2.
        index_keys = list(indexes)
        args = {
            'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True
        }

        r1_suffix = "_R1.fastq.gz"
        fd, f1 = tempfile.mkstemp(suffix=r1_suffix, dir=self.rootdir)
        os.close(fd)
        # Derive sibling paths from the known suffix instead of str.replace():
        # replace() would also rewrite any "_R1" that happens to occur in the
        # random part of the temp directory or file name.
        stem = f1[:-len(r1_suffix)]
        f2 = stem + "_R2.fastq.gz"

        f1h = fu.FastQWriter(f1)
        f2h = fu.FastQWriter(f2)
        try:
            for _ in range(1000):
                args['index'] = random.choice(index_keys)
                record = td.generate_fastq_record(**args)
                # Remember which records were written for each index
                indexes[args['index']].append(record[0])
                f1h.write(record[0:4])
                f2h.write(record[4:])
        finally:
            # Close the writers even if record generation or writing fails
            f1h.close()
            f2h.close()

        # Create a samplesheet to use for demultiplexing using all but the
        # last index
        samplesheet = stem + ".csv"
        sdata = []
        for n, index in enumerate(index_keys[:-1]):
            sdata.append([
                args['fcid'],
                str(args['lane']), "Sample_{}".format(n), "unknown",
                index, "DemuxTest", "0", "", "", "DemuxTestProject"
            ])

        # Add an entry that will not match: a copy of the last row with the
        # lane field changed to lane + 1 (every record above uses lane 1)
        s = copy.copy(sdata[-1])
        s[1] = str(args['lane'] + 1)
        sdata.append(s)

        self.samplesheet = td._write_samplesheet(sdata, samplesheet)
        self.indexes = indexes
        self.fastq_1 = f1
        self.fastq_2 = f2
    def setUp(self):
        """Set up a test fastq file with the barcode appended to each read.

        Generates 100 barcodes of length ``bclen`` and a random record count
        (10-100) for each, then writes that many records per barcode. The
        second element of every record gets the barcode plus a trailing "A"
        appended before it is written.

        Sets ``self.rootdir``, ``self.sequence_length``,
        ``self.barcode_length``, ``self.barcodes`` (barcode -> number of
        records written) and ``self.fastq_file``.
        """
        self.rootdir = tempfile.mkdtemp(
            prefix="test_extract_pre_casava_index_")

        # Set up a fastq file
        fd, fqfile = tempfile.mkstemp(dir=self.rootdir, suffix=".fastq.gz")
        os.close(fd)

        # Generate a set of indexes, each paired with a record count.
        # Duplicate barcodes collapse into a single dict entry.
        seqlen = 101
        bclen = 6
        barcode_seqs = [td.generate_barcode(bclen) for _ in range(100)]
        record_counts = [random.randint(10, 100) for _ in range(100)]
        barcodes = dict(zip(barcode_seqs, record_counts))

        # Generate fastq records and append the barcode
        fqw = fu.FastQWriter(fqfile)
        try:
            for barcode, nseqs in barcodes.items():
                for _ in range(nseqs):
                    rec = td.generate_fastq_record(
                        index=barcode, sequence_length=seqlen)
                    rec[1] = "{}{}A".format(rec[1], barcode)
                    fqw.write(rec)
        finally:
            # Close the writer even if generation or writing fails
            fqw.close()

        self.sequence_length = seqlen
        self.barcode_length = bclen
        self.barcodes = barcodes
        self.fastq_file = fqfile
 def setUp(self):
     """Set up a test fastq file with the barcode appended to each read.

     Generates 100 barcodes of length ``bclen`` and a random record count
     (10-100) for each, then writes that many records per barcode. The
     second element of every record gets the barcode plus a trailing "A"
     appended before it is written.

     Sets ``self.rootdir``, ``self.sequence_length``,
     ``self.barcode_length``, ``self.barcodes`` (barcode -> number of
     records written) and ``self.fastq_file``.
     """
     self.rootdir = tempfile.mkdtemp(prefix="test_extract_pre_casava_index_")

     # Set up a fastq file
     fd, fqfile = tempfile.mkstemp(dir=self.rootdir, suffix=".fastq.gz")
     os.close(fd)

     # Generate a set of indexes, each paired with a record count.
     # Duplicate barcodes collapse into a single dict entry.
     seqlen = 101
     bclen = 6
     barcode_seqs = [td.generate_barcode(bclen) for _ in range(100)]
     record_counts = [random.randint(10, 100) for _ in range(100)]
     barcodes = dict(zip(barcode_seqs, record_counts))

     # Generate fastq records and append the barcode
     fqw = fu.FastQWriter(fqfile)
     try:
         for barcode, nseqs in barcodes.items():
             for _ in range(nseqs):
                 rec = td.generate_fastq_record(
                     index=barcode, sequence_length=seqlen)
                 rec[1] = "{}{}A".format(rec[1], barcode)
                 fqw.write(rec)
     finally:
         # Close the writer even if generation or writing fails
         fqw.close()

     self.sequence_length = seqlen
     self.barcode_length = bclen
     self.barcodes = barcodes
     self.fastq_file = fqfile
    def setUp(self):
        """Create a fastq file with a known number of records per lane/index.

        For each lane 1-8 and each of four generated barcodes, writes a
        random number (10-99) of records from ``td.generate_fastq_record``.

        Sets ``self.rootdir``, ``self.example_fq`` (path of the file) and
        ``self.example_counts`` (``counts[lane][index]`` -> records written).
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_FastQParser_")

        # Create some fastq records with different properties
        lanes = range(1, 9)
        indexes = [td.generate_barcode() for _ in range(4)]
        fd, fqfile = tempfile.mkstemp(suffix=".fastq.gz", dir=self.rootdir)
        os.close(fd)

        # For each lane and index, create a random number of records
        counts = {}
        fqw = fu.FastQWriter(fqfile)
        try:
            for lane in lanes:
                counts[lane] = {}
                for ix in indexes:
                    no = random.randint(10, 99)
                    counts[lane][ix] = no
                    for _ in range(no):
                        fqw.write(
                            td.generate_fastq_record(lane=lane, index=ix))
        finally:
            # Close the writer even if generation or writing fails
            fqw.close()

        self.example_fq = fqfile
        self.example_counts = counts
 def setUp(self):
     """Create a fastq file with a known number of records per lane/index.

     For each lane 1-8 and each of four generated barcodes, writes a
     random number (10-99) of records from ``td.generate_fastq_record``.

     Sets ``self.rootdir``, ``self.example_fq`` (path of the file) and
     ``self.example_counts`` (``counts[lane][index]`` -> records written).
     """
     self.rootdir = tempfile.mkdtemp(prefix="test_FastQParser_")

     # Create some fastq records with different properties
     lanes = range(1, 9)
     indexes = [td.generate_barcode() for _ in range(4)]
     fd, fqfile = tempfile.mkstemp(suffix=".fastq.gz", dir=self.rootdir)
     os.close(fd)

     # For each lane and index, create a random number of records
     counts = {}
     fqw = fu.FastQWriter(fqfile)
     try:
         for lane in lanes:
             counts[lane] = {}
             for ix in indexes:
                 no = random.randint(10, 99)
                 counts[lane][ix] = no
                 for _ in range(no):
                     fqw.write(td.generate_fastq_record(lane=lane, index=ix))
     finally:
         # Close the writer even if generation or writing fails
         fqw.close()

     self.example_fq = fqfile
     self.example_counts = counts